💄 Optimized web articles
This commit is contained in:
		| @@ -0,0 +1,7 @@ | ||||
| namespace DysonNetwork.Sphere.Connection.WebReader; | ||||
|  | ||||
| /// <summary> | ||||
| /// Result of scraping one article page: its link preview together with the extracted body, when one was found. | ||||
| /// </summary> | ||||
| public class ScrapedArticle | ||||
| { | ||||
|     /// <summary>Link preview embed produced for the article URL.</summary> | ||||
|     public LinkEmbed LinkEmbed { get; set; } = default!; | ||||
|  | ||||
|     /// <summary>Extracted article markup, or <c>null</c> when nothing was extracted.</summary> | ||||
|     public string? Content { get; set; } | ||||
| } | ||||
| @@ -23,6 +23,11 @@ public class WebArticle : ModelBase | ||||
|     public WebFeed Feed { get; set; } = null!; | ||||
| } | ||||
|  | ||||
| /// <summary> | ||||
| /// Per-feed options, persisted with the feed as a jsonb column. | ||||
| /// </summary> | ||||
| public class WebFeedConfig | ||||
| { | ||||
|     /// <summary> | ||||
|     /// When <c>true</c>, each feed item's page is scraped and the scraped content replaces the | ||||
|     /// content carried by the feed item itself. Off unless explicitly enabled. | ||||
|     /// </summary> | ||||
|     public bool ScrapPage { get; set; } = false; | ||||
| } | ||||
|  | ||||
| public class WebFeed : ModelBase | ||||
| { | ||||
|     public Guid Id { get; set; } = Guid.NewGuid(); | ||||
| @@ -31,6 +36,7 @@ public class WebFeed : ModelBase | ||||
|     [MaxLength(8192)] public string? Description { get; set; } | ||||
|      | ||||
|     [Column(TypeName = "jsonb")] public LinkEmbed? Preview { get; set; } | ||||
|     [Column(TypeName = "jsonb")] public WebFeedConfig Config { get; set; } = new(); | ||||
|  | ||||
|     public Guid PublisherId { get; set; } | ||||
|     public Publisher.Publisher Publisher { get; set; } = null!; | ||||
|   | ||||
| @@ -1,13 +1,15 @@ | ||||
| using System.ComponentModel.DataAnnotations; | ||||
| using DysonNetwork.Sphere.Permission; | ||||
| using Microsoft.AspNetCore.Authorization; | ||||
| using Microsoft.AspNetCore.Mvc; | ||||
| using Microsoft.EntityFrameworkCore; | ||||
|  | ||||
| namespace DysonNetwork.Sphere.Connection.WebReader; | ||||
|  | ||||
| [Authorize] | ||||
| [ApiController] | ||||
| [Route("feeds")] | ||||
| public class WebFeedController(WebFeedService webFeedService) : ControllerBase | ||||
| public class WebFeedController(WebFeedService webFeedService, AppDatabase database) : ControllerBase | ||||
| { | ||||
|     public class CreateWebFeedRequest | ||||
|     { | ||||
| @@ -30,4 +32,31 @@ public class WebFeedController(WebFeedService webFeedService) : ControllerBase | ||||
|         var feed = await webFeedService.CreateWebFeedAsync(request, User); | ||||
|         return Ok(feed); | ||||
|     } | ||||
|      | ||||
|     /// <summary> | ||||
|     /// Runs a scrape for a single feed, looked up by its key. | ||||
|     /// </summary> | ||||
|     /// <param name="feedId">Identifier of the feed to scrape.</param> | ||||
|     /// <returns>404 when no feed has this id; otherwise 200 once the scrape call has completed.</returns> | ||||
|     [HttpPost("scrape/{feedId}")] | ||||
|     [RequiredPermission("maintenance", "web-feeds")] | ||||
|     public async Task<ActionResult> ScrapeFeed(Guid feedId) | ||||
|     { | ||||
|         var target = await database.Set<WebFeed>().FindAsync(feedId); | ||||
|         if (target is not null) | ||||
|         { | ||||
|             await webFeedService.ScrapeFeedAsync(target); | ||||
|             return Ok(); | ||||
|         } | ||||
|  | ||||
|         return NotFound(); | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Runs a scrape for every stored feed, one after another. | ||||
|     /// </summary> | ||||
|     /// <returns>200 once every feed has been processed; an exception from any feed stops the run.</returns> | ||||
|     [HttpPost("scrape-all")] | ||||
|     [RequiredPermission("maintenance", "web-feeds")] | ||||
|     public async Task<ActionResult> ScrapeAllFeeds() | ||||
|     { | ||||
|         // Load the complete feed list first so the query is finished before any scraping starts. | ||||
|         var allFeeds = await database.Set<WebFeed>().ToListAsync(); | ||||
|  | ||||
|         // Feeds are handled strictly sequentially; each scrape is awaited before the next begins. | ||||
|         foreach (var current in allFeeds) | ||||
|         { | ||||
|             await webFeedService.ScrapeFeedAsync(current); | ||||
|         } | ||||
|  | ||||
|         return Ok(); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -11,10 +11,11 @@ public class WebFeedService( | ||||
|     AppDatabase database, | ||||
|     IHttpClientFactory httpClientFactory, | ||||
|     ILogger<WebFeedService> logger, | ||||
|     AccountService accountService | ||||
| ) | ||||
|     AccountService accountService, | ||||
|     WebReaderService webReaderService | ||||
|     ) | ||||
| { | ||||
|     public async Task<WebFeed> CreateWebFeedAsync(WebFeedController.CreateWebFeedRequest dto, ClaimsPrincipal claims) | ||||
|     public async Task<WebFeed> CreateWebFeedAsync(WebFeedController.CreateWebFeedRequest request, ClaimsPrincipal claims) | ||||
|     { | ||||
|         if (claims.Identity?.Name == null) | ||||
|         { | ||||
| @@ -29,9 +30,9 @@ public class WebFeedService( | ||||
|  | ||||
|         var feed = new WebFeed | ||||
|         { | ||||
|             Url = dto.Url, | ||||
|             Title = dto.Title, | ||||
|             Description = dto.Description, | ||||
|             Url = request.Url, | ||||
|             Title = request.Title, | ||||
|             Description = request.Description, | ||||
|             PublisherId = account.Id, | ||||
|         }; | ||||
|  | ||||
| @@ -73,14 +74,29 @@ public class WebFeedService( | ||||
|                 continue; | ||||
|             } | ||||
|  | ||||
|             var content = (item.Content as TextSyndicationContent)?.Text ?? item.Summary.Text; | ||||
|             LinkEmbed preview; | ||||
|  | ||||
|             if (feed.Config.ScrapPage) | ||||
|             { | ||||
|                 var scrapedArticle = await webReaderService.ScrapeArticleAsync(itemUrl, cancellationToken); | ||||
|                 preview = scrapedArticle.LinkEmbed; | ||||
|                 content = scrapedArticle.Content; | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                 preview = await webReaderService.GetLinkPreviewAsync(itemUrl, cancellationToken); | ||||
|             } | ||||
|  | ||||
|             var newArticle = new WebArticle | ||||
|             { | ||||
|                 FeedId = feed.Id, | ||||
|                 Title = item.Title.Text, | ||||
|                 Url = itemUrl, | ||||
|                 Author = item.Authors.FirstOrDefault()?.Name, | ||||
|                 Content = (item.Content as TextSyndicationContent)?.Text ?? item.Summary.Text, | ||||
|                 Content = content, | ||||
|                 PublishedAt = item.PublishDate.UtcDateTime, | ||||
|                 Preview = preview, | ||||
|             }; | ||||
|  | ||||
|             database.Set<WebArticle>().Add(newArticle); | ||||
|   | ||||
| @@ -2,6 +2,7 @@ using System.Globalization; | ||||
| using AngleSharp; | ||||
| using AngleSharp.Dom; | ||||
| using DysonNetwork.Sphere.Storage; | ||||
| using HtmlAgilityPack; | ||||
|  | ||||
| namespace DysonNetwork.Sphere.Connection.WebReader; | ||||
|  | ||||
| @@ -17,6 +18,30 @@ public class WebReaderService( | ||||
|     private const string LinkPreviewCachePrefix = "scrap:preview:"; | ||||
|     private const string LinkPreviewCacheGroup = "scrap:preview"; | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Builds a <see cref="ScrapedArticle"/> for a URL: first the link preview, then the article content. | ||||
|     /// </summary> | ||||
|     /// <param name="url">Address of the article page.</param> | ||||
|     /// <param name="cancellationToken">Token passed on to both lookups.</param> | ||||
|     /// <returns>The preview and the extracted content (content may be <c>null</c>).</returns> | ||||
|     public async Task<ScrapedArticle> ScrapeArticleAsync(string url, CancellationToken cancellationToken = default) | ||||
|     { | ||||
|         var article = new ScrapedArticle(); | ||||
|  | ||||
|         // Same order as before: the preview is resolved before the page body is fetched. | ||||
|         article.LinkEmbed = await GetLinkPreviewAsync(url, cancellationToken); | ||||
|         article.Content = await GetArticleContentAsync(url, cancellationToken); | ||||
|  | ||||
|         return article; | ||||
|     } | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Downloads the page at <paramref name="url"/> and extracts the markup inside its article element. | ||||
|     /// </summary> | ||||
|     /// <param name="url">Address of the page to download.</param> | ||||
|     /// <param name="cancellationToken">Token forwarded to the HTTP request and to the body read.</param> | ||||
|     /// <returns>Inner HTML of the matched article element, or <c>null</c> when the page has none.</returns> | ||||
|     /// <exception cref="HttpRequestException">The response status code does not indicate success.</exception> | ||||
|     private async Task<string?> GetArticleContentAsync(string url, CancellationToken cancellationToken) | ||||
|     { | ||||
|         var httpClient = httpClientFactory.CreateClient("WebReader"); | ||||
|  | ||||
|         // The response owns the underlying content stream/connection; dispose it on every exit path, | ||||
|         // including when EnsureSuccessStatusCode throws. | ||||
|         using var response = await httpClient.GetAsync(url, cancellationToken); | ||||
|         response.EnsureSuccessStatusCode(); | ||||
|         var html = await response.Content.ReadAsStringAsync(cancellationToken); | ||||
|  | ||||
|         var doc = new HtmlDocument(); | ||||
|         doc.LoadHtml(html); | ||||
|  | ||||
|         // SelectSingleNode yields null when no node matches, which is passed through as a null result. | ||||
|         var articleNode = doc.DocumentNode.SelectSingleNode("//article"); | ||||
|         return articleNode?.InnerHtml; | ||||
|     } | ||||
|  | ||||
|  | ||||
|     /// <summary> | ||||
|     /// Generate a link preview embed from a URL | ||||
|     /// </summary> | ||||
|   | ||||
		Reference in New Issue
	
	Block a user