💄 Optimized web articles
@@ -0,0 +1,7 @@
+namespace DysonNetwork.Sphere.Connection.WebReader;
+
+public class ScrapedArticle
+{
+    public LinkEmbed LinkEmbed { get; set; } = null!;
+    public string? Content { get; set; }
+}
@@ -23,6 +23,11 @@ public class WebArticle : ModelBase
     public WebFeed Feed { get; set; } = null!;
 }
 
+public class WebFeedConfig
+{
+    public bool ScrapPage { get; set; }
+}
+
 public class WebFeed : ModelBase
 {
     public Guid Id { get; set; } = Guid.NewGuid();
@@ -31,6 +36,7 @@ public class WebFeed : ModelBase
     [MaxLength(8192)] public string? Description { get; set; }
 
     [Column(TypeName = "jsonb")] public LinkEmbed? Preview { get; set; }
+    [Column(TypeName = "jsonb")] public WebFeedConfig Config { get; set; } = new();
 
     public Guid PublisherId { get; set; }
     public Publisher.Publisher Publisher { get; set; } = null!;
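Note: Config is initialized with "= new()", so ScrapPage defaults to false and existing feeds keep the preview-only path. A feed would opt into full-page scraping roughly like this (illustrative sketch only; "feed" stands for an already-loaded WebFeed, and AppDatabase is assumed to be the EF Core DbContext used elsewhere in this diff):

    // Illustrative only: opt an existing feed into full-page scraping.
    // ScrapPage defaults to false through the "= new()" initializer above.
    feed.Config = new WebFeedConfig { ScrapPage = true };
    await database.SaveChangesAsync();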
@@ -1,13 +1,15 @@
 using System.ComponentModel.DataAnnotations;
+using DysonNetwork.Sphere.Permission;
 using Microsoft.AspNetCore.Authorization;
 using Microsoft.AspNetCore.Mvc;
+using Microsoft.EntityFrameworkCore;
 
 namespace DysonNetwork.Sphere.Connection.WebReader;
 
 [Authorize]
 [ApiController]
 [Route("feeds")]
-public class WebFeedController(WebFeedService webFeedService) : ControllerBase
+public class WebFeedController(WebFeedService webFeedService, AppDatabase database) : ControllerBase
 {
     public class CreateWebFeedRequest
     {
@@ -30,4 +32,31 @@ public class WebFeedController(WebFeedService webFeedService) : ControllerBase
         var feed = await webFeedService.CreateWebFeedAsync(request, User);
         return Ok(feed);
     }
+
+    [HttpPost("scrape/{feedId}")]
+    [RequiredPermission("maintenance", "web-feeds")]
+    public async Task<ActionResult> ScrapeFeed(Guid feedId)
+    {
+        var feed = await database.Set<WebFeed>().FindAsync(feedId);
+        if (feed == null)
+        {
+            return NotFound();
+        }
+
+        await webFeedService.ScrapeFeedAsync(feed);
+        return Ok();
+    }
+
+    [HttpPost("scrape-all")]
+    [RequiredPermission("maintenance", "web-feeds")]
+    public async Task<ActionResult> ScrapeAllFeeds()
+    {
+        var feeds = await database.Set<WebFeed>().ToListAsync();
+        foreach (var feed in feeds)
+        {
+            await webFeedService.ScrapeFeedAsync(feed);
+        }
+
+        return Ok();
+    }
 }
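For reference, the two new maintenance endpoints could be exercised from a C# client along these lines (a sketch only: the base address, bearer token, and feed id are placeholders, and the caller must hold the maintenance/web-feeds permission enforced by RequiredPermission):

    // Hypothetical client; base address, token, and feed id are placeholders.
    using var http = new HttpClient { BaseAddress = new Uri("https://sphere.example.com/") };
    http.DefaultRequestHeaders.Authorization =
        new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", "placeholder-token");

    // Re-scrape a single feed by id...
    var feedId = Guid.Parse("00000000-0000-0000-0000-000000000000");
    var one = await http.PostAsync($"feeds/scrape/{feedId}", null);
    one.EnsureSuccessStatusCode();

    // ...or every registered feed at once.
    var all = await http.PostAsync("feeds/scrape-all", null);
    all.EnsureSuccessStatusCode();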
@@ -11,10 +11,11 @@ public class WebFeedService(
     AppDatabase database,
     IHttpClientFactory httpClientFactory,
     ILogger<WebFeedService> logger,
-    AccountService accountService
-)
+    AccountService accountService,
+    WebReaderService webReaderService
+)
 {
-    public async Task<WebFeed> CreateWebFeedAsync(WebFeedController.CreateWebFeedRequest dto, ClaimsPrincipal claims)
+    public async Task<WebFeed> CreateWebFeedAsync(WebFeedController.CreateWebFeedRequest request, ClaimsPrincipal claims)
     {
         if (claims.Identity?.Name == null)
         {
@@ -29,9 +30,9 @@ public class WebFeedService(
 
         var feed = new WebFeed
         {
-            Url = dto.Url,
-            Title = dto.Title,
-            Description = dto.Description,
+            Url = request.Url,
+            Title = request.Title,
+            Description = request.Description,
             PublisherId = account.Id,
         };
 
@@ -73,14 +74,29 @@ public class WebFeedService(
                 continue;
             }
 
+            var content = (item.Content as TextSyndicationContent)?.Text ?? item.Summary.Text;
+            LinkEmbed preview;
+
+            if (feed.Config.ScrapPage)
+            {
+                var scrapedArticle = await webReaderService.ScrapeArticleAsync(itemUrl, cancellationToken);
+                preview = scrapedArticle.LinkEmbed;
+                content = scrapedArticle.Content;
+            }
+            else
+            {
+                preview = await webReaderService.GetLinkPreviewAsync(itemUrl, cancellationToken);
+            }
+
             var newArticle = new WebArticle
             {
                 FeedId = feed.Id,
                 Title = item.Title.Text,
                 Url = itemUrl,
                 Author = item.Authors.FirstOrDefault()?.Name,
-                Content = (item.Content as TextSyndicationContent)?.Text ?? item.Summary.Text,
+                Content = content,
                 PublishedAt = item.PublishDate.UtcDateTime,
+                Preview = preview,
             };
 
             database.Set<WebArticle>().Add(newArticle);
@@ -2,6 +2,7 @@ using System.Globalization;
 using AngleSharp;
 using AngleSharp.Dom;
 using DysonNetwork.Sphere.Storage;
+using HtmlAgilityPack;
 
 namespace DysonNetwork.Sphere.Connection.WebReader;
 
@@ -17,6 +18,30 @@ public class WebReaderService(
     private const string LinkPreviewCachePrefix = "scrap:preview:";
     private const string LinkPreviewCacheGroup = "scrap:preview";
 
+    public async Task<ScrapedArticle> ScrapeArticleAsync(string url, CancellationToken cancellationToken = default)
+    {
+        var linkEmbed = await GetLinkPreviewAsync(url, cancellationToken);
+        var content = await GetArticleContentAsync(url, cancellationToken);
+        return new ScrapedArticle
+        {
+            LinkEmbed = linkEmbed,
+            Content = content
+        };
+    }
+
+    private async Task<string?> GetArticleContentAsync(string url, CancellationToken cancellationToken)
+    {
+        var httpClient = httpClientFactory.CreateClient("WebReader");
+        var response = await httpClient.GetAsync(url, cancellationToken);
+        response.EnsureSuccessStatusCode();
+        var html = await response.Content.ReadAsStringAsync(cancellationToken);
+        var doc = new HtmlDocument();
+        doc.LoadHtml(html);
+        var articleNode = doc.DocumentNode.SelectSingleNode("//article");
+        return articleNode?.InnerHtml;
+    }
+
+
     /// <summary>
     /// Generate a link preview embed from a URL
     /// </summary>
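A minimal consumption sketch for the new entry point (illustrative only: webReaderService is assumed to be resolved from DI the same way WebFeedService now receives it, the URL is a placeholder, and Content is null whenever the fetched page has no article element):

    // Illustrative only: scrape one page directly through WebReaderService.
    var article = await webReaderService.ScrapeArticleAsync("https://example.com/post/1");
    if (article.Content is null)
    {
        // No <article> element was found; only the link preview (article.LinkEmbed) is available.
    }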