💄 Optimized web articles

This commit is contained in:
2025-06-26 18:34:51 +08:00
parent 1a137fbb6a
commit f170793928
7 changed files with 93 additions and 8 deletions

View File

@ -0,0 +1,7 @@
namespace DysonNetwork.Sphere.Connection.WebReader;
/// <summary>
/// Result of scraping a single article page: the link preview embed for the page
/// together with the article body that was extracted from it.
/// </summary>
public class ScrapedArticle
{
/// <summary>
/// Link preview generated for the article URL. Declared non-nullable and
/// initialised with <c>null!</c>, so the creator is expected to assign it.
/// </summary>
public LinkEmbed LinkEmbed { get; set; } = null!;
/// <summary>
/// Extracted article body, or <c>null</c> when no content could be extracted.
/// </summary>
public string? Content { get; set; }
}

View File

@ -23,6 +23,11 @@ public class WebArticle : ModelBase
public WebFeed Feed { get; set; } = null!;
}
/// <summary>
/// Per-feed options, stored on <c>WebFeed.Config</c> as a jsonb column.
/// </summary>
public class WebFeedConfig
{
/// <summary>
/// When <c>true</c>, the feed scraper fetches each item's own page and uses the
/// scraped content for the article instead of the content carried by the feed item.
/// </summary>
public bool ScrapPage { get; set; }
}
public class WebFeed : ModelBase
{
public Guid Id { get; set; } = Guid.NewGuid();
@ -31,6 +36,7 @@ public class WebFeed : ModelBase
[MaxLength(8192)] public string? Description { get; set; }
[Column(TypeName = "jsonb")] public LinkEmbed? Preview { get; set; }
[Column(TypeName = "jsonb")] public WebFeedConfig Config { get; set; } = new();
public Guid PublisherId { get; set; }
public Publisher.Publisher Publisher { get; set; } = null!;

View File

@ -1,13 +1,15 @@
using System.ComponentModel.DataAnnotations;
using DysonNetwork.Sphere.Permission;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using Microsoft.EntityFrameworkCore;
namespace DysonNetwork.Sphere.Connection.WebReader;
[Authorize]
[ApiController]
[Route("feeds")]
public class WebFeedController(WebFeedService webFeedService) : ControllerBase
public class WebFeedController(WebFeedService webFeedService, AppDatabase database) : ControllerBase
{
public class CreateWebFeedRequest
{
@ -30,4 +32,31 @@ public class WebFeedController(WebFeedService webFeedService) : ControllerBase
var feed = await webFeedService.CreateWebFeedAsync(request, User);
return Ok(feed);
}
/// <summary>
/// Triggers a scrape of a single web feed, looked up by its identifier.
/// </summary>
/// <param name="feedId">Primary key of the feed to scrape.</param>
/// <returns>
/// <c>404 Not Found</c> when no feed has the given id; otherwise <c>200 OK</c>
/// once the scrape has completed.
/// </returns>
[HttpPost("scrape/{feedId}")]
[RequiredPermission("maintenance", "web-feeds")]
public async Task<ActionResult> ScrapeFeed(Guid feedId)
{
    var targetFeed = await database.Set<WebFeed>().FindAsync(feedId);
    if (targetFeed is null)
    {
        return NotFound();
    }

    await webFeedService.ScrapeFeedAsync(targetFeed);
    return Ok();
}
/// <summary>
/// Triggers a scrape of every stored web feed, one after another.
/// </summary>
/// <returns><c>200 OK</c> once all feeds have been scraped.</returns>
[HttpPost("scrape-all")]
[RequiredPermission("maintenance", "web-feeds")]
public async Task<ActionResult> ScrapeAllFeeds()
{
    // Feeds are materialised up front and processed sequentially; an exception
    // from any single feed propagates and stops the remaining ones.
    foreach (var currentFeed in await database.Set<WebFeed>().ToListAsync())
    {
        await webFeedService.ScrapeFeedAsync(currentFeed);
    }

    return Ok();
}
}

View File

@ -11,10 +11,11 @@ public class WebFeedService(
AppDatabase database,
IHttpClientFactory httpClientFactory,
ILogger<WebFeedService> logger,
AccountService accountService
)
AccountService accountService,
WebReaderService webReaderService
)
{
public async Task<WebFeed> CreateWebFeedAsync(WebFeedController.CreateWebFeedRequest dto, ClaimsPrincipal claims)
public async Task<WebFeed> CreateWebFeedAsync(WebFeedController.CreateWebFeedRequest request, ClaimsPrincipal claims)
{
if (claims.Identity?.Name == null)
{
@ -29,9 +30,9 @@ public class WebFeedService(
var feed = new WebFeed
{
Url = dto.Url,
Title = dto.Title,
Description = dto.Description,
Url = request.Url,
Title = request.Title,
Description = request.Description,
PublisherId = account.Id,
};
@ -73,14 +74,29 @@ public class WebFeedService(
continue;
}
var content = (item.Content as TextSyndicationContent)?.Text ?? item.Summary.Text;
LinkEmbed preview;
if (feed.Config.ScrapPage)
{
var scrapedArticle = await webReaderService.ScrapeArticleAsync(itemUrl, cancellationToken);
preview = scrapedArticle.LinkEmbed;
content = scrapedArticle.Content;
}
else
{
preview = await webReaderService.GetLinkPreviewAsync(itemUrl, cancellationToken);
}
var newArticle = new WebArticle
{
FeedId = feed.Id,
Title = item.Title.Text,
Url = itemUrl,
Author = item.Authors.FirstOrDefault()?.Name,
Content = (item.Content as TextSyndicationContent)?.Text ?? item.Summary.Text,
Content = content,
PublishedAt = item.PublishDate.UtcDateTime,
Preview = preview,
};
database.Set<WebArticle>().Add(newArticle);

View File

@ -2,6 +2,7 @@ using System.Globalization;
using AngleSharp;
using AngleSharp.Dom;
using DysonNetwork.Sphere.Storage;
using HtmlAgilityPack;
namespace DysonNetwork.Sphere.Connection.WebReader;
@ -17,6 +18,30 @@ public class WebReaderService(
private const string LinkPreviewCachePrefix = "scrap:preview:";
private const string LinkPreviewCacheGroup = "scrap:preview";
/// <summary>
/// Builds a <see cref="ScrapedArticle"/> for a URL by generating its link preview
/// and then extracting its article content.
/// </summary>
/// <param name="url">Address of the article page.</param>
/// <param name="cancellationToken">Token forwarded to both underlying operations.</param>
/// <returns>The link preview together with the extracted content (which may be null).</returns>
public async Task<ScrapedArticle> ScrapeArticleAsync(string url, CancellationToken cancellationToken = default)
{
    // Initializer members are evaluated in order: preview first, then content.
    return new ScrapedArticle
    {
        LinkEmbed = await GetLinkPreviewAsync(url, cancellationToken),
        Content = await GetArticleContentAsync(url, cancellationToken)
    };
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and returns the inner HTML of its
/// first <c>&lt;article&gt;</c> element.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="cancellationToken">Token used to cancel the request and the body read.</param>
/// <returns>
/// The inner HTML of the first <c>&lt;article&gt;</c> element, or <c>null</c> when the
/// page contains no such element.
/// </returns>
/// <exception cref="HttpRequestException">
/// Thrown when the request fails or the response status code is not successful.
/// </exception>
private async Task<string?> GetArticleContentAsync(string url, CancellationToken cancellationToken)
{
    var httpClient = httpClientFactory.CreateClient("WebReader");

    // HttpResponseMessage is IDisposable; dispose it so the response content and
    // its underlying connection resources are released on every path, including
    // when EnsureSuccessStatusCode throws.
    using var response = await httpClient.GetAsync(url, cancellationToken);
    response.EnsureSuccessStatusCode();
    var html = await response.Content.ReadAsStringAsync(cancellationToken);

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    // "//article" selects the first <article> node anywhere in the document;
    // SelectSingleNode returns null when there is no match.
    return doc.DocumentNode.SelectSingleNode("//article")?.InnerHtml;
}
/// <summary>
/// Generate a link preview embed from a URL
/// </summary>