💄 Optimized web articles

This commit is contained in:
LittleSheep 2025-06-26 18:34:51 +08:00
parent 1a137fbb6a
commit f170793928
7 changed files with 93 additions and 8 deletions

View File

@ -0,0 +1,7 @@
namespace DysonNetwork.Sphere.Connection.WebReader;
/// <summary>
/// Result of scraping a single article page: the link preview for the URL
/// together with the article body extracted from the page markup.
/// </summary>
public class ScrapedArticle
{
/// <summary>Link preview produced for the article URL. Expected to be set by whoever constructs the instance.</summary>
public LinkEmbed LinkEmbed { get; set; } = null!;
/// <summary>Extracted article markup, or null when no article body could be located on the page.</summary>
public string? Content { get; set; }
}

View File

@ -23,6 +23,11 @@ public class WebArticle : ModelBase
public WebFeed Feed { get; set; } = null!;
}
/// <summary>
/// Per-feed options. WebFeed keeps an instance of this in its jsonb Config column.
/// </summary>
public class WebFeedConfig
{
/// <summary>
/// When true, the feed scraper fetches each item's page and replaces the content
/// carried by the feed item with the scraped content; when false it only fetches
/// a link preview for the item URL.
/// </summary>
public bool ScrapPage { get; set; }
}
public class WebFeed : ModelBase
{
public Guid Id { get; set; } = Guid.NewGuid();
@ -31,6 +36,7 @@ public class WebFeed : ModelBase
[MaxLength(8192)] public string? Description { get; set; }
[Column(TypeName = "jsonb")] public LinkEmbed? Preview { get; set; }
[Column(TypeName = "jsonb")] public WebFeedConfig Config { get; set; } = new();
public Guid PublisherId { get; set; }
public Publisher.Publisher Publisher { get; set; } = null!;

View File

@ -1,13 +1,15 @@
using System.ComponentModel.DataAnnotations;
using DysonNetwork.Sphere.Permission;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using Microsoft.EntityFrameworkCore;
namespace DysonNetwork.Sphere.Connection.WebReader;
[Authorize]
[ApiController]
[Route("feeds")]
public class WebFeedController(WebFeedService webFeedService) : ControllerBase
public class WebFeedController(WebFeedService webFeedService, AppDatabase database) : ControllerBase
{
public class CreateWebFeedRequest
{
@ -30,4 +32,31 @@ public class WebFeedController(WebFeedService webFeedService) : ControllerBase
var feed = await webFeedService.CreateWebFeedAsync(request, User);
return Ok(feed);
}
/// <summary>
/// Runs a scrape for a single web feed identified by its primary key.
/// </summary>
/// <param name="feedId">Key of the feed to scrape, taken from the route.</param>
/// <returns>
/// A 404 result when no feed has the given key; otherwise a 200 result once
/// the scrape call has completed.
/// </returns>
[HttpPost("scrape/{feedId}")]
[RequiredPermission("maintenance", "web-feeds")]
public async Task<ActionResult> ScrapeFeed(Guid feedId)
{
    // Look the feed up by key and bail out early when it does not exist.
    if (await database.Set<WebFeed>().FindAsync(feedId) is not { } target)
    {
        return NotFound();
    }

    await webFeedService.ScrapeFeedAsync(target);
    return Ok();
}
/// <summary>
/// Runs a scrape for every stored web feed, one feed at a time.
/// </summary>
/// <returns>A 200 result after the last feed has been scraped.</returns>
[HttpPost("scrape-all")]
[RequiredPermission("maintenance", "web-feeds")]
public async Task<ActionResult> ScrapeAllFeeds()
{
    // The feed list is fully loaded before the first scrape starts; feeds are
    // then processed sequentially in the order the query returned them.
    foreach (var current in await database.Set<WebFeed>().ToListAsync())
    {
        await webFeedService.ScrapeFeedAsync(current);
    }

    return Ok();
}
}

View File

@ -11,10 +11,11 @@ public class WebFeedService(
AppDatabase database,
IHttpClientFactory httpClientFactory,
ILogger<WebFeedService> logger,
AccountService accountService
)
AccountService accountService,
WebReaderService webReaderService
)
{
public async Task<WebFeed> CreateWebFeedAsync(WebFeedController.CreateWebFeedRequest dto, ClaimsPrincipal claims)
public async Task<WebFeed> CreateWebFeedAsync(WebFeedController.CreateWebFeedRequest request, ClaimsPrincipal claims)
{
if (claims.Identity?.Name == null)
{
@ -29,9 +30,9 @@ public class WebFeedService(
var feed = new WebFeed
{
Url = dto.Url,
Title = dto.Title,
Description = dto.Description,
Url = request.Url,
Title = request.Title,
Description = request.Description,
PublisherId = account.Id,
};
@ -73,14 +74,29 @@ public class WebFeedService(
continue;
}
var content = (item.Content as TextSyndicationContent)?.Text ?? item.Summary.Text;
LinkEmbed preview;
if (feed.Config.ScrapPage)
{
var scrapedArticle = await webReaderService.ScrapeArticleAsync(itemUrl, cancellationToken);
preview = scrapedArticle.LinkEmbed;
content = scrapedArticle.Content;
}
else
{
preview = await webReaderService.GetLinkPreviewAsync(itemUrl, cancellationToken);
}
var newArticle = new WebArticle
{
FeedId = feed.Id,
Title = item.Title.Text,
Url = itemUrl,
Author = item.Authors.FirstOrDefault()?.Name,
Content = (item.Content as TextSyndicationContent)?.Text ?? item.Summary.Text,
Content = content,
PublishedAt = item.PublishDate.UtcDateTime,
Preview = preview,
};
database.Set<WebArticle>().Add(newArticle);

View File

@ -2,6 +2,7 @@ using System.Globalization;
using AngleSharp;
using AngleSharp.Dom;
using DysonNetwork.Sphere.Storage;
using HtmlAgilityPack;
namespace DysonNetwork.Sphere.Connection.WebReader;
@ -17,6 +18,30 @@ public class WebReaderService(
private const string LinkPreviewCachePrefix = "scrap:preview:";
private const string LinkPreviewCacheGroup = "scrap:preview";
/// <summary>
/// Scrapes an article page: builds the link preview for the URL and then
/// extracts the article body from the same URL.
/// </summary>
/// <param name="url">Address of the article page.</param>
/// <param name="cancellationToken">Token forwarded to both underlying calls.</param>
/// <returns>The preview and the extracted content bundled together.</returns>
public async Task<ScrapedArticle> ScrapeArticleAsync(string url, CancellationToken cancellationToken = default) =>
    new()
    {
        // Initializer members run top to bottom: preview first, content second.
        LinkEmbed = await GetLinkPreviewAsync(url, cancellationToken),
        Content = await GetArticleContentAsync(url, cancellationToken)
    };
/// <summary>
/// Downloads the page at <paramref name="url"/> and returns the inner HTML of
/// the node matched by the XPath <c>//article</c>.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="cancellationToken">Token passed to the HTTP request and the body read.</param>
/// <returns>The matched node's inner HTML, or null when the lookup yields no node.</returns>
/// <exception cref="HttpRequestException">The response status code does not indicate success.</exception>
private async Task<string?> GetArticleContentAsync(string url, CancellationToken cancellationToken)
{
    var httpClient = httpClientFactory.CreateClient("WebReader");

    // Dispose the response so its content stream and the underlying connection
    // lease are released as soon as the body has been read.
    using var response = await httpClient.GetAsync(url, cancellationToken);
    response.EnsureSuccessStatusCode();
    var html = await response.Content.ReadAsStringAsync(cancellationToken);

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    // Only the first match is used; a page without one produces null.
    var articleNode = doc.DocumentNode.SelectSingleNode("//article");
    return articleNode?.InnerHtml;
}
/// <summary>
/// Generate a link preview embed from a URL
/// </summary>

View File

@ -23,6 +23,7 @@
<PackageReference Include="EFCore.BulkExtensions.PostgreSql" Version="9.0.1" />
<PackageReference Include="EFCore.NamingConventions" Version="9.0.0" />
<PackageReference Include="FFMpegCore" Version="5.2.0" />
<PackageReference Include="HtmlAgilityPack" Version="1.12.1" />
<PackageReference Include="Livekit.Server.Sdk.Dotnet" Version="1.0.8" />
<PackageReference Include="MailKit" Version="4.11.0" />
<PackageReference Include="MaxMind.GeoIP2" Version="5.3.0" />

View File

@ -78,6 +78,7 @@
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AStackFrameIterator_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F3bef61b8a21d4c8e96872ecdd7782fa0e55000_003F7a_003F870020d0_003FStackFrameIterator_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AStackFrameIterator_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003Fb6f0571a6bc744b0b551fd4578292582e54c00_003Fdf_003F3fcdc4d2_003FStackFrameIterator_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AStatusCodeResult_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2024_002E3_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F0b5acdd962e549369896cece0026e556214600_003F7c_003F8b7572ae_003FStatusCodeResult_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003ASyndicationFeed_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F5b43b9cf654743f8b9a2eee23c625dd21dd30_003Fad_003Fd26b4d73_003FSyndicationFeed_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003ATagging_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2024_002E3_003Fresharper_002Dhost_003FSourcesCache_003F36f4c2e6baa65ba603de42eedad12ea36845aa35a910a6a82d82baf688e3e1_003FTagging_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AThrowHelper_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003Fb6f0571a6bc744b0b551fd4578292582e54c00_003F12_003Fe0a28ad6_003FThrowHelper_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003ATotp_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F48c9d2a1b3c84b32b36ebc6f20a927ea4600_003F7b_003Ff98e5727_003FTotp_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>