💄 Optimized web articles
This commit is contained in:
parent
1a137fbb6a
commit
f170793928
@ -0,0 +1,7 @@
|
|||||||
|
namespace DysonNetwork.Sphere.Connection.WebReader;
|
||||||
|
|
||||||
|
/// <summary>
/// Outcome of scraping a single article page: the link preview built for its URL
/// together with whatever article body could be extracted from the page.
/// </summary>
public class ScrapedArticle
{
    /// <summary>Link preview embed generated for the article URL.</summary>
    public LinkEmbed LinkEmbed { get; set; } = default!;

    /// <summary>Extracted article body; <c>null</c> when nothing was extracted.</summary>
    public string? Content { get; set; }
}
|
@ -23,6 +23,11 @@ public class WebArticle : ModelBase
|
|||||||
public WebFeed Feed { get; set; } = null!;
|
public WebFeed Feed { get; set; } = null!;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Per-feed options, stored alongside the feed in its <c>Config</c> jsonb column.
/// </summary>
public class WebFeedConfig
{
    /// <summary>
    /// When <c>true</c>, each feed item's page is scraped for its article content
    /// instead of relying on the content carried in the feed entry itself.
    /// Defaults to <c>false</c>.
    /// </summary>
    public bool ScrapPage { get; set; } = false;
}
|
||||||
|
|
||||||
public class WebFeed : ModelBase
|
public class WebFeed : ModelBase
|
||||||
{
|
{
|
||||||
public Guid Id { get; set; } = Guid.NewGuid();
|
public Guid Id { get; set; } = Guid.NewGuid();
|
||||||
@ -31,6 +36,7 @@ public class WebFeed : ModelBase
|
|||||||
[MaxLength(8192)] public string? Description { get; set; }
|
[MaxLength(8192)] public string? Description { get; set; }
|
||||||
|
|
||||||
[Column(TypeName = "jsonb")] public LinkEmbed? Preview { get; set; }
|
[Column(TypeName = "jsonb")] public LinkEmbed? Preview { get; set; }
|
||||||
|
[Column(TypeName = "jsonb")] public WebFeedConfig Config { get; set; } = new();
|
||||||
|
|
||||||
public Guid PublisherId { get; set; }
|
public Guid PublisherId { get; set; }
|
||||||
public Publisher.Publisher Publisher { get; set; } = null!;
|
public Publisher.Publisher Publisher { get; set; } = null!;
|
||||||
|
@ -1,13 +1,15 @@
|
|||||||
using System.ComponentModel.DataAnnotations;
|
using System.ComponentModel.DataAnnotations;
|
||||||
|
using DysonNetwork.Sphere.Permission;
|
||||||
using Microsoft.AspNetCore.Authorization;
|
using Microsoft.AspNetCore.Authorization;
|
||||||
using Microsoft.AspNetCore.Mvc;
|
using Microsoft.AspNetCore.Mvc;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
|
||||||
namespace DysonNetwork.Sphere.Connection.WebReader;
|
namespace DysonNetwork.Sphere.Connection.WebReader;
|
||||||
|
|
||||||
[Authorize]
|
[Authorize]
|
||||||
[ApiController]
|
[ApiController]
|
||||||
[Route("feeds")]
|
[Route("feeds")]
|
||||||
public class WebFeedController(WebFeedService webFeedService) : ControllerBase
|
public class WebFeedController(WebFeedService webFeedService, AppDatabase database) : ControllerBase
|
||||||
{
|
{
|
||||||
public class CreateWebFeedRequest
|
public class CreateWebFeedRequest
|
||||||
{
|
{
|
||||||
@ -30,4 +32,31 @@ public class WebFeedController(WebFeedService webFeedService) : ControllerBase
|
|||||||
var feed = await webFeedService.CreateWebFeedAsync(request, User);
|
var feed = await webFeedService.CreateWebFeedAsync(request, User);
|
||||||
return Ok(feed);
|
return Ok(feed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Triggers a scrape of a single web feed, looked up by its identifier.
/// </summary>
/// <param name="feedId">Identifier of the feed to scrape.</param>
/// <returns>
/// 404 when no feed with that identifier exists; otherwise 200 once the scrape call has completed.
/// </returns>
[HttpPost("scrape/{feedId}")]
[RequiredPermission("maintenance", "web-feeds")]
public async Task<ActionResult> ScrapeFeed(Guid feedId)
{
    var feeds = database.Set<WebFeed>();

    // Bail out early when the key does not resolve to a stored feed.
    if (await feeds.FindAsync(feedId) is not { } target)
    {
        return NotFound();
    }

    await webFeedService.ScrapeFeedAsync(target);
    return Ok();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Triggers a scrape of every stored web feed, one feed at a time.
/// </summary>
/// <returns>200 once the scrape call has completed for every feed.</returns>
[HttpPost("scrape-all")]
[RequiredPermission("maintenance", "web-feeds")]
public async Task<ActionResult> ScrapeAllFeeds()
{
    // Load the full list up front, then process the feeds sequentially.
    var allFeeds = await database.Set<WebFeed>().ToListAsync();

    for (var index = 0; index < allFeeds.Count; index++)
    {
        await webFeedService.ScrapeFeedAsync(allFeeds[index]);
    }

    return Ok();
}
|
||||||
}
|
}
|
||||||
|
@ -11,10 +11,11 @@ public class WebFeedService(
|
|||||||
AppDatabase database,
|
AppDatabase database,
|
||||||
IHttpClientFactory httpClientFactory,
|
IHttpClientFactory httpClientFactory,
|
||||||
ILogger<WebFeedService> logger,
|
ILogger<WebFeedService> logger,
|
||||||
AccountService accountService
|
AccountService accountService,
|
||||||
)
|
WebReaderService webReaderService
|
||||||
|
)
|
||||||
{
|
{
|
||||||
public async Task<WebFeed> CreateWebFeedAsync(WebFeedController.CreateWebFeedRequest dto, ClaimsPrincipal claims)
|
public async Task<WebFeed> CreateWebFeedAsync(WebFeedController.CreateWebFeedRequest request, ClaimsPrincipal claims)
|
||||||
{
|
{
|
||||||
if (claims.Identity?.Name == null)
|
if (claims.Identity?.Name == null)
|
||||||
{
|
{
|
||||||
@ -29,9 +30,9 @@ public class WebFeedService(
|
|||||||
|
|
||||||
var feed = new WebFeed
|
var feed = new WebFeed
|
||||||
{
|
{
|
||||||
Url = dto.Url,
|
Url = request.Url,
|
||||||
Title = dto.Title,
|
Title = request.Title,
|
||||||
Description = dto.Description,
|
Description = request.Description,
|
||||||
PublisherId = account.Id,
|
PublisherId = account.Id,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -73,14 +74,29 @@ public class WebFeedService(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var content = (item.Content as TextSyndicationContent)?.Text ?? item.Summary.Text;
|
||||||
|
LinkEmbed preview;
|
||||||
|
|
||||||
|
if (feed.Config.ScrapPage)
|
||||||
|
{
|
||||||
|
var scrapedArticle = await webReaderService.ScrapeArticleAsync(itemUrl, cancellationToken);
|
||||||
|
preview = scrapedArticle.LinkEmbed;
|
||||||
|
content = scrapedArticle.Content;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
preview = await webReaderService.GetLinkPreviewAsync(itemUrl, cancellationToken);
|
||||||
|
}
|
||||||
|
|
||||||
var newArticle = new WebArticle
|
var newArticle = new WebArticle
|
||||||
{
|
{
|
||||||
FeedId = feed.Id,
|
FeedId = feed.Id,
|
||||||
Title = item.Title.Text,
|
Title = item.Title.Text,
|
||||||
Url = itemUrl,
|
Url = itemUrl,
|
||||||
Author = item.Authors.FirstOrDefault()?.Name,
|
Author = item.Authors.FirstOrDefault()?.Name,
|
||||||
Content = (item.Content as TextSyndicationContent)?.Text ?? item.Summary.Text,
|
Content = content,
|
||||||
PublishedAt = item.PublishDate.UtcDateTime,
|
PublishedAt = item.PublishDate.UtcDateTime,
|
||||||
|
Preview = preview,
|
||||||
};
|
};
|
||||||
|
|
||||||
database.Set<WebArticle>().Add(newArticle);
|
database.Set<WebArticle>().Add(newArticle);
|
||||||
|
@ -2,6 +2,7 @@ using System.Globalization;
|
|||||||
using AngleSharp;
|
using AngleSharp;
|
||||||
using AngleSharp.Dom;
|
using AngleSharp.Dom;
|
||||||
using DysonNetwork.Sphere.Storage;
|
using DysonNetwork.Sphere.Storage;
|
||||||
|
using HtmlAgilityPack;
|
||||||
|
|
||||||
namespace DysonNetwork.Sphere.Connection.WebReader;
|
namespace DysonNetwork.Sphere.Connection.WebReader;
|
||||||
|
|
||||||
@ -17,6 +18,30 @@ public class WebReaderService(
|
|||||||
private const string LinkPreviewCachePrefix = "scrap:preview:";
|
private const string LinkPreviewCachePrefix = "scrap:preview:";
|
||||||
private const string LinkPreviewCacheGroup = "scrap:preview";
|
private const string LinkPreviewCacheGroup = "scrap:preview";
|
||||||
|
|
||||||
|
/// <summary>
/// Builds a <see cref="ScrapedArticle"/> for a URL by generating its link preview
/// and then extracting the article content from the page.
/// </summary>
/// <param name="url">Address of the article page.</param>
/// <param name="cancellationToken">Token forwarded to both underlying calls.</param>
/// <returns>The link preview paired with the extracted content (which may be <c>null</c>).</returns>
public async Task<ScrapedArticle> ScrapeArticleAsync(string url, CancellationToken cancellationToken = default)
{
    // Initializer members are evaluated top to bottom: preview first, then content.
    var article = new ScrapedArticle
    {
        LinkEmbed = await GetLinkPreviewAsync(url, cancellationToken),
        Content = await GetArticleContentAsync(url, cancellationToken)
    };

    return article;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Downloads the page at <paramref name="url"/> and returns the inner HTML of its
/// first <c>article</c> element.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <param name="cancellationToken">Token forwarded to the HTTP request and the body read.</param>
/// <returns>
/// Inner HTML of the first <c>article</c> node, or <c>null</c> when the document contains none.
/// </returns>
/// <exception cref="HttpRequestException">The response status code was not successful.</exception>
private async Task<string?> GetArticleContentAsync(string url, CancellationToken cancellationToken)
{
    var httpClient = httpClientFactory.CreateClient("WebReader");

    // The response is disposable; release it as soon as the body has been read
    // instead of leaving cleanup to the garbage collector.
    using var response = await httpClient.GetAsync(url, cancellationToken);
    response.EnsureSuccessStatusCode();
    var html = await response.Content.ReadAsStringAsync(cancellationToken);

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    // SelectSingleNode yields null when the XPath matches nothing.
    return doc.DocumentNode.SelectSingleNode("//article")?.InnerHtml;
}
|
||||||
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Generate a link preview embed from a URL
|
/// Generate a link preview embed from a URL
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
<PackageReference Include="EFCore.BulkExtensions.PostgreSql" Version="9.0.1" />
|
<PackageReference Include="EFCore.BulkExtensions.PostgreSql" Version="9.0.1" />
|
||||||
<PackageReference Include="EFCore.NamingConventions" Version="9.0.0" />
|
<PackageReference Include="EFCore.NamingConventions" Version="9.0.0" />
|
||||||
<PackageReference Include="FFMpegCore" Version="5.2.0" />
|
<PackageReference Include="FFMpegCore" Version="5.2.0" />
|
||||||
|
<PackageReference Include="HtmlAgilityPack" Version="1.12.1" />
|
||||||
<PackageReference Include="Livekit.Server.Sdk.Dotnet" Version="1.0.8" />
|
<PackageReference Include="Livekit.Server.Sdk.Dotnet" Version="1.0.8" />
|
||||||
<PackageReference Include="MailKit" Version="4.11.0" />
|
<PackageReference Include="MailKit" Version="4.11.0" />
|
||||||
<PackageReference Include="MaxMind.GeoIP2" Version="5.3.0" />
|
<PackageReference Include="MaxMind.GeoIP2" Version="5.3.0" />
|
||||||
|
@ -78,6 +78,7 @@
|
|||||||
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AStackFrameIterator_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F3bef61b8a21d4c8e96872ecdd7782fa0e55000_003F7a_003F870020d0_003FStackFrameIterator_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AStackFrameIterator_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F3bef61b8a21d4c8e96872ecdd7782fa0e55000_003F7a_003F870020d0_003FStackFrameIterator_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
||||||
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AStackFrameIterator_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003Fb6f0571a6bc744b0b551fd4578292582e54c00_003Fdf_003F3fcdc4d2_003FStackFrameIterator_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AStackFrameIterator_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003Fb6f0571a6bc744b0b551fd4578292582e54c00_003Fdf_003F3fcdc4d2_003FStackFrameIterator_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
||||||
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AStatusCodeResult_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2024_002E3_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F0b5acdd962e549369896cece0026e556214600_003F7c_003F8b7572ae_003FStatusCodeResult_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AStatusCodeResult_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2024_002E3_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F0b5acdd962e549369896cece0026e556214600_003F7c_003F8b7572ae_003FStatusCodeResult_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
||||||
|
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003ASyndicationFeed_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F5b43b9cf654743f8b9a2eee23c625dd21dd30_003Fad_003Fd26b4d73_003FSyndicationFeed_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
||||||
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003ATagging_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2024_002E3_003Fresharper_002Dhost_003FSourcesCache_003F36f4c2e6baa65ba603de42eedad12ea36845aa35a910a6a82d82baf688e3e1_003FTagging_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003ATagging_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2024_002E3_003Fresharper_002Dhost_003FSourcesCache_003F36f4c2e6baa65ba603de42eedad12ea36845aa35a910a6a82d82baf688e3e1_003FTagging_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
||||||
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AThrowHelper_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003Fb6f0571a6bc744b0b551fd4578292582e54c00_003F12_003Fe0a28ad6_003FThrowHelper_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AThrowHelper_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003Fb6f0571a6bc744b0b551fd4578292582e54c00_003F12_003Fe0a28ad6_003FThrowHelper_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
||||||
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003ATotp_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F48c9d2a1b3c84b32b36ebc6f20a927ea4600_003F7b_003Ff98e5727_003FTotp_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003ATotp_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2025_002E1_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F48c9d2a1b3c84b32b36ebc6f20a927ea4600_003F7b_003Ff98e5727_003FTotp_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user