♻️ Move the web reader from Sphere to Insight (w.i.p)

This commit is contained in:
2026-01-02 00:15:56 +08:00
parent c4b2b2f61f
commit ede49333f8
18 changed files with 59 additions and 64 deletions

View File

@@ -15,6 +15,10 @@ public class AppDatabase(
public DbSet<SnThinkingThought> ThinkingThoughts { get; set; }
public DbSet<SnUnpaidAccount> UnpaidAccounts { get; set; }
public DbSet<SnWebArticle> WebArticles { get; set; }
public DbSet<SnWebFeed> WebFeeds { get; set; }
public DbSet<SnWebFeedSubscription> WebFeedSubscriptions { get; set; }
protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
{
optionsBuilder.UseNpgsql(

View File

@@ -7,6 +7,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="AngleSharp" Version="1.4.0" />
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="10.0.1" />
<PackageReference Include="Microsoft.EntityFrameworkCore.Design" Version="10.0.1">
<PrivateAssets>all</PrivateAssets>
@@ -18,6 +19,7 @@
<PackageReference Include="Quartz" Version="3.15.1" />
<PackageReference Include="Quartz.AspNetCore" Version="3.15.1" />
<PackageReference Include="Quartz.Extensions.Hosting" Version="3.15.1" />
<PackageReference Include="System.ServiceModel.Syndication" Version="10.0.1" />
</ItemGroup>
<ItemGroup>

View File

@@ -1,4 +1,6 @@
namespace DysonNetwork.Sphere.WebReader;
using DysonNetwork.Shared.Models.Embed;
namespace DysonNetwork.Insight.Reader;
public class ScrapedArticle
{

View File

@@ -1,7 +1,7 @@
using Microsoft.AspNetCore.Mvc;
using Microsoft.EntityFrameworkCore;
namespace DysonNetwork.Sphere.WebReader;
namespace DysonNetwork.Insight.Reader;
[ApiController]
[Route("/api/feeds/articles")]

View File

@@ -1,14 +1,16 @@
using System.ComponentModel.DataAnnotations;
using DysonNetwork.Shared.Models;
using DysonNetwork.Shared.Proto;
using DysonNetwork.Shared.Registry;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
namespace DysonNetwork.Sphere.WebReader;
namespace DysonNetwork.Insight.Reader;
[Authorize]
[ApiController]
[Route("/api/publishers/{pubName}/feeds")]
public class WebFeedController(WebFeedService webFeed, Publisher.PublisherService ps) : ControllerBase
public class WebFeedController(WebFeedService webFeed, RemotePublisherService ps) : ControllerBase
{
public record WebFeedRequest(
[MaxLength(8192)] string? Url,

View File

@@ -1,9 +1,10 @@
using DysonNetwork.Shared.Models;
using DysonNetwork.Shared.Proto;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using Microsoft.EntityFrameworkCore;
namespace DysonNetwork.Sphere.WebReader;
namespace DysonNetwork.Insight.Reader;
[ApiController]
[Route("/api/feeds")]
@@ -39,7 +40,7 @@ public class WebFeedPublicController(
return Ok(existingSubscription);
// Create new subscription
var subscription = new WebFeedSubscription
var subscription = new SnWebFeedSubscription
{
FeedId = feedId,
AccountId = accountId
@@ -83,7 +84,7 @@ public class WebFeedPublicController(
/// <returns>Subscription status</returns>
[HttpGet("{feedId:guid}/subscription")]
[Authorize]
public async Task<ActionResult<WebFeedSubscription>> GetSubscriptionStatus(Guid feedId)
public async Task<ActionResult<SnWebFeedSubscription>> GetSubscriptionStatus(Guid feedId)
{
if (HttpContext.Items["CurrentUser"] is not Account currentUser)
return Unauthorized();
@@ -105,7 +106,7 @@ public class WebFeedPublicController(
/// <returns>List of subscribed feeds</returns>
[HttpGet("subscribed")]
[Authorize]
public async Task<ActionResult<WebFeed>> GetSubscribedFeeds(
public async Task<ActionResult<SnWebFeed>> GetSubscribedFeeds(
[FromQuery] int offset = 0,
[FromQuery] int take = 20
)
@@ -137,7 +138,7 @@ public class WebFeedPublicController(
/// </summary>
[HttpGet]
[Authorize]
public async Task<ActionResult<WebFeed>> GetWebFeedArticles(
public async Task<ActionResult<SnWebFeed>> GetWebFeedArticles(
[FromQuery] int offset = 0,
[FromQuery] int take = 20
)
@@ -174,7 +175,7 @@ public class WebFeedPublicController(
/// <returns>Feed metadata</returns>
[AllowAnonymous]
[HttpGet("{feedId:guid}")]
public async Task<ActionResult<WebFeed>> GetFeedById(Guid feedId)
public async Task<ActionResult<SnWebFeed>> GetFeedById(Guid feedId)
{
var feed = await webFeed.GetFeedAsync(feedId);
if (feed == null)
@@ -192,7 +193,7 @@ public class WebFeedPublicController(
/// <returns>List of articles from the feed</returns>
[AllowAnonymous]
[HttpGet("{feedId:guid}/articles")]
public async Task<ActionResult<WebArticle>> GetFeedArticles(
public async Task<ActionResult<SnWebArticle>> GetFeedArticles(
[FromRoute] Guid feedId,
[FromQuery] int offset = 0,
[FromQuery] int take = 20
@@ -224,7 +225,7 @@ public class WebFeedPublicController(
/// </summary>
[HttpGet("explore")]
[Authorize]
public async Task<ActionResult<WebFeed>> ExploreFeeds(
public async Task<ActionResult<SnWebFeed>> ExploreFeeds(
[FromQuery] int offset = 0,
[FromQuery] int take = 20,
[FromQuery] string? query = null

View File

@@ -1,7 +1,8 @@
using DysonNetwork.Shared.Models;
using Microsoft.EntityFrameworkCore;
using Quartz;
namespace DysonNetwork.Sphere.WebReader;
namespace DysonNetwork.Insight.Reader;
[DisallowConcurrentExecution]
public class WebFeedScraperJob(
@@ -15,7 +16,7 @@ public class WebFeedScraperJob(
{
logger.LogInformation("Starting web feed scraper job.");
var feeds = await database.Set<WebFeed>().ToListAsync(context.CancellationToken);
var feeds = await database.Set<SnWebFeed>().ToListAsync(context.CancellationToken);
foreach (var feed in feeds)
{

View File

@@ -1,20 +1,21 @@
using System.ServiceModel.Syndication;
using System.Xml;
using DysonNetwork.Shared.Models;
using DysonNetwork.Shared.Models.Embed;
using Microsoft.EntityFrameworkCore;
namespace DysonNetwork.Sphere.WebReader;
namespace DysonNetwork.Insight.Reader;
public class WebFeedService(
AppDatabase database,
IHttpClientFactory httpClientFactory,
ILogger<WebFeedService> logger,
WebReaderService webReaderService
WebReaderService readerService
)
{
public async Task<WebFeed> CreateWebFeedAsync(Shared.Models.SnPublisher publisher,
WebFeedController.WebFeedRequest request)
public async Task<SnWebFeed> CreateWebFeedAsync(SnPublisher publisher, WebFeedController.WebFeedRequest request)
{
var feed = new WebFeed
var feed = new SnWebFeed
{
Url = request.Url!,
Title = request.Title!,
@@ -29,7 +30,7 @@ public class WebFeedService(
return feed;
}
public async Task<WebFeed?> GetFeedAsync(Guid id, Guid? publisherId = null)
public async Task<SnWebFeed?> GetFeedAsync(Guid id, Guid? publisherId = null)
{
var query = database.WebFeeds
.Include(a => a.Publisher)
@@ -40,12 +41,12 @@ public class WebFeedService(
return await query.FirstOrDefaultAsync();
}
public async Task<List<WebFeed>> GetFeedsByPublisherAsync(Guid publisherId)
public async Task<List<SnWebFeed>> GetFeedsByPublisherAsync(Guid publisherId)
{
return await database.WebFeeds.Where(a => a.PublisherId == publisherId).ToListAsync();
}
public async Task<WebFeed> UpdateFeedAsync(WebFeed feed, WebFeedController.WebFeedRequest request)
public async Task<SnWebFeed> UpdateFeedAsync(SnWebFeed feed, WebFeedController.WebFeedRequest request)
{
if (request.Url is not null)
feed.Url = request.Url;
@@ -76,7 +77,7 @@ public class WebFeedService(
return true;
}
public async Task ScrapeFeedAsync(WebFeed feed, CancellationToken cancellationToken = default)
public async Task ScrapeFeedAsync(SnWebFeed feed, CancellationToken cancellationToken = default)
{
var httpClient = httpClientFactory.CreateClient();
var response = await httpClient.GetAsync(feed.Url, cancellationToken);
@@ -98,7 +99,7 @@ public class WebFeedService(
if (string.IsNullOrEmpty(itemUrl))
continue;
var articleExists = await database.Set<WebArticle>()
var articleExists = await database.Set<SnWebArticle>()
.AnyAsync(a => a.FeedId == feed.Id && a.Url == itemUrl, cancellationToken);
if (articleExists)
@@ -109,17 +110,17 @@ public class WebFeedService(
if (feed.Config.ScrapPage)
{
var scrapedArticle = await webReaderService.ScrapeArticleAsync(itemUrl, cancellationToken);
var scrapedArticle = await readerService.ScrapeArticleAsync(itemUrl, cancellationToken);
preview = scrapedArticle.LinkEmbed;
if (scrapedArticle.Content is not null)
content = scrapedArticle.Content;
}
else
{
preview = await webReaderService.GetLinkPreviewAsync(itemUrl, cancellationToken);
preview = await readerService.GetLinkPreviewAsync(itemUrl, cancellationToken);
}
var newArticle = new WebArticle
var newArticle = new SnWebArticle
{
FeedId = feed.Id,
Title = item.Title.Text,

View File

@@ -1,9 +1,10 @@
using DysonNetwork.Shared.Auth;
using DysonNetwork.Shared.Models.Embed;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.RateLimiting;
namespace DysonNetwork.Sphere.WebReader;
namespace DysonNetwork.Insight.Reader;
/// <summary>
/// Controller for web scraping and link preview services

View File

@@ -1,4 +1,4 @@
namespace DysonNetwork.Sphere.WebReader;
namespace DysonNetwork.Insight.Reader;
/// <summary>
/// Exception thrown when an error occurs during web reading operations

View File

@@ -2,9 +2,10 @@ using System.Globalization;
using AngleSharp;
using AngleSharp.Dom;
using DysonNetwork.Shared.Cache;
using DysonNetwork.Shared.Models.Embed;
using HtmlAgilityPack;
namespace DysonNetwork.Sphere.WebReader;
namespace DysonNetwork.Insight.Reader;
/// <summary>
/// The service aims to provide scraping services to the Solar Network.

View File

@@ -10,9 +10,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="AngleSharp" Version="1.4.0" />
<PackageReference Include="Grpc.AspNetCore.Server" Version="2.76.0" />
<PackageReference Include="HtmlAgilityPack" Version="1.12.4" />
<PackageReference Include="Livekit.Server.Sdk.Dotnet" Version="1.1.0" />
<PackageReference Include="Microsoft.AspNetCore.Authentication.JwtBearer" Version="10.0.1" />
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="10.0.1" />

View File

@@ -1,7 +1,7 @@
using System.Text.Json;
using DysonNetwork.Shared.Proto;
namespace DysonNetwork.Sphere.WebReader;
namespace DysonNetwork.Shared.Models.Embed;
/// <summary>
/// The embeddable can be used in the post or messages' meta's embeds fields

View File

@@ -1,4 +1,4 @@
namespace DysonNetwork.Sphere.WebReader;
namespace DysonNetwork.Shared.Models.Embed;
/// <summary>
/// The link embed is a part of the embeddable implementations

View File

@@ -1,11 +1,12 @@
using System.ComponentModel.DataAnnotations;
using System.ComponentModel.DataAnnotations.Schema;
using System.Text.Json.Serialization;
using DysonNetwork.Shared.Models;
using DysonNetwork.Shared.Models.Embed;
using NodaTime;
namespace DysonNetwork.Sphere.WebReader;
namespace DysonNetwork.Shared.Models;
public class WebArticle : ModelBase
public class SnWebArticle : ModelBase
{
public Guid Id { get; set; } = Guid.NewGuid();
@@ -22,7 +23,7 @@ public class WebArticle : ModelBase
public DateTime? PublishedAt { get; set; }
public Guid FeedId { get; set; }
public WebFeed Feed { get; set; } = null!;
public SnWebFeed Feed { get; set; } = null!;
}
public class WebFeedConfig
@@ -30,28 +31,31 @@ public class WebFeedConfig
public bool ScrapPage { get; set; }
}
public class WebFeed : ModelBase
public class SnWebFeed : ModelBase
{
public Guid Id { get; set; } = Guid.NewGuid();
[MaxLength(8192)] public string Url { get; set; } = null!;
[MaxLength(4096)] public string Title { get; set; } = null!;
[MaxLength(8192)] public string? Description { get; set; }
public Instant? VerifiedAt { get; set; }
[JsonIgnore] [MaxLength(8192)] public string? VerificationKey { get; set; }
[Column(TypeName = "jsonb")] public LinkEmbed? Preview { get; set; }
[Column(TypeName = "jsonb")] public WebFeedConfig Config { get; set; } = new();
public Guid PublisherId { get; set; }
public SnPublisher Publisher { get; set; } = null!;
[JsonIgnore] public List<WebArticle> Articles { get; set; } = new List<WebArticle>();
[JsonIgnore] public List<SnWebArticle> Articles { get; set; } = new();
}
public class WebFeedSubscription : ModelBase
public class SnWebFeedSubscription : ModelBase
{
public Guid Id { get; set; } = Guid.NewGuid();
public Guid FeedId { get; set; }
public WebFeed Feed { get; set; } = null!;
public SnWebFeed Feed { get; set; } = null!;
public Guid AccountId { get; set; }
[NotMapped] public SnAccount Account { get; set; } = null!;
}

View File

@@ -1,20 +1,13 @@
using System.Linq.Expressions;
using DysonNetwork.Shared.Data;
using DysonNetwork.Shared.Models;
using DysonNetwork.Sphere.WebReader;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Design;
using Microsoft.EntityFrameworkCore.Query;
using NodaTime;
using Quartz;
namespace DysonNetwork.Sphere;
public interface IIdentifiedResource
{
public string ResourceIdentifier { get; }
}
public class AppDatabase(
DbContextOptions<AppDatabase> options,
IConfiguration configuration
@@ -53,10 +46,6 @@ public class AppDatabase(
public DbSet<SnFediverseRelationship> FediverseRelationships { get; set; } = null!;
public DbSet<SnActivityPubDelivery> ActivityPubDeliveries { get; set; } = null!;
public DbSet<WebArticle> WebArticles { get; set; } = null!;
public DbSet<WebFeed> WebFeeds { get; set; } = null!;
public DbSet<WebFeedSubscription> WebFeedSubscriptions { get; set; } = null!;
protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
{
optionsBuilder.UseNpgsql(
@@ -140,13 +129,6 @@ public class AppDatabase(
.HasForeignKey(m => m.SenderId)
.OnDelete(DeleteBehavior.Cascade);
modelBuilder.Entity<WebFeed>()
.HasIndex(f => f.Url)
.IsUnique();
modelBuilder.Entity<WebArticle>()
.HasIndex(a => a.Url)
.IsUnique();
modelBuilder.Entity<SnFediverseActor>()
.HasOne(a => a.Instance)
.WithMany(i => i.Actors)

View File

@@ -11,9 +11,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="AngleSharp" Version="1.4.0" />
<PackageReference Include="Grpc.AspNetCore.Server" Version="2.76.0" />
<PackageReference Include="HtmlAgilityPack" Version="1.12.4" />
<PackageReference Include="jieba.NET" Version="0.42.2" />
<PackageReference Include="Livekit.Server.Sdk.Dotnet" Version="1.1.0" />
<PackageReference Include="Markdig" Version="0.44.0" />
@@ -38,7 +36,6 @@
<PackageReference Include="StackExchange.Redis.Extensions.AspNetCore" Version="11.0.0" />
<PackageReference Include="Swashbuckle.AspNetCore.Annotations" Version="10.1.0" />
<PackageReference Include="System.Drawing.Common" Version="10.0.1" />
<PackageReference Include="System.ServiceModel.Syndication" Version="10.0.1" />
<PackageReference Include="TencentCloudSDK.Tmt" Version="3.0.1335" />
</ItemGroup>

View File

@@ -30,7 +30,6 @@
<PackageReference Include="Quartz" Version="3.15.1" />
<PackageReference Include="Quartz.AspNetCore" Version="3.15.1" />
<PackageReference Include="SimpleMvcSitemap" Version="4.0.1" />
<PackageReference Include="System.ServiceModel.Syndication" Version="10.0.1" />
</ItemGroup>
<ItemGroup>