Web articles and feed

This commit is contained in:
2025-06-26 17:36:45 +08:00
parent 21cf212d8f
commit 1a137fbb6a
12 changed files with 4335 additions and 0 deletions

View File

@ -0,0 +1,39 @@
using System.ComponentModel.DataAnnotations;
using System.ComponentModel.DataAnnotations.Schema;
namespace DysonNetwork.Sphere.Connection.WebReader;
/// <summary>
/// A single article stored for a <see cref="WebFeed"/>, identified within the feed by its
/// <see cref="Url"/>.
/// </summary>
public class WebArticle : ModelBase
{
    /// <summary>Identifier; defaults to a freshly generated GUID.</summary>
    public Guid Id { get; set; } = Guid.NewGuid();

    /// <summary>Article title, limited to 4096 characters.</summary>
    [MaxLength(4096)]
    public string Title { get; set; }

    /// <summary>Link to the article, limited to 8192 characters.</summary>
    [MaxLength(8192)]
    public string Url { get; set; }

    /// <summary>Optional author name, limited to 4096 characters.</summary>
    [MaxLength(4096)]
    public string? Author { get; set; }

    /// <summary>Optional free-form metadata, mapped to a <c>jsonb</c> column.</summary>
    [Column(TypeName = "jsonb")]
    public Dictionary<string, object>? Meta { get; set; }

    /// <summary>Optional link preview, mapped to a <c>jsonb</c> column.</summary>
    [Column(TypeName = "jsonb")]
    public LinkEmbed? Preview { get; set; }

    // ReSharper disable once EntityFramework.ModelValidation.UnlimitedStringLength
    /// <summary>Optional article body; intentionally has no length limit.</summary>
    public string? Content { get; set; }

    /// <summary>Optional publication timestamp.</summary>
    public DateTime? PublishedAt { get; set; }

    /// <summary>Identifier of the <see cref="WebFeed"/> this article belongs to.</summary>
    public Guid FeedId { get; set; }

    /// <summary>Navigation to the owning feed.</summary>
    public WebFeed Feed { get; set; } = null!;
}
/// <summary>
/// A web feed registered under a publisher, together with the articles stored for it.
/// </summary>
public class WebFeed : ModelBase
{
    /// <summary>Identifier; defaults to a freshly generated GUID.</summary>
    public Guid Id { get; set; } = Guid.NewGuid();

    /// <summary>Address of the feed document, limited to 8192 characters.</summary>
    [MaxLength(8192)]
    public string Url { get; set; }

    /// <summary>Feed title, limited to 4096 characters.</summary>
    [MaxLength(4096)]
    public string Title { get; set; }

    /// <summary>Optional description, limited to 8192 characters.</summary>
    [MaxLength(8192)]
    public string? Description { get; set; }

    /// <summary>Optional link preview, mapped to a <c>jsonb</c> column.</summary>
    [Column(TypeName = "jsonb")]
    public LinkEmbed? Preview { get; set; }

    /// <summary>Identifier of the publisher that owns this feed.</summary>
    public Guid PublisherId { get; set; }

    /// <summary>Navigation to the owning publisher.</summary>
    public Publisher.Publisher Publisher { get; set; } = null!;

    /// <summary>Articles belonging to this feed; starts out as an empty list.</summary>
    public ICollection<WebArticle> Articles { get; set; } = [];
}

View File

@ -0,0 +1,33 @@
using System.ComponentModel.DataAnnotations;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
namespace DysonNetwork.Sphere.Connection.WebReader;
/// <summary>
/// HTTP endpoints for web feeds, served under the <c>feeds</c> route.
/// Every action requires an authenticated caller.
/// </summary>
[Authorize]
[ApiController]
[Route("feeds")]
public class WebFeedController(WebFeedService webFeedService) : ControllerBase
{
    /// <summary>
    /// Request body accepted by <see cref="CreateWebFeed"/>. The length limits mirror the
    /// ones declared on <see cref="WebFeed"/>.
    /// </summary>
    public class CreateWebFeedRequest
    {
        /// <summary>Address of the feed document.</summary>
        [Required]
        [MaxLength(8192)]
        public required string Url { get; set; }

        /// <summary>Title to store for the feed.</summary>
        [Required]
        [MaxLength(4096)]
        public required string Title { get; set; }

        /// <summary>Optional description of the feed.</summary>
        [MaxLength(8192)]
        public string? Description { get; set; }
    }

    /// <summary>
    /// Registers a new web feed for the current user.
    /// </summary>
    /// <param name="request">Feed URL, title and optional description.</param>
    /// <returns>
    /// <c>200 OK</c> with the created <see cref="WebFeed"/>, or <c>401 Unauthorized</c> when the
    /// service cannot resolve an account for the current user.
    /// </returns>
    [HttpPost]
    public async Task<IActionResult> CreateWebFeed([FromBody] CreateWebFeedRequest request)
    {
        try
        {
            var feed = await webFeedService.CreateWebFeedAsync(request, User);
            return Ok(feed);
        }
        catch (UnauthorizedAccessException)
        {
            // The service throws this when the principal has no name or no matching account.
            // Left unhandled it would surface as a 500 instead of an authentication failure.
            return Unauthorized();
        }
    }
}

View File

@ -0,0 +1,35 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using Quartz;
namespace DysonNetwork.Sphere.Connection.WebReader;
/// <summary>
/// Quartz job that loads every <see cref="WebFeed"/> and scrapes them one after another.
/// A failure in one feed is logged and does not stop the remaining feeds; cancellation of
/// the job itself stops the run.
/// </summary>
[DisallowConcurrentExecution]
public class WebFeedScraperJob(
    AppDatabase database,
    WebFeedService webFeedService,
    ILogger<WebFeedScraperJob> logger
)
    : IJob
{
    /// <summary>
    /// Scrapes all stored feeds sequentially.
    /// </summary>
    /// <param name="context">Quartz execution context; its cancellation token is honoured.</param>
    public async Task Execute(IJobExecutionContext context)
    {
        var cancellationToken = context.CancellationToken;
        logger.LogInformation("Starting web feed scraper job.");

        var feeds = await database.Set<WebFeed>().ToListAsync(cancellationToken);
        foreach (var feed in feeds)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                logger.LogInformation("Web feed scraper job cancelled.");
                return;
            }

            try
            {
                await webFeedService.ScrapeFeedAsync(feed, cancellationToken);
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // Only the job's own cancellation ends the run. A cancellation that did not
                // come from this token (e.g. an HTTP timeout) falls through to the generic
                // handler below and is treated as a failure of this single feed.
                logger.LogInformation("Web feed scraper job cancelled.");
                return;
            }
            catch (Exception ex)
            {
                logger.LogError(ex, "Failed to scrape web feed {FeedId}", feed.Id);
            }
        }

        logger.LogInformation("Web feed scraper job finished.");
    }
}

View File

@ -0,0 +1,91 @@
using System.Security.Claims;
using System.ServiceModel.Syndication;
using System.Xml;
using DysonNetwork.Sphere.Account;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
namespace DysonNetwork.Sphere.Connection.WebReader;
/// <summary>
/// Creates <see cref="WebFeed"/> records and imports the items of a syndication feed as
/// <see cref="WebArticle"/> rows.
/// </summary>
public class WebFeedService(
    AppDatabase database,
    IHttpClientFactory httpClientFactory,
    ILogger<WebFeedService> logger,
    AccountService accountService
)
{
    /// <summary>
    /// Stores a new feed for the account identified by <paramref name="claims"/>.
    /// </summary>
    /// <param name="dto">Feed URL, title and optional description.</param>
    /// <param name="claims">Principal of the caller; its identity name is used to look up the account.</param>
    /// <param name="cancellationToken">Token used to cancel the database write.</param>
    /// <returns>The persisted feed.</returns>
    /// <exception cref="UnauthorizedAccessException">
    /// The principal has no identity name, or no account matches that name.
    /// </exception>
    public async Task<WebFeed> CreateWebFeedAsync(
        WebFeedController.CreateWebFeedRequest dto,
        ClaimsPrincipal claims,
        CancellationToken cancellationToken = default
    )
    {
        if (claims.Identity?.Name is not { } accountName)
        {
            throw new UnauthorizedAccessException();
        }

        var account = await accountService.LookupAccount(accountName);
        if (account is null)
        {
            throw new UnauthorizedAccessException();
        }

        var feed = new WebFeed
        {
            Url = dto.Url,
            Title = dto.Title,
            Description = dto.Description,
            // NOTE(review): an account id is stored as the publisher id here - confirm that
            // accounts and publishers really share identifiers.
            PublisherId = account.Id,
        };

        database.Set<WebFeed>().Add(feed);
        await database.SaveChangesAsync(cancellationToken);
        return feed;
    }

    /// <summary>
    /// Downloads the feed document at <c>feed.Url</c>, parses it as a syndication feed and
    /// stores every item whose link is not yet known for this feed.
    /// </summary>
    /// <param name="feed">The feed to scrape; only its id and URL are read.</param>
    /// <param name="cancellationToken">Token used to cancel the download and database calls.</param>
    /// <remarks>
    /// Items without a link are skipped, and items repeating a link already seen in the same
    /// document are stored only once. HTTP and XML errors propagate to the caller.
    /// </remarks>
    public async Task ScrapeFeedAsync(WebFeed feed, CancellationToken cancellationToken = default)
    {
        // NOTE(review): feed.Url originates from the create request and is fetched as-is;
        // consider restricting the allowed schemes/hosts if feeds can be registered by
        // untrusted users.
        var httpClient = httpClientFactory.CreateClient();
        using var response = await httpClient.GetAsync(feed.Url, cancellationToken);
        response.EnsureSuccessStatusCode();

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
        using var reader = XmlReader.Create(stream);
        var syndicationFeed = SyndicationFeed.Load(reader);
        if (syndicationFeed == null)
        {
            logger.LogWarning("Could not parse syndication feed for {FeedUrl}", feed.Url);
            return;
        }

        // Collect one candidate per distinct link, keeping document order.
        var seenUrls = new HashSet<string>(StringComparer.Ordinal);
        var candidates = new List<(string Url, SyndicationItem Item)>();
        foreach (var item in syndicationFeed.Items)
        {
            var itemUrl = item.Links.FirstOrDefault()?.Uri?.ToString();
            if (string.IsNullOrEmpty(itemUrl) || !seenUrls.Add(itemUrl))
            {
                continue;
            }

            candidates.Add((itemUrl, item));
        }

        if (candidates.Count == 0)
        {
            return;
        }

        // One round trip for all candidates instead of one existence query per item.
        var feedId = feed.Id;
        var candidateUrls = candidates.Select(c => c.Url).ToList();
        var storedUrls = await database.Set<WebArticle>()
            .Where(a => a.FeedId == feedId && candidateUrls.Contains(a.Url))
            .Select(a => a.Url)
            .ToListAsync(cancellationToken);
        var knownUrls = new HashSet<string>(storedUrls, StringComparer.Ordinal);

        var newArticles = new List<WebArticle>();
        foreach (var (url, item) in candidates)
        {
            if (knownUrls.Contains(url))
            {
                continue;
            }

            newArticles.Add(new WebArticle
            {
                FeedId = feedId,
                // Title and summary are optional in feed documents, so either may be null.
                Title = item.Title?.Text ?? string.Empty,
                Url = url,
                Author = item.Authors.FirstOrDefault()?.Name,
                Content = (item.Content as TextSyndicationContent)?.Text ?? item.Summary?.Text,
                PublishedAt = ResolvePublishedAt(item),
            });
        }

        if (newArticles.Count == 0)
        {
            return;
        }

        database.Set<WebArticle>().AddRange(newArticles);
        try
        {
            await database.SaveChangesAsync(cancellationToken);
        }
        catch
        {
            // Stop tracking the rejected rows; otherwise a later SaveChanges on the same
            // context (e.g. for the next feed in a batch) would retry them and fail again.
            foreach (var article in newArticles)
            {
                database.Entry(article).State = EntityState.Detached;
            }

            throw;
        }
    }

    /// <summary>
    /// Picks the item's publish date, falling back to its last-updated time, or
    /// <c>null</c> when the item carries neither.
    /// </summary>
    private static DateTime? ResolvePublishedAt(SyndicationItem item)
    {
        // An unset date is reported as default(DateTimeOffset), not as null.
        if (item.PublishDate != default)
        {
            return item.PublishDate.UtcDateTime;
        }

        if (item.LastUpdatedTime != default)
        {
            return item.LastUpdatedTime.UtcDateTime;
        }

        return null;
    }
}