using System.ServiceModel.Syndication;
using System.Xml;
using Microsoft.EntityFrameworkCore;

namespace DysonNetwork.Sphere.Connection.WebReader;

/// <summary>
/// CRUD operations for <see cref="WebFeed"/> records plus scraping of a feed's
/// syndication document (RSS/Atom) into <see cref="WebArticle"/> rows.
/// </summary>
public class WebFeedService(
    AppDatabase database,
    IHttpClientFactory httpClientFactory,
    ILogger<WebFeedService> logger,
    WebReaderService webReaderService
)
{
    /// <summary>
    /// Creates and persists a new feed owned by <paramref name="publisher"/>.
    /// </summary>
    /// <param name="publisher">Publisher whose id is stored on the new feed.</param>
    /// <param name="request">
    /// Request payload. <c>Url</c> and <c>Title</c> are dereferenced with the
    /// null-forgiving operator, so the caller is expected to have validated them.
    /// </param>
    /// <returns>The newly saved feed.</returns>
    public async Task<WebFeed> CreateWebFeedAsync(
        Publisher.Publisher publisher,
        WebFeedController.WebFeedRequest request
    )
    {
        var feed = new WebFeed
        {
            Url = request.Url!,
            Title = request.Title!,
            Description = request.Description,
            PublisherId = publisher.Id,
        };

        database.Set<WebFeed>().Add(feed);
        await database.SaveChangesAsync();
        return feed;
    }

    /// <summary>
    /// Looks up a feed by id, optionally restricted to a given publisher.
    /// </summary>
    /// <param name="id">Feed id.</param>
    /// <param name="publisherId">When set, the feed must also belong to this publisher.</param>
    /// <returns>The matching feed, or <c>null</c> when none matches.</returns>
    public async Task<WebFeed?> GetFeedAsync(Guid id, Guid? publisherId = null)
    {
        var query = database.WebFeeds.Where(a => a.Id == id);
        if (publisherId.HasValue)
        {
            query = query.Where(a => a.PublisherId == publisherId.Value);
        }

        return await query.FirstOrDefaultAsync();
    }

    /// <summary>
    /// Returns every feed whose <c>PublisherId</c> equals <paramref name="publisherId"/>.
    /// </summary>
    public async Task<List<WebFeed>> GetFeedsByPublisherAsync(Guid publisherId)
    {
        return await database.WebFeeds
            .Where(a => a.PublisherId == publisherId)
            .ToListAsync();
    }

    /// <summary>
    /// Applies the non-null fields of <paramref name="request"/> to <paramref name="feed"/>
    /// and saves the result. Fields left <c>null</c> in the request are not touched.
    /// </summary>
    /// <returns>The same <paramref name="feed"/> instance after saving.</returns>
    public async Task<WebFeed> UpdateFeedAsync(WebFeed feed, WebFeedController.WebFeedRequest request)
    {
        if (request.Url is not null)
        {
            feed.Url = request.Url;
        }

        if (request.Title is not null)
        {
            feed.Title = request.Title;
        }

        if (request.Description is not null)
        {
            feed.Description = request.Description;
        }

        database.Update(feed);
        await database.SaveChangesAsync();
        return feed;
    }

    /// <summary>
    /// Deletes the feed with the given id.
    /// </summary>
    /// <returns><c>true</c> when a feed was found and removed; <c>false</c> when no such feed exists.</returns>
    public async Task<bool> DeleteFeedAsync(Guid id)
    {
        var feed = await database.WebFeeds.FindAsync(id);
        if (feed is null)
        {
            return false;
        }

        database.WebFeeds.Remove(feed);
        await database.SaveChangesAsync();
        return true;
    }

    /// <summary>
    /// Downloads the syndication document at <c>feed.Url</c> and stores one
    /// <see cref="WebArticle"/> per item that is not already stored for this feed.
    /// </summary>
    /// <remarks>
    /// Items without a link are skipped. An item counts as already stored when an
    /// article with the same feed id and URL exists in the database, or when the same
    /// URL appeared earlier in the same document. All new articles are saved in a
    /// single <c>SaveChangesAsync</c> call after the loop.
    /// </remarks>
    /// <param name="feed">Feed to scrape; its <c>Config.ScrapPage</c> flag selects full-page scraping.</param>
    /// <param name="cancellationToken">Flows to the HTTP request, database calls and reader service.</param>
    /// <exception cref="HttpRequestException">The feed URL returned a non-success status code.</exception>
    public async Task ScrapeFeedAsync(WebFeed feed, CancellationToken cancellationToken = default)
    {
        var httpClient = httpClientFactory.CreateClient();

        // Dispose the response so the underlying connection/content is released promptly.
        using var response = await httpClient.GetAsync(feed.Url, cancellationToken);
        response.EnsureSuccessStatusCode();

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
        using var reader = XmlReader.Create(stream);

        var syndicationFeed = SyndicationFeed.Load(reader);
        if (syndicationFeed is null)
        {
            logger.LogWarning("Could not parse syndication feed for {FeedUrl}", feed.Url);
            return;
        }

        var articles = database.Set<WebArticle>();

        // The database check below cannot see articles added earlier in this same run
        // (they are not saved until the end), so track URLs handled in this batch too.
        var seenUrls = new HashSet<string>(StringComparer.Ordinal);

        foreach (var item in syndicationFeed.Items)
        {
            var itemUrl = item.Links.FirstOrDefault()?.Uri.ToString();
            if (string.IsNullOrEmpty(itemUrl) || !seenUrls.Add(itemUrl))
            {
                continue;
            }

            var articleExists = await articles
                .AnyAsync(a => a.FeedId == feed.Id && a.Url == itemUrl, cancellationToken);
            if (articleExists)
            {
                continue;
            }

            // Summary and Title may be absent on an item; fall back to an empty string
            // instead of letting one incomplete item abort the whole scrape.
            var content = (item.Content as TextSyndicationContent)?.Text
                          ?? item.Summary?.Text
                          ?? string.Empty;

            LinkEmbed preview;
            if (feed.Config.ScrapPage)
            {
                // Full-page scraping replaces the feed-provided content.
                var scrapedArticle = await webReaderService.ScrapeArticleAsync(itemUrl, cancellationToken);
                preview = scrapedArticle.LinkEmbed;
                content = scrapedArticle.Content;
            }
            else
            {
                preview = await webReaderService.GetLinkPreviewAsync(itemUrl, cancellationToken);
            }

            var newArticle = new WebArticle
            {
                FeedId = feed.Id,
                Title = item.Title?.Text ?? string.Empty,
                Url = itemUrl,
                Author = item.Authors.FirstOrDefault()?.Name,
                Content = content,
                PublishedAt = item.PublishDate.UtcDateTime,
                Preview = preview,
            };

            articles.Add(newArticle);
        }

        await database.SaveChangesAsync(cancellationToken);
    }
}