✨ Bug fixes on web feed & scraping
This commit is contained in:
parent
ca5be5a01c
commit
ccb8a4e3f4
@ -31,8 +31,8 @@ public class WebFeedConfig
|
|||||||
public class WebFeed : ModelBase
|
public class WebFeed : ModelBase
|
||||||
{
|
{
|
||||||
public Guid Id { get; set; } = Guid.NewGuid();
|
public Guid Id { get; set; } = Guid.NewGuid();
|
||||||
[MaxLength(8192)] public string Url { get; set; }
|
[MaxLength(8192)] public string Url { get; set; } = null!;
|
||||||
[MaxLength(4096)] public string Title { get; set; }
|
[MaxLength(4096)] public string Title { get; set; } = null!;
|
||||||
[MaxLength(8192)] public string? Description { get; set; }
|
[MaxLength(8192)] public string? Description { get; set; }
|
||||||
|
|
||||||
[Column(TypeName = "jsonb")] public LinkEmbed? Preview { get; set; }
|
[Column(TypeName = "jsonb")] public LinkEmbed? Preview { get; set; }
|
||||||
|
@ -13,7 +13,8 @@ public class WebFeedController(WebFeedService webFeed, PublisherService ps) : Co
|
|||||||
public record WebFeedRequest(
|
public record WebFeedRequest(
|
||||||
[MaxLength(8192)] string? Url,
|
[MaxLength(8192)] string? Url,
|
||||||
[MaxLength(4096)] string? Title,
|
[MaxLength(4096)] string? Title,
|
||||||
[MaxLength(8192)] string? Description
|
[MaxLength(8192)] string? Description,
|
||||||
|
WebFeedConfig? Config
|
||||||
);
|
);
|
||||||
|
|
||||||
[HttpGet]
|
[HttpGet]
|
||||||
|
@ -11,13 +11,15 @@ public class WebFeedService(
|
|||||||
WebReaderService webReaderService
|
WebReaderService webReaderService
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
public async Task<WebFeed> CreateWebFeedAsync(Publisher.Publisher publisher, WebFeedController.WebFeedRequest request)
|
public async Task<WebFeed> CreateWebFeedAsync(Publisher.Publisher publisher,
|
||||||
|
WebFeedController.WebFeedRequest request)
|
||||||
{
|
{
|
||||||
var feed = new WebFeed
|
var feed = new WebFeed
|
||||||
{
|
{
|
||||||
Url = request.Url!,
|
Url = request.Url!,
|
||||||
Title = request.Title!,
|
Title = request.Title!,
|
||||||
Description = request.Description,
|
Description = request.Description,
|
||||||
|
Config = request.Config ?? new WebFeedConfig(),
|
||||||
PublisherId = publisher.Id,
|
PublisherId = publisher.Id,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -48,6 +50,8 @@ public class WebFeedService(
|
|||||||
feed.Title = request.Title;
|
feed.Title = request.Title;
|
||||||
if (request.Description is not null)
|
if (request.Description is not null)
|
||||||
feed.Description = request.Description;
|
feed.Description = request.Description;
|
||||||
|
if (request.Config is not null)
|
||||||
|
feed.Config = request.Config;
|
||||||
|
|
||||||
database.Update(feed);
|
database.Update(feed);
|
||||||
await database.SaveChangesAsync();
|
await database.SaveChangesAsync();
|
||||||
|
@ -33,7 +33,11 @@ public class WebReaderService(
|
|||||||
{
|
{
|
||||||
var httpClient = httpClientFactory.CreateClient("WebReader");
|
var httpClient = httpClientFactory.CreateClient("WebReader");
|
||||||
var response = await httpClient.GetAsync(url, cancellationToken);
|
var response = await httpClient.GetAsync(url, cancellationToken);
|
||||||
response.EnsureSuccessStatusCode();
|
if (!response.IsSuccessStatusCode)
|
||||||
|
{
|
||||||
|
logger.LogWarning("Failed to scrap article content for URL: {Url}", url);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
var html = await response.Content.ReadAsStringAsync(cancellationToken);
|
var html = await response.Content.ReadAsStringAsync(cancellationToken);
|
||||||
var doc = new HtmlDocument();
|
var doc = new HtmlDocument();
|
||||||
doc.LoadHtml(html);
|
doc.LoadHtml(html);
|
||||||
@ -74,7 +78,8 @@ public class WebReaderService(
|
|||||||
// Cache miss or bypass, fetch fresh data
|
// Cache miss or bypass, fetch fresh data
|
||||||
logger.LogDebug("Fetching fresh link preview for URL: {Url}", url);
|
logger.LogDebug("Fetching fresh link preview for URL: {Url}", url);
|
||||||
var httpClient = httpClientFactory.CreateClient("WebReader");
|
var httpClient = httpClientFactory.CreateClient("WebReader");
|
||||||
httpClient.MaxResponseContentBufferSize = 10 * 1024 * 1024; // 10 MB limit, to avoid scraping large, directly accessible files
|
httpClient.MaxResponseContentBufferSize =
|
||||||
|
10 * 1024 * 1024; // 10 MB limit, to avoid scraping large, directly accessible files
|
||||||
httpClient.Timeout = TimeSpan.FromSeconds(3);
|
httpClient.Timeout = TimeSpan.FromSeconds(3);
|
||||||
// Set the User-Agent to Facebook's crawler so that sites serve their Open Graph metadata.
|
// Set the User-Agent to Facebook's crawler so that sites serve their Open Graph metadata.
|
||||||
httpClient.DefaultRequestHeaders.Add("User-Agent", "facebookexternalhit/1.1");
|
httpClient.DefaultRequestHeaders.Add("User-Agent", "facebookexternalhit/1.1");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user