✨ Bug fixes on web feed & scraping
This commit is contained in:
@ -33,7 +33,11 @@ public class WebReaderService(
|
||||
{
|
||||
var httpClient = httpClientFactory.CreateClient("WebReader");
|
||||
var response = await httpClient.GetAsync(url, cancellationToken);
|
||||
response.EnsureSuccessStatusCode();
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
logger.LogWarning("Failed to scrap article content for URL: {Url}", url);
|
||||
return null;
|
||||
}
|
||||
var html = await response.Content.ReadAsStringAsync(cancellationToken);
|
||||
var doc = new HtmlDocument();
|
||||
doc.LoadHtml(html);
|
||||
@ -74,7 +78,8 @@ public class WebReaderService(
|
||||
// Cache miss or bypass, fetch fresh data
|
||||
logger.LogDebug("Fetching fresh link preview for URL: {Url}", url);
|
||||
var httpClient = httpClientFactory.CreateClient("WebReader");
|
||||
httpClient.MaxResponseContentBufferSize = 10 * 1024 * 1024; // 10MB, prevent scrap some directly accessible files
|
||||
httpClient.MaxResponseContentBufferSize =
|
||||
10 * 1024 * 1024; // 10 MB limit, to avoid scraping directly accessible files
|
||||
httpClient.Timeout = TimeSpan.FromSeconds(3);
|
||||
// Set the User-Agent to Facebook's crawler so that sites return their Open Graph metadata.
|
||||
httpClient.DefaultRequestHeaders.Add("User-Agent", "facebookexternalhit/1.1");
|
||||
|
Reference in New Issue
Block a user