using System.Globalization;
using AngleSharp;
using AngleSharp.Dom;
using DysonNetwork.Shared.Cache;
using HtmlAgilityPack;
namespace DysonNetwork.Sphere.WebReader;
/// <summary>
/// Provides web scraping services to the Solar Network,
/// such as news feeds, external articles and link previews.
/// </summary>
public class WebReaderService(
    IHttpClientFactory httpClientFactory,
    ILogger logger,
    ICacheService cache
)
{
    private const string LinkPreviewCachePrefix = "scrap:preview:";
    private const string LinkPreviewCacheGroup = "scrap:preview";

    // Name of the HttpClient configuration requested from the factory.
    private const string HttpClientName = "WebReader";

    // Expiration applied to cached previews when the caller does not supply one.
    private static readonly TimeSpan DefaultCacheExpiry = TimeSpan.FromHours(24);

    /// <summary>
    /// Scrape an article: builds the link preview for the URL and extracts the article body.
    /// </summary>
    /// <param name="url">The URL of the article to scrape</param>
    /// <param name="cancellationToken">Cancellation token</param>
    /// <returns>
    /// A <see cref="ScrapedArticle"/> holding the link preview and the inner HTML of the
    /// page's first <c>article</c> element (null when it could not be extracted)
    /// </returns>
    public async Task<ScrapedArticle> ScrapeArticleAsync(string url, CancellationToken cancellationToken = default)
    {
        var linkEmbed = await GetLinkPreviewAsync(url, cancellationToken);
        var content = await GetArticleContentAsync(url, cancellationToken);
        return new ScrapedArticle
        {
            LinkEmbed = linkEmbed,
            Content = content
        };
    }

    /// <summary>
    /// Download the page and return the inner HTML of its first <c>article</c> element.
    /// </summary>
    /// <returns>The article HTML, or null when the request fails or no such element exists</returns>
    private async Task<string?> GetArticleContentAsync(string url, CancellationToken cancellationToken)
    {
        var httpClient = httpClientFactory.CreateClient(HttpClientName);
        // Dispose the response so the underlying connection is released promptly.
        using var response = await httpClient.GetAsync(url, cancellationToken);
        if (!response.IsSuccessStatusCode)
        {
            logger.LogWarning("Failed to scrap article content for URL: {Url}", url);
            return null;
        }

        var html = await response.Content.ReadAsStringAsync(cancellationToken);
        var doc = new HtmlDocument();
        doc.LoadHtml(html);
        var articleNode = doc.DocumentNode.SelectSingleNode("//article");
        return articleNode?.InnerHtml;
    }

    /// <summary>
    /// Generate a link preview embed from a URL
    /// </summary>
    /// <param name="url">The URL to generate the preview for</param>
    /// <param name="cancellationToken">Cancellation token</param>
    /// <param name="cacheExpiry">Custom cache expiration time</param>
    /// <param name="bypassCache">If true, bypass cache and fetch fresh data</param>
    /// <returns>A LinkEmbed object containing the preview data</returns>
    /// <exception cref="ArgumentException">The URL is not an absolute HTTP or HTTPS URL</exception>
    /// <exception cref="WebReaderException">The HTTP request failed</exception>
    public async Task<LinkEmbed> GetLinkPreviewAsync(
        string url,
        CancellationToken cancellationToken = default,
        TimeSpan? cacheExpiry = null,
        bool bypassCache = false
    )
    {
        // Ensure URL is valid
        if (!Uri.TryCreate(url, UriKind.Absolute, out var uri))
        {
            throw new ArgumentException(@"Invalid URL format", nameof(url));
        }

        // Only web URLs can be fetched. This also rejects file URIs, which is what a bare
        // "/path" string is parsed as on Unix when UriKind.Absolute is requested.
        if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps)
        {
            throw new ArgumentException("Only HTTP and HTTPS URLs are supported", nameof(url));
        }

        // Try to get from cache if not bypassing
        if (!bypassCache)
        {
            var cachedPreview = await GetCachedLinkPreview(url);
            if (cachedPreview is not null)
                return cachedPreview;
        }

        // Cache miss or bypass, fetch fresh data
        logger.LogDebug("Fetching fresh link preview for URL: {Url}", url);
        var httpClient = httpClientFactory.CreateClient(HttpClientName);
        // Cap the buffered body at 10MB so directly linked large files are not downloaded in full.
        httpClient.MaxResponseContentBufferSize = 10 * 1024 * 1024;
        // NOTE(review): a timeout surfaces as TaskCanceledException, which is not an
        // HttpRequestException and therefore is not wrapped in WebReaderException below.
        httpClient.Timeout = TimeSpan.FromSeconds(3);
        // Setting UA to facebook's bot to get the opengraph.
        httpClient.DefaultRequestHeaders.Add("User-Agent", "facebookexternalhit/1.1");
        try
        {
            using var response = await httpClient.GetAsync(url, cancellationToken);
            response.EnsureSuccessStatusCode();
            var contentType = response.Content.Headers.ContentType?.MediaType;
            // Media types are case-insensitive, so compare ordinally and ignore case.
            if (contentType is null || !contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
            {
                logger.LogWarning("URL is not an HTML page: {Url}, ContentType: {ContentType}", url, contentType);
                var nonHtmlEmbed = new LinkEmbed
                {
                    Url = url,
                    Title = uri.Host,
                    ContentType = contentType
                };
                // Cache non-HTML responses too
                await CacheLinkPreview(nonHtmlEmbed, url, cacheExpiry);
                return nonHtmlEmbed;
            }

            var html = await response.Content.ReadAsStringAsync(cancellationToken);
            var linkEmbed = await ExtractLinkData(url, html, uri);
            // Cache the result
            await CacheLinkPreview(linkEmbed, url, cacheExpiry);
            return linkEmbed;
        }
        catch (HttpRequestException ex)
        {
            logger.LogError(ex, "Failed to fetch URL: {Url}", url);
            throw new WebReaderException($"Failed to fetch URL: {url}", ex);
        }
    }

    /// <summary>
    /// Parse the HTML and build a link embed from its OpenGraph, Twitter card and standard meta tags.
    /// </summary>
    /// <param name="url">The URL stored on the embed</param>
    /// <param name="html">The HTML source of the page</param>
    /// <param name="uri">The parsed page URL, used for host fallbacks and to resolve relative links</param>
    /// <returns>The populated embed</returns>
    private static async Task<LinkEmbed> ExtractLinkData(string url, string html, Uri uri)
    {
        // Configure AngleSharp context; both the context and the document are disposable.
        using var context = BrowsingContext.New(Configuration.Default);
        using var document = await context.OpenAsync(req => req.Content(html));

        // Extract OpenGraph tags
        var ogTitle = GetMetaTagContent(document, "og:title");
        var ogDescription = GetMetaTagContent(document, "og:description");
        var ogImage = GetMetaTagContent(document, "og:image");
        var ogSiteName = GetMetaTagContent(document, "og:site_name");
        var ogType = GetMetaTagContent(document, "og:type");

        // Extract Twitter card tags as fallback
        var twitterTitle = GetMetaTagContent(document, "twitter:title");
        var twitterDescription = GetMetaTagContent(document, "twitter:description");
        var twitterImage = GetMetaTagContent(document, "twitter:image");

        // Extract standard meta tags as final fallback
        // (GetMetaTagContent already looks at both the property and the name attribute)
        var metaTitle = GetMetaTagContent(document, "title");
        var metaDescription = GetMetaTagContent(document, "description");

        // Extract page title; a blank title must not block the host fallback below
        var pageTitle = NullIfBlank(document.Title);

        // Extract publish date
        var publishedTime = GetMetaTagContent(document, "article:published_time") ??
                            GetMetaTagContent(document, "datePublished") ??
                            GetMetaTagContent(document, "pubdate");

        // Extract author
        var author = GetMetaTagContent(document, "author") ??
                     GetMetaTagContent(document, "article:author");

        // Populate the embed with the data, prioritizing OpenGraph
        var embed = new LinkEmbed
        {
            Url = url,
            Title = ogTitle ?? twitterTitle ?? metaTitle ?? pageTitle ?? uri.Host,
            Description = ogDescription ?? twitterDescription ?? metaDescription,
            ImageUrl = ResolveRelativeUrl(ogImage ?? twitterImage, uri),
            SiteName = ogSiteName ?? uri.Host,
            ContentType = ogType,
            FaviconUrl = GetFaviconUrl(document, uri),
            Author = author
        };

        // Parse and set published date
        if (!string.IsNullOrEmpty(publishedTime) &&
            DateTime.TryParse(publishedTime, CultureInfo.InvariantCulture, DateTimeStyles.AdjustToUniversal,
                out var parsedDate))
        {
            embed.PublishedDate = parsedDate;
        }

        return embed;
    }

    /// <summary>
    /// Read the content of a meta tag identified by its <c>property</c> attribute
    /// (OpenGraph style) or, failing that, by its <c>name</c> attribute.
    /// </summary>
    /// <returns>The trimmed content, or null when no matching tag has non-blank content</returns>
    private static string? GetMetaTagContent(IDocument doc, string property)
    {
        return ReadContent($"meta[property='{property}'][content]")
               ?? ReadContent($"meta[name='{property}'][content]");

        string? ReadContent(string selector) =>
            NullIfBlank(doc.QuerySelector(selector)?.GetAttribute("content"));
    }

    /// <summary>
    /// Trim the value and map null, empty and whitespace-only strings to null,
    /// so that blank metadata does not stop a <c>??</c> fallback chain.
    /// </summary>
    private static string? NullIfBlank(string? value)
    {
        var trimmed = value?.Trim();
        return string.IsNullOrEmpty(trimmed) ? null : trimmed;
    }

    /// <summary>
    /// Find the page icon, falling back to <c>/favicon.ico</c> on the page's origin.
    /// </summary>
    private static string? GetFaviconUrl(IDocument doc, Uri baseUri)
    {
        // Look for apple-touch-icon first as it's typically higher quality,
        // then check for standard favicon declarations
        var iconNode = doc.QuerySelector("link[rel='apple-touch-icon'][href]") ??
                       doc.QuerySelector("link[rel='icon'][href]") ??
                       doc.QuerySelector("link[rel='shortcut icon'][href]");

        return iconNode is not null
            ? ResolveRelativeUrl(iconNode.GetAttribute("href"), baseUri)
            : new Uri(baseUri, "/favicon.ico").ToString();
    }

    /// <summary>
    /// Turn a possibly relative URL found in the page into an absolute one.
    /// </summary>
    /// <param name="url">The URL as written in the page</param>
    /// <param name="baseUri">The page URL used to resolve relative references</param>
    /// <returns>The absolute URL, or null when the input is empty or cannot be resolved</returns>
    private static string? ResolveRelativeUrl(string? url, Uri baseUri)
    {
        if (string.IsNullOrEmpty(url))
        {
            return null;
        }

        // Protocol-relative reference: inherit the scheme of the page.
        if (url.StartsWith("//", StringComparison.Ordinal))
        {
            return $"{baseUri.Scheme}:{url}";
        }

        // On Unix a root-relative path such as "/img/a.png" parses as an absolute file URI,
        // so a file result is not accepted as "already absolute".
        if (Uri.TryCreate(url, UriKind.Absolute, out var absolute) && !absolute.IsFile)
        {
            return url; // Already absolute
        }

        return Uri.TryCreate(baseUri, url, out var resolved) ? resolved.ToString() : null;
    }

    /// <summary>
    /// Generate a hash-based cache key for a URL
    /// </summary>
    /// <returns>The cache prefix followed by the lower-case hex SHA-256 of the normalized URL</returns>
    private static string GenerateUrlCacheKey(string url)
    {
        var urlBytes = System.Text.Encoding.UTF8.GetBytes(NormalizeUrl(url));
        var hashBytes = System.Security.Cryptography.SHA256.HashData(urlBytes);
        var hashString = Convert.ToHexString(hashBytes).ToLowerInvariant();
        return $"{LinkPreviewCachePrefix}{hashString}";
    }

    /// <summary>
    /// Normalize URL by trimming trailing slashes but preserving query parameters
    /// </summary>
    private static string NormalizeUrl(string url)
    {
        if (string.IsNullOrEmpty(url))
            return string.Empty;

        // Not a valid absolute URI: only the trailing slashes can be normalized
        if (!Uri.TryCreate(url, UriKind.Absolute, out var uri))
            return url.TrimEnd('/');

        // Only scheme and host are case-insensitive. Path and query are kept as written,
        // otherwise URLs that differ only by case there would share one cache entry.
        var origin = $"{uri.Scheme}://{uri.Host}".ToLowerInvariant();
        var port = uri.IsDefaultPort ? string.Empty : $":{uri.Port}";
        var path = uri.AbsolutePath.TrimEnd('/');
        return $"{origin}{port}{path}{uri.Query}";
    }

    /// <summary>
    /// Cache a link preview
    /// </summary>
    /// <param name="linkEmbed">The preview to store; nothing is cached when null</param>
    /// <param name="url">The URL the preview belongs to</param>
    /// <param name="expiry">Expiration time, 24 hours when omitted</param>
    private async Task CacheLinkPreview(LinkEmbed? linkEmbed, string url, TimeSpan? expiry = null)
    {
        if (linkEmbed is null || string.IsNullOrEmpty(url))
            return;

        try
        {
            var cacheKey = GenerateUrlCacheKey(url);
            var expiryTime = expiry ?? DefaultCacheExpiry;
            await cache.SetWithGroupsAsync(
                cacheKey,
                linkEmbed,
                [LinkPreviewCacheGroup],
                expiryTime);
            logger.LogDebug("Cached link preview for URL: {Url} with key: {CacheKey}", url, cacheKey);
        }
        catch (Exception ex)
        {
            // Log but don't throw - caching failures shouldn't break the main functionality
            logger.LogWarning(ex, "Failed to cache link preview for URL: {Url}", url);
        }
    }

    /// <summary>
    /// Try to get a cached link preview
    /// </summary>
    /// <returns>The cached preview, or null on a cache miss or when the cache lookup fails</returns>
    private async Task<LinkEmbed?> GetCachedLinkPreview(string url)
    {
        if (string.IsNullOrEmpty(url))
            return null;

        try
        {
            var cacheKey = GenerateUrlCacheKey(url);
            var cachedPreview = await cache.GetAsync<LinkEmbed>(cacheKey);
            if (cachedPreview is not null)
                logger.LogDebug("Retrieved cached link preview for URL: {Url}", url);
            return cachedPreview;
        }
        catch (Exception ex)
        {
            // A failing cache is treated as a miss so the preview is fetched again
            logger.LogWarning(ex, "Failed to retrieve cached link preview for URL: {Url}", url);
            return null;
        }
    }

    /// <summary>
    /// Invalidate cache for a specific URL
    /// </summary>
    /// <param name="url">The URL whose cached preview should be removed</param>
    public async Task InvalidateCacheForUrlAsync(string url)
    {
        if (string.IsNullOrEmpty(url))
            return;

        try
        {
            var cacheKey = GenerateUrlCacheKey(url);
            await cache.RemoveAsync(cacheKey);
            logger.LogDebug("Invalidated cache for URL: {Url} with key: {CacheKey}", url, cacheKey);
        }
        catch (Exception ex)
        {
            logger.LogWarning(ex, "Failed to invalidate cache for URL: {Url}", url);
        }
    }

    /// <summary>
    /// Invalidate all cached link previews
    /// </summary>
    public async Task InvalidateAllCachedPreviewsAsync()
    {
        try
        {
            await cache.RemoveGroupAsync(LinkPreviewCacheGroup);
            logger.LogInformation("Invalidated all cached link previews");
        }
        catch (Exception ex)
        {
            logger.LogWarning(ex, "Failed to invalidate all cached link previews");
        }
    }
}