using System.Text.Json;
using HtmlAgilityPack;

namespace BlueLaminate.Scraper.Wiki;

/// <summary>
/// Fetches a rendered page from the Counter-Strike Fandom wiki, shared by all
/// wiki scrapers.
/// </summary>
/// <remarks>
/// The rendered HTML pages sit behind Cloudflare, which 403s .NET's TLS
/// fingerprint regardless of headers. The MediaWiki <c>action=parse</c> API is
/// not challenged, so we fetch the same content as JSON from there and return
/// the embedded HTML as a parsed document.
/// </remarks>
public sealed class WikiPageFetcher
{
    private const string ApiBase = "https://counterstrike.fandom.com/api.php";

    private readonly HttpClient _http;

    /// <summary>Creates a fetcher that issues its requests through <paramref name="http"/>.</summary>
    /// <param name="http">The HTTP client used for every API request.</param>
    /// <exception cref="ArgumentNullException"><paramref name="http"/> is <c>null</c>.</exception>
    public WikiPageFetcher(HttpClient http) =>
        _http = http ?? throw new ArgumentNullException(nameof(http));

    /// <summary>Loads a wiki page (e.g. "Weapons") as a parsed HTML document.</summary>
    /// <param name="page">Title of the wiki page; it is URL-escaped before being sent.</param>
    /// <param name="ct">Token used to cancel the HTTP request and the JSON parsing.</param>
    /// <returns>The HTML embedded in the API response, loaded into an <see cref="HtmlDocument"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="page"/> is <c>null</c>.</exception>
    /// <exception cref="HttpRequestException">The API answered with a non-success status code.</exception>
    /// <exception cref="JsonException">The response body is not valid JSON.</exception>
    /// <exception cref="InvalidOperationException">
    /// The API reported an error for the page, or the response did not contain
    /// the expected <c>parse.text.*</c> string.
    /// </exception>
    public async Task<HtmlDocument> LoadAsync(string page, CancellationToken ct = default)
    {
        // Fail with the caller's parameter name instead of the one used inside Uri.EscapeDataString.
        if (page is null)
        {
            throw new ArgumentNullException(nameof(page));
        }

        var url = $"{ApiBase}?action=parse&page={Uri.EscapeDataString(page)}&prop=text&format=json";

        using var resp = await _http.GetAsync(url, ct);
        resp.EnsureSuccessStatusCode();

        await using var stream = await resp.Content.ReadAsStreamAsync(ct);
        using var json = await JsonDocument.ParseAsync(stream, cancellationToken: ct);

        // The string is copied out of the JsonDocument here, before the document is disposed.
        var html = ExtractHtml(json.RootElement, page);

        var doc = new HtmlDocument();
        doc.LoadHtml(html);
        return doc;
    }

    /// <summary>
    /// Pulls the rendered HTML out of an <c>action=parse</c> response, turning an
    /// API-reported error or an unexpected response shape into an
    /// <see cref="InvalidOperationException"/> that names the page.
    /// </summary>
    private static string ExtractHtml(JsonElement root, string page)
    {
        if (root.ValueKind != JsonValueKind.Object)
        {
            throw new InvalidOperationException($"Wiki API response for page '{page}' was not a JSON object.");
        }

        if (root.TryGetProperty("error", out var error))
        {
            // Fall back to a fixed message when "info" is absent, null, or not a string.
            var info = "unknown error";
            if (error.ValueKind == JsonValueKind.Object
                && error.TryGetProperty("info", out var detail)
                && detail.ValueKind == JsonValueKind.String
                && detail.GetString() is { } message)
            {
                info = message;
            }

            throw new InvalidOperationException($"Wiki API returned an error for page '{page}': {info}");
        }

        // Walk parse -> text -> "*" defensively so a missing or mistyped member
        // produces the descriptive error below rather than a KeyNotFoundException.
        if (root.TryGetProperty("parse", out var parse)
            && parse.ValueKind == JsonValueKind.Object
            && parse.TryGetProperty("text", out var text)
            && text.ValueKind == JsonValueKind.Object
            && text.TryGetProperty("*", out var body)
            && body.ValueKind == JsonValueKind.String
            && body.GetString() is { } html)
        {
            return html;
        }

        throw new InvalidOperationException($"Wiki API response for page '{page}' had no parsed text.");
    }
}