Add init weapon scraper
This commit is contained in:
51
BlueLaminate/BlueLaminate.Scraper/Wiki/WikiPageFetcher.cs
Normal file
51
BlueLaminate/BlueLaminate.Scraper/Wiki/WikiPageFetcher.cs
Normal file
@@ -0,0 +1,51 @@
|
||||
using System.Text.Json;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace BlueLaminate.Scraper.Wiki;
|
||||
|
||||
/// <summary>
/// Fetches a rendered page from the Counter-Strike Fandom wiki, shared by all
/// wiki scrapers.
/// <para>
/// The rendered HTML pages sit behind Cloudflare, which 403s .NET's TLS
/// fingerprint regardless of headers. The MediaWiki <c>action=parse</c> API is
/// not challenged, so we fetch the same content as JSON from there and return
/// the embedded HTML as a parsed document.
/// </para>
/// </summary>
public sealed class WikiPageFetcher
{
    private const string ApiBase = "https://counterstrike.fandom.com/api.php";

    private readonly HttpClient _http;

    /// <summary>Creates a fetcher that issues its requests through <paramref name="http"/>.</summary>
    /// <param name="http">The HTTP client used for every API request.</param>
    /// <exception cref="ArgumentNullException"><paramref name="http"/> is <see langword="null"/>.</exception>
    public WikiPageFetcher(HttpClient http)
    {
        // Fail at construction time instead of with a NullReferenceException on first use.
        ArgumentNullException.ThrowIfNull(http);
        _http = http;
    }

    /// <summary>Loads a wiki page (e.g. "Weapons") as a parsed HTML document.</summary>
    /// <param name="page">Title of the wiki page to load.</param>
    /// <param name="ct">Token used to cancel the request and the response parsing.</param>
    /// <returns>The page's rendered HTML, parsed into an <see cref="HtmlDocument"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="page"/> is <see langword="null"/>.</exception>
    /// <exception cref="HttpRequestException">The API responded with a non-success status code.</exception>
    /// <exception cref="JsonException">The response body is not valid JSON.</exception>
    /// <exception cref="InvalidOperationException">
    /// The API reported an error, or the response did not contain the parsed page text.
    /// </exception>
    public async Task<HtmlDocument> LoadAsync(string page, CancellationToken ct = default)
    {
        ArgumentNullException.ThrowIfNull(page);

        var url = $"{ApiBase}?action=parse&page={Uri.EscapeDataString(page)}&prop=text&format=json";

        using var resp = await _http.GetAsync(url, ct);
        resp.EnsureSuccessStatusCode();

        await using var stream = await resp.Content.ReadAsStreamAsync(ct);
        using var json = await JsonDocument.ParseAsync(stream, cancellationToken: ct);

        var html = ExtractHtml(json.RootElement, page);

        var doc = new HtmlDocument();
        doc.LoadHtml(html);
        return doc;
    }

    /// <summary>
    /// Pulls the rendered HTML string out of an <c>action=parse</c> JSON response,
    /// turning API errors and unexpected response shapes into descriptive exceptions.
    /// </summary>
    private static string ExtractHtml(JsonElement root, string page)
    {
        if (root.ValueKind != JsonValueKind.Object)
        {
            throw new InvalidOperationException($"Wiki API response for page '{page}' had no parsed text.");
        }

        // The API signals failures (e.g. a missing page) through an "error" object
        // in the body rather than through the HTTP status code checked earlier.
        if (root.TryGetProperty("error", out var error))
        {
            var info = error.ValueKind == JsonValueKind.Object
                && error.TryGetProperty("info", out var detail)
                && detail.ValueKind == JsonValueKind.String
                    ? detail.GetString()
                    : "unknown error";
            throw new InvalidOperationException($"Wiki API returned an error for page '{page}': {info}");
        }

        // The HTML lives at parse.text["*"]. Probe each level so that a missing or
        // mistyped member produces the same clear error instead of a bare
        // KeyNotFoundException from GetProperty.
        if (!root.TryGetProperty("parse", out var parse) || parse.ValueKind != JsonValueKind.Object
            || !parse.TryGetProperty("text", out var text) || text.ValueKind != JsonValueKind.Object
            || !text.TryGetProperty("*", out var content) || content.ValueKind != JsonValueKind.String)
        {
            throw new InvalidOperationException($"Wiki API response for page '{page}' had no parsed text.");
        }

        // ValueKind was verified to be String above, so GetString cannot return null here.
        return content.GetString()!;
    }
}
|
||||
Reference in New Issue
Block a user