Change to static skin catalog population

This commit is contained in:
bob
2026-05-29 18:36:17 -05:00
parent 6f3c0175cd
commit b51f1d9f5f
26 changed files with 3063 additions and 370 deletions

View File

@@ -6,8 +6,4 @@
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="HtmlAgilityPack" Version="1.12.4" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,36 @@
namespace BlueLaminate.Scraper.Skins;
/// <summary>
/// One CS2 skin entry taken from the CSGO-API static catalogue (skins.json).
/// </summary>
/// <param name="Id">
/// Stable catalogue id such as "skin-e757fd7191f9"; globally unique and used as the natural key.
/// </param>
/// <param name="WeaponName">Weapon the skin belongs to, e.g. "AK-47", "Hand Wraps", "Bayonet".</param>
/// <param name="Category">
/// Weapon category such as "Rifles", "Knives" or "Gloves"; this value becomes the weapon type.
/// </param>
/// <param name="Team">One of "CT", "T" or "Both".</param>
/// <param name="Name">
/// Skin/pattern name, e.g. "Dragon Lore". Knives without a finish use "Vanilla".
/// </param>
/// <param name="Rarity">Rarity tier, e.g. "Covert", "Classified", "Extraordinary".</param>
/// <param name="Description">Flavour/description text; null when absent.</param>
/// <param name="ImageUrl">Catalogue image URL; null when absent.</param>
/// <param name="StatTrakAvailable">Whether a StatTrak variant exists.</param>
/// <param name="SouvenirAvailable">Whether a Souvenir variant exists.</param>
/// <param name="FloatMin">
/// Lower bound of the wear range; null when the catalogue provides none (e.g. vanilla knives).
/// </param>
/// <param name="FloatMax">Upper bound of the wear range; null when absent.</param>
/// <param name="Sources">The collections and containers the skin belongs to.</param>
public sealed record CatalogSkin(
    string Id,
    string WeaponName,
    string Category,
    string Team,
    string Name,
    string Rarity,
    string? Description,
    string? ImageUrl,
    bool StatTrakAvailable,
    bool SouvenirAvailable,
    decimal? FloatMin,
    decimal? FloatMax,
    IReadOnlyList<CatalogSource> Sources);
/// <summary>
/// A place a skin originates from: either a collection or a container.
/// </summary>
/// <param name="Id">
/// Stable catalogue id used as the natural key, e.g. "collection-set-community-37" or "crate-4288".
/// </param>
/// <param name="Name">Display name, e.g. "The Dead Hand Collection" or "Glove Case".</param>
/// <param name="Type">Either "Collection" or "Container".</param>
public sealed record CatalogSource(
    string Id,
    string Name,
    string Type);

View File

@@ -0,0 +1,105 @@
using System.Text.Json;
using System.Text.Json.Serialization;
namespace BlueLaminate.Scraper.Skins;
/// <summary>
/// Loads the CS2 skin catalogue from the ByMykel/CSGO-API static dataset
/// (skins.json) and maps it to <see cref="CatalogSkin"/> records. This replaces
/// the old HTML scraper: one JSON file carries every skin with its weapon,
/// category, rarity, wear range, and the collections/containers it comes from.
/// </summary>
public sealed class SkinCatalogClient
{
    /// <summary>Dataset URL used when the constructor is not given an explicit one.</summary>
    public const string DefaultUrl =
        "https://raw.githubusercontent.com/ByMykel/CSGO-API/refs/heads/main/public/api/en/skins.json";

    // DTO properties are matched to snake_case JSON keys (MinFloat -> min_float),
    // case-insensitively; numbers are also accepted when they arrive quoted as strings.
    private static readonly JsonSerializerOptions Options = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        PropertyNameCaseInsensitive = true,
        NumberHandling = JsonNumberHandling.AllowReadingFromString,
    };

    private readonly HttpClient _http;
    private readonly string _url;

    /// <summary>Creates a client that downloads the catalogue through <paramref name="http"/>.</summary>
    /// <param name="http">HTTP client used for the download. This class never disposes it.</param>
    /// <param name="url">Dataset location; <see cref="DefaultUrl"/> is used when null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="http"/> is null.</exception>
    public SkinCatalogClient(HttpClient http, string? url = null)
    {
        // Fail at construction instead of with a NullReferenceException on the first fetch.
        ArgumentNullException.ThrowIfNull(http);

        _http = http;
        _url = url ?? DefaultUrl;
    }

    /// <summary>Downloads skins.json and converts every usable entry to a <see cref="CatalogSkin"/>.</summary>
    /// <param name="ct">Token that cancels the download and the deserialization.</param>
    /// <returns>
    /// The mapped skins in document order. Array elements that are JSON <c>null</c> or that
    /// have no id are left out, because they cannot be keyed.
    /// </returns>
    /// <exception cref="InvalidOperationException">The document deserialized to null.</exception>
    /// <remarks>Transport and JSON parsing failures are not caught here and propagate to the caller.</remarks>
    public async Task<IReadOnlyList<CatalogSkin>> FetchAsync(CancellationToken ct = default)
    {
        await using var stream = await _http.GetStreamAsync(_url, ct);
        var dtos = await JsonSerializer.DeserializeAsync<List<SkinDto?>>(stream, Options, ct)
            ?? throw new InvalidOperationException("skins.json deserialized to null.");

        var skins = new List<CatalogSkin>(dtos.Count);
        foreach (var dto in dtos)
        {
            if (Map(dto) is { } skin)
            {
                skins.Add(skin);
            }
        }

        return skins;
    }

    /// <summary>
    /// Converts one raw entry to a <see cref="CatalogSkin"/>, or returns null when the entry
    /// is missing or carries no id.
    /// </summary>
    private static CatalogSkin? Map(SkinDto? dto)
    {
        // The serializer does not enforce non-nullable annotations, so an entry without an
        // "id" key arrives with Id == null. Such an entry has no natural key and is skipped,
        // mirroring how AddSources skips sources without an id or name.
        if (dto is null || string.IsNullOrEmpty(dto.Id))
        {
            return null;
        }

        // One id set shared by both calls keeps de-duplication spanning collections and crates.
        var sources = new List<CatalogSource>();
        var seenIds = new HashSet<string>();
        AddSources(sources, seenIds, dto.Collections, "Collection");
        AddSources(sources, seenIds, dto.Crates, "Container");

        return new CatalogSkin(
            Id: dto.Id,
            WeaponName: dto.Weapon?.Name ?? "Unknown",
            Category: dto.Category?.Name ?? "Unknown",
            Team: MapTeam(dto.Team?.Id),
            // Knives with no finish carry a null pattern; "Vanilla" is the community term.
            Name: dto.Pattern?.Name ?? "Vanilla",
            Rarity: dto.Rarity?.Name ?? "Unknown",
            Description: dto.Description,
            ImageUrl: dto.Image,
            StatTrakAvailable: dto.Stattrak,
            SouvenirAvailable: dto.Souvenir,
            FloatMin: dto.MinFloat,
            FloatMax: dto.MaxFloat,
            Sources: sources);
    }

    /// <summary>
    /// Appends each item of <paramref name="items"/> to <paramref name="into"/> as a source of
    /// the given <paramref name="type"/>, skipping null items, items without an id or name,
    /// and ids already recorded in <paramref name="seenIds"/>.
    /// </summary>
    private static void AddSources(
        List<CatalogSource> into,
        HashSet<string> seenIds,
        List<NamedDto?>? items,
        string type)
    {
        if (items is null)
        {
            return;
        }

        foreach (var item in items)
        {
            if (item is null || string.IsNullOrEmpty(item.Id) || string.IsNullOrEmpty(item.Name))
            {
                continue;
            }

            // HashSet.Add returns false for an id that was already seen.
            if (seenIds.Add(item.Id))
            {
                into.Add(new CatalogSource(item.Id, item.Name, type));
            }
        }
    }

    /// <summary>Maps the dataset's team id to "T" or "CT"; every other value (including null) yields "Both".</summary>
    private static string MapTeam(string? teamId) => teamId switch
    {
        "terrorists" => "T",
        "counter-terrorists" => "CT",
        _ => "Both",
    };

    // Wire shape of one skins.json entry. Every reference-typed member is nullable because
    // the serializer leaves absent keys at their default value.
    private sealed record SkinDto(
        string? Id,
        string? Name,
        string? Description,
        NamedDto? Weapon,
        NamedDto? Category,
        NamedDto? Pattern,
        decimal? MinFloat,
        decimal? MaxFloat,
        NamedDto? Rarity,
        bool Stattrak,
        bool Souvenir,
        string? Image,
        NamedDto? Team,
        List<NamedDto?>? Collections,
        List<NamedDto?>? Crates);

    // Wire shape shared by every nested { id, name } object in the dataset.
    private sealed record NamedDto(string? Id, string? Name);
}

View File

@@ -1,7 +0,0 @@
namespace BlueLaminate.Scraper.Weapons;
/// <summary>
/// One CS2 weapon as parsed from the Counter-Strike wiki.
/// </summary>
/// <param name="Name">Display name of the weapon, e.g. "AK-47".</param>
/// <param name="Type">Category taken from the wiki heading, e.g. "Pistols" or "Assault Rifles".</param>
/// <param name="Team">One of "CT", "T" or "Both".</param>
public sealed record ScrapedWeapon(
    string Name,
    string Type,
    string Team);

View File

@@ -1,172 +0,0 @@
using System.Text.RegularExpressions;
using BlueLaminate.Scraper.Wiki;
using HtmlAgilityPack;
namespace BlueLaminate.Scraper.Weapons;
/// <summary>
/// Builds the CS2 weapon list from the wiki's "Weapons" page.
///
/// The page is organised as one "tabber" per weapon category, each directly
/// following the section heading that names the category (the weapon Type).
/// Within a tabber, the "Global Offensive &amp; Counter-Strike 2" tab contains a
/// gallery whose captions name the weapons; a caption may end in "(CT)" or
/// "(T)" when the weapon is locked to one team.
/// </summary>
public sealed class WeaponWikiScraper
{
    private const string Page = "Weapons";
    private const string Cs2TabHash = "Global_Offensive_&_Counter-Strike_2";

    // Trailing "(CT)" / "(T)" team marker; group 1 holds the team.
    private static readonly Regex TeamAnnotation =
        new(@"\s*\((CT|T)\)\s*$", RegexOptions.Compiled);

    // The default knife is captioned "Stock Knife" on the wiki; this strips the "Stock " prefix.
    private static readonly Regex StockPrefix =
        new(@"^Stock\s+", RegexOptions.Compiled);

    private readonly WikiPageFetcher _fetcher;

    /// <summary>Creates a scraper that loads wiki pages through <paramref name="fetcher"/>.</summary>
    public WeaponWikiScraper(WikiPageFetcher fetcher)
    {
        _fetcher = fetcher;
    }

    /// <summary>
    /// Loads the "Weapons" page and returns one <see cref="ScrapedWeapon"/> per distinct
    /// weapon name, in order of first appearance.
    /// </summary>
    /// <param name="ct">Token forwarded to the page fetch.</param>
    public async Task<IReadOnlyList<ScrapedWeapon>> ScrapeAsync(CancellationToken ct = default)
    {
        var document = await _fetcher.LoadAsync(Page, ct);

        // Headings and tabbers are selected together so that, walking the result,
        // each tabber is filed under the heading seen most recently before it.
        var candidates = document.DocumentNode.SelectNodes(
            $"//h2 | //h3 | //h4 | //div[{HasClass("tabber")}]");

        var weapons = new WeaponAggregator();
        if (candidates is null)
        {
            return weapons.Build();
        }

        string? category = null;
        foreach (var candidate in candidates)
        {
            switch (candidate.Name)
            {
                case "h2":
                case "h3":
                case "h4":
                    category = HeadingText(candidate);
                    break;

                default:
                    // A tabber that appears before any heading has no category and is ignored.
                    if (category is not null)
                    {
                        foreach (var caption in Cs2Captions(candidate))
                        {
                            weapons.Add(caption, category);
                        }
                    }

                    break;
            }
        }

        return weapons.Build();
    }

    /// <summary>XPath predicate body that is true when the element's class list contains <paramref name="className"/>.</summary>
    private static string HasClass(string className) =>
        $"contains(concat(' ', normalize-space(@class), ' '), ' {className} ')";

    /// <summary>Yields the normalized caption texts of the CS2 tab of one tabber; yields nothing when that tab is absent.</summary>
    private static IEnumerable<string> Cs2Captions(HtmlNode tabber)
    {
        var tabs = tabber.SelectNodes($".//li[{HasClass("wds-tabs__tab")}]");
        if (tabs is null)
        {
            yield break;
        }

        // Advance to the first tab whose hash identifies the CS2 tab.
        var position = 0;
        while (position < tabs.Count && TabHash(tabs[position]) != Cs2TabHash)
        {
            position++;
        }

        if (position == tabs.Count)
        {
            yield break;
        }

        // The content panel is looked up by the same position as its tab.
        var panels = tabber.SelectNodes($".//div[{HasClass("wds-tab__content")}]");
        if (panels is null || position >= panels.Count)
        {
            yield break;
        }

        var captionNodes = panels[position].SelectNodes($".//div[{HasClass("lightbox-caption")}]");
        if (captionNodes is null)
        {
            yield break;
        }

        foreach (var captionNode in captionNodes)
        {
            yield return WikiText.Normalize(captionNode.InnerText);
        }

        // HtmlAgilityPack hands back attribute values without decoding entities, and the
        // wiki writes the "&" of this hash as &amp;, so the value is decoded before comparing.
        static string TabHash(HtmlNode tab) =>
            HtmlEntity.DeEntitize(tab.GetAttributeValue("data-hash", string.Empty));
    }

    /// <summary>Normalized text of a heading, taken from its "mw-headline" span when it has one.</summary>
    private static string HeadingText(HtmlNode heading)
    {
        var textNode = heading.SelectSingleNode($".//span[{HasClass("mw-headline")}]") ?? heading;
        return WikiText.Normalize(textNode.InnerText);
    }

    /// <summary>
    /// Merges caption rows into a single weapon per name and remembers which team
    /// markers were seen, so a weapon captioned with both "(CT)" and "(T)" (or
    /// without any marker) ends up as "Both".
    /// </summary>
    private sealed class WeaponAggregator
    {
        /// <summary>Accumulated state for one weapon name.</summary>
        private sealed class Entry
        {
            public Entry(string name, string type)
            {
                Name = name;
                Type = type;
            }

            public string Name { get; }
            public string Type { get; }
            public bool SawCt;
            public bool SawT;
            public bool SawUnannotated;

            /// <summary>Team derived from the markers seen so far.</summary>
            public string ResolveTeam()
            {
                // Unmarked, marked for both teams, or never marked at all: available to both.
                if (SawUnannotated || SawCt == SawT)
                {
                    return "Both";
                }

                return SawCt ? "CT" : "T";
            }
        }

        // Entries in first-seen order, plus a name index for merging repeated captions.
        private readonly List<Entry> _entries = new();
        private readonly Dictionary<string, Entry> _index = new();

        /// <summary>
        /// Records one caption under <paramref name="type"/>. The category of a weapon is
        /// fixed by its first caption; later captions only contribute team markers.
        /// </summary>
        public void Add(string caption, string type)
        {
            if (string.IsNullOrEmpty(caption))
            {
                return;
            }

            var marker = TeamAnnotation.Match(caption);
            var withoutMarker = TeamAnnotation.Replace(caption, string.Empty);
            var name = StockPrefix.Replace(withoutMarker, string.Empty).Trim();
            if (name.Length == 0)
            {
                return;
            }

            if (!_index.TryGetValue(name, out var entry))
            {
                entry = new Entry(name, type);
                _index[name] = entry;
                _entries.Add(entry);
            }

            switch (marker.Success ? marker.Groups[1].Value : null)
            {
                case null:
                    entry.SawUnannotated = true;
                    break;
                case "CT":
                    entry.SawCt = true;
                    break;
                default:
                    entry.SawT = true;
                    break;
            }
        }

        /// <summary>Produces the merged weapons in the order their names were first added.</summary>
        public IReadOnlyList<ScrapedWeapon> Build()
        {
            var weapons = new List<ScrapedWeapon>(_entries.Count);
            foreach (var entry in _entries)
            {
                weapons.Add(new ScrapedWeapon(entry.Name, entry.Type, entry.ResolveTeam()));
            }

            return weapons;
        }
    }
}

View File

@@ -1,51 +0,0 @@
using System.Text.Json;
using HtmlAgilityPack;
namespace BlueLaminate.Scraper.Wiki;
/// <summary>
/// Retrieves a rendered page from the Counter-Strike Fandom wiki on behalf of
/// every wiki scraper.
///
/// The rendered HTML pages are fronted by Cloudflare, which answers .NET's TLS
/// fingerprint with a 403 whatever headers are sent. The MediaWiki
/// <c>action=parse</c> API is not challenged, so the same content is requested
/// from there as JSON and the HTML embedded in it is returned as a parsed
/// document.
/// </summary>
public sealed class WikiPageFetcher
{
    private const string ApiBase = "https://counterstrike.fandom.com/api.php";

    private readonly HttpClient _http;

    /// <summary>Creates a fetcher that issues its requests through <paramref name="http"/>.</summary>
    public WikiPageFetcher(HttpClient http)
    {
        _http = http;
    }

    /// <summary>Loads a wiki page (e.g. "Weapons") and returns it as a parsed HTML document.</summary>
    /// <param name="page">Wiki page title; it is URL-escaped before being placed in the query.</param>
    /// <param name="ct">Token passed to the request, the body read and the JSON parse.</param>
    /// <exception cref="InvalidOperationException">
    /// The API response contains an "error" member, or its parsed text is a JSON null.
    /// </exception>
    public async Task<HtmlDocument> LoadAsync(string page, CancellationToken ct = default)
    {
        var query = $"?action=parse&page={Uri.EscapeDataString(page)}&prop=text&format=json";

        using var response = await _http.GetAsync(ApiBase + query, ct);
        response.EnsureSuccessStatusCode();

        await using var body = await response.Content.ReadAsStreamAsync(ct);
        using var payload = await JsonDocument.ParseAsync(body, cancellationToken: ct);
        var root = payload.RootElement;

        // The API reports failures in an "error" member of the response body,
        // so that member is checked even though the status code was successful.
        if (root.TryGetProperty("error", out var error))
        {
            string? info = "unknown error";
            if (error.TryGetProperty("info", out var infoElement))
            {
                info = infoElement.GetString();
            }

            throw new InvalidOperationException($"Wiki API returned an error for page '{page}': {info}");
        }

        // The rendered HTML is read from parse -> text -> "*".
        var html = root.GetProperty("parse").GetProperty("text").GetProperty("*").GetString();
        if (html is null)
        {
            throw new InvalidOperationException($"Wiki API response for page '{page}' had no parsed text.");
        }

        var document = new HtmlDocument();
        document.LoadHtml(html);
        return document;
    }
}

View File

@@ -1,14 +0,0 @@
using System.Text.RegularExpressions;
using HtmlAgilityPack;
namespace BlueLaminate.Scraper.Wiki;
/// <summary>Text utilities used by the wiki scrapers.</summary>
public static class WikiText
{
    // One or more consecutive whitespace characters.
    private static readonly Regex Whitespace = new(@"\s+", RegexOptions.Compiled);

    /// <summary>
    /// Decodes HTML entities in <paramref name="raw"/>, replaces every run of whitespace
    /// with a single space, and trims the ends.
    /// </summary>
    /// <param name="raw">Text as extracted from the HTML document.</param>
    /// <returns>The cleaned text; empty when decoding yields null.</returns>
    public static string Normalize(string raw)
    {
        var decoded = HtmlEntity.DeEntitize(raw) ?? string.Empty;
        var collapsed = Whitespace.Replace(decoded, " ");
        return collapsed.Trim();
    }
}