Change to static skin catalog population
This commit is contained in:
@@ -6,8 +6,4 @@
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="HtmlAgilityPack" Version="1.12.4" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
36
BlueLaminate/BlueLaminate.Scraper/Skins/CatalogSkin.cs
Normal file
36
BlueLaminate/BlueLaminate.Scraper/Skins/CatalogSkin.cs
Normal file
@@ -0,0 +1,36 @@
|
||||
namespace BlueLaminate.Scraper.Skins;
|
||||
|
||||
/// <summary>
/// One CS2 skin entry as read from the CSGO-API static catalogue (skins.json).
/// </summary>
/// <param name="Id">
/// Stable catalogue id such as "skin-e757fd7191f9"; globally unique, used as the natural key.
/// </param>
/// <param name="WeaponName">Weapon the skin belongs to, e.g. "AK-47", "Hand Wraps", "Bayonet".</param>
/// <param name="Category">
/// Weapon category such as "Rifles", "Knives" or "Gloves"; this becomes the weapon type.
/// </param>
/// <param name="Team">One of "CT", "T", or "Both".</param>
/// <param name="Name">
/// Skin/pattern name, e.g. "Dragon Lore". Knives that have no finish use "Vanilla".
/// </param>
/// <param name="Rarity">Rarity tier, e.g. "Covert", "Classified", "Extraordinary".</param>
/// <param name="Description">Flavour/description text; null when absent.</param>
/// <param name="ImageUrl">URL of the catalogue image; null when absent.</param>
/// <param name="StatTrakAvailable">Whether a StatTrak variant exists.</param>
/// <param name="SouvenirAvailable">Whether a Souvenir variant exists.</param>
/// <param name="FloatMin">
/// Lowest wear value; null when the catalogue provides none (e.g. vanilla knives).
/// </param>
/// <param name="FloatMax">Highest wear value; null when the catalogue provides none.</param>
/// <param name="Sources">The collections and containers the skin belongs to.</param>
public sealed record CatalogSkin(
    string Id,
    string WeaponName,
    string Category,
    string Team,
    string Name,
    string Rarity,
    string? Description,
    string? ImageUrl,
    bool StatTrakAvailable,
    bool SouvenirAvailable,
    decimal? FloatMin,
    decimal? FloatMax,
    IReadOnlyList<CatalogSource> Sources);
|
||||
|
||||
/// <summary>
/// Origin of a skin: either a collection or a container.
/// </summary>
/// <param name="Id">
/// Stable catalogue id used as the natural key, e.g. "collection-set-community-37" or "crate-4288".
/// </param>
/// <param name="Name">Human-readable name, e.g. "The Dead Hand Collection", "Glove Case".</param>
/// <param name="Type">Either "Collection" or "Container".</param>
public sealed record CatalogSource(
    string Id,
    string Name,
    string Type);
|
||||
105
BlueLaminate/BlueLaminate.Scraper/Skins/SkinCatalogClient.cs
Normal file
105
BlueLaminate/BlueLaminate.Scraper/Skins/SkinCatalogClient.cs
Normal file
@@ -0,0 +1,105 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace BlueLaminate.Scraper.Skins;
|
||||
|
||||
/// <summary>
/// Loads the CS2 skin catalogue from the ByMykel/CSGO-API static dataset
/// (skins.json) and maps it to <see cref="CatalogSkin"/> records. This replaces
/// the old HTML scraper: one JSON file carries every skin with its weapon,
/// category, rarity, wear range, and the collections/containers it comes from.
/// </summary>
public sealed class SkinCatalogClient
{
    /// <summary>URL used when the constructor is not given an explicit one.</summary>
    public const string DefaultUrl =
        "https://raw.githubusercontent.com/ByMykel/CSGO-API/refs/heads/main/public/api/en/skins.json";

    // DTO properties bind through the snake_case-lower policy (MinFloat <- "min_float"),
    // case-insensitively, and numeric values are also accepted when quoted as strings.
    private static readonly JsonSerializerOptions Options = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        PropertyNameCaseInsensitive = true,
        NumberHandling = JsonNumberHandling.AllowReadingFromString,
    };

    private readonly HttpClient _http;
    private readonly string _url;

    /// <summary>Creates a client that reads the catalogue over <paramref name="http"/>.</summary>
    /// <param name="http">HTTP client used for the download; its lifetime stays with the caller.</param>
    /// <param name="url">Catalogue location; <see cref="DefaultUrl"/> is used when null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="http"/> is null.</exception>
    public SkinCatalogClient(HttpClient http, string? url = null)
    {
        // Fail at construction instead of with a NullReferenceException on the first fetch.
        ArgumentNullException.ThrowIfNull(http);

        _http = http;
        _url = url ?? DefaultUrl;
    }

    /// <summary>Downloads skins.json and maps every entry to a <see cref="CatalogSkin"/>.</summary>
    /// <param name="ct">Cancels the download and the deserialization.</param>
    /// <returns>One record per catalogue entry, in the order they appear in the file.</returns>
    /// <exception cref="InvalidOperationException">
    /// The document deserialized to null, or an entry is null or carries no id.
    /// </exception>
    public async Task<IReadOnlyList<CatalogSkin>> FetchAsync(CancellationToken ct = default)
    {
        await using var stream = await _http.GetStreamAsync(_url, ct);

        // Elements are typed as nullable because a JSON array may legally contain null.
        var dtos = await JsonSerializer.DeserializeAsync<List<SkinDto?>>(stream, Options, ct)
            ?? throw new InvalidOperationException("skins.json deserialized to null.");

        return dtos.Select(Map).ToList();
    }

    /// <summary>Converts one raw catalogue entry into the public record shape.</summary>
    /// <exception cref="InvalidOperationException">The entry is null or has no id.</exception>
    private static CatalogSkin Map(SkinDto? dto)
    {
        // The id is the record's natural key. Rejecting an entry without one keeps a
        // changed or corrupt feed from silently producing records with a null key.
        if (dto is null || string.IsNullOrEmpty(dto.Id))
            throw new InvalidOperationException("skins.json contains an entry without an id.");

        var sources = new List<CatalogSource>();
        // Shared across both calls so an id is emitted once even if it appears in both lists.
        var seenIds = new HashSet<string>(StringComparer.Ordinal);
        AddSources(sources, seenIds, dto.Collections, "Collection");
        AddSources(sources, seenIds, dto.Crates, "Container");

        return new CatalogSkin(
            Id: dto.Id,
            WeaponName: dto.Weapon?.Name ?? "Unknown",
            Category: dto.Category?.Name ?? "Unknown",
            Team: MapTeam(dto.Team?.Id),
            // Knives with no finish carry a null pattern; "Vanilla" is the community term.
            Name: dto.Pattern?.Name ?? "Vanilla",
            Rarity: dto.Rarity?.Name ?? "Unknown",
            Description: dto.Description,
            ImageUrl: dto.Image,
            StatTrakAvailable: dto.Stattrak,
            SouvenirAvailable: dto.Souvenir,
            FloatMin: dto.MinFloat,
            FloatMax: dto.MaxFloat,
            Sources: sources);
    }

    /// <summary>
    /// Appends each usable item to <paramref name="into"/> as a source of the given
    /// <paramref name="type"/>. Null items, items missing an id or a name, and ids
    /// already recorded in <paramref name="seenIds"/> are skipped.
    /// </summary>
    private static void AddSources(
        List<CatalogSource> into,
        HashSet<string> seenIds,
        List<NamedDto?>? items,
        string type)
    {
        if (items is null)
            return;

        foreach (var item in items)
        {
            // The name is checked before the id is recorded, so an unusable entry
            // does not block a later, complete entry with the same id.
            if (item is not null
                && !string.IsNullOrEmpty(item.Id)
                && !string.IsNullOrEmpty(item.Name)
                && seenIds.Add(item.Id))
            {
                into.Add(new CatalogSource(item.Id, item.Name, type));
            }
        }
    }

    /// <summary>
    /// Maps the catalogue's team id to "T" or "CT"; every other value, including null,
    /// maps to "Both".
    /// </summary>
    private static string MapTeam(string? teamId) => teamId switch
    {
        "terrorists" => "T",
        "counter-terrorists" => "CT",
        _ => "Both",
    };

    // Raw shape of one skins.json entry. Id is nullable because the deserializer
    // leaves it null when the key is absent; Map validates it.
    private sealed record SkinDto(
        string? Id,
        string? Name,
        string? Description,
        NamedDto? Weapon,
        NamedDto? Category,
        NamedDto? Pattern,
        decimal? MinFloat,
        decimal? MaxFloat,
        NamedDto? Rarity,
        bool Stattrak,
        bool Souvenir,
        string? Image,
        NamedDto? Team,
        List<NamedDto?>? Collections,
        List<NamedDto?>? Crates);

    // Generic { id, name } object used for weapon, category, pattern, rarity, team,
    // and for the entries of the collections/crates lists.
    private sealed record NamedDto(string? Id, string? Name);
}
|
||||
@@ -1,7 +0,0 @@
|
||||
namespace BlueLaminate.Scraper.Weapons;
|
||||
|
||||
/// <summary>
/// One CS2 weapon as parsed from the Counter-Strike wiki.
/// </summary>
/// <param name="Name">Display name of the weapon, e.g. "AK-47".</param>
/// <param name="Type">
/// Category taken from the wiki heading, e.g. "Pistols", "Assault Rifles".
/// </param>
/// <param name="Team">One of "CT", "T", or "Both".</param>
public sealed record ScrapedWeapon(
    string Name,
    string Type,
    string Team);
|
||||
@@ -1,172 +0,0 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using BlueLaminate.Scraper.Wiki;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace BlueLaminate.Scraper.Weapons;
|
||||
|
||||
/// <summary>
/// Builds the CS2 weapon catalogue from the wiki's "Weapons" page.
///
/// Page layout: there is one "tabber" per weapon category, and the section
/// heading immediately before a tabber names that category (the weapon Type).
/// Within a tabber, the "Global Offensive &amp; Counter-Strike 2" tab holds a
/// gallery of captions, one per weapon. A caption may end in "(CT)" or "(T)"
/// when the weapon is locked to one team.
/// </summary>
public sealed class WeaponWikiScraper
{
    private const string Page = "Weapons";
    private const string Cs2TabHash = "Global_Offensive_&_Counter-Strike_2";

    // Every class test uses the padded-contains form so that only a whole class
    // token matches, never a substring of a longer class name.
    private const string HeadingsAndTabbersXPath =
        "//h2 | //h3 | //h4 | "
        + "//div[contains(concat(' ', normalize-space(@class), ' '), ' tabber ')]";
    private const string TabXPath =
        ".//li[contains(concat(' ', normalize-space(@class), ' '), ' wds-tabs__tab ')]";
    private const string TabContentXPath =
        ".//div[contains(concat(' ', normalize-space(@class), ' '), ' wds-tab__content ')]";
    private const string CaptionXPath =
        ".//div[contains(concat(' ', normalize-space(@class), ' '), ' lightbox-caption ')]";
    private const string HeadlineXPath =
        ".//span[contains(concat(' ', normalize-space(@class), ' '), ' mw-headline ')]";

    // Trailing "(CT)" / "(T)" team marker; group 1 captures the team.
    private static readonly Regex TeamAnnotation =
        new(@"\s*\((CT|T)\)\s*$", RegexOptions.Compiled);

    // The default knife is labelled "Stock Knife" on the wiki; this strips "Stock ".
    private static readonly Regex StockPrefix =
        new(@"^Stock\s+", RegexOptions.Compiled);

    private readonly WikiPageFetcher _fetcher;

    public WeaponWikiScraper(WikiPageFetcher fetcher)
    {
        _fetcher = fetcher;
    }

    /// <summary>
    /// Loads the "Weapons" page and returns one <see cref="ScrapedWeapon"/> per
    /// distinct weapon name, in order of first appearance.
    /// </summary>
    public async Task<IReadOnlyList<ScrapedWeapon>> ScrapeAsync(CancellationToken ct = default)
    {
        var document = await _fetcher.LoadAsync(Page, ct);
        var weapons = new WeaponAggregator();

        // Headings and tabbers come back in document order, so the heading seen
        // most recently is the category of the tabber that follows it.
        var ordered = document.DocumentNode.SelectNodes(HeadingsAndTabbersXPath);
        if (ordered is null)
            return weapons.Build();

        string? category = null;
        foreach (var element in ordered)
        {
            if (element.Name is "h2" or "h3" or "h4")
            {
                category = HeadingText(element);
            }
            else if (category is not null)
            {
                // Tabbers that appear before any heading have no category and are ignored.
                foreach (var caption in Cs2Captions(element))
                    weapons.Add(caption, category);
            }
        }

        return weapons.Build();
    }

    /// <summary>
    /// Yields the normalized caption texts found in the CS2 tab of one tabber;
    /// yields nothing when the tab, its content pane, or the captions are missing.
    /// </summary>
    private static IEnumerable<string> Cs2Captions(HtmlNode tabber)
    {
        var tabs = tabber.SelectNodes(TabXPath);
        if (tabs is null)
            yield break;

        // Walk forward to the first tab whose data-hash identifies the CS2 tab.
        // HtmlAgilityPack hands back attribute values un-decoded and the wiki
        // entity-encodes the "&" of this hash (&amp;), hence the DeEntitize.
        var tabIndex = 0;
        while (tabIndex < tabs.Count
            && HtmlEntity.DeEntitize(tabs[tabIndex].GetAttributeValue("data-hash", string.Empty)) != Cs2TabHash)
        {
            tabIndex++;
        }

        if (tabIndex == tabs.Count)
            yield break;

        // Content panes are matched to tabs by position.
        var panes = tabber.SelectNodes(TabContentXPath);
        if (panes is null || panes.Count <= tabIndex)
            yield break;

        var captionNodes = panes[tabIndex].SelectNodes(CaptionXPath);
        if (captionNodes is null)
            yield break;

        foreach (var captionNode in captionNodes)
            yield return WikiText.Normalize(captionNode.InnerText);
    }

    /// <summary>
    /// Normalized text of a heading, taken from its "mw-headline" span when one
    /// exists and from the heading element itself otherwise.
    /// </summary>
    private static string HeadingText(HtmlNode heading)
    {
        var textNode = heading.SelectSingleNode(HeadlineXPath) ?? heading;
        return WikiText.Normalize(textNode.InnerText);
    }

    /// <summary>
    /// Merges caption rows into one weapon per name and remembers which team
    /// markers were seen, so that a weapon listed as both "(CT)" and "(T)", or
    /// listed without a marker, resolves to "Both".
    /// </summary>
    private sealed class WeaponAggregator
    {
        private sealed class Entry
        {
            public Entry(string type)
            {
                Type = type;
            }

            public string Type { get; }
            public bool SawCt { get; set; }
            public bool SawT { get; set; }
            public bool SawUnannotated { get; set; }
        }

        private readonly Dictionary<string, Entry> _byName = new();

        // Names in first-seen order; the output follows this order.
        private readonly List<string> _order = new();

        /// <summary>
        /// Records one caption under the given category. The category of the
        /// first caption seen for a name is the one kept for that name.
        /// </summary>
        public void Add(string caption, string type)
        {
            if (string.IsNullOrEmpty(caption))
                return;

            var annotation = TeamAnnotation.Match(caption);
            var withoutTeam = TeamAnnotation.Replace(caption, string.Empty);
            var name = StockPrefix.Replace(withoutTeam, string.Empty).Trim();
            if (name.Length == 0)
                return;

            if (!_byName.TryGetValue(name, out var entry))
            {
                entry = new Entry(type);
                _byName[name] = entry;
                _order.Add(name);
            }

            switch (annotation.Success ? annotation.Groups[1].Value : null)
            {
                case null:
                    entry.SawUnannotated = true;
                    break;
                case "CT":
                    entry.SawCt = true;
                    break;
                default:
                    entry.SawT = true;
                    break;
            }
        }

        /// <summary>Produces the collected weapons in first-seen order.</summary>
        public IReadOnlyList<ScrapedWeapon> Build()
        {
            return _order
                .Select(name =>
                {
                    var entry = _byName[name];
                    return new ScrapedWeapon(name, entry.Type, ResolveTeam(entry));
                })
                .ToList();
        }

        // Only an exclusively-CT or exclusively-T weapon keeps its team;
        // every other combination of flags resolves to "Both".
        private static string ResolveTeam(Entry entry) =>
            (entry.SawUnannotated, entry.SawCt, entry.SawT) switch
            {
                (false, true, false) => "CT",
                (false, false, true) => "T",
                _ => "Both",
            };
    }
}
|
||||
@@ -1,51 +0,0 @@
|
||||
using System.Text.Json;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace BlueLaminate.Scraper.Wiki;
|
||||
|
||||
/// <summary>
/// Retrieves a rendered page of the Counter-Strike Fandom wiki; shared by all
/// wiki scrapers.
///
/// The regular HTML pages sit behind Cloudflare, which answers .NET's TLS
/// fingerprint with 403 regardless of headers. The MediaWiki
/// <c>action=parse</c> API is not challenged, so the same content is requested
/// from there as JSON and the HTML embedded in it is returned as a parsed
/// document.
/// </summary>
public sealed class WikiPageFetcher
{
    private const string ApiBase = "https://counterstrike.fandom.com/api.php";

    private readonly HttpClient _http;

    public WikiPageFetcher(HttpClient http)
    {
        _http = http;
    }

    /// <summary>Loads a wiki page (e.g. "Weapons") as a parsed HTML document.</summary>
    /// <param name="page">Wiki page title; it is escaped before being put in the query string.</param>
    /// <param name="ct">Cancels the request and the reading of the response.</param>
    /// <exception cref="InvalidOperationException">
    /// The API reported an error for the page, or the parsed text was null.
    /// </exception>
    public async Task<HtmlDocument> LoadAsync(string page, CancellationToken ct = default)
    {
        var requestUrl = $"{ApiBase}?action=parse&page={Uri.EscapeDataString(page)}&prop=text&format=json";

        using var response = await _http.GetAsync(requestUrl, ct);
        response.EnsureSuccessStatusCode();

        await using var body = await response.Content.ReadAsStreamAsync(ct);
        using var payload = await JsonDocument.ParseAsync(body, cancellationToken: ct);

        var html = ReadRenderedHtml(payload.RootElement, page);

        var document = new HtmlDocument();
        document.LoadHtml(html);
        return document;
    }

    // Extracts parse.text["*"] from the API response, first turning an "error"
    // object in the response into an exception.
    private static string ReadRenderedHtml(JsonElement root, string page)
    {
        if (root.TryGetProperty("error", out var error))
        {
            string? info;
            if (error.TryGetProperty("info", out var detail))
                info = detail.GetString();
            else
                info = "unknown error";

            throw new InvalidOperationException($"Wiki API returned an error for page '{page}': {info}");
        }

        var text = root.GetProperty("parse").GetProperty("text").GetProperty("*");
        var html = text.GetString();
        if (html is null)
            throw new InvalidOperationException($"Wiki API response for page '{page}' had no parsed text.");

        return html;
    }
}
|
||||
@@ -1,14 +0,0 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace BlueLaminate.Scraper.Wiki;
|
||||
|
||||
/// <summary>Text utilities shared by the wiki scrapers.</summary>
public static class WikiText
{
    // One or more consecutive whitespace characters.
    private static readonly Regex Whitespace = new(@"\s+", RegexOptions.Compiled);

    /// <summary>
    /// Decodes HTML entities, replaces each run of whitespace with a single
    /// space, and trims the result.
    /// </summary>
    /// <param name="raw">Text as taken from the HTML document.</param>
    /// <returns>The cleaned text; empty when decoding yields null.</returns>
    public static string Normalize(string raw)
    {
        var decoded = HtmlEntity.DeEntitize(raw) ?? string.Empty;
        var collapsed = Whitespace.Replace(decoded, " ");
        return collapsed.Trim();
    }
}
|
||||
Reference in New Issue
Block a user