Add init weapon scraper

This commit is contained in:
bob
2026-05-29 14:00:58 -05:00
parent 286d1366fe
commit 6f3c0175cd
20 changed files with 1199 additions and 62 deletions

View File

@@ -0,0 +1,23 @@
<!-- Console project for the BlueLaminate command-line tool. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Build an executable rather than a class library. -->
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- Copy appsettings.json next to the built binary whenever the source copy is newer. -->
<None Update="appsettings.json" CopyToOutputDirectory="PreserveNewest" />
</ItemGroup>
<ItemGroup>
<!-- Sibling projects this CLI builds against. -->
<ProjectReference Include="..\BlueLaminate.EFCore\BlueLaminate.EFCore.csproj" />
<ProjectReference Include="..\BlueLaminate.Scraper\BlueLaminate.Scraper.csproj" />
</ItemGroup>
<ItemGroup>
<!-- Command tree, argument parsing, and help output for the entry point. -->
<PackageReference Include="System.CommandLine" Version="2.0.8" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,79 @@
using System.CommandLine;
using BlueLaminate.Cli;
using BlueLaminate.EFCore.Data;
using BlueLaminate.Scraper.Weapons;
using BlueLaminate.Scraper.Wiki;
// Program entry point. System.CommandLine owns the command tree, argument
// parsing, and generated help. Each new feature is registered below as another
// command once it has been implemented.

var forceFlag = new Option<bool>("--force");
forceFlag.Description = "Ignore the once-a-month throttle and sync now.";

var dryRunFlag = new Option<bool>("--dry-run");
dryRunFlag.Description = "Scrape and print the weapons without writing to the database.";

// "sync-weapons": options are registered in the order --force, --dry-run.
var syncWeaponsCommand = new Command(
    "sync-weapons",
    "Scrape the CS2 weapon catalogue from the wiki and upsert it (throttled to once a month).");
syncWeaponsCommand.Add(forceFlag);
syncWeaponsCommand.Add(dryRunFlag);
syncWeaponsCommand.SetAction((parsed, cancellation) =>
    SyncWeaponsAsync(parsed.GetValue(forceFlag), parsed.GetValue(dryRunFlag), cancellation));

var rootCommand = new RootCommand("BlueLaminate CLI — Counter-Strike skin tracker tools.");
rootCommand.Add(syncWeaponsCommand);

// The invoked command's result becomes the process exit code.
var parsedArgs = rootCommand.Parse(args);
return await parsedArgs.InvokeAsync();
// Fetch the CS2 weapon catalogue from the wiki and upsert it. Throttled to once
// a month unless --force is passed; --dry-run scrapes and prints without a DB.
//
//   force  - bypass the monthly throttle.
//   dryRun - scrape and print only; no database context is created.
//   ct     - cancellation token flowed into the scrape and the sync.
//
// Returns the process exit code, which is 0 on every path that completes;
// failures propagate as exceptions.
static async Task<int> SyncWeaponsAsync(bool force, bool dryRun, CancellationToken ct)
{
    // Own the HttpClient for the lifetime of the command so its handler and
    // sockets are released deterministically instead of waiting on finalization.
    using var http = CreateHttpClient();
    var scraper = new WeaponWikiScraper(new WikiPageFetcher(http));

    if (dryRun)
    {
        var weapons = await scraper.ScrapeAsync(ct);
        Console.WriteLine($"Scraped {weapons.Count} weapons (dry run, nothing written):");
        foreach (var w in weapons)
        {
            Console.WriteLine($" {w.Name,-20} {w.Type,-16} {w.Team}");
        }

        return 0;
    }

    // Dispose the context asynchronously so closing the connection does not
    // block inside an async method.
    await using var db = new SkinTrackerDbContextFactory().CreateDbContext([]);
    var result = await new WeaponSyncService(db, scraper).SyncAsync(force, ct);

    if (result.Skipped)
    {
        Console.WriteLine(
            $"Skipped: weapons were last synced {result.LastRanAt:u}. "
            + "Next run allowed one month later — pass --force to override.");
    }
    else
    {
        // "unchanged" is whatever was scraped but neither inserted nor updated.
        Console.WriteLine(
            $"Synced {result.Scraped} weapons: {result.Inserted} inserted, "
            + $"{result.Updated} updated, "
            + $"{result.Scraped - result.Inserted - result.Updated} unchanged.");
    }

    return 0;
}
// Builds the HttpClient handed to the wiki fetcher, pre-configured with a
// browser-like User-Agent header.
static HttpClient CreateHttpClient()
{
    // The wiki sits behind Cloudflare; the MediaWiki API endpoint used by the
    // scraper accepts requests that present a browser-style User-Agent.
    const string BrowserUserAgent =
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        + "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36";

    var client = new HttpClient();
    client.DefaultRequestHeaders.UserAgent.ParseAdd(BrowserUserAgent);
    return client;
}

View File

@@ -0,0 +1,77 @@
using BlueLaminate.EFCore.Data;
using BlueLaminate.EFCore.Entities;
using BlueLaminate.Scraper.Weapons;
using Microsoft.EntityFrameworkCore;
namespace BlueLaminate.Cli;
/// <summary>
/// Outcome of one <c>WeaponSyncService.SyncAsync</c> call.
/// </summary>
/// <param name="Skipped">True when the monthly throttle suppressed the run.</param>
/// <param name="LastRanAt">When the previous successful run happened, if any.</param>
/// <param name="Scraped">Number of weapons the scraper returned; 0 when skipped.</param>
/// <param name="Inserted">Number of weapons added as new rows; 0 when skipped.</param>
/// <param name="Updated">Number of existing weapons whose type or team changed; 0 when skipped.</param>
public sealed record WeaponSyncResult(
bool Skipped,
DateTimeOffset? LastRanAt,
int Scraped,
int Inserted,
int Updated);
/// <summary>
/// Fetches the CS2 weapon catalogue and upserts it into the database. The
/// catalogue changes rarely, so a run is throttled to at most once a month
/// unless explicitly forced.
/// </summary>
public sealed class WeaponSyncService
{
    /// <summary>
    /// Value written to, and filtered on, the <c>Source</c> column of scrape-run
    /// records created by this service.
    /// </summary>
    public const string Source = "weapons";

    private readonly SkinTrackerDbContext _db;
    private readonly WeaponWikiScraper _scraper;

    /// <summary>Creates a sync service over the given context and scraper.</summary>
    /// <param name="db">Context used to read and write weapons and scrape-run records.</param>
    /// <param name="scraper">Scraper that produces the current weapon list.</param>
    public WeaponSyncService(SkinTrackerDbContext db, WeaponWikiScraper scraper)
    {
        _db = db;
        _scraper = scraper;
    }

    /// <summary>
    /// Scrapes the weapon list and upserts it by name, then records the run.
    /// </summary>
    /// <param name="force">When true, run even if the last run was under a month ago.</param>
    /// <param name="ct">Cancellation token flowed to the scrape and all database calls.</param>
    /// <returns>
    /// A skipped result (all counts 0) when throttled; otherwise the scraped,
    /// inserted, and updated counts together with the previous run time, if any.
    /// </returns>
    public async Task<WeaponSyncResult> SyncAsync(bool force = false, CancellationToken ct = default)
    {
        var now = DateTimeOffset.UtcNow;

        // Most recent recorded run for this source; null when there has never been one.
        var lastRanAt = await _db.ScrapeRuns
            .Where(r => r.Source == Source)
            .OrderByDescending(r => r.RanAt)
            .Select(r => (DateTimeOffset?)r.RanAt)
            .FirstOrDefaultAsync(ct);

        // Throttle: skip when the previous run is less than one calendar month old.
        if (!force && lastRanAt is { } last && last.AddMonths(1) > now)
        {
            return new WeaponSyncResult(Skipped: true, last, Scraped: 0, Inserted: 0, Updated: 0);
        }

        var scraped = await _scraper.ScrapeAsync(ct);
        var existing = await _db.Weapons.ToDictionaryAsync(w => w.Name, ct);

        var inserted = 0;
        var updated = 0;
        foreach (var s in scraped)
        {
            if (existing.TryGetValue(s.Name, out var weapon))
            {
                if (weapon.Type != s.Type || weapon.Team != s.Team)
                {
                    weapon.Type = s.Type;
                    weapon.Team = s.Team;
                    updated++;
                }
            }
            else
            {
                weapon = new Weapon { Name = s.Name, Type = s.Type, Team = s.Team };
                _db.Weapons.Add(weapon);

                // Register the new entity in the lookup so that a name repeated
                // later in the same scrape is treated as an update of this row
                // rather than being added a second time.
                existing.Add(s.Name, weapon);
                inserted++;
            }
        }

        // The run record and the weapon changes are saved in a single SaveChanges call.
        _db.ScrapeRuns.Add(new ScrapeRun { Source = Source, RanAt = now, ItemCount = scraped.Count });
        await _db.SaveChangesAsync(ct);

        return new WeaponSyncResult(Skipped: false, lastRanAt, scraped.Count, inserted, updated);
    }
}

View File

@@ -0,0 +1,5 @@
{
"ConnectionStrings": {
"SkinTracker": "Host=localhost;Port=5432;Database=skintracker;Username=postgres"
}
}