Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
88 lines
3.7 KiB
C#
88 lines
3.7 KiB
C#
using BlueLaminate.C2;
|
|
using BlueLaminate.Core.CsMoney;
|
|
using BlueLaminate.Core.DependencyInjection;
|
|
using BlueLaminate.EFCore.Data;
|
|
using Microsoft.EntityFrameworkCore;
|
|
|
|
// C2 entry point: serves cs.money scrape jobs to the Python workers and ingests the
// results they post back. The whole BlueLaminate stack (DB, ingest service) is wired
// in through the single composition root, AddBlueLaminateCore.
//
// The content root is pinned to the binary directory so appsettings.json resolves no
// matter which working directory the process was started from (same approach as the CLI).
var hostOptions = new WebApplicationOptions
{
    Args = args,
    ContentRootPath = AppContext.BaseDirectory,
};
var builder = WebApplication.CreateBuilder(hostOptions);

// Shared registrations first, then the C2-only job queue, registered as a singleton.
var services = builder.Services;
services.AddBlueLaminateCore(builder.Configuration);
services.AddSingleton<JobQueue>();

var app = builder.Build();
|
|
|
|
// A fresh container should come up with a single command, so pending EF migrations
// (including the market_listings view) are applied here at startup. Set
// AutoMigrate=false to opt out and run `dotnet ef database update` by hand instead.
var autoMigrate = app.Configuration.GetValue<bool>("AutoMigrate", true);
if (autoMigrate)
{
    // The DbContext is resolved through a dedicated scope that is disposed as soon
    // as the migration call returns.
    using (var migrationScope = app.Services.CreateScope())
    {
        migrationScope.ServiceProvider
            .GetRequiredService<SkinTrackerDbContext>()
            .Database
            .Migrate();
    }
}
|
|
|
|
// Shared secret for the /jobs gate: workers present it in the X-Worker-Token header.
// An unset or empty value leaves the gate open, which is intended for local dev only;
// in prod set WorkerToken (config) / WORKER_TOKEN (env).
// NOTE(review): the stock environment-variable provider only binds a variable whose
// name matches the key (WorkerToken, case-insensitive); WORKER_TOKEN presumably gets
// mapped onto it elsewhere (e.g. docker-compose) — confirm.
var workerToken = builder.Configuration.GetSection("WorkerToken").Value;

// Handed to JobQueue.ClaimNextAsync on every claim; falls back to 60 when not configured.
var maxPagesPerJob = builder.Configuration.GetValue<int>("MaxPagesPerJob", 60);
|
|
|
|
// Liveness probe: always answers 200 with a fixed status payload.
app.MapGet("/health", () =>
{
    var body = new { status = "ok" };
    return Results.Ok(body);
});

// Operator read endpoints answering "where is this listed?" across markets.
// They are read-only and sit outside the /jobs group, so no worker token is required.

// Market presence for one skin, looked up by its integer id.
app.MapGet("/market/skin/{skinId:int}", async (
    int skinId, MarketPresenceService marketPresence, CancellationToken cancellationToken) =>
{
    var presence = await marketPresence.ForSkinAsync(skinId, cancellationToken);
    return Results.Ok(presence);
});

// Market presence for one concrete item instance, looked up by its integer id.
app.MapGet("/market/instance/{instanceId:int}", async (
    int instanceId, MarketPresenceService marketPresence, CancellationToken cancellationToken) =>
{
    var presence = await marketPresence.ForInstanceAsync(instanceId, cancellationToken);
    return Results.Ok(presence);
});
|
|
|
|
// All worker-facing endpoints live under /jobs and share one shared-secret gate.
var jobs = app.MapGroup("/jobs");

// Encode the configured secret once instead of on every request. null means no token
// is configured, in which case the gate stays open (local dev).
var expectedTokenBytes = string.IsNullOrEmpty(workerToken)
    ? null
    : System.Text.Encoding.UTF8.GetBytes(workerToken);

// Endpoint filter: rejects with 401 unless the X-Worker-Token header equals the
// configured secret; otherwise hands the request on to the endpoint.
jobs.AddEndpointFilter(async (ctx, next) =>
{
    if (expectedTokenBytes is null)
    {
        return await next(ctx);
    }

    // A missing header reads as an empty string, which can never match a non-empty secret.
    var presentedBytes = System.Text.Encoding.UTF8.GetBytes(
        ctx.HttpContext.Request.Headers["X-Worker-Token"].ToString());

    // Fixed-time comparison: an ordinary string compare stops at the first differing
    // character, which lets a caller recover the secret from response timing.
    if (!System.Security.Cryptography.CryptographicOperations.FixedTimeEquals(presentedBytes, expectedTokenBytes))
    {
        return Results.Unauthorized();
    }

    return await next(ctx);
});
|
|
|
|
// Hand out the next stalest skin+wear to scrape. Responds 204 when nothing can be
// claimed right now (everything in the stalest batch is already leased to other workers).
jobs.MapGet("/next", async (JobQueue queue, SkinTrackerDbContext db, CancellationToken ct) =>
{
    var claimed = await queue.ClaimNextAsync(db, maxPagesPerJob, ct);
    if (claimed is null)
    {
        return Results.NoContent();
    }

    return Results.Ok(claimed);
});
|
|
|
|
// Receive the scraped listings for a previously claimed job. Parsing and persistence
// belong to the C2 so the worker can stay dumb: it only forwards the raw cs.money
// items it collected.
jobs.MapPost("/{jobId}/result", async (
    string jobId, ScrapeResultDto result, JobQueue queue, CsMoneyIngestService ingest, CancellationToken ct) =>
{
    // Resolve the job id back to its skin/condition; no match means the id is unknown
    // or expired, which is reported as 404.
    if (queue.Complete(jobId) is not { } claim)
    {
        return Results.NotFound(new { error = "unknown or expired jobId" });
    }

    // A sweep is authoritative only when it was walked to the end ("completed").
    // Partial results (fetch-cap / challenged / float tie) are still upserted, but
    // unseen listings must NOT be marked Removed and the swept-checkpoint must NOT be
    // stamped — anything unseen may simply be unfetched, and the band has to be
    // re-queued and retried.
    var sweepFinished = "completed".Equals(result.StoppedReason, StringComparison.OrdinalIgnoreCase);

    // A missing item list is ingested as an empty one.
    var outcome = await ingest.IngestAsync(
        claim.SkinId, claim.ConditionId, result.Items ?? [], sweepFinished, ct);
    return Results.Ok(outcome);
});
|
|
|
|
// Start the web host; this call blocks until the application shuts down.
app.Run();
|