Files
Operation-Blue-Laminate-v2/BlueLaminate/BlueLaminate.C2/Program.cs
bob 8b0eb0db78 Cut metered-proxy bandwidth: re-sweep floor + wire-size logging
JobQueue now skips bands swept within MinResweepHours (config, default 6h) instead of re-scraping the whole catalogue continuously — the dominant cost on the metered residential proxy. Roughly linear savings with no data loss (full pagination retained); 0 disables it. Worker logs the real compressed transferSize per job (what the proxy bills) rather than the ~6.5x-larger decompressed length, so spend is visible.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 15:27:37 -05:00

93 lines
4.1 KiB
C#

using BlueLaminate.C2;
using BlueLaminate.Core.CsMoney;
using BlueLaminate.Core.DependencyInjection;
using BlueLaminate.EFCore.Data;
using Microsoft.EntityFrameworkCore;
// C2 entry point: serves cs.money scrape jobs to the Python workers and ingests what
// they send back, reusing the full BlueLaminate stack (DB, ingest service) through the
// single composition root.
// The content root is pinned to the binary's directory so that appsettings.json is
// located no matter which working directory the process was started from (same
// approach as the CLI).
var hostOptions = new WebApplicationOptions
{
    ContentRootPath = AppContext.BaseDirectory,
    Args = args,
};
var builder = WebApplication.CreateBuilder(hostOptions);
// Shared BlueLaminate services come from the one composition root.
builder.Services.AddBlueLaminateCore(builder.Configuration);

// Re-sweep floor (MinResweepHours, default 6): a band whose listings were swept more
// recently than this is not handed out again. Re-scraping bands that are still fresh
// is the dominant cost on the metered residential proxy, so the floor bounds how often
// any single band gets pulled. A value of 0 means continuous re-sweeping.
var resweepFloorHours = builder.Configuration.GetValue<double>("MinResweepHours", 6.0);
var resweepFloor = TimeSpan.FromHours(resweepFloorHours);
builder.Services.AddSingleton(new JobQueue(resweepFloor));
var app = builder.Build();

// On startup, bring the database up to date with any pending EF migrations (this
// includes the market_listings view), so a fresh container is usable with a single
// command. Set AutoMigrate=false to opt out and run `dotnet ef database update`
// by hand instead.
var autoMigrate = app.Configuration.GetValue<bool>("AutoMigrate", true);
if (autoMigrate)
{
    // The DbContext is resolved from a dedicated scope that is disposed as soon as
    // the migration has run.
    using (var migrationScope = app.Services.CreateScope())
    {
        migrationScope.ServiceProvider
            .GetRequiredService<SkinTrackerDbContext>()
            .Database
            .Migrate();
    }
}
// Shared-secret gate. Workers send it as X-Worker-Token; if no token is configured
// the gate is open (local dev). Set WorkerToken (config) / WORKER_TOKEN (env) in prod.
var workerToken = builder.Configuration["WorkerToken"];
if (string.IsNullOrEmpty(workerToken))
{
    // The default environment-variable provider surfaces WORKER_TOKEN under that
    // literal key rather than as "WorkerToken", so it has to be looked up explicitly.
    // Without this fallback, setting only the env var would leave the gate open.
    // The empty check (not just null) also covers a blank "WorkerToken" placeholder
    // left in appsettings.json.
    workerToken = builder.Configuration["WORKER_TOKEN"];
}

// Page cap handed to JobQueue.ClaimNextAsync for every claimed job (MaxPagesPerJob,
// default 60).
var maxPagesPerJob = builder.Configuration.GetValue("MaxPagesPerJob", 60);
// Liveness probe.
app.MapGet("/health", () =>
{
    var body = new { status = "ok" };
    return Results.Ok(body);
});

// Operator read endpoints answering "where is this listed?" across markets.
// These are read-only and sit outside the worker-token gate.
app.MapGet("/market/skin/{skinId:int}", async (
    int skinId, MarketPresenceService presence, CancellationToken ct) =>
{
    var listings = await presence.ForSkinAsync(skinId, ct);
    return Results.Ok(listings);
});

app.MapGet("/market/instance/{instanceId:int}", async (
    int instanceId, MarketPresenceService presence, CancellationToken ct) =>
{
    var listings = await presence.ForInstanceAsync(instanceId, ct);
    return Results.Ok(listings);
});
// Worker-facing endpoints live under /jobs and share one token filter.
var jobs = app.MapGroup("/jobs");

// Encode the configured secret once. A null value means no token is configured and
// the gate stays open.
var expectedTokenBytes = string.IsNullOrEmpty(workerToken)
    ? null
    : System.Text.Encoding.UTF8.GetBytes(workerToken);

jobs.AddEndpointFilter(async (ctx, next) =>
{
    if (expectedTokenBytes is not null)
    {
        // A missing header yields an empty string, which never matches a non-empty token.
        var presentedToken = ctx.HttpContext.Request.Headers["X-Worker-Token"].ToString();
        var presentedBytes = System.Text.Encoding.UTF8.GetBytes(presentedToken);

        // Compare in constant time: an ordinary string comparison stops at the first
        // differing character, which lets a caller recover the secret from response
        // timing. FixedTimeEquals only short-circuits on a length mismatch.
        if (!System.Security.Cryptography.CryptographicOperations.FixedTimeEquals(
                presentedBytes, expectedTokenBytes))
        {
            return Results.Unauthorized();
        }
    }

    return await next(ctx);
});
// Hands the caller the next stalest skin+wear band to scrape. Responds 204 when there
// is nothing to claim right now (every band in the stalest batch is already leased to
// some other worker), otherwise 200 with the claimed job.
jobs.MapGet("/next", async (JobQueue queue, SkinTrackerDbContext db, CancellationToken ct) =>
{
    var claimed = await queue.ClaimNextAsync(db, maxPagesPerJob, ct);
    if (claimed is null)
    {
        return Results.NoContent();
    }

    return Results.Ok(claimed);
});
// Receives the listings a worker scraped for a job it claimed. Parsing and persistence
// are the C2's responsibility, which keeps the worker dumb: it only relays the raw
// cs.money items it collected.
jobs.MapPost("/{jobId}/result", async (
    string jobId, ScrapeResultDto result, JobQueue queue, CsMoneyIngestService ingest, CancellationToken ct) =>
{
    // Resolve the job back to its skin/condition; a null mapping is reported as 404.
    if (queue.Complete(jobId) is not { } mapping)
    {
        return Results.NotFound(new { error = "unknown or expired jobId" });
    }

    // A sweep is authoritative only when it was walked to the end ("completed").
    // For a partial result (fetch-cap / challenged / float tie) the listings that were
    // seen are still upserted, but unseen listings must NOT be marked Removed and the
    // swept-checkpoint must NOT be stamped: unseen may just mean unfetched, and the
    // band has to be re-queued and retried.
    var isFullSweep = "completed".Equals(result.StoppedReason, StringComparison.OrdinalIgnoreCase);

    var outcome = await ingest.IngestAsync(
        mapping.SkinId,
        mapping.ConditionId,
        result.Items ?? [],
        isFullSweep,
        ct);

    return Results.Ok(outcome);
});
// Start the web host with the endpoints mapped above.
app.Run();