Add cs.money worker stack with per-worker IPRoyal residential proxy
Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
13
BlueLaminate/BlueLaminate.C2/BlueLaminate.C2.csproj
Normal file
13
BlueLaminate/BlueLaminate.C2/BlueLaminate.C2.csproj
Normal file
@@ -0,0 +1,13 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk.Web">
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\BlueLaminate.Core\BlueLaminate.Core.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
</PropertyGroup>
|
||||
|
||||
</Project>
|
||||
19
BlueLaminate/BlueLaminate.C2/Contracts.cs
Normal file
19
BlueLaminate/BlueLaminate.C2/Contracts.cs
Normal file
@@ -0,0 +1,19 @@
|
||||
using BlueLaminate.Core.CsMoney;
|
||||
|
||||
namespace BlueLaminate.C2;
|
||||
|
||||
/// <summary>
/// One unit of scrape work given to a worker: a single skin+wear, expressed as a
/// market search.
/// </summary>
/// <param name="JobId">Opaque identifier; the worker sends it back when posting results.</param>
/// <param name="SkinId">The catalogue skin the job is for.</param>
/// <param name="ConditionId">The wear band (a skin_conditions row); null means the whole skin.</param>
/// <param name="Search">Free-text market query, e.g. "M4A4 Cyber Security ft".</param>
/// <param name="MaxPages">Upper bound on page fetches (60 items per page). Workers page by
/// walking the float axis, so one skin+wear takes ceil(listings/60) pages.</param>
public sealed record ScrapeJobDto(
    string JobId,
    int SkinId,
    int? ConditionId,
    string Search,
    int MaxPages);
|
||||
|
||||
/// <summary>
/// What a worker posts back for a job it claimed: the listings it managed to scrape.
/// </summary>
/// <param name="Items">Every sell-order item collected across all pages, in the raw
/// cs.money shape. The C2's result endpoint treats a null list as empty.</param>
/// <param name="Pages">Number of pages the worker fetched.</param>
/// <param name="StoppedReason">Why the worker stopped. "completed" means a full sweep
/// (authoritative); any other value (fetch-cap / challenged / stuck-float-tie) means the
/// result is partial.</param>
public sealed record ScrapeResultDto(
    List<CsMoneyItem> Items,
    int Pages,
    string? StoppedReason);
|
||||
24
BlueLaminate/BlueLaminate.C2/Dockerfile
Normal file
24
BlueLaminate/BlueLaminate.C2/Dockerfile
Normal file
@@ -0,0 +1,24 @@
|
||||
# Build context is the REPO ROOT (so Central Package Management's Directory.*.props
|
||||
# at the root are available). Build with:
|
||||
# docker compose build (compose sets the context)
|
||||
FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
|
||||
WORKDIR /src
|
||||
|
||||
# Restore against the full solution sources the C2 transitively needs.
|
||||
COPY Directory.Build.props Directory.Packages.props ./
|
||||
COPY BlueLaminate/ BlueLaminate/
|
||||
RUN dotnet restore BlueLaminate/BlueLaminate.C2/BlueLaminate.C2.csproj
|
||||
RUN dotnet publish BlueLaminate/BlueLaminate.C2/BlueLaminate.C2.csproj \
|
||||
-c Release -o /app --no-restore
|
||||
|
||||
FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime
|
||||
# NOTE: deliberately do NOT install libgssapi-krb5-2. Without it Npgsql logs a
|
||||
# harmless "cannot load libgssapi_krb5.so.2" line and falls back to password auth;
|
||||
# WITH it, a failed/misconfigured connection attempt segfaults during GSS negotiation
|
||||
# (observed: container exit 139 / crash-loop). Graceful failure beats the segfault.
|
||||
WORKDIR /app
|
||||
COPY --from=build /app ./
|
||||
# Bind all interfaces inside the container on port 5080 (the same address appsettings.json's "Urls" sets; stated here so the image does not depend on that file).
|
||||
ENV ASPNETCORE_URLS=http://+:5080
|
||||
EXPOSE 5080
|
||||
ENTRYPOINT ["dotnet", "BlueLaminate.C2.dll"]
|
||||
88
BlueLaminate/BlueLaminate.C2/JobQueue.cs
Normal file
88
BlueLaminate/BlueLaminate.C2/JobQueue.cs
Normal file
@@ -0,0 +1,88 @@
|
||||
using System.Collections.Concurrent;
|
||||
using BlueLaminate.Core.CsMoney;
|
||||
using BlueLaminate.EFCore.Data;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
|
||||
namespace BlueLaminate.C2;
|
||||
|
||||
/// <summary>
/// Hands out scrape jobs to workers, one skin+wear at a time, driven directly by the
/// catalogue's per-band checkpoints (<c>SkinCondition.ListingsSweptAt</c>) rather than
/// a pre-built queue. Each claim picks the stalest band (never-swept first), leases it
/// in memory so two workers can't get the same one, and builds a free-text search. On
/// completion the ingest stamps <c>ListingsSweptAt</c>, so the band drops to the back —
/// the sweep loops the whole catalogue continuously and resumes cleanly after restarts.
/// </summary>
/// <remarks>
/// Every lease remembers the job that owns it. When a lease expires, both the lease and
/// its in-flight job are dropped, so a job that is never completed (e.g. a crashed
/// worker) does not stay in memory forever, and a result that arrives after expiry is
/// reported as unknown instead of releasing a lease that now belongs to another worker.
/// </remarks>
public sealed class JobQueue
{
    // A leased condition can't be re-handed-out until released or the lease expires
    // (so a crashed worker's band returns to the pool instead of stalling forever).
    private static readonly TimeSpan LeaseTtl = TimeSpan.FromMinutes(15);

    // How many of the stalest bands are loaded per claim; leased ones are skipped in memory.
    private const int CandidateBatch = 100;

    // Serialises claims so the "pick an unleased band, then lease it" step is atomic.
    private readonly SemaphoreSlim _gate = new(1, 1);
    private readonly ConcurrentDictionary<int, Lease> _leases = new();          // conditionId -> current lease
    private readonly ConcurrentDictionary<string, JobMapping> _inFlight = new(); // jobId -> mapping

    /// <summary>
    /// Claims the stalest band that is not currently leased and returns a job for it.
    /// </summary>
    /// <param name="db">Context used to read the candidate bands.</param>
    /// <param name="maxPages">Page cap copied onto the returned job.</param>
    /// <param name="ct">Cancels the wait for the claim gate and the database query.</param>
    /// <returns>
    /// The claimed job, or <c>null</c> when every band in the stalest batch of
    /// <see cref="CandidateBatch"/> is already leased.
    /// </returns>
    public async Task<ScrapeJobDto?> ClaimNextAsync(SkinTrackerDbContext db, int maxPages, CancellationToken ct)
    {
        await _gate.WaitAsync(ct);
        try
        {
            // Reclaim expired leases (and their abandoned jobs) first.
            ReclaimExpired(DateTimeOffset.UtcNow - LeaseTtl);

            // Stalest bands first (never-swept null sorts before any timestamp).
            var candidates = await db.SkinConditions
                .OrderBy(c => c.ListingsSweptAt.HasValue)
                .ThenBy(c => c.ListingsSweptAt)
                .Select(c => new Candidate(
                    c.Id, c.SkinId, c.Skin.Weapon.Name, c.Skin.Name, c.Condition))
                .Take(CandidateBatch)
                .ToListAsync(ct);

            var pick = candidates.FirstOrDefault(c => !_leases.ContainsKey(c.ConditionId));
            if (pick is null)
            {
                return null; // everything in the stalest batch is already in flight
            }

            var jobId = Guid.NewGuid().ToString("N");
            _leases[pick.ConditionId] = new Lease(jobId, DateTimeOffset.UtcNow);
            _inFlight[jobId] = new JobMapping(pick.SkinId, pick.ConditionId);

            // Fall back to the stored condition text when no wear code is available for it.
            var code = Wear.ToCode(pick.Condition) ?? pick.Condition;
            var search = $"{pick.Weapon} {pick.SkinName} {code}".Trim();
            return new ScrapeJobDto(jobId, pick.SkinId, pick.ConditionId, search, maxPages);
        }
        finally
        {
            _gate.Release();
        }
    }

    /// <summary>Resolve a posted job to its skin+condition and release its lease.</summary>
    /// <param name="jobId">The id handed out by <see cref="ClaimNextAsync"/>.</param>
    /// <returns>
    /// The job's mapping, or <c>null</c> when the id is unknown, was already completed,
    /// or its lease expired and was reclaimed.
    /// </returns>
    public JobMapping? Complete(string jobId)
    {
        if (!_inFlight.TryRemove(jobId, out var mapping))
        {
            return null;
        }

        // Release the lease only while this job still owns it. The pair-based TryRemove
        // is a no-op if the band has been re-leased to a newer job in the meantime.
        if (_leases.TryGetValue(mapping.ConditionId, out var lease) && lease.JobId == jobId)
        {
            _leases.TryRemove(KeyValuePair.Create(mapping.ConditionId, lease));
        }

        return mapping;
    }

    /// <summary>Number of jobs handed out and not yet completed or expired.</summary>
    public int InFlight => _inFlight.Count;

    // Drops every lease taken before the cutoff together with the job that owned it.
    private void ReclaimExpired(DateTimeOffset cutoff)
    {
        foreach (var (conditionId, lease) in _leases)
        {
            if (lease.LeasedAt < cutoff
                && _leases.TryRemove(KeyValuePair.Create(conditionId, lease)))
            {
                _inFlight.TryRemove(lease.JobId, out _);
            }
        }
    }

    /// <summary>The skin and wear band a job was issued for.</summary>
    public sealed record JobMapping(int SkinId, int ConditionId);

    // Which job holds a band's lease, and when it was taken.
    private sealed record Lease(string JobId, DateTimeOffset LeasedAt);

    // Projection of the fields needed to build a job from a skin_conditions row.
    private sealed record Candidate(int ConditionId, int SkinId, string Weapon, string SkinName, string Condition);
}
|
||||
87
BlueLaminate/BlueLaminate.C2/Program.cs
Normal file
87
BlueLaminate/BlueLaminate.C2/Program.cs
Normal file
@@ -0,0 +1,87 @@
|
||||
using BlueLaminate.C2;
|
||||
using BlueLaminate.Core.CsMoney;
|
||||
using BlueLaminate.Core.DependencyInjection;
|
||||
using BlueLaminate.EFCore.Data;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
|
||||
// The C2: hands cs.money scrape jobs to Python workers and ingests their results.
// Reuses the whole BlueLaminate stack (DB, ingest service) via the one composition root.
// Content root = the binary directory so appsettings.json is found regardless of the
// working directory the process is launched from (matches the CLI's approach).
var builder = WebApplication.CreateBuilder(new WebApplicationOptions
{
    Args = args,
    ContentRootPath = AppContext.BaseDirectory,
});
builder.Services.AddBlueLaminateCore(builder.Configuration);
builder.Services.AddSingleton<JobQueue>();

var app = builder.Build();

// Apply pending EF migrations at startup (incl. the market_listings view) so a fresh
// container is ready with one command. Disable with AutoMigrate=false if you'd rather
// run `dotnet ef database update` yourself.
if (app.Configuration.GetValue("AutoMigrate", true))
{
    using var scope = app.Services.CreateScope();
    var db = scope.ServiceProvider.GetRequiredService<SkinTrackerDbContext>();
    db.Database.Migrate();
}

// Shared-secret gate. Workers send it as X-Worker-Token; if no token is configured
// the gate is open (local dev). Set the "WorkerToken" configuration key in prod.
// NOTE(review): the key read here is "WorkerToken"; an environment variable named
// WORKER_TOKEN does not bind to that key by default — confirm the deployment maps it.
var workerToken = builder.Configuration["WorkerToken"];
var maxPagesPerJob = builder.Configuration.GetValue("MaxPagesPerJob", 60);

// Encode the expected token once; null means the gate is open.
var workerTokenBytes = string.IsNullOrEmpty(workerToken)
    ? null
    : System.Text.Encoding.UTF8.GetBytes(workerToken);

app.MapGet("/health", () => Results.Ok(new { status = "ok" }));

// Operator read endpoints: "where is this listed?" across markets. Open (read-only).
app.MapGet("/market/skin/{skinId:int}", async (
    int skinId, MarketPresenceService presence, CancellationToken ct) =>
    Results.Ok(await presence.ForSkinAsync(skinId, ct)));

app.MapGet("/market/instance/{instanceId:int}", async (
    int instanceId, MarketPresenceService presence, CancellationToken ct) =>
    Results.Ok(await presence.ForInstanceAsync(instanceId, ct)));

var jobs = app.MapGroup("/jobs");
jobs.AddEndpointFilter(async (ctx, next) =>
{
    if (workerTokenBytes is not null)
    {
        // A missing header reads as the empty string and is rejected like any other mismatch.
        var presented = System.Text.Encoding.UTF8.GetBytes(
            ctx.HttpContext.Request.Headers["X-Worker-Token"].ToString());

        // Constant-time comparison so response timing does not reveal how much of the
        // secret a guess got right.
        if (!System.Security.Cryptography.CryptographicOperations.FixedTimeEquals(presented, workerTokenBytes))
        {
            return Results.Unauthorized();
        }
    }

    return await next(ctx);
});

// Claim the next stalest skin+wear to scrape. 204 when nothing is currently available
// (everything in the stalest batch is already leased to other workers).
jobs.MapGet("/next", async (JobQueue queue, SkinTrackerDbContext db, CancellationToken ct) =>
{
    var job = await queue.ClaimNextAsync(db, maxPagesPerJob, ct);
    return job is null ? Results.NoContent() : Results.Ok(job);
});

// Post a claimed job's scraped listings. The C2 owns parsing/persistence so the
// worker stays dumb: it just forwards the raw cs.money items it gathered.
jobs.MapPost("/{jobId}/result", async (
    string jobId, ScrapeResultDto result, JobQueue queue, CsMoneyIngestService ingest, CancellationToken ct) =>
{
    var mapping = queue.Complete(jobId);
    if (mapping is null)
    {
        return Results.NotFound(new { error = "unknown or expired jobId" });
    }

    // Only a fully-walked sweep ("completed") is authoritative. On a partial result
    // (fetch-cap / challenged / float tie) we still upsert what we saw, but we must NOT
    // mark unseen listings Removed or stamp the swept-checkpoint — the unseen ones may
    // simply be unfetched, and the band must be re-queued and retried.
    var complete = string.Equals(result.StoppedReason, "completed", StringComparison.OrdinalIgnoreCase);
    var r = await ingest.IngestAsync(mapping.SkinId, mapping.ConditionId, result.Items ?? [], complete, ct);
    return Results.Ok(r);
});

app.Run();
|
||||
23
BlueLaminate/BlueLaminate.C2/Properties/launchSettings.json
Normal file
23
BlueLaminate/BlueLaminate.C2/Properties/launchSettings.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"$schema": "https://json.schemastore.org/launchsettings.json",
|
||||
"profiles": {
|
||||
"http": {
|
||||
"commandName": "Project",
|
||||
"dotnetRunMessages": true,
|
||||
"launchBrowser": true,
|
||||
"applicationUrl": "http://localhost:5103",
|
||||
"environmentVariables": {
|
||||
"ASPNETCORE_ENVIRONMENT": "Development"
|
||||
}
|
||||
},
|
||||
"https": {
|
||||
"commandName": "Project",
|
||||
"dotnetRunMessages": true,
|
||||
"launchBrowser": true,
|
||||
"applicationUrl": "https://localhost:7111;http://localhost:5103",
|
||||
"environmentVariables": {
|
||||
"ASPNETCORE_ENVIRONMENT": "Development"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"Logging": {
|
||||
"LogLevel": {
|
||||
"Default": "Information",
|
||||
"Microsoft.AspNetCore": "Warning"
|
||||
}
|
||||
}
|
||||
}
|
||||
16
BlueLaminate/BlueLaminate.C2/appsettings.json
Normal file
16
BlueLaminate/BlueLaminate.C2/appsettings.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"Logging": {
|
||||
"LogLevel": {
|
||||
"Default": "Information",
|
||||
"Microsoft.AspNetCore": "Warning",
|
||||
"Microsoft.EntityFrameworkCore": "Warning"
|
||||
}
|
||||
},
|
||||
"AllowedHosts": "*",
|
||||
"Urls": "http://0.0.0.0:5080",
|
||||
"ConnectionStrings": {
|
||||
"SkinTracker": "Host=localhost;Port=5432;Database=skintracker;Username=postgres"
|
||||
},
|
||||
"WorkerToken": "dev-worker-token",
|
||||
"MaxPagesPerJob": 60
|
||||
}
|
||||
Reference in New Issue
Block a user