almost ready

This commit is contained in:
bob
2026-06-01 10:52:06 -05:00
parent 8b0eb0db78
commit 763305ca89
94 changed files with 8766 additions and 2674 deletions

2
.gitignore vendored
View File

@@ -101,6 +101,8 @@ env/
# cs.money discovery capture dumps (JSON responses)
csmoney-captures/
# API response capture dumps (CSFloat schema/listing samples, worker page dumps)
captures/
# Local compose secrets (DB connection string, tokens)
.env

View File

@@ -1,4 +1,5 @@
using BlueLaminate.Core.CsMoney;
using BlueLaminate.Core.SkinLand;
namespace BlueLaminate.C2;
@@ -17,3 +18,20 @@ public sealed record ScrapeJobDto(string JobId, int SkinId, int? ConditionId, st
/// <param name="StoppedReason">Why it stopped. "completed" = full sweep (authoritative);
/// anything else (fetch-cap / challenged / stuck-float-tie) is partial.</param>
public sealed record ScrapeResultDto(List<CsMoneyItem> Items, int Pages, string? StoppedReason);
/// <summary>A unit of skin.land scrape work: one skin+wear, as its market page URL.</summary>
/// <remarks>
/// This is the response body of <c>GET /skinland/jobs/next</c>; the endpoint builds it from
/// the band the skin.land <c>JobQueue</c> just claimed.
/// </remarks>
/// <param name="JobId">Opaque id the worker echoes back when posting results.</param>
/// <param name="SkinId">Catalogue skin this job targets.</param>
/// <param name="ConditionId">Wear band (skin_conditions row).</param>
/// <param name="Url">The skin.land market page, e.g.
/// "https://skin.land/market/csgo/ak-47-redline-field-tested/". The worker resolves the
/// internal skin_id from this page, then pages the obtained-skins API. The C2 produces it
/// with <c>SkinLandSlug.MarketUrl</c> from the band's weapon, skin name and condition.</param>
/// <param name="MaxPages">Safety cap on offer-page fetches (Laravel paginator, ~26/page).
/// The C2 fills it from the <c>MaxPagesPerJob</c> setting.</param>
public sealed record SkinLandJobDto(string JobId, int SkinId, int ConditionId, string Url, int MaxPages);
/// <summary>A worker's results for a claimed skin.land job: the offers it scraped.</summary>
/// <remarks>
/// This is the request body of <c>POST /skinland/jobs/{jobId}/result</c>. That endpoint treats
/// a missing <paramref name="Items"/> as an empty list and compares
/// <paramref name="StoppedReason"/> to "completed" case-insensitively.
/// </remarks>
/// <param name="Items">All obtained-skins offers gathered across pages (raw skin.land shape).</param>
/// <param name="Pages">How many offer pages the worker fetched.</param>
/// <param name="StoppedReason">Why it stopped. "completed" = full sweep (authoritative);
/// anything else (fetch-cap / challenged / no-skin-id) is partial.</param>
public sealed record SkinLandResultDto(List<SkinLandOffer> Items, int Pages, string? StoppedReason);

View File

@@ -1,5 +1,4 @@
using System.Collections.Concurrent;
using BlueLaminate.Core.CsMoney;
using BlueLaminate.EFCore.Data;
using Microsoft.EntityFrameworkCore;
@@ -7,42 +6,58 @@ namespace BlueLaminate.C2;
/// <summary>
/// Hands out scrape jobs to workers, one skin+wear at a time, driven directly by the
/// catalogue's per-band checkpoints (<c>SkinCondition.ListingsSweptAt</c>) rather than
/// a pre-built queue. Each claim picks the stalest band (never-swept first), leases it
/// in memory so two workers can't get the same one, and builds a free-text search. On
/// completion the ingest stamps <c>ListingsSweptAt</c>, so the band drops to the back —
/// the sweep loops the whole catalogue continuously and resumes cleanly after restarts.
/// catalogue's per-band, per-site checkpoints (the rows in <c>skin_condition_sweeps</c>
/// for this queue's <see cref="_source"/>) rather than a pre-built queue. Each claim picks
/// the stalest band (never-swept first), leases it in memory so two workers can't get the
/// same one, and builds the work target. On completion the ingest stamps the band's
/// checkpoint, so it drops to the back — the sweep loops the whole catalogue continuously
/// and resumes cleanly after restarts. Because the checkpoint is per-site, a band one
/// market just swept is still due on another.
/// <para>
/// The queue is source-agnostic: it's constructed with the checkpoint
/// <see cref="_source"/> and a <see cref="_targetBuilder"/> that turns a band into the
/// thing a worker needs — a free-text search for cs.money, a market URL for skin.land — so
/// one class drives every market. Register one instance per source.
/// </para>
/// <para>
/// A <see cref="_minResweepInterval"/> floor keeps a band from being re-handed-out until
/// its data is at least that stale. Without it the queue re-scrapes the whole catalogue
/// as fast as the workers run, which on a metered residential proxy is the dominant cost;
/// the floor trades a little price-freshness for a roughly linear bandwidth cut (a 6h
/// floor vs. continuous ≈ 6× less, if a full pass takes ~1h). When every band is fresher
/// than the floor the queue hands out nothing (workers idle) until one ages past it.
/// its data is at least that stale. Without it the queue re-scrapes the whole catalogue as
/// fast as the workers run, which on a metered residential proxy is the dominant cost; the
/// floor trades a little price-freshness for a roughly linear bandwidth cut. When every
/// band is fresher than the floor the queue hands out nothing (workers idle) until one ages.
/// </para>
/// </summary>
public sealed class JobQueue
{
// A leased condition can't be re-handed-out until released or the lease expires
// (so a crashed worker's band returns to the pool instead of stalling forever).
// A leased condition can't be re-handed-out until released or the lease expires (so a
// crashed worker's band returns to the pool instead of stalling forever).
private static readonly TimeSpan LeaseTtl = TimeSpan.FromMinutes(15);
private const int CandidateBatch = 100;
private readonly string _source;
private readonly TimeSpan _minResweepInterval;
private readonly Func<Candidate, string> _targetBuilder;
private readonly SemaphoreSlim _gate = new(1, 1);
private readonly ConcurrentDictionary<int, DateTimeOffset> _leases = new(); // conditionId -> leasedAt
private readonly ConcurrentDictionary<string, JobMapping> _inFlight = new(); // jobId -> mapping
/// <param name="source">
/// The <c>skin_condition_sweeps.Source</c> this queue reads/leases on (a
/// <c>SweepSource</c> value, e.g. "csmoney" / "skinland").
/// </param>
/// <param name="minResweepInterval">
/// How stale a band's <c>ListingsSweptAt</c> must be before it's eligible again.
/// How stale a band's checkpoint must be before it's eligible again.
/// <see cref="TimeSpan.Zero"/> disables the floor (continuous re-sweep).
/// </param>
public JobQueue(TimeSpan minResweepInterval)
/// <param name="targetBuilder">Turns a claimed band into the worker's target string.</param>
public JobQueue(string source, TimeSpan minResweepInterval, Func<Candidate, string> targetBuilder)
{
_source = source;
_minResweepInterval = minResweepInterval;
_targetBuilder = targetBuilder;
}
public async Task<ScrapeJobDto?> ClaimNextAsync(SkinTrackerDbContext db, int maxPages, CancellationToken ct)
public async Task<ClaimedJob?> ClaimNextAsync(SkinTrackerDbContext db, int maxPages, CancellationToken ct)
{
await _gate.WaitAsync(ct);
try
@@ -58,17 +73,26 @@ public sealed class JobQueue
}
// Only consider bands that are never-swept or stale past the re-sweep floor,
// then stalest first (never-swept null sorts before any timestamp). With the
// floor in place a fully-fresh catalogue yields no candidates, so workers idle
// instead of needlessly re-pulling ~1MB pages on the metered proxy.
// then stalest first (never-swept null sorts before any timestamp). The
// checkpoint is read for THIS queue's source only (a correlated subquery over
// the per-site sweep rows), so a band another market just swept is still judged
// by this market's own checkpoint — never-swept here if it has no row for this
// source. With the floor in place a fully-fresh catalogue yields no candidates,
// so workers idle instead of needlessly re-pulling on the proxy.
var freshCutoff = DateTimeOffset.UtcNow - _minResweepInterval;
var candidates = await db.SkinConditions
.Where(c => c.ListingsSweptAt == null || c.ListingsSweptAt <= freshCutoff)
.OrderBy(c => c.ListingsSweptAt.HasValue)
.ThenBy(c => c.ListingsSweptAt)
.Select(c => new Candidate(
c.Id, c.SkinId, c.Skin.Weapon.Name, c.Skin.Name, c.Condition))
.Select(c => new
{
Candidate = new Candidate(c.Id, c.SkinId, c.Skin.Weapon.Name, c.Skin.Name, c.Condition),
SweptAt = c.Sweeps
.Where(s => s.Source == _source)
.Select(s => (DateTimeOffset?)s.SweptAt)
.FirstOrDefault(),
})
.Where(x => x.SweptAt == null || x.SweptAt <= freshCutoff)
.OrderBy(x => x.SweptAt.HasValue)
.ThenBy(x => x.SweptAt)
.Take(CandidateBatch)
.Select(x => x.Candidate)
.ToListAsync(ct);
var pick = candidates.FirstOrDefault(c => !_leases.ContainsKey(c.ConditionId));
@@ -81,9 +105,7 @@ public sealed class JobQueue
var jobId = Guid.NewGuid().ToString("N");
_inFlight[jobId] = new JobMapping(pick.SkinId, pick.ConditionId);
var code = Wear.ToCode(pick.Condition) ?? pick.Condition;
var search = $"{pick.Weapon} {pick.SkinName} {code}".Trim();
return new ScrapeJobDto(jobId, pick.SkinId, pick.ConditionId, search, maxPages);
return new ClaimedJob(jobId, pick.SkinId, pick.ConditionId, _targetBuilder(pick), maxPages);
}
finally
{
@@ -107,5 +129,8 @@ public sealed class JobQueue
public sealed record JobMapping(int SkinId, int ConditionId);
private sealed record Candidate(int ConditionId, int SkinId, string Weapon, string SkinName, string Condition);
/// <summary>A claimed band ready to hand to a worker: its ids + built target string.</summary>
public sealed record ClaimedJob(string JobId, int SkinId, int ConditionId, string Target, int MaxPages);
public sealed record Candidate(int ConditionId, int SkinId, string Weapon, string SkinName, string Condition);
}

View File

@@ -1,13 +1,16 @@
using BlueLaminate.C2;
using BlueLaminate.Core.CsMoney;
using BlueLaminate.Core.DependencyInjection;
using BlueLaminate.Core.SkinLand;
using System.Text.Json.Serialization;
using BlueLaminate.EFCore.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
// The C2: hands cs.money scrape jobs to Python workers and ingests their results.
// Reuses the whole BlueLaminate stack (DB, ingest service) via the one composition root.
// Content root = the binary directory so appsettings.json is found regardless of the
// working directory the process is launched from (matches the CLI's approach).
// The C2: hands cs.money and skin.land scrape jobs to Python workers and ingests their
// results. Reuses the whole BlueLaminate stack (DB, ingest services) via the one
// composition root. Content root = the binary directory so appsettings.json is found
// regardless of the working directory the process is launched from (matches the CLI).
var builder = WebApplication.CreateBuilder(new WebApplicationOptions
{
Args = args,
@@ -15,17 +18,34 @@ var builder = WebApplication.CreateBuilder(new WebApplicationOptions
});
builder.Services.AddBlueLaminateCore(builder.Configuration);
// Re-sweep floor: don't re-hand-out a band whose listings were swept less than this
// many hours ago. The dominant cost on the metered residential proxy is re-scraping
// already-fresh bands, so this caps how often any band is re-pulled. 0 = continuous.
// Worker result bodies carry some numbers as JSON strings (skin.land's item_float comes
// through as "0.60…"); allow string-encoded numbers so they bind, parsed straight to
// decimal (full precision preserved). Harmless to cs.money's numeric fields.
builder.Services.ConfigureHttpJsonOptions(o =>
o.SerializerOptions.NumberHandling |= JsonNumberHandling.AllowReadingFromString);
// Re-sweep floor: don't re-hand-out a band whose listings were swept less than this many
// hours ago. The dominant cost on the metered residential proxy is re-scraping already-
// fresh bands, so this caps how often any band is re-pulled. 0 = continuous. Shared by
// both markets (each keeps its own per-site checkpoints, so the floors are independent).
var minResweepHours = builder.Configuration.GetValue("MinResweepHours", 6.0);
builder.Services.AddSingleton(new JobQueue(TimeSpan.FromHours(minResweepHours)));
var floor = TimeSpan.FromHours(minResweepHours);
// One JobQueue per market source (same class, different checkpoint source + target). The
// candidate query reads each band's checkpoint for that queue's source only, so the two
// sweeps progress independently over the shared catalogue.
builder.Services.AddKeyedSingleton(CsMoneyIngestService.Source, new JobQueue(
CsMoneyIngestService.Source, floor,
c => $"{c.Weapon} {c.SkinName} {Wear.ToCode(c.Condition) ?? c.Condition}".Trim()));
builder.Services.AddKeyedSingleton(SkinLandIngestService.Source, new JobQueue(
SkinLandIngestService.Source, floor,
c => SkinLandSlug.MarketUrl(c.Weapon, c.SkinName, c.Condition)));
var app = builder.Build();
// Apply pending EF migrations at startup (incl. the market_listings view) so a fresh
// container is ready with one command. Disable with AutoMigrate=false if you'd rather
// run `dotnet ef database update` yourself.
// container is ready with one command. Disable with AutoMigrate=false if you'd rather run
// `dotnet ef database update` yourself.
if (app.Configuration.GetValue("AutoMigrate", true))
{
using var scope = app.Services.CreateScope();
@@ -33,8 +53,8 @@ if (app.Configuration.GetValue("AutoMigrate", true))
db.Database.Migrate();
}
// Shared-secret gate. Workers send it as X-Worker-Token; if no token is configured
// the gate is open (local dev). Set WorkerToken (config) / WORKER_TOKEN (env) in prod.
// Shared-secret gate. Workers send it as X-Worker-Token; if no token is configured the
// gate is open (local dev). Set WorkerToken (config) / WORKER_TOKEN (env) in prod.
var workerToken = builder.Configuration["WorkerToken"];
var maxPagesPerJob = builder.Configuration.GetValue("MaxPagesPerJob", 60);
@@ -49,30 +69,43 @@ app.MapGet("/market/instance/{instanceId:int}", async (
int instanceId, MarketPresenceService presence, CancellationToken ct) =>
Results.Ok(await presence.ForInstanceAsync(instanceId, ct)));
var jobs = app.MapGroup("/jobs");
jobs.AddEndpointFilter(async (ctx, next) =>
// The same X-Worker-Token gate applied to every worker-facing route group.
Func<RouteGroupBuilder, RouteGroupBuilder> withTokenGate = group =>
{
if (!string.IsNullOrEmpty(workerToken)
&& ctx.HttpContext.Request.Headers["X-Worker-Token"].ToString() != workerToken)
group.AddEndpointFilter(async (ctx, next) =>
{
return Results.Unauthorized();
}
if (!string.IsNullOrEmpty(workerToken)
&& ctx.HttpContext.Request.Headers["X-Worker-Token"].ToString() != workerToken)
{
return Results.Unauthorized();
}
return await next(ctx);
});
return await next(ctx);
});
return group;
};
// --- cs.money worker endpoints (unchanged behaviour) ------------------------------------
var jobs = withTokenGate(app.MapGroup("/jobs"));
// Claim the next stalest skin+wear to scrape. 204 when nothing is currently available:
// either every band is fresher than the re-sweep floor, or everything in the stalest
// batch is already leased to other workers.
jobs.MapGet("/next", async (JobQueue queue, SkinTrackerDbContext db, CancellationToken ct) =>
jobs.MapGet("/next", async (
[FromKeyedServices(CsMoneyIngestService.Source)] JobQueue queue,
SkinTrackerDbContext db, CancellationToken ct) =>
{
var job = await queue.ClaimNextAsync(db, maxPagesPerJob, ct);
return job is null ? Results.NoContent() : Results.Ok(job);
return job is null
? Results.NoContent()
: Results.Ok(new ScrapeJobDto(job.JobId, job.SkinId, job.ConditionId, job.Target, job.MaxPages));
});
// Post a claimed job's scraped listings. The C2 owns parsing/persistence so the
// worker stays dumb: it just forwards the raw cs.money items it gathered.
// Post a claimed job's scraped listings. The C2 owns parsing/persistence so the worker
// stays dumb: it just forwards the raw cs.money items it gathered.
jobs.MapPost("/{jobId}/result", async (
string jobId, ScrapeResultDto result, JobQueue queue, CsMoneyIngestService ingest, CancellationToken ct) =>
string jobId, ScrapeResultDto result,
[FromKeyedServices(CsMoneyIngestService.Source)] JobQueue queue,
CsMoneyIngestService ingest, CancellationToken ct) =>
{
var mapping = queue.Complete(jobId);
if (mapping is null)
@@ -89,4 +122,33 @@ jobs.MapPost("/{jobId}/result", async (
return Results.Ok(r);
});
// --- skin.land worker endpoints ---------------------------------------------------------
// Served under /skinland/jobs, behind the worker-token gate, and backed by the JobQueue
// registered under the skin.land source key.
var skinLandJobs = withTokenGate(app.MapGroup("/skinland/jobs"));

// Claim the next skin.land band. 204 when the queue has nothing to hand out.
skinLandJobs.MapGet("/next", async (
    [FromKeyedServices(SkinLandIngestService.Source)] JobQueue queue,
    SkinTrackerDbContext db, CancellationToken ct) =>
{
    var claimed = await queue.ClaimNextAsync(db, maxPagesPerJob, ct);
    if (claimed is null)
    {
        return Results.NoContent();
    }

    // The queue's generic target string is handed to the worker as the job's Url.
    var dto = new SkinLandJobDto(
        claimed.JobId, claimed.SkinId, claimed.ConditionId, claimed.Target, claimed.MaxPages);
    return Results.Ok(dto);
});

// Post a claimed job's scraped offers. The job id is resolved back to its skin/condition
// first; an id the queue does not recognise is answered with 404.
skinLandJobs.MapPost("/{jobId}/result", async (
    string jobId, SkinLandResultDto result,
    [FromKeyedServices(SkinLandIngestService.Source)] JobQueue queue,
    SkinLandIngestService ingest, CancellationToken ct) =>
{
    if (queue.Complete(jobId) is not { } mapping)
    {
        return Results.NotFound(new { error = "unknown or expired jobId" });
    }

    // Only a "completed" stop reason counts as a full sweep; anything else is partial.
    var isFullSweep = "completed".Equals(result.StoppedReason, StringComparison.OrdinalIgnoreCase);
    var offers = result.Items ?? [];
    var summary = await ingest.IngestAsync(mapping.SkinId, mapping.ConditionId, offers, isFullSweep, ct);
    return Results.Ok(summary);
});
app.Run();

View File

@@ -1,122 +0,0 @@
using BlueLaminate.Scraper.CsMoney;
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Options;
using System.CommandLine;
namespace BlueLaminate.Cli.Commands;
/// <summary>
/// The <c>capture-csmoney</c> CLI command. It opens the cs.money market in a real,
/// non-headless browser routed through the IPRoyal residential proxy (local forwarding hop,
/// no CDP). The operator clears the Cloudflare challenge once; the tool then pages the
/// listings API from inside the cleared page with human-like pacing, dumps each page's JSON
/// and reports how many pages survive before a re-challenge. It is a discovery/measurement
/// tool and writes nothing to the database. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.
/// </summary>
internal static class CaptureCsMoneyCommand
{
    /// <summary>Creates the command, registers its options and binds them to the handler.</summary>
    /// <param name="host">Host whose service provider the handler resolves its services from.</param>
    /// <returns>The configured <c>capture-csmoney</c> command.</returns>
    public static Command Build(IHost host)
    {
        Option<string?> countryOpt = new("--country")
        {
            Description = "ISO country code(s) for the exit IP, e.g. \"us\". Default: configured/random.",
        };
        Option<bool> loadImagesOpt = new("--load-images")
        {
            Description = "Load images (uses more bandwidth). Default off to conserve the metered plan.",
        };
        Option<int> pagesOpt = new("--pages")
        {
            Description = "Maximum offset pages (60 items each) to fetch before stopping.",
            DefaultValueFactory = _ => 50,
        };
        Option<bool> noProxyOpt = new("--no-proxy")
        {
            Description = "Diagnostic: drive the browser on this machine's own IP (no IPRoyal proxy), "
                + "to isolate whether re-challenges are IP reputation vs. the webdriver fingerprint.",
        };
        Option<string> outOpt = new("--out")
        {
            Description = "Directory to write captured JSON pages to.",
            DefaultValueFactory = _ => "csmoney-captures",
        };

        var command = new Command(
            "capture-csmoney",
            "Open the cs.money market through the residential proxy, clear Cloudflare once, then page "
                + "the listings API with pacing and report how many pages survive. Discovery/measurement "
                + "tool — writes nothing to the database. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.");

        // Registration order matters for how the options are listed, so keep it fixed.
        foreach (Option option in new Option[] { countryOpt, loadImagesOpt, pagesOpt, outOpt, noProxyOpt })
        {
            command.Add(option);
        }

        command.SetAction((parsed, ct) =>
        {
            return RunAsync(
                host,
                country: parsed.GetValue(countryOpt),
                loadImages: parsed.GetValue(loadImagesOpt),
                pages: parsed.GetValue(pagesOpt),
                outDir: parsed.GetValue(outOpt)!,
                noProxy: parsed.GetValue(noProxyOpt),
                ct: ct);
        });

        return command;
    }

    /// <summary>Runs one interactive capture session and maps the outcome to an exit code.</summary>
    /// <param name="host">Host used to create the DI scope for this run.</param>
    /// <param name="country">Exit-IP country from the command line; blank falls back to the configured one.</param>
    /// <param name="loadImages">Command-line request to load images; the configured setting can also enable it.</param>
    /// <param name="pages">Maximum number of listing pages to fetch.</param>
    /// <param name="outDir">Directory the captured JSON pages are written to.</param>
    /// <param name="noProxy">When true the browser is driven without the proxy.</param>
    /// <param name="ct">Cancels the capture.</param>
    /// <returns>0 if at least one page succeeded, 130 on cancellation, otherwise 1.</returns>
    private static async Task<int> RunAsync(
        IHost host, string? country, bool loadImages, int pages, string outDir, bool noProxy,
        CancellationToken ct)
    {
        using var scope = host.Services.CreateScope();
        var services = scope.ServiceProvider;
        var settings = services.GetRequiredService<IOptions<CsMoneyOptions>>().Value;

        // An explicit --country wins; a blank one defers to configuration.
        var exitCountry = country;
        if (string.IsNullOrWhiteSpace(exitCountry))
        {
            exitCountry = settings.Country;
        }

        var withImages = loadImages || settings.LoadImages;
        var directNote = noProxy ? " (DIRECT no proxy)" : "";

        Console.WriteLine($"Opening {settings.MarketUrl}{directNote}");
        Console.WriteLine(
            "Solve any Cloudflare challenge in the window and wait until the market grid "
            + "(items + prices) is actually visible — that means the session is cleared.");
        Console.WriteLine(
            $"Press Enter here once it's visible. The tool then pages up to {pages} page(s) of "
            + "listings from inside the cleared page and reports how far it gets.");

        try
        {
            var capture = services.GetRequiredService<CsMoneyCaptureService>();
            var proxyRequest = new ProxyRequest(Country: exitCountry, Sticky: true);

            // The browser stays open until the operator presses Enter. Console.ReadLine is
            // synchronous, so the wait is pushed onto a worker thread.
            var outcome = await capture.RunAsync(
                outDir,
                proxyRequest,
                withImages,
                useProxy: !noProxy,
                pages,
                () => Task.Run(() => Console.ReadLine(), ct),
                ct);

            var absoluteOut = Path.GetFullPath(outDir);
            Console.WriteLine();
            Console.WriteLine(
                $"Stopped: {outcome.StoppedReason}. {outcome.PagesSucceeded} page(s), "
                + $"{outcome.ItemsTotal} item(s) → {absoluteOut}");

            if (outcome.PagesSucceeded > 0)
            {
                return 0;
            }

            return 1;
        }
        catch (OperationCanceledException)
        {
            Console.Error.WriteLine("Capture cancelled.");
            return 130;
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"cs.money capture failed: {ex.Message}");
            return 1;
        }
    }
}

View File

@@ -1,72 +0,0 @@
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using System.CommandLine;
namespace BlueLaminate.Cli.Commands;
/// <summary>
/// The <c>probe-proxy</c> CLI command. It launches a non-headless Edge browser through the
/// IPRoyal residential proxy and prints the exit IP, so the operator can confirm that
/// authentication works and the IP is genuinely residential. Reads IPROYAL_USERNAME /
/// IPROYAL_PASSWORD. It costs only a few KB, which makes it the right first check against a
/// metered plan.
/// </summary>
internal static class ProbeProxyCommand
{
    /// <summary>Creates the command, registers its options and binds them to the handler.</summary>
    /// <param name="host">Host whose service provider the handler resolves its services from.</param>
    /// <returns>The configured <c>probe-proxy</c> command.</returns>
    public static Command Build(IHost host)
    {
        Option<string?> countryOpt = new("--country")
        {
            Description = "Optional ISO country code(s) for the exit IP, e.g. \"us\" or \"us,gb\". "
                + "Default: random.",
        };
        Option<bool> rotatingOpt = new("--rotating")
        {
            Description = "Use a rotating exit IP instead of a pinned (sticky) session.",
        };

        var command = new Command(
            "probe-proxy",
            "Launch non-headless Edge through the IPRoyal residential proxy and print the exit IP "
                + "to confirm auth works and the IP is residential. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.");
        command.Add(countryOpt);
        command.Add(rotatingOpt);

        command.SetAction((parsed, ct) =>
        {
            return RunAsync(
                host,
                country: parsed.GetValue(countryOpt),
                rotating: parsed.GetValue(rotatingOpt),
                ct: ct);
        });

        return command;
    }

    /// <summary>Runs the probe once and prints the exit-IP details.</summary>
    /// <param name="host">Host used to create the DI scope for this run.</param>
    /// <param name="country">Optional exit-IP country filter passed to the proxy request.</param>
    /// <param name="rotating">When true a rotating exit IP is requested instead of a sticky session.</param>
    /// <param name="ct">Cancellation token from the command host; this method does not forward it to the probe.</param>
    /// <returns>0 when the probe succeeds, 1 when it throws.</returns>
    private static async Task<int> RunAsync(
        IHost host, string? country, bool rotating, CancellationToken ct)
    {
        using var scope = host.Services.CreateScope();

        try
        {
            var probe = scope.ServiceProvider.GetRequiredService<ProxyProbe>();
            var request = new ProxyRequest(Country: country, Sticky: !rotating);
            var exit = await probe.RunAsync(request);

            Console.WriteLine();
            Console.WriteLine($"  Exit IP : {exit.Ip}");
            Console.WriteLine($"  Location: {exit.City}, {exit.Region}, {exit.Country}");
            Console.WriteLine($"  Org/ASN : {exit.Org}");
            Console.WriteLine($"  Hostname: {exit.Hostname ?? ""}");
            Console.WriteLine();
            Console.WriteLine(
                "Check Org/ASN: a consumer ISP = residential; a hosting provider = datacenter.");

            return 0;
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Proxy probe failed: {ex.Message}");
            return 1;
        }
    }
}

View File

@@ -72,8 +72,6 @@ var root = new RootCommand("BlueLaminate CLI — Counter-Strike skin tracker too
FetchListingsCommand.Build(host),
SweepListingsCommand.Build(host),
SweepCatalogCommand.Build(host),
ProbeProxyCommand.Build(host),
CaptureCsMoneyCommand.Build(host),
};
// Ctrl+C → cancel the action's token so long-running commands (e.g. sweep-catalog,

View File

@@ -10,14 +10,6 @@
"SkinCatalog": {
"Url": "https://raw.githubusercontent.com/ByMykel/CSGO-API/refs/heads/main/public/api/en/skins.json"
},
"CsMoney": {
"MarketUrl": "https://cs.money/market/buy/",
"ApiUrlTemplate": "https://cs.money/2.0/market/sell-orders?limit=60&offset={0}",
"Country": "",
"LoadImages": false,
"PageDelaySeconds": 2.5,
"PageJitterSeconds": 2.0
},
"Sweep": {
"PageDelay": "00:00:05",
"MaxJitter": "00:00:03",

View File

@@ -20,7 +20,7 @@ public sealed record CsMoneyIngestResult(
/// </summary>
public sealed class CsMoneyIngestService
{
public const string Source = "csmoney";
public const string Source = SweepSource.CsMoney;
private readonly SkinTrackerDbContext _db;
private readonly ILogger<CsMoneyIngestService> _logger;
@@ -192,7 +192,7 @@ public sealed class CsMoneyIngestService
return null;
}
var seed = pattern.ToString();
var seed = pattern;
var st = it.Asset.IsStatTrak;
var sv = it.Asset.IsSouvenir;
@@ -280,13 +280,13 @@ public sealed class CsMoneyIngestService
}
}
// Stamp this band's cs.money checkpoint (upsert into skin_condition_sweeps under
// the csmoney source). Caller persists via SaveChangesAsync.
private async Task StampCheckpointAsync(int? conditionId, DateTimeOffset now, CancellationToken ct)
{
if (conditionId is { } cid)
{
await _db.SkinConditions
.Where(c => c.Id == cid)
.ExecuteUpdateAsync(s => s.SetProperty(c => c.ListingsSweptAt, now), ct);
await SweepCheckpoints.StampConditionAsync(_db, cid, Source, now, ct);
}
}

View File

@@ -2,10 +2,7 @@ using BlueLaminate.Core.Listings;
using BlueLaminate.Core.Options;
using BlueLaminate.Core.Skins;
using BlueLaminate.EFCore.DependencyInjection;
using BlueLaminate.Scraper.Browser;
using BlueLaminate.Scraper.CsFloat;
using BlueLaminate.Scraper.CsMoney;
using BlueLaminate.Scraper.Proxies;
using BlueLaminate.Scraper.Skins;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
@@ -54,8 +51,6 @@ public static class ServiceCollectionExtensions
.Bind(configuration.GetSection(SkinCatalogOptions.SectionName));
services.AddOptions<SweepOptions>()
.Bind(configuration.GetSection(SweepOptions.SectionName));
services.AddOptions<CsMoneyOptions>()
.Bind(configuration.GetSection(CsMoneyOptions.SectionName));
// Typed-handler pooling via IHttpClientFactory; clients are scoped so a
// command's handler and the service it drives share one instance (and thus
@@ -72,42 +67,12 @@ public static class ServiceCollectionExtensions
sp.GetRequiredService<IHttpClientFactory>().CreateClient(CatalogHttpClient),
sp.GetRequiredService<IOptions<SkinCatalogOptions>>().Value));
// Residential proxy provider (IPRoyal). Credentials come from configuration
// — IPROYAL_USERNAME / IPROYAL_PASSWORD env vars in practice. Resolution
// throws a clear error only when a proxy-using command actually needs it, so
// API-only commands (sync, fetch) run without proxy creds configured.
services.AddSingleton<IProxyProvider>(sp =>
{
var username = configuration["IPROYAL_USERNAME"];
var password = configuration["IPROYAL_PASSWORD"];
if (string.IsNullOrWhiteSpace(username) || string.IsNullOrWhiteSpace(password))
{
throw new InvalidOperationException(
"IPRoyal credentials are not configured. Set IPROYAL_USERNAME and "
+ "IPROYAL_PASSWORD (env vars or user secrets) before running a proxy command.");
}
return new IpRoyalProxyProvider(username, password);
});
// cs.money is driven through a real, non-headless browser (Selenium/Edge,
// zero CDP) routed through a local forwarding proxy that chains to the
// residential gateway, not an HttpClient.
services.AddSingleton<LocalForwardingProxyFactory>();
services.AddScoped<BrowserDriverFactory>();
services.AddScoped<ProxyProbe>();
services.AddScoped(sp => new CsMoneyCaptureService(
sp.GetRequiredService<IProxyProvider>(),
sp.GetRequiredService<LocalForwardingProxyFactory>(),
sp.GetRequiredService<BrowserDriverFactory>(),
sp.GetRequiredService<IOptions<CsMoneyOptions>>().Value,
sp.GetRequiredService<ILogger<CsMoneyCaptureService>>()));
// Application services (constructor injection; DbContext keeps them scoped).
services.AddScoped<ListingSweepService>();
services.AddScoped<SkinSyncService>();
services.AddScoped<CsMoney.CsMoneyIngestService>();
services.AddScoped<CsMoney.MarketPresenceService>();
services.AddScoped<SkinLand.SkinLandIngestService>();
return services;
}

View File

@@ -30,7 +30,7 @@ namespace BlueLaminate.Core.Listings;
public sealed class ListingSweepService
{
public const string Source = "listings";
public const string CatalogSource = "listings-catalog";
public const string CatalogSource = SweepSource.CsFloatCatalog;
private readonly SkinTrackerDbContext _db;
private readonly CsFloatListingsClient _client;
@@ -79,6 +79,9 @@ public sealed class ListingSweepService
.Select(s => new { s.Id, s.DefIndex, s.PaintIndex })
.ToDictionaryAsync(s => (s.DefIndex!.Value, s.PaintIndex!.Value), s => s.Id, ct);
// (skin, wear) -> condition id, so each listing's wear band is set directly.
var conditionLookup = await BuildConditionLookupAsync(ct);
// Track which listing ids we touched this run, so a complete pass can flag
// the rest as Removed.
var touchedIds = new HashSet<string>();
@@ -118,7 +121,7 @@ public sealed class ListingSweepService
seen += page.Listings.Count;
var (ins, upd, link, allKnown) = await IngestPageAsync(
page.Listings, skinByIndex, touchedIds, touchedInstanceIds, now, ct);
page.Listings, skinByIndex, conditionLookup, touchedIds, touchedInstanceIds, now, ct);
inserted += ins;
updated += upd;
linked += link;
@@ -207,7 +210,7 @@ public sealed class ListingSweepService
try
{
// Repeat the whole catalogue until cancelled. Re-querying each pass picks
// up newly-synced skins and re-orders by the latest ListingsSweptAt.
// up newly-synced skins and re-orders by this site's latest checkpoint.
while (!ct.IsCancellationRequested)
{
var now = DateTimeOffset.UtcNow;
@@ -219,6 +222,9 @@ public sealed class ListingSweepService
break;
}
// (skin, wear) -> condition id, refreshed each pass alongside the units.
var conditionLookup = await BuildConditionLookupAsync(ct);
var index = 0;
foreach (var unit in units)
{
@@ -258,7 +264,7 @@ public sealed class ListingSweepService
seen += page.Listings.Count;
var (ins, upd, _, _) = await IngestPageAsync(
page.Listings, lookup, touchedIds, touchedInstanceIds, now, ct);
page.Listings, lookup, conditionLookup, touchedIds, touchedInstanceIds, now, ct);
inserted += ins;
updated += upd;
@@ -293,20 +299,19 @@ public sealed class ListingSweepService
{
removed += await MarkRemovedForSkinConditionAsync(
unit.SkinId, unit.Condition!, touchedIds, now, ct);
await _db.SkinConditions
.Where(c => c.Id == conditionId)
.ExecuteUpdateAsync(
setters => setters.SetProperty(c => c.ListingsSweptAt, now), ct);
await SweepCheckpoints.StampConditionAsync(_db, conditionId, CatalogSource, now, ct);
}
else
{
removed += await MarkRemovedForSkinAsync(unit.SkinId, touchedIds, now, ct);
await _db.Skins
.Where(s => s.Id == unit.SkinId)
.ExecuteUpdateAsync(
setters => setters.SetProperty(s => s.ListingsSweptAt, now), ct);
await SweepCheckpoints.StampSkinAsync(_db, unit.SkinId, CatalogSource, now, ct);
}
// Persist the checkpoint upsert now so a cancellation between bands
// doesn't lose it (the stamp goes through the change tracker, not a
// set-based update).
await _db.SaveChangesAsync(ct);
covered++;
await PaceAsync(delayBetweenPages, ct);
@@ -352,8 +357,9 @@ public sealed class ListingSweepService
// One unit of catalogue-sweep work: a skin filtered to a single wear band, or a
// whole skin when it has no bands. Float bounds + ConditionId are null for the
// whole-skin case (tracked by Skin.ListingsSweptAt instead). SweptAt drives the
// never-swept-first / stalest-first ordering.
// whole-skin case (checkpointed in skin_sweeps rather than skin_condition_sweeps).
// SweptAt is this site's checkpoint for the unit and drives the never-swept-first /
// stalest-first ordering.
private sealed record SweepUnit(
int SkinId,
int Def,
@@ -383,6 +389,9 @@ public sealed class ListingSweepService
// small (~2k skins) so this is negligible.
private async Task<List<SweepUnit>> BuildSweepUnitsAsync(CancellationToken ct)
{
// Read each unit's checkpoint for THIS site only (a correlated subquery over the
// per-source sweep rows), so a band swept on another site still sorts as
// never-swept here. No row for this source => null => front of the queue.
var skins = await _db.Skins
.Where(s => s.DefIndex != null && s.PaintIndex != null)
.Select(s => new
@@ -393,9 +402,22 @@ public sealed class ListingSweepService
s.Name,
Weapon = s.Weapon.Name,
s.Rarity,
s.ListingsSweptAt,
SweptAt = s.Sweeps
.Where(x => x.Source == CatalogSource)
.Select(x => (DateTimeOffset?)x.SweptAt)
.FirstOrDefault(),
Conditions = s.Conditions
.Select(c => new { c.Id, c.Condition, c.MinFloat, c.MaxFloat, c.ListingsSweptAt })
.Select(c => new
{
c.Id,
c.Condition,
c.FloatMin,
c.FloatMax,
SweptAt = c.Sweeps
.Where(x => x.Source == CatalogSource)
.Select(x => (DateTimeOffset?)x.SweptAt)
.FirstOrDefault(),
})
.ToList(),
})
.ToListAsync(ct);
@@ -408,7 +430,7 @@ public sealed class ListingSweepService
units.Add(new SweepUnit(
s.Id, s.Def, s.Paint, s.Name, s.Weapon, s.Rarity,
ConditionId: null, Condition: null, MinFloat: null, MaxFloat: null,
SweptAt: s.ListingsSweptAt));
SweptAt: s.SweptAt));
continue;
}
@@ -417,8 +439,8 @@ public sealed class ListingSweepService
units.Add(new SweepUnit(
s.Id, s.Def, s.Paint, s.Name, s.Weapon, s.Rarity,
ConditionId: c.Id, Condition: c.Condition,
MinFloat: c.MinFloat, MaxFloat: c.MaxFloat,
SweptAt: c.ListingsSweptAt));
MinFloat: c.FloatMin, MaxFloat: c.FloatMax,
SweptAt: c.SweptAt));
}
}
@@ -431,6 +453,15 @@ public sealed class ListingSweepService
.ToList();
}
// Loads every wear band once per run and indexes it as
// (skin id, wear name) -> skin_conditions.id, so a listing's band can be resolved
// in memory instead of with a query per row. The wear name is the value stored in
// skin_conditions.condition (CSFloat's authoritative tier name, e.g. "Factory New").
private async Task<Dictionary<(int SkinId, string Condition), int>> BuildConditionLookupAsync(
    CancellationToken ct)
{
    var bands = _db.SkinConditions
        .Select(band => new { band.SkinId, band.Condition, band.Id });

    return await bands.ToDictionaryAsync(
        band => (band.SkinId, band.Condition),
        band => band.Id,
        ct);
}
// Flag this skin's once-Active listings that we didn't see this run as Removed.
private async Task<int> MarkRemovedForSkinAsync(
int skinId, HashSet<string> touchedIds, DateTimeOffset now, CancellationToken ct)
@@ -472,6 +503,7 @@ public sealed class ListingSweepService
private async Task<(int Inserted, int Updated, int Linked, bool AllKnown)> IngestPageAsync(
IReadOnlyList<CsFloatListing> listings,
IReadOnlyDictionary<(int, int), int> skinByIndex,
IReadOnlyDictionary<(int, string), int> conditionBySkinAndWear,
HashSet<string> touchedIds,
HashSet<int> touchedInstanceIds,
DateTimeOffset now,
@@ -501,6 +533,14 @@ public sealed class ListingSweepService
linked++;
}
// Wear band: resolve from (skin, wear name) so both the catalogue and the
// incremental sweep set the same condition_id. Null when the skin is
// unknown or the item has no wear (e.g. vanilla knives).
int? conditionId = skinId is { } skinForCond && l.WearName is { } wearForCond
&& conditionBySkinAndWear.TryGetValue((skinForCond, wearForCond), out var resolvedCond)
? resolvedCond
: null;
// Resolve the physical item only when we know the skin — the
// fingerprint is meaningless without it.
var instance = skinId is { } sid
@@ -520,6 +560,7 @@ public sealed class ListingSweepService
row.Status = ListingStatus.Active;
row.RemovedAt = null;
row.SkinId = skinId;
row.ConditionId = conditionId;
row.AssetId = l.AssetId;
row.SkinInstance = instance;
updated++;
@@ -527,7 +568,7 @@ public sealed class ListingSweepService
else
{
allKnown = false;
var entity = MapToEntity(l, skinId, now);
var entity = MapToEntity(l, skinId, conditionId, now);
entity.SkinInstance = instance;
_db.Listings.Add(entity);
inserted++;
@@ -541,16 +582,23 @@ public sealed class ListingSweepService
// The fingerprint is (skin, full-precision float, seed, stattrak, souvenir).
// It is deliberately NOT unique — duped copies share it — so a match may
// already represent more than one physical item; dupe detection runs later.
private async Task<SkinInstance> ResolveInstanceAsync(
private async Task<SkinInstance?> ResolveInstanceAsync(
int skinId, CsFloatListing l, DateTimeOffset now, CancellationToken ct)
{
var seed = l.PaintSeed.ToString();
// Floatless items (e.g. Vanilla knives) can't be fingerprinted; skip the
// instance and leave the listing's SkinInstanceId null, like the cs.money path.
if (l.FloatValue is not { } floatValue)
{
return null;
}
var seed = l.PaintSeed;
// Check the change-tracker first (an instance just added earlier this page
// isn't queryable yet), then the database.
var tracked = _db.ChangeTracker.Entries<SkinInstance>()
.Select(e => e.Entity)
.FirstOrDefault(i => i.SkinId == skinId && i.FloatValue == l.FloatValue
.FirstOrDefault(i => i.SkinId == skinId && i.FloatValue == floatValue
&& i.PaintSeed == seed && i.StatTrak == l.IsStatTrak && i.Souvenir == l.IsSouvenir);
if (tracked is not null)
{
@@ -559,7 +607,7 @@ public sealed class ListingSweepService
}
var instance = await _db.SkinInstances.FirstOrDefaultAsync(
i => i.SkinId == skinId && i.FloatValue == l.FloatValue
i => i.SkinId == skinId && i.FloatValue == floatValue
&& i.PaintSeed == seed && i.StatTrak == l.IsStatTrak && i.Souvenir == l.IsSouvenir,
ct);
@@ -572,7 +620,7 @@ public sealed class ListingSweepService
instance = new SkinInstance
{
SkinId = skinId,
FloatValue = l.FloatValue,
FloatValue = floatValue,
PaintSeed = seed,
StatTrak = l.IsStatTrak,
Souvenir = l.IsSouvenir,
@@ -583,7 +631,7 @@ public sealed class ListingSweepService
return instance;
}
private static Listing MapToEntity(CsFloatListing l, int? skinId, DateTimeOffset now) => new()
private static Listing MapToEntity(CsFloatListing l, int? skinId, int? conditionId, DateTimeOffset now) => new()
{
CsFloatListingId = l.ListingId,
Type = l.Type,
@@ -602,6 +650,7 @@ public sealed class ListingSweepService
SellerSteamId = l.SellerSteamId,
InspectLink = l.InspectLink,
SkinId = skinId,
ConditionId = conditionId,
FirstSeenAt = now,
LastSeenAt = now,
Status = ListingStatus.Active,

View File

@@ -0,0 +1,205 @@
using BlueLaminate.EFCore.Data;
using BlueLaminate.EFCore.Entities;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
namespace BlueLaminate.Core.SkinLand;
/// <summary>Outcome of ingesting one skin+wear scrape job's results.</summary>
/// <param name="Matched">Offers accepted for the targeted skin+wear (they passed the slug check).</param>
/// <param name="Inserted">Accepted offers stored as new rows.</param>
/// <param name="Updated">Accepted offers that refreshed an already-stored row.</param>
/// <param name="Removed">Previously Active rows flagged Removed; stays 0 unless the sweep was complete.</param>
/// <param name="Skipped">Offers dropped without being stored: slug mismatches, or every offer when the skin is unknown.</param>
public sealed record SkinLandIngestResult(
int Matched, int Inserted, int Updated, int Removed, int Skipped);
/// <summary>
/// Persists the offers the worker scraped for one targeted skin+wear job into the
/// <c>skin_land_listings</c> table. Mirrors <see cref="CsMoney.CsMoneyIngestService"/>'s
/// upsert-by-natural-key + soft-track-Removed + complete-vs-partial flow, but is thinner:
/// skin.land exposes no paint seed, so there's no <c>SkinInstance</c> resolution and no
/// dupe detection. The scraped page is already one exact skin+wear (the worker fetches it
/// by slug), so instead of cs.money's fuzzy name filter we only validate defensively that
/// each offer's slug matches the targeted band, skipping any that don't.
/// </summary>
public sealed class SkinLandIngestService
{
public const string Source = SweepSource.SkinLand;
private readonly SkinTrackerDbContext _db;
private readonly ILogger<SkinLandIngestService> _logger;
/// <summary>Captures the database context and logger this service works with.</summary>
public SkinLandIngestService(SkinTrackerDbContext db, ILogger<SkinLandIngestService> logger) =>
    (_db, _logger) = (db, logger);
/// <summary>
/// Upserts the offers scraped for one skin+wear job into <c>skin_land_listings</c>.
/// For a complete sweep it additionally flags unseen Active offers as Removed, records
/// the lowest price as a price point (only when a wear band is given), and stamps the
/// band's sweep checkpoint.
/// </summary>
/// <param name="skinId">Catalogue skin the job targeted. An unknown id skips the whole batch.</param>
/// <param name="conditionId">
/// Wear band the job targeted. When null no expected slug can be built, so every offer is
/// accepted, and no price point or checkpoint is written.
/// </param>
/// <param name="offers">Offers gathered by the worker across all fetched pages.</param>
/// <param name="complete">
/// True only when the worker walked every page of the skin+wear (stoppedReason
/// "completed"). On a partial sweep we upsert what we saw but skip Removed-marking,
/// the price point, and the swept-checkpoint — unseen offers may just be unfetched, so
/// the band stays un-stamped and gets re-queued rather than being wrongly pruned.
/// </param>
/// <param name="ct">Cancels the database work.</param>
/// <returns>Counts of matched / inserted / updated / removed / skipped offers.</returns>
public async Task<SkinLandIngestResult> IngestAsync(
    int skinId, int? conditionId, IReadOnlyList<SkinLandOffer> offers, bool complete, CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(offers);

    var now = DateTimeOffset.UtcNow;

    var skin = await _db.Skins
        .Where(s => s.Id == skinId)
        .Select(s => new { s.Id, s.Name, Weapon = s.Weapon.Name })
        .FirstOrDefaultAsync(ct);
    if (skin is null)
    {
        _logger.LogWarning("Ingest skipped: skin {SkinId} not found.", skinId);
        return new SkinLandIngestResult(0, 0, 0, 0, offers.Count);
    }

    string? conditionName = null;
    if (conditionId is { } cid)
    {
        conditionName = await _db.SkinConditions
            .Where(c => c.Id == cid).Select(c => c.Condition).FirstOrDefaultAsync(ct);
    }

    // Each offer carries its skin's slug; the targeted band has a known slug. When we
    // can build the expected slug, keep only offers whose slug matches (a cheap guard
    // against a wrong/redirected page); otherwise accept all (the worker targeted it).
    var expectedSlug = conditionName is null
        ? null
        : SkinLandSlug.Slugify($"{skin.Weapon} {skin.Name} {conditionName}");

    var inBand = offers.Where(o =>
        expectedSlug is null
        || string.Equals(o.Skin?.Url, expectedSlug, StringComparison.OrdinalIgnoreCase)).ToList();
    var skipped = offers.Count - inBand.Count;

    // Collapse repeated offer ids (e.g. the paginated set shifted between page fetches
    // and the same offer was returned twice). Without this, two inserts for one id would
    // be queued and the save would fail on the unique listing-id index.
    var matched = inBand.DistinctBy(o => o.Id).ToList();

    if (matched.Count == 0)
    {
        // Nothing for this skin+wear. If the sweep was complete this is genuine (none
        // listed, or a slug mismatch) — stamp the checkpoint so it advances. If partial
        // (e.g. challenged before any page), leave it un-stamped so the band is retried.
        var pruned = 0;
        if (complete)
        {
            // A complete sweep that returned no offers at all means the band is now
            // empty, so every still-Active row for it is stale. Don't prune when offers
            // were only rejected by the slug guard: that points at a wrong page, not at
            // an empty band.
            if (skipped == 0)
            {
                pruned = await MarkRemovedAsync(skinId, conditionId, new HashSet<long>(), now, ct);
            }

            await StampCheckpointAsync(conditionId, now, ct);
            await _db.SaveChangesAsync(ct);
        }

        return new SkinLandIngestResult(0, 0, 0, pruned, skipped);
    }

    var listingIds = matched.Select(o => o.Id).ToList();
    var existing = await _db.SkinLandListings
        .Where(l => listingIds.Contains(l.ListingId))
        .ToDictionaryAsync(l => l.ListingId, ct);

    var inserted = 0;
    var updated = 0;
    var touched = new HashSet<long>();

    foreach (var o in matched)
    {
        touched.Add(o.Id);
        if (existing.TryGetValue(o.Id, out var row))
        {
            // Keep the last known price when this observation carries none.
            row.Price = o.FinalWithdrawalPrice ?? row.Price;
            row.FloatValue = o.ItemFloat;
            row.NameTag = o.NameTag;
            row.InspectLink = o.ItemLink;
            row.StickerCount = o.Stickers?.Count(s => s is not null) ?? 0;
            row.LastSeenAt = now;
            // Seen again: revive a row that an earlier sweep had flagged Removed.
            row.Status = ListingStatus.Active;
            row.RemovedAt = null;
            row.ConditionId = conditionId;
            updated++;
        }
        else
        {
            _db.SkinLandListings.Add(Map(o, skinId, conditionId, now));
            inserted++;
        }
    }

    // Persist inserts/updates before the set-based Removed query runs.
    await _db.SaveChangesAsync(ct);

    // The following only hold if we saw the FULL skin+wear set. On a partial sweep,
    // offers we didn't fetch are not gone (so don't mark them Removed), the cheapest
    // offer may be among the unfetched (so don't record a price point), and the band
    // isn't fully swept (so don't stamp the checkpoint — let it re-queue).
    var removed = 0;
    if (complete)
    {
        removed = await MarkRemovedAsync(skinId, conditionId, touched, now, ct);

        if (conditionId is { } condId)
        {
            // Min over nullable prices ignores offers without one and yields null when
            // none is priced.
            var lowest = matched.Min(m => m.FinalWithdrawalPrice);
            if (lowest is { } lowestPrice)
            {
                _db.PriceHistories.Add(new PriceHistory
                {
                    SkinId = skinId,
                    ConditionId = condId,
                    Price = lowestPrice,
                    Currency = "USD",
                    RecordedAt = now,
                    Source = Source,
                });
            }
        }

        await StampCheckpointAsync(conditionId, now, ct);
    }

    await _db.SaveChangesAsync(ct);

    _logger.LogInformation(
        "skin.land ingest {Weapon} | {Skin} ({Wear}): {Matched} matched ({Ins} new, {Upd} upd, "
        + "{Rem} removed), {Skipped} skipped by filter{Partial}.",
        skin.Weapon, skin.Name, conditionName ?? "all", matched.Count, inserted, updated, removed, skipped,
        complete ? "" : " [PARTIAL — not pruned/checkpointed]");

    return new SkinLandIngestResult(matched.Count, inserted, updated, removed, skipped);
}
// Soft-removes the rows of this skin+wear that are still Active but were not seen in the
// current run: sets Status to Removed and RemovedAt to `now` in one set-based update.
// Returns the number of rows affected.
private async Task<int> MarkRemovedAsync(
    int skinId, int? conditionId, HashSet<long> touched, DateTimeOffset now, CancellationToken ct)
{
    var unseenActive = _db.SkinLandListings.Where(listing =>
        listing.SkinId == skinId
        && listing.ConditionId == conditionId
        && listing.Status == ListingStatus.Active
        && !touched.Contains(listing.ListingId));

    var affected = await unseenActive.ExecuteUpdateAsync(
        update => update
            .SetProperty(listing => listing.Status, ListingStatus.Removed)
            .SetProperty(listing => listing.RemovedAt, now),
        ct);

    return affected;
}
// Upserts the skin.land checkpoint for the given wear band through SweepCheckpoints.
// Does nothing when there is no band. The row is only persisted once the caller runs
// SaveChangesAsync.
private async Task StampCheckpointAsync(int? conditionId, DateTimeOffset now, CancellationToken ct)
{
    if (!conditionId.HasValue)
    {
        return;
    }

    await SweepCheckpoints.StampConditionAsync(_db, conditionId.Value, Source, now, ct);
}
// Builds a brand-new Active row from a scraped offer. Missing pieces fall back to neutral
// values: empty name, false flags, zero stickers, zero price. First/last seen both start
// at `now`.
private static SkinLandListing Map(SkinLandOffer o, int skinId, int? conditionId, DateTimeOffset now)
{
    var skinBlock = o.Skin;
    var stickerCount = o.Stickers is null
        ? 0
        : o.Stickers.Count(sticker => sticker is not null);

    return new SkinLandListing
    {
        ListingId = o.Id,
        SkinId = skinId,
        ConditionId = conditionId,
        MarketHashName = skinBlock?.Name ?? "",
        FloatValue = o.ItemFloat,
        IsStatTrak = skinBlock is { IsStatTrak: true },
        IsSouvenir = skinBlock is { IsSouvenir: true },
        NameTag = o.NameTag,
        StickerCount = stickerCount,
        Price = o.FinalWithdrawalPrice ?? 0m,
        Currency = "USD",
        InspectLink = o.ItemLink,
        FirstSeenAt = now,
        LastSeenAt = now,
        Status = ListingStatus.Active,
    };
}
}

View File

@@ -0,0 +1,35 @@
using System.Text.Json.Serialization;
namespace BlueLaminate.Core.SkinLand;
/// <summary>
/// The subset of a skin.land <c>obtained-skins</c> offer we persist, parsed from the
/// JSON the Python worker scrapes (the paginated <c>data[]</c> array). Decimals are
/// parsed directly (not via double) so the full-precision float round-trips exactly into
/// <c>numeric(20,18)</c>. skin.land exposes no paint seed / def index, so there's nothing
/// to fingerprint a <c>SkinInstance</c> with — the shape is intentionally thin.
/// <para>
/// The decimal fields accept both JSON numbers and quoted numeric strings, so parsing
/// does not depend on the caller's serializer options.
/// </para>
/// </summary>
public sealed class SkinLandOffer
{
    /// <summary>skin.land's offer id.</summary>
    [JsonPropertyName("id")]
    public long Id { get; set; }

    /// <summary>Full-precision float; null when the offer carries none.</summary>
    [JsonPropertyName("item_float")]
    [JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)]
    public decimal? ItemFloat { get; set; }

    /// <summary>Offer price; null when absent.</summary>
    // NOTE(review): not confirmed whether skin.land sends this as a string or a number;
    // both forms are accepted.
    [JsonPropertyName("final_withdrawal_price")]
    [JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)]
    public decimal? FinalWithdrawalPrice { get; set; }

    [JsonPropertyName("name_tag")]
    public string? NameTag { get; set; }

    [JsonPropertyName("item_link")]
    public string? ItemLink { get; set; }

    /// <summary>Sticker entries; individual entries may be null.</summary>
    [JsonPropertyName("stickers")]
    public List<SkinLandSticker?>? Stickers { get; set; }

    /// <summary>The skin block this offer belongs to (name, slug, special flags).</summary>
    [JsonPropertyName("skin")]
    public SkinLandSkin? Skin { get; set; }
}

/// <summary>The <c>skin</c> block nested in an offer.</summary>
public sealed class SkinLandSkin
{
    [JsonPropertyName("id")]
    public long? Id { get; set; }

    [JsonPropertyName("name")]
    public string? Name { get; set; }

    /// <summary>The market slug.</summary>
    [JsonPropertyName("url")]
    public string? Url { get; set; }

    [JsonPropertyName("is_stattrak")]
    public bool IsStatTrak { get; set; }

    [JsonPropertyName("is_souvenir")]
    public bool IsSouvenir { get; set; }
}

/// <summary>One sticker entry of an offer; only its name is read.</summary>
public sealed class SkinLandSticker
{
    [JsonPropertyName("name")]
    public string? Name { get; set; }
}

View File

@@ -0,0 +1,55 @@
using System.Text;
namespace BlueLaminate.Core.SkinLand;
/// <summary>
/// Turns the catalogue's weapon + skin + wear into a skin.land market URL. Market pages
/// live at <c>/market/csgo/{slug}/</c>, and the slug is just <c>{weapon}-{skin}-{wear}</c>
/// in kebab case — verified against the live site (e.g. "M4A4" + "Global Offensive" +
/// "Battle-Scarred" → <c>m4a4-global-offensive-battle-scarred</c>, "AK-47" + "Redline" +
/// "Field-Tested" → <c>ak-47-redline-field-tested</c>), so no discovery step or stored
/// mapping is required.
/// <para>
/// StatTrak and Souvenir have their <em>own</em> skin.land pages (slugs prefixed with
/// <c>stattrak-</c>/<c>souvenir-</c>). This helper only builds the base (non-special)
/// page, which is the unit v1 sweeps per <c>SkinCondition</c>.
/// </para>
/// </summary>
public static class SkinLandSlug
{
    private const string MarketBase = "https://skin.land/market/csgo/";

    /// <summary>Full market URL for e.g. "M4A4", "Global Offensive", "Battle-Scarred".</summary>
    public static string MarketUrl(string weapon, string skinName, string condition)
    {
        var slug = Slugify(string.Join(' ', weapon, skinName, condition));
        return MarketBase + slug + "/";
    }

    /// <summary>
    /// Lowercases letters and digits and replaces each run of any other characters between
    /// them with one hyphen; runs at the very start or end produce nothing. For example
    /// "AK-47 | Redline (Field-Tested)" and "AK-47 Redline Field-Tested" both become
    /// "ak-47-redline-field-tested".
    /// </summary>
    public static string Slugify(string value)
    {
        var slug = new StringBuilder(value.Length);
        var gapSeen = false;

        for (var i = 0; i < value.Length; i++)
        {
            var current = value[i];
            if (!char.IsLetterOrDigit(current))
            {
                // Remember the separator run; it is only emitted if more text follows.
                gapSeen = true;
                continue;
            }

            if (gapSeen && slug.Length != 0)
            {
                slug.Append('-');
            }

            gapSeen = false;
            slug.Append(char.ToLowerInvariant(current));
        }

        return slug.ToString();
    }
}

View File

@@ -8,7 +8,8 @@ public class InventoryItemConfiguration : IEntityTypeConfiguration<InventoryItem
{
public void Configure(EntityTypeBuilder<InventoryItem> entity)
{
entity.HasIndex(e => e.AssetId);
// A Steam asset id identifies one physical copy; never store it twice.
entity.HasIndex(e => e.AssetId).IsUnique();
entity.HasOne(e => e.User)
.WithMany(u => u.InventoryItems)

View File

@@ -31,6 +31,14 @@ public class ListingConfiguration : IEntityTypeConfiguration<Listing>
.HasForeignKey(e => e.SkinId)
.OnDelete(DeleteBehavior.SetNull);
// Wear band the sweep targeted (set directly from the sweep unit, not
// best-effort). Set null on delete so a condition row can change without
// blocking its listings — matching the cs.money/skin.land tables.
entity.HasOne(e => e.Condition)
.WithMany()
.HasForeignKey(e => e.ConditionId)
.OnDelete(DeleteBehavior.SetNull);
// Listings roll up to the physical item they represent.
entity.HasOne(e => e.SkinInstance)
.WithMany(i => i.Listings)

View File

@@ -8,12 +8,11 @@ public class SkinConditionConfiguration : IEntityTypeConfiguration<SkinCondition
{
public void Configure(EntityTypeBuilder<SkinCondition> entity)
{
entity.Property(e => e.MinFloat).HasColumnType("numeric(10,9)");
entity.Property(e => e.MaxFloat).HasColumnType("numeric(10,9)");
entity.Property(e => e.FloatMin).HasColumnType("numeric(10,9)");
entity.Property(e => e.FloatMax).HasColumnType("numeric(10,9)");
// The catalogue sweep orders bands by this (never-swept first, then stalest),
// so index it like the equivalent column on skins.
entity.HasIndex(e => e.ListingsSweptAt);
// Per-site "last swept" checkpoints live in skin_condition_sweeps (one row per
// site); see SkinConditionSweepConfiguration for the indexes that order them.
entity.HasOne(e => e.Skin)
.WithMany(s => s.Conditions)

View File

@@ -0,0 +1,24 @@
using BlueLaminate.EFCore.Entities;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Metadata.Builders;
namespace BlueLaminate.EFCore.Configurations;
/// <summary>EF Core mapping for the per-band, per-site sweep checkpoint rows.</summary>
public class SkinConditionSweepConfiguration : IEntityTypeConfiguration<SkinConditionSweep>
{
    public void Configure(EntityTypeBuilder<SkinConditionSweep> entity)
    {
        // A checkpoint belongs to exactly one wear band and is deleted together with it.
        entity
            .HasOne(sweep => sweep.SkinCondition)
            .WithMany(band => band.Sweeps)
            .HasForeignKey(sweep => sweep.SkinConditionId)
            .OnDelete(DeleteBehavior.Cascade);

        // Natural key: at most one checkpoint per (band, site). The "stamp" upsert in
        // SweepCheckpoints depends on this.
        entity
            .HasIndex(sweep => new { sweep.SkinConditionId, sweep.Source })
            .IsUnique();

        // Supports each site's ordering scan (never-swept first, then stalest):
        // filter by source, sort by swept_at.
        entity.HasIndex(sweep => new { sweep.Source, sweep.SweptAt });
    }
}

View File

@@ -29,9 +29,8 @@ public class SkinConfiguration : IEntityTypeConfiguration<Skin>
.IsUnique()
.HasFilter("def_index IS NOT NULL AND paint_index IS NOT NULL");
// The catalogue sweep orders skins by when they were last swept (nulls
// first) to resume across capped runs; index that ordering.
entity.HasIndex(e => e.ListingsSweptAt);
// Per-site "last swept" checkpoints live in skin_sweeps (one row per site);
// see SkinSweepConfiguration for the indexes that order them.
entity.HasOne(e => e.Weapon)
.WithMany(w => w.Skins)

View File

@@ -0,0 +1,39 @@
using BlueLaminate.EFCore.Entities;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Metadata.Builders;
namespace BlueLaminate.EFCore.Configurations;
/// <summary>EF Core mapping for the <c>skin_land_listings</c> rows.</summary>
public class SkinLandListingConfiguration : IEntityTypeConfiguration<SkinLandListing>
{
    public void Configure(EntityTypeBuilder<SkinLandListing> entity)
    {
        // Natural key: skin.land's offer id. Ingest upserts on it, so duplicates must be
        // impossible.
        entity
            .HasIndex(listing => listing.ListingId)
            .IsUnique();

        entity
            .Property(listing => listing.Price)
            .HasPrecision(18, 2);

        // Store the float losslessly (same precision as SkinInstance/cs.money) for later
        // analysis, even though skin.land offers are never fingerprinted.
        entity
            .Property(listing => listing.FloatValue)
            .HasColumnType("numeric(20,18)");

        // Persist the enum as text so the database is self-describing (matches the
        // project's leaning).
        entity
            .Property(listing => listing.Status)
            .HasConversion<string>();

        // Targeted scrape: rows are filtered/sorted by skin+wear and by activity.
        entity.HasIndex(listing => new { listing.SkinId, listing.ConditionId });
        entity.HasIndex(listing => listing.Status);

        // Every job targets a known skin, so the link is required. Restrict: a skin that
        // still has listings must not be deleted out from under them.
        entity
            .HasOne(listing => listing.Skin)
            .WithMany()
            .HasForeignKey(listing => listing.SkinId)
            .OnDelete(DeleteBehavior.Restrict);

        // The wear band is optional; deleting a band just clears the link.
        entity
            .HasOne(listing => listing.Condition)
            .WithMany()
            .HasForeignKey(listing => listing.ConditionId)
            .OnDelete(DeleteBehavior.SetNull);
    }
}

View File

@@ -0,0 +1,22 @@
using BlueLaminate.EFCore.Entities;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Metadata.Builders;
namespace BlueLaminate.EFCore.Configurations;
/// <summary>EF Core mapping for the whole-skin, per-site sweep checkpoint rows.</summary>
public class SkinSweepConfiguration : IEntityTypeConfiguration<SkinSweep>
{
    public void Configure(EntityTypeBuilder<SkinSweep> entity)
    {
        // A checkpoint belongs to exactly one skin and is deleted together with it.
        entity
            .HasOne(sweep => sweep.Skin)
            .WithMany(skin => skin.Sweeps)
            .HasForeignKey(sweep => sweep.SkinId)
            .OnDelete(DeleteBehavior.Cascade);

        // Natural key the upsert relies on: at most one checkpoint per (skin, site).
        entity
            .HasIndex(sweep => new { sweep.SkinId, sweep.Source })
            .IsUnique();

        // Same as SkinConditionSweep: index the (source, swept_at) ordering each sweep scans.
        entity.HasIndex(sweep => new { sweep.Source, sweep.SweptAt });
    }
}

View File

@@ -8,6 +8,10 @@ public class TradeConfiguration : IEntityTypeConfiguration<Trade>
{
public void Configure(EntityTypeBuilder<Trade> entity)
{
// Steam's trade id is the natural key for an observed trade. Nullable (some
// trades are reconstructed without one); Postgres keeps multiple NULLs distinct.
entity.HasIndex(e => e.SteamTradeId).IsUnique();
entity.HasOne(e => e.FromUser)
.WithMany(u => u.TradesSent)
.HasForeignKey(e => e.FromUserId)

View File

@@ -23,6 +23,8 @@ public class SkinTrackerDbContext : DbContext
public DbSet<Collection> Collections => Set<Collection>();
public DbSet<Skin> Skins => Set<Skin>();
public DbSet<SkinCondition> SkinConditions => Set<SkinCondition>();
public DbSet<SkinSweep> SkinSweeps => Set<SkinSweep>();
public DbSet<SkinConditionSweep> SkinConditionSweeps => Set<SkinConditionSweep>();
public DbSet<SteamUser> SteamUsers => Set<SteamUser>();
public DbSet<SkinInstance> SkinInstances => Set<SkinInstance>();
public DbSet<InventoryItem> InventoryItems => Set<InventoryItem>();
@@ -31,6 +33,7 @@ public class SkinTrackerDbContext : DbContext
public DbSet<PriceHistory> PriceHistories => Set<PriceHistory>();
public DbSet<Listing> Listings => Set<Listing>();
public DbSet<CsMoneyListing> CsMoneyListings => Set<CsMoneyListing>();
public DbSet<SkinLandListing> SkinLandListings => Set<SkinLandListing>();
/// <summary>Read-only cross-market view UNIONing the per-market listing tables.</summary>
public DbSet<MarketListing> MarketListings => Set<MarketListing>();
@@ -47,6 +50,8 @@ public class SkinTrackerDbContext : DbContext
modelBuilder.ApplyConfiguration(new CollectionConfiguration());
modelBuilder.ApplyConfiguration(new SkinConfiguration());
modelBuilder.ApplyConfiguration(new SkinConditionConfiguration());
modelBuilder.ApplyConfiguration(new SkinSweepConfiguration());
modelBuilder.ApplyConfiguration(new SkinConditionSweepConfiguration());
modelBuilder.ApplyConfiguration(new SteamUserConfiguration());
modelBuilder.ApplyConfiguration(new SkinInstanceConfiguration());
modelBuilder.ApplyConfiguration(new InventoryItemConfiguration());
@@ -55,6 +60,7 @@ public class SkinTrackerDbContext : DbContext
modelBuilder.ApplyConfiguration(new PriceHistoryConfiguration());
modelBuilder.ApplyConfiguration(new ListingConfiguration());
modelBuilder.ApplyConfiguration(new CsMoneyListingConfiguration());
modelBuilder.ApplyConfiguration(new SkinLandListingConfiguration());
modelBuilder.ApplyConfiguration(new MarketListingConfiguration());
}
}

View File

@@ -0,0 +1,64 @@
using BlueLaminate.EFCore.Entities;
using Microsoft.EntityFrameworkCore;
namespace BlueLaminate.EFCore.Data;
/// <summary>
/// Write helpers for the per-site sweep checkpoints (<see cref="SkinSweep"/> /
/// <see cref="SkinConditionSweep"/>). Each marketplace sweeper stamps its own row
/// keyed by <c>(entity, source)</c>, so a band swept on one site is still "never
/// swept" on another. Adding a new site means a new <see cref="SweepSource"/>
/// constant — no schema changes.
/// <para>
/// Reads stay inline in the sweep queries (a correlated subquery over the navigation
/// for the relevant <c>Source</c>) so EF can translate and order by them server-side.
/// </para>
/// <para>
/// Both helpers look in the change tracker before querying the database, so stamping
/// the same key twice on one context before saving updates the pending row instead of
/// queuing a second insert that would break the unique <c>(entity, source)</c> index.
/// </para>
/// </summary>
public static class SweepCheckpoints
{
    /// <summary>
    /// Record that <paramref name="source"/> just swept this wear band. Upserts the
    /// single (condition, source) row via the change tracker; the caller persists with
    /// <see cref="DbContext.SaveChangesAsync"/>.
    /// </summary>
    /// <param name="db">Context that tracks the checkpoint row.</param>
    /// <param name="conditionId">Wear band (skin_conditions row) that was swept.</param>
    /// <param name="source">Site that swept it — a <see cref="SweepSource"/> value.</param>
    /// <param name="sweptAt">Timestamp to store as the checkpoint.</param>
    /// <param name="ct">Cancels the lookup query.</param>
    public static async Task StampConditionAsync(
        SkinTrackerDbContext db, int conditionId, string source, DateTimeOffset sweptAt, CancellationToken ct)
    {
        // A row added earlier on this context but not yet saved is invisible to the
        // database query, so consult the tracked entities first.
        var existing = db.SkinConditionSweeps.Local
                .FirstOrDefault(s => s.SkinConditionId == conditionId && s.Source == source)
            ?? await db.SkinConditionSweeps
                .FirstOrDefaultAsync(s => s.SkinConditionId == conditionId && s.Source == source, ct);

        if (existing is not null)
        {
            existing.SweptAt = sweptAt;
            return;
        }

        db.SkinConditionSweeps.Add(new SkinConditionSweep
        {
            SkinConditionId = conditionId,
            Source = source,
            SweptAt = sweptAt,
        });
    }

    /// <summary>As <see cref="StampConditionAsync"/>, for a whole-skin unit (no wear bands).</summary>
    /// <param name="db">Context that tracks the checkpoint row.</param>
    /// <param name="skinId">Skin that was swept.</param>
    /// <param name="source">Site that swept it — a <see cref="SweepSource"/> value.</param>
    /// <param name="sweptAt">Timestamp to store as the checkpoint.</param>
    /// <param name="ct">Cancels the lookup query.</param>
    public static async Task StampSkinAsync(
        SkinTrackerDbContext db, int skinId, string source, DateTimeOffset sweptAt, CancellationToken ct)
    {
        // Same tracked-first lookup as the per-band stamp.
        var existing = db.SkinSweeps.Local
                .FirstOrDefault(s => s.SkinId == skinId && s.Source == source)
            ?? await db.SkinSweeps
                .FirstOrDefaultAsync(s => s.SkinId == skinId && s.Source == source, ct);

        if (existing is not null)
        {
            existing.SweptAt = sweptAt;
            return;
        }

        db.SkinSweeps.Add(new SkinSweep
        {
            SkinId = skinId,
            Source = source,
            SweptAt = sweptAt,
        });
    }
}

View File

@@ -36,9 +36,12 @@ public class Listing
/// <summary>"buy_now" or "auction".</summary>
public string Type { get; set; } = null!;
/// <summary>Asking price in USD.</summary>
/// <summary>Asking price.</summary>
public decimal Price { get; set; }
/// <summary>Currency of <see cref="Price"/>. CSFloat lists in USD.</summary>
public string Currency { get; set; } = "USD";
/// <summary>When CSFloat says the listing was created.</summary>
public DateTimeOffset ListedAt { get; set; }
@@ -48,7 +51,13 @@ public class Listing
public int PaintIndex { get; set; }
public string MarketHashName { get; set; } = null!;
public string? WearName { get; set; }
public decimal FloatValue { get; set; }
/// <summary>
/// Exact float, or null for items with no float at all (e.g. Vanilla knives).
/// Null is deliberately distinct from a genuine 0.0 float; a floatless item
/// also can't be fingerprinted, so its <see cref="SkinInstanceId"/> stays null.
/// </summary>
public decimal? FloatValue { get; set; }
public int PaintSeed { get; set; }
public bool IsStatTrak { get; set; }
public bool IsSouvenir { get; set; }
@@ -68,6 +77,15 @@ public class Listing
public int? SkinId { get; set; }
public Skin? Skin { get; set; }
/// <summary>
/// The wear band this listing belongs to. Unlike <see cref="SkinId"/> this is NOT
/// best-effort: the catalogue sweep pages one skin+wear band at a time, so the band
/// is set directly from the sweep unit. Null for whole-skin sweeps (e.g. vanilla
/// knives with no wear bands).
/// </summary>
public int? ConditionId { get; set; }
public SkinCondition? Condition { get; set; }
/// <summary>
/// The physical item (by fingerprint) this listing is for. Many listings over
/// time roll up to one instance, forming its market-movement history. Nullable

View File

@@ -16,12 +16,6 @@ public class Skin
public int? DefIndex { get; set; }
public int? PaintIndex { get; set; }
// When the catalogue-driven listing sweep last fully covered this skin. The
// sweep processes least-recently-swept skins first (nulls = never swept), so
// capped runs chain across the whole catalogue and the stalest data refreshes
// first. Null until the first sweep reaches this skin.
public DateTimeOffset? ListingsSweptAt { get; set; }
public string Name { get; set; } = null!;
public string Rarity { get; set; } = null!;
public string? Description { get; set; }
@@ -44,6 +38,12 @@ public class Skin
public bool? TrueFloat { get; private set; }
public ICollection<SkinCondition> Conditions { get; set; } = new List<SkinCondition>();
// Per-site "last swept" checkpoints for the whole-skin sweep unit — only used for
// skins with no wear bands (the per-band checkpoint lives on SkinCondition.Sweeps).
// The sweep processes never-swept (no row) / stalest skins first. See SkinSweep.
public ICollection<SkinSweep> Sweeps { get; set; } = new List<SkinSweep>();
public ICollection<SkinInstance> Instances { get; set; } = new List<SkinInstance>();
public ICollection<PriceHistory> PriceHistories { get; set; } = new List<PriceHistory>();
}

View File

@@ -7,14 +7,15 @@ public class SkinCondition
public Skin Skin { get; set; } = null!;
public string Condition { get; set; } = null!;
public decimal MinFloat { get; set; }
public decimal MaxFloat { get; set; }
public decimal FloatMin { get; set; }
public decimal FloatMax { get; set; }
// When the catalogue-driven listing sweep last fully covered this skin's wear
// band. The sweep splits each skin by wear and pages one band at a time, so this
// is the per-band checkpoint: an interrupted run resumes from never-swept/stalest
// bands rather than redoing a whole skin. Null until the first sweep reaches it.
public DateTimeOffset? ListingsSweptAt { get; set; }
// Per-site "last swept" checkpoints for this wear band — one row per marketplace
// (Source). The sweep splits each skin by wear and pages one band at a time, so
// this is the per-band checkpoint: an interrupted run resumes from never-swept
// (no row) / stalest bands rather than redoing a whole skin. Tracked per site so a
// band swept on CSFloat is still never-swept on cs.money. See SkinConditionSweep.
public ICollection<SkinConditionSweep> Sweeps { get; set; } = new List<SkinConditionSweep>();
public ICollection<SkinInstance> Instances { get; set; } = new List<SkinInstance>();
public ICollection<PriceHistory> PriceHistories { get; set; } = new List<PriceHistory>();

View File

@@ -0,0 +1,21 @@
namespace BlueLaminate.EFCore.Entities;
/// <summary>
/// One site's "last swept" checkpoint for a single wear band. The catalogue sweep
/// processes least-recently-swept bands first (no row = never swept), so capped/looping
/// runs chain across the catalogue and refresh the stalest data first. Keyed by
/// <c>(SkinConditionId, Source)</c> so each marketplace tracks its own progress
/// independently — a band swept on one site stays never-swept on another.
/// </summary>
public class SkinConditionSweep
{
// Row identifier (presumably the EF-convention primary key — confirm against the migration).
public int Id { get; set; }
// The wear band this checkpoint is for: foreign key plus its navigation.
public int SkinConditionId { get; set; }
public SkinCondition SkinCondition { get; set; } = null!;
/// <summary>Which site swept it — a <see cref="SweepSource"/> value.</summary>
public string Source { get; set; } = null!;
/// <summary>When that site last swept the band; overwritten on every stamp.</summary>
public DateTimeOffset SweptAt { get; set; }
}

View File

@@ -26,9 +26,11 @@ public class SkinInstance
public SkinCondition? Condition { get; set; }
// The fingerprint. FloatValue is stored at full precision (see config) so
// that exact-match dupe detection isn't fooled by rounding.
// that exact-match dupe detection isn't fooled by rounding. An instance is
// only created for items that have a float + paint seed (skins), so both are
// non-null here even though some listings (e.g. vanilla knives) lack them.
public decimal FloatValue { get; set; }
public string PaintSeed { get; set; } = null!;
public int PaintSeed { get; set; }
public bool StatTrak { get; set; }
public bool Souvenir { get; set; }
public DateTimeOffset FirstSeenAt { get; set; }

View File

@@ -0,0 +1,54 @@
namespace BlueLaminate.EFCore.Entities;
/// <summary>
/// One offer observed on skin.land via its internal
/// <c>GET /api/v2/obtained-skins?skin_id={id}&amp;page={n}</c> endpoint (scraped through
/// the Python worker, since skin.land has no public API and sits behind Cloudflare).
/// <para>
/// Kept in its own table like <see cref="CsMoneyListing"/>, but deliberately thinner:
/// skin.land exposes a full-precision float and price but <b>no paint seed / def index</b>,
/// so an offer can't be fingerprinted to a market-agnostic <see cref="SkinInstance"/> and
/// there is no cross-market roll-up or dupe detection here (revisit if pattern is ever
/// exposed). StatTrak and Souvenir live on <em>separate</em> skin.land pages (their own
/// <c>stattrak-</c>/<c>souvenir-</c> slugs); v1 sweeps the base page per skin+wear, so
/// <see cref="IsStatTrak"/>/<see cref="IsSouvenir"/> are normally false.
/// </para>
/// Soft-tracked across sweeps exactly like <see cref="CsMoneyListing"/>:
/// <see cref="FirstSeenAt"/>/<see cref="LastSeenAt"/> bound the observation window and
/// <see cref="Status"/> flips to <see cref="ListingStatus.Removed"/> when a once-seen
/// offer stops appearing (sold/delisted).
/// </summary>
public class SkinLandListing
{
/// <summary>Surrogate primary key (database identity column).</summary>
public int Id { get; set; }
/// <summary>skin.land's offer id (obtained-skin <c>id</c>). Natural key for dedup.</summary>
/// <remarks>Backed by a unique index, so one row exists per skin.land offer.</remarks>
public long ListingId { get; set; }
// Catalogue links. Like cs.money (and unlike the CSFloat global sweep) these are NOT
// best-effort: each scrape job targets one skin+wear, so we set them directly.
/// <summary>Foreign key to the catalogue skin (required; deleting the skin is restricted).</summary>
public int SkinId { get; set; }
/// <summary>Required navigation to the catalogue skin.</summary>
public Skin Skin { get; set; } = null!;
/// <summary>
/// Foreign key to the wear band. Nullable because the foreign key is configured
/// <c>ON DELETE SET NULL</c>, so the listing survives removal of its band.
/// </summary>
public int? ConditionId { get; set; }
/// <summary>Optional navigation to the wear band (<c>null</c> when <see cref="ConditionId"/> is).</summary>
public SkinCondition? Condition { get; set; }
// Item identity, from the offer's skin block.
/// <summary>Market hash name of the item.</summary>
public string MarketHashName { get; set; } = null!;
/// <summary>Item float; stored as <c>numeric(20,18)</c>. <c>null</c> when the offer carries none.</summary>
public decimal? FloatValue { get; set; } // item_float (string, full precision)
/// <summary>Whether the offer is StatTrak (normally false — see the class summary).</summary>
public bool IsStatTrak { get; set; }
/// <summary>Whether the offer is Souvenir (normally false — see the class summary).</summary>
public bool IsSouvenir { get; set; }
/// <summary>Custom name tag applied to the item, if any.</summary>
public string? NameTag { get; set; } // offer.name_tag (rare; affects value)
/// <summary>Number of stickers applied to the item.</summary>
public int StickerCount { get; set; }
// Pricing. skin.land returns a single price (the amount to buy/withdraw the item).
/// <summary>Offer price in <see cref="Currency"/>; stored as <c>numeric(18,2)</c>.</summary>
public decimal Price { get; set; } // final_withdrawal_price
/// <summary>Currency of <see cref="Price"/>; defaults to <c>"USD"</c>.</summary>
public string Currency { get; set; } = "USD"; // prices are read in USD
/// <summary>Steam inspect link for the item, if the offer provides one.</summary>
public string? InspectLink { get; set; } // item_link (steam:// inspect)
// Soft-tracking across sweeps.
/// <summary>When the offer was first observed.</summary>
public DateTimeOffset FirstSeenAt { get; set; }
/// <summary>When the offer was most recently observed.</summary>
public DateTimeOffset LastSeenAt { get; set; }
/// <summary>Current soft-tracking state of the offer (persisted as text).</summary>
public ListingStatus Status { get; set; }
/// <summary>When the offer was marked removed; <c>null</c> while it has not been.</summary>
public DateTimeOffset? RemovedAt { get; set; }
}

View File

@@ -0,0 +1,20 @@
namespace BlueLaminate.EFCore.Entities;
/// <summary>
/// One site's "last swept" checkpoint for a whole skin — used only for skins with no
/// wear bands (e.g. vanilla knives), which are swept as a single unit. The per-band
/// equivalent is <see cref="SkinConditionSweep"/>. Keyed by <c>(SkinId, Source)</c> so
/// each marketplace tracks its own progress independently.
/// </summary>
/// <remarks>
/// The <c>(SkinId, Source)</c> pair is enforced by a unique index, so there is at most
/// one row per skin per site; a second index on <c>(Source, SweptAt)</c> supports
/// ordering a site's checkpoints by age.
/// </remarks>
public class SkinSweep
{
/// <summary>Surrogate primary key (database identity column).</summary>
public int Id { get; set; }
/// <summary>Foreign key to the catalogue skin this checkpoint is for.</summary>
public int SkinId { get; set; }
/// <summary>Required navigation to the skin; the checkpoint is cascade-deleted with it.</summary>
public Skin Skin { get; set; } = null!;
/// <summary>Which site swept it — a <see cref="SweepSource"/> value.</summary>
public string Source { get; set; } = null!;
/// <summary>When the site named by <see cref="Source"/> last swept this skin.</summary>
public DateTimeOffset SweptAt { get; set; }
}

View File

@@ -0,0 +1,23 @@
namespace BlueLaminate.EFCore.Entities;
/// <summary>
/// Canonical site identifiers for per-site sweep checkpoints — the <c>Source</c>
/// discriminator on <see cref="SkinSweep"/> and <see cref="SkinConditionSweep"/>.
/// Each marketplace sweeper stamps its own checkpoint under one of these, so a band
/// swept on one site is still "never swept" on another.
/// <para>
/// To add sweeping for a new marketplace, add one constant here and have that
/// sweeper read/stamp checkpoints with it — no schema or query changes needed.
/// </para>
/// <para>
/// These strings are what gets written to the checkpoint tables' <c>source</c> column,
/// so treat an existing value as frozen: changing one would leave previously stamped
/// rows keyed under the old string, and that site would read as never swept.
/// </para>
/// </summary>
public static class SweepSource
{
/// <summary>CSFloat catalogue-driven sweep (<c>ListingSweepService.SweepCatalogAsync</c>).</summary>
public const string CsFloatCatalog = "listings-catalog";
/// <summary>cs.money worker sweep (<c>CsMoneyIngestService</c>).</summary>
public const string CsMoney = "csmoney";
/// <summary>skin.land worker sweep (<c>SkinLandIngestService</c>).</summary>
public const string SkinLand = "skinland";
}

View File

@@ -0,0 +1,146 @@
using System;
using Microsoft.EntityFrameworkCore.Migrations;
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
#nullable disable
namespace BlueLaminate.EFCore.Migrations
{
/// <summary>
/// Replaces the single shared <c>listings_swept_at</c> checkpoint column on
/// <c>skins</c> and <c>skin_conditions</c> with per-site checkpoint tables
/// (<c>skin_sweeps</c> and <c>skin_condition_sweeps</c>), each unique on
/// <c>(target id, source)</c> and indexed on <c>(source, swept_at)</c>.
/// <para>
/// Existing checkpoints are carried across in both directions instead of being
/// discarded with the dropped column/table, so an upgrade (or rollback) does not
/// reset every band to "never swept" and force a full re-sweep.
/// </para>
/// </summary>
public partial class AddPerSiteSweepCheckpoints : Migration
{
    // Source value under which legacy checkpoints are preserved. Deliberately a frozen
    // literal rather than a reference to application code: a migration must keep
    // producing the same SQL even if the application constant is later renamed.
    // NOTE(review): assumes the old listings_swept_at columns were stamped by the
    // CSFloat catalogue sweep ("listings-catalog") — confirm before shipping.
    private const string LegacyCheckpointSource = "listings-catalog";

    /// <inheritdoc />
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        // 1. Create the per-site tables first, so the legacy values can be copied
        //    into them while the old columns still exist.
        migrationBuilder.CreateTable(
            name: "skin_condition_sweeps",
            schema: "skintracker",
            columns: table => new
            {
                id = table.Column<int>(type: "integer", nullable: false)
                    .Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn),
                skin_condition_id = table.Column<int>(type: "integer", nullable: false),
                source = table.Column<string>(type: "text", nullable: false),
                swept_at = table.Column<DateTimeOffset>(type: "timestamp with time zone", nullable: false)
            },
            constraints: table =>
            {
                table.PrimaryKey("pk_skin_condition_sweeps", x => x.id);
                table.ForeignKey(
                    name: "fk_skin_condition_sweeps_skin_conditions_skin_condition_id",
                    column: x => x.skin_condition_id,
                    principalSchema: "skintracker",
                    principalTable: "skin_conditions",
                    principalColumn: "id",
                    onDelete: ReferentialAction.Cascade);
            });

        migrationBuilder.CreateTable(
            name: "skin_sweeps",
            schema: "skintracker",
            columns: table => new
            {
                id = table.Column<int>(type: "integer", nullable: false)
                    .Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn),
                skin_id = table.Column<int>(type: "integer", nullable: false),
                source = table.Column<string>(type: "text", nullable: false),
                swept_at = table.Column<DateTimeOffset>(type: "timestamp with time zone", nullable: false)
            },
            constraints: table =>
            {
                table.PrimaryKey("pk_skin_sweeps", x => x.id);
                table.ForeignKey(
                    name: "fk_skin_sweeps_skins_skin_id",
                    column: x => x.skin_id,
                    principalSchema: "skintracker",
                    principalTable: "skins",
                    principalColumn: "id",
                    onDelete: ReferentialAction.Cascade);
            });

        // 2. Carry the existing checkpoints over. Each source row yields at most one
        //    target row, so the unique (id, source) indexes created below cannot be
        //    violated. The identity "id" column is generated by the database.
        migrationBuilder.Sql($"""
            INSERT INTO skintracker.skin_condition_sweeps (skin_condition_id, source, swept_at)
            SELECT id, '{LegacyCheckpointSource}', listings_swept_at
            FROM skintracker.skin_conditions
            WHERE listings_swept_at IS NOT NULL;
            """);

        migrationBuilder.Sql($"""
            INSERT INTO skintracker.skin_sweeps (skin_id, source, swept_at)
            SELECT id, '{LegacyCheckpointSource}', listings_swept_at
            FROM skintracker.skins
            WHERE listings_swept_at IS NOT NULL;
            """);

        // 3. Only now drop the legacy indexes and columns.
        migrationBuilder.DropIndex(
            name: "ix_skins_listings_swept_at",
            schema: "skintracker",
            table: "skins");

        migrationBuilder.DropIndex(
            name: "ix_skin_conditions_listings_swept_at",
            schema: "skintracker",
            table: "skin_conditions");

        migrationBuilder.DropColumn(
            name: "listings_swept_at",
            schema: "skintracker",
            table: "skins");

        migrationBuilder.DropColumn(
            name: "listings_swept_at",
            schema: "skintracker",
            table: "skin_conditions");

        // 4. Index the new tables: unique per (target, source), plus (source, swept_at)
        //    for ordering a site's checkpoints by age.
        migrationBuilder.CreateIndex(
            name: "ix_skin_condition_sweeps_skin_condition_id_source",
            schema: "skintracker",
            table: "skin_condition_sweeps",
            columns: new[] { "skin_condition_id", "source" },
            unique: true);

        migrationBuilder.CreateIndex(
            name: "ix_skin_condition_sweeps_source_swept_at",
            schema: "skintracker",
            table: "skin_condition_sweeps",
            columns: new[] { "source", "swept_at" });

        migrationBuilder.CreateIndex(
            name: "ix_skin_sweeps_skin_id_source",
            schema: "skintracker",
            table: "skin_sweeps",
            columns: new[] { "skin_id", "source" },
            unique: true);

        migrationBuilder.CreateIndex(
            name: "ix_skin_sweeps_source_swept_at",
            schema: "skintracker",
            table: "skin_sweeps",
            columns: new[] { "source", "swept_at" });
    }

    /// <inheritdoc />
    protected override void Down(MigrationBuilder migrationBuilder)
    {
        // 1. Re-add the legacy columns while the per-site tables still exist.
        migrationBuilder.AddColumn<DateTimeOffset>(
            name: "listings_swept_at",
            schema: "skintracker",
            table: "skins",
            type: "timestamp with time zone",
            nullable: true);

        migrationBuilder.AddColumn<DateTimeOffset>(
            name: "listings_swept_at",
            schema: "skintracker",
            table: "skin_conditions",
            type: "timestamp with time zone",
            nullable: true);

        // 2. Copy the legacy-source checkpoints back. Rows stamped by other sites have
        //    no home in the single-column schema and are intentionally left behind.
        migrationBuilder.Sql($"""
            UPDATE skintracker.skin_conditions AS c
            SET listings_swept_at = s.swept_at
            FROM skintracker.skin_condition_sweeps AS s
            WHERE s.skin_condition_id = c.id
              AND s.source = '{LegacyCheckpointSource}';
            """);

        migrationBuilder.Sql($"""
            UPDATE skintracker.skins AS k
            SET listings_swept_at = s.swept_at
            FROM skintracker.skin_sweeps AS s
            WHERE s.skin_id = k.id
              AND s.source = '{LegacyCheckpointSource}';
            """);

        // 3. Drop the per-site tables (their indexes go with them).
        migrationBuilder.DropTable(
            name: "skin_condition_sweeps",
            schema: "skintracker");

        migrationBuilder.DropTable(
            name: "skin_sweeps",
            schema: "skintracker");

        // 4. Restore the legacy indexes.
        migrationBuilder.CreateIndex(
            name: "ix_skins_listings_swept_at",
            schema: "skintracker",
            table: "skins",
            column: "listings_swept_at");

        migrationBuilder.CreateIndex(
            name: "ix_skin_conditions_listings_swept_at",
            schema: "skintracker",
            table: "skin_conditions",
            column: "listings_swept_at");
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,239 @@
using System;
using Microsoft.EntityFrameworkCore.Migrations;
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
#nullable disable
namespace BlueLaminate.EFCore.Migrations
{
/// <summary>
/// Adds the <c>skin_land_listings</c> table (one row per skin.land offer, unique on
/// <c>listing_id</c>) and extends the <c>skintracker.market_listings</c> view with a
/// third, skin.land arm alongside the existing csfloat and csmoney arms.
/// </summary>
public partial class AddSkinLandListings : Migration
{
/// <inheritdoc />
protected override void Up(MigrationBuilder migrationBuilder)
{
// The offer table. skin_id is required (RESTRICT on delete); condition_id is
// nullable and is set to NULL if its wear band is deleted.
migrationBuilder.CreateTable(
name: "skin_land_listings",
schema: "skintracker",
columns: table => new
{
id = table.Column<int>(type: "integer", nullable: false)
.Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn),
listing_id = table.Column<long>(type: "bigint", nullable: false),
skin_id = table.Column<int>(type: "integer", nullable: false),
condition_id = table.Column<int>(type: "integer", nullable: true),
market_hash_name = table.Column<string>(type: "text", nullable: false),
float_value = table.Column<decimal>(type: "numeric(20,18)", nullable: true),
is_stat_trak = table.Column<bool>(type: "boolean", nullable: false),
is_souvenir = table.Column<bool>(type: "boolean", nullable: false),
name_tag = table.Column<string>(type: "text", nullable: true),
sticker_count = table.Column<int>(type: "integer", nullable: false),
price = table.Column<decimal>(type: "numeric(18,2)", precision: 18, scale: 2, nullable: false),
currency = table.Column<string>(type: "text", nullable: false),
inspect_link = table.Column<string>(type: "text", nullable: true),
first_seen_at = table.Column<DateTimeOffset>(type: "timestamp with time zone", nullable: false),
last_seen_at = table.Column<DateTimeOffset>(type: "timestamp with time zone", nullable: false),
status = table.Column<string>(type: "text", nullable: false),
removed_at = table.Column<DateTimeOffset>(type: "timestamp with time zone", nullable: true)
},
constraints: table =>
{
table.PrimaryKey("pk_skin_land_listings", x => x.id);
table.ForeignKey(
name: "fk_skin_land_listings_skin_conditions_condition_id",
column: x => x.condition_id,
principalSchema: "skintracker",
principalTable: "skin_conditions",
principalColumn: "id",
onDelete: ReferentialAction.SetNull);
table.ForeignKey(
name: "fk_skin_land_listings_skins_skin_id",
column: x => x.skin_id,
principalSchema: "skintracker",
principalTable: "skins",
principalColumn: "id",
onDelete: ReferentialAction.Restrict);
});
// Supporting indexes: FK lookup, the natural-key uniqueness used for dedup,
// the per-job (skin, wear) lookup, and status filtering.
migrationBuilder.CreateIndex(
name: "ix_skin_land_listings_condition_id",
schema: "skintracker",
table: "skin_land_listings",
column: "condition_id");
migrationBuilder.CreateIndex(
name: "ix_skin_land_listings_listing_id",
schema: "skintracker",
table: "skin_land_listings",
column: "listing_id",
unique: true);
migrationBuilder.CreateIndex(
name: "ix_skin_land_listings_skin_id_condition_id",
schema: "skintracker",
table: "skin_land_listings",
columns: new[] { "skin_id", "condition_id" });
migrationBuilder.CreateIndex(
name: "ix_skin_land_listings_status",
schema: "skintracker",
table: "skin_land_listings",
column: "status");
// Extend the cross-market read view with a skin.land arm. skin.land exposes no
// paint seed / asset id / instance fingerprint, so those columns are NULL; the
// wear comes from the joined condition row (the offer table doesn't store it).
// Column names come from the first (csfloat) arm; the other arms must list
// their values in exactly the same order.
migrationBuilder.Sql("""
CREATE OR REPLACE VIEW skintracker.market_listings AS
SELECT
'csfloat'::text AS marketplace,
l.cs_float_listing_id AS external_id,
l.skin_id AS skin_id,
NULL::integer AS condition_id,
l.skin_instance_id AS skin_instance_id,
l.market_hash_name AS market_hash_name,
l.wear_name AS wear,
l.float_value AS float_value,
l.paint_seed AS paint_seed,
l.is_stat_trak AS is_stat_trak,
l.is_souvenir AS is_souvenir,
l.sticker_count AS sticker_count,
l.price AS price,
'USD'::text AS currency,
l.inspect_link AS inspect_link,
l.asset_id AS asset_id,
l.status AS status,
l.first_seen_at AS first_seen_at,
l.last_seen_at AS last_seen_at,
l.removed_at AS removed_at
FROM skintracker.listings l
UNION ALL
SELECT
'csmoney'::text,
c.sell_order_id::text,
c.skin_id,
c.condition_id,
c.skin_instance_id,
c.market_hash_name,
-- Normalise cs.money's wear short code to the full wear name the
-- other arms emit (csfloat wear_name / skinland condition), so the
-- view's `wear` column is consistent across marketplaces.
CASE lower(c.quality)
WHEN 'fn' THEN 'Factory New'
WHEN 'mw' THEN 'Minimal Wear'
WHEN 'ft' THEN 'Field-Tested'
WHEN 'ww' THEN 'Well-Worn'
WHEN 'bs' THEN 'Battle-Scarred'
ELSE c.quality
END,
c.float_value,
c.paint_seed,
c.is_stat_trak,
c.is_souvenir,
c.sticker_count,
c.price,
c.currency,
c.inspect_link,
c.asset_id,
c.status,
c.first_seen_at,
c.last_seen_at,
c.removed_at
FROM skintracker.cs_money_listings c
UNION ALL
SELECT
'skinland'::text,
s.listing_id::text,
s.skin_id,
s.condition_id,
NULL::integer,
s.market_hash_name,
sc.condition,
s.float_value,
NULL::integer,
s.is_stat_trak,
s.is_souvenir,
s.sticker_count,
s.price,
s.currency,
s.inspect_link,
NULL::text,
s.status,
s.first_seen_at,
s.last_seen_at,
s.removed_at
FROM skintracker.skin_land_listings s
LEFT JOIN skintracker.skin_conditions sc ON sc.id = s.condition_id;
""");
}
/// <inheritdoc />
protected override void Down(MigrationBuilder migrationBuilder)
{
// Restore the pre-skin.land view (csfloat + csmoney) before dropping the table
// it references, so the view never points at a missing relation.
migrationBuilder.Sql("""
CREATE OR REPLACE VIEW skintracker.market_listings AS
SELECT
'csfloat'::text AS marketplace,
l.cs_float_listing_id AS external_id,
l.skin_id AS skin_id,
NULL::integer AS condition_id,
l.skin_instance_id AS skin_instance_id,
l.market_hash_name AS market_hash_name,
l.wear_name AS wear,
l.float_value AS float_value,
l.paint_seed AS paint_seed,
l.is_stat_trak AS is_stat_trak,
l.is_souvenir AS is_souvenir,
l.sticker_count AS sticker_count,
l.price AS price,
'USD'::text AS currency,
l.inspect_link AS inspect_link,
l.asset_id AS asset_id,
l.status AS status,
l.first_seen_at AS first_seen_at,
l.last_seen_at AS last_seen_at,
l.removed_at AS removed_at
FROM skintracker.listings l
UNION ALL
SELECT
'csmoney'::text,
c.sell_order_id::text,
c.skin_id,
c.condition_id,
c.skin_instance_id,
c.market_hash_name,
-- Normalise cs.money's wear short code to the full wear name the
-- other arms emit (csfloat wear_name / skinland condition), so the
-- view's `wear` column is consistent across marketplaces.
CASE lower(c.quality)
WHEN 'fn' THEN 'Factory New'
WHEN 'mw' THEN 'Minimal Wear'
WHEN 'ft' THEN 'Field-Tested'
WHEN 'ww' THEN 'Well-Worn'
WHEN 'bs' THEN 'Battle-Scarred'
ELSE c.quality
END,
c.float_value,
c.paint_seed,
c.is_stat_trak,
c.is_souvenir,
c.sticker_count,
c.price,
c.currency,
c.inspect_link,
c.asset_id,
c.status,
c.first_seen_at,
c.last_seen_at,
c.removed_at
FROM skintracker.cs_money_listings c;
""");
// Dropping the table also removes the indexes and foreign keys created in Up.
migrationBuilder.DropTable(
name: "skin_land_listings",
schema: "skintracker");
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,38 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace BlueLaminate.EFCore.Migrations
{
/// <summary>
/// Makes <c>skintracker.listings.float_value</c> nullable. The column type
/// (<c>numeric(20,18)</c>) is unchanged; only the NOT NULL constraint is relaxed.
/// </summary>
public partial class MakeListingFloatNullable : Migration
{
/// <inheritdoc />
protected override void Up(MigrationBuilder migrationBuilder)
{
// Same type on both sides: this only relaxes nullability.
migrationBuilder.AlterColumn<decimal>(
name: "float_value",
schema: "skintracker",
table: "listings",
type: "numeric(20,18)",
nullable: true,
oldClrType: typeof(decimal),
oldType: "numeric(20,18)");
}
/// <inheritdoc />
protected override void Down(MigrationBuilder migrationBuilder)
{
// Re-imposes NOT NULL with a default of 0.
// NOTE(review): rows that were stored with a NULL float after Up cannot keep
// it here; confirm the provider back-fills them with the default before
// SET NOT NULL, and that a float of 0 is an acceptable stand-in on rollback.
migrationBuilder.AlterColumn<decimal>(
name: "float_value",
schema: "skintracker",
table: "listings",
type: "numeric(20,18)",
nullable: false,
defaultValue: 0m,
oldClrType: typeof(decimal),
oldType: "numeric(20,18)",
oldNullable: true);
}
}
}

View File

@@ -0,0 +1,308 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace BlueLaminate.EFCore.Migrations
{
/// <summary>
/// Schema consistency pass. In one migration it:
/// renames <c>skin_conditions.min_float</c>/<c>max_float</c> to
/// <c>float_min</c>/<c>float_max</c>; converts <c>skin_instances.paint_seed</c> from
/// text to integer; adds <c>condition_id</c> (FK to <c>skin_conditions</c>) and
/// <c>currency</c> to <c>listings</c>; makes <c>inventory_items.asset_id</c> unique and
/// adds a unique index on <c>trades.steam_trade_id</c>; and rewrites the
/// <c>market_listings</c> view so its csfloat arm reads the new <c>listings</c> columns.
/// </summary>
public partial class ConsistencyPass_FloatBoundsCurrencyConditionPaintSeed : Migration
{
/// <inheritdoc />
protected override void Up(MigrationBuilder migrationBuilder)
{
// Dropped here and recreated as UNIQUE further down.
migrationBuilder.DropIndex(
name: "ix_inventory_items_asset_id",
schema: "skintracker",
table: "inventory_items");
migrationBuilder.RenameColumn(
name: "min_float",
schema: "skintracker",
table: "skin_conditions",
newName: "float_min");
migrationBuilder.RenameColumn(
name: "max_float",
schema: "skintracker",
table: "skin_conditions",
newName: "float_max");
// text -> integer needs an explicit USING cast; EF's AlterColumn omits it and
// Postgres won't cast automatically. Every stored seed is a stringified
// integer, so the cast is total.
migrationBuilder.Sql(
"ALTER TABLE skintracker.skin_instances " +
"ALTER COLUMN paint_seed TYPE integer USING paint_seed::integer;");
migrationBuilder.AddColumn<int>(
name: "condition_id",
schema: "skintracker",
table: "listings",
type: "integer",
nullable: true);
// Existing rows are back-filled with 'USD' via the column default.
migrationBuilder.AddColumn<string>(
name: "currency",
schema: "skintracker",
table: "listings",
type: "text",
nullable: false,
defaultValue: "USD");
// NOTE(review): the two unique indexes below fail to build if duplicate
// steam_trade_id / asset_id values already exist — confirm the data is clean.
migrationBuilder.CreateIndex(
name: "ix_trades_steam_trade_id",
schema: "skintracker",
table: "trades",
column: "steam_trade_id",
unique: true);
migrationBuilder.CreateIndex(
name: "ix_listings_condition_id",
schema: "skintracker",
table: "listings",
column: "condition_id");
migrationBuilder.CreateIndex(
name: "ix_inventory_items_asset_id",
schema: "skintracker",
table: "inventory_items",
column: "asset_id",
unique: true);
migrationBuilder.AddForeignKey(
name: "fk_listings_skin_conditions_condition_id",
schema: "skintracker",
table: "listings",
column: "condition_id",
principalSchema: "skintracker",
principalTable: "skin_conditions",
principalColumn: "id",
onDelete: ReferentialAction.SetNull);
// Now that listings carries its own condition_id and currency, the csfloat
// arm of the cross-market view uses them instead of NULL / a hardcoded 'USD'.
// The replaced columns keep their names, positions and types (integer / text).
migrationBuilder.Sql("""
CREATE OR REPLACE VIEW skintracker.market_listings AS
SELECT
'csfloat'::text AS marketplace,
l.cs_float_listing_id AS external_id,
l.skin_id AS skin_id,
l.condition_id AS condition_id,
l.skin_instance_id AS skin_instance_id,
l.market_hash_name AS market_hash_name,
l.wear_name AS wear,
l.float_value AS float_value,
l.paint_seed AS paint_seed,
l.is_stat_trak AS is_stat_trak,
l.is_souvenir AS is_souvenir,
l.sticker_count AS sticker_count,
l.price AS price,
l.currency AS currency,
l.inspect_link AS inspect_link,
l.asset_id AS asset_id,
l.status AS status,
l.first_seen_at AS first_seen_at,
l.last_seen_at AS last_seen_at,
l.removed_at AS removed_at
FROM skintracker.listings l
UNION ALL
SELECT
'csmoney'::text,
c.sell_order_id::text,
c.skin_id,
c.condition_id,
c.skin_instance_id,
c.market_hash_name,
CASE lower(c.quality)
WHEN 'fn' THEN 'Factory New'
WHEN 'mw' THEN 'Minimal Wear'
WHEN 'ft' THEN 'Field-Tested'
WHEN 'ww' THEN 'Well-Worn'
WHEN 'bs' THEN 'Battle-Scarred'
ELSE c.quality
END,
c.float_value,
c.paint_seed,
c.is_stat_trak,
c.is_souvenir,
c.sticker_count,
c.price,
c.currency,
c.inspect_link,
c.asset_id,
c.status,
c.first_seen_at,
c.last_seen_at,
c.removed_at
FROM skintracker.cs_money_listings c
UNION ALL
SELECT
'skinland'::text,
s.listing_id::text,
s.skin_id,
s.condition_id,
NULL::integer,
s.market_hash_name,
sc.condition,
s.float_value,
NULL::integer,
s.is_stat_trak,
s.is_souvenir,
s.sticker_count,
s.price,
s.currency,
s.inspect_link,
NULL::text,
s.status,
s.first_seen_at,
s.last_seen_at,
s.removed_at
FROM skintracker.skin_land_listings s
LEFT JOIN skintracker.skin_conditions sc ON sc.id = s.condition_id;
""");
}
/// <inheritdoc />
protected override void Down(MigrationBuilder migrationBuilder)
{
// Restore the view to its pre-migration form (csfloat condition_id/currency
// hardcoded) FIRST, so the listings columns it now references can be dropped.
migrationBuilder.Sql("""
CREATE OR REPLACE VIEW skintracker.market_listings AS
SELECT
'csfloat'::text AS marketplace,
l.cs_float_listing_id AS external_id,
l.skin_id AS skin_id,
NULL::integer AS condition_id,
l.skin_instance_id AS skin_instance_id,
l.market_hash_name AS market_hash_name,
l.wear_name AS wear,
l.float_value AS float_value,
l.paint_seed AS paint_seed,
l.is_stat_trak AS is_stat_trak,
l.is_souvenir AS is_souvenir,
l.sticker_count AS sticker_count,
l.price AS price,
'USD'::text AS currency,
l.inspect_link AS inspect_link,
l.asset_id AS asset_id,
l.status AS status,
l.first_seen_at AS first_seen_at,
l.last_seen_at AS last_seen_at,
l.removed_at AS removed_at
FROM skintracker.listings l
UNION ALL
SELECT
'csmoney'::text,
c.sell_order_id::text,
c.skin_id,
c.condition_id,
c.skin_instance_id,
c.market_hash_name,
CASE lower(c.quality)
WHEN 'fn' THEN 'Factory New'
WHEN 'mw' THEN 'Minimal Wear'
WHEN 'ft' THEN 'Field-Tested'
WHEN 'ww' THEN 'Well-Worn'
WHEN 'bs' THEN 'Battle-Scarred'
ELSE c.quality
END,
c.float_value,
c.paint_seed,
c.is_stat_trak,
c.is_souvenir,
c.sticker_count,
c.price,
c.currency,
c.inspect_link,
c.asset_id,
c.status,
c.first_seen_at,
c.last_seen_at,
c.removed_at
FROM skintracker.cs_money_listings c
UNION ALL
SELECT
'skinland'::text,
s.listing_id::text,
s.skin_id,
s.condition_id,
NULL::integer,
s.market_hash_name,
sc.condition,
s.float_value,
NULL::integer,
s.is_stat_trak,
s.is_souvenir,
s.sticker_count,
s.price,
s.currency,
s.inspect_link,
NULL::text,
s.status,
s.first_seen_at,
s.last_seen_at,
s.removed_at
FROM skintracker.skin_land_listings s
LEFT JOIN skintracker.skin_conditions sc ON sc.id = s.condition_id;
""");
// Undo the listings additions: foreign key and indexes first, then the columns.
migrationBuilder.DropForeignKey(
name: "fk_listings_skin_conditions_condition_id",
schema: "skintracker",
table: "listings");
migrationBuilder.DropIndex(
name: "ix_trades_steam_trade_id",
schema: "skintracker",
table: "trades");
migrationBuilder.DropIndex(
name: "ix_listings_condition_id",
schema: "skintracker",
table: "listings");
// Dropped as UNIQUE here; recreated non-unique at the end of this method.
migrationBuilder.DropIndex(
name: "ix_inventory_items_asset_id",
schema: "skintracker",
table: "inventory_items");
migrationBuilder.DropColumn(
name: "condition_id",
schema: "skintracker",
table: "listings");
migrationBuilder.DropColumn(
name: "currency",
schema: "skintracker",
table: "listings");
migrationBuilder.RenameColumn(
name: "float_min",
schema: "skintracker",
table: "skin_conditions",
newName: "min_float");
migrationBuilder.RenameColumn(
name: "float_max",
schema: "skintracker",
table: "skin_conditions",
newName: "max_float");
// integer -> text goes through AlterColumn, unlike the raw SQL used in Up.
// NOTE(review): presumably no USING clause is needed in this direction —
// confirm the generated SQL applies cleanly on rollback.
migrationBuilder.AlterColumn<string>(
name: "paint_seed",
schema: "skintracker",
table: "skin_instances",
type: "text",
nullable: false,
oldClrType: typeof(int),
oldType: "integer");
migrationBuilder.CreateIndex(
name: "ix_inventory_items_asset_id",
schema: "skintracker",
table: "inventory_items",
column: "asset_id");
}
}
}

View File

@@ -215,6 +215,7 @@ namespace BlueLaminate.EFCore.Migrations
.HasName("pk_inventory_items");
b.HasIndex("AssetId")
.IsUnique()
.HasDatabaseName("ix_inventory_items_asset_id");
b.HasIndex("SkinInstanceId")
@@ -239,11 +240,20 @@ namespace BlueLaminate.EFCore.Migrations
.HasColumnType("text")
.HasColumnName("asset_id");
b.Property<int?>("ConditionId")
.HasColumnType("integer")
.HasColumnName("condition_id");
b.Property<string>("CsFloatListingId")
.IsRequired()
.HasColumnType("text")
.HasColumnName("cs_float_listing_id");
b.Property<string>("Currency")
.IsRequired()
.HasColumnType("text")
.HasColumnName("currency");
b.Property<int>("DefIndex")
.HasColumnType("integer")
.HasColumnName("def_index");
@@ -252,7 +262,7 @@ namespace BlueLaminate.EFCore.Migrations
.HasColumnType("timestamp with time zone")
.HasColumnName("first_seen_at");
b.Property<decimal>("FloatValue")
b.Property<decimal?>("FloatValue")
.HasColumnType("numeric(20,18)")
.HasColumnName("float_value");
@@ -334,6 +344,9 @@ namespace BlueLaminate.EFCore.Migrations
b.HasIndex("AssetId")
.HasDatabaseName("ix_listings_asset_id");
b.HasIndex("ConditionId")
.HasDatabaseName("ix_listings_condition_id");
b.HasIndex("CsFloatListingId")
.IsUnique()
.HasDatabaseName("ix_listings_cs_float_listing_id");
@@ -553,10 +566,6 @@ namespace BlueLaminate.EFCore.Migrations
.HasColumnType("text")
.HasColumnName("image_url");
b.Property<DateTimeOffset?>("ListingsSweptAt")
.HasColumnType("timestamp with time zone")
.HasColumnName("listings_swept_at");
b.Property<string>("Name")
.IsRequired()
.HasColumnType("text")
@@ -597,9 +606,6 @@ namespace BlueLaminate.EFCore.Migrations
b.HasKey("Id")
.HasName("pk_skins");
b.HasIndex("ListingsSweptAt")
.HasDatabaseName("ix_skins_listings_swept_at");
b.HasIndex("Slug")
.IsUnique()
.HasDatabaseName("ix_skins_slug");
@@ -632,17 +638,13 @@ namespace BlueLaminate.EFCore.Migrations
.HasColumnType("text")
.HasColumnName("condition");
b.Property<DateTimeOffset?>("ListingsSweptAt")
.HasColumnType("timestamp with time zone")
.HasColumnName("listings_swept_at");
b.Property<decimal>("MaxFloat")
b.Property<decimal>("FloatMax")
.HasColumnType("numeric(10,9)")
.HasColumnName("max_float");
.HasColumnName("float_max");
b.Property<decimal>("MinFloat")
b.Property<decimal>("FloatMin")
.HasColumnType("numeric(10,9)")
.HasColumnName("min_float");
.HasColumnName("float_min");
b.Property<int>("SkinId")
.HasColumnType("integer")
@@ -651,15 +653,47 @@ namespace BlueLaminate.EFCore.Migrations
b.HasKey("Id")
.HasName("pk_skin_conditions");
b.HasIndex("ListingsSweptAt")
.HasDatabaseName("ix_skin_conditions_listings_swept_at");
b.HasIndex("SkinId")
.HasDatabaseName("ix_skin_conditions_skin_id");
b.ToTable("skin_conditions", "skintracker");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinConditionSweep", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer")
.HasColumnName("id");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int>("SkinConditionId")
.HasColumnType("integer")
.HasColumnName("skin_condition_id");
b.Property<string>("Source")
.IsRequired()
.HasColumnType("text")
.HasColumnName("source");
b.Property<DateTimeOffset>("SweptAt")
.HasColumnType("timestamp with time zone")
.HasColumnName("swept_at");
b.HasKey("Id")
.HasName("pk_skin_condition_sweeps");
b.HasIndex("SkinConditionId", "Source")
.IsUnique()
.HasDatabaseName("ix_skin_condition_sweeps_skin_condition_id_source");
b.HasIndex("Source", "SweptAt")
.HasDatabaseName("ix_skin_condition_sweeps_source_swept_at");
b.ToTable("skin_condition_sweeps", "skintracker");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinInstance", b =>
{
b.Property<int>("Id")
@@ -689,9 +723,8 @@ namespace BlueLaminate.EFCore.Migrations
.HasColumnType("timestamp with time zone")
.HasColumnName("last_seen_at");
b.Property<string>("PaintSeed")
.IsRequired()
.HasColumnType("text")
b.Property<int>("PaintSeed")
.HasColumnType("integer")
.HasColumnName("paint_seed");
b.Property<int>("SkinId")
@@ -725,6 +758,137 @@ namespace BlueLaminate.EFCore.Migrations
b.ToTable("skin_instances", "skintracker");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinLandListing", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer")
.HasColumnName("id");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int?>("ConditionId")
.HasColumnType("integer")
.HasColumnName("condition_id");
b.Property<string>("Currency")
.IsRequired()
.HasColumnType("text")
.HasColumnName("currency");
b.Property<DateTimeOffset>("FirstSeenAt")
.HasColumnType("timestamp with time zone")
.HasColumnName("first_seen_at");
b.Property<decimal?>("FloatValue")
.HasColumnType("numeric(20,18)")
.HasColumnName("float_value");
b.Property<string>("InspectLink")
.HasColumnType("text")
.HasColumnName("inspect_link");
b.Property<bool>("IsSouvenir")
.HasColumnType("boolean")
.HasColumnName("is_souvenir");
b.Property<bool>("IsStatTrak")
.HasColumnType("boolean")
.HasColumnName("is_stat_trak");
b.Property<DateTimeOffset>("LastSeenAt")
.HasColumnType("timestamp with time zone")
.HasColumnName("last_seen_at");
b.Property<long>("ListingId")
.HasColumnType("bigint")
.HasColumnName("listing_id");
b.Property<string>("MarketHashName")
.IsRequired()
.HasColumnType("text")
.HasColumnName("market_hash_name");
b.Property<string>("NameTag")
.HasColumnType("text")
.HasColumnName("name_tag");
b.Property<decimal>("Price")
.HasPrecision(18, 2)
.HasColumnType("numeric(18,2)")
.HasColumnName("price");
b.Property<DateTimeOffset?>("RemovedAt")
.HasColumnType("timestamp with time zone")
.HasColumnName("removed_at");
b.Property<int>("SkinId")
.HasColumnType("integer")
.HasColumnName("skin_id");
b.Property<string>("Status")
.IsRequired()
.HasColumnType("text")
.HasColumnName("status");
b.Property<int>("StickerCount")
.HasColumnType("integer")
.HasColumnName("sticker_count");
b.HasKey("Id")
.HasName("pk_skin_land_listings");
b.HasIndex("ConditionId")
.HasDatabaseName("ix_skin_land_listings_condition_id");
b.HasIndex("ListingId")
.IsUnique()
.HasDatabaseName("ix_skin_land_listings_listing_id");
b.HasIndex("Status")
.HasDatabaseName("ix_skin_land_listings_status");
b.HasIndex("SkinId", "ConditionId")
.HasDatabaseName("ix_skin_land_listings_skin_id_condition_id");
b.ToTable("skin_land_listings", "skintracker");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinSweep", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer")
.HasColumnName("id");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int>("SkinId")
.HasColumnType("integer")
.HasColumnName("skin_id");
b.Property<string>("Source")
.IsRequired()
.HasColumnType("text")
.HasColumnName("source");
b.Property<DateTimeOffset>("SweptAt")
.HasColumnType("timestamp with time zone")
.HasColumnName("swept_at");
b.HasKey("Id")
.HasName("pk_skin_sweeps");
b.HasIndex("SkinId", "Source")
.IsUnique()
.HasDatabaseName("ix_skin_sweeps_skin_id_source");
b.HasIndex("Source", "SweptAt")
.HasDatabaseName("ix_skin_sweeps_source_swept_at");
b.ToTable("skin_sweeps", "skintracker");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SteamUser", b =>
{
b.Property<int>("Id")
@@ -788,6 +952,10 @@ namespace BlueLaminate.EFCore.Migrations
b.HasIndex("FromUserId")
.HasDatabaseName("ix_trades_from_user_id");
b.HasIndex("SteamTradeId")
.IsUnique()
.HasDatabaseName("ix_trades_steam_trade_id");
b.HasIndex("ToUserId")
.HasDatabaseName("ix_trades_to_user_id");
@@ -927,6 +1095,12 @@ namespace BlueLaminate.EFCore.Migrations
modelBuilder.Entity("BlueLaminate.EFCore.Entities.Listing", b =>
{
b.HasOne("BlueLaminate.EFCore.Entities.SkinCondition", "Condition")
.WithMany()
.HasForeignKey("ConditionId")
.OnDelete(DeleteBehavior.SetNull)
.HasConstraintName("fk_listings_skin_conditions_condition_id");
b.HasOne("BlueLaminate.EFCore.Entities.Skin", "Skin")
.WithMany()
.HasForeignKey("SkinId")
@@ -939,6 +1113,8 @@ namespace BlueLaminate.EFCore.Migrations
.OnDelete(DeleteBehavior.SetNull)
.HasConstraintName("fk_listings_skin_instances_skin_instance_id");
b.Navigation("Condition");
b.Navigation("Skin");
b.Navigation("SkinInstance");
@@ -989,6 +1165,18 @@ namespace BlueLaminate.EFCore.Migrations
b.Navigation("Skin");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinConditionSweep", b =>
{
b.HasOne("BlueLaminate.EFCore.Entities.SkinCondition", "SkinCondition")
.WithMany("Sweeps")
.HasForeignKey("SkinConditionId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired()
.HasConstraintName("fk_skin_condition_sweeps_skin_conditions_skin_condition_id");
b.Navigation("SkinCondition");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinInstance", b =>
{
b.HasOne("BlueLaminate.EFCore.Entities.SkinCondition", "Condition")
@@ -1009,6 +1197,38 @@ namespace BlueLaminate.EFCore.Migrations
b.Navigation("Skin");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinLandListing", b =>
{
b.HasOne("BlueLaminate.EFCore.Entities.SkinCondition", "Condition")
.WithMany()
.HasForeignKey("ConditionId")
.OnDelete(DeleteBehavior.SetNull)
.HasConstraintName("fk_skin_land_listings_skin_conditions_condition_id");
b.HasOne("BlueLaminate.EFCore.Entities.Skin", "Skin")
.WithMany()
.HasForeignKey("SkinId")
.OnDelete(DeleteBehavior.Restrict)
.IsRequired()
.HasConstraintName("fk_skin_land_listings_skins_skin_id");
b.Navigation("Condition");
b.Navigation("Skin");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinSweep", b =>
{
b.HasOne("BlueLaminate.EFCore.Entities.Skin", "Skin")
.WithMany("Sweeps")
.HasForeignKey("SkinId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired()
.HasConstraintName("fk_skin_sweeps_skins_skin_id");
b.Navigation("Skin");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.Trade", b =>
{
b.HasOne("BlueLaminate.EFCore.Entities.SteamUser", "FromUser")
@@ -1080,6 +1300,8 @@ namespace BlueLaminate.EFCore.Migrations
b.Navigation("Instances");
b.Navigation("PriceHistories");
b.Navigation("Sweeps");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinCondition", b =>
@@ -1087,6 +1309,8 @@ namespace BlueLaminate.EFCore.Migrations
b.Navigation("Instances");
b.Navigation("PriceHistories");
b.Navigation("Sweeps");
});
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinInstance", b =>

View File

@@ -8,7 +8,6 @@
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Selenium.WebDriver" />
</ItemGroup>
</Project>

View File

@@ -1,79 +0,0 @@
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
using OpenQA.Selenium.Edge;
namespace BlueLaminate.Scraper.Browser;
/// <summary>
/// Factory for the visible (non-headless) Edge/Chromium WebDriver used by the scrapers.
/// The browser is aimed at a local proxy endpoint that needs no credentials (a
/// <see cref="Proxies.LocalForwardingProxy"/> chaining to the residential gateway).
/// <b>No CDP is used</b>: turning on DevTools domains — even only to answer proxy
/// auth — is a Cloudflare automation tell, and because the local proxy already holds
/// the upstream credentials the browser never has a 407 to answer. Together with a
/// warmed, persistent profile this is the lowest-fingerprint setup available without
/// an undetected-chromedriver (for which there is no .NET equivalent).
/// <para>
/// Bandwidth: the residential plan is billed per GB, so by default images are turned
/// off through content settings. Cloudflare gates on JS/TLS/behaviour rather than on
/// whether pictures render, so the session still looks realistic.
/// </para>
/// </summary>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger) => _logger = logger;

    /// <summary>
    /// Start Edge, sending its traffic through <paramref name="proxyEndpoint"/>
    /// ("host:port", no auth) when one is given.
    /// </summary>
    /// <param name="proxyEndpoint">
    /// Local proxy address; null/blank makes the browser use the machine's direct
    /// connection (the diagnostic --no-proxy mode).
    /// </param>
    /// <param name="blockImages">When true, images are disabled via a profile preference.</param>
    /// <param name="profileDir">
    /// Profile directory to reuse across runs, so an already-earned Cloudflare
    /// <c>cf_clearance</c> cookie and browsing history survive (a warmed profile
    /// looks far less like a fresh bot). Null/blank selects a throwaway profile
    /// under the temp directory.
    /// </param>
    /// <returns>The launched Edge driver.</returns>
    public IWebDriver Create(string? proxyEndpoint, bool blockImages = true, string? profileDir = null)
    {
        var hasProxy = !string.IsNullOrWhiteSpace(proxyEndpoint);
        var keepProfile = !string.IsNullOrWhiteSpace(profileDir);
        var edge = new EdgeOptions();

        // The proxy goes in as a launch argument instead of EdgeOptions.Proxy, which
        // would also push Selenium Manager's driver download through it. Without a
        // scheme, every protocol is proxied.
        if (hasProxy)
        {
            edge.AddArgument($"--proxy-server={proxyEndpoint}");
        }

        // Strip the most obvious automation markers; the residential exit, a real
        // (non-headless) browser and a warmed profile cover the rest.
        edge.AddArgument("--disable-blink-features=AutomationControlled");
        edge.AddExcludedArgument("enable-automation");
        edge.AddAdditionalOption("useAutomationExtension", false);
        edge.AddArgument("--no-first-run");
        edge.AddArgument("--no-default-browser-check");
        edge.AddArgument("--start-maximized");

        string userDataDir;
        if (keepProfile)
        {
            userDataDir = profileDir!;
        }
        else
        {
            // One unique folder per launch so throwaway sessions never share state.
            userDataDir = Path.Combine(Path.GetTempPath(), "bluelaminate-edge", Guid.NewGuid().ToString("N"));
        }

        Directory.CreateDirectory(userDataDir);
        edge.AddArgument($"--user-data-dir={userDataDir}");

        if (blockImages)
        {
            edge.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
        }

        var route = hasProxy ? $"local proxy {proxyEndpoint}" : "DIRECT (no proxy)";
        var profileLabel = keepProfile ? userDataDir : "throwaway";
        _logger.LogInformation(
            "Launching Edge via {Route} (profile: {Profile}).",
            route,
            profileLabel);

        return new EdgeDriver(edge);
    }
}

View File

@@ -15,7 +15,10 @@ namespace BlueLaminate.Scraper.CsFloat;
/// <param name="DefIndex">Weapon definition index (maps to catalog weapon_id).</param>
/// <param name="PaintIndex">Paint index (maps to catalog paint_index).</param>
/// <param name="PaintSeed">Pattern seed.</param>
/// <param name="FloatValue">Exact float/wear value.</param>
/// <param name="FloatValue">
/// Exact float/wear value, or null for items that have no float at all
/// (e.g. Vanilla knives). A null is distinct from a genuine 0.0 float.
/// </param>
/// <param name="WearName">Wear bucket name, e.g. "Field-Tested".</param>
/// <param name="IsStatTrak">StatTrak™ variant.</param>
/// <param name="IsSouvenir">Souvenir variant.</param>
@@ -37,7 +40,7 @@ public sealed record CsFloatListing(
int DefIndex,
int PaintIndex,
int PaintSeed,
decimal FloatValue,
decimal? FloatValue,
string? WearName,
bool IsStatTrak,
bool IsSouvenir,

View File

@@ -321,7 +321,7 @@ public sealed class CsFloatListingsClient
public int DefIndex { get; init; }
public int PaintIndex { get; init; }
public int PaintSeed { get; init; }
public decimal FloatValue { get; init; }
public decimal? FloatValue { get; init; }
public string? WearName { get; init; }
public bool IsStatTrak { get; init; }
public bool IsSouvenir { get; init; }

View File

@@ -1,211 +0,0 @@
using System.Text;
using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.CsMoney;
/// <summary>Summary of a single stealth pagination run.</summary>
/// <param name="PagesSucceeded">Number of offset pages that yielded listings JSON before the run stopped.</param>
/// <param name="ItemsTotal">Sum of the listing items captured over those pages.</param>
/// <param name="StoppedReason">Reason pagination ended: "challenged", "empty", "completed", or "error".</param>
public sealed record CsMoneyCaptureResult(
    int PagesSucceeded,
    int ItemsTotal,
    string StoppedReason);
/// <summary>
/// Drives a low-fingerprint, non-headless Edge (no CDP) through a local forwarding
/// proxy to the cs.money market, lets the operator clear Cloudflare once, then pages
/// the listings API with human-like pacing using in-page <c>fetch()</c> calls from
/// the cleared origin (so the cf_clearance cookie rides along). It records each
/// page's JSON and — crucially for the current phase — <b>measures how many pages
/// survive before Cloudflare re-challenges</b>, which tells us whether the
/// fingerprint reductions are enough for a real sweep.
/// </summary>
public sealed class CsMoneyCaptureService
{
    // The listings API is paged by offset; each request advances by this many items
    // (CsMoneyOptions.ApiUrlTemplate documents the offset as "steps of 60").
    private const int PageSize = 60;

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly CsMoneyOptions _options;
    private readonly ILogger<CsMoneyCaptureService> _logger;

    public CsMoneyCaptureService(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        CsMoneyOptions options,
        ILogger<CsMoneyCaptureService> logger)
    {
        _provider = provider;
        _proxyFactory = proxyFactory;
        _factory = factory;
        _options = options;
        _logger = logger;
    }

    /// <summary>
    /// Open the market, wait for <paramref name="browseUntilDone"/> (the operator
    /// clears Cloudflare and presses Enter), then page the listings API up to
    /// <paramref name="maxPages"/> times, stopping early on a re-challenge or an
    /// empty page. Each page's body is written to <paramref name="outputDir"/>.
    /// </summary>
    /// <param name="outputDir">Directory (created if missing) that receives one file per page.</param>
    /// <param name="request">Exit-IP requirements passed to the proxy provider when <paramref name="useProxy"/> is true.</param>
    /// <param name="loadImages">True to let the browser load images; false blocks them.</param>
    /// <param name="useProxy">False runs the browser on the machine's own connection (no lease, no local proxy).</param>
    /// <param name="maxPages">Upper bound on the number of pages to capture.</param>
    /// <param name="browseUntilDone">Awaited once after navigation, before any API fetch.</param>
    /// <param name="ct">Cancels the run; cancellation propagates as <see cref="OperationCanceledException"/>.</param>
    /// <returns>Pages and items captured plus the reason pagination stopped.</returns>
    /// <remarks>
    /// Failures while acquiring the proxy or launching the browser propagate to the
    /// caller; failures during the capture itself are logged and reported as
    /// <c>"error"</c>. In every case the browser and the local proxy are torn down.
    /// </remarks>
    public async Task<CsMoneyCaptureResult> RunAsync(
        string outputDir,
        ProxyRequest request,
        bool loadImages,
        bool useProxy,
        int maxPages,
        Func<Task> browseUntilDone,
        CancellationToken ct = default)
    {
        Directory.CreateDirectory(outputDir);

        LocalForwardingProxy? localProxy = null;
        IWebDriver? driver = null;
        var pages = 0;
        var items = 0;
        var reason = "completed";

        try
        {
            // --no-proxy (useProxy=false) drives the automated browser on the machine's
            // own IP, to isolate whether a re-challenge is the IPRoyal exit's reputation
            // or the webdriver fingerprint itself.
            string? proxyEndpoint = null;
            if (useProxy)
            {
                var lease = _provider.Acquire(request);

                // Track the proxy before starting it so the finally block below
                // disposes it even when Start() or the browser launch throws.
                localProxy = _proxyFactory.Create(lease);
                localProxy.Start();
                proxyEndpoint = localProxy.Endpoint;
            }

            // Launch failures are deliberately not caught here: they surface to the
            // caller, but only after the outer finally has released the proxy.
            driver = _factory.Create(proxyEndpoint, blockImages: !loadImages, _options.ProfileDir);

            try
            {
                driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(90);
                driver.Manage().Timeouts().AsynchronousJavaScript = TimeSpan.FromSeconds(45);

                _logger.LogInformation("Navigating to {Url}", _options.MarketUrl);
                driver.Navigate().GoToUrl(_options.MarketUrl);

                // Operator clears the Cloudflare challenge in the visible window, waits
                // until the market grid is actually rendered, then presses Enter.
                await browseUntilDone();

                for (var offset = 0; pages < maxPages; offset += PageSize)
                {
                    ct.ThrowIfCancellationRequested();

                    // Invariant culture: the offset goes into a URL, not user-facing text.
                    var apiUrl = string.Format(
                        System.Globalization.CultureInfo.InvariantCulture,
                        _options.ApiUrlTemplate,
                        offset);
                    var (status, body) = DirectFetch(driver, apiUrl);

                    if (LooksLikeChallenge(status, body))
                    {
                        _logger.LogWarning(
                            "Re-challenged at offset {Offset} (after {Pages} clean page(s)). Stopping.",
                            offset, pages);
                        await WriteAsync(outputDir, $"challenge_offset_{offset}.html", body, ct);
                        reason = "challenged";
                        break;
                    }

                    // null = body was not JSON with an items[] array; such a page is
                    // still recorded and counted, contributing zero items.
                    var count = TryCountItems(body);
                    if (count is 0)
                    {
                        _logger.LogInformation("Offset {Offset} returned no items — end of listings.", offset);
                        reason = "empty";
                        break;
                    }

                    await WriteAsync(outputDir, $"page_{pages:D3}_offset_{offset}.json", body, ct);
                    pages++;
                    items += count ?? 0;
                    _logger.LogInformation(
                        "Page {Page} [offset {Offset}] [{Status}] → {Count} items ({Bytes} bytes).",
                        pages, offset, status, count, body.Length);

                    await DelayAsync(ct);
                }
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                // Cancellation is excluded by the filter and keeps propagating.
                _logger.LogError(ex, "cs.money capture failed after {Pages} page(s).", pages);
                reason = "error";
            }
        }
        finally
        {
            try
            {
                driver?.Quit();
            }
            finally
            {
                // Nested so a throwing Quit() cannot skip the proxy teardown.
                if (localProxy is not null)
                {
                    await localProxy.DisposeAsync();
                }
            }
        }

        return new CsMoneyCaptureResult(pages, items, reason);
    }

    // Run a same-origin fetch() in the cleared page and return (status, body). Uses
    // ExecuteAsyncScript so we can await the fetch promise; the page is on the
    // cs.money origin, so the cf_clearance cookie is sent automatically.
    // A status of -1 means the fetch itself failed or the script returned nothing.
    private static (int Status, string Body) DirectFetch(IWebDriver driver, string apiUrl)
    {
        const string script = """
            const url = arguments[0];
            const done = arguments[arguments.length - 1];
            fetch(url, { credentials: 'include', headers: { 'accept': 'application/json' } })
            .then(r => r.text().then(t => done(JSON.stringify({ status: r.status, body: t }))))
            .catch(e => done(JSON.stringify({ status: -1, body: String(e) })));
            """;

        var raw = ((IJavaScriptExecutor)driver).ExecuteAsyncScript(script, apiUrl) as string;
        if (string.IsNullOrEmpty(raw))
        {
            return (-1, "");
        }

        using var doc = JsonDocument.Parse(raw);
        var status = doc.RootElement.GetProperty("status").GetInt32();
        var body = doc.RootElement.GetProperty("body").GetString() ?? "";
        return (status, body);
    }

    // Heuristic for "Cloudflare answered instead of the API": a blocking/failed
    // status, a known challenge marker in the body, or a body that is markup.
    private static bool LooksLikeChallenge(int status, string body) =>
        status is 403 or 503 or -1
        || body.Contains("Just a moment", StringComparison.OrdinalIgnoreCase)
        || body.Contains("challenge-platform", StringComparison.OrdinalIgnoreCase)
        || body.TrimStart().StartsWith("<", StringComparison.Ordinal); // HTML, not JSON

    // Count items[] without binding a full model (the typed model is Phase 2).
    // Returns null when the body is not JSON or has no items array.
    private static int? TryCountItems(string body)
    {
        try
        {
            using var doc = JsonDocument.Parse(body);
            return doc.RootElement.TryGetProperty("items", out var items)
                && items.ValueKind == JsonValueKind.Array
                    ? items.GetArrayLength()
                    : null;
        }
        catch (JsonException)
        {
            return null;
        }
    }

    // Pause between pages: the configured base delay (clamped at zero) plus a
    // random 0..PageJitterSeconds jitter.
    private async Task DelayAsync(CancellationToken ct)
    {
        var jitter = _options.PageJitterSeconds > 0
            ? Random.Shared.NextDouble() * _options.PageJitterSeconds
            : 0;
        var seconds = Math.Max(0, _options.PageDelaySeconds) + jitter;
        if (seconds > 0)
        {
            await Task.Delay(TimeSpan.FromSeconds(seconds), ct);
        }
    }

    // Persist one response body as UTF-8 under the output directory.
    private static async Task WriteAsync(string dir, string fileName, string body, CancellationToken ct) =>
        await File.WriteAllTextAsync(Path.Combine(dir, fileName), body, Encoding.UTF8, ct);
}

View File

@@ -1,50 +0,0 @@
namespace BlueLaminate.Scraper.CsMoney;
/// <summary>
/// Settings for the cs.money scraper; bound from the <c>CsMoney</c> configuration
/// section.
/// <para>
/// cs.money has no public API and is fronted by Cloudflare bot protection, so a
/// real, non-headless browser (Selenium/Edge) is driven through an IPRoyal
/// residential proxy via a local forwarding hop (no CDP). Because the market
/// endpoint re-challenges aggressively while paginating, these settings also tune
/// the warmed profile and the request pacing used to last longer.
/// </para>
/// </summary>
public sealed class CsMoneyOptions
{
    public const string SectionName = "CsMoney";

    /// <summary>Public market page the browser opens; the operator clears Cloudflare here.</summary>
    public string MarketUrl { get; set; } = "https://cs.money/market/buy/";

    /// <summary>
    /// Template for the listings API, where <c>{0}</c> is the page offset (steps of
    /// 60). It is fetched in-page from the cleared market origin so that the
    /// cf_clearance cookie is sent.
    /// </summary>
    public string ApiUrlTemplate { get; set; } =
        "https://cs.money/2.0/market/sell-orders?limit=60&offset={0}";

    /// <summary>
    /// Chromium profile directory kept between runs. Reusing a single profile
    /// preserves the cf_clearance cookie and history — a warmed profile is far less
    /// likely to be re-challenged than a fresh one. Empty means a throwaway profile.
    /// </summary>
    public string ProfileDir { get; set; } =
        Path.Combine(Path.GetTempPath(), "bluelaminate-csmoney-profile");

    /// <summary>
    /// Optional ISO country code(s) for the residential exit IP, such as "us".
    /// Null/empty leaves the choice to IPRoyal (random).
    /// </summary>
    public string? Country { get; set; }

    /// <summary>Whether to load images. Defaults to off to save the metered residential plan.</summary>
    public bool LoadImages { get; set; }

    /// <summary>Base pause, in seconds, between paginated API fetches (human-like pacing).</summary>
    public double PageDelaySeconds { get; set; } = 2.5;

    /// <summary>Additional random jitter, in seconds (0..value), added to every pause.</summary>
    public double PageJitterSeconds { get; set; } = 2.0;
}

View File

@@ -1,21 +0,0 @@
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// A supplier of proxy endpoints. This seam exists so that everything else in the
/// scraper depends solely on this interface and <see cref="ProxyLease"/>; another
/// residential provider — or the future C2 that hands IPs to containers, or a
/// composite "grab-bag" across several providers — can then be dropped in without
/// touching any browser or scraping code.
/// </summary>
public interface IProxyProvider
{
    /// <summary>Identifier stamped on the leases this provider issues, e.g. "iproyal".</summary>
    string Name { get; }

    /// <summary>
    /// Build a usable endpoint for <paramref name="request"/>. Gateway providers do
    /// this by pure string composition (no network call); the C2 implementation may
    /// later replace that with real allocation.
    /// </summary>
    ProxyLease Acquire(ProxyRequest request);
}

View File

@@ -1,77 +0,0 @@
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// <see cref="IProxyProvider"/> for IPRoyal's residential gateway. IPRoyal keeps
/// one fixed host/port (geo.iproyal.com:12321) and encodes everything else —
/// country, sticky-session id, session lifetime — as underscore-delimited
/// parameters appended to the account password. Example password:
/// "secret_country-us_session-ab12cd_lifetime-30m". The account username is sent
/// unchanged. Docs: https://docs.iproyal.com/proxies/residential/proxy
/// </summary>
public sealed class IpRoyalProxyProvider : IProxyProvider
{
    public const string GatewayHost = "geo.iproyal.com";
    public const int GatewayPort = 12321;

    // IPRoyal caps sticky sessions; 30 minutes is a safe default that comfortably
    // covers a single scrape pass without forcing an early IP rotation.
    private static readonly TimeSpan DefaultLifetime = TimeSpan.FromMinutes(30);

    private readonly string _username;
    private readonly string _password;

    /// <summary>Create a provider for one IPRoyal account.</summary>
    /// <param name="username">Account username; sent to the gateway unchanged.</param>
    /// <param name="password">Account password; request parameters are appended per lease.</param>
    /// <exception cref="ArgumentException">Either credential is null, empty or whitespace.</exception>
    public IpRoyalProxyProvider(string username, string password)
    {
        if (string.IsNullOrWhiteSpace(username))
        {
            throw new ArgumentException("IPRoyal username is required.", nameof(username));
        }

        if (string.IsNullOrWhiteSpace(password))
        {
            throw new ArgumentException("IPRoyal password is required.", nameof(password));
        }

        _username = username;
        _password = password;
    }

    public string Name => "iproyal";

    /// <summary>
    /// Compose a lease for the fixed gateway, encoding the request's country and
    /// sticky-session settings into the password. No network call is made.
    /// </summary>
    /// <param name="request">Desired country / stickiness / session id / lifetime.</param>
    /// <returns>
    /// A lease whose <c>SessionId</c> and <c>ExpiresAt</c> are set only for sticky requests.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public ProxyLease Acquire(ProxyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var password = _password;
        string? sessionId = null;
        DateTimeOffset? expiresAt = null;

        // Country first; the router picks one at random when several are listed.
        if (!string.IsNullOrWhiteSpace(request.Country))
        {
            password += $"_country-{request.Country.Trim().ToLowerInvariant()}";
        }

        if (request.Sticky)
        {
            // A blank caller-supplied id is treated like a missing one; otherwise the
            // password would carry an empty "_session-" value.
            sessionId = string.IsNullOrWhiteSpace(request.SessionId)
                ? NewSessionId()
                : request.SessionId.Trim();

            var lifetime = request.Lifetime ?? DefaultLifetime;

            // IPRoyal expresses lifetime as whole minutes (e.g. "_lifetime-30m").
            // At least 1; clamped at int.MaxValue so an enormous TimeSpan cannot
            // overflow the cast.
            var rounded = Math.Round(lifetime.TotalMinutes);
            var minutes = rounded >= int.MaxValue ? int.MaxValue : Math.Max(1, (int)rounded);

            password += $"_session-{sessionId}_lifetime-{minutes}m";
            expiresAt = DateTimeOffset.UtcNow.AddMinutes(minutes);
        }

        return new ProxyLease(
            Host: GatewayHost,
            Port: GatewayPort,
            Username: _username,
            Password: password,
            Provider: Name,
            SessionId: sessionId,
            ExpiresAt: expiresAt);
    }

    // Short, URL/param-safe token. IPRoyal treats the session value opaquely;
    // it only needs to be stable for the duration of a sticky lease.
    private static string NewSessionId() =>
        Guid.NewGuid().ToString("N")[..10];
}

View File

@@ -1,232 +0,0 @@
using System.Net;
using System.Net.Sockets;
using System.Text;
using Microsoft.Extensions.Logging;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// A tiny in-process HTTP proxy that listens on 127.0.0.1 and chains every request
/// to an upstream gateway (the residential <see cref="ProxyLease"/>), injecting the
/// gateway's <c>Proxy-Authorization</c> header itself.
/// <para>
/// Why this exists: Chromium ignores credentials in <c>--proxy-server</c>, and the
/// only in-browser ways to answer the gateway's 407 are a CDP auth handler (which
/// is a Cloudflare automation tell) or a Manifest V2 extension (disabled in current
/// Chromium). By terminating the browser→proxy hop locally and adding the auth here,
/// the browser talks to an <em>auth-free</em> local endpoint and we run with zero
/// CDP — far less detectable — while the upstream still carries the IPRoyal
/// username/password (and its baked-in country/session params).
/// </para>
/// <para>
/// HTTPS (the only thing cs.money serves) flows through the <c>CONNECT</c> tunnel:
/// we open the tunnel to the upstream with auth, then relay raw bytes both ways so
/// the browser does TLS end-to-end with the real host — this proxy never sees
/// plaintext. Plain HTTP is forwarded best-effort for the occasional non-TLS call.
/// </para>
/// </summary>
public sealed class LocalForwardingProxy : IAsyncDisposable
{
    // ReadHeaderAsync turns each received byte into the char with the same code
    // point, so Latin-1 is the encoding that writes relayed header text back out
    // byte-for-byte (ASCII would replace every byte >= 0x80 with '?').
    private static readonly Encoding WireEncoding = Encoding.Latin1;

    private readonly ProxyLease _upstream;
    private readonly ILogger _logger;
    private readonly TcpListener _listener;
    private readonly CancellationTokenSource _cts = new();
    private readonly string _authHeader;
    private Task? _acceptLoop;
    private int _disposed; // 0 = live, 1 = DisposeAsync has already run

    /// <summary>Prepare (but do not start) a proxy chaining to <paramref name="upstream"/>.</summary>
    /// <param name="upstream">Gateway host/port and the credentials to present to it.</param>
    /// <param name="logger">Receives lifecycle and per-connection diagnostics.</param>
    public LocalForwardingProxy(ProxyLease upstream, ILogger logger)
    {
        _upstream = upstream;
        _logger = logger;
        _listener = new TcpListener(IPAddress.Loopback, 0); // ephemeral port

        // Pre-compute the Basic auth header line once; it is identical for every request.
        var token = Convert.ToBase64String(
            Encoding.ASCII.GetBytes($"{upstream.Username}:{upstream.Password}"));
        _authHeader = $"Proxy-Authorization: Basic {token}\r\n";
    }

    /// <summary>"127.0.0.1:port" — pass this to the browser's <c>--proxy-server</c>.</summary>
    public string Endpoint { get; private set; } = "";

    /// <summary>Bind the local port and start accepting browser connections.</summary>
    /// <returns>This instance, for chaining.</returns>
    public LocalForwardingProxy Start()
    {
        _listener.Start();
        var port = ((IPEndPoint)_listener.LocalEndpoint).Port;
        Endpoint = $"127.0.0.1:{port}";
        _acceptLoop = Task.Run(() => AcceptLoopAsync(_cts.Token));
        _logger.LogInformation(
            "Local forwarding proxy listening on {Endpoint} → upstream {Upstream} ({Provider}).",
            Endpoint, _upstream.Endpoint, _upstream.Provider);
        return this;
    }

    // Accept browser connections until cancelled, handing each to its own task.
    private async Task AcceptLoopAsync(CancellationToken ct)
    {
        while (!ct.IsCancellationRequested)
        {
            TcpClient client;
            try
            {
                client = await _listener.AcceptTcpClientAsync(ct);
            }
            catch (OperationCanceledException)
            {
                break;
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Accept failed.");
                continue;
            }

            // Fire-and-forget per connection; exceptions are swallowed per client so
            // one bad tunnel never takes down the listener. The task is scheduled
            // without the token on purpose: a token cancelled right after the accept
            // would otherwise skip the handler and leave this client undisposed.
            _ = Task.Run(() => HandleClientAsync(client, ct), CancellationToken.None);
        }
    }

    // Read the request header, then dispatch to the CONNECT or plain-HTTP path.
    // Owns (and always disposes) the accepted client.
    private async Task HandleClientAsync(TcpClient client, CancellationToken ct)
    {
        using (client)
        {
            try
            {
                client.NoDelay = true;
                var clientStream = client.GetStream();
                var header = await ReadHeaderAsync(clientStream, ct);
                if (header is null)
                {
                    return;
                }

                var requestLine = header.Split("\r\n", 2)[0];
                var parts = requestLine.Split(' ');
                if (parts.Length < 2)
                {
                    return;
                }

                var method = parts[0];
                if (method.Equals("CONNECT", StringComparison.OrdinalIgnoreCase))
                {
                    await HandleConnectAsync(clientStream, parts[1], ct);
                }
                else
                {
                    await HandlePlainAsync(clientStream, header, ct);
                }
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Client connection error.");
            }
        }
    }

    // HTTPS path: open an authenticated CONNECT tunnel upstream, then relay raw bytes.
    // Answers the browser with 502 when the upstream does not reply "200".
    private async Task HandleConnectAsync(NetworkStream clientStream, string target, CancellationToken ct)
    {
        using var upstream = new TcpClient { NoDelay = true };
        await upstream.ConnectAsync(_upstream.Host, _upstream.Port, ct);
        var upstreamStream = upstream.GetStream();

        var connect = $"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{_authHeader}\r\n";
        await upstreamStream.WriteAsync(WireEncoding.GetBytes(connect), ct);

        var upstreamHeader = await ReadHeaderAsync(upstreamStream, ct);
        var ok = upstreamHeader is not null
            && upstreamHeader.StartsWith("HTTP/1.", StringComparison.Ordinal)
            && upstreamHeader.Split(' ', 3) is { Length: >= 2 } sl
            && sl[1] == "200";
        if (!ok)
        {
            var status = upstreamHeader?.Split("\r\n", 2)[0] ?? "no response";
            _logger.LogWarning("Upstream refused CONNECT {Target}: {Status}", target, status);
            var resp = "HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n";
            await clientStream.WriteAsync(Encoding.ASCII.GetBytes(resp), ct);
            return;
        }

        await clientStream.WriteAsync(
            Encoding.ASCII.GetBytes("HTTP/1.1 200 Connection established\r\n\r\n"), ct);
        await RelayAsync(clientStream, upstreamStream, ct);
    }

    // Plain-HTTP path: re-inject the request upstream with auth, then relay both ways.
    // Requests without a Host header (or without a terminated request line) are dropped.
    private async Task HandlePlainAsync(NetworkStream clientStream, string header, CancellationToken ct)
    {
        var hostLine = header.Split("\r\n")
            .FirstOrDefault(l => l.StartsWith("Host:", StringComparison.OrdinalIgnoreCase));
        if (hostLine is null)
        {
            return;
        }

        // Insert the Proxy-Authorization header right after the request line.
        // Without a CRLF there is no request-line boundary to insert at.
        var idx = header.IndexOf("\r\n", StringComparison.Ordinal);
        if (idx < 0)
        {
            return;
        }

        using var upstream = new TcpClient { NoDelay = true };
        await upstream.ConnectAsync(_upstream.Host, _upstream.Port, ct);
        var upstreamStream = upstream.GetStream();

        var rewritten = header[..(idx + 2)] + _authHeader + header[(idx + 2)..];
        await upstreamStream.WriteAsync(WireEncoding.GetBytes(rewritten), ct);
        await RelayAsync(clientStream, upstreamStream, ct);
    }

    // Pipe both directions until either side closes. The caller disposes both
    // streams afterwards, which ends the copy that is still running.
    private static async Task RelayAsync(NetworkStream a, NetworkStream b, CancellationToken ct)
    {
        var toUpstream = a.CopyToAsync(b, ct);
        var toClient = b.CopyToAsync(a, ct);
        await Task.WhenAny(toUpstream, toClient);
    }

    // Read up to the end of the HTTP header block (CRLFCRLF). Returns null on EOF
    // before any byte arrived. Reads a single byte at a time so nothing past the
    // header is consumed — whatever follows must reach RelayAsync untouched.
    private static async Task<string?> ReadHeaderAsync(NetworkStream stream, CancellationToken ct)
    {
        var buffer = new byte[1];
        var sb = new StringBuilder(256);
        while (true)
        {
            var read = await stream.ReadAsync(buffer, ct);
            if (read == 0)
            {
                return sb.Length > 0 ? sb.ToString() : null;
            }

            sb.Append((char)buffer[0]);
            if (sb.Length >= 4
                && sb[^1] == '\n' && sb[^2] == '\r' && sb[^3] == '\n' && sb[^4] == '\r')
            {
                return sb.ToString();
            }

            // Guard against a runaway/garbage stream.
            if (sb.Length > 64 * 1024)
            {
                return sb.ToString();
            }
        }
    }

    /// <summary>
    /// Stop accepting connections and wait for the accept loop to finish. Safe to
    /// call more than once; calls after the first do nothing.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        // Without this guard a second call would hit the already-disposed
        // CancellationTokenSource and throw.
        if (Interlocked.Exchange(ref _disposed, 1) != 0)
        {
            return;
        }

        await _cts.CancelAsync();
        _listener.Stop();
        if (_acceptLoop is not null)
        {
            try
            {
                await _acceptLoop;
            }
            catch (OperationCanceledException)
            {
                // expected on shutdown
            }
        }

        _cts.Dispose();
    }
}

View File

@@ -1,21 +0,0 @@
using Microsoft.Extensions.Logging;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// Hands out <see cref="LocalForwardingProxy"/> instances wired with a DI-provided
/// logger, letting consumers (the proxy probe, the cs.money capture) create a
/// per-run local proxy without taking a direct dependency on
/// <see cref="ILoggerFactory"/>.
/// </summary>
public sealed class LocalForwardingProxyFactory
{
    private readonly ILogger<LocalForwardingProxy> _logger;

    public LocalForwardingProxyFactory(ILogger<LocalForwardingProxy> logger) => _logger = logger;

    /// <summary>
    /// Construct a local proxy that chains to <paramref name="upstream"/>. The proxy
    /// is returned unstarted.
    /// </summary>
    public LocalForwardingProxy Create(ProxyLease upstream)
    {
        return new LocalForwardingProxy(upstream, _logger);
    }
}

View File

@@ -1,29 +0,0 @@
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// A concrete proxy endpoint, ready for use, as returned by an
/// <see cref="IProxyProvider"/>. Consumers only ever see this type, so changing
/// providers (or mixing several in a grab-bag) leaves calling code untouched.
/// <see cref="Username"/> and <see cref="Password"/> are the exact credentials to
/// present to the gateway — with providers such as IPRoyal the targeting/session
/// parameters are already encoded in them.
/// </summary>
/// <param name="Host">Gateway host name, e.g. "geo.iproyal.com".</param>
/// <param name="Port">Gateway port number, e.g. 12321.</param>
/// <param name="Username">User name to present to the gateway.</param>
/// <param name="Password">Password to present (may carry encoded session/geo params).</param>
/// <param name="Provider">Name of the provider that issued the lease.</param>
/// <param name="SessionId">Sticky session key when the IP is pinned; otherwise null.</param>
/// <param name="ExpiresAt">Time after which a sticky IP may be recycled; null if rotating/unbounded.</param>
public sealed record ProxyLease(
    string Host,
    int Port,
    string Username,
    string Password,
    string Provider,
    string? SessionId = null,
    DateTimeOffset? ExpiresAt = null)
{
    /// <summary>The "host:port" form that browser proxy settings expect.</summary>
    public string Endpoint
    {
        get { return Host + ":" + Port; }
    }
}

View File

@@ -1,103 +0,0 @@
using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>What ipinfo.io reports as the exit IP a proxy lease really resolves to.</summary>
/// <param name="Org">
/// ASN plus organisation, e.g. "AS7922 Comcast Cable". This field distinguishes
/// residential from datacenter: a consumer ISP indicates a genuine residential
/// exit, whereas a hosting provider (OVH, Hetzner, AWS…) indicates a datacenter
/// address dressed up as one.
/// </param>
public sealed record ProxyExitInfo(
    string? Ip,
    string? City,
    string? Region,
    string? Country,
    string? Org,
    string? Hostname,
    string? Timezone);
/// <summary>
/// The smallest end-to-end test of the proxy plumbing: take a lease, start the
/// real browser through it, and read the exit IP back from an IP-echo endpoint.
/// It costs only a few KB, which makes it the right first thing to run against a
/// metered residential plan — it confirms auth works and reveals whether the IP is
/// truly residential before bandwidth is spent on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
    private const string IpEchoUrl = "https://ipinfo.io/json";

    private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNameCaseInsensitive = true };

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    public ProxyProbe(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        _provider = provider;
        _proxyFactory = proxyFactory;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Acquire a lease for <paramref name="request"/>, browse to the IP-echo URL
    /// through a local forwarding proxy, and return the parsed exit details.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The browser returned no page text, the text held no JSON object, or the JSON
    /// deserialized to nothing.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);
        var mode = lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}";
        _logger.LogInformation(
            "Acquired {Provider} lease (exit {Mode}).",
            lease.Provider,
            mode);

        await using var localProxy = _proxyFactory.Create(lease).Start();
        var driver = _factory.Create(localProxy.Endpoint, blockImages: true);
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            driver.Navigate().GoToUrl(IpEchoUrl);

            // Take the document's text instead of walking the DOM, so the browser's
            // built-in JSON viewer is not in the way; the JSON object it rendered is
            // then cut out of that text.
            var scripting = (IJavaScriptExecutor)driver;
            if (scripting.ExecuteScript("return document.documentElement.innerText;") is not string rendered)
            {
                throw new InvalidOperationException("Browser returned no page text.");
            }

            var info = JsonSerializer.Deserialize<ProxyExitInfo>(ExtractJson(rendered), JsonOptions);
            if (info is null)
            {
                throw new InvalidOperationException("IP-echo response was empty.");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                info.Ip, info.City, info.Region, info.Country, info.Org);
            return info;
        }
        finally
        {
            driver.Quit();
        }
    }

    // Return the span from the first '{' to the last '}' (inclusive); throws when
    // the text contains no such span.
    private static string ExtractJson(string text)
    {
        var open = text.IndexOf('{');
        var close = text.LastIndexOf('}');
        if (open >= 0 && close > open)
        {
            return text.Substring(open, close - open + 1);
        }

        throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
    }
}

View File

@@ -1,30 +0,0 @@
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// Describes the exit IP a caller is asking for, independent of any provider: every
/// <see cref="IProxyProvider"/> maps these settings onto its own gateway syntax. A
/// sticky request asks for one residential IP pinned for the session's lifetime; a
/// non-sticky request allows the IP to rotate per connection.
/// </summary>
/// <param name="Country">
/// Optional ISO 3166-1 alpha-2 code, or a comma-separated list from which the
/// provider picks one at random (e.g. "us" or "us,gb,de"). Null applies no geo
/// constraint.
/// </param>
/// <param name="Sticky">
/// True keeps one exit IP for the entire session; false rotates.
/// </param>
/// <param name="SessionId">
/// Optional session key supplied by the caller for a sticky lease. If it is null
/// while <paramref name="Sticky"/> is true, the provider generates one.
/// </param>
/// <param name="Lifetime">
/// How long a sticky IP should be kept before the provider may recycle it. Has no
/// effect when <paramref name="Sticky"/> is false. Null defers to the provider's
/// own default.
/// </param>
public sealed record ProxyRequest(
    string? Country = null,
    bool Sticky = true,
    string? SessionId = null,
    TimeSpan? Lifetime = null);

View File

@@ -1 +0,0 @@
{"data":{"aed":3.67308,"afn":63.8101,"all":81.9632,"amd":368.143,"ang":1.80234,"aoa":918.907,"ars":1408.71,"aud":1.39151,"awg":1.79,"azn":1.69966,"bam":1.68079,"bbd":1.99,"bdt":122.756,"bgn":1.67724,"bhd":0.377063,"bif":2977.25,"bmd":1,"bnd":1.27739,"bob":6.93362,"brl":5.03662,"bsd":1,"btn":94.9823,"bwp":13.4051,"byn":2.76,"bzd":2,"cad":1.38011,"cdf":2303.13,"chf":0.781072,"clp":889.925,"cny":6.76633,"cop":3658.64,"crc":456.323,"cve":94.8541,"czk":20.8256,"djf":177.6,"dkk":6.41027,"dop":58.34,"dzd":132.483,"eek":11.7036,"egp":52.2449,"etb":158.478,"eur":0.85756,"eurc":0.85756,"fjd":2.22183,"fkp":0.743205,"gbp":0.743163,"gel":2.6635,"ghs":11.738,"gip":0.743205,"gmd":71.7,"gnf":8733.01,"gtq":7.62826,"gyd":209.218,"hkd":7.83683,"hnl":26.5919,"hrk":6.46045,"htg":131.051,"huf":303.494,"idr":17846.4,"ils":2.81558,"inr":94.9244,"isk":122.978,"jmd":157.512,"jod":0.709142,"jpy":159.298,"kes":129.43,"kgs":87.4636,"khr":4026.38,"kmf":422.97,"krw":1507.45,"kwd":0.306761,"kyd":0.831626,"kzt":485.776,"lak":21934.5,"lbp":89500,"lkr":330.556,"lrd":182.518,"lsl":16.2382,"ltl":2.85333,"lvl":0.666172,"mad":9.18233,"mdl":17.2495,"mga":4197.32,"mkd":52.9711,"mmk":3658.01,"mnt":3578.79,"mop":8.07515,"mro":357.429,"mur":47.3605,"mvr":15.4615,"mwk":1734.01,"mxn":17.3547,"myr":3.96506,"mzn":63.7022,"nad":16.2435,"ngn":1407.3,"nio":36.6243,"nok":9.25345,"npr":152.04,"nzd":1.67028,"omr":0.385044,"pab":1,"pen":3.4017,"pgk":4.36134,"php":61.5484,"pkr":278.578,"pln":3.62897,"pyg":6017.9,"qar":3.64153,"ron":4.5042,"rsd":100.688,"rub":71.0734,"rwf":1463.11,"sar":3.75298,"sbd":8.0556,"scr":14.4837,"sek":9.24372,"sgd":1.27675,"shp":0.743619,"sle":22.7529,"sll":22791.4,"sos":571.375,"srd":37.1698,"std":20979.6,"svc":8.75278,"szl":16.2358,"thb":32.5267,"tjs":9.25184,"tnd":2.92,"top":2.35974,"try":45.8529,"ttd":6.74984,"twd":31.4269,"tzs":2629.69,"uah":44.2847,"ugx":3771.6,"usd":1,"usdc":1,"usdt":1.0013,"uyu":40.1504,"uzs":12004,"vef":50.1656,"vnd":26311,"vuv":118.053,"wst":2.70421,"xaf":562.45,"xcd":2
.6882,"xcg":1.80234,"xof":562.975,"xpf":102.465,"yer":1566.65,"zar":16.2289,"zmw":18.3213}}

View File

@@ -1 +0,0 @@
{"inferred_location":{"short":"US","long":"United States","currency":"USD"}}

File diff suppressed because one or more lines are too long

View File

@@ -1 +0,0 @@
{"code":1,"message":"You need to be logged in to search listings"}

View File

@@ -1 +0,0 @@
{"inferred_location":{"short":"US","long":"United States","currency":"USD"}}

File diff suppressed because one or more lines are too long

View File

@@ -29,11 +29,6 @@
<!-- CLI / telemetry -->
<PackageVersion Include="System.CommandLine" Version="2.0.8" />
<PackageVersion Include="OpenTelemetry" Version="1.15.3" />
<!-- Browser automation (cs.money sits behind Cloudflare; a real, non-headless
browser routed through a residential proxy is required to clear the
challenge and observe the site's internal API). -->
<PackageVersion Include="Selenium.WebDriver" Version="4.44.0" />
</ItemGroup>
</Project>

View File

@@ -20,12 +20,12 @@
SET search_path = skintracker;
INSERT INTO skin_conditions (skin_id, condition, min_float, max_float)
INSERT INTO skin_conditions (skin_id, condition, float_min, float_max)
SELECT
s.id,
t.name,
GREATEST(s.float_min, t.lo) AS min_float, -- clamp the tier to the skin's range
LEAST(s.float_max, t.hi) AS max_float
GREATEST(s.float_min, t.lo) AS float_min, -- clamp the tier to the skin's range
LEAST(s.float_max, t.hi) AS float_max
FROM skins s
CROSS JOIN (VALUES
('Factory New', 0.00, 0.07),
@@ -51,9 +51,9 @@ ORDER BY s.id, t.lo;
-- Sanity checks (optional)
-- ------------------------------------------------------------
-- Rows per condition:
-- SELECT condition, count(*) FROM skin_conditions GROUP BY condition ORDER BY min(min_float);
-- SELECT condition, count(*) FROM skin_conditions GROUP BY condition ORDER BY min(float_min);
--
-- Spot-check a capped skin (e.g. an Asiimov) shows clamped FT bounds:
-- SELECT s.name, sc.condition, sc.min_float, sc.max_float
-- SELECT s.name, sc.condition, sc.float_min, sc.float_max
-- FROM skin_conditions sc JOIN skins s ON s.id = sc.skin_id
-- WHERE s.name ILIKE 'Asiimov' ORDER BY sc.min_float;
-- WHERE s.name ILIKE 'Asiimov' ORDER BY sc.float_min;

View File

@@ -1,44 +1,18 @@
-- ============================================================
-- CS2 Skin Tracker — backfill skin_conditions.listings_swept_at
-- Run against the skintracker database as the app role, ONCE,
-- after the AddSkinConditionListingsSweptAt migration is applied
-- and 05_fill_skin_conditions.sql has populated the wear bands.
-- Idempotent: re-running only touches still-null bands.
--
-- Why: the catalogue sweep used to page each skin to completion
-- as a single unit, so a non-null skins.listings_swept_at means
-- EVERY wear of that skin was covered at that time. The sweep now
-- checkpoints per wear band (skin_conditions.listings_swept_at).
-- Without this backfill, every band of an already-swept skin would
-- look never-swept and jump to the front of the queue, needlessly
-- re-sweeping skins that are already current. Inheriting the skin's
-- timestamp marks those bands as covered so the sweep moves on.
-- SUPERSEDED — DO NOT RUN.
--
-- Only fills bands that are still null, so bands already swept under
-- the new per-band logic keep their (newer) timestamp.
-- The single shared `listings_swept_at` columns on `skins` and
-- `skin_conditions` were replaced by per-site checkpoint tables
-- (`skin_sweeps` / `skin_condition_sweeps`, keyed by (entity, source))
-- in the AddPerSiteSweepCheckpoints migration. Each site now tracks its
-- own "last swept" under its own `source`, so a band swept on CSFloat is
-- still never-swept on cs.money.
--
-- The columns this script updated no longer exist, so running it now
-- would error. We intentionally did NOT migrate the old values into the
-- new tables: both sites simply cold-sweep the catalogue once and the
-- never-swept-first ordering refills the checkpoints. This file is kept
-- only so the db/ script numbering stays stable.
-- ============================================================
SET search_path = skintracker;
UPDATE skin_conditions sc
SET listings_swept_at = s.listings_swept_at
FROM skins s
WHERE sc.skin_id = s.id
AND s.listings_swept_at IS NOT NULL -- skin was fully swept under the old per-skin logic
AND sc.listings_swept_at IS NULL; -- don't overwrite bands already swept per-band
-- ------------------------------------------------------------
-- Sanity checks (optional)
-- ------------------------------------------------------------
-- Bands backfilled vs still never-swept:
-- SELECT
-- count(*) FILTER (WHERE listings_swept_at IS NOT NULL) AS swept,
-- count(*) FILTER (WHERE listings_swept_at IS NULL) AS never_swept
-- FROM skin_conditions;
--
-- A previously-swept skin should now have all its bands stamped:
-- SELECT s.name, sc.condition, sc.listings_swept_at
-- FROM skin_conditions sc JOIN skins s ON s.id = sc.skin_id
-- WHERE s.listings_swept_at IS NOT NULL
-- ORDER BY s.name, sc.min_float
-- LIMIT 20;

View File

@@ -1,14 +1,19 @@
# One-command startup for the cs.money scraper control plane + worker.
# One-command startup for the scraper control plane + per-market workers.
# Postgres is external (runs independently on the host); the C2 connects to it via
# host.docker.internal and auto-applies EF migrations on boot.
#
# docker compose up --build
#
# Scale workers (drop the worker `ports:` first — noVNC can't share one host port):
# docker compose up --build --scale worker=10
# Worker counts per market are env-driven (deploy.replicas), so one command sets the mix —
# e.g. 1 skin.land worker and 0 cs.money workers (PowerShell):
# $env:CSMONEY_WORKERS=0; $env:SKINLAND_WORKERS=1; docker compose up --build
# bash/sh:
# CSMONEY_WORKERS=0 SKINLAND_WORKERS=1 docker compose up --build
# (Or set them in a .env file next to this compose file.) Defaults: 1 of each.
#
# Each worker mints its own IPRoyal sticky session at startup, so every replica gets a
# distinct residential exit IP. Set IPROYAL_USERNAME / IPROYAL_PASSWORD (e.g. in a .env
# file next to this compose file) to turn the proxy on.
# distinct residential exit IP. Set IPROYAL_USERNAME / IPROYAL_PASSWORD (.env works) to
# turn the proxy on. The worker `ports:` are ephemeral so replicas never collide.
services:
c2:
build:
@@ -33,7 +38,11 @@ services:
build:
context: .
dockerfile: worker/Dockerfile
# cs.money worker count. Set CSMONEY_WORKERS=0 to run none (e.g. skin.land-only).
deploy:
replicas: ${CSMONEY_WORKERS:-1}
environment:
WORKER_SCRIPT: csmoney_worker.py # (also the image default; explicit for symmetry)
C2_URL: http://c2:5080
WORKER_TOKEN: ${WORKER_TOKEN:-dev-worker-token}
# IPRoyal residential proxy: each replica self-assigns a unique sticky session
@@ -53,3 +62,30 @@ services:
# http://localhost:<mapped>/vnc.html to watch / solve a challenge.
- "6080"
restart: unless-stopped
# The skin.land worker: same image, but runs skinland_worker.py against the C2's
# /skinland job group and warms on a skin.land page. Each replica gets its own IPRoyal
# sticky exit IP exactly like the cs.money worker. Count via SKINLAND_WORKERS.
skinland-worker:
build:
context: .
dockerfile: worker/Dockerfile
deploy:
replicas: ${SKINLAND_WORKERS:-1}
environment:
WORKER_SCRIPT: skinland_worker.py
C2_URL: http://c2:5080
MARKET_URL: ${SKINLAND_MARKET_URL:-https://skin.land/market/csgo/}
WORKER_TOKEN: ${WORKER_TOKEN:-dev-worker-token}
IPROYAL_USERNAME: ${IPROYAL_USERNAME:-}
IPROYAL_PASSWORD: ${IPROYAL_PASSWORD:-}
IPROYAL_COUNTRY: ${IPROYAL_COUNTRY:-us}
IPROYAL_LIFETIME_MIN: ${IPROYAL_LIFETIME_MIN:-60}
PROXY: ${PROXY:-}
SOLVE_SECONDS: ${SOLVE_SECONDS:-45}
LOAD_IMAGES: ${LOAD_IMAGES:-}
depends_on:
- c2
ports:
- "6080"
restart: unless-stopped

148
monitoring/README.md Normal file
View File

@@ -0,0 +1,148 @@
# BlueLaminate observability stack (standalone, Proxmox LXC)
A self-contained Grafana **LGTM** stack — **L**oki (logs), **G**rafana (dashboards),
**T**empo (traces), and Prometheus (**M**etrics) — fronted by **Grafana Alloy** as a single
OTLP ingress. It runs as native systemd services on its own Proxmox LXC, decoupled from the
app's `docker-compose.yml`. The C2 and Python workers push OpenTelemetry data to Alloy, which
fans the three signals out to the backends; Grafana ties them together.
```
C2 / workers ──OTLP(4317 grpc / 4318 http)──► Alloy ──┬─► Loki (logs, :3100)
(other host) ├─► Prometheus (metrics, :9090, remote-write)
└─► Tempo (traces, :4319 OTLP → store)
Grafana (:3000)
datasources: Loki + Prometheus + Tempo
```
Only Alloy's OTLP ports (`4317`/`4318`) and Grafana (`3000`) need to be reachable from the
LAN. Loki and Tempo bind localhost; Alloy is the only client that talks to them.
## Layout
```
monitoring/
install.sh # idempotent provisioner — run as root in the LXC
alloy/config.alloy # OTLP receiver → batch → Loki / Prometheus / Tempo
prometheus/prometheus.yml # self-monitoring scrapes (app metrics arrive via remote-write)
prometheus/prometheus.service # systemd unit: remote-write + OTLP receivers, 15d retention
loki/loki.yml # single-binary, filesystem store, 15d retention
tempo/tempo.yml # OTLP on :4319, local store, metrics_generator → Prometheus
grafana/datasources.yml # Loki + Prometheus(default) + Tempo, correlated
grafana/dashboards.yml # file-based dashboard provider
grafana/dashboards/overview.json # starter dashboard (target health, span rates, logs)
```
## 1. Create the LXC (run on the Proxmox host)
Reference only — adjust the storage, bridge, and template names to your node. An unprivileged
Debian 13 container with ~2 vCPU / 2–4 GB RAM / 20–40 GB disk is plenty.
```bash
# Make sure a Debian 13 template is present (once):
# pveam update && pveam available | grep debian-13
# pveam download local debian-13-standard_*_amd64.tar.zst
pct create 910 local:vztmpl/debian-13-standard_13.0-1_amd64.tar.zst \
--hostname grafana-lxc \
--cores 2 --memory 4096 --swap 1024 \
--rootfs local-lvm:32 \
--net0 name=eth0,bridge=vmbr0,ip=dhcp \
--unprivileged 1 --features nesting=0 \
--onboot 1 --start 1
# (Optional) give it a static IP instead of dhcp, e.g.
# --net0 name=eth0,bridge=vmbr0,ip=192.168.1.50/24,gw=192.168.1.1
```
`nesting=0` is fine — there's no Docker here, just native binaries.
## 2. Deploy the stack (inside the LXC)
```bash
pct enter 910 # or: ssh root@<lxc-ip>
apt-get update && apt-get install -y git
git clone <this-repo-url> /opt/bluelaminate
cd /opt/bluelaminate/monitoring
bash install.sh   # you are already root inside the LXC; a fresh Debian template has no sudo
```
No git on the LXC? Copy just this folder over instead:
`scp -r monitoring root@<lxc-ip>:/opt/monitoring && ssh root@<lxc-ip> 'cd /opt/monitoring && bash install.sh'`
The script adds the Grafana apt repo, installs grafana/loki/tempo/alloy, drops the Prometheus
release binary into `/opt/prometheus`, lays our configs over the packaged defaults, and
enables all five services. It prints the URLs and the OTLP endpoint when done.
## 3. Verify
```bash
systemctl is-active grafana-server loki tempo prometheus alloy # all → active
curl -s localhost:3100/ready # Loki → ready
curl -s localhost:3200/ready # Tempo → ready
curl -s localhost:9090/-/ready # Prometheus → Ready
```
Open Grafana at `http://<lxc-ip>:3000` (first login `admin` / `admin` — change it). The three
datasources and the **BlueLaminate → Stack Overview** dashboard are provisioned automatically.
Alloy's pipeline graph is at `http://<lxc-ip>:12345`.
### End-to-end OTLP smoke test (no app changes needed)
Send synthetic telemetry from any machine that can reach the LXC, using the OpenTelemetry
`telemetrygen` tool (`go install github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen@latest`):
```bash
telemetrygen traces --otlp-endpoint <lxc-ip>:4317 --otlp-insecure --traces 5
telemetrygen metrics --otlp-endpoint <lxc-ip>:4317 --otlp-insecure --duration 10s
telemetrygen logs --otlp-endpoint <lxc-ip>:4317 --otlp-insecure --logs 5
```
Then in Grafana **Explore**: pick **Tempo** (search recent traces), **Prometheus** (query
`gen`), and **Loki** (`{service_name=~".+"}`) — seeing data in all three confirms the full
fan-out before any app is wired up.
## 4. Wiring the apps later (the OTLP contract)
This deployment is **stack-only**; the C2 and workers aren't instrumented yet. When you do,
point them at this LXC — nothing here changes. The drop-in:
**.NET C2** (`BlueLaminate.C2`) — add packages `OpenTelemetry.Extensions.Hosting`,
`OpenTelemetry.Exporter.OpenTelemetryProtocol`, and the
`OpenTelemetry.Instrumentation.AspNetCore` / `.Http` / runtime instrumentations, then
`builder.Services.AddOpenTelemetry().WithTracing(...).WithMetrics(...)` plus
`builder.Logging.AddOpenTelemetry(...)`. Configure via env:
```
OTEL_EXPORTER_OTLP_ENDPOINT=http://<lxc-ip>:4318
OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
OTEL_SERVICE_NAME=bluelaminate-c2
```
**Python workers** (`worker/csmoney_worker.py`, `skinland_worker.py`) — add
`opentelemetry-distro` and `opentelemetry-exporter-otlp` to `worker/requirements.txt`, run
under `opentelemetry-instrument python csmoney_worker.py`, same env vars with
`OTEL_SERVICE_NAME=csmoney-worker` / `skinland-worker`. (Today the workers emit structured
JSON logs to stdout — `LOG_JSON=1`, set by default in the image; an interim option is to
ship their Docker stdout to Loki with an Alloy `loki.source.docker` component on the app
host, which can parse those JSON fields directly, instead of instrumenting in-process.)
Add those env vars to the matching `docker-compose.yml` services when the instrumentation lands.
## Hardening
- **Firewall the OTLP ports.** `4317`/`4318` are bound to `0.0.0.0`. Restrict them to the app
host, e.g. `ufw allow from <app-host-ip> to any port 4317,4318 proto tcp`.
- **Auth on ingest (optional).** Add an `otelcol.auth.bearer` handler to
`otelcol.receiver.otlp` in `alloy/config.alloy` and send a matching
`OTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer <token>` from the apps.
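- **Grafana password.** Change `admin` on first login, or set `admin_password` under
  `[security]` in `/etc/grafana/grafana.ini` (equivalently, export
  `GF_SECURITY_ADMIN_PASSWORD` in `/etc/default/grafana-server`). That setting is only read
  when the admin account is first created; on an already-initialised install use
  `grafana-cli admin reset-admin-password <new-password>` instead.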
## Retention / sizing
Defaults are LXC-friendly: Prometheus **15d**, Loki **15d**, Tempo **7d**. If you have the
disk, raise the `--storage.tsdb.retention.time` flag (`prometheus.service`),
`limits_config.retention_period` (`loki.yml`), and `compactor.compaction.block_retention`
(`tempo.yml`). Re-run `install.sh` to apply config edits.

View File

@@ -0,0 +1,67 @@
// Grafana Alloy — the single OTLP ingress for the BlueLaminate fleet.
//
// Receives OTLP (gRPC :4317 / HTTP :4318) from the C2 and the Python workers, batches it,
// then fans the three signals out to the local backends:
// metrics -> Prometheus (remote-write)
// logs -> Loki (push API)
// traces -> Tempo (OTLP gRPC on :4319, a non-colliding port)
//
// OTLP is bound on 0.0.0.0 so apps on other LAN hosts can push to this LXC. Everything it
// forwards to listens on localhost only (see each backend's config) — Alloy is the only
// thing that talks to Loki/Prometheus/Tempo. See README "Hardening" to add a bearer token.
otelcol.receiver.otlp "in" {
grpc {
endpoint = "0.0.0.0:4317"
}
http {
endpoint = "0.0.0.0:4318"
}
output {
metrics = [otelcol.processor.batch.default.input]
logs = [otelcol.processor.batch.default.input]
traces = [otelcol.processor.batch.default.input]
}
}
otelcol.processor.batch "default" {
output {
metrics = [otelcol.exporter.prometheus.to_prom.input]
logs = [otelcol.exporter.loki.to_loki.input]
traces = [otelcol.exporter.otlp.to_tempo.input]
}
}
// --- metrics -> Prometheus remote-write ---------------------------------------------------
otelcol.exporter.prometheus "to_prom" {
forward_to = [prometheus.remote_write.local.receiver]
}
prometheus.remote_write "local" {
endpoint {
url = "http://localhost:9090/api/v1/write"
}
}
// --- logs -> Loki push --------------------------------------------------------------------
otelcol.exporter.loki "to_loki" {
forward_to = [loki.write.local.receiver]
}
loki.write "local" {
endpoint {
url = "http://localhost:3100/loki/api/v1/push"
}
}
// --- traces -> Tempo ----------------------------------------------------------------------
// Tempo's own OTLP receiver listens on :4319 so it doesn't collide with this Alloy receiver
// on :4317/:4318. TLS off — it's a localhost hop.
otelcol.exporter.otlp "to_tempo" {
client {
endpoint = "localhost:4319"
tls {
insecure = true
}
}
}

View File

@@ -0,0 +1,15 @@
# Grafana dashboard provider — loads JSON dashboards from /var/lib/grafana/dashboards.
# Copied to /etc/grafana/provisioning/dashboards/ by install.sh.
apiVersion: 1
providers:
- name: BlueLaminate
orgId: 1
folder: BlueLaminate
type: file
disableDeletion: false
allowUiUpdates: true
updateIntervalSeconds: 30
options:
path: /var/lib/grafana/dashboards
foldersFromFilesStructure: false

View File

@@ -0,0 +1,109 @@
{
"annotations": { "list": [] },
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"panels": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"fieldConfig": {
"defaults": {
"mappings": [
{ "type": "value", "options": { "0": { "text": "DOWN", "color": "red" }, "1": { "text": "UP", "color": "green" } } }
],
"thresholds": { "mode": "absolute", "steps": [ { "color": "red", "value": null }, { "color": "green", "value": 1 } ] }
},
"overrides": []
},
"gridPos": { "h": 6, "w": 24, "x": 0, "y": 0 },
"id": 1,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "value_and_name"
},
"pluginVersion": "11.0.0",
"targets": [
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "up", "legendFormat": "{{job}}", "refId": "A" }
],
"title": "Stack targets — up/down",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"fieldConfig": {
"defaults": { "custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1 }, "unit": "reqps" },
"overrides": []
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
"id": 2,
"options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } },
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "sum by (service_name) (rate(traces_spanmetrics_calls_total[5m]))",
"legendFormat": "{{service_name}}",
"refId": "A"
}
],
"title": "Span call rate by service (Tempo span-metrics)",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"fieldConfig": {
"defaults": { "custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1 }, "unit": "bytes" },
"overrides": []
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
"id": 3,
"options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } },
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "process_resident_memory_bytes",
"legendFormat": "{{job}}",
"refId": "A"
}
],
"title": "Stack process memory",
"type": "timeseries"
},
{
"datasource": { "type": "loki", "uid": "loki" },
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 14 },
"id": 4,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"showTime": true,
"sortOrder": "Descending",
"wrapLogMessage": true
},
"targets": [
{
"datasource": { "type": "loki", "uid": "loki" },
"expr": "{service_name=~\".+\"}",
"refId": "A"
}
],
"title": "Recent logs (all services)",
"type": "logs"
}
],
"refresh": "30s",
"schemaVersion": 39,
"tags": ["bluelaminate"],
"templating": { "list": [] },
"time": { "from": "now-6h", "to": "now" },
"timepicker": {},
"timezone": "",
"title": "BlueLaminate — Stack Overview",
"uid": "bl-overview",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,53 @@
# Grafana datasource provisioning — Prometheus (default), Loki, Tempo, wired for
# trace <-> log <-> metric correlation. Copied to
# /etc/grafana/provisioning/datasources/ by install.sh.
#
# NOTE: Grafana expands ${VAR} / $VAR in provisioning files as environment variables.
# Any literal Grafana template variable in this file must therefore be written with a
# doubled dollar sign ($${...}) or it is replaced by an empty string at load time.
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    uid: prometheus
    access: proxy
    url: http://localhost:9090
    isDefault: true
    jsonData:
      httpMethod: POST

  - name: Loki
    type: loki
    uid: loki
    access: proxy
    url: http://localhost:3100
    jsonData:
      # Turn a trace_id found on a log line into a clickable jump to the trace in Tempo.
      # OTLP logs carry the id as structured metadata `trace_id`; adjust the regex if your
      # app instrumentation emits it differently.
      derivedFields:
        - name: TraceID
          matcherType: label
          matcherRegex: trace_id
          datasourceUid: tempo
          # `$$` escapes the dollar sign so Grafana receives the literal ${__value.raw}
          # (the matched trace id) instead of expanding it as an environment variable.
          url: "$${__value.raw}"
          urlDisplayLabel: "View trace"

  - name: Tempo
    type: tempo
    uid: tempo
    access: proxy
    url: http://localhost:3200
    jsonData:
      # Span -> related logs in Loki.
      tracesToLogsV2:
        datasourceUid: loki
        spanStartTimeShift: "-1h"
        spanEndTimeShift: "1h"
        filterByTraceID: true
        filterBySpanID: false
      # Span -> RED metrics in Prometheus (from Tempo's metrics_generator).
      tracesToMetrics:
        datasourceUid: prometheus
      # Service graph + node graph from the generator's service-graph metrics.
      serviceMap:
        datasourceUid: prometheus
      nodeGraph:
        enabled: true

122
monitoring/install.sh Normal file
View File

@@ -0,0 +1,122 @@
#!/usr/bin/env bash
#
# Provision the standalone BlueLaminate observability stack on a fresh Debian LXC:
# Grafana + Loki + Tempo + Alloy (Grafana apt repo, each with its own systemd unit)
# Prometheus (official release tarball -> /opt/prometheus + our unit)
#
# Idempotent: safe to re-run (re-applies configs and restarts services). Run as root.
#
# sudo ./install.sh
#
# Override the Prometheus version with PROM_VERSION=x.y.z ./install.sh if needed.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [[ "${EUID}" -ne 0 ]]; then
echo "ERROR: run as root (sudo ./install.sh)." >&2
exit 1
fi
ARCH="$(dpkg --print-architecture)" # amd64 / arm64
echo "==> Target architecture: ${ARCH}"
# --- prerequisites ------------------------------------------------------------------------
# Only what this script actually uses: ca-certificates for the HTTPS downloads, gpg for the
# repo key tooling, wget/curl for fetching, tar for the Prometheus release archive.
# `software-properties-common` is deliberately NOT installed: the script writes the apt
# source file itself (no add-apt-repository), and the package is not installable on
# Debian 13, where it would abort the run under `set -e`. `apt-transport-https` is likewise
# omitted — apt has handled https natively for years.
echo "==> Installing prerequisites"
export DEBIAN_FRONTEND=noninteractive
apt-get update -y
apt-get install -y ca-certificates gpg wget curl tar
# --- Grafana apt repo: grafana, loki, tempo, alloy ----------------------------------------
echo "==> Adding the Grafana apt repository"
mkdir -p /etc/apt/keyrings
if [[ ! -s /etc/apt/keyrings/grafana.asc ]]; then
wget -qO /etc/apt/keyrings/grafana.asc https://apt.grafana.com/gpg-full.key
fi
echo "deb [signed-by=/etc/apt/keyrings/grafana.asc] https://apt.grafana.com stable main" \
> /etc/apt/sources.list.d/grafana.list
apt-get update -y
echo "==> Installing Grafana, Loki, Tempo, Alloy"
apt-get install -y grafana loki tempo alloy
# --- Prometheus (release tarball) ---------------------------------------------------------
# Resolution order for the version: explicit PROM_VERSION env var, then the latest GitHub
# release tag, then a pinned fallback if the GitHub API is unreachable or rate-limited.
echo "==> Installing Prometheus"
PROM_VERSION="${PROM_VERSION:-$(curl -fsSL https://api.github.com/repos/prometheus/prometheus/releases/latest \
  | grep -oP '"tag_name":\s*"v\K[^"]+' || true)}"
PROM_VERSION="${PROM_VERSION:-3.2.1}"
echo "    Prometheus version: ${PROM_VERSION}"

# Dedicated unprivileged system account for the service (created once).
id -u prometheus &>/dev/null || useradd --system --no-create-home --shell /usr/sbin/nologin prometheus

PROM_DIR="prometheus-${PROM_VERSION}.linux-${ARCH}"
PROM_URL="https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}"
TMP="$(mktemp -d)"
trap 'rm -rf "${TMP}"' EXIT

# Keep the upstream file name so it matches the entry in sha256sums.txt.
wget -qO "${TMP}/${PROM_DIR}.tar.gz" "${PROM_URL}/${PROM_DIR}.tar.gz"
wget -qO "${TMP}/sha256sums.txt"     "${PROM_URL}/sha256sums.txt"

# Verify the archive against the published checksum before anything is installed as a
# root-owned service binary. A missing entry (grep) or a mismatch (sha256sum) aborts the
# script via `set -euo pipefail`.
echo "    Verifying checksum"
( cd "${TMP}" && grep -F "  ${PROM_DIR}.tar.gz" sha256sums.txt | sha256sum -c - )

tar -xzf "${TMP}/${PROM_DIR}.tar.gz" -C "${TMP}"
install -d /opt/prometheus
install -m 0755 "${TMP}/${PROM_DIR}/prometheus" /opt/prometheus/prometheus
install -m 0755 "${TMP}/${PROM_DIR}/promtool"   /opt/prometheus/promtool
# --- data directories ---------------------------------------------------------------------
echo "==> Creating data directories"
install -d -o prometheus -g prometheus /var/lib/prometheus
install -d -o loki -g loki /var/lib/loki /var/lib/loki/chunks /var/lib/loki/rules /var/lib/loki/compactor
install -d -o tempo -g tempo /var/lib/tempo /var/lib/tempo/wal /var/lib/tempo/blocks \
/var/lib/tempo/generator/wal /var/lib/tempo/generator/traces
# --- configuration ------------------------------------------------------------------------
echo "==> Installing configuration files"
install -d /etc/alloy /etc/loki /etc/tempo /etc/prometheus
install -m 0644 "${SCRIPT_DIR}/alloy/config.alloy" /etc/alloy/config.alloy
install -m 0644 "${SCRIPT_DIR}/loki/loki.yml" /etc/loki/config.yml
install -m 0644 "${SCRIPT_DIR}/tempo/tempo.yml" /etc/tempo/config.yml
install -m 0644 "${SCRIPT_DIR}/prometheus/prometheus.yml" /etc/prometheus/prometheus.yml
install -m 0644 "${SCRIPT_DIR}/prometheus/prometheus.service" /etc/systemd/system/prometheus.service
# Point Alloy's systemd unit at our config (the package reads /etc/default/alloy).
cat > /etc/default/alloy <<'EOF'
CONFIG_FILE="/etc/alloy/config.alloy"
CUSTOM_ARGS=""
RESTART_ON_UPGRADE=true
EOF
# Grafana provisioning (datasources + dashboards).
echo "==> Installing Grafana provisioning"
install -d /etc/grafana/provisioning/datasources \
/etc/grafana/provisioning/dashboards \
/var/lib/grafana/dashboards
install -m 0644 "${SCRIPT_DIR}/grafana/datasources.yml" /etc/grafana/provisioning/datasources/bluelaminate.yml
install -m 0644 "${SCRIPT_DIR}/grafana/dashboards.yml" /etc/grafana/provisioning/dashboards/bluelaminate.yml
install -m 0644 "${SCRIPT_DIR}"/grafana/dashboards/*.json /var/lib/grafana/dashboards/
chown -R grafana:grafana /var/lib/grafana/dashboards 2>/dev/null || true
# --- start everything ---------------------------------------------------------------------
echo "==> Enabling + starting services"
systemctl daemon-reload
systemctl enable --now grafana-server loki tempo prometheus alloy
systemctl restart loki tempo prometheus alloy grafana-server
# --- summary ------------------------------------------------------------------------------
IP="$(hostname -I 2>/dev/null | awk '{print $1}')"
cat <<EOF
============================================================================
BlueLaminate observability stack installed.
Grafana UI : http://${IP:-<lxc-ip>}:3000 (first login admin/admin)
OTLP ingress : ${IP:-<lxc-ip>}:4317 (gRPC) / ${IP:-<lxc-ip>}:4318 (HTTP)
Alloy debug UI : http://${IP:-<lxc-ip>}:12345
Prometheus : http://${IP:-<lxc-ip>}:9090
Point apps at: OTEL_EXPORTER_OTLP_ENDPOINT=http://${IP:-<lxc-ip>}:4318
Readiness checks:
systemctl is-active grafana-server loki tempo prometheus alloy
curl -s localhost:3100/ready # Loki
curl -s localhost:3200/ready # Tempo
curl -s localhost:9090/-/ready # Prometheus
============================================================================
EOF

59
monitoring/loki/loki.yml Normal file
View File

@@ -0,0 +1,59 @@
# Loki — single-binary, filesystem-backed, no auth (localhost-only; Alloy is the only writer).
# Tuned for an LXC: TSDB index, 15-day retention with the compactor enforcing deletes.
auth_enabled: false
server:
http_listen_address: 127.0.0.1
http_listen_port: 3100
grpc_listen_port: 9096
log_level: info
common:
instance_addr: 127.0.0.1
path_prefix: /var/lib/loki
storage:
filesystem:
chunks_directory: /var/lib/loki/chunks
rules_directory: /var/lib/loki/rules
replication_factor: 1
ring:
kvstore:
store: inmemory
schema_config:
configs:
- from: 2024-01-01
store: tsdb
object_store: filesystem
schema: v13
index:
prefix: index_
period: 24h
limits_config:
retention_period: 360h # 15 days
reject_old_samples: true
reject_old_samples_max_age: 168h
# Required so OTLP resource/scope attributes (and trace_id/span_id) land as structured metadata.
allow_structured_metadata: true
volume_enabled: true
compactor:
working_directory: /var/lib/loki/compactor
compaction_interval: 10m
retention_enabled: true
retention_delete_delay: 2h
delete_request_store: filesystem
query_range:
results_cache:
cache:
embedded_cache:
enabled: true
max_size_mb: 100
ruler:
storage:
type: local
local:
directory: /var/lib/loki/rules

View File

@@ -0,0 +1,25 @@
# Prometheus is not in the Grafana apt repo, so install.sh drops the release binary into
# /opt/prometheus and installs this unit. Flags: remote-write + OTLP receivers ON (Alloy and
# Tempo push to it), 15-day local retention.
[Unit]
Description=Prometheus
Documentation=https://prometheus.io/docs/
Wants=network-online.target
After=network-online.target
[Service]
User=prometheus
Group=prometheus
Type=simple
Restart=on-failure
RestartSec=5
ExecStart=/opt/prometheus/prometheus \
--config.file=/etc/prometheus/prometheus.yml \
--storage.tsdb.path=/var/lib/prometheus \
--storage.tsdb.retention.time=15d \
--web.enable-remote-write-receiver \
--web.enable-otlp-receiver \
--web.listen-address=0.0.0.0:9090
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,32 @@
# Prometheus for the BlueLaminate observability LXC.
#
# App + Tempo metrics arrive via REMOTE-WRITE (Alloy and Tempo's metrics_generator push to
# /api/v1/write — enabled by the --web.enable-remote-write-receiver flag in prometheus.service),
# so they need no scrape config. The scrape jobs below are just the stack's own self-monitoring.
global:
scrape_interval: 30s
evaluation_interval: 30s
external_labels:
monitor: bluelaminate-lxc
scrape_configs:
- job_name: prometheus
static_configs:
- targets: ["localhost:9090"]
- job_name: alloy
static_configs:
- targets: ["localhost:12345"]
- job_name: loki
static_configs:
- targets: ["localhost:3100"]
- job_name: tempo
static_configs:
- targets: ["localhost:3200"]
- job_name: grafana
static_configs:
- targets: ["localhost:3000"]

View File

@@ -0,0 +1,48 @@
# Tempo — local-disk trace store. Receives OTLP from Alloy on :4319 (Alloy owns :4317/:4318),
# and runs the metrics_generator to emit RED + service-graph metrics, remote-written into
# Prometheus so Grafana can draw request rates and the service map without any app metrics.
server:
http_listen_address: 0.0.0.0
http_listen_port: 3200
grpc_listen_port: 9095
log_level: info
distributor:
receivers:
otlp:
protocols:
grpc:
endpoint: "0.0.0.0:4319"
ingester:
max_block_duration: 5m
compactor:
compaction:
block_retention: 168h # 7 days of traces
metrics_generator:
registry:
external_labels:
source: tempo
storage:
path: /var/lib/tempo/generator/wal
remote_write:
- url: http://localhost:9090/api/v1/write
send_exemplars: true
traces_storage:
path: /var/lib/tempo/generator/traces
storage:
trace:
backend: local
wal:
path: /var/lib/tempo/wal
local:
path: /var/lib/tempo/blocks
# Turn the generator on for every tenant (single-tenant here).
overrides:
defaults:
metrics_generator:
processors: [service-graphs, span-metrics]

View File

@@ -18,13 +18,20 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
WORKDIR /app
COPY worker/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY worker/worker.py worker/entrypoint.sh ./
# blworker/ is the shared package both market scripts import; ship it + the two thin
# market scripts + the entrypoint.
COPY worker/blworker ./blworker
COPY worker/csmoney_worker.py worker/skinland_worker.py worker/entrypoint.sh ./
RUN chmod +x entrypoint.sh
# Which worker this image runs (overridden per service in docker-compose). The cs.money
# worker is the default; the skin.land service sets WORKER_SCRIPT=skinland_worker.py.
ENV BROWSER_PATH=/usr/bin/chromium \
CHROME_NO_SANDBOX=1 \
DISPLAY=:99 \
SOLVE_SECONDS=45 \
WORKER_SCRIPT=csmoney_worker.py \
LOG_JSON=1 \
PYTHONUNBUFFERED=1

View File

@@ -14,47 +14,27 @@ webdriver` and chromedriver `cdc_` artifacts that Cloudflare keys on. `nodriver`
drives a normal Chromium directly over CDP (no chromedriver) and patches those
tells, so it passes where Selenium loops.
## Step 1: prove it (current)
`poc.py` proves nodriver can clear cs.money's Cloudflare and fetch the listings API
before we build the full pull-based fleet.
## Local setup
```powershell
cd worker
py -m venv .venv
.venv\Scripts\Activate.ps1
pip install -r requirements.txt
python poc.py
```
A Chromium window opens on the market. Solve the Cloudflare check if shown; the
script waits, then pages `sell-orders` deeply (PAGES), reporting how far the warm
session survives before any re-challenge and confirming full float precision.
Output lands in `worker/captures/`.
**Targeted skin+wear search.** cs.money search is free-text on the page
(`?search=cyber+security+ft`). Set `SEARCH` and the PoC navigates there, **captures
the actual filtered `sell-orders` API request the page fires** (so we learn the real
filter params instead of guessing), prints it, then pages that filtered API:
```powershell
$env:SEARCH="cyber security ft"; python poc.py # FT M4A4 Cyber Security only
```
The `>>> DISCOVERED sell-orders API call` line shows how the search maps to API
params — that's how the C2 will build targeted jobs.
Run on your own IP first (no proxy) — that's the clean A/B vs. the Selenium run.
If auto-detect can't find a browser, set `BROWSER_PATH` to Chrome or Edge
(`C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe`).
## Step 2: the pull fleet
## The pull fleet
`worker.py` holds one warm nodriver session and loops: poll the .NET C2 for a job
(a skin+wear search), scrape that search's sell-orders via in-page fetch, and post
`csmoney_worker.py` holds one warm nodriver session and loops: poll the .NET C2 for a
job (a skin+wear search), scrape that search's sell-orders via in-page fetch, and post
the items back. The C2 (`BlueLaminate.C2`) picks the stalest skin+wear from the
catalogue, and on result persists to `cs_money_listings` + `price_history`
(`Source = "csmoney"`), stamping `SkinCondition.ListingsSweptAt`.
(`Source = "csmoney"`), stamping that band's per-site checkpoint (the `csmoney`
row in `skin_condition_sweeps`). The checkpoint is per-site, so a band CSFloat
already swept is still due for a cs.money sweep.
Run the C2 (needs Postgres migrated), then the worker:
@@ -65,8 +45,64 @@ dotnet run --project BlueLaminate\BlueLaminate.C2 # serves http://local
# terminal 2 — the worker
cd worker; .venv\Scripts\Activate.ps1
$env:WORKER_TOKEN="dev-worker-token" # must match the C2's WorkerToken
python worker.py
python csmoney_worker.py
```
The worker warms the session (you clear Cloudflare once), then runs continuously.
Scale out by starting more workers (each with its own `PROXY`).
## Layout
Both market scripts are thin: each subclasses `blworker.Worker` and fills in only its
own scrape + cookie-consent steps. Everything shared lives in the `blworker/` package:
| file | responsibility |
| --- | --- |
| `blworker/config.py` | `Settings` — every env knob, parsed once |
| `blworker/log.py` | stdout logging, human or `LOG_JSON=1` (for Loki) |
| `blworker/proxy.py` | IPRoyal forwarder + session/password helpers |
| `blworker/c2.py` | `C2Client` — claim a job, post a result |
| `blworker/runtime.py` | `Worker` base: proxy/browser bring-up, the poll→scrape→post loop, Cloudflare IP rotation, graceful shutdown |
| `csmoney_worker.py` / `skinland_worker.py` | the per-market scrape strategies |
To add a market: subclass `Worker`, set `name`/`jobs_path`/`default_market_url`, implement
`scrape_job` + `describe_job` (+ `dismiss_consent` if it has a banner), and call
`run(YourWorker)`.
## skin.land worker
`skinland_worker.py` is the same pull model for **skin.land** (also Cloudflare-walled). It
shares all the proxy/Cloudflare/C2 plumbing with the cs.money worker via `blworker`; only
the scrape differs. The C2 hands out jobs from its **`/skinland/jobs`** group (the
`skinland` rows in `skin_condition_sweeps`, so a band cs.money/CSFloat already swept is
still due here) and on result persists to `skin_land_listings` + `price_history`
(`Source = "skinland"`).
How it scrapes (learned during discovery):
- A job's target is the market **page URL**, e.g.
`https://skin.land/market/csgo/ak-47-redline-field-tested/`. The slug is just
`{weapon}-{skin}-{wear}` kebab-cased — the C2 builds it from the catalogue, no lookup.
- skin.land is a Nuxt SSR app. The page embeds an internal numeric `skin_id`; the worker
resolves it once from the `__NUXT__` payload (the skin object whose `url` == the slug),
caches it per slug, then pages the clean JSON API
`GET https://app.skin.land/api/v2/obtained-skins?skin_id={id}&page={n}` (a Laravel
paginator `{data:[…offers], meta:{current_page,last_page,…}}`), walking to `last_page`.
- Each offer carries a full-precision `item_float`, `final_withdrawal_price`, and the steam
`item_link`. skin.land exposes **no paint seed**, so listings aren't fingerprinted to a
`SkinInstance` (no cross-market roll-up / dupe detection here). StatTrak and Souvenir are
separate pages (`stattrak-`/`souvenir-` slugs); v1 sweeps the base page per skin+wear.
Run it alongside (or instead of) the cs.money worker — it points at the same C2:
```powershell
cd worker; .venv\Scripts\Activate.ps1
$env:WORKER_TOKEN="dev-worker-token"
python skinland_worker.py
```
Under Docker it's the `skinland-worker` service (same image, `WORKER_SCRIPT=skinland_worker.py`):
```powershell
docker compose up --build --scale skinland-worker=5
```

View File

@@ -0,0 +1,20 @@
"""Shared scaffolding for the BlueLaminate market scrape workers.
A market worker (cs.money, skin.land, …) subclasses `Worker`, fills in its scrape +
consent steps, and calls `run(MyWorker)`. Everything else — config, logging, the IPRoyal
proxy/forwarder, the C2 client, the poll/scrape/post loop, IP rotation, graceful
shutdown — lives here so it's written once.
"""
from .config import Settings
from .runtime import ScrapeResult, Worker, click, looks_like_challenge, page_fetch, run
# Names re-exported from .config and .runtime; this list is what `from blworker import *`
# exposes.
__all__ = [
    "Settings",
    "ScrapeResult",
    "Worker",
    "click",
    "looks_like_challenge",
    "page_fetch",
    "run",
]

57
worker/blworker/c2.py Normal file
View File

@@ -0,0 +1,57 @@
"""HTTP client for the .NET C2's job endpoints.
Stdlib urllib so the blocking calls run off the asyncio loop via to_thread (the event
loop belongs to the browser). Each worker points at one job route group — "/jobs" for
cs.money, "/skinland/jobs" for skin.land — set once at construction.
"""
import asyncio
import json
import logging
import urllib.error
import urllib.request
log = logging.getLogger("c2")
class C2Client:
    """Client for one job route group of the C2 (blocking urllib behind async wrappers).

    Every failure mode — an HTTP error status, an unreachable host, a connection that
    times out or drops while the response is being read, or a body that is not JSON —
    is logged and reported as ``None``, so the caller's poll loop can carry on instead
    of crashing.
    """

    def __init__(self, base_url: str, token: str, jobs_path: str):
        # Normalise both halves so they always join with exactly one "/".
        self._base = base_url.rstrip("/")
        self._token = token
        self._jobs = jobs_path.strip("/")

    def _get_job_sync(self):
        """GET ``{base}/{jobs}/next``.

        Returns the parsed JSON job, or None when the C2 answers 204 / an empty body or
        when the request fails for any reason (the failure is logged).
        """
        import http.client  # function-scope: only needed for the exception type below

        req = urllib.request.Request(
            f"{self._base}/{self._jobs}/next", headers={"X-Worker-Token": self._token})
        try:
            with urllib.request.urlopen(req, timeout=15) as r:
                if r.status == 204:
                    return None
                return json.loads(r.read() or b"null")
        except urllib.error.HTTPError as e:
            log.warning("/%s/next -> HTTP %s", self._jobs, e.code)
            return None
        except urllib.error.URLError as e:
            log.warning("C2 unreachable: %s", e)
            return None
        except (OSError, http.client.HTTPException, ValueError) as e:
            # Errors raised while the response is being read (socket timeout, reset,
            # truncated body) are not URLError, and a non-JSON body raises ValueError.
            log.warning("/%s/next failed mid-response: %r", self._jobs, e)
            return None

    def _post_result_sync(self, job_id: str, payload: dict):
        """POST `payload` as JSON to ``{base}/{jobs}/{job_id}/result``.

        Returns the C2's parsed JSON reply, or None when the reply is empty or the
        request fails for any reason (the failure is logged).
        """
        import http.client  # function-scope: only needed for the exception type below

        data = json.dumps(payload).encode()
        req = urllib.request.Request(
            f"{self._base}/{self._jobs}/{job_id}/result", data=data, method="POST",
            headers={"X-Worker-Token": self._token, "Content-Type": "application/json"})
        try:
            with urllib.request.urlopen(req, timeout=60) as r:
                return json.loads(r.read() or b"null")
        except urllib.error.HTTPError as e:
            log.warning("result -> HTTP %s: %r", e.code, e.read()[:200])
            return None
        except urllib.error.URLError as e:
            log.warning("C2 unreachable posting result: %s", e)
            return None
        except (OSError, http.client.HTTPException, ValueError) as e:
            # Same reasoning as in _get_job_sync: read-side failures and bad JSON.
            log.warning("result failed mid-response: %r", e)
            return None

    async def get_job(self):
        """Async wrapper: run the blocking GET on a worker thread."""
        return await asyncio.to_thread(self._get_job_sync)

    async def post_result(self, job_id, payload):
        """Async wrapper: run the blocking POST on a worker thread."""
        return await asyncio.to_thread(self._post_result_sync, job_id, payload)

81
worker/blworker/config.py Normal file
View File

@@ -0,0 +1,81 @@
"""Worker configuration, parsed once from the environment.
All env knobs the workers honor live here so there's a single source of truth (the
two market workers used to each re-parse the same ~15 vars). Frozen dataclass — read
it, don't mutate it.
"""
import os
from dataclasses import dataclass
def _int(name: str, default: int) -> int:
return int(os.environ.get(name, str(default)))
def _float(name: str, default: float) -> float:
return float(os.environ.get(name, str(default)))
def _flag(name: str) -> bool:
return os.environ.get(name) == "1"
@dataclass(frozen=True)
class Settings:
# C2
c2_url: str
token: str
# Session / pacing
market_url: str # "" => use the worker's own default page
solve_seconds: int
delay: float
jitter: float
idle_seconds: int
# Browser
browser_path: str | None
load_images: bool
chrome_no_sandbox: bool
# Proxy (auth-free fallback)
proxy: str | None
# IPRoyal residential gateway
iproyal_host: str
iproyal_port: int
iproyal_username: str | None
iproyal_password: str | None
iproyal_country: str
iproyal_lifetime_min: int
# Logging
log_level: str
log_json: bool
@property
def use_iproyal(self) -> bool:
"""IPRoyal takes priority over a plain PROXY when its creds are set."""
return bool(self.iproyal_username and self.iproyal_password)
@classmethod
def from_env(cls) -> "Settings":
return cls(
c2_url=os.environ.get("C2_URL", "http://localhost:5080").rstrip("/"),
token=os.environ.get("WORKER_TOKEN", "dev-worker-token"),
market_url=os.environ.get("MARKET_URL", ""),
solve_seconds=_int("SOLVE_SECONDS", 30),
delay=_float("DELAY", 2.0),
jitter=_float("JITTER", 1.5),
idle_seconds=_int("IDLE_SECONDS", 10),
browser_path=os.environ.get("BROWSER_PATH") or None,
# Residential proxy is metered per GB; Cloudflare gates on JS, not images, and
# the market APIs are pure JSON — so block images unless explicitly debugging.
load_images=_flag("LOAD_IMAGES"),
chrome_no_sandbox=_flag("CHROME_NO_SANDBOX"),
proxy=os.environ.get("PROXY") or None,
iproyal_host=os.environ.get("IPROYAL_HOST", "geo.iproyal.com"),
iproyal_port=_int("IPROYAL_PORT", 12321),
iproyal_username=os.environ.get("IPROYAL_USERNAME") or None,
iproyal_password=os.environ.get("IPROYAL_PASSWORD") or None,
iproyal_country=os.environ.get("IPROYAL_COUNTRY", "us").strip().lower(),
iproyal_lifetime_min=_int("IPROYAL_LIFETIME_MIN", 60),
log_level=os.environ.get("LOG_LEVEL", "INFO").upper(),
log_json=_flag("LOG_JSON"),
)

47
worker/blworker/log.py Normal file
View File

@@ -0,0 +1,47 @@
"""Stdlib logging setup — one stream handler on stdout, human or JSON.
Workers used to print() everything; that gives no levels, no timestamps, and nothing
Loki can parse. Default is a compact human format for local runs; set LOG_JSON=1 in the
container so Grafana Alloy -> Loki gets structured fields (ts, level, logger, msg) plus
any `extra=` keys a call site attaches.
"""
import json
import logging
import sys
# logging.LogRecord built-ins we don't want to echo into a JSON line as "extra" fields.
_RESERVED = set(
logging.makeLogRecord({}).__dict__
) | {"message", "asctime", "taskName"}
class _JsonFormatter(logging.Formatter):
def format(self, record: logging.LogRecord) -> str:
payload = {
"ts": self.formatTime(record, "%Y-%m-%dT%H:%M:%S%z"),
"level": record.levelname,
"logger": record.name,
"msg": record.getMessage(),
}
for key, value in record.__dict__.items():
if key not in _RESERVED and not key.startswith("_"):
payload[key] = value
if record.exc_info:
payload["exc"] = self.formatException(record.exc_info)
return json.dumps(payload, default=str)
def configure(level: str = "INFO", json_logs: bool = False) -> None:
    """Point the root logger at stdout through exactly one handler.

    Any handlers already on the root logger are dropped first, so calling this again
    replaces the previous setup. `json_logs` selects the JSON formatter; otherwise a
    compact human-readable format is used.
    """
    if json_logs:
        formatter = _JsonFormatter()
    else:
        formatter = logging.Formatter(
            "%(asctime)s %(levelname)-5s %(name)s | %(message)s", "%H:%M:%S")

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(formatter)

    root = logging.getLogger()
    root.handlers.clear()
    root.addHandler(stdout_handler)
    root.setLevel(level)

154
worker/blworker/proxy.py Normal file
View File

@@ -0,0 +1,154 @@
"""IPRoyal residential proxy plumbing.
The in-process forwarder + the password/session helpers — identical across every market
worker, so they live here. HTTPS market traffic flows through the CONNECT tunnel, so the
forwarder only ever relays ciphertext. Ported from the .NET LocalForwardingProxy /
IpRoyalProxyProvider.
"""
import asyncio
import base64
import logging
import uuid
log = logging.getLogger("proxy")
def new_session_id() -> str:
    """Return a short, opaque, URL-safe token: the first 10 hex characters of a random
    UUID4. IPRoyal pins one residential exit IP per distinct session value, so a fresh
    id == a fresh IP."""
    full_hex = uuid.uuid4().hex
    return full_hex[:10]
def iproyal_password(password: str, country: str, lifetime_min: int, session_id: str) -> str:
    """Append the targeting/session knobs to the account password, IPRoyal-style:
    "<pass>_country-us_session-<id>_lifetime-60m". The country segment is left out
    when `country` is empty."""
    segments = [password]
    if country:
        segments.append(f"country-{country}")
    segments.append(f"session-{session_id}")
    segments.append(f"lifetime-{lifetime_min}m")
    return "_".join(segments)
class LocalForwardingProxy:
"""In-process HTTP proxy on 127.0.0.1 that chains every connection to the IPRoyal
gateway, injecting the Proxy-Authorization header itself. Chromium ignores creds in
--proxy-server and the in-browser ways to answer the gateway's 407 (a CDP auth
handler, or a disabled MV2 extension) are Cloudflare tells — so we terminate the
browser->proxy hop locally and add auth here, leaving Chrome to talk to an auth-free
endpoint at zero CDP. HTTPS (all market traffic) flows through the CONNECT tunnel, so
this proxy only relays ciphertext and never sees plaintext. The active session token
can be swapped live (set_password) to move to a fresh exit IP without restarting the
browser. (New tunnels pick up the new IP; any still-open keep-alive tunnel stays on
the old one until it closes.)"""
def __init__(self, host: str, port: int, username: str, password: str):
self._host = host
self._port = port
self._username = username
self._password = password
self._server: asyncio.AbstractServer | None = None
self.endpoint = ""
def set_password(self, password: str) -> None:
self._password = password
def _auth_header(self) -> str:
token = base64.b64encode(f"{self._username}:{self._password}".encode()).decode()
return f"Proxy-Authorization: Basic {token}\r\n"
async def start(self) -> "LocalForwardingProxy":
self._server = await asyncio.start_server(self._handle, "127.0.0.1", 0)
port = self._server.sockets[0].getsockname()[1]
self.endpoint = f"127.0.0.1:{port}"
return self
async def stop(self) -> None:
if self._server is not None:
self._server.close()
try:
await self._server.wait_closed()
except Exception:
pass
@staticmethod
async def _read_header(reader: asyncio.StreamReader) -> str | None:
"""Read up to the end of the HTTP header block (CRLFCRLF). None on EOF/overflow."""
try:
data = await reader.readuntil(b"\r\n\r\n")
except (asyncio.IncompleteReadError, asyncio.LimitOverrunError):
return None
return data.decode("latin-1")
async def _handle(self, client_reader: asyncio.StreamReader, client_writer: asyncio.StreamWriter) -> None:
up_writer: asyncio.StreamWriter | None = None
try:
header = await self._read_header(client_reader)
if not header:
return
parts = header.split("\r\n", 1)[0].split(" ")
if len(parts) < 2:
return
method, target = parts[0], parts[1]
up_reader, up_writer = await asyncio.open_connection(self._host, self._port)
if method.upper() == "CONNECT":
# HTTPS: open an authenticated tunnel upstream, then relay raw bytes.
up_writer.write(
f"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{self._auth_header()}\r\n".encode())
await up_writer.drain()
up_header = await self._read_header(up_reader)
status = up_header.split(" ", 2) if up_header else []
if len(status) < 2 or status[1] != "200":
line = (up_header or "no response").split("\r\n", 1)[0]
log.warning("upstream refused CONNECT %s: %s", target, line)
client_writer.write(b"HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n")
await client_writer.drain()
return
client_writer.write(b"HTTP/1.1 200 Connection established\r\n\r\n")
await client_writer.drain()
else:
# Plain HTTP: re-inject the request upstream with auth, then relay.
idx = header.index("\r\n") + 2
up_writer.write((header[:idx] + self._auth_header() + header[idx:]).encode())
await up_writer.drain()
await self._relay(client_reader, client_writer, up_reader, up_writer)
except Exception:
pass # one bad tunnel must never take down the listener
finally:
for w in (client_writer, up_writer):
if w is not None:
try:
w.close()
except Exception:
pass
@staticmethod
async def _relay(
client_reader: asyncio.StreamReader, client_writer: asyncio.StreamWriter,
up_reader: asyncio.StreamReader, up_writer: asyncio.StreamWriter) -> None:
# Pipe both directions, but tear the whole tunnel down as soon as EITHER side
# closes (mirrors the .NET WhenAny). Waiting for both — as a plain gather does —
# leaks a task holding two sockets on every half-closed connection, which piles
# up fast across a long multi-worker run. Closing both writers when the first pipe
# finishes unblocks the other's pending read so both tasks settle.
async def pipe(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
try:
while data := await reader.read(65536):
writer.write(data)
await writer.drain()
except Exception:
pass
a = asyncio.create_task(pipe(client_reader, up_writer))
b = asyncio.create_task(pipe(up_reader, client_writer))
try:
await asyncio.wait({a, b}, return_when=asyncio.FIRST_COMPLETED)
finally:
for w in (client_writer, up_writer):
try:
w.close()
except Exception:
pass
await asyncio.gather(a, b, return_exceptions=True)

235
worker/blworker/runtime.py Normal file
View File

@@ -0,0 +1,235 @@
"""The shared worker runtime — everything that's identical across market workers.
`Worker` is a template-method base: it owns the proxy/browser bring-up, the poll ->
scrape -> post loop, Cloudflare-driven IP rotation, result logging, and graceful
shutdown. A market worker subclasses it and fills in only what differs — how to dismiss
the consent banner, how to scrape one job, and how to describe a job in the log. The two
~300-line workers used to copy this whole loop verbatim.
"""
import asyncio
import json
import logging
import random
import signal
from abc import ABC, abstractmethod
from dataclasses import dataclass
import nodriver as uc
from .c2 import C2Client
from .config import Settings
from .proxy import LocalForwardingProxy, iproyal_password, new_session_id
@dataclass
class ScrapeResult:
    """What a single job scrape yields. `wire_bytes` is the metered (compressed) cost."""
    items: list  # scraped listings; the run loop posts them to the C2 as "items"
    pages: int  # posted to the C2 as "pages"
    reason: str  # posted as "stoppedReason"; "challenged" makes the run loop re-warm
    wire_bytes: int = 0  # added to the worker's lifetime wire total (logged, not posted)
def looks_like_challenge(body: str) -> bool:
    """Report whether `body` is a Cloudflare interstitial (or empty / whitespace-only).

    Detection is by Cloudflare marker strings rather than by a leading '<': a real
    market page is HTML too, so a startswith('<') test would flag every good page."""
    text = body or ""
    if not text.strip():
        return True
    return any(marker in text for marker in ("Just a moment", "challenge-platform"))
async def page_fetch(page, url: str, accept: str = "application/json") -> tuple[int, str, int]:
    """Fetch in-page from the warm (Cloudflare-cleared) session and read back the Resource
    Timing transferSize — the actual compressed bytes the metered proxy bills (or -1 when
    cross-origin timing isn't exposed). Returns (status, body, wire_bytes). Use
    accept='text/html' for an SSR page payload, the default JSON for an API.

    On failure the status is -1: the body then carries the JavaScript error text (fetch
    rejected), the raw reply (reply was not the expected JSON object), or "" (evaluate
    returned a non-string)."""
    # json.dumps yields a valid JavaScript string literal for any input; Python's repr()
    # does not (e.g. its \UXXXXXXXX escapes are not JavaScript).
    url_js = json.dumps(url)
    accept_js = json.dumps(accept)
    expr = (
        f"fetch({url_js}, {{credentials:'include', headers:{{'accept': {accept_js}}}}})"
        f".then(async r => {{"
        f"  const body = await r.text();"
        f"  const e = performance.getEntriesByName({url_js}).slice(-1)[0];"
        f"  return JSON.stringify({{status: r.status, body: body, wire: e ? e.transferSize : -1}});"
        f"}}).catch(e => JSON.stringify({{status: -1, body: String(e), wire: -1}}))"
    )
    raw = await page.evaluate(expr, await_promise=True)
    if not isinstance(raw, str):
        return (-1, "", -1)
    try:
        obj = json.loads(raw)
        return (int(obj.get("status", -1)), obj.get("body", ""), int(obj.get("wire", -1)))
    except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
        # AttributeError: the reply parsed as JSON but is not an object.
        return (-1, raw, -1)
async def click(page, text: str, timeout: int = 3) -> bool:
    """Click the best match for `text` on the page. Returns True only when an element
    was found and clicked; a miss, a timeout or any other error yields False."""
    try:
        target = await page.find(text, best_match=True, timeout=timeout)
        if not target:
            return False
        await target.click()
    except Exception:
        return False
    return True
class Worker(ABC):
    """Template-method base for a market scrape worker.

    Owns proxy and browser bring-up, the poll -> scrape -> post loop, re-warming (with
    IP rotation when on IPRoyal) after a challenge, result logging and shutdown. A
    subclass sets the three class constants below and implements `scrape_job` and
    `describe_job` (and `dismiss_consent` when the market shows a banner).
    """

    # Per-market constants, set by the subclass.
    name: str = "worker"
    jobs_path: str = "/jobs"
    default_market_url: str = ""

    def __init__(self, settings: Settings):
        self.settings = settings
        # An explicit MARKET_URL wins; otherwise the subclass's default page.
        self.market_url = settings.market_url or self.default_market_url
        self.c2 = C2Client(settings.c2_url, settings.token, self.jobs_path)
        self.log = logging.getLogger(self.name)
        self._forwarder: LocalForwardingProxy | None = None  # set only when on IPRoyal
        self._session_id: str | None = None  # current IPRoyal sticky-session id
        self._stop = asyncio.Event()  # set by the signal handlers to end the loop

    # --- hooks a market worker overrides ------------------------------------------

    @abstractmethod
    async def scrape_job(self, page, job) -> ScrapeResult:
        """Scrape ALL listings for one job and return them."""

    @abstractmethod
    def describe_job(self, job) -> str:
        """One-line job description for the log (e.g. the search term or slug)."""

    async def dismiss_consent(self, page) -> str | None:
        """Dismiss the cookie banner privacy-first; return a note, or None if absent.
        Default: nothing to do. Markets with a banner override this."""
        return None

    # --- shared machinery ---------------------------------------------------------

    def _iproyal_password(self, session_id: str) -> str:
        """The IPRoyal password with country/lifetime from settings and `session_id` baked in."""
        s = self.settings
        return iproyal_password(s.iproyal_password, s.iproyal_country, s.iproyal_lifetime_min, session_id)

    async def _pace(self, page) -> None:
        """Sleep for the configured delay plus a random 0..jitter seconds."""
        await page.sleep(self.settings.delay + random.uniform(0, self.settings.jitter))

    async def warm(self, page) -> None:
        """Open the market and clear Cloudflare so the session holds cf_clearance."""
        s = self.settings
        self.log.info("warming session at %s (clear Cloudflare; %ds)", self.market_url, s.solve_seconds)
        await page.get(self.market_url)
        await page.sleep(s.solve_seconds)
        note = await self.dismiss_consent(page)
        self.log.info("consent: %s", note or "left up")

    async def _setup_proxy(self) -> tuple[str | None, str]:
        """IPRoyal (auth'd, per-worker sticky IP) takes priority; else a plain auth-free
        PROXY; else this host's own IP. Returns (proxy_endpoint, human_label)."""
        s = self.settings
        if s.use_iproyal:
            self._session_id = new_session_id()
            self._forwarder = await LocalForwardingProxy(
                s.iproyal_host, s.iproyal_port, s.iproyal_username,
                self._iproyal_password(self._session_id)).start()
            label = f"iproyal[{s.iproyal_country or 'any'}] session {self._session_id} via {self._forwarder.endpoint}"
            return self._forwarder.endpoint, label
        return s.proxy, (s.proxy or "own IP")

    def _browser_args(self, proxy: str | None) -> list[str]:
        """Chromium command-line flags derived from the settings and the chosen proxy."""
        s = self.settings
        args = [f"--proxy-server={proxy}"] if proxy else []
        if not s.load_images:
            # Disable image loading at the engine level — the dominant bandwidth cost on
            # an image-heavy market, and unneeded for CF clearance or the JSON API.
            args.append("--blink-settings=imagesEnabled=false")
        if s.chrome_no_sandbox:
            # Required when running Chromium as root in a container.
            args += ["--no-sandbox", "--disable-dev-shm-usage"]
        return args

    async def _on_challenge(self, page) -> None:
        """The exit IP is likely flagged. On IPRoyal, rotate to a fresh sticky session
        (new IP) before re-warming; otherwise just re-solve in place."""
        if self._forwarder is not None:
            self._session_id = new_session_id()
            self._forwarder.set_password(self._iproyal_password(self._session_id))
            self.log.warning("challenged; rotating exit IP -> session %s, re-warming", self._session_id)
        else:
            self.log.warning("challenged; re-warming session")
        await self.warm(page)

    def _log_result(self, res: ScrapeResult, posted: dict | None, total_wire: int) -> None:
        """Log one job's outcome: scrape size, stop reason, the C2's reply, lifetime bytes."""
        if posted:
            summary = (f"matched {posted.get('matched')}, new {posted.get('inserted')}, "
                       f"upd {posted.get('updated')}, removed {posted.get('removed')}")
        else:
            summary = "post failed"
        self.log.info("scraped %d items (%dp, %s, %.0fKB wire) -> %s [lifetime %.1fMB]",
                      len(res.items), res.pages, res.reason, res.wire_bytes / 1024,
                      summary, total_wire / 1_048_576)

    def _install_signal_handlers(self) -> None:
        """Stop the loop on SIGINT/SIGTERM so `docker stop` shuts down cleanly. Not
        supported on Windows (ProactorEventLoop) — there Ctrl-C still raises
        KeyboardInterrupt, which the run loop's finally handles just as well."""
        try:
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGINT, signal.SIGTERM):
                loop.add_signal_handler(sig, self._stop.set)
        except (NotImplementedError, AttributeError):
            pass

    async def _idle(self) -> None:
        """Sleep when the C2 has no work, but wake immediately on shutdown."""
        try:
            await asyncio.wait_for(self._stop.wait(), timeout=self.settings.idle_seconds)
        except asyncio.TimeoutError:
            pass

    async def run(self) -> None:
        """Bring up proxy and browser, warm the session, then poll -> scrape -> post
        until a stop signal arrives. The browser and the forwarder are always torn down,
        including when the browser itself fails to start."""
        self._install_signal_handlers()
        s = self.settings
        proxy, proxy_label = await self._setup_proxy()
        self.log.info("starting (C2=%s, proxy=%s, images=%s)",
                      s.c2_url, proxy_label, "on" if s.load_images else "off")
        browser = None
        try:
            # Started inside the try so a failed launch still stops the forwarder below.
            browser = await uc.start(
                headless=False, browser_executable_path=s.browser_path,
                browser_args=self._browser_args(proxy))
            page = await browser.get("about:blank")
            await self.warm(page)
            total_wire = 0  # metered (compressed) bytes pulled, lifetime
            while not self._stop.is_set():
                job = await self.c2.get_job()
                if not job:
                    await self._idle()
                    continue
                self.log.info("job %s: %s", job["jobId"][:8], self.describe_job(job))
                res = await self.scrape_job(page, job)
                total_wire += res.wire_bytes
                # Re-warm (and rotate IP) first; the partial result is still posted after.
                if res.reason == "challenged":
                    await self._on_challenge(page)
                posted = await self.c2.post_result(job["jobId"], {
                    "items": res.items, "pages": res.pages, "stoppedReason": res.reason})
                self._log_result(res, posted, total_wire)
                await self._pace(page)
        finally:
            self.log.info("shutting down")
            if browser is not None:
                browser.stop()
            if self._forwarder is not None:
                await self._forwarder.stop()
def run(worker_cls: type[Worker]) -> None:
    """Entry point for the thin market scripts: read the settings from the environment,
    configure logging from them, then drive `worker_cls(settings).run()` to completion
    on nodriver's event loop."""
    from . import log as log_setup

    settings = Settings.from_env()
    log_setup.configure(settings.log_level, settings.log_json)

    event_loop = uc.loop()
    worker = worker_cls(settings)
    event_loop.run_until_complete(worker.run())

129
worker/csmoney_worker.py Normal file
View File

@@ -0,0 +1,129 @@
"""cs.money scrape worker (pull model).
A thin strategy over blworker.Worker: it supplies only the cs.money-specific bits — the
consent banner steps and how to scrape one skin+wear's sell-orders. The warm session, the
poll/scrape/post loop, the IPRoyal proxy and IP rotation, logging and shutdown all live in
the shared runtime. Env knobs are documented in worker/README.md.
cs.money is an Astro SSR app: the free-text market search filters server-side and the
resulting listings are embedded in the page as a __page-params JSON blob. The
/2.0/market/sell-orders API rejects a `search` param (HTTP 400), so we fetch the PAGE for
a search and read the embedded items — same item shape as the API.
A page returns at most 60 and offset is ignored, so we paginate with a FORWARD CURSOR on
float: cs.money honors `order=asc&sort=float` + `minFloat`, and float is full-precision and
effectively unique per item. We grab the 60 lowest-float items at/above `lo`, advance `lo`
to the highest float returned, and repeat until a page is under the cap. (The old
minPrice/maxPrice bisection silently truncated cheap skins: >60 listings can share a
sub-$0.02 reference band, which no price window can split — floats almost never tie, so the
cursor always makes progress.)
cd worker
.venv\\Scripts\\Activate.ps1
pip install -r requirements.txt
python csmoney_worker.py
"""
import json
import re
import urllib.parse
from blworker import ScrapeResult, Worker, click, page_fetch, run
# Search-page URL template: ascending float order, from `lo` (12 decimals) up to 1.
PAGE = (
    "https://cs.money/market/buy/"
    "?search={search}"
    "&order=asc&sort=float"
    "&minFloat={lo:.12f}&maxFloat=1"
)

# A page carrying this many items may have more behind it; fewer means it was the last.
PAGE_CAP = 60  # items per SSR page

# Captures the contents of the <script id="__page-params"> element (may span lines).
PAGE_PARAMS_RE = re.compile(
    r'<script\b[^>]*id="__page-params"[^>]*>(.*?)</script>',
    flags=re.DOTALL,
)
def extract_items(html: str) -> list:
    """Pull inventory.items out of the page's __page-params JSON blob.

    Always returns a list. It is empty when the blob is missing, is not valid JSON, is
    not a JSON object, or has no list under inventory.items (including an explicit
    `"inventory": null`, which would otherwise raise AttributeError).
    """
    m = PAGE_PARAMS_RE.search(html or "")
    if not m:
        return []
    try:
        params = json.loads(m.group(1))
    except json.JSONDecodeError:
        return []
    if not isinstance(params, dict):
        return []
    inventory = params.get("inventory")
    if not isinstance(inventory, dict):
        return []
    items = inventory.get("items")
    return items if isinstance(items, list) else []
class CsMoneyWorker(Worker):
    """cs.money strategy: consent-banner steps plus the float-cursor scrape of one search."""

    name = "csmoney"
    jobs_path = "/jobs"
    default_market_url = "https://cs.money/market/buy/"

    def describe_job(self, job) -> str:
        """Log label for a job: its free-text search term."""
        return f"search {job['search']!r}"

    async def dismiss_consent(self, page) -> str | None:
        """Privacy-preserving. The banner only offers 'Accept all' / 'Manage cookies';
        the Reject-all control lives inside the Manage window. So: Manage -> Reject all ->
        Confirm. (The data path reads SSR __page-params regardless, but this keeps the
        session honest and unblocks any future interaction.)

        Returns a comma-separated note of the steps that were clicked, or None when
        nothing was clicked."""
        steps = []
        if await click(page, "Manage cookies") or await click(page, "Manage"):
            await page.sleep(1)
            if await click(page, "Reject all"):
                steps.append("reject-all")
            # Try the confirm labels in order and stop at the first that clicks.
            for c in ("Confirm my choice", "Confirm", "Save"):
                if await click(page, c):
                    steps.append(f"confirm:{c}")
                    break
        return ", ".join(steps) if steps else None

    async def scrape_job(self, page, job) -> ScrapeResult:
        """Scrape ALL listings for one skin+wear via a forward float cursor.

        Grab the 60 lowest-float items at/above `lo`, advance `lo` to the highest float on
        the page, repeat until a page is under the cap. The boundary item is re-fetched
        (minFloat is inclusive) and dropped by the id dedup.

        The result's reason is "completed" only after a successfully fetched page came
        back under the cap. Otherwise it is one of "challenged", "fetch-failed" (non-200
        status or no __page-params blob in the body), "stuck-float-tie" or "fetch-cap"."""
        search = urllib.parse.quote_plus(job["search"])
        max_fetches = job.get("maxPages", 40)  # safety cap on page fetches per job
        seen: dict = {}
        fetches = 0
        wire = 0
        lo = 0.0
        reason = "completed"
        while fetches < max_fetches:
            status, body, wbytes = await page_fetch(page, PAGE.format(search=search, lo=lo))
            fetches += 1
            if wbytes > 0:
                wire += wbytes
            body = body or ""
            if "Just a moment" in body or "challenge-platform" in body:
                return ScrapeResult(list(seen.values()), fetches, "challenged", wire)
            if status != 200 or PAGE_PARAMS_RE.search(body) is None:
                # A failed fetch (network error, error status, page without the blob)
                # yields zero items, which would otherwise look like "last page" and be
                # reported as a complete sweep. Flag it as partial instead.
                reason = "fetch-failed"
                break
            items = extract_items(body)
            floats = []
            for it in items:
                if it.get("id") is not None:
                    seen[it["id"]] = it
                fl = (it.get("asset") or {}).get("float")
                if isinstance(fl, (int, float)):
                    floats.append(fl)
            if len(items) < PAGE_CAP:
                break  # last page — fewer than the cap means we've seen everything
            # Advance the cursor past the highest float on this page. Items at exactly that
            # float are re-fetched next round (minFloat is inclusive) and deduped by id.
            nxt = max(floats) if floats else None
            if nxt is None or nxt <= lo:
                # Cursor can't advance: >60 listings share a single float value, or the
                # items carry no float. Bail loudly rather than spin — a flagged gap beats
                # a silent one (this is the failure the price-window version hid).
                reason = "stuck-float-tie"
                break
            lo = nxt
            await self._pace(page)
        else:
            # The loop ran out of fetches without hitting a short page.
            reason = "fetch-cap"
        return ScrapeResult(list(seen.values()), fetches, reason, wire)
if __name__ == "__main__":
run(CsMoneyWorker)

View File

@@ -1,71 +0,0 @@
"""
Diagnose the cs.money cookie-consent banner so we can dismiss it programmatically.
It's likely a Shadow DOM web component (CookieConsentSystem), which is why
document.querySelectorAll-based clicks miss the real buttons.
Saves:
captures/_consent.png - screenshot (so we can SEE the banner + button positions)
captures/_consent.txt - shadow-host tags + every consent-like button found by
piercing shadow roots, with center coordinates.
cd worker; .venv\\Scripts\\Activate.ps1
python diag_consent.py
"""
import json
import os
import pathlib
import nodriver as uc
URL = os.environ.get("URL", "https://cs.money/market/buy/?search=ak-47+redline")
SOLVE_SECONDS = int(os.environ.get("SOLVE_SECONDS", "30"))
BROWSER_PATH = os.environ.get("BROWSER_PATH")
OUT = pathlib.Path(__file__).parent / "captures"
# Pierce shadow roots to find consent buttons + their viewport-center coords.
DEEP_FIND = r"""
JSON.stringify((()=>{
const hits=[], hosts=[];
function walk(root){
root.querySelectorAll('*').forEach(e=>{
if(e.shadowRoot){ hosts.push(e.tagName.toLowerCase()); walk(e.shadowRoot); }
const t=(e.textContent||'').trim();
if(t.length<40 && /accept all|manage cookies|reject all|confirm my choice|^accept$|^manage$/i.test(t)){
const r=e.getBoundingClientRect();
if(r.width>0&&r.height>0)
hits.push({tag:e.tagName, text:t, x:Math.round(r.x+r.width/2), y:Math.round(r.y+r.height/2)});
}
});
}
walk(document);
return {shadowHosts:[...new Set(hosts)], buttons:hits};
})())
"""
async def main():
    """Open the target page, wait out Cloudflare, then capture the consent banner.

    Writes a screenshot to captures/_consent.png and the DEEP_FIND result
    (shadow hosts plus matching buttons) to captures/_consent.txt, and prints
    a summary. The browser is always stopped on exit.
    """
    OUT.mkdir(exist_ok=True)
    browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH)
    try:
        page = await browser.get(URL)
        print(f"Loaded {URL}; waiting {SOLVE_SECONDS}s for Cloudflare...")
        await page.sleep(SOLVE_SECONDS)

        shot_path = str(OUT / "_consent.png")
        await page.save_screenshot(shot_path)
        print(f"screenshot -> {shot_path}")

        result = await page.evaluate(DEEP_FIND)
        if isinstance(result, str):
            info = json.loads(result)
        else:
            # evaluate() did not hand back the JSON string; keep what it gave us.
            info = {"error": repr(result)}
        report = json.dumps(info, indent=2)
        (OUT / "_consent.txt").write_text(report, encoding="utf-8")

        print("shadow hosts:", info.get("shadowHosts"))
        print("consent buttons found:")
        for button in info.get("buttons", []):
            print(f" {button}")
    finally:
        browser.stop()
if __name__ == "__main__":
uc.loop().run_until_complete(main())

View File

@@ -1,183 +0,0 @@
"""
Discover how cs.money paginates a filtered search past the initial ~60 SSR items.
Tests two hypotheses against a high-result search (default "ak-47 redline", which has
well over 60 listings):
A. Does the SSR page honor offset/limit in the URL? Fetch ?search=...&offset=60 and
?search=...&limit=120 and compare item ids to page 1. If disjoint/larger, we can
paginate cheaply by re-fetching the page.
B. The real client "load more": scroll hard to trigger lazy-load and capture any
cs.money /2.0/ XHR via Resource Timing — that request carries the structured
filter params + offset, i.e. a lighter direct-API pagination path.
Findings are printed and saved to captures/_pagination.txt.
cd worker; .venv\\Scripts\\Activate.ps1
python discover_pagination.py
$env:SEARCH="ak-47 redline"; python discover_pagination.py # override the search
"""
import json
import os
import pathlib
import re
import nodriver as uc
from nodriver import cdp
SEARCH = os.environ.get("SEARCH", "ak-47 redline")
SOLVE_SECONDS = int(os.environ.get("SOLVE_SECONDS", "30"))
BROWSER_PATH = os.environ.get("BROWSER_PATH")
PROXY = os.environ.get("PROXY")
BASE = "https://cs.money/market/buy/"
PAGE_PARAMS_RE = re.compile(r'<script\b[^>]*id="__page-params"[^>]*>(.*?)</script>', re.S)
OUT = pathlib.Path(__file__).parent / "captures"
CONSENT = ["Reject all", "Only necessary", "Reject", "Decline", "Deny"]
# Aggressive scroll: window + every scrollable container (the grid scrolls in a div,
# which is why a plain window.scrollTo didn't trigger lazy-load before).
SCROLL_JS = (
"window.scrollTo(0, document.body.scrollHeight);"
"document.querySelectorAll('*').forEach(e=>{"
" if (e.scrollHeight > e.clientHeight + 80) e.scrollTop = e.scrollHeight;});")
async def js(page, expr):
    """Evaluate the JS expression `expr` in `page` and return it as a Python value.

    The expression is wrapped in JSON.stringify so the result crosses the
    bridge as a string. Returns None when the result is not a string or is
    not valid JSON.
    """
    serialized = await page.evaluate(f"JSON.stringify({expr})")
    if not isinstance(serialized, str):
        return None
    try:
        return json.loads(serialized)
    except (json.JSONDecodeError, TypeError):
        return None
async def fetch_text(page, url):
    """Fetch `url` from inside the page (credentials included).

    Returns (status, body) as reported by the in-page fetch, or (None, "")
    when the bridge result cannot be decoded as JSON.
    """
    script = ("fetch(" + repr(url) + ",{credentials:'include'}).then(async r=>"
              "JSON.stringify({status:r.status, body:await r.text()}))")
    reply = await page.evaluate(script, await_promise=True)
    try:
        decoded = json.loads(reply)
    except (json.JSONDecodeError, TypeError):
        return None, ""
    return decoded.get("status"), decoded.get("body", "")
def page_item_ids(html):
    """Return the `id` of every inventory item in the page's __page-params blob.

    Returns [] when `html` is empty/None, the blob is missing, or the blob is
    not valid JSON.
    """
    found = PAGE_PARAMS_RE.search(html or "")
    if found is None:
        return []
    try:
        params = json.loads(found.group(1))
    except json.JSONDecodeError:
        return []
    listing = params.get("inventory", {}).get("items", [])
    return [entry.get("id") for entry in listing]
async def click_visible(page, pattern):
    """Click the first VISIBLE element whose trimmed text matches `pattern`.

    The match is a case-insensitive JS regex. Only elements with a non-null
    offsetParent and fewer than 40 characters of text are considered, which
    skips hidden/duplicate nodes and large containers. Returns True when the
    in-page script reports that it clicked something.
    """
    script = "".join((
        "JSON.stringify((()=>{",
        "const re=new RegExp(", json.dumps(pattern), ",'i');",
        "const els=[...document.querySelectorAll('button,a,[role=\"button\"],span,div')];",
        "const b=els.find(e=>e.offsetParent!==null && (e.textContent||'').trim().length<40 ",
        "&& re.test((e.textContent||'').trim()));",
        "if(b){b.click();return true}return false})())",
    ))
    outcome = await page.evaluate(script)
    if not isinstance(outcome, str):
        return False
    return "true" in outcome
async def banner_present(page):
    """Return True when the page text still mentions the consent banner.

    The check looks for "Manage cookies" or "Accept all" (case-insensitive)
    in document.body.innerText.
    """
    probe = "JSON.stringify(/Manage cookies|Accept all/i.test(document.body.innerText||''))"
    outcome = await page.evaluate(probe)
    if not isinstance(outcome, str):
        return False
    return "true" in outcome
# Dismiss the cookie banner on `page` and return a comma-separated trail of the
# steps that were taken (built from the `steps` list below).
async def dismiss(page):
"""Privacy-preserving first (Manage -> Reject all -> Confirm); if the banner is
still up, fall back to Accept all so the page becomes interactive (discovery
needs scrolling to work)."""
steps = []
# Step 1: open the Manage window, where the Reject-all control lives.
if await click_visible(page, "manage cookies|^manage$"):
steps.append("manage")
await page.sleep(1.2)
if await click_visible(page, "reject all"):
steps.append("reject-all")
await page.sleep(0.4)
# Step 2: confirm the choice; the first label that matches wins.
for c in ("confirm my choice", "^confirm$", "^save$"):
if await click_visible(page, c):
steps.append("confirm")
break
await page.sleep(1)
# Step 3: fallback — if the banner text is still on the page, try Accept all and
# record whether that click landed.
if await banner_present(page):
steps.append("still-up->accept" if await click_visible(page, "accept all|^accept$") else "still-up")
await page.sleep(0.5)
# Record the outcome of a last banner check so the trail shows whether it worked.
steps.append("gone" if not await banner_present(page) else "STILL-PRESENT")
return ", ".join(steps)
# Discovery driver: warm a browser session on the search page, then test the two
# pagination hypotheses (A: URL offset/limit on the SSR page, B: client load-more
# XHRs triggered by scrolling). Findings are printed and written to
# captures/_pagination.txt; the browser is stopped in the finally clause.
async def main():
OUT.mkdir(exist_ok=True)
# Optional proxy flag first, then disable image loading.
args = [f"--proxy-server={PROXY}"] if PROXY else []
args.append("--blink-settings=imagesEnabled=false")
from urllib.parse import quote_plus
q = quote_plus(SEARCH)
findings = []
browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH, browser_args=args)
try:
url0 = f"{BASE}?search={q}"
page = await browser.get(url0)
print(f"Warming on {url0} ({SOLVE_SECONDS}s for Cloudflare)...")
await page.sleep(SOLVE_SECONDS)
print(f"Consent: {await dismiss(page)}")
# --- A. URL offset/limit on the SSR page ---
# Three in-page fetches: baseline, &offset=60, &limit=120. Only the item ids are compared.
_, h0 = await fetch_text(page, f"{BASE}?search={q}")
_, h1 = await fetch_text(page, f"{BASE}?search={q}&offset=60")
_, h2 = await fetch_text(page, f"{BASE}?search={q}&limit=120")
a, b, c = page_item_ids(h0), page_item_ids(h1), page_item_ids(h2)
overlap = len(set(a) & set(b))
findings.append(f"page1 ids={len(a)} offset=60 ids={len(b)} (overlap with page1={overlap}) limit=120 ids={len(c)}")
# offset "works" only if the offset page is non-empty AND shares no id with page 1.
findings.append(f" -> offset works? {'YES (disjoint)' if b and overlap == 0 else 'no/ignored'}")
findings.append(f" -> limit works? {'YES (>60)' if len(c) > 60 else 'no/ignored'}")
# --- B. Trigger client load-more, capture cs.money /2.0/ XHRs ---
# Infinite scroll only fires on GRADUAL downward scrolling — jumping to the
# bottom skips the trigger. So step down in small wheel increments and watch
# the item count grow.
# Snapshot the resource URLs already recorded, so only post-scroll requests count as new.
before = set(await js(page, "performance.getEntriesByType('resource').map(e=>e.name)") or [])
# card_count returns whatever evaluate() yields for the stringified count; it is only printed.
async def card_count():
n = await page.evaluate(
"JSON.stringify(document.querySelectorAll('[href*=\"/item/\"],[class*=\"item\" i]').length)")
return n
print(f" cards before scroll: {await card_count()}")
for step in range(60):
try:
await page.send(cdp.input_.dispatch_mouse_event(
type_="mouseWheel", x=720, y=450, delta_x=0, delta_y=500))
except Exception:
# A failed wheel event is ignored; the loop just tries the next step.
pass
await page.sleep(0.7)
# Progress line every 15 steps.
if step % 15 == 14:
now = [u for u in (await js(page, "performance.getEntriesByType('resource').map(e=>e.name)") or [])
if u not in before and "cs.money" in u and "metrics." not in u and "traces." not in u]
print(f" step {step+1}: cards={await card_count()} new cs.money reqs={len(now)}")
after = await js(page, "performance.getEntriesByType('resource').map(e=>e.name)") or []
# Keep only new cs.money requests, excluding URLs containing "metrics." or "traces.".
new_xhrs = [u for u in after if u not in before and "cs.money" in u
and "metrics." not in u and "traces." not in u]
findings.append(f"\nclient requests after scrolling ({len(new_xhrs)} new cs.money):")
# dict.fromkeys de-duplicates while keeping first-seen order.
findings.extend(f" {u}" for u in dict.fromkeys(new_xhrs))
if not new_xhrs:
findings.append(" (none — grid may not lazy-load via XHR, or scroll didn't reach the trigger)")
report = "\n".join(findings)
print("\n=== FINDINGS ===\n" + report)
(OUT / "_pagination.txt").write_text(f"search: {SEARCH}\n\n{report}\n", encoding="utf-8")
print(f"\nsaved to {OUT / '_pagination.txt'}")
finally:
browser.stop()
if __name__ == "__main__":
uc.loop().run_until_complete(main())

View File

@@ -1,96 +0,0 @@
"""
Find cs.money's price-filter URL param (the basis for price-bucket pagination).
The market has a Price from/to filter in the sidebar. `search=` works via the URL and
the page SSRs the filtered listings into __page-params, so a price param likely works
the same way. We baseline the cheapest set, then try candidate param names with a high
floor and check whether the returned listings actually shift above it.
cd worker; .venv\\Scripts\\Activate.ps1
python discover_price_param.py
"""
import json
import os
import pathlib
import re
from urllib.parse import quote_plus
import nodriver as uc
SEARCH = os.environ.get("SEARCH", "ak-47 redline")
FLOOR = float(os.environ.get("FLOOR", "200"))
SOLVE_SECONDS = int(os.environ.get("SOLVE_SECONDS", "30"))
BROWSER_PATH = os.environ.get("BROWSER_PATH")
BASE = "https://cs.money/market/buy/"
PP = re.compile(r'<script\b[^>]*id="__page-params"[^>]*>(.*?)</script>', re.S)
OUT = pathlib.Path(__file__).parent / "captures"
# Param-name variants for a price floor (and a couple of from/to pairs).
CANDIDATES = [
"minPrice", "priceFrom", "price_from", "priceMin", "min_price",
"priceGte", "from", "price_min", "minprice", "price.gte", "pricegte",
]
async def fetch_prices(page, url):
    """Fetch `url` in-page and return the `pricing` dict of every listed item.

    Items without a truthy `pricing` entry are left out. Returns None when the
    bridge result, the __page-params blob, or its JSON cannot be read.
    """
    script = ("fetch(" + repr(url) + ",{credentials:'include'}).then(async r=>"
              "JSON.stringify({status:r.status, body:await r.text()}))")
    reply = await page.evaluate(script, await_promise=True)
    try:
        html = json.loads(reply).get("body", "")
    except (json.JSONDecodeError, TypeError):
        return None
    found = PP.search(html or "")
    if found is None:
        return None
    try:
        listing = json.loads(found.group(1)).get("inventory", {}).get("items", [])
    except json.JSONDecodeError:
        return None
    return [entry.get("pricing", {}) for entry in listing if entry.get("pricing")]
async def main():
    """Probe how the SSR market page reacts to minPrice/maxPrice URL params.

    Warms a browser session on the search page, fetches a baseline plus
    several price-window variants in-page, and for each one reports the item
    count and the [min,max] range of the `default`, `computed` and
    `basePrice` pricing fields. Lines are printed as they are produced and
    saved to captures/_price_param.txt. The browser is always stopped.
    """
    OUT.mkdir(exist_ok=True)
    q = quote_plus(SEARCH)
    lines = []
    browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH,
                             browser_args=["--blink-settings=imagesEnabled=false"])
    try:
        page = await browser.get(f"{BASE}?search={q}")
        print(f"Warming ({SOLVE_SECONDS}s)...")
        await page.sleep(SOLVE_SECONDS)
        # Test minPrice/maxPrice semantics directly (old cs.money API used these).
        tests = [
            ("baseline", f"{BASE}?search={q}"),
            ("maxPrice=200", f"{BASE}?search={q}&maxPrice=200"),
            ("minPrice=300", f"{BASE}?search={q}&minPrice=300"),
            ("minPrice=300&maxPrice=400", f"{BASE}?search={q}&minPrice=300&maxPrice=400"),
            ("minPrice=500&maxPrice=1000", f"{BASE}?search={q}&minPrice=500&maxPrice=1000"),
        ]

        def rng(pr, field):
            """(min, max) of the numeric values of `field`, or (None, None)."""
            vals = [p.get(field) for p in pr if isinstance(p.get(field), (int, float))]
            return (min(vals), max(vals)) if vals else (None, None)

        def span(pr, field):
            """Format rng() as "[lo,hi]"; "[-]" when no item has a numeric value.

            A field that is absent from every item used to reach the ":.2f"
            format as None and raise TypeError, aborting the whole probe.
            """
            lo, hi = rng(pr, field)
            return "[-]" if lo is None else f"[{lo:.2f},{hi:.2f}]"

        for name, url in tests:
            pr = await fetch_prices(page, url)
            if not pr:
                lines.append(f"{name:28} -> no items")
            else:
                lines.append(f"{name:28} -> n={len(pr)} default{span(pr, 'default')} "
                             f"computed{span(pr, 'computed')} base{span(pr, 'basePrice')}")
            print(lines[-1])
        (OUT / "_price_param.txt").write_text(
            f"search={SEARCH} floor={FLOOR}\n\n" + "\n".join(lines), encoding="utf-8")
        print(f"\nsaved to {OUT/'_price_param.txt'}")
    finally:
        browser.stop()
if __name__ == "__main__":
uc.loop().run_until_complete(main())

View File

@@ -15,5 +15,6 @@ x11vnc -display "${DISPLAY_NUM}" -forever -shared -nopw -quiet -bg
echo "[entrypoint] starting noVNC on :6080 (open http://localhost:6080/vnc.html)"
websockify --web=/usr/share/novnc 6080 localhost:5900 &
echo "[entrypoint] launching worker"
exec python worker.py
WORKER_SCRIPT="${WORKER_SCRIPT:-csmoney_worker.py}"
echo "[entrypoint] launching ${WORKER_SCRIPT}"
exec python "${WORKER_SCRIPT}"

View File

@@ -1,285 +0,0 @@
"""
Proof-of-concept / pre-fleet validation for the cs.money scraper.
Proves the things we need before building the C2 + worker fleet:
1. nodriver clears cs.money's Cloudflare where .NET Selenium couldn't.
2. a single WARM session can page the sell-orders API deeply without re-challenge.
3. a free-text market search (e.g. "cyber security ft") can be turned into a
filtered sell-orders API call — we DISCOVER the real API params by capturing the
request the page itself fires, instead of guessing.
It opens the market (optionally a search URL) in a real non-headless Chromium, lets
you clear Cloudflare, dismisses the cookie banner (privacy-preserving), captures the
sell-orders request the page makes, then pages that API from inside the cleared page
(same-origin fetch carries cf_clearance), pacing itself and stopping on re-challenge.
cd worker
.venv\\Scripts\\Activate.ps1
pip install -r requirements.txt
python poc.py # whole-market sweep
$env:SEARCH="cyber security ft"; python poc.py # targeted: FT M4A4 Cyber Security
Env knobs (all optional):
SEARCH free-text market search; when set, scrape only those results
MARKET_URL market page base (default the buy market)
SOLVE_SECONDS seconds to wait for you to clear Cloudflare (default 30)
PAGES how many offset pages (60 each) to attempt (default 20)
START_OFFSET first offset (default 0)
DELAY / JITTER base + random seconds between fetches (default 2.0 / 1.5)
PROXY host:port for an auth-free proxy (omit to use your own IP)
BROWSER_PATH path to Chrome/Edge if auto-detect fails
"""
import json
import os
import pathlib
import random
from urllib.parse import quote_plus, urlsplit, parse_qsl, urlencode, urlunsplit
import nodriver as uc
from nodriver import cdp
SEARCH = os.environ.get("SEARCH")
MARKET_URL = os.environ.get("MARKET_URL", "https://cs.money/market/buy/")
SOLVE_SECONDS = int(os.environ.get("SOLVE_SECONDS", "30"))
PAGES = int(os.environ.get("PAGES", "20"))
START_OFFSET = int(os.environ.get("START_OFFSET", "0"))
DELAY = float(os.environ.get("DELAY", "2.0"))
JITTER = float(os.environ.get("JITTER", "1.5"))
PROXY = os.environ.get("PROXY")
BROWSER_PATH = os.environ.get("BROWSER_PATH")
# Fallback template if we fail to capture the page's own request (offset = {}).
DEFAULT_TEMPLATE = "https://cs.money/2.0/market/sell-orders?limit=60&offset={}"
OUT_DIR = pathlib.Path(__file__).parent / "captures"
CONSENT_LABELS = ["Reject all", "Reject All", "Only necessary", "Necessary only",
"Reject", "Decline", "Deny"]
# Filled by the CDP network handler with sell-orders request URLs the page fires.
_seen_urls: list[str] = []
def looks_like_challenge(body: str) -> bool:
    """Heuristic: does this API response look like a challenge page, not JSON?

    True for an empty/None/whitespace-only body, a body that starts with "<"
    after leading whitespace, or a body containing one of the two Cloudflare
    marker strings.
    """
    text = (body or "").lstrip()
    if not text or text.startswith("<"):
        return True
    return "Just a moment" in body or "challenge-platform" in body
def decimals(v: float) -> int:
    """Count the decimal places in the shortest repr of ``float(v)``.

    Examples: 0.5 -> 1, 0.0625 -> 4, 2.0 -> 1, 1.5e-07 -> 8.

    The count comes from the decimal exponent of the repr, so floats that
    Python prints in scientific notation (anything below 1e-4) are counted
    correctly. Splitting the repr on "." miscounted them: "1.5e-07" gave 5
    and "1e-07" gave 0. inf and nan yield 0.
    """
    from decimal import Decimal  # local import: keeps this helper self-contained

    exponent = Decimal(repr(float(v))).as_tuple().exponent
    if not isinstance(exponent, int):
        # inf / nan carry a letter code instead of a numeric exponent.
        return 0
    return max(0, -exponent)
def template_from(url: str) -> str:
    """Turn a captured sell-orders URL into a pagination template.

    Every query parameter except `offset` is kept in its original order,
    `limit=60` is appended when the URL had no limit, and a trailing
    `offset={}` placeholder is added for str.format. Any fragment is dropped.
    """
    parts = urlsplit(url)
    kept = []
    has_limit = False
    for key, value in parse_qsl(parts.query, keep_blank_values=True):
        if key == "offset":
            continue
        if key == "limit":
            has_limit = True
        kept.append((key, value))
    if not has_limit:
        kept.append(("limit", "60"))
    encoded = urlencode(kept)
    prefix = f"{encoded}&" if encoded else ""
    return urlunsplit((parts.scheme, parts.netloc, parts.path, prefix + "offset={}", ""))
async def dismiss_consent(page) -> str | None:
    """Best-effort, privacy-preserving — never clicks 'Accept all'.

    Tries each label in CONSENT_LABELS in order and returns the first one
    that was found and clicked without error, or None when none worked.
    Lookup and click failures are ignored and the next label is tried.
    """
    for label in CONSENT_LABELS:
        try:
            target = await page.find(label, best_match=True, timeout=2)
        except Exception:
            continue
        if not target:
            continue
        try:
            await target.click()
        except Exception:
            continue
        return label
    return None
async def fetch_json(page, url: str) -> tuple[str, str]:
    """Fetch `url` from inside the page, asking for JSON, and return (status, body).

    The status is always a string. It is "-1" when the bridge did not return
    a string (body "") or the bridge string was not valid JSON (body is the
    raw string).
    """
    call = "fetch(" + repr(url) + ", {credentials:'include', headers:{'accept':'application/json'}})"
    script = call + ".then(async r => JSON.stringify({status: r.status, body: await r.text()}))"
    reply = await page.evaluate(script, await_promise=True)
    if not isinstance(reply, str):
        return ("-1", "")
    try:
        decoded = json.loads(reply)
    except json.JSONDecodeError:
        return ("-1", reply)
    return (str(decoded.get("status", "-1")), decoded.get("body", ""))
# Proof-of-concept driver. Phases, in order:
#   1. build the target URL (optionally a search) and launch the browser
#   2. attach a CDP request handler, open the page, wait for the manual Cloudflare solve
#   3. dismiss the consent banner, scroll to provoke lazy-load requests
#   4. read Resource Timing + dump the page HTML into captures/
#   5. derive a pagination template from a captured sell-orders URL (or fall back)
#   6. page the API from inside the page, saving each page and collecting float stats
#   7. print a summary; the browser is stopped in the finally clause
async def main():
OUT_DIR.mkdir(exist_ok=True)
args = [f"--proxy-server={PROXY}"] if PROXY else []
target_url = MARKET_URL
# `tag` prefixes the capture filenames written during paging.
tag = "market"
if SEARCH:
sep = "&" if "?" in MARKET_URL else "?"
target_url = f"{MARKET_URL}{sep}search={quote_plus(SEARCH)}"
# Filename-safe tag: non-alphanumerics become "_", truncated to 40 characters.
tag = "search_" + "".join(c if c.isalnum() else "_" for c in SEARCH)[:40]
print(f"Launching nodriver Chromium (proxy={PROXY or 'none / own IP'})...")
browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH, browser_args=args)
# Running totals for the summary. dp_min/dp_max start at sentinels (99 / 0) so the
# first observed decimal count replaces them.
pages_ok = items_total = floats_total = low_prec = 0
dp_min, dp_max = 99, 0
deepest_offset = None
reason = "completed (hit PAGES limit)"
try:
# Open a blank tab first so the network handler is attached BEFORE the page
# fires its filtered sell-orders request (otherwise we'd miss it).
page = await browser.get("about:blank")
# Records every sell-orders request URL into the module-level _seen_urls list.
async def on_request(evt):
url = evt.request.url
if "/market/sell-orders" in url:
_seen_urls.append(url)
page.add_handler(cdp.network.RequestWillBeSent, on_request)
try:
await page.send(cdp.network.enable())
except Exception as ex:
print(f"(network capture unavailable: {ex})")
print(f"Opening {target_url}")
await page.get(target_url)
print(f"Solve any Cloudflare challenge. Waiting {SOLVE_SECONDS}s for the grid...")
await page.sleep(SOLVE_SECONDS)
clicked = await dismiss_consent(page)
print(f"Consent banner: {'dismissed via ' + clicked if clicked else 'left up (does not block fetch)'}")
# Reliable discovery via the Resource Timing API: the browser records EVERY
# request the page made, so we read the real sell-orders URL straight out of it
# (no flaky CDP event timing). Also dump nearby API calls for context.
# cs.money is an Astro SSR app — the initial filtered listings are rendered
# server-side (no client XHR to capture). Scroll to provoke lazy-load
# pagination, which DOES fire a client request carrying the real filter params.
print("Scrolling to trigger lazy-load pagination...")
for _ in range(6):
try:
await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
except Exception:
# A failed scroll is ignored; the loop still waits and tries again.
pass
await page.sleep(2)
# nodriver returns arrays unreliably from evaluate(), so JSON.stringify in JS
# and json.loads here (the string path is proven by fetch_json).
async def js_list(expr: str) -> list:
raw = await page.evaluate(f"JSON.stringify({expr})")
try:
return json.loads(raw) if isinstance(raw, str) else []
except (json.JSONDecodeError, TypeError):
return []
try:
all_urls = await js_list("performance.getEntriesByType('resource').map(e=>e.name)")
print(f">>> Resource Timing saw {len(all_urls)} requests total")
if all_urls:
# dict.fromkeys de-duplicates while keeping first-seen order.
(OUT_DIR / "_all_requests.txt").write_text(
"\n".join(dict.fromkeys(all_urls)), encoding="utf-8")
sell = [u for u in all_urls if "/market/sell-orders" in u]
_seen_urls.extend(sell)
# "API call" here means a cs.money URL containing a /2.0/ or /1.0/ path segment.
api = [u for u in all_urls if "cs.money/" in u and ("/2.0/" in u or "/1.0/" in u)]
if api:
(OUT_DIR / "_api_calls.txt").write_text("\n".join(dict.fromkeys(api)), encoding="utf-8")
print(f">>> {len(set(api))} cs.money API calls; saved to {OUT_DIR / '_api_calls.txt'}")
except Exception as ex:
print(f"(resource-timing query failed: {ex})")
# Dump the SSR'd page so we can see how the filter is encoded and where the
# listings data lives (Astro embeds island props / hydration JSON in the HTML).
try:
html = await page.evaluate("document.documentElement.outerHTML")
if isinstance(html, str) and html:
(OUT_DIR / "_page.html").write_text(html, encoding="utf-8")
print(f">>> saved page HTML ({len(html)} bytes) to {OUT_DIR / '_page.html'}")
except Exception as ex:
print(f"(page HTML dump failed: {ex})")
# Discovery: what sell-orders request did the page actually make?
if _seen_urls:
# The most recently captured URL becomes the template source.
captured = _seen_urls[-1]
template = template_from(captured)
print("\n>>> DISCOVERED sell-orders API call the page fired:")
print(f" {captured}")
print(f">>> pagination template: {template}\n")
# Persist it — the console line is easy to lose, and this is the one bit
# of ground truth (the real filter-param scheme) we need.
(OUT_DIR / "_discovered.txt").write_text(
"ALL captured sell-orders requests:\n"
+ "\n".join(dict.fromkeys(_seen_urls))
+ f"\n\npagination template:\n{template}\n",
encoding="utf-8")
print(f">>> saved to {OUT_DIR / '_discovered.txt'}")
else:
template = DEFAULT_TEMPLATE
if SEARCH:
# Splice a guessed search= param in front of the offset placeholder; the doubled
# braces keep a literal "{}" for the later template.format(offset).
template = template.replace("offset={}", f"search={quote_plus(SEARCH)}&offset={{}}")
print(f"\n(no request captured; falling back to template: {template})\n")
# Page the API in steps of 60, stopping on re-challenge, non-JSON, or an empty page.
for i in range(PAGES):
offset = START_OFFSET + i * 60
status, body = await fetch_json(page, template.format(offset))
if looks_like_challenge(body):
print(f" page {i + 1} [offset {offset}]: RE-CHALLENGED (status {status}). Stopping.")
(OUT_DIR / f"{tag}_challenge_offset_{offset}.html").write_text(body, encoding="utf-8")
reason = f"re-challenged at offset {offset}"
break
try:
items = json.loads(body).get("items", [])
except json.JSONDecodeError:
print(f" page {i + 1} [offset {offset}]: non-JSON (status {status}). Stopping.")
reason = f"non-JSON at offset {offset}"
break
if not items:
print(f" page {i + 1} [offset {offset}]: 0 items — end of results.")
reason = "end of results"
break
(OUT_DIR / f"{tag}_offset_{offset:06d}.json").write_text(body, encoding="utf-8")
pages_ok += 1
deepest_offset = offset
items_total += len(items)
names = set()
for it in items:
fl = it.get("asset", {}).get("float")
if fl is not None:
floats_total += 1
d = decimals(fl)
dp_min, dp_max = min(dp_min, d), max(dp_max, d)
if d <= 6: # short repr — exact binary fraction (e.g. 1/16), not truncation
low_prec += 1
names.add(it.get("asset", {}).get("names", {}).get("full"))
sample = next(iter(names), None) if SEARCH else None
print(f" page {i + 1} [offset {offset}] OK — {len(items)} items"
+ (f" (e.g. {sample}; {len(names)} distinct names)" if SEARCH else ""))
# Pace requests: base delay plus random jitter.
await page.sleep(DELAY + random.uniform(0, JITTER))
print("\n=== summary ===")
print(f" query: {SEARCH or '(whole market)'}")
print(f" stopped: {reason}")
print(f" clean pages: {pages_ok} deepest offset: {deepest_offset} items: {items_total}")
if floats_total:
# Truncation would make MANY values short, not one exact binary fraction.
verdict = "FULL precision" if low_prec / floats_total < 0.02 else "POSSIBLE TRUNCATION"
print(f" floats: {floats_total} items, {dp_max}-decimal max, "
f"{low_prec} short-repr (exact fractions) — {verdict}")
print(f" files in {OUT_DIR}")
finally:
browser.stop()
if __name__ == "__main__":
uc.loop().run_until_complete(main())

View File

@@ -1,77 +0,0 @@
"""
Probe which extra filter params cs.money's SSR market search honors, so we can
pick a SECOND pagination axis to break apart dense price bands that saturate the
60-cap (see diag_windows.py). For a saturating search we try candidate params and
report how the returned set's size + float range + price range change.
python probe_filters.py "Glock-18 Candy Apple mw"
"""
import asyncio
import sys
import nodriver as uc
import worker
BASE = "https://cs.money/market/buy/?search={q}"
# (label, extra query string) — candidates cs.money markets commonly expose.
CANDIDATES = [
("baseline", ""),
("sort=price asc", "&order=asc&sort=price"),
("sort=price desc", "&order=desc&sort=price"),
("sort=float", "&sort=float"),
("minFloat/maxFloat lo", "&minFloat=0.07&maxFloat=0.10"),
("minFloat/maxFloat hi", "&minFloat=0.10&maxFloat=0.15"),
("maxWear lo", "&minWear=0.07&maxWear=0.10"),
("isStatTrak=true", "&isStatTrak=true"),
("hasStickers=false", "&hasStickers=false"),
]
def stats(items):
    """Summarise a result set as "n=<count> float[lo,hi] base[lo,hi]".

    The float range uses each item's numeric `asset.float`. The base range
    uses `pricing.basePrice`, falling back to `pricing.computed` when
    basePrice is absent. A range with no numeric values prints as "[-]".
    """
    float_vals = []
    base_vals = []
    for item in items:
        wear = (item.get("asset") or {}).get("float")
        if isinstance(wear, (int, float)):
            float_vals.append(wear)
        pricing = item.get("pricing") or {}
        base = pricing.get("basePrice", pricing.get("computed"))
        if isinstance(base, (int, float)):
            base_vals.append(base)
    if float_vals:
        float_range = f"[{min(float_vals):.4f},{max(float_vals):.4f}]"
    else:
        float_range = "[-]"
    if base_vals:
        base_range = f"[{min(base_vals):.2f},{max(base_vals):.2f}]"
    else:
        base_range = "[-]"
    return f"n={len(items):3d} float{float_range} base{base_range}"
async def main():
    """Try each candidate filter against one search and print how the results change.

    The search comes from the command-line arguments (default
    "Glock-18 Candy Apple mw"). Each candidate's result set is summarised
    with stats() and compared by item id against the baseline: an identical
    set means the param was ignored. The probe stops at the first challenged
    response. The browser is always stopped.
    """
    search = " ".join(sys.argv[1:]) or "Glock-18 Candy Apple mw"
    q = worker.urllib.parse.quote_plus(search)
    browser = await uc.start(headless=False,
                             browser_args=["--blink-settings=imagesEnabled=false"])
    try:
        page = await browser.get("about:blank")
        await worker.warm(page)
        base_url = BASE.format(q=q)
        base_ids = None
        for label, extra in CANDIDATES:
            status, body = await worker.fetch_json(page, base_url + extra)
            challenged = "Just a moment" in body or "challenge-platform" in body
            if challenged:
                print(f" {label:24s} CHALLENGED")
                break
            items = worker.extract_items(body)
            ids = {it.get("id") for it in items}
            if label == "baseline":
                base_ids = ids
                delta = ""
            elif ids == base_ids:
                # If a param is IGNORED, the set is identical to baseline.
                delta = "IGNORED (== baseline)"
            else:
                delta = f"CHANGED ({len(ids ^ (base_ids or set()))} diff ids)"
            print(f" {label:24s} {stats(items)} {delta}")
            await page.sleep(worker.DELAY)
    finally:
        browser.stop()
if __name__ == "__main__":
uc.loop().run_until_complete(main())

View File

@@ -1,5 +1,9 @@
# cs.money scraping worker.
# Market scraping workers (cs.money, skin.land).
# nodriver = the modern successor to undetected-chromedriver: it drives a normal
# Chromium over CDP directly (no chromedriver, so none of the cdc_/webdriver tells
# that got our .NET Selenium setup insta-challenged by Cloudflare).
nodriver>=0.39
#
# Everything else the workers use is the Python stdlib (asyncio, urllib, logging, json) —
# no other third-party deps. Upper bound is a guard against a surprise breaking release;
# bump it deliberately after testing a challenge solve.
nodriver>=0.39,<0.50

174
worker/skinland_worker.py Normal file
View File

@@ -0,0 +1,174 @@
"""skin.land scrape worker (pull model).
A thin strategy over blworker.Worker, mirroring the cs.money worker — it supplies only the
skin.land-specific bits; the warm session, poll/scrape/post loop, IPRoyal proxy, IP
rotation, logging and shutdown all live in the shared runtime. Env knobs: worker/README.md.
How skin.land is scraped (learned from the discovery probes):
- A job's target is the market PAGE URL, e.g.
https://skin.land/market/csgo/ak-47-redline-field-tested/
- That Nuxt page embeds an internal numeric skin_id. We resolve it once from the page's
__NUXT__ payload (the skin object whose `url` == the page slug), cache it per slug, then
page the clean JSON API:
GET https://app.skin.land/api/v2/obtained-skins?skin_id={id}&page={n}
which returns a Laravel paginator {data:[...offers], meta:{current_page,last_page,…}}.
- We walk pages 1..last_page (capped by the job's maxPages), dedup offers by id, and post.
cd worker
.venv\\Scripts\\Activate.ps1
pip install -r requirements.txt
python skinland_worker.py
"""
import json
import re
from blworker import ScrapeResult, Worker, click, looks_like_challenge, page_fetch, run
# The offers API. skin_id is skin.land's internal id (resolved from the page); page is the
# Laravel paginator page. Same warm session, fetched in-page (CORS-allowed app subdomain).
API = "https://app.skin.land/api/v2/obtained-skins?skin_id={skin_id}&page={page}"
# The page's Nuxt payload is a devalue flat array; the main skin object is the one whose
# `url` field resolves to the page slug, and its `id` field resolves to the skin_id.
NUXT_ARRAY_RE = re.compile(r'\[\["(?:ShallowReactive|Reactive)",\d+\]')
def slug_of(url: str) -> str:
    """Return the last path segment of `url`, ignoring any trailing slashes.

    Example: ".../market/csgo/ak-47-redline-field-tested/" ->
    "ak-47-redline-field-tested".
    """
    trimmed = url.rstrip("/")
    _head, _sep, tail = trimmed.rpartition("/")
    return tail
def extract_nuxt_array(html: str):
    """Pull the Nuxt devalue payload (a JSON flat array of values with index references)
    out of the page HTML. Returns the parsed list, or None.

    The payload is located by NUXT_ARRAY_RE, which matches its opening
    ``[["ShallowReactive",N]`` / ``[["Reactive",N]`` marker. From there the
    stdlib decoder parses exactly one JSON value and ignores whatever HTML
    follows. That replaces a hand-written bracket/string/escape scanner with
    the JSON parser's own notion of where the array ends.

    Returns None when the marker is absent or the text at the marker is not
    valid JSON (for example a truncated page).
    """
    m = NUXT_ARRAY_RE.search(html)
    if not m:
        return None
    try:
        # raw_decode(s, idx) decodes one JSON document starting at idx and
        # tolerates trailing data after it.
        payload, _end = json.JSONDecoder().raw_decode(html, m.start())
    except json.JSONDecodeError:
        return None
    return payload
def resolve_skin_id(html: str, slug: str) -> int | None:
    """Return skin.land's internal skin_id for the page whose slug is `slug`.

    Scans the Nuxt payload for the first dict that has both `url` and `id`
    fields and whose `url` resolves to `slug`, then returns its resolved `id`
    when that is an int. Field values that are valid indexes into the payload
    array are treated as references and dereferenced; anything else is used
    as-is. Returns None when no payload or no matching entry is found.
    """
    payload = extract_nuxt_array(html)
    if not payload:
        return None
    size = len(payload)

    def deref(ref):
        # An in-range int is an index into the flat payload array.
        if isinstance(ref, int) and 0 <= ref < size:
            return payload[ref]
        return ref

    for entry in payload:
        if not isinstance(entry, dict):
            continue
        if "url" not in entry or "id" not in entry:
            continue
        if deref(entry["url"]) != slug:
            continue
        skin_id = deref(entry["id"])
        if isinstance(skin_id, int):
            return skin_id
    return None
class SkinLandWorker(Worker):
    """skin.land strategy for the shared ``blworker.Worker`` runtime.

    Supplies only the skin.land-specific pieces: a job label, the
    cookie-banner dismissal, skin_id resolution (cached per slug), and the
    page-by-page walk of the obtained-skins API.
    """

    # Attributes consumed by the Worker base class.
    # NOTE(review): their exact use lives in blworker — confirm there.
    name = "skinland"
    jobs_path = "/skinland/jobs"
    default_market_url = "https://skin.land/market/csgo/"

    def __init__(self, settings):
        super().__init__(settings)
        # skin_id is stable per skin+wear, so cache it per slug to skip the ~page fetch on
        # re-sweeps.
        self._skin_id_cache: dict[str, int] = {}

    def describe_job(self, job) -> str:
        """Return a short label for a job: the slug of its market page URL."""
        return slug_of(job["url"])

    async def dismiss_consent(self, page) -> str | None:
        """Privacy-preserving: dismiss the cookie banner with essential-only if present.

        Returns a description of the label that was clicked, or None when no
        label matched.
        """
        for label in ("Accept essential", "ACCEPT ESSENTIAL", "Reject all"):
            if await click(page, label):
                return f"dismissed via {label!r}"
        return None

    async def _get_skin_id(self, page, job, slug: str) -> tuple[int | None, str, int]:
        """Resolve (and cache) skin.land's skin_id for this slug. Returns
        (skin_id, reason, wire); reason is "" on success, else a partial-stop reason
        ("challenged" or "no-skin-id"). wire is 0 on a cache hit."""
        if slug in self._skin_id_cache:
            return self._skin_id_cache[slug], "", 0
        _status, html, wire = await page_fetch(page, job["url"], accept="text/html")
        if looks_like_challenge(html):
            return None, "challenged", max(wire, 0)
        skin_id = resolve_skin_id(html, slug)
        if skin_id is None:
            return None, "no-skin-id", max(wire, 0)
        self._skin_id_cache[slug] = skin_id
        return skin_id, "", max(wire, 0)

    async def scrape_job(self, page, job) -> ScrapeResult:
        """Scrape ALL offers for one skin+wear by paging the obtained-skins API.

        Stop reasons carried in the result:
          "completed"  - an empty page or meta.last_page was reached (full sweep)
          "challenged" - a response looked like a challenge page
          "no-skin-id" - the market page did not yield a skin_id
          "bad-json"   - a response was not JSON, or was JSON without a `data`
                         list (for example an error envelope), so it cannot be
                         taken as the end of the listing
          "fetch-cap"  - the job's maxPages budget was used up
        """
        slug = slug_of(job["url"])
        max_pages = job.get("maxPages", 40)
        skin_id, reason, wire = await self._get_skin_id(page, job, slug)
        if skin_id is None:
            return ScrapeResult([], 0, reason, wire)
        seen: dict = {}
        fetches = 0
        page_n = 1
        reason = "completed"
        while page_n <= max_pages:
            _status, body, wbytes = await page_fetch(page, API.format(skin_id=skin_id, page=page_n))
            fetches += 1
            if wbytes > 0:
                wire += wbytes
            if looks_like_challenge(body):
                return ScrapeResult(list(seen.values()), fetches, "challenged", wire)
            try:
                payload = json.loads(body)
            except json.JSONDecodeError:
                return ScrapeResult(list(seen.values()), fetches, "bad-json", wire)
            offers = payload.get("data") if isinstance(payload, dict) else None
            if not isinstance(offers, list):
                # Valid JSON but not a paginator page (e.g. {"message": ...}).
                # Treating a missing `data` as "no more offers" would report an
                # authoritative "completed" sweep on an error response.
                return ScrapeResult(list(seen.values()), fetches, "bad-json", wire)
            for o in offers:
                if isinstance(o, dict) and o.get("id") is not None:
                    seen[o["id"]] = o
            meta = payload.get("meta")
            last = meta.get("last_page") if isinstance(meta, dict) else None
            if not offers or (isinstance(last, int) and page_n >= last):
                break  # walked the final page
            page_n += 1
            await self._pace(page)
        else:
            reason = "fetch-cap"
        return ScrapeResult(list(seen.values()), fetches, reason, wire)
# Script entry point: hand the worker class to the shared runner.
if __name__ == "__main__":
    run(SkinLandWorker)

View File

@@ -1,77 +0,0 @@
"""
One-off count verification: scrape a single skin+wear search from cs.money and
report how many distinct sell-orders come back, reusing the production worker's
warm-session + float-cursor pagination logic (worker.scrape_job).
Use it to sanity-check that our pagination actually recovers the FULL listing
count cs.money shows on the site (the known ground truth) for one query.
cd worker
.venv\\Scripts\\Activate.ps1
python verify_count.py "Desert Eagle Bronze Deco fn"
Env knobs (same meaning as worker.py): SOLVE_SECONDS, DELAY, JITTER, PROXY,
BROWSER_PATH, LOAD_IMAGES. MAX_FETCHES caps page fetches (default 80).
"""
import asyncio
import os
import sys
from collections import Counter
import nodriver as uc
import worker
# Upper bound on page fetches for the verification run; passed to the worker as the job's
# "maxPages". Overridable through the MAX_FETCHES environment variable.
MAX_FETCHES = int(os.environ.get("MAX_FETCHES", "80"))
async def main():
    """Scrape one search with the production worker's logic and print a count breakdown.

    The search text comes from the command-line arguments (joined with spaces), with a
    built-in default when none are given. Prints the stop reason, fetch count, number of
    distinct sell-orders, and tallies by item name and wear code.
    """
    search = " ".join(sys.argv[1:]) or "Desert Eagle Bronze Deco fn"
    args = [f"--proxy-server={worker.PROXY}"] if worker.PROXY else []
    if not worker.LOAD_IMAGES:
        args.append("--blink-settings=imagesEnabled=false")
    if os.environ.get("CHROME_NO_SANDBOX") == "1":
        args += ["--no-sandbox", "--disable-dev-shm-usage"]
    print(f"Verifying count for search {search!r} (proxy={worker.PROXY or 'own IP'})")
    browser = await uc.start(
        headless=False, browser_executable_path=worker.BROWSER_PATH, browser_args=args)
    try:
        page = await browser.get("about:blank")
        await worker.warm(page)
        job = {"search": search, "maxPages": MAX_FETCHES}
        # worker.scrape_job returns (items, fetches, reason, wire_bytes); unpacking only
        # three names raised ValueError. The wire-byte total is not reported here.
        items, fetches, reason, _wire = await worker.scrape_job(page, job)
        print("\n=== result ===")
        print(f" search: {search}")
        print(f" stopped: {reason}")
        print(f" fetches: {fetches}")
        print(f" DISTINCT sell-orders (deduped by id): {len(items)}")
        # Break down what came back so we can see whether the count is inflated by
        # off-target names/wears (the C2's name+wear filter would drop those later).
        names = Counter()
        wears = Counter()
        st = 0
        for it in items:
            asset = it.get("asset") or {}
            names[(asset.get("names") or {}).get("full")] += 1
            wears[asset.get("quality")] += 1
            if asset.get("isStatTrak"):
                st += 1
        print(f" StatTrak in set: {st}")
        print(" by name:")
        for name, n in names.most_common():
            print(f" {n:4d} {name}")
        print(" by wear (quality code):")
        for w, n in wears.most_common():
            print(f" {n:4d} {w}")
    finally:
        browser.stop()
# Script entry point: run main() to completion on nodriver's event loop.
if __name__ == "__main__":
    event_loop = uc.loop()
    event_loop.run_until_complete(main())

View File

@@ -1,79 +0,0 @@
"""
Validate the float-cursor scrape by walking the float axis in BOTH directions and
comparing the recovered sell-order id sets. If ascending (lowest float first) and
descending (highest float first) independently land on the same listings, the
cursor is exhaustive and order-independent — i.e. the count is real, not an artifact
of walk direction or boundary double-counting.
python verify_crosscheck.py "Glock-18 Candy Apple mw"
"""
import asyncio
import sys
import nodriver as uc
import worker
# Page size, taken from the production worker; a page with fewer items ends a walk.
CAP = worker.PAGE_CAP
# Ascending walk: sort by float ascending, filtering from the cursor ({cur}) up to 1.
ASC = ("https://cs.money/market/buy/?search={q}"
"&order=asc&sort=float&minFloat={cur:.12f}&maxFloat=1")
# Descending walk: sort by float descending, filtering from 0 up to the cursor ({cur}).
DESC = ("https://cs.money/market/buy/?search={q}"
"&order=desc&sort=float&minFloat=0&maxFloat={cur:.12f}")
async def walk(page, q, template, ascending, max_fetches=60):
    """Walk the float axis in one direction and collect sell-orders keyed by id.

    `template` is formatted with the encoded search `q` and the current float cursor
    `cur`, which starts at 0.0 (ascending) or 1.0 (descending) and moves to the extreme
    float of each full page. Returns (seen, fetches, reason), where reason is
    "completed" (a page came back under CAP), "challenged", "stuck" (the cursor could
    not advance) or "fetch-cap" (`max_fetches` reached).
    """
    seen = {}
    cur = 0.0 if ascending else 1.0
    fetches = 0
    while fetches < max_fetches:
        # worker.fetch_json returns (status, body, wire_bytes); unpacking only two names
        # raised ValueError. Neither the status nor the byte count is needed here.
        _status, body, _wire = await worker.fetch_json(page, template.format(q=q, cur=cur))
        fetches += 1
        if "Just a moment" in body or "challenge-platform" in body:
            return seen, fetches, "challenged"
        items = worker.extract_items(body)
        floats = []
        for it in items:
            if it.get("id") is not None:
                seen[it["id"]] = it
            fl = (it.get("asset") or {}).get("float")
            if isinstance(fl, (int, float)):
                floats.append(fl)
        if len(items) < CAP:
            return seen, fetches, "completed"
        # Next cursor is the furthest float reached in the walk direction.
        nxt = (max(floats) if ascending else min(floats)) if floats else None
        if nxt is None or (ascending and nxt <= cur) or (not ascending and nxt >= cur):
            return seen, fetches, "stuck"
        cur = nxt
        await page.sleep(worker.DELAY)
    return seen, fetches, "fetch-cap"
async def main():
    """Run the ascending and descending walks for one search and compare their id sets."""
    term = " ".join(sys.argv[1:]) or "Glock-18 Candy Apple mw"
    encoded = worker.urllib.parse.quote_plus(term)
    browser = await uc.start(headless=False, browser_args=["--blink-settings=imagesEnabled=false"])
    try:
        page = await browser.get("about:blank")
        await worker.warm(page)

        up_seen, up_fetches, up_reason = await walk(page, encoded, ASC, ascending=True)
        print(f"ASC : {len(up_seen):4d} ids {up_fetches} fetches {up_reason}")
        down_seen, down_fetches, down_reason = await walk(page, encoded, DESC, ascending=False)
        print(f"DESC: {len(down_seen):4d} ids {down_fetches} fetches {down_reason}")

        # Compare the two walks purely by sell-order id.
        up_ids = set(up_seen)
        down_ids = set(down_seen)
        combined = up_ids | down_ids
        print("\n=== cross-check ===")
        print(f" ASC only: {len(up_ids - down_ids)}")
        print(f" DESC only: {len(down_ids - up_ids)}")
        print(f" in both: {len(up_ids & down_ids)}")
        print(f" UNION (distinct):{len(combined)}")
        if up_ids == down_ids:
            verdict = "AGREE — count is solid"
        else:
            verdict = "DISAGREE — one walk missed listings"
        print(f" verdict: {verdict}")
    finally:
        browser.stop()
# Script entry point: run main() to completion on nodriver's event loop.
if __name__ == "__main__":
    event_loop = uc.loop()
    event_loop.run_until_complete(main())

View File

@@ -1,483 +0,0 @@
"""
cs.money scrape worker (pull model).
Holds ONE warm nodriver session (the thing that beats Cloudflare), then loops:
poll the .NET C2 for a job, scrape that skin+wear's sell-orders via in-page fetch
from the cleared session, and post the results back. The C2 owns job selection
(stalest skin+wear first) and persistence; this worker just fetches and forwards.
cd worker
.venv\\Scripts\\Activate.ps1
pip install -r requirements.txt
python worker.py
Env knobs:
C2_URL C2 base URL (default http://localhost:5080)
WORKER_TOKEN shared secret, must match the C2's WorkerToken (default dev-worker-token)
MARKET_URL market page to warm the session on (default the buy market)
SOLVE_SECONDS seconds to clear Cloudflare on startup (default 30)
DELAY / JITTER base + random seconds between page fetches (default 2.0 / 1.5)
IDLE_SECONDS sleep when the C2 has no work (default 10)
BROWSER_PATH path to Chrome/Edge if auto-detect fails
Proxy (pick one; IPRoyal takes priority when its creds are set):
IPROYAL_USERNAME IPRoyal residential account username
IPROYAL_PASSWORD IPRoyal residential account password
IPROYAL_COUNTRY ISO country for the exit (default us; blank = any)
IPROYAL_LIFETIME_MIN sticky-IP hold in minutes (default 60)
PROXY host:port for an auth-free proxy (fallback; omit to use your own IP)
Each worker process mints its own random IPRoyal sticky session at startup, so N
workers get N distinct residential exit IPs with no coordination — scale with
`docker compose up --scale worker=N`. On a Cloudflare challenge the worker rotates
to a fresh session (new IP) and re-warms. Chromium can't carry proxy credentials on
--proxy-server, so we run a tiny in-process forwarder (LocalForwardingProxy below)
that injects the IPRoyal auth and chains to the gateway; Chrome talks only to an
auth-free 127.0.0.1 endpoint, keeping us at zero CDP (a CDP auth handler is a
Cloudflare tell).
"""
import asyncio
import base64
import json
import os
import random
import re
import urllib.error
import urllib.parse
import urllib.request
import uuid
import nodriver as uc
# C2 base URL (trailing slashes stripped) and the shared secret sent as X-Worker-Token.
C2_URL = os.environ.get("C2_URL", "http://localhost:5080").rstrip("/")
TOKEN = os.environ.get("WORKER_TOKEN", "dev-worker-token")
# Market page the session is warmed on, and the seconds allowed for Cloudflare to clear.
MARKET_URL = os.environ.get("MARKET_URL", "https://cs.money/market/buy/")
SOLVE_SECONDS = int(os.environ.get("SOLVE_SECONDS", "30"))
# Base + random extra seconds slept between page fetches.
DELAY = float(os.environ.get("DELAY", "2.0"))
JITTER = float(os.environ.get("JITTER", "1.5"))
# Seconds to sleep when the C2 has no work.
IDLE_SECONDS = int(os.environ.get("IDLE_SECONDS", "10"))
# Auth-free proxy fallback (host:port), and an explicit browser binary path; both optional.
PROXY = os.environ.get("PROXY")
BROWSER_PATH = os.environ.get("BROWSER_PATH")
# IPRoyal residential gateway. One fixed host/port; country, sticky-session id and
# lifetime are encoded as underscore params appended to the password (see
# _iproyal_password). Mirrors the .NET IpRoyalProxyProvider scheme.
IPROYAL_HOST = os.environ.get("IPROYAL_HOST", "geo.iproyal.com")
IPROYAL_PORT = int(os.environ.get("IPROYAL_PORT", "12321"))
IPROYAL_USERNAME = os.environ.get("IPROYAL_USERNAME")
IPROYAL_PASSWORD = os.environ.get("IPROYAL_PASSWORD")
IPROYAL_COUNTRY = os.environ.get("IPROYAL_COUNTRY", "us").strip().lower()
IPROYAL_LIFETIME_MIN = int(os.environ.get("IPROYAL_LIFETIME_MIN", "60"))
# Residential proxy is metered per GB. Cloudflare gates on JS, not images, and the
# sell-orders API is pure JSON — so block images by default to slash page-render
# bandwidth. Set LOAD_IMAGES=1 to re-enable (e.g. for debugging the visible page).
LOAD_IMAGES = os.environ.get("LOAD_IMAGES") == "1"
# cs.money is an Astro SSR app: the free-text market search filters server-side and
# the resulting listings are embedded in the page as a __page-params JSON blob. The
# /2.0/market/sell-orders API rejects a `search` param (HTTP 400), so we fetch the
# PAGE for a search and read the embedded items — same item shape as the API.
#
# A page returns at most 60 and offset is ignored, so we paginate with a FORWARD
# CURSOR on float: cs.money honors `order=asc&sort=float` + `minFloat`, and float is
# full-precision and effectively unique per item. We grab the 60 lowest-float items
# at/above `lo`, advance `lo` to the highest float returned, and repeat until a page
# is under the cap. (The old minPrice/maxPrice bisection silently truncated cheap
# skins: >60 listings can share a sub-$0.02 reference band, which no price window can
# split — floats almost never tie, so the cursor always makes progress.)
PAGE = ("https://cs.money/market/buy/?search={search}"
"&order=asc&sort=float&minFloat={lo:.12f}&maxFloat=1")
PAGE_CAP = 60 # items per SSR page
# Captures the contents of the <script id="__page-params"> element (DOTALL, non-greedy).
PAGE_PARAMS_RE = re.compile(
r'<script\b[^>]*id="__page-params"[^>]*>(.*?)</script>', re.S)
# --- IPRoyal residential proxy ----------------------------------------------------
def _new_session_id() -> str:
"""Short, opaque, URL-safe token. IPRoyal pins one residential exit IP per
distinct session value, so a fresh id == a fresh IP."""
return uuid.uuid4().hex[:10]
def _iproyal_password(session_id: str) -> str:
    """Build the IPRoyal-style password that carries the targeting/session knobs:
    "<pass>_country-us_session-<id>_lifetime-60m". The country segment is left out when
    IPROYAL_COUNTRY is empty."""
    country_part = f"_country-{IPROYAL_COUNTRY}" if IPROYAL_COUNTRY else ""
    session_part = f"_session-{session_id}_lifetime-{IPROYAL_LIFETIME_MIN}m"
    return IPROYAL_PASSWORD + country_part + session_part
class LocalForwardingProxy:
"""In-process HTTP proxy on 127.0.0.1 that chains every connection to the IPRoyal
gateway, injecting the Proxy-Authorization header itself. Chromium ignores creds in
--proxy-server and the in-browser ways to answer the gateway's 407 (a CDP auth
handler, or a disabled MV2 extension) are Cloudflare tells — so we terminate the
browser->proxy hop locally and add auth here, leaving Chrome to talk to an auth-free
endpoint at zero CDP. HTTPS (all cs.money serves) flows through the CONNECT tunnel,
so this proxy only relays ciphertext and never sees plaintext. Ported from the .NET
LocalForwardingProxy. The active session token can be swapped live (set_password) to
move to a fresh exit IP without restarting the browser. (New tunnels pick up the new
IP; any still-open keep-alive tunnel stays on the old one until it closes.)"""
def __init__(self, host: str, port: int, username: str, password: str):
self._host = host
self._port = port
self._username = username
self._password = password
self._server: asyncio.AbstractServer | None = None
self.endpoint = ""
def set_password(self, password: str) -> None:
self._password = password
def _auth_header(self) -> str:
token = base64.b64encode(f"{self._username}:{self._password}".encode()).decode()
return f"Proxy-Authorization: Basic {token}\r\n"
async def start(self) -> "LocalForwardingProxy":
self._server = await asyncio.start_server(self._handle, "127.0.0.1", 0)
port = self._server.sockets[0].getsockname()[1]
self.endpoint = f"127.0.0.1:{port}"
return self
async def stop(self) -> None:
if self._server is not None:
self._server.close()
try:
await self._server.wait_closed()
except Exception:
pass
@staticmethod
async def _read_header(reader: asyncio.StreamReader) -> str | None:
"""Read up to the end of the HTTP header block (CRLFCRLF). None on EOF/overflow."""
try:
data = await reader.readuntil(b"\r\n\r\n")
except (asyncio.IncompleteReadError, asyncio.LimitOverrunError):
return None
return data.decode("latin-1")
async def _handle(self, client_reader: asyncio.StreamReader, client_writer: asyncio.StreamWriter) -> None:
up_writer: asyncio.StreamWriter | None = None
try:
header = await self._read_header(client_reader)
if not header:
return
parts = header.split("\r\n", 1)[0].split(" ")
if len(parts) < 2:
return
method, target = parts[0], parts[1]
up_reader, up_writer = await asyncio.open_connection(self._host, self._port)
if method.upper() == "CONNECT":
# HTTPS: open an authenticated tunnel upstream, then relay raw bytes.
up_writer.write(
f"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{self._auth_header()}\r\n".encode())
await up_writer.drain()
up_header = await self._read_header(up_reader)
status = up_header.split(" ", 2) if up_header else []
if len(status) < 2 or status[1] != "200":
line = (up_header or "no response").split("\r\n", 1)[0]
print(f" proxy: upstream refused CONNECT {target}: {line}")
client_writer.write(b"HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n")
await client_writer.drain()
return
client_writer.write(b"HTTP/1.1 200 Connection established\r\n\r\n")
await client_writer.drain()
else:
# Plain HTTP: re-inject the request upstream with auth, then relay.
idx = header.index("\r\n") + 2
up_writer.write((header[:idx] + self._auth_header() + header[idx:]).encode())
await up_writer.drain()
await self._relay(client_reader, client_writer, up_reader, up_writer)
except Exception:
pass # one bad tunnel must never take down the listener
finally:
for w in (client_writer, up_writer):
if w is not None:
try:
w.close()
except Exception:
pass
@staticmethod
async def _relay(
client_reader: asyncio.StreamReader, client_writer: asyncio.StreamWriter,
up_reader: asyncio.StreamReader, up_writer: asyncio.StreamWriter) -> None:
# Pipe both directions, but tear the whole tunnel down as soon as EITHER side
# closes (mirrors the .NET WhenAny). Waiting for both — as a plain gather does —
# leaks a task holding two sockets on every half-closed connection, which piles
# up fast across a long multi-worker run. Closing both writers when the first
# pipe finishes unblocks the other's pending read so both tasks settle.
async def pipe(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
try:
while data := await reader.read(65536):
writer.write(data)
await writer.drain()
except Exception:
pass
a = asyncio.create_task(pipe(client_reader, up_writer))
b = asyncio.create_task(pipe(up_reader, client_writer))
try:
await asyncio.wait({a, b}, return_when=asyncio.FIRST_COMPLETED)
finally:
for w in (client_writer, up_writer):
try:
w.close()
except Exception:
pass
await asyncio.gather(a, b, return_exceptions=True)
def looks_like_challenge(body: str) -> bool:
    """Return True when `body` is empty/None/whitespace, starts with "<" once leading
    whitespace is removed, or contains one of the challenge-page marker strings."""
    stripped = (body or "").lstrip()
    if not stripped or stripped.startswith("<"):
        return True
    return "Just a moment" in body or "challenge-platform" in body
# --- C2 HTTP (stdlib, run off the event loop) -------------------------------------
def _get_job_sync():
    """Blocking GET of {C2_URL}/jobs/next. Returns the decoded job, or None when the C2
    answers 204 (no work) or the request fails for any reason.

    Every failure is logged and turned into None so the polling loop can retry later:
    HTTP error statuses, connection problems, read timeouts and connection resets (plain
    OSError subclasses that urllib does not wrap in URLError), and unreadable JSON bodies.
    """
    req = urllib.request.Request(f"{C2_URL}/jobs/next", headers={"X-Worker-Token": TOKEN})
    try:
        with urllib.request.urlopen(req, timeout=15) as r:
            if r.status == 204:
                return None
            return json.loads(r.read() or b"null")
    except urllib.error.HTTPError as e:
        print(f" C2 /jobs/next -> HTTP {e.code}")
    except OSError as e:
        # URLError is an OSError subclass, so this also covers the original handler.
        print(f" C2 unreachable: {e}")
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        print(f" C2 /jobs/next -> unreadable body: {e}")
    return None
def _post_result_sync(job_id: str, payload: dict):
    """Blocking POST of `payload` (as JSON) to {C2_URL}/jobs/{job_id}/result. Returns the
    decoded response body, or None when the request fails for any reason.

    Every failure is logged and turned into None: HTTP error statuses, connection
    problems, read timeouts and connection resets (plain OSError subclasses that urllib
    does not wrap in URLError), and unreadable JSON bodies.
    """
    data = json.dumps(payload).encode()
    req = urllib.request.Request(
        f"{C2_URL}/jobs/{job_id}/result", data=data, method="POST",
        headers={"X-Worker-Token": TOKEN, "Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=60) as r:
            return json.loads(r.read() or b"null")
    except urllib.error.HTTPError as e:
        print(f" C2 result -> HTTP {e.code}: {e.read()[:200]!r}")
    except OSError as e:
        # URLError is an OSError subclass, so this also covers the original handler.
        print(f" C2 unreachable posting result: {e}")
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        print(f" C2 result -> unreadable body: {e}")
    return None
async def get_job():
    """Fetch the next job from the C2 without blocking the event loop: the blocking
    stdlib request runs in a worker thread."""
    job = await asyncio.to_thread(_get_job_sync)
    return job
async def post_result(job_id, payload):
    """Post a job's results to the C2 without blocking the event loop: the blocking
    stdlib request runs in a worker thread."""
    response = await asyncio.to_thread(_post_result_sync, job_id, payload)
    return response
# --- scraping ---------------------------------------------------------------------
async def fetch_json(page, url: str) -> tuple[str, str, int]:
    """Fetch in-page and also read back the Resource Timing transferSize — the actual
    COMPRESSED bytes on the wire (what the metered proxy bills), not len(body) which is
    the decompressed size. Returns (status, body, wire_bytes); wire_bytes is -1 if the
    timing entry wasn't available. Same-origin (cs.money), so the size fields are exposed.

    On failure the status is "-1": with an empty body when the evaluation did not return
    a string, or with the raw returned text when it could not be decoded as expected.
    """
    # json.dumps yields a valid JavaScript string literal for any Python string. repr()
    # does not: it can emit Python-only escapes such as \UXXXXXXXX, which JavaScript
    # reads as different characters, so the fetched URL would silently change.
    js_url = json.dumps(url)
    expr = (
        f"fetch({js_url}, {{credentials:'include', headers:{{'accept':'application/json'}}}})"
        f".then(async r => {{"
        f" const body = await r.text();"
        f" const e = performance.getEntriesByName({js_url}).slice(-1)[0];"
        f" return JSON.stringify({{status: r.status, body: body,"
        f" wire: e ? e.transferSize : -1, dec: e ? e.decodedBodySize : -1}});"
        f"}})"
    )
    raw = await page.evaluate(expr, await_promise=True)
    if not isinstance(raw, str):
        return ("-1", "", -1)
    try:
        obj = json.loads(raw)
        return (str(obj.get("status", "-1")), obj.get("body", ""), int(obj.get("wire", -1)))
    except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
        # AttributeError: the text decoded to JSON that is not an object (no .get).
        return ("-1", raw, -1)
async def _click(page, text, timeout=3):
try:
el = await page.find(text, best_match=True, timeout=timeout)
if el:
await el.click()
return True
except Exception:
pass
return False
async def dismiss_consent(page):
    """Privacy-preserving consent dismissal. The banner only offers 'Accept all' /
    'Manage cookies', and Reject-all lives inside the Manage window, so the sequence is
    Manage -> Reject all -> Confirm. (The data path reads SSR __page-params regardless,
    but this keeps the session honest and unblocks any future interaction.)

    Returns a comma-separated summary of the steps that were clicked, or None when
    nothing was clicked.
    """
    opened = await _click(page, "Manage cookies") or await _click(page, "Manage")
    if not opened:
        return None
    await page.sleep(1)

    done = []
    if await _click(page, "Reject all"):
        done.append("reject-all")
    # Try the confirm labels in order and stop at the first one that clicks.
    for label in ("Confirm my choice", "Confirm", "Save"):
        if await _click(page, label):
            done.append(f"confirm:{label}")
            break
    if not done:
        return None
    return ", ".join(done)
async def warm(page):
    """Load the market page, wait SOLVE_SECONDS so Cloudflare can clear (leaving the
    session holding cf_clearance), then try to dismiss the consent banner."""
    print(f"Warming session at {MARKET_URL} (clear Cloudflare; {SOLVE_SECONDS}s)...")
    await page.get(MARKET_URL)
    await page.sleep(SOLVE_SECONDS)
    clicked = await dismiss_consent(page)
    outcome = 'dismissed via ' + clicked if clicked else 'left up'
    print(f"Consent: {outcome}")
def extract_items(html: str) -> list:
    """Pull inventory.items out of the page's __page-params JSON blob.

    Always returns a list: empty when the script element is missing, its content is not
    valid JSON, or the JSON lacks an `inventory` object holding an `items` list.
    """
    m = PAGE_PARAMS_RE.search(html)
    if not m:
        return []
    try:
        params = json.loads(m.group(1))
    except json.JSONDecodeError:
        return []
    # Walk down defensively: a null/non-object level (e.g. "inventory": null) must yield
    # [] like every other failure here, not raise AttributeError on .get.
    inventory = params.get("inventory") if isinstance(params, dict) else None
    items = inventory.get("items") if isinstance(inventory, dict) else None
    return items if isinstance(items, list) else []
async def scrape_job(page, job) -> tuple[list, int, str, int]:
    """Scrape ALL listings for one skin+wear via a forward float cursor.

    A search page returns at most 60 items and ignores offset, but cs.money sorts by
    float (order=asc&sort=float) and filters by minFloat. So we walk the float axis:
    grab the 60 lowest-float items at/above `lo`, advance `lo` to the highest float on
    the page, and repeat until a page is under the cap. The boundary item is re-fetched
    (minFloat is inclusive) and dropped by the id dedup. Returns
    (items, fetches, reason, wire_bytes) where wire_bytes is the metered (compressed) cost.

    `reason` is "completed" only for a full sweep. Partial stops are "challenged",
    "fetch-failed" (the in-page fetch produced no response), "http-<status>" (non-2xx
    response), "stuck-float-tie" and "fetch-cap".
    """
    search = urllib.parse.quote_plus(job["search"])
    max_fetches = job.get("maxPages", 40)  # safety cap on page fetches per job
    seen: dict = {}
    fetches = 0
    wire = 0
    lo = 0.0
    reason = "completed"
    while fetches < max_fetches:
        status, body, wbytes = await fetch_json(page, PAGE.format(search=search, lo=lo))
        fetches += 1
        if wbytes > 0:
            wire += wbytes
        # Checked before the status so a challenge page keeps its "challenged" reason
        # whatever HTTP status it was served with.
        if "Just a moment" in body or "challenge-platform" in body:
            return list(seen.values()), fetches, "challenged", wire
        if not status.startswith("2"):
            # A failed fetch or an error page carries no listings; without this check it
            # would parse to zero items, fall under the page cap, and be reported as a
            # "completed" sweep. Flag it as partial instead.
            failure = "fetch-failed" if status == "-1" else f"http-{status}"
            return list(seen.values()), fetches, failure, wire
        items = extract_items(body)
        floats = []
        for it in items:
            if it.get("id") is not None:
                seen[it["id"]] = it
            fl = (it.get("asset") or {}).get("float")
            if isinstance(fl, (int, float)):
                floats.append(fl)
        if len(items) < PAGE_CAP:
            break  # last page — fewer than the cap means we've seen everything
        # Advance the cursor past the highest float on this page. Items at exactly that
        # float are re-fetched next round (minFloat is inclusive) and deduped by id.
        nxt = max(floats) if floats else None
        if nxt is None or nxt <= lo:
            # Cursor can't advance: >60 listings share a single float value, or the
            # items carry no float. Bail loudly rather than spin — a flagged gap beats
            # a silent one (this is the failure the price-window version hid).
            reason = "stuck-float-tie"
            break
        lo = nxt
        await page.sleep(DELAY + random.uniform(0, JITTER))
    else:
        # The loop ran out of fetches without hitting a break above.
        reason = "fetch-cap"
    return list(seen.values()), fetches, reason, wire
async def main():
    """Worker entry point: pick the proxy mode, start the browser, warm the session, then
    loop forever pulling jobs from the C2, scraping them and posting the results back."""
    # IPRoyal (auth'd, per-worker sticky IP) takes priority; else a plain auth-free
    # PROXY; else this host's own IP. The forwarder injects IPRoyal auth so Chrome
    # only ever sees an auth-free 127.0.0.1 endpoint.
    forwarder = None
    session_id = None
    if IPROYAL_USERNAME and IPROYAL_PASSWORD:
        session_id = _new_session_id()
        upstream = LocalForwardingProxy(
            IPROYAL_HOST, IPROYAL_PORT, IPROYAL_USERNAME, _iproyal_password(session_id))
        forwarder = await upstream.start()
        proxy = forwarder.endpoint
        proxy_label = f"iproyal[{IPROYAL_COUNTRY or 'any'}] session {session_id} via {forwarder.endpoint}"
    else:
        proxy = PROXY
        proxy_label = PROXY or "own IP"

    browser_args = []
    if proxy:
        browser_args.append(f"--proxy-server={proxy}")
    if not LOAD_IMAGES:
        # Disable image loading at the engine level — the dominant bandwidth cost on
        # an image-heavy market, and unneeded for CF clearance or the JSON API.
        browser_args.append("--blink-settings=imagesEnabled=false")
    if os.environ.get("CHROME_NO_SANDBOX") == "1":
        # Required when running Chromium as root in a container.
        browser_args.extend(["--no-sandbox", "--disable-dev-shm-usage"])

    print(f"Starting worker (C2={C2_URL}, proxy={proxy_label}, images={'on' if LOAD_IMAGES else 'off'})...")
    browser = await uc.start(
        headless=False, browser_executable_path=BROWSER_PATH, browser_args=browser_args)
    try:
        page = await browser.get("about:blank")
        await warm(page)
        total_wire = 0  # metered (compressed) bytes this worker has pulled, lifetime
        while True:
            job = await get_job()
            if not job:
                await asyncio.sleep(IDLE_SECONDS)
                continue

            print(f"Job {job['jobId'][:8]} — search {job['search']!r}")
            items, pages, reason, wire = await scrape_job(page, job)
            total_wire += wire

            if reason == "challenged":
                # The exit IP is likely flagged. On IPRoyal, rotate to a fresh sticky
                # session (new IP) before re-warming; otherwise just re-solve in place.
                if forwarder is None:
                    print(" re-challenged; re-warming session...")
                else:
                    session_id = _new_session_id()
                    forwarder.set_password(_iproyal_password(session_id))
                    print(f" challenged; rotating exit IP -> session {session_id}, re-warming...")
                await warm(page)

            body = {"items": items, "pages": pages, "stoppedReason": reason}
            result = await post_result(job["jobId"], body)
            if result:
                summary = (f"matched {result.get('matched')}, new {result.get('inserted')}, "
                           f"upd {result.get('updated')}, removed {result.get('removed')}")
            else:
                summary = "post failed"
            wire_kb = wire / 1024
            print(f" scraped {len(items)} items ({pages}p, {reason}, {wire_kb:.0f}KB wire) "
                  f"-> {summary} [lifetime {total_wire / 1_048_576:.1f}MB]")
            await page.sleep(DELAY + random.uniform(0, JITTER))
    finally:
        browser.stop()
        if forwarder is not None:
            await forwarder.stop()
# Script entry point: run main() to completion on nodriver's event loop.
if __name__ == "__main__":
    event_loop = uc.loop()
    event_loop.run_until_complete(main())