almost ready
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -101,6 +101,8 @@ env/
|
|||||||
|
|
||||||
# cs.money discovery capture dumps (JSON responses)
|
# cs.money discovery capture dumps (JSON responses)
|
||||||
csmoney-captures/
|
csmoney-captures/
|
||||||
|
# API response capture dumps (CSFloat schema/listing samples, worker page dumps)
|
||||||
|
captures/
|
||||||
|
|
||||||
# Local compose secrets (DB connection string, tokens)
|
# Local compose secrets (DB connection string, tokens)
|
||||||
.env
|
.env
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
using BlueLaminate.Core.CsMoney;
|
using BlueLaminate.Core.CsMoney;
|
||||||
|
using BlueLaminate.Core.SkinLand;
|
||||||
|
|
||||||
namespace BlueLaminate.C2;
|
namespace BlueLaminate.C2;
|
||||||
|
|
||||||
@@ -17,3 +18,20 @@ public sealed record ScrapeJobDto(string JobId, int SkinId, int? ConditionId, st
|
|||||||
/// <param name="StoppedReason">Why it stopped. "completed" = full sweep (authoritative);
|
/// <param name="StoppedReason">Why it stopped. "completed" = full sweep (authoritative);
|
||||||
/// anything else (fetch-cap / challenged / stuck-float-tie) is partial.</param>
|
/// anything else (fetch-cap / challenged / stuck-float-tie) is partial.</param>
|
||||||
public sealed record ScrapeResultDto(List<CsMoneyItem> Items, int Pages, string? StoppedReason);
|
public sealed record ScrapeResultDto(List<CsMoneyItem> Items, int Pages, string? StoppedReason);
|
||||||
|
|
||||||
|
/// <summary>A unit of skin.land scrape work: one skin+wear, as its market page URL.</summary>
|
||||||
|
/// <param name="JobId">Opaque id the worker echoes back when posting results.</param>
|
||||||
|
/// <param name="SkinId">Catalogue skin this job targets.</param>
|
||||||
|
/// <param name="ConditionId">Wear band (skin_conditions row).</param>
|
||||||
|
/// <param name="Url">The skin.land market page, e.g.
|
||||||
|
/// "https://skin.land/market/csgo/ak-47-redline-field-tested/". The worker resolves the
|
||||||
|
/// internal skin_id from this page, then pages the obtained-skins API.</param>
|
||||||
|
/// <param name="MaxPages">Safety cap on offer-page fetches (Laravel paginator, ~26/page).</param>
|
||||||
|
public sealed record SkinLandJobDto(string JobId, int SkinId, int ConditionId, string Url, int MaxPages);
|
||||||
|
|
||||||
|
/// <summary>A worker's results for a claimed skin.land job: the offers it scraped.</summary>
|
||||||
|
/// <param name="Items">All obtained-skins offers gathered across pages (raw skin.land shape).</param>
|
||||||
|
/// <param name="Pages">How many offer pages the worker fetched.</param>
|
||||||
|
/// <param name="StoppedReason">Why it stopped. "completed" = full sweep (authoritative);
|
||||||
|
/// anything else (fetch-cap / challenged / no-skin-id) is partial.</param>
|
||||||
|
public sealed record SkinLandResultDto(List<SkinLandOffer> Items, int Pages, string? StoppedReason);
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
using System.Collections.Concurrent;
|
using System.Collections.Concurrent;
|
||||||
using BlueLaminate.Core.CsMoney;
|
|
||||||
using BlueLaminate.EFCore.Data;
|
using BlueLaminate.EFCore.Data;
|
||||||
using Microsoft.EntityFrameworkCore;
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
|
||||||
@@ -7,42 +6,58 @@ namespace BlueLaminate.C2;
|
|||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Hands out scrape jobs to workers, one skin+wear at a time, driven directly by the
|
/// Hands out scrape jobs to workers, one skin+wear at a time, driven directly by the
|
||||||
/// catalogue's per-band checkpoints (<c>SkinCondition.ListingsSweptAt</c>) rather than
|
/// catalogue's per-band, per-site checkpoints (the rows in <c>skin_condition_sweeps</c>
|
||||||
/// a pre-built queue. Each claim picks the stalest band (never-swept first), leases it
|
/// for this queue's <see cref="_source"/>) rather than a pre-built queue. Each claim picks
|
||||||
/// in memory so two workers can't get the same one, and builds a free-text search. On
|
/// the stalest band (never-swept first), leases it in memory so two workers can't get the
|
||||||
/// completion the ingest stamps <c>ListingsSweptAt</c>, so the band drops to the back —
|
/// same one, and builds the work target. On completion the ingest stamps the band's
|
||||||
/// the sweep loops the whole catalogue continuously and resumes cleanly after restarts.
|
/// checkpoint, so it drops to the back — the sweep loops the whole catalogue continuously
|
||||||
|
/// and resumes cleanly after restarts. Because the checkpoint is per-site, a band one
|
||||||
|
/// market just swept is still due on another.
|
||||||
|
/// <para>
|
||||||
|
/// The queue is source-agnostic: it's constructed with the checkpoint
|
||||||
|
/// <see cref="_source"/> and a <see cref="_targetBuilder"/> that turns a band into the
|
||||||
|
/// thing a worker needs — a free-text search for cs.money, a market URL for skin.land — so
|
||||||
|
/// one class drives every market. Register one instance per source.
|
||||||
|
/// </para>
|
||||||
/// <para>
|
/// <para>
|
||||||
/// A <see cref="_minResweepInterval"/> floor keeps a band from being re-handed-out until
|
/// A <see cref="_minResweepInterval"/> floor keeps a band from being re-handed-out until
|
||||||
/// its data is at least that stale. Without it the queue re-scrapes the whole catalogue
|
/// its data is at least that stale. Without it the queue re-scrapes the whole catalogue as
|
||||||
/// as fast as the workers run, which on a metered residential proxy is the dominant cost;
|
/// fast as the workers run, which on a metered residential proxy is the dominant cost; the
|
||||||
/// the floor trades a little price-freshness for a roughly linear bandwidth cut (a 6h
|
/// floor trades a little price-freshness for a roughly linear bandwidth cut. When every
|
||||||
/// floor vs. continuous ≈ 6× less, if a full pass takes ~1h). When every band is fresher
|
/// band is fresher than the floor the queue hands out nothing (workers idle) until one ages.
|
||||||
/// than the floor the queue hands out nothing (workers idle) until one ages past it.
|
|
||||||
/// </para>
|
/// </para>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public sealed class JobQueue
|
public sealed class JobQueue
|
||||||
{
|
{
|
||||||
// A leased condition can't be re-handed-out until released or the lease expires
|
// A leased condition can't be re-handed-out until released or the lease expires (so a
|
||||||
// (so a crashed worker's band returns to the pool instead of stalling forever).
|
// crashed worker's band returns to the pool instead of stalling forever).
|
||||||
private static readonly TimeSpan LeaseTtl = TimeSpan.FromMinutes(15);
|
private static readonly TimeSpan LeaseTtl = TimeSpan.FromMinutes(15);
|
||||||
private const int CandidateBatch = 100;
|
private const int CandidateBatch = 100;
|
||||||
|
|
||||||
|
private readonly string _source;
|
||||||
private readonly TimeSpan _minResweepInterval;
|
private readonly TimeSpan _minResweepInterval;
|
||||||
|
private readonly Func<Candidate, string> _targetBuilder;
|
||||||
private readonly SemaphoreSlim _gate = new(1, 1);
|
private readonly SemaphoreSlim _gate = new(1, 1);
|
||||||
private readonly ConcurrentDictionary<int, DateTimeOffset> _leases = new(); // conditionId -> leasedAt
|
private readonly ConcurrentDictionary<int, DateTimeOffset> _leases = new(); // conditionId -> leasedAt
|
||||||
private readonly ConcurrentDictionary<string, JobMapping> _inFlight = new(); // jobId -> mapping
|
private readonly ConcurrentDictionary<string, JobMapping> _inFlight = new(); // jobId -> mapping
|
||||||
|
|
||||||
|
/// <param name="source">
|
||||||
|
/// The <c>skin_condition_sweeps.Source</c> this queue reads/leases on (a
|
||||||
|
/// <c>SweepSource</c> value, e.g. "csmoney" / "skinland").
|
||||||
|
/// </param>
|
||||||
/// <param name="minResweepInterval">
|
/// <param name="minResweepInterval">
|
||||||
/// How stale a band's <c>ListingsSweptAt</c> must be before it's eligible again.
|
/// How stale a band's checkpoint must be before it's eligible again.
|
||||||
/// <see cref="TimeSpan.Zero"/> disables the floor (continuous re-sweep).
|
/// <see cref="TimeSpan.Zero"/> disables the floor (continuous re-sweep).
|
||||||
/// </param>
|
/// </param>
|
||||||
public JobQueue(TimeSpan minResweepInterval)
|
/// <param name="targetBuilder">Turns a claimed band into the worker's target string.</param>
|
||||||
|
public JobQueue(string source, TimeSpan minResweepInterval, Func<Candidate, string> targetBuilder)
|
||||||
{
|
{
|
||||||
|
_source = source;
|
||||||
_minResweepInterval = minResweepInterval;
|
_minResweepInterval = minResweepInterval;
|
||||||
|
_targetBuilder = targetBuilder;
|
||||||
}
|
}
|
||||||
|
|
||||||
public async Task<ScrapeJobDto?> ClaimNextAsync(SkinTrackerDbContext db, int maxPages, CancellationToken ct)
|
public async Task<ClaimedJob?> ClaimNextAsync(SkinTrackerDbContext db, int maxPages, CancellationToken ct)
|
||||||
{
|
{
|
||||||
await _gate.WaitAsync(ct);
|
await _gate.WaitAsync(ct);
|
||||||
try
|
try
|
||||||
@@ -58,17 +73,26 @@ public sealed class JobQueue
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Only consider bands that are never-swept or stale past the re-sweep floor,
|
// Only consider bands that are never-swept or stale past the re-sweep floor,
|
||||||
// then stalest first (never-swept null sorts before any timestamp). With the
|
// then stalest first (never-swept null sorts before any timestamp). The
|
||||||
// floor in place a fully-fresh catalogue yields no candidates, so workers idle
|
// checkpoint is read for THIS queue's source only (a correlated subquery over
|
||||||
// instead of needlessly re-pulling ~1MB pages on the metered proxy.
|
// the per-site sweep rows), so a band another market just swept is still
|
||||||
|
// never-swept here. With the floor in place a fully-fresh catalogue yields no
|
||||||
|
// candidates, so workers idle instead of needlessly re-pulling on the proxy.
|
||||||
var freshCutoff = DateTimeOffset.UtcNow - _minResweepInterval;
|
var freshCutoff = DateTimeOffset.UtcNow - _minResweepInterval;
|
||||||
var candidates = await db.SkinConditions
|
var candidates = await db.SkinConditions
|
||||||
.Where(c => c.ListingsSweptAt == null || c.ListingsSweptAt <= freshCutoff)
|
.Select(c => new
|
||||||
.OrderBy(c => c.ListingsSweptAt.HasValue)
|
{
|
||||||
.ThenBy(c => c.ListingsSweptAt)
|
Candidate = new Candidate(c.Id, c.SkinId, c.Skin.Weapon.Name, c.Skin.Name, c.Condition),
|
||||||
.Select(c => new Candidate(
|
SweptAt = c.Sweeps
|
||||||
c.Id, c.SkinId, c.Skin.Weapon.Name, c.Skin.Name, c.Condition))
|
.Where(s => s.Source == _source)
|
||||||
|
.Select(s => (DateTimeOffset?)s.SweptAt)
|
||||||
|
.FirstOrDefault(),
|
||||||
|
})
|
||||||
|
.Where(x => x.SweptAt == null || x.SweptAt <= freshCutoff)
|
||||||
|
.OrderBy(x => x.SweptAt.HasValue)
|
||||||
|
.ThenBy(x => x.SweptAt)
|
||||||
.Take(CandidateBatch)
|
.Take(CandidateBatch)
|
||||||
|
.Select(x => x.Candidate)
|
||||||
.ToListAsync(ct);
|
.ToListAsync(ct);
|
||||||
|
|
||||||
var pick = candidates.FirstOrDefault(c => !_leases.ContainsKey(c.ConditionId));
|
var pick = candidates.FirstOrDefault(c => !_leases.ContainsKey(c.ConditionId));
|
||||||
@@ -81,9 +105,7 @@ public sealed class JobQueue
|
|||||||
var jobId = Guid.NewGuid().ToString("N");
|
var jobId = Guid.NewGuid().ToString("N");
|
||||||
_inFlight[jobId] = new JobMapping(pick.SkinId, pick.ConditionId);
|
_inFlight[jobId] = new JobMapping(pick.SkinId, pick.ConditionId);
|
||||||
|
|
||||||
var code = Wear.ToCode(pick.Condition) ?? pick.Condition;
|
return new ClaimedJob(jobId, pick.SkinId, pick.ConditionId, _targetBuilder(pick), maxPages);
|
||||||
var search = $"{pick.Weapon} {pick.SkinName} {code}".Trim();
|
|
||||||
return new ScrapeJobDto(jobId, pick.SkinId, pick.ConditionId, search, maxPages);
|
|
||||||
}
|
}
|
||||||
finally
|
finally
|
||||||
{
|
{
|
||||||
@@ -107,5 +129,8 @@ public sealed class JobQueue
|
|||||||
|
|
||||||
public sealed record JobMapping(int SkinId, int ConditionId);
|
public sealed record JobMapping(int SkinId, int ConditionId);
|
||||||
|
|
||||||
private sealed record Candidate(int ConditionId, int SkinId, string Weapon, string SkinName, string Condition);
|
/// <summary>A claimed band ready to hand to a worker: its ids + built target string.</summary>
|
||||||
|
public sealed record ClaimedJob(string JobId, int SkinId, int ConditionId, string Target, int MaxPages);
|
||||||
|
|
||||||
|
public sealed record Candidate(int ConditionId, int SkinId, string Weapon, string SkinName, string Condition);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,13 +1,16 @@
|
|||||||
using BlueLaminate.C2;
|
using BlueLaminate.C2;
|
||||||
using BlueLaminate.Core.CsMoney;
|
using BlueLaminate.Core.CsMoney;
|
||||||
using BlueLaminate.Core.DependencyInjection;
|
using BlueLaminate.Core.DependencyInjection;
|
||||||
|
using BlueLaminate.Core.SkinLand;
|
||||||
|
using System.Text.Json.Serialization;
|
||||||
using BlueLaminate.EFCore.Data;
|
using BlueLaminate.EFCore.Data;
|
||||||
using Microsoft.EntityFrameworkCore;
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
|
||||||
// The C2: hands cs.money scrape jobs to Python workers and ingests their results.
|
// The C2: hands cs.money and skin.land scrape jobs to Python workers and ingests their
|
||||||
// Reuses the whole BlueLaminate stack (DB, ingest service) via the one composition root.
|
// results. Reuses the whole BlueLaminate stack (DB, ingest services) via the one
|
||||||
// Content root = the binary directory so appsettings.json is found regardless of the
|
// composition root. Content root = the binary directory so appsettings.json is found
|
||||||
// working directory the process is launched from (matches the CLI's approach).
|
// regardless of the working directory the process is launched from (matches the CLI).
|
||||||
var builder = WebApplication.CreateBuilder(new WebApplicationOptions
|
var builder = WebApplication.CreateBuilder(new WebApplicationOptions
|
||||||
{
|
{
|
||||||
Args = args,
|
Args = args,
|
||||||
@@ -15,17 +18,34 @@ var builder = WebApplication.CreateBuilder(new WebApplicationOptions
|
|||||||
});
|
});
|
||||||
builder.Services.AddBlueLaminateCore(builder.Configuration);
|
builder.Services.AddBlueLaminateCore(builder.Configuration);
|
||||||
|
|
||||||
// Re-sweep floor: don't re-hand-out a band whose listings were swept less than this
|
// Worker result bodies carry some numbers as JSON strings (skin.land's item_float comes
|
||||||
// many hours ago. The dominant cost on the metered residential proxy is re-scraping
|
// through as "0.60…"); allow string-encoded numbers so they bind, parsed straight to
|
||||||
// already-fresh bands, so this caps how often any band is re-pulled. 0 = continuous.
|
// decimal (full precision preserved). Harmless to cs.money's numeric fields.
|
||||||
|
builder.Services.ConfigureHttpJsonOptions(o =>
|
||||||
|
o.SerializerOptions.NumberHandling |= JsonNumberHandling.AllowReadingFromString);
|
||||||
|
|
||||||
|
// Re-sweep floor: don't re-hand-out a band whose listings were swept less than this many
|
||||||
|
// hours ago. The dominant cost on the metered residential proxy is re-scraping already-
|
||||||
|
// fresh bands, so this caps how often any band is re-pulled. 0 = continuous. Shared by
|
||||||
|
// both markets (each keeps its own per-site checkpoints, so the floors are independent).
|
||||||
var minResweepHours = builder.Configuration.GetValue("MinResweepHours", 6.0);
|
var minResweepHours = builder.Configuration.GetValue("MinResweepHours", 6.0);
|
||||||
builder.Services.AddSingleton(new JobQueue(TimeSpan.FromHours(minResweepHours)));
|
var floor = TimeSpan.FromHours(minResweepHours);
|
||||||
|
|
||||||
|
// One JobQueue per market source (same class, different checkpoint source + target). The
|
||||||
|
// candidate query reads each band's checkpoint for that queue's source only, so the two
|
||||||
|
// sweeps progress independently over the shared catalogue.
|
||||||
|
builder.Services.AddKeyedSingleton(CsMoneyIngestService.Source, new JobQueue(
|
||||||
|
CsMoneyIngestService.Source, floor,
|
||||||
|
c => $"{c.Weapon} {c.SkinName} {Wear.ToCode(c.Condition) ?? c.Condition}".Trim()));
|
||||||
|
builder.Services.AddKeyedSingleton(SkinLandIngestService.Source, new JobQueue(
|
||||||
|
SkinLandIngestService.Source, floor,
|
||||||
|
c => SkinLandSlug.MarketUrl(c.Weapon, c.SkinName, c.Condition)));
|
||||||
|
|
||||||
var app = builder.Build();
|
var app = builder.Build();
|
||||||
|
|
||||||
// Apply pending EF migrations at startup (incl. the market_listings view) so a fresh
|
// Apply pending EF migrations at startup (incl. the market_listings view) so a fresh
|
||||||
// container is ready with one command. Disable with AutoMigrate=false if you'd rather
|
// container is ready with one command. Disable with AutoMigrate=false if you'd rather run
|
||||||
// run `dotnet ef database update` yourself.
|
// `dotnet ef database update` yourself.
|
||||||
if (app.Configuration.GetValue("AutoMigrate", true))
|
if (app.Configuration.GetValue("AutoMigrate", true))
|
||||||
{
|
{
|
||||||
using var scope = app.Services.CreateScope();
|
using var scope = app.Services.CreateScope();
|
||||||
@@ -33,8 +53,8 @@ if (app.Configuration.GetValue("AutoMigrate", true))
|
|||||||
db.Database.Migrate();
|
db.Database.Migrate();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shared-secret gate. Workers send it as X-Worker-Token; if no token is configured
|
// Shared-secret gate. Workers send it as X-Worker-Token; if no token is configured the
|
||||||
// the gate is open (local dev). Set WorkerToken (config) / WORKER_TOKEN (env) in prod.
|
// gate is open (local dev). Set WorkerToken (config) / WORKER_TOKEN (env) in prod.
|
||||||
var workerToken = builder.Configuration["WorkerToken"];
|
var workerToken = builder.Configuration["WorkerToken"];
|
||||||
var maxPagesPerJob = builder.Configuration.GetValue("MaxPagesPerJob", 60);
|
var maxPagesPerJob = builder.Configuration.GetValue("MaxPagesPerJob", 60);
|
||||||
|
|
||||||
@@ -49,8 +69,10 @@ app.MapGet("/market/instance/{instanceId:int}", async (
|
|||||||
int instanceId, MarketPresenceService presence, CancellationToken ct) =>
|
int instanceId, MarketPresenceService presence, CancellationToken ct) =>
|
||||||
Results.Ok(await presence.ForInstanceAsync(instanceId, ct)));
|
Results.Ok(await presence.ForInstanceAsync(instanceId, ct)));
|
||||||
|
|
||||||
var jobs = app.MapGroup("/jobs");
|
// The same X-Worker-Token gate applied to every worker-facing route group.
|
||||||
jobs.AddEndpointFilter(async (ctx, next) =>
|
Func<RouteGroupBuilder, RouteGroupBuilder> withTokenGate = group =>
|
||||||
|
{
|
||||||
|
group.AddEndpointFilter(async (ctx, next) =>
|
||||||
{
|
{
|
||||||
if (!string.IsNullOrEmpty(workerToken)
|
if (!string.IsNullOrEmpty(workerToken)
|
||||||
&& ctx.HttpContext.Request.Headers["X-Worker-Token"].ToString() != workerToken)
|
&& ctx.HttpContext.Request.Headers["X-Worker-Token"].ToString() != workerToken)
|
||||||
@@ -60,19 +82,30 @@ jobs.AddEndpointFilter(async (ctx, next) =>
|
|||||||
|
|
||||||
return await next(ctx);
|
return await next(ctx);
|
||||||
});
|
});
|
||||||
|
return group;
|
||||||
|
};
|
||||||
|
|
||||||
|
// --- cs.money worker endpoints (unchanged behaviour) ------------------------------------
|
||||||
|
var jobs = withTokenGate(app.MapGroup("/jobs"));
|
||||||
|
|
||||||
// Claim the next stalest skin+wear to scrape. 204 when nothing is currently available
|
// Claim the next stalest skin+wear to scrape. 204 when nothing is currently available
|
||||||
// (everything in the stalest batch is already leased to other workers).
|
// (everything in the stalest batch is already leased to other workers).
|
||||||
jobs.MapGet("/next", async (JobQueue queue, SkinTrackerDbContext db, CancellationToken ct) =>
|
jobs.MapGet("/next", async (
|
||||||
|
[FromKeyedServices(CsMoneyIngestService.Source)] JobQueue queue,
|
||||||
|
SkinTrackerDbContext db, CancellationToken ct) =>
|
||||||
{
|
{
|
||||||
var job = await queue.ClaimNextAsync(db, maxPagesPerJob, ct);
|
var job = await queue.ClaimNextAsync(db, maxPagesPerJob, ct);
|
||||||
return job is null ? Results.NoContent() : Results.Ok(job);
|
return job is null
|
||||||
|
? Results.NoContent()
|
||||||
|
: Results.Ok(new ScrapeJobDto(job.JobId, job.SkinId, job.ConditionId, job.Target, job.MaxPages));
|
||||||
});
|
});
|
||||||
|
|
||||||
// Post a claimed job's scraped listings. The C2 owns parsing/persistence so the
|
// Post a claimed job's scraped listings. The C2 owns parsing/persistence so the worker
|
||||||
// worker stays dumb: it just forwards the raw cs.money items it gathered.
|
// stays dumb: it just forwards the raw cs.money items it gathered.
|
||||||
jobs.MapPost("/{jobId}/result", async (
|
jobs.MapPost("/{jobId}/result", async (
|
||||||
string jobId, ScrapeResultDto result, JobQueue queue, CsMoneyIngestService ingest, CancellationToken ct) =>
|
string jobId, ScrapeResultDto result,
|
||||||
|
[FromKeyedServices(CsMoneyIngestService.Source)] JobQueue queue,
|
||||||
|
CsMoneyIngestService ingest, CancellationToken ct) =>
|
||||||
{
|
{
|
||||||
var mapping = queue.Complete(jobId);
|
var mapping = queue.Complete(jobId);
|
||||||
if (mapping is null)
|
if (mapping is null)
|
||||||
@@ -89,4 +122,33 @@ jobs.MapPost("/{jobId}/result", async (
|
|||||||
return Results.Ok(r);
|
return Results.Ok(r);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// --- skin.land worker endpoints ---------------------------------------------------------
|
||||||
|
var skinLandJobs = withTokenGate(app.MapGroup("/skinland/jobs"));
|
||||||
|
|
||||||
|
skinLandJobs.MapGet("/next", async (
|
||||||
|
[FromKeyedServices(SkinLandIngestService.Source)] JobQueue queue,
|
||||||
|
SkinTrackerDbContext db, CancellationToken ct) =>
|
||||||
|
{
|
||||||
|
var job = await queue.ClaimNextAsync(db, maxPagesPerJob, ct);
|
||||||
|
return job is null
|
||||||
|
? Results.NoContent()
|
||||||
|
: Results.Ok(new SkinLandJobDto(job.JobId, job.SkinId, job.ConditionId, job.Target, job.MaxPages));
|
||||||
|
});
|
||||||
|
|
||||||
|
skinLandJobs.MapPost("/{jobId}/result", async (
|
||||||
|
string jobId, SkinLandResultDto result,
|
||||||
|
[FromKeyedServices(SkinLandIngestService.Source)] JobQueue queue,
|
||||||
|
SkinLandIngestService ingest, CancellationToken ct) =>
|
||||||
|
{
|
||||||
|
var mapping = queue.Complete(jobId);
|
||||||
|
if (mapping is null)
|
||||||
|
{
|
||||||
|
return Results.NotFound(new { error = "unknown or expired jobId" });
|
||||||
|
}
|
||||||
|
|
||||||
|
var complete = string.Equals(result.StoppedReason, "completed", StringComparison.OrdinalIgnoreCase);
|
||||||
|
var r = await ingest.IngestAsync(mapping.SkinId, mapping.ConditionId, result.Items ?? [], complete, ct);
|
||||||
|
return Results.Ok(r);
|
||||||
|
});
|
||||||
|
|
||||||
app.Run();
|
app.Run();
|
||||||
|
|||||||
@@ -1,122 +0,0 @@
|
|||||||
using BlueLaminate.Scraper.CsMoney;
|
|
||||||
using BlueLaminate.Scraper.Proxies;
|
|
||||||
using Microsoft.Extensions.DependencyInjection;
|
|
||||||
using Microsoft.Extensions.Hosting;
|
|
||||||
using Microsoft.Extensions.Options;
|
|
||||||
using System.CommandLine;
|
|
||||||
|
|
||||||
namespace BlueLaminate.Cli.Commands;
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// <c>capture-csmoney</c>: open the cs.money market through the IPRoyal residential
|
|
||||||
/// proxy (local forwarding hop, no CDP) in a real, non-headless browser. You clear
|
|
||||||
/// the Cloudflare challenge once; the tool then pages the listings API from inside
|
|
||||||
/// the cleared page with human-like pacing, dumping each page's JSON and reporting
|
|
||||||
/// how many pages survive before a re-challenge. Discovery/measurement tool — writes
|
|
||||||
/// nothing to the database. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.
|
|
||||||
/// </summary>
|
|
||||||
internal static class CaptureCsMoneyCommand
|
|
||||||
{
|
|
||||||
public static Command Build(IHost host)
|
|
||||||
{
|
|
||||||
var countryOption = new Option<string?>("--country")
|
|
||||||
{
|
|
||||||
Description = "ISO country code(s) for the exit IP, e.g. \"us\". Default: configured/random.",
|
|
||||||
};
|
|
||||||
var loadImagesOption = new Option<bool>("--load-images")
|
|
||||||
{
|
|
||||||
Description = "Load images (uses more bandwidth). Default off to conserve the metered plan.",
|
|
||||||
};
|
|
||||||
var pagesOption = new Option<int>("--pages")
|
|
||||||
{
|
|
||||||
Description = "Maximum offset pages (60 items each) to fetch before stopping.",
|
|
||||||
DefaultValueFactory = _ => 50,
|
|
||||||
};
|
|
||||||
var noProxyOption = new Option<bool>("--no-proxy")
|
|
||||||
{
|
|
||||||
Description = "Diagnostic: drive the browser on this machine's own IP (no IPRoyal proxy), "
|
|
||||||
+ "to isolate whether re-challenges are IP reputation vs. the webdriver fingerprint.",
|
|
||||||
};
|
|
||||||
var outOption = new Option<string>("--out")
|
|
||||||
{
|
|
||||||
Description = "Directory to write captured JSON pages to.",
|
|
||||||
DefaultValueFactory = _ => "csmoney-captures",
|
|
||||||
};
|
|
||||||
|
|
||||||
var command = new Command(
|
|
||||||
"capture-csmoney",
|
|
||||||
"Open the cs.money market through the residential proxy, clear Cloudflare once, then page "
|
|
||||||
+ "the listings API with pacing and report how many pages survive. Discovery/measurement "
|
|
||||||
+ "tool — writes nothing to the database. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.")
|
|
||||||
{
|
|
||||||
countryOption,
|
|
||||||
loadImagesOption,
|
|
||||||
pagesOption,
|
|
||||||
outOption,
|
|
||||||
noProxyOption,
|
|
||||||
};
|
|
||||||
|
|
||||||
command.SetAction((parseResult, ct) => RunAsync(
|
|
||||||
host,
|
|
||||||
parseResult.GetValue(countryOption),
|
|
||||||
parseResult.GetValue(loadImagesOption),
|
|
||||||
parseResult.GetValue(pagesOption),
|
|
||||||
parseResult.GetValue(outOption)!,
|
|
||||||
parseResult.GetValue(noProxyOption),
|
|
||||||
ct));
|
|
||||||
|
|
||||||
return command;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static async Task<int> RunAsync(
|
|
||||||
IHost host, string? country, bool loadImages, int pages, string outDir, bool noProxy,
|
|
||||||
CancellationToken ct)
|
|
||||||
{
|
|
||||||
using var scope = host.Services.CreateScope();
|
|
||||||
var options = scope.ServiceProvider.GetRequiredService<IOptions<CsMoneyOptions>>().Value;
|
|
||||||
|
|
||||||
var exitCountry = string.IsNullOrWhiteSpace(country) ? options.Country : country;
|
|
||||||
var images = loadImages || options.LoadImages;
|
|
||||||
|
|
||||||
Console.WriteLine($"Opening {options.MarketUrl}{(noProxy ? " (DIRECT — no proxy)" : "")}");
|
|
||||||
Console.WriteLine(
|
|
||||||
"Solve any Cloudflare challenge in the window and wait until the market grid "
|
|
||||||
+ "(items + prices) is actually visible — that means the session is cleared.");
|
|
||||||
Console.WriteLine(
|
|
||||||
$"Press Enter here once it's visible. The tool then pages up to {pages} page(s) of "
|
|
||||||
+ "listings from inside the cleared page and reports how far it gets.");
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
var capture = scope.ServiceProvider.GetRequiredService<CsMoneyCaptureService>();
|
|
||||||
|
|
||||||
// Block until the operator presses Enter; the browser stays open the whole
|
|
||||||
// time. ReadLine is sync, so push it off-thread.
|
|
||||||
var result = await capture.RunAsync(
|
|
||||||
outDir,
|
|
||||||
new ProxyRequest(Country: exitCountry, Sticky: true),
|
|
||||||
images,
|
|
||||||
useProxy: !noProxy,
|
|
||||||
pages,
|
|
||||||
() => Task.Run(() => Console.ReadLine(), ct),
|
|
||||||
ct);
|
|
||||||
|
|
||||||
var full = Path.GetFullPath(outDir);
|
|
||||||
Console.WriteLine();
|
|
||||||
Console.WriteLine(
|
|
||||||
$"Stopped: {result.StoppedReason}. {result.PagesSucceeded} page(s), "
|
|
||||||
+ $"{result.ItemsTotal} item(s) → {full}");
|
|
||||||
return result.PagesSucceeded > 0 ? 0 : 1;
|
|
||||||
}
|
|
||||||
catch (OperationCanceledException)
|
|
||||||
{
|
|
||||||
Console.Error.WriteLine("Capture cancelled.");
|
|
||||||
return 130;
|
|
||||||
}
|
|
||||||
catch (Exception ex)
|
|
||||||
{
|
|
||||||
Console.Error.WriteLine($"cs.money capture failed: {ex.Message}");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,72 +0,0 @@
|
|||||||
using BlueLaminate.Scraper.Proxies;
|
|
||||||
using Microsoft.Extensions.DependencyInjection;
|
|
||||||
using Microsoft.Extensions.Hosting;
|
|
||||||
using System.CommandLine;
|
|
||||||
|
|
||||||
namespace BlueLaminate.Cli.Commands;
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// <c>probe-proxy</c>: launch a non-headless Edge browser through the IPRoyal
|
|
||||||
/// residential proxy and print the exit IP, to confirm authentication works and
|
|
||||||
/// the IP is genuinely residential. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.
|
|
||||||
/// Costs a few KB, so it's the right first check against a metered plan.
|
|
||||||
/// </summary>
|
|
||||||
internal static class ProbeProxyCommand
|
|
||||||
{
|
|
||||||
public static Command Build(IHost host)
|
|
||||||
{
|
|
||||||
var countryOption = new Option<string?>("--country")
|
|
||||||
{
|
|
||||||
Description = "Optional ISO country code(s) for the exit IP, e.g. \"us\" or \"us,gb\". "
|
|
||||||
+ "Default: random.",
|
|
||||||
};
|
|
||||||
var rotatingOption = new Option<bool>("--rotating")
|
|
||||||
{
|
|
||||||
Description = "Use a rotating exit IP instead of a pinned (sticky) session.",
|
|
||||||
};
|
|
||||||
|
|
||||||
var command = new Command(
|
|
||||||
"probe-proxy",
|
|
||||||
"Launch non-headless Edge through the IPRoyal residential proxy and print the exit IP "
|
|
||||||
+ "to confirm auth works and the IP is residential. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.")
|
|
||||||
{
|
|
||||||
countryOption,
|
|
||||||
rotatingOption,
|
|
||||||
};
|
|
||||||
|
|
||||||
command.SetAction((parseResult, ct) => RunAsync(
|
|
||||||
host,
|
|
||||||
parseResult.GetValue(countryOption),
|
|
||||||
parseResult.GetValue(rotatingOption),
|
|
||||||
ct));
|
|
||||||
|
|
||||||
return command;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static async Task<int> RunAsync(
|
|
||||||
IHost host, string? country, bool rotating, CancellationToken ct)
|
|
||||||
{
|
|
||||||
using var scope = host.Services.CreateScope();
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
var probe = scope.ServiceProvider.GetRequiredService<ProxyProbe>();
|
|
||||||
var info = await probe.RunAsync(new ProxyRequest(Country: country, Sticky: !rotating));
|
|
||||||
|
|
||||||
Console.WriteLine();
|
|
||||||
Console.WriteLine($" Exit IP : {info.Ip}");
|
|
||||||
Console.WriteLine($" Location: {info.City}, {info.Region}, {info.Country}");
|
|
||||||
Console.WriteLine($" Org/ASN : {info.Org}");
|
|
||||||
Console.WriteLine($" Hostname: {info.Hostname ?? "—"}");
|
|
||||||
Console.WriteLine();
|
|
||||||
Console.WriteLine(
|
|
||||||
"Check Org/ASN: a consumer ISP = residential; a hosting provider = datacenter.");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
catch (Exception ex)
|
|
||||||
{
|
|
||||||
Console.Error.WriteLine($"Proxy probe failed: {ex.Message}");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -72,8 +72,6 @@ var root = new RootCommand("BlueLaminate CLI — Counter-Strike skin tracker too
|
|||||||
FetchListingsCommand.Build(host),
|
FetchListingsCommand.Build(host),
|
||||||
SweepListingsCommand.Build(host),
|
SweepListingsCommand.Build(host),
|
||||||
SweepCatalogCommand.Build(host),
|
SweepCatalogCommand.Build(host),
|
||||||
ProbeProxyCommand.Build(host),
|
|
||||||
CaptureCsMoneyCommand.Build(host),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Ctrl+C → cancel the action's token so long-running commands (e.g. sweep-catalog,
|
// Ctrl+C → cancel the action's token so long-running commands (e.g. sweep-catalog,
|
||||||
|
|||||||
@@ -10,14 +10,6 @@
|
|||||||
"SkinCatalog": {
|
"SkinCatalog": {
|
||||||
"Url": "https://raw.githubusercontent.com/ByMykel/CSGO-API/refs/heads/main/public/api/en/skins.json"
|
"Url": "https://raw.githubusercontent.com/ByMykel/CSGO-API/refs/heads/main/public/api/en/skins.json"
|
||||||
},
|
},
|
||||||
"CsMoney": {
|
|
||||||
"MarketUrl": "https://cs.money/market/buy/",
|
|
||||||
"ApiUrlTemplate": "https://cs.money/2.0/market/sell-orders?limit=60&offset={0}",
|
|
||||||
"Country": "",
|
|
||||||
"LoadImages": false,
|
|
||||||
"PageDelaySeconds": 2.5,
|
|
||||||
"PageJitterSeconds": 2.0
|
|
||||||
},
|
|
||||||
"Sweep": {
|
"Sweep": {
|
||||||
"PageDelay": "00:00:05",
|
"PageDelay": "00:00:05",
|
||||||
"MaxJitter": "00:00:03",
|
"MaxJitter": "00:00:03",
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ public sealed record CsMoneyIngestResult(
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public sealed class CsMoneyIngestService
|
public sealed class CsMoneyIngestService
|
||||||
{
|
{
|
||||||
public const string Source = "csmoney";
|
public const string Source = SweepSource.CsMoney;
|
||||||
|
|
||||||
private readonly SkinTrackerDbContext _db;
|
private readonly SkinTrackerDbContext _db;
|
||||||
private readonly ILogger<CsMoneyIngestService> _logger;
|
private readonly ILogger<CsMoneyIngestService> _logger;
|
||||||
@@ -192,7 +192,7 @@ public sealed class CsMoneyIngestService
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
var seed = pattern.ToString();
|
var seed = pattern;
|
||||||
var st = it.Asset.IsStatTrak;
|
var st = it.Asset.IsStatTrak;
|
||||||
var sv = it.Asset.IsSouvenir;
|
var sv = it.Asset.IsSouvenir;
|
||||||
|
|
||||||
@@ -280,13 +280,13 @@ public sealed class CsMoneyIngestService
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stamp this band's cs.money checkpoint (upsert into skin_condition_sweeps under
|
||||||
|
// the csmoney source). Caller persists via SaveChangesAsync.
|
||||||
private async Task StampCheckpointAsync(int? conditionId, DateTimeOffset now, CancellationToken ct)
|
private async Task StampCheckpointAsync(int? conditionId, DateTimeOffset now, CancellationToken ct)
|
||||||
{
|
{
|
||||||
if (conditionId is { } cid)
|
if (conditionId is { } cid)
|
||||||
{
|
{
|
||||||
await _db.SkinConditions
|
await SweepCheckpoints.StampConditionAsync(_db, cid, Source, now, ct);
|
||||||
.Where(c => c.Id == cid)
|
|
||||||
.ExecuteUpdateAsync(s => s.SetProperty(c => c.ListingsSweptAt, now), ct);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,10 +2,7 @@ using BlueLaminate.Core.Listings;
|
|||||||
using BlueLaminate.Core.Options;
|
using BlueLaminate.Core.Options;
|
||||||
using BlueLaminate.Core.Skins;
|
using BlueLaminate.Core.Skins;
|
||||||
using BlueLaminate.EFCore.DependencyInjection;
|
using BlueLaminate.EFCore.DependencyInjection;
|
||||||
using BlueLaminate.Scraper.Browser;
|
|
||||||
using BlueLaminate.Scraper.CsFloat;
|
using BlueLaminate.Scraper.CsFloat;
|
||||||
using BlueLaminate.Scraper.CsMoney;
|
|
||||||
using BlueLaminate.Scraper.Proxies;
|
|
||||||
using BlueLaminate.Scraper.Skins;
|
using BlueLaminate.Scraper.Skins;
|
||||||
using Microsoft.Extensions.Configuration;
|
using Microsoft.Extensions.Configuration;
|
||||||
using Microsoft.Extensions.DependencyInjection;
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
@@ -54,8 +51,6 @@ public static class ServiceCollectionExtensions
|
|||||||
.Bind(configuration.GetSection(SkinCatalogOptions.SectionName));
|
.Bind(configuration.GetSection(SkinCatalogOptions.SectionName));
|
||||||
services.AddOptions<SweepOptions>()
|
services.AddOptions<SweepOptions>()
|
||||||
.Bind(configuration.GetSection(SweepOptions.SectionName));
|
.Bind(configuration.GetSection(SweepOptions.SectionName));
|
||||||
services.AddOptions<CsMoneyOptions>()
|
|
||||||
.Bind(configuration.GetSection(CsMoneyOptions.SectionName));
|
|
||||||
|
|
||||||
// Typed-handler pooling via IHttpClientFactory; clients are scoped so a
|
// Typed-handler pooling via IHttpClientFactory; clients are scoped so a
|
||||||
// command's handler and the service it drives share one instance (and thus
|
// command's handler and the service it drives share one instance (and thus
|
||||||
@@ -72,42 +67,12 @@ public static class ServiceCollectionExtensions
|
|||||||
sp.GetRequiredService<IHttpClientFactory>().CreateClient(CatalogHttpClient),
|
sp.GetRequiredService<IHttpClientFactory>().CreateClient(CatalogHttpClient),
|
||||||
sp.GetRequiredService<IOptions<SkinCatalogOptions>>().Value));
|
sp.GetRequiredService<IOptions<SkinCatalogOptions>>().Value));
|
||||||
|
|
||||||
// Residential proxy provider (IPRoyal). Credentials come from configuration
|
|
||||||
// — IPROYAL_USERNAME / IPROYAL_PASSWORD env vars in practice. Resolution
|
|
||||||
// throws a clear error only when a proxy-using command actually needs it, so
|
|
||||||
// API-only commands (sync, fetch) run without proxy creds configured.
|
|
||||||
services.AddSingleton<IProxyProvider>(sp =>
|
|
||||||
{
|
|
||||||
var username = configuration["IPROYAL_USERNAME"];
|
|
||||||
var password = configuration["IPROYAL_PASSWORD"];
|
|
||||||
if (string.IsNullOrWhiteSpace(username) || string.IsNullOrWhiteSpace(password))
|
|
||||||
{
|
|
||||||
throw new InvalidOperationException(
|
|
||||||
"IPRoyal credentials are not configured. Set IPROYAL_USERNAME and "
|
|
||||||
+ "IPROYAL_PASSWORD (env vars or user secrets) before running a proxy command.");
|
|
||||||
}
|
|
||||||
|
|
||||||
return new IpRoyalProxyProvider(username, password);
|
|
||||||
});
|
|
||||||
|
|
||||||
// cs.money is driven through a real, non-headless browser (Selenium/Edge,
|
|
||||||
// zero CDP) routed through a local forwarding proxy that chains to the
|
|
||||||
// residential gateway, not an HttpClient.
|
|
||||||
services.AddSingleton<LocalForwardingProxyFactory>();
|
|
||||||
services.AddScoped<BrowserDriverFactory>();
|
|
||||||
services.AddScoped<ProxyProbe>();
|
|
||||||
services.AddScoped(sp => new CsMoneyCaptureService(
|
|
||||||
sp.GetRequiredService<IProxyProvider>(),
|
|
||||||
sp.GetRequiredService<LocalForwardingProxyFactory>(),
|
|
||||||
sp.GetRequiredService<BrowserDriverFactory>(),
|
|
||||||
sp.GetRequiredService<IOptions<CsMoneyOptions>>().Value,
|
|
||||||
sp.GetRequiredService<ILogger<CsMoneyCaptureService>>()));
|
|
||||||
|
|
||||||
// Application services (constructor injection; DbContext keeps them scoped).
|
// Application services (constructor injection; DbContext keeps them scoped).
|
||||||
services.AddScoped<ListingSweepService>();
|
services.AddScoped<ListingSweepService>();
|
||||||
services.AddScoped<SkinSyncService>();
|
services.AddScoped<SkinSyncService>();
|
||||||
services.AddScoped<CsMoney.CsMoneyIngestService>();
|
services.AddScoped<CsMoney.CsMoneyIngestService>();
|
||||||
services.AddScoped<CsMoney.MarketPresenceService>();
|
services.AddScoped<CsMoney.MarketPresenceService>();
|
||||||
|
services.AddScoped<SkinLand.SkinLandIngestService>();
|
||||||
|
|
||||||
return services;
|
return services;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ namespace BlueLaminate.Core.Listings;
|
|||||||
public sealed class ListingSweepService
|
public sealed class ListingSweepService
|
||||||
{
|
{
|
||||||
public const string Source = "listings";
|
public const string Source = "listings";
|
||||||
public const string CatalogSource = "listings-catalog";
|
public const string CatalogSource = SweepSource.CsFloatCatalog;
|
||||||
|
|
||||||
private readonly SkinTrackerDbContext _db;
|
private readonly SkinTrackerDbContext _db;
|
||||||
private readonly CsFloatListingsClient _client;
|
private readonly CsFloatListingsClient _client;
|
||||||
@@ -79,6 +79,9 @@ public sealed class ListingSweepService
|
|||||||
.Select(s => new { s.Id, s.DefIndex, s.PaintIndex })
|
.Select(s => new { s.Id, s.DefIndex, s.PaintIndex })
|
||||||
.ToDictionaryAsync(s => (s.DefIndex!.Value, s.PaintIndex!.Value), s => s.Id, ct);
|
.ToDictionaryAsync(s => (s.DefIndex!.Value, s.PaintIndex!.Value), s => s.Id, ct);
|
||||||
|
|
||||||
|
// (skin, wear) -> condition id, so each listing's wear band is set directly.
|
||||||
|
var conditionLookup = await BuildConditionLookupAsync(ct);
|
||||||
|
|
||||||
// Track which listing ids we touched this run, so a complete pass can flag
|
// Track which listing ids we touched this run, so a complete pass can flag
|
||||||
// the rest as Removed.
|
// the rest as Removed.
|
||||||
var touchedIds = new HashSet<string>();
|
var touchedIds = new HashSet<string>();
|
||||||
@@ -118,7 +121,7 @@ public sealed class ListingSweepService
|
|||||||
seen += page.Listings.Count;
|
seen += page.Listings.Count;
|
||||||
|
|
||||||
var (ins, upd, link, allKnown) = await IngestPageAsync(
|
var (ins, upd, link, allKnown) = await IngestPageAsync(
|
||||||
page.Listings, skinByIndex, touchedIds, touchedInstanceIds, now, ct);
|
page.Listings, skinByIndex, conditionLookup, touchedIds, touchedInstanceIds, now, ct);
|
||||||
inserted += ins;
|
inserted += ins;
|
||||||
updated += upd;
|
updated += upd;
|
||||||
linked += link;
|
linked += link;
|
||||||
@@ -207,7 +210,7 @@ public sealed class ListingSweepService
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
// Repeat the whole catalogue until cancelled. Re-querying each pass picks
|
// Repeat the whole catalogue until cancelled. Re-querying each pass picks
|
||||||
// up newly-synced skins and re-orders by the latest ListingsSweptAt.
|
// up newly-synced skins and re-orders by this site's latest checkpoint.
|
||||||
while (!ct.IsCancellationRequested)
|
while (!ct.IsCancellationRequested)
|
||||||
{
|
{
|
||||||
var now = DateTimeOffset.UtcNow;
|
var now = DateTimeOffset.UtcNow;
|
||||||
@@ -219,6 +222,9 @@ public sealed class ListingSweepService
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// (skin, wear) -> condition id, refreshed each pass alongside the units.
|
||||||
|
var conditionLookup = await BuildConditionLookupAsync(ct);
|
||||||
|
|
||||||
var index = 0;
|
var index = 0;
|
||||||
foreach (var unit in units)
|
foreach (var unit in units)
|
||||||
{
|
{
|
||||||
@@ -258,7 +264,7 @@ public sealed class ListingSweepService
|
|||||||
seen += page.Listings.Count;
|
seen += page.Listings.Count;
|
||||||
|
|
||||||
var (ins, upd, _, _) = await IngestPageAsync(
|
var (ins, upd, _, _) = await IngestPageAsync(
|
||||||
page.Listings, lookup, touchedIds, touchedInstanceIds, now, ct);
|
page.Listings, lookup, conditionLookup, touchedIds, touchedInstanceIds, now, ct);
|
||||||
inserted += ins;
|
inserted += ins;
|
||||||
updated += upd;
|
updated += upd;
|
||||||
|
|
||||||
@@ -293,20 +299,19 @@ public sealed class ListingSweepService
|
|||||||
{
|
{
|
||||||
removed += await MarkRemovedForSkinConditionAsync(
|
removed += await MarkRemovedForSkinConditionAsync(
|
||||||
unit.SkinId, unit.Condition!, touchedIds, now, ct);
|
unit.SkinId, unit.Condition!, touchedIds, now, ct);
|
||||||
await _db.SkinConditions
|
await SweepCheckpoints.StampConditionAsync(_db, conditionId, CatalogSource, now, ct);
|
||||||
.Where(c => c.Id == conditionId)
|
|
||||||
.ExecuteUpdateAsync(
|
|
||||||
setters => setters.SetProperty(c => c.ListingsSweptAt, now), ct);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
removed += await MarkRemovedForSkinAsync(unit.SkinId, touchedIds, now, ct);
|
removed += await MarkRemovedForSkinAsync(unit.SkinId, touchedIds, now, ct);
|
||||||
await _db.Skins
|
await SweepCheckpoints.StampSkinAsync(_db, unit.SkinId, CatalogSource, now, ct);
|
||||||
.Where(s => s.Id == unit.SkinId)
|
|
||||||
.ExecuteUpdateAsync(
|
|
||||||
setters => setters.SetProperty(s => s.ListingsSweptAt, now), ct);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Persist the checkpoint upsert now so a cancellation between bands
|
||||||
|
// doesn't lose it (the stamp goes through the change tracker, not a
|
||||||
|
// set-based update).
|
||||||
|
await _db.SaveChangesAsync(ct);
|
||||||
|
|
||||||
covered++;
|
covered++;
|
||||||
|
|
||||||
await PaceAsync(delayBetweenPages, ct);
|
await PaceAsync(delayBetweenPages, ct);
|
||||||
@@ -352,8 +357,9 @@ public sealed class ListingSweepService
|
|||||||
|
|
||||||
// One unit of catalogue-sweep work: a skin filtered to a single wear band, or a
|
// One unit of catalogue-sweep work: a skin filtered to a single wear band, or a
|
||||||
// whole skin when it has no bands. Float bounds + ConditionId are null for the
|
// whole skin when it has no bands. Float bounds + ConditionId are null for the
|
||||||
// whole-skin case (tracked by Skin.ListingsSweptAt instead). SweptAt drives the
|
// whole-skin case (checkpointed in skin_sweeps rather than skin_condition_sweeps).
|
||||||
// never-swept-first / stalest-first ordering.
|
// SweptAt is this site's checkpoint for the unit and drives the never-swept-first /
|
||||||
|
// stalest-first ordering.
|
||||||
private sealed record SweepUnit(
|
private sealed record SweepUnit(
|
||||||
int SkinId,
|
int SkinId,
|
||||||
int Def,
|
int Def,
|
||||||
@@ -383,6 +389,9 @@ public sealed class ListingSweepService
|
|||||||
// small (~2k skins) so this is negligible.
|
// small (~2k skins) so this is negligible.
|
||||||
private async Task<List<SweepUnit>> BuildSweepUnitsAsync(CancellationToken ct)
|
private async Task<List<SweepUnit>> BuildSweepUnitsAsync(CancellationToken ct)
|
||||||
{
|
{
|
||||||
|
// Read each unit's checkpoint for THIS site only (a correlated subquery over the
|
||||||
|
// per-source sweep rows), so a band swept on another site still sorts as
|
||||||
|
// never-swept here. No row for this source => null => front of the queue.
|
||||||
var skins = await _db.Skins
|
var skins = await _db.Skins
|
||||||
.Where(s => s.DefIndex != null && s.PaintIndex != null)
|
.Where(s => s.DefIndex != null && s.PaintIndex != null)
|
||||||
.Select(s => new
|
.Select(s => new
|
||||||
@@ -393,9 +402,22 @@ public sealed class ListingSweepService
|
|||||||
s.Name,
|
s.Name,
|
||||||
Weapon = s.Weapon.Name,
|
Weapon = s.Weapon.Name,
|
||||||
s.Rarity,
|
s.Rarity,
|
||||||
s.ListingsSweptAt,
|
SweptAt = s.Sweeps
|
||||||
|
.Where(x => x.Source == CatalogSource)
|
||||||
|
.Select(x => (DateTimeOffset?)x.SweptAt)
|
||||||
|
.FirstOrDefault(),
|
||||||
Conditions = s.Conditions
|
Conditions = s.Conditions
|
||||||
.Select(c => new { c.Id, c.Condition, c.MinFloat, c.MaxFloat, c.ListingsSweptAt })
|
.Select(c => new
|
||||||
|
{
|
||||||
|
c.Id,
|
||||||
|
c.Condition,
|
||||||
|
c.FloatMin,
|
||||||
|
c.FloatMax,
|
||||||
|
SweptAt = c.Sweeps
|
||||||
|
.Where(x => x.Source == CatalogSource)
|
||||||
|
.Select(x => (DateTimeOffset?)x.SweptAt)
|
||||||
|
.FirstOrDefault(),
|
||||||
|
})
|
||||||
.ToList(),
|
.ToList(),
|
||||||
})
|
})
|
||||||
.ToListAsync(ct);
|
.ToListAsync(ct);
|
||||||
@@ -408,7 +430,7 @@ public sealed class ListingSweepService
|
|||||||
units.Add(new SweepUnit(
|
units.Add(new SweepUnit(
|
||||||
s.Id, s.Def, s.Paint, s.Name, s.Weapon, s.Rarity,
|
s.Id, s.Def, s.Paint, s.Name, s.Weapon, s.Rarity,
|
||||||
ConditionId: null, Condition: null, MinFloat: null, MaxFloat: null,
|
ConditionId: null, Condition: null, MinFloat: null, MaxFloat: null,
|
||||||
SweptAt: s.ListingsSweptAt));
|
SweptAt: s.SweptAt));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -417,8 +439,8 @@ public sealed class ListingSweepService
|
|||||||
units.Add(new SweepUnit(
|
units.Add(new SweepUnit(
|
||||||
s.Id, s.Def, s.Paint, s.Name, s.Weapon, s.Rarity,
|
s.Id, s.Def, s.Paint, s.Name, s.Weapon, s.Rarity,
|
||||||
ConditionId: c.Id, Condition: c.Condition,
|
ConditionId: c.Id, Condition: c.Condition,
|
||||||
MinFloat: c.MinFloat, MaxFloat: c.MaxFloat,
|
MinFloat: c.FloatMin, MaxFloat: c.FloatMax,
|
||||||
SweptAt: c.ListingsSweptAt));
|
SweptAt: c.SweptAt));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -431,6 +453,15 @@ public sealed class ListingSweepService
|
|||||||
.ToList();
|
.ToList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Loads every skin_conditions row into a (skinId, wear name) -> skin_conditions.id map
// so each listing's wear band resolves from memory instead of a per-row query. Built
// up-front by the callers (once for an incremental run, once per catalogue pass). The
// wear name equals skin_conditions.condition (CSFloat's authoritative tier name, e.g.
// "Factory New").
private async Task<Dictionary<(int SkinId, string Condition), int>> BuildConditionLookupAsync(
    CancellationToken ct)
{
    // Project to just the three columns needed before materialising.
    var bands = _db.SkinConditions
        .Select(band => new { band.SkinId, band.Condition, band.Id });

    return await bands.ToDictionaryAsync(
        band => (band.SkinId, band.Condition),
        band => band.Id,
        ct);
}
|
||||||
|
|
||||||
// Flag this skin's once-Active listings that we didn't see this run as Removed.
|
// Flag this skin's once-Active listings that we didn't see this run as Removed.
|
||||||
private async Task<int> MarkRemovedForSkinAsync(
|
private async Task<int> MarkRemovedForSkinAsync(
|
||||||
int skinId, HashSet<string> touchedIds, DateTimeOffset now, CancellationToken ct)
|
int skinId, HashSet<string> touchedIds, DateTimeOffset now, CancellationToken ct)
|
||||||
@@ -472,6 +503,7 @@ public sealed class ListingSweepService
|
|||||||
private async Task<(int Inserted, int Updated, int Linked, bool AllKnown)> IngestPageAsync(
|
private async Task<(int Inserted, int Updated, int Linked, bool AllKnown)> IngestPageAsync(
|
||||||
IReadOnlyList<CsFloatListing> listings,
|
IReadOnlyList<CsFloatListing> listings,
|
||||||
IReadOnlyDictionary<(int, int), int> skinByIndex,
|
IReadOnlyDictionary<(int, int), int> skinByIndex,
|
||||||
|
IReadOnlyDictionary<(int, string), int> conditionBySkinAndWear,
|
||||||
HashSet<string> touchedIds,
|
HashSet<string> touchedIds,
|
||||||
HashSet<int> touchedInstanceIds,
|
HashSet<int> touchedInstanceIds,
|
||||||
DateTimeOffset now,
|
DateTimeOffset now,
|
||||||
@@ -501,6 +533,14 @@ public sealed class ListingSweepService
|
|||||||
linked++;
|
linked++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wear band: resolve from (skin, wear name) so both the catalogue and the
|
||||||
|
// incremental sweep set the same condition_id. Null when the skin is
|
||||||
|
// unknown or the item has no wear (e.g. vanilla knives).
|
||||||
|
int? conditionId = skinId is { } skinForCond && l.WearName is { } wearForCond
|
||||||
|
&& conditionBySkinAndWear.TryGetValue((skinForCond, wearForCond), out var resolvedCond)
|
||||||
|
? resolvedCond
|
||||||
|
: null;
|
||||||
|
|
||||||
// Resolve the physical item only when we know the skin — the
|
// Resolve the physical item only when we know the skin — the
|
||||||
// fingerprint is meaningless without it.
|
// fingerprint is meaningless without it.
|
||||||
var instance = skinId is { } sid
|
var instance = skinId is { } sid
|
||||||
@@ -520,6 +560,7 @@ public sealed class ListingSweepService
|
|||||||
row.Status = ListingStatus.Active;
|
row.Status = ListingStatus.Active;
|
||||||
row.RemovedAt = null;
|
row.RemovedAt = null;
|
||||||
row.SkinId = skinId;
|
row.SkinId = skinId;
|
||||||
|
row.ConditionId = conditionId;
|
||||||
row.AssetId = l.AssetId;
|
row.AssetId = l.AssetId;
|
||||||
row.SkinInstance = instance;
|
row.SkinInstance = instance;
|
||||||
updated++;
|
updated++;
|
||||||
@@ -527,7 +568,7 @@ public sealed class ListingSweepService
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
allKnown = false;
|
allKnown = false;
|
||||||
var entity = MapToEntity(l, skinId, now);
|
var entity = MapToEntity(l, skinId, conditionId, now);
|
||||||
entity.SkinInstance = instance;
|
entity.SkinInstance = instance;
|
||||||
_db.Listings.Add(entity);
|
_db.Listings.Add(entity);
|
||||||
inserted++;
|
inserted++;
|
||||||
@@ -541,16 +582,23 @@ public sealed class ListingSweepService
|
|||||||
// The fingerprint is (skin, full-precision float, seed, stattrak, souvenir).
|
// The fingerprint is (skin, full-precision float, seed, stattrak, souvenir).
|
||||||
// It is deliberately NOT unique — duped copies share it — so a match may
|
// It is deliberately NOT unique — duped copies share it — so a match may
|
||||||
// already represent more than one physical item; dupe detection runs later.
|
// already represent more than one physical item; dupe detection runs later.
|
||||||
private async Task<SkinInstance> ResolveInstanceAsync(
|
private async Task<SkinInstance?> ResolveInstanceAsync(
|
||||||
int skinId, CsFloatListing l, DateTimeOffset now, CancellationToken ct)
|
int skinId, CsFloatListing l, DateTimeOffset now, CancellationToken ct)
|
||||||
{
|
{
|
||||||
var seed = l.PaintSeed.ToString();
|
// Floatless items (e.g. Vanilla knives) can't be fingerprinted; skip the
|
||||||
|
// instance and leave the listing's SkinInstanceId null, like the cs.money path.
|
||||||
|
if (l.FloatValue is not { } floatValue)
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
var seed = l.PaintSeed;
|
||||||
|
|
||||||
// Check the change-tracker first (an instance just added earlier this page
|
// Check the change-tracker first (an instance just added earlier this page
|
||||||
// isn't queryable yet), then the database.
|
// isn't queryable yet), then the database.
|
||||||
var tracked = _db.ChangeTracker.Entries<SkinInstance>()
|
var tracked = _db.ChangeTracker.Entries<SkinInstance>()
|
||||||
.Select(e => e.Entity)
|
.Select(e => e.Entity)
|
||||||
.FirstOrDefault(i => i.SkinId == skinId && i.FloatValue == l.FloatValue
|
.FirstOrDefault(i => i.SkinId == skinId && i.FloatValue == floatValue
|
||||||
&& i.PaintSeed == seed && i.StatTrak == l.IsStatTrak && i.Souvenir == l.IsSouvenir);
|
&& i.PaintSeed == seed && i.StatTrak == l.IsStatTrak && i.Souvenir == l.IsSouvenir);
|
||||||
if (tracked is not null)
|
if (tracked is not null)
|
||||||
{
|
{
|
||||||
@@ -559,7 +607,7 @@ public sealed class ListingSweepService
|
|||||||
}
|
}
|
||||||
|
|
||||||
var instance = await _db.SkinInstances.FirstOrDefaultAsync(
|
var instance = await _db.SkinInstances.FirstOrDefaultAsync(
|
||||||
i => i.SkinId == skinId && i.FloatValue == l.FloatValue
|
i => i.SkinId == skinId && i.FloatValue == floatValue
|
||||||
&& i.PaintSeed == seed && i.StatTrak == l.IsStatTrak && i.Souvenir == l.IsSouvenir,
|
&& i.PaintSeed == seed && i.StatTrak == l.IsStatTrak && i.Souvenir == l.IsSouvenir,
|
||||||
ct);
|
ct);
|
||||||
|
|
||||||
@@ -572,7 +620,7 @@ public sealed class ListingSweepService
|
|||||||
instance = new SkinInstance
|
instance = new SkinInstance
|
||||||
{
|
{
|
||||||
SkinId = skinId,
|
SkinId = skinId,
|
||||||
FloatValue = l.FloatValue,
|
FloatValue = floatValue,
|
||||||
PaintSeed = seed,
|
PaintSeed = seed,
|
||||||
StatTrak = l.IsStatTrak,
|
StatTrak = l.IsStatTrak,
|
||||||
Souvenir = l.IsSouvenir,
|
Souvenir = l.IsSouvenir,
|
||||||
@@ -583,7 +631,7 @@ public sealed class ListingSweepService
|
|||||||
return instance;
|
return instance;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Listing MapToEntity(CsFloatListing l, int? skinId, DateTimeOffset now) => new()
|
private static Listing MapToEntity(CsFloatListing l, int? skinId, int? conditionId, DateTimeOffset now) => new()
|
||||||
{
|
{
|
||||||
CsFloatListingId = l.ListingId,
|
CsFloatListingId = l.ListingId,
|
||||||
Type = l.Type,
|
Type = l.Type,
|
||||||
@@ -602,6 +650,7 @@ public sealed class ListingSweepService
|
|||||||
SellerSteamId = l.SellerSteamId,
|
SellerSteamId = l.SellerSteamId,
|
||||||
InspectLink = l.InspectLink,
|
InspectLink = l.InspectLink,
|
||||||
SkinId = skinId,
|
SkinId = skinId,
|
||||||
|
ConditionId = conditionId,
|
||||||
FirstSeenAt = now,
|
FirstSeenAt = now,
|
||||||
LastSeenAt = now,
|
LastSeenAt = now,
|
||||||
Status = ListingStatus.Active,
|
Status = ListingStatus.Active,
|
||||||
|
|||||||
205
BlueLaminate/BlueLaminate.Core/SkinLand/SkinLandIngestService.cs
Normal file
205
BlueLaminate/BlueLaminate.Core/SkinLand/SkinLandIngestService.cs
Normal file
@@ -0,0 +1,205 @@
|
|||||||
|
using BlueLaminate.EFCore.Data;
|
||||||
|
using BlueLaminate.EFCore.Entities;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
|
||||||
|
namespace BlueLaminate.Core.SkinLand;
|
||||||
|
|
||||||
|
/// <summary>Outcome of ingesting one skin+wear scrape job's results.</summary>
/// <param name="Matched">Offers that passed the slug filter and were upserted.</param>
/// <param name="Inserted">Offers stored as new rows.</param>
/// <param name="Updated">Offers that refreshed an already-stored row.</param>
/// <param name="Removed">Previously Active rows flagged Removed (complete sweeps only).</param>
/// <param name="Skipped">Offers dropped by the slug filter, or every offer when the skin is unknown.</param>
public sealed record SkinLandIngestResult(
    int Matched,
    int Inserted,
    int Updated,
    int Removed,
    int Skipped);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Persists the offers the worker scraped for one targeted skin+wear job into the
|
||||||
|
/// <c>skin_land_listings</c> table. Mirrors <see cref="CsMoney.CsMoneyIngestService"/>'s
|
||||||
|
/// upsert-by-natural-key + soft-track-Removed + complete-vs-partial flow, but is thinner:
|
||||||
|
/// skin.land exposes no paint seed, so there's no <c>SkinInstance</c> resolution and no
|
||||||
|
/// dupe detection. The scraped page is already one exact skin+wear (the worker fetches it
|
||||||
|
/// by slug), so instead of cs.money's fuzzy name filter we only validate defensively that
|
||||||
|
/// each offer's slug matches the targeted band, skipping any that don't.
|
||||||
|
/// </summary>
|
||||||
|
public sealed class SkinLandIngestService
|
||||||
|
{
|
||||||
|
public const string Source = SweepSource.SkinLand;
|
||||||
|
|
||||||
|
private readonly SkinTrackerDbContext _db;
|
||||||
|
private readonly ILogger<SkinLandIngestService> _logger;
|
||||||
|
|
||||||
|
public SkinLandIngestService(SkinTrackerDbContext db, ILogger<SkinLandIngestService> logger)
|
||||||
|
{
|
||||||
|
_db = db;
|
||||||
|
_logger = logger;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <param name="complete">
|
||||||
|
/// True only when the worker walked every page of the skin+wear (stoppedReason
|
||||||
|
/// "completed"). On a partial sweep we upsert what we saw but skip Removed-marking,
|
||||||
|
/// the price point, and the swept-checkpoint — unseen offers may just be unfetched, so
|
||||||
|
/// the band stays un-stamped and gets re-queued rather than being wrongly pruned.
|
||||||
|
/// </param>
|
||||||
|
public async Task<SkinLandIngestResult> IngestAsync(
|
||||||
|
int skinId, int? conditionId, IReadOnlyList<SkinLandOffer> offers, bool complete, CancellationToken ct = default)
|
||||||
|
{
|
||||||
|
var now = DateTimeOffset.UtcNow;
|
||||||
|
|
||||||
|
var skin = await _db.Skins
|
||||||
|
.Where(s => s.Id == skinId)
|
||||||
|
.Select(s => new { s.Id, s.Name, Weapon = s.Weapon.Name })
|
||||||
|
.FirstOrDefaultAsync(ct);
|
||||||
|
if (skin is null)
|
||||||
|
{
|
||||||
|
_logger.LogWarning("Ingest skipped: skin {SkinId} not found.", skinId);
|
||||||
|
return new SkinLandIngestResult(0, 0, 0, 0, offers.Count);
|
||||||
|
}
|
||||||
|
|
||||||
|
string? conditionName = null;
|
||||||
|
if (conditionId is { } cid)
|
||||||
|
{
|
||||||
|
conditionName = await _db.SkinConditions
|
||||||
|
.Where(c => c.Id == cid).Select(c => c.Condition).FirstOrDefaultAsync(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Each offer carries its skin's slug; the targeted band has a known slug. When we
|
||||||
|
// can build the expected slug, keep only offers whose slug matches (a cheap guard
|
||||||
|
// against a wrong/redirected page); otherwise accept all (the worker targeted it).
|
||||||
|
var expectedSlug = conditionName is null
|
||||||
|
? null
|
||||||
|
: SkinLandSlug.Slugify($"{skin.Weapon} {skin.Name} {conditionName}");
|
||||||
|
var matched = offers.Where(o =>
|
||||||
|
expectedSlug is null
|
||||||
|
|| string.Equals(o.Skin?.Url, expectedSlug, StringComparison.OrdinalIgnoreCase)).ToList();
|
||||||
|
|
||||||
|
var skipped = offers.Count - matched.Count;
|
||||||
|
if (matched.Count == 0)
|
||||||
|
{
|
||||||
|
// Nothing for this skin+wear. If the sweep was complete this is genuine (none
|
||||||
|
// listed, or a slug mismatch) — stamp the checkpoint so it advances. If partial
|
||||||
|
// (e.g. challenged before any page), leave it un-stamped so the band is retried.
|
||||||
|
if (complete)
|
||||||
|
{
|
||||||
|
await StampCheckpointAsync(conditionId, now, ct);
|
||||||
|
await _db.SaveChangesAsync(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new SkinLandIngestResult(0, 0, 0, 0, skipped);
|
||||||
|
}
|
||||||
|
|
||||||
|
var listingIds = matched.Select(o => o.Id).ToList();
|
||||||
|
var existing = await _db.SkinLandListings
|
||||||
|
.Where(l => listingIds.Contains(l.ListingId))
|
||||||
|
.ToDictionaryAsync(l => l.ListingId, ct);
|
||||||
|
|
||||||
|
var inserted = 0;
|
||||||
|
var updated = 0;
|
||||||
|
var touched = new HashSet<long>();
|
||||||
|
|
||||||
|
foreach (var o in matched)
|
||||||
|
{
|
||||||
|
touched.Add(o.Id);
|
||||||
|
if (existing.TryGetValue(o.Id, out var row))
|
||||||
|
{
|
||||||
|
row.Price = o.FinalWithdrawalPrice ?? row.Price;
|
||||||
|
row.FloatValue = o.ItemFloat;
|
||||||
|
row.NameTag = o.NameTag;
|
||||||
|
row.InspectLink = o.ItemLink;
|
||||||
|
row.StickerCount = o.Stickers?.Count(s => s is not null) ?? 0;
|
||||||
|
row.LastSeenAt = now;
|
||||||
|
row.Status = ListingStatus.Active;
|
||||||
|
row.RemovedAt = null;
|
||||||
|
row.ConditionId = conditionId;
|
||||||
|
updated++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_db.SkinLandListings.Add(Map(o, skinId, conditionId, now));
|
||||||
|
inserted++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Persist inserts/updates before the set-based Removed query runs.
|
||||||
|
await _db.SaveChangesAsync(ct);
|
||||||
|
|
||||||
|
// The following only hold if we saw the FULL skin+wear set. On a partial sweep,
|
||||||
|
// offers we didn't fetch are not gone (so don't mark them Removed), the cheapest
|
||||||
|
// offer may be among the unfetched (so don't record a price point), and the band
|
||||||
|
// isn't fully swept (so don't stamp the checkpoint — let it re-queue).
|
||||||
|
var removed = 0;
|
||||||
|
if (complete)
|
||||||
|
{
|
||||||
|
removed = await MarkRemovedAsync(skinId, conditionId, touched, now, ct);
|
||||||
|
|
||||||
|
if (conditionId is { } condId)
|
||||||
|
{
|
||||||
|
var priced = matched.Where(m => m.FinalWithdrawalPrice is not null)
|
||||||
|
.Select(m => m.FinalWithdrawalPrice!.Value).ToList();
|
||||||
|
if (priced.Count > 0)
|
||||||
|
{
|
||||||
|
await _db.PriceHistories.AddAsync(new PriceHistory
|
||||||
|
{
|
||||||
|
SkinId = skinId,
|
||||||
|
ConditionId = condId,
|
||||||
|
Price = priced.Min(),
|
||||||
|
Currency = "USD",
|
||||||
|
RecordedAt = now,
|
||||||
|
Source = Source,
|
||||||
|
}, ct);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
await StampCheckpointAsync(conditionId, now, ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
await _db.SaveChangesAsync(ct);
|
||||||
|
|
||||||
|
_logger.LogInformation(
|
||||||
|
"skin.land ingest {Weapon} | {Skin} ({Wear}): {Matched} matched ({Ins} new, {Upd} upd, "
|
||||||
|
+ "{Rem} removed), {Skipped} skipped by filter{Partial}.",
|
||||||
|
skin.Weapon, skin.Name, conditionName ?? "all", matched.Count, inserted, updated, removed, skipped,
|
||||||
|
complete ? "" : " [PARTIAL — not pruned/checkpointed]");
|
||||||
|
|
||||||
|
return new SkinLandIngestResult(matched.Count, inserted, updated, removed, skipped);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flag this skin+wear's once-Active offers we didn't see this run as Removed.
|
||||||
|
private async Task<int> MarkRemovedAsync(
|
||||||
|
int skinId, int? conditionId, HashSet<long> touched, DateTimeOffset now, CancellationToken ct)
|
||||||
|
{
|
||||||
|
return await _db.SkinLandListings
|
||||||
|
.Where(l => l.SkinId == skinId
|
||||||
|
&& l.ConditionId == conditionId
|
||||||
|
&& l.Status == ListingStatus.Active
|
||||||
|
&& !touched.Contains(l.ListingId))
|
||||||
|
.ExecuteUpdateAsync(setters => setters
|
||||||
|
.SetProperty(l => l.Status, ListingStatus.Removed)
|
||||||
|
.SetProperty(l => l.RemovedAt, now), ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Records this wear band's checkpoint for the current Source (an upsert into
// skin_condition_sweeps via SweepCheckpoints). A null conditionId is a no-op.
// Nothing is saved here: the caller's SaveChangesAsync persists the stamp.
private async Task StampCheckpointAsync(int? conditionId, DateTimeOffset now, CancellationToken ct)
{
    if (conditionId is null)
    {
        return;
    }

    await SweepCheckpoints.StampConditionAsync(_db, conditionId.Value, Source, now, ct);
}
|
||||||
|
|
||||||
|
// Projects a scraped offer onto a fresh listing row for the targeted skin+wear.
// Missing pieces fall back to neutral values: empty name, false flags, zero stickers,
// zero price. Both seen-timestamps start at `now` and the row starts out Active.
private static SkinLandListing Map(SkinLandOffer o, int skinId, int? conditionId, DateTimeOffset now)
{
    var skin = o.Skin;

    // Null entries in the stickers array are not counted.
    var stickerCount = o.Stickers is { } stickers
        ? stickers.Count(sticker => sticker is not null)
        : 0;

    return new SkinLandListing
    {
        ListingId = o.Id,
        SkinId = skinId,
        ConditionId = conditionId,
        MarketHashName = skin?.Name ?? "",
        FloatValue = o.ItemFloat,
        IsStatTrak = skin?.IsStatTrak ?? false,
        IsSouvenir = skin?.IsSouvenir ?? false,
        NameTag = o.NameTag,
        StickerCount = stickerCount,
        Price = o.FinalWithdrawalPrice ?? 0m,
        Currency = "USD",
        InspectLink = o.ItemLink,
        FirstSeenAt = now,
        LastSeenAt = now,
        Status = ListingStatus.Active,
    };
}
|
||||||
|
}
|
||||||
35
BlueLaminate/BlueLaminate.Core/SkinLand/SkinLandJson.cs
Normal file
35
BlueLaminate/BlueLaminate.Core/SkinLand/SkinLandJson.cs
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
using System.Text.Json.Serialization;
|
||||||
|
|
||||||
|
namespace BlueLaminate.Core.SkinLand;
|
||||||
|
|
||||||
|
/// <summary>
/// The subset of a skin.land <c>obtained-skins</c> offer we persist, parsed from the
/// JSON the Python worker scrapes (the paginated <c>data[]</c> array). Decimals are
/// parsed directly (not via double) so the full-precision float round-trips exactly into
/// <c>numeric(20,18)</c>. skin.land exposes no paint seed / def index, so there's nothing
/// to fingerprint a <c>SkinInstance</c> with — the shape is intentionally thin.
/// <para>
/// The two decimal fields opt in to <see cref="JsonNumberHandling.AllowReadingFromString"/>:
/// by default System.Text.Json rejects a quoted number for a numeric property, so without
/// it an offer whose float/price arrives as a JSON string (e.g. <c>"0.0712…"</c>) would fail
/// to bind unless every caller remembered to configure its serializer options. Plain JSON
/// numbers continue to bind exactly as before.
/// </para>
/// </summary>
public sealed class SkinLandOffer
{
    /// <summary>Offer id (<c>id</c>).</summary>
    [JsonPropertyName("id")]
    public long Id { get; set; }

    /// <summary>Item float (<c>item_float</c>); accepted as a JSON number or a quoted number.</summary>
    [JsonPropertyName("item_float")]
    [JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)]
    public decimal? ItemFloat { get; set; }

    /// <summary>Price (<c>final_withdrawal_price</c>); accepted as a JSON number or a quoted number.</summary>
    [JsonPropertyName("final_withdrawal_price")]
    [JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)]
    public decimal? FinalWithdrawalPrice { get; set; }

    /// <summary>Name tag (<c>name_tag</c>), when present.</summary>
    [JsonPropertyName("name_tag")]
    public string? NameTag { get; set; }

    /// <summary>Item link (<c>item_link</c>), when present.</summary>
    [JsonPropertyName("item_link")]
    public string? ItemLink { get; set; }

    /// <summary>Sticker entries (<c>stickers</c>); the list itself and individual entries may be null.</summary>
    [JsonPropertyName("stickers")]
    public List<SkinLandSticker?>? Stickers { get; set; }

    /// <summary>The nested <c>skin</c> block describing the item.</summary>
    [JsonPropertyName("skin")]
    public SkinLandSkin? Skin { get; set; }
}

/// <summary>The <c>skin</c> block nested inside an offer.</summary>
public sealed class SkinLandSkin
{
    [JsonPropertyName("id")]
    public long? Id { get; set; }

    [JsonPropertyName("name")]
    public string? Name { get; set; }

    /// <summary>The market slug.</summary>
    [JsonPropertyName("url")]
    public string? Url { get; set; }

    [JsonPropertyName("is_stattrak")]
    public bool IsStatTrak { get; set; }

    [JsonPropertyName("is_souvenir")]
    public bool IsSouvenir { get; set; }
}

/// <summary>One entry of an offer's <c>stickers</c> array; only the name is read.</summary>
public sealed class SkinLandSticker
{
    [JsonPropertyName("name")]
    public string? Name { get; set; }
}
|
||||||
55
BlueLaminate/BlueLaminate.Core/SkinLand/SkinLandSlug.cs
Normal file
55
BlueLaminate/BlueLaminate.Core/SkinLand/SkinLandSlug.cs
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace BlueLaminate.Core.SkinLand;
|
||||||
|
|
||||||
|
/// <summary>
/// Turns a catalogue weapon + skin + wear into the matching skin.land market URL.
/// skin.land serves market pages at <c>/market/csgo/{slug}/</c>, and the slug is just
/// <c>{weapon}-{skin}-{wear}</c> in kebab case — checked against the live site, e.g.
/// "M4A4" + "Global Offensive" + "Battle-Scarred" → <c>m4a4-global-offensive-battle-scarred</c>
/// and "AK-47" + "Redline" + "Field-Tested" → <c>ak-47-redline-field-tested</c>. Because the
/// slug is derived, no discovery step or stored mapping is required.
/// <para>
/// StatTrak and Souvenir variants have their <em>own</em> pages on skin.land
/// (<c>stattrak-</c>/<c>souvenir-</c> slug prefixes). Only the base (non-special) page is
/// built here, which is the unit v1 sweeps per <c>SkinCondition</c>.
/// </para>
/// </summary>
public static class SkinLandSlug
{
    private const string MarketBase = "https://skin.land/market/csgo/";

    /// <summary>
    /// Full market URL for a weapon/skin/wear triple, e.g. "M4A4", "Global Offensive",
    /// "Battle-Scarred". The three parts are space-joined, slugified and wrapped in the
    /// market base path with a trailing slash.
    /// </summary>
    public static string MarketUrl(string weapon, string skinName, string condition)
    {
        var slug = Slugify(string.Join(' ', weapon, skinName, condition));
        return MarketBase + slug + "/";
    }

    /// <summary>
    /// Kebab-cases <paramref name="value"/>: letters and digits are lowercased and kept,
    /// each run of any other characters becomes one hyphen, and no hyphen is emitted at the
    /// start or end. "AK-47 | Redline (Field-Tested)" and "AK-47 Redline Field-Tested"
    /// therefore both yield "ak-47-redline-field-tested".
    /// </summary>
    public static string Slugify(string value)
    {
        var slug = new StringBuilder(value.Length);
        var separatorOwed = false;

        for (var i = 0; i < value.Length; i++)
        {
            var current = value[i];

            if (!char.IsLetterOrDigit(current))
            {
                // Remember the gap; it only materialises if more slug text follows.
                separatorOwed = true;
                continue;
            }

            // A gap before the very first kept character is dropped (no leading hyphen).
            if (separatorOwed && slug.Length > 0)
            {
                slug.Append('-');
            }

            separatorOwed = false;
            slug.Append(char.ToLowerInvariant(current));
        }

        return slug.ToString();
    }
}
|
||||||
@@ -8,7 +8,8 @@ public class InventoryItemConfiguration : IEntityTypeConfiguration<InventoryItem
|
|||||||
{
|
{
|
||||||
public void Configure(EntityTypeBuilder<InventoryItem> entity)
|
public void Configure(EntityTypeBuilder<InventoryItem> entity)
|
||||||
{
|
{
|
||||||
entity.HasIndex(e => e.AssetId);
|
// A Steam asset id identifies one physical copy; never store it twice.
|
||||||
|
entity.HasIndex(e => e.AssetId).IsUnique();
|
||||||
|
|
||||||
entity.HasOne(e => e.User)
|
entity.HasOne(e => e.User)
|
||||||
.WithMany(u => u.InventoryItems)
|
.WithMany(u => u.InventoryItems)
|
||||||
|
|||||||
@@ -31,6 +31,14 @@ public class ListingConfiguration : IEntityTypeConfiguration<Listing>
|
|||||||
.HasForeignKey(e => e.SkinId)
|
.HasForeignKey(e => e.SkinId)
|
||||||
.OnDelete(DeleteBehavior.SetNull);
|
.OnDelete(DeleteBehavior.SetNull);
|
||||||
|
|
||||||
|
// Wear band the sweep targeted (set directly from the sweep unit, not
|
||||||
|
// best-effort). Set null on delete so a condition row can change without
|
||||||
|
// blocking its listings — matching the cs.money/skin.land tables.
|
||||||
|
entity.HasOne(e => e.Condition)
|
||||||
|
.WithMany()
|
||||||
|
.HasForeignKey(e => e.ConditionId)
|
||||||
|
.OnDelete(DeleteBehavior.SetNull);
|
||||||
|
|
||||||
// Listings roll up to the physical item they represent.
|
// Listings roll up to the physical item they represent.
|
||||||
entity.HasOne(e => e.SkinInstance)
|
entity.HasOne(e => e.SkinInstance)
|
||||||
.WithMany(i => i.Listings)
|
.WithMany(i => i.Listings)
|
||||||
|
|||||||
@@ -8,12 +8,11 @@ public class SkinConditionConfiguration : IEntityTypeConfiguration<SkinCondition
|
|||||||
{
|
{
|
||||||
public void Configure(EntityTypeBuilder<SkinCondition> entity)
|
public void Configure(EntityTypeBuilder<SkinCondition> entity)
|
||||||
{
|
{
|
||||||
entity.Property(e => e.MinFloat).HasColumnType("numeric(10,9)");
|
entity.Property(e => e.FloatMin).HasColumnType("numeric(10,9)");
|
||||||
entity.Property(e => e.MaxFloat).HasColumnType("numeric(10,9)");
|
entity.Property(e => e.FloatMax).HasColumnType("numeric(10,9)");
|
||||||
|
|
||||||
// The catalogue sweep orders bands by this (never-swept first, then stalest),
|
// Per-site "last swept" checkpoints live in skin_condition_sweeps (one row per
|
||||||
// so index it like the equivalent column on skins.
|
// site); see SkinConditionSweepConfiguration for the indexes that order them.
|
||||||
entity.HasIndex(e => e.ListingsSweptAt);
|
|
||||||
|
|
||||||
entity.HasOne(e => e.Skin)
|
entity.HasOne(e => e.Skin)
|
||||||
.WithMany(s => s.Conditions)
|
.WithMany(s => s.Conditions)
|
||||||
|
|||||||
@@ -0,0 +1,24 @@
|
|||||||
|
using BlueLaminate.EFCore.Entities;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.EntityFrameworkCore.Metadata.Builders;
|
||||||
|
|
||||||
|
namespace BlueLaminate.EFCore.Configurations;
|
||||||
|
|
||||||
|
public class SkinConditionSweepConfiguration : IEntityTypeConfiguration<SkinConditionSweep>
{
    /// <summary>Indexes and the owning-band relationship for per-site wear-band checkpoints.</summary>
    public void Configure(EntityTypeBuilder<SkinConditionSweep> entity)
    {
        // Natural key — at most one checkpoint row per (band, site). The "stamp" upsert
        // in SweepCheckpoints depends on this being unique.
        entity
            .HasIndex(sweep => new { sweep.SkinConditionId, sweep.Source })
            .IsUnique();

        // A site's sweep filters by source and sorts by swept_at (never-swept first,
        // then stalest), so index exactly that pair.
        entity.HasIndex(sweep => new { sweep.Source, sweep.SweptAt });

        // Checkpoints belong to their band and are deleted along with it.
        entity
            .HasOne(sweep => sweep.SkinCondition)
            .WithMany(band => band.Sweeps)
            .HasForeignKey(sweep => sweep.SkinConditionId)
            .OnDelete(DeleteBehavior.Cascade);
    }
}
|
||||||
@@ -29,9 +29,8 @@ public class SkinConfiguration : IEntityTypeConfiguration<Skin>
|
|||||||
.IsUnique()
|
.IsUnique()
|
||||||
.HasFilter("def_index IS NOT NULL AND paint_index IS NOT NULL");
|
.HasFilter("def_index IS NOT NULL AND paint_index IS NOT NULL");
|
||||||
|
|
||||||
// The catalogue sweep orders skins by when they were last swept (nulls
|
// Per-site "last swept" checkpoints live in skin_sweeps (one row per site);
|
||||||
// first) to resume across capped runs; index that ordering.
|
// see SkinSweepConfiguration for the indexes that order them.
|
||||||
entity.HasIndex(e => e.ListingsSweptAt);
|
|
||||||
|
|
||||||
entity.HasOne(e => e.Weapon)
|
entity.HasOne(e => e.Weapon)
|
||||||
.WithMany(w => w.Skins)
|
.WithMany(w => w.Skins)
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
using BlueLaminate.EFCore.Entities;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.EntityFrameworkCore.Metadata.Builders;
|
||||||
|
|
||||||
|
namespace BlueLaminate.EFCore.Configurations;
|
||||||
|
|
||||||
|
public class SkinLandListingConfiguration : IEntityTypeConfiguration<SkinLandListing>
{
    /// <summary>Keys, column types, lookup indexes and catalogue links for skin.land listings.</summary>
    public void Configure(EntityTypeBuilder<SkinLandListing> entity)
    {
        // The skin.land offer id is the natural key: ingest upserts on it, so the
        // database must refuse a second row for the same offer.
        entity
            .HasIndex(listing => listing.ListingId)
            .IsUnique();

        entity
            .Property(listing => listing.Price)
            .HasPrecision(18, 2);

        // Stored at full precision (same as SkinInstance/cs.money). skin.land offers are
        // not fingerprinted, but the lossless float is kept for later analysis.
        entity
            .Property(listing => listing.FloatValue)
            .HasColumnType("numeric(20,18)");

        // Persist the enum as text so the table is readable on its own (project convention).
        entity
            .Property(listing => listing.Status)
            .HasConversion<string>();

        // Lookups made by the targeted scrape: by skin+wear, and by activity status.
        entity.HasIndex(listing => new { listing.SkinId, listing.ConditionId });
        entity.HasIndex(listing => listing.Status);

        // Every job targets a known skin, so the link is required. Restrict keeps a skin
        // from being deleted while listings still point at it.
        entity
            .HasOne(listing => listing.Skin)
            .WithMany()
            .HasForeignKey(listing => listing.SkinId)
            .OnDelete(DeleteBehavior.Restrict);

        // The wear-band link is optional and is nulled out if the band row goes away.
        entity
            .HasOne(listing => listing.Condition)
            .WithMany()
            .HasForeignKey(listing => listing.ConditionId)
            .OnDelete(DeleteBehavior.SetNull);
    }
}
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
using BlueLaminate.EFCore.Entities;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.EntityFrameworkCore.Metadata.Builders;
|
||||||
|
|
||||||
|
namespace BlueLaminate.EFCore.Configurations;
|
||||||
|
|
||||||
|
public class SkinSweepConfiguration : IEntityTypeConfiguration<SkinSweep>
{
    /// <summary>Indexes and the owning-skin relationship for per-site whole-skin checkpoints.</summary>
    public void Configure(EntityTypeBuilder<SkinSweep> entity)
    {
        // Natural key — at most one checkpoint row per (skin, site); the upsert relies on it.
        entity
            .HasIndex(sweep => new { sweep.SkinId, sweep.Source })
            .IsUnique();

        // Same shape as SkinConditionSweep: each sweep scans by (source, swept_at).
        entity.HasIndex(sweep => new { sweep.Source, sweep.SweptAt });

        // Checkpoints belong to their skin and are deleted along with it.
        entity
            .HasOne(sweep => sweep.Skin)
            .WithMany(skin => skin.Sweeps)
            .HasForeignKey(sweep => sweep.SkinId)
            .OnDelete(DeleteBehavior.Cascade);
    }
}
|
||||||
@@ -8,6 +8,10 @@ public class TradeConfiguration : IEntityTypeConfiguration<Trade>
|
|||||||
{
|
{
|
||||||
public void Configure(EntityTypeBuilder<Trade> entity)
|
public void Configure(EntityTypeBuilder<Trade> entity)
|
||||||
{
|
{
|
||||||
|
// Steam's trade id is the natural key for an observed trade. Nullable (some
|
||||||
|
// trades are reconstructed without one); Postgres keeps multiple NULLs distinct.
|
||||||
|
entity.HasIndex(e => e.SteamTradeId).IsUnique();
|
||||||
|
|
||||||
entity.HasOne(e => e.FromUser)
|
entity.HasOne(e => e.FromUser)
|
||||||
.WithMany(u => u.TradesSent)
|
.WithMany(u => u.TradesSent)
|
||||||
.HasForeignKey(e => e.FromUserId)
|
.HasForeignKey(e => e.FromUserId)
|
||||||
|
|||||||
@@ -23,6 +23,8 @@ public class SkinTrackerDbContext : DbContext
|
|||||||
public DbSet<Collection> Collections => Set<Collection>();
|
public DbSet<Collection> Collections => Set<Collection>();
|
||||||
public DbSet<Skin> Skins => Set<Skin>();
|
public DbSet<Skin> Skins => Set<Skin>();
|
||||||
public DbSet<SkinCondition> SkinConditions => Set<SkinCondition>();
|
public DbSet<SkinCondition> SkinConditions => Set<SkinCondition>();
|
||||||
|
public DbSet<SkinSweep> SkinSweeps => Set<SkinSweep>();
|
||||||
|
public DbSet<SkinConditionSweep> SkinConditionSweeps => Set<SkinConditionSweep>();
|
||||||
public DbSet<SteamUser> SteamUsers => Set<SteamUser>();
|
public DbSet<SteamUser> SteamUsers => Set<SteamUser>();
|
||||||
public DbSet<SkinInstance> SkinInstances => Set<SkinInstance>();
|
public DbSet<SkinInstance> SkinInstances => Set<SkinInstance>();
|
||||||
public DbSet<InventoryItem> InventoryItems => Set<InventoryItem>();
|
public DbSet<InventoryItem> InventoryItems => Set<InventoryItem>();
|
||||||
@@ -31,6 +33,7 @@ public class SkinTrackerDbContext : DbContext
|
|||||||
public DbSet<PriceHistory> PriceHistories => Set<PriceHistory>();
|
public DbSet<PriceHistory> PriceHistories => Set<PriceHistory>();
|
||||||
public DbSet<Listing> Listings => Set<Listing>();
|
public DbSet<Listing> Listings => Set<Listing>();
|
||||||
public DbSet<CsMoneyListing> CsMoneyListings => Set<CsMoneyListing>();
|
public DbSet<CsMoneyListing> CsMoneyListings => Set<CsMoneyListing>();
|
||||||
|
public DbSet<SkinLandListing> SkinLandListings => Set<SkinLandListing>();
|
||||||
|
|
||||||
/// <summary>Read-only cross-market view UNIONing the per-market listing tables.</summary>
|
/// <summary>Read-only cross-market view UNIONing the per-market listing tables.</summary>
|
||||||
public DbSet<MarketListing> MarketListings => Set<MarketListing>();
|
public DbSet<MarketListing> MarketListings => Set<MarketListing>();
|
||||||
@@ -47,6 +50,8 @@ public class SkinTrackerDbContext : DbContext
|
|||||||
modelBuilder.ApplyConfiguration(new CollectionConfiguration());
|
modelBuilder.ApplyConfiguration(new CollectionConfiguration());
|
||||||
modelBuilder.ApplyConfiguration(new SkinConfiguration());
|
modelBuilder.ApplyConfiguration(new SkinConfiguration());
|
||||||
modelBuilder.ApplyConfiguration(new SkinConditionConfiguration());
|
modelBuilder.ApplyConfiguration(new SkinConditionConfiguration());
|
||||||
|
modelBuilder.ApplyConfiguration(new SkinSweepConfiguration());
|
||||||
|
modelBuilder.ApplyConfiguration(new SkinConditionSweepConfiguration());
|
||||||
modelBuilder.ApplyConfiguration(new SteamUserConfiguration());
|
modelBuilder.ApplyConfiguration(new SteamUserConfiguration());
|
||||||
modelBuilder.ApplyConfiguration(new SkinInstanceConfiguration());
|
modelBuilder.ApplyConfiguration(new SkinInstanceConfiguration());
|
||||||
modelBuilder.ApplyConfiguration(new InventoryItemConfiguration());
|
modelBuilder.ApplyConfiguration(new InventoryItemConfiguration());
|
||||||
@@ -55,6 +60,7 @@ public class SkinTrackerDbContext : DbContext
|
|||||||
modelBuilder.ApplyConfiguration(new PriceHistoryConfiguration());
|
modelBuilder.ApplyConfiguration(new PriceHistoryConfiguration());
|
||||||
modelBuilder.ApplyConfiguration(new ListingConfiguration());
|
modelBuilder.ApplyConfiguration(new ListingConfiguration());
|
||||||
modelBuilder.ApplyConfiguration(new CsMoneyListingConfiguration());
|
modelBuilder.ApplyConfiguration(new CsMoneyListingConfiguration());
|
||||||
|
modelBuilder.ApplyConfiguration(new SkinLandListingConfiguration());
|
||||||
modelBuilder.ApplyConfiguration(new MarketListingConfiguration());
|
modelBuilder.ApplyConfiguration(new MarketListingConfiguration());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
64
BlueLaminate/BlueLaminate.EFCore/Data/SweepCheckpoints.cs
Normal file
64
BlueLaminate/BlueLaminate.EFCore/Data/SweepCheckpoints.cs
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
using BlueLaminate.EFCore.Entities;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
|
||||||
|
namespace BlueLaminate.EFCore.Data;
|
||||||
|
|
||||||
|
/// <summary>
/// Write helpers for the per-site sweep checkpoints (<see cref="SkinSweep"/> /
/// <see cref="SkinConditionSweep"/>). Each marketplace sweeper stamps its own row
/// keyed by <c>(entity, source)</c>, so a band swept on one site is still "never
/// swept" on another. Adding a new site means a new <see cref="SweepSource"/>
/// constant — no schema changes.
/// <para>
/// Reads stay inline in the sweep queries (a correlated subquery over the navigation
/// for the relevant <c>Source</c>) so EF can translate and order by them server-side.
/// </para>
/// <para>
/// Both stamps consult the context's already-tracked rows before querying the database.
/// A database query cannot see a row that was added but not yet saved, so without that
/// check two stamps for the same key inside one unit of work would queue two inserts and
/// the unique <c>(entity, source)</c> index would reject the save.
/// NOTE(review): two <em>separate</em> contexts stamping the same key concurrently can still
/// race on that unique index; nothing here guards against it.
/// </para>
/// </summary>
public static class SweepCheckpoints
{
    /// <summary>
    /// Record that <paramref name="source"/> just swept this wear band. Upserts the
    /// single (condition, source) row via the change tracker; the caller persists with
    /// <see cref="DbContext.SaveChangesAsync(CancellationToken)"/>.
    /// </summary>
    /// <param name="db">Context whose change tracker receives the insert/update.</param>
    /// <param name="conditionId">The wear band (skin condition) that was swept.</param>
    /// <param name="source">The site that swept it.</param>
    /// <param name="sweptAt">Timestamp to record as the band's last sweep.</param>
    /// <param name="ct">Cancels the lookup query.</param>
    public static async Task StampConditionAsync(
        SkinTrackerDbContext db, int conditionId, string source, DateTimeOffset sweptAt, CancellationToken ct)
    {
        // Tracked rows first (covers a pending, unsaved insert), then the database.
        var existing = db.SkinConditionSweeps.Local
                .FirstOrDefault(s => s.SkinConditionId == conditionId && s.Source == source)
            ?? await db.SkinConditionSweeps
                .FirstOrDefaultAsync(s => s.SkinConditionId == conditionId && s.Source == source, ct);

        if (existing is null)
        {
            db.SkinConditionSweeps.Add(new SkinConditionSweep
            {
                SkinConditionId = conditionId,
                Source = source,
                SweptAt = sweptAt,
            });
        }
        else
        {
            existing.SweptAt = sweptAt;
        }
    }

    /// <summary>As <see cref="StampConditionAsync"/>, for a whole-skin unit (no wear bands).</summary>
    /// <param name="db">Context whose change tracker receives the insert/update.</param>
    /// <param name="skinId">The skin that was swept as a single unit.</param>
    /// <param name="source">The site that swept it.</param>
    /// <param name="sweptAt">Timestamp to record as the skin's last sweep.</param>
    /// <param name="ct">Cancels the lookup query.</param>
    public static async Task StampSkinAsync(
        SkinTrackerDbContext db, int skinId, string source, DateTimeOffset sweptAt, CancellationToken ct)
    {
        // Tracked rows first (covers a pending, unsaved insert), then the database.
        var existing = db.SkinSweeps.Local
                .FirstOrDefault(s => s.SkinId == skinId && s.Source == source)
            ?? await db.SkinSweeps
                .FirstOrDefaultAsync(s => s.SkinId == skinId && s.Source == source, ct);

        if (existing is null)
        {
            db.SkinSweeps.Add(new SkinSweep
            {
                SkinId = skinId,
                Source = source,
                SweptAt = sweptAt,
            });
        }
        else
        {
            existing.SweptAt = sweptAt;
        }
    }
}
|
||||||
@@ -36,9 +36,12 @@ public class Listing
|
|||||||
/// <summary>"buy_now" or "auction".</summary>
|
/// <summary>"buy_now" or "auction".</summary>
|
||||||
public string Type { get; set; } = null!;
|
public string Type { get; set; } = null!;
|
||||||
|
|
||||||
/// <summary>Asking price in USD.</summary>
|
/// <summary>Asking price.</summary>
|
||||||
public decimal Price { get; set; }
|
public decimal Price { get; set; }
|
||||||
|
|
||||||
|
/// <summary>Currency of <see cref="Price"/>. CSFloat lists in USD.</summary>
|
||||||
|
public string Currency { get; set; } = "USD";
|
||||||
|
|
||||||
/// <summary>When CSFloat says the listing was created.</summary>
|
/// <summary>When CSFloat says the listing was created.</summary>
|
||||||
public DateTimeOffset ListedAt { get; set; }
|
public DateTimeOffset ListedAt { get; set; }
|
||||||
|
|
||||||
@@ -48,7 +51,13 @@ public class Listing
|
|||||||
public int PaintIndex { get; set; }
|
public int PaintIndex { get; set; }
|
||||||
public string MarketHashName { get; set; } = null!;
|
public string MarketHashName { get; set; } = null!;
|
||||||
public string? WearName { get; set; }
|
public string? WearName { get; set; }
|
||||||
public decimal FloatValue { get; set; }
|
|
||||||
|
/// <summary>
|
||||||
|
/// Exact float, or null for items with no float at all (e.g. Vanilla knives).
|
||||||
|
/// Null is deliberately distinct from a genuine 0.0 float; a floatless item
|
||||||
|
/// also can't be fingerprinted, so its <see cref="SkinInstanceId"/> stays null.
|
||||||
|
/// </summary>
|
||||||
|
public decimal? FloatValue { get; set; }
|
||||||
public int PaintSeed { get; set; }
|
public int PaintSeed { get; set; }
|
||||||
public bool IsStatTrak { get; set; }
|
public bool IsStatTrak { get; set; }
|
||||||
public bool IsSouvenir { get; set; }
|
public bool IsSouvenir { get; set; }
|
||||||
@@ -68,6 +77,15 @@ public class Listing
|
|||||||
public int? SkinId { get; set; }
|
public int? SkinId { get; set; }
|
||||||
public Skin? Skin { get; set; }
|
public Skin? Skin { get; set; }
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The wear band this listing belongs to. Unlike <see cref="SkinId"/> this is NOT
|
||||||
|
/// best-effort: the catalogue sweep pages one skin+wear band at a time, so the band
|
||||||
|
/// is set directly from the sweep unit. Null for whole-skin sweeps (e.g. vanilla
|
||||||
|
/// knives with no wear bands).
|
||||||
|
/// </summary>
|
||||||
|
public int? ConditionId { get; set; }
|
||||||
|
public SkinCondition? Condition { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The physical item (by fingerprint) this listing is for. Many listings over
|
/// The physical item (by fingerprint) this listing is for. Many listings over
|
||||||
/// time roll up to one instance, forming its market-movement history. Nullable
|
/// time roll up to one instance, forming its market-movement history. Nullable
|
||||||
|
|||||||
@@ -16,12 +16,6 @@ public class Skin
|
|||||||
public int? DefIndex { get; set; }
|
public int? DefIndex { get; set; }
|
||||||
public int? PaintIndex { get; set; }
|
public int? PaintIndex { get; set; }
|
||||||
|
|
||||||
// When the catalogue-driven listing sweep last fully covered this skin. The
|
|
||||||
// sweep processes least-recently-swept skins first (nulls = never swept), so
|
|
||||||
// capped runs chain across the whole catalogue and the stalest data refreshes
|
|
||||||
// first. Null until the first sweep reaches this skin.
|
|
||||||
public DateTimeOffset? ListingsSweptAt { get; set; }
|
|
||||||
|
|
||||||
public string Name { get; set; } = null!;
|
public string Name { get; set; } = null!;
|
||||||
public string Rarity { get; set; } = null!;
|
public string Rarity { get; set; } = null!;
|
||||||
public string? Description { get; set; }
|
public string? Description { get; set; }
|
||||||
@@ -44,6 +38,12 @@ public class Skin
|
|||||||
public bool? TrueFloat { get; private set; }
|
public bool? TrueFloat { get; private set; }
|
||||||
|
|
||||||
public ICollection<SkinCondition> Conditions { get; set; } = new List<SkinCondition>();
|
public ICollection<SkinCondition> Conditions { get; set; } = new List<SkinCondition>();
|
||||||
|
|
||||||
|
// Per-site "last swept" checkpoints for the whole-skin sweep unit — only used for
|
||||||
|
// skins with no wear bands (the per-band checkpoint lives on SkinCondition.Sweeps).
|
||||||
|
// The sweep processes never-swept (no row) / stalest skins first. See SkinSweep.
|
||||||
|
public ICollection<SkinSweep> Sweeps { get; set; } = new List<SkinSweep>();
|
||||||
|
|
||||||
public ICollection<SkinInstance> Instances { get; set; } = new List<SkinInstance>();
|
public ICollection<SkinInstance> Instances { get; set; } = new List<SkinInstance>();
|
||||||
public ICollection<PriceHistory> PriceHistories { get; set; } = new List<PriceHistory>();
|
public ICollection<PriceHistory> PriceHistories { get; set; } = new List<PriceHistory>();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,14 +7,15 @@ public class SkinCondition
|
|||||||
public Skin Skin { get; set; } = null!;
|
public Skin Skin { get; set; } = null!;
|
||||||
|
|
||||||
public string Condition { get; set; } = null!;
|
public string Condition { get; set; } = null!;
|
||||||
public decimal MinFloat { get; set; }
|
public decimal FloatMin { get; set; }
|
||||||
public decimal MaxFloat { get; set; }
|
public decimal FloatMax { get; set; }
|
||||||
|
|
||||||
// When the catalogue-driven listing sweep last fully covered this skin's wear
|
// Per-site "last swept" checkpoints for this wear band — one row per marketplace
|
||||||
// band. The sweep splits each skin by wear and pages one band at a time, so this
|
// (Source). The sweep splits each skin by wear and pages one band at a time, so
|
||||||
// is the per-band checkpoint: an interrupted run resumes from never-swept/stalest
|
// this is the per-band checkpoint: an interrupted run resumes from never-swept
|
||||||
// bands rather than redoing a whole skin. Null until the first sweep reaches it.
|
// (no row) / stalest bands rather than redoing a whole skin. Tracked per site so a
|
||||||
public DateTimeOffset? ListingsSweptAt { get; set; }
|
// band swept on CSFloat is still never-swept on cs.money. See SkinConditionSweep.
|
||||||
|
public ICollection<SkinConditionSweep> Sweeps { get; set; } = new List<SkinConditionSweep>();
|
||||||
|
|
||||||
public ICollection<SkinInstance> Instances { get; set; } = new List<SkinInstance>();
|
public ICollection<SkinInstance> Instances { get; set; } = new List<SkinInstance>();
|
||||||
public ICollection<PriceHistory> PriceHistories { get; set; } = new List<PriceHistory>();
|
public ICollection<PriceHistory> PriceHistories { get; set; } = new List<PriceHistory>();
|
||||||
|
|||||||
@@ -0,0 +1,21 @@
|
|||||||
|
namespace BlueLaminate.EFCore.Entities;
|
||||||
|
|
||||||
|
/// <summary>
/// A single marketplace's "last swept" marker for one wear band. The catalogue sweep
/// takes the least-recently-swept bands first — a band with no row counts as never
/// swept — so capped or looping runs work their way across the catalogue and refresh
/// the stalest data first. The key is <c>(SkinConditionId, Source)</c>: every site keeps
/// its own progress, and a band swept on one site is still never-swept on another.
/// </summary>
public class SkinConditionSweep
{
    public int Id { get; set; }

    /// <summary>The wear band this checkpoint belongs to.</summary>
    public int SkinConditionId { get; set; }

    public SkinCondition SkinCondition { get; set; } = null!;

    /// <summary>The site that performed the sweep — a <see cref="SweepSource"/> value.</summary>
    public string Source { get; set; } = null!;

    /// <summary>When that site last swept this band.</summary>
    public DateTimeOffset SweptAt { get; set; }
}
|
||||||
@@ -26,9 +26,11 @@ public class SkinInstance
|
|||||||
public SkinCondition? Condition { get; set; }
|
public SkinCondition? Condition { get; set; }
|
||||||
|
|
||||||
// The fingerprint. FloatValue is stored at full precision (see config) so
|
// The fingerprint. FloatValue is stored at full precision (see config) so
|
||||||
// that exact-match dupe detection isn't fooled by rounding.
|
// that exact-match dupe detection isn't fooled by rounding. An instance is
|
||||||
|
// only created for items that have a float + paint seed (skins), so both are
|
||||||
|
// non-null here even though some listings (e.g. vanilla knives) lack them.
|
||||||
public decimal FloatValue { get; set; }
|
public decimal FloatValue { get; set; }
|
||||||
public string PaintSeed { get; set; } = null!;
|
public int PaintSeed { get; set; }
|
||||||
public bool StatTrak { get; set; }
|
public bool StatTrak { get; set; }
|
||||||
public bool Souvenir { get; set; }
|
public bool Souvenir { get; set; }
|
||||||
public DateTimeOffset FirstSeenAt { get; set; }
|
public DateTimeOffset FirstSeenAt { get; set; }
|
||||||
|
|||||||
54
BlueLaminate/BlueLaminate.EFCore/Entities/SkinLandListing.cs
Normal file
54
BlueLaminate/BlueLaminate.EFCore/Entities/SkinLandListing.cs
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
namespace BlueLaminate.EFCore.Entities;
|
||||||
|
|
||||||
|
/// <summary>
/// A single offer seen on skin.land through its internal
/// <c>GET /api/v2/obtained-skins?skin_id={id}&amp;page={n}</c> endpoint. It is scraped by
/// the Python worker because skin.land has no public API and sits behind Cloudflare.
/// <para>
/// Stored in a dedicated table like <see cref="CsMoneyListing"/>, but intentionally thinner:
/// skin.land gives a full-precision float and a price but <b>no paint seed / def index</b>,
/// so an offer cannot be fingerprinted to a market-agnostic <see cref="SkinInstance"/> and
/// there is no cross-market roll-up or dupe detection here (revisit if pattern is ever
/// exposed). StatTrak and Souvenir items are on <em>separate</em> skin.land pages (their own
/// <c>stattrak-</c>/<c>souvenir-</c> slugs); v1 sweeps the base page per skin+wear, so
/// <see cref="IsStatTrak"/>/<see cref="IsSouvenir"/> are normally false.
/// </para>
/// Soft-tracked across sweeps in the same way as <see cref="CsMoneyListing"/>:
/// <see cref="FirstSeenAt"/>/<see cref="LastSeenAt"/> bound the observation window, and
/// <see cref="Status"/> becomes <see cref="ListingStatus.Removed"/> once a previously seen
/// offer stops appearing (sold/delisted).
/// </summary>
public class SkinLandListing
{
    public int Id { get; set; }

    /// <summary>skin.land's offer id (obtained-skin <c>id</c>). Natural key for dedup.</summary>
    public long ListingId { get; set; }

    // Catalogue links. As with cs.money (and unlike the CSFloat global sweep) these are
    // NOT best-effort: a scrape job targets exactly one skin+wear, so they are set directly.
    public int SkinId { get; set; }

    public Skin Skin { get; set; } = null!;

    public int? ConditionId { get; set; }

    public SkinCondition? Condition { get; set; }

    // Item identity, taken from the offer's skin block.
    public string MarketHashName { get; set; } = null!;

    /// <summary><c>item_float</c> (string, full precision).</summary>
    public decimal? FloatValue { get; set; }

    public bool IsStatTrak { get; set; }

    public bool IsSouvenir { get; set; }

    /// <summary><c>offer.name_tag</c> (rare; affects value).</summary>
    public string? NameTag { get; set; }

    public int StickerCount { get; set; }

    // Pricing. skin.land returns one price: the amount to buy/withdraw the item.

    /// <summary><c>final_withdrawal_price</c>.</summary>
    public decimal Price { get; set; }

    /// <summary>Currency of <see cref="Price"/>; prices are read in USD.</summary>
    public string Currency { get; set; } = "USD";

    /// <summary><c>item_link</c> (steam:// inspect).</summary>
    public string? InspectLink { get; set; }

    // Soft-tracking across sweeps.
    public DateTimeOffset FirstSeenAt { get; set; }

    public DateTimeOffset LastSeenAt { get; set; }

    public ListingStatus Status { get; set; }

    public DateTimeOffset? RemovedAt { get; set; }
}
|
||||||
20
BlueLaminate/BlueLaminate.EFCore/Entities/SkinSweep.cs
Normal file
20
BlueLaminate/BlueLaminate.EFCore/Entities/SkinSweep.cs
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
namespace BlueLaminate.EFCore.Entities;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// One site's "last swept" checkpoint for a whole skin — used only for skins with no
|
||||||
|
/// wear bands (e.g. vanilla knives), which are swept as a single unit. The per-band
|
||||||
|
/// equivalent is <see cref="SkinConditionSweep"/>. Keyed by <c>(SkinId, Source)</c> so
|
||||||
|
/// each marketplace tracks its own progress independently.
|
||||||
|
/// </summary>
|
||||||
|
public class SkinSweep
{
    /// <summary>Row identifier.</summary>
    public int Id { get; set; }

    /// <summary>Skin the checkpoint belongs to.</summary>
    public int SkinId { get; set; }

    /// <summary>Navigation to the swept skin.</summary>
    public Skin Skin { get; set; } = null!;

    /// <summary>
    /// The site that performed the sweep, stored as one of the
    /// <see cref="SweepSource"/> values.
    /// </summary>
    public string Source { get; set; } = null!;

    /// <summary>Timestamp of the sweep recorded for this skin and source.</summary>
    public DateTimeOffset SweptAt { get; set; }
}
|
||||||
23
BlueLaminate/BlueLaminate.EFCore/Entities/SweepSource.cs
Normal file
23
BlueLaminate/BlueLaminate.EFCore/Entities/SweepSource.cs
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
namespace BlueLaminate.EFCore.Entities;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Canonical site identifiers for per-site sweep checkpoints — the <c>Source</c>
|
||||||
|
/// discriminator on <see cref="SkinSweep"/> and <see cref="SkinConditionSweep"/>.
|
||||||
|
/// Each marketplace sweeper stamps its own checkpoint under one of these, so a band
|
||||||
|
/// swept on one site is still "never swept" on another.
|
||||||
|
/// <para>
|
||||||
|
/// To add sweeping for a new marketplace, add one constant here and have that
|
||||||
|
/// sweeper read/stamp checkpoints with it — no schema or query changes needed.
|
||||||
|
/// </para>
|
||||||
|
/// </summary>
|
||||||
|
public static class SweepSource
{
    /// <summary>
    /// Checkpoint source for the CSFloat catalogue-driven sweep
    /// (<c>ListingSweepService.SweepCatalogAsync</c>).
    /// </summary>
    public const string CsFloatCatalog = "listings-catalog";

    /// <summary>
    /// Checkpoint source for the cs.money worker sweep (<c>CsMoneyIngestService</c>).
    /// </summary>
    public const string CsMoney = "csmoney";

    /// <summary>
    /// Checkpoint source for the skin.land worker sweep (<c>SkinLandIngestService</c>).
    /// </summary>
    public const string SkinLand = "skinland";
}
|
||||||
1207
BlueLaminate/BlueLaminate.EFCore/Migrations/20260531203937_AddPerSiteSweepCheckpoints.Designer.cs
generated
Normal file
1207
BlueLaminate/BlueLaminate.EFCore/Migrations/20260531203937_AddPerSiteSweepCheckpoints.Designer.cs
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,146 @@
|
|||||||
|
using System;
|
||||||
|
using Microsoft.EntityFrameworkCore.Migrations;
|
||||||
|
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
|
||||||
|
|
||||||
|
#nullable disable
|
||||||
|
|
||||||
|
namespace BlueLaminate.EFCore.Migrations
|
||||||
|
{
|
||||||
|
/// <inheritdoc />
|
||||||
|
public partial class AddPerSiteSweepCheckpoints : Migration
{
    /// <summary>
    /// Swaps the single <c>listings_swept_at</c> checkpoint column on <c>skins</c> and
    /// <c>skin_conditions</c> for two per-source tables, <c>skin_sweeps</c> and
    /// <c>skin_condition_sweeps</c>. Timestamps held in the old columns are dropped,
    /// not copied across.
    /// </summary>
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        // Retire the old single-checkpoint columns, indexes first.
        migrationBuilder.DropIndex(
            name: "ix_skins_listings_swept_at",
            schema: "skintracker",
            table: "skins");

        migrationBuilder.DropIndex(
            name: "ix_skin_conditions_listings_swept_at",
            schema: "skintracker",
            table: "skin_conditions");

        migrationBuilder.DropColumn(
            name: "listings_swept_at",
            schema: "skintracker",
            table: "skins");

        migrationBuilder.DropColumn(
            name: "listings_swept_at",
            schema: "skintracker",
            table: "skin_conditions");

        // Per-wear-band checkpoints, one row per (skin_condition_id, source).
        migrationBuilder.CreateTable(
            name: "skin_condition_sweeps",
            schema: "skintracker",
            columns: cols => new
            {
                id = cols.Column<int>(type: "integer", nullable: false)
                    .Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn),
                skin_condition_id = cols.Column<int>(type: "integer", nullable: false),
                source = cols.Column<string>(type: "text", nullable: false),
                swept_at = cols.Column<DateTimeOffset>(type: "timestamp with time zone", nullable: false)
            },
            constraints: keys =>
            {
                keys.PrimaryKey("pk_skin_condition_sweeps", row => row.id);
                keys.ForeignKey(
                    name: "fk_skin_condition_sweeps_skin_conditions_skin_condition_id",
                    column: row => row.skin_condition_id,
                    principalSchema: "skintracker",
                    principalTable: "skin_conditions",
                    principalColumn: "id",
                    onDelete: ReferentialAction.Cascade);
            });

        // Whole-skin checkpoints, one row per (skin_id, source).
        migrationBuilder.CreateTable(
            name: "skin_sweeps",
            schema: "skintracker",
            columns: cols => new
            {
                id = cols.Column<int>(type: "integer", nullable: false)
                    .Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn),
                skin_id = cols.Column<int>(type: "integer", nullable: false),
                source = cols.Column<string>(type: "text", nullable: false),
                swept_at = cols.Column<DateTimeOffset>(type: "timestamp with time zone", nullable: false)
            },
            constraints: keys =>
            {
                keys.PrimaryKey("pk_skin_sweeps", row => row.id);
                keys.ForeignKey(
                    name: "fk_skin_sweeps_skins_skin_id",
                    column: row => row.skin_id,
                    principalSchema: "skintracker",
                    principalTable: "skins",
                    principalColumn: "id",
                    onDelete: ReferentialAction.Cascade);
            });

        // Each table gets a unique (owner, source) index plus a (source, swept_at) index.
        migrationBuilder.CreateIndex(
            name: "ix_skin_condition_sweeps_skin_condition_id_source",
            schema: "skintracker",
            table: "skin_condition_sweeps",
            columns: new[] { "skin_condition_id", "source" },
            unique: true);

        migrationBuilder.CreateIndex(
            name: "ix_skin_condition_sweeps_source_swept_at",
            schema: "skintracker",
            table: "skin_condition_sweeps",
            columns: new[] { "source", "swept_at" });

        migrationBuilder.CreateIndex(
            name: "ix_skin_sweeps_skin_id_source",
            schema: "skintracker",
            table: "skin_sweeps",
            columns: new[] { "skin_id", "source" },
            unique: true);

        migrationBuilder.CreateIndex(
            name: "ix_skin_sweeps_source_swept_at",
            schema: "skintracker",
            table: "skin_sweeps",
            columns: new[] { "source", "swept_at" });
    }

    /// <summary>
    /// Drops the two per-source checkpoint tables and re-creates the nullable
    /// <c>listings_swept_at</c> columns and their indexes. The restored columns start
    /// out empty.
    /// </summary>
    protected override void Down(MigrationBuilder migrationBuilder)
    {
        migrationBuilder.DropTable(
            name: "skin_condition_sweeps",
            schema: "skintracker");

        migrationBuilder.DropTable(
            name: "skin_sweeps",
            schema: "skintracker");

        migrationBuilder.AddColumn<DateTimeOffset>(
            name: "listings_swept_at",
            schema: "skintracker",
            table: "skins",
            type: "timestamp with time zone",
            nullable: true);

        migrationBuilder.AddColumn<DateTimeOffset>(
            name: "listings_swept_at",
            schema: "skintracker",
            table: "skin_conditions",
            type: "timestamp with time zone",
            nullable: true);

        migrationBuilder.CreateIndex(
            name: "ix_skins_listings_swept_at",
            schema: "skintracker",
            table: "skins",
            column: "listings_swept_at");

        migrationBuilder.CreateIndex(
            name: "ix_skin_conditions_listings_swept_at",
            schema: "skintracker",
            table: "skin_conditions",
            column: "listings_swept_at");
    }
}
|
||||||
|
}
|
||||||
1323
BlueLaminate/BlueLaminate.EFCore/Migrations/20260531212842_AddSkinLandListings.Designer.cs
generated
Normal file
1323
BlueLaminate/BlueLaminate.EFCore/Migrations/20260531212842_AddSkinLandListings.Designer.cs
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,239 @@
|
|||||||
|
using System;
|
||||||
|
using Microsoft.EntityFrameworkCore.Migrations;
|
||||||
|
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
|
||||||
|
|
||||||
|
#nullable disable
|
||||||
|
|
||||||
|
namespace BlueLaminate.EFCore.Migrations
|
||||||
|
{
|
||||||
|
/// <inheritdoc />
|
||||||
|
public partial class AddSkinLandListings : Migration
{
    // The csfloat + cs.money arms of skintracker.market_listings, WITHOUT a closing
    // semicolon. Up appends the skin.land arm to it; Down terminates it as-is.
    private const string PreSkinLandViewSql = """
        CREATE OR REPLACE VIEW skintracker.market_listings AS
        SELECT
        'csfloat'::text AS marketplace,
        l.cs_float_listing_id AS external_id,
        l.skin_id AS skin_id,
        NULL::integer AS condition_id,
        l.skin_instance_id AS skin_instance_id,
        l.market_hash_name AS market_hash_name,
        l.wear_name AS wear,
        l.float_value AS float_value,
        l.paint_seed AS paint_seed,
        l.is_stat_trak AS is_stat_trak,
        l.is_souvenir AS is_souvenir,
        l.sticker_count AS sticker_count,
        l.price AS price,
        'USD'::text AS currency,
        l.inspect_link AS inspect_link,
        l.asset_id AS asset_id,
        l.status AS status,
        l.first_seen_at AS first_seen_at,
        l.last_seen_at AS last_seen_at,
        l.removed_at AS removed_at
        FROM skintracker.listings l
        UNION ALL
        SELECT
        'csmoney'::text,
        c.sell_order_id::text,
        c.skin_id,
        c.condition_id,
        c.skin_instance_id,
        c.market_hash_name,
        -- Normalise cs.money's wear short code to the full wear name the
        -- other arms emit (csfloat wear_name / skinland condition), so the
        -- view's `wear` column is consistent across marketplaces.
        CASE lower(c.quality)
        WHEN 'fn' THEN 'Factory New'
        WHEN 'mw' THEN 'Minimal Wear'
        WHEN 'ft' THEN 'Field-Tested'
        WHEN 'ww' THEN 'Well-Worn'
        WHEN 'bs' THEN 'Battle-Scarred'
        ELSE c.quality
        END,
        c.float_value,
        c.paint_seed,
        c.is_stat_trak,
        c.is_souvenir,
        c.sticker_count,
        c.price,
        c.currency,
        c.inspect_link,
        c.asset_id,
        c.status,
        c.first_seen_at,
        c.last_seen_at,
        c.removed_at
        FROM skintracker.cs_money_listings c
        """;

    // The skin.land arm, terminated. The literal deliberately opens with an empty line
    // so that it continues PreSkinLandViewSql on a new line when the two are joined.
    private const string SkinLandArmSql = """

        UNION ALL
        SELECT
        'skinland'::text,
        s.listing_id::text,
        s.skin_id,
        s.condition_id,
        NULL::integer,
        s.market_hash_name,
        sc.condition,
        s.float_value,
        NULL::integer,
        s.is_stat_trak,
        s.is_souvenir,
        s.sticker_count,
        s.price,
        s.currency,
        s.inspect_link,
        NULL::text,
        s.status,
        s.first_seen_at,
        s.last_seen_at,
        s.removed_at
        FROM skintracker.skin_land_listings s
        LEFT JOIN skintracker.skin_conditions sc ON sc.id = s.condition_id;
        """;

    /// <summary>
    /// Creates <c>skin_land_listings</c> with its indexes, then redefines the
    /// <c>market_listings</c> view so it also unions in the skin.land offers.
    /// </summary>
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        migrationBuilder.CreateTable(
            name: "skin_land_listings",
            schema: "skintracker",
            columns: cols => new
            {
                id = cols.Column<int>(type: "integer", nullable: false)
                    .Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn),
                listing_id = cols.Column<long>(type: "bigint", nullable: false),
                skin_id = cols.Column<int>(type: "integer", nullable: false),
                condition_id = cols.Column<int>(type: "integer", nullable: true),
                market_hash_name = cols.Column<string>(type: "text", nullable: false),
                float_value = cols.Column<decimal>(type: "numeric(20,18)", nullable: true),
                is_stat_trak = cols.Column<bool>(type: "boolean", nullable: false),
                is_souvenir = cols.Column<bool>(type: "boolean", nullable: false),
                name_tag = cols.Column<string>(type: "text", nullable: true),
                sticker_count = cols.Column<int>(type: "integer", nullable: false),
                price = cols.Column<decimal>(type: "numeric(18,2)", precision: 18, scale: 2, nullable: false),
                currency = cols.Column<string>(type: "text", nullable: false),
                inspect_link = cols.Column<string>(type: "text", nullable: true),
                first_seen_at = cols.Column<DateTimeOffset>(type: "timestamp with time zone", nullable: false),
                last_seen_at = cols.Column<DateTimeOffset>(type: "timestamp with time zone", nullable: false),
                status = cols.Column<string>(type: "text", nullable: false),
                removed_at = cols.Column<DateTimeOffset>(type: "timestamp with time zone", nullable: true)
            },
            constraints: keys =>
            {
                keys.PrimaryKey("pk_skin_land_listings", row => row.id);
                keys.ForeignKey(
                    name: "fk_skin_land_listings_skin_conditions_condition_id",
                    column: row => row.condition_id,
                    principalSchema: "skintracker",
                    principalTable: "skin_conditions",
                    principalColumn: "id",
                    onDelete: ReferentialAction.SetNull);
                keys.ForeignKey(
                    name: "fk_skin_land_listings_skins_skin_id",
                    column: row => row.skin_id,
                    principalSchema: "skintracker",
                    principalTable: "skins",
                    principalColumn: "id",
                    onDelete: ReferentialAction.Restrict);
            });

        migrationBuilder.CreateIndex(
            name: "ix_skin_land_listings_condition_id",
            schema: "skintracker",
            table: "skin_land_listings",
            column: "condition_id");

        migrationBuilder.CreateIndex(
            name: "ix_skin_land_listings_listing_id",
            schema: "skintracker",
            table: "skin_land_listings",
            column: "listing_id",
            unique: true);

        migrationBuilder.CreateIndex(
            name: "ix_skin_land_listings_skin_id_condition_id",
            schema: "skintracker",
            table: "skin_land_listings",
            columns: new[] { "skin_id", "condition_id" });

        migrationBuilder.CreateIndex(
            name: "ix_skin_land_listings_status",
            schema: "skintracker",
            table: "skin_land_listings",
            column: "status");

        // Add a skin.land arm to the cross-market read view. skin.land exposes no paint
        // seed / asset id / instance fingerprint, so the arm emits NULL for those
        // columns; its wear is read from the joined condition row because the offer
        // table does not store it.
        migrationBuilder.Sql(PreSkinLandViewSql + SkinLandArmSql);
    }

    /// <summary>
    /// Puts the <c>market_listings</c> view back to its csfloat + cs.money form and then
    /// drops <c>skin_land_listings</c>.
    /// </summary>
    protected override void Down(MigrationBuilder migrationBuilder)
    {
        // The view is redefined before the table is dropped so that it never points at
        // a missing relation.
        migrationBuilder.Sql(PreSkinLandViewSql + ";");

        migrationBuilder.DropTable(
            name: "skin_land_listings",
            schema: "skintracker");
    }
}
|
||||||
|
}
|
||||||
1323
BlueLaminate/BlueLaminate.EFCore/Migrations/20260601024227_MakeListingFloatNullable.Designer.cs
generated
Normal file
1323
BlueLaminate/BlueLaminate.EFCore/Migrations/20260601024227_MakeListingFloatNullable.Designer.cs
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,38 @@
|
|||||||
|
using Microsoft.EntityFrameworkCore.Migrations;
|
||||||
|
|
||||||
|
#nullable disable
|
||||||
|
|
||||||
|
namespace BlueLaminate.EFCore.Migrations
|
||||||
|
{
|
||||||
|
/// <inheritdoc />
|
||||||
|
public partial class MakeListingFloatNullable : Migration
{
    /// <summary>
    /// Makes <c>skintracker.listings.float_value</c> nullable; the column type stays
    /// <c>numeric(20,18)</c>.
    /// </summary>
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        migrationBuilder.AlterColumn<decimal>(
            name: "float_value",
            schema: "skintracker",
            table: "listings",
            type: "numeric(20,18)",
            nullable: true,
            oldClrType: typeof(decimal),
            oldType: "numeric(20,18)");
    }

    /// <summary>
    /// Makes <c>skintracker.listings.float_value</c> required again, with <c>0</c> as the
    /// column's default value.
    /// </summary>
    protected override void Down(MigrationBuilder migrationBuilder)
    {
        migrationBuilder.AlterColumn<decimal>(
            name: "float_value",
            schema: "skintracker",
            table: "listings",
            type: "numeric(20,18)",
            nullable: false,
            defaultValue: 0m,
            oldClrType: typeof(decimal),
            oldType: "numeric(20,18)",
            oldNullable: true);
    }
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,308 @@
|
|||||||
|
using Microsoft.EntityFrameworkCore.Migrations;
|
||||||
|
|
||||||
|
#nullable disable
|
||||||
|
|
||||||
|
namespace BlueLaminate.EFCore.Migrations
|
||||||
|
{
|
||||||
|
/// <inheritdoc />
|
||||||
|
public partial class ConsistencyPass_FloatBoundsCurrencyConditionPaintSeed : Migration
{
    /// <summary>
    /// Builds the <c>CREATE OR REPLACE VIEW skintracker.market_listings</c> statement.
    /// The csfloat arm's <c>condition_id</c> and <c>currency</c> expressions are the
    /// only parts that differ between <see cref="Up"/> and <see cref="Down"/>, so they
    /// are the two parameters; everything else is fixed text.
    /// </summary>
    /// <param name="csFloatConditionId">SQL expression the csfloat arm selects as <c>condition_id</c>.</param>
    /// <param name="csFloatCurrency">SQL expression the csfloat arm selects as <c>currency</c>.</param>
    /// <returns>The complete, semicolon-terminated view definition.</returns>
    private static string MarketListingsViewSql(string csFloatConditionId, string csFloatCurrency) => $"""
        CREATE OR REPLACE VIEW skintracker.market_listings AS
        SELECT
        'csfloat'::text AS marketplace,
        l.cs_float_listing_id AS external_id,
        l.skin_id AS skin_id,
        {csFloatConditionId} AS condition_id,
        l.skin_instance_id AS skin_instance_id,
        l.market_hash_name AS market_hash_name,
        l.wear_name AS wear,
        l.float_value AS float_value,
        l.paint_seed AS paint_seed,
        l.is_stat_trak AS is_stat_trak,
        l.is_souvenir AS is_souvenir,
        l.sticker_count AS sticker_count,
        l.price AS price,
        {csFloatCurrency} AS currency,
        l.inspect_link AS inspect_link,
        l.asset_id AS asset_id,
        l.status AS status,
        l.first_seen_at AS first_seen_at,
        l.last_seen_at AS last_seen_at,
        l.removed_at AS removed_at
        FROM skintracker.listings l
        UNION ALL
        SELECT
        'csmoney'::text,
        c.sell_order_id::text,
        c.skin_id,
        c.condition_id,
        c.skin_instance_id,
        c.market_hash_name,
        CASE lower(c.quality)
        WHEN 'fn' THEN 'Factory New'
        WHEN 'mw' THEN 'Minimal Wear'
        WHEN 'ft' THEN 'Field-Tested'
        WHEN 'ww' THEN 'Well-Worn'
        WHEN 'bs' THEN 'Battle-Scarred'
        ELSE c.quality
        END,
        c.float_value,
        c.paint_seed,
        c.is_stat_trak,
        c.is_souvenir,
        c.sticker_count,
        c.price,
        c.currency,
        c.inspect_link,
        c.asset_id,
        c.status,
        c.first_seen_at,
        c.last_seen_at,
        c.removed_at
        FROM skintracker.cs_money_listings c
        UNION ALL
        SELECT
        'skinland'::text,
        s.listing_id::text,
        s.skin_id,
        s.condition_id,
        NULL::integer,
        s.market_hash_name,
        sc.condition,
        s.float_value,
        NULL::integer,
        s.is_stat_trak,
        s.is_souvenir,
        s.sticker_count,
        s.price,
        s.currency,
        s.inspect_link,
        NULL::text,
        s.status,
        s.first_seen_at,
        s.last_seen_at,
        s.removed_at
        FROM skintracker.skin_land_listings s
        LEFT JOIN skintracker.skin_conditions sc ON sc.id = s.condition_id;
        """;

    /// <summary>
    /// Renames the <c>skin_conditions</c> float-bound columns, converts
    /// <c>skin_instances.paint_seed</c> to integer, adds <c>condition_id</c> and
    /// <c>currency</c> to <c>listings</c>, makes the <c>trades.steam_trade_id</c> and
    /// <c>inventory_items.asset_id</c> indexes unique, and points the csfloat arm of
    /// <c>market_listings</c> at the two new <c>listings</c> columns.
    /// </summary>
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        // Dropped here, re-created as a unique index further down.
        migrationBuilder.DropIndex(
            name: "ix_inventory_items_asset_id",
            schema: "skintracker",
            table: "inventory_items");

        migrationBuilder.RenameColumn(
            name: "min_float",
            schema: "skintracker",
            table: "skin_conditions",
            newName: "float_min");

        migrationBuilder.RenameColumn(
            name: "max_float",
            schema: "skintracker",
            table: "skin_conditions",
            newName: "float_max");

        // Going from text to integer requires an explicit USING cast: EF's AlterColumn
        // leaves it out and Postgres will not cast on its own. Every stored seed is a
        // stringified integer, so the cast is total.
        migrationBuilder.Sql(
            "ALTER TABLE skintracker.skin_instances ALTER COLUMN paint_seed TYPE integer USING paint_seed::integer;");

        migrationBuilder.AddColumn<int>(
            name: "condition_id",
            schema: "skintracker",
            table: "listings",
            type: "integer",
            nullable: true);

        migrationBuilder.AddColumn<string>(
            name: "currency",
            schema: "skintracker",
            table: "listings",
            type: "text",
            nullable: false,
            defaultValue: "USD");

        migrationBuilder.CreateIndex(
            name: "ix_trades_steam_trade_id",
            schema: "skintracker",
            table: "trades",
            column: "steam_trade_id",
            unique: true);

        migrationBuilder.CreateIndex(
            name: "ix_listings_condition_id",
            schema: "skintracker",
            table: "listings",
            column: "condition_id");

        migrationBuilder.CreateIndex(
            name: "ix_inventory_items_asset_id",
            schema: "skintracker",
            table: "inventory_items",
            column: "asset_id",
            unique: true);

        migrationBuilder.AddForeignKey(
            name: "fk_listings_skin_conditions_condition_id",
            schema: "skintracker",
            table: "listings",
            column: "condition_id",
            principalSchema: "skintracker",
            principalTable: "skin_conditions",
            principalColumn: "id",
            onDelete: ReferentialAction.SetNull);

        // listings now has its own condition_id and currency, so the csfloat arm of the
        // cross-market view reads them instead of NULL / a hardcoded 'USD'.
        migrationBuilder.Sql(MarketListingsViewSql("l.condition_id", "l.currency"));
    }

    /// <summary>
    /// Reverses <see cref="Up"/>: restores the previous view definition, then removes
    /// the added foreign key, indexes and columns, renames the float-bound columns
    /// back, converts <c>paint_seed</c> back to text and re-creates the non-unique
    /// <c>inventory_items.asset_id</c> index.
    /// </summary>
    protected override void Down(MigrationBuilder migrationBuilder)
    {
        // The view goes back to its pre-migration form (csfloat condition_id/currency
        // hardcoded) FIRST, so the listings columns it currently references can be
        // dropped afterwards.
        migrationBuilder.Sql(MarketListingsViewSql("NULL::integer", "'USD'::text"));

        migrationBuilder.DropForeignKey(
            name: "fk_listings_skin_conditions_condition_id",
            schema: "skintracker",
            table: "listings");

        migrationBuilder.DropIndex(
            name: "ix_trades_steam_trade_id",
            schema: "skintracker",
            table: "trades");

        migrationBuilder.DropIndex(
            name: "ix_listings_condition_id",
            schema: "skintracker",
            table: "listings");

        migrationBuilder.DropIndex(
            name: "ix_inventory_items_asset_id",
            schema: "skintracker",
            table: "inventory_items");

        migrationBuilder.DropColumn(
            name: "condition_id",
            schema: "skintracker",
            table: "listings");

        migrationBuilder.DropColumn(
            name: "currency",
            schema: "skintracker",
            table: "listings");

        migrationBuilder.RenameColumn(
            name: "float_min",
            schema: "skintracker",
            table: "skin_conditions",
            newName: "min_float");

        migrationBuilder.RenameColumn(
            name: "float_max",
            schema: "skintracker",
            table: "skin_conditions",
            newName: "max_float");

        migrationBuilder.AlterColumn<string>(
            name: "paint_seed",
            schema: "skintracker",
            table: "skin_instances",
            type: "text",
            nullable: false,
            oldClrType: typeof(int),
            oldType: "integer");

        migrationBuilder.CreateIndex(
            name: "ix_inventory_items_asset_id",
            schema: "skintracker",
            table: "inventory_items",
            column: "asset_id");
    }
}
|
||||||
|
}
|
||||||
@@ -215,6 +215,7 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
.HasName("pk_inventory_items");
|
.HasName("pk_inventory_items");
|
||||||
|
|
||||||
b.HasIndex("AssetId")
|
b.HasIndex("AssetId")
|
||||||
|
.IsUnique()
|
||||||
.HasDatabaseName("ix_inventory_items_asset_id");
|
.HasDatabaseName("ix_inventory_items_asset_id");
|
||||||
|
|
||||||
b.HasIndex("SkinInstanceId")
|
b.HasIndex("SkinInstanceId")
|
||||||
@@ -239,11 +240,20 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
.HasColumnType("text")
|
.HasColumnType("text")
|
||||||
.HasColumnName("asset_id");
|
.HasColumnName("asset_id");
|
||||||
|
|
||||||
|
b.Property<int?>("ConditionId")
|
||||||
|
.HasColumnType("integer")
|
||||||
|
.HasColumnName("condition_id");
|
||||||
|
|
||||||
b.Property<string>("CsFloatListingId")
|
b.Property<string>("CsFloatListingId")
|
||||||
.IsRequired()
|
.IsRequired()
|
||||||
.HasColumnType("text")
|
.HasColumnType("text")
|
||||||
.HasColumnName("cs_float_listing_id");
|
.HasColumnName("cs_float_listing_id");
|
||||||
|
|
||||||
|
b.Property<string>("Currency")
|
||||||
|
.IsRequired()
|
||||||
|
.HasColumnType("text")
|
||||||
|
.HasColumnName("currency");
|
||||||
|
|
||||||
b.Property<int>("DefIndex")
|
b.Property<int>("DefIndex")
|
||||||
.HasColumnType("integer")
|
.HasColumnType("integer")
|
||||||
.HasColumnName("def_index");
|
.HasColumnName("def_index");
|
||||||
@@ -252,7 +262,7 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
.HasColumnType("timestamp with time zone")
|
.HasColumnType("timestamp with time zone")
|
||||||
.HasColumnName("first_seen_at");
|
.HasColumnName("first_seen_at");
|
||||||
|
|
||||||
b.Property<decimal>("FloatValue")
|
b.Property<decimal?>("FloatValue")
|
||||||
.HasColumnType("numeric(20,18)")
|
.HasColumnType("numeric(20,18)")
|
||||||
.HasColumnName("float_value");
|
.HasColumnName("float_value");
|
||||||
|
|
||||||
@@ -334,6 +344,9 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
b.HasIndex("AssetId")
|
b.HasIndex("AssetId")
|
||||||
.HasDatabaseName("ix_listings_asset_id");
|
.HasDatabaseName("ix_listings_asset_id");
|
||||||
|
|
||||||
|
b.HasIndex("ConditionId")
|
||||||
|
.HasDatabaseName("ix_listings_condition_id");
|
||||||
|
|
||||||
b.HasIndex("CsFloatListingId")
|
b.HasIndex("CsFloatListingId")
|
||||||
.IsUnique()
|
.IsUnique()
|
||||||
.HasDatabaseName("ix_listings_cs_float_listing_id");
|
.HasDatabaseName("ix_listings_cs_float_listing_id");
|
||||||
@@ -553,10 +566,6 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
.HasColumnType("text")
|
.HasColumnType("text")
|
||||||
.HasColumnName("image_url");
|
.HasColumnName("image_url");
|
||||||
|
|
||||||
b.Property<DateTimeOffset?>("ListingsSweptAt")
|
|
||||||
.HasColumnType("timestamp with time zone")
|
|
||||||
.HasColumnName("listings_swept_at");
|
|
||||||
|
|
||||||
b.Property<string>("Name")
|
b.Property<string>("Name")
|
||||||
.IsRequired()
|
.IsRequired()
|
||||||
.HasColumnType("text")
|
.HasColumnType("text")
|
||||||
@@ -597,9 +606,6 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
b.HasKey("Id")
|
b.HasKey("Id")
|
||||||
.HasName("pk_skins");
|
.HasName("pk_skins");
|
||||||
|
|
||||||
b.HasIndex("ListingsSweptAt")
|
|
||||||
.HasDatabaseName("ix_skins_listings_swept_at");
|
|
||||||
|
|
||||||
b.HasIndex("Slug")
|
b.HasIndex("Slug")
|
||||||
.IsUnique()
|
.IsUnique()
|
||||||
.HasDatabaseName("ix_skins_slug");
|
.HasDatabaseName("ix_skins_slug");
|
||||||
@@ -632,17 +638,13 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
.HasColumnType("text")
|
.HasColumnType("text")
|
||||||
.HasColumnName("condition");
|
.HasColumnName("condition");
|
||||||
|
|
||||||
b.Property<DateTimeOffset?>("ListingsSweptAt")
|
b.Property<decimal>("FloatMax")
|
||||||
.HasColumnType("timestamp with time zone")
|
|
||||||
.HasColumnName("listings_swept_at");
|
|
||||||
|
|
||||||
b.Property<decimal>("MaxFloat")
|
|
||||||
.HasColumnType("numeric(10,9)")
|
.HasColumnType("numeric(10,9)")
|
||||||
.HasColumnName("max_float");
|
.HasColumnName("float_max");
|
||||||
|
|
||||||
b.Property<decimal>("MinFloat")
|
b.Property<decimal>("FloatMin")
|
||||||
.HasColumnType("numeric(10,9)")
|
.HasColumnType("numeric(10,9)")
|
||||||
.HasColumnName("min_float");
|
.HasColumnName("float_min");
|
||||||
|
|
||||||
b.Property<int>("SkinId")
|
b.Property<int>("SkinId")
|
||||||
.HasColumnType("integer")
|
.HasColumnType("integer")
|
||||||
@@ -651,15 +653,47 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
b.HasKey("Id")
|
b.HasKey("Id")
|
||||||
.HasName("pk_skin_conditions");
|
.HasName("pk_skin_conditions");
|
||||||
|
|
||||||
b.HasIndex("ListingsSweptAt")
|
|
||||||
.HasDatabaseName("ix_skin_conditions_listings_swept_at");
|
|
||||||
|
|
||||||
b.HasIndex("SkinId")
|
b.HasIndex("SkinId")
|
||||||
.HasDatabaseName("ix_skin_conditions_skin_id");
|
.HasDatabaseName("ix_skin_conditions_skin_id");
|
||||||
|
|
||||||
b.ToTable("skin_conditions", "skintracker");
|
b.ToTable("skin_conditions", "skintracker");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinConditionSweep", b =>
|
||||||
|
{
|
||||||
|
b.Property<int>("Id")
|
||||||
|
.ValueGeneratedOnAdd()
|
||||||
|
.HasColumnType("integer")
|
||||||
|
.HasColumnName("id");
|
||||||
|
|
||||||
|
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
|
||||||
|
|
||||||
|
b.Property<int>("SkinConditionId")
|
||||||
|
.HasColumnType("integer")
|
||||||
|
.HasColumnName("skin_condition_id");
|
||||||
|
|
||||||
|
b.Property<string>("Source")
|
||||||
|
.IsRequired()
|
||||||
|
.HasColumnType("text")
|
||||||
|
.HasColumnName("source");
|
||||||
|
|
||||||
|
b.Property<DateTimeOffset>("SweptAt")
|
||||||
|
.HasColumnType("timestamp with time zone")
|
||||||
|
.HasColumnName("swept_at");
|
||||||
|
|
||||||
|
b.HasKey("Id")
|
||||||
|
.HasName("pk_skin_condition_sweeps");
|
||||||
|
|
||||||
|
b.HasIndex("SkinConditionId", "Source")
|
||||||
|
.IsUnique()
|
||||||
|
.HasDatabaseName("ix_skin_condition_sweeps_skin_condition_id_source");
|
||||||
|
|
||||||
|
b.HasIndex("Source", "SweptAt")
|
||||||
|
.HasDatabaseName("ix_skin_condition_sweeps_source_swept_at");
|
||||||
|
|
||||||
|
b.ToTable("skin_condition_sweeps", "skintracker");
|
||||||
|
});
|
||||||
|
|
||||||
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinInstance", b =>
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinInstance", b =>
|
||||||
{
|
{
|
||||||
b.Property<int>("Id")
|
b.Property<int>("Id")
|
||||||
@@ -689,9 +723,8 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
.HasColumnType("timestamp with time zone")
|
.HasColumnType("timestamp with time zone")
|
||||||
.HasColumnName("last_seen_at");
|
.HasColumnName("last_seen_at");
|
||||||
|
|
||||||
b.Property<string>("PaintSeed")
|
b.Property<int>("PaintSeed")
|
||||||
.IsRequired()
|
.HasColumnType("integer")
|
||||||
.HasColumnType("text")
|
|
||||||
.HasColumnName("paint_seed");
|
.HasColumnName("paint_seed");
|
||||||
|
|
||||||
b.Property<int>("SkinId")
|
b.Property<int>("SkinId")
|
||||||
@@ -725,6 +758,137 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
b.ToTable("skin_instances", "skintracker");
|
b.ToTable("skin_instances", "skintracker");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinLandListing", b =>
|
||||||
|
{
|
||||||
|
b.Property<int>("Id")
|
||||||
|
.ValueGeneratedOnAdd()
|
||||||
|
.HasColumnType("integer")
|
||||||
|
.HasColumnName("id");
|
||||||
|
|
||||||
|
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
|
||||||
|
|
||||||
|
b.Property<int?>("ConditionId")
|
||||||
|
.HasColumnType("integer")
|
||||||
|
.HasColumnName("condition_id");
|
||||||
|
|
||||||
|
b.Property<string>("Currency")
|
||||||
|
.IsRequired()
|
||||||
|
.HasColumnType("text")
|
||||||
|
.HasColumnName("currency");
|
||||||
|
|
||||||
|
b.Property<DateTimeOffset>("FirstSeenAt")
|
||||||
|
.HasColumnType("timestamp with time zone")
|
||||||
|
.HasColumnName("first_seen_at");
|
||||||
|
|
||||||
|
b.Property<decimal?>("FloatValue")
|
||||||
|
.HasColumnType("numeric(20,18)")
|
||||||
|
.HasColumnName("float_value");
|
||||||
|
|
||||||
|
b.Property<string>("InspectLink")
|
||||||
|
.HasColumnType("text")
|
||||||
|
.HasColumnName("inspect_link");
|
||||||
|
|
||||||
|
b.Property<bool>("IsSouvenir")
|
||||||
|
.HasColumnType("boolean")
|
||||||
|
.HasColumnName("is_souvenir");
|
||||||
|
|
||||||
|
b.Property<bool>("IsStatTrak")
|
||||||
|
.HasColumnType("boolean")
|
||||||
|
.HasColumnName("is_stat_trak");
|
||||||
|
|
||||||
|
b.Property<DateTimeOffset>("LastSeenAt")
|
||||||
|
.HasColumnType("timestamp with time zone")
|
||||||
|
.HasColumnName("last_seen_at");
|
||||||
|
|
||||||
|
b.Property<long>("ListingId")
|
||||||
|
.HasColumnType("bigint")
|
||||||
|
.HasColumnName("listing_id");
|
||||||
|
|
||||||
|
b.Property<string>("MarketHashName")
|
||||||
|
.IsRequired()
|
||||||
|
.HasColumnType("text")
|
||||||
|
.HasColumnName("market_hash_name");
|
||||||
|
|
||||||
|
b.Property<string>("NameTag")
|
||||||
|
.HasColumnType("text")
|
||||||
|
.HasColumnName("name_tag");
|
||||||
|
|
||||||
|
b.Property<decimal>("Price")
|
||||||
|
.HasPrecision(18, 2)
|
||||||
|
.HasColumnType("numeric(18,2)")
|
||||||
|
.HasColumnName("price");
|
||||||
|
|
||||||
|
b.Property<DateTimeOffset?>("RemovedAt")
|
||||||
|
.HasColumnType("timestamp with time zone")
|
||||||
|
.HasColumnName("removed_at");
|
||||||
|
|
||||||
|
b.Property<int>("SkinId")
|
||||||
|
.HasColumnType("integer")
|
||||||
|
.HasColumnName("skin_id");
|
||||||
|
|
||||||
|
b.Property<string>("Status")
|
||||||
|
.IsRequired()
|
||||||
|
.HasColumnType("text")
|
||||||
|
.HasColumnName("status");
|
||||||
|
|
||||||
|
b.Property<int>("StickerCount")
|
||||||
|
.HasColumnType("integer")
|
||||||
|
.HasColumnName("sticker_count");
|
||||||
|
|
||||||
|
b.HasKey("Id")
|
||||||
|
.HasName("pk_skin_land_listings");
|
||||||
|
|
||||||
|
b.HasIndex("ConditionId")
|
||||||
|
.HasDatabaseName("ix_skin_land_listings_condition_id");
|
||||||
|
|
||||||
|
b.HasIndex("ListingId")
|
||||||
|
.IsUnique()
|
||||||
|
.HasDatabaseName("ix_skin_land_listings_listing_id");
|
||||||
|
|
||||||
|
b.HasIndex("Status")
|
||||||
|
.HasDatabaseName("ix_skin_land_listings_status");
|
||||||
|
|
||||||
|
b.HasIndex("SkinId", "ConditionId")
|
||||||
|
.HasDatabaseName("ix_skin_land_listings_skin_id_condition_id");
|
||||||
|
|
||||||
|
b.ToTable("skin_land_listings", "skintracker");
|
||||||
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinSweep", b =>
|
||||||
|
{
|
||||||
|
b.Property<int>("Id")
|
||||||
|
.ValueGeneratedOnAdd()
|
||||||
|
.HasColumnType("integer")
|
||||||
|
.HasColumnName("id");
|
||||||
|
|
||||||
|
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
|
||||||
|
|
||||||
|
b.Property<int>("SkinId")
|
||||||
|
.HasColumnType("integer")
|
||||||
|
.HasColumnName("skin_id");
|
||||||
|
|
||||||
|
b.Property<string>("Source")
|
||||||
|
.IsRequired()
|
||||||
|
.HasColumnType("text")
|
||||||
|
.HasColumnName("source");
|
||||||
|
|
||||||
|
b.Property<DateTimeOffset>("SweptAt")
|
||||||
|
.HasColumnType("timestamp with time zone")
|
||||||
|
.HasColumnName("swept_at");
|
||||||
|
|
||||||
|
b.HasKey("Id")
|
||||||
|
.HasName("pk_skin_sweeps");
|
||||||
|
|
||||||
|
b.HasIndex("SkinId", "Source")
|
||||||
|
.IsUnique()
|
||||||
|
.HasDatabaseName("ix_skin_sweeps_skin_id_source");
|
||||||
|
|
||||||
|
b.HasIndex("Source", "SweptAt")
|
||||||
|
.HasDatabaseName("ix_skin_sweeps_source_swept_at");
|
||||||
|
|
||||||
|
b.ToTable("skin_sweeps", "skintracker");
|
||||||
|
});
|
||||||
|
|
||||||
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SteamUser", b =>
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SteamUser", b =>
|
||||||
{
|
{
|
||||||
b.Property<int>("Id")
|
b.Property<int>("Id")
|
||||||
@@ -788,6 +952,10 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
b.HasIndex("FromUserId")
|
b.HasIndex("FromUserId")
|
||||||
.HasDatabaseName("ix_trades_from_user_id");
|
.HasDatabaseName("ix_trades_from_user_id");
|
||||||
|
|
||||||
|
b.HasIndex("SteamTradeId")
|
||||||
|
.IsUnique()
|
||||||
|
.HasDatabaseName("ix_trades_steam_trade_id");
|
||||||
|
|
||||||
b.HasIndex("ToUserId")
|
b.HasIndex("ToUserId")
|
||||||
.HasDatabaseName("ix_trades_to_user_id");
|
.HasDatabaseName("ix_trades_to_user_id");
|
||||||
|
|
||||||
@@ -927,6 +1095,12 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
|
|
||||||
modelBuilder.Entity("BlueLaminate.EFCore.Entities.Listing", b =>
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.Listing", b =>
|
||||||
{
|
{
|
||||||
|
b.HasOne("BlueLaminate.EFCore.Entities.SkinCondition", "Condition")
|
||||||
|
.WithMany()
|
||||||
|
.HasForeignKey("ConditionId")
|
||||||
|
.OnDelete(DeleteBehavior.SetNull)
|
||||||
|
.HasConstraintName("fk_listings_skin_conditions_condition_id");
|
||||||
|
|
||||||
b.HasOne("BlueLaminate.EFCore.Entities.Skin", "Skin")
|
b.HasOne("BlueLaminate.EFCore.Entities.Skin", "Skin")
|
||||||
.WithMany()
|
.WithMany()
|
||||||
.HasForeignKey("SkinId")
|
.HasForeignKey("SkinId")
|
||||||
@@ -939,6 +1113,8 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
.OnDelete(DeleteBehavior.SetNull)
|
.OnDelete(DeleteBehavior.SetNull)
|
||||||
.HasConstraintName("fk_listings_skin_instances_skin_instance_id");
|
.HasConstraintName("fk_listings_skin_instances_skin_instance_id");
|
||||||
|
|
||||||
|
b.Navigation("Condition");
|
||||||
|
|
||||||
b.Navigation("Skin");
|
b.Navigation("Skin");
|
||||||
|
|
||||||
b.Navigation("SkinInstance");
|
b.Navigation("SkinInstance");
|
||||||
@@ -989,6 +1165,18 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
b.Navigation("Skin");
|
b.Navigation("Skin");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinConditionSweep", b =>
|
||||||
|
{
|
||||||
|
b.HasOne("BlueLaminate.EFCore.Entities.SkinCondition", "SkinCondition")
|
||||||
|
.WithMany("Sweeps")
|
||||||
|
.HasForeignKey("SkinConditionId")
|
||||||
|
.OnDelete(DeleteBehavior.Cascade)
|
||||||
|
.IsRequired()
|
||||||
|
.HasConstraintName("fk_skin_condition_sweeps_skin_conditions_skin_condition_id");
|
||||||
|
|
||||||
|
b.Navigation("SkinCondition");
|
||||||
|
});
|
||||||
|
|
||||||
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinInstance", b =>
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinInstance", b =>
|
||||||
{
|
{
|
||||||
b.HasOne("BlueLaminate.EFCore.Entities.SkinCondition", "Condition")
|
b.HasOne("BlueLaminate.EFCore.Entities.SkinCondition", "Condition")
|
||||||
@@ -1009,6 +1197,38 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
b.Navigation("Skin");
|
b.Navigation("Skin");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinLandListing", b =>
|
||||||
|
{
|
||||||
|
b.HasOne("BlueLaminate.EFCore.Entities.SkinCondition", "Condition")
|
||||||
|
.WithMany()
|
||||||
|
.HasForeignKey("ConditionId")
|
||||||
|
.OnDelete(DeleteBehavior.SetNull)
|
||||||
|
.HasConstraintName("fk_skin_land_listings_skin_conditions_condition_id");
|
||||||
|
|
||||||
|
b.HasOne("BlueLaminate.EFCore.Entities.Skin", "Skin")
|
||||||
|
.WithMany()
|
||||||
|
.HasForeignKey("SkinId")
|
||||||
|
.OnDelete(DeleteBehavior.Restrict)
|
||||||
|
.IsRequired()
|
||||||
|
.HasConstraintName("fk_skin_land_listings_skins_skin_id");
|
||||||
|
|
||||||
|
b.Navigation("Condition");
|
||||||
|
|
||||||
|
b.Navigation("Skin");
|
||||||
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinSweep", b =>
|
||||||
|
{
|
||||||
|
b.HasOne("BlueLaminate.EFCore.Entities.Skin", "Skin")
|
||||||
|
.WithMany("Sweeps")
|
||||||
|
.HasForeignKey("SkinId")
|
||||||
|
.OnDelete(DeleteBehavior.Cascade)
|
||||||
|
.IsRequired()
|
||||||
|
.HasConstraintName("fk_skin_sweeps_skins_skin_id");
|
||||||
|
|
||||||
|
b.Navigation("Skin");
|
||||||
|
});
|
||||||
|
|
||||||
modelBuilder.Entity("BlueLaminate.EFCore.Entities.Trade", b =>
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.Trade", b =>
|
||||||
{
|
{
|
||||||
b.HasOne("BlueLaminate.EFCore.Entities.SteamUser", "FromUser")
|
b.HasOne("BlueLaminate.EFCore.Entities.SteamUser", "FromUser")
|
||||||
@@ -1080,6 +1300,8 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
b.Navigation("Instances");
|
b.Navigation("Instances");
|
||||||
|
|
||||||
b.Navigation("PriceHistories");
|
b.Navigation("PriceHistories");
|
||||||
|
|
||||||
|
b.Navigation("Sweeps");
|
||||||
});
|
});
|
||||||
|
|
||||||
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinCondition", b =>
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinCondition", b =>
|
||||||
@@ -1087,6 +1309,8 @@ namespace BlueLaminate.EFCore.Migrations
|
|||||||
b.Navigation("Instances");
|
b.Navigation("Instances");
|
||||||
|
|
||||||
b.Navigation("PriceHistories");
|
b.Navigation("PriceHistories");
|
||||||
|
|
||||||
|
b.Navigation("Sweeps");
|
||||||
});
|
});
|
||||||
|
|
||||||
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinInstance", b =>
|
modelBuilder.Entity("BlueLaminate.EFCore.Entities.SkinInstance", b =>
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||||
<PackageReference Include="Selenium.WebDriver" />
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@@ -1,79 +0,0 @@
|
|||||||
using Microsoft.Extensions.Logging;
|
|
||||||
using OpenQA.Selenium;
|
|
||||||
using OpenQA.Selenium.Edge;
|
|
||||||
|
|
||||||
namespace BlueLaminate.Scraper.Browser;
|
|
||||||
|
|
||||||
/// <summary>
/// Creates the visible (non-headless) Edge/Chromium WebDriver used for scraping.
/// The browser is pointed at a local, credential-free proxy endpoint (a
/// <see cref="Proxies.LocalForwardingProxy"/> chained to the residential gateway)
/// and is configured with <b>no CDP at all</b>: switching on DevTools domains —
/// even only to answer proxy auth — is an automation tell for Cloudflare, and
/// because the local proxy already holds the upstream credentials the browser
/// never sees a 407. Together with a warmed, persistent profile this is the
/// lowest-fingerprint setup available without an undetected-chromedriver
/// (which has no .NET equivalent).
/// <para>
/// Bandwidth: the residential plan is billed per GB, so by default images are
/// switched off through content settings. Cloudflare gates on JS/TLS/behaviour
/// rather than on whether pictures render, so the session stays realistic.
/// </para>
/// </summary>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    /// <summary>Creates the factory.</summary>
    /// <param name="logger">Receives one informational entry per browser launch.</param>
    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger) => _logger = logger;

    /// <summary>
    /// Starts Edge, optionally routed through <paramref name="proxyEndpoint"/>
    /// ("host:port", no auth).
    /// </summary>
    /// <param name="proxyEndpoint">
    /// Local proxy endpoint. Null/blank means the browser uses the machine's direct
    /// connection (diagnostic --no-proxy).
    /// </param>
    /// <param name="blockImages">When true, image loading is disabled via profile preferences.</param>
    /// <param name="profileDir">
    /// Profile directory to reuse across runs, so a previously cleared Cloudflare
    /// <c>cf_clearance</c> cookie and browsing history carry over. Null/blank means
    /// a throwaway profile under the temp directory.
    /// </param>
    /// <returns>The launched Edge driver.</returns>
    public IWebDriver Create(string? proxyEndpoint, bool blockImages = true, string? profileDir = null)
    {
        var routedViaProxy = !string.IsNullOrWhiteSpace(proxyEndpoint);
        var keepProfile = !string.IsNullOrWhiteSpace(profileDir);

        var edge = new EdgeOptions();

        // The proxy is passed as a launch argument instead of EdgeOptions.Proxy,
        // which would also route Selenium Manager's driver download. Without a
        // scheme, every protocol goes through the proxy.
        if (routedViaProxy)
        {
            edge.AddArgument($"--proxy-server={proxyEndpoint}");
        }

        // Strip the most obvious automation markers; the residential exit, the
        // real visible browser and the warmed profile cover the rest.
        edge.AddArgument("--disable-blink-features=AutomationControlled");
        edge.AddExcludedArgument("enable-automation");
        edge.AddAdditionalOption("useAutomationExtension", false);
        foreach (var flag in new[] { "--no-first-run", "--no-default-browser-check", "--start-maximized" })
        {
            edge.AddArgument(flag);
        }

        // Either the caller's persistent profile or a fresh, uniquely named one.
        string userDataDir;
        if (keepProfile)
        {
            userDataDir = profileDir!;
        }
        else
        {
            var uniqueName = Guid.NewGuid().ToString("N");
            userDataDir = Path.Combine(Path.GetTempPath(), "bluelaminate-edge", uniqueName);
        }

        Directory.CreateDirectory(userDataDir);
        edge.AddArgument("--user-data-dir=" + userDataDir);

        if (blockImages)
        {
            edge.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
        }

        var route = routedViaProxy ? $"local proxy {proxyEndpoint}" : "DIRECT (no proxy)";
        var profileLabel = keepProfile ? userDataDir : "throwaway";
        _logger.LogInformation("Launching Edge via {Route} (profile: {Profile}).", route, profileLabel);

        return new EdgeDriver(edge);
    }
}
|
|
||||||
@@ -15,7 +15,10 @@ namespace BlueLaminate.Scraper.CsFloat;
|
|||||||
/// <param name="DefIndex">Weapon definition index (maps to catalog weapon_id).</param>
|
/// <param name="DefIndex">Weapon definition index (maps to catalog weapon_id).</param>
|
||||||
/// <param name="PaintIndex">Paint index (maps to catalog paint_index).</param>
|
/// <param name="PaintIndex">Paint index (maps to catalog paint_index).</param>
|
||||||
/// <param name="PaintSeed">Pattern seed.</param>
|
/// <param name="PaintSeed">Pattern seed.</param>
|
||||||
/// <param name="FloatValue">Exact float/wear value.</param>
|
/// <param name="FloatValue">
|
||||||
|
/// Exact float/wear value, or null for items that have no float at all
|
||||||
|
/// (e.g. Vanilla knives). A null is distinct from a genuine 0.0 float.
|
||||||
|
/// </param>
|
||||||
/// <param name="WearName">Wear bucket name, e.g. "Field-Tested".</param>
|
/// <param name="WearName">Wear bucket name, e.g. "Field-Tested".</param>
|
||||||
/// <param name="IsStatTrak">StatTrak™ variant.</param>
|
/// <param name="IsStatTrak">StatTrak™ variant.</param>
|
||||||
/// <param name="IsSouvenir">Souvenir variant.</param>
|
/// <param name="IsSouvenir">Souvenir variant.</param>
|
||||||
@@ -37,7 +40,7 @@ public sealed record CsFloatListing(
|
|||||||
int DefIndex,
|
int DefIndex,
|
||||||
int PaintIndex,
|
int PaintIndex,
|
||||||
int PaintSeed,
|
int PaintSeed,
|
||||||
decimal FloatValue,
|
decimal? FloatValue,
|
||||||
string? WearName,
|
string? WearName,
|
||||||
bool IsStatTrak,
|
bool IsStatTrak,
|
||||||
bool IsSouvenir,
|
bool IsSouvenir,
|
||||||
|
|||||||
@@ -321,7 +321,7 @@ public sealed class CsFloatListingsClient
|
|||||||
public int DefIndex { get; init; }
|
public int DefIndex { get; init; }
|
||||||
public int PaintIndex { get; init; }
|
public int PaintIndex { get; init; }
|
||||||
public int PaintSeed { get; init; }
|
public int PaintSeed { get; init; }
|
||||||
public decimal FloatValue { get; init; }
|
public decimal? FloatValue { get; init; }
|
||||||
public string? WearName { get; init; }
|
public string? WearName { get; init; }
|
||||||
public bool IsStatTrak { get; init; }
|
public bool IsStatTrak { get; init; }
|
||||||
public bool IsSouvenir { get; init; }
|
public bool IsSouvenir { get; init; }
|
||||||
|
|||||||
@@ -1,211 +0,0 @@
|
|||||||
using System.Text;
|
|
||||||
using System.Text.Json;
|
|
||||||
using BlueLaminate.Scraper.Browser;
|
|
||||||
using BlueLaminate.Scraper.Proxies;
|
|
||||||
using Microsoft.Extensions.Logging;
|
|
||||||
using OpenQA.Selenium;
|
|
||||||
|
|
||||||
namespace BlueLaminate.Scraper.CsMoney;
|
|
||||||
|
|
||||||
/// <summary>Summary of one stealth pagination run against the listings API.</summary>
/// <param name="PagesSucceeded">Number of offset pages that came back as listings JSON before the run stopped.</param>
/// <param name="ItemsTotal">Sum of the listing items captured over those pages.</param>
/// <param name="StoppedReason">Reason pagination ended: "challenged", "empty", "completed", or "error".</param>
public sealed record CsMoneyCaptureResult(
    int PagesSucceeded,
    int ItemsTotal,
    string StoppedReason);
|
|
||||||
|
|
||||||
/// <summary>
/// Drives a low-fingerprint, non-headless Edge (no CDP) through a local forwarding
/// proxy to the cs.money market, lets the operator clear Cloudflare once, then pages
/// the listings API with human-like pacing using in-page <c>fetch()</c> calls from
/// the cleared origin (so the cf_clearance cookie rides along). It records each
/// page's JSON and — crucially for the current phase — <b>measures how many pages
/// survive before Cloudflare re-challenges</b>, which tells us whether the
/// fingerprint reductions are enough for a real sweep.
/// </summary>
public sealed class CsMoneyCaptureService
{
    /// <summary>Offset step between consecutive listings pages.</summary>
    private const int PageSize = 60;

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly CsMoneyOptions _options;
    private readonly ILogger<CsMoneyCaptureService> _logger;

    /// <summary>Creates the capture service from its collaborators.</summary>
    /// <param name="provider">Hands out the upstream proxy lease when a proxy is requested.</param>
    /// <param name="proxyFactory">Builds the local forwarding proxy for a lease.</param>
    /// <param name="factory">Launches the Edge browser.</param>
    /// <param name="options">Market URL, API URL template, profile directory and pacing settings.</param>
    /// <param name="logger">Progress and failure log sink.</param>
    public CsMoneyCaptureService(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        CsMoneyOptions options,
        ILogger<CsMoneyCaptureService> logger)
    {
        _provider = provider;
        _proxyFactory = proxyFactory;
        _factory = factory;
        _options = options;
        _logger = logger;
    }

    /// <summary>
    /// Open the market, wait for <paramref name="browseUntilDone"/> (the operator
    /// clears Cloudflare and presses Enter), then page the listings API up to
    /// <paramref name="maxPages"/> times, stopping early on a re-challenge or an
    /// empty page. Each page's body is written to <paramref name="outputDir"/>.
    /// </summary>
    /// <param name="outputDir">Directory (created if missing) that receives the page dumps.</param>
    /// <param name="request">Proxy request used to acquire the lease when <paramref name="useProxy"/> is true.</param>
    /// <param name="loadImages">When false, the browser is launched with images blocked.</param>
    /// <param name="useProxy">When false, no proxy is started and the browser gets no proxy endpoint.</param>
    /// <param name="maxPages">Upper bound on the number of pages saved.</param>
    /// <param name="browseUntilDone">Awaited once after navigation, before paging starts.</param>
    /// <param name="ct">Checked before each page fetch and during the inter-page delay.</param>
    /// <returns>
    /// Pages and items captured plus the stop reason: "completed" (page limit reached),
    /// "empty", "challenged", or "error" (an exception was logged and swallowed).
    /// </returns>
    /// <exception cref="OperationCanceledException">Cancellation was observed; it is rethrown, not reported as "error".</exception>
    /// <remarks>
    /// Failures while starting the proxy or launching the browser propagate to the
    /// caller. The browser and the local proxy are always released, including when
    /// the browser launch fails or when shutting the browser down throws.
    /// </remarks>
    public async Task<CsMoneyCaptureResult> RunAsync(
        string outputDir,
        ProxyRequest request,
        bool loadImages,
        bool useProxy,
        int maxPages,
        Func<Task> browseUntilDone,
        CancellationToken ct = default)
    {
        Directory.CreateDirectory(outputDir);

        // --no-proxy (useProxy=false) drives the automated browser on the machine's
        // own IP, to isolate whether a re-challenge is the IPRoyal exit's reputation
        // or the webdriver fingerprint itself.
        LocalForwardingProxy? localProxy = null;
        string? proxyEndpoint = null;
        if (useProxy)
        {
            var lease = _provider.Acquire(request);
            localProxy = _proxyFactory.Create(lease).Start();
            proxyEndpoint = localProxy.Endpoint;
        }

        IWebDriver driver;
        try
        {
            driver = _factory.Create(proxyEndpoint, blockImages: !loadImages, _options.ProfileDir);
        }
        catch
        {
            // The proxy is already listening at this point; without this it would
            // outlive a failed browser launch.
            if (localProxy is not null)
            {
                await localProxy.DisposeAsync();
            }

            throw;
        }

        var pages = 0;
        var items = 0;
        var reason = "completed";
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(90);
            driver.Manage().Timeouts().AsynchronousJavaScript = TimeSpan.FromSeconds(45);

            _logger.LogInformation("Navigating to {Url}", _options.MarketUrl);
            driver.Navigate().GoToUrl(_options.MarketUrl);

            // Operator clears the Cloudflare challenge in the visible window, waits
            // until the market grid is actually rendered, then presses Enter.
            await browseUntilDone();

            for (var offset = 0; pages < maxPages; offset += PageSize)
            {
                ct.ThrowIfCancellationRequested();

                var apiUrl = string.Format(_options.ApiUrlTemplate, offset);
                var (status, body) = DirectFetch(driver, apiUrl);

                if (LooksLikeChallenge(status, body))
                {
                    _logger.LogWarning(
                        "Re-challenged at offset {Offset} (after {Pages} clean page(s)). Stopping.",
                        offset, pages);
                    await WriteAsync(outputDir, $"challenge_offset_{offset}.html", body, ct);
                    reason = "challenged";
                    break;
                }

                // count is null when the body is not JSON with an items[] array; such
                // a page is still saved and counted, it just adds no items.
                var count = TryCountItems(body);
                if (count is 0)
                {
                    _logger.LogInformation("Offset {Offset} returned no items — end of listings.", offset);
                    reason = "empty";
                    break;
                }

                await WriteAsync(outputDir, $"page_{pages:D3}_offset_{offset}.json", body, ct);
                pages++;
                items += count ?? 0;
                _logger.LogInformation(
                    "Page {Page} [offset {Offset}] [{Status}] → {Count} items ({Bytes} bytes).",
                    pages, offset, status, count, body.Length);

                await DelayAsync(ct);
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Cancellation bypasses this handler and propagates; everything else is
            // reported through the result so partial progress is not lost.
            _logger.LogError(ex, "cs.money capture failed after {Pages} page(s).", pages);
            reason = "error";
        }
        finally
        {
            try
            {
                driver.Quit();
            }
            finally
            {
                // Runs even if Quit() throws, so the local proxy is never left behind.
                if (localProxy is not null)
                {
                    await localProxy.DisposeAsync();
                }
            }
        }

        return new CsMoneyCaptureResult(pages, items, reason);
    }

    // Run a same-origin fetch() in the cleared page and return (status, body). Uses
    // ExecuteAsyncScript so we can await the fetch promise; the page is on the
    // cs.money origin, so the cf_clearance cookie is sent automatically. A fetch
    // that rejects is reported as status -1 with the error text as the body; a
    // missing/empty script result is reported as (-1, "").
    private static (int Status, string Body) DirectFetch(IWebDriver driver, string apiUrl)
    {
        const string script = """
            const url = arguments[0];
            const done = arguments[arguments.length - 1];
            fetch(url, { credentials: 'include', headers: { 'accept': 'application/json' } })
                .then(r => r.text().then(t => done(JSON.stringify({ status: r.status, body: t }))))
                .catch(e => done(JSON.stringify({ status: -1, body: String(e) })));
            """;

        var raw = ((IJavaScriptExecutor)driver).ExecuteAsyncScript(script, apiUrl) as string;
        if (string.IsNullOrEmpty(raw))
        {
            return (-1, "");
        }

        using var doc = JsonDocument.Parse(raw);
        var status = doc.RootElement.GetProperty("status").GetInt32();
        var body = doc.RootElement.GetProperty("body").GetString() ?? "";
        return (status, body);
    }

    // True for a 403/503/-1 status, for the known challenge-page markers, or for
    // any body that starts with markup instead of JSON.
    private static bool LooksLikeChallenge(int status, string body) =>
        status is 403 or 503 or -1
        || body.Contains("Just a moment", StringComparison.OrdinalIgnoreCase)
        || body.Contains("challenge-platform", StringComparison.OrdinalIgnoreCase)
        || body.TrimStart().StartsWith("<", StringComparison.Ordinal); // HTML, not JSON

    // Count items[] without binding a full model (the typed model is Phase 2).
    // Returns null when the body is not valid JSON or has no items[] array.
    private static int? TryCountItems(string body)
    {
        try
        {
            using var doc = JsonDocument.Parse(body);
            return doc.RootElement.TryGetProperty("items", out var items)
                   && items.ValueKind == JsonValueKind.Array
                ? items.GetArrayLength()
                : null;
        }
        catch (JsonException)
        {
            return null;
        }
    }

    // Waits PageDelaySeconds (negative values count as 0) plus a random share of
    // PageJitterSeconds between pages.
    private async Task DelayAsync(CancellationToken ct)
    {
        var jitter = _options.PageJitterSeconds > 0
            ? Random.Shared.NextDouble() * _options.PageJitterSeconds
            : 0;
        var seconds = Math.Max(0, _options.PageDelaySeconds) + jitter;
        if (seconds > 0)
        {
            await Task.Delay(TimeSpan.FromSeconds(seconds), ct);
        }
    }

    // Writes one response body as UTF-8 text into the output directory.
    private static async Task WriteAsync(string dir, string fileName, string body, CancellationToken ct) =>
        await File.WriteAllTextAsync(Path.Combine(dir, fileName), body, Encoding.UTF8, ct);
}
|
|
||||||
@@ -1,50 +0,0 @@
|
|||||||
namespace BlueLaminate.Scraper.CsMoney;
|
|
||||||
|
|
||||||
/// <summary>
/// Settings for the cs.money scraper, bound from the <c>CsMoney</c> configuration
/// section.
/// <para>
/// cs.money has no public API and is fronted by Cloudflare bot protection, so the
/// scraper drives a real, non-headless browser (Selenium/Edge) through an IPRoyal
/// residential proxy via a local forwarding hop (no CDP). Because the market
/// endpoint re-challenges aggressively while paginating, these settings also
/// control the warmed browser profile and the request pacing.
/// </para>
/// </summary>
public sealed class CsMoneyOptions
{
    /// <summary>Name of the configuration section these options bind from.</summary>
    public const string SectionName = "CsMoney";

    /// <summary>
    /// Public market page opened in the browser; this is also where the operator
    /// clears the Cloudflare challenge.
    /// </summary>
    public string MarketUrl { get; set; } = "https://cs.money/market/buy/";

    /// <summary>
    /// Format string for the listings API; <c>{0}</c> receives the page offset
    /// (advancing in steps of 60). It is fetched in-page from the cleared market
    /// origin so that the cf_clearance cookie accompanies the request.
    /// </summary>
    public string ApiUrlTemplate { get; set; } =
        "https://cs.money/2.0/market/sell-orders?limit=60&offset={0}";

    /// <summary>
    /// Directory of the persistent Chromium profile. Keeping one profile across
    /// runs preserves the cf_clearance cookie and browsing history, and a warmed
    /// profile is re-challenged far less often than a fresh one. An empty value
    /// means a throwaway profile.
    /// </summary>
    public string ProfileDir { get; set; } =
        Path.Combine(Path.GetTempPath(), "bluelaminate-csmoney-profile");

    /// <summary>
    /// Optional ISO country code(s) for the residential exit IP, e.g. "us".
    /// When null or empty, IPRoyal chooses at random.
    /// </summary>
    public string? Country { get; set; }

    /// <summary>
    /// Whether the browser loads images. Defaults to off to save bandwidth on the
    /// metered residential plan.
    /// </summary>
    public bool LoadImages { get; set; }

    /// <summary>Base pause, in seconds, between paginated API fetches (human-like pacing).</summary>
    public double PageDelaySeconds { get; set; } = 2.5;

    /// <summary>Upper bound, in seconds, of the random jitter added to each pause (0..value).</summary>
    public double PageJitterSeconds { get; set; } = 2.0;
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
namespace BlueLaminate.Scraper.Proxies;
|
|
||||||
|
|
||||||
/// <summary>
/// Abstraction over wherever proxy endpoints come from. It is a deliberate seam:
/// everything else in the scraper depends only on this interface and on
/// <see cref="ProxyLease"/>, so another residential provider, the future C2 that
/// allocates IPs to containers, or a composite "grab-bag" across several providers
/// can be substituted without touching browser or scraping code.
/// </summary>
public interface IProxyProvider
{
    /// <summary>Provider identifier stamped on the leases it issues, e.g. "iproyal".</summary>
    string Name { get; }

    /// <summary>
    /// Returns a ready-to-use endpoint satisfying <paramref name="request"/>. Gateway
    /// providers build it purely by string composition (no network call); the C2
    /// implementation may later replace that with real allocation.
    /// </summary>
    /// <param name="request">The kind of exit IP the caller wants.</param>
    /// <returns>The endpoint and credentials to connect with.</returns>
    ProxyLease Acquire(ProxyRequest request);
}
|
|
||||||
@@ -1,77 +0,0 @@
|
|||||||
namespace BlueLaminate.Scraper.Proxies;
|
|
||||||
|
|
||||||
/// <summary>
/// <see cref="IProxyProvider"/> for IPRoyal's residential gateway. IPRoyal keeps
/// one fixed host/port (geo.iproyal.com:12321) and encodes everything else —
/// country, sticky-session id, session lifetime — as underscore-delimited
/// parameters appended to the account password. Example password:
/// "secret_country-us_session-ab12cd34_lifetime-30m". The account username is sent
/// unchanged. Docs: https://docs.iproyal.com/proxies/residential/proxy
/// </summary>
public sealed class IpRoyalProxyProvider : IProxyProvider
{
    public const string GatewayHost = "geo.iproyal.com";
    public const int GatewayPort = 12321;

    // IPRoyal caps sticky sessions; 30 minutes is a safe default that comfortably
    // covers a single scrape pass without forcing an early IP rotation.
    private static readonly TimeSpan DefaultLifetime = TimeSpan.FromMinutes(30);

    // Bounds for the encoded lifetime. The lower bound matches the original
    // "at least one minute" behaviour; the upper bound keeps the value inside int
    // range and follows IPRoyal's published 7-day maximum
    // (NOTE(review): confirm the cap against the current IPRoyal docs).
    private const int MinLifetimeMinutes = 1;
    private const int MaxLifetimeMinutes = 7 * 24 * 60;

    // NOTE(review): IPRoyal's docs describe the session value as an 8-character
    // alphanumeric string — confirm against the current docs.
    private const int SessionIdLength = 8;

    private readonly string _username;
    private readonly string _password;

    /// <summary>Creates a provider for one IPRoyal account.</summary>
    /// <param name="username">Account username, sent to the gateway unchanged.</param>
    /// <param name="password">Account password; targeting parameters are appended per lease.</param>
    /// <exception cref="ArgumentException">Either credential is null, empty or whitespace.</exception>
    public IpRoyalProxyProvider(string username, string password)
    {
        if (string.IsNullOrWhiteSpace(username))
        {
            throw new ArgumentException("IPRoyal username is required.", nameof(username));
        }

        if (string.IsNullOrWhiteSpace(password))
        {
            throw new ArgumentException("IPRoyal password is required.", nameof(password));
        }

        _username = username;
        _password = password;
    }

    /// <inheritdoc />
    public string Name => "iproyal";

    /// <summary>
    /// Composes the gateway credentials for <paramref name="request"/>. No network
    /// call is made; the lease is pure string composition.
    /// </summary>
    /// <param name="request">Country / stickiness / lifetime wishes.</param>
    /// <returns>A lease pointing at the fixed gateway with the parameterised password.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public ProxyLease Acquire(ProxyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var password = _password;
        string? sessionId = null;
        DateTimeOffset? expiresAt = null;

        // Country first; the router picks one at random when several are listed.
        if (!string.IsNullOrWhiteSpace(request.Country))
        {
            password += $"_country-{request.Country.Trim().ToLowerInvariant()}";
        }

        if (request.Sticky)
        {
            // A blank caller-supplied id would yield "_session-_lifetime-…", which
            // is not a usable session key, so treat it the same as "not supplied".
            sessionId = string.IsNullOrWhiteSpace(request.SessionId)
                ? NewSessionId()
                : request.SessionId;

            var lifetime = request.Lifetime ?? DefaultLifetime;

            // IPRoyal expresses lifetime as whole minutes (e.g. "_lifetime-30m").
            // Clamp in double space before the cast: converting an out-of-range
            // double to int is not well defined.
            var minutes = (int)Math.Clamp(
                Math.Round(lifetime.TotalMinutes),
                MinLifetimeMinutes,
                MaxLifetimeMinutes);

            password += $"_session-{sessionId}_lifetime-{minutes}m";
            expiresAt = DateTimeOffset.UtcNow.AddMinutes(minutes);
        }

        return new ProxyLease(
            Host: GatewayHost,
            Port: GatewayPort,
            Username: _username,
            Password: password,
            Provider: Name,
            SessionId: sessionId,
            ExpiresAt: expiresAt);
    }

    // Short, URL/param-safe token (lower-case hex). It only needs to be stable for
    // the duration of a sticky lease.
    private static string NewSessionId() =>
        Guid.NewGuid().ToString("N")[..SessionIdLength];
}
|
|
||||||
@@ -1,232 +0,0 @@
|
|||||||
using System.Net;
|
|
||||||
using System.Net.Sockets;
|
|
||||||
using System.Text;
|
|
||||||
using Microsoft.Extensions.Logging;
|
|
||||||
|
|
||||||
namespace BlueLaminate.Scraper.Proxies;
|
|
||||||
|
|
||||||
/// <summary>
/// A tiny in-process HTTP proxy that listens on 127.0.0.1 and chains every request
/// to an upstream gateway (the residential <see cref="ProxyLease"/>), injecting the
/// gateway's <c>Proxy-Authorization</c> header itself.
/// <para>
/// Why this exists: Chromium ignores credentials in <c>--proxy-server</c>, and the
/// only in-browser ways to answer the gateway's 407 are a CDP auth handler (which
/// is a Cloudflare automation tell) or a Manifest V2 extension (disabled in current
/// Chromium). By terminating the browser→proxy hop locally and adding the auth here,
/// the browser talks to an <em>auth-free</em> local endpoint and we run with zero
/// CDP — far less detectable — while the upstream still carries the IPRoyal
/// username/password (and its baked-in country/session params).
/// </para>
/// <para>
/// HTTPS (the only thing cs.money serves) flows through the <c>CONNECT</c> tunnel:
/// we open the tunnel to the upstream with auth, then relay raw bytes both ways so
/// the browser does TLS end-to-end with the real host — this proxy never sees
/// plaintext. Plain HTTP is forwarded best-effort for the occasional non-TLS call.
/// </para>
/// </summary>
public sealed class LocalForwardingProxy : IAsyncDisposable
{
    // Largest header block we are willing to buffer; beyond this the stream is
    // treated as runaway/garbage and the connection is dropped.
    private const int MaxHeaderLength = 64 * 1024;

    private readonly ProxyLease _upstream;
    private readonly ILogger _logger;
    private readonly TcpListener _listener;
    private readonly CancellationTokenSource _cts = new();
    private readonly string _authHeader;
    private Task? _acceptLoop;

    /// <summary>Prepares (but does not start) a proxy chaining to <paramref name="upstream"/>.</summary>
    /// <param name="upstream">Gateway endpoint and credentials to forward through.</param>
    /// <param name="logger">Receives lifecycle and per-connection diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public LocalForwardingProxy(ProxyLease upstream, ILogger logger)
    {
        ArgumentNullException.ThrowIfNull(upstream);
        ArgumentNullException.ThrowIfNull(logger);

        _upstream = upstream;
        _logger = logger;
        _listener = new TcpListener(IPAddress.Loopback, 0); // ephemeral port

        // UTF-8 yields the same bytes as ASCII for ASCII credentials, but does not
        // silently replace other characters with '?'.
        var token = Convert.ToBase64String(
            Encoding.UTF8.GetBytes($"{upstream.Username}:{upstream.Password}"));
        _authHeader = $"Proxy-Authorization: Basic {token}\r\n";
    }

    /// <summary>"127.0.0.1:port" — pass this to the browser's <c>--proxy-server</c>.</summary>
    public string Endpoint { get; private set; } = "";

    /// <summary>Bind the local port and start accepting browser connections.</summary>
    /// <returns>This instance, so the call can be chained after construction.</returns>
    public LocalForwardingProxy Start()
    {
        _listener.Start();
        var port = ((IPEndPoint)_listener.LocalEndpoint).Port;
        Endpoint = $"127.0.0.1:{port}";
        _acceptLoop = Task.Run(() => AcceptLoopAsync(_cts.Token));
        _logger.LogInformation(
            "Local forwarding proxy listening on {Endpoint} → upstream {Upstream} ({Provider}).",
            Endpoint, _upstream.Endpoint, _upstream.Provider);
        return this;
    }

    // Accepts connections until cancelled, handing each one to its own task.
    private async Task AcceptLoopAsync(CancellationToken ct)
    {
        while (!ct.IsCancellationRequested)
        {
            TcpClient client;
            try
            {
                client = await _listener.AcceptTcpClientAsync(ct);
            }
            catch (OperationCanceledException)
            {
                break;
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Accept failed.");
                continue;
            }

            // Fire-and-forget per connection; exceptions are swallowed per client so
            // one bad tunnel never takes down the listener.
            _ = Task.Run(() => HandleClientAsync(client, ct), ct);
        }
    }

    // Reads the browser's request header and dispatches on the method:
    // CONNECT → tunnel, anything else → plain-HTTP forward.
    private async Task HandleClientAsync(TcpClient client, CancellationToken ct)
    {
        using (client)
        {
            client.NoDelay = true;
            try
            {
                var clientStream = client.GetStream();
                var header = await ReadHeaderAsync(clientStream, ct);
                if (header is null)
                {
                    // Closed early, or never sent a complete header block.
                    return;
                }

                var requestLine = header.Split("\r\n", 2)[0];
                var parts = requestLine.Split(' ');
                if (parts.Length < 2)
                {
                    return;
                }

                var method = parts[0];
                if (method.Equals("CONNECT", StringComparison.OrdinalIgnoreCase))
                {
                    await HandleConnectAsync(clientStream, parts[1], ct);
                }
                else
                {
                    await HandlePlainAsync(clientStream, header, ct);
                }
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Client connection error.");
            }
        }
    }

    // HTTPS path: open an authenticated CONNECT tunnel upstream, then relay raw bytes.
    private async Task HandleConnectAsync(NetworkStream clientStream, string target, CancellationToken ct)
    {
        using var upstream = new TcpClient { NoDelay = true };
        await upstream.ConnectAsync(_upstream.Host, _upstream.Port, ct);
        var upstreamStream = upstream.GetStream();

        var connect = $"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{_authHeader}\r\n";
        // Latin-1 mirrors the byte→char decoding in ReadHeaderAsync, so the target
        // is sent with exactly the bytes the browser supplied.
        await upstreamStream.WriteAsync(Encoding.Latin1.GetBytes(connect), ct);

        var upstreamHeader = await ReadHeaderAsync(upstreamStream, ct);
        var ok = upstreamHeader is not null
            && upstreamHeader.StartsWith("HTTP/1.", StringComparison.Ordinal)
            && upstreamHeader.Split(' ', 3) is { Length: >= 2 } sl
            && sl[1] == "200";
        if (!ok)
        {
            var status = upstreamHeader?.Split("\r\n", 2)[0] ?? "no response";
            _logger.LogWarning("Upstream refused CONNECT {Target}: {Status}", target, status);
            var resp = "HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n";
            await clientStream.WriteAsync(Encoding.ASCII.GetBytes(resp), ct);
            return;
        }

        await clientStream.WriteAsync(
            Encoding.ASCII.GetBytes("HTTP/1.1 200 Connection established\r\n\r\n"), ct);

        await RelayAsync(clientStream, upstreamStream, ct);
    }

    // Plain-HTTP path: re-inject the request upstream with auth, then relay both ways.
    private async Task HandlePlainAsync(NetworkStream clientStream, string header, CancellationToken ct)
    {
        var hostLine = header.Split("\r\n")
            .FirstOrDefault(l => l.StartsWith("Host:", StringComparison.OrdinalIgnoreCase));
        if (hostLine is null)
        {
            return;
        }

        // Locate the end of the request line before opening a socket; a header
        // without one cannot be rewritten.
        var idx = header.IndexOf("\r\n", StringComparison.Ordinal);
        if (idx < 0)
        {
            return;
        }

        using var upstream = new TcpClient { NoDelay = true };
        await upstream.ConnectAsync(_upstream.Host, _upstream.Port, ct);
        var upstreamStream = upstream.GetStream();

        // Insert the Proxy-Authorization header right after the request line.
        var rewritten = header[..(idx + 2)] + _authHeader + header[(idx + 2)..];
        // Latin-1 round-trips the bytes ReadHeaderAsync decoded one-to-one; ASCII
        // would replace any header byte above 0x7F with '?'.
        await upstreamStream.WriteAsync(Encoding.Latin1.GetBytes(rewritten), ct);

        await RelayAsync(clientStream, upstreamStream, ct);
    }

    // Pipe both directions until either side closes, then stop the other direction
    // and observe both copies so neither is left to fault unobserved once the
    // caller disposes the sockets.
    private static async Task RelayAsync(NetworkStream a, NetworkStream b, CancellationToken ct)
    {
        using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct);
        var toUpstream = a.CopyToAsync(b, linked.Token);
        var toClient = b.CopyToAsync(a, linked.Token);

        await Task.WhenAny(toUpstream, toClient);
        await linked.CancelAsync();

        try
        {
            await Task.WhenAll(toUpstream, toClient);
        }
        catch (Exception)
        {
            // Expected during teardown: cancellation, or an I/O error from whichever
            // peer went away first. The relay is finished either way.
        }
    }

    // Read up to the end of the HTTP header block (CRLFCRLF). Returns null when the
    // stream ends first or the block exceeds MaxHeaderLength, so callers never
    // act on a truncated header. Reads one byte at a time on purpose: nothing past
    // the terminator may be consumed, because the rest of the stream is relayed raw.
    private static async Task<string?> ReadHeaderAsync(NetworkStream stream, CancellationToken ct)
    {
        var buffer = new byte[1];
        var sb = new StringBuilder(256);
        while (true)
        {
            var read = await stream.ReadAsync(buffer, ct);
            if (read == 0)
            {
                // EOF before the terminator: whatever we have is incomplete.
                return null;
            }

            sb.Append((char)buffer[0]);
            if (sb.Length >= 4
                && sb[^1] == '\n' && sb[^2] == '\r' && sb[^3] == '\n' && sb[^4] == '\r')
            {
                return sb.ToString();
            }

            // Guard against a runaway/garbage stream.
            if (sb.Length > MaxHeaderLength)
            {
                return null;
            }
        }
    }

    /// <summary>Stops accepting connections and waits for the accept loop to finish.</summary>
    public async ValueTask DisposeAsync()
    {
        await _cts.CancelAsync();
        _listener.Stop();
        if (_acceptLoop is not null)
        {
            try
            {
                await _acceptLoop;
            }
            catch (OperationCanceledException)
            {
                // expected on shutdown
            }
        }

        _cts.Dispose();
    }
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
using Microsoft.Extensions.Logging;
|
|
||||||
|
|
||||||
namespace BlueLaminate.Scraper.Proxies;
|
|
||||||
|
|
||||||
/// <summary>
/// Builds <see cref="LocalForwardingProxy"/> instances using a logger resolved from
/// DI. This lets consumers (the proxy probe, the cs.money capture) create a per-run
/// local proxy without taking a direct dependency on <see cref="ILoggerFactory"/>.
/// </summary>
public sealed class LocalForwardingProxyFactory
{
    private readonly ILogger<LocalForwardingProxy> _logger;

    /// <summary>Captures the logger handed to every proxy this factory creates.</summary>
    /// <param name="logger">Logger shared by the created proxies.</param>
    public LocalForwardingProxyFactory(ILogger<LocalForwardingProxy> logger) => _logger = logger;

    /// <summary>Build (but do not start) a local proxy chaining to <paramref name="upstream"/>.</summary>
    /// <param name="upstream">The gateway lease the new proxy forwards to.</param>
    /// <returns>A proxy that has not been started yet.</returns>
    public LocalForwardingProxy Create(ProxyLease upstream)
    {
        return new LocalForwardingProxy(upstream, _logger);
    }
}
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
namespace BlueLaminate.Scraper.Proxies;
|
|
||||||
|
|
||||||
/// <summary>
/// A concrete, ready-to-use proxy endpoint returned by an
/// <see cref="IProxyProvider"/>. Consumers only ever see this type, so changing
/// providers (or mixing several in a grab-bag) leaves calling code untouched.
/// <see cref="Username"/> and <see cref="Password"/> are the literal credentials to
/// present to the gateway — for providers like IPRoyal the targeting/session
/// parameters are already baked into them.
/// </summary>
/// <param name="Host">Gateway host, e.g. "geo.iproyal.com".</param>
/// <param name="Port">Gateway port, e.g. 12321.</param>
/// <param name="Username">Username to authenticate to the gateway with.</param>
/// <param name="Password">Password to authenticate with (may carry encoded session/geo params).</param>
/// <param name="Provider">Name of the provider that issued this lease.</param>
/// <param name="SessionId">Sticky session key when the exit IP is pinned; otherwise null.</param>
/// <param name="ExpiresAt">When a sticky IP may be recycled; null if rotating/unbounded.</param>
public sealed record ProxyLease(
    string Host,
    int Port,
    string Username,
    string Password,
    string Provider,
    string? SessionId = null,
    DateTimeOffset? ExpiresAt = null)
{
    /// <summary>"host:port" form used by browser proxy settings.</summary>
    public string Endpoint
    {
        get { return string.Concat(Host, ":", Port.ToString()); }
    }
}
|
|
||||||
@@ -1,103 +0,0 @@
|
|||||||
using System.Text.Json;
|
|
||||||
using BlueLaminate.Scraper.Browser;
|
|
||||||
using Microsoft.Extensions.Logging;
|
|
||||||
using OpenQA.Selenium;
|
|
||||||
|
|
||||||
namespace BlueLaminate.Scraper.Proxies;
|
|
||||||
|
|
||||||
/// <summary>Describes the exit IP a proxy lease actually resolves to, as reported by ipinfo.io.</summary>
/// <param name="Ip">The exit IP address.</param>
/// <param name="City">City reported for the exit IP.</param>
/// <param name="Region">Region reported for the exit IP.</param>
/// <param name="Country">Country reported for the exit IP.</param>
/// <param name="Org">
/// ASN plus organisation, e.g. "AS7922 Comcast Cable". This distinguishes
/// residential from datacenter exits: a consumer ISP indicates a genuine
/// residential IP, while a hosting provider (OVH, Hetzner, AWS…) indicates a
/// datacenter IP dressed up as one.
/// </param>
/// <param name="Hostname">Hostname reported for the exit IP.</param>
/// <param name="Timezone">Timezone reported for the exit IP.</param>
public sealed record ProxyExitInfo(
    string? Ip,
    string? City,
    string? Region,
    string? Country,
    string? Org,
    string? Hostname,
    string? Timezone);
|
|
||||||
|
|
||||||
/// <summary>
/// Minimal end-to-end check of the proxy plumbing: acquire a lease, start the local
/// forwarding proxy, launch the real browser through it, and read the exit IP back
/// from an IP-echo endpoint. It transfers only a few KB, which makes it the right
/// first thing to run on a metered residential plan — it proves authentication
/// works and reveals whether the IP is genuinely residential.
/// </summary>
public sealed class ProxyProbe
{
    private const string IpEchoUrl = "https://ipinfo.io/json";

    // Case-insensitive binding so the echo payload's property names map onto the record.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    /// <summary>Wires up the collaborators the probe needs.</summary>
    /// <param name="provider">Issues the proxy lease under test.</param>
    /// <param name="proxyFactory">Creates the local forwarding hop.</param>
    /// <param name="factory">Creates the browser driver.</param>
    /// <param name="logger">Receives probe progress and the result.</param>
    public ProxyProbe(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        _provider = provider;
        _proxyFactory = proxyFactory;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>Runs the probe once and returns what the IP-echo endpoint reported.</summary>
    /// <param name="request">The kind of exit IP to request from the provider.</param>
    /// <returns>The exit information parsed from the echo response.</returns>
    /// <exception cref="InvalidOperationException">
    /// The browser returned no page text, the text contained no JSON object, or the
    /// JSON deserialized to null.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);
        var exitMode = lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}";
        _logger.LogInformation(
            "Acquired {Provider} lease (exit {Mode}).",
            lease.Provider, exitMode);

        await using var localProxy = _proxyFactory.Create(lease).Start();
        var driver = _factory.Create(localProxy.Endpoint, blockImages: true);
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            driver.Navigate().GoToUrl(IpEchoUrl);

            // Take the document's text instead of walking the DOM, so the browser's
            // built-in JSON viewer cannot interfere; the JSON object is carved out
            // of that text afterwards.
            var scriptResult = ((IJavaScriptExecutor)driver)
                .ExecuteScript("return document.documentElement.innerText;");
            if (scriptResult is not string rendered)
            {
                throw new InvalidOperationException("Browser returned no page text.");
            }

            var json = ExtractJson(rendered);
            var info = JsonSerializer.Deserialize<ProxyExitInfo>(json, JsonOptions);
            if (info is null)
            {
                throw new InvalidOperationException("IP-echo response was empty.");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                info.Ip, info.City, info.Region, info.Country, info.Org);

            return info;
        }
        finally
        {
            // Always shut the browser down, whether or not the probe succeeded.
            driver.Quit();
        }
    }

    // Returns the span from the first '{' to the last '}' (inclusive); throws when
    // no such span exists.
    private static string ExtractJson(string text)
    {
        var open = text.IndexOf('{');
        var close = text.LastIndexOf('}');
        var found = open >= 0 && close > open;
        if (!found)
        {
            throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
        }

        return text.Substring(open, close - open + 1);
    }
}
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
namespace BlueLaminate.Scraper.Proxies;
|
|
||||||
|
|
||||||
/// <summary>
/// Describes the exit IP a caller is asking for, in provider-neutral terms; each
/// <see cref="IProxyProvider"/> maps these settings onto its own gateway syntax.
/// A sticky request pins one residential IP for the session's lifetime, whereas a
/// non-sticky request lets the IP rotate per connection.
/// </summary>
/// <param name="Country">
/// Optional ISO 3166-1 alpha-2 code, or a comma-separated list from which the
/// provider picks one at random (e.g. "us" or "us,gb,de"). Null applies no
/// geo constraint.
/// </param>
/// <param name="Sticky">
/// True to hold the same exit IP for the whole session; false to rotate.
/// </param>
/// <param name="SessionId">
/// Optional caller-chosen session key for a sticky lease. If null while
/// <paramref name="Sticky"/> is true, the provider generates one.
/// </param>
/// <param name="Lifetime">
/// How long a sticky IP should be kept before the provider may recycle it.
/// Ignored when <paramref name="Sticky"/> is false. Null defers to the
/// provider's own default.
/// </param>
public sealed record ProxyRequest(
    string? Country = null,
    bool Sticky = true,
    string? SessionId = null,
    TimeSpan? Lifetime = null);
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"data":{"aed":3.67308,"afn":63.8101,"all":81.9632,"amd":368.143,"ang":1.80234,"aoa":918.907,"ars":1408.71,"aud":1.39151,"awg":1.79,"azn":1.69966,"bam":1.68079,"bbd":1.99,"bdt":122.756,"bgn":1.67724,"bhd":0.377063,"bif":2977.25,"bmd":1,"bnd":1.27739,"bob":6.93362,"brl":5.03662,"bsd":1,"btn":94.9823,"bwp":13.4051,"byn":2.76,"bzd":2,"cad":1.38011,"cdf":2303.13,"chf":0.781072,"clp":889.925,"cny":6.76633,"cop":3658.64,"crc":456.323,"cve":94.8541,"czk":20.8256,"djf":177.6,"dkk":6.41027,"dop":58.34,"dzd":132.483,"eek":11.7036,"egp":52.2449,"etb":158.478,"eur":0.85756,"eurc":0.85756,"fjd":2.22183,"fkp":0.743205,"gbp":0.743163,"gel":2.6635,"ghs":11.738,"gip":0.743205,"gmd":71.7,"gnf":8733.01,"gtq":7.62826,"gyd":209.218,"hkd":7.83683,"hnl":26.5919,"hrk":6.46045,"htg":131.051,"huf":303.494,"idr":17846.4,"ils":2.81558,"inr":94.9244,"isk":122.978,"jmd":157.512,"jod":0.709142,"jpy":159.298,"kes":129.43,"kgs":87.4636,"khr":4026.38,"kmf":422.97,"krw":1507.45,"kwd":0.306761,"kyd":0.831626,"kzt":485.776,"lak":21934.5,"lbp":89500,"lkr":330.556,"lrd":182.518,"lsl":16.2382,"ltl":2.85333,"lvl":0.666172,"mad":9.18233,"mdl":17.2495,"mga":4197.32,"mkd":52.9711,"mmk":3658.01,"mnt":3578.79,"mop":8.07515,"mro":357.429,"mur":47.3605,"mvr":15.4615,"mwk":1734.01,"mxn":17.3547,"myr":3.96506,"mzn":63.7022,"nad":16.2435,"ngn":1407.3,"nio":36.6243,"nok":9.25345,"npr":152.04,"nzd":1.67028,"omr":0.385044,"pab":1,"pen":3.4017,"pgk":4.36134,"php":61.5484,"pkr":278.578,"pln":3.62897,"pyg":6017.9,"qar":3.64153,"ron":4.5042,"rsd":100.688,"rub":71.0734,"rwf":1463.11,"sar":3.75298,"sbd":8.0556,"scr":14.4837,"sek":9.24372,"sgd":1.27675,"shp":0.743619,"sle":22.7529,"sll":22791.4,"sos":571.375,"srd":37.1698,"std":20979.6,"svc":8.75278,"szl":16.2358,"thb":32.5267,"tjs":9.25184,"tnd":2.92,"top":2.35974,"try":45.8529,"ttd":6.74984,"twd":31.4269,"tzs":2629.69,"uah":44.2847,"ugx":3771.6,"usd":1,"usdc":1,"usdt":1.0013,"uyu":40.1504,"uzs":12004,"vef":50.1656,"vnd":26311,"vuv":118.053,"wst":2.70421,"xaf":562.45,"xcd":2
.6882,"xcg":1.80234,"xof":562.975,"xpf":102.465,"yer":1566.65,"zar":16.2289,"zmw":18.3213}}
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"inferred_location":{"short":"US","long":"United States","currency":"USD"}}
|
|
||||||
File diff suppressed because one or more lines are too long
@@ -1 +0,0 @@
|
|||||||
{"code":1,"message":"You need to be logged in to search listings"}
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"inferred_location":{"short":"US","long":"United States","currency":"USD"}}
|
|
||||||
File diff suppressed because one or more lines are too long
@@ -29,11 +29,6 @@
|
|||||||
<!-- CLI / telemetry -->
|
<!-- CLI / telemetry -->
|
||||||
<PackageVersion Include="System.CommandLine" Version="2.0.8" />
|
<PackageVersion Include="System.CommandLine" Version="2.0.8" />
|
||||||
<PackageVersion Include="OpenTelemetry" Version="1.15.3" />
|
<PackageVersion Include="OpenTelemetry" Version="1.15.3" />
|
||||||
|
|
||||||
<!-- Browser automation (cs.money sits behind Cloudflare; a real, non-headless
|
|
||||||
browser routed through a residential proxy is required to clear the
|
|
||||||
challenge and observe the site's internal API). -->
|
|
||||||
<PackageVersion Include="Selenium.WebDriver" Version="4.44.0" />
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@@ -20,12 +20,12 @@
|
|||||||
|
|
||||||
SET search_path = skintracker;
|
SET search_path = skintracker;
|
||||||
|
|
||||||
INSERT INTO skin_conditions (skin_id, condition, min_float, max_float)
|
INSERT INTO skin_conditions (skin_id, condition, float_min, float_max)
|
||||||
SELECT
|
SELECT
|
||||||
s.id,
|
s.id,
|
||||||
t.name,
|
t.name,
|
||||||
GREATEST(s.float_min, t.lo) AS min_float, -- clamp the tier to the skin's range
|
GREATEST(s.float_min, t.lo) AS float_min, -- clamp the tier to the skin's range
|
||||||
LEAST(s.float_max, t.hi) AS max_float
|
LEAST(s.float_max, t.hi) AS float_max
|
||||||
FROM skins s
|
FROM skins s
|
||||||
CROSS JOIN (VALUES
|
CROSS JOIN (VALUES
|
||||||
('Factory New', 0.00, 0.07),
|
('Factory New', 0.00, 0.07),
|
||||||
@@ -51,9 +51,9 @@ ORDER BY s.id, t.lo;
|
|||||||
-- Sanity checks (optional)
|
-- Sanity checks (optional)
|
||||||
-- ------------------------------------------------------------
|
-- ------------------------------------------------------------
|
||||||
-- Rows per condition:
|
-- Rows per condition:
|
||||||
-- SELECT condition, count(*) FROM skin_conditions GROUP BY condition ORDER BY min(min_float);
|
-- SELECT condition, count(*) FROM skin_conditions GROUP BY condition ORDER BY min(float_min);
|
||||||
--
|
--
|
||||||
-- Spot-check a capped skin (e.g. an Asiimov) shows clamped FT bounds:
|
-- Spot-check a capped skin (e.g. an Asiimov) shows clamped FT bounds:
|
||||||
-- SELECT s.name, sc.condition, sc.min_float, sc.max_float
|
-- SELECT s.name, sc.condition, sc.float_min, sc.float_max
|
||||||
-- FROM skin_conditions sc JOIN skins s ON s.id = sc.skin_id
|
-- FROM skin_conditions sc JOIN skins s ON s.id = sc.skin_id
|
||||||
-- WHERE s.name ILIKE 'Asiimov' ORDER BY sc.min_float;
|
-- WHERE s.name ILIKE 'Asiimov' ORDER BY sc.float_min;
|
||||||
|
|||||||
@@ -1,44 +1,18 @@
|
|||||||
-- ============================================================
|
-- ============================================================
|
||||||
-- CS2 Skin Tracker — backfill skin_conditions.listings_swept_at
|
-- CS2 Skin Tracker — backfill skin_conditions.listings_swept_at
|
||||||
-- Run against the skintracker database as the app role, ONCE,
|
|
||||||
-- after the AddSkinConditionListingsSweptAt migration is applied
|
|
||||||
-- and 05_fill_skin_conditions.sql has populated the wear bands.
|
|
||||||
-- Idempotent: re-running only touches still-null bands.
|
|
||||||
--
|
--
|
||||||
-- Why: the catalogue sweep used to page each skin to completion
|
-- SUPERSEDED — DO NOT RUN.
|
||||||
-- as a single unit, so a non-null skins.listings_swept_at means
|
|
||||||
-- EVERY wear of that skin was covered at that time. The sweep now
|
|
||||||
-- checkpoints per wear band (skin_conditions.listings_swept_at).
|
|
||||||
-- Without this backfill, every band of an already-swept skin would
|
|
||||||
-- look never-swept and jump to the front of the queue, needlessly
|
|
||||||
-- re-sweeping skins that are already current. Inheriting the skin's
|
|
||||||
-- timestamp marks those bands as covered so the sweep moves on.
|
|
||||||
--
|
--
|
||||||
-- Only fills bands that are still null, so bands already swept under
|
-- The single shared `listings_swept_at` columns on `skins` and
|
||||||
-- the new per-band logic keep their (newer) timestamp.
|
-- `skin_conditions` were replaced by per-site checkpoint tables
|
||||||
|
-- (`skin_sweeps` / `skin_condition_sweeps`, keyed by (entity, source))
|
||||||
|
-- in the AddPerSiteSweepCheckpoints migration. Each site now tracks its
|
||||||
|
-- own "last swept" under its own `source`, so a band swept on CSFloat is
|
||||||
|
-- still never-swept on cs.money.
|
||||||
|
--
|
||||||
|
-- The columns this script updated no longer exist, so running it now
|
||||||
|
-- would error. We intentionally did NOT migrate the old values into the
|
||||||
|
-- new tables: both sites simply cold-sweep the catalogue once and the
|
||||||
|
-- never-swept-first ordering refills the checkpoints. This file is kept
|
||||||
|
-- only so the db/ script numbering stays stable.
|
||||||
-- ============================================================
|
-- ============================================================
|
||||||
|
|
||||||
SET search_path = skintracker;
|
|
||||||
|
|
||||||
UPDATE skin_conditions sc
|
|
||||||
SET listings_swept_at = s.listings_swept_at
|
|
||||||
FROM skins s
|
|
||||||
WHERE sc.skin_id = s.id
|
|
||||||
AND s.listings_swept_at IS NOT NULL -- skin was fully swept under the old per-skin logic
|
|
||||||
AND sc.listings_swept_at IS NULL; -- don't overwrite bands already swept per-band
|
|
||||||
|
|
||||||
-- ------------------------------------------------------------
|
|
||||||
-- Sanity checks (optional)
|
|
||||||
-- ------------------------------------------------------------
|
|
||||||
-- Bands backfilled vs still never-swept:
|
|
||||||
-- SELECT
|
|
||||||
-- count(*) FILTER (WHERE listings_swept_at IS NOT NULL) AS swept,
|
|
||||||
-- count(*) FILTER (WHERE listings_swept_at IS NULL) AS never_swept
|
|
||||||
-- FROM skin_conditions;
|
|
||||||
--
|
|
||||||
-- A previously-swept skin should now have all its bands stamped:
|
|
||||||
-- SELECT s.name, sc.condition, sc.listings_swept_at
|
|
||||||
-- FROM skin_conditions sc JOIN skins s ON s.id = sc.skin_id
|
|
||||||
-- WHERE s.listings_swept_at IS NOT NULL
|
|
||||||
-- ORDER BY s.name, sc.min_float
|
|
||||||
-- LIMIT 20;
|
|
||||||
|
|||||||
@@ -1,14 +1,19 @@
|
|||||||
# One-command startup for the cs.money scraper control plane + worker.
|
# One-command startup for the scraper control plane + per-market workers.
|
||||||
# Postgres is external (runs independently on the host); the C2 connects to it via
|
# Postgres is external (runs independently on the host); the C2 connects to it via
|
||||||
# host.docker.internal and auto-applies EF migrations on boot.
|
# host.docker.internal and auto-applies EF migrations on boot.
|
||||||
#
|
#
|
||||||
# docker compose up --build
|
# docker compose up --build
|
||||||
#
|
#
|
||||||
# Scale workers (drop the worker `ports:` first — noVNC can't share one host port):
|
# Worker counts per market are env-driven (deploy.replicas), so one command sets the mix —
|
||||||
# docker compose up --build --scale worker=10
|
# e.g. 1 skin.land worker and 0 cs.money workers (PowerShell):
|
||||||
|
# $env:CSMONEY_WORKERS=0; $env:SKINLAND_WORKERS=1; docker compose up --build
|
||||||
|
# bash/sh:
|
||||||
|
# CSMONEY_WORKERS=0 SKINLAND_WORKERS=1 docker compose up --build
|
||||||
|
# (Or set them in a .env file next to this compose file.) Defaults: 1 of each.
|
||||||
|
#
|
||||||
# Each worker mints its own IPRoyal sticky session at startup, so every replica gets a
|
# Each worker mints its own IPRoyal sticky session at startup, so every replica gets a
|
||||||
# distinct residential exit IP. Set IPROYAL_USERNAME / IPROYAL_PASSWORD (e.g. in a .env
|
# distinct residential exit IP. Set IPROYAL_USERNAME / IPROYAL_PASSWORD (.env works) to
|
||||||
# file next to this compose file) to turn the proxy on.
|
# turn the proxy on. The worker `ports:` are ephemeral so replicas never collide.
|
||||||
services:
|
services:
|
||||||
c2:
|
c2:
|
||||||
build:
|
build:
|
||||||
@@ -33,7 +38,11 @@ services:
|
|||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: worker/Dockerfile
|
dockerfile: worker/Dockerfile
|
||||||
|
# cs.money worker count. Set CSMONEY_WORKERS=0 to run none (e.g. skin.land-only).
|
||||||
|
deploy:
|
||||||
|
replicas: ${CSMONEY_WORKERS:-1}
|
||||||
environment:
|
environment:
|
||||||
|
WORKER_SCRIPT: csmoney_worker.py # (also the image default; explicit for symmetry)
|
||||||
C2_URL: http://c2:5080
|
C2_URL: http://c2:5080
|
||||||
WORKER_TOKEN: ${WORKER_TOKEN:-dev-worker-token}
|
WORKER_TOKEN: ${WORKER_TOKEN:-dev-worker-token}
|
||||||
# IPRoyal residential proxy: each replica self-assigns a unique sticky session
|
# IPRoyal residential proxy: each replica self-assigns a unique sticky session
|
||||||
@@ -53,3 +62,30 @@ services:
|
|||||||
# http://localhost:<mapped>/vnc.html to watch / solve a challenge.
|
# http://localhost:<mapped>/vnc.html to watch / solve a challenge.
|
||||||
- "6080"
|
- "6080"
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# The skin.land worker: same image, but runs skinland_worker.py against the C2's
|
||||||
|
# /skinland job group and warms on a skin.land page. Each replica gets its own IPRoyal
|
||||||
|
# sticky exit IP exactly like the cs.money worker. Count via SKINLAND_WORKERS.
|
||||||
|
skinland-worker:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: worker/Dockerfile
|
||||||
|
deploy:
|
||||||
|
replicas: ${SKINLAND_WORKERS:-1}
|
||||||
|
environment:
|
||||||
|
WORKER_SCRIPT: skinland_worker.py
|
||||||
|
C2_URL: http://c2:5080
|
||||||
|
MARKET_URL: ${SKINLAND_MARKET_URL:-https://skin.land/market/csgo/}
|
||||||
|
WORKER_TOKEN: ${WORKER_TOKEN:-dev-worker-token}
|
||||||
|
IPROYAL_USERNAME: ${IPROYAL_USERNAME:-}
|
||||||
|
IPROYAL_PASSWORD: ${IPROYAL_PASSWORD:-}
|
||||||
|
IPROYAL_COUNTRY: ${IPROYAL_COUNTRY:-us}
|
||||||
|
IPROYAL_LIFETIME_MIN: ${IPROYAL_LIFETIME_MIN:-60}
|
||||||
|
PROXY: ${PROXY:-}
|
||||||
|
SOLVE_SECONDS: ${SOLVE_SECONDS:-45}
|
||||||
|
LOAD_IMAGES: ${LOAD_IMAGES:-}
|
||||||
|
depends_on:
|
||||||
|
- c2
|
||||||
|
ports:
|
||||||
|
- "6080"
|
||||||
|
restart: unless-stopped
|
||||||
|
|||||||
148
monitoring/README.md
Normal file
148
monitoring/README.md
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
# BlueLaminate observability stack (standalone, Proxmox LXC)
|
||||||
|
|
||||||
|
A self-contained Grafana **LGTM** stack — **L**oki (logs), **G**rafana (dashboards),
|
||||||
|
**T**empo (traces), and Prometheus (**M**etrics) — fronted by **Grafana Alloy** as a single
|
||||||
|
OTLP ingress. It runs as native systemd services on its own Proxmox LXC, decoupled from the
|
||||||
|
app's `docker-compose.yml`. The C2 and Python workers push OpenTelemetry data to Alloy, which
|
||||||
|
fans the three signals out to the backends; Grafana ties them together.
|
||||||
|
|
||||||
|
```
|
||||||
|
C2 / workers ──OTLP(4317 grpc / 4318 http)──► Alloy ──┬─► Loki (logs, :3100)
|
||||||
|
(other host) ├─► Prometheus (metrics, :9090, remote-write)
|
||||||
|
└─► Tempo (traces, :4319 OTLP → store)
|
||||||
|
│
|
||||||
|
Grafana (:3000)
|
||||||
|
datasources: Loki + Prometheus + Tempo
|
||||||
|
```
|
||||||
|
|
||||||
|
Only Alloy's OTLP ports (`4317`/`4318`) and Grafana (`3000`) need to be reachable from the
|
||||||
|
LAN. Loki and Tempo bind localhost; only Alloy (writes) and Grafana (queries) talk to them.
|
||||||
|
|
||||||
|
## Layout
|
||||||
|
|
||||||
|
```
|
||||||
|
monitoring/
|
||||||
|
install.sh # idempotent provisioner — run as root in the LXC
|
||||||
|
alloy/config.alloy # OTLP receiver → batch → Loki / Prometheus / Tempo
|
||||||
|
prometheus/prometheus.yml # self-monitoring scrapes (app metrics arrive via remote-write)
|
||||||
|
prometheus/prometheus.service # systemd unit: remote-write + OTLP receivers, 15d retention
|
||||||
|
loki/loki.yml # single-binary, filesystem store, 15d retention
|
||||||
|
tempo/tempo.yml # OTLP on :4319, local store, metrics_generator → Prometheus
|
||||||
|
grafana/datasources.yml # Loki + Prometheus(default) + Tempo, correlated
|
||||||
|
grafana/dashboards.yml # file-based dashboard provider
|
||||||
|
grafana/dashboards/overview.json # starter dashboard (target health, span rates, logs)
|
||||||
|
```
|
||||||
|
|
||||||
|
## 1. Create the LXC (run on the Proxmox host)
|
||||||
|
|
||||||
|
Reference only — adjust the storage, bridge, and template names to your node. An unprivileged
|
||||||
|
Debian 13 container with ~2 vCPU / 2–4 GB RAM / 20–40 GB disk is plenty.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Make sure a Debian 13 template is present (once):
|
||||||
|
# pveam update && pveam available | grep debian-13
|
||||||
|
# pveam download local debian-13-standard_*_amd64.tar.zst
|
||||||
|
|
||||||
|
pct create 910 local:vztmpl/debian-13-standard_13.0-1_amd64.tar.zst \
|
||||||
|
--hostname grafana-lxc \
|
||||||
|
--cores 2 --memory 4096 --swap 1024 \
|
||||||
|
--rootfs local-lvm:32 \
|
||||||
|
--net0 name=eth0,bridge=vmbr0,ip=dhcp \
|
||||||
|
--unprivileged 1 --features nesting=0 \
|
||||||
|
--onboot 1 --start 1
|
||||||
|
|
||||||
|
# (Optional) give it a static IP instead of dhcp, e.g.
|
||||||
|
# --net0 name=eth0,bridge=vmbr0,ip=192.168.1.50/24,gw=192.168.1.1
|
||||||
|
```
|
||||||
|
|
||||||
|
`nesting=0` is fine — there's no Docker here, just native binaries.
|
||||||
|
|
||||||
|
## 2. Deploy the stack (inside the LXC)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pct enter 910 # or: ssh root@<lxc-ip>
|
||||||
|
apt-get update && apt-get install -y git
|
||||||
|
git clone <this-repo-url> /opt/bluelaminate
|
||||||
|
cd /opt/bluelaminate/monitoring
|
||||||
|
bash install.sh            # `pct enter` / ssh root@ already gives a root shell; no sudo needed
|
||||||
|
```
|
||||||
|
|
||||||
|
No git on the LXC? Copy just this folder over instead:
|
||||||
|
`scp -r monitoring root@<lxc-ip>:/opt/monitoring && ssh root@<lxc-ip> 'cd /opt/monitoring && bash install.sh'`
|
||||||
|
|
||||||
|
The script adds the Grafana apt repo, installs grafana/loki/tempo/alloy, drops the Prometheus
|
||||||
|
release binary into `/opt/prometheus`, lays our configs over the packaged defaults, and
|
||||||
|
enables all five services. It prints the URLs and the OTLP endpoint when done.
|
||||||
|
|
||||||
|
## 3. Verify
|
||||||
|
|
||||||
|
```bash
|
||||||
|
systemctl is-active grafana-server loki tempo prometheus alloy # all → active
|
||||||
|
curl -s localhost:3100/ready # Loki → ready
|
||||||
|
curl -s localhost:3200/ready # Tempo → ready
|
||||||
|
curl -s localhost:9090/-/ready # Prometheus → Ready
|
||||||
|
```
|
||||||
|
|
||||||
|
Open Grafana at `http://<lxc-ip>:3000` (first login `admin` / `admin` — change it). The three
|
||||||
|
datasources and the **BlueLaminate → Stack Overview** dashboard are provisioned automatically.
|
||||||
|
Alloy's pipeline graph is at `http://<lxc-ip>:12345`.
|
||||||
|
|
||||||
|
### End-to-end OTLP smoke test (no app changes needed)
|
||||||
|
|
||||||
|
Send synthetic telemetry from any machine that can reach the LXC, using the OpenTelemetry
|
||||||
|
`telemetrygen` tool (`go install github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetrygen@latest`):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
telemetrygen traces --otlp-endpoint <lxc-ip>:4317 --otlp-insecure --traces 5
|
||||||
|
telemetrygen metrics --otlp-endpoint <lxc-ip>:4317 --otlp-insecure --duration 10s
|
||||||
|
telemetrygen logs --otlp-endpoint <lxc-ip>:4317 --otlp-insecure --logs 5
|
||||||
|
```
|
||||||
|
|
||||||
|
Then in Grafana **Explore**: pick **Tempo** (search recent traces), **Prometheus** (query
|
||||||
|
`gen`), and **Loki** (`{service_name=~".+"}`) — seeing data in all three confirms the full
|
||||||
|
fan-out before any app is wired up.
|
||||||
|
|
||||||
|
## 4. Wiring the apps later (the OTLP contract)
|
||||||
|
|
||||||
|
This deployment is **stack-only**; the C2 and workers aren't instrumented yet. When you do,
|
||||||
|
point them at this LXC — nothing here changes. The drop-in:
|
||||||
|
|
||||||
|
**.NET C2** (`BlueLaminate.C2`) — add packages `OpenTelemetry.Extensions.Hosting`,
|
||||||
|
`OpenTelemetry.Exporter.OpenTelemetryProtocol`, and the
|
||||||
|
`OpenTelemetry.Instrumentation.AspNetCore` / `.Http` / runtime instrumentations, then
|
||||||
|
`builder.Services.AddOpenTelemetry().WithTracing(...).WithMetrics(...)` plus
|
||||||
|
`builder.Logging.AddOpenTelemetry(...)`. Configure via env:
|
||||||
|
|
||||||
|
```
|
||||||
|
OTEL_EXPORTER_OTLP_ENDPOINT=http://<lxc-ip>:4318
|
||||||
|
OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
|
||||||
|
OTEL_SERVICE_NAME=bluelaminate-c2
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python workers** (`worker/csmoney_worker.py`, `skinland_worker.py`) — add
|
||||||
|
`opentelemetry-distro` and `opentelemetry-exporter-otlp` to `worker/requirements.txt`, run
|
||||||
|
under `opentelemetry-instrument python csmoney_worker.py`, same env vars with
|
||||||
|
`OTEL_SERVICE_NAME=csmoney-worker` / `skinland-worker`. (Today the workers emit structured
|
||||||
|
JSON logs to stdout — `LOG_JSON=1`, set by default in the image; an interim option is to
|
||||||
|
ship their Docker stdout to Loki with an Alloy `loki.source.docker` component on the app
|
||||||
|
host, which can parse those JSON fields directly, instead of instrumenting in-process.)
|
||||||
|
|
||||||
|
Add those env vars to the matching `docker-compose.yml` services when the instrumentation lands.
|
||||||
|
|
||||||
|
## Hardening
|
||||||
|
|
||||||
|
- **Firewall the OTLP ports.** `4317`/`4318` are bound to `0.0.0.0`. Restrict them to the app
|
||||||
|
host, e.g. `ufw allow from <app-host-ip> to any port 4317,4318 proto tcp`.
|
||||||
|
- **Auth on ingest (optional).** Add an `otelcol.auth.bearer` handler to
|
||||||
|
`otelcol.receiver.otlp` in `alloy/config.alloy` and send a matching
|
||||||
|
`OTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer <token>` from the apps.
|
||||||
|
- **Grafana password.** Change `admin` on first login, or set
|
||||||
|
`admin_password` under `[security]` in `/etc/grafana/grafana.ini` (the env-var form is `GF_SECURITY_ADMIN_PASSWORD`).
|
||||||
|
|
||||||
|
## Retention / sizing
|
||||||
|
|
||||||
|
Defaults are LXC-friendly: Prometheus **15d**, Loki **15d**, Tempo **7d**. Bump the
|
||||||
|
`--storage.tsdb.retention.time` flag (`prometheus.service`), `limits_config.retention_period` (`loki.yml`),
|
||||||
|
and `compactor.compaction.block_retention` (`tempo.yml`) if you have the disk. Re-run
|
||||||
|
`install.sh` to apply config edits.
|
||||||
67
monitoring/alloy/config.alloy
Normal file
67
monitoring/alloy/config.alloy
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
// Grafana Alloy — the single OTLP ingress for the BlueLaminate fleet.
|
||||||
|
//
|
||||||
|
// Receives OTLP (gRPC :4317 / HTTP :4318) from the C2 and the Python workers, batches it,
|
||||||
|
// then fans the three signals out to the local backends:
|
||||||
|
// metrics -> Prometheus (remote-write)
|
||||||
|
// logs -> Loki (push API)
|
||||||
|
// traces -> Tempo (OTLP gRPC on :4319, a non-colliding port)
|
||||||
|
//
|
||||||
|
// OTLP is bound on 0.0.0.0 so apps on other LAN hosts can push to this LXC. Everything it
|
||||||
|
// forwards to listens on localhost only (see each backend's config) — Alloy is the only
|
||||||
|
// thing that talks to Loki/Prometheus/Tempo. See README "Hardening" to add a bearer token.
|
||||||
|
|
||||||
|
otelcol.receiver.otlp "in" {
|
||||||
|
grpc {
|
||||||
|
endpoint = "0.0.0.0:4317"
|
||||||
|
}
|
||||||
|
http {
|
||||||
|
endpoint = "0.0.0.0:4318"
|
||||||
|
}
|
||||||
|
output {
|
||||||
|
metrics = [otelcol.processor.batch.default.input]
|
||||||
|
logs = [otelcol.processor.batch.default.input]
|
||||||
|
traces = [otelcol.processor.batch.default.input]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
otelcol.processor.batch "default" {
|
||||||
|
output {
|
||||||
|
metrics = [otelcol.exporter.prometheus.to_prom.input]
|
||||||
|
logs = [otelcol.exporter.loki.to_loki.input]
|
||||||
|
traces = [otelcol.exporter.otlp.to_tempo.input]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- metrics -> Prometheus remote-write ---------------------------------------------------
|
||||||
|
otelcol.exporter.prometheus "to_prom" {
|
||||||
|
forward_to = [prometheus.remote_write.local.receiver]
|
||||||
|
}
|
||||||
|
|
||||||
|
prometheus.remote_write "local" {
|
||||||
|
endpoint {
|
||||||
|
url = "http://localhost:9090/api/v1/write"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- logs -> Loki push --------------------------------------------------------------------
|
||||||
|
otelcol.exporter.loki "to_loki" {
|
||||||
|
forward_to = [loki.write.local.receiver]
|
||||||
|
}
|
||||||
|
|
||||||
|
loki.write "local" {
|
||||||
|
endpoint {
|
||||||
|
url = "http://localhost:3100/loki/api/v1/push"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- traces -> Tempo ----------------------------------------------------------------------
|
||||||
|
// Tempo's own OTLP receiver listens on :4319 so it doesn't collide with this Alloy receiver
|
||||||
|
// on :4317/:4318. TLS off — it's a localhost hop.
|
||||||
|
otelcol.exporter.otlp "to_tempo" {
|
||||||
|
client {
|
||||||
|
endpoint = "localhost:4319"
|
||||||
|
tls {
|
||||||
|
insecure = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
15
monitoring/grafana/dashboards.yml
Normal file
15
monitoring/grafana/dashboards.yml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Grafana dashboard provider — loads JSON dashboards from /var/lib/grafana/dashboards.
|
||||||
|
# Copied to /etc/grafana/provisioning/dashboards/ by install.sh.
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
providers:
|
||||||
|
- name: BlueLaminate
|
||||||
|
orgId: 1
|
||||||
|
folder: BlueLaminate
|
||||||
|
type: file
|
||||||
|
disableDeletion: false
|
||||||
|
allowUiUpdates: true
|
||||||
|
updateIntervalSeconds: 30
|
||||||
|
options:
|
||||||
|
path: /var/lib/grafana/dashboards
|
||||||
|
foldersFromFilesStructure: false
|
||||||
109
monitoring/grafana/dashboards/overview.json
Normal file
109
monitoring/grafana/dashboards/overview.json
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
{
|
||||||
|
"annotations": { "list": [] },
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"links": [],
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"mappings": [
|
||||||
|
{ "type": "value", "options": { "0": { "text": "DOWN", "color": "red" }, "1": { "text": "UP", "color": "green" } } }
|
||||||
|
],
|
||||||
|
"thresholds": { "mode": "absolute", "steps": [ { "color": "red", "value": null }, { "color": "green", "value": 1 } ] }
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 6, "w": 24, "x": 0, "y": 0 },
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "auto",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
||||||
|
"textMode": "value_and_name"
|
||||||
|
},
|
||||||
|
"pluginVersion": "11.0.0",
|
||||||
|
"targets": [
|
||||||
|
{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "up", "legendFormat": "{{job}}", "refId": "A" }
|
||||||
|
],
|
||||||
|
"title": "Stack targets — up/down",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": { "custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1 }, "unit": "reqps" },
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
|
||||||
|
"id": 2,
|
||||||
|
"options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "sum by (service_name) (rate(traces_spanmetrics_calls_total[5m]))",
|
||||||
|
"legendFormat": "{{service_name}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Span call rate by service (Tempo span-metrics)",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": { "custom": { "drawStyle": "line", "fillOpacity": 10, "lineWidth": 1 }, "unit": "bytes" },
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
|
||||||
|
"id": 3,
|
||||||
|
"options": { "legend": { "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||||
|
"expr": "process_resident_memory_bytes",
|
||||||
|
"legendFormat": "{{job}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Stack process memory",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": { "type": "loki", "uid": "loki" },
|
||||||
|
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 14 },
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"dedupStrategy": "none",
|
||||||
|
"enableLogDetails": true,
|
||||||
|
"showTime": true,
|
||||||
|
"sortOrder": "Descending",
|
||||||
|
"wrapLogMessage": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": { "type": "loki", "uid": "loki" },
|
||||||
|
"expr": "{service_name=~\".+\"}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Recent logs (all services)",
|
||||||
|
"type": "logs"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "30s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"tags": ["bluelaminate"],
|
||||||
|
"templating": { "list": [] },
|
||||||
|
"time": { "from": "now-6h", "to": "now" },
|
||||||
|
"timepicker": {},
|
||||||
|
"timezone": "",
|
||||||
|
"title": "BlueLaminate — Stack Overview",
|
||||||
|
"uid": "bl-overview",
|
||||||
|
"version": 1,
|
||||||
|
"weekStart": ""
|
||||||
|
}
|
||||||
53
monitoring/grafana/datasources.yml
Normal file
53
monitoring/grafana/datasources.yml
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
# Grafana datasource provisioning — Prometheus (default), Loki, Tempo, wired for
|
||||||
|
# trace <-> log <-> metric correlation. Copied to
|
||||||
|
# /etc/grafana/provisioning/datasources/ by install.sh.
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
- name: Prometheus
|
||||||
|
type: prometheus
|
||||||
|
uid: prometheus
|
||||||
|
access: proxy
|
||||||
|
url: http://localhost:9090
|
||||||
|
isDefault: true
|
||||||
|
jsonData:
|
||||||
|
httpMethod: POST
|
||||||
|
|
||||||
|
- name: Loki
|
||||||
|
type: loki
|
||||||
|
uid: loki
|
||||||
|
access: proxy
|
||||||
|
url: http://localhost:3100
|
||||||
|
jsonData:
|
||||||
|
# Turn a trace_id found on a log line into a clickable jump to the trace in Tempo.
|
||||||
|
# OTLP logs carry the id as structured metadata `trace_id`; adjust the regex if your
|
||||||
|
# app instrumentation emits it differently.
|
||||||
|
derivedFields:
|
||||||
|
- name: TraceID
|
||||||
|
matcherType: label
|
||||||
|
matcherRegex: trace_id
|
||||||
|
datasourceUid: tempo
|
||||||
|
url: "$${__value.raw}"  # `$$` escapes Grafana's env-var expansion in provisioning files; a single `$` would be expanded to an empty string
|
||||||
|
urlDisplayLabel: "View trace"
|
||||||
|
|
||||||
|
- name: Tempo
|
||||||
|
type: tempo
|
||||||
|
uid: tempo
|
||||||
|
access: proxy
|
||||||
|
url: http://localhost:3200
|
||||||
|
jsonData:
|
||||||
|
# Span -> related logs in Loki.
|
||||||
|
tracesToLogsV2:
|
||||||
|
datasourceUid: loki
|
||||||
|
spanStartTimeShift: "-1h"
|
||||||
|
spanEndTimeShift: "1h"
|
||||||
|
filterByTraceID: true
|
||||||
|
filterBySpanID: false
|
||||||
|
# Span -> RED metrics in Prometheus (from Tempo's metrics_generator).
|
||||||
|
tracesToMetrics:
|
||||||
|
datasourceUid: prometheus
|
||||||
|
# Service graph + node graph from the generator's service-graph metrics.
|
||||||
|
serviceMap:
|
||||||
|
datasourceUid: prometheus
|
||||||
|
nodeGraph:
|
||||||
|
enabled: true
|
||||||
122
monitoring/install.sh
Normal file
122
monitoring/install.sh
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
#
|
||||||
|
# Provision the standalone BlueLaminate observability stack on a fresh Debian LXC:
|
||||||
|
# Grafana + Loki + Tempo + Alloy (Grafana apt repo, each with its own systemd unit)
|
||||||
|
# Prometheus (official release tarball -> /opt/prometheus + our unit)
|
||||||
|
#
|
||||||
|
# Idempotent: safe to re-run (re-applies configs and restarts services). Run as root.
|
||||||
|
#
|
||||||
|
# sudo ./install.sh
|
||||||
|
#
|
||||||
|
# Override the Prometheus version with PROM_VERSION=x.y.z ./install.sh if needed.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
|
||||||
|
if [[ "${EUID}" -ne 0 ]]; then
|
||||||
|
echo "ERROR: run as root (sudo ./install.sh)." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
ARCH="$(dpkg --print-architecture)" # amd64 / arm64
|
||||||
|
echo "==> Target architecture: ${ARCH}"
|
||||||
|
|
||||||
|
# --- prerequisites ------------------------------------------------------------------------
|
||||||
|
echo "==> Installing prerequisites"
|
||||||
|
export DEBIAN_FRONTEND=noninteractive
|
||||||
|
apt-get update -y
|
||||||
|
apt-get install -y ca-certificates gpg wget curl tar  # software-properties-common is unavailable on Debian 13 and add-apt-repository is never used; apt speaks https natively
|
||||||
|
|
||||||
|
# --- Grafana apt repo: grafana, loki, tempo, alloy ----------------------------------------
|
||||||
|
echo "==> Adding the Grafana apt repository"
|
||||||
|
mkdir -p /etc/apt/keyrings
|
||||||
|
if [[ ! -s /etc/apt/keyrings/grafana.asc ]]; then
|
||||||
|
wget -qO /etc/apt/keyrings/grafana.asc https://apt.grafana.com/gpg-full.key
|
||||||
|
fi
|
||||||
|
echo "deb [signed-by=/etc/apt/keyrings/grafana.asc] https://apt.grafana.com stable main" \
|
||||||
|
> /etc/apt/sources.list.d/grafana.list
|
||||||
|
apt-get update -y
|
||||||
|
|
||||||
|
echo "==> Installing Grafana, Loki, Tempo, Alloy"
|
||||||
|
apt-get install -y grafana loki tempo alloy
|
||||||
|
|
||||||
|
# --- Prometheus (release tarball) ---------------------------------------------------------
|
||||||
|
echo "==> Installing Prometheus"
|
||||||
|
PROM_VERSION="${PROM_VERSION:-$(curl -fsSL https://api.github.com/repos/prometheus/prometheus/releases/latest \
|
||||||
|
| grep -oP '"tag_name":\s*"v\K[^"]+' || true)}"
|
||||||
|
PROM_VERSION="${PROM_VERSION:-3.2.1}"
|
||||||
|
echo " Prometheus version: ${PROM_VERSION}"
|
||||||
|
|
||||||
|
id -u prometheus &>/dev/null || useradd --system --no-create-home --shell /usr/sbin/nologin prometheus
|
||||||
|
|
||||||
|
PROM_DIR="prometheus-${PROM_VERSION}.linux-${ARCH}"
|
||||||
|
TMP="$(mktemp -d)"
|
||||||
|
trap 'rm -rf "${TMP}"' EXIT
|
||||||
|
wget -qO "${TMP}/prom.tar.gz" \
|
||||||
|
"https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/${PROM_DIR}.tar.gz"
|
||||||
|
tar -xzf "${TMP}/prom.tar.gz" -C "${TMP}"
|
||||||
|
install -d /opt/prometheus
|
||||||
|
install -m 0755 "${TMP}/${PROM_DIR}/prometheus" /opt/prometheus/prometheus
|
||||||
|
install -m 0755 "${TMP}/${PROM_DIR}/promtool" /opt/prometheus/promtool
|
||||||
|
|
||||||
|
# --- data directories ---------------------------------------------------------------------
|
||||||
|
echo "==> Creating data directories"
|
||||||
|
install -d -o prometheus -g prometheus /var/lib/prometheus
|
||||||
|
install -d -o loki -g loki /var/lib/loki /var/lib/loki/chunks /var/lib/loki/rules /var/lib/loki/compactor
|
||||||
|
install -d -o tempo -g tempo /var/lib/tempo /var/lib/tempo/wal /var/lib/tempo/blocks \
|
||||||
|
/var/lib/tempo/generator/wal /var/lib/tempo/generator/traces
|
||||||
|
|
||||||
|
# --- configuration ------------------------------------------------------------------------
|
||||||
|
echo "==> Installing configuration files"
|
||||||
|
install -d /etc/alloy /etc/loki /etc/tempo /etc/prometheus
|
||||||
|
install -m 0644 "${SCRIPT_DIR}/alloy/config.alloy" /etc/alloy/config.alloy
|
||||||
|
install -m 0644 "${SCRIPT_DIR}/loki/loki.yml" /etc/loki/config.yml
|
||||||
|
install -m 0644 "${SCRIPT_DIR}/tempo/tempo.yml" /etc/tempo/config.yml
|
||||||
|
install -m 0644 "${SCRIPT_DIR}/prometheus/prometheus.yml" /etc/prometheus/prometheus.yml
|
||||||
|
install -m 0644 "${SCRIPT_DIR}/prometheus/prometheus.service" /etc/systemd/system/prometheus.service
|
||||||
|
|
||||||
|
# Point Alloy's systemd unit at our config (the package reads /etc/default/alloy).
|
||||||
|
cat > /etc/default/alloy <<'EOF'
|
||||||
|
CONFIG_FILE="/etc/alloy/config.alloy"
|
||||||
|
CUSTOM_ARGS=""
|
||||||
|
RESTART_ON_UPGRADE=true
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# Grafana provisioning (datasources + dashboards).
|
||||||
|
echo "==> Installing Grafana provisioning"
|
||||||
|
install -d /etc/grafana/provisioning/datasources \
|
||||||
|
/etc/grafana/provisioning/dashboards \
|
||||||
|
/var/lib/grafana/dashboards
|
||||||
|
install -m 0644 "${SCRIPT_DIR}/grafana/datasources.yml" /etc/grafana/provisioning/datasources/bluelaminate.yml
|
||||||
|
install -m 0644 "${SCRIPT_DIR}/grafana/dashboards.yml" /etc/grafana/provisioning/dashboards/bluelaminate.yml
|
||||||
|
install -m 0644 "${SCRIPT_DIR}"/grafana/dashboards/*.json /var/lib/grafana/dashboards/
|
||||||
|
chown -R grafana:grafana /var/lib/grafana/dashboards 2>/dev/null || true
|
||||||
|
|
||||||
|
# --- start everything ---------------------------------------------------------------------
|
||||||
|
echo "==> Enabling + starting services"
|
||||||
|
systemctl daemon-reload
|
||||||
|
systemctl enable --now grafana-server loki tempo prometheus alloy
|
||||||
|
systemctl restart loki tempo prometheus alloy grafana-server
|
||||||
|
|
||||||
|
# --- summary ------------------------------------------------------------------------------
|
||||||
|
IP="$(hostname -I 2>/dev/null | awk '{print $1}')"
|
||||||
|
cat <<EOF
|
||||||
|
|
||||||
|
============================================================================
|
||||||
|
BlueLaminate observability stack installed.
|
||||||
|
|
||||||
|
Grafana UI : http://${IP:-<lxc-ip>}:3000 (first login admin/admin)
|
||||||
|
OTLP ingress : ${IP:-<lxc-ip>}:4317 (gRPC) / ${IP:-<lxc-ip>}:4318 (HTTP)
|
||||||
|
Alloy debug UI : http://${IP:-<lxc-ip>}:12345
|
||||||
|
Prometheus : http://${IP:-<lxc-ip>}:9090
|
||||||
|
|
||||||
|
Point apps at: OTEL_EXPORTER_OTLP_ENDPOINT=http://${IP:-<lxc-ip>}:4318
|
||||||
|
|
||||||
|
Readiness checks:
|
||||||
|
systemctl is-active grafana-server loki tempo prometheus alloy
|
||||||
|
curl -s localhost:3100/ready # Loki
|
||||||
|
curl -s localhost:3200/ready # Tempo
|
||||||
|
curl -s localhost:9090/-/ready # Prometheus
|
||||||
|
============================================================================
|
||||||
|
EOF
|
||||||
59
monitoring/loki/loki.yml
Normal file
59
monitoring/loki/loki.yml
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# Loki — single-binary, filesystem-backed, no auth (localhost-only; Alloy is the only writer).
|
||||||
|
# Tuned for an LXC: TSDB index, 15-day retention with the compactor enforcing deletes.
|
||||||
|
auth_enabled: false
|
||||||
|
|
||||||
|
server:
|
||||||
|
http_listen_address: 127.0.0.1
|
||||||
|
http_listen_port: 3100
|
||||||
|
grpc_listen_port: 9096
|
||||||
|
log_level: info
|
||||||
|
|
||||||
|
common:
|
||||||
|
instance_addr: 127.0.0.1
|
||||||
|
path_prefix: /var/lib/loki
|
||||||
|
storage:
|
||||||
|
filesystem:
|
||||||
|
chunks_directory: /var/lib/loki/chunks
|
||||||
|
rules_directory: /var/lib/loki/rules
|
||||||
|
replication_factor: 1
|
||||||
|
ring:
|
||||||
|
kvstore:
|
||||||
|
store: inmemory
|
||||||
|
|
||||||
|
schema_config:
|
||||||
|
configs:
|
||||||
|
- from: 2024-01-01
|
||||||
|
store: tsdb
|
||||||
|
object_store: filesystem
|
||||||
|
schema: v13
|
||||||
|
index:
|
||||||
|
prefix: index_
|
||||||
|
period: 24h
|
||||||
|
|
||||||
|
limits_config:
|
||||||
|
retention_period: 360h # 15 days
|
||||||
|
reject_old_samples: true
|
||||||
|
reject_old_samples_max_age: 168h
|
||||||
|
# Required so OTLP resource/scope attributes (and trace_id/span_id) land as structured metadata.
|
||||||
|
allow_structured_metadata: true
|
||||||
|
volume_enabled: true
|
||||||
|
|
||||||
|
compactor:
|
||||||
|
working_directory: /var/lib/loki/compactor
|
||||||
|
compaction_interval: 10m
|
||||||
|
retention_enabled: true
|
||||||
|
retention_delete_delay: 2h
|
||||||
|
delete_request_store: filesystem
|
||||||
|
|
||||||
|
query_range:
|
||||||
|
results_cache:
|
||||||
|
cache:
|
||||||
|
embedded_cache:
|
||||||
|
enabled: true
|
||||||
|
max_size_mb: 100
|
||||||
|
|
||||||
|
ruler:
|
||||||
|
storage:
|
||||||
|
type: local
|
||||||
|
local:
|
||||||
|
directory: /var/lib/loki/rules
|
||||||
25
monitoring/prometheus/prometheus.service
Normal file
25
monitoring/prometheus/prometheus.service
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Prometheus is not in the Grafana apt repo, so install.sh drops the release binary into
|
||||||
|
# /opt/prometheus and installs this unit. Flags: remote-write + OTLP receivers ON (Alloy and
|
||||||
|
# Tempo push to it), 15-day local retention.
|
||||||
|
[Unit]
|
||||||
|
Description=Prometheus
|
||||||
|
Documentation=https://prometheus.io/docs/
|
||||||
|
Wants=network-online.target
|
||||||
|
After=network-online.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
User=prometheus
|
||||||
|
Group=prometheus
|
||||||
|
Type=simple
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=5
|
||||||
|
ExecStart=/opt/prometheus/prometheus \
|
||||||
|
--config.file=/etc/prometheus/prometheus.yml \
|
||||||
|
--storage.tsdb.path=/var/lib/prometheus \
|
||||||
|
--storage.tsdb.retention.time=15d \
|
||||||
|
--web.enable-remote-write-receiver \
|
||||||
|
--web.enable-otlp-receiver \
|
||||||
|
--web.listen-address=0.0.0.0:9090
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
32
monitoring/prometheus/prometheus.yml
Normal file
32
monitoring/prometheus/prometheus.yml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# Prometheus for the BlueLaminate observability LXC.
|
||||||
|
#
|
||||||
|
# App + Tempo metrics arrive via REMOTE-WRITE (Alloy and Tempo's metrics_generator push to
|
||||||
|
# /api/v1/write — enabled by the --web.enable-remote-write-receiver flag in prometheus.service),
|
||||||
|
# so they need no scrape config. The scrape jobs below are just the stack's own self-monitoring.
|
||||||
|
|
||||||
|
global:
|
||||||
|
scrape_interval: 30s
|
||||||
|
evaluation_interval: 30s
|
||||||
|
external_labels:
|
||||||
|
monitor: bluelaminate-lxc
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: prometheus
|
||||||
|
static_configs:
|
||||||
|
- targets: ["localhost:9090"]
|
||||||
|
|
||||||
|
- job_name: alloy
|
||||||
|
static_configs:
|
||||||
|
- targets: ["localhost:12345"]
|
||||||
|
|
||||||
|
- job_name: loki
|
||||||
|
static_configs:
|
||||||
|
- targets: ["localhost:3100"]
|
||||||
|
|
||||||
|
- job_name: tempo
|
||||||
|
static_configs:
|
||||||
|
- targets: ["localhost:3200"]
|
||||||
|
|
||||||
|
- job_name: grafana
|
||||||
|
static_configs:
|
||||||
|
- targets: ["localhost:3000"]
|
||||||
48
monitoring/tempo/tempo.yml
Normal file
48
monitoring/tempo/tempo.yml
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# Tempo — local-disk trace store. Receives OTLP from Alloy on :4319 (Alloy owns :4317/:4318),
|
||||||
|
# and runs the metrics_generator to emit RED + service-graph metrics, remote-written into
|
||||||
|
# Prometheus so Grafana can draw request rates and the service map without any app metrics.
|
||||||
|
server:
|
||||||
|
http_listen_address: 0.0.0.0
|
||||||
|
http_listen_port: 3200
|
||||||
|
grpc_listen_port: 9095
|
||||||
|
log_level: info
|
||||||
|
|
||||||
|
distributor:
|
||||||
|
receivers:
|
||||||
|
otlp:
|
||||||
|
protocols:
|
||||||
|
grpc:
|
||||||
|
endpoint: "0.0.0.0:4319"
|
||||||
|
|
||||||
|
ingester:
|
||||||
|
max_block_duration: 5m
|
||||||
|
|
||||||
|
compactor:
|
||||||
|
compaction:
|
||||||
|
block_retention: 168h # 7 days of traces
|
||||||
|
|
||||||
|
metrics_generator:
|
||||||
|
registry:
|
||||||
|
external_labels:
|
||||||
|
source: tempo
|
||||||
|
storage:
|
||||||
|
path: /var/lib/tempo/generator/wal
|
||||||
|
remote_write:
|
||||||
|
- url: http://localhost:9090/api/v1/write
|
||||||
|
send_exemplars: true
|
||||||
|
traces_storage:
|
||||||
|
path: /var/lib/tempo/generator/traces
|
||||||
|
|
||||||
|
storage:
|
||||||
|
trace:
|
||||||
|
backend: local
|
||||||
|
wal:
|
||||||
|
path: /var/lib/tempo/wal
|
||||||
|
local:
|
||||||
|
path: /var/lib/tempo/blocks
|
||||||
|
|
||||||
|
# Turn the generator on for every tenant (single-tenant here).
|
||||||
|
overrides:
|
||||||
|
defaults:
|
||||||
|
metrics_generator:
|
||||||
|
processors: [service-graphs, span-metrics]
|
||||||
@@ -18,13 +18,20 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY worker/requirements.txt ./
|
COPY worker/requirements.txt ./
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
COPY worker/worker.py worker/entrypoint.sh ./
|
# blworker/ is the shared package both market scripts import; ship it + the two thin
|
||||||
|
# market scripts + the entrypoint.
|
||||||
|
COPY worker/blworker ./blworker
|
||||||
|
COPY worker/csmoney_worker.py worker/skinland_worker.py worker/entrypoint.sh ./
|
||||||
RUN chmod +x entrypoint.sh
|
RUN chmod +x entrypoint.sh
|
||||||
|
|
||||||
|
# Which worker this image runs (overridden per service in docker-compose). The cs.money
|
||||||
|
# worker is the default; the skin.land service sets WORKER_SCRIPT=skinland_worker.py.
|
||||||
ENV BROWSER_PATH=/usr/bin/chromium \
|
ENV BROWSER_PATH=/usr/bin/chromium \
|
||||||
CHROME_NO_SANDBOX=1 \
|
CHROME_NO_SANDBOX=1 \
|
||||||
DISPLAY=:99 \
|
DISPLAY=:99 \
|
||||||
SOLVE_SECONDS=45 \
|
SOLVE_SECONDS=45 \
|
||||||
|
WORKER_SCRIPT=csmoney_worker.py \
|
||||||
|
LOG_JSON=1 \
|
||||||
PYTHONUNBUFFERED=1
|
PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -14,47 +14,27 @@ webdriver` and chromedriver `cdc_` artifacts that Cloudflare keys on. `nodriver`
|
|||||||
drives a normal Chromium directly over CDP (no chromedriver) and patches those
|
drives a normal Chromium directly over CDP (no chromedriver) and patches those
|
||||||
tells, so it passes where Selenium loops.
|
tells, so it passes where Selenium loops.
|
||||||
|
|
||||||
## Step 1: prove it (current)
|
## Local setup
|
||||||
|
|
||||||
`poc.py` proves nodriver can clear cs.money's Cloudflare and fetch the listings API
|
|
||||||
before we build the full pull-based fleet.
|
|
||||||
|
|
||||||
```powershell
|
```powershell
|
||||||
cd worker
|
cd worker
|
||||||
py -m venv .venv
|
py -m venv .venv
|
||||||
.venv\Scripts\Activate.ps1
|
.venv\Scripts\Activate.ps1
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
python poc.py
|
|
||||||
```
|
```
|
||||||
|
|
||||||
A Chromium window opens on the market. Solve the Cloudflare check if shown; the
|
|
||||||
script waits, then pages `sell-orders` deeply (PAGES), reporting how far the warm
|
|
||||||
session survives before any re-challenge and confirming full float precision.
|
|
||||||
Output lands in `worker/captures/`.
|
|
||||||
|
|
||||||
**Targeted skin+wear search.** cs.money search is free-text on the page
|
|
||||||
(`?search=cyber+security+ft`). Set `SEARCH` and the PoC navigates there, **captures
|
|
||||||
the actual filtered `sell-orders` API request the page fires** (so we learn the real
|
|
||||||
filter params instead of guessing), prints it, then pages that filtered API:
|
|
||||||
|
|
||||||
```powershell
|
|
||||||
$env:SEARCH="cyber security ft"; python poc.py # FT M4A4 Cyber Security only
|
|
||||||
```
|
|
||||||
|
|
||||||
The `>>> DISCOVERED sell-orders API call` line shows how the search maps to API
|
|
||||||
params — that's how the C2 will build targeted jobs.
|
|
||||||
|
|
||||||
Run on your own IP first (no proxy) — that's the clean A/B vs. the Selenium run.
|
|
||||||
If auto-detect can't find a browser, set `BROWSER_PATH` to Chrome or Edge
|
If auto-detect can't find a browser, set `BROWSER_PATH` to Chrome or Edge
|
||||||
(`C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe`).
|
(`C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe`).
|
||||||
|
|
||||||
## Step 2: the pull fleet
|
## The pull fleet
|
||||||
|
|
||||||
`worker.py` holds one warm nodriver session and loops: poll the .NET C2 for a job
|
`csmoney_worker.py` holds one warm nodriver session and loops: poll the .NET C2 for a
|
||||||
(a skin+wear search), scrape that search's sell-orders via in-page fetch, and post
|
job (a skin+wear search), scrape that search's sell-orders via in-page fetch, and post
|
||||||
the items back. The C2 (`BlueLaminate.C2`) picks the stalest skin+wear from the
|
the items back. The C2 (`BlueLaminate.C2`) picks the stalest skin+wear from the
|
||||||
catalogue, and on result persists to `cs_money_listings` + `price_history`
|
catalogue, and on result persists to `cs_money_listings` + `price_history`
|
||||||
(`Source = "csmoney"`), stamping `SkinCondition.ListingsSweptAt`.
|
(`Source = "csmoney"`), stamping that band's per-site checkpoint (the `csmoney`
|
||||||
|
row in `skin_condition_sweeps`). The checkpoint is per-site, so a band CSFloat
|
||||||
|
already swept is still due for a cs.money sweep.
|
||||||
|
|
||||||
Run the C2 (needs Postgres migrated), then the worker:
|
Run the C2 (needs Postgres migrated), then the worker:
|
||||||
|
|
||||||
@@ -65,8 +45,64 @@ dotnet run --project BlueLaminate\BlueLaminate.C2 # serves http://local
|
|||||||
# terminal 2 — the worker
|
# terminal 2 — the worker
|
||||||
cd worker; .venv\Scripts\Activate.ps1
|
cd worker; .venv\Scripts\Activate.ps1
|
||||||
$env:WORKER_TOKEN="dev-worker-token" # must match the C2's WorkerToken
|
$env:WORKER_TOKEN="dev-worker-token" # must match the C2's WorkerToken
|
||||||
python worker.py
|
python csmoney_worker.py
|
||||||
```
|
```
|
||||||
|
|
||||||
The worker warms the session (you clear Cloudflare once), then runs continuously.
|
The worker warms the session (you clear Cloudflare once), then runs continuously.
|
||||||
Scale out by starting more workers (each with its own `PROXY`).
|
Scale out by starting more workers (each with its own `PROXY`).
|
||||||
|
|
||||||
|
## Layout
|
||||||
|
|
||||||
|
Both market scripts are thin: each subclasses `blworker.Worker` and fills in only its
|
||||||
|
own scrape + cookie-consent steps. Everything shared lives in the `blworker/` package:
|
||||||
|
|
||||||
|
| file | responsibility |
|
||||||
|
| --- | --- |
|
||||||
|
| `blworker/config.py` | `Settings` — every env knob, parsed once |
|
||||||
|
| `blworker/log.py` | stdout logging, human or `LOG_JSON=1` (for Loki) |
|
||||||
|
| `blworker/proxy.py` | IPRoyal forwarder + session/password helpers |
|
||||||
|
| `blworker/c2.py` | `C2Client` — claim a job, post a result |
|
||||||
|
| `blworker/runtime.py` | `Worker` base: proxy/browser bring-up, the poll→scrape→post loop, Cloudflare IP rotation, graceful shutdown |
|
||||||
|
| `csmoney_worker.py` / `skinland_worker.py` | the per-market scrape strategies |
|
||||||
|
|
||||||
|
To add a market: subclass `Worker`, set `name`/`jobs_path`/`default_market_url`, implement
|
||||||
|
`scrape_job` + `describe_job` (+ `dismiss_consent` if it has a banner), and call
|
||||||
|
`run(YourWorker)`.
|
||||||
|
|
||||||
|
## skin.land worker
|
||||||
|
|
||||||
|
`skinland_worker.py` is the same pull model for **skin.land** (also Cloudflare-walled). It
|
||||||
|
shares all the proxy/Cloudflare/C2 plumbing with the cs.money worker via `blworker`; only
|
||||||
|
the scrape differs. The C2 hands out jobs from its **`/skinland/jobs`** group (the
|
||||||
|
`skinland` rows in `skin_condition_sweeps`, so a band cs.money/CSFloat already swept is
|
||||||
|
still due here) and on result persists to `skin_land_listings` + `price_history`
|
||||||
|
(`Source = "skinland"`).
|
||||||
|
|
||||||
|
How it scrapes (learned during discovery):
|
||||||
|
|
||||||
|
- A job's target is the market **page URL**, e.g.
|
||||||
|
`https://skin.land/market/csgo/ak-47-redline-field-tested/`. The slug is just
|
||||||
|
`{weapon}-{skin}-{wear}` kebab-cased — the C2 builds it from the catalogue, no lookup.
|
||||||
|
- skin.land is a Nuxt SSR app. The page embeds an internal numeric `skin_id`; the worker
|
||||||
|
resolves it once from the `__NUXT__` payload (the skin object whose `url` == the slug),
|
||||||
|
caches it per slug, then pages the clean JSON API
|
||||||
|
`GET https://app.skin.land/api/v2/obtained-skins?skin_id={id}&page={n}` (a Laravel
|
||||||
|
paginator `{data:[…offers], meta:{current_page,last_page,…}}`), walking to `last_page`.
|
||||||
|
- Each offer carries a full-precision `item_float`, `final_withdrawal_price`, and the steam
|
||||||
|
`item_link`. skin.land exposes **no paint seed**, so listings aren't fingerprinted to a
|
||||||
|
`SkinInstance` (no cross-market roll-up / dupe detection here). StatTrak and Souvenir are
|
||||||
|
separate pages (`stattrak-`/`souvenir-` slugs); v1 sweeps the base page per skin+wear.
|
||||||
|
|
||||||
|
Run it alongside (or instead of) the cs.money worker — it points at the same C2:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
cd worker; .venv\Scripts\Activate.ps1
|
||||||
|
$env:WORKER_TOKEN="dev-worker-token"
|
||||||
|
python skinland_worker.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Under Docker it's the `skinland-worker` service (same image, `WORKER_SCRIPT=skinland_worker.py`):
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
docker compose up --build --scale skinland-worker=5
|
||||||
|
```
|
||||||
|
|||||||
20
worker/blworker/__init__.py
Normal file
20
worker/blworker/__init__.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
"""Shared scaffolding for the BlueLaminate market scrape workers.
|
||||||
|
|
||||||
|
A market worker (cs.money, skin.land, …) subclasses `Worker`, fills in its scrape +
|
||||||
|
consent steps, and calls `run(MyWorker)`. Everything else — config, logging, the IPRoyal
|
||||||
|
proxy/forwarder, the C2 client, the poll/scrape/post loop, IP rotation, graceful
|
||||||
|
shutdown — lives here so it's written once.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .config import Settings
|
||||||
|
from .runtime import ScrapeResult, Worker, click, looks_like_challenge, page_fetch, run
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"Settings",
|
||||||
|
"ScrapeResult",
|
||||||
|
"Worker",
|
||||||
|
"click",
|
||||||
|
"looks_like_challenge",
|
||||||
|
"page_fetch",
|
||||||
|
"run",
|
||||||
|
]
|
||||||
57
worker/blworker/c2.py
Normal file
57
worker/blworker/c2.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
"""HTTP client for the .NET C2's job endpoints.
|
||||||
|
|
||||||
|
Uses stdlib urllib; its blocking calls are run off the asyncio loop via to_thread (the event
|
||||||
|
loop belongs to the browser). Each worker points at one job route group — "/jobs" for
|
||||||
|
cs.money, "/skinland/jobs" for skin.land — set once at construction.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
log = logging.getLogger("c2")


class C2Client:
    """Minimal client for one C2 job route group.

    `base_url` is the C2 root, `token` is sent as the `X-Worker-Token` header on every
    call, and `jobs_path` selects the route group (surrounding slashes are trimmed). The
    `*_sync` methods block; the async wrappers run them on a worker thread so the event
    loop is never stalled.

    Every transport or decoding failure is logged and reported as `None`, so a flaky C2
    can never raise into the caller's poll loop.
    """

    def __init__(self, base_url: str, token: str, jobs_path: str):
        self._base = base_url.rstrip("/")
        self._token = token
        self._jobs = jobs_path.strip("/")

    def _get_job_sync(self):
        """Claim the next job. Returns the decoded JSON body, or None when there is no
        job (HTTP 204) or the call failed for any reason."""
        req = urllib.request.Request(
            f"{self._base}/{self._jobs}/next", headers={"X-Worker-Token": self._token})
        try:
            with urllib.request.urlopen(req, timeout=15) as r:
                if r.status == 204:
                    return None
                return json.loads(r.read() or b"null")
        except urllib.error.HTTPError as e:
            log.warning("/%s/next -> HTTP %s", self._jobs, e.code)
            return None
        except OSError as e:
            # URLError is an OSError; so are the raw socket timeouts / connection resets
            # that urllib does NOT wrap when they happen while reading the response.
            log.warning("C2 unreachable: %s", e)
            return None
        except ValueError as e:
            # Non-JSON body (e.g. an HTML error page from a reverse proxy).
            log.warning("/%s/next -> undecodable response: %s", self._jobs, e)
            return None

    def _post_result_sync(self, job_id: str, payload: dict):
        """POST `payload` as JSON to the job's result route. Returns the decoded JSON
        response, or None when the call failed for any reason."""
        data = json.dumps(payload).encode()
        req = urllib.request.Request(
            f"{self._base}/{self._jobs}/{job_id}/result", data=data, method="POST",
            headers={"X-Worker-Token": self._token, "Content-Type": "application/json"})
        try:
            with urllib.request.urlopen(req, timeout=60) as r:
                return json.loads(r.read() or b"null")
        except urllib.error.HTTPError as e:
            log.warning("result -> HTTP %s: %r", e.code, e.read()[:200])
            return None
        except OSError as e:
            # Covers URLError plus unwrapped read timeouts / resets (see _get_job_sync).
            log.warning("C2 unreachable posting result: %s", e)
            return None
        except ValueError as e:
            log.warning("result -> undecodable response: %s", e)
            return None

    async def get_job(self):
        """Async wrapper around `_get_job_sync`, run off the event loop."""
        return await asyncio.to_thread(self._get_job_sync)

    async def post_result(self, job_id, payload):
        """Async wrapper around `_post_result_sync`, run off the event loop."""
        return await asyncio.to_thread(self._post_result_sync, job_id, payload)
|
||||||
81
worker/blworker/config.py
Normal file
81
worker/blworker/config.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
"""Worker configuration, parsed once from the environment.
|
||||||
|
|
||||||
|
All env knobs the workers honor live here so there's a single source of truth (the
|
||||||
|
two market workers used to each re-parse the same ~15 vars). Frozen dataclass — read
|
||||||
|
it, don't mutate it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
def _int(name: str, default: int) -> int:
    """Read env var `name` as an int.

    Returns `default` when the variable is unset or blank (a blank value is what a
    compose file's `${VAR}` passthrough yields for an undefined variable). Raises
    ValueError naming the variable when the value is not a valid integer.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        raise ValueError(f"{name} must be an integer, got {raw!r}") from None
|
||||||
|
|
||||||
|
|
||||||
|
def _float(name: str, default: float) -> float:
    """Read env var `name` as a float.

    Returns `default` when the variable is unset or blank. Raises ValueError naming the
    variable when the value is not a valid float.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        return default
    try:
        return float(raw)
    except ValueError:
        raise ValueError(f"{name} must be a number, got {raw!r}") from None
|
||||||
|
|
||||||
|
|
||||||
|
def _flag(name: str) -> bool:
    """True only when env var `name` is set to exactly "1"; unset or anything else is False."""
    value = os.environ.get(name)
    if value is None:
        return False
    return value == "1"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class Settings:
|
||||||
|
# C2
|
||||||
|
c2_url: str
|
||||||
|
token: str
|
||||||
|
# Session / pacing
|
||||||
|
market_url: str # "" => use the worker's own default page
|
||||||
|
solve_seconds: int
|
||||||
|
delay: float
|
||||||
|
jitter: float
|
||||||
|
idle_seconds: int
|
||||||
|
# Browser
|
||||||
|
browser_path: str | None
|
||||||
|
load_images: bool
|
||||||
|
chrome_no_sandbox: bool
|
||||||
|
# Proxy (auth-free fallback)
|
||||||
|
proxy: str | None
|
||||||
|
# IPRoyal residential gateway
|
||||||
|
iproyal_host: str
|
||||||
|
iproyal_port: int
|
||||||
|
iproyal_username: str | None
|
||||||
|
iproyal_password: str | None
|
||||||
|
iproyal_country: str
|
||||||
|
iproyal_lifetime_min: int
|
||||||
|
# Logging
|
||||||
|
log_level: str
|
||||||
|
log_json: bool
|
||||||
|
|
||||||
|
@property
|
||||||
|
def use_iproyal(self) -> bool:
|
||||||
|
"""IPRoyal takes priority over a plain PROXY when its creds are set."""
|
||||||
|
return bool(self.iproyal_username and self.iproyal_password)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_env(cls) -> "Settings":
|
||||||
|
return cls(
|
||||||
|
c2_url=os.environ.get("C2_URL", "http://localhost:5080").rstrip("/"),
|
||||||
|
token=os.environ.get("WORKER_TOKEN", "dev-worker-token"),
|
||||||
|
market_url=os.environ.get("MARKET_URL", ""),
|
||||||
|
solve_seconds=_int("SOLVE_SECONDS", 30),
|
||||||
|
delay=_float("DELAY", 2.0),
|
||||||
|
jitter=_float("JITTER", 1.5),
|
||||||
|
idle_seconds=_int("IDLE_SECONDS", 10),
|
||||||
|
browser_path=os.environ.get("BROWSER_PATH") or None,
|
||||||
|
# Residential proxy is metered per GB; Cloudflare gates on JS, not images, and
|
||||||
|
# the market APIs are pure JSON — so block images unless explicitly debugging.
|
||||||
|
load_images=_flag("LOAD_IMAGES"),
|
||||||
|
chrome_no_sandbox=_flag("CHROME_NO_SANDBOX"),
|
||||||
|
proxy=os.environ.get("PROXY") or None,
|
||||||
|
iproyal_host=os.environ.get("IPROYAL_HOST", "geo.iproyal.com"),
|
||||||
|
iproyal_port=_int("IPROYAL_PORT", 12321),
|
||||||
|
iproyal_username=os.environ.get("IPROYAL_USERNAME") or None,
|
||||||
|
iproyal_password=os.environ.get("IPROYAL_PASSWORD") or None,
|
||||||
|
iproyal_country=os.environ.get("IPROYAL_COUNTRY", "us").strip().lower(),
|
||||||
|
iproyal_lifetime_min=_int("IPROYAL_LIFETIME_MIN", 60),
|
||||||
|
log_level=os.environ.get("LOG_LEVEL", "INFO").upper(),
|
||||||
|
log_json=_flag("LOG_JSON"),
|
||||||
|
)
|
||||||
47
worker/blworker/log.py
Normal file
47
worker/blworker/log.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
"""Stdlib logging setup — one stream handler on stdout, human or JSON.
|
||||||
|
|
||||||
|
Workers used to print() everything; that gives no levels, no timestamps, and nothing
|
||||||
|
Loki can parse. Default is a compact human format for local runs; set LOG_JSON=1 in the
|
||||||
|
container so Grafana Alloy -> Loki gets structured fields (ts, level, logger, msg) plus
|
||||||
|
any `extra=` keys a call site attaches.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# logging.LogRecord built-ins we don't want to echo into a JSON line as "extra" fields.
_RESERVED = set(
    logging.makeLogRecord({}).__dict__
) | {"message", "asctime", "taskName"}


class _JsonFormatter(logging.Formatter):
    """Render each record as one JSON object per line.

    Core fields are `ts`, `level`, `logger` and `msg`. Any non-reserved, non-underscore
    attribute on the record (i.e. whatever a call site passed via `extra=`) is added
    alongside them, and a formatted traceback goes under `exc` when the record carries
    exception info. Values json can't serialise natively are stringified.
    """

    def format(self, record: logging.LogRecord) -> str:
        payload = {
            "ts": self.formatTime(record, "%Y-%m-%dT%H:%M:%S%z"),
            "level": record.levelname,
            "logger": record.name,
            "msg": record.getMessage(),
        }
        for key, value in record.__dict__.items():
            if key in _RESERVED or key.startswith("_"):
                continue
            # The core fields stay authoritative: an extra that happens to be called
            # "level"/"ts"/"logger" must not clobber them, so it is kept under a
            # prefixed key instead of being dropped or overwriting the real value.
            payload[key if key not in payload else f"extra_{key}"] = value
        if record.exc_info:
            payload["exc"] = self.formatException(record.exc_info)
        return json.dumps(payload, default=str)
|
||||||
|
|
||||||
|
|
||||||
|
def configure(level: str = "INFO", json_logs: bool = False) -> None:
    """Point the root logger at stdout with a single handler.

    `json_logs` selects the JSON formatter; otherwise a compact human-readable format is
    used. Existing root handlers are removed first, so calling this again replaces the
    previous setup rather than stacking handlers.
    """
    if json_logs:
        formatter = _JsonFormatter()
    else:
        formatter = logging.Formatter(
            "%(asctime)s %(levelname)-5s %(name)s | %(message)s", "%H:%M:%S")

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(formatter)

    root_logger = logging.getLogger()
    root_logger.handlers.clear()
    root_logger.addHandler(stdout_handler)
    root_logger.setLevel(level)
|
||||||
154
worker/blworker/proxy.py
Normal file
154
worker/blworker/proxy.py
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
"""IPRoyal residential proxy plumbing.
|
||||||
|
|
||||||
|
The in-process forwarder + the password/session helpers — identical across every market
|
||||||
|
worker, so they live here. HTTPS market traffic flows through the CONNECT tunnel, so the
|
||||||
|
forwarder only ever relays ciphertext. Ported from the .NET LocalForwardingProxy /
|
||||||
|
IpRoyalProxyProvider.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
log = logging.getLogger("proxy")
|
||||||
|
|
||||||
|
|
||||||
|
def new_session_id() -> str:
    """Return a fresh 10-character lowercase-hex token taken from a random UUID4.

    IPRoyal pins one residential exit IP per distinct session value, so a fresh id == a
    fresh IP.
    """
    random_hex = uuid.uuid4().hex
    return random_hex[:10]
|
||||||
|
|
||||||
|
|
||||||
|
def iproyal_password(password: str, country: str, lifetime_min: int, session_id: str) -> str:
    """Append IPRoyal's targeting/session suffixes to the account password.

    Result shape: "<pass>_country-us_session-<id>_lifetime-60m". The country segment is
    left out when `country` is empty.
    """
    segments = [password]
    if country:
        segments.append(f"country-{country}")
    segments.append(f"session-{session_id}")
    segments.append(f"lifetime-{lifetime_min}m")
    return "_".join(segments)
|
||||||
|
|
||||||
|
|
||||||
|
class LocalForwardingProxy:
|
||||||
|
"""In-process HTTP proxy on 127.0.0.1 that chains every connection to the IPRoyal
|
||||||
|
gateway, injecting the Proxy-Authorization header itself. Chromium ignores creds in
|
||||||
|
--proxy-server and the in-browser ways to answer the gateway's 407 (a CDP auth
|
||||||
|
handler, or a disabled MV2 extension) are Cloudflare tells — so we terminate the
|
||||||
|
browser->proxy hop locally and add auth here, leaving Chrome to talk to an auth-free
|
||||||
|
endpoint at zero CDP. HTTPS (all market traffic) flows through the CONNECT tunnel, so
|
||||||
|
this proxy only relays ciphertext and never sees plaintext. The active session token
|
||||||
|
can be swapped live (set_password) to move to a fresh exit IP without restarting the
|
||||||
|
browser. (New tunnels pick up the new IP; any still-open keep-alive tunnel stays on
|
||||||
|
the old one until it closes.)"""
|
||||||
|
|
||||||
|
def __init__(self, host: str, port: int, username: str, password: str):
|
||||||
|
self._host = host
|
||||||
|
self._port = port
|
||||||
|
self._username = username
|
||||||
|
self._password = password
|
||||||
|
self._server: asyncio.AbstractServer | None = None
|
||||||
|
self.endpoint = ""
|
||||||
|
|
||||||
|
def set_password(self, password: str) -> None:
|
||||||
|
self._password = password
|
||||||
|
|
||||||
|
def _auth_header(self) -> str:
|
||||||
|
token = base64.b64encode(f"{self._username}:{self._password}".encode()).decode()
|
||||||
|
return f"Proxy-Authorization: Basic {token}\r\n"
|
||||||
|
|
||||||
|
async def start(self) -> "LocalForwardingProxy":
|
||||||
|
self._server = await asyncio.start_server(self._handle, "127.0.0.1", 0)
|
||||||
|
port = self._server.sockets[0].getsockname()[1]
|
||||||
|
self.endpoint = f"127.0.0.1:{port}"
|
||||||
|
return self
|
||||||
|
|
||||||
|
async def stop(self) -> None:
|
||||||
|
if self._server is not None:
|
||||||
|
self._server.close()
|
||||||
|
try:
|
||||||
|
await self._server.wait_closed()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
async def _read_header(reader: asyncio.StreamReader) -> str | None:
|
||||||
|
"""Read up to the end of the HTTP header block (CRLFCRLF). None on EOF/overflow."""
|
||||||
|
try:
|
||||||
|
data = await reader.readuntil(b"\r\n\r\n")
|
||||||
|
except (asyncio.IncompleteReadError, asyncio.LimitOverrunError):
|
||||||
|
return None
|
||||||
|
return data.decode("latin-1")
|
||||||
|
|
||||||
|
async def _handle(self, client_reader: asyncio.StreamReader, client_writer: asyncio.StreamWriter) -> None:
|
||||||
|
up_writer: asyncio.StreamWriter | None = None
|
||||||
|
try:
|
||||||
|
header = await self._read_header(client_reader)
|
||||||
|
if not header:
|
||||||
|
return
|
||||||
|
parts = header.split("\r\n", 1)[0].split(" ")
|
||||||
|
if len(parts) < 2:
|
||||||
|
return
|
||||||
|
method, target = parts[0], parts[1]
|
||||||
|
|
||||||
|
up_reader, up_writer = await asyncio.open_connection(self._host, self._port)
|
||||||
|
if method.upper() == "CONNECT":
|
||||||
|
# HTTPS: open an authenticated tunnel upstream, then relay raw bytes.
|
||||||
|
up_writer.write(
|
||||||
|
f"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{self._auth_header()}\r\n".encode())
|
||||||
|
await up_writer.drain()
|
||||||
|
up_header = await self._read_header(up_reader)
|
||||||
|
status = up_header.split(" ", 2) if up_header else []
|
||||||
|
if len(status) < 2 or status[1] != "200":
|
||||||
|
line = (up_header or "no response").split("\r\n", 1)[0]
|
||||||
|
log.warning("upstream refused CONNECT %s: %s", target, line)
|
||||||
|
client_writer.write(b"HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n")
|
||||||
|
await client_writer.drain()
|
||||||
|
return
|
||||||
|
client_writer.write(b"HTTP/1.1 200 Connection established\r\n\r\n")
|
||||||
|
await client_writer.drain()
|
||||||
|
else:
|
||||||
|
# Plain HTTP: re-inject the request upstream with auth, then relay.
|
||||||
|
idx = header.index("\r\n") + 2
|
||||||
|
up_writer.write((header[:idx] + self._auth_header() + header[idx:]).encode())
|
||||||
|
await up_writer.drain()
|
||||||
|
|
||||||
|
await self._relay(client_reader, client_writer, up_reader, up_writer)
|
||||||
|
except Exception:
|
||||||
|
pass # one bad tunnel must never take down the listener
|
||||||
|
finally:
|
||||||
|
for w in (client_writer, up_writer):
|
||||||
|
if w is not None:
|
||||||
|
try:
|
||||||
|
w.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
async def _relay(
|
||||||
|
client_reader: asyncio.StreamReader, client_writer: asyncio.StreamWriter,
|
||||||
|
up_reader: asyncio.StreamReader, up_writer: asyncio.StreamWriter) -> None:
|
||||||
|
# Pipe both directions, but tear the whole tunnel down as soon as EITHER side
|
||||||
|
# closes (mirrors the .NET WhenAny). Waiting for both — as a plain gather does —
|
||||||
|
# leaks a task holding two sockets on every half-closed connection, which piles
|
||||||
|
# up fast across a long multi-worker run. Closing both writers when the first pipe
|
||||||
|
# finishes unblocks the other's pending read so both tasks settle.
|
||||||
|
async def pipe(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
|
||||||
|
try:
|
||||||
|
while data := await reader.read(65536):
|
||||||
|
writer.write(data)
|
||||||
|
await writer.drain()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
a = asyncio.create_task(pipe(client_reader, up_writer))
|
||||||
|
b = asyncio.create_task(pipe(up_reader, client_writer))
|
||||||
|
try:
|
||||||
|
await asyncio.wait({a, b}, return_when=asyncio.FIRST_COMPLETED)
|
||||||
|
finally:
|
||||||
|
for w in (client_writer, up_writer):
|
||||||
|
try:
|
||||||
|
w.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
await asyncio.gather(a, b, return_exceptions=True)
|
||||||
235
worker/blworker/runtime.py
Normal file
235
worker/blworker/runtime.py
Normal file
@@ -0,0 +1,235 @@
|
|||||||
|
"""The shared worker runtime — everything that's identical across market workers.
|
||||||
|
|
||||||
|
`Worker` is a template-method base: it owns the proxy/browser bring-up, the poll ->
|
||||||
|
scrape -> post loop, Cloudflare-driven IP rotation, result logging, and graceful
|
||||||
|
shutdown. A market worker subclasses it and fills in only what differs — how to dismiss
|
||||||
|
the consent banner, how to scrape one job, and how to describe a job in the log. The two
|
||||||
|
~300-line workers used to copy this whole loop verbatim.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import random
|
||||||
|
import signal
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import nodriver as uc
|
||||||
|
|
||||||
|
from .c2 import C2Client
|
||||||
|
from .config import Settings
|
||||||
|
from .proxy import LocalForwardingProxy, iproyal_password, new_session_id
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ScrapeResult:
    """Outcome of scraping a single job.

    Produced by `Worker.scrape_job` and consumed by the run loop, which posts
    `items` / `pages` / `reason` to the C2 and adds `wire_bytes` to its running total.
    """

    # Listings collected for the job (opaque to the runtime; posted as-is).
    items: list
    # Number of page fetches the scrape performed.
    pages: int
    # Why the scrape stopped, e.g. "completed" or "challenged".
    reason: str
    # Metered (compressed) bytes the scrape pulled; defaults to 0 when unknown.
    wire_bytes: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
def looks_like_challenge(body: str) -> bool:
    """Report whether `body` is a Cloudflare interstitial rather than real content.

    An empty / whitespace-only (or falsy) body counts as a challenge. Otherwise the
    decision is keyed on Cloudflare marker strings — never on the body merely being
    HTML, because a genuine market page is HTML too.
    """
    text = body if body else ""
    if not text.strip():
        return True
    return any(marker in text for marker in ("Just a moment", "challenge-platform"))
|
||||||
|
|
||||||
|
|
||||||
|
async def page_fetch(page, url: str, accept: str = "application/json") -> tuple[int, str, int]:
    """Fetch `url` from inside the page (so the request rides the warm session's
    cookies) and report how many bytes it cost on the wire.

    Args:
        page: browser tab exposing `evaluate(expr, await_promise=True)`.
        url: address to fetch; resolved against the document base URL in-page.
        accept: value for the `accept` request header. Use 'text/html' for an SSR
            page payload; the default targets a JSON API.

    Returns:
        `(status, body, wire_bytes)`.
        * `status` is the HTTP status, or -1 when the fetch itself failed (then
          `body` carries the stringified error) or the result could not be decoded.
        * `wire_bytes` is the Resource Timing `transferSize` of the request, or -1
          when no timing entry was recorded. Browsers report `transferSize` as 0 for
          cross-origin responses that do not opt in to timing.
    """
    # json.dumps yields a literal that is valid JavaScript for ANY string; Python's
    # repr() does not (e.g. its \UXXXXXXXX escapes mean something else in JS).
    js_url = json.dumps(url)
    js_accept = json.dumps(accept)
    expr = (
        "(async () => {"
        " try {"
        # Resolve first so the timing lookup uses the exact name the browser records.
        "  const target = new URL(" + js_url + ", document.baseURI).href;"
        # The Resource Timing buffer is bounded; once it fills on a long-lived page
        # no new entries are recorded and every lookup below would miss. Emptying it
        # before each fetch keeps room for this request's entry.
        "  performance.clearResourceTimings();"
        "  const r = await fetch(target, {credentials: 'include',"
        " headers: {'accept': " + js_accept + "}});"
        "  const body = await r.text();"
        "  const e = performance.getEntriesByName(target).slice(-1)[0];"
        "  return JSON.stringify({status: r.status, body: body,"
        " wire: e ? e.transferSize : -1});"
        " } catch (e) {"
        "  return JSON.stringify({status: -1, body: String(e), wire: -1});"
        " }"
        "})()"
    )
    raw = await page.evaluate(expr, await_promise=True)
    if not isinstance(raw, str):
        # evaluate() handed back something other than our JSON string.
        return (-1, "", -1)
    try:
        obj = json.loads(raw)
        return (int(obj.get("status", -1)), obj.get("body", ""), int(obj.get("wire", -1)))
    except (json.JSONDecodeError, ValueError, TypeError, AttributeError):
        # Undecodable, or valid JSON that is not an object: surface the raw text.
        return (-1, raw, -1)
|
||||||
|
|
||||||
|
|
||||||
|
async def click(page, text: str, timeout: int = 3) -> bool:
    """Click the element that best matches the given visible text.

    Looks the element up with `page.find(text, best_match=True, timeout=timeout)`
    and clicks it. Returns True only when an element was found and clicked; a
    missing element, a timeout, or any other error yields False (best-effort by
    design — callers probe for optional UI).
    """
    clicked = False
    try:
        target = await page.find(text, best_match=True, timeout=timeout)
        if target:
            await target.click()
            clicked = True
    except Exception:
        # Deliberately swallowed: "not there" is an expected outcome here.
        clicked = False
    return clicked
|
||||||
|
|
||||||
|
|
||||||
|
class Worker(ABC):
    """Template-method base shared by the market workers.

    Owns the proxy/browser bring-up, the poll -> scrape -> post loop, re-warming
    after a challenge (rotating the exit IP when the IPRoyal forwarder is active),
    result logging and shutdown. A subclass sets the class constants below and
    implements `scrape_job` and `describe_job`; `dismiss_consent` is optional.
    """

    # Per-market constants, set by the subclass.
    name: str = "worker"          # logger name
    jobs_path: str = "/jobs"      # handed to C2Client
    default_market_url: str = ""  # used when settings.market_url is falsy

    def __init__(self, settings: Settings):
        """Wire up the C2 client and logger; no I/O is started here."""
        self.settings = settings
        self.market_url = settings.market_url or self.default_market_url
        self.c2 = C2Client(settings.c2_url, settings.token, self.jobs_path)
        self.log = logging.getLogger(self.name)
        # Set only when IPRoyal is in use (see _setup_proxy).
        self._forwarder: LocalForwardingProxy | None = None
        self._session_id: str | None = None
        # Set by the signal handlers; checked between jobs and while idling.
        self._stop = asyncio.Event()

    # --- hooks a market worker overrides ------------------------------------------

    @abstractmethod
    async def scrape_job(self, page, job) -> ScrapeResult:
        """Scrape ALL listings for one job and return them."""

    @abstractmethod
    def describe_job(self, job) -> str:
        """One-line job description for the log (e.g. the search term or slug)."""

    async def dismiss_consent(self, page) -> str | None:
        """Dismiss the cookie banner privacy-first; return a note, or None if absent.
        Default: nothing to do. Markets with a banner override this."""
        return None

    # --- shared machinery ---------------------------------------------------------

    def _iproyal_password(self, session_id: str) -> str:
        """Build the IPRoyal proxy password for `session_id` from the settings."""
        s = self.settings
        return iproyal_password(s.iproyal_password, s.iproyal_country, s.iproyal_lifetime_min, session_id)

    async def _pace(self, page) -> None:
        """Sleep the configured delay plus a random jitter in [0, settings.jitter]."""
        await page.sleep(self.settings.delay + random.uniform(0, self.settings.jitter))

    async def warm(self, page) -> None:
        """Open the market and clear Cloudflare so the session holds cf_clearance."""
        s = self.settings
        self.log.info("warming session at %s (clear Cloudflare; %ds)", self.market_url, s.solve_seconds)
        await page.get(self.market_url)
        await page.sleep(s.solve_seconds)
        note = await self.dismiss_consent(page)
        self.log.info("consent: %s", note or "left up")

    async def _setup_proxy(self) -> tuple[str | None, str]:
        """IPRoyal (auth'd, per-worker sticky IP) takes priority; else a plain auth-free
        PROXY; else this host's own IP. Returns (proxy_endpoint, human_label)."""
        s = self.settings
        if s.use_iproyal:
            self._session_id = new_session_id()
            self._forwarder = await LocalForwardingProxy(
                s.iproyal_host, s.iproyal_port, s.iproyal_username,
                self._iproyal_password(self._session_id)).start()
            label = f"iproyal[{s.iproyal_country or 'any'}] session {self._session_id} via {self._forwarder.endpoint}"
            return self._forwarder.endpoint, label
        return s.proxy, (s.proxy or "own IP")

    def _browser_args(self, proxy: str | None) -> list[str]:
        """Assemble the extra Chromium flags for this worker's settings."""
        s = self.settings
        args = [f"--proxy-server={proxy}"] if proxy else []
        if not s.load_images:
            # Disable image loading at the engine level — the dominant bandwidth cost on
            # an image-heavy market, and unneeded for CF clearance or the JSON API.
            args.append("--blink-settings=imagesEnabled=false")
        if s.chrome_no_sandbox:
            # Required when running Chromium as root in a container.
            args += ["--no-sandbox", "--disable-dev-shm-usage"]
        return args

    async def _on_challenge(self, page) -> None:
        """The exit IP is likely flagged. On IPRoyal, rotate to a fresh sticky session
        (new IP) before re-warming; otherwise just re-solve in place."""
        if self._forwarder is not None:
            self._session_id = new_session_id()
            self._forwarder.set_password(self._iproyal_password(self._session_id))
            self.log.warning("challenged; rotating exit IP -> session %s, re-warming", self._session_id)
        else:
            self.log.warning("challenged; re-warming session")
        await self.warm(page)

    def _log_result(self, res: ScrapeResult, posted: dict | None, total_wire: int) -> None:
        """Log one job's outcome: scrape stats, the C2's reply summary (or
        'post failed' when the reply is falsy) and the lifetime wire total."""
        if posted:
            summary = (f"matched {posted.get('matched')}, new {posted.get('inserted')}, "
                       f"upd {posted.get('updated')}, removed {posted.get('removed')}")
        else:
            summary = "post failed"
        self.log.info("scraped %d items (%dp, %s, %.0fKB wire) -> %s [lifetime %.1fMB]",
                      len(res.items), res.pages, res.reason, res.wire_bytes / 1024,
                      summary, total_wire / 1_048_576)

    def _install_signal_handlers(self) -> None:
        """Stop the loop on SIGINT/SIGTERM so `docker stop` shuts down cleanly. Not
        supported on Windows (ProactorEventLoop) — there Ctrl-C still raises
        KeyboardInterrupt, which the run loop's finally handles just as well."""
        try:
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGINT, signal.SIGTERM):
                loop.add_signal_handler(sig, self._stop.set)
        except (NotImplementedError, AttributeError):
            pass

    async def _idle(self) -> None:
        """Sleep when the C2 has no work, but wake immediately on shutdown."""
        try:
            await asyncio.wait_for(self._stop.wait(), timeout=self.settings.idle_seconds)
        except asyncio.TimeoutError:
            pass

    async def run(self) -> None:
        """Bring up proxy + browser, then poll/scrape/post until asked to stop.

        Everything after the signal handlers runs under one try/finally so that the
        forwarding proxy is torn down even if the browser fails to launch, and a
        failing `browser.stop()` cannot prevent the forwarder from being stopped.
        """
        self._install_signal_handlers()
        s = self.settings
        browser = None
        try:
            proxy, proxy_label = await self._setup_proxy()
            self.log.info("starting (C2=%s, proxy=%s, images=%s)",
                          s.c2_url, proxy_label, "on" if s.load_images else "off")
            browser = await uc.start(
                headless=False, browser_executable_path=s.browser_path,
                browser_args=self._browser_args(proxy))
            page = await browser.get("about:blank")
            await self.warm(page)

            total_wire = 0  # metered (compressed) bytes pulled, lifetime
            while not self._stop.is_set():
                job = await self.c2.get_job()
                if not job:
                    await self._idle()
                    continue

                self.log.info("job %s — %s", job["jobId"][:8], self.describe_job(job))
                res = await self.scrape_job(page, job)
                total_wire += res.wire_bytes

                # Re-warm first so the next job starts on a cleared session; the
                # partial result is still posted below.
                if res.reason == "challenged":
                    await self._on_challenge(page)

                posted = await self.c2.post_result(job["jobId"], {
                    "items": res.items, "pages": res.pages, "stoppedReason": res.reason})
                self._log_result(res, posted, total_wire)

                await self._pace(page)
        finally:
            self.log.info("shutting down")
            try:
                if browser is not None:
                    browser.stop()
            finally:
                if self._forwarder is not None:
                    await self._forwarder.stop()
|
||||||
|
|
||||||
|
|
||||||
|
def run(worker_cls: type[Worker]) -> None:
    """Entry point used by the thin market scripts.

    Reads the settings from the environment, configures logging from them, then
    builds `worker_cls` and drives its `run()` coroutine to completion on the loop
    returned by `uc.loop()`.
    """
    from . import log as log_setup

    cfg = Settings.from_env()
    log_setup.configure(cfg.log_level, cfg.log_json)

    # Obtain the loop before constructing the worker, as the one-line form did.
    event_loop = uc.loop()
    worker = worker_cls(cfg)
    event_loop.run_until_complete(worker.run())
|
||||||
129
worker/csmoney_worker.py
Normal file
129
worker/csmoney_worker.py
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
"""cs.money scrape worker (pull model).
|
||||||
|
|
||||||
|
A thin strategy over blworker.Worker: it supplies only the cs.money-specific bits — the
|
||||||
|
consent banner steps and how to scrape one skin+wear's sell-orders. The warm session, the
|
||||||
|
poll/scrape/post loop, the IPRoyal proxy and IP rotation, logging and shutdown all live in
|
||||||
|
the shared runtime. Env knobs are documented in worker/README.md.
|
||||||
|
|
||||||
|
cs.money is an Astro SSR app: the free-text market search filters server-side and the
|
||||||
|
resulting listings are embedded in the page as a __page-params JSON blob. The
|
||||||
|
/2.0/market/sell-orders API rejects a `search` param (HTTP 400), so we fetch the PAGE for
|
||||||
|
a search and read the embedded items — same item shape as the API.
|
||||||
|
|
||||||
|
A page returns at most 60 and offset is ignored, so we paginate with a FORWARD CURSOR on
|
||||||
|
float: cs.money honors `order=asc&sort=float` + `minFloat`, and float is full-precision and
|
||||||
|
effectively unique per item. We grab the 60 lowest-float items at/above `lo`, advance `lo`
|
||||||
|
to the highest float returned, and repeat until a page is under the cap. (The old
|
||||||
|
minPrice/maxPrice bisection silently truncated cheap skins: >60 listings can share a
|
||||||
|
sub-$0.02 reference band, which no price window can split — floats almost never tie, so the
|
||||||
|
cursor always makes progress.)
|
||||||
|
|
||||||
|
cd worker
|
||||||
|
.venv\\Scripts\\Activate.ps1
|
||||||
|
pip install -r requirements.txt
|
||||||
|
python csmoney_worker.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
|
from blworker import ScrapeResult, Worker, click, page_fetch, run
|
||||||
|
|
||||||
|
# Search page URL template: ascending float order, lower bound `lo` (12 decimals).
PAGE = ("https://cs.money/market/buy/?search={search}"
        "&order=asc&sort=float&minFloat={lo:.12f}&maxFloat=1")
PAGE_CAP = 60  # items per SSR page
# Captures the JSON payload of the <script id="__page-params"> element.
PAGE_PARAMS_RE = re.compile(
    r'<script\b[^>]*id="__page-params"[^>]*>(.*?)</script>', re.S)


def extract_items(html: str) -> list:
    """Pull inventory.items out of the page's __page-params JSON blob.

    Returns the item list, or [] when the blob is absent, is not valid JSON, or does
    not have the expected `{"inventory": {"items": [...]}}` shape (including a null
    `inventory` or `items`). Never raises on malformed page content.
    """
    m = PAGE_PARAMS_RE.search(html or "")
    if not m:
        return []
    try:
        params = json.loads(m.group(1))
    except json.JSONDecodeError:
        return []
    # Walk the structure defensively: any level may be null or of the wrong type.
    inventory = params.get("inventory") if isinstance(params, dict) else None
    items = inventory.get("items") if isinstance(inventory, dict) else None
    return items if isinstance(items, list) else []
|
||||||
|
|
||||||
|
|
||||||
|
class CsMoneyWorker(Worker):
    """cs.money strategy for the shared worker runtime: consent handling and the
    float-cursor scrape of one search's sell-orders."""

    name = "csmoney"
    jobs_path = "/jobs"
    default_market_url = "https://cs.money/market/buy/"

    def describe_job(self, job) -> str:
        """Log line for a job: its search term."""
        return f"search {job['search']!r}"

    async def dismiss_consent(self, page) -> str | None:
        """Privacy-preserving. The banner only offers 'Accept all' / 'Manage cookies';
        the Reject-all control lives inside the Manage window. So: Manage -> Reject all ->
        Confirm. (The data path reads SSR __page-params regardless, but this keeps the
        session honest and unblocks any future interaction.)

        Returns a comma-separated note of the steps that were clicked, or None when
        nothing was clicked."""
        steps = []
        if await click(page, "Manage cookies") or await click(page, "Manage"):
            await page.sleep(1)  # give the Manage window time to open
            if await click(page, "Reject all"):
                steps.append("reject-all")
            # First confirm-style button that exists wins.
            for c in ("Confirm my choice", "Confirm", "Save"):
                if await click(page, c):
                    steps.append(f"confirm:{c}")
                    break
        return ", ".join(steps) if steps else None

    async def scrape_job(self, page, job) -> ScrapeResult:
        """Scrape ALL listings for one skin+wear via a forward float cursor.

        Grab the 60 lowest-float items at/above `lo`, advance `lo` to the highest float on
        the page, repeat until a page is under the cap. The boundary item is re-fetched
        (minFloat is inclusive) and dropped by the id dedup.

        The result's `reason` is "completed" only for a full sweep. Every other value
        marks a partial result: "challenged" (Cloudflare interstitial), "fetch-error"
        (the in-page fetch itself failed), "no-page-data" (the response carried no
        __page-params blob), "stuck-float-tie" (cursor cannot advance) and
        "fetch-cap" (page budget exhausted).
        """
        search = urllib.parse.quote_plus(job["search"])
        # Safety cap on page fetches per job; tolerate an explicit null from the C2.
        cap = job.get("maxPages")
        max_fetches = 40 if cap is None else cap
        seen: dict = {}
        fetches = 0
        wire = 0
        lo = 0.0
        reason = "completed"

        while fetches < max_fetches:
            # The URL carries 12 decimals and '.12f' rounds to nearest. Truncate first
            # so the inclusive lower bound can never land ABOVE the real cursor, which
            # would skip listings tied at the boundary float. (`lo` is never negative,
            # so int() truncation is a floor.)
            lo_param = int(lo * 1e12) / 1e12
            status, body, wbytes = await page_fetch(page, PAGE.format(search=search, lo=lo_param))
            fetches += 1
            if wbytes > 0:
                wire += wbytes

            if "Just a moment" in body or "challenge-platform" in body:
                return ScrapeResult(list(seen.values()), fetches, "challenged", wire)

            # A response without the __page-params blob is not a market page (network
            # error text, error page, changed layout). Reporting it as a finished sweep
            # with zero items would be read as "nothing is listed" — flag it instead.
            if not PAGE_PARAMS_RE.search(body):
                reason = "fetch-error" if status < 0 else "no-page-data"
                break

            items = extract_items(body)
            floats = []
            for it in items:
                if not isinstance(it, dict):
                    continue  # ignore malformed entries rather than crash the worker
                if it.get("id") is not None:
                    seen[it["id"]] = it
                fl = (it.get("asset") or {}).get("float")
                if isinstance(fl, (int, float)):
                    floats.append(fl)

            if len(items) < PAGE_CAP:
                break  # last page — fewer than the cap means we've seen everything

            # Advance the cursor past the highest float on this page. Items at exactly that
            # float are re-fetched next round (minFloat is inclusive) and deduped by id.
            nxt = max(floats) if floats else None
            if nxt is None or nxt <= lo:
                # Cursor can't advance: >60 listings share a single float value, or the
                # items carry no float. Bail loudly rather than spin — a flagged gap beats
                # a silent one (this is the failure the price-window version hid).
                reason = "stuck-float-tie"
                break
            lo = nxt

            await self._pace(page)
        else:
            # Loop ended because the fetch budget ran out, not via a break.
            reason = "fetch-cap"

        return ScrapeResult(list(seen.values()), fetches, reason, wire)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
run(CsMoneyWorker)
|
||||||
@@ -1,71 +0,0 @@
|
|||||||
"""
|
|
||||||
Diagnose the cs.money cookie-consent banner so we can dismiss it programmatically.
|
|
||||||
It's likely a Shadow DOM web component (CookieConsentSystem), which is why
|
|
||||||
document.querySelectorAll-based clicks miss the real buttons.
|
|
||||||
|
|
||||||
Saves:
|
|
||||||
captures/_consent.png - screenshot (so we can SEE the banner + button positions)
|
|
||||||
captures/_consent.txt - shadow-host tags + every consent-like button found by
|
|
||||||
piercing shadow roots, with center coordinates.
|
|
||||||
|
|
||||||
cd worker; .venv\\Scripts\\Activate.ps1
|
|
||||||
python diag_consent.py
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
|
|
||||||
import nodriver as uc
|
|
||||||
|
|
||||||
URL = os.environ.get("URL", "https://cs.money/market/buy/?search=ak-47+redline")
|
|
||||||
SOLVE_SECONDS = int(os.environ.get("SOLVE_SECONDS", "30"))
|
|
||||||
BROWSER_PATH = os.environ.get("BROWSER_PATH")
|
|
||||||
OUT = pathlib.Path(__file__).parent / "captures"
|
|
||||||
|
|
||||||
# Pierce shadow roots to find consent buttons + their viewport-center coords.
|
|
||||||
DEEP_FIND = r"""
|
|
||||||
JSON.stringify((()=>{
|
|
||||||
const hits=[], hosts=[];
|
|
||||||
function walk(root){
|
|
||||||
root.querySelectorAll('*').forEach(e=>{
|
|
||||||
if(e.shadowRoot){ hosts.push(e.tagName.toLowerCase()); walk(e.shadowRoot); }
|
|
||||||
const t=(e.textContent||'').trim();
|
|
||||||
if(t.length<40 && /accept all|manage cookies|reject all|confirm my choice|^accept$|^manage$/i.test(t)){
|
|
||||||
const r=e.getBoundingClientRect();
|
|
||||||
if(r.width>0&&r.height>0)
|
|
||||||
hits.push({tag:e.tagName, text:t, x:Math.round(r.x+r.width/2), y:Math.round(r.y+r.height/2)});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
walk(document);
|
|
||||||
return {shadowHosts:[...new Set(hosts)], buttons:hits};
|
|
||||||
})())
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
    """Load URL, wait out Cloudflare, then save a screenshot and a JSON dump of the
    consent-like buttons DEEP_FIND locates (both under OUT), echoing the findings."""
    OUT.mkdir(exist_ok=True)
    browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH)
    try:
        page = await browser.get(URL)
        print(f"Loaded {URL}; waiting {SOLVE_SECONDS}s for Cloudflare...")
        await page.sleep(SOLVE_SECONDS)

        shot_path = str(OUT / "_consent.png")
        await page.save_screenshot(shot_path)
        print(f"screenshot -> {shot_path}")

        probe = await page.evaluate(DEEP_FIND)
        if isinstance(probe, str):
            report = json.loads(probe)
        else:
            # Keep whatever came back so the dump still explains the failure.
            report = {"error": repr(probe)}
        dump_file = OUT / "_consent.txt"
        dump_file.write_text(json.dumps(report, indent=2), encoding="utf-8")

        print("shadow hosts:", report.get("shadowHosts"))
        print("consent buttons found:")
        for button in report.get("buttons", []):
            print(f" {button}")
    finally:
        browser.stop()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
uc.loop().run_until_complete(main())
|
|
||||||
@@ -1,183 +0,0 @@
|
|||||||
"""
|
|
||||||
Discover how cs.money paginates a filtered search past the initial ~60 SSR items.
|
|
||||||
|
|
||||||
Tests two hypotheses against a high-result search (default "ak-47 redline", which has
|
|
||||||
well over 60 listings):
|
|
||||||
|
|
||||||
A. Does the SSR page honor offset/limit in the URL? Fetch ?search=...&offset=60 and
|
|
||||||
?search=...&limit=120 and compare item ids to page 1. If disjoint/larger, we can
|
|
||||||
paginate cheaply by re-fetching the page.
|
|
||||||
B. The real client "load more": scroll hard to trigger lazy-load and capture any
|
|
||||||
cs.money /2.0/ XHR via Resource Timing — that request carries the structured
|
|
||||||
filter params + offset, i.e. a lighter direct-API pagination path.
|
|
||||||
|
|
||||||
Findings are printed and saved to captures/_pagination.txt.
|
|
||||||
|
|
||||||
cd worker; .venv\\Scripts\\Activate.ps1
|
|
||||||
python discover_pagination.py
|
|
||||||
$env:SEARCH="ak-47 redline"; python discover_pagination.py # override the search
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
import re
|
|
||||||
|
|
||||||
import nodriver as uc
|
|
||||||
from nodriver import cdp
|
|
||||||
|
|
||||||
SEARCH = os.environ.get("SEARCH", "ak-47 redline")
|
|
||||||
SOLVE_SECONDS = int(os.environ.get("SOLVE_SECONDS", "30"))
|
|
||||||
BROWSER_PATH = os.environ.get("BROWSER_PATH")
|
|
||||||
PROXY = os.environ.get("PROXY")
|
|
||||||
|
|
||||||
BASE = "https://cs.money/market/buy/"
|
|
||||||
PAGE_PARAMS_RE = re.compile(r'<script\b[^>]*id="__page-params"[^>]*>(.*?)</script>', re.S)
|
|
||||||
OUT = pathlib.Path(__file__).parent / "captures"
|
|
||||||
CONSENT = ["Reject all", "Only necessary", "Reject", "Decline", "Deny"]
|
|
||||||
|
|
||||||
# Aggressive scroll: window + every scrollable container (the grid scrolls in a div,
|
|
||||||
# which is why a plain window.scrollTo didn't trigger lazy-load before).
|
|
||||||
SCROLL_JS = (
|
|
||||||
"window.scrollTo(0, document.body.scrollHeight);"
|
|
||||||
"document.querySelectorAll('*').forEach(e=>{"
|
|
||||||
" if (e.scrollHeight > e.clientHeight + 80) e.scrollTop = e.scrollHeight;});")
|
|
||||||
|
|
||||||
|
|
||||||
async def js(page, expr):
    """Evaluate the JS expression `expr` in the page and return its value decoded
    from JSON. Returns None when the page does not hand back a string or the string
    is not valid JSON."""
    encoded = await page.evaluate(f"JSON.stringify({expr})")
    if not isinstance(encoded, str):
        return None
    try:
        return json.loads(encoded)
    except (json.JSONDecodeError, TypeError):
        return None
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_text(page, url):
    """Fetch `url` from inside the page (cookies included) and return
    `(status, body)`.

    Returns `(None, "")` when the page does not hand back a decodable JSON object.
    """
    # json.dumps gives a string literal that is valid JavaScript for any input;
    # Python's repr() is not guaranteed to.
    expr = ("fetch(" + json.dumps(url) + ",{credentials:'include'}).then(async r=>"
            "JSON.stringify({status:r.status, body:await r.text()}))")
    raw = await page.evaluate(expr, await_promise=True)
    try:
        o = json.loads(raw)
        return o.get("status"), o.get("body", "")
    except (json.JSONDecodeError, TypeError, AttributeError):
        # Not a string, not JSON, or JSON that is not an object.
        return None, ""
|
|
||||||
|
|
||||||
|
|
||||||
def page_item_ids(html):
    """Return the `id` of every item in the page's __page-params inventory.

    Yields [] when the blob is missing, is not valid JSON, or does not have the
    `{"inventory": {"items": [...]}}` shape; entries that are not objects are
    skipped instead of raising.
    """
    m = PAGE_PARAMS_RE.search(html or "")
    if not m:
        return []
    try:
        params = json.loads(m.group(1))
    except json.JSONDecodeError:
        return []
    # Any level may be null or of an unexpected type on a malformed page.
    inventory = params.get("inventory") if isinstance(params, dict) else None
    items = inventory.get("items") if isinstance(inventory, dict) else None
    if not isinstance(items, list):
        return []
    return [it.get("id") for it in items if isinstance(it, dict)]
|
|
||||||
|
|
||||||
|
|
||||||
async def click_visible(page, pattern):
    """Click the first VISIBLE element whose trimmed text matches `pattern`
    (a case-insensitive regular expression) and report whether one was clicked.

    Only elements with a non-null offsetParent and text shorter than 40 characters
    are considered, which avoids the hidden/duplicate nodes a plain text search hits.
    """
    needle = json.dumps(pattern)  # safe JS string literal for the RegExp source
    script = "".join((
        "JSON.stringify((()=>{",
        f"const re=new RegExp({needle},'i');",
        "const els=[...document.querySelectorAll('button,a,[role=\"button\"],span,div')];",
        "const b=els.find(e=>e.offsetParent!==null && (e.textContent||'').trim().length<40 ",
        "&& re.test((e.textContent||'').trim()));",
        "if(b){b.click();return true}return false})())",
    ))
    outcome = await page.evaluate(script)
    if not isinstance(outcome, str):
        return False
    return "true" in outcome
|
|
||||||
|
|
||||||
|
|
||||||
async def banner_present(page):
    """Report whether the page's visible text still contains the consent banner's
    'Manage cookies' or 'Accept all' wording."""
    probe = "JSON.stringify(/Manage cookies|Accept all/i.test(document.body.innerText||''))"
    answer = await page.evaluate(probe)
    if not isinstance(answer, str):
        return False
    return "true" in answer
|
|
||||||
|
|
||||||
|
|
||||||
async def dismiss(page):
    """Get rid of the consent banner and return a comma-separated trace of what
    was done.

    Tries the privacy-preserving path first (Manage -> Reject all -> Confirm). If
    the banner is still up afterwards, falls back to Accept all so the page becomes
    interactive (discovery needs scrolling to work). The trace always ends with
    "gone" or "STILL-PRESENT".
    """
    trace = []

    opened_manage = await click_visible(page, "manage cookies|^manage$")
    if opened_manage:
        trace.append("manage")
        await page.sleep(1.2)
        rejected = await click_visible(page, "reject all")
        if rejected:
            trace.append("reject-all")
            await page.sleep(0.4)
        # Stop at the first confirm-style button that exists.
        for label in ("confirm my choice", "^confirm$", "^save$"):
            if await click_visible(page, label):
                trace.append("confirm")
                break
        await page.sleep(1)

    if await banner_present(page):
        accepted = await click_visible(page, "accept all|^accept$")
        trace.append("still-up->accept" if accepted else "still-up")
        await page.sleep(0.5)

    still_there = await banner_present(page)
    trace.append("STILL-PRESENT" if still_there else "gone")
    return ", ".join(trace)
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
    """Run both pagination experiments against SEARCH and save the findings.

    A: fetch the SSR page plain, with offset=60 and with limit=120, and compare
       the item ids to see whether either URL parameter is honoured.
    B: scroll the page in small wheel steps and list any new cs.money requests
       that appear in the Resource Timing entries.
    The report is printed and written to OUT / "_pagination.txt".
    """
    from urllib.parse import quote_plus

    OUT.mkdir(exist_ok=True)
    launch_args = [f"--proxy-server={PROXY}"] if PROXY else []
    launch_args.append("--blink-settings=imagesEnabled=false")
    q = quote_plus(SEARCH)
    findings = []
    resource_names = "performance.getEntriesByType('resource').map(e=>e.name)"

    def fresh_market_requests(urls, known):
        # New cs.money requests, minus the telemetry endpoints.
        return [u for u in urls
                if u not in known and "cs.money" in u
                and "metrics." not in u and "traces." not in u]

    browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH, browser_args=launch_args)
    try:
        url0 = f"{BASE}?search={q}"
        page = await browser.get(url0)
        print(f"Warming on {url0} ({SOLVE_SECONDS}s for Cloudflare)...")
        await page.sleep(SOLVE_SECONDS)
        print(f"Consent: {await dismiss(page)}")

        # --- A. URL offset/limit on the SSR page ---
        _, plain_html = await fetch_text(page, f"{BASE}?search={q}")
        _, offset_html = await fetch_text(page, f"{BASE}?search={q}&offset=60")
        _, limit_html = await fetch_text(page, f"{BASE}?search={q}&limit=120")
        ids_plain = page_item_ids(plain_html)
        ids_offset = page_item_ids(offset_html)
        ids_limit = page_item_ids(limit_html)
        overlap = len(set(ids_plain) & set(ids_offset))
        findings.append(f"page1 ids={len(ids_plain)} offset=60 ids={len(ids_offset)} (overlap with page1={overlap}) limit=120 ids={len(ids_limit)}")
        findings.append(f" -> offset works? {'YES (disjoint)' if ids_offset and overlap == 0 else 'no/ignored'}")
        findings.append(f" -> limit works? {'YES (>60)' if len(ids_limit) > 60 else 'no/ignored'}")

        # --- B. Trigger client load-more, capture cs.money /2.0/ XHRs ---
        # Infinite scroll only fires on GRADUAL downward scrolling — jumping to the
        # bottom skips the trigger. So step down in small wheel increments and watch
        # the item count grow.
        before = set(await js(page, resource_names) or [])

        async def card_count():
            # Raw evaluate() result; only ever interpolated into a log line.
            return await page.evaluate(
                "JSON.stringify(document.querySelectorAll('[href*=\"/item/\"],[class*=\"item\" i]').length)")

        print(f" cards before scroll: {await card_count()}")
        for step in range(60):
            try:
                wheel = cdp.input_.dispatch_mouse_event(
                    type_="mouseWheel", x=720, y=450, delta_x=0, delta_y=500)
                await page.send(wheel)
            except Exception:
                pass  # a dropped wheel event is not worth aborting the run
            await page.sleep(0.7)
            if step % 15 == 14:
                now = fresh_market_requests(await js(page, resource_names) or [], before)
                print(f" step {step+1}: cards={await card_count()} new cs.money reqs={len(now)}")

        after = await js(page, resource_names) or []
        new_xhrs = fresh_market_requests(after, before)
        findings.append(f"\nclient requests after scrolling ({len(new_xhrs)} new cs.money):")
        # dict.fromkeys de-duplicates while keeping first-seen order.
        for u in dict.fromkeys(new_xhrs):
            findings.append(f" {u}")
        if not new_xhrs:
            findings.append(" (none — grid may not lazy-load via XHR, or scroll didn't reach the trigger)")

        report = "\n".join(findings)
        print("\n=== FINDINGS ===\n" + report)
        (OUT / "_pagination.txt").write_text(f"search: {SEARCH}\n\n{report}\n", encoding="utf-8")
        print(f"\nsaved to {OUT / '_pagination.txt'}")
    finally:
        browser.stop()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
uc.loop().run_until_complete(main())
|
|
||||||
@@ -1,96 +0,0 @@
|
|||||||
"""
|
|
||||||
Find cs.money's price-filter URL param (the basis for price-bucket pagination).
|
|
||||||
|
|
||||||
The market has a Price from/to filter in the sidebar. `search=` works via the URL and
|
|
||||||
the page SSRs the filtered listings into __page-params, so a price param likely works
|
|
||||||
the same way. We baseline the cheapest set, then try candidate param names with a high
|
|
||||||
floor and check whether the returned listings actually shift above it.
|
|
||||||
|
|
||||||
cd worker; .venv\\Scripts\\Activate.ps1
|
|
||||||
python discover_price_param.py
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
import re
|
|
||||||
from urllib.parse import quote_plus
|
|
||||||
|
|
||||||
import nodriver as uc
|
|
||||||
|
|
||||||
SEARCH = os.environ.get("SEARCH", "ak-47 redline")
|
|
||||||
FLOOR = float(os.environ.get("FLOOR", "200"))
|
|
||||||
SOLVE_SECONDS = int(os.environ.get("SOLVE_SECONDS", "30"))
|
|
||||||
BROWSER_PATH = os.environ.get("BROWSER_PATH")
|
|
||||||
BASE = "https://cs.money/market/buy/"
|
|
||||||
PP = re.compile(r'<script\b[^>]*id="__page-params"[^>]*>(.*?)</script>', re.S)
|
|
||||||
OUT = pathlib.Path(__file__).parent / "captures"
|
|
||||||
|
|
||||||
# Param-name variants for a price floor (and a couple of from/to pairs).
|
|
||||||
CANDIDATES = [
|
|
||||||
"minPrice", "priceFrom", "price_from", "priceMin", "min_price",
|
|
||||||
"priceGte", "from", "price_min", "minprice", "price.gte", "pricegte",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_prices(page, url):
    """Fetch `url` in-page and return the `pricing` object of every listing in its
    __page-params blob.

    Returns None when the response cannot be decoded or carries no blob, and a
    (possibly empty) list otherwise. Listings without a truthy `pricing` are
    skipped. Malformed structure (null inventory, non-object entries) never raises.
    """
    # json.dumps gives a string literal that is valid JavaScript for any input.
    expr = ("fetch(" + json.dumps(url) + ",{credentials:'include'}).then(async r=>"
            "JSON.stringify({status:r.status, body:await r.text()}))")
    raw = await page.evaluate(expr, await_promise=True)
    try:
        body = json.loads(raw).get("body", "")
    except (json.JSONDecodeError, TypeError, AttributeError):
        return None
    m = PP.search(body if isinstance(body, str) else "")
    if not m:
        return None
    try:
        params = json.loads(m.group(1))
    except json.JSONDecodeError:
        return None
    inventory = params.get("inventory") if isinstance(params, dict) else None
    items = inventory.get("items") if isinstance(inventory, dict) else None
    if not isinstance(items, list):
        items = []
    return [it["pricing"] for it in items if isinstance(it, dict) and it.get("pricing")]
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
    """Probe how the market page reacts to minPrice/maxPrice URL parameters.

    Fetches a baseline plus several price-window variants of the SEARCH page and
    records, per variant, the listing count and the min/max of the `default`,
    `computed` and `basePrice` pricing fields. Results are printed and written to
    OUT / "_price_param.txt".
    """
    OUT.mkdir(exist_ok=True)
    q = quote_plus(SEARCH)
    lines = []

    def rng(pr, field):
        """(min, max) of the numeric values of `field`, or (None, None) if none."""
        vals = [p.get(field) for p in pr if isinstance(p.get(field), (int, float))]
        return (min(vals), max(vals)) if vals else (None, None)

    def span(pr, field):
        """'min,max' with two decimals, or 'n/a' when no listing carries the field
        (formatting None with ':.2f' would raise TypeError)."""
        lo, hi = rng(pr, field)
        return "n/a" if lo is None else f"{lo:.2f},{hi:.2f}"

    browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH,
                             browser_args=["--blink-settings=imagesEnabled=false"])
    try:
        page = await browser.get(f"{BASE}?search={q}")
        print(f"Warming ({SOLVE_SECONDS}s)...")
        await page.sleep(SOLVE_SECONDS)

        # Test minPrice/maxPrice semantics directly (old cs.money API used these).
        tests = [
            ("baseline", f"{BASE}?search={q}"),
            ("maxPrice=200", f"{BASE}?search={q}&maxPrice=200"),
            ("minPrice=300", f"{BASE}?search={q}&minPrice=300"),
            ("minPrice=300&maxPrice=400", f"{BASE}?search={q}&minPrice=300&maxPrice=400"),
            ("minPrice=500&maxPrice=1000", f"{BASE}?search={q}&minPrice=500&maxPrice=1000"),
        ]

        for name, url in tests:
            pr = await fetch_prices(page, url)
            if not pr:
                lines.append(f"{name:28} -> no items")
            else:
                lines.append(f"{name:28} -> n={len(pr)} default[{span(pr, 'default')}] "
                             f"computed[{span(pr, 'computed')}] base[{span(pr, 'basePrice')}]")
            print(lines[-1])

        (OUT / "_price_param.txt").write_text(
            f"search={SEARCH} floor={FLOOR}\n\n" + "\n".join(lines), encoding="utf-8")
        print(f"\nsaved to {OUT/'_price_param.txt'}")
    finally:
        browser.stop()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
uc.loop().run_until_complete(main())
|
|
||||||
@@ -15,5 +15,6 @@ x11vnc -display "${DISPLAY_NUM}" -forever -shared -nopw -quiet -bg
|
|||||||
echo "[entrypoint] starting noVNC on :6080 (open http://localhost:6080/vnc.html)"
|
echo "[entrypoint] starting noVNC on :6080 (open http://localhost:6080/vnc.html)"
|
||||||
websockify --web=/usr/share/novnc 6080 localhost:5900 &
|
websockify --web=/usr/share/novnc 6080 localhost:5900 &
|
||||||
|
|
||||||
echo "[entrypoint] launching worker"
|
WORKER_SCRIPT="${WORKER_SCRIPT:-csmoney_worker.py}"
|
||||||
exec python worker.py
|
echo "[entrypoint] launching ${WORKER_SCRIPT}"
|
||||||
|
exec python "${WORKER_SCRIPT}"
|
||||||
|
|||||||
285
worker/poc.py
285
worker/poc.py
@@ -1,285 +0,0 @@
|
|||||||
"""
|
|
||||||
Proof-of-concept / pre-fleet validation for the cs.money scraper.
|
|
||||||
|
|
||||||
Proves the things we need before building the C2 + worker fleet:
|
|
||||||
1. nodriver clears cs.money's Cloudflare where .NET Selenium couldn't.
|
|
||||||
2. a single WARM session can page the sell-orders API deeply without re-challenge.
|
|
||||||
3. a free-text market search (e.g. "cyber security ft") can be turned into a
|
|
||||||
filtered sell-orders API call — we DISCOVER the real API params by capturing the
|
|
||||||
request the page itself fires, instead of guessing.
|
|
||||||
|
|
||||||
It opens the market (optionally a search URL) in a real non-headless Chromium, lets
|
|
||||||
you clear Cloudflare, dismisses the cookie banner (privacy-preserving), captures the
|
|
||||||
sell-orders request the page makes, then pages that API from inside the cleared page
|
|
||||||
(same-origin fetch carries cf_clearance), pacing itself and stopping on re-challenge.
|
|
||||||
|
|
||||||
cd worker
|
|
||||||
.venv\\Scripts\\Activate.ps1
|
|
||||||
pip install -r requirements.txt
|
|
||||||
|
|
||||||
python poc.py # whole-market sweep
|
|
||||||
$env:SEARCH="cyber security ft"; python poc.py # targeted: FT M4A4 Cyber Security
|
|
||||||
|
|
||||||
Env knobs (all optional):
|
|
||||||
SEARCH free-text market search; when set, scrape only those results
|
|
||||||
MARKET_URL market page base (default the buy market)
|
|
||||||
SOLVE_SECONDS seconds to wait for you to clear Cloudflare (default 30)
|
|
||||||
PAGES how many offset pages (60 each) to attempt (default 20)
|
|
||||||
START_OFFSET first offset (default 0)
|
|
||||||
DELAY / JITTER base + random seconds between fetches (default 2.0 / 1.5)
|
|
||||||
PROXY host:port for an auth-free proxy (omit to use your own IP)
|
|
||||||
BROWSER_PATH path to Chrome/Edge if auto-detect fails
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
import random
|
|
||||||
from urllib.parse import quote_plus, urlsplit, parse_qsl, urlencode, urlunsplit
|
|
||||||
|
|
||||||
import nodriver as uc
|
|
||||||
from nodriver import cdp
|
|
||||||
|
|
||||||
SEARCH = os.environ.get("SEARCH")
|
|
||||||
MARKET_URL = os.environ.get("MARKET_URL", "https://cs.money/market/buy/")
|
|
||||||
SOLVE_SECONDS = int(os.environ.get("SOLVE_SECONDS", "30"))
|
|
||||||
PAGES = int(os.environ.get("PAGES", "20"))
|
|
||||||
START_OFFSET = int(os.environ.get("START_OFFSET", "0"))
|
|
||||||
DELAY = float(os.environ.get("DELAY", "2.0"))
|
|
||||||
JITTER = float(os.environ.get("JITTER", "1.5"))
|
|
||||||
PROXY = os.environ.get("PROXY")
|
|
||||||
BROWSER_PATH = os.environ.get("BROWSER_PATH")
|
|
||||||
|
|
||||||
# Fallback template if we fail to capture the page's own request (offset = {}).
|
|
||||||
DEFAULT_TEMPLATE = "https://cs.money/2.0/market/sell-orders?limit=60&offset={}"
|
|
||||||
OUT_DIR = pathlib.Path(__file__).parent / "captures"
|
|
||||||
CONSENT_LABELS = ["Reject all", "Reject All", "Only necessary", "Necessary only",
|
|
||||||
"Reject", "Decline", "Deny"]
|
|
||||||
|
|
||||||
# Filled by the CDP network handler with sell-orders request URLs the page fires.
|
|
||||||
_seen_urls: list[str] = []
|
|
||||||
|
|
||||||
|
|
||||||
def looks_like_challenge(body: str) -> bool:
    """Heuristic: does this response body look like a challenge page rather than API JSON?

    True when the body is empty/whitespace (or None), starts with ``<`` after
    leading whitespace (i.e. markup), or contains one of the two challenge
    marker strings anywhere.
    """
    text = body or ""
    stripped = text.lstrip()
    if not stripped or stripped.startswith("<"):
        return True
    return any(marker in text for marker in ("Just a moment", "challenge-platform"))
|
|
||||||
|
|
||||||
|
|
||||||
def decimals(v: float) -> int:
    """Number of decimal places in the shortest round-trip repr of ``v``.

    ``repr(float)`` switches to scientific notation for small or large
    magnitudes (``1.234e-05``); counting characters after the "." is then
    wrong (``"234e-05"`` is not 7 decimals, and ``"1e-05"`` has no "." at all).
    Parsing the repr as a Decimal and reading its exponent is correct for both
    notations.

    Returns 0 for values with no fractional digits and for inf/nan.
    Raises TypeError/ValueError (from ``float(v)``) for non-numeric input.
    """
    from decimal import Decimal

    exponent = Decimal(repr(float(v))).as_tuple().exponent
    if not isinstance(exponent, int):
        # inf / nan carry a non-numeric exponent marker.
        return 0
    return max(0, -exponent)
|
|
||||||
|
|
||||||
|
|
||||||
def template_from(url: str) -> str:
    """Build a pagination template from a captured sell-orders URL.

    Every query parameter except ``offset`` is kept (that is the search/filter
    encoding we want to learn), ``limit=60`` is added when the URL has no
    ``limit``, and a trailing ``offset={}`` placeholder is appended for
    ``str.format``. Any fragment is dropped.
    """
    pieces = urlsplit(url)
    kept = []
    has_limit = False
    for key, value in parse_qsl(pieces.query, keep_blank_values=True):
        if key == "offset":
            continue
        has_limit = has_limit or key == "limit"
        kept.append((key, value))
    if not has_limit:
        kept.append(("limit", "60"))
    # `kept` always holds at least the limit param, so the "&" join is safe.
    query = urlencode(kept) + "&offset={}"
    return urlunsplit((pieces.scheme, pieces.netloc, pieces.path, query, ""))
|
|
||||||
|
|
||||||
|
|
||||||
async def dismiss_consent(page) -> str | None:
    """Best-effort, privacy-preserving consent dismissal — never clicks 'Accept all'.

    Tries each label in ``CONSENT_LABELS`` in order and clicks the first
    matching element that accepts the click. Lookup and click failures are
    deliberately swallowed: a banner left up is not fatal.

    Returns the label that was clicked, or ``None`` if nothing was dismissed.
    """
    for label in CONSENT_LABELS:
        try:
            button = await page.find(label, best_match=True, timeout=2)
        except Exception:
            continue  # label not on the page (or lookup timed out) — try the next
        if not button:
            continue
        try:
            await button.click()
        except Exception:
            continue  # element found but not clickable — try the next label
        return label
    return None
|
|
||||||
|
|
||||||
|
|
||||||
async def fetch_json(page, url: str) -> tuple[str, str]:
    """Fetch ``url`` from inside the page and return ``(status, body)`` as strings.

    The page's own ``fetch`` is used (with credentials) so the request rides on
    the cleared session. The result is JSON-stringified in JS and parsed here,
    because only a string is passed back from ``evaluate``.

    Returns:
        ``(str(status), body_text)`` on success;
        ``("-1", "")`` when evaluate() did not return a string;
        ``("-1", raw)`` when the returned string is not the expected
        ``{status, body}`` JSON object.
    """
    # json.dumps yields a valid JS string literal for any URL; Python's repr()
    # does not in every case (e.g. \U escapes).
    expr = (
        f"fetch({json.dumps(url)}, {{credentials:'include', headers:{{'accept':'application/json'}}}})"
        f".then(async r => JSON.stringify({{status: r.status, body: await r.text()}}))"
    )
    raw = await page.evaluate(expr, await_promise=True)
    if not isinstance(raw, str):
        return ("-1", "")
    try:
        obj = json.loads(raw)
    except json.JSONDecodeError:
        return ("-1", raw)
    if not isinstance(obj, dict):
        # Valid JSON but not our envelope (e.g. a bare list) — treat like unparsable.
        return ("-1", raw)
    body = obj.get("body")
    # Keep the declared (str, str) contract even if body came back null.
    return (str(obj.get("status", "-1")), body if isinstance(body, str) else "")
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
    """Run the proof-of-concept: warm a session, discover the sell-orders URL, page it.

    1. Launch a non-headless browser (optionally through ``PROXY``) and attach a
       network handler before navigating, so early sell-orders requests are seen.
    2. Open the market (or search) page, wait ``SOLVE_SECONDS`` for the challenge
       to be cleared, dismiss the consent banner, and scroll to provoke
       lazy-load pagination.
    3. Read every requested URL from the Resource Timing API, dump the request
       lists and the page HTML to ``OUT_DIR``, and derive a pagination template
       (falling back to ``DEFAULT_TEMPLATE`` when nothing was captured).
    4. Page the API ``PAGES`` times in steps of 60, saving each page, until a
       challenge, an unusable payload, or an empty page stops the sweep.
    5. Print a summary including a float-precision verdict.
    """
    OUT_DIR.mkdir(exist_ok=True)
    args = [f"--proxy-server={PROXY}"] if PROXY else []

    target_url = MARKET_URL
    tag = "market"
    if SEARCH:
        sep = "&" if "?" in MARKET_URL else "?"
        target_url = f"{MARKET_URL}{sep}search={quote_plus(SEARCH)}"
        tag = "search_" + "".join(c if c.isalnum() else "_" for c in SEARCH)[:40]

    print(f"Launching nodriver Chromium (proxy={PROXY or 'none / own IP'})...")
    browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH, browser_args=args)

    pages_ok = items_total = floats_total = low_prec = 0
    dp_max = 0
    deepest_offset = None
    reason = "completed (hit PAGES limit)"

    try:
        # Open a blank tab first so the network handler is attached BEFORE the page
        # fires its filtered sell-orders request (otherwise we'd miss it).
        page = await browser.get("about:blank")

        async def on_request(evt):
            url = evt.request.url
            if "/market/sell-orders" in url:
                _seen_urls.append(url)

        page.add_handler(cdp.network.RequestWillBeSent, on_request)
        try:
            await page.send(cdp.network.enable())
        except Exception as ex:
            print(f"(network capture unavailable: {ex})")

        print(f"Opening {target_url}")
        await page.get(target_url)
        print(f"Solve any Cloudflare challenge. Waiting {SOLVE_SECONDS}s for the grid...")
        await page.sleep(SOLVE_SECONDS)

        clicked = await dismiss_consent(page)
        print(f"Consent banner: {'dismissed via ' + clicked if clicked else 'left up (does not block fetch)'}")

        # cs.money is an Astro SSR app — the initial filtered listings are rendered
        # server-side (no client XHR to capture). Scroll to provoke lazy-load
        # pagination, which DOES fire a client request carrying the real filter params.
        print("Scrolling to trigger lazy-load pagination...")
        for _ in range(6):
            try:
                await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
            except Exception:
                pass  # best-effort: a failed scroll just means fewer captured requests
            await page.sleep(2)

        # nodriver returns arrays unreliably from evaluate(), so JSON.stringify in JS
        # and json.loads here (the string path is proven by fetch_json).
        async def js_list(expr: str) -> list:
            raw = await page.evaluate(f"JSON.stringify({expr})")
            try:
                return json.loads(raw) if isinstance(raw, str) else []
            except (json.JSONDecodeError, TypeError):
                return []

        # Discovery via the Resource Timing API: read the URLs the page requested
        # straight out of the browser instead of relying on CDP event timing.
        # Also dump nearby API calls for context.
        try:
            all_urls = await js_list("performance.getEntriesByType('resource').map(e=>e.name)")
            print(f">>> Resource Timing saw {len(all_urls)} requests total")
            if all_urls:
                (OUT_DIR / "_all_requests.txt").write_text(
                    "\n".join(dict.fromkeys(all_urls)), encoding="utf-8")
            sell = [u for u in all_urls if "/market/sell-orders" in u]
            _seen_urls.extend(sell)
            api = [u for u in all_urls if "cs.money/" in u and ("/2.0/" in u or "/1.0/" in u)]
            if api:
                (OUT_DIR / "_api_calls.txt").write_text("\n".join(dict.fromkeys(api)), encoding="utf-8")
                print(f">>> {len(set(api))} cs.money API calls; saved to {OUT_DIR / '_api_calls.txt'}")
        except Exception as ex:
            print(f"(resource-timing query failed: {ex})")

        # Dump the SSR'd page so we can see how the filter is encoded and where the
        # listings data lives (Astro embeds island props / hydration JSON in the HTML).
        try:
            html = await page.evaluate("document.documentElement.outerHTML")
            if isinstance(html, str) and html:
                (OUT_DIR / "_page.html").write_text(html, encoding="utf-8")
                print(f">>> saved page HTML ({len(html)} bytes) to {OUT_DIR / '_page.html'}")
        except Exception as ex:
            print(f"(page HTML dump failed: {ex})")

        # Discovery: what sell-orders request did the page actually make?
        if _seen_urls:
            captured = _seen_urls[-1]
            template = template_from(captured)
            print("\n>>> DISCOVERED sell-orders API call the page fired:")
            print(f" {captured}")
            print(f">>> pagination template: {template}\n")
            # Persist it — the console line is easy to lose, and this is the one bit
            # of ground truth (the real filter-param scheme) we need.
            (OUT_DIR / "_discovered.txt").write_text(
                "ALL captured sell-orders requests:\n"
                + "\n".join(dict.fromkeys(_seen_urls))
                + f"\n\npagination template:\n{template}\n",
                encoding="utf-8")
            print(f">>> saved to {OUT_DIR / '_discovered.txt'}")
        else:
            template = DEFAULT_TEMPLATE
            if SEARCH:
                template = template.replace("offset={}", f"search={quote_plus(SEARCH)}&offset={{}}")
            print(f"\n(no request captured; falling back to template: {template})\n")

        for i in range(PAGES):
            offset = START_OFFSET + i * 60
            status, body = await fetch_json(page, template.format(offset))

            if looks_like_challenge(body):
                print(f" page {i + 1} [offset {offset}]: RE-CHALLENGED (status {status}). Stopping.")
                (OUT_DIR / f"{tag}_challenge_offset_{offset}.html").write_text(body, encoding="utf-8")
                reason = f"re-challenged at offset {offset}"
                break

            try:
                payload = json.loads(body)
                if not isinstance(payload, dict):
                    # Valid JSON but not an object: handle it like an unparsable
                    # body instead of crashing on .get().
                    raise ValueError("unexpected JSON shape")
                items = payload.get("items") or []
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                print(f" page {i + 1} [offset {offset}]: non-JSON (status {status}). Stopping.")
                reason = f"non-JSON at offset {offset}"
                break

            if not items:
                print(f" page {i + 1} [offset {offset}]: 0 items — end of results.")
                reason = "end of results"
                break

            (OUT_DIR / f"{tag}_offset_{offset:06d}.json").write_text(body, encoding="utf-8")
            pages_ok += 1
            deepest_offset = offset
            items_total += len(items)
            names = set()
            for it in items:
                # Tolerate null / malformed items and assets rather than aborting.
                asset = (it.get("asset") or {}) if isinstance(it, dict) else {}
                fl = asset.get("float")
                try:
                    d = decimals(fl) if fl is not None else None
                except (TypeError, ValueError):
                    d = None  # non-numeric float field: ignore it, keep sweeping
                if d is not None:
                    floats_total += 1
                    dp_max = max(dp_max, d)
                    if d <= 6:  # short repr — exact binary fraction (e.g. 1/16), not truncation
                        low_prec += 1
                names.add((asset.get("names") or {}).get("full"))
            sample = next(iter(names), None) if SEARCH else None
            print(f" page {i + 1} [offset {offset}] OK — {len(items)} items"
                  + (f" (e.g. {sample}; {len(names)} distinct names)" if SEARCH else ""))

            # Pace the sweep with jitter between page fetches.
            await page.sleep(DELAY + random.uniform(0, JITTER))

        print("\n=== summary ===")
        print(f" query: {SEARCH or '(whole market)'}")
        print(f" stopped: {reason}")
        print(f" clean pages: {pages_ok} deepest offset: {deepest_offset} items: {items_total}")
        if floats_total:
            # Truncation would make MANY values short, not one exact binary fraction.
            verdict = "FULL precision" if low_prec / floats_total < 0.02 else "POSSIBLE TRUNCATION"
            print(f" floats: {floats_total} items, {dp_max}-decimal max, "
                  f"{low_prec} short-repr (exact fractions) — {verdict}")
        print(f" files in {OUT_DIR}")
    finally:
        browser.stop()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
uc.loop().run_until_complete(main())
|
|
||||||
@@ -1,77 +0,0 @@
|
|||||||
"""
|
|
||||||
Probe which extra filter params cs.money's SSR market search honors, so we can
|
|
||||||
pick a SECOND pagination axis to break apart dense price bands that saturate the
|
|
||||||
60-cap (see diag_windows.py). For a saturating search we try candidate params and
|
|
||||||
report how the returned set's size + float range + price range change.
|
|
||||||
|
|
||||||
python probe_filters.py "Glock-18 Candy Apple mw"
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import nodriver as uc
|
|
||||||
|
|
||||||
import worker
|
|
||||||
|
|
||||||
BASE = "https://cs.money/market/buy/?search={q}"
|
|
||||||
# (label, extra query string) — candidates cs.money markets commonly expose.
|
|
||||||
CANDIDATES = [
|
|
||||||
("baseline", ""),
|
|
||||||
("sort=price asc", "&order=asc&sort=price"),
|
|
||||||
("sort=price desc", "&order=desc&sort=price"),
|
|
||||||
("sort=float", "&sort=float"),
|
|
||||||
("minFloat/maxFloat lo", "&minFloat=0.07&maxFloat=0.10"),
|
|
||||||
("minFloat/maxFloat hi", "&minFloat=0.10&maxFloat=0.15"),
|
|
||||||
("maxWear lo", "&minWear=0.07&maxWear=0.10"),
|
|
||||||
("isStatTrak=true", "&isStatTrak=true"),
|
|
||||||
("hasStickers=false", "&hasStickers=false"),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def stats(items):
    """Summarise a result set as ``n=<count> float[lo,hi] base[lo,hi]``.

    ``float`` is the range of ``asset.float`` across items; ``base`` is the
    range of ``pricing.basePrice``, falling back to ``pricing.computed`` when
    ``basePrice`` is missing OR null (``dict.get(key, default)`` only covers
    the missing case, so a null basePrice used to hide the fallback).
    A range with no numeric values renders as ``[-]``.
    """
    floats = []
    bases = []
    for it in items:
        fl = (it.get("asset") or {}).get("float")
        if isinstance(fl, (int, float)):
            floats.append(fl)

        p = it.get("pricing") or {}
        b = p.get("basePrice")
        if b is None:
            b = p.get("computed")
        if isinstance(b, (int, float)):
            bases.append(b)

    fr = f"[{min(floats):.4f},{max(floats):.4f}]" if floats else "[-]"
    br = f"[{min(bases):.2f},{max(bases):.2f}]" if bases else "[-]"
    return f"n={len(items):3d} float{fr} base{br}"
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
    """Fetch the search once per candidate param set and report what changed.

    The first candidate ("baseline") fixes the reference id set; every later
    candidate is labelled IGNORED when it returns exactly that set, otherwise
    CHANGED with the size of the symmetric difference. Stops at the first
    challenged response.
    """
    search = " ".join(sys.argv[1:]) or "Glock-18 Candy Apple mw"
    q = worker.urllib.parse.quote_plus(search)

    browser = await uc.start(headless=False,
                             browser_args=["--blink-settings=imagesEnabled=false"])
    try:
        page = await browser.get("about:blank")
        await worker.warm(page)

        base_ids = None
        for label, extra in CANDIDATES:
            status, body = await worker.fetch_json(page, BASE.format(q=q) + extra)
            if "Just a moment" in body or "challenge-platform" in body:
                print(f" {label:24s} CHALLENGED")
                break

            items = worker.extract_items(body)
            ids = {it.get("id") for it in items}
            if label == "baseline":
                base_ids = ids
                delta = ""
            elif ids == base_ids:
                # If a param is IGNORED, the set is identical to baseline.
                delta = "IGNORED (== baseline)"
            else:
                delta = f"CHANGED ({len(ids ^ (base_ids or set()))} diff ids)"
            print(f" {label:24s} {stats(items)} {delta}")
            await page.sleep(worker.DELAY)
    finally:
        browser.stop()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
uc.loop().run_until_complete(main())
|
|
||||||
@@ -1,5 +1,9 @@
|
|||||||
# cs.money scraping worker.
|
# Market scraping workers (cs.money, skin.land).
|
||||||
# nodriver = the modern successor to undetected-chromedriver: it drives a normal
|
# nodriver = the modern successor to undetected-chromedriver: it drives a normal
|
||||||
# Chromium over CDP directly (no chromedriver, so none of the cdc_/webdriver tells
|
# Chromium over CDP directly (no chromedriver, so none of the cdc_/webdriver tells
|
||||||
# that got our .NET Selenium setup insta-challenged by Cloudflare).
|
# that got our .NET Selenium setup insta-challenged by Cloudflare).
|
||||||
nodriver>=0.39
|
#
|
||||||
|
# Everything else the workers use is the Python stdlib (asyncio, urllib, logging, json) —
|
||||||
|
# no other third-party deps. Upper bound is a guard against a surprise breaking release;
|
||||||
|
# bump it deliberately after testing a challenge solve.
|
||||||
|
nodriver>=0.39,<0.50
|
||||||
|
|||||||
174
worker/skinland_worker.py
Normal file
174
worker/skinland_worker.py
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
"""skin.land scrape worker (pull model).
|
||||||
|
|
||||||
|
A thin strategy over blworker.Worker, mirroring the cs.money worker — it supplies only the
|
||||||
|
skin.land-specific bits; the warm session, poll/scrape/post loop, IPRoyal proxy, IP
|
||||||
|
rotation, logging and shutdown all live in the shared runtime. Env knobs: worker/README.md.
|
||||||
|
|
||||||
|
How skin.land is scraped (learned from the discovery probes):
|
||||||
|
- A job's target is the market PAGE URL, e.g.
|
||||||
|
https://skin.land/market/csgo/ak-47-redline-field-tested/
|
||||||
|
- That Nuxt page embeds an internal numeric skin_id. We resolve it once from the page's
|
||||||
|
__NUXT__ payload (the skin object whose `url` == the page slug), cache it per slug, then
|
||||||
|
page the clean JSON API:
|
||||||
|
GET https://app.skin.land/api/v2/obtained-skins?skin_id={id}&page={n}
|
||||||
|
which returns a Laravel paginator {data:[...offers], meta:{current_page,last_page,…}}.
|
||||||
|
- We walk pages 1..last_page (capped by the job's maxPages), dedup offers by id, and post.
|
||||||
|
|
||||||
|
cd worker
|
||||||
|
.venv\\Scripts\\Activate.ps1
|
||||||
|
pip install -r requirements.txt
|
||||||
|
python skinland_worker.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from blworker import ScrapeResult, Worker, click, looks_like_challenge, page_fetch, run
|
||||||
|
|
||||||
|
# The offers API. skin_id is skin.land's internal id (resolved from the page); page is the
|
||||||
|
# Laravel paginator page. Same warm session, fetched in-page (CORS-allowed app subdomain).
|
||||||
|
API = "https://app.skin.land/api/v2/obtained-skins?skin_id={skin_id}&page={page}"
|
||||||
|
|
||||||
|
# The page's Nuxt payload is a devalue flat array; the main skin object is the one whose
|
||||||
|
# `url` field resolves to the page slug, and its `id` field resolves to the skin_id.
|
||||||
|
NUXT_ARRAY_RE = re.compile(r'\[\["(?:ShallowReactive|Reactive)",\d+\]')
|
||||||
|
|
||||||
|
|
||||||
|
def slug_of(url: str) -> str:
    """Return the last path segment of a market page URL (the skin+wear slug).

    Only the URL's path is considered, so a query string or fragment
    (``.../ak-47-redline-field-tested/?ref=x#offers``) no longer leaks into the
    slug. Trailing slashes are ignored.
    """
    from urllib.parse import urlsplit

    return urlsplit(url).path.rstrip("/").rsplit("/", 1)[-1]
|
||||||
|
|
||||||
|
|
||||||
|
def extract_nuxt_array(html: str):
    """Pull the Nuxt devalue payload (a JSON flat array of values with index
    references) out of the page HTML. Returns the parsed list, or None.

    The payload is located with ``NUXT_ARRAY_RE`` and decoded in place with
    ``JSONDecoder.raw_decode``, which parses exactly one JSON value starting at
    the match and ignores whatever HTML follows. That replaces a hand-written
    per-character bracket/string scanner with the stdlib parser.
    """
    m = NUXT_ARRAY_RE.search(html)
    if not m:
        return None
    try:
        payload, _end = json.JSONDecoder().raw_decode(html, m.start())
    except json.JSONDecodeError:
        # Truncated or otherwise malformed payload.
        return None
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_skin_id(html: str, slug: str) -> int | None:
    """Return skin.land's internal skin_id for the page whose slug is ``slug``.

    Scans the Nuxt payload for the first dict that has both ``url`` and ``id``
    fields and whose ``url`` resolves to ``slug``, then returns its resolved
    ``id`` if that is an int. A field value that is an in-range int is treated
    as an index into the payload array; anything else is taken literally.
    Returns None when there is no payload or no matching entry.
    """
    arr = extract_nuxt_array(html)
    if not arr:
        return None
    size = len(arr)

    def val(ref):
        # In-range ints are references into the flat array; others are literals.
        if isinstance(ref, int) and 0 <= ref < size:
            return arr[ref]
        return ref

    for el in arr:
        if not isinstance(el, dict):
            continue
        if "url" not in el or "id" not in el:
            continue
        if val(el["url"]) != slug:
            continue
        sid = val(el["id"])
        if isinstance(sid, int):
            return sid
    return None
|
||||||
|
|
||||||
|
|
||||||
|
class SkinLandWorker(Worker):
    """skin.land strategy for the shared worker runtime.

    Supplies the market-specific pieces: how to label a job, how to dismiss the
    cookie banner, how to resolve a page URL to skin.land's internal skin_id,
    and how to page the obtained-skins API for that id.
    """

    name = "skinland"
    jobs_path = "/skinland/jobs"
    default_market_url = "https://skin.land/market/csgo/"

    def __init__(self, settings):
        super().__init__(settings)
        # skin_id is stable per skin+wear, so cache it per slug to skip the page
        # fetch on re-sweeps.
        self._skin_id_cache: dict[str, int] = {}

    def describe_job(self, job) -> str:
        """Short label for a job: the slug of its market page URL."""
        return slug_of(job["url"])

    async def dismiss_consent(self, page) -> str | None:
        """Privacy-preserving: dismiss the cookie banner with essential-only if present.

        Returns a description of what was clicked, or None if no label matched.
        """
        for label in ("Accept essential", "ACCEPT ESSENTIAL", "Reject all"):
            if await click(page, label):
                return f"dismissed via {label!r}"
        return None

    async def _get_skin_id(self, page, job, slug: str) -> tuple[int | None, str, int]:
        """Resolve (and cache) skin.land's skin_id for this slug.

        Returns (skin_id, reason, wire); reason is "" on success, else a
        partial-stop reason ("challenged" or "no-skin-id"). ``wire`` is 0 on a
        cache hit, since no page fetch was made.
        """
        cached = self._skin_id_cache.get(slug)
        if cached is not None:
            return cached, "", 0

        _status, html, wire = await page_fetch(page, job["url"], accept="text/html")
        # page_fetch may report a negative byte count — presumably "unknown";
        # clamp so it never reduces the total. TODO confirm against blworker.
        wire = max(wire, 0)
        if looks_like_challenge(html):
            return None, "challenged", wire
        skin_id = resolve_skin_id(html, slug)
        if skin_id is None:
            return None, "no-skin-id", wire
        self._skin_id_cache[slug] = skin_id
        return skin_id, "", wire

    async def scrape_job(self, page, job) -> ScrapeResult:
        """Scrape ALL offers for one skin+wear by paging the obtained-skins API.

        Walks pages 1..last_page (capped by the job's ``maxPages``, default 40),
        dedups offers by id, and returns them with a stop reason:

        - "completed"  the final page (or an empty page) was reached
        - "fetch-cap"  the page cap was hit first
        - "challenged" a response looked like a challenge page
        - "bad-json"   a response was not JSON, or was JSON without a ``data``
                       list (e.g. an error body). Such a payload must not end
                       the sweep as "completed"; the module's callers treat
                       that reason as a full sweep.
        - whatever ``_get_skin_id`` reported when the id could not be resolved
        """
        slug = slug_of(job["url"])
        # An explicit null maxPages falls back to the default instead of
        # failing the `<=` comparison below.
        max_pages = job.get("maxPages")
        if max_pages is None:
            max_pages = 40

        skin_id, reason, wire = await self._get_skin_id(page, job, slug)
        if skin_id is None:
            return ScrapeResult([], 0, reason, wire)

        seen: dict = {}
        fetches = 0
        page_n = 1
        reason = "completed"
        while page_n <= max_pages:
            _status, body, wbytes = await page_fetch(page, API.format(skin_id=skin_id, page=page_n))
            fetches += 1
            if wbytes > 0:
                wire += wbytes

            if looks_like_challenge(body):
                return ScrapeResult(list(seen.values()), fetches, "challenged", wire)
            try:
                payload = json.loads(body)
            except json.JSONDecodeError:
                return ScrapeResult(list(seen.values()), fetches, "bad-json", wire)

            data = payload.get("data") if isinstance(payload, dict) else None
            if not isinstance(data, list):
                # Not a paginator page: report a partial stop, never "completed".
                return ScrapeResult(list(seen.values()), fetches, "bad-json", wire)

            for o in data:
                if isinstance(o, dict) and o.get("id") is not None:
                    seen[o["id"]] = o

            meta = payload.get("meta")
            last = meta.get("last_page") if isinstance(meta, dict) else None
            if not data or (isinstance(last, int) and page_n >= last):
                break  # walked the final page
            page_n += 1
            await self._pace(page)
        else:
            # Loop ended because the cap was reached, not because of `break`.
            reason = "fetch-cap"

        return ScrapeResult(list(seen.values()), fetches, reason, wire)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
run(SkinLandWorker)
|
||||||
@@ -1,77 +0,0 @@
|
|||||||
"""
|
|
||||||
One-off count verification: scrape a single skin+wear search from cs.money and
|
|
||||||
report how many distinct sell-orders come back, reusing the production worker's
|
|
||||||
warm-session + price-window bisection logic (worker.scrape_job).
|
|
||||||
|
|
||||||
Use it to sanity-check that our pagination actually recovers the FULL listing
|
|
||||||
count cs.money shows on the site (the known ground truth) for one query.
|
|
||||||
|
|
||||||
cd worker
|
|
||||||
.venv\\Scripts\\Activate.ps1
|
|
||||||
python verify_count.py "Desert Eagle Bronze Deco fn"
|
|
||||||
|
|
||||||
Env knobs (same meaning as worker.py): SOLVE_SECONDS, DELAY, JITTER, PROXY,
|
|
||||||
BROWSER_PATH, LOAD_IMAGES. MAX_FETCHES caps window fetches (default 80).
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from collections import Counter
|
|
||||||
|
|
||||||
import nodriver as uc
|
|
||||||
|
|
||||||
import worker
|
|
||||||
|
|
||||||
MAX_FETCHES = int(os.environ.get("MAX_FETCHES", "80"))
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
    """Scrape one search through the production worker and print a count breakdown.

    Warms a session, runs ``worker.scrape_job`` for the search given on the
    command line (capped at ``MAX_FETCHES`` fetches), then prints the distinct
    sell-order count plus tallies by full name, by wear code, and of StatTrak
    items.
    """
    search = " ".join(sys.argv[1:]) or "Desert Eagle Bronze Deco fn"

    args = []
    if worker.PROXY:
        args.append(f"--proxy-server={worker.PROXY}")
    if not worker.LOAD_IMAGES:
        args.append("--blink-settings=imagesEnabled=false")
    if os.environ.get("CHROME_NO_SANDBOX") == "1":
        args.extend(["--no-sandbox", "--disable-dev-shm-usage"])

    print(f"Verifying count for search {search!r} (proxy={worker.PROXY or 'own IP'})")
    browser = await uc.start(
        headless=False, browser_executable_path=worker.BROWSER_PATH, browser_args=args)
    try:
        page = await browser.get("about:blank")
        await worker.warm(page)

        job = {"search": search, "maxPages": MAX_FETCHES}
        items, fetches, reason = await worker.scrape_job(page, job)

        print("\n=== result ===")
        print(f" search: {search}")
        print(f" stopped: {reason}")
        print(f" fetches: {fetches}")
        print(f" DISTINCT sell-orders (deduped by id): {len(items)}")

        # Break down what came back so we can see whether the count is inflated by
        # off-target names/wears (the C2's name+wear filter would drop those later).
        assets = [it.get("asset") or {} for it in items]
        names = Counter((a.get("names") or {}).get("full") for a in assets)
        wears = Counter(a.get("quality") for a in assets)
        st = sum(1 for a in assets if a.get("isStatTrak"))

        print(f" StatTrak in set: {st}")
        print(" by name:")
        for name, n in names.most_common():
            print(f" {n:4d} {name}")
        print(" by wear (quality code):")
        for w, n in wears.most_common():
            print(f" {n:4d} {w}")
    finally:
        browser.stop()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
uc.loop().run_until_complete(main())
|
|
||||||
@@ -1,79 +0,0 @@
|
|||||||
"""
|
|
||||||
Validate the float-cursor scrape by walking the float axis in BOTH directions and
|
|
||||||
comparing the recovered sell-order id sets. If ascending (lowest float first) and
|
|
||||||
descending (highest float first) independently land on the same listings, the
|
|
||||||
cursor is exhaustive and order-independent — i.e. the count is real, not an artifact
|
|
||||||
of walk direction or boundary double-counting.
|
|
||||||
|
|
||||||
python verify_crosscheck.py "Glock-18 Candy Apple mw"
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import nodriver as uc
|
|
||||||
|
|
||||||
import worker
|
|
||||||
|
|
||||||
CAP = worker.PAGE_CAP
|
|
||||||
ASC = ("https://cs.money/market/buy/?search={q}"
|
|
||||||
"&order=asc&sort=float&minFloat={cur:.12f}&maxFloat=1")
|
|
||||||
DESC = ("https://cs.money/market/buy/?search={q}"
|
|
||||||
"&order=desc&sort=float&minFloat=0&maxFloat={cur:.12f}")
|
|
||||||
|
|
||||||
|
|
||||||
async def walk(page, q, template, ascending, max_fetches=60):
    """Walk the float axis in one direction, collecting sell-orders by id.

    Starts the cursor at 0.0 (ascending) or 1.0 (descending) and, after each
    full page, moves it to the furthest float seen on that page.

    Returns ``(seen, fetches, reason)`` where ``seen`` maps id -> item and
    reason is one of:
      "completed"  a page came back with fewer than CAP items
      "challenged" the body contained a challenge marker
      "stuck"      a full page did not move the cursor (or carried no floats)
      "fetch-cap"  ``max_fetches`` requests were made without finishing
    """
    seen = {}
    cur = 0.0 if ascending else 1.0
    fetches = 0
    for _ in range(max_fetches):
        status, body = await worker.fetch_json(page, template.format(q=q, cur=cur))
        fetches += 1
        if "Just a moment" in body or "challenge-platform" in body:
            return seen, fetches, "challenged"

        items = worker.extract_items(body)
        for it in items:
            if it.get("id") is not None:
                seen[it["id"]] = it
        floats = [
            fl for fl in ((it.get("asset") or {}).get("float") for it in items)
            if isinstance(fl, (int, float))
        ]

        if len(items) < CAP:
            return seen, fetches, "completed"
        if not floats:
            return seen, fetches, "stuck"

        nxt = max(floats) if ascending else min(floats)
        stalled = nxt <= cur if ascending else nxt >= cur
        if stalled:
            return seen, fetches, "stuck"
        cur = nxt
        await page.sleep(worker.DELAY)
    return seen, fetches, "fetch-cap"
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
    """Run the ascending and descending walks for one search and compare id sets.

    The search text comes from the command line (all arguments joined by spaces),
    falling back to a fixed sample query. Prints per-walk counts, the set
    differences, and a verdict. The verdict is only AGREE/DISAGREE when both walks
    ran to completion; otherwise it is reported as inconclusive.
    """
    search = " ".join(sys.argv[1:]) or "Glock-18 Candy Apple mw"
    q = worker.urllib.parse.quote_plus(search)
    browser = await uc.start(headless=False, browser_args=["--blink-settings=imagesEnabled=false"])
    try:
        page = await browser.get("about:blank")
        await worker.warm(page)

        asc, fa, ra = await walk(page, q, ASC, ascending=True)
        print(f"ASC : {len(asc):4d} ids {fa} fetches {ra}")
        desc, fd, rd = await walk(page, q, DESC, ascending=False)
        print(f"DESC: {len(desc):4d} ids {fd} fetches {rd}")

        a, d = set(asc), set(desc)
        union = a | d
        print("\n=== cross-check ===")
        print(f" ASC only: {len(a - d)}")
        print(f" DESC only: {len(d - a)}")
        print(f" in both: {len(a & d)}")
        print(f" UNION (distinct):{len(union)}")
        if ra != "completed" or rd != "completed":
            # Two aborted walks (e.g. both challenged, both empty) compare equal;
            # that must not be reported as a confirmed count.
            agree = f"INCONCLUSIVE — a walk stopped early (asc: {ra}, desc: {rd})"
        elif a == d:
            agree = "AGREE — count is solid"
        else:
            agree = "DISAGREE — one walk missed listings"
        print(f" verdict: {agree}")
    finally:
        browser.stop()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Drive main() on the event loop that nodriver provides.
    _loop = uc.loop()
    _loop.run_until_complete(main())
|
|
||||||
483
worker/worker.py
483
worker/worker.py
@@ -1,483 +0,0 @@
|
|||||||
"""
|
|
||||||
cs.money scrape worker (pull model).
|
|
||||||
|
|
||||||
Holds ONE warm nodriver session (the thing that beats Cloudflare), then loops:
|
|
||||||
poll the .NET C2 for a job, scrape that skin+wear's sell-orders via in-page fetch
|
|
||||||
from the cleared session, and post the results back. The C2 owns job selection
|
|
||||||
(stalest skin+wear first) and persistence; this worker just fetches and forwards.
|
|
||||||
|
|
||||||
cd worker
|
|
||||||
.venv\\Scripts\\Activate.ps1
|
|
||||||
pip install -r requirements.txt
|
|
||||||
python worker.py
|
|
||||||
|
|
||||||
Env knobs:
|
|
||||||
C2_URL C2 base URL (default http://localhost:5080)
|
|
||||||
WORKER_TOKEN shared secret, must match the C2's WorkerToken (default dev-worker-token)
|
|
||||||
MARKET_URL market page to warm the session on (default the buy market)
|
|
||||||
SOLVE_SECONDS seconds to clear Cloudflare on startup (default 30)
|
|
||||||
DELAY / JITTER base + random seconds between page fetches (default 2.0 / 1.5)
|
|
||||||
IDLE_SECONDS sleep when the C2 has no work (default 10)
|
|
||||||
BROWSER_PATH path to Chrome/Edge if auto-detect fails
|
|
||||||
|
|
||||||
Proxy (pick one; IPRoyal takes priority when its creds are set):
|
|
||||||
IPROYAL_USERNAME IPRoyal residential account username
|
|
||||||
IPROYAL_PASSWORD IPRoyal residential account password
|
|
||||||
IPROYAL_COUNTRY ISO country for the exit (default us; blank = any)
|
|
||||||
IPROYAL_LIFETIME_MIN sticky-IP hold in minutes (default 60)
|
|
||||||
PROXY host:port for an auth-free proxy (fallback; omit to use your own IP)
|
|
||||||
|
|
||||||
Each worker process mints its own random IPRoyal sticky session at startup, so N
|
|
||||||
workers get N distinct residential exit IPs with no coordination — scale with
|
|
||||||
`docker compose up --scale worker=N`. On a Cloudflare challenge the worker rotates
|
|
||||||
to a fresh session (new IP) and re-warms. Chromium can't carry proxy credentials on
|
|
||||||
--proxy-server, so we run a tiny in-process forwarder (LocalForwardingProxy below)
|
|
||||||
that injects the IPRoyal auth and chains to the gateway; Chrome talks only to an
|
|
||||||
auth-free 127.0.0.1 endpoint, keeping us at zero CDP (a CDP auth handler is a
|
|
||||||
Cloudflare tell).
|
|
||||||
"""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import base64
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import random
|
|
||||||
import re
|
|
||||||
import urllib.error
|
|
||||||
import urllib.parse
|
|
||||||
import urllib.request
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
import nodriver as uc
|
|
||||||
|
|
||||||
# --- C2 endpoint, session warm-up and pacing (all overridable via environment) ---
C2_URL = os.getenv("C2_URL", "http://localhost:5080").rstrip("/")
TOKEN = os.getenv("WORKER_TOKEN", "dev-worker-token")
MARKET_URL = os.getenv("MARKET_URL", "https://cs.money/market/buy/")
SOLVE_SECONDS = int(os.getenv("SOLVE_SECONDS", "30"))
DELAY = float(os.getenv("DELAY", "2.0"))
JITTER = float(os.getenv("JITTER", "1.5"))
IDLE_SECONDS = int(os.getenv("IDLE_SECONDS", "10"))
PROXY = os.getenv("PROXY")
BROWSER_PATH = os.getenv("BROWSER_PATH")

# IPRoyal residential gateway: a single fixed host/port. Country, sticky-session id
# and lifetime travel as underscore-separated params appended to the password (see
# _iproyal_password). Mirrors the .NET IpRoyalProxyProvider scheme.
IPROYAL_HOST = os.getenv("IPROYAL_HOST", "geo.iproyal.com")
IPROYAL_PORT = int(os.getenv("IPROYAL_PORT", "12321"))
IPROYAL_USERNAME = os.getenv("IPROYAL_USERNAME")
IPROYAL_PASSWORD = os.getenv("IPROYAL_PASSWORD")
IPROYAL_COUNTRY = os.getenv("IPROYAL_COUNTRY", "us").strip().lower()
IPROYAL_LIFETIME_MIN = int(os.getenv("IPROYAL_LIFETIME_MIN", "60"))

# The residential proxy is metered per GB. Cloudflare gates on JS rather than images,
# and the listings arrive as JSON, so images are blocked unless LOAD_IMAGES=1 is set
# (useful when debugging the visible page).
LOAD_IMAGES = os.getenv("LOAD_IMAGES") == "1"

# cs.money is an Astro SSR app: the free-text market search filters server-side and
# embeds the resulting listings in the page as a __page-params JSON blob. The
# /2.0/market/sell-orders API rejects a `search` param (HTTP 400), so the worker
# fetches the PAGE for a search and reads the embedded items (same item shape).
#
# A page returns at most 60 items and ignores offset, so pagination is a FORWARD
# CURSOR on float: cs.money honors `order=asc&sort=float` + `minFloat`, and float is
# full-precision and effectively unique per item. Take the 60 lowest-float items
# at/above `lo`, advance `lo` to the highest float returned, repeat until a page is
# under the cap. (The old minPrice/maxPrice bisection silently truncated cheap skins:
# >60 listings can share a sub-$0.02 reference band that no price window can split;
# floats almost never tie, so the cursor keeps making progress.)
PAGE = "https://cs.money/market/buy/?search={search}&order=asc&sort=float&minFloat={lo:.12f}&maxFloat=1"
PAGE_CAP = 60  # items per SSR page
PAGE_PARAMS_RE = re.compile(
    r'<script\b[^>]*id="__page-params"[^>]*>(.*?)</script>',
    re.DOTALL,
)
|
|
||||||
|
|
||||||
|
|
||||||
# --- IPRoyal residential proxy ----------------------------------------------------
|
|
||||||
|
|
||||||
def _new_session_id() -> str:
|
|
||||||
"""Short, opaque, URL-safe token. IPRoyal pins one residential exit IP per
|
|
||||||
distinct session value, so a fresh id == a fresh IP."""
|
|
||||||
return uuid.uuid4().hex[:10]
|
|
||||||
|
|
||||||
|
|
||||||
def _iproyal_password(session_id: str) -> str:
    """Build the IPRoyal password with targeting/session knobs appended.

    The result has the shape "<pass>_country-us_session-<id>_lifetime-60m"; the
    country segment is omitted when IPROYAL_COUNTRY is blank.
    """
    segments = [IPROYAL_PASSWORD]
    if IPROYAL_COUNTRY:
        segments.append(f"country-{IPROYAL_COUNTRY}")
    segments.append(f"session-{session_id}")
    segments.append(f"lifetime-{IPROYAL_LIFETIME_MIN}m")
    return "_".join(segments)
|
|
||||||
|
|
||||||
|
|
||||||
class LocalForwardingProxy:
|
|
||||||
"""In-process HTTP proxy on 127.0.0.1 that chains every connection to the IPRoyal
|
|
||||||
gateway, injecting the Proxy-Authorization header itself. Chromium ignores creds in
|
|
||||||
--proxy-server and the in-browser ways to answer the gateway's 407 (a CDP auth
|
|
||||||
handler, or a disabled MV2 extension) are Cloudflare tells — so we terminate the
|
|
||||||
browser->proxy hop locally and add auth here, leaving Chrome to talk to an auth-free
|
|
||||||
endpoint at zero CDP. HTTPS (all cs.money serves) flows through the CONNECT tunnel,
|
|
||||||
so this proxy only relays ciphertext and never sees plaintext. Ported from the .NET
|
|
||||||
LocalForwardingProxy. The active session token can be swapped live (set_password) to
|
|
||||||
move to a fresh exit IP without restarting the browser. (New tunnels pick up the new
|
|
||||||
IP; any still-open keep-alive tunnel stays on the old one until it closes.)"""
|
|
||||||
|
|
||||||
def __init__(self, host: str, port: int, username: str, password: str):
|
|
||||||
self._host = host
|
|
||||||
self._port = port
|
|
||||||
self._username = username
|
|
||||||
self._password = password
|
|
||||||
self._server: asyncio.AbstractServer | None = None
|
|
||||||
self.endpoint = ""
|
|
||||||
|
|
||||||
def set_password(self, password: str) -> None:
|
|
||||||
self._password = password
|
|
||||||
|
|
||||||
def _auth_header(self) -> str:
|
|
||||||
token = base64.b64encode(f"{self._username}:{self._password}".encode()).decode()
|
|
||||||
return f"Proxy-Authorization: Basic {token}\r\n"
|
|
||||||
|
|
||||||
async def start(self) -> "LocalForwardingProxy":
|
|
||||||
self._server = await asyncio.start_server(self._handle, "127.0.0.1", 0)
|
|
||||||
port = self._server.sockets[0].getsockname()[1]
|
|
||||||
self.endpoint = f"127.0.0.1:{port}"
|
|
||||||
return self
|
|
||||||
|
|
||||||
async def stop(self) -> None:
|
|
||||||
if self._server is not None:
|
|
||||||
self._server.close()
|
|
||||||
try:
|
|
||||||
await self._server.wait_closed()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
async def _read_header(reader: asyncio.StreamReader) -> str | None:
|
|
||||||
"""Read up to the end of the HTTP header block (CRLFCRLF). None on EOF/overflow."""
|
|
||||||
try:
|
|
||||||
data = await reader.readuntil(b"\r\n\r\n")
|
|
||||||
except (asyncio.IncompleteReadError, asyncio.LimitOverrunError):
|
|
||||||
return None
|
|
||||||
return data.decode("latin-1")
|
|
||||||
|
|
||||||
async def _handle(self, client_reader: asyncio.StreamReader, client_writer: asyncio.StreamWriter) -> None:
|
|
||||||
up_writer: asyncio.StreamWriter | None = None
|
|
||||||
try:
|
|
||||||
header = await self._read_header(client_reader)
|
|
||||||
if not header:
|
|
||||||
return
|
|
||||||
parts = header.split("\r\n", 1)[0].split(" ")
|
|
||||||
if len(parts) < 2:
|
|
||||||
return
|
|
||||||
method, target = parts[0], parts[1]
|
|
||||||
|
|
||||||
up_reader, up_writer = await asyncio.open_connection(self._host, self._port)
|
|
||||||
if method.upper() == "CONNECT":
|
|
||||||
# HTTPS: open an authenticated tunnel upstream, then relay raw bytes.
|
|
||||||
up_writer.write(
|
|
||||||
f"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{self._auth_header()}\r\n".encode())
|
|
||||||
await up_writer.drain()
|
|
||||||
up_header = await self._read_header(up_reader)
|
|
||||||
status = up_header.split(" ", 2) if up_header else []
|
|
||||||
if len(status) < 2 or status[1] != "200":
|
|
||||||
line = (up_header or "no response").split("\r\n", 1)[0]
|
|
||||||
print(f" proxy: upstream refused CONNECT {target}: {line}")
|
|
||||||
client_writer.write(b"HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n")
|
|
||||||
await client_writer.drain()
|
|
||||||
return
|
|
||||||
client_writer.write(b"HTTP/1.1 200 Connection established\r\n\r\n")
|
|
||||||
await client_writer.drain()
|
|
||||||
else:
|
|
||||||
# Plain HTTP: re-inject the request upstream with auth, then relay.
|
|
||||||
idx = header.index("\r\n") + 2
|
|
||||||
up_writer.write((header[:idx] + self._auth_header() + header[idx:]).encode())
|
|
||||||
await up_writer.drain()
|
|
||||||
|
|
||||||
await self._relay(client_reader, client_writer, up_reader, up_writer)
|
|
||||||
except Exception:
|
|
||||||
pass # one bad tunnel must never take down the listener
|
|
||||||
finally:
|
|
||||||
for w in (client_writer, up_writer):
|
|
||||||
if w is not None:
|
|
||||||
try:
|
|
||||||
w.close()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
async def _relay(
|
|
||||||
client_reader: asyncio.StreamReader, client_writer: asyncio.StreamWriter,
|
|
||||||
up_reader: asyncio.StreamReader, up_writer: asyncio.StreamWriter) -> None:
|
|
||||||
# Pipe both directions, but tear the whole tunnel down as soon as EITHER side
|
|
||||||
# closes (mirrors the .NET WhenAny). Waiting for both — as a plain gather does —
|
|
||||||
# leaks a task holding two sockets on every half-closed connection, which piles
|
|
||||||
# up fast across a long multi-worker run. Closing both writers when the first
|
|
||||||
# pipe finishes unblocks the other's pending read so both tasks settle.
|
|
||||||
async def pipe(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
|
|
||||||
try:
|
|
||||||
while data := await reader.read(65536):
|
|
||||||
writer.write(data)
|
|
||||||
await writer.drain()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
a = asyncio.create_task(pipe(client_reader, up_writer))
|
|
||||||
b = asyncio.create_task(pipe(up_reader, client_writer))
|
|
||||||
try:
|
|
||||||
await asyncio.wait({a, b}, return_when=asyncio.FIRST_COMPLETED)
|
|
||||||
finally:
|
|
||||||
for w in (client_writer, up_writer):
|
|
||||||
try:
|
|
||||||
w.close()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
await asyncio.gather(a, b, return_exceptions=True)
|
|
||||||
|
|
||||||
|
|
||||||
def looks_like_challenge(body: str) -> bool:
    """Return True when ``body`` is empty, starts with markup, or carries a
    Cloudflare challenge marker ("Just a moment" / "challenge-platform")."""
    stripped = (body or "").lstrip()
    if not stripped:
        return True
    if stripped.startswith("<"):
        return True
    return any(marker in body for marker in ("Just a moment", "challenge-platform"))
|
|
||||||
|
|
||||||
|
|
||||||
# --- C2 HTTP (stdlib, run off the event loop) -------------------------------------
|
|
||||||
|
|
||||||
def _get_job_sync():
    """Blocking poll of the C2 for the next job.

    Returns the decoded JSON job, or None when the C2 has no work (HTTP 204) or the
    request failed for any transport/parse reason. Failures are printed and never
    raised, so the caller's polling loop keeps running.
    """
    req = urllib.request.Request(f"{C2_URL}/jobs/next", headers={"X-Worker-Token": TOKEN})
    try:
        with urllib.request.urlopen(req, timeout=15) as r:
            if r.status == 204:
                return None
            return json.loads(r.read() or b"null")
    except urllib.error.HTTPError as e:
        print(f" C2 /jobs/next -> HTTP {e.code}")
        return None
    except OSError as e:
        # URLError is an OSError; so are read timeouts and connection resets that
        # happen after the response headers arrived and are not wrapped by urllib.
        print(f" C2 unreachable: {e}")
        return None
    except ValueError as e:
        # Malformed or undecodable JSON body (JSONDecodeError / UnicodeDecodeError).
        print(f" C2 /jobs/next -> unparseable body: {e}")
        return None
|
|
||||||
|
|
||||||
|
|
||||||
def _post_result_sync(job_id: str, payload: dict):
    """Blocking POST of a job's scrape result to the C2.

    Args:
        job_id: id of the job the payload belongs to; becomes part of the URL.
        payload: JSON-serialisable result body.

    Returns the decoded JSON response, or None when the request failed for any
    transport/parse reason. Failures are printed and never raised.
    """
    data = json.dumps(payload).encode()
    req = urllib.request.Request(
        f"{C2_URL}/jobs/{job_id}/result", data=data, method="POST",
        headers={"X-Worker-Token": TOKEN, "Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=60) as r:
            return json.loads(r.read() or b"null")
    except urllib.error.HTTPError as e:
        print(f" C2 result -> HTTP {e.code}: {e.read()[:200]!r}")
        return None
    except OSError as e:
        # URLError is an OSError; so are read timeouts and connection resets that
        # happen mid-response and are not wrapped by urllib.
        print(f" C2 unreachable posting result: {e}")
        return None
    except ValueError as e:
        # Malformed or undecodable JSON body (JSONDecodeError / UnicodeDecodeError).
        print(f" C2 result -> unparseable body: {e}")
        return None
|
|
||||||
|
|
||||||
|
|
||||||
async def get_job():
    """Poll the C2 for a job on a worker thread so the event loop is not blocked."""
    job = await asyncio.to_thread(_get_job_sync)
    return job
|
|
||||||
|
|
||||||
|
|
||||||
async def post_result(job_id, payload):
    """Post a job result to the C2 on a worker thread so the event loop is not blocked."""
    response = await asyncio.to_thread(_post_result_sync, job_id, payload)
    return response
|
|
||||||
|
|
||||||
|
|
||||||
# --- scraping ---------------------------------------------------------------------
|
|
||||||
|
|
||||||
async def fetch_json(page, url: str) -> tuple[str, str, int]:
    """Fetch ``url`` from inside the page and report the metered transfer size.

    The in-page script also reads the Resource Timing ``transferSize``: the
    COMPRESSED bytes on the wire (what the metered proxy bills), not ``len(body)``,
    which is the decompressed size. Same-origin (cs.money), so the size fields are
    exposed.

    Returns:
        ``(status, body, wire_bytes)``. ``status`` is the HTTP status as a string,
        or "-1" when the evaluation produced no usable payload. ``body`` is always
        a string. ``wire_bytes`` is -1 if the timing entry wasn't available.
    """
    # json.dumps yields a valid JavaScript string literal for any URL; Python's
    # repr() does not (it can emit escapes JavaScript does not understand).
    js_url = json.dumps(url)
    expr = "".join([
        "fetch(", js_url, ", {credentials:'include', headers:{'accept':'application/json'}})",
        ".then(async r => {",
        " const body = await r.text();",
        " const e = performance.getEntriesByName(", js_url, ").slice(-1)[0];",
        " return JSON.stringify({status: r.status, body: body,",
        " wire: e ? e.transferSize : -1, dec: e ? e.decodedBodySize : -1});",
        "})",
    ])
    raw = await page.evaluate(expr, await_promise=True)
    if not isinstance(raw, str):
        return ("-1", "", -1)
    try:
        obj = json.loads(raw)
    except ValueError:
        return ("-1", raw, -1)
    if not isinstance(obj, dict):
        # Valid JSON that is not the expected envelope; hand it back unparsed.
        return ("-1", raw, -1)

    body = obj.get("body")
    if not isinstance(body, str):
        body = ""  # callers run substring checks on the body
    try:
        wire = int(obj.get("wire", -1))
    except (TypeError, ValueError):
        wire = -1
    return (str(obj.get("status", "-1")), body, wire)
|
|
||||||
|
|
||||||
|
|
||||||
async def _click(page, text, timeout=3):
|
|
||||||
try:
|
|
||||||
el = await page.find(text, best_match=True, timeout=timeout)
|
|
||||||
if el:
|
|
||||||
await el.click()
|
|
||||||
return True
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
async def dismiss_consent(page):
    """Dismiss the cookie banner the privacy-preserving way.

    The banner only offers 'Accept all' / 'Manage cookies'; the Reject-all control
    lives inside the Manage window, so the path is Manage -> Reject all -> Confirm.
    (The data path reads SSR __page-params regardless, but this keeps the session
    honest and unblocks any future interaction.)

    Returns a comma-separated summary of the steps that succeeded, or None when
    nothing was clicked.
    """
    taken = []
    opened = await _click(page, "Manage cookies")
    if not opened:
        opened = await _click(page, "Manage")
    if opened:
        await page.sleep(1)
        if await _click(page, "Reject all"):
            taken.append("reject-all")
        # Try the confirm labels in order; stop at the first one that clicks.
        for label in ("Confirm my choice", "Confirm", "Save"):
            if await _click(page, label):
                taken.append(f"confirm:{label}")
                break
    if not taken:
        return None
    return ", ".join(taken)
|
|
||||||
|
|
||||||
|
|
||||||
async def warm(page):
    """Open the market and clear Cloudflare so the session holds cf_clearance.

    Navigates to MARKET_URL, waits SOLVE_SECONDS, then attempts to dismiss the
    consent banner and prints the outcome.
    """
    banner = f"Warming session at {MARKET_URL} (clear Cloudflare; {SOLVE_SECONDS}s)..."
    print(banner)
    await page.get(MARKET_URL)
    await page.sleep(SOLVE_SECONDS)
    outcome = await dismiss_consent(page)
    if outcome:
        consent_state = "dismissed via " + outcome
    else:
        consent_state = "left up"
    print(f"Consent: {consent_state}")
|
|
||||||
|
|
||||||
|
|
||||||
def extract_items(html: str) -> list:
    """Pull inventory.items out of the page's __page-params JSON blob.

    Returns an empty list when the blob is absent, is not valid JSON, or does not
    have the expected ``{"inventory": {"items": [...]}}`` shape (including
    ``"inventory": null``), so the return value is always a list.
    """
    m = PAGE_PARAMS_RE.search(html or "")
    if not m:
        return []
    try:
        params = json.loads(m.group(1))
    except json.JSONDecodeError:
        return []
    # Walk the expected shape defensively: a null or non-dict level previously
    # raised AttributeError on the chained .get() calls.
    if not isinstance(params, dict):
        return []
    inventory = params.get("inventory")
    if not isinstance(inventory, dict):
        return []
    items = inventory.get("items")
    return items if isinstance(items, list) else []
|
|
||||||
|
|
||||||
|
|
||||||
async def scrape_job(page, job) -> tuple[list, int, str, int]:
    """Scrape ALL listings for one skin+wear via a forward float cursor.

    A search page returns at most 60 items and ignores offset, but cs.money sorts by
    float (order=asc&sort=float) and filters by minFloat. So the float axis is walked:
    take the 60 lowest-float items at/above `lo`, advance `lo` to the highest float on
    the page, and repeat until a page is under the cap. The boundary item is re-fetched
    (minFloat is inclusive) and dropped by the id dedup.

    Args:
        page: warmed browser tab used for the in-page fetches.
        job: dict with "search" (free-text query) and optional "maxPages" (cap on
            page fetches; defaults to 40 when missing or null).

    Returns:
        (items, fetches, reason, wire_bytes), where wire_bytes is the metered
        (compressed) cost and reason is "completed" only for a full sweep. Any other
        reason ("challenged", "fetch-failed", "stuck-float-tie", "fetch-cap") marks
        the items as partial.
    """
    search = urllib.parse.quote_plus(job["search"])
    max_fetches = job.get("maxPages")
    if max_fetches is None:
        max_fetches = 40  # safety cap on page fetches per job
    seen: dict = {}
    fetches = 0
    wire = 0
    lo = 0.0
    reason = "completed"

    while fetches < max_fetches:
        status, body, wbytes = await fetch_json(page, PAGE.format(search=search, lo=lo))
        body = body or ""
        fetches += 1
        if wbytes > 0:
            wire += wbytes

        if "Just a moment" in body or "challenge-platform" in body:
            return list(seen.values()), fetches, "challenged", wire

        if not str(status).startswith("2"):
            # A failed fetch yields zero items, which would otherwise look like a
            # short final page and be reported as an authoritative "completed".
            reason = "fetch-failed"
            break

        items = extract_items(body)
        floats = []
        for it in items:
            if not isinstance(it, dict):
                continue  # ignore malformed entries instead of crashing the job
            if it.get("id") is not None:
                seen[it["id"]] = it
            fl = (it.get("asset") or {}).get("float")
            if isinstance(fl, (int, float)):
                floats.append(fl)

        if len(items) < PAGE_CAP:
            break  # last page: fewer than the cap means we've seen everything

        # Advance the cursor past the highest float on this page. Items at exactly that
        # float are re-fetched next round (minFloat is inclusive) and deduped by id.
        nxt = max(floats) if floats else None
        if nxt is None or nxt <= lo:
            # Cursor can't advance: >60 listings share a single float value, or the
            # items carry no float. Bail loudly rather than spin; a flagged gap beats
            # a silent one (this is the failure the price-window version hid).
            reason = "stuck-float-tie"
            break
        lo = nxt

        await page.sleep(DELAY + random.uniform(0, JITTER))
    else:
        # Loop ran out of fetch budget without reaching a short page.
        reason = "fetch-cap"

    return list(seen.values()), fetches, reason, wire
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
    """Worker entry point: start the proxy and browser, warm the session, then loop
    forever pulling jobs from the C2, scraping them and posting the results back.

    On a "challenged" scrape the exit IP is rotated (IPRoyal only) and the session is
    re-warmed before the partial result is posted. The browser and the local
    forwarder are always shut down on exit, including when startup fails part-way.
    """
    # IPRoyal (auth'd, per-worker sticky IP) takes priority; else a plain auth-free
    # PROXY; else this host's own IP. The forwarder injects IPRoyal auth so Chrome
    # only ever sees an auth-free 127.0.0.1 endpoint.
    forwarder = None
    session_id = None
    browser = None
    try:
        if IPROYAL_USERNAME and IPROYAL_PASSWORD:
            session_id = _new_session_id()
            forwarder = await LocalForwardingProxy(
                IPROYAL_HOST, IPROYAL_PORT, IPROYAL_USERNAME, _iproyal_password(session_id)).start()
            proxy = forwarder.endpoint
            proxy_label = f"iproyal[{IPROYAL_COUNTRY or 'any'}] session {session_id} via {forwarder.endpoint}"
        else:
            proxy = PROXY
            proxy_label = PROXY or "own IP"

        args = [f"--proxy-server={proxy}"] if proxy else []
        if not LOAD_IMAGES:
            # Disable image loading at the engine level: the dominant bandwidth cost
            # on an image-heavy market, and unneeded for CF clearance or the JSON data.
            args.append("--blink-settings=imagesEnabled=false")
        if os.environ.get("CHROME_NO_SANDBOX") == "1":
            # Required when running Chromium as root in a container.
            args += ["--no-sandbox", "--disable-dev-shm-usage"]
        print(f"Starting worker (C2={C2_URL}, proxy={proxy_label}, images={'on' if LOAD_IMAGES else 'off'})...")
        # Started inside the try so a failed launch still stops the forwarder.
        browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH, browser_args=args)

        page = await browser.get("about:blank")
        await warm(page)

        total_wire = 0  # metered (compressed) bytes this worker has pulled, lifetime
        while True:
            job = await get_job()
            if not job:
                await asyncio.sleep(IDLE_SECONDS)
                continue

            print(f"Job {job['jobId'][:8]} — search {job['search']!r}")
            items, pages, reason, wire = await scrape_job(page, job)
            total_wire += wire

            if reason == "challenged":
                # The exit IP is likely flagged. On IPRoyal, rotate to a fresh sticky
                # session (new IP) before re-warming; otherwise just re-solve in place.
                if forwarder is not None:
                    session_id = _new_session_id()
                    forwarder.set_password(_iproyal_password(session_id))
                    print(f" challenged; rotating exit IP -> session {session_id}, re-warming...")
                else:
                    print(" re-challenged; re-warming session...")
                await warm(page)

            result = await post_result(job["jobId"], {
                "items": items, "pages": pages, "stoppedReason": reason})
            summary = (f"matched {result.get('matched')}, new {result.get('inserted')}, "
                       f"upd {result.get('updated')}, removed {result.get('removed')}") if result else "post failed"
            wire_kb = wire / 1024
            print(f" scraped {len(items)} items ({pages}p, {reason}, {wire_kb:.0f}KB wire) "
                  f"-> {summary} [lifetime {total_wire / 1_048_576:.1f}MB]")

            await page.sleep(DELAY + random.uniform(0, JITTER))
    finally:
        try:
            if browser is not None:
                browser.stop()
        finally:
            # Runs even if stopping the browser raised.
            if forwarder is not None:
                await forwarder.stop()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Drive main() on the event loop that nodriver provides.
    _loop = uc.loop()
    _loop.run_until_complete(main())
|
|
||||||
Reference in New Issue
Block a user