Add cs.money worker stack with per-worker IPRoyal residential proxy
Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
122
BlueLaminate/BlueLaminate.Cli/Commands/CaptureCsMoneyCommand.cs
Normal file
122
BlueLaminate/BlueLaminate.Cli/Commands/CaptureCsMoneyCommand.cs
Normal file
@@ -0,0 +1,122 @@
|
||||
using BlueLaminate.Scraper.CsMoney;
|
||||
using BlueLaminate.Scraper.Proxies;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Options;
|
||||
using System.CommandLine;
|
||||
|
||||
namespace BlueLaminate.Cli.Commands;
|
||||
|
||||
/// <summary>
|
||||
/// <c>capture-csmoney</c>: open the cs.money market through the IPRoyal residential
|
||||
/// proxy (local forwarding hop, no CDP) in a real, non-headless browser. You clear
|
||||
/// the Cloudflare challenge once; the tool then pages the listings API from inside
|
||||
/// the cleared page with human-like pacing, dumping each page's JSON and reporting
|
||||
/// how many pages survive before a re-challenge. Discovery/measurement tool — writes
|
||||
/// nothing to the database. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.
|
||||
/// </summary>
|
||||
internal static class CaptureCsMoneyCommand
{
    /// <summary>
    /// Builds the <c>capture-csmoney</c> command: declares its options and routes the
    /// parsed values to <see cref="RunAsync"/>.
    /// </summary>
    /// <param name="host">Host whose service provider is used when the command runs.</param>
    /// <returns>The configured command.</returns>
    public static Command Build(IHost host)
    {
        var countryOption = new Option<string?>("--country")
        {
            Description = "ISO country code(s) for the exit IP, e.g. \"us\". Default: configured/random.",
        };
        var loadImagesOption = new Option<bool>("--load-images")
        {
            Description = "Load images (uses more bandwidth). Default off to conserve the metered plan.",
        };
        var pagesOption = new Option<int>("--pages")
        {
            Description = "Maximum offset pages (60 items each) to fetch before stopping.",
            DefaultValueFactory = _ => 50,
        };
        var noProxyOption = new Option<bool>("--no-proxy")
        {
            Description = "Diagnostic: drive the browser on this machine's own IP (no IPRoyal proxy), "
                + "to isolate whether re-challenges are IP reputation vs. the webdriver fingerprint.",
        };
        var outOption = new Option<string>("--out")
        {
            Description = "Directory to write captured JSON pages to.",
            DefaultValueFactory = _ => "csmoney-captures",
        };

        var command = new Command(
            "capture-csmoney",
            "Open the cs.money market through the residential proxy, clear Cloudflare once, then page "
                + "the listings API with pacing and report how many pages survive. Discovery/measurement "
                + "tool — writes nothing to the database. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.")
        {
            countryOption,
            loadImagesOption,
            pagesOption,
            outOption,
            noProxyOption,
        };

        command.SetAction((parseResult, ct) => RunAsync(
            host,
            parseResult.GetValue(countryOption),
            parseResult.GetValue(loadImagesOption),
            parseResult.GetValue(pagesOption),
            // --out has a default value factory, so a value is always present here.
            parseResult.GetValue(outOption)!,
            parseResult.GetValue(noProxyOption),
            ct));

        return command;
    }

    /// <summary>
    /// Prints the operator instructions, runs the capture service and reports the outcome.
    /// </summary>
    /// <param name="host">Host used to create the DI scope for this run.</param>
    /// <param name="country">Exit-country override; blank falls back to the configured country.</param>
    /// <param name="loadImages">Forces image loading on; otherwise the configured setting applies.</param>
    /// <param name="pages">Maximum number of listing pages to fetch.</param>
    /// <param name="outDir">Directory the captured JSON pages are written to.</param>
    /// <param name="noProxy">When true, the capture runs without the proxy.</param>
    /// <param name="ct">Cancels the capture, including the wait for the operator's Enter key.</param>
    /// <returns>
    /// 0 when at least one page succeeded, 1 when none did or the capture threw,
    /// 130 when the run was cancelled.
    /// </returns>
    private static async Task<int> RunAsync(
        IHost host, string? country, bool loadImages, int pages, string outDir, bool noProxy,
        CancellationToken ct)
    {
        using var scope = host.Services.CreateScope();
        var options = scope.ServiceProvider.GetRequiredService<IOptions<CsMoneyOptions>>().Value;

        // Command-line values win; configuration supplies the fallback.
        var exitCountry = string.IsNullOrWhiteSpace(country) ? options.Country : country;
        var images = loadImages || options.LoadImages;

        Console.WriteLine($"Opening {options.MarketUrl}{(noProxy ? " (DIRECT — no proxy)" : "")}");
        Console.WriteLine(
            "Solve any Cloudflare challenge in the window and wait until the market grid "
                + "(items + prices) is actually visible — that means the session is cleared.");
        Console.WriteLine(
            $"Press Enter here once it's visible. The tool then pages up to {pages} page(s) of "
                + "listings from inside the cleared page and reports how far it gets.");

        try
        {
            var capture = scope.ServiceProvider.GetRequiredService<CsMoneyCaptureService>();

            // Wait until the operator presses Enter; the browser stays open the whole
            // time. ReadLine is sync, so push it off-thread. The token given to Task.Run
            // only prevents the work from starting — it cannot interrupt a ReadLine that
            // is already blocked — so WaitAsync(ct) is what lets cancellation end the
            // wait promptly instead of hanging until a key is pressed.
            var result = await capture.RunAsync(
                outDir,
                new ProxyRequest(Country: exitCountry, Sticky: true),
                images,
                useProxy: !noProxy,
                pages,
                () => Task.Run(() => Console.ReadLine(), ct).WaitAsync(ct),
                ct);

            var full = Path.GetFullPath(outDir);
            Console.WriteLine();
            Console.WriteLine(
                $"Stopped: {result.StoppedReason}. {result.PagesSucceeded} page(s), "
                    + $"{result.ItemsTotal} item(s) → {full}");
            return result.PagesSucceeded > 0 ? 0 : 1;
        }
        catch (OperationCanceledException)
        {
            Console.Error.WriteLine("Capture cancelled.");
            // 130 is the conventional shell exit code for an interrupted (Ctrl+C) process.
            return 130;
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"cs.money capture failed: {ex.Message}");
            return 1;
        }
    }
}
|
||||
Reference in New Issue
Block a user