using BlueLaminate.Scraper.CsMoney;
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Options;
using System.CommandLine;

namespace BlueLaminate.Cli.Commands;

// NOTE(review): this text appears to have lost everything written between angle
// brackets (generic type arguments such as the ones on `new Option(...)`,
// `GetRequiredService>()` and the `Task` return type, plus the XML doc tags).
// The code tokens below are kept exactly as they appear here — restore the type
// arguments from the original file before compiling. TODO confirm.

/// <summary>
/// capture-csmoney: open the cs.money market through the IPRoyal residential
/// proxy (local forwarding hop, no CDP) in a real, non-headless browser. You clear
/// the Cloudflare challenge once; the tool then pages the listings API from inside
/// the cleared page with human-like pacing, dumping each page's JSON and reporting
/// how many pages survive before a re-challenge. Discovery/measurement tool — writes
/// nothing to the database. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.
/// </summary>
/// <remarks>
/// Exit codes produced by the command action: 0 when at least one page succeeded,
/// 1 when no page succeeded or the capture threw, 130 when the run was cancelled.
/// </remarks>
internal static class CaptureCsMoneyCommand
{
    /// <summary>
    /// Builds the <c>capture-csmoney</c> command: declares its five options
    /// (<c>--country</c>, <c>--load-images</c>, <c>--pages</c>, <c>--out</c>,
    /// <c>--no-proxy</c>) and wires the action that forwards the parsed values to
    /// <c>RunAsync</c>.
    /// </summary>
    /// <param name="host">
    /// Host captured by the command action; its service provider is used to resolve
    /// the services the capture needs when the command runs.
    /// </param>
    /// <returns>The configured command, ready to be added to a parent command.</returns>
    public static Command Build(IHost host)
    {
        // Optional override for the exit country; when absent or blank, RunAsync falls
        // back to the configured value.
        var countryOption = new Option("--country")
        {
            Description = "ISO country code(s) for the exit IP, e.g. \"us\". Default: configured/random.",
        };

        // Flag; RunAsync ORs it with the configured LoadImages setting, so it can only
        // turn image loading on, never off.
        var loadImagesOption = new Option("--load-images")
        {
            Description = "Load images (uses more bandwidth). Default off to conserve the metered plan.",
        };

        // Upper bound on pages to fetch; defaults to 50 when not supplied.
        var pagesOption = new Option("--pages")
        {
            Description = "Maximum offset pages (60 items each) to fetch before stopping.",
            DefaultValueFactory = _ => 50,
        };

        // Diagnostic flag; RunAsync inverts it into the capture's useProxy argument.
        var noProxyOption = new Option("--no-proxy")
        {
            Description =
                "Diagnostic: drive the browser on this machine's own IP (no IPRoyal proxy), " +
                "to isolate whether re-challenges are IP reputation vs. the webdriver fingerprint.",
        };

        // Output directory for the captured JSON; defaults to "csmoney-captures".
        var outOption = new Option("--out")
        {
            Description = "Directory to write captured JSON pages to.",
            DefaultValueFactory = _ => "csmoney-captures",
        };

        // The collection initializer registers each option on the command.
        var command = new Command(
            "capture-csmoney",
            "Open the cs.money market through the residential proxy, clear Cloudflare once, then page " +
            "the listings API with pacing and report how many pages survive. Discovery/measurement " +
            "tool — writes nothing to the database. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.")
        {
            countryOption,
            loadImagesOption,
            pagesOption,
            outOption,
            noProxyOption,
        };

        // The value RunAsync produces is handed back as the action's result.
        // The null-forgiving `!` on --out presumably relies on its default value
        // factory always supplying a value — TODO confirm.
        command.SetAction((parseResult, ct) => RunAsync(
            host,
            parseResult.GetValue(countryOption),
            parseResult.GetValue(loadImagesOption),
            parseResult.GetValue(pagesOption),
            parseResult.GetValue(outOption)!,
            parseResult.GetValue(noProxyOption),
            ct));

        return command;
    }

    /// <summary>
    /// Runs one interactive capture session: prints operator instructions, resolves
    /// the capture service from a fresh DI scope, runs it, and prints a summary line.
    /// </summary>
    /// <param name="host">Host whose services are used to create the DI scope.</param>
    /// <param name="country">
    /// Exit country override; when null, empty or whitespace the configured
    /// <c>Country</c> option value is used instead.
    /// </param>
    /// <param name="loadImages">
    /// Requests image loading; combined with the configured <c>LoadImages</c> value
    /// using logical OR.
    /// </param>
    /// <param name="pages">Maximum number of pages, shown in the prompt and passed to the capture.</param>
    /// <param name="outDir">
    /// Directory passed to the capture for its output; its full path is printed in
    /// the summary.
    /// </param>
    /// <param name="noProxy">
    /// When true the capture is invoked with <c>useProxy: false</c> and the banner is
    /// suffixed with " (DIRECT — no proxy)".
    /// </param>
    /// <param name="ct">Cancellation token forwarded to the capture and to the Enter-wait task.</param>
    /// <returns>
    /// 0 when <c>PagesSucceeded</c> is greater than zero, 1 when it is not or when any
    /// other exception is caught, 130 when an <see cref="OperationCanceledException"/>
    /// is caught.
    /// </returns>
    // NOTE(review): the return type reads plain `Task` here although every path
    // returns an integer — the type argument was presumably stripped. TODO confirm.
    private static async Task RunAsync(
        IHost host,
        string? country,
        bool loadImages,
        int pages,
        string outDir,
        bool noProxy,
        CancellationToken ct)
    {
        // The scope is disposed when this method exits, on every path.
        using var scope = host.Services.CreateScope();

        // NOTE(review): the type argument is missing from this call as it appears
        // here; the `.Value` access suggests an IOptions wrapper around the
        // cs.money settings type — TODO confirm against the original file.
        var options = scope.ServiceProvider.GetRequiredService>().Value;

        // Command-line values win over configuration; a blank --country means
        // "use the configured one".
        var exitCountry = string.IsNullOrWhiteSpace(country) ? options.Country : country;
        var images = loadImages || options.LoadImages;

        Console.WriteLine($"Opening {options.MarketUrl}{(noProxy ? " (DIRECT — no proxy)" : "")}");
        Console.WriteLine(
            "Solve any Cloudflare challenge in the window and wait until the market grid " +
            "(items + prices) is actually visible — that means the session is cleared.");
        Console.WriteLine(
            $"Press Enter here once it's visible. The tool then pages up to {pages} page(s) of " +
            "listings from inside the cleared page and reports how far it gets.");

        try
        {
            // NOTE(review): the service type argument is missing here as well; the
            // resolved object must expose the RunAsync overload called below.
            var capture = scope.ServiceProvider.GetRequiredService();

            // Block until the operator presses Enter; the browser stays open the whole
            // time. ReadLine is sync, so push it off-thread.
            // NOTE(review): the token given to Task.Run only stops the delegate from
            // being started; it does not interrupt a ReadLine that is already blocking.
            var result = await capture.RunAsync(
                outDir,
                new ProxyRequest(Country: exitCountry, Sticky: true),
                images,
                useProxy: !noProxy,
                pages,
                () => Task.Run(() => Console.ReadLine(), ct),
                ct);

            var full = Path.GetFullPath(outDir);
            Console.WriteLine();
            Console.WriteLine(
                $"Stopped: {result.StoppedReason}. {result.PagesSucceeded} page(s), " +
                $"{result.ItemsTotal} item(s) → {full}");

            // Zero successful pages counts as a failed run.
            return result.PagesSucceeded > 0 ? 0 : 1;
        }
        catch (OperationCanceledException)
        {
            // Must precede the general handler so cancellation gets its own exit code.
            // 130 is presumably the shell convention for Ctrl+C (128 + SIGINT) — TODO confirm.
            Console.Error.WriteLine("Capture cancelled.");
            return 130;
        }
        catch (Exception ex)
        {
            // Top-level boundary of the command: report the message and fail the run.
            Console.Error.WriteLine($"cs.money capture failed: {ex.Message}");
            return 1;
        }
    }
}