using System.Globalization;
using System.Text;
using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;

namespace BlueLaminate.Scraper.CsMoney;

/// <summary>Outcome of a stealth pagination run.</summary>
/// <param name="PagesSucceeded">How many offset pages returned listings JSON before stopping.</param>
/// <param name="ItemsTotal">Total listing items captured across those pages.</param>
/// <param name="StoppedReason">Why pagination stopped: "challenged", "empty", "completed", or "error".</param>
public sealed record CsMoneyCaptureResult(int PagesSucceeded, int ItemsTotal, string StoppedReason);

/// <summary>
/// Drives a low-fingerprint, non-headless Edge (no CDP) through a local forwarding
/// proxy to the cs.money market, lets the operator clear Cloudflare once, then pages
/// the listings API with human-like pacing using in-page <c>fetch()</c> calls from
/// the cleared origin (so the <c>cf_clearance</c> cookie rides along). It records each
/// page's JSON and — crucially for the current phase — measures how many pages
/// survive before Cloudflare re-challenges, which tells us whether the
/// fingerprint reductions are enough for a real sweep.
/// </summary>
public sealed class CsMoneyCaptureService
{
    // Amount the listings offset advances per request (presumably the API's page
    // size — confirm against the API URL template).
    private const int OffsetStep = 60;

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly CsMoneyOptions _options;
    private readonly ILogger<CsMoneyCaptureService> _logger;

    /// <summary>Creates the service from its collaborators; no work is done until <see cref="RunAsync"/>.</summary>
    public CsMoneyCaptureService(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        CsMoneyOptions options,
        ILogger<CsMoneyCaptureService> logger)
    {
        _provider = provider;
        _proxyFactory = proxyFactory;
        _factory = factory;
        _options = options;
        _logger = logger;
    }

    /// <summary>
    /// Open the market, wait for <paramref name="browseUntilDone"/> (the operator
    /// clears Cloudflare and presses Enter), then page the listings API up to
    /// <paramref name="maxPages"/> times, stopping early on a re-challenge or an
    /// empty page. Each page's body is written to <paramref name="outputDir"/>.
    /// </summary>
    /// <param name="outputDir">Directory (created if missing) that receives one file per captured page, plus the challenge body if re-challenged.</param>
    /// <param name="request">Proxy request handed to the proxy provider; only used when <paramref name="useProxy"/> is true.</param>
    /// <param name="loadImages">When false, the browser is created with image loading blocked.</param>
    /// <param name="useProxy">When true, traffic goes through a local forwarding proxy; when false, the browser runs without one.</param>
    /// <param name="maxPages">Maximum number of pages to capture; zero or less captures nothing.</param>
    /// <param name="browseUntilDone">Awaited once after navigation; completes when the operator signals the market page is ready.</param>
    /// <param name="ct">Cancels the run between pages and during pacing delays.</param>
    /// <returns>Pages and items captured and the reason pagination stopped.</returns>
    /// <exception cref="OperationCanceledException">The run was cancelled; no result is returned in that case.</exception>
    public async Task<CsMoneyCaptureResult> RunAsync(
        string outputDir,
        ProxyRequest request,
        bool loadImages,
        bool useProxy,
        int maxPages,
        Func<Task> browseUntilDone,
        CancellationToken ct = default)
    {
        Directory.CreateDirectory(outputDir);

        LocalForwardingProxy? localProxy = null;
        IWebDriver? driver = null;
        var pages = 0;
        var items = 0;
        var reason = "completed";

        // The outer try/finally owns cleanup: it guarantees the proxy is disposed even
        // when the browser fails to launch, and that a failing Quit() cannot skip the
        // proxy disposal. Setup failures still propagate to the caller.
        try
        {
            // --no-proxy (useProxy=false) drives the automated browser on the machine's
            // own IP, to isolate whether a re-challenge is the IPRoyal exit's reputation
            // or the webdriver fingerprint itself.
            string? proxyEndpoint = null;
            if (useProxy)
            {
                var lease = _provider.Acquire(request);
                localProxy = _proxyFactory.Create(lease).Start();
                proxyEndpoint = localProxy.Endpoint;
            }

            driver = _factory.Create(proxyEndpoint, blockImages: !loadImages, _options.ProfileDir);

            try
            {
                driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(90);
                // Bounds each in-page fetch() executed through ExecuteAsyncScript.
                driver.Manage().Timeouts().AsynchronousJavaScript = TimeSpan.FromSeconds(45);

                _logger.LogInformation("Navigating to {Url}", _options.MarketUrl);
                driver.Navigate().GoToUrl(_options.MarketUrl);

                // Operator clears the Cloudflare challenge in the visible window, waits
                // until the market grid is actually rendered, then presses Enter.
                await browseUntilDone();

                for (var offset = 0; pages < maxPages; offset += OffsetStep)
                {
                    ct.ThrowIfCancellationRequested();

                    // Invariant culture keeps the offset rendering independent of the host locale.
                    var apiUrl = string.Format(CultureInfo.InvariantCulture, _options.ApiUrlTemplate, offset);
                    var (status, body) = DirectFetch(driver, apiUrl);

                    if (LooksLikeChallenge(status, body))
                    {
                        _logger.LogWarning(
                            "Re-challenged at offset {Offset} (after {Pages} clean page(s)). Stopping.",
                            offset,
                            pages);
                        await WriteAsync(outputDir, $"challenge_offset_{offset}.html", body, ct);
                        reason = "challenged";
                        break;
                    }

                    var count = TryCountItems(body);
                    if (count is 0)
                    {
                        _logger.LogInformation("Offset {Offset} returned no items — end of listings.", offset);
                        reason = "empty";
                        break;
                    }

                    // A body whose items cannot be counted (count is null) is still
                    // recorded as a page; it just contributes zero items to the total.
                    await WriteAsync(outputDir, $"page_{pages:D3}_offset_{offset}.json", body, ct);
                    pages++;
                    items += count ?? 0;

                    _logger.LogInformation(
                        "Page {Page} [offset {Offset}] [{Status}] → {Count} items ({Bytes} bytes).",
                        pages,
                        offset,
                        status,
                        count,
                        body.Length);

                    // Pace only between requests: sleeping after the final page wastes
                    // time and lets a late cancellation discard a completed result.
                    if (pages < maxPages)
                    {
                        await DelayAsync(ct);
                    }
                }
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                // Cancellation propagates to the caller; any other failure is logged and
                // reported through the result so the partial page count is not lost.
                _logger.LogError(ex, "cs.money capture failed after {Pages} page(s).", pages);
                reason = "error";
            }
        }
        finally
        {
            try
            {
                driver?.Quit();
            }
            finally
            {
                if (localProxy is not null)
                {
                    await localProxy.DisposeAsync();
                }
            }
        }

        return new CsMoneyCaptureResult(pages, items, reason);
    }

    // Run a same-origin fetch() in the cleared page and return (status, body). Uses
    // ExecuteAsyncScript so we can await the fetch promise; the page is on the
    // cs.money origin, so the cf_clearance cookie is sent automatically.
    // A fetch rejection is reported as status -1 with the error text as the body;
    // a null/empty script result is reported as (-1, "").
    private (int Status, string Body) DirectFetch(IWebDriver driver, string apiUrl)
    {
        const string script = """
            const url = arguments[0];
            const done = arguments[arguments.length - 1];
            fetch(url, { credentials: 'include', headers: { 'accept': 'application/json' } })
                .then(r => r.text().then(t => done(JSON.stringify({ status: r.status, body: t }))))
                .catch(e => done(JSON.stringify({ status: -1, body: String(e) })));
            """;

        var raw = ((IJavaScriptExecutor)driver).ExecuteAsyncScript(script, apiUrl) as string;
        if (string.IsNullOrEmpty(raw))
        {
            return (-1, "");
        }

        // The script wraps status and body in a JSON envelope; unwrap it here.
        using var doc = JsonDocument.Parse(raw);
        var status = doc.RootElement.GetProperty("status").GetInt32();
        var body = doc.RootElement.GetProperty("body").GetString() ?? "";
        return (status, body);
    }

    // True when the response should be treated as a Cloudflare challenge: a 403/503
    // status, a failed fetch (-1), known challenge markers in the body, or any body
    // that starts with '<'.
    private static bool LooksLikeChallenge(int status, string body) =>
        status is 403 or 503 or -1
        || body.Contains("Just a moment", StringComparison.OrdinalIgnoreCase)
        || body.Contains("challenge-platform", StringComparison.OrdinalIgnoreCase)
        || body.TrimStart().StartsWith("<", StringComparison.Ordinal); // HTML, not JSON

    // Count items[] without binding a full model (the typed model is Phase 2).
    // Returns null when the body is not valid JSON, its root is not an object, or it
    // has no "items" array.
    private static int? TryCountItems(string body)
    {
        try
        {
            using var doc = JsonDocument.Parse(body);
            var root = doc.RootElement;

            // TryGetProperty throws InvalidOperationException on a non-object root
            // (array, string, number, ...), so check the kind before looking up "items".
            return root.ValueKind == JsonValueKind.Object
                && root.TryGetProperty("items", out var items)
                && items.ValueKind == JsonValueKind.Array
                ? items.GetArrayLength()
                : null;
        }
        catch (JsonException)
        {
            return null;
        }
    }

    // Sleep for the configured page delay plus a random jitter in
    // [0, PageJitterSeconds), so requests are not evenly spaced.
    private async Task DelayAsync(CancellationToken ct)
    {
        var jitter = _options.PageJitterSeconds > 0
            ? Random.Shared.NextDouble() * _options.PageJitterSeconds
            : 0;
        var seconds = Math.Max(0, _options.PageDelaySeconds) + jitter;
        if (seconds > 0)
        {
            await Task.Delay(TimeSpan.FromSeconds(seconds), ct);
        }
    }

    // Write a captured body to dir/fileName as UTF-8.
    // NOTE(review): Encoding.UTF8 emits a byte-order mark — confirm downstream
    // readers of the page_*.json files tolerate it.
    private static async Task WriteAsync(string dir, string fileName, string body, CancellationToken ct) =>
        await File.WriteAllTextAsync(Path.Combine(dir, fileName), body, Encoding.UTF8, ct);
}