using System.Text;
using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.CsMoney;
/// <summary>Outcome of a stealth pagination run.</summary>
/// <param name="PagesSucceeded">How many offset pages returned listings JSON before stopping.</param>
/// <param name="ItemsTotal">Total listing items captured across those pages.</param>
/// <param name="StoppedReason">Why pagination stopped: "challenged", "empty", "completed", or "error".</param>
public sealed record CsMoneyCaptureResult(
    int PagesSucceeded,
    int ItemsTotal,
    string StoppedReason);
/// <summary>
/// Drives a low-fingerprint, non-headless Edge (no CDP) through a local forwarding
/// proxy to the cs.money market, lets the operator clear Cloudflare once, then pages
/// the listings API with human-like pacing using in-page <c>fetch()</c> calls from
/// the cleared origin (so the <c>cf_clearance</c> cookie rides along). It records each
/// page's JSON and — crucially for the current phase — measures how many pages
/// survive before Cloudflare re-challenges, which tells us whether the
/// fingerprint reductions are enough for a real sweep.
/// </summary>
public sealed class CsMoneyCaptureService
{
    // Offset increment between consecutive listings requests.
    // NOTE(review): presumably equals the API's page size — confirm against ApiUrlTemplate.
    private const int PageSize = 60;

    // Captured bodies are written as UTF-8 *without* a byte-order mark: Encoding.UTF8
    // makes File.WriteAllTextAsync prepend a BOM, which strict JSON readers reject.
    private static readonly Encoding Utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly CsMoneyOptions _options;
    private readonly ILogger _logger;

    /// <summary>Creates the service from its collaborators; nothing is started until <see cref="RunAsync"/>.</summary>
    /// <param name="provider">Source of proxy leases (used only when a run asks for a proxy).</param>
    /// <param name="proxyFactory">Builds the local forwarding proxy for an acquired lease.</param>
    /// <param name="factory">Creates the browser driver for a run.</param>
    /// <param name="options">Market URL, API URL template, profile directory and pacing settings.</param>
    /// <param name="logger">Destination for progress and failure messages.</param>
    public CsMoneyCaptureService(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        CsMoneyOptions options,
        ILogger logger)
    {
        _provider = provider;
        _proxyFactory = proxyFactory;
        _factory = factory;
        _options = options;
        _logger = logger;
    }

    /// <summary>
    /// Open the market, wait for <paramref name="browseUntilDone"/> (the operator
    /// clears Cloudflare and presses Enter), then page the listings API up to
    /// <paramref name="maxPages"/> times, stopping early on a re-challenge or an
    /// empty page. Each page's body is written to <paramref name="outputDir"/>.
    /// </summary>
    /// <param name="outputDir">Directory for captured bodies; created if it does not exist.</param>
    /// <param name="request">Proxy request passed to the provider; only used when <paramref name="useProxy"/> is true.</param>
    /// <param name="loadImages">When false, the driver is created with image blocking enabled.</param>
    /// <param name="useProxy">When false, no lease is acquired and the browser is created without a proxy endpoint.</param>
    /// <param name="maxPages">Maximum number of pages to record.</param>
    /// <param name="browseUntilDone">Awaited once after navigation, before any API page is fetched.</param>
    /// <param name="ct">Checked before every page and observed by delays and file writes.</param>
    /// <returns>
    /// Pages recorded, items counted, and the stop reason. Failures inside the browser
    /// session are logged and reported as "error" rather than thrown.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="browseUntilDone"/> is null.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled during the run.</exception>
    public async Task<CsMoneyCaptureResult> RunAsync(
        string outputDir,
        ProxyRequest request,
        bool loadImages,
        bool useProxy,
        int maxPages,
        Func<Task> browseUntilDone,
        CancellationToken ct = default)
    {
        // Fail before a proxy or browser is started rather than after navigation.
        ArgumentNullException.ThrowIfNull(browseUntilDone);

        Directory.CreateDirectory(outputDir);

        // --no-proxy (useProxy=false) drives the automated browser on the machine's
        // own IP, to isolate whether a re-challenge is the IPRoyal exit's reputation
        // or the webdriver fingerprint itself.
        LocalForwardingProxy? localProxy = null;
        string? proxyEndpoint = null;
        if (useProxy)
        {
            var lease = _provider.Acquire(request);
            localProxy = _proxyFactory.Create(lease).Start();
            proxyEndpoint = localProxy.Endpoint;
        }

        var pages = 0;
        var items = 0;
        var reason = "completed";

        // The outer try/finally owns the proxy so it is disposed even when driver
        // creation or driver.Quit() throws; the inner one owns the browser session.
        try
        {
            var driver = _factory.Create(proxyEndpoint, blockImages: !loadImages, _options.ProfileDir);
            try
            {
                driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(90);
                driver.Manage().Timeouts().AsynchronousJavaScript = TimeSpan.FromSeconds(45);

                _logger.LogInformation("Navigating to {Url}", _options.MarketUrl);
                driver.Navigate().GoToUrl(_options.MarketUrl);

                // Operator clears the Cloudflare challenge in the visible window, waits
                // until the market grid is actually rendered, then presses Enter.
                await browseUntilDone();

                for (var offset = 0; pages < maxPages; offset += PageSize)
                {
                    ct.ThrowIfCancellationRequested();

                    var apiUrl = string.Format(_options.ApiUrlTemplate, offset);
                    var (status, body) = DirectFetch(driver, apiUrl);

                    if (LooksLikeChallenge(status, body))
                    {
                        _logger.LogWarning(
                            "Re-challenged at offset {Offset} (after {Pages} clean page(s)). Stopping.",
                            offset, pages);
                        // Keep the challenge body for later inspection.
                        await WriteAsync(outputDir, $"challenge_offset_{offset}.html", body, ct);
                        reason = "challenged";
                        break;
                    }

                    var count = TryCountItems(body);
                    if (count is 0)
                    {
                        _logger.LogInformation("Offset {Offset} returned no items — end of listings.", offset);
                        reason = "empty";
                        break;
                    }

                    // A null count (no parsable items[] array) is still recorded as a page;
                    // it simply contributes nothing to the item total.
                    await WriteAsync(outputDir, $"page_{pages:D3}_offset_{offset}.json", body, ct);
                    pages++;
                    items += count ?? 0;
                    _logger.LogInformation(
                        "Page {Page} [offset {Offset}] [{Status}] → {Count} items ({Bytes} bytes).",
                        pages, offset, status, count, body.Length);

                    await DelayAsync(ct);
                }
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                // Cancellation propagates to the caller; anything else ends the run
                // but still reports what was captured so far.
                _logger.LogError(ex, "cs.money capture failed after {Pages} page(s).", pages);
                reason = "error";
            }
            finally
            {
                driver.Quit();
            }
        }
        finally
        {
            if (localProxy is not null)
            {
                await localProxy.DisposeAsync();
            }
        }

        return new CsMoneyCaptureResult(pages, items, reason);
    }

    // Run a same-origin fetch() in the cleared page and return (status, body). Uses
    // ExecuteAsyncScript so we can await the fetch promise; the page is on the
    // cs.money origin, so the cf_clearance cookie is sent automatically.
    // A rejected fetch is reported by the script as status -1 with the error text as
    // body; a missing or non-string script result is reported as (-1, "").
    private static (int Status, string Body) DirectFetch(IWebDriver driver, string apiUrl)
    {
        const string script = """
            const url = arguments[0];
            const done = arguments[arguments.length - 1];
            fetch(url, { credentials: 'include', headers: { 'accept': 'application/json' } })
            .then(r => r.text().then(t => done(JSON.stringify({ status: r.status, body: t }))))
            .catch(e => done(JSON.stringify({ status: -1, body: String(e) })));
            """;

        var raw = ((IJavaScriptExecutor)driver).ExecuteAsyncScript(script, apiUrl) as string;
        if (string.IsNullOrEmpty(raw))
        {
            return (-1, "");
        }

        // The script always answers with a JSON envelope: { status, body }.
        using var doc = JsonDocument.Parse(raw);
        var status = doc.RootElement.GetProperty("status").GetInt32();
        var body = doc.RootElement.GetProperty("body").GetString() ?? "";
        return (status, body);
    }

    // True when the response should be treated as a challenge: status 403, 503 or -1
    // (the value DirectFetch uses for a failed fetch), known challenge markers in the
    // body, or a body that starts with '<' after leading whitespace.
    private static bool LooksLikeChallenge(int status, string body) =>
        status is 403 or 503 or -1
        || body.Contains("Just a moment", StringComparison.OrdinalIgnoreCase)
        || body.Contains("challenge-platform", StringComparison.OrdinalIgnoreCase)
        || body.TrimStart().StartsWith("<", StringComparison.Ordinal); // HTML, not JSON

    // Count items[] without binding a full model (the typed model is Phase 2).
    // Returns null when the body is not valid JSON or has no top-level "items" array.
    private static int? TryCountItems(string body)
    {
        try
        {
            using var doc = JsonDocument.Parse(body);
            return doc.RootElement.TryGetProperty("items", out var items)
                && items.ValueKind == JsonValueKind.Array
                ? items.GetArrayLength()
                : null;
        }
        catch (JsonException)
        {
            return null;
        }
    }

    // Pause between pages: the configured base delay (negative values clamp to zero)
    // plus a random jitter in [0, PageJitterSeconds). Skipped when the total is zero.
    private async Task DelayAsync(CancellationToken ct)
    {
        var jitter = _options.PageJitterSeconds > 0
            ? Random.Shared.NextDouble() * _options.PageJitterSeconds
            : 0;
        var seconds = Math.Max(0, _options.PageDelaySeconds) + jitter;
        if (seconds > 0)
        {
            await Task.Delay(TimeSpan.FromSeconds(seconds), ct);
        }
    }

    // Write one captured body into the output directory as BOM-less UTF-8.
    private static async Task WriteAsync(string dir, string fileName, string body, CancellationToken ct) =>
        await File.WriteAllTextAsync(Path.Combine(dir, fileName), body, Utf8NoBom, ct);
}