using System.Text;
using BlueLaminate.Scraper.Browser;
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;

namespace BlueLaminate.Scraper.CsFloat;

/// <summary>
/// Phase-B discovery tool. Drives a real Edge browser through a residential
/// lease to a CSFloat search page, then records every CSFloat <c>/api/</c> JSON
/// response to disk while a human clicks around (open a listing → "Latest
/// Sales"). We don't yet know CSFloat's exact endpoints or DOM selectors, so a
/// human-in-the-loop is the cheapest way to surface the real traffic: the tool
/// just listens and dumps, the operator drives the UI in the visible window.
/// Once we can see the captured shapes we can automate navigation and design the
/// tables.
/// </summary>
public sealed class CsFloatCaptureService
{
    // Upper bound on the URL-derived part of a capture file name; the sequence
    // prefix and the ".json" extension are added on top of this.
    private const int MaxStemLength = 120;

    private readonly IProxyProvider _provider;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<CsFloatCaptureService> _logger;

    /// <summary>
    /// Creates the capture service from its collaborators.
    /// </summary>
    /// <param name="provider">Source of the proxy lease the browser is routed through.</param>
    /// <param name="factory">Creates the browser driver bound to a lease.</param>
    /// <param name="logger">Receives progress, diagnose and failure messages.</param>
    public CsFloatCaptureService(
        IProxyProvider provider,
        BrowserDriverFactory factory,
        ILogger<CsFloatCaptureService> logger)
    {
        _provider = provider;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Opens <paramref name="url"/> through the proxy and captures CSFloat API
    /// responses to <paramref name="outputDir"/> until
    /// <paramref name="browseUntilDone"/> completes (the CLI ties that to the
    /// operator pressing Enter). When <paramref name="diagnose"/> is true, every
    /// CSFloat-domain response is logged (url + status + type) to reveal where a
    /// login wall appears.
    /// </summary>
    /// <param name="url">Page to navigate the browser to.</param>
    /// <param name="outputDir">Directory the captures are written to; created if missing.</param>
    /// <param name="request">Proxy request passed to the provider to acquire a lease.</param>
    /// <param name="loadImages">When false the driver is created with image blocking on.</param>
    /// <param name="diagnose">When true, logs every CSFloat-domain response, not only API calls.</param>
    /// <param name="browseUntilDone">Awaited after navigation; capture stops when it completes.</param>
    /// <returns>The number of responses written.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="url"/>, <paramref name="outputDir"/> or
    /// <paramref name="browseUntilDone"/> is null.
    /// </exception>
    public async Task<int> RunAsync(
        string url,
        string outputDir,
        ProxyRequest request,
        bool loadImages,
        bool diagnose,
        Func<Task> browseUntilDone)
    {
        // Fail fast, before a proxy lease or a browser process has been acquired.
        ArgumentNullException.ThrowIfNull(url);
        ArgumentNullException.ThrowIfNull(outputDir);
        ArgumentNullException.ThrowIfNull(browseUntilDone);

        Directory.CreateDirectory(outputDir);

        // NOTE(review): the lease is never released in this method — confirm
        // whether IProxyProvider expects an explicit release/dispose.
        var lease = _provider.Acquire(request);
        var driver = await _factory.CreateAsync(lease, blockImages: !loadImages);

        // 'sequence' numbers the files (and may skip a number when a write
        // fails); 'written' counts only the captures that reached disk, which
        // is what the method reports back.
        var sequence = 0;
        var written = 0;

        void OnResponse(object? sender, NetworkResponseReceivedEventArgs e)
        {
            var responseUrl = e.ResponseUrl;
            if (string.IsNullOrEmpty(responseUrl)
                || !responseUrl.Contains("csfloat", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            // Diagnose mode logs every CSFloat-domain response — including the
            // SPA shell, redirects and any 401/403 — so we can see exactly where
            // a Steam-login wall appears even before any /api/ call fires.
            if (diagnose)
            {
                _logger.LogInformation("[{Status}] {Type} {Url}",
                    e.ResponseStatusCode, e.ResponseResourceType, responseUrl);
            }

            // Only JSON API calls get written to disk; skip the shell, images,
            // fonts, analytics, etc. Matches both api.csfloat.com and csfloat.com/api.
            if (!responseUrl.Contains("/api/", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            var body = e.ResponseBody;
            if (string.IsNullOrWhiteSpace(body))
            {
                // Body wasn't buffered (e.g. the known Fetch interception race).
                // Log the endpoint so we still learn it exists even if empty.
                _logger.LogWarning("No body captured for {Url} (status {Status}).",
                    responseUrl, e.ResponseStatusCode);
                return;
            }

            try
            {
                var n = Interlocked.Increment(ref sequence);
                var fileName = $"{n:D3}_{Sanitize(responseUrl)}.json";
                File.WriteAllText(Path.Combine(outputDir, fileName), body, Encoding.UTF8);

                // Count only after the write succeeded so the return value
                // really is "responses written".
                Interlocked.Increment(ref written);

                _logger.LogInformation(
                    "Captured #{N} [{Status}] {Url} → {File} ({Bytes} bytes).",
                    n, e.ResponseStatusCode, responseUrl, fileName, body.Length);
            }
            catch (Exception ex)
            {
                // Best effort: one failed capture must not stop the session.
                _logger.LogWarning(ex, "Failed to write capture for {Url}.", responseUrl);
            }
        }

        try
        {
            // Inside the outer try so the browser is still shut down if
            // obtaining the network object or subscribing throws.
            // NOTE(review): this method only subscribes to the event; it assumes
            // network monitoring was already started on the driver (presumably
            // by the factory) — confirm.
            var network = driver.Manage().Network;
            network.NetworkResponseReceived += OnResponse;
            try
            {
                _logger.LogInformation("Navigating to {Url}", url);
                driver.Navigate().GoToUrl(url);
                await browseUntilDone();
            }
            finally
            {
                network.NetworkResponseReceived -= OnResponse;
            }
        }
        finally
        {
            driver.Quit();
        }

        return Volatile.Read(ref written);
    }

    // Turn a URL into a filesystem-safe, readable, length-capped file stem so the
    // captures are self-describing (the endpoint is visible in the filename).
    private static string Sanitize(string url)
    {
        var trimmed = url
            .Replace("https://", "", StringComparison.OrdinalIgnoreCase)
            .Replace("http://", "", StringComparison.OrdinalIgnoreCase);

        // Keep letters, digits, '-' and '.'; everything else becomes '_'.
        var sb = new StringBuilder(trimmed.Length);
        foreach (var c in trimmed)
        {
            sb.Append((char.IsLetterOrDigit(c) || c is '-' or '.') ? c : '_');
        }

        var stem = sb.ToString();
        return stem.Length > MaxStemLength ? stem[..MaxStemLength] : stem;
    }
}