diff --git a/BlueLaminate/BlueLaminate.Cli/Program.cs b/BlueLaminate/BlueLaminate.Cli/Program.cs index f3ae79d..0cc5b4d 100644 --- a/BlueLaminate/BlueLaminate.Cli/Program.cs +++ b/BlueLaminate/BlueLaminate.Cli/Program.cs @@ -1,9 +1,7 @@ using BlueLaminate.Cli; using BlueLaminate.Cli.Logging; using BlueLaminate.EFCore.Data; -using BlueLaminate.Scraper.Browser; using BlueLaminate.Scraper.CsFloat; -using BlueLaminate.Scraper.Proxies; using BlueLaminate.Scraper.Skins; using Microsoft.Extensions.Logging; using OpenTelemetry; @@ -49,30 +47,6 @@ syncSkins.SetAction((parseResult, ct) => loggerFactory, ct)); -var countryOption = new Option("--country") -{ - Description = "Optional ISO country code(s) for the exit IP, e.g. \"us\" or \"us,gb\". Default: random." -}; -var rotatingOption = new Option("--rotating") -{ - Description = "Use a rotating exit IP instead of a pinned (sticky) session." -}; - -var probeProxy = new Command( - "probe-proxy", - "Launch non-headless Edge through the IPRoyal residential proxy and print the exit IP " - + "to confirm auth works and the IP is residential. Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.") -{ - countryOption, - rotatingOption, -}; -probeProxy.SetAction((parseResult, ct) => - ProbeProxyAsync( - parseResult.GetValue(countryOption), - parseResult.GetValue(rotatingOption), - loggerFactory, - ct)); - var defIndexOption = new Option("--def-index") { Description = "CSFloat weapon def_index (e.g. AK-47=7, M4A4=16)." @@ -81,50 +55,6 @@ var paintIndexOption = new Option("--paint-index") { Description = "CSFloat paint_index for a specific skin (e.g. M4A4 | Cyber Security=985)." }; -var urlOption = new Option("--url") -{ - Description = "Full CSFloat URL to open. Overrides --def-index/--paint-index when set." -}; -var loadImagesOption = new Option("--load-images") -{ - Description = "Load images (uses more bandwidth). Default off to conserve the metered plan." -}; -var diagnoseOption = new Option("--diagnose") -{ - Description = "Log every CSFloat-domain response (url + status + type) to reveal where a " - + "Steam-login wall appears, not just /api/ JSON." -}; -var outOption = new Option("--out") -{ - Description = "Directory to write captured JSON to.", - DefaultValueFactory = _ => "captures", -}; - -var captureCsfloat = new Command( - "capture-csfloat", - "Open a CSFloat search page through the residential proxy and dump every CSFloat /api/ " - + "JSON response to disk while you browse (open a listing → 'Latest Sales'). " - + "Reads IPROYAL_USERNAME / IPROYAL_PASSWORD.") -{ - defIndexOption, - paintIndexOption, - urlOption, - countryOption, - loadImagesOption, - diagnoseOption, - outOption, -}; -captureCsfloat.SetAction((parseResult, ct) => - CaptureCsfloatAsync( - parseResult.GetValue(defIndexOption), - parseResult.GetValue(paintIndexOption), - parseResult.GetValue(urlOption), - parseResult.GetValue(countryOption), - parseResult.GetValue(loadImagesOption), - parseResult.GetValue(diagnoseOption), - parseResult.GetValue(outOption)!, - loggerFactory, - ct)); var sortByOption = new Option("--sort-by") { @@ -226,8 +156,6 @@ sweepCatalog.SetAction((parseResult, ct) => var root = new RootCommand("BlueLaminate CLI — Counter-Strike skin tracker tools.") { syncSkins, - probeProxy, - captureCsfloat, fetchListings, sweepListings, sweepCatalog, @@ -235,111 +163,6 @@ var root = new RootCommand("BlueLaminate CLI — Counter-Strike skin tracker too return await root.Parse(args).InvokeAsync(); -// Acquire an IPRoyal residential lease, drive a real (non-headless) Edge browser -// through it, and report the exit IP. This is the proxy/Selenium spike: it proves -// authenticated residential routing end-to-end for a few KB before any CSFloat -// scraping spends real bandwidth. -static async Task ProbeProxyAsync( - string? country, bool rotating, ILoggerFactory loggerFactory, CancellationToken ct) -{ - var username = Environment.GetEnvironmentVariable("IPROYAL_USERNAME"); - var password = Environment.GetEnvironmentVariable("IPROYAL_PASSWORD"); - if (string.IsNullOrWhiteSpace(username) || string.IsNullOrWhiteSpace(password)) - { - Console.Error.WriteLine( - "Set IPROYAL_USERNAME and IPROYAL_PASSWORD environment variables first."); - return 1; - } - - var provider = new IpRoyalProxyProvider(username, password); - var factory = new BrowserDriverFactory(loggerFactory.CreateLogger()); - var probe = new ProxyProbe(provider, factory, loggerFactory.CreateLogger()); - - try - { - var info = await probe.RunAsync(new ProxyRequest(Country: country, Sticky: !rotating)); - Console.WriteLine(); - Console.WriteLine($" Exit IP : {info.Ip}"); - Console.WriteLine($" Location: {info.City}, {info.Region}, {info.Country}"); - Console.WriteLine($" Org/ASN : {info.Org}"); - Console.WriteLine($" Hostname: {info.Hostname ?? "—"}"); - Console.WriteLine(); - Console.WriteLine( - "Check Org/ASN: a consumer ISP = residential; a hosting provider = datacenter."); - return 0; - } - catch (Exception ex) - { - Console.Error.WriteLine($"Proxy probe failed: {ex.Message}"); - return 1; - } -} - -// Phase B: open a CSFloat search page through the residential proxy and dump -// every CSFloat /api/ JSON response to disk while the operator browses. This is -// how we discover the real endpoint/field shapes (active listings + Latest -// Sales) before designing tables or automating navigation. -static async Task CaptureCsfloatAsync( - int? defIndex, int? paintIndex, string? url, string? country, - bool loadImages, bool diagnose, string outDir, ILoggerFactory loggerFactory, CancellationToken ct) -{ - var username = Environment.GetEnvironmentVariable("IPROYAL_USERNAME"); - var password = Environment.GetEnvironmentVariable("IPROYAL_PASSWORD"); - if (string.IsNullOrWhiteSpace(username) || string.IsNullOrWhiteSpace(password)) - { - Console.Error.WriteLine( - "Set IPROYAL_USERNAME and IPROYAL_PASSWORD environment variables first."); - return 1; - } - - var targetUrl = BuildCsfloatUrl(url, defIndex, paintIndex); - var provider = new IpRoyalProxyProvider(username, password); - var factory = new BrowserDriverFactory(loggerFactory.CreateLogger()); - var capture = new CsFloatCaptureService( - provider, factory, loggerFactory.CreateLogger()); - - Console.WriteLine($"Opening {targetUrl}"); - Console.WriteLine( - "When the page loads: click a listing, then the 'Latest Sales' tab. " - + "Capturing all CSFloat /api/ responses."); - Console.WriteLine("Press Enter here when you're done to close the browser."); - - try - { - // Block until the operator presses Enter; the browser stays open and - // capturing the whole time. ReadLine is sync, so push it off-thread. - var count = await capture.RunAsync( - targetUrl, - outDir, - new ProxyRequest(Country: country, Sticky: true), - loadImages, - diagnose, - () => Task.Run(() => Console.ReadLine(), ct)); - - var full = Path.GetFullPath(outDir); - Console.WriteLine(); - Console.WriteLine($"Captured {count} response(s) to {full}"); - return 0; - } - catch (Exception ex) - { - Console.Error.WriteLine($"CSFloat capture failed: {ex.Message}"); - return 1; - } -} - -// Prefer an explicit --url; otherwise build a search URL from the indexes, -// defaulting to the M4A4 | Cyber Security example so the command runs as-is. -static string BuildCsfloatUrl(string? url, int? defIndex, int? paintIndex) -{ - if (!string.IsNullOrWhiteSpace(url)) - return url; - - var def = defIndex ?? 16; - var paint = paintIndex ?? 985; - return $"https://csfloat.com/search?def_index={def}&paint_index={paint}"; -} - // Fetch active listings for one skin via CSFloat's official API and print them. // Fetch-and-print only — no DB — so we can verify the real field shapes against a // live key before designing the Listing schema. Defaults to the M4A4 | Cyber diff --git a/BlueLaminate/BlueLaminate.Scraper/BlueLaminate.Scraper.csproj b/BlueLaminate/BlueLaminate.Scraper/BlueLaminate.Scraper.csproj index 0e23381..3ae7f04 100644 --- a/BlueLaminate/BlueLaminate.Scraper/BlueLaminate.Scraper.csproj +++ b/BlueLaminate/BlueLaminate.Scraper/BlueLaminate.Scraper.csproj @@ -8,7 +8,6 @@ - diff --git a/BlueLaminate/BlueLaminate.Scraper/Browser/BrowserDriverFactory.cs b/BlueLaminate/BlueLaminate.Scraper/Browser/BrowserDriverFactory.cs deleted file mode 100644 index 8b07fe6..0000000 --- a/BlueLaminate/BlueLaminate.Scraper/Browser/BrowserDriverFactory.cs +++ /dev/null @@ -1,88 +0,0 @@ -using BlueLaminate.Scraper.Proxies; -using Microsoft.Extensions.Logging; -using OpenQA.Selenium; -using OpenQA.Selenium.Edge; - -namespace BlueLaminate.Scraper.Browser; - -/// -/// Builds a non-headless Edge (Chromium) WebDriver routed through a -/// . Two things make this non-trivial: -/// -/// Proxy authentication. Chromium can't auto-fill the gateway's auth -/// dialog under automation, and the classic extension trick relies on -/// Manifest V2 which current Chromium disables. Instead we answer the proxy's -/// 407 challenge through the DevTools (CDP) auth handler, which works -/// non-headless and needs no extension. -/// Bandwidth. The residential plan is metered per GB, so images are -/// disabled at the content-settings level. Cloudflare gates on JS execution and -/// TLS/behaviour, not whether pictures render, so this stays realistic. -/// -/// Each driver gets a throwaway user-data dir so runs never share cookies and -/// never touch the user's real Edge profile. -/// -public sealed class BrowserDriverFactory -{ - private readonly ILogger _logger; - - public BrowserDriverFactory(ILogger logger) - { - _logger = logger; - } - - public async Task CreateAsync(ProxyLease lease, bool blockImages = true) - { - var options = new EdgeOptions(); - - // Route browser traffic through the gateway via the launch argument - // rather than EdgeOptions.Proxy. Setting Proxy makes Selenium hand the - // gateway to Selenium Manager for the driver *download* too, which fails - // because that step can't authenticate. The arg scopes the proxy to the - // browser only; credentials are answered below via CDP. No scheme = all - // protocols use the gateway. - options.AddArgument($"--proxy-server={lease.Endpoint}"); - - // Reduce the most obvious automation tells; residential exit + a real - // (non-headless) browser do the rest. - options.AddArgument("--disable-blink-features=AutomationControlled"); - options.AddExcludedArgument("enable-automation"); - options.AddArgument("--no-first-run"); - options.AddArgument("--no-default-browser-check"); - options.AddArgument("--start-maximized"); - - // Isolated, disposable profile per launch. - var profileDir = Path.Combine(Path.GetTempPath(), "bluelaminate-edge", Guid.NewGuid().ToString("N")); - Directory.CreateDirectory(profileDir); - options.AddArgument($"--user-data-dir={profileDir}"); - - if (blockImages) - options.AddUserProfilePreference("profile.managed_default_content_settings.images", 2); - - _logger.LogInformation( - "Launching Edge via proxy {Endpoint} (provider {Provider}, session {Session}).", - lease.Endpoint, lease.Provider, lease.SessionId ?? "rotating"); - - var driver = new EdgeDriver(options); - - try - { - // Answer the gateway's proxy-auth (407) challenge with the lease - // credentials. UriMatcher returns true so it applies to every - // request, since the challenge originates from the proxy itself. - var network = driver.Manage().Network; - network.AddAuthenticationHandler(new NetworkAuthenticationHandler - { - UriMatcher = _ => true, - Credentials = new PasswordCredentials(lease.Username, lease.Password), - }); - await network.StartMonitoring(); - } - catch - { - driver.Quit(); - throw; - } - - return driver; - } -} diff --git a/BlueLaminate/BlueLaminate.Scraper/CsFloat/CsFloatCaptureService.cs b/BlueLaminate/BlueLaminate.Scraper/CsFloat/CsFloatCaptureService.cs deleted file mode 100644 index 716dd99..0000000 --- a/BlueLaminate/BlueLaminate.Scraper/CsFloat/CsFloatCaptureService.cs +++ /dev/null @@ -1,139 +0,0 @@ -using System.Text; -using BlueLaminate.Scraper.Browser; -using BlueLaminate.Scraper.Proxies; -using Microsoft.Extensions.Logging; -using OpenQA.Selenium; - -namespace BlueLaminate.Scraper.CsFloat; - -/// -/// Phase-B discovery tool. Drives a real Edge browser through a residential -/// lease to a CSFloat search page, then records every CSFloat /api/ JSON -/// response to disk while a human clicks around (open a listing → "Latest -/// Sales"). We don't yet know CSFloat's exact endpoints or DOM selectors, so a -/// human-in-the-loop is the cheapest way to surface the real traffic: the tool -/// just listens and dumps, the operator drives the UI in the visible window. -/// Once we can see the captured shapes we can automate navigation and design the -/// tables. -/// -public sealed class CsFloatCaptureService -{ - private readonly IProxyProvider _provider; - private readonly BrowserDriverFactory _factory; - private readonly ILogger _logger; - - public CsFloatCaptureService( - IProxyProvider provider, - BrowserDriverFactory factory, - ILogger logger) - { - _provider = provider; - _factory = factory; - _logger = logger; - } - - /// - /// Opens through the proxy and captures CSFloat API - /// responses to until - /// completes (the CLI ties that to the operator pressing Enter). When - /// is true, every CSFloat-domain response is - /// logged (url + status + type) to reveal where a login wall appears. - /// Returns the number of responses written. - /// - public async Task RunAsync( - string url, - string outputDir, - ProxyRequest request, - bool loadImages, - bool diagnose, - Func browseUntilDone) - { - Directory.CreateDirectory(outputDir); - - var lease = _provider.Acquire(request); - var driver = await _factory.CreateAsync(lease, blockImages: !loadImages); - - var captured = 0; - - void OnResponse(object? sender, NetworkResponseReceivedEventArgs e) - { - var responseUrl = e.ResponseUrl; - if (string.IsNullOrEmpty(responseUrl) - || !responseUrl.Contains("csfloat", StringComparison.OrdinalIgnoreCase)) - { - return; - } - - // Diagnose mode logs every CSFloat-domain response — including the - // SPA shell, redirects and any 401/403 — so we can see exactly where - // a Steam-login wall appears even before any /api/ call fires. - if (diagnose) - { - _logger.LogInformation("[{Status}] {Type} {Url}", - e.ResponseStatusCode, e.ResponseResourceType, responseUrl); - } - - // Only JSON API calls get written to disk; skip the shell, images, - // fonts, analytics, etc. Matches both api.csfloat.com and csfloat.com/api. - if (!responseUrl.Contains("/api/", StringComparison.OrdinalIgnoreCase)) - return; - - var body = e.ResponseBody; - if (string.IsNullOrWhiteSpace(body)) - { - // Body wasn't buffered (e.g. the known Fetch interception race). - // Log the endpoint so we still learn it exists even if empty. - _logger.LogWarning("No body captured for {Url} (status {Status}).", - responseUrl, e.ResponseStatusCode); - return; - } - - try - { - var n = Interlocked.Increment(ref captured); - var fileName = $"{n:D3}_{Sanitize(responseUrl)}.json"; - File.WriteAllText(Path.Combine(outputDir, fileName), body, Encoding.UTF8); - _logger.LogInformation( - "Captured #{N} [{Status}] {Url} → {File} ({Bytes} bytes).", - n, e.ResponseStatusCode, responseUrl, fileName, body.Length); - } - catch (Exception ex) - { - _logger.LogWarning(ex, "Failed to write capture for {Url}.", responseUrl); - } - } - - var network = driver.Manage().Network; - network.NetworkResponseReceived += OnResponse; - - try - { - _logger.LogInformation("Navigating to {Url}", url); - driver.Navigate().GoToUrl(url); - await browseUntilDone(); - } - finally - { - network.NetworkResponseReceived -= OnResponse; - driver.Quit(); - } - - return captured; - } - - // Turn a URL into a filesystem-safe, readable, length-capped file stem so the - // captures are self-describing (the endpoint is visible in the filename). - private static string Sanitize(string url) - { - var trimmed = url - .Replace("https://", "", StringComparison.OrdinalIgnoreCase) - .Replace("http://", "", StringComparison.OrdinalIgnoreCase); - - var sb = new StringBuilder(trimmed.Length); - foreach (var c in trimmed) - sb.Append(char.IsLetterOrDigit(c) || c is '-' or '.' ? c : '_'); - - var stem = sb.ToString(); - return stem.Length > 120 ? stem[..120] : stem; - } -} diff --git a/BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyLease.cs b/BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyLease.cs index 18bd06a..fe4aaae 100644 --- a/BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyLease.cs +++ b/BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyLease.cs @@ -2,11 +2,11 @@ namespace BlueLaminate.Scraper.Proxies; /// /// A concrete, ready-to-use proxy endpoint handed back by an -/// . This is the only proxy type the browser layer -/// ever sees, so swapping providers (or mixing several in a grab-bag) never -/// touches the Selenium code. and -/// are the literal credentials to present to the gateway — for providers like -/// IPRoyal the targeting/session parameters are already baked into them. +/// . This is the only proxy type a consumer ever +/// sees, so swapping providers (or mixing several in a grab-bag) never touches +/// the calling code. and are the +/// literal credentials to present to the gateway — for providers like IPRoyal +/// the targeting/session parameters are already baked into them. /// /// Gateway host, e.g. "geo.iproyal.com". /// Gateway port, e.g. 12321. diff --git a/BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs b/BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs deleted file mode 100644 index 7c86e8d..0000000 --- a/BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs +++ /dev/null @@ -1,97 +0,0 @@ -using System.Text.Json; -using BlueLaminate.Scraper.Browser; -using Microsoft.Extensions.Logging; -using OpenQA.Selenium; - -namespace BlueLaminate.Scraper.Proxies; - -/// The exit IP a proxy lease actually resolves to, per ipinfo.io. -/// -/// ASN + organisation, e.g. "AS7922 Comcast Cable". This is the tell for -/// residential vs. datacenter: a consumer ISP here means a real residential -/// exit; a hosting provider (OVH, Hetzner, AWS…) means datacenter dressed up. -/// -public sealed record ProxyExitInfo( - string? Ip, - string? City, - string? Region, - string? Country, - string? Org, - string? Hostname, - string? Timezone); - -/// -/// Smallest possible end-to-end check of the proxy plumbing: acquire a lease, -/// launch the real browser through it, and read back the exit IP from an -/// IP-echo endpoint. Costs a few KB, so it's the right first thing to run -/// against a metered residential plan — it proves auth works and shows whether -/// the IP is genuinely residential before we spend bandwidth on CSFloat. -/// -public sealed class ProxyProbe -{ - private const string IpEchoUrl = "https://ipinfo.io/json"; - - private static readonly JsonSerializerOptions JsonOptions = new() - { - PropertyNameCaseInsensitive = true, - }; - - private readonly IProxyProvider _provider; - private readonly BrowserDriverFactory _factory; - private readonly ILogger _logger; - - public ProxyProbe( - IProxyProvider provider, - BrowserDriverFactory factory, - ILogger logger) - { - _provider = provider; - _factory = factory; - _logger = logger; - } - - public async Task RunAsync(ProxyRequest request) - { - var lease = _provider.Acquire(request); - _logger.LogInformation( - "Acquired {Provider} lease (exit {Mode}).", - lease.Provider, lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}"); - - var driver = await _factory.CreateAsync(lease, blockImages: true); - try - { - driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60); - driver.Navigate().GoToUrl(IpEchoUrl); - - // Read the document's text rather than the DOM so the browser's - // built-in JSON viewer doesn't get in the way, then carve out the - // JSON object it rendered. - var rendered = ((IJavaScriptExecutor)driver) - .ExecuteScript("return document.documentElement.innerText;") as string - ?? throw new InvalidOperationException("Browser returned no page text."); - - var info = JsonSerializer.Deserialize(ExtractJson(rendered), JsonOptions) - ?? throw new InvalidOperationException("IP-echo response was empty."); - - _logger.LogInformation( - "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}", - info.Ip, info.City, info.Region, info.Country, info.Org); - - return info; - } - finally - { - driver.Quit(); - } - } - - private static string ExtractJson(string text) - { - var start = text.IndexOf('{'); - var end = text.LastIndexOf('}'); - if (start < 0 || end <= start) - throw new InvalidOperationException($"No JSON found in IP-echo response: {text}"); - - return text[start..(end + 1)]; - } -}