Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
80 lines
3.5 KiB
C#
80 lines
3.5 KiB
C#
using Microsoft.Extensions.Logging;
|
|
using OpenQA.Selenium;
|
|
using OpenQA.Selenium.Edge;
|
|
|
|
namespace BlueLaminate.Scraper.Browser;
|
|
|
|
/// <summary>
/// Builds a non-headless Edge (Chromium) WebDriver pointed at a local, auth-free
/// proxy endpoint (a <see cref="Proxies.LocalForwardingProxy"/> that chains to the
/// residential gateway). Deliberately uses <b>zero CDP</b>: enabling DevTools
/// domains — even just to answer proxy auth — is a Cloudflare automation tell, and
/// the local proxy already carries the upstream credentials, so there's no 407 to
/// answer in the browser. Combined with a warmed, persistent profile this is the
/// lowest-fingerprint configuration we can manage without an undetected-chromedriver
/// (which has no .NET equivalent).
/// <para>
/// Bandwidth: the residential plan is metered per GB, so images are disabled at the
/// content-settings level by default. Cloudflare gates on JS/TLS/behaviour, not
/// whether pictures render, so this stays realistic.
/// </para>
/// </summary>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    /// <summary>
    /// Creates the factory.
    /// </summary>
    /// <param name="logger">Receives the launch message and any cleanup warnings.</param>
    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Launch Edge routed through <paramref name="proxyEndpoint"/> ("host:port", no
    /// auth). When <paramref name="profileDir"/> is set the profile persists across
    /// runs (so a once-cleared Cloudflare <c>cf_clearance</c> cookie and browsing
    /// history carry over — a warmed profile looks far less like a fresh bot); when
    /// null a throwaway profile is used.
    /// </summary>
    /// <param name="proxyEndpoint">
    /// Local proxy as "host:port". Null, empty or whitespace means no
    /// <c>--proxy-server</c> argument is passed and the browser connects directly.
    /// </param>
    /// <param name="blockImages">
    /// When true (the default), sets the
    /// <c>profile.managed_default_content_settings.images</c> preference to 2.
    /// </param>
    /// <param name="profileDir">
    /// User-data directory to reuse. Null, empty or whitespace selects a new
    /// GUID-named directory under the system temp path. The directory is created if
    /// it does not exist.
    /// </param>
    /// <returns>A started <see cref="EdgeDriver"/>.</returns>
    /// <remarks>
    /// If the driver fails to start, a throwaway profile directory created by this
    /// call is removed (best effort) before the exception propagates. This factory
    /// does not remove a throwaway profile after a successful launch, and it never
    /// deletes a caller-supplied <paramref name="profileDir"/>.
    /// </remarks>
    /// <exception cref="WebDriverException">
    /// Propagated unchanged from the <see cref="EdgeDriver"/> constructor when the
    /// browser session cannot be created.
    /// </exception>
    public IWebDriver Create(string? proxyEndpoint, bool blockImages = true, string? profileDir = null)
    {
        var options = new EdgeOptions();

        // Route browser traffic through the local proxy via the launch argument
        // rather than EdgeOptions.Proxy (which would also route Selenium Manager's
        // driver download). No scheme = all protocols use the proxy. When null/empty
        // the browser uses the machine's direct connection (diagnostic --no-proxy).
        var useProxy = !string.IsNullOrWhiteSpace(proxyEndpoint);
        if (useProxy)
        {
            options.AddArgument($"--proxy-server={proxyEndpoint}");
        }

        // Reduce the most obvious automation tells; residential exit + a real
        // (non-headless) browser + a warmed profile do the rest.
        options.AddArgument("--disable-blink-features=AutomationControlled");
        options.AddExcludedArgument("enable-automation");
        options.AddAdditionalOption("useAutomationExtension", false);
        options.AddArgument("--no-first-run");
        options.AddArgument("--no-default-browser-check");
        options.AddArgument("--start-maximized");

        // A caller-supplied directory persists across runs; otherwise mint a unique
        // temp directory so concurrent launches never share a profile.
        var persist = !string.IsNullOrWhiteSpace(profileDir);
        var dir = persist
            ? profileDir!
            : Path.Combine(Path.GetTempPath(), "bluelaminate-edge", Guid.NewGuid().ToString("N"));
        Directory.CreateDirectory(dir);
        options.AddArgument($"--user-data-dir={dir}");

        if (blockImages)
        {
            // 2 is the value this preference is set to when images are disabled.
            options.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
        }

        _logger.LogInformation(
            "Launching Edge via {Route} (profile: {Profile}).",
            useProxy ? $"local proxy {proxyEndpoint}" : "DIRECT (no proxy)",
            persist ? dir : "throwaway");

        try
        {
            return new EdgeDriver(options);
        }
        catch (Exception) when (!persist)
        {
            // The throwaway directory's path never leaves this method, so if the
            // launch fails nobody else can clean it up; do it here, then rethrow
            // the original exception untouched.
            TryDeleteThrowawayProfile(dir);
            throw;
        }
    }

    /// <summary>
    /// Best-effort removal of a throwaway profile directory after a failed launch.
    /// I/O and permission failures are logged and swallowed so they cannot mask the
    /// launch exception being rethrown by the caller.
    /// </summary>
    private void TryDeleteThrowawayProfile(string dir)
    {
        try
        {
            if (Directory.Exists(dir))
            {
                Directory.Delete(dir, recursive: true);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogWarning(
                ex,
                "Could not remove throwaway Edge profile {Profile} after a failed launch.",
                dir);
        }
    }
}
|