Files
Operation-Blue-Laminate-v2/BlueLaminate/BlueLaminate.Scraper/Browser/BrowserDriverFactory.cs
2026-05-29 22:08:32 -05:00

89 lines
3.6 KiB
C#

using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
using OpenQA.Selenium.Edge;
namespace BlueLaminate.Scraper.Browser;
/// <summary>
/// Builds a non-headless Edge (Chromium) WebDriver routed through a
/// <see cref="ProxyLease"/>. Two things make this non-trivial:
/// <list type="bullet">
/// <item>Proxy authentication. Chromium can't auto-fill the gateway's auth
/// dialog under automation, and the classic extension trick relies on
/// Manifest V2 which current Chromium disables. Instead we answer the proxy's
/// 407 challenge through the DevTools (CDP) auth handler, which works
/// non-headless and needs no extension.</item>
/// <item>Bandwidth. The residential plan is metered per GB, so images are
/// disabled at the content-settings level. Cloudflare gates on JS execution and
/// TLS/behaviour, not whether pictures render, so this stays realistic.</item>
/// </list>
/// Each driver gets a throwaway user-data dir so runs never share cookies and
/// never touch the user's real Edge profile. The factory removes that directory
/// only when driver creation fails; for a successfully created driver the
/// directory is left in place and this class does not clean it up.
/// </summary>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    /// <summary>Creates a factory that reports launches and cleanup problems to <paramref name="logger"/>.</summary>
    /// <param name="logger">Logger used for launch and cleanup diagnostics.</param>
    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Launches Edge through the lease's proxy endpoint and registers a
    /// network authentication handler that supplies the lease credentials.
    /// </summary>
    /// <param name="lease">Proxy endpoint and credentials the browser should use.</param>
    /// <param name="blockImages">
    /// When <c>true</c> (the default), images are blocked through the
    /// <c>profile.managed_default_content_settings.images</c> preference.
    /// </param>
    /// <returns>A started driver with network monitoring enabled. The caller owns it and must quit it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="lease"/> is <c>null</c>.</exception>
    /// <remarks>
    /// Any failure while launching or wiring authentication is rethrown
    /// unchanged after best-effort cleanup (driver quit, profile directory removed).
    /// </remarks>
    public async Task<IWebDriver> CreateAsync(ProxyLease lease, bool blockImages = true)
    {
        ArgumentNullException.ThrowIfNull(lease);

        // Isolated, disposable profile per launch.
        var profileDir = Path.Combine(Path.GetTempPath(), "bluelaminate-edge", Guid.NewGuid().ToString("N"));
        Directory.CreateDirectory(profileDir);

        EdgeDriver driver;
        try
        {
            var options = BuildOptions(lease, profileDir, blockImages);

            _logger.LogInformation(
                "Launching Edge via proxy {Endpoint} (provider {Provider}, session {Session}).",
                lease.Endpoint, lease.Provider, lease.SessionId ?? "rotating");

            driver = new EdgeDriver(options);
        }
        catch
        {
            // No driver was handed out, so nothing else will ever use this profile.
            TryDeleteProfile(profileDir);
            throw;
        }

        try
        {
            // Answer the gateway's proxy-auth (407) challenge with the lease
            // credentials. UriMatcher returns true so it applies to every
            // request, since the challenge originates from the proxy itself.
            var network = driver.Manage().Network;
            network.AddAuthenticationHandler(new NetworkAuthenticationHandler
            {
                UriMatcher = _ => true,
                Credentials = new PasswordCredentials(lease.Username, lease.Password),
            });
            await network.StartMonitoring();
        }
        catch
        {
            // Don't leave a browser running without proxy credentials wired up,
            // and don't let a cleanup failure hide the exception being rethrown.
            QuitQuietly(driver);
            TryDeleteProfile(profileDir);
            throw;
        }

        return driver;
    }

    /// <summary>Assembles the Edge launch options: proxy routing, automation-tell reduction, profile dir and image blocking.</summary>
    private static EdgeOptions BuildOptions(ProxyLease lease, string profileDir, bool blockImages)
    {
        var options = new EdgeOptions();

        // Route browser traffic through the gateway via the launch argument
        // rather than EdgeOptions.Proxy. Setting Proxy makes Selenium hand the
        // gateway to Selenium Manager for the driver *download* too, which fails
        // because that step can't authenticate. The arg scopes the proxy to the
        // browser only; credentials are answered later via CDP. No scheme = all
        // protocols use the gateway.
        options.AddArgument($"--proxy-server={lease.Endpoint}");

        // Reduce the most obvious automation tells; residential exit + a real
        // (non-headless) browser do the rest.
        options.AddArgument("--disable-blink-features=AutomationControlled");
        options.AddExcludedArgument("enable-automation");
        options.AddArgument("--no-first-run");
        options.AddArgument("--no-default-browser-check");
        options.AddArgument("--start-maximized");

        options.AddArgument($"--user-data-dir={profileDir}");

        if (blockImages)
        {
            // 2 is the content-setting value the original code uses to disable images.
            options.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
        }

        return options;
    }

    /// <summary>Quits <paramref name="driver"/>, logging instead of throwing if the quit itself fails.</summary>
    private void QuitQuietly(IWebDriver driver)
    {
        try
        {
            driver.Quit();
        }
        catch (Exception ex)
        {
            // Deliberately broad: this runs while another exception is in flight
            // and that one must reach the caller.
            _logger.LogWarning(ex, "Failed to quit Edge after driver setup failed.");
        }
    }

    /// <summary>Best-effort recursive removal of a profile directory that no driver will use.</summary>
    private void TryDeleteProfile(string profileDir)
    {
        try
        {
            if (Directory.Exists(profileDir))
            {
                Directory.Delete(profileDir, recursive: true);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Files may still be held by a browser process that is shutting down;
            // a leftover temp dir is not worth failing over.
            _logger.LogWarning(ex, "Could not delete temporary Edge profile {ProfileDir}.", profileDir);
        }
    }
}