89 lines
3.6 KiB
C#
89 lines
3.6 KiB
C#
using BlueLaminate.Scraper.Proxies;
|
|
using Microsoft.Extensions.Logging;
|
|
using OpenQA.Selenium;
|
|
using OpenQA.Selenium.Edge;
|
|
|
|
namespace BlueLaminate.Scraper.Browser;
|
|
|
|
/// <summary>
/// Builds a non-headless Edge (Chromium) WebDriver routed through a
/// <see cref="ProxyLease"/>. Two things make this non-trivial:
/// <list type="bullet">
/// <item>Proxy authentication. Chromium can't auto-fill the gateway's auth
/// dialog under automation, and the classic extension trick relies on
/// Manifest V2 which current Chromium disables. Instead we answer the proxy's
/// 407 challenge through the DevTools (CDP) auth handler, which works
/// non-headless and needs no extension.</item>
/// <item>Bandwidth. The residential plan is metered per GB, so images are
/// disabled at the content-settings level. Cloudflare gates on JS execution and
/// TLS/behaviour, not whether pictures render, so this stays realistic.</item>
/// </list>
/// Each driver gets a throwaway user-data dir so runs never share cookies and
/// never touch the user's real Edge profile.
/// </summary>
/// <remarks>
/// The factory deletes the throwaway profile directory only when driver
/// creation fails. For a successfully created driver the directory is left in
/// place, because the running browser is using it.
/// </remarks>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    /// <summary>Creates a factory that reports launches and cleanup problems to <paramref name="logger"/>.</summary>
    /// <param name="logger">Logger used for launch and cleanup messages.</param>
    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Launches Edge through the proxy described by <paramref name="lease"/> and
    /// registers a network authentication handler that answers with the lease's
    /// username and password.
    /// </summary>
    /// <param name="lease">Proxy endpoint, credentials and session info for this launch.</param>
    /// <param name="blockImages">
    /// When <c>true</c> (the default), sets the managed content setting that
    /// blocks image loading.
    /// </param>
    /// <returns>The started driver, with network monitoring already running.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="lease"/> is <c>null</c>.</exception>
    /// <remarks>
    /// If launching the browser or wiring up authentication fails, the browser
    /// is quit (when it was started), the temporary profile directory is
    /// removed on a best-effort basis, and the original exception is rethrown.
    /// </remarks>
    public async Task<IWebDriver> CreateAsync(ProxyLease lease, bool blockImages = true)
    {
        ArgumentNullException.ThrowIfNull(lease);

        // Isolated, disposable profile per launch.
        var profileDir = Path.Combine(Path.GetTempPath(), "bluelaminate-edge", Guid.NewGuid().ToString("N"));
        Directory.CreateDirectory(profileDir);

        var options = BuildOptions(lease, profileDir, blockImages);

        _logger.LogInformation(
            "Launching Edge via proxy {Endpoint} (provider {Provider}, session {Session}).",
            lease.Endpoint, lease.Provider, lease.SessionId ?? "rotating");

        EdgeDriver driver;
        try
        {
            driver = new EdgeDriver(options);
        }
        catch
        {
            // The browser never started, so nothing else will ever clean up
            // the profile directory we just created.
            TryDeleteProfileDir(profileDir);
            throw;
        }

        try
        {
            // Answer the gateway's proxy-auth (407) challenge with the lease
            // credentials. UriMatcher returns true so it applies to every
            // request, since the challenge originates from the proxy itself.
            var network = driver.Manage().Network;
            network.AddAuthenticationHandler(new NetworkAuthenticationHandler
            {
                UriMatcher = _ => true,
                Credentials = new PasswordCredentials(lease.Username, lease.Password),
            });
            await network.StartMonitoring();
        }
        catch
        {
            // Cleanup must not replace the exception that brought us here,
            // so both helpers log their own failures instead of throwing.
            TryQuit(driver);
            TryDeleteProfileDir(profileDir);
            throw;
        }

        return driver;
    }

    /// <summary>Assembles the Edge launch arguments and profile preferences for one launch.</summary>
    private static EdgeOptions BuildOptions(ProxyLease lease, string profileDir, bool blockImages)
    {
        var options = new EdgeOptions();

        // Route browser traffic through the gateway via the launch argument
        // rather than EdgeOptions.Proxy. Setting Proxy makes Selenium hand the
        // gateway to Selenium Manager for the driver *download* too, which fails
        // because that step can't authenticate. The arg scopes the proxy to the
        // browser only; credentials are answered via CDP after launch. No
        // scheme = all protocols use the gateway.
        options.AddArgument($"--proxy-server={lease.Endpoint}");

        // Reduce the most obvious automation tells; residential exit + a real
        // (non-headless) browser do the rest.
        options.AddArgument("--disable-blink-features=AutomationControlled");
        options.AddExcludedArgument("enable-automation");
        options.AddArgument("--no-first-run");
        options.AddArgument("--no-default-browser-check");
        options.AddArgument("--start-maximized");

        options.AddArgument($"--user-data-dir={profileDir}");

        if (blockImages)
        {
            // 2 = block, in Chromium's content-settings preference values.
            options.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
        }

        return options;
    }

    /// <summary>Quits the driver, logging (not throwing) if the quit itself fails.</summary>
    private void TryQuit(IWebDriver driver)
    {
        try
        {
            driver.Quit();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to quit Edge after driver setup failed.");
        }
    }

    /// <summary>Deletes the temporary profile directory, logging (not throwing) if it cannot be removed.</summary>
    private void TryDeleteProfileDir(string profileDir)
    {
        try
        {
            if (Directory.Exists(profileDir))
            {
                Directory.Delete(profileDir, recursive: true);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            // Files may still be locked by a browser process that is shutting
            // down; leaving the directory behind is preferable to failing.
            _logger.LogWarning(ex, "Could not delete temporary Edge profile {ProfileDir}.", profileDir);
        }
    }
}
|