using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;

namespace BlueLaminate.Scraper.Proxies;

/// <summary>The exit IP a proxy lease actually resolves to, per ipinfo.io.</summary>
/// <param name="Org">
/// ASN + organisation, e.g. "AS7922 Comcast Cable". This is the tell for
/// residential vs. datacenter: a consumer ISP here means a real residential
/// exit; a hosting provider (OVH, Hetzner, AWS…) means datacenter dressed up.
/// </param>
public sealed record ProxyExitInfo(
    string? Ip,
    string? City,
    string? Region,
    string? Country,
    string? Org,
    string? Hostname,
    string? Timezone);

/// <summary>
/// Smallest possible end-to-end check of the proxy plumbing: acquire a lease,
/// launch the real browser through it, and read back the exit IP from an
/// IP-echo endpoint. Costs a few KB, so it's the right first thing to run
/// against a metered residential plan — it proves auth works and shows whether
/// the IP is genuinely residential before we spend bandwidth on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
    private const string IpEchoUrl = "https://ipinfo.io/json";

    // Case-insensitive matching lets the echo endpoint's JSON property names
    // bind to the PascalCase record parameters regardless of their casing.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger _logger;

    /// <summary>Creates a probe from the collaborators it drives.</summary>
    /// <param name="provider">Source of the proxy lease to test.</param>
    /// <param name="proxyFactory">Builds the local forwarding proxy for a lease.</param>
    /// <param name="factory">Builds the browser driver that goes through the local proxy.</param>
    /// <param name="logger">Receives the lease and exit-IP log lines.</param>
    public ProxyProbe(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        ILogger logger)
    {
        _provider = provider;
        _proxyFactory = proxyFactory;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Acquires a lease for <paramref name="request"/>, starts a local forwarding
    /// proxy for it, loads the IP-echo URL in a browser routed through that proxy,
    /// and returns the exit information parsed from the page.
    /// </summary>
    /// <param name="request">The proxy request handed to the provider.</param>
    /// <returns>The exit information deserialized from the IP-echo response.</returns>
    /// <exception cref="InvalidOperationException">
    /// The browser returned no page text, the text contained no JSON object, or
    /// the JSON deserialized to <c>null</c>.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);
        _logger.LogInformation(
            "Acquired {Provider} lease (exit {Mode}).",
            lease.Provider,
            lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}");

        // The local proxy is disposed asynchronously when this method exits,
        // after the browser has been shut down by the finally block below.
        await using var localProxy = _proxyFactory.Create(lease).Start();
        var driver = _factory.Create(localProxy.Endpoint, blockImages: true);
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            driver.Navigate().GoToUrl(IpEchoUrl);

            // Read the document's text rather than the DOM so the browser's
            // built-in JSON viewer doesn't get in the way, then carve out the
            // JSON object it rendered.
            var rendered = ((IJavaScriptExecutor)driver)
                .ExecuteScript("return document.documentElement.innerText;") as string
                ?? throw new InvalidOperationException("Browser returned no page text.");

            // The explicit type argument is required: it cannot be inferred
            // from a (string, JsonSerializerOptions) argument list.
            var info = JsonSerializer.Deserialize<ProxyExitInfo>(ExtractJson(rendered), JsonOptions)
                ?? throw new InvalidOperationException("IP-echo response was empty.");

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                info.Ip,
                info.City,
                info.Region,
                info.Country,
                info.Org);

            return info;
        }
        finally
        {
            // Always shut the browser down, even when navigation or parsing fails.
            driver.Quit();
        }
    }

    /// <summary>
    /// Returns the substring of <paramref name="text"/> from its first <c>{</c>
    /// through its last <c>}</c>, inclusive.
    /// </summary>
    /// <param name="text">The page text expected to contain a JSON object.</param>
    /// <exception cref="InvalidOperationException">
    /// No <c>{</c> was found, or the last <c>}</c> does not come after it.
    /// </exception>
    private static string ExtractJson(string text)
    {
        var start = text.IndexOf('{');
        var end = text.LastIndexOf('}');
        if (start < 0 || end <= start)
        {
            throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
        }

        return text[start..(end + 1)];
    }
}