using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// The exit IP a proxy lease actually resolves to, per ipinfo.io.
/// Every member is nullable; presumably a field missing from the response
/// simply stays <c>null</c> (confirm against the deserialization call site).
/// </summary>
/// <param name="Ip">The exit IP address reported by the IP-echo endpoint.</param>
/// <param name="City">City reported for the exit IP (presumably geolocation; not verified here).</param>
/// <param name="Region">Region reported for the exit IP (presumably geolocation; not verified here).</param>
/// <param name="Country">Country reported for the exit IP (exact format not established in this file).</param>
/// <param name="Org">
/// ASN + organisation, e.g. "AS7922 Comcast Cable". This is the tell for
/// residential vs. datacenter: a consumer ISP here means a real residential
/// exit; a hosting provider (OVH, Hetzner, AWS…) means datacenter dressed up.
/// </param>
/// <param name="Hostname">Hostname reported for the exit IP, if any (presumably reverse DNS — TODO confirm).</param>
/// <param name="Timezone">Timezone reported for the exit IP (presumably an IANA zone name — TODO confirm).</param>
public sealed record ProxyExitInfo(
string? Ip,
string? City,
string? Region,
string? Country,
string? Org,
string? Hostname,
string? Timezone);
/// <summary>
/// Smallest possible end-to-end check of the proxy plumbing: acquire a lease,
/// launch the real browser through it, and read back the exit IP from an
/// IP-echo endpoint. Costs a few KB, so it's the right first thing to run
/// against a metered residential plan — it proves auth works and shows whether
/// the IP is genuinely residential before we spend bandwidth on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
    /// <summary>IP-echo endpoint the browser is pointed at; its response is parsed as JSON.</summary>
    private const string IpEchoUrl = "https://ipinfo.io/json";

    // Case-insensitive so the response's JSON keys bind to the PascalCase
    // members of ProxyExitInfo regardless of the casing the endpoint uses.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger _logger;

    /// <summary>Creates a probe from its collaborators; nothing is started until <see cref="RunAsync"/>.</summary>
    /// <param name="provider">Source of the proxy lease to test.</param>
    /// <param name="proxyFactory">Builds the local forwarding proxy for an acquired lease.</param>
    /// <param name="factory">Builds the browser driver that is routed through the local proxy.</param>
    /// <param name="logger">Receives the lease and exit-IP log lines.</param>
    public ProxyProbe(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        ILogger logger)
    {
        _provider = provider;
        _proxyFactory = proxyFactory;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Acquires a lease for <paramref name="request"/>, starts a local forwarding
    /// proxy for it, drives a browser through that proxy to the IP-echo endpoint
    /// and returns the parsed response.
    /// </summary>
    /// <param name="request">Lease request handed to the proxy provider.</param>
    /// <returns>The exit details deserialized from the IP-echo response.</returns>
    /// <exception cref="InvalidOperationException">
    /// The browser returned no page text, the text contained no JSON object,
    /// or the JSON deserialized to <c>null</c>.
    /// </exception>
    /// <exception cref="JsonException">The extracted text was not valid JSON for <see cref="ProxyExitInfo"/>.</exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);
        _logger.LogInformation(
            "Acquired {Provider} lease (exit {Mode}).",
            lease.Provider, lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}");

        // Declared before the driver so it is disposed last: the finally block
        // below quits the browser first, then the local proxy is torn down
        // when the method scope exits.
        await using var localProxy = _proxyFactory.Create(lease).Start();
        var driver = _factory.Create(localProxy.Endpoint, blockImages: true);
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            driver.Navigate().GoToUrl(IpEchoUrl);

            // Read the document's text rather than the DOM so the browser's
            // built-in JSON viewer doesn't get in the way, then carve out the
            // JSON object it rendered.
            var rendered = ((IJavaScriptExecutor)driver)
                .ExecuteScript("return document.documentElement.innerText;") as string
                ?? throw new InvalidOperationException("Browser returned no page text.");

            // The type argument is required: it selects the (string, options)
            // overload and gives the result the members logged below.
            var info = JsonSerializer.Deserialize<ProxyExitInfo>(ExtractJson(rendered), JsonOptions)
                ?? throw new InvalidOperationException("IP-echo response was empty.");

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                info.Ip, info.City, info.Region, info.Country, info.Org);
            return info;
        }
        finally
        {
            // Always shut the browser down, even when navigation or parsing failed.
            driver.Quit();
        }
    }

    /// <summary>
    /// Returns the substring of <paramref name="text"/> spanning the first
    /// <c>{</c> through the last <c>}</c>, inclusive.
    /// </summary>
    /// <param name="text">Page text as rendered by the browser.</param>
    /// <returns>The outermost brace-delimited span of <paramref name="text"/>.</returns>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="text"/> has no <c>{</c>, or no <c>}</c> after the first <c>{</c>.
    /// </exception>
    private static string ExtractJson(string text)
    {
        var start = text.IndexOf('{');
        var end = text.LastIndexOf('}');
        if (start < 0 || end <= start)
        {
            throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
        }

        return text[start..(end + 1)];
    }
}