Files
Operation-Blue-Laminate-v2/BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs
2026-05-29 22:08:32 -05:00

98 lines
3.4 KiB
C#

using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>The exit IP a proxy lease actually resolves to, per ipinfo.io.</summary>
/// <remarks>
/// Every field is nullable, so a value the echo response does not supply is
/// represented as <c>null</c> rather than an empty string.
/// </remarks>
/// <param name="Ip">The exit IP address reported by the echo endpoint.</param>
/// <param name="City">City reported for the exit IP.</param>
/// <param name="Region">Region reported for the exit IP.</param>
/// <param name="Country">
/// Country reported for the exit IP (presumably a two-letter country code —
/// confirm against the ipinfo.io response format).
/// </param>
/// <param name="Org">
/// ASN + organisation, e.g. "AS7922 Comcast Cable". This is the tell for
/// residential vs. datacenter: a consumer ISP here means a real residential
/// exit; a hosting provider (OVH, Hetzner, AWS…) means datacenter dressed up.
/// </param>
/// <param name="Hostname">
/// Hostname reported for the exit IP (presumably its reverse-DNS name —
/// confirm against the ipinfo.io response format).
/// </param>
/// <param name="Timezone">
/// Time zone reported for the exit IP (presumably an IANA zone name —
/// confirm against the ipinfo.io response format).
/// </param>
public sealed record ProxyExitInfo(
string? Ip,
string? City,
string? Region,
string? Country,
string? Org,
string? Hostname,
string? Timezone);
/// <summary>
/// Smallest possible end-to-end check of the proxy plumbing: acquire a lease,
/// launch the real browser through it, and read back the exit IP from an
/// IP-echo endpoint. Costs a few KB, so it's the right first thing to run
/// against a metered residential plan — it proves auth works and shows whether
/// the IP is genuinely residential before we spend bandwidth on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
    /// <summary>IP-echo endpoint the probe navigates to; it answers with a JSON document.</summary>
    private const string IpEchoUrl = "https://ipinfo.io/json";

    /// <summary>
    /// Case-insensitive matching so the JSON property names bind to the
    /// PascalCase parameters of <see cref="ProxyExitInfo"/>.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly IProxyProvider _provider;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    /// <summary>Creates a probe over the given proxy provider and browser factory.</summary>
    /// <param name="provider">Source of the proxy lease to test.</param>
    /// <param name="factory">Creates the browser driver that is routed through the lease.</param>
    /// <param name="logger">Receives the lease and exit-IP log lines.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public ProxyProbe(
        IProxyProvider provider,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        // Fail at construction instead of with a NullReferenceException mid-probe.
        ArgumentNullException.ThrowIfNull(provider);
        ArgumentNullException.ThrowIfNull(factory);
        ArgumentNullException.ThrowIfNull(logger);

        _provider = provider;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Acquires a lease for <paramref name="request"/>, opens the IP-echo page
    /// in a browser created for that lease, and returns the parsed exit details.
    /// The browser is always quit before the method completes.
    /// </summary>
    /// <param name="request">Describes the proxy lease to acquire from the provider.</param>
    /// <returns>The exit information parsed from the IP-echo response.</returns>
    /// <exception cref="InvalidOperationException">
    /// The browser returned no page text, the page contained no JSON object,
    /// the JSON deserialized to <c>null</c>, or the JSON carried no exit IP.
    /// </exception>
    /// <exception cref="JsonException">
    /// The text carved out of the page is not valid JSON for <see cref="ProxyExitInfo"/>.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);
        _logger.LogInformation(
            "Acquired {Provider} lease (exit {Mode}).",
            lease.Provider, lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}");

        var driver = await _factory.CreateAsync(lease, blockImages: true);
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            driver.Navigate().GoToUrl(IpEchoUrl);

            // Read the document's text rather than the DOM so the browser's
            // built-in JSON viewer doesn't get in the way, then carve out the
            // JSON object it rendered.
            var rendered = ((IJavaScriptExecutor)driver)
                .ExecuteScript("return document.documentElement.innerText;") as string
                ?? throw new InvalidOperationException("Browser returned no page text.");

            var json = ExtractJson(rendered);
            var info = JsonSerializer.Deserialize<ProxyExitInfo>(json, JsonOptions)
                ?? throw new InvalidOperationException("IP-echo response was empty.");

            // A JSON object without an "ip" field (e.g. an error document from
            // the endpoint) still deserializes, just with every field null.
            // Treat that as a failed probe instead of reporting a null exit IP.
            if (string.IsNullOrWhiteSpace(info.Ip))
            {
                throw new InvalidOperationException(
                    $"IP-echo response did not include an exit IP: {json}");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                info.Ip, info.City, info.Region, info.Country, info.Org);
            return info;
        }
        finally
        {
            driver.Quit();
        }
    }

    /// <summary>
    /// Returns the substring of <paramref name="text"/> from its first
    /// <c>{</c> through its last <c>}</c>, inclusive.
    /// </summary>
    /// <param name="text">Page text expected to contain one JSON object.</param>
    /// <returns>The candidate JSON object text.</returns>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="text"/> has no <c>{</c>, or no <c>}</c> after the first <c>{</c>.
    /// </exception>
    private static string ExtractJson(string text)
    {
        var start = text.IndexOf('{');
        var end = text.LastIndexOf('}');
        if (start < 0 || end <= start)
        {
            throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
        }

        return text[start..(end + 1)];
    }
}