98 lines
3.4 KiB
C#
98 lines
3.4 KiB
C#
using System.Text.Json;
|
|
using BlueLaminate.Scraper.Browser;
|
|
using Microsoft.Extensions.Logging;
|
|
using OpenQA.Selenium;
|
|
|
|
namespace BlueLaminate.Scraper.Proxies;
|
|
|
|
/// <summary>
/// Describes where a proxy lease really exits to the internet, as reported by ipinfo.io.
/// Every field is optional and may be <c>null</c>.
/// </summary>
/// <param name="Ip">The exit IP address.</param>
/// <param name="City">City reported for the exit IP.</param>
/// <param name="Region">Region reported for the exit IP.</param>
/// <param name="Country">Country reported for the exit IP.</param>
/// <param name="Org">
/// ASN plus organisation name, e.g. "AS7922 Comcast Cable". Use it to tell
/// residential from datacenter exits: a consumer ISP indicates a genuine
/// residential exit, whereas a hosting provider (OVH, Hetzner, AWS…) indicates
/// a datacenter address presented as residential.
/// </param>
/// <param name="Hostname">Hostname reported for the exit IP.</param>
/// <param name="Timezone">Timezone reported for the exit IP.</param>
public sealed record ProxyExitInfo(
    string? Ip, string? City, string? Region, string? Country,
    string? Org, string? Hostname, string? Timezone);
|
|
|
|
/// <summary>
/// Minimal end-to-end smoke test for the proxy pipeline. It acquires a lease,
/// starts the real browser behind that lease, loads an IP-echo endpoint and
/// reports the exit IP the endpoint saw. Because it only transfers a few KB it
/// is the sensible first run against a metered residential plan: it confirms
/// that authentication works and reveals whether the exit is truly residential
/// before any bandwidth is spent on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
    /// <summary>Endpoint that echoes the caller's IP details back as JSON.</summary>
    private const string IpEchoUrl = "https://ipinfo.io/json";

    /// <summary>Upper bound on how long the echo page may take to load.</summary>
    private static readonly TimeSpan PageLoadTimeout = TimeSpan.FromSeconds(60);

    /// <summary>Case-insensitive matching of JSON keys onto record properties.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        PropertyNameCaseInsensitive = true
    };

    private readonly IProxyProvider _provider;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    /// <summary>Creates a probe from its collaborators.</summary>
    /// <param name="provider">Source of proxy leases.</param>
    /// <param name="factory">Creates the browser driver for a given lease.</param>
    /// <param name="logger">Receives the lease and exit-IP log lines.</param>
    public ProxyProbe(
        IProxyProvider provider,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        (_provider, _factory, _logger) = (provider, factory, logger);
    }

    /// <summary>
    /// Acquires a lease for <paramref name="request"/>, opens the IP-echo page
    /// through it and returns the parsed exit details.
    /// </summary>
    /// <param name="request">Describes the proxy lease to acquire.</param>
    /// <returns>The exit information parsed from the IP-echo page.</returns>
    /// <exception cref="InvalidOperationException">
    /// The browser produced no page text, the text held no JSON object, or the
    /// JSON deserialized to <c>null</c>.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);

        // A lease without a session id is logged as "rotating", otherwise as "sticky:<id>".
        var providerName = lease.Provider;
        var exitMode = lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}";
        _logger.LogInformation("Acquired {Provider} lease (exit {Mode}).", providerName, exitMode);

        var browser = await _factory.CreateAsync(lease, blockImages: true);
        try
        {
            browser.Manage().Timeouts().PageLoad = PageLoadTimeout;
            browser.Navigate().GoToUrl(IpEchoUrl);

            // Ask for the page's text instead of walking the DOM: the browser's
            // built-in JSON viewer would otherwise get in the way. The JSON
            // object is then cut out of whatever text was rendered.
            var scripting = (IJavaScriptExecutor)browser;
            if (scripting.ExecuteScript("return document.documentElement.innerText;") is not string pageText)
            {
                throw new InvalidOperationException("Browser returned no page text.");
            }

            var exit = JsonSerializer.Deserialize<ProxyExitInfo>(ExtractJson(pageText), JsonOptions);
            if (exit is null)
            {
                throw new InvalidOperationException("IP-echo response was empty.");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                exit.Ip,
                exit.City,
                exit.Region,
                exit.Country,
                exit.Org);

            return exit;
        }
        finally
        {
            // Always shut the browser down, whether the probe succeeded or threw.
            browser.Quit();
        }
    }

    /// <summary>
    /// Returns the substring of <paramref name="text"/> running from the first
    /// '{' through the last '}' inclusive.
    /// </summary>
    /// <param name="text">Rendered page text expected to contain a JSON object.</param>
    /// <returns>The outermost brace-delimited span of <paramref name="text"/>.</returns>
    /// <exception cref="InvalidOperationException">
    /// There is no '{', or no '}' located after the first '{'.
    /// </exception>
    private static string ExtractJson(string text)
    {
        int open = text.IndexOf('{');
        int close = text.LastIndexOf('}');

        bool hasObject = open >= 0 && close > open;
        if (!hasObject)
        {
            throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
        }

        return text.Substring(open, close - open + 1);
    }
}
|