Add cs.money worker stack with per-worker IPRoyal residential proxy
Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
103
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs
Normal file
103
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs
Normal file
@@ -0,0 +1,103 @@
|
||||
using System.Text.Json;
|
||||
using BlueLaminate.Scraper.Browser;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// The exit IP a proxy lease actually resolves to, per ipinfo.io. Every field
/// is nullable because the echo response is deserialized as-is and may omit
/// any of them.
/// </summary>
/// <param name="Ip">The exit IP address reported by the echo endpoint.</param>
/// <param name="City">City reported for the exit IP.</param>
/// <param name="Region">Region reported for the exit IP.</param>
/// <param name="Country">Country reported for the exit IP.</param>
/// <param name="Org">
/// ASN + organisation, e.g. "AS7922 Comcast Cable". This is the tell for
/// residential vs. datacenter: a consumer ISP here means a real residential
/// exit; a hosting provider (OVH, Hetzner, AWS…) means datacenter dressed up.
/// </param>
/// <param name="Hostname">Hostname reported for the exit IP, if any.</param>
/// <param name="Timezone">Time zone reported for the exit IP.</param>
public sealed record ProxyExitInfo(
    string? Ip,
    string? City,
    string? Region,
    string? Country,
    string? Org,
    string? Hostname,
    string? Timezone);
|
||||
|
||||
/// <summary>
/// Smallest possible end-to-end check of the proxy plumbing: acquire a lease,
/// launch the real browser through it, and read back the exit IP from an
/// IP-echo endpoint. Costs a few KB, so it's the right first thing to run
/// against a metered residential plan — it proves auth works and shows whether
/// the IP is genuinely residential before we spend bandwidth on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
    private const string IpEchoUrl = "https://ipinfo.io/json";

    // Upper bound on how much page text is echoed into an exception message.
    // An HTML error or challenge page can be very large; the head of it is
    // enough to diagnose what came back.
    private const int MaxEchoedTextLength = 512;

    // The echo endpoint uses lower-case keys ("ip", "org", …) while the record
    // parameters are PascalCase, so matching must ignore case.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    /// <summary>Creates a probe over the given proxy and browser plumbing.</summary>
    /// <param name="provider">Source of proxy leases.</param>
    /// <param name="proxyFactory">Builds the local forwarding proxy for a lease.</param>
    /// <param name="factory">Builds the browser driver that is pointed at the local proxy.</param>
    /// <param name="logger">Receives the lease and exit-IP log lines.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public ProxyProbe(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        _provider = provider ?? throw new ArgumentNullException(nameof(provider));
        _proxyFactory = proxyFactory ?? throw new ArgumentNullException(nameof(proxyFactory));
        _factory = factory ?? throw new ArgumentNullException(nameof(factory));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Acquires a lease for <paramref name="request"/>, starts a local
    /// forwarding proxy for it, drives a browser through that proxy to the
    /// IP-echo endpoint and returns what the endpoint reported.
    /// </summary>
    /// <param name="request">The proxy request handed to the provider.</param>
    /// <returns>The exit information parsed from the echo response.</returns>
    /// <exception cref="InvalidOperationException">
    /// The browser returned no page text, the page contained no JSON object,
    /// the JSON deserialized to nothing, or it carried no exit IP (for example
    /// an error payload from the echo service).
    /// </exception>
    /// <remarks>
    /// The browser calls are synchronous and run on the calling thread; the
    /// method is asynchronous only for the local proxy's disposal.
    /// </remarks>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);
        _logger.LogInformation(
            "Acquired {Provider} lease (exit {Mode}).",
            lease.Provider, lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}");

        // Declared before the driver so it is disposed after the finally
        // block below has shut the browser down.
        await using var localProxy = _proxyFactory.Create(lease).Start();
        var driver = _factory.Create(localProxy.Endpoint, blockImages: true);
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            driver.Navigate().GoToUrl(IpEchoUrl);

            // Read the document's text rather than the DOM so the browser's
            // built-in JSON viewer doesn't get in the way, then carve out the
            // JSON object it rendered.
            var rendered = ((IJavaScriptExecutor)driver)
                .ExecuteScript("return document.documentElement.innerText;") as string
                ?? throw new InvalidOperationException("Browser returned no page text.");

            var json = ExtractJson(rendered);
            var info = JsonSerializer.Deserialize<ProxyExitInfo>(json, JsonOptions)
                ?? throw new InvalidOperationException("IP-echo response was empty.");

            // Any JSON object deserializes successfully (unknown keys are
            // ignored, missing ones become null), so an error payload would
            // otherwise be reported as a successful probe with no IP at all.
            if (string.IsNullOrWhiteSpace(info.Ip))
            {
                throw new InvalidOperationException(
                    $"IP-echo response carried no exit IP: {Truncate(json)}");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                info.Ip, info.City, info.Region, info.Country, info.Org);

            return info;
        }
        finally
        {
            driver.Quit();
        }
    }

    /// <summary>
    /// Returns the span of <paramref name="text"/> from its first <c>{</c> to
    /// its last <c>}</c>, inclusive.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="text"/> has no <c>{</c> followed by a later <c>}</c>.
    /// </exception>
    private static string ExtractJson(string text)
    {
        var start = text.IndexOf('{');
        var end = text.LastIndexOf('}');
        if (start < 0 || end <= start)
        {
            throw new InvalidOperationException(
                $"No JSON found in IP-echo response: {Truncate(text)}");
        }

        return text[start..(end + 1)];
    }

    /// <summary>
    /// Caps <paramref name="text"/> at <see cref="MaxEchoedTextLength"/>
    /// characters for use in exception messages; shorter text is returned
    /// unchanged.
    /// </summary>
    private static string Truncate(string text) =>
        text.Length <= MaxEchoedTextLength
            ? text
            : $"{text[..MaxEchoedTextLength]}… [{text.Length - MaxEchoedTextLength} more chars]";
}
|
||||
Reference in New Issue
Block a user