Add cs.money worker stack with per-worker IPRoyal residential proxy
Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -23,9 +23,14 @@ public sealed class IpRoyalProxyProvider : IProxyProvider
|
||||
public IpRoyalProxyProvider(string username, string password)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(username))
|
||||
{
|
||||
throw new ArgumentException("IPRoyal username is required.", nameof(username));
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(password))
|
||||
{
|
||||
throw new ArgumentException("IPRoyal password is required.", nameof(password));
|
||||
}
|
||||
|
||||
_username = username;
|
||||
_password = password;
|
||||
@@ -41,7 +46,9 @@ public sealed class IpRoyalProxyProvider : IProxyProvider
|
||||
|
||||
// Country first; the router picks one at random when several are listed.
|
||||
if (!string.IsNullOrWhiteSpace(request.Country))
|
||||
{
|
||||
password += $"_country-{request.Country.Trim().ToLowerInvariant()}";
|
||||
}
|
||||
|
||||
if (request.Sticky)
|
||||
{
|
||||
|
||||
@@ -0,0 +1,232 @@
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
|
||||
/// A tiny in-process HTTP proxy that listens on 127.0.0.1 and chains every request
|
||||
/// to an upstream gateway (the residential <see cref="ProxyLease"/>), injecting the
|
||||
/// gateway's <c>Proxy-Authorization</c> header itself.
|
||||
/// <para>
|
||||
/// Why this exists: Chromium ignores credentials in <c>--proxy-server</c>, and the
|
||||
/// only in-browser ways to answer the gateway's 407 are a CDP auth handler (which
|
||||
/// is a Cloudflare automation tell) or a Manifest V2 extension (disabled in current
|
||||
/// Chromium). By terminating the browser→proxy hop locally and adding the auth here,
|
||||
/// the browser talks to an <em>auth-free</em> local endpoint and we run with zero
|
||||
/// CDP — far less detectable — while the upstream still carries the IPRoyal
|
||||
/// username/password (and its baked-in country/session params).
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// HTTPS (the only thing cs.money serves) flows through the <c>CONNECT</c> tunnel:
|
||||
/// we open the tunnel to the upstream with auth, then relay raw bytes both ways so
|
||||
/// the browser does TLS end-to-end with the real host — this proxy never sees
|
||||
/// plaintext. Plain HTTP is forwarded best-effort for the occasional non-TLS call.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public sealed class LocalForwardingProxy : IAsyncDisposable
{
    // Upper bound on a header block; anything longer is treated as a garbage stream.
    private const int MaxHeaderLength = 64 * 1024;

    private readonly ProxyLease _upstream;
    private readonly ILogger _logger;
    private readonly TcpListener _listener;
    private readonly CancellationTokenSource _cts = new();
    private readonly string _authHeader;
    private Task? _acceptLoop;
    private int _disposed; // 0 = live, 1 = DisposeAsync has run

    /// <summary>
    /// Prepares (but does not bind) a loopback listener and pre-computes the
    /// <c>Proxy-Authorization</c> header from the lease's username/password.
    /// </summary>
    /// <param name="upstream">Gateway lease every connection is chained to.</param>
    /// <param name="logger">Sink for lifecycle and per-connection diagnostics.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public LocalForwardingProxy(ProxyLease upstream, ILogger logger)
    {
        ArgumentNullException.ThrowIfNull(upstream);
        ArgumentNullException.ThrowIfNull(logger);

        _upstream = upstream;
        _logger = logger;
        _listener = new TcpListener(IPAddress.Loopback, 0); // ephemeral port

        // UTF-8 rather than ASCII: ASCII silently turns any non-ASCII credential
        // character into '?'. For pure-ASCII credentials the bytes are identical.
        var token = Convert.ToBase64String(
            Encoding.UTF8.GetBytes($"{upstream.Username}:{upstream.Password}"));
        _authHeader = $"Proxy-Authorization: Basic {token}\r\n";
    }

    /// <summary>"127.0.0.1:port" — pass this to the browser's <c>--proxy-server</c>.</summary>
    public string Endpoint { get; private set; } = "";

    /// <summary>Bind the local port and start accepting browser connections.</summary>
    /// <returns>This instance, so construction and start can be chained.</returns>
    /// <exception cref="ObjectDisposedException">The proxy was already disposed.</exception>
    public LocalForwardingProxy Start()
    {
        ObjectDisposedException.ThrowIf(Volatile.Read(ref _disposed) != 0, this);

        // Idempotent: a second call must not spawn a second accept loop.
        if (_acceptLoop is not null)
        {
            return this;
        }

        _listener.Start();
        var port = ((IPEndPoint)_listener.LocalEndpoint).Port;
        Endpoint = $"127.0.0.1:{port}";
        _acceptLoop = Task.Run(() => AcceptLoopAsync(_cts.Token));
        _logger.LogInformation(
            "Local forwarding proxy listening on {Endpoint} → upstream {Upstream} ({Provider}).",
            Endpoint, _upstream.Endpoint, _upstream.Provider);
        return this;
    }

    // Accepts connections until cancelled, handing each one to its own handler task.
    private async Task AcceptLoopAsync(CancellationToken ct)
    {
        while (!ct.IsCancellationRequested)
        {
            TcpClient client;
            try
            {
                client = await _listener.AcceptTcpClientAsync(ct);
            }
            catch (OperationCanceledException)
            {
                break;
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Accept failed.");
                continue;
            }

            // Fire-and-forget per connection; exceptions are swallowed per client so
            // one bad tunnel never takes down the listener. The token is deliberately
            // NOT passed to Task.Run: a cancelled token would drop the work item
            // before it runs and the accepted socket would never be disposed.
            _ = Task.Run(() => HandleClientAsync(client, ct));
        }
    }

    // Reads the request header from the browser and dispatches to the CONNECT or
    // plain-HTTP path. Owns (and disposes) the client socket.
    private async Task HandleClientAsync(TcpClient client, CancellationToken ct)
    {
        using (client)
        {
            try
            {
                // Inside the try so a socket that died right after accept is logged
                // here instead of faulting the unobserved handler task.
                client.NoDelay = true;

                var clientStream = client.GetStream();
                var header = await ReadHeaderAsync(clientStream, ct);
                if (header is null)
                {
                    return;
                }

                var requestLine = header.Split("\r\n", 2)[0];
                var parts = requestLine.Split(' ');
                if (parts.Length < 2)
                {
                    return;
                }

                var method = parts[0];
                if (method.Equals("CONNECT", StringComparison.OrdinalIgnoreCase))
                {
                    await HandleConnectAsync(clientStream, parts[1], ct);
                }
                else
                {
                    await HandlePlainAsync(clientStream, header, ct);
                }
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Client connection error.");
            }
        }
    }

    // HTTPS path: open an authenticated CONNECT tunnel upstream, then relay raw bytes.
    private async Task HandleConnectAsync(NetworkStream clientStream, string target, CancellationToken ct)
    {
        using var upstream = new TcpClient { NoDelay = true };
        await upstream.ConnectAsync(_upstream.Host, _upstream.Port, ct);
        var upstreamStream = upstream.GetStream();

        // Latin-1 is the inverse of the byte-to-char mapping used by ReadHeaderAsync,
        // so the target is written back with exactly the bytes the browser sent.
        var connect = $"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{_authHeader}\r\n";
        await upstreamStream.WriteAsync(Encoding.Latin1.GetBytes(connect), ct);

        var upstreamHeader = await ReadHeaderAsync(upstreamStream, ct);
        if (upstreamHeader is null || !IsTunnelEstablished(upstreamHeader))
        {
            var status = upstreamHeader?.Split("\r\n", 2)[0] ?? "no response";
            _logger.LogWarning("Upstream refused CONNECT {Target}: {Status}", target, status);
            var resp = "HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n";
            await clientStream.WriteAsync(Encoding.ASCII.GetBytes(resp), ct);
            return;
        }

        await clientStream.WriteAsync(
            Encoding.ASCII.GetBytes("HTTP/1.1 200 Connection established\r\n\r\n"), ct);

        await RelayAsync(clientStream, upstreamStream, ct);
    }

    // True when the upstream's status line is an HTTP/1.x 2xx. Only the status line
    // is inspected, so a response without a reason phrase ("HTTP/1.1 200") is
    // accepted too; any 2xx means the tunnel is up.
    private static bool IsTunnelEstablished(string header)
    {
        var statusLine = header.Split("\r\n", 2)[0];
        if (!statusLine.StartsWith("HTTP/1.", StringComparison.Ordinal))
        {
            return false;
        }

        var fields = statusLine.Split(' ', 3);
        if (fields.Length < 2)
        {
            return false;
        }

        var code = fields[1];
        return code.Length == 3
            && code[0] == '2'
            && char.IsDigit(code[1])
            && char.IsDigit(code[2]);
    }

    // Plain-HTTP path: re-inject the request upstream with auth, then relay both ways.
    // Best-effort: only the first request on the connection gets the header injected.
    private async Task HandlePlainAsync(NetworkStream clientStream, string header, CancellationToken ct)
    {
        var hasHost = header.Split("\r\n")
            .Any(l => l.StartsWith("Host:", StringComparison.OrdinalIgnoreCase));
        if (!hasHost)
        {
            return;
        }

        using var upstream = new TcpClient { NoDelay = true };
        await upstream.ConnectAsync(_upstream.Host, _upstream.Port, ct);
        var upstreamStream = upstream.GetStream();

        // Insert the Proxy-Authorization header right after the request line.
        // ReadHeaderAsync only returns complete blocks, so the CRLF is always found.
        var idx = header.IndexOf("\r\n", StringComparison.Ordinal);
        var rewritten = header[..(idx + 2)] + _authHeader + header[(idx + 2)..];

        // Latin-1 round-trips every byte of the original header; ASCII would
        // replace any byte above 0x7F with '?' and corrupt the request.
        await upstreamStream.WriteAsync(Encoding.Latin1.GetBytes(rewritten), ct);

        await RelayAsync(clientStream, upstreamStream, ct);
    }

    // Pipe both directions until either side closes, then stop the other direction
    // and observe its outcome so no faulted task is left unobserved once the
    // caller disposes the sockets.
    private static async Task RelayAsync(NetworkStream a, NetworkStream b, CancellationToken ct)
    {
        using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct);
        var toUpstream = a.CopyToAsync(b, linked.Token);
        var toClient = b.CopyToAsync(a, linked.Token);

        await Task.WhenAny(toUpstream, toClient);
        await linked.CancelAsync();

        try
        {
            await Task.WhenAll(toUpstream, toClient);
        }
        catch (Exception ex) when (ex is OperationCanceledException
                                       or System.IO.IOException
                                       or ObjectDisposedException
                                       or SocketException)
        {
            // Expected during teardown: the surviving direction was cancelled or the
            // peer reset the connection. Anything else propagates to the caller's log.
        }
    }

    // Read up to the end of the HTTP header block (CRLFCRLF), one byte at a time so
    // nothing past the header is consumed. Each byte maps to the char with the same
    // code point (Latin-1). Returns null when the stream ends or exceeds the size
    // guard before a complete header block has arrived.
    private static async Task<string?> ReadHeaderAsync(NetworkStream stream, CancellationToken ct)
    {
        var buffer = new byte[1];
        var sb = new StringBuilder(256);
        while (sb.Length <= MaxHeaderLength)
        {
            var read = await stream.ReadAsync(buffer, ct);
            if (read == 0)
            {
                // EOF before the terminator: a truncated header is not a request.
                return null;
            }

            sb.Append((char)buffer[0]);
            if (sb.Length >= 4
                && sb[^1] == '\n' && sb[^2] == '\r' && sb[^3] == '\n' && sb[^4] == '\r')
            {
                return sb.ToString();
            }
        }

        // Guard against a runaway/garbage stream.
        return null;
    }

    /// <summary>
    /// Cancels in-flight work, stops the listener and waits for the accept loop to
    /// exit. Safe to call more than once; only the first call does anything.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (Interlocked.Exchange(ref _disposed, 1) != 0)
        {
            return;
        }

        await _cts.CancelAsync();
        _listener.Stop();
        if (_acceptLoop is not null)
        {
            try
            {
                await _acceptLoop;
            }
            catch (OperationCanceledException)
            {
                // expected on shutdown
            }
        }

        _cts.Dispose();
    }
}
|
||||
@@ -0,0 +1,21 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
|
||||
/// Creates <see cref="LocalForwardingProxy"/> instances with a logger supplied from
|
||||
/// DI, so consumers (the proxy probe, the cs.money capture) can spin up a per-run
|
||||
/// local proxy without depending on <see cref="ILoggerFactory"/> directly.
|
||||
/// </summary>
|
||||
public sealed class LocalForwardingProxyFactory
{
    // Shared by every proxy this factory hands out.
    private readonly ILogger<LocalForwardingProxy> _logger;

    /// <summary>Captures the DI-supplied logger that created proxies will write to.</summary>
    /// <param name="logger">Logger categorised for <see cref="LocalForwardingProxy"/>.</param>
    public LocalForwardingProxyFactory(ILogger<LocalForwardingProxy> logger) => _logger = logger;

    /// <summary>
    /// Constructs a local proxy that chains to <paramref name="upstream"/>. The proxy
    /// is returned unstarted; the caller decides when to start and dispose it.
    /// </summary>
    /// <param name="upstream">Gateway lease the new proxy will forward to.</param>
    /// <returns>A fresh, not-yet-started <see cref="LocalForwardingProxy"/>.</returns>
    public LocalForwardingProxy Create(ProxyLease upstream)
    {
        return new LocalForwardingProxy(upstream, _logger);
    }
}
|
||||
103
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs
Normal file
103
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs
Normal file
@@ -0,0 +1,103 @@
|
||||
using System.Text.Json;
|
||||
using BlueLaminate.Scraper.Browser;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>The exit IP a proxy lease actually resolves to, per ipinfo.io.</summary>
|
||||
/// <param name="Org">
|
||||
/// ASN + organisation, e.g. "AS7922 Comcast Cable". This is the tell for
|
||||
/// residential vs. datacenter: a consumer ISP here means a real residential
|
||||
/// exit; a hosting provider (OVH, Hetzner, AWS…) means datacenter dressed up.
|
||||
/// </param>
|
||||
// Positional record; ProxyProbe deserializes the IP-echo JSON into it with
// case-insensitive property matching. Every field is nullable because the
// echo response is not guaranteed to carry all of them.
public sealed record ProxyExitInfo(
    string? Ip,        // logged as "Exit IP"
    string? City,      // logged alongside Region and Country
    string? Region,
    string? Country,
    string? Org,       // ASN + organisation, e.g. "AS7922 Comcast Cable"
    string? Hostname,
    string? Timezone);
|
||||
|
||||
/// <summary>
|
||||
/// Smallest possible end-to-end check of the proxy plumbing: acquire a lease,
|
||||
/// launch the real browser through it, and read back the exit IP from an
|
||||
/// IP-echo endpoint. Costs a few KB, so it's the right first thing to run
|
||||
/// against a metered residential plan — it proves auth works and shows whether
|
||||
/// the IP is genuinely residential before we spend bandwidth on CSFloat.
|
||||
/// </summary>
|
||||
public sealed class ProxyProbe
{
    // IP-echo endpoint that answers with a small JSON document describing the caller.
    private const string IpEchoUrl = "https://ipinfo.io/json";

    // The echo JSON uses lower-case keys; the record's properties are PascalCase.
    private static readonly JsonSerializerOptions JsonOptions =
        new JsonSerializerOptions { PropertyNameCaseInsensitive = true };

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    /// <summary>Wires up the collaborators the probe needs; all are supplied by DI.</summary>
    /// <param name="provider">Source of proxy leases.</param>
    /// <param name="proxyFactory">Builds the local auth-injecting proxy for a lease.</param>
    /// <param name="factory">Launches the browser pointed at a proxy endpoint.</param>
    /// <param name="logger">Sink for the lease and exit-IP log lines.</param>
    public ProxyProbe(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        (_provider, _proxyFactory, _factory, _logger) = (provider, proxyFactory, factory, logger);
    }

    /// <summary>
    /// Acquires a lease, starts a local forwarding proxy for it, drives the browser
    /// to the IP-echo URL through that proxy and parses what the page shows.
    /// </summary>
    /// <param name="request">Lease parameters handed to the proxy provider.</param>
    /// <returns>The exit details reported by the echo endpoint.</returns>
    /// <exception cref="InvalidOperationException">
    /// The page had no text, contained no JSON object, or deserialized to null.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);
        var providerName = lease.Provider;
        var exitMode = lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}";
        _logger.LogInformation("Acquired {Provider} lease (exit {Mode}).", providerName, exitMode);

        // Disposed after the browser has quit, since the browser depends on it.
        await using var tunnel = _proxyFactory.Create(lease).Start();
        var browser = _factory.Create(tunnel.Endpoint, blockImages: true);
        try
        {
            browser.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            browser.Navigate().GoToUrl(IpEchoUrl);

            var pageText = ReadPageText((IJavaScriptExecutor)browser);
            var json = ExtractJson(pageText);

            var exit = JsonSerializer.Deserialize<ProxyExitInfo>(json, JsonOptions);
            if (exit is null)
            {
                throw new InvalidOperationException("IP-echo response was empty.");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                exit.Ip, exit.City, exit.Region, exit.Country, exit.Org);

            return exit;
        }
        finally
        {
            browser.Quit();
        }
    }

    // Pulls the rendered text of the whole document rather than walking the DOM, so
    // the browser's built-in JSON viewer markup does not get in the way.
    private static string ReadPageText(IJavaScriptExecutor scripts)
    {
        if (scripts.ExecuteScript("return document.documentElement.innerText;") is string text)
        {
            return text;
        }

        throw new InvalidOperationException("Browser returned no page text.");
    }

    // Carves the outermost {...} span out of the page text; throws when there is none.
    private static string ExtractJson(string text)
    {
        var open = text.IndexOf('{');
        var close = text.LastIndexOf('}');

        return open >= 0 && close > open
            ? text[open..(close + 1)]
            : throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
    }
}
|
||||
Reference in New Issue
Block a user