Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
233 lines
8.4 KiB
C#
233 lines
8.4 KiB
C#
using System.Net;
|
|
using System.Net.Sockets;
|
|
using System.Text;
|
|
using Microsoft.Extensions.Logging;
|
|
|
|
namespace BlueLaminate.Scraper.Proxies;
|
|
|
|
/// <summary>
/// A tiny in-process HTTP proxy that listens on 127.0.0.1 and chains every request
/// to an upstream gateway (the residential <see cref="ProxyLease"/>), injecting the
/// gateway's <c>Proxy-Authorization</c> header itself.
/// <para>
/// Why this exists: Chromium ignores credentials in <c>--proxy-server</c>, and the
/// only in-browser ways to answer the gateway's 407 are a CDP auth handler (which
/// is a Cloudflare automation tell) or a Manifest V2 extension (disabled in current
/// Chromium). By terminating the browser→proxy hop locally and adding the auth here,
/// the browser talks to an <em>auth-free</em> local endpoint and we run with zero
/// CDP — far less detectable — while the upstream still carries the IPRoyal
/// username/password (and its baked-in country/session params).
/// </para>
/// <para>
/// HTTPS (the only thing cs.money serves) flows through the <c>CONNECT</c> tunnel:
/// we open the tunnel to the upstream with auth, then relay raw bytes both ways so
/// the browser does TLS end-to-end with the real host — this proxy never sees
/// plaintext. Plain HTTP is forwarded best-effort for the occasional non-TLS call:
/// only the first request on a connection gets the auth header injected; anything
/// after it is relayed as raw bytes.
/// </para>
/// </summary>
public sealed class LocalForwardingProxy : IAsyncDisposable
{
    // Upper bound on a header block; anything longer is treated as a garbage stream.
    private const int MaxHeaderBytes = 64 * 1024;

    // Pause after a failed accept so a persistent socket error cannot spin the loop.
    private static readonly TimeSpan AcceptRetryDelay = TimeSpan.FromMilliseconds(50);

    private readonly ProxyLease _upstream;
    private readonly ILogger _logger;
    private readonly TcpListener _listener;
    private readonly CancellationTokenSource _cts = new();
    private readonly string _authHeader;
    private Task? _acceptLoop;
    private int _disposed; // 0 = live, 1 = DisposeAsync has run

    /// <summary>
    /// Prepares (but does not bind) a loopback listener and pre-computes the
    /// <c>Proxy-Authorization: Basic …</c> header line sent to the upstream gateway.
    /// </summary>
    /// <param name="upstream">Gateway lease supplying host, port, username and password.</param>
    /// <param name="logger">Sink for lifecycle and per-connection diagnostics.</param>
    /// <exception cref="ArgumentNullException">An argument is <c>null</c>.</exception>
    public LocalForwardingProxy(ProxyLease upstream, ILogger logger)
    {
        ArgumentNullException.ThrowIfNull(upstream);
        ArgumentNullException.ThrowIfNull(logger);

        _upstream = upstream;
        _logger = logger;
        _listener = new TcpListener(IPAddress.Loopback, 0); // ephemeral port

        // UTF-8 is byte-identical to ASCII for ASCII credentials, but does not
        // silently turn any other character into '?' the way Encoding.ASCII does.
        var token = Convert.ToBase64String(
            Encoding.UTF8.GetBytes($"{upstream.Username}:{upstream.Password}"));
        _authHeader = $"Proxy-Authorization: Basic {token}\r\n";
    }

    /// <summary>"127.0.0.1:port" — pass this to the browser's <c>--proxy-server</c>.</summary>
    public string Endpoint { get; private set; } = "";

    /// <summary>Bind the local port and start accepting browser connections.</summary>
    /// <returns>This instance, so construction and start can be chained.</returns>
    /// <exception cref="ObjectDisposedException">The proxy has already been disposed.</exception>
    public LocalForwardingProxy Start()
    {
        ObjectDisposedException.ThrowIf(Volatile.Read(ref _disposed) != 0, this);

        if (_acceptLoop is not null)
        {
            // Already started: a second accept loop on the same listener would be redundant.
            return this;
        }

        _listener.Start();
        var port = ((IPEndPoint)_listener.LocalEndpoint).Port;
        Endpoint = $"127.0.0.1:{port}";
        _acceptLoop = Task.Run(() => AcceptLoopAsync(_cts.Token));
        _logger.LogInformation(
            "Local forwarding proxy listening on {Endpoint} → upstream {Upstream} ({Provider}).",
            Endpoint, _upstream.Endpoint, _upstream.Provider);
        return this;
    }

    // Accepts browser connections until the token is cancelled, handing each one to
    // its own handler task.
    private async Task AcceptLoopAsync(CancellationToken ct)
    {
        while (!ct.IsCancellationRequested)
        {
            TcpClient client;
            try
            {
                client = await _listener.AcceptTcpClientAsync(ct);
            }
            catch (OperationCanceledException)
            {
                break;
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Accept failed.");
                try
                {
                    await Task.Delay(AcceptRetryDelay, ct);
                }
                catch (OperationCanceledException)
                {
                    break;
                }

                continue;
            }

            // Fire-and-forget per connection; exceptions are swallowed per client so
            // one bad tunnel never takes down the listener. The token is deliberately
            // NOT passed to Task.Run: a cancelled token would stop the delegate from
            // ever running, and the accepted client would then never be disposed.
            _ = Task.Run(() => HandleClientAsync(client, ct));
        }
    }

    // Reads the first request header from a browser connection and dispatches it to
    // the CONNECT (HTTPS) or plain-HTTP path. Owns and disposes the client.
    private async Task HandleClientAsync(TcpClient client, CancellationToken ct)
    {
        using (client)
        {
            try
            {
                // Inside the try so a socket that is already dead cannot fault the
                // fire-and-forget task with an unobserved exception.
                client.NoDelay = true;

                var clientStream = client.GetStream();
                var header = await ReadHeaderAsync(clientStream, ct);
                if (header is null)
                {
                    return;
                }

                var requestLine = header.Split("\r\n", 2)[0];
                var parts = requestLine.Split(' ');
                if (parts.Length < 2)
                {
                    return;
                }

                var method = parts[0];
                if (method.Equals("CONNECT", StringComparison.OrdinalIgnoreCase))
                {
                    await HandleConnectAsync(clientStream, parts[1], ct);
                }
                else
                {
                    await HandlePlainAsync(clientStream, header, ct);
                }
            }
            catch (Exception ex)
            {
                _logger.LogDebug(ex, "Client connection error.");
            }
        }
    }

    // HTTPS path: open an authenticated CONNECT tunnel upstream, then relay raw bytes.
    // Answers the browser with 502 when the upstream does not reply with status 200.
    private async Task HandleConnectAsync(NetworkStream clientStream, string target, CancellationToken ct)
    {
        using var upstream = new TcpClient { NoDelay = true };
        await upstream.ConnectAsync(_upstream.Host, _upstream.Port, ct);
        var upstreamStream = upstream.GetStream();

        // Latin-1 mirrors the byte→char mapping used by ReadHeaderAsync, so the
        // target goes out with exactly the bytes the browser sent.
        var connect = $"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{_authHeader}\r\n";
        await upstreamStream.WriteAsync(Encoding.Latin1.GetBytes(connect), ct);

        var upstreamHeader = await ReadHeaderAsync(upstreamStream, ct);
        var ok = upstreamHeader is not null
            && upstreamHeader.StartsWith("HTTP/1.", StringComparison.Ordinal)
            && upstreamHeader.Split(' ', 3) is { Length: >= 2 } sl
            && sl[1] == "200";
        if (!ok)
        {
            var status = upstreamHeader?.Split("\r\n", 2)[0] ?? "no response";
            _logger.LogWarning("Upstream refused CONNECT {Target}: {Status}", target, status);
            var resp = "HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n";
            await clientStream.WriteAsync(Encoding.ASCII.GetBytes(resp), ct);
            return;
        }

        await clientStream.WriteAsync(
            Encoding.ASCII.GetBytes("HTTP/1.1 200 Connection established\r\n\r\n"), ct);

        await RelayAsync(clientStream, upstreamStream, ct);
    }

    // Plain-HTTP path: re-inject the request upstream with auth, then relay both ways.
    // Requests without a Host header are dropped.
    private async Task HandlePlainAsync(NetworkStream clientStream, string header, CancellationToken ct)
    {
        var hasHost = header.Split("\r\n")
            .Any(l => l.StartsWith("Host:", StringComparison.OrdinalIgnoreCase));
        if (!hasHost)
        {
            return;
        }

        using var upstream = new TcpClient { NoDelay = true };
        await upstream.ConnectAsync(_upstream.Host, _upstream.Port, ct);
        var upstreamStream = upstream.GetStream();

        // Insert the Proxy-Authorization header right after the request line.
        var idx = header.IndexOf("\r\n", StringComparison.Ordinal);
        var rewritten = header[..(idx + 2)] + _authHeader + header[(idx + 2)..];

        // The header was decoded one byte per char, so Latin-1 restores the original
        // bytes; Encoding.ASCII would replace every byte >= 0x80 with '?'.
        await upstreamStream.WriteAsync(Encoding.Latin1.GetBytes(rewritten), ct);

        await RelayAsync(clientStream, upstreamStream, ct);
    }

    // Pipe both directions until either side closes, then stop the other direction
    // and wait for it, so no copy task is left running against a disposed stream.
    private static async Task RelayAsync(NetworkStream a, NetworkStream b, CancellationToken ct)
    {
        using var relayCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        var toUpstream = a.CopyToAsync(b, relayCts.Token);
        var toClient = b.CopyToAsync(a, relayCts.Token);

        await Task.WhenAny(toUpstream, toClient);
        await relayCts.CancelAsync();

        try
        {
            await Task.WhenAll(toUpstream, toClient);
        }
        catch (Exception ex) when (ex is OperationCanceledException
            or System.IO.IOException
            or ObjectDisposedException
            or SocketException)
        {
            // Expected while tearing a tunnel down: we cancelled the surviving copy,
            // or the peer reset the connection. Nothing to recover.
        }
    }

    // Read up to the end of the HTTP header block (CRLFCRLF). Returns the header
    // including the terminator, or null when the stream ends or exceeds
    // MaxHeaderBytes before a terminator is seen.
    private static async Task<string?> ReadHeaderAsync(NetworkStream stream, CancellationToken ct)
    {
        // One byte at a time on purpose: reading in larger chunks could consume
        // bytes that follow the header (request body or tunnelled data), which must
        // stay in the stream for the relay.
        var buffer = new byte[1];
        var sb = new StringBuilder(256);
        while (true)
        {
            var read = await stream.ReadAsync(buffer, ct);
            if (read == 0)
            {
                // EOF before the terminator: a truncated header is not a header.
                return null;
            }

            sb.Append((char)buffer[0]);
            if (sb.Length >= 4
                && sb[^1] == '\n' && sb[^2] == '\r' && sb[^3] == '\n' && sb[^4] == '\r')
            {
                return sb.ToString();
            }

            // Guard against a runaway/garbage stream.
            if (sb.Length > MaxHeaderBytes)
            {
                return null;
            }
        }
    }

    /// <summary>
    /// Cancels the accept loop and in-flight connection handlers, stops the listener
    /// and waits for the accept loop to finish. Safe to call more than once.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (Interlocked.Exchange(ref _disposed, 1) != 0)
        {
            // Already disposed: touching the disposed token source again would throw.
            return;
        }

        await _cts.CancelAsync();
        _listener.Stop();
        if (_acceptLoop is not null)
        {
            try
            {
                await _acceptLoop;
            }
            catch (OperationCanceledException)
            {
                // expected on shutdown
            }
        }

        _cts.Dispose();
    }
}
|