Add cs.money worker stack with per-worker IPRoyal residential proxy

Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration.

IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
bob
2026-05-31 15:03:31 -05:00
parent eb5fb0dac7
commit dc7c3f99ae
82 changed files with 8354 additions and 571 deletions

View File

@@ -7,7 +7,8 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.8" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Selenium.WebDriver" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,79 @@
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
using OpenQA.Selenium.Edge;
namespace BlueLaminate.Scraper.Browser;
/// <summary>
/// Factory for a visible (non-headless) Edge/Chromium <see cref="IWebDriver"/> whose
/// traffic is sent to an unauthenticated local proxy endpoint — normally a
/// <see cref="Proxies.LocalForwardingProxy"/> that chains to the residential gateway.
/// <para>
/// CDP is deliberately never enabled: switching on DevTools domains (even only to
/// answer proxy auth) is a Cloudflare automation tell, and because the local proxy
/// already carries the upstream credentials the browser never sees a 407. Together
/// with a warmed, persistent profile this is the lowest-fingerprint setup available
/// without an undetected-chromedriver (which has no .NET equivalent).
/// </para>
/// <para>
/// Bandwidth: the residential plan is metered per GB, so images are switched off via
/// content settings by default. Cloudflare gates on JS/TLS/behaviour rather than on
/// whether pictures render, so this remains realistic.
/// </para>
/// </summary>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger) => _logger = logger;

    /// <summary>
    /// Start Edge, optionally routed through <paramref name="proxyEndpoint"/>
    /// ("host:port", no auth).
    /// </summary>
    /// <param name="proxyEndpoint">
    /// Local proxy endpoint; null/blank launches the browser on the machine's direct
    /// connection (the diagnostic --no-proxy mode).
    /// </param>
    /// <param name="blockImages">When true, images are disabled through a profile preference.</param>
    /// <param name="profileDir">
    /// Profile directory to reuse across runs, so a previously cleared Cloudflare
    /// <c>cf_clearance</c> cookie and history carry over; null/blank creates a
    /// uniquely named throwaway directory under the temp path.
    /// </param>
    /// <returns>A newly launched Edge driver; the caller is responsible for quitting it.</returns>
    public IWebDriver Create(string? proxyEndpoint, bool blockImages = true, string? profileDir = null)
    {
        var routed = !string.IsNullOrWhiteSpace(proxyEndpoint);
        var edge = new EdgeOptions();

        // The proxy is passed as a launch argument instead of EdgeOptions.Proxy, which
        // would also route Selenium Manager's driver download. No scheme means every
        // protocol goes through the proxy.
        if (routed)
        {
            edge.AddArgument($"--proxy-server={proxyEndpoint}");
        }

        // Trim the most obvious automation tells; the residential exit, a real
        // (non-headless) browser and a warmed profile cover the rest.
        edge.AddArgument("--disable-blink-features=AutomationControlled");
        edge.AddExcludedArgument("enable-automation");
        edge.AddAdditionalOption("useAutomationExtension", false);
        foreach (var flag in new[] { "--no-first-run", "--no-default-browser-check", "--start-maximized" })
        {
            edge.AddArgument(flag);
        }

        var keepProfile = !string.IsNullOrWhiteSpace(profileDir);
        string userDataDir;
        if (keepProfile)
        {
            userDataDir = profileDir!;
        }
        else
        {
            userDataDir = Path.Combine(Path.GetTempPath(), "bluelaminate-edge", Guid.NewGuid().ToString("N"));
        }

        Directory.CreateDirectory(userDataDir);
        edge.AddArgument($"--user-data-dir={userDataDir}");

        if (blockImages)
        {
            edge.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
        }

        var route = routed ? $"local proxy {proxyEndpoint}" : "DIRECT (no proxy)";
        var profileLabel = keepProfile ? userDataDir : "throwaway";
        _logger.LogInformation("Launching Edge via {Route} (profile: {Profile}).", route, profileLabel);

        return new EdgeDriver(edge);
    }
}

View File

@@ -1,3 +1,4 @@
using System.Globalization;
using System.Net;
using System.Text.Json;
using System.Text.Json.Serialization;
@@ -27,9 +28,6 @@ public sealed record ListingsPageResult(IReadOnlyList<CsFloatListing> Listings,
/// </summary>
public sealed class CsFloatListingsClient
{
private const string BaseUrl = "https://csfloat.com/api/v1/listings";
private const int MaxLimit = 50; // API hard cap per page.
private static readonly JsonSerializerOptions Options = new()
{
// CSFloat uses snake_case for item fields (market_hash_name, float_value,
@@ -43,18 +41,30 @@ public sealed class CsFloatListingsClient
private readonly HttpClient _http;
private readonly string _apiKey;
private readonly string _baseUrl;
private readonly int _maxLimit;
private readonly ILogger<CsFloatListingsClient> _logger;
public CsFloatListingsClient(HttpClient http, string apiKey, ILogger<CsFloatListingsClient> logger)
public CsFloatListingsClient(HttpClient http, CsFloatOptions options, ILogger<CsFloatListingsClient> logger)
{
if (string.IsNullOrWhiteSpace(apiKey))
throw new ArgumentException("CSFloat API key is required.", nameof(apiKey));
if (string.IsNullOrWhiteSpace(options.ApiKey))
{
throw new ArgumentException("CSFloat API key is required.", nameof(options));
}
_http = http;
_apiKey = apiKey;
_apiKey = options.ApiKey;
_baseUrl = options.BaseUrl;
_maxLimit = options.MaxLimit;
_logger = logger;
}
/// <summary>
/// Maximum listings returned per page (the API page cap, from configuration).
/// This is listings-per-request — unrelated to how many requests are made.
/// </summary>
public int MaxLimit => _maxLimit;
/// <summary>
/// Rate-limit state from the most recent response (success or failure).
/// <see cref="CsFloatRateLimit.None"/> until the first request completes.
@@ -81,9 +91,9 @@ public sealed class CsFloatListingsClient
do
{
var remaining = maxListings - results.Count;
var limit = Math.Min(MaxLimit, remaining);
var limit = Math.Min(_maxLimit, remaining);
var page = await FetchPageAsync(defIndex, paintIndex, sortBy, limit, cursor, type, ct);
var page = await FetchPageAsync(defIndex, paintIndex, sortBy, limit, cursor, type, ct: ct);
results.AddRange(page.Listings);
_logger.LogInformation(
@@ -94,7 +104,9 @@ public sealed class CsFloatListingsClient
// Stop when the API signals the end (no cursor) or returns an empty page.
if (string.IsNullOrEmpty(cursor) || page.Listings.Count == 0)
{
break;
}
}
while (results.Count < maxListings);
@@ -106,6 +118,9 @@ public sealed class CsFloatListingsClient
/// sweep runner drives this directly so it can decide — between pages — when
/// to stop (already-seen listings) or pace (rate-limit headers). Filters are
/// optional: omit def_index/paint_index for a global sweep across all items.
/// <paramref name="minFloat"/>/<paramref name="maxFloat"/> restrict the result
/// to a float (wear) band, so the catalogue sweep can split a skin into smaller,
/// independently-checkpointable wear units.
/// </summary>
public Task<ListingsPageResult> FetchPageAsync(
int? defIndex,
@@ -114,30 +129,64 @@ public sealed class CsFloatListingsClient
int limit,
string? cursor,
string? type = "buy_now",
decimal? minFloat = null,
decimal? maxFloat = null,
CancellationToken ct = default)
{
var query = new List<string>
{
$"sort_by={Uri.EscapeDataString(sortBy)}",
$"limit={Math.Clamp(limit, 1, MaxLimit)}",
$"limit={Math.Clamp(limit, 1, _maxLimit)}",
};
// Default to fixed-price listings only; auctions have no firm sale price
// and aren't wanted. Pass type=null to include everything.
if (!string.IsNullOrEmpty(type))
{
query.Add($"type={Uri.EscapeDataString(type)}");
}
if (defIndex is { } def)
{
query.Add($"def_index={def}");
}
if (paintIndex is { } paint)
{
query.Add($"paint_index={paint}");
}
// CSFloat's min_float/max_float are exclusive ("float higher/lower than this").
// Nudge the bounds outward by a tiny epsilon so a listing whose float sits
// exactly on a band boundary isn't dropped; slight overlap between adjacent
// bands is harmless (same listing id, just upserted twice).
if (minFloat is { } min)
{
query.Add($"min_float={Format(min - FloatBoundaryEpsilon)}");
}
if (maxFloat is { } max)
{
query.Add($"max_float={Format(max + FloatBoundaryEpsilon)}");
}
if (!string.IsNullOrEmpty(cursor))
{
query.Add($"cursor={Uri.EscapeDataString(cursor)}");
}
return SendPageAsync(query, ct);
}
private const decimal FloatBoundaryEpsilon = 0.000001m;
// Invariant, fixed-point formatting so floats serialise as "0.07" rather than a
// culture-specific or scientific form the API would reject.
private static string Format(decimal value) =>
Math.Clamp(value, 0m, 1m).ToString("0.0##########", CultureInfo.InvariantCulture);
private async Task<ListingsPageResult> SendPageAsync(List<string> query, CancellationToken ct)
{
var url = $"{BaseUrl}?{string.Join('&', query)}";
var url = $"{_baseUrl}?{string.Join('&', query)}";
using var request = new HttpRequestMessage(HttpMethod.Get, url);
// CSFloat expects the raw key in the Authorization header (no scheme).
@@ -152,7 +201,9 @@ public sealed class CsFloatListingsClient
_logger.LogInformation("{RateLimit}", LastRateLimit);
if (!response.IsSuccessStatusCode)
{
throw new CsFloatApiException(response.StatusCode, Truncate(body));
}
var page = Parse(body);
return new ListingsPageResult(page.Data.Select(Map).ToList(), page.Cursor);
@@ -169,7 +220,9 @@ public sealed class CsFloatListingsClient
// Scan both response and content headers — servers split them either way.
var all = response.Headers.AsEnumerable();
if (response.Content is not null)
{
all = all.Concat(response.Content.Headers);
}
foreach (var header in all)
{
@@ -178,11 +231,15 @@ public sealed class CsFloatListingsClient
|| name.Contains("rate-limit", StringComparison.OrdinalIgnoreCase)
|| name.Equals("Retry-After", StringComparison.OrdinalIgnoreCase);
if (isRateLimit)
{
raw[name] = string.Join(",", header.Value);
}
}
if (raw.Count == 0)
{
return CsFloatRateLimit.None;
}
return new CsFloatRateLimit(
Limit: FindInt(raw, "limit"),

View File

@@ -0,0 +1,30 @@
using System.ComponentModel.DataAnnotations;
namespace BlueLaminate.Scraper.CsFloat;
/// <summary>
/// Settings consumed by <see cref="CsFloatListingsClient"/>, bound from the
/// <c>CsFloat</c> configuration section. The defaults mirror the live API, so the
/// only value that normally has to be supplied is the key.
/// </summary>
public sealed class CsFloatOptions
{
/// <summary>Name of the configuration section these options bind from.</summary>
public const string SectionName = "CsFloat";
/// <summary>
/// Developer key CSFloat requires on the <c>Authorization</c> header. Falls
/// back to the legacy <c>CSFLOAT_API_KEY</c> environment variable (wired in the
/// composition root, which is not part of this file). Only commands that hit the
/// API need it, which is why it is nullable here.
/// </summary>
public string? ApiKey { get; set; }
/// <summary>Active-listings endpoint; query parameters are appended by the client.</summary>
public string BaseUrl { get; set; } = "https://csfloat.com/api/v1/listings";
/// <summary>
/// Listings per page. CSFloat caps this at 50; values outside [1, 50] are meant to
/// be rejected at startup rather than silently clamped.
/// NOTE(review): the attribute below only declares the bound — rejection depends on
/// data-annotation validation being enabled where the options are bound; confirm.
/// </summary>
[Range(1, 50, ErrorMessage = "CsFloat:MaxLimit must be between 1 and 50 (the CSFloat API page cap).")]
public int MaxLimit { get; set; } = 50;
}

View File

@@ -0,0 +1,211 @@
using System.Text;
using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.CsMoney;
/// <summary>Outcome of a stealth pagination run.</summary>
/// <param name="PagesSucceeded">
/// How many offset pages were accepted and written to disk before pagination stopped.
/// A page whose item count could not be determined still counts here.
/// </param>
/// <param name="ItemsTotal">
/// Total listing items counted across those pages (pages with an undeterminable
/// count contribute zero).
/// </param>
/// <param name="StoppedReason">Why pagination stopped: "challenged", "empty", "completed", or "error".</param>
public sealed record CsMoneyCaptureResult(int PagesSucceeded, int ItemsTotal, string StoppedReason);
/// <summary>
/// Drives a low-fingerprint, non-headless Edge (no CDP) through a local forwarding
/// proxy to the cs.money market, lets the operator clear Cloudflare once, then pages
/// the listings API with human-like pacing using in-page <c>fetch()</c> calls from
/// the cleared origin (so the cf_clearance cookie rides along). It records each
/// page's JSON and — crucially for the current phase — <b>measures how many pages
/// survive before Cloudflare re-challenges</b>, which tells us whether the
/// fingerprint reductions are enough for a real sweep.
/// </summary>
public sealed class CsMoneyCaptureService
{
private readonly IProxyProvider _provider;
private readonly LocalForwardingProxyFactory _proxyFactory;
private readonly BrowserDriverFactory _factory;
private readonly CsMoneyOptions _options;
private readonly ILogger<CsMoneyCaptureService> _logger;

/// <summary>Captures the collaborators used by a capture run; performs no work itself.</summary>
/// <param name="provider">Source of upstream proxy leases.</param>
/// <param name="proxyFactory">Builds the per-run local forwarding proxy.</param>
/// <param name="factory">Launches the browser.</param>
/// <param name="options">cs.money URLs, profile directory and pacing settings.</param>
/// <param name="logger">Diagnostic sink.</param>
public CsMoneyCaptureService(
    IProxyProvider provider,
    LocalForwardingProxyFactory proxyFactory,
    BrowserDriverFactory factory,
    CsMoneyOptions options,
    ILogger<CsMoneyCaptureService> logger)
    => (_provider, _proxyFactory, _factory, _options, _logger)
        = (provider, proxyFactory, factory, options, logger);
/// <summary>
/// Open the market, wait for <paramref name="browseUntilDone"/> (the operator
/// clears Cloudflare and presses Enter), then page the listings API up to
/// <paramref name="maxPages"/> times, stopping early on a re-challenge or an
/// empty page. Each page's body is written to <paramref name="outputDir"/>.
/// </summary>
/// <param name="outputDir">Directory for captured bodies; created if missing.</param>
/// <param name="request">Lease request passed to the proxy provider when <paramref name="useProxy"/> is true.</param>
/// <param name="loadImages">When false the browser is launched with images blocked.</param>
/// <param name="useProxy">
/// When false the browser runs on the machine's own IP, to isolate whether a
/// re-challenge comes from the exit IP's reputation or the webdriver fingerprint.
/// </param>
/// <param name="maxPages">Upper bound on accepted pages.</param>
/// <param name="browseUntilDone">Awaited once after navigation; completes when the operator is ready.</param>
/// <param name="ct">Cancels pagination, pacing delays and file writes.</param>
/// <returns>Pages/items captured and the reason pagination stopped.</returns>
/// <exception cref="OperationCanceledException">
/// Propagated when <paramref name="ct"/> is cancelled; every other failure inside the
/// capture loop is logged and reported as the "error" stop reason instead.
/// </exception>
public async Task<CsMoneyCaptureResult> RunAsync(
    string outputDir,
    ProxyRequest request,
    bool loadImages,
    bool useProxy,
    int maxPages,
    Func<Task> browseUntilDone,
    CancellationToken ct = default)
{
    Directory.CreateDirectory(outputDir);

    // --no-proxy (useProxy=false) drives the automated browser on the machine's
    // own IP, to isolate whether a re-challenge is the IPRoyal exit's reputation
    // or the webdriver fingerprint itself.
    LocalForwardingProxy? localProxy = null;
    string? proxyEndpoint = null;
    if (useProxy)
    {
        var lease = _provider.Acquire(request);
        localProxy = _proxyFactory.Create(lease).Start();
        proxyEndpoint = localProxy.Endpoint;
    }

    // A failed browser launch still propagates to the caller, but must not leave
    // the already-listening local proxy (socket + accept loop) running.
    IWebDriver driver;
    try
    {
        driver = _factory.Create(proxyEndpoint, blockImages: !loadImages, _options.ProfileDir);
    }
    catch
    {
        if (localProxy is not null)
        {
            await localProxy.DisposeAsync();
        }

        throw;
    }

    var pages = 0;
    var items = 0;
    var reason = "completed";
    try
    {
        driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(90);
        driver.Manage().Timeouts().AsynchronousJavaScript = TimeSpan.FromSeconds(45);
        _logger.LogInformation("Navigating to {Url}", _options.MarketUrl);
        driver.Navigate().GoToUrl(_options.MarketUrl);

        // Operator clears the Cloudflare challenge in the visible window, waits
        // until the market grid is actually rendered, then presses Enter.
        await browseUntilDone();

        // The offset advances by 60 per iteration regardless of outcome; the loop is
        // bounded by the number of accepted pages.
        for (var offset = 0; pages < maxPages; offset += 60)
        {
            ct.ThrowIfCancellationRequested();
            var apiUrl = string.Format(_options.ApiUrlTemplate, offset);
            var (status, body) = DirectFetch(driver, apiUrl);
            if (LooksLikeChallenge(status, body))
            {
                _logger.LogWarning(
                    "Re-challenged at offset {Offset} (after {Pages} clean page(s)). Stopping.",
                    offset, pages);
                await WriteAsync(outputDir, $"challenge_offset_{offset}.html", body, ct);
                reason = "challenged";
                break;
            }

            // null means the item count could not be determined; such a page is still
            // recorded and counted, contributing zero items.
            var count = TryCountItems(body);
            if (count is 0)
            {
                _logger.LogInformation("Offset {Offset} returned no items — end of listings.", offset);
                reason = "empty";
                break;
            }

            await WriteAsync(outputDir, $"page_{pages:D3}_offset_{offset}.json", body, ct);
            pages++;
            items += count ?? 0;
            _logger.LogInformation(
                "Page {Page} [offset {Offset}] [{Status}] → {Count} items ({Bytes} bytes).",
                pages, offset, status, count, body.Length);
            await DelayAsync(ct);
        }
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Cancellation is excluded by the filter so it reaches the caller unchanged.
        _logger.LogError(ex, "cs.money capture failed after {Pages} page(s).", pages);
        reason = "error";
    }
    finally
    {
        // Nested so the proxy is torn down even when quitting the browser throws.
        try
        {
            driver.Quit();
        }
        finally
        {
            if (localProxy is not null)
            {
                await localProxy.DisposeAsync();
            }
        }
    }

    return new CsMoneyCaptureResult(pages, items, reason);
}
// Issues a fetch() from inside the already-cleared page and hands back
// (status, body). ExecuteAsyncScript lets the script signal completion through its
// trailing callback argument once the fetch promise settles; since the page sits on
// the cs.money origin, the cf_clearance cookie goes along automatically. A rejected
// fetch is reported as status -1 with the error text as the body, and a missing or
// empty script result as (-1, "").
private (int Status, string Body) DirectFetch(IWebDriver driver, string apiUrl)
{
    const string script = """
        const url = arguments[0];
        const done = arguments[arguments.length - 1];
        fetch(url, { credentials: 'include', headers: { 'accept': 'application/json' } })
        .then(r => r.text().then(t => done(JSON.stringify({ status: r.status, body: t }))))
        .catch(e => done(JSON.stringify({ status: -1, body: String(e) })));
        """;

    var executor = (IJavaScriptExecutor)driver;
    var result = executor.ExecuteAsyncScript(script, apiUrl);
    if (result is not string envelope || envelope.Length == 0)
    {
        return (-1, "");
    }

    // The script wraps its answer in a small JSON envelope: { status, body }.
    using var parsed = JsonDocument.Parse(envelope);
    var root = parsed.RootElement;
    var statusCode = root.GetProperty("status").GetInt32();
    var text = root.GetProperty("body").GetString();
    return (statusCode, text ?? "");
}
// Heuristic for "this response is a Cloudflare challenge (or a failed fetch), not
// listings JSON": a 403/503/-1 status, a known challenge marker in the body, or a
// body that starts with markup.
private static bool LooksLikeChallenge(int status, string body)
{
    if (status == 403 || status == 503 || status == -1)
    {
        return true;
    }

    if (body.Contains("Just a moment", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    if (body.Contains("challenge-platform", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    // HTML, not JSON
    var trimmed = body.TrimStart();
    return trimmed.StartsWith("<", StringComparison.Ordinal);
}
// Count items[] without binding a full model (the typed model is Phase 2).
// Returns the array length when the body is a JSON object with an "items" array,
// and null whenever the count cannot be determined: invalid JSON, a root that is
// not an object, or a missing/non-array "items" property.
private static int? TryCountItems(string body)
{
    try
    {
        using var doc = JsonDocument.Parse(body);
        var root = doc.RootElement;

        // TryGetProperty throws InvalidOperationException (not JsonException) on a
        // non-object root such as "[]" or a bare string, so rule that out first.
        if (root.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        return root.TryGetProperty("items", out var items)
            && items.ValueKind == JsonValueKind.Array
            ? items.GetArrayLength()
            : null;
    }
    catch (JsonException)
    {
        return null;
    }
}
// Pacing pause between page fetches: the configured base delay (negative values
// count as zero) plus a random extra in [0, PageJitterSeconds). Skips the wait
// entirely when the total is not positive.
private async Task DelayAsync(CancellationToken ct)
{
    double extra = 0;
    if (_options.PageJitterSeconds > 0)
    {
        extra = Random.Shared.NextDouble() * _options.PageJitterSeconds;
    }

    var total = Math.Max(0, _options.PageDelaySeconds) + extra;
    if (total > 0)
    {
        var pause = TimeSpan.FromSeconds(total);
        await Task.Delay(pause, ct);
    }
}
// Writes one captured body as UTF-8 text to <dir>/<fileName>.
private static async Task WriteAsync(string dir, string fileName, string body, CancellationToken ct)
{
    var target = Path.Combine(dir, fileName);
    await File.WriteAllTextAsync(target, body, Encoding.UTF8, ct);
}
}

View File

@@ -0,0 +1,50 @@
namespace BlueLaminate.Scraper.CsMoney;
/// <summary>
/// Configuration for the cs.money scraper, bound from the <c>CsMoney</c>
/// configuration section.
/// <para>
/// cs.money exposes no public API and sits behind Cloudflare bot protection, so we
/// drive a real, non-headless browser (Selenium/Edge) routed through an IPRoyal
/// residential proxy via a local forwarding hop (no CDP). The market endpoint
/// re-challenges aggressively during pagination, so these options also tune the
/// warmed profile and request pacing we use to survive longer.
/// </para>
/// </summary>
public sealed class CsMoneyOptions
{
/// <summary>Name of the configuration section these options bind from.</summary>
public const string SectionName = "CsMoney";
/// <summary>Public market page the browser opens (and where the operator clears Cloudflare).</summary>
public string MarketUrl { get; set; } = "https://cs.money/market/buy/";
/// <summary>
/// Listings API template; <c>{0}</c> is the page offset and is filled in with
/// <c>string.Format</c>. The capture loop advances the offset in steps of 60, so the
/// page size in the template should stay in step with that. Fetched in-page from the
/// cleared market origin so the cf_clearance cookie is sent.
/// </summary>
public string ApiUrlTemplate { get; set; } =
"https://cs.money/2.0/market/sell-orders?limit=60&offset={0}";
/// <summary>
/// Persistent Chromium profile directory; defaults to a fixed folder under the
/// system temp path. Reusing one profile keeps the cf_clearance cookie and history
/// between runs — a warmed profile is far less likely to be re-challenged than a
/// fresh one. Empty = throwaway profile.
/// </summary>
public string ProfileDir { get; set; } =
Path.Combine(Path.GetTempPath(), "bluelaminate-csmoney-profile");
/// <summary>
/// Optional ISO country code(s) for the residential exit IP, e.g. "us". Null/empty
/// lets IPRoyal pick at random.
/// </summary>
public string? Country { get; set; }
/// <summary>Load images. Off by default to conserve the metered residential plan.</summary>
public bool LoadImages { get; set; }
/// <summary>
/// Base delay between paginated API fetches, in seconds (human-like pacing).
/// Negative values are treated as zero by the capture service.
/// </summary>
public double PageDelaySeconds { get; set; } = 2.5;
/// <summary>
/// Extra random jitter added to each delay, in seconds (0..value). Zero or a
/// negative value disables jitter.
/// </summary>
public double PageJitterSeconds { get; set; } = 2.0;
}

View File

@@ -23,9 +23,14 @@ public sealed class IpRoyalProxyProvider : IProxyProvider
public IpRoyalProxyProvider(string username, string password)
{
if (string.IsNullOrWhiteSpace(username))
{
throw new ArgumentException("IPRoyal username is required.", nameof(username));
}
if (string.IsNullOrWhiteSpace(password))
{
throw new ArgumentException("IPRoyal password is required.", nameof(password));
}
_username = username;
_password = password;
@@ -41,7 +46,9 @@ public sealed class IpRoyalProxyProvider : IProxyProvider
// Country first; the router picks one at random when several are listed.
if (!string.IsNullOrWhiteSpace(request.Country))
{
password += $"_country-{request.Country.Trim().ToLowerInvariant()}";
}
if (request.Sticky)
{

View File

@@ -0,0 +1,232 @@
using System.Net;
using System.Net.Sockets;
using System.Text;
using Microsoft.Extensions.Logging;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// A tiny in-process HTTP proxy that listens on 127.0.0.1 and chains every request
/// to an upstream gateway (the residential <see cref="ProxyLease"/>), injecting the
/// gateway's <c>Proxy-Authorization</c> header itself.
/// <para>
/// Why this exists: Chromium ignores credentials in <c>--proxy-server</c>, and the
/// only in-browser ways to answer the gateway's 407 are a CDP auth handler (which
/// is a Cloudflare automation tell) or a Manifest V2 extension (disabled in current
/// Chromium). By terminating the browser→proxy hop locally and adding the auth here,
/// the browser talks to an <em>auth-free</em> local endpoint and we run with zero
/// CDP — far less detectable — while the upstream still carries the IPRoyal
/// username/password (and its baked-in country/session params).
/// </para>
/// <para>
/// HTTPS (the only thing cs.money serves) flows through the <c>CONNECT</c> tunnel:
/// we open the tunnel to the upstream with auth, then relay raw bytes both ways so
/// the browser does TLS end-to-end with the real host — this proxy never sees
/// plaintext. Plain HTTP is forwarded best-effort for the occasional non-TLS call.
/// </para>
/// </summary>
public sealed class LocalForwardingProxy : IAsyncDisposable
{
private readonly ProxyLease _upstream;
private readonly ILogger _logger;
private readonly TcpListener _listener;
private readonly CancellationTokenSource _cts = new();
private readonly string _authHeader;
private Task? _acceptLoop;

/// <summary>
/// Prepares (but does not start) a loopback listener on an OS-assigned port and
/// precomputes the Basic <c>Proxy-Authorization</c> header line from the lease's
/// username and password.
/// </summary>
/// <param name="upstream">Gateway lease every connection is chained to.</param>
/// <param name="logger">Diagnostic sink.</param>
public LocalForwardingProxy(ProxyLease upstream, ILogger logger)
{
    (_upstream, _logger) = (upstream, logger);

    // Port 0 asks the OS for an ephemeral port; the real one is known after Start().
    _listener = new TcpListener(IPAddress.Loopback, 0);

    var credentials = Encoding.ASCII.GetBytes($"{upstream.Username}:{upstream.Password}");
    var basicToken = Convert.ToBase64String(credentials);
    _authHeader = $"Proxy-Authorization: Basic {basicToken}\r\n";
}

/// <summary>
/// "127.0.0.1:port" — the value to hand to the browser's <c>--proxy-server</c>.
/// Empty until <see cref="Start"/> has bound the listener.
/// </summary>
public string Endpoint { get; private set; } = "";
/// <summary>
/// Binds the loopback listener, publishes the resulting address through
/// <see cref="Endpoint"/>, and launches the background accept loop.
/// </summary>
/// <returns>This instance, so creation and start can be chained.</returns>
public LocalForwardingProxy Start()
{
    _listener.Start();

    // The OS picked the port; read it back from the bound endpoint.
    var bound = (IPEndPoint)_listener.LocalEndpoint;
    Endpoint = $"127.0.0.1:{bound.Port}";

    var shutdown = _cts.Token;
    _acceptLoop = Task.Run(() => AcceptLoopAsync(shutdown));

    _logger.LogInformation(
        "Local forwarding proxy listening on {Endpoint} → upstream {Upstream} ({Provider}).",
        Endpoint,
        _upstream.Endpoint,
        _upstream.Provider);

    return this;
}
// Accepts browser connections until the token is cancelled, handing each one to its
// own handler task. Accept failures are logged at debug level and the loop carries
// on (or exits, if shutdown was requested in the meantime).
private async Task AcceptLoopAsync(CancellationToken ct)
{
    while (!ct.IsCancellationRequested)
    {
        TcpClient client;
        try
        {
            client = await _listener.AcceptTcpClientAsync(ct);
        }
        catch (OperationCanceledException)
        {
            break;
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Accept failed.");
            continue;
        }

        // Fire-and-forget per connection; exceptions are swallowed per client so
        // one bad tunnel never takes down the listener. The token is deliberately
        // NOT passed to Task.Run: a token cancelled before the delegate starts would
        // drop the work item, and the handler — the only owner of the accepted
        // client — would never run to dispose it.
        _ = Task.Run(() => HandleClientAsync(client, ct));
    }
}
// Serves one browser connection: reads the request header, then dispatches on the
// method — CONNECT becomes an authenticated tunnel, anything else is forwarded as
// plain HTTP. The client is disposed on exit; errors are logged at debug level.
private async Task HandleClientAsync(TcpClient client, CancellationToken ct)
{
    using var owned = client;
    client.NoDelay = true;

    try
    {
        var browserStream = client.GetStream();
        var header = await ReadHeaderAsync(browserStream, ct);
        if (header is null)
        {
            return;
        }

        // Request line = everything before the first CRLF (or the whole text).
        var lineEnd = header.IndexOf("\r\n", StringComparison.Ordinal);
        var requestLine = lineEnd < 0 ? header : header[..lineEnd];

        var tokens = requestLine.Split(' ');
        if (tokens.Length < 2)
        {
            return;
        }

        var isConnect = string.Equals(tokens[0], "CONNECT", StringComparison.OrdinalIgnoreCase);
        await (isConnect
            ? HandleConnectAsync(browserStream, tokens[1], ct)
            : HandlePlainAsync(browserStream, header, ct));
    }
    catch (Exception ex)
    {
        _logger.LogDebug(ex, "Client connection error.");
    }
}
// HTTPS path. Dials the gateway, sends a CONNECT for the requested target with the
// proxy credentials attached, and — only if the gateway answers HTTP/1.x 200 —
// confirms the tunnel to the browser and relays raw bytes in both directions.
// Any other answer (or none) is logged and turned into a 502 for the browser.
private async Task HandleConnectAsync(NetworkStream clientStream, string target, CancellationToken ct)
{
    using var gateway = new TcpClient { NoDelay = true };
    await gateway.ConnectAsync(_upstream.Host, _upstream.Port, ct);
    var gatewayStream = gateway.GetStream();

    var connectRequest = Encoding.ASCII.GetBytes(
        $"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{_authHeader}\r\n");
    await gatewayStream.WriteAsync(connectRequest, ct);

    var reply = await ReadHeaderAsync(gatewayStream, ct);
    if (IsTunnelEstablished(reply))
    {
        var established = Encoding.ASCII.GetBytes("HTTP/1.1 200 Connection established\r\n\r\n");
        await clientStream.WriteAsync(established, ct);
        await RelayAsync(clientStream, gatewayStream, ct);
        return;
    }

    var statusLine = reply?.Split("\r\n", 2)[0] ?? "no response";
    _logger.LogWarning("Upstream refused CONNECT {Target}: {Status}", target, statusLine);
    var badGateway = Encoding.ASCII.GetBytes("HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n");
    await clientStream.WriteAsync(badGateway, ct);

    // True when the reply is an HTTP/1.x response whose status token is exactly "200".
    static bool IsTunnelEstablished(string? header)
    {
        if (header is null || !header.StartsWith("HTTP/1.", StringComparison.Ordinal))
        {
            return false;
        }

        var tokens = header.Split(' ', 3);
        return tokens.Length >= 2 && tokens[1] == "200";
    }
}
// Plain-HTTP path. Requests without a Host header are dropped. Otherwise the
// original header block is replayed to the gateway with the Proxy-Authorization
// line spliced in directly after the request line, and the two streams are then
// relayed in both directions.
private async Task HandlePlainAsync(NetworkStream clientStream, string header, CancellationToken ct)
{
    var hasHost = header
        .Split("\r\n")
        .Any(line => line.StartsWith("Host:", StringComparison.OrdinalIgnoreCase));
    if (!hasHost)
    {
        return;
    }

    using var gateway = new TcpClient { NoDelay = true };
    await gateway.ConnectAsync(_upstream.Host, _upstream.Port, ct);
    var gatewayStream = gateway.GetStream();

    // Position just past the request line's CRLF — that is where the auth line goes.
    var afterRequestLine = header.IndexOf("\r\n", StringComparison.Ordinal) + 2;
    var authenticated = header.Insert(afterRequestLine, _authHeader);

    var payload = Encoding.ASCII.GetBytes(authenticated);
    await gatewayStream.WriteAsync(payload, ct);
    await RelayAsync(clientStream, gatewayStream, ct);
}
// Pipe both directions until either side closes. Once the first direction ends,
// the other copy is cancelled and awaited so it is not left running against streams
// the caller is about to dispose (which would surface later as an unobserved task
// exception). Relay failures are intentionally not propagated: a broken or closed
// tunnel is the normal way a relay ends.
private static async Task RelayAsync(NetworkStream a, NetworkStream b, CancellationToken ct)
{
    using var relayCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    var toUpstream = a.CopyToAsync(b, relayCts.Token);
    var toClient = b.CopyToAsync(a, relayCts.Token);

    await Task.WhenAny(toUpstream, toClient);

    // Stop whichever direction is still pending, then observe both outcomes.
    await relayCts.CancelAsync();
    try
    {
        await Task.WhenAll(toUpstream, toClient);
    }
    catch (Exception)
    {
        // Expected: cancellation of the remaining copy, or the I/O error that ended
        // the first one.
    }
}
// Reads an HTTP header block up to and including the terminating CRLFCRLF, one byte
// at a time so nothing beyond the header is consumed from the stream. Returns null
// on EOF before any byte arrived; on EOF mid-header, or once the text exceeds 64 KiB
// without a terminator, whatever was read so far is returned.
private static async Task<string?> ReadHeaderAsync(NetworkStream stream, CancellationToken ct)
{
    const int MaxHeaderChars = 64 * 1024;
    var oneByte = new byte[1];
    var text = new StringBuilder(256);

    for (;;)
    {
        if (await stream.ReadAsync(oneByte, ct) == 0)
        {
            return text.Length > 0 ? text.ToString() : null;
        }

        text.Append((char)oneByte[0]);

        // Stop at the blank line, or bail out on a runaway/garbage stream.
        if (EndsWithBlankLine(text) || text.Length > MaxHeaderChars)
        {
            return text.ToString();
        }
    }

    static bool EndsWithBlankLine(StringBuilder sb)
    {
        var n = sb.Length;
        return n >= 4
            && sb[n - 4] == '\r'
            && sb[n - 3] == '\n'
            && sb[n - 2] == '\r'
            && sb[n - 1] == '\n';
    }
}
// Set on the first DisposeAsync call so later calls are no-ops.
private bool _disposed;

/// <summary>
/// Signals shutdown, stops the listener and waits for the accept loop to finish.
/// Safe to call more than once: only the first call does any work (a repeat call
/// would otherwise hit the already-disposed cancellation source and throw).
/// </summary>
public async ValueTask DisposeAsync()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    await _cts.CancelAsync();
    _listener.Stop();
    if (_acceptLoop is not null)
    {
        try
        {
            await _acceptLoop;
        }
        catch (OperationCanceledException)
        {
            // expected on shutdown
        }
    }

    _cts.Dispose();
}
}

View File

@@ -0,0 +1,21 @@
using Microsoft.Extensions.Logging;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// Hands out <see cref="LocalForwardingProxy"/> instances wired to a DI-supplied
/// logger, so callers (the proxy probe, the cs.money capture) can create a per-run
/// local proxy without taking a dependency on <see cref="ILoggerFactory"/>.
/// </summary>
public sealed class LocalForwardingProxyFactory
{
    private readonly ILogger<LocalForwardingProxy> _logger;

    public LocalForwardingProxyFactory(ILogger<LocalForwardingProxy> logger) => _logger = logger;

    /// <summary>
    /// Constructs a local proxy that chains to <paramref name="upstream"/>. The proxy
    /// is returned unstarted; the caller starts and disposes it.
    /// </summary>
    public LocalForwardingProxy Create(ProxyLease upstream)
    {
        return new LocalForwardingProxy(upstream, _logger);
    }
}

View File

@@ -0,0 +1,103 @@
using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// The exit IP a proxy lease actually resolves to, per ipinfo.io. Populated by
/// deserializing the IP-echo JSON with case-insensitive property matching, so any
/// field absent from the response stays null.
/// </summary>
/// <param name="Ip">Exit IP address as reported by the echo service.</param>
/// <param name="City">Reported city of the exit IP.</param>
/// <param name="Region">Reported region of the exit IP.</param>
/// <param name="Country">Reported country of the exit IP.</param>
/// <param name="Org">
/// ASN + organisation, e.g. "AS7922 Comcast Cable". This is the tell for
/// residential vs. datacenter: a consumer ISP here means a real residential
/// exit; a hosting provider (OVH, Hetzner, AWS…) means datacenter dressed up.
/// </param>
/// <param name="Hostname">Reported hostname of the exit IP, when the service returns one.</param>
/// <param name="Timezone">Reported timezone of the exit IP.</param>
public sealed record ProxyExitInfo(
string? Ip,
string? City,
string? Region,
string? Country,
string? Org,
string? Hostname,
string? Timezone);
/// <summary>
/// Smallest possible end-to-end check of the proxy plumbing: acquire a lease,
/// launch the real browser through it, and read back the exit IP from an
/// IP-echo endpoint. Costs a few KB, so it's the right first thing to run
/// against a metered residential plan — it proves auth works and shows whether
/// the IP is genuinely residential before we spend bandwidth on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
// IP-echo endpoint the browser is pointed at.
private const string IpEchoUrl = "https://ipinfo.io/json";

// Case-insensitive matching so the echo's JSON keys bind to the record's
// PascalCase parameters.
private static readonly JsonSerializerOptions JsonOptions = new()
{
    PropertyNameCaseInsensitive = true,
};

private readonly IProxyProvider _provider;
private readonly LocalForwardingProxyFactory _proxyFactory;
private readonly BrowserDriverFactory _factory;
private readonly ILogger<ProxyProbe> _logger;

/// <summary>Captures the collaborators used by a probe run; performs no work itself.</summary>
public ProxyProbe(
    IProxyProvider provider,
    LocalForwardingProxyFactory proxyFactory,
    BrowserDriverFactory factory,
    ILogger<ProxyProbe> logger)
    => (_provider, _proxyFactory, _factory, _logger) = (provider, proxyFactory, factory, logger);
/// <summary>
/// Acquires a lease, starts a local forwarding proxy for it, opens the IP-echo page
/// in a real browser through that proxy, and parses the reported exit details. The
/// browser is quit and the local proxy disposed before returning.
/// </summary>
/// <param name="request">Lease request passed to the proxy provider.</param>
/// <returns>The exit IP details reported by the echo endpoint.</returns>
/// <exception cref="InvalidOperationException">
/// The browser returned no page text, the text contained no JSON object, or the JSON
/// deserialized to null.
/// </exception>
public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
{
    var lease = _provider.Acquire(request);
    var mode = lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}";
    _logger.LogInformation("Acquired {Provider} lease (exit {Mode}).", lease.Provider, mode);

    await using var localProxy = _proxyFactory.Create(lease).Start();
    var browser = _factory.Create(localProxy.Endpoint, blockImages: true);
    try
    {
        browser.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
        browser.Navigate().GoToUrl(IpEchoUrl);

        // Take the document's text instead of walking the DOM, so the browser's
        // built-in JSON viewer can't interfere; the JSON object is carved out of
        // that text afterwards.
        var scripts = (IJavaScriptExecutor)browser;
        var pageText = scripts.ExecuteScript("return document.documentElement.innerText;");
        if (pageText is not string rendered)
        {
            throw new InvalidOperationException("Browser returned no page text.");
        }

        var exit = JsonSerializer.Deserialize<ProxyExitInfo>(ExtractJson(rendered), JsonOptions);
        if (exit is null)
        {
            throw new InvalidOperationException("IP-echo response was empty.");
        }

        _logger.LogInformation(
            "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
            exit.Ip, exit.City, exit.Region, exit.Country, exit.Org);
        return exit;
    }
    finally
    {
        browser.Quit();
    }
}
// Returns the substring running from the first '{' to the last '}' (inclusive).
// Throws InvalidOperationException when no such span exists.
private static string ExtractJson(string text)
{
    var open = text.IndexOf('{');
    var close = text.LastIndexOf('}');

    var found = open >= 0 && close > open;
    if (!found)
    {
        throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
    }

    return text.Substring(open, close - open + 1);
}
}

View File

@@ -11,9 +11,6 @@ namespace BlueLaminate.Scraper.Skins;
/// </summary>
public sealed class SkinCatalogClient
{
public const string DefaultUrl =
"https://raw.githubusercontent.com/ByMykel/CSGO-API/refs/heads/main/public/api/en/skins.json";
private static readonly JsonSerializerOptions Options = new()
{
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
@@ -24,10 +21,10 @@ public sealed class SkinCatalogClient
private readonly HttpClient _http;
private readonly string _url;
public SkinCatalogClient(HttpClient http, string? url = null)
public SkinCatalogClient(HttpClient http, SkinCatalogOptions options)
{
_http = http;
_url = url ?? DefaultUrl;
_url = options.Url;
}
public async Task<IReadOnlyList<CatalogSkin>> FetchAsync(CancellationToken ct = default)
@@ -67,14 +64,22 @@ public sealed class SkinCatalogClient
private static void AddSources(List<CatalogSource> into, List<NamedDto>? items, string type)
{
if (items is null)
{
return;
}
foreach (var item in items)
{
if (string.IsNullOrEmpty(item.Id) || string.IsNullOrEmpty(item.Name))
{
continue;
}
if (into.Any(s => s.Id == item.Id))
{
continue;
}
into.Add(new CatalogSource(item.Id, item.Name, type));
}
}

View File

@@ -0,0 +1,14 @@
namespace BlueLaminate.Scraper.Skins;
/// <summary>
/// Settings consumed by <see cref="SkinCatalogClient"/>, bound from the
/// <c>SkinCatalog</c> configuration section.
/// </summary>
public sealed class SkinCatalogOptions
{
/// <summary>Name of the configuration section these options bind from.</summary>
public const string SectionName = "SkinCatalog";
/// <summary>
/// Location of the static CS2 skin catalogue dataset (ByMykel/CSGO-API skins.json).
/// Defaults to the English dataset on the repository's main branch.
/// </summary>
public string Url { get; set; } =
"https://raw.githubusercontent.com/ByMykel/CSGO-API/refs/heads/main/public/api/en/skins.json";
}