almost ready

This commit is contained in:
bob
2026-06-01 10:52:06 -05:00
parent 8b0eb0db78
commit 763305ca89
94 changed files with 8766 additions and 2674 deletions

View File

@@ -8,7 +8,6 @@
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Selenium.WebDriver" />
</ItemGroup>
</Project>

View File

@@ -1,79 +0,0 @@
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
using OpenQA.Selenium.Edge;
namespace BlueLaminate.Scraper.Browser;
/// <summary>
/// Factory for visible (non-headless) Edge WebDriver sessions that reach the
/// network through an auth-free local proxy endpoint — normally a
/// <see cref="Proxies.LocalForwardingProxy"/> chained to the residential gateway.
/// No CDP/DevTools domain is enabled by design: the local proxy already carries
/// the upstream credentials, so the browser never has to answer a 407, and
/// switching DevTools on is treated here as a Cloudflare automation tell.
/// <para>
/// Images are blocked through a content-settings preference by default because
/// the residential plan is metered per GB.
/// </para>
/// </summary>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger) => _logger = logger;

    /// <summary>
    /// Starts an Edge session and returns its driver.
    /// </summary>
    /// <param name="proxyEndpoint">
    /// "host:port" of an auth-free proxy. Null, empty or whitespace means the
    /// browser connects directly (the diagnostic no-proxy mode).
    /// </param>
    /// <param name="blockImages">When true, images are disabled via a profile preference.</param>
    /// <param name="profileDir">
    /// Profile directory to reuse across runs (keeps cookies such as
    /// <c>cf_clearance</c> and history). Null/blank creates a fresh directory
    /// under the temp path for this launch only.
    /// </param>
    /// <returns>The launched <see cref="EdgeDriver"/>.</returns>
    public IWebDriver Create(string? proxyEndpoint, bool blockImages = true, string? profileDir = null)
    {
        var viaProxy = !string.IsNullOrWhiteSpace(proxyEndpoint);
        var keepProfile = !string.IsNullOrWhiteSpace(profileDir);
        var edge = new EdgeOptions();

        // The proxy is passed as a launch argument instead of EdgeOptions.Proxy,
        // which would also route Selenium Manager's driver download. Omitting the
        // scheme applies the proxy to every protocol.
        if (viaProxy)
        {
            edge.AddArgument($"--proxy-server={proxyEndpoint}");
        }

        // Strip the most obvious automation markers.
        edge.AddArgument("--disable-blink-features=AutomationControlled");
        edge.AddExcludedArgument("enable-automation");
        edge.AddAdditionalOption("useAutomationExtension", false);
        foreach (var flag in new[] { "--no-first-run", "--no-default-browser-check", "--start-maximized" })
        {
            edge.AddArgument(flag);
        }

        string userDataDir;
        if (keepProfile)
        {
            userDataDir = profileDir!;
        }
        else
        {
            userDataDir = Path.Combine(Path.GetTempPath(), "bluelaminate-edge", Guid.NewGuid().ToString("N"));
        }

        Directory.CreateDirectory(userDataDir);
        edge.AddArgument($"--user-data-dir={userDataDir}");

        if (blockImages)
        {
            edge.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
        }

        var route = viaProxy ? $"local proxy {proxyEndpoint}" : "DIRECT (no proxy)";
        var profileLabel = keepProfile ? userDataDir : "throwaway";
        _logger.LogInformation("Launching Edge via {Route} (profile: {Profile}).", route, profileLabel);

        return new EdgeDriver(edge);
    }
}

View File

@@ -15,7 +15,10 @@ namespace BlueLaminate.Scraper.CsFloat;
/// <param name="DefIndex">Weapon definition index (maps to catalog weapon_id).</param>
/// <param name="PaintIndex">Paint index (maps to catalog paint_index).</param>
/// <param name="PaintSeed">Pattern seed.</param>
/// <param name="FloatValue">Exact float/wear value.</param>
/// <param name="FloatValue">
/// Exact float/wear value, or null for items that have no float at all
/// (e.g. Vanilla knives). A null is distinct from a genuine 0.0 float.
/// </param>
/// <param name="WearName">Wear bucket name, e.g. "Field-Tested".</param>
/// <param name="IsStatTrak">StatTrak™ variant.</param>
/// <param name="IsSouvenir">Souvenir variant.</param>
@@ -37,7 +40,7 @@ public sealed record CsFloatListing(
int DefIndex,
int PaintIndex,
int PaintSeed,
decimal FloatValue,
decimal? FloatValue,
string? WearName,
bool IsStatTrak,
bool IsSouvenir,

View File

@@ -321,7 +321,7 @@ public sealed class CsFloatListingsClient
public int DefIndex { get; init; }
public int PaintIndex { get; init; }
public int PaintSeed { get; init; }
public decimal FloatValue { get; init; }
public decimal? FloatValue { get; init; }
public string? WearName { get; init; }
public bool IsStatTrak { get; init; }
public bool IsSouvenir { get; init; }

View File

@@ -1,211 +0,0 @@
using System.Text;
using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.CsMoney;
/// <summary>
/// Outcome of a stealth pagination run, as returned by
/// <see cref="CsMoneyCaptureService.RunAsync"/>. A cancelled run does not produce
/// a result: the cancellation exception is rethrown to the caller instead.
/// </summary>
/// <param name="PagesSucceeded">How many offset pages returned listings JSON before stopping.</param>
/// <param name="ItemsTotal">Total listing items captured across those pages.</param>
/// <param name="StoppedReason">
/// Why pagination stopped: "challenged" (a response looked like a Cloudflare
/// challenge), "empty" (a page held zero items), "completed" (the page limit was
/// reached), or "error" (a non-cancellation exception was logged).
/// </param>
public sealed record CsMoneyCaptureResult(int PagesSucceeded, int ItemsTotal, string StoppedReason);
/// <summary>
/// Drives a low-fingerprint, non-headless Edge (no CDP) through a local forwarding
/// proxy to the cs.money market, lets the operator clear Cloudflare once, then pages
/// the listings API with human-like pacing using in-page <c>fetch()</c> calls from
/// the cleared origin (so the cf_clearance cookie rides along). It records each
/// page's JSON and — crucially for the current phase — <b>measures how many pages
/// survive before Cloudflare re-challenges</b>, which tells us whether the
/// fingerprint reductions are enough for a real sweep.
/// </summary>
public sealed class CsMoneyCaptureService
{
private readonly IProxyProvider _provider;
private readonly LocalForwardingProxyFactory _proxyFactory;
private readonly BrowserDriverFactory _factory;
private readonly CsMoneyOptions _options;
private readonly ILogger<CsMoneyCaptureService> _logger;
/// <summary>Stores the collaborators used by <see cref="RunAsync"/>.</summary>
/// <param name="provider">Source of upstream proxy leases.</param>
/// <param name="proxyFactory">Builds the per-run local forwarding proxy.</param>
/// <param name="factory">Launches the Edge browser session.</param>
/// <param name="options">Market/API URLs, profile directory and pacing settings.</param>
/// <param name="logger">Destination for progress and failure messages.</param>
public CsMoneyCaptureService(
    IProxyProvider provider,
    LocalForwardingProxyFactory proxyFactory,
    BrowserDriverFactory factory,
    CsMoneyOptions options,
    ILogger<CsMoneyCaptureService> logger)
{
    (_provider, _proxyFactory, _factory) = (provider, proxyFactory, factory);
    (_options, _logger) = (options, logger);
}
/// <summary>
/// Opens the market page, waits for <paramref name="browseUntilDone"/> (the
/// operator clears Cloudflare and presses Enter), then fetches the listings API
/// in offset steps of 60 until <paramref name="maxPages"/> pages succeeded, a
/// response looks like a challenge, or a page contains no items.
/// </summary>
/// <param name="outputDir">Directory (created if missing) receiving one file per page or challenge body.</param>
/// <param name="request">Exit-IP requirements handed to the proxy provider; unused when <paramref name="useProxy"/> is false.</param>
/// <param name="loadImages">True to let the browser load images; false blocks them.</param>
/// <param name="useProxy">
/// False drives the browser on the machine's own IP, to tell apart a bad exit-IP
/// reputation from the webdriver fingerprint itself.
/// </param>
/// <param name="maxPages">Upper bound on successfully captured pages.</param>
/// <param name="browseUntilDone">Awaited once after navigation; completes when the operator is ready.</param>
/// <param name="ct">Cancels paging and the inter-page delay.</param>
/// <returns>Pages and items captured plus the stop reason.</returns>
/// <exception cref="OperationCanceledException">Rethrown when <paramref name="ct"/> is cancelled.</exception>
/// <remarks>
/// Failures while acquiring the lease, starting the local proxy or launching the
/// browser propagate to the caller; failures after the browser is up are logged
/// and reported as "error". In every case the browser and the local proxy are
/// released before this method completes.
/// </remarks>
public async Task<CsMoneyCaptureResult> RunAsync(
    string outputDir,
    ProxyRequest request,
    bool loadImages,
    bool useProxy,
    int maxPages,
    Func<Task> browseUntilDone,
    CancellationToken ct = default)
{
    Directory.CreateDirectory(outputDir);

    LocalForwardingProxy? localProxy = null;
    IWebDriver? driver = null;
    var pages = 0;
    var items = 0;
    var reason = "completed";
    try
    {
        // Setup lives inside the outer try so that a failed proxy start or
        // browser launch still runs the cleanup below (previously the started
        // proxy leaked when the browser could not be created).
        string? proxyEndpoint = null;
        if (useProxy)
        {
            var lease = _provider.Acquire(request);
            localProxy = _proxyFactory.Create(lease);
            localProxy.Start();
            proxyEndpoint = localProxy.Endpoint;
        }

        driver = _factory.Create(proxyEndpoint, blockImages: !loadImages, _options.ProfileDir);

        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(90);
            driver.Manage().Timeouts().AsynchronousJavaScript = TimeSpan.FromSeconds(45);
            _logger.LogInformation("Navigating to {Url}", _options.MarketUrl);
            driver.Navigate().GoToUrl(_options.MarketUrl);

            // Operator clears the Cloudflare challenge in the visible window, waits
            // until the market grid is actually rendered, then presses Enter.
            await browseUntilDone();

            for (var offset = 0; pages < maxPages; offset += 60)
            {
                ct.ThrowIfCancellationRequested();
                var apiUrl = string.Format(_options.ApiUrlTemplate, offset);
                var (status, body) = DirectFetch(driver, apiUrl);

                if (LooksLikeChallenge(status, body))
                {
                    _logger.LogWarning(
                        "Re-challenged at offset {Offset} (after {Pages} clean page(s)). Stopping.",
                        offset, pages);
                    await WriteAsync(outputDir, $"challenge_offset_{offset}.html", body, ct);
                    reason = "challenged";
                    break;
                }

                var count = TryCountItems(body);
                if (count is 0)
                {
                    _logger.LogInformation("Offset {Offset} returned no items — end of listings.", offset);
                    reason = "empty";
                    break;
                }

                // A null count (body without a countable items[] array) is still
                // stored and counted as a page, contributing zero items.
                await WriteAsync(outputDir, $"page_{pages:D3}_offset_{offset}.json", body, ct);
                pages++;
                items += count ?? 0;
                _logger.LogInformation(
                    "Page {Page} [offset {Offset}] [{Status}] → {Count} items ({Bytes} bytes).",
                    pages, offset, status, count, body.Length);
                await DelayAsync(ct);
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Cancellation bypasses this handler and reaches the caller unchanged.
            _logger.LogError(ex, "cs.money capture failed after {Pages} page(s).", pages);
            reason = "error";
        }
    }
    finally
    {
        try
        {
            driver?.Quit();
        }
        finally
        {
            // Runs even when Quit() throws, so the listener socket is never leaked.
            if (localProxy is not null)
            {
                await localProxy.DisposeAsync();
            }
        }
    }

    return new CsMoneyCaptureResult(pages, items, reason);
}
// Issues fetch(apiUrl) from inside the currently loaded page and hands back the
// HTTP status together with the response text. ExecuteAsyncScript is used so the
// script can report through its completion callback once the fetch promise
// settles; a rejected fetch is reported as status -1 with the error text, and a
// null/empty script result as (-1, "").
private (int Status, string Body) DirectFetch(IWebDriver driver, string apiUrl)
{
    const string script = """
        const url = arguments[0];
        const done = arguments[arguments.length - 1];
        fetch(url, { credentials: 'include', headers: { 'accept': 'application/json' } })
        .then(r => r.text().then(t => done(JSON.stringify({ status: r.status, body: t }))))
        .catch(e => done(JSON.stringify({ status: -1, body: String(e) })));
        """;

    var executor = (IJavaScriptExecutor)driver;
    if (executor.ExecuteAsyncScript(script, apiUrl) is not string { Length: > 0 } payload)
    {
        return (-1, "");
    }

    // The script always answers with a {"status": ..., "body": ...} envelope.
    using var envelope = JsonDocument.Parse(payload);
    var root = envelope.RootElement;
    var httpStatus = root.GetProperty("status").GetInt32();
    var text = root.GetProperty("body").GetString() ?? "";
    return (httpStatus, text);
}
// Heuristic for "this response is a challenge / failure, not listings JSON":
// a 403, 503 or -1 (fetch failed) status, a known challenge marker in the body,
// or a body whose first non-whitespace character opens an HTML tag.
private static bool LooksLikeChallenge(int status, string body)
{
    if (status == 403 || status == 503 || status == -1)
    {
        return true;
    }

    foreach (var marker in new[] { "Just a moment", "challenge-platform" })
    {
        if (body.Contains(marker, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    // HTML, not JSON
    return body.TrimStart().StartsWith("<", StringComparison.Ordinal);
}
// Counts the entries of the top-level "items" array without binding a full
// model (the typed model is Phase 2). Returns null whenever the count cannot be
// determined: the body is not valid JSON, the root is not a JSON object, the
// "items" property is missing, or it is not an array.
private static int? TryCountItems(string body)
{
    try
    {
        using var doc = JsonDocument.Parse(body);
        var root = doc.RootElement;

        // TryGetProperty throws InvalidOperationException (not JsonException) on
        // a non-object root such as [] or "text", so rule that out first.
        if (root.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        return root.TryGetProperty("items", out var items)
            && items.ValueKind == JsonValueKind.Array
            ? items.GetArrayLength()
            : null;
    }
    catch (JsonException)
    {
        return null;
    }
}
// Pauses between page fetches: the configured base delay (negative values are
// clamped to zero) plus a uniformly random extra in [0, PageJitterSeconds) when
// jitter is enabled. Skips the wait entirely when the total is not positive.
private async Task DelayAsync(CancellationToken ct)
{
    double extra = 0;
    if (_options.PageJitterSeconds > 0)
    {
        extra = Random.Shared.NextDouble() * _options.PageJitterSeconds;
    }

    var total = Math.Max(0, _options.PageDelaySeconds) + extra;
    if (!(total > 0))
    {
        return;
    }

    await Task.Delay(TimeSpan.FromSeconds(total), ct);
}
// Writes body as UTF-8 text to dir/fileName, replacing any existing file.
private static async Task WriteAsync(string dir, string fileName, string body, CancellationToken ct)
{
    var target = Path.Combine(dir, fileName);
    await File.WriteAllTextAsync(target, body, Encoding.UTF8, ct);
}
}

View File

@@ -1,50 +0,0 @@
namespace BlueLaminate.Scraper.CsMoney;
/// <summary>
/// Settings for the cs.money scraper, bound from the <c>CsMoney</c> configuration
/// section.
/// <para>
/// cs.money has no public API and is protected by Cloudflare, so the scraper
/// drives a real, non-headless Edge browser through a local forwarding hop to an
/// IPRoyal residential proxy (no CDP). Because the market endpoint re-challenges
/// aggressively while paging, these settings also cover the warmed profile and
/// the request pacing.
/// </para>
/// </summary>
public sealed class CsMoneyOptions
{
    /// <summary>Name of the configuration section these options bind to.</summary>
    public const string SectionName = "CsMoney";

    /// <summary>Market page the browser opens; the operator clears Cloudflare here.</summary>
    public string MarketUrl { get; set; } = "https://cs.money/market/buy/";

    /// <summary>
    /// Format string for the listings API; <c>{0}</c> receives the page offset,
    /// which advances in steps of 60. The URL is fetched from inside the cleared
    /// market page so the cf_clearance cookie accompanies the request.
    /// </summary>
    public string ApiUrlTemplate { get; set; } =
        "https://cs.money/2.0/market/sell-orders?limit=60&offset={0}";

    /// <summary>
    /// Chromium profile directory reused between runs so the cf_clearance cookie
    /// and browsing history survive. Defaults to a fixed folder under the system
    /// temp path; an empty value selects a throwaway profile.
    /// </summary>
    public string ProfileDir { get; set; } =
        Path.Combine(Path.GetTempPath(), "bluelaminate-csmoney-profile");

    /// <summary>
    /// Optional ISO country code(s) for the residential exit IP, e.g. "us".
    /// Null/empty leaves the choice to IPRoyal.
    /// </summary>
    public string? Country { get; set; } = null;

    /// <summary>Whether the browser loads images. Off by default to save metered bandwidth.</summary>
    public bool LoadImages { get; set; } = false;

    /// <summary>Base pause between paginated API fetches, in seconds.</summary>
    public double PageDelaySeconds { get; set; } = 2.5d;

    /// <summary>Upper bound of the random extra pause added to each delay, in seconds.</summary>
    public double PageJitterSeconds { get; set; } = 2.0d;
}

View File

@@ -1,21 +0,0 @@
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// Source of proxy endpoints. The whole point of this seam is that the rest of
/// the scraper depends only on this interface and <see cref="ProxyLease"/>, so a
/// different residential provider — or the future C2 that allocates IPs to
/// containers, or a composite "grab-bag" over several providers — drops in
/// without changing any browser or scraping code.
/// </summary>
public interface IProxyProvider
{
/// <summary>
/// Identifier recorded on issued leases (it becomes
/// <see cref="ProxyLease.Provider"/>), e.g. "iproyal".
/// </summary>
string Name { get; }
/// <summary>
/// Produce a usable endpoint for the given request. For gateway providers
/// this is pure string composition (no network call); the C2 implementation
/// can override that later with real allocation.
/// </summary>
/// <param name="request">Country, stickiness, session key and lifetime wanted by the caller.</param>
/// <returns>A lease carrying the gateway host/port and the literal credentials to present.</returns>
ProxyLease Acquire(ProxyRequest request);
}

View File

@@ -1,77 +0,0 @@
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// <see cref="IProxyProvider"/> for IPRoyal's residential gateway. IPRoyal keeps
/// one fixed host/port (geo.iproyal.com:12321) and encodes everything else —
/// country, sticky-session id, session lifetime — as underscore-delimited
/// parameters appended to the account password. Example password:
/// "secret_country-us_session-ab12cd_lifetime-30m". The account username is sent
/// unchanged. Docs: https://docs.iproyal.com/proxies/residential/proxy
/// </summary>
public sealed class IpRoyalProxyProvider : IProxyProvider
{
    public const string GatewayHost = "geo.iproyal.com";
    public const int GatewayPort = 12321;

    // IPRoyal caps sticky sessions; 30 minutes is a safe default that comfortably
    // covers a single scrape pass without forcing an early IP rotation.
    private static readonly TimeSpan DefaultLifetime = TimeSpan.FromMinutes(30);

    private readonly string _username;
    private readonly string _password;

    /// <summary>Creates a provider for one IPRoyal account.</summary>
    /// <param name="username">Account username, sent to the gateway unchanged.</param>
    /// <param name="password">Account password; targeting parameters are appended per lease.</param>
    /// <exception cref="ArgumentException">Either credential is null, empty or whitespace.</exception>
    public IpRoyalProxyProvider(string username, string password)
    {
        if (string.IsNullOrWhiteSpace(username))
        {
            throw new ArgumentException("IPRoyal username is required.", nameof(username));
        }

        if (string.IsNullOrWhiteSpace(password))
        {
            throw new ArgumentException("IPRoyal password is required.", nameof(password));
        }

        _username = username;
        _password = password;
    }

    /// <inheritdoc />
    public string Name => "iproyal";

    /// <summary>
    /// Composes a lease for the fixed gateway by appending the requested country
    /// and, for sticky requests, the session id and lifetime to the password.
    /// No network call is made.
    /// </summary>
    /// <param name="request">Targeting and stickiness wanted by the caller.</param>
    /// <returns>
    /// The lease; <see cref="ProxyLease.SessionId"/> and
    /// <see cref="ProxyLease.ExpiresAt"/> are set only for sticky requests.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public ProxyLease Acquire(ProxyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var password = _password;
        string? sessionId = null;
        DateTimeOffset? expiresAt = null;

        // Country first; the router picks one at random when several are listed.
        if (!string.IsNullOrWhiteSpace(request.Country))
        {
            password += $"_country-{request.Country.Trim().ToLowerInvariant()}";
        }

        if (request.Sticky)
        {
            // A blank caller-supplied key is treated like "none supplied" (the same
            // rule applied to Country above); otherwise the password would carry an
            // empty "_session-" parameter.
            sessionId = string.IsNullOrWhiteSpace(request.SessionId)
                ? NewSessionId()
                : request.SessionId.Trim();

            var lifetime = request.Lifetime ?? DefaultLifetime;

            // IPRoyal expresses lifetime as whole minutes (e.g. "_lifetime-30m");
            // anything rounding below one minute is raised to one.
            var minutes = Math.Max(1, (int)Math.Round(lifetime.TotalMinutes));
            password += $"_session-{sessionId}_lifetime-{minutes}m";
            expiresAt = DateTimeOffset.UtcNow.AddMinutes(minutes);
        }

        return new ProxyLease(
            Host: GatewayHost,
            Port: GatewayPort,
            Username: _username,
            Password: password,
            Provider: Name,
            SessionId: sessionId,
            ExpiresAt: expiresAt);
    }

    // Short, URL/param-safe token (first 10 hex digits of a GUID). It only needs
    // to be stable for the duration of a sticky lease.
    // NOTE(review): confirm against IPRoyal's docs that a 10-character session
    // value is accepted; the provider may constrain its length or alphabet.
    private static string NewSessionId() =>
        Guid.NewGuid().ToString("N")[..10];
}

View File

@@ -1,232 +0,0 @@
using System.Net;
using System.Net.Sockets;
using System.Text;
using Microsoft.Extensions.Logging;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// A tiny in-process HTTP proxy that listens on 127.0.0.1 and chains every request
/// to an upstream gateway (the residential <see cref="ProxyLease"/>), injecting the
/// gateway's <c>Proxy-Authorization</c> header itself.
/// <para>
/// Why this exists: Chromium ignores credentials in <c>--proxy-server</c>, and the
/// only in-browser ways to answer the gateway's 407 are a CDP auth handler (which
/// is a Cloudflare automation tell) or a Manifest V2 extension (disabled in current
/// Chromium). By terminating the browser→proxy hop locally and adding the auth here,
/// the browser talks to an <em>auth-free</em> local endpoint and we run with zero
/// CDP — far less detectable — while the upstream still carries the IPRoyal
/// username/password (and its baked-in country/session params).
/// </para>
/// <para>
/// HTTPS (the only thing cs.money serves) flows through the <c>CONNECT</c> tunnel:
/// we open the tunnel to the upstream with auth, then relay raw bytes both ways so
/// the browser does TLS end-to-end with the real host — this proxy never sees
/// plaintext. Plain HTTP is forwarded best-effort for the occasional non-TLS call.
/// </para>
/// </summary>
public sealed class LocalForwardingProxy : IAsyncDisposable
{
private readonly ProxyLease _upstream;
private readonly ILogger _logger;
private readonly TcpListener _listener;
private readonly CancellationTokenSource _cts = new();
private readonly string _authHeader;
private Task? _acceptLoop;
/// <summary>
/// Prepares (but does not start) a loopback listener on an OS-assigned port and
/// precomputes the Basic <c>Proxy-Authorization</c> header line from the lease's
/// username and password.
/// </summary>
/// <param name="upstream">Gateway endpoint and credentials to chain to.</param>
/// <param name="logger">Destination for listener and tunnel diagnostics.</param>
public LocalForwardingProxy(ProxyLease upstream, ILogger logger)
{
    (_upstream, _logger) = (upstream, logger);

    // Port 0 asks the OS for an ephemeral port.
    _listener = new TcpListener(IPAddress.Loopback, port: 0);

    var credentials = Encoding.ASCII.GetBytes($"{upstream.Username}:{upstream.Password}");
    _authHeader = $"Proxy-Authorization: Basic {Convert.ToBase64String(credentials)}\r\n";
}
/// <summary>"127.0.0.1:port" — pass this to the browser's <c>--proxy-server</c>.</summary>
public string Endpoint { get; private set; } = "";
/// <summary>
/// Binds the loopback listener, publishes the resulting address through
/// <see cref="Endpoint"/> and launches the background accept loop.
/// </summary>
/// <returns>This instance, so creation and start can be chained.</returns>
public LocalForwardingProxy Start()
{
    _listener.Start();

    // The real port is only known once the listener is bound.
    var bound = (IPEndPoint)_listener.LocalEndpoint;
    Endpoint = $"127.0.0.1:{bound.Port}";

    _acceptLoop = Task.Run(() => AcceptLoopAsync(_cts.Token));

    _logger.LogInformation(
        "Local forwarding proxy listening on {Endpoint} → upstream {Upstream} ({Provider}).",
        Endpoint, _upstream.Endpoint, _upstream.Provider);
    return this;
}
// Accepts browser connections until the token is cancelled or the listener is
// gone, handing each connection to HandleClientAsync on the thread pool.
private async Task AcceptLoopAsync(CancellationToken ct)
{
    while (!ct.IsCancellationRequested)
    {
        TcpClient client;
        try
        {
            client = await _listener.AcceptTcpClientAsync(ct);
        }
        catch (OperationCanceledException)
        {
            break;
        }
        catch (ObjectDisposedException)
        {
            // The listener socket has been released; no later accept can succeed,
            // so retrying would only spin.
            break;
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Accept failed.");
            continue;
        }

        // Fire-and-forget per connection; exceptions are swallowed per client so
        // one bad tunnel never takes down the listener. The scheduling call is
        // deliberately NOT given the token: with an already-cancelled token
        // Task.Run would never invoke the handler, and the accepted client (which
        // the handler owns and disposes) would be leaked.
        _ = Task.Run(() => HandleClientAsync(client, ct), CancellationToken.None);
    }
}
// Serves one browser connection: reads the request header, then dispatches to
// the CONNECT tunnel path or the plain-HTTP path. The client is disposed when
// this method finishes; errors while serving are logged at debug level only.
private async Task HandleClientAsync(TcpClient client, CancellationToken ct)
{
    using var connection = client;
    connection.NoDelay = true;
    try
    {
        var browserStream = connection.GetStream();
        var header = await ReadHeaderAsync(browserStream, ct);
        if (header is null)
        {
            return;
        }

        // Request line is "<METHOD> <target> ...".
        var tokens = header.Split("\r\n", 2)[0].Split(' ');
        if (tokens.Length < 2)
        {
            return;
        }

        var isTunnel = tokens[0].Equals("CONNECT", StringComparison.OrdinalIgnoreCase);
        if (isTunnel)
        {
            await HandleConnectAsync(browserStream, tokens[1], ct);
            return;
        }

        await HandlePlainAsync(browserStream, header, ct);
    }
    catch (Exception ex)
    {
        _logger.LogDebug(ex, "Client connection error.");
    }
}
// HTTPS path. Sends an authenticated CONNECT for the requested target to the
// upstream gateway. On a 200 reply the browser is told the tunnel is up and raw
// bytes are relayed both ways; on anything else the refusal is logged and the
// browser receives 502 Bad Gateway.
private async Task HandleConnectAsync(NetworkStream clientStream, string target, CancellationToken ct)
{
    using var gateway = new TcpClient { NoDelay = true };
    await gateway.ConnectAsync(_upstream.Host, _upstream.Port, ct);
    var gatewayStream = gateway.GetStream();

    var request = $"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{_authHeader}\r\n";
    await gatewayStream.WriteAsync(Encoding.ASCII.GetBytes(request), ct);

    var reply = await ReadHeaderAsync(gatewayStream, ct);
    if (TunnelAccepted(reply))
    {
        await clientStream.WriteAsync(
            Encoding.ASCII.GetBytes("HTTP/1.1 200 Connection established\r\n\r\n"), ct);
        await RelayAsync(clientStream, gatewayStream, ct);
        return;
    }

    var statusLine = reply?.Split("\r\n", 2)[0] ?? "no response";
    _logger.LogWarning("Upstream refused CONNECT {Target}: {Status}", target, statusLine);
    var badGateway = "HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n";
    await clientStream.WriteAsync(Encoding.ASCII.GetBytes(badGateway), ct);

    // True only for an "HTTP/1.x 200 ..." status line.
    static bool TunnelAccepted(string? response)
    {
        if (response is null || !response.StartsWith("HTTP/1.", StringComparison.Ordinal))
        {
            return false;
        }

        var fields = response.Split(' ', 3);
        return fields.Length >= 2 && fields[1] == "200";
    }
}
// Plain-HTTP path. Requests without a Host header are dropped. Otherwise the
// already-read header block is sent to the upstream gateway with the
// Proxy-Authorization line inserted directly after the request line, and the
// rest of the exchange is relayed in both directions.
private async Task HandlePlainAsync(NetworkStream clientStream, string header, CancellationToken ct)
{
    var hasHost = header.Split("\r\n")
        .Any(line => line.StartsWith("Host:", StringComparison.OrdinalIgnoreCase));
    if (!hasHost)
    {
        return;
    }

    using var gateway = new TcpClient { NoDelay = true };
    await gateway.ConnectAsync(_upstream.Host, _upstream.Port, ct);
    var gatewayStream = gateway.GetStream();

    // Position just past the CRLF that ends the request line.
    var insertAt = header.IndexOf("\r\n", StringComparison.Ordinal) + 2;
    var outgoing = header.Insert(insertAt, _authHeader);
    await gatewayStream.WriteAsync(Encoding.ASCII.GetBytes(outgoing), ct);

    await RelayAsync(clientStream, gatewayStream, ct);
}
// Pipes bytes in both directions until either direction ends (EOF, error or
// cancellation), then stops the other direction and waits for it to finish, so
// no copy is still running against the streams when the caller disposes them
// and no task is left with an unobserved exception.
private static async Task RelayAsync(NetworkStream a, NetworkStream b, CancellationToken ct)
{
    using var relayCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    var toUpstream = a.CopyToAsync(b, relayCts.Token);
    var toClient = b.CopyToAsync(a, relayCts.Token);

    await Task.WhenAny(toUpstream, toClient);

    // One side is done; unblock the pending read/write of the other side.
    relayCts.Cancel();

    try
    {
        await Task.WhenAll(toUpstream, toClient);
    }
    catch (Exception ex) when (ex is OperationCanceledException
        or IOException
        or SocketException
        or ObjectDisposedException)
    {
        // Expected when a tunnel is torn down: the relay simply ends, as before.
    }
}
// Reads one byte at a time (so nothing past the header is consumed) until the
// blank line (CRLFCRLF) that ends an HTTP header block and returns the text read
// so far. Returns null when the stream ends before any byte arrived; if it ends
// later, or more than 64 KiB arrive without a terminator, the partial text is
// returned as-is.
private static async Task<string?> ReadHeaderAsync(NetworkStream stream, CancellationToken ct)
{
    const int MaxHeaderChars = 64 * 1024;
    var single = new byte[1];
    var text = new StringBuilder(256);

    for (; ; )
    {
        if (await stream.ReadAsync(single, ct) == 0)
        {
            return text.Length == 0 ? null : text.ToString();
        }

        text.Append((char)single[0]);
        var length = text.Length;

        var terminated = length >= 4
            && text[length - 4] == '\r'
            && text[length - 3] == '\n'
            && text[length - 2] == '\r'
            && text[length - 1] == '\n';

        // The size cap guards against a runaway/garbage stream.
        if (terminated || length > MaxHeaderChars)
        {
            return text.ToString();
        }
    }
}
// 0 until DisposeAsync has been entered once; guards against repeated disposal.
private int _disposed;

/// <summary>
/// Stops accepting connections: cancels the shared token, stops the listener and
/// waits for the accept loop to exit. Safe to call more than once — only the
/// first call does any work, later calls return immediately instead of touching
/// the already-disposed token source.
/// </summary>
public async ValueTask DisposeAsync()
{
    if (Interlocked.Exchange(ref _disposed, 1) != 0)
    {
        return;
    }

    await _cts.CancelAsync();
    _listener.Stop();

    if (_acceptLoop is not null)
    {
        try
        {
            await _acceptLoop;
        }
        catch (OperationCanceledException)
        {
            // expected on shutdown
        }
    }

    _cts.Dispose();
}
}

View File

@@ -1,21 +0,0 @@
using Microsoft.Extensions.Logging;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// Hands out <see cref="LocalForwardingProxy"/> instances wired to a DI-provided
/// logger, so callers (the proxy probe, the cs.money capture) can create a
/// per-run local proxy without taking a dependency on <see cref="ILoggerFactory"/>.
/// </summary>
public sealed class LocalForwardingProxyFactory
{
    private readonly ILogger<LocalForwardingProxy> _logger;

    public LocalForwardingProxyFactory(ILogger<LocalForwardingProxy> logger) => _logger = logger;

    /// <summary>
    /// Constructs a proxy that will chain to <paramref name="upstream"/>. The
    /// returned proxy is not listening yet; the caller starts it.
    /// </summary>
    public LocalForwardingProxy Create(ProxyLease upstream)
    {
        return new LocalForwardingProxy(upstream, _logger);
    }
}

View File

@@ -1,29 +0,0 @@
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// A ready-to-use proxy endpoint issued by an <see cref="IProxyProvider"/>.
/// Consumers only ever deal with this type, so providers can be swapped or mixed
/// without touching calling code. <see cref="Username"/> and
/// <see cref="Password"/> are the exact credentials to present to the gateway;
/// for providers such as IPRoyal the targeting/session parameters are already
/// encoded in them.
/// </summary>
/// <param name="Host">Gateway host, e.g. "geo.iproyal.com".</param>
/// <param name="Port">Gateway port, e.g. 12321.</param>
/// <param name="Username">Username to present to the gateway.</param>
/// <param name="Password">Password to present; may carry encoded session/geo parameters.</param>
/// <param name="Provider">Name of the provider that issued the lease.</param>
/// <param name="SessionId">Sticky session key when the exit IP is pinned; otherwise null.</param>
/// <param name="ExpiresAt">When a sticky IP may be recycled; null if rotating/unbounded.</param>
public sealed record ProxyLease(
    string Host,
    int Port,
    string Username,
    string Password,
    string Provider,
    string? SessionId = null,
    DateTimeOffset? ExpiresAt = null)
{
    /// <summary>The gateway address in "host:port" form, as used by browser proxy settings.</summary>
    public string Endpoint
    {
        get { return Host + ":" + Port; }
    }
}

View File

@@ -1,103 +0,0 @@
using System.Text.Json;
using BlueLaminate.Scraper.Browser;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// The exit IP a proxy lease actually resolves to, per ipinfo.io. Populated by
/// deserializing the JSON returned from the IP-echo endpoint; a value is null
/// when the response carries no matching property.
/// </summary>
/// <param name="Ip">Value of the response's "ip" property.</param>
/// <param name="City">Value of the response's "city" property.</param>
/// <param name="Region">Value of the response's "region" property.</param>
/// <param name="Country">Value of the response's "country" property.</param>
/// <param name="Org">
/// ASN + organisation, e.g. "AS7922 Comcast Cable". This is the tell for
/// residential vs. datacenter: a consumer ISP here means a real residential
/// exit; a hosting provider (OVH, Hetzner, AWS…) means datacenter dressed up.
/// </param>
/// <param name="Hostname">Value of the response's "hostname" property.</param>
/// <param name="Timezone">Value of the response's "timezone" property.</param>
public sealed record ProxyExitInfo(
string? Ip,
string? City,
string? Region,
string? Country,
string? Org,
string? Hostname,
string? Timezone);
/// <summary>
/// Minimal end-to-end check of the proxy plumbing: take a lease, start a local
/// forwarding proxy for it, launch the real browser through that proxy and read
/// the exit IP back from an IP-echo endpoint. It costs only a few KB, which makes
/// it the right first thing to run on a metered residential plan — it proves the
/// upstream auth works and shows whether the exit is genuinely residential.
/// </summary>
public sealed class ProxyProbe
{
    private const string IpEchoUrl = "https://ipinfo.io/json";

    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    public ProxyProbe(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        (_provider, _proxyFactory) = (provider, proxyFactory);
        (_factory, _logger) = (factory, logger);
    }

    /// <summary>
    /// Runs the probe once and returns what the IP-echo endpoint reported.
    /// </summary>
    /// <param name="request">Exit-IP requirements passed to the proxy provider.</param>
    /// <returns>The exit information parsed from the echo page.</returns>
    /// <exception cref="InvalidOperationException">
    /// The browser returned no page text, the text contained no JSON object, or
    /// the JSON deserialized to null.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);
        var mode = lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}";
        _logger.LogInformation("Acquired {Provider} lease (exit {Mode}).", lease.Provider, mode);

        await using var localProxy = _proxyFactory.Create(lease).Start();
        var browser = _factory.Create(localProxy.Endpoint, blockImages: true);
        try
        {
            browser.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            browser.Navigate().GoToUrl(IpEchoUrl);

            // Take the rendered text instead of walking the DOM so the browser's
            // built-in JSON viewer cannot interfere, then cut the JSON object out.
            var scripts = (IJavaScriptExecutor)browser;
            var pageText = scripts.ExecuteScript("return document.documentElement.innerText;") as string;
            if (pageText is null)
            {
                throw new InvalidOperationException("Browser returned no page text.");
            }

            var exit = JsonSerializer.Deserialize<ProxyExitInfo>(ExtractJson(pageText), JsonOptions);
            if (exit is null)
            {
                throw new InvalidOperationException("IP-echo response was empty.");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                exit.Ip, exit.City, exit.Region, exit.Country, exit.Org);
            return exit;
        }
        finally
        {
            browser.Quit();
        }
    }

    // Returns the span from the first '{' to the last '}' (inclusive); throws when
    // the text holds no such pair in that order.
    private static string ExtractJson(string text)
    {
        var open = text.IndexOf('{');
        var close = text.LastIndexOf('}');
        if (open >= 0 && close > open)
        {
            return text.Substring(open, close - open + 1);
        }

        throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
    }
}

View File

@@ -1,30 +0,0 @@
namespace BlueLaminate.Scraper.Proxies;
/// <summary>
/// What kind of exit IP the caller wants. Provider-agnostic: each
/// <see cref="IProxyProvider"/> translates these knobs into its own gateway
/// syntax. A sticky request asks the provider to pin one residential IP for the
/// session's lifetime; a non-sticky request lets the IP rotate per connection.
/// Every parameter is optional; a request built with no arguments is sticky with
/// no geo constraint, no caller-chosen session key and the provider's default
/// lifetime.
/// </summary>
/// <param name="Country">
/// Optional ISO 3166-1 alpha-2 code, or a comma-separated list to let the
/// provider pick one at random (e.g. "us" or "us,gb,de"). Null means no
/// geo constraint.
/// </param>
/// <param name="Sticky">
/// True (the default) to keep the same exit IP for the whole session; false to
/// rotate.
/// </param>
/// <param name="SessionId">
/// Optional caller-supplied session key for a sticky lease. When null and
/// <paramref name="Sticky"/> is true the provider generates one.
/// </param>
/// <param name="Lifetime">
/// How long a sticky IP should be held before the provider may recycle it.
/// Ignored when <paramref name="Sticky"/> is false. Null lets the provider
/// apply its own default.
/// </param>
public sealed record ProxyRequest(
string? Country = null,
bool Sticky = true,
string? SessionId = null,
TimeSpan? Lifetime = null);