almost ready
This commit is contained in:
@@ -8,7 +8,6 @@
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
<PackageReference Include="Selenium.WebDriver" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
using OpenQA.Selenium.Edge;
|
||||
|
||||
namespace BlueLaminate.Scraper.Browser;
|
||||
|
||||
/// <summary>
/// Builds a non-headless Edge (Chromium) WebDriver pointed at a local, auth-free
/// proxy endpoint (a <see cref="Proxies.LocalForwardingProxy"/> that chains to the
/// residential gateway). Deliberately uses <b>zero CDP</b>: enabling DevTools
/// domains — even just to answer proxy auth — is a Cloudflare automation tell, and
/// the local proxy already carries the upstream credentials, so there's no 407 to
/// answer in the browser. Combined with a warmed, persistent profile this is the
/// lowest-fingerprint configuration we can manage without an undetected-chromedriver
/// (which has no .NET equivalent).
/// <para>
/// Bandwidth: the residential plan is metered per GB, so images are disabled at the
/// content-settings level by default. Cloudflare gates on JS/TLS/behaviour, not
/// whether pictures render, so this stays realistic.
/// </para>
/// </summary>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    /// <summary>Creates the factory with the logger used to report each browser launch.</summary>
    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Launch Edge routed through <paramref name="proxyEndpoint"/> ("host:port", no
    /// auth). When <paramref name="profileDir"/> is set the profile persists across
    /// runs (so a once-cleared Cloudflare <c>cf_clearance</c> cookie and browsing
    /// history carry over — a warmed profile looks far less like a fresh bot); when
    /// null a throwaway profile is used.
    /// </summary>
    /// <param name="proxyEndpoint">Local proxy as "host:port"; null/blank launches without a proxy.</param>
    /// <param name="blockImages">When true, images are blocked through the profile content settings.</param>
    /// <param name="profileDir">Persistent user-data directory; null/blank creates a fresh temp directory.</param>
    /// <returns>The started Edge driver. The caller owns it and must quit it.</returns>
    public IWebDriver Create(string? proxyEndpoint, bool blockImages = true, string? profileDir = null)
    {
        var options = new EdgeOptions();

        // Route browser traffic through the local proxy via the launch argument
        // rather than EdgeOptions.Proxy (which would also route Selenium Manager's
        // driver download). No scheme = all protocols use the proxy. When null/empty
        // the browser uses the machine's direct connection (diagnostic --no-proxy).
        var hasProxy = !string.IsNullOrWhiteSpace(proxyEndpoint);
        if (hasProxy)
        {
            options.AddArgument($"--proxy-server={proxyEndpoint}");
        }

        // Reduce the most obvious automation tells; residential exit + a real
        // (non-headless) browser + a warmed profile do the rest.
        options.AddArgument("--disable-blink-features=AutomationControlled");
        options.AddExcludedArgument("enable-automation");
        options.AddAdditionalOption("useAutomationExtension", false);
        options.AddArgument("--no-first-run");
        options.AddArgument("--no-default-browser-check");
        options.AddArgument("--start-maximized");

        var persist = !string.IsNullOrWhiteSpace(profileDir);
        var dir = persist
            ? profileDir!
            : Path.Combine(Path.GetTempPath(), "bluelaminate-edge", Guid.NewGuid().ToString("N"));
        Directory.CreateDirectory(dir);
        options.AddArgument($"--user-data-dir={dir}");

        if (blockImages)
        {
            // 2 = "block" in Chromium's managed content settings.
            options.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
        }

        _logger.LogInformation(
            "Launching Edge via {Route} (profile: {Profile}).",
            hasProxy ? $"local proxy {proxyEndpoint}" : "DIRECT (no proxy)",
            persist ? dir : "throwaway");

        try
        {
            return new EdgeDriver(options);
        }
        catch
        {
            // The throwaway directory was created just for this launch; if the
            // launch fails nobody else will ever clean it up. A persistent
            // profile belongs to the caller and is left untouched.
            if (!persist)
            {
                TryDeleteDirectory(dir);
            }

            throw;
        }
    }

    // Best-effort removal of a throwaway profile directory. Failures are ignored
    // so they never mask the launch exception being propagated.
    private static void TryDeleteDirectory(string dir)
    {
        try
        {
            Directory.Delete(dir, recursive: true);
        }
        catch (IOException)
        {
            // Directory busy or already gone — nothing more to do.
        }
        catch (UnauthorizedAccessException)
        {
            // No permission to remove — leave it in place.
        }
    }
}
|
||||
@@ -15,7 +15,10 @@ namespace BlueLaminate.Scraper.CsFloat;
|
||||
/// <param name="DefIndex">Weapon definition index (maps to catalog weapon_id).</param>
|
||||
/// <param name="PaintIndex">Paint index (maps to catalog paint_index).</param>
|
||||
/// <param name="PaintSeed">Pattern seed.</param>
|
||||
/// <param name="FloatValue">Exact float/wear value.</param>
|
||||
/// <param name="FloatValue">
|
||||
/// Exact float/wear value, or null for items that have no float at all
|
||||
/// (e.g. Vanilla knives). A null is distinct from a genuine 0.0 float.
|
||||
/// </param>
|
||||
/// <param name="WearName">Wear bucket name, e.g. "Field-Tested".</param>
|
||||
/// <param name="IsStatTrak">StatTrak™ variant.</param>
|
||||
/// <param name="IsSouvenir">Souvenir variant.</param>
|
||||
@@ -37,7 +40,7 @@ public sealed record CsFloatListing(
|
||||
int DefIndex,
|
||||
int PaintIndex,
|
||||
int PaintSeed,
|
||||
decimal FloatValue,
|
||||
decimal? FloatValue,
|
||||
string? WearName,
|
||||
bool IsStatTrak,
|
||||
bool IsSouvenir,
|
||||
|
||||
@@ -321,7 +321,7 @@ public sealed class CsFloatListingsClient
|
||||
public int DefIndex { get; init; }
|
||||
public int PaintIndex { get; init; }
|
||||
public int PaintSeed { get; init; }
|
||||
public decimal FloatValue { get; init; }
|
||||
public decimal? FloatValue { get; init; }
|
||||
public string? WearName { get; init; }
|
||||
public bool IsStatTrak { get; init; }
|
||||
public bool IsSouvenir { get; init; }
|
||||
|
||||
@@ -1,211 +0,0 @@
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using BlueLaminate.Scraper.Browser;
|
||||
using BlueLaminate.Scraper.Proxies;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
|
||||
namespace BlueLaminate.Scraper.CsMoney;
|
||||
|
||||
/// <summary>Outcome of a stealth pagination run.</summary>
/// <param name="PagesSucceeded">How many offset pages were written to disk as listings pages before pagination stopped.</param>
/// <param name="ItemsTotal">Total listing items counted across those pages.</param>
/// <param name="StoppedReason">Why pagination stopped: "challenged", "empty", "completed", or "error".</param>
public sealed record CsMoneyCaptureResult(int PagesSucceeded, int ItemsTotal, string StoppedReason);
|
||||
|
||||
/// <summary>
|
||||
/// Drives a low-fingerprint, non-headless Edge (no CDP) through a local forwarding
|
||||
/// proxy to the cs.money market, lets the operator clear Cloudflare once, then pages
|
||||
/// the listings API with human-like pacing using in-page <c>fetch()</c> calls from
|
||||
/// the cleared origin (so the cf_clearance cookie rides along). It records each
|
||||
/// page's JSON and — crucially for the current phase — <b>measures how many pages
|
||||
/// survive before Cloudflare re-challenges</b>, which tells us whether the
|
||||
/// fingerprint reductions are enough for a real sweep.
|
||||
/// </summary>
|
||||
public sealed class CsMoneyCaptureService
|
||||
{
|
||||
private readonly IProxyProvider _provider;
|
||||
private readonly LocalForwardingProxyFactory _proxyFactory;
|
||||
private readonly BrowserDriverFactory _factory;
|
||||
private readonly CsMoneyOptions _options;
|
||||
private readonly ILogger<CsMoneyCaptureService> _logger;
|
||||
|
||||
/// <summary>Stores the injected collaborators; performs no work of its own.</summary>
public CsMoneyCaptureService(
    IProxyProvider provider,
    LocalForwardingProxyFactory proxyFactory,
    BrowserDriverFactory factory,
    CsMoneyOptions options,
    ILogger<CsMoneyCaptureService> logger)
    => (_provider, _proxyFactory, _factory, _options, _logger)
        = (provider, proxyFactory, factory, options, logger);
|
||||
|
||||
/// <summary>
/// Open the market, wait for <paramref name="browseUntilDone"/> (the operator
/// clears Cloudflare and presses Enter), then page the listings API up to
/// <paramref name="maxPages"/> times, stopping early on a re-challenge or an
/// empty page. Each page's body is written to <paramref name="outputDir"/>.
/// </summary>
/// <param name="outputDir">Directory (created if missing) that receives one file per captured page.</param>
/// <param name="request">Proxy request handed to the provider when <paramref name="useProxy"/> is true.</param>
/// <param name="loadImages">When false the browser is launched with images blocked.</param>
/// <param name="useProxy">When false the browser runs on the machine's direct connection.</param>
/// <param name="maxPages">Maximum number of pages to capture.</param>
/// <param name="browseUntilDone">Awaited after the initial navigation, before any API fetch.</param>
/// <param name="ct">Cancels pagination; cancellation propagates to the caller as an exception.</param>
/// <returns>Counts of captured pages/items and the reason pagination stopped.</returns>
public async Task<CsMoneyCaptureResult> RunAsync(
    string outputDir,
    ProxyRequest request,
    bool loadImages,
    bool useProxy,
    int maxPages,
    Func<Task> browseUntilDone,
    CancellationToken ct = default)
{
    Directory.CreateDirectory(outputDir);

    var pages = 0;
    var items = 0;
    var reason = "completed";

    // --no-proxy (useProxy=false) drives the automated browser on the machine's
    // own IP, to isolate whether a re-challenge is the IPRoyal exit's reputation
    // or the webdriver fingerprint itself.
    LocalForwardingProxy? localProxy = null;
    try
    {
        string? proxyEndpoint = null;
        if (useProxy)
        {
            var lease = _provider.Acquire(request);

            // Assign before Start() so a failed bind is still disposed below.
            localProxy = _proxyFactory.Create(lease);
            localProxy.Start();
            proxyEndpoint = localProxy.Endpoint;
        }

        // Inside the outer try: if the browser fails to launch, the proxy that
        // was just started is still torn down instead of leaking its listener.
        var driver = _factory.Create(proxyEndpoint, blockImages: !loadImages, _options.ProfileDir);
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(90);
            driver.Manage().Timeouts().AsynchronousJavaScript = TimeSpan.FromSeconds(45);

            _logger.LogInformation("Navigating to {Url}", _options.MarketUrl);
            driver.Navigate().GoToUrl(_options.MarketUrl);

            // Operator clears the Cloudflare challenge in the visible window, waits
            // until the market grid is actually rendered, then presses Enter.
            await browseUntilDone();

            for (var offset = 0; pages < maxPages; offset += 60)
            {
                ct.ThrowIfCancellationRequested();

                var apiUrl = string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    _options.ApiUrlTemplate,
                    offset);
                var (status, body) = DirectFetch(driver, apiUrl);

                if (LooksLikeChallenge(status, body))
                {
                    _logger.LogWarning(
                        "Re-challenged at offset {Offset} (after {Pages} clean page(s)). Stopping.",
                        offset, pages);
                    await WriteAsync(outputDir, $"challenge_offset_{offset}.html", body, ct);
                    reason = "challenged";
                    break;
                }

                // count is null when the body is not JSON or has no items[] array;
                // such a page is still written and counted, contributing 0 items.
                var count = TryCountItems(body);
                if (count is 0)
                {
                    _logger.LogInformation("Offset {Offset} returned no items — end of listings.", offset);
                    reason = "empty";
                    break;
                }

                await WriteAsync(outputDir, $"page_{pages:D3}_offset_{offset}.json", body, ct);
                pages++;
                items += count ?? 0;
                _logger.LogInformation(
                    "Page {Page} [offset {Offset}] [{Status}] → {Count} items ({Bytes} bytes).",
                    pages, offset, status, count, body.Length);

                await DelayAsync(ct);
            }
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Cancellation is deliberately not handled here: it propagates to the
            // caller after the finally blocks have run.
            _logger.LogError(ex, "cs.money capture failed after {Pages} page(s).", pages);
            reason = "error";
        }
        finally
        {
            driver.Quit();
        }
    }
    finally
    {
        // Runs even when driver.Quit() throws, so the local listener is always released.
        if (localProxy is not null)
        {
            await localProxy.DisposeAsync();
        }
    }

    return new CsMoneyCaptureResult(pages, items, reason);
}
|
||||
|
||||
// Executes fetch() inside the already-cleared page and hands back the HTTP
// status plus the response text. ExecuteAsyncScript is used so the script can
// signal completion once the fetch promise settles; because the page is on the
// cs.money origin, the cf_clearance cookie is sent automatically. A fetch
// failure is reported by the script as status -1 with the error text as body;
// an empty/non-string script result is reported as (-1, "").
private (int Status, string Body) DirectFetch(IWebDriver driver, string apiUrl)
{
    const string script = """
        const url = arguments[0];
        const done = arguments[arguments.length - 1];
        fetch(url, { credentials: 'include', headers: { 'accept': 'application/json' } })
        .then(r => r.text().then(t => done(JSON.stringify({ status: r.status, body: t }))))
        .catch(e => done(JSON.stringify({ status: -1, body: String(e) })));
        """;

    var executor = (IJavaScriptExecutor)driver;
    if (executor.ExecuteAsyncScript(script, apiUrl) is not string { Length: > 0 } payload)
    {
        return (-1, "");
    }

    // The script always replies with a {"status": ..., "body": ...} envelope.
    using var envelope = JsonDocument.Parse(payload);
    var root = envelope.RootElement;
    var httpStatus = root.GetProperty("status").GetInt32();
    var text = root.GetProperty("body").GetString() ?? "";
    return (httpStatus, text);
}
|
||||
|
||||
// Decides whether a fetch result is a Cloudflare challenge rather than listings
// data: a 403/503/-1 status, a known challenge marker in the body, or a body
// that starts with '<' (HTML, not JSON).
private static bool LooksLikeChallenge(int status, string body)
{
    if (status is 403 or 503 or -1)
    {
        return true;
    }

    if (body.Contains("Just a moment", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    if (body.Contains("challenge-platform", StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    var firstContent = body.TrimStart();
    return firstContent.StartsWith("<", StringComparison.Ordinal); // HTML, not JSON
}
|
||||
|
||||
// Returns the length of the top-level "items" array without binding a full
// model (the typed model is Phase 2). Yields null when the body is not valid
// JSON, or when "items" is missing or not an array.
private static int? TryCountItems(string body)
{
    JsonDocument parsed;
    try
    {
        parsed = JsonDocument.Parse(body);
    }
    catch (JsonException)
    {
        return null;
    }

    using (parsed)
    {
        var hasArray = parsed.RootElement.TryGetProperty("items", out var list)
            && list.ValueKind == JsonValueKind.Array;
        if (!hasArray)
        {
            return null;
        }

        return list.GetArrayLength();
    }
}
|
||||
|
||||
// Pauses between page fetches: the configured base delay (never below zero)
// plus a random 0..PageJitterSeconds jitter. Does nothing when the total is
// not positive.
private async Task DelayAsync(CancellationToken ct)
{
    var extra = 0.0;
    if (_options.PageJitterSeconds > 0)
    {
        extra = Random.Shared.NextDouble() * _options.PageJitterSeconds;
    }

    var total = Math.Max(0, _options.PageDelaySeconds) + extra;
    if (total > 0)
    {
        var pause = TimeSpan.FromSeconds(total);
        await Task.Delay(pause, ct);
    }
}
|
||||
|
||||
// Writes body as UTF-8 text to fileName inside dir, replacing any existing file.
private static async Task WriteAsync(string dir, string fileName, string body, CancellationToken ct)
{
    var destination = Path.Combine(dir, fileName);
    await File.WriteAllTextAsync(destination, body, Encoding.UTF8, ct);
}
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
namespace BlueLaminate.Scraper.CsMoney;
|
||||
|
||||
/// <summary>
/// Settings for the cs.money scraper, bound from the <c>CsMoney</c> configuration
/// section.
/// <para>
/// cs.money exposes no public API and sits behind Cloudflare bot protection, so we
/// drive a real, non-headless browser (Selenium/Edge) routed through an IPRoyal
/// residential proxy via a local forwarding hop (no CDP). The market endpoint
/// re-challenges aggressively during pagination, so these options also tune the
/// warmed profile and request pacing we use to survive longer.
/// </para>
/// </summary>
public sealed class CsMoneyOptions
{
    /// <summary>Name of the configuration section these options bind from.</summary>
    public const string SectionName = "CsMoney";

    private const string DefaultMarketUrl = "https://cs.money/market/buy/";

    private const string DefaultApiUrlTemplate =
        "https://cs.money/2.0/market/sell-orders?limit=60&offset={0}";

    private const string DefaultProfileFolder = "bluelaminate-csmoney-profile";

    /// <summary>Public market page the browser opens (and where the operator clears Cloudflare).</summary>
    public string MarketUrl { get; set; } = DefaultMarketUrl;

    /// <summary>
    /// Listings API template; <c>{0}</c> is replaced by the page offset (steps of 60).
    /// The URL is fetched in-page from the cleared market origin so the
    /// cf_clearance cookie is sent.
    /// </summary>
    public string ApiUrlTemplate { get; set; } = DefaultApiUrlTemplate;

    /// <summary>
    /// Persistent Chromium profile directory; defaults to a fixed folder under the
    /// system temp path. Reusing one profile keeps the cf_clearance cookie and
    /// history between runs — a warmed profile is far less likely to be
    /// re-challenged than a fresh one. Empty = throwaway profile.
    /// </summary>
    public string ProfileDir { get; set; } =
        Path.Combine(Path.GetTempPath(), DefaultProfileFolder);

    /// <summary>
    /// Optional ISO country code(s) for the residential exit IP, e.g. "us".
    /// Null/empty lets IPRoyal pick at random.
    /// </summary>
    public string? Country { get; set; }

    /// <summary>Whether to load images. Off by default to conserve the metered residential plan.</summary>
    public bool LoadImages { get; set; }

    /// <summary>Base pause between paginated API fetches, in seconds (human-like pacing).</summary>
    public double PageDelaySeconds { get; set; } = 2.5;

    /// <summary>Upper bound, in seconds, of the random jitter added to each pause (0..value).</summary>
    public double PageJitterSeconds { get; set; } = 2.0;
}
|
||||
@@ -1,21 +0,0 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// Source of proxy endpoints. The whole point of this seam is that the rest of
/// the scraper depends only on this interface and <see cref="ProxyLease"/>, so a
/// different residential provider — or the future C2 that allocates IPs to
/// containers, or a composite "grab-bag" over several providers — drops in
/// without changing any browser or scraping code.
/// </summary>
public interface IProxyProvider
{
    /// <summary>Identifier recorded on issued leases, e.g. "iproyal".</summary>
    string Name { get; }

    /// <summary>
    /// Produce a usable endpoint for the given request. For gateway providers
    /// this is pure string composition (no network call); the C2 implementation
    /// can override that later with real allocation.
    /// </summary>
    /// <param name="request">Targeting/session preferences for the endpoint to issue.</param>
    /// <returns>A lease carrying the gateway address and the credentials to present to it.</returns>
    ProxyLease Acquire(ProxyRequest request);
}
|
||||
@@ -1,77 +0,0 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// <see cref="IProxyProvider"/> for IPRoyal's residential gateway. IPRoyal keeps
/// one fixed host/port (geo.iproyal.com:12321) and encodes everything else —
/// country, sticky-session id, session lifetime — as underscore-delimited
/// parameters appended to the account password. Example password:
/// "secret_country-us_session-ab12cd_lifetime-30m". The account username is sent
/// unchanged. Docs: https://docs.iproyal.com/proxies/residential/proxy
/// </summary>
public sealed class IpRoyalProxyProvider : IProxyProvider
{
    public const string GatewayHost = "geo.iproyal.com";
    public const int GatewayPort = 12321;

    // IPRoyal caps sticky sessions; 30 minutes is a safe default that comfortably
    // covers a single scrape pass without forcing an early IP rotation.
    private static readonly TimeSpan DefaultLifetime = TimeSpan.FromMinutes(30);

    private readonly string _username;
    private readonly string _password;

    /// <summary>Creates a provider for one IPRoyal account.</summary>
    /// <param name="username">Account username, sent to the gateway unchanged.</param>
    /// <param name="password">Account password; request parameters are appended to it per lease.</param>
    /// <exception cref="ArgumentException">Either credential is null, empty, or whitespace.</exception>
    public IpRoyalProxyProvider(string username, string password)
    {
        if (string.IsNullOrWhiteSpace(username))
        {
            throw new ArgumentException("IPRoyal username is required.", nameof(username));
        }

        if (string.IsNullOrWhiteSpace(password))
        {
            throw new ArgumentException("IPRoyal password is required.", nameof(password));
        }

        _username = username;
        _password = password;
    }

    /// <inheritdoc />
    public string Name => "iproyal";

    /// <summary>
    /// Composes a lease for the fixed gateway by appending the request's country
    /// and, for sticky requests, session id and lifetime to the account password.
    /// No network call is made.
    /// </summary>
    /// <param name="request">Country, stickiness, session id and lifetime preferences.</param>
    /// <returns>
    /// A lease whose <see cref="ProxyLease.SessionId"/> and
    /// <see cref="ProxyLease.ExpiresAt"/> are set only for sticky requests.
    /// </returns>
    public ProxyLease Acquire(ProxyRequest request)
    {
        var password = _password;
        string? sessionId = null;
        DateTimeOffset? expiresAt = null;

        // Country first; the router picks one at random when several are listed.
        if (!string.IsNullOrWhiteSpace(request.Country))
        {
            password += $"_country-{request.Country.Trim().ToLowerInvariant()}";
        }

        if (request.Sticky)
        {
            // A blank id (e.g. an empty string bound from configuration) would
            // otherwise yield "_session-_lifetime-…", a malformed credential, so
            // it is treated the same as "no id supplied".
            sessionId = string.IsNullOrWhiteSpace(request.SessionId)
                ? NewSessionId()
                : request.SessionId;

            var lifetime = request.Lifetime ?? DefaultLifetime;
            // IPRoyal expresses lifetime as whole minutes (e.g. "_lifetime-30m").
            var minutes = Math.Max(1, (int)Math.Round(lifetime.TotalMinutes));
            password += $"_session-{sessionId}_lifetime-{minutes}m";
            expiresAt = DateTimeOffset.UtcNow.AddMinutes(minutes);
        }

        return new ProxyLease(
            Host: GatewayHost,
            Port: GatewayPort,
            Username: _username,
            Password: password,
            Provider: Name,
            SessionId: sessionId,
            ExpiresAt: expiresAt);
    }

    // Short, URL/param-safe token. IPRoyal treats the session value opaquely;
    // it only needs to be stable for the duration of a sticky lease.
    private static string NewSessionId() =>
        Guid.NewGuid().ToString("N")[..10];
}
|
||||
@@ -1,232 +0,0 @@
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
|
||||
/// A tiny in-process HTTP proxy that listens on 127.0.0.1 and chains every request
|
||||
/// to an upstream gateway (the residential <see cref="ProxyLease"/>), injecting the
|
||||
/// gateway's <c>Proxy-Authorization</c> header itself.
|
||||
/// <para>
|
||||
/// Why this exists: Chromium ignores credentials in <c>--proxy-server</c>, and the
|
||||
/// only in-browser ways to answer the gateway's 407 are a CDP auth handler (which
|
||||
/// is a Cloudflare automation tell) or a Manifest V2 extension (disabled in current
|
||||
/// Chromium). By terminating the browser→proxy hop locally and adding the auth here,
|
||||
/// the browser talks to an <em>auth-free</em> local endpoint and we run with zero
|
||||
/// CDP — far less detectable — while the upstream still carries the IPRoyal
|
||||
/// username/password (and its baked-in country/session params).
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// HTTPS (the only thing cs.money serves) flows through the <c>CONNECT</c> tunnel:
|
||||
/// we open the tunnel to the upstream with auth, then relay raw bytes both ways so
|
||||
/// the browser does TLS end-to-end with the real host — this proxy never sees
|
||||
/// plaintext. Plain HTTP is forwarded best-effort for the occasional non-TLS call.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public sealed class LocalForwardingProxy : IAsyncDisposable
|
||||
{
|
||||
private readonly ProxyLease _upstream;
|
||||
private readonly ILogger _logger;
|
||||
private readonly TcpListener _listener;
|
||||
private readonly CancellationTokenSource _cts = new();
|
||||
private readonly string _authHeader;
|
||||
private Task? _acceptLoop;
|
||||
|
||||
/// <summary>
/// Prepares (but does not start) a loopback listener on an ephemeral port and
/// pre-computes the <c>Proxy-Authorization</c> header line for the upstream lease.
/// </summary>
/// <param name="upstream">Gateway address and credentials every connection is chained to.</param>
/// <param name="logger">Logger for listener lifecycle and per-connection diagnostics.</param>
/// <exception cref="ArgumentNullException">An argument is null.</exception>
public LocalForwardingProxy(ProxyLease upstream, ILogger logger)
{
    ArgumentNullException.ThrowIfNull(upstream);
    ArgumentNullException.ThrowIfNull(logger);

    _upstream = upstream;
    _logger = logger;
    _listener = new TcpListener(IPAddress.Loopback, 0); // ephemeral port

    // UTF-8 rather than ASCII: the ASCII encoder silently replaces every
    // non-ASCII character with '?', which would corrupt such credentials.
    // For pure-ASCII credentials the bytes are identical.
    var token = Convert.ToBase64String(
        Encoding.UTF8.GetBytes($"{upstream.Username}:{upstream.Password}"));
    _authHeader = $"Proxy-Authorization: Basic {token}\r\n";
}
|
||||
|
||||
/// <summary>
/// "127.0.0.1:port" — pass this to the browser's <c>--proxy-server</c>.
/// Empty until <see cref="Start"/> has bound the listener.
/// </summary>
public string Endpoint { get; private set; } = "";
|
||||
|
||||
/// <summary>
/// Binds the loopback listener, publishes the resulting address through
/// <see cref="Endpoint"/>, and launches the background accept loop.
/// </summary>
/// <returns>This instance, so creation and start can be chained.</returns>
public LocalForwardingProxy Start()
{
    _listener.Start();

    // Port 0 was requested, so the OS-assigned port is only known after Start().
    var bound = (IPEndPoint)_listener.LocalEndpoint;
    Endpoint = $"127.0.0.1:{bound.Port}";

    _acceptLoop = Task.Run(() => AcceptLoopAsync(_cts.Token));

    _logger.LogInformation(
        "Local forwarding proxy listening on {Endpoint} → upstream {Upstream} ({Provider}).",
        Endpoint,
        _upstream.Endpoint,
        _upstream.Provider);

    return this;
}
|
||||
|
||||
// Accepts browser connections until cancellation is requested and hands each
// one to its own handler task. A failed accept is logged and the loop carries on.
private async Task AcceptLoopAsync(CancellationToken ct)
{
    while (!ct.IsCancellationRequested)
    {
        TcpClient client;
        try
        {
            client = await _listener.AcceptTcpClientAsync(ct);
        }
        catch (OperationCanceledException)
        {
            break;
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Accept failed.");
            continue;
        }

        // Fire-and-forget per connection; exceptions are swallowed per client so
        // one bad tunnel never takes down the listener. The handler is scheduled
        // without the token on purpose: Task.Run(..., ct) would skip the delegate
        // entirely if ct were cancelled before it started, and the accepted
        // client would then never be disposed. The handler still receives ct and
        // disposes the client itself.
        _ = Task.Run(() => HandleClientAsync(client, ct), CancellationToken.None);
    }
}
|
||||
|
||||
// Serves one browser connection: reads the request header, then dispatches to
// the CONNECT tunnel path or the plain-HTTP path based on the request method.
// The client is disposed when this method finishes; errors raised while
// serving are logged at debug level and not rethrown.
private async Task HandleClientAsync(TcpClient client, CancellationToken ct)
{
    using var connection = client;
    connection.NoDelay = true;

    try
    {
        var browser = connection.GetStream();

        if (await ReadHeaderAsync(browser, ct) is not { } head)
        {
            return;
        }

        // Request line is "<METHOD> <target> <version>"; method and target are required.
        var firstLine = head.Split("\r\n", 2)[0];
        var tokens = firstLine.Split(' ');
        if (tokens.Length < 2)
        {
            return;
        }

        var isTunnel = tokens[0].Equals("CONNECT", StringComparison.OrdinalIgnoreCase);
        if (!isTunnel)
        {
            await HandlePlainAsync(browser, head, ct);
            return;
        }

        await HandleConnectAsync(browser, tokens[1], ct);
    }
    catch (Exception ex)
    {
        _logger.LogDebug(ex, "Client connection error.");
    }
}
|
||||
|
||||
// HTTPS path. Opens a connection to the upstream gateway, issues an
// authenticated CONNECT for the requested target and, once the gateway answers
// 200, confirms the tunnel to the browser and relays raw bytes both ways. Any
// other gateway answer is logged and reported to the browser as 502.
private async Task HandleConnectAsync(NetworkStream clientStream, string target, CancellationToken ct)
{
    using var gateway = new TcpClient { NoDelay = true };
    await gateway.ConnectAsync(_upstream.Host, _upstream.Port, ct);
    var gatewayStream = gateway.GetStream();

    var handshake = Encoding.ASCII.GetBytes(
        $"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{_authHeader}\r\n");
    await gatewayStream.WriteAsync(handshake, ct);

    var reply = await ReadHeaderAsync(gatewayStream, ct);
    if (!IsAccepted(reply))
    {
        var status = reply?.Split("\r\n", 2)[0] ?? "no response";
        _logger.LogWarning("Upstream refused CONNECT {Target}: {Status}", target, status);

        var refusal = Encoding.ASCII.GetBytes("HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n");
        await clientStream.WriteAsync(refusal, ct);
        return;
    }

    var established = Encoding.ASCII.GetBytes("HTTP/1.1 200 Connection established\r\n\r\n");
    await clientStream.WriteAsync(established, ct);

    await RelayAsync(clientStream, gatewayStream, ct);

    // True only for an "HTTP/1.x 200 ..." status line.
    static bool IsAccepted(string? response) =>
        response is not null
        && response.StartsWith("HTTP/1.", StringComparison.Ordinal)
        && response.Split(' ', 3) is { Length: >= 2 } fields
        && fields[1] == "200";
}
|
||||
|
||||
// Plain-HTTP path: re-sends the browser's request header to the upstream
// gateway with the Proxy-Authorization line inserted after the request line,
// then relays bytes both ways. Requests without a Host header, or without a
// CRLF-terminated request line, are dropped.
private async Task HandlePlainAsync(NetworkStream clientStream, string header, CancellationToken ct)
{
    var hostLine = header.Split("\r\n")
        .FirstOrDefault(l => l.StartsWith("Host:", StringComparison.OrdinalIgnoreCase));
    if (hostLine is null)
    {
        return;
    }

    // Locate the end of the request line before connecting: without a CRLF
    // there is nowhere well-defined to insert the auth header.
    var idx = header.IndexOf("\r\n", StringComparison.Ordinal);
    if (idx < 0)
    {
        return;
    }

    using var upstream = new TcpClient { NoDelay = true };
    await upstream.ConnectAsync(_upstream.Host, _upstream.Port, ct);
    var upstreamStream = upstream.GetStream();

    // Insert the Proxy-Authorization header right after the request line.
    var rewritten = header[..(idx + 2)] + _authHeader + header[(idx + 2)..];

    // The header was decoded one byte per char, so Latin-1 restores the
    // original bytes exactly; ASCII would turn every byte >= 0x80 into '?'.
    await upstreamStream.WriteAsync(Encoding.Latin1.GetBytes(rewritten), ct);

    await RelayAsync(clientStream, upstreamStream, ct);
}
|
||||
|
||||
// Pipes both directions until either side closes (or ct is cancelled), then
// stops the remaining direction and waits for it to finish. Never throws for
// copy failures: a broken pipe in either direction simply ends the relay.
private static async Task RelayAsync(NetworkStream a, NetworkStream b, CancellationToken ct)
{
    using var stop = CancellationTokenSource.CreateLinkedTokenSource(ct);

    var toUpstream = a.CopyToAsync(b, stop.Token);
    var toClient = b.CopyToAsync(a, stop.Token);

    await Task.WhenAny(toUpstream, toClient);

    // Previously the slower direction was simply abandoned: it kept running
    // until the streams were disposed under it and its exception was never
    // observed. Cancel it and await both so nothing outlives this call.
    stop.Cancel();
    try
    {
        await Task.WhenAll(toUpstream, toClient);
    }
    catch (Exception)
    {
        // Expected during teardown (cancellation, reset, or closed stream);
        // the relay is over either way.
    }
}
|
||||
|
||||
// Reads an HTTP header block up to and including its CRLFCRLF terminator and
// returns it decoded one byte per char. Returns null when the stream ends
// before the terminator arrives, or when no terminator is found within 64 KiB
// (runaway/garbage stream). Reads a single byte at a time so nothing beyond
// the header is consumed from the stream.
private static async Task<string?> ReadHeaderAsync(NetworkStream stream, CancellationToken ct)
{
    const int MaxHeaderChars = 64 * 1024;

    var buffer = new byte[1];
    var sb = new StringBuilder(256);
    while (sb.Length <= MaxHeaderChars)
    {
        var read = await stream.ReadAsync(buffer, ct);
        if (read == 0)
        {
            // EOF before the terminator: whatever arrived is an incomplete
            // header, so report "no header" instead of handing callers a
            // truncated request/response to act on.
            return null;
        }

        sb.Append((char)buffer[0]);
        if (sb.Length >= 4
            && sb[^1] == '\n' && sb[^2] == '\r' && sb[^3] == '\n' && sb[^4] == '\r')
        {
            return sb.ToString();
        }
    }

    // Size guard tripped without ever seeing CRLFCRLF.
    return null;
}
|
||||
|
||||
/// <summary>
/// Signals cancellation, stops the listener, waits for the accept loop (if it
/// was started) to finish, and releases the cancellation source.
/// </summary>
public async ValueTask DisposeAsync()
{
    await _cts.CancelAsync();
    _listener.Stop();

    if (_acceptLoop is { } loop)
    {
        try
        {
            await loop;
        }
        catch (OperationCanceledException)
        {
            // expected on shutdown
        }
    }

    _cts.Dispose();
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// Hands out <see cref="LocalForwardingProxy"/> instances wired to a logger that
/// comes from DI, so consumers (the proxy probe, the cs.money capture) can spin
/// up a per-run local proxy without depending on <see cref="ILoggerFactory"/>
/// directly.
/// </summary>
public sealed class LocalForwardingProxyFactory
{
    private readonly ILogger<LocalForwardingProxy> _logger;

    /// <summary>Captures the logger passed on to every proxy this factory creates.</summary>
    public LocalForwardingProxyFactory(ILogger<LocalForwardingProxy> logger) => _logger = logger;

    /// <summary>Build (but do not start) a local proxy chaining to <paramref name="upstream"/>.</summary>
    public LocalForwardingProxy Create(ProxyLease upstream)
    {
        return new LocalForwardingProxy(upstream, _logger);
    }
}
|
||||
@@ -1,29 +0,0 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// A concrete, ready-to-use proxy endpoint handed back by an
/// <see cref="IProxyProvider"/>. This is the only proxy type a consumer ever
/// sees, so swapping providers (or mixing several in a grab-bag) never touches
/// the calling code. <see cref="Username"/> and <see cref="Password"/> are the
/// literal credentials to present to the gateway — for providers like IPRoyal
/// the targeting/session parameters are already baked into them.
/// </summary>
/// <param name="Host">Gateway host, e.g. "geo.iproyal.com".</param>
/// <param name="Port">Gateway port, e.g. 12321.</param>
/// <param name="Username">Credential username for the gateway.</param>
/// <param name="Password">Credential password (may carry encoded session/geo params).</param>
/// <param name="Provider">Name of the provider that issued this lease.</param>
/// <param name="SessionId">The sticky session key, if this is a pinned IP.</param>
/// <param name="ExpiresAt">When a sticky IP may be recycled; null if rotating/unbounded.</param>
public sealed record ProxyLease(
    string Host,
    int Port,
    string Username,
    string Password,
    string Provider,
    string? SessionId = null,
    DateTimeOffset? ExpiresAt = null)
{
    /// <summary>"host:port" form used by browser proxy settings.</summary>
    public string Endpoint => $"{Host}:{Port}";

    /// <summary>
    /// Same shape as the compiler-generated record text, but with the password
    /// masked: the generated <c>ToString()</c> prints every positional member,
    /// so logging or interpolating a lease would otherwise expose the credential.
    /// </summary>
    public override string ToString() =>
        $"{nameof(ProxyLease)} {{ Host = {Host}, Port = {Port}, Username = {Username}, "
        + $"Password = ***, Provider = {Provider}, SessionId = {SessionId}, "
        + $"ExpiresAt = {ExpiresAt}, Endpoint = {Endpoint} }}";
}
|
||||
@@ -1,103 +0,0 @@
|
||||
using System.Text.Json;
|
||||
using BlueLaminate.Scraper.Browser;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>The exit IP a proxy lease actually resolves to, per ipinfo.io.</summary>
/// <remarks>
/// NOTE(review): the member names presumably mirror the fields of the ipinfo.io
/// JSON response — confirm against the code that populates this record. Every
/// member is nullable, so any of them may be absent.
/// </remarks>
/// <param name="Org">
/// ASN + organisation, e.g. "AS7922 Comcast Cable". This is the tell for
/// residential vs. datacenter: a consumer ISP here means a real residential
/// exit; a hosting provider (OVH, Hetzner, AWS…) means datacenter dressed up.
/// </param>
public sealed record ProxyExitInfo(
    string? Ip,
    string? City,
    string? Region,
    string? Country,
    string? Org,
    string? Hostname,
    string? Timezone);
|
||||
|
||||
/// <summary>
/// Minimal end-to-end smoke test of the proxy chain: it takes a lease from the
/// provider, drives the real browser through a local forwarding proxy for that
/// lease, and reads the exit IP back from an IP-echo endpoint. The round trip
/// is only a few KB, which makes it the sensible first call against a metered
/// residential plan — it confirms authentication works and reveals whether the
/// exit is truly residential before bandwidth is spent on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
    /// <summary>IP-echo endpoint whose JSON body describes the caller's exit IP.</summary>
    private const string IpEchoUrl = "https://ipinfo.io/json";

    /// <summary>Case-insensitive matching so lower-case JSON keys bind to the record's members.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNameCaseInsensitive = true };

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    /// <summary>Captures the collaborators needed to run a probe.</summary>
    /// <param name="provider">Source of proxy leases.</param>
    /// <param name="proxyFactory">Creates the local forwarding proxy for a lease.</param>
    /// <param name="factory">Creates the browser driver routed through the local proxy.</param>
    /// <param name="logger">Receives the lease and exit-IP log entries.</param>
    public ProxyProbe(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
        => (_provider, _proxyFactory, _factory, _logger) = (provider, proxyFactory, factory, logger);

    /// <summary>
    /// Acquires a lease for <paramref name="request"/>, loads the IP-echo page
    /// in a browser routed through it, and returns the parsed exit details.
    /// </summary>
    /// <param name="request">The kind of exit IP to ask the provider for.</param>
    /// <returns>The exit information parsed from the IP-echo response.</returns>
    /// <exception cref="InvalidOperationException">
    /// The browser returned no page text, the text held no JSON object, or the
    /// JSON deserialized to null.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);
        var exitMode = lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}";
        _logger.LogInformation("Acquired {Provider} lease (exit {Mode}).", lease.Provider, exitMode);

        // The local proxy is disposed asynchronously when this method exits,
        // i.e. after the browser has been shut down in the finally below.
        await using var localProxy = _proxyFactory.Create(lease).Start();
        var driver = _factory.Create(localProxy.Endpoint, blockImages: true);
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            driver.Navigate().GoToUrl(IpEchoUrl);

            // Take the document's text instead of walking the DOM so the
            // browser's built-in JSON viewer can't interfere; the JSON object
            // is then cut out of whatever text was rendered.
            var scriptRunner = (IJavaScriptExecutor)driver;
            if (scriptRunner.ExecuteScript("return document.documentElement.innerText;") is not string pageText)
            {
                throw new InvalidOperationException("Browser returned no page text.");
            }

            var exit = JsonSerializer.Deserialize<ProxyExitInfo>(ExtractJson(pageText), JsonOptions);
            if (exit is null)
            {
                throw new InvalidOperationException("IP-echo response was empty.");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                exit.Ip, exit.City, exit.Region, exit.Country, exit.Org);

            return exit;
        }
        finally
        {
            // Always shut the browser down, whether the probe succeeded or threw.
            driver.Quit();
        }
    }

    /// <summary>
    /// Returns the substring of <paramref name="text"/> running from its first
    /// <c>{</c> through its last <c>}</c>, inclusive.
    /// </summary>
    /// <param name="text">Rendered page text expected to contain a JSON object.</param>
    /// <exception cref="InvalidOperationException">
    /// No <c>{</c> was found, or no <c>}</c> appears after it.
    /// </exception>
    private static string ExtractJson(string text)
    {
        var open = text.IndexOf('{');
        var close = text.LastIndexOf('}');
        if (open >= 0 && close > open)
        {
            return text[open..(close + 1)];
        }

        throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
    }
}
|
||||
@@ -1,30 +0,0 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// Provider-agnostic description of the exit IP a caller wants. Each
/// <see cref="IProxyProvider"/> maps these settings onto its own gateway
/// syntax. A sticky request pins a single residential IP for the lifetime of
/// the session; a non-sticky request allows the IP to rotate on every
/// connection.
/// </summary>
/// <param name="Country">
/// Optional ISO 3166-1 alpha-2 country code, or a comma-separated list from
/// which the provider picks one at random (e.g. "us" or "us,gb,de").
/// Null applies no geo constraint.
/// </param>
/// <param name="Sticky">
/// True keeps one exit IP for the entire session; false rotates it.
/// </param>
/// <param name="SessionId">
/// Optional caller-chosen session key for a sticky lease. If null while
/// <paramref name="Sticky"/> is true, the provider generates one.
/// </param>
/// <param name="Lifetime">
/// How long a sticky IP should be kept before the provider may recycle it.
/// Has no effect when <paramref name="Sticky"/> is false. Null defers to the
/// provider's own default.
/// </param>
public sealed record ProxyRequest(
    string? Country = null, bool Sticky = true,
    string? SessionId = null, TimeSpan? Lifetime = null);
|
||||
Reference in New Issue
Block a user