Add cs.money worker stack with per-worker IPRoyal residential proxy
Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,8 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.8" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
|
||||
<PackageReference Include="Selenium.WebDriver" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
using OpenQA.Selenium.Edge;
|
||||
|
||||
namespace BlueLaminate.Scraper.Browser;
|
||||
|
||||
/// <summary>
/// Creates visible (non-headless) Edge/Chromium <see cref="IWebDriver"/> instances whose
/// traffic goes to a local proxy endpoint that needs no credentials (a
/// <see cref="Proxies.LocalForwardingProxy"/> chained to the residential gateway).
/// <para>
/// The configuration deliberately uses <b>zero CDP</b>: turning on DevTools domains —
/// even only to answer proxy auth — is a Cloudflare automation tell. Because the local
/// proxy already carries the upstream credentials, the browser never sees a 407. Paired
/// with a warmed, persistent profile this is the lowest-fingerprint setup available
/// without an undetected-chromedriver (which has no .NET equivalent).
/// </para>
/// <para>
/// Bandwidth: the residential plan is metered per GB, so images are switched off at the
/// content-settings level by default. Cloudflare gates on JS/TLS/behaviour rather than
/// on whether pictures render, so the browser still looks realistic.
/// </para>
/// </summary>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    /// <summary>Creates the factory.</summary>
    /// <param name="logger">Receives one informational line per browser launch.</param>
    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger) => _logger = logger;

    /// <summary>
    /// Starts Edge, optionally routed through <paramref name="proxyEndpoint"/>.
    /// </summary>
    /// <param name="proxyEndpoint">
    /// "host:port" of an auth-free proxy. Null/blank means no proxy argument is passed, so
    /// the browser uses the machine's direct connection (the diagnostic --no-proxy mode).
    /// </param>
    /// <param name="blockImages">When true (default) images are disabled via profile preferences.</param>
    /// <param name="profileDir">
    /// Profile directory to reuse across runs, so a once-cleared Cloudflare
    /// <c>cf_clearance</c> cookie and browsing history carry over. Null/blank selects a
    /// fresh throwaway directory under the temp path.
    /// </param>
    /// <returns>The running driver; the caller owns it and must quit it.</returns>
    public IWebDriver Create(string? proxyEndpoint, bool blockImages = true, string? profileDir = null)
    {
        var routed = !string.IsNullOrWhiteSpace(proxyEndpoint);
        var keepProfile = !string.IsNullOrWhiteSpace(profileDir);

        var edge = new EdgeOptions();

        // The proxy is supplied as a launch argument instead of EdgeOptions.Proxy, which
        // would also route Selenium Manager's driver download. Omitting the scheme makes
        // every protocol use the proxy.
        if (routed)
        {
            edge.AddArgument($"--proxy-server={proxyEndpoint}");
        }

        // Strip the most obvious automation tells; the residential exit, a real
        // (non-headless) browser and a warmed profile cover the rest.
        edge.AddArgument("--disable-blink-features=AutomationControlled");
        edge.AddExcludedArgument("enable-automation");
        edge.AddAdditionalOption("useAutomationExtension", false);
        edge.AddArgument("--no-first-run");
        edge.AddArgument("--no-default-browser-check");
        edge.AddArgument("--start-maximized");

        string userDataDir;
        if (keepProfile)
        {
            userDataDir = profileDir!;
        }
        else
        {
            var throwawayRoot = Path.Combine(Path.GetTempPath(), "bluelaminate-edge");
            userDataDir = Path.Combine(throwawayRoot, Guid.NewGuid().ToString("N"));
        }

        Directory.CreateDirectory(userDataDir);
        edge.AddArgument($"--user-data-dir={userDataDir}");

        if (blockImages)
        {
            // 2 = block, in Chromium's managed content-settings preference.
            edge.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
        }

        var route = routed ? $"local proxy {proxyEndpoint}" : "DIRECT (no proxy)";
        var profile = keepProfile ? userDataDir : "throwaway";
        _logger.LogInformation("Launching Edge via {Route} (profile: {Profile}).", route, profile);

        return new EdgeDriver(edge);
    }
}
|
||||
@@ -1,3 +1,4 @@
|
||||
using System.Globalization;
|
||||
using System.Net;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
@@ -27,9 +28,6 @@ public sealed record ListingsPageResult(IReadOnlyList<CsFloatListing> Listings,
|
||||
/// </summary>
|
||||
public sealed class CsFloatListingsClient
|
||||
{
|
||||
private const string BaseUrl = "https://csfloat.com/api/v1/listings";
|
||||
private const int MaxLimit = 50; // API hard cap per page.
|
||||
|
||||
private static readonly JsonSerializerOptions Options = new()
|
||||
{
|
||||
// CSFloat uses snake_case for item fields (market_hash_name, float_value,
|
||||
@@ -43,18 +41,30 @@ public sealed class CsFloatListingsClient
|
||||
|
||||
private readonly HttpClient _http;
|
||||
private readonly string _apiKey;
|
||||
private readonly string _baseUrl;
|
||||
private readonly int _maxLimit;
|
||||
private readonly ILogger<CsFloatListingsClient> _logger;
|
||||
|
||||
public CsFloatListingsClient(HttpClient http, string apiKey, ILogger<CsFloatListingsClient> logger)
|
||||
public CsFloatListingsClient(HttpClient http, CsFloatOptions options, ILogger<CsFloatListingsClient> logger)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(apiKey))
|
||||
throw new ArgumentException("CSFloat API key is required.", nameof(apiKey));
|
||||
if (string.IsNullOrWhiteSpace(options.ApiKey))
|
||||
{
|
||||
throw new ArgumentException("CSFloat API key is required.", nameof(options));
|
||||
}
|
||||
|
||||
_http = http;
|
||||
_apiKey = apiKey;
|
||||
_apiKey = options.ApiKey;
|
||||
_baseUrl = options.BaseUrl;
|
||||
_maxLimit = options.MaxLimit;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Maximum listings returned per page (the API page cap, from configuration).
|
||||
/// This is listings-per-request — unrelated to how many requests are made.
|
||||
/// </summary>
|
||||
public int MaxLimit => _maxLimit;
|
||||
|
||||
/// <summary>
|
||||
/// Rate-limit state from the most recent response (success or failure).
|
||||
/// <see cref="CsFloatRateLimit.None"/> until the first request completes.
|
||||
@@ -81,9 +91,9 @@ public sealed class CsFloatListingsClient
|
||||
do
|
||||
{
|
||||
var remaining = maxListings - results.Count;
|
||||
var limit = Math.Min(MaxLimit, remaining);
|
||||
var limit = Math.Min(_maxLimit, remaining);
|
||||
|
||||
var page = await FetchPageAsync(defIndex, paintIndex, sortBy, limit, cursor, type, ct);
|
||||
var page = await FetchPageAsync(defIndex, paintIndex, sortBy, limit, cursor, type, ct: ct);
|
||||
results.AddRange(page.Listings);
|
||||
|
||||
_logger.LogInformation(
|
||||
@@ -94,7 +104,9 @@ public sealed class CsFloatListingsClient
|
||||
|
||||
// Stop when the API signals the end (no cursor) or returns an empty page.
|
||||
if (string.IsNullOrEmpty(cursor) || page.Listings.Count == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (results.Count < maxListings);
|
||||
|
||||
@@ -106,6 +118,9 @@ public sealed class CsFloatListingsClient
|
||||
/// sweep runner drives this directly so it can decide — between pages — when
|
||||
/// to stop (already-seen listings) or pace (rate-limit headers). Filters are
|
||||
/// optional: omit def_index/paint_index for a global sweep across all items.
|
||||
/// <paramref name="minFloat"/>/<paramref name="maxFloat"/> restrict the result
|
||||
/// to a float (wear) band, so the catalogue sweep can split a skin into smaller,
|
||||
/// independently-checkpointable wear units.
|
||||
/// </summary>
|
||||
public Task<ListingsPageResult> FetchPageAsync(
|
||||
int? defIndex,
|
||||
@@ -114,30 +129,64 @@ public sealed class CsFloatListingsClient
|
||||
int limit,
|
||||
string? cursor,
|
||||
string? type = "buy_now",
|
||||
decimal? minFloat = null,
|
||||
decimal? maxFloat = null,
|
||||
CancellationToken ct = default)
|
||||
{
|
||||
var query = new List<string>
|
||||
{
|
||||
$"sort_by={Uri.EscapeDataString(sortBy)}",
|
||||
$"limit={Math.Clamp(limit, 1, MaxLimit)}",
|
||||
$"limit={Math.Clamp(limit, 1, _maxLimit)}",
|
||||
};
|
||||
// Default to fixed-price listings only; auctions have no firm sale price
|
||||
// and aren't wanted. Pass type=null to include everything.
|
||||
if (!string.IsNullOrEmpty(type))
|
||||
{
|
||||
query.Add($"type={Uri.EscapeDataString(type)}");
|
||||
}
|
||||
|
||||
if (defIndex is { } def)
|
||||
{
|
||||
query.Add($"def_index={def}");
|
||||
}
|
||||
|
||||
if (paintIndex is { } paint)
|
||||
{
|
||||
query.Add($"paint_index={paint}");
|
||||
}
|
||||
|
||||
// CSFloat's min_float/max_float are exclusive ("float higher/lower than this").
|
||||
// Nudge the bounds outward by a tiny epsilon so a listing whose float sits
|
||||
// exactly on a band boundary isn't dropped; slight overlap between adjacent
|
||||
// bands is harmless (same listing id, just upserted twice).
|
||||
if (minFloat is { } min)
|
||||
{
|
||||
query.Add($"min_float={Format(min - FloatBoundaryEpsilon)}");
|
||||
}
|
||||
|
||||
if (maxFloat is { } max)
|
||||
{
|
||||
query.Add($"max_float={Format(max + FloatBoundaryEpsilon)}");
|
||||
}
|
||||
|
||||
if (!string.IsNullOrEmpty(cursor))
|
||||
{
|
||||
query.Add($"cursor={Uri.EscapeDataString(cursor)}");
|
||||
}
|
||||
|
||||
return SendPageAsync(query, ct);
|
||||
}
|
||||
|
||||
private const decimal FloatBoundaryEpsilon = 0.000001m;
|
||||
|
||||
// Serialises a float bound for the query string: clamped into [0, 1], then written in
// invariant fixed-point form (e.g. "0.07") so the API never receives a culture-specific
// or scientific rendering it would reject.
private static string Format(decimal value)
{
    var bounded = Math.Min(1m, Math.Max(0m, value));
    return bounded.ToString("0.0##########", CultureInfo.InvariantCulture);
}
|
||||
|
||||
private async Task<ListingsPageResult> SendPageAsync(List<string> query, CancellationToken ct)
|
||||
{
|
||||
var url = $"{BaseUrl}?{string.Join('&', query)}";
|
||||
var url = $"{_baseUrl}?{string.Join('&', query)}";
|
||||
|
||||
using var request = new HttpRequestMessage(HttpMethod.Get, url);
|
||||
// CSFloat expects the raw key in the Authorization header (no scheme).
|
||||
@@ -152,7 +201,9 @@ public sealed class CsFloatListingsClient
|
||||
_logger.LogInformation("{RateLimit}", LastRateLimit);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
throw new CsFloatApiException(response.StatusCode, Truncate(body));
|
||||
}
|
||||
|
||||
var page = Parse(body);
|
||||
return new ListingsPageResult(page.Data.Select(Map).ToList(), page.Cursor);
|
||||
@@ -169,7 +220,9 @@ public sealed class CsFloatListingsClient
|
||||
// Scan both response and content headers — servers split them either way.
|
||||
var all = response.Headers.AsEnumerable();
|
||||
if (response.Content is not null)
|
||||
{
|
||||
all = all.Concat(response.Content.Headers);
|
||||
}
|
||||
|
||||
foreach (var header in all)
|
||||
{
|
||||
@@ -178,11 +231,15 @@ public sealed class CsFloatListingsClient
|
||||
|| name.Contains("rate-limit", StringComparison.OrdinalIgnoreCase)
|
||||
|| name.Equals("Retry-After", StringComparison.OrdinalIgnoreCase);
|
||||
if (isRateLimit)
|
||||
{
|
||||
raw[name] = string.Join(",", header.Value);
|
||||
}
|
||||
}
|
||||
|
||||
if (raw.Count == 0)
|
||||
{
|
||||
return CsFloatRateLimit.None;
|
||||
}
|
||||
|
||||
return new CsFloatRateLimit(
|
||||
Limit: FindInt(raw, "limit"),
|
||||
|
||||
30
BlueLaminate/BlueLaminate.Scraper/CsFloat/CsFloatOptions.cs
Normal file
30
BlueLaminate/BlueLaminate.Scraper/CsFloat/CsFloatOptions.cs
Normal file
@@ -0,0 +1,30 @@
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
|
||||
namespace BlueLaminate.Scraper.CsFloat;
|
||||
|
||||
/// <summary>
/// Settings consumed by <see cref="CsFloatListingsClient"/>, bound from the
/// <c>CsFloat</c> configuration section. The defaults mirror the live API, so supplying
/// the key alone is enough for a working client.
/// </summary>
public sealed class CsFloatOptions
{
    /// <summary>Name of the configuration section these options bind from.</summary>
    public const string SectionName = "CsFloat";

    /// <summary>
    /// Developer key CSFloat requires on the <c>Authorization</c> header. The composition
    /// root falls back to the legacy <c>CSFLOAT_API_KEY</c> environment variable. Only
    /// commands that actually call the API need it, hence nullable.
    /// </summary>
    public string? ApiKey { get; set; }

    /// <summary>URL of the active-listings endpoint.</summary>
    public string BaseUrl { get; set; } = "https://csfloat.com/api/v1/listings";

    /// <summary>
    /// Listings requested per page. CSFloat caps a page at 50; a value outside [1, 50]
    /// fails validation at startup instead of being clamped silently.
    /// </summary>
    [Range(1, 50, ErrorMessage = "CsFloat:MaxLimit must be between 1 and 50 (the CSFloat API page cap).")]
    public int MaxLimit { get; set; } = 50;
}
|
||||
@@ -0,0 +1,211 @@
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using BlueLaminate.Scraper.Browser;
|
||||
using BlueLaminate.Scraper.Proxies;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
|
||||
namespace BlueLaminate.Scraper.CsMoney;
|
||||
|
||||
/// <summary>Summary of one stealth pagination run against the cs.money listings API.</summary>
/// <param name="PagesSucceeded">Number of offset pages that were saved as listings JSON before the run stopped.</param>
/// <param name="ItemsTotal">Sum of the listing items counted across those saved pages.</param>
/// <param name="StoppedReason">What ended pagination: "challenged", "empty", "completed", or "error".</param>
public sealed record CsMoneyCaptureResult(
    int PagesSucceeded,
    int ItemsTotal,
    string StoppedReason);
|
||||
|
||||
/// <summary>
/// Drives a low-fingerprint, non-headless Edge (no CDP) through a local forwarding
/// proxy to the cs.money market, lets the operator clear Cloudflare once, then pages
/// the listings API with human-like pacing using in-page <c>fetch()</c> calls from
/// the cleared origin (so the cf_clearance cookie rides along). It records each
/// page's JSON and — crucially for the current phase — <b>measures how many pages
/// survive before Cloudflare re-challenges</b>, which tells us whether the
/// fingerprint reductions are enough for a real sweep.
/// </summary>
public sealed class CsMoneyCaptureService
{
    // Offset step between consecutive pages; the options document the template's
    // offset as advancing in steps of 60.
    private const int PageSize = 60;

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly CsMoneyOptions _options;
    private readonly ILogger<CsMoneyCaptureService> _logger;

    public CsMoneyCaptureService(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        CsMoneyOptions options,
        ILogger<CsMoneyCaptureService> logger)
    {
        _provider = provider;
        _proxyFactory = proxyFactory;
        _factory = factory;
        _options = options;
        _logger = logger;
    }

    /// <summary>
    /// Open the market, wait for <paramref name="browseUntilDone"/> (the operator
    /// clears Cloudflare and presses Enter), then page the listings API up to
    /// <paramref name="maxPages"/> times, stopping early on a re-challenge or an
    /// empty page. Each page's body is written to <paramref name="outputDir"/>.
    /// </summary>
    /// <param name="outputDir">Directory for page/challenge bodies; created if missing.</param>
    /// <param name="request">Proxy lease request; only used when <paramref name="useProxy"/> is true.</param>
    /// <param name="loadImages">When false the browser is launched with images blocked.</param>
    /// <param name="useProxy">
    /// False (--no-proxy) drives the browser on the machine's own IP, to isolate whether
    /// a re-challenge comes from the IPRoyal exit's reputation or the webdriver fingerprint.
    /// </param>
    /// <param name="maxPages">Upper bound on successfully captured pages.</param>
    /// <param name="browseUntilDone">Awaited after navigation; completes when the operator is ready.</param>
    /// <param name="ct">Cancels pagination and inter-page delays.</param>
    /// <returns>Pages/items captured and why the run stopped.</returns>
    /// <exception cref="OperationCanceledException">When <paramref name="ct"/> is cancelled mid-run.</exception>
    /// <remarks>
    /// Failures while launching the browser propagate to the caller (after the local
    /// proxy is shut down). Failures after launch are logged and reported as "error".
    /// </remarks>
    public async Task<CsMoneyCaptureResult> RunAsync(
        string outputDir,
        ProxyRequest request,
        bool loadImages,
        bool useProxy,
        int maxPages,
        Func<Task> browseUntilDone,
        CancellationToken ct = default)
    {
        Directory.CreateDirectory(outputDir);

        LocalForwardingProxy? localProxy = null;
        string? proxyEndpoint = null;
        if (useProxy)
        {
            var lease = _provider.Acquire(request);
            localProxy = _proxyFactory.Create(lease).Start();
            proxyEndpoint = localProxy.Endpoint;
        }

        // The proxy is already listening here, so a failed browser launch must not
        // leave its listener and accept loop running.
        IWebDriver driver;
        try
        {
            driver = _factory.Create(proxyEndpoint, blockImages: !loadImages, _options.ProfileDir);
        }
        catch
        {
            if (localProxy is not null)
            {
                await localProxy.DisposeAsync();
            }

            throw;
        }

        var pages = 0;
        var items = 0;
        var reason = "completed";
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(90);
            driver.Manage().Timeouts().AsynchronousJavaScript = TimeSpan.FromSeconds(45);

            _logger.LogInformation("Navigating to {Url}", _options.MarketUrl);
            driver.Navigate().GoToUrl(_options.MarketUrl);

            // Operator clears the Cloudflare challenge in the visible window, waits
            // until the market grid is actually rendered, then presses Enter.
            await browseUntilDone();

            for (var offset = 0; pages < maxPages; offset += PageSize)
            {
                ct.ThrowIfCancellationRequested();

                // Invariant culture: the offset is part of a URL, not user-facing text.
                var apiUrl = string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    _options.ApiUrlTemplate,
                    offset);
                var (status, body) = DirectFetch(driver, apiUrl);

                if (LooksLikeChallenge(status, body))
                {
                    _logger.LogWarning(
                        "Re-challenged at offset {Offset} (after {Pages} clean page(s)). Stopping.",
                        offset, pages);
                    await WriteAsync(outputDir, $"challenge_offset_{offset}.html", body, ct);
                    reason = "challenged";
                    break;
                }

                // Null means the body had no parseable items[] array; it is still
                // saved and counted as a page, contributing zero items.
                var count = TryCountItems(body);
                if (count is 0)
                {
                    _logger.LogInformation("Offset {Offset} returned no items — end of listings.", offset);
                    reason = "empty";
                    break;
                }

                await WriteAsync(outputDir, $"page_{pages:D3}_offset_{offset}.json", body, ct);
                pages++;
                items += count ?? 0;
                _logger.LogInformation(
                    "Page {Page} [offset {Offset}] [{Status}] → {Count} items ({Bytes} bytes).",
                    pages, offset, status, count, body.Length);

                await DelayAsync(ct);
            }
        }
        catch (OperationCanceledException)
        {
            reason = "cancelled";
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "cs.money capture failed after {Pages} page(s).", pages);
            reason = "error";
        }
        finally
        {
            // Browser first, then the proxy it depends on. The nested finally makes
            // sure a throwing Quit() cannot skip the proxy shutdown.
            try
            {
                driver.Quit();
            }
            finally
            {
                if (localProxy is not null)
                {
                    await localProxy.DisposeAsync();
                }
            }
        }

        return new CsMoneyCaptureResult(pages, items, reason);
    }

    // Run a same-origin fetch() in the cleared page and return (status, body). Uses
    // ExecuteAsyncScript so we can await the fetch promise; the page is on the
    // cs.money origin, so the cf_clearance cookie is sent automatically. A fetch
    // failure or an empty script result is reported as status -1.
    private (int Status, string Body) DirectFetch(IWebDriver driver, string apiUrl)
    {
        const string script = """
            const url = arguments[0];
            const done = arguments[arguments.length - 1];
            fetch(url, { credentials: 'include', headers: { 'accept': 'application/json' } })
              .then(r => r.text().then(t => done(JSON.stringify({ status: r.status, body: t }))))
              .catch(e => done(JSON.stringify({ status: -1, body: String(e) })));
            """;
        var raw = ((IJavaScriptExecutor)driver).ExecuteAsyncScript(script, apiUrl) as string;
        if (string.IsNullOrEmpty(raw))
        {
            return (-1, "");
        }

        using var doc = JsonDocument.Parse(raw);
        var status = doc.RootElement.GetProperty("status").GetInt32();
        var body = doc.RootElement.GetProperty("body").GetString() ?? "";
        return (status, body);
    }

    // Heuristic: a blocking status, known Cloudflare interstitial markers, or any
    // body that starts like HTML instead of JSON.
    private static bool LooksLikeChallenge(int status, string body) =>
        status is 403 or 503 or -1
        || body.Contains("Just a moment", StringComparison.OrdinalIgnoreCase)
        || body.Contains("challenge-platform", StringComparison.OrdinalIgnoreCase)
        || body.TrimStart().StartsWith("<", StringComparison.Ordinal); // HTML, not JSON

    // Count items[] without binding a full model (the typed model is Phase 2).
    // Returns null when the body is not JSON or has no items array.
    private static int? TryCountItems(string body)
    {
        try
        {
            using var doc = JsonDocument.Parse(body);
            return doc.RootElement.TryGetProperty("items", out var items)
                   && items.ValueKind == JsonValueKind.Array
                ? items.GetArrayLength()
                : null;
        }
        catch (JsonException)
        {
            return null;
        }
    }

    // Sleep for the configured base delay plus a random jitter in [0, PageJitterSeconds).
    private async Task DelayAsync(CancellationToken ct)
    {
        var jitter = _options.PageJitterSeconds > 0
            ? Random.Shared.NextDouble() * _options.PageJitterSeconds
            : 0;
        var seconds = Math.Max(0, _options.PageDelaySeconds) + jitter;
        if (seconds > 0)
        {
            await Task.Delay(TimeSpan.FromSeconds(seconds), ct);
        }
    }

    private static async Task WriteAsync(string dir, string fileName, string body, CancellationToken ct) =>
        await File.WriteAllTextAsync(Path.Combine(dir, fileName), body, Encoding.UTF8, ct);
}
|
||||
50
BlueLaminate/BlueLaminate.Scraper/CsMoney/CsMoneyOptions.cs
Normal file
50
BlueLaminate/BlueLaminate.Scraper/CsMoney/CsMoneyOptions.cs
Normal file
@@ -0,0 +1,50 @@
|
||||
namespace BlueLaminate.Scraper.CsMoney;
|
||||
|
||||
/// <summary>
/// Settings for the cs.money scraper, bound from the <c>CsMoney</c> configuration
/// section.
/// <para>
/// cs.money has no public API and sits behind Cloudflare bot protection, so the scraper
/// drives a real, non-headless browser (Selenium/Edge) through an IPRoyal residential
/// proxy via a local forwarding hop (no CDP). Because the market endpoint re-challenges
/// aggressively during pagination, these options also tune the warmed profile and the
/// request pacing used to survive longer.
/// </para>
/// </summary>
public sealed class CsMoneyOptions
{
    /// <summary>Name of the configuration section these options bind from.</summary>
    public const string SectionName = "CsMoney";

    /// <summary>Public market page the browser opens, and where the operator clears Cloudflare.</summary>
    public string MarketUrl { get; set; } = "https://cs.money/market/buy/";

    /// <summary>
    /// Format template for the listings API; <c>{0}</c> receives the page offset, which
    /// advances in steps of 60. The request is made in-page from the cleared market
    /// origin so the cf_clearance cookie is sent with it.
    /// </summary>
    public string ApiUrlTemplate { get; set; } =
        "https://cs.money/2.0/market/sell-orders?limit=60&offset={0}";

    /// <summary>
    /// Directory of the persistent Chromium profile. Reusing a single profile preserves
    /// the cf_clearance cookie and history between runs, and a warmed profile is far less
    /// likely to be re-challenged than a fresh one. Empty selects a throwaway profile.
    /// </summary>
    public string ProfileDir { get; set; } =
        Path.Combine(Path.GetTempPath(), "bluelaminate-csmoney-profile");

    /// <summary>
    /// Optional ISO country code(s) for the residential exit IP, for example "us".
    /// Null or empty leaves the choice to IPRoyal, which picks at random.
    /// </summary>
    public string? Country { get; set; }

    /// <summary>Whether to load images. Defaults to off to conserve the metered residential plan.</summary>
    public bool LoadImages { get; set; }

    /// <summary>Base pause between paginated API fetches, in seconds (human-like pacing).</summary>
    public double PageDelaySeconds { get; set; } = 2.5;

    /// <summary>Upper bound, in seconds, of the random jitter added to each pause (0..value).</summary>
    public double PageJitterSeconds { get; set; } = 2.0;
}
|
||||
@@ -23,9 +23,14 @@ public sealed class IpRoyalProxyProvider : IProxyProvider
|
||||
public IpRoyalProxyProvider(string username, string password)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(username))
|
||||
{
|
||||
throw new ArgumentException("IPRoyal username is required.", nameof(username));
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(password))
|
||||
{
|
||||
throw new ArgumentException("IPRoyal password is required.", nameof(password));
|
||||
}
|
||||
|
||||
_username = username;
|
||||
_password = password;
|
||||
@@ -41,7 +46,9 @@ public sealed class IpRoyalProxyProvider : IProxyProvider
|
||||
|
||||
// Country first; the router picks one at random when several are listed.
|
||||
if (!string.IsNullOrWhiteSpace(request.Country))
|
||||
{
|
||||
password += $"_country-{request.Country.Trim().ToLowerInvariant()}";
|
||||
}
|
||||
|
||||
if (request.Sticky)
|
||||
{
|
||||
|
||||
@@ -0,0 +1,232 @@
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.Text;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
|
||||
/// A tiny in-process HTTP proxy that listens on 127.0.0.1 and chains every request
|
||||
/// to an upstream gateway (the residential <see cref="ProxyLease"/>), injecting the
|
||||
/// gateway's <c>Proxy-Authorization</c> header itself.
|
||||
/// <para>
|
||||
/// Why this exists: Chromium ignores credentials in <c>--proxy-server</c>, and the
|
||||
/// only in-browser ways to answer the gateway's 407 are a CDP auth handler (which
|
||||
/// is a Cloudflare automation tell) or a Manifest V2 extension (disabled in current
|
||||
/// Chromium). By terminating the browser→proxy hop locally and adding the auth here,
|
||||
/// the browser talks to an <em>auth-free</em> local endpoint and we run with zero
|
||||
/// CDP — far less detectable — while the upstream still carries the IPRoyal
|
||||
/// username/password (and its baked-in country/session params).
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// HTTPS (the only thing cs.money serves) flows through the <c>CONNECT</c> tunnel:
|
||||
/// we open the tunnel to the upstream with auth, then relay raw bytes both ways so
|
||||
/// the browser does TLS end-to-end with the real host — this proxy never sees
|
||||
/// plaintext. Plain HTTP is forwarded best-effort for the occasional non-TLS call.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public sealed class LocalForwardingProxy : IAsyncDisposable
|
||||
{
|
||||
private readonly ProxyLease _upstream;
|
||||
private readonly ILogger _logger;
|
||||
private readonly TcpListener _listener;
|
||||
private readonly CancellationTokenSource _cts = new();
|
||||
private readonly string _authHeader;
|
||||
private Task? _acceptLoop;
|
||||
|
||||
/// <summary>
/// Prepares (but does not start) a loopback listener and precomputes the Basic
/// <c>Proxy-Authorization</c> header line built from the lease's username and password.
/// </summary>
/// <param name="upstream">Gateway lease every request is chained to.</param>
/// <param name="logger">Sink for lifecycle and per-connection diagnostics.</param>
public LocalForwardingProxy(ProxyLease upstream, ILogger logger)
{
    _upstream = upstream;
    _logger = logger;

    // Port 0 asks the OS for an ephemeral port.
    _listener = new TcpListener(IPAddress.Loopback, 0);

    var credentials = Encoding.ASCII.GetBytes($"{upstream.Username}:{upstream.Password}");
    var encoded = Convert.ToBase64String(credentials);
    _authHeader = $"Proxy-Authorization: Basic {encoded}\r\n";
}
|
||||
|
||||
/// <summary>"127.0.0.1:port" — pass this to the browser's <c>--proxy-server</c>.</summary>
|
||||
public string Endpoint { get; private set; } = "";
|
||||
|
||||
/// <summary>
/// Binds the loopback port, publishes it through <see cref="Endpoint"/> and begins
/// accepting browser connections on a background task.
/// </summary>
/// <returns>This instance, so the call can be chained after construction.</returns>
public LocalForwardingProxy Start()
{
    _listener.Start();

    // Only known after Start(): the listener was created on an OS-assigned port.
    var bound = (IPEndPoint)_listener.LocalEndpoint;
    var port = bound.Port;
    Endpoint = $"127.0.0.1:{port}";

    _acceptLoop = Task.Run(() => AcceptLoopAsync(_cts.Token));

    _logger.LogInformation(
        "Local forwarding proxy listening on {Endpoint} → upstream {Upstream} ({Provider}).",
        Endpoint,
        _upstream.Endpoint,
        _upstream.Provider);

    return this;
}
|
||||
|
||||
// Accepts browser connections until the token is cancelled or the listener is
// stopped, handing each connection to its own background task.
private async Task AcceptLoopAsync(CancellationToken ct)
{
    while (!ct.IsCancellationRequested)
    {
        TcpClient client;
        try
        {
            client = await _listener.AcceptTcpClientAsync(ct);
        }
        catch (OperationCanceledException)
        {
            break;
        }
        catch (InvalidOperationException ex)
        {
            // Covers a stopped or disposed listener (ObjectDisposedException derives
            // from InvalidOperationException). It can never accept again, so retrying
            // would spin forever.
            _logger.LogDebug(ex, "Listener is no longer accepting; exiting accept loop.");
            break;
        }
        catch (Exception ex)
        {
            _logger.LogDebug(ex, "Accept failed.");
            continue;
        }

        // Fire-and-forget per connection; exceptions are swallowed per client so
        // one bad tunnel never takes down the listener. The token is deliberately
        // NOT passed to Task.Run: if it were already cancelled the delegate would
        // never execute and the accepted socket would never be disposed. The
        // handler still receives the token and stops promptly on its own.
        _ = Task.Run(() => HandleClientAsync(client, ct));
    }
}
|
||||
|
||||
// Serves one browser connection: reads the request header, then dispatches to the
// CONNECT tunnel path or the plain-HTTP path. The socket is disposed when this method
// exits, and any failure while serving is logged at debug level instead of propagating.
private async Task HandleClientAsync(TcpClient client, CancellationToken ct)
{
    using var connection = client;
    connection.NoDelay = true;

    try
    {
        var browserStream = connection.GetStream();

        if (await ReadHeaderAsync(browserStream, ct) is not { } head)
        {
            return;
        }

        // The request line is "<METHOD> <target> <version>"; method and target are required.
        var requestLine = head.Split("\r\n", 2)[0];
        var tokens = requestLine.Split(' ');
        if (tokens.Length < 2)
        {
            return;
        }

        var isTunnel = tokens[0].Equals("CONNECT", StringComparison.OrdinalIgnoreCase);
        if (!isTunnel)
        {
            await HandlePlainAsync(browserStream, head, ct);
            return;
        }

        await HandleConnectAsync(browserStream, tokens[1], ct);
    }
    catch (Exception ex)
    {
        _logger.LogDebug(ex, "Client connection error.");
    }
}
|
||||
|
||||
// HTTPS path. Dials the gateway, issues an authenticated CONNECT for the browser's
// target, and once the gateway answers 200 tells the browser the tunnel is up and
// relays raw bytes in both directions. Any other answer becomes a 502 for the browser.
private async Task HandleConnectAsync(NetworkStream clientStream, string target, CancellationToken ct)
{
    // A status line counts as accepted when it reads "HTTP/1.x 200 ...".
    static bool TunnelAccepted(string? head)
    {
        if (head is null || !head.StartsWith("HTTP/1.", StringComparison.Ordinal))
        {
            return false;
        }

        var fields = head.Split(' ', 3);
        return fields.Length >= 2 && fields[1] == "200";
    }

    using var gateway = new TcpClient { NoDelay = true };
    await gateway.ConnectAsync(_upstream.Host, _upstream.Port, ct);
    var gatewayStream = gateway.GetStream();

    var handshake = Encoding.ASCII.GetBytes(
        $"CONNECT {target} HTTP/1.1\r\nHost: {target}\r\n{_authHeader}\r\n");
    await gatewayStream.WriteAsync(handshake, ct);

    var reply = await ReadHeaderAsync(gatewayStream, ct);
    if (TunnelAccepted(reply))
    {
        var established = Encoding.ASCII.GetBytes("HTTP/1.1 200 Connection established\r\n\r\n");
        await clientStream.WriteAsync(established, ct);

        await RelayAsync(clientStream, gatewayStream, ct);
        return;
    }

    var status = reply?.Split("\r\n", 2)[0] ?? "no response";
    _logger.LogWarning("Upstream refused CONNECT {Target}: {Status}", target, status);

    var badGateway = Encoding.ASCII.GetBytes("HTTP/1.1 502 Bad Gateway\r\nConnection: close\r\n\r\n");
    await clientStream.WriteAsync(badGateway, ct);
}
|
||||
|
||||
// Plain-HTTP path: re-inject the already-read request header upstream with the
// gateway auth added, then relay both ways. Requests without a Host header are
// dropped. Only this first header block is rewritten; everything after it is
// relayed verbatim.
private async Task HandlePlainAsync(NetworkStream clientStream, string header, CancellationToken ct)
{
    var hostLine = header.Split("\r\n")
        .FirstOrDefault(l => l.StartsWith("Host:", StringComparison.OrdinalIgnoreCase));
    if (hostLine is null)
    {
        return;
    }

    using var upstream = new TcpClient { NoDelay = true };
    await upstream.ConnectAsync(_upstream.Host, _upstream.Port, ct);
    var upstreamStream = upstream.GetStream();

    // Insert the Proxy-Authorization header right after the request line.
    var idx = header.IndexOf("\r\n", StringComparison.Ordinal);
    var rewritten = header[..(idx + 2)] + _authHeader + header[(idx + 2)..];

    // The header text was produced by widening each received byte to a char, i.e.
    // Latin-1. Re-encoding with Latin-1 restores the exact original bytes, whereas
    // ASCII would replace every byte >= 0x80 (e.g. raw UTF-8 in a header value)
    // with '?'.
    await upstreamStream.WriteAsync(Encoding.Latin1.GetBytes(rewritten), ct);

    await RelayAsync(clientStream, upstreamStream, ct);
}
|
||||
|
||||
/// <summary>
/// Pipes bytes in both directions between <paramref name="a"/> and
/// <paramref name="b"/> until either direction stops, then cancels the other
/// direction and waits for it to wind down.
/// </summary>
/// <param name="a">First stream (callers in this file pass the client stream).</param>
/// <param name="b">Second stream (callers in this file pass the upstream stream).</param>
/// <param name="ct">Cancels both copy directions.</param>
/// <returns>A task that completes once both copy operations have ended; copy failures are not rethrown.</returns>
private static async Task RelayAsync(NetworkStream a, NetworkStream b, CancellationToken ct)
{
    // A linked source lets the relay stop the slower direction itself without
    // cancelling the caller's token.
    using var relayCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    var toUpstream = a.CopyToAsync(b, relayCts.Token);
    var toClient = b.CopyToAsync(a, relayCts.Token);

    await Task.WhenAny(toUpstream, toClient);
    await relayCts.CancelAsync();

    // Await both copies so neither is still touching the sockets when the caller
    // disposes them, and so their failures never surface as unobserved task
    // exceptions.
    await ObserveAsync(toUpstream);
    await ObserveAsync(toClient);

    static async Task ObserveAsync(Task copy)
    {
        try
        {
            await copy;
        }
        catch (Exception)
        {
            // Resets, cancellation and disposed sockets are the normal ways a
            // relay ends; as before, they are not reported to the caller.
        }
    }
}
|
||||
|
||||
/// <summary>
/// Reads <paramref name="stream"/> one byte at a time until the blank line
/// (CRLF CRLF) that ends an HTTP header block, so nothing past the header is
/// consumed.
/// </summary>
/// <param name="stream">Stream to read the header from.</param>
/// <param name="ct">Cancels the pending read.</param>
/// <returns>
/// The text read so far, terminator included when it was found. The text is
/// returned unterminated if the stream ends mid-header or more than 64 KiB
/// arrive without a terminator. <c>null</c> only when the stream ends before
/// any byte was read.
/// </returns>
private static async Task<string?> ReadHeaderAsync(NetworkStream stream, CancellationToken ct)
{
    // Upper bound that protects against a runaway/garbage stream.
    const int MaxHeaderChars = 64 * 1024;

    var oneByte = new byte[1];
    var text = new StringBuilder(256);

    do
    {
        if (await stream.ReadAsync(oneByte, ct) == 0)
        {
            // End of stream: hand back whatever arrived, or null if nothing did.
            return text.Length == 0 ? null : text.ToString();
        }

        // Each byte becomes exactly one char.
        text.Append((char)oneByte[0]);

        if (EndsWithBlankLine(text))
        {
            break;
        }
    }
    while (text.Length <= MaxHeaderChars);

    return text.ToString();

    static bool EndsWithBlankLine(StringBuilder sb)
    {
        var n = sb.Length;
        return n >= 4
            && sb[n - 4] == '\r'
            && sb[n - 3] == '\n'
            && sb[n - 2] == '\r'
            && sb[n - 1] == '\n';
    }
}
|
||||
|
||||
/// <summary>
/// Shuts the proxy down: signals cancellation, stops the listener, waits for the
/// accept loop (when one exists) to finish, and finally disposes the
/// cancellation source.
/// </summary>
/// <returns>A task-like value that completes when shutdown has finished.</returns>
public async ValueTask DisposeAsync()
{
    await _cts.CancelAsync();
    _listener.Stop();

    if (_acceptLoop is { } pendingLoop)
    {
        try
        {
            await pendingLoop;
        }
        catch (OperationCanceledException)
        {
            // Cancellation is the expected way for the loop to end on shutdown.
        }
    }

    _cts.Dispose();
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// Factory for <see cref="LocalForwardingProxy"/> instances. It holds the logger
/// supplied through its constructor and passes it to every proxy it builds, so
/// consumers can create a per-run local proxy without depending on
/// <see cref="ILoggerFactory"/> themselves.
/// </summary>
public sealed class LocalForwardingProxyFactory
{
    private readonly ILogger<LocalForwardingProxy> _logger;

    /// <summary>Stores the logger handed to every proxy this factory creates.</summary>
    /// <param name="logger">Logger passed to each created <see cref="LocalForwardingProxy"/>.</param>
    public LocalForwardingProxyFactory(ILogger<LocalForwardingProxy> logger) => _logger = logger;

    /// <summary>
    /// Builds a local proxy that chains to <paramref name="upstream"/>. The proxy
    /// is only constructed here, not started.
    /// </summary>
    /// <param name="upstream">Lease describing the upstream proxy to chain to.</param>
    /// <returns>A new <see cref="LocalForwardingProxy"/>.</returns>
    public LocalForwardingProxy Create(ProxyLease upstream)
    {
        return new LocalForwardingProxy(upstream, _logger);
    }
}
|
||||
103
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs
Normal file
103
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs
Normal file
@@ -0,0 +1,103 @@
|
||||
using System.Text.Json;
|
||||
using BlueLaminate.Scraper.Browser;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// What the IP-echo service (ipinfo.io) reports about the address a proxy lease
/// actually exits from. Every field is optional because the values come
/// straight from the service's JSON.
/// </summary>
/// <param name="Ip">Exit IP address.</param>
/// <param name="City">City reported for the exit IP.</param>
/// <param name="Region">Region reported for the exit IP.</param>
/// <param name="Country">Country reported for the exit IP.</param>
/// <param name="Org">
/// ASN plus organisation, e.g. "AS7922 Comcast Cable". This is the field that
/// separates residential from datacenter exits: a consumer ISP indicates a real
/// residential exit, while a hosting provider (OVH, Hetzner, AWS…) indicates a
/// datacenter address dressed up as one.
/// </param>
/// <param name="Hostname">The response's <c>hostname</c> value.</param>
/// <param name="Timezone">The response's <c>timezone</c> value.</param>
public sealed record ProxyExitInfo(
    string? Ip,
    string? City,
    string? Region,
    string? Country,
    string? Org,
    string? Hostname,
    string? Timezone);
|
||||
|
||||
/// <summary>
/// Minimal end-to-end check of the proxy plumbing: acquires a lease, starts a
/// local forwarding proxy for it, drives a real browser through that proxy to an
/// IP-echo endpoint, and reports the exit the endpoint saw. The request is tiny,
/// which makes it a sensible first run against a metered residential plan: it
/// shows that auth works and whether the exit is genuinely residential before
/// bandwidth is spent on real scraping.
/// </summary>
public sealed class ProxyProbe
{
    private const string IpEchoUrl = "https://ipinfo.io/json";

    // Case-insensitive binding so lowercase JSON keys map onto ProxyExitInfo's
    // PascalCase members.
    private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNameCaseInsensitive = true };

    private readonly IProxyProvider _provider;
    private readonly LocalForwardingProxyFactory _proxyFactory;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    /// <summary>Captures the collaborators the probe needs.</summary>
    /// <param name="provider">Source of proxy leases.</param>
    /// <param name="proxyFactory">Builds the local forwarding proxy for a lease.</param>
    /// <param name="factory">Builds the browser driver pointed at the local proxy.</param>
    /// <param name="logger">Receives the lease and exit-IP log lines.</param>
    public ProxyProbe(
        IProxyProvider provider,
        LocalForwardingProxyFactory proxyFactory,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        (_provider, _proxyFactory, _factory, _logger) = (provider, proxyFactory, factory, logger);
    }

    /// <summary>
    /// Runs one probe: lease, local proxy, browser, IP-echo page, parsed result.
    /// The browser is quit and the local proxy disposed on every exit path.
    /// </summary>
    /// <param name="request">Lease request forwarded to the proxy provider.</param>
    /// <returns>The exit information parsed from the IP-echo response.</returns>
    /// <exception cref="InvalidOperationException">
    /// The browser returned no page text, the page contained no JSON object, or
    /// the JSON deserialized to nothing.
    /// </exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        var lease = _provider.Acquire(request);
        var exitMode = lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}";
        _logger.LogInformation("Acquired {Provider} lease (exit {Mode}).", lease.Provider, exitMode);

        await using var localProxy = _proxyFactory.Create(lease).Start();
        var driver = _factory.Create(localProxy.Endpoint, blockImages: true);
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            driver.Navigate().GoToUrl(IpEchoUrl);

            // Take the page's rendered text instead of walking the DOM: the
            // browser's built-in JSON viewer wraps the payload, and the raw JSON
            // object is cut back out of that text below.
            var scriptResult = ((IJavaScriptExecutor)driver)
                .ExecuteScript("return document.documentElement.innerText;");
            if (scriptResult is not string pageText)
            {
                throw new InvalidOperationException("Browser returned no page text.");
            }

            var exit = JsonSerializer.Deserialize<ProxyExitInfo>(ExtractJson(pageText), JsonOptions);
            if (exit is null)
            {
                throw new InvalidOperationException("IP-echo response was empty.");
            }

            _logger.LogInformation(
                "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                exit.Ip, exit.City, exit.Region, exit.Country, exit.Org);

            return exit;
        }
        finally
        {
            driver.Quit();
        }
    }

    /// <summary>
    /// Cuts the span from the first '{' to the last '}' out of
    /// <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Rendered page text expected to contain one JSON object.</param>
    /// <returns>The substring holding the JSON object, braces included.</returns>
    /// <exception cref="InvalidOperationException">No brace pair in that order exists.</exception>
    private static string ExtractJson(string text)
    {
        var open = text.IndexOf('{');
        var close = text.LastIndexOf('}');

        return open >= 0 && close > open
            ? text.Substring(open, close - open + 1)
            : throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
    }
}
|
||||
@@ -11,9 +11,6 @@ namespace BlueLaminate.Scraper.Skins;
|
||||
/// </summary>
|
||||
public sealed class SkinCatalogClient
|
||||
{
|
||||
public const string DefaultUrl =
|
||||
"https://raw.githubusercontent.com/ByMykel/CSGO-API/refs/heads/main/public/api/en/skins.json";
|
||||
|
||||
private static readonly JsonSerializerOptions Options = new()
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
|
||||
@@ -24,10 +21,10 @@ public sealed class SkinCatalogClient
|
||||
private readonly HttpClient _http;
|
||||
private readonly string _url;
|
||||
|
||||
public SkinCatalogClient(HttpClient http, string? url = null)
|
||||
public SkinCatalogClient(HttpClient http, SkinCatalogOptions options)
|
||||
{
|
||||
_http = http;
|
||||
_url = url ?? DefaultUrl;
|
||||
_url = options.Url;
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<CatalogSkin>> FetchAsync(CancellationToken ct = default)
|
||||
@@ -67,14 +64,22 @@ public sealed class SkinCatalogClient
|
||||
/// <summary>
/// Appends one <see cref="CatalogSource"/> of the given <paramref name="type"/>
/// to <paramref name="into"/> for every entry of <paramref name="items"/> that
/// has both an id and a name and whose id is not already in the list.
/// </summary>
/// <param name="into">List that receives the new sources; also the list checked for duplicate ids.</param>
/// <param name="items">Entries to convert; <c>null</c> is treated as nothing to add.</param>
/// <param name="type">Type value given to every source created by this call.</param>
private static void AddSources(List<CatalogSource> into, List<NamedDto>? items, string type)
{
    if (items is null)
    {
        return;
    }

    foreach (var candidate in items)
    {
        // Entries without an id or a name are skipped, as are ids already listed.
        if (!string.IsNullOrEmpty(candidate.Id)
            && !string.IsNullOrEmpty(candidate.Name)
            && !into.Any(existing => existing.Id == candidate.Id))
        {
            into.Add(new CatalogSource(candidate.Id, candidate.Name, type));
        }
    }
}
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
namespace BlueLaminate.Scraper.Skins;
|
||||
|
||||
/// <summary>
/// Settings for <see cref="SkinCatalogClient"/>, bound from the
/// <c>SkinCatalog</c> configuration section.
/// </summary>
public sealed class SkinCatalogOptions
{
    /// <summary>Name of the configuration section these options bind from.</summary>
    public const string SectionName = "SkinCatalog";

    // Used when configuration does not override Url.
    private const string DefaultCatalogUrl =
        "https://raw.githubusercontent.com/ByMykel/CSGO-API/refs/heads/main/public/api/en/skins.json";

    /// <summary>
    /// Location of the static CS2 skin catalogue dataset (ByMykel/CSGO-API
    /// skins.json). Defaults to the raw GitHub URL of that file.
    /// </summary>
    public string Url { get; set; } = DefaultCatalogUrl;
}
|
||||
Reference in New Issue
Block a user