add csfloat api usage
This commit is contained in:
@@ -6,4 +6,9 @@
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.8" />
|
||||
<PackageReference Include="Selenium.WebDriver" Version="4.44.0" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
using BlueLaminate.Scraper.Proxies;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
using OpenQA.Selenium.Edge;
|
||||
|
||||
namespace BlueLaminate.Scraper.Browser;
|
||||
|
||||
/// <summary>
/// Builds a non-headless Edge (Chromium) WebDriver routed through a
/// <see cref="ProxyLease"/>. Two things make this non-trivial:
/// <list type="bullet">
/// <item>Proxy authentication. Chromium can't auto-fill the gateway's auth
/// dialog under automation, and the classic extension trick relies on
/// Manifest V2 which current Chromium disables. Instead we answer the proxy's
/// 407 challenge through the DevTools (CDP) auth handler, which works
/// non-headless and needs no extension.</item>
/// <item>Bandwidth. The residential plan is metered per GB, so images are
/// disabled at the content-settings level. Cloudflare gates on JS execution and
/// TLS/behaviour, not whether pictures render, so this stays realistic.</item>
/// </list>
/// Each driver gets a throwaway user-data dir so runs never share cookies and
/// never touch the user's real Edge profile.
/// </summary>
public sealed class BrowserDriverFactory
{
    private readonly ILogger<BrowserDriverFactory> _logger;

    /// <summary>Creates the factory.</summary>
    /// <param name="logger">Receives the launch message and cleanup diagnostics.</param>
    public BrowserDriverFactory(ILogger<BrowserDriverFactory> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Launches Edge with its traffic sent through <paramref name="lease"/> and
    /// registers a network authentication handler that answers with the lease
    /// credentials.
    /// </summary>
    /// <param name="lease">Proxy endpoint and credentials to use.</param>
    /// <param name="blockImages">
    /// When true (default) images are disabled via the profile content setting.
    /// </param>
    /// <returns>A started driver; the caller owns it and must quit it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="lease"/> is null.</exception>
    /// <remarks>
    /// If launching the browser or wiring the auth handler fails, the driver (if
    /// any) is quit and the temporary profile directory is removed before the
    /// original exception is rethrown.
    /// </remarks>
    public async Task<IWebDriver> CreateAsync(ProxyLease lease, bool blockImages = true)
    {
        ArgumentNullException.ThrowIfNull(lease);

        var options = new EdgeOptions();

        // Route browser traffic through the gateway via the launch argument
        // rather than EdgeOptions.Proxy. Setting Proxy makes Selenium hand the
        // gateway to Selenium Manager for the driver *download* too, which fails
        // because that step can't authenticate. The arg scopes the proxy to the
        // browser only; credentials are answered below via CDP. No scheme = all
        // protocols use the gateway.
        options.AddArgument($"--proxy-server={lease.Endpoint}");

        // Reduce the most obvious automation tells; residential exit + a real
        // (non-headless) browser do the rest.
        options.AddArgument("--disable-blink-features=AutomationControlled");
        options.AddExcludedArgument("enable-automation");
        options.AddArgument("--no-first-run");
        options.AddArgument("--no-default-browser-check");
        options.AddArgument("--start-maximized");

        // Isolated, disposable profile per launch.
        var profileDir = Path.Combine(Path.GetTempPath(), "bluelaminate-edge", Guid.NewGuid().ToString("N"));
        Directory.CreateDirectory(profileDir);
        options.AddArgument($"--user-data-dir={profileDir}");

        if (blockImages)
        {
            options.AddUserProfilePreference("profile.managed_default_content_settings.images", 2);
        }

        _logger.LogInformation(
            "Launching Edge via proxy {Endpoint} (provider {Provider}, session {Session}).",
            lease.Endpoint, lease.Provider, lease.SessionId ?? "rotating");

        EdgeDriver? driver = null;
        try
        {
            // Construct inside the try so a failed launch also cleans up the profile dir.
            driver = new EdgeDriver(options);

            // Answer the gateway's proxy-auth (407) challenge with the lease
            // credentials. UriMatcher returns true so it applies to every
            // request, since the challenge originates from the proxy itself.
            var network = driver.Manage().Network;
            network.AddAuthenticationHandler(new NetworkAuthenticationHandler
            {
                UriMatcher = _ => true,
                Credentials = new PasswordCredentials(lease.Username, lease.Password),
            });
            await network.StartMonitoring();

            return driver;
        }
        catch
        {
            try
            {
                driver?.Quit();
            }
            finally
            {
                // The caller never receives the driver on this path, so nobody
                // else can remove the throwaway profile.
                TryDeleteProfile(profileDir);
            }

            throw;
        }
    }

    // Best-effort removal of a temporary profile directory; a failure here is
    // logged and must never mask the exception that triggered the cleanup.
    private void TryDeleteProfile(string profileDir)
    {
        try
        {
            if (Directory.Exists(profileDir))
            {
                Directory.Delete(profileDir, recursive: true);
            }
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogDebug(ex, "Could not remove temporary Edge profile {ProfileDir}.", profileDir);
        }
    }
}
|
||||
@@ -0,0 +1,139 @@
|
||||
using System.Text;
|
||||
using BlueLaminate.Scraper.Browser;
|
||||
using BlueLaminate.Scraper.Proxies;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
|
||||
namespace BlueLaminate.Scraper.CsFloat;
|
||||
|
||||
/// <summary>
/// Phase-B discovery tool. Drives a real Edge browser through a residential
/// lease to a CSFloat search page, then records every CSFloat <c>/api/</c> JSON
/// response to disk while a human clicks around (open a listing → "Latest
/// Sales"). We don't yet know CSFloat's exact endpoints or DOM selectors, so a
/// human-in-the-loop is the cheapest way to surface the real traffic: the tool
/// just listens and dumps, the operator drives the UI in the visible window.
/// Once we can see the captured shapes we can automate navigation and design the
/// tables.
/// </summary>
public sealed class CsFloatCaptureService
{
    private readonly IProxyProvider _provider;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<CsFloatCaptureService> _logger;

    /// <summary>Creates the capture service.</summary>
    /// <param name="provider">Source of the proxy lease used for the browser.</param>
    /// <param name="factory">Creates the proxied Edge driver.</param>
    /// <param name="logger">Receives capture and diagnostic messages.</param>
    public CsFloatCaptureService(
        IProxyProvider provider,
        BrowserDriverFactory factory,
        ILogger<CsFloatCaptureService> logger)
    {
        _provider = provider;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Opens <paramref name="url"/> through the proxy and captures CSFloat API
    /// responses to <paramref name="outputDir"/> until <paramref name="browseUntilDone"/>
    /// completes (the CLI ties that to the operator pressing Enter). When
    /// <paramref name="diagnose"/> is true, every response whose URL contains
    /// "csfloat" is logged (status + type + url) to reveal where a login wall appears.
    /// </summary>
    /// <param name="url">Page to navigate to after the browser starts.</param>
    /// <param name="outputDir">Directory for the captured bodies; created if missing.</param>
    /// <param name="request">Proxy requirements passed to the provider.</param>
    /// <param name="loadImages">When false, images are blocked in the browser.</param>
    /// <param name="diagnose">Enables per-response logging.</param>
    /// <param name="browseUntilDone">Awaited after navigation; capture stops when it completes.</param>
    /// <returns>
    /// The number of responses successfully written to disk. Responses whose
    /// write failed still consume a sequence number in the file names but are
    /// not counted.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="browseUntilDone"/> is null.</exception>
    public async Task<int> RunAsync(
        string url,
        string outputDir,
        ProxyRequest request,
        bool loadImages,
        bool diagnose,
        Func<Task> browseUntilDone)
    {
        // Fail before spending proxy bandwidth on a browser launch.
        ArgumentNullException.ThrowIfNull(browseUntilDone);

        Directory.CreateDirectory(outputDir);

        var lease = _provider.Acquire(request);
        var driver = await _factory.CreateAsync(lease, blockImages: !loadImages);

        // sequence numbers the files; written counts only successful writes.
        var sequence = 0;
        var written = 0;

        void OnResponse(object? sender, NetworkResponseReceivedEventArgs e)
        {
            var responseUrl = e.ResponseUrl;
            if (string.IsNullOrEmpty(responseUrl)
                || !responseUrl.Contains("csfloat", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            // Diagnose mode logs every CSFloat-domain response — including the
            // SPA shell, redirects and any 401/403 — so we can see exactly where
            // a Steam-login wall appears even before any /api/ call fires.
            if (diagnose)
            {
                _logger.LogInformation("[{Status}] {Type} {Url}",
                    e.ResponseStatusCode, e.ResponseResourceType, responseUrl);
            }

            // Only JSON API calls get written to disk; skip the shell, images,
            // fonts, analytics, etc. Matches both api.csfloat.com and csfloat.com/api.
            if (!responseUrl.Contains("/api/", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            var body = e.ResponseBody;
            if (string.IsNullOrWhiteSpace(body))
            {
                // Body wasn't buffered (e.g. the known Fetch interception race).
                // Log the endpoint so we still learn it exists even if empty.
                _logger.LogWarning("No body captured for {Url} (status {Status}).",
                    responseUrl, e.ResponseStatusCode);
                return;
            }

            try
            {
                var n = Interlocked.Increment(ref sequence);
                var fileName = $"{n:D3}_{Sanitize(responseUrl)}.json";
                File.WriteAllText(Path.Combine(outputDir, fileName), body, Encoding.UTF8);

                // Count only after the write succeeded so the return value
                // really is "responses written".
                Interlocked.Increment(ref written);

                _logger.LogInformation(
                    "Captured #{N} [{Status}] {Url} → {File} ({Bytes} bytes).",
                    n, e.ResponseStatusCode, responseUrl, fileName, body.Length);
            }
            catch (Exception ex)
            {
                // Deliberately broad: an exception escaping an event handler
                // would surface inside Selenium's network monitoring.
                _logger.LogWarning(ex, "Failed to write capture for {Url}.", responseUrl);
            }
        }

        try
        {
            // Inside the outer try so the browser is quit even if obtaining the
            // network or subscribing throws.
            var network = driver.Manage().Network;
            network.NetworkResponseReceived += OnResponse;
            try
            {
                _logger.LogInformation("Navigating to {Url}", url);
                driver.Navigate().GoToUrl(url);
                await browseUntilDone();
            }
            finally
            {
                network.NetworkResponseReceived -= OnResponse;
            }
        }
        finally
        {
            driver.Quit();
        }

        return Volatile.Read(ref written);
    }

    // Turn a URL into a filesystem-safe, readable, length-capped file stem so the
    // captures are self-describing (the endpoint is visible in the filename).
    // Letters, digits, '-' and '.' are kept; everything else becomes '_'; the
    // result is cut to at most 120 characters.
    private static string Sanitize(string url)
    {
        var trimmed = url
            .Replace("https://", "", StringComparison.OrdinalIgnoreCase)
            .Replace("http://", "", StringComparison.OrdinalIgnoreCase);

        var sb = new StringBuilder(trimmed.Length);
        foreach (var c in trimmed)
        {
            sb.Append(char.IsLetterOrDigit(c) || c is '-' or '.' ? c : '_');
        }

        var stem = sb.ToString();
        return stem.Length > 120 ? stem[..120] : stem;
    }
}
|
||||
47
BlueLaminate/BlueLaminate.Scraper/CsFloat/CsFloatListing.cs
Normal file
47
BlueLaminate/BlueLaminate.Scraper/CsFloat/CsFloatListing.cs
Normal file
@@ -0,0 +1,47 @@
|
||||
namespace BlueLaminate.Scraper.CsFloat;
|
||||
|
||||
/// <summary>
/// Flattened read model for one active CSFloat listing, carrying only the
/// listing and item fields this project uses. CSFloat reports prices as integer
/// cents; <see cref="Price"/> holds the value already converted to US dollars as
/// a <see cref="decimal"/> (fractional cents preserved), so callers never work in
/// cents. Intended for the documented <c>GET /api/v1/listings</c> endpoint —
/// active listings only, not sales.
/// </summary>
/// <param name="ListingId">CSFloat listing id (stable while the listing is live).</param>
/// <param name="CreatedAt">Creation time of the listing.</param>
/// <param name="Type">Listing type: "buy_now" or "auction".</param>
/// <param name="Price">Asking price in USD (converted from cents).</param>
/// <param name="MarketHashName">Canonical item name, e.g. "M4A4 | Cyber Security (Field-Tested)".</param>
/// <param name="DefIndex">Weapon definition index (maps to catalog weapon_id).</param>
/// <param name="PaintIndex">Paint index (maps to catalog paint_index).</param>
/// <param name="PaintSeed">Pattern seed.</param>
/// <param name="FloatValue">Exact float/wear value.</param>
/// <param name="WearName">Wear bucket name, e.g. "Field-Tested".</param>
/// <param name="IsStatTrak">True for the StatTrak™ variant.</param>
/// <param name="IsSouvenir">True for the Souvenir variant.</param>
/// <param name="StickerCount">Number of stickers applied.</param>
/// <param name="SellerSteamId">Seller's SteamID64.</param>
/// <param name="InspectLink">In-game inspect link.</param>
/// <param name="AssetId">
/// Steam asset id of this specific copy. It changes when the item trades, so it
/// is NOT a stable item identity — but two live listings that share a
/// fingerprint (skin+float+seed+ST/souvenir) while showing different asset ids
/// are the signature of a duplicated ("duped") item.
/// </param>
public sealed record CsFloatListing(
    string ListingId,
    DateTimeOffset CreatedAt,
    string Type,
    decimal Price,
    string MarketHashName,
    int DefIndex,
    int PaintIndex,
    int PaintSeed,
    decimal FloatValue,
    string? WearName,
    bool IsStatTrak,
    bool IsSouvenir,
    int StickerCount,
    string? SellerSteamId,
    string? InspectLink,
    string? AssetId);
|
||||
@@ -0,0 +1,277 @@
|
||||
using System.Net;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace BlueLaminate.Scraper.CsFloat;
|
||||
|
||||
/// <summary>
/// Raised when CSFloat answers a request with a non-success status (bad or
/// missing key, rate limit, etc.), letting the CLI show a clear message rather
/// than a raw HTTP failure. The message embeds the numeric status, its name and
/// the supplied response body text.
/// </summary>
public sealed class CsFloatApiException : Exception
{
    /// <summary>Creates the exception for a rejected request.</summary>
    /// <param name="status">HTTP status returned by CSFloat.</param>
    /// <param name="body">Response body text to include in the message.</param>
    public CsFloatApiException(HttpStatusCode status, string body)
        : base($"CSFloat API returned {(int)status} {status}: {body}")
    {
        Status = status;
    }

    /// <summary>HTTP status returned by CSFloat.</summary>
    public HttpStatusCode Status { get; }
}
|
||||
|
||||
/// <summary>
/// A single page of listings together with the opaque cursor that requests the
/// following page.
/// </summary>
/// <param name="Listings">Listings contained in this page.</param>
/// <param name="Cursor">Cursor for the next page; null at the end.</param>
public sealed record ListingsPageResult(
    IReadOnlyList<CsFloatListing> Listings,
    string? Cursor);
|
||||
|
||||
/// <summary>
/// Client for CSFloat's official, documented <c>GET /api/v1/listings</c> endpoint
/// (active listings). Authenticates with a developer API key via the
/// <c>Authorization</c> header, filters by def_index/paint_index, and walks the
/// cursor-based pagination. This is the supported path the user opted into — no
/// proxy or browser involved. Docs: https://docs.csfloat.com/
/// </summary>
public sealed class CsFloatListingsClient
{
    private const string BaseUrl = "https://csfloat.com/api/v1/listings";
    private const int MaxLimit = 50; // API hard cap per page.

    private static readonly JsonSerializerOptions Options = new()
    {
        // CSFloat uses snake_case for item fields (market_hash_name, float_value,
        // def_index, …). Without this policy, multi-word fields silently
        // deserialize to defaults while single-word ones slip through on
        // case-insensitivity — exactly the "prices but no floats/names" symptom.
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        PropertyNameCaseInsensitive = true,
        NumberHandling = JsonNumberHandling.AllowReadingFromString,
    };

    private readonly HttpClient _http;
    private readonly string _apiKey;
    private readonly ILogger<CsFloatListingsClient> _logger;

    /// <summary>Creates the client.</summary>
    /// <param name="http">HTTP client used for all requests.</param>
    /// <param name="apiKey">CSFloat developer API key, sent verbatim as the Authorization header.</param>
    /// <param name="logger">Receives paging and rate-limit messages.</param>
    /// <exception cref="ArgumentException"><paramref name="apiKey"/> is null, empty or whitespace.</exception>
    public CsFloatListingsClient(HttpClient http, string apiKey, ILogger<CsFloatListingsClient> logger)
    {
        if (string.IsNullOrWhiteSpace(apiKey))
        {
            throw new ArgumentException("CSFloat API key is required.", nameof(apiKey));
        }

        _http = http;
        _apiKey = apiKey;
        _logger = logger;
    }

    /// <summary>
    /// Rate-limit state from the most recent response (success or failure).
    /// <see cref="CsFloatRateLimit.None"/> until the first request completes.
    /// </summary>
    public CsFloatRateLimit LastRateLimit { get; private set; } = CsFloatRateLimit.None;

    /// <summary>
    /// Fetches active listings for one skin (by def_index/paint_index), following
    /// the cursor until there are no more pages or <paramref name="maxListings"/>
    /// is reached. <paramref name="maxListings"/> guards against pulling an
    /// unbounded result set during the spike.
    /// </summary>
    /// <param name="defIndex">Weapon definition index to filter on.</param>
    /// <param name="paintIndex">Paint index to filter on.</param>
    /// <param name="sortBy">Value for the <c>sort_by</c> query parameter.</param>
    /// <param name="maxListings">
    /// Upper bound on the number of listings returned. Zero or negative returns
    /// an empty list without calling the API.
    /// </param>
    /// <param name="type">Listing type filter; null or empty includes every type.</param>
    /// <param name="ct">Cancels the in-flight request.</param>
    /// <returns>At most <paramref name="maxListings"/> listings, in the order received.</returns>
    /// <exception cref="CsFloatApiException">CSFloat returned a non-success status.</exception>
    public async Task<IReadOnlyList<CsFloatListing>> GetListingsAsync(
        int defIndex,
        int paintIndex,
        string sortBy = "lowest_price",
        int maxListings = 50,
        string? type = "buy_now",
        CancellationToken ct = default)
    {
        var results = new List<CsFloatListing>();
        string? cursor = null;

        // A pre-checked loop: with a non-positive cap no request is made at all
        // (the page limit would otherwise be clamped up to 1 and spend a call).
        while (results.Count < maxListings)
        {
            var remaining = maxListings - results.Count;
            var limit = Math.Min(MaxLimit, remaining);

            var page = await FetchPageAsync(defIndex, paintIndex, sortBy, limit, cursor, type, ct);

            // Never exceed the cap, even if the server returns more than asked.
            results.AddRange(page.Listings.Take(remaining));

            _logger.LogInformation(
                "Fetched {PageCount} listings (total {Total}); cursor {Cursor}.",
                page.Listings.Count, results.Count, page.Cursor is null ? "—" : "present");

            cursor = page.Cursor;

            // Stop when the API signals the end (no cursor) or returns an empty page.
            if (string.IsNullOrEmpty(cursor) || page.Listings.Count == 0)
            {
                break;
            }
        }

        return results;
    }

    /// <summary>
    /// Fetches a single page of listings and the cursor for the next page. The
    /// sweep runner drives this directly so it can decide — between pages — when
    /// to stop (already-seen listings) or pace (rate-limit headers). Filters are
    /// optional: omit def_index/paint_index for a global sweep across all items.
    /// </summary>
    /// <param name="defIndex">Optional weapon definition index filter.</param>
    /// <param name="paintIndex">Optional paint index filter.</param>
    /// <param name="sortBy">Value for the <c>sort_by</c> query parameter.</param>
    /// <param name="limit">Requested page size; clamped to 1..50.</param>
    /// <param name="cursor">Cursor from the previous page, or null for the first page.</param>
    /// <param name="type">Listing type filter; null or empty includes every type.</param>
    /// <param name="ct">Cancels the in-flight request.</param>
    /// <exception cref="CsFloatApiException">CSFloat returned a non-success status.</exception>
    public Task<ListingsPageResult> FetchPageAsync(
        int? defIndex,
        int? paintIndex,
        string sortBy,
        int limit,
        string? cursor,
        string? type = "buy_now",
        CancellationToken ct = default)
    {
        var query = new List<string>
        {
            $"sort_by={Uri.EscapeDataString(sortBy)}",
            $"limit={Math.Clamp(limit, 1, MaxLimit)}",
        };

        // Default to fixed-price listings only; auctions have no firm sale price
        // and aren't wanted. Pass type=null to include everything.
        if (!string.IsNullOrEmpty(type))
        {
            query.Add($"type={Uri.EscapeDataString(type)}");
        }

        if (defIndex is { } def)
        {
            query.Add($"def_index={def}");
        }

        if (paintIndex is { } paint)
        {
            query.Add($"paint_index={paint}");
        }

        if (!string.IsNullOrEmpty(cursor))
        {
            query.Add($"cursor={Uri.EscapeDataString(cursor)}");
        }

        return SendPageAsync(query, ct);
    }

    // Issues the GET, records rate-limit headers, throws CsFloatApiException on
    // a non-success status, and maps the parsed DTOs to CsFloatListing.
    private async Task<ListingsPageResult> SendPageAsync(List<string> query, CancellationToken ct)
    {
        var url = $"{BaseUrl}?{string.Join('&', query)}";

        using var request = new HttpRequestMessage(HttpMethod.Get, url);
        // CSFloat expects the raw key in the Authorization header (no scheme).
        request.Headers.TryAddWithoutValidation("Authorization", _apiKey);

        using var response = await _http.SendAsync(request, ct);
        var body = await response.Content.ReadAsStringAsync(ct);

        // Always record rate-limit state, even on failure — a 429 is exactly when
        // these headers (and Retry-After) matter most.
        LastRateLimit = ParseRateLimit(response);
        _logger.LogInformation("{RateLimit}", LastRateLimit);

        if (!response.IsSuccessStatusCode)
        {
            throw new CsFloatApiException(response.StatusCode, Truncate(body));
        }

        var page = Parse(body);

        // An object response without "data" leaves Data null, and JSON nulls
        // inside the array deserialize to null elements; treat both as "no listing".
        var listings = (page.Data ?? [])
            .OfType<ListingDto>()
            .Select(Map)
            .ToList();

        return new ListingsPageResult(listings, page.Cursor);
    }

    // Pull rate-limit info from response headers without assuming exact names:
    // collect every header containing "ratelimit"/"rate-limit" (case-insensitive)
    // plus Retry-After, then best-effort map the common remaining/limit/reset
    // fields. The full set is kept in Raw so the spike reveals the real names.
    private static CsFloatRateLimit ParseRateLimit(HttpResponseMessage response)
    {
        var raw = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        // Scan both response and content headers — servers split them either way.
        var all = response.Headers.AsEnumerable();
        if (response.Content is not null)
        {
            all = all.Concat(response.Content.Headers);
        }

        foreach (var header in all)
        {
            var name = header.Key;
            var isRateLimit = name.Contains("ratelimit", StringComparison.OrdinalIgnoreCase)
                || name.Contains("rate-limit", StringComparison.OrdinalIgnoreCase)
                || name.Equals("Retry-After", StringComparison.OrdinalIgnoreCase);
            if (isRateLimit)
            {
                raw[name] = string.Join(",", header.Value);
            }
        }

        if (raw.Count == 0)
        {
            return CsFloatRateLimit.None;
        }

        return new CsFloatRateLimit(
            Limit: FindInt(raw, "limit"),
            Remaining: FindInt(raw, "remaining"),
            Reset: Find(raw, "reset"),
            RetryAfter: FindInt(raw, "retry-after"),
            Raw: raw);
    }

    // Returns the value of the header that carries the given metric, or null.
    //
    // Every "ratelimit" header name contains the substring "limit", so a plain
    // substring search for "limit" could return the Remaining or Reset header
    // depending on header order. Pass 1 therefore prefers a name that IS the
    // token or ends with "-<token>" (e.g. "X-RateLimit-Limit", "RateLimit-Reset",
    // "Retry-After"). Pass 2 falls back to a substring match for unconventional
    // names, skipping headers that clearly belong to another metric.
    private static string? Find(IReadOnlyDictionary<string, string> raw, string token)
    {
        var suffix = "-" + token;
        foreach (var (name, value) in raw)
        {
            if (name.Equals(token, StringComparison.OrdinalIgnoreCase)
                || name.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
            {
                return value;
            }
        }

        foreach (var (name, value) in raw)
        {
            if (!name.Contains(token, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            var isOtherMetric = token == "limit"
                && (name.Contains("remaining", StringComparison.OrdinalIgnoreCase)
                    || name.Contains("reset", StringComparison.OrdinalIgnoreCase));
            if (!isOtherMetric)
            {
                return value;
            }
        }

        return null;
    }

    // Integer form of Find; null when the header is absent or not a plain integer
    // (e.g. a Retry-After sent as an HTTP date). Parsed culture-invariantly
    // because header values are machine-readable.
    private static int? FindInt(IReadOnlyDictionary<string, string> raw, string token) =>
        int.TryParse(
            Find(raw, token),
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var v)
            ? v
            : null;

    // The endpoint may return either a bare array of listings or an object with
    // { data, cursor }. Detect which by the first non-whitespace character so the
    // spike works regardless of which shape the live API uses.
    private static ListingsPage Parse(string body)
    {
        var trimmed = body.TrimStart();
        if (trimmed.StartsWith('['))
        {
            var array = JsonSerializer.Deserialize<List<ListingDto>>(body, Options) ?? [];
            return new ListingsPage(array, null);
        }

        return JsonSerializer.Deserialize<ListingsPage>(body, Options)
            ?? new ListingsPage([], null);
    }

    // Flattens the listing + nested item DTOs into the public read model,
    // substituting defaults for fields missing from the payload.
    private static CsFloatListing Map(ListingDto dto)
    {
        var item = dto.Item ?? new ItemDto();
        return new CsFloatListing(
            ListingId: dto.Id ?? "",
            CreatedAt: dto.CreatedAt ?? default,
            Type: dto.Type ?? "buy_now",
            // CSFloat prices are integer cents.
            Price: dto.Price / 100m,
            MarketHashName: item.MarketHashName ?? "Unknown",
            DefIndex: item.DefIndex,
            PaintIndex: item.PaintIndex,
            PaintSeed: item.PaintSeed,
            FloatValue: item.FloatValue,
            WearName: item.WearName,
            IsStatTrak: item.IsStatTrak,
            IsSouvenir: item.IsSouvenir,
            StickerCount: item.Stickers?.Count ?? 0,
            SellerSteamId: dto.Seller?.SteamId,
            InspectLink: item.InspectLink,
            AssetId: item.AssetId);
    }

    // Caps error bodies at 500 characters so exception messages stay readable.
    private static string Truncate(string s) => s.Length <= 500 ? s : s[..500];

    // Wire shape of the { data, cursor } response. Data is nullable because the
    // serializer leaves it null when the property is absent from the payload.
    private sealed record ListingsPage(
        [property: JsonPropertyName("data")] List<ListingDto>? Data,
        [property: JsonPropertyName("cursor")] string? Cursor);

    private sealed record ListingDto(
        string? Id,
        DateTimeOffset? CreatedAt,
        string? Type,
        long Price,
        SellerDto? Seller,
        ItemDto? Item);

    private sealed record SellerDto(string? SteamId);

    private sealed record ItemDto
    {
        public string? MarketHashName { get; init; }
        public int DefIndex { get; init; }
        public int PaintIndex { get; init; }
        public int PaintSeed { get; init; }
        public decimal FloatValue { get; init; }
        public string? WearName { get; init; }
        public bool IsStatTrak { get; init; }
        public bool IsSouvenir { get; init; }
        public string? InspectLink { get; init; }
        public string? AssetId { get; init; }
        public List<StickerDto>? Stickers { get; init; }
    }

    private sealed record StickerDto(int StickerId, int Slot, string? Name);
}
|
||||
@@ -0,0 +1,33 @@
|
||||
namespace BlueLaminate.Scraper.CsFloat;
|
||||
|
||||
/// <summary>
/// Snapshot of the rate-limit headers seen on a CSFloat API response. The
/// official docs don't pin down the exact header names, so the values are
/// filled in generically (any header whose name contains
/// "ratelimit"/"rate-limit", plus "retry-after") and the <see cref="Raw"/> map is
/// kept so the real names surface during the spike. A future catalog sweep uses
/// <see cref="Remaining"/>/<see cref="Reset"/> to pace requests and avoid 429s.
/// </summary>
/// <param name="Limit">Max requests allowed in the current window, if reported.</param>
/// <param name="Remaining">Requests left in the current window, if reported.</param>
/// <param name="Reset">Raw reset value as sent (epoch seconds or seconds-until — unverified).</param>
/// <param name="RetryAfter">Seconds to wait, from a Retry-After header (typically on 429).</param>
/// <param name="Raw">Every rate-limit-related header, verbatim, for inspection.</param>
public sealed record CsFloatRateLimit(
    int? Limit,
    int? Remaining,
    string? Reset,
    int? RetryAfter,
    IReadOnlyDictionary<string, string> Raw)
{
    /// <summary>Instance with no values and an empty <see cref="Raw"/> map.</summary>
    public static readonly CsFloatRateLimit None = new(
        Limit: null,
        Remaining: null,
        Reset: null,
        RetryAfter: null,
        Raw: new Dictionary<string, string>());

    /// <summary>
    /// True when a remaining count was reported and it is zero or less; false
    /// when no remaining count is known.
    /// </summary>
    public bool IsExhausted => Remaining.HasValue && Remaining.Value <= 0;

    /// <summary>
    /// Renders the raw headers as "rate-limit: name=value, …", or a placeholder
    /// when none were captured.
    /// </summary>
    public override string ToString()
    {
        if (Raw.Count == 0)
        {
            return "rate-limit: (no headers)";
        }

        var pairs = Raw.Select(header => $"{header.Key}={header.Value}");
        return "rate-limit: " + string.Join(", ", pairs);
    }
}
|
||||
21
BlueLaminate/BlueLaminate.Scraper/Proxies/IProxyProvider.cs
Normal file
21
BlueLaminate/BlueLaminate.Scraper/Proxies/IProxyProvider.cs
Normal file
@@ -0,0 +1,21 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// Abstraction over where proxy endpoints come from. It exists so the rest of
/// the scraper depends only on this interface and <see cref="ProxyLease"/>:
/// a different residential provider, the future C2 that allocates IPs to
/// containers, or a composite "grab-bag" over several providers can be dropped
/// in without touching any browser or scraping code.
/// </summary>
public interface IProxyProvider
{
    /// <summary>Provider identifier stamped on issued leases, e.g. "iproyal".</summary>
    string Name { get; }

    /// <summary>
    /// Returns a usable endpoint for <paramref name="request"/>. Gateway
    /// providers implement this as pure string composition (no network call);
    /// the C2 implementation can later replace that with real allocation.
    /// </summary>
    /// <param name="request">What the caller needs from the proxy.</param>
    /// <returns>The lease describing the endpoint and credentials to use.</returns>
    ProxyLease Acquire(ProxyRequest request);
}
|
||||
@@ -0,0 +1,70 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// <see cref="IProxyProvider"/> backed by IPRoyal's residential gateway. IPRoyal
/// exposes one fixed host/port (geo.iproyal.com:12321); country, sticky-session
/// id and session lifetime are all expressed as underscore-delimited parameters
/// appended to the account password, for example
/// "secret_country-us_session-ab12cd_lifetime-30m". The account username is
/// passed through unchanged. Docs: https://docs.iproyal.com/proxies/residential/proxy
/// </summary>
public sealed class IpRoyalProxyProvider : IProxyProvider
{
    public const string GatewayHost = "geo.iproyal.com";
    public const int GatewayPort = 12321;

    // IPRoyal caps sticky sessions; 30 minutes is a safe default that comfortably
    // covers a single scrape pass without forcing an early IP rotation.
    private static readonly TimeSpan DefaultLifetime = TimeSpan.FromMinutes(30);

    private readonly string _username;
    private readonly string _password;

    /// <summary>Creates a provider for one IPRoyal account.</summary>
    /// <param name="username">Account username, presented to the gateway as-is.</param>
    /// <param name="password">Account password; request parameters are appended per lease.</param>
    /// <exception cref="ArgumentException">Either credential is null, empty or whitespace.</exception>
    public IpRoyalProxyProvider(string username, string password)
    {
        if (string.IsNullOrWhiteSpace(username))
        {
            throw new ArgumentException("IPRoyal username is required.", nameof(username));
        }

        if (string.IsNullOrWhiteSpace(password))
        {
            throw new ArgumentException("IPRoyal password is required.", nameof(password));
        }

        _username = username;
        _password = password;
    }

    /// <inheritdoc />
    public string Name => "iproyal";

    /// <summary>
    /// Composes a lease by encoding the request's country and, for sticky
    /// requests, the session id and lifetime into the password. No network call
    /// is made.
    /// </summary>
    /// <param name="request">Country, stickiness, session id and lifetime wanted.</param>
    /// <returns>
    /// A lease on the fixed gateway. Sticky leases carry the session id and an
    /// expiry computed from the whole-minute lifetime; rotating leases carry neither.
    /// </returns>
    public ProxyLease Acquire(ProxyRequest request)
    {
        // Country goes first; the router picks one at random when several are listed.
        var countryPart = string.IsNullOrWhiteSpace(request.Country)
            ? string.Empty
            : $"_country-{request.Country.Trim().ToLowerInvariant()}";

        if (!request.Sticky)
        {
            return new ProxyLease(
                Host: GatewayHost,
                Port: GatewayPort,
                Username: _username,
                Password: _password + countryPart,
                Provider: Name,
                SessionId: null,
                ExpiresAt: null);
        }

        var sessionId = request.SessionId ?? NewSessionId();
        var lifetime = request.Lifetime ?? DefaultLifetime;

        // IPRoyal expresses lifetime as whole minutes (e.g. "_lifetime-30m"),
        // and anything rounding below one minute is raised to one.
        var minutes = Math.Max(1, (int)Math.Round(lifetime.TotalMinutes));
        var stickyPart = $"_session-{sessionId}_lifetime-{minutes}m";

        return new ProxyLease(
            Host: GatewayHost,
            Port: GatewayPort,
            Username: _username,
            Password: _password + countryPart + stickyPart,
            Provider: Name,
            SessionId: sessionId,
            ExpiresAt: DateTimeOffset.UtcNow.AddMinutes(minutes));
    }

    // Short, URL/param-safe token: the first 10 hex characters of a new GUID.
    // IPRoyal treats the session value opaquely; it only needs to be stable for
    // the duration of a sticky lease.
    private static string NewSessionId()
    {
        var hex = Guid.NewGuid().ToString("N");
        return hex.Substring(0, 10);
    }
}
|
||||
29
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyLease.cs
Normal file
29
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyLease.cs
Normal file
@@ -0,0 +1,29 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// A concrete, ready-to-use proxy endpoint returned by an
/// <see cref="IProxyProvider"/>. It is the only proxy type the browser layer
/// sees, so swapping providers (or mixing several in a grab-bag) never touches
/// the Selenium code. <see cref="Username"/> and <see cref="Password"/> are the
/// literal credentials to present to the gateway — for providers like IPRoyal
/// the targeting/session parameters are already baked into them.
/// </summary>
/// <param name="Host">Gateway host, e.g. "geo.iproyal.com".</param>
/// <param name="Port">Gateway port, e.g. 12321.</param>
/// <param name="Username">Credential username for the gateway.</param>
/// <param name="Password">Credential password (may carry encoded session/geo params).</param>
/// <param name="Provider">Name of the provider that issued this lease.</param>
/// <param name="SessionId">The sticky session key, if this is a pinned IP.</param>
/// <param name="ExpiresAt">When a sticky IP may be recycled; null if rotating/unbounded.</param>
public sealed record ProxyLease(
    string Host,
    int Port,
    string Username,
    string Password,
    string Provider,
    string? SessionId = null,
    DateTimeOffset? ExpiresAt = null)
{
    /// <summary>The "host:port" form used by browser proxy settings.</summary>
    public string Endpoint
    {
        get { return string.Concat(Host, ":", Port.ToString()); }
    }
}
|
||||
97
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs
Normal file
97
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyProbe.cs
Normal file
@@ -0,0 +1,97 @@
|
||||
using System.Text.Json;
|
||||
using BlueLaminate.Scraper.Browser;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using OpenQA.Selenium;
|
||||
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// Describes the exit IP a proxy lease actually resolves to, as reported by
/// ipinfo.io.
/// </summary>
/// <param name="Ip">Exit IP address.</param>
/// <param name="City">City reported for the exit IP.</param>
/// <param name="Region">Region reported for the exit IP.</param>
/// <param name="Country">Country reported for the exit IP.</param>
/// <param name="Org">
/// ASN + organisation, e.g. "AS7922 Comcast Cable". This is the tell for
/// residential vs. datacenter: a consumer ISP here means a real residential
/// exit; a hosting provider (OVH, Hetzner, AWS…) means datacenter dressed up.
/// </param>
/// <param name="Hostname">Hostname reported for the exit IP, if any.</param>
/// <param name="Timezone">Timezone reported for the exit IP.</param>
public sealed record ProxyExitInfo(
    string? Ip,
    string? City,
    string? Region,
    string? Country,
    string? Org,
    string? Hostname,
    string? Timezone);
|
||||
|
||||
/// <summary>
/// Smallest possible end-to-end check of the proxy plumbing: acquire a lease,
/// launch the real browser through it, and read back the exit IP from an
/// IP-echo endpoint. Costs a few KB, so it's the right first thing to run
/// against a metered residential plan — it proves auth works and shows whether
/// the IP is genuinely residential before we spend bandwidth on CSFloat.
/// </summary>
public sealed class ProxyProbe
{
    private const string IpEchoUrl = "https://ipinfo.io/json";

    // The echo endpoint emits lower-case keys ("ip", "city", …) while the
    // record's parameters are PascalCase, so binding must ignore case.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    private readonly IProxyProvider _provider;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<ProxyProbe> _logger;

    /// <summary>Creates a probe over the given provider and browser factory.</summary>
    /// <param name="provider">Source of proxy leases.</param>
    /// <param name="factory">Builds the WebDriver that is routed through the lease.</param>
    /// <param name="logger">Receives lease and exit-IP diagnostics.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public ProxyProbe(
        IProxyProvider provider,
        BrowserDriverFactory factory,
        ILogger<ProxyProbe> logger)
    {
        // Fail at construction instead of with a NullReferenceException mid-probe.
        ArgumentNullException.ThrowIfNull(provider);
        ArgumentNullException.ThrowIfNull(factory);
        ArgumentNullException.ThrowIfNull(logger);

        _provider = provider;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Acquires a lease for <paramref name="request"/>, opens the IP-echo URL in
    /// a browser routed through it, and returns the parsed exit details. The
    /// driver is always quit afterwards, whether or not the probe succeeds.
    /// </summary>
    /// <param name="request">The kind of exit IP to ask the provider for.</param>
    /// <returns>
    /// The exit details parsed from the echo response. A warning is logged when
    /// the response carried no IP; the (then empty) record is still returned.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The page yielded no text, no JSON object could be located in it, or the
    /// JSON deserialised to null.
    /// </exception>
    /// <exception cref="JsonException">The extracted text is not valid JSON.</exception>
    public async Task<ProxyExitInfo> RunAsync(ProxyRequest request)
    {
        ArgumentNullException.ThrowIfNull(request);

        var lease = _provider.Acquire(request);
        _logger.LogInformation(
            "Acquired {Provider} lease (exit {Mode}).",
            lease.Provider, lease.SessionId is null ? "rotating" : $"sticky:{lease.SessionId}");

        var driver = await _factory.CreateAsync(lease, blockImages: true);
        try
        {
            driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(60);
            driver.Navigate().GoToUrl(IpEchoUrl);

            // Read the document's text rather than the DOM so the browser's
            // built-in JSON viewer doesn't get in the way, then carve out the
            // JSON object it rendered.
            var rendered = ((IJavaScriptExecutor)driver)
                .ExecuteScript("return document.documentElement.innerText;") as string
                ?? throw new InvalidOperationException("Browser returned no page text.");

            var json = ExtractJson(rendered);
            var info = JsonSerializer.Deserialize<ProxyExitInfo>(json, JsonOptions)
                ?? throw new InvalidOperationException("IP-echo response was empty.");

            if (string.IsNullOrWhiteSpace(info.Ip))
            {
                // A JSON body without an "ip" field (for instance an error
                // payload) binds to an all-null record. Don't report that as
                // a successful probe.
                _logger.LogWarning(
                    "IP-echo response contained no exit IP; the probe did not confirm the proxy. Response: {Response}",
                    json);
            }
            else
            {
                _logger.LogInformation(
                    "Exit IP {Ip} — {City}, {Region}, {Country} — {Org}",
                    info.Ip, info.City, info.Region, info.Country, info.Org);
            }

            return info;
        }
        finally
        {
            // Always tear the browser down so a failed probe leaves no process behind.
            driver.Quit();
        }
    }

    /// <summary>
    /// Returns the span from the first '{' to the last '}' of
    /// <paramref name="text"/>, dropping anything rendered around the JSON.
    /// </summary>
    /// <exception cref="InvalidOperationException">No such span exists.</exception>
    private static string ExtractJson(string text)
    {
        var start = text.IndexOf('{');
        var end = text.LastIndexOf('}');
        if (start < 0 || end <= start)
        {
            throw new InvalidOperationException($"No JSON found in IP-echo response: {text}");
        }

        return text[start..(end + 1)];
    }
}
|
||||
30
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyRequest.cs
Normal file
30
BlueLaminate/BlueLaminate.Scraper/Proxies/ProxyRequest.cs
Normal file
@@ -0,0 +1,30 @@
|
||||
namespace BlueLaminate.Scraper.Proxies;
|
||||
|
||||
/// <summary>
/// Provider-agnostic description of the exit IP a caller wants. Each
/// <see cref="IProxyProvider"/> maps these settings onto its own gateway
/// syntax. Sticky requests pin a single residential IP for the lifetime of the
/// session; non-sticky requests allow the IP to rotate on every connection.
/// </summary>
/// <param name="Country">
/// ISO 3166-1 alpha-2 code, or a comma-separated list from which the provider
/// picks one at random (e.g. "us" or "us,gb,de"). Null applies no geo
/// constraint.
/// </param>
/// <param name="Sticky">
/// True keeps one exit IP for the whole session; false rotates it.
/// </param>
/// <param name="SessionId">
/// Caller-chosen session key for a sticky lease. If null while
/// <paramref name="Sticky"/> is true, the provider generates one.
/// </param>
/// <param name="Lifetime">
/// How long a sticky IP is held before the provider may recycle it. Has no
/// effect when <paramref name="Sticky"/> is false. Null defers to the
/// provider's own default.
/// </param>
public sealed record ProxyRequest(
    string? Country = null, bool Sticky = true,
    string? SessionId = null, TimeSpan? Lifetime = null);
|
||||
@@ -3,6 +3,8 @@ namespace BlueLaminate.Scraper.Skins;
|
||||
/// <summary>A single CS2 skin from the CSGO-API static catalogue (skins.json).</summary>
|
||||
/// <param name="Id">Stable catalogue id, e.g. "skin-e757fd7191f9". Globally unique natural key.</param>
|
||||
/// <param name="WeaponName">Owning weapon, e.g. "AK-47", "Hand Wraps", "Bayonet".</param>
|
||||
/// <param name="DefIndex">CS weapon definition index (weapon.weapon_id), e.g. AK-47=7. Null if absent.</param>
|
||||
/// <param name="PaintIndex">Paint index identifying the finish, e.g. 985. Null if absent.</param>
|
||||
/// <param name="Category">Weapon category, e.g. "Rifles", "Knives", "Gloves". Becomes the weapon type.</param>
|
||||
/// <param name="Team">"CT", "T", or "Both".</param>
|
||||
/// <param name="Name">Skin/pattern name, e.g. "Dragon Lore"; "Vanilla" for knives with no finish.</param>
|
||||
@@ -17,6 +19,8 @@ namespace BlueLaminate.Scraper.Skins;
|
||||
public sealed record CatalogSkin(
|
||||
string Id,
|
||||
string WeaponName,
|
||||
int? DefIndex,
|
||||
int? PaintIndex,
|
||||
string Category,
|
||||
string Team,
|
||||
string Name,
|
||||
|
||||
@@ -48,6 +48,8 @@ public sealed class SkinCatalogClient
|
||||
return new CatalogSkin(
|
||||
Id: dto.Id,
|
||||
WeaponName: dto.Weapon?.Name ?? "Unknown",
|
||||
DefIndex: dto.Weapon?.WeaponId,
|
||||
PaintIndex: dto.PaintIndex,
|
||||
Category: dto.Category?.Name ?? "Unknown",
|
||||
Team: MapTeam(dto.Team?.Id),
|
||||
// Knives with no finish carry a null pattern; "Vanilla" is the community term.
|
||||
@@ -88,9 +90,11 @@ public sealed class SkinCatalogClient
|
||||
string Id,
|
||||
string? Name,
|
||||
string? Description,
|
||||
NamedDto? Weapon,
|
||||
WeaponDto? Weapon,
|
||||
NamedDto? Category,
|
||||
NamedDto? Pattern,
|
||||
// Top-level paint index. AllowReadingFromString handles its string form.
|
||||
int? PaintIndex,
|
||||
decimal? MinFloat,
|
||||
decimal? MaxFloat,
|
||||
NamedDto? Rarity,
|
||||
@@ -102,4 +106,7 @@ public sealed class SkinCatalogClient
|
||||
List<NamedDto>? Crates);
|
||||
|
||||
// Generic id/name pair reused for nested catalogue objects such as Category, Pattern, Rarity and Crates.
private sealed record NamedDto(string? Id, string? Name);
|
||||
|
||||
// Weapon carries a numeric weapon_id (the def_index) alongside id/name.
|
||||
// Consumed when building a CatalogSkin: Name becomes WeaponName and WeaponId becomes DefIndex.
private sealed record WeaponDto(string? Id, int? WeaponId, string? Name);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user