add csfloat api usage

This commit is contained in:
bob
2026-05-29 22:08:32 -05:00
parent b51f1d9f5f
commit d1752b1b07
37 changed files with 6095 additions and 22 deletions

View File

@@ -0,0 +1,139 @@
using System.Text;
using BlueLaminate.Scraper.Browser;
using BlueLaminate.Scraper.Proxies;
using Microsoft.Extensions.Logging;
using OpenQA.Selenium;
namespace BlueLaminate.Scraper.CsFloat;
/// <summary>
/// Phase-B discovery tool. Drives a real browser through a proxy lease to a
/// CSFloat page, then records every CSFloat <c>/api/</c> response that has a
/// body to disk while a human clicks around (open a listing → "Latest Sales").
/// We don't yet know CSFloat's exact endpoints or DOM selectors, so a
/// human-in-the-loop is the cheapest way to surface the real traffic: the tool
/// just listens and dumps, the operator drives the UI in the visible window.
/// Once the captured shapes are visible, navigation can be automated and the
/// tables designed.
/// </summary>
public sealed class CsFloatCaptureService
{
    // JSON must not carry a byte-order mark (RFC 8259 §8.1). Encoding.UTF8 emits
    // one when used with File.WriteAllText, so captures are encoded with this
    // BOM-less instance instead.
    private static readonly UTF8Encoding Utf8NoBom = new(encoderShouldEmitUTF8Identifier: false);

    private readonly IProxyProvider _provider;
    private readonly BrowserDriverFactory _factory;
    private readonly ILogger<CsFloatCaptureService> _logger;

    public CsFloatCaptureService(
        IProxyProvider provider,
        BrowserDriverFactory factory,
        ILogger<CsFloatCaptureService> logger)
    {
        _provider = provider;
        _factory = factory;
        _logger = logger;
    }

    /// <summary>
    /// Opens <paramref name="url"/> through the proxy and captures CSFloat API
    /// responses to <paramref name="outputDir"/> until <paramref name="browseUntilDone"/>
    /// completes (the CLI ties that to the operator pressing Enter). When
    /// <paramref name="diagnose"/> is true, every response whose URL contains
    /// "csfloat" is logged (status + type + url) to reveal where a login wall appears.
    /// </summary>
    /// <param name="url">Page to navigate to once the listener is attached.</param>
    /// <param name="outputDir">Directory for the capture files; created if missing.</param>
    /// <param name="request">Proxy request handed to the provider to obtain a lease.</param>
    /// <param name="loadImages">When false, the driver is created with image blocking on.</param>
    /// <param name="diagnose">Log every CSFloat response, not just captured ones.</param>
    /// <param name="browseUntilDone">Awaited after navigation; capture stops when it completes.</param>
    /// <returns>
    /// The number of responses actually written to disk. A response whose write
    /// failed is logged and not counted.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="browseUntilDone"/> is null.</exception>
    public async Task<int> RunAsync(
        string url,
        string outputDir,
        ProxyRequest request,
        bool loadImages,
        bool diagnose,
        Func<Task> browseUntilDone)
    {
        // Fail before a browser is launched rather than after navigation.
        ArgumentNullException.ThrowIfNull(browseUntilDone);

        Directory.CreateDirectory(outputDir);

        // NOTE(review): the lease is never released here — confirm whether the
        // provider or the driver owns its lifetime.
        var lease = _provider.Acquire(request);
        var driver = await _factory.CreateAsync(lease, blockImages: !loadImages);

        // `sequence` numbers the files (unique even when a write fails, so a gap
        // in the numbering marks a failed capture); `written` counts successes
        // and is what the method returns.
        var sequence = 0;
        var written = 0;

        void OnResponse(object? sender, NetworkResponseReceivedEventArgs e)
        {
            var responseUrl = e.ResponseUrl;
            if (string.IsNullOrEmpty(responseUrl)
                || !responseUrl.Contains("csfloat", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            // Diagnose mode logs every CSFloat response — including the SPA
            // shell, redirects and any 401/403 — so we can see exactly where a
            // Steam-login wall appears even before any /api/ call fires.
            if (diagnose)
            {
                _logger.LogInformation("[{Status}] {Type} {Url}",
                    e.ResponseStatusCode, e.ResponseResourceType, responseUrl);
            }

            // Only API calls get written to disk; skip the shell, images, fonts,
            // analytics, etc. Matches both api.csfloat.com/api/ and csfloat.com/api/.
            if (!responseUrl.Contains("/api/", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            var body = e.ResponseBody;
            if (string.IsNullOrWhiteSpace(body))
            {
                // Body wasn't buffered (e.g. the known Fetch interception race).
                // Log the endpoint so we still learn it exists even if empty.
                _logger.LogWarning("No body captured for {Url} (status {Status}).",
                    responseUrl, e.ResponseStatusCode);
                return;
            }

            try
            {
                // Interlocked because the handler is not guaranteed to run on a
                // single thread.
                var n = Interlocked.Increment(ref sequence);
                var fileName = $"{n:D3}_{Sanitize(responseUrl)}.json";
                var payload = Utf8NoBom.GetBytes(body);
                File.WriteAllBytes(Path.Combine(outputDir, fileName), payload);

                // Count only after the bytes are on disk.
                Interlocked.Increment(ref written);
                _logger.LogInformation(
                    "Captured #{N} [{Status}] {Url} → {File} ({Bytes} bytes).",
                    n, e.ResponseStatusCode, responseUrl, fileName, payload.Length);
            }
            catch (Exception ex)
            {
                // Best effort: one failed capture must not end the session.
                _logger.LogWarning(ex, "Failed to write capture for {Url}.", responseUrl);
            }
        }

        try
        {
            // Inside the outer try so the browser is shut down even if obtaining
            // the network interface or subscribing throws.
            var network = driver.Manage().Network;
            network.NetworkResponseReceived += OnResponse;
            try
            {
                _logger.LogInformation("Navigating to {Url}", url);
                driver.Navigate().GoToUrl(url);
                await browseUntilDone();
            }
            finally
            {
                network.NetworkResponseReceived -= OnResponse;
            }
        }
        finally
        {
            driver.Quit();
        }

        return Volatile.Read(ref written);
    }

    // Turn a URL into a filesystem-safe, readable, length-capped file stem so the
    // captures are self-describing (the endpoint is visible in the filename).
    // Letters, digits, '-' and '.' are kept; everything else becomes '_'; the
    // result is cut to at most 120 characters.
    private static string Sanitize(string url)
    {
        var trimmed = url
            .Replace("https://", "", StringComparison.OrdinalIgnoreCase)
            .Replace("http://", "", StringComparison.OrdinalIgnoreCase);

        var sb = new StringBuilder(trimmed.Length);
        foreach (var c in trimmed)
        {
            sb.Append(char.IsLetterOrDigit(c) || c is '-' or '.' ? c : '_');
        }

        var stem = sb.ToString();
        return stem.Length > 120 ? stem[..120] : stem;
    }
}

View File

@@ -0,0 +1,47 @@
namespace BlueLaminate.Scraper.CsFloat;
/// <summary>
/// A single active CSFloat listing, flattened from the API's listing+item shape
/// to the fields this project cares about. Prices arrive from CSFloat as integer
/// cents and are exposed here as a USD <see cref="decimal"/> (cents divided by
/// 100, so the fractional part is kept) so callers never deal in cents. This is
/// a read model for the official, documented <c>GET /api/v1/listings</c>
/// endpoint — active listings only, not sales.
/// </summary>
/// <param name="ListingId">CSFloat listing id (stable while the listing is live).</param>
/// <param name="CreatedAt">When the listing was created.</param>
/// <param name="Type">"buy_now" or "auction".</param>
/// <param name="Price">Asking price in USD, including cents (e.g. 12.34).</param>
/// <param name="MarketHashName">Canonical item name, e.g. "M4A4 | Cyber Security (Field-Tested)".</param>
/// <param name="DefIndex">Weapon definition index (maps to catalog weapon_id).</param>
/// <param name="PaintIndex">Paint index (maps to catalog paint_index).</param>
/// <param name="PaintSeed">Pattern seed.</param>
/// <param name="FloatValue">Exact float/wear value.</param>
/// <param name="WearName">Wear bucket name, e.g. "Field-Tested"; null when not reported.</param>
/// <param name="IsStatTrak">StatTrak™ variant.</param>
/// <param name="IsSouvenir">Souvenir variant.</param>
/// <param name="StickerCount">Number of stickers applied.</param>
/// <param name="SellerSteamId">Seller's SteamID64; null when not reported.</param>
/// <param name="InspectLink">In-game inspect link; null when not reported.</param>
/// <param name="AssetId">
/// Steam asset id of this specific copy. Changes when the item trades, so it is
/// NOT a stable item identity — but two live listings sharing a fingerprint
/// (skin+float+seed+ST/souvenir) yet showing different asset ids are the
/// signature of a duplicated ("duped") item.
/// </param>
public sealed record CsFloatListing(
string ListingId,
DateTimeOffset CreatedAt,
string Type,
decimal Price,
string MarketHashName,
int DefIndex,
int PaintIndex,
int PaintSeed,
decimal FloatValue,
string? WearName,
bool IsStatTrak,
bool IsSouvenir,
int StickerCount,
string? SellerSteamId,
string? InspectLink,
string? AssetId);

View File

@@ -0,0 +1,277 @@
using System.Net;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Logging;
namespace BlueLaminate.Scraper.CsFloat;
/// <summary>
/// Raised when CSFloat answers with a non-success status (bad/missing key, rate
/// limit, etc.), letting the CLI show a clear message rather than a raw HTTP
/// failure. The message embeds the numeric status, its name and the response body.
/// </summary>
public sealed class CsFloatApiException : Exception
{
    /// <summary>Builds the exception from the HTTP status and the response body text.</summary>
    /// <param name="status">HTTP status CSFloat returned.</param>
    /// <param name="body">Response body, placed verbatim at the end of the message.</param>
    public CsFloatApiException(HttpStatusCode status, string body)
        : base($"CSFloat API returned {(int)status} {status}: {body}")
    {
        Status = status;
    }

    /// <summary>HTTP status CSFloat returned.</summary>
    public HttpStatusCode Status { get; }
}
/// <summary>
/// One page of listings plus the opaque cursor to pass back for the next page.
/// </summary>
/// <param name="Listings">Listings on this page, in the order the API returned them.</param>
/// <param name="Cursor">Cursor for the following page; null once there are no more pages.</param>
public sealed record ListingsPageResult(IReadOnlyList<CsFloatListing> Listings, string? Cursor);
/// <summary>
/// Client for CSFloat's official, documented <c>GET /api/v1/listings</c> endpoint
/// (active listings). Authenticates with a developer API key via the
/// <c>Authorization</c> header, filters by def_index/paint_index, and walks the
/// cursor-based pagination. This is the supported path the user opted into — no
/// proxy or browser involved. Docs: https://docs.csfloat.com/
/// </summary>
public sealed class CsFloatListingsClient
{
    private const string BaseUrl = "https://csfloat.com/api/v1/listings";
    private const int MaxLimit = 50; // API hard cap per page.

    // Prefixes that mark a header as rate-limit related. Everything after the
    // last occurrence is treated as the metric name (limit/remaining/reset).
    private static readonly string[] RateLimitMarkers = ["rate-limit", "ratelimit"];

    private static readonly JsonSerializerOptions Options = new()
    {
        // CSFloat uses snake_case for item fields (market_hash_name, float_value,
        // def_index, …). Without this policy, multi-word fields silently
        // deserialize to defaults while single-word ones slip through on
        // case-insensitivity — exactly the "prices but no floats/names" symptom.
        PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
        PropertyNameCaseInsensitive = true,
        NumberHandling = JsonNumberHandling.AllowReadingFromString,
    };

    private readonly HttpClient _http;
    private readonly string _apiKey;
    private readonly ILogger<CsFloatListingsClient> _logger;

    /// <summary>Creates a client that sends <paramref name="apiKey"/> with every request.</summary>
    /// <param name="http">HTTP client used for all requests; not disposed by this class.</param>
    /// <param name="apiKey">CSFloat developer API key.</param>
    /// <param name="logger">Receives per-page progress and rate-limit state.</param>
    /// <exception cref="ArgumentException"><paramref name="apiKey"/> is null, empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="http"/> or <paramref name="logger"/> is null.</exception>
    public CsFloatListingsClient(HttpClient http, string apiKey, ILogger<CsFloatListingsClient> logger)
    {
        if (string.IsNullOrWhiteSpace(apiKey))
        {
            throw new ArgumentException("CSFloat API key is required.", nameof(apiKey));
        }

        ArgumentNullException.ThrowIfNull(http);
        ArgumentNullException.ThrowIfNull(logger);

        _http = http;
        _apiKey = apiKey;
        _logger = logger;
    }

    /// <summary>
    /// Rate-limit state from the most recent response (success or failure).
    /// <see cref="CsFloatRateLimit.None"/> until the first request completes.
    /// </summary>
    public CsFloatRateLimit LastRateLimit { get; private set; } = CsFloatRateLimit.None;

    /// <summary>
    /// Fetches active listings for one skin (by def_index/paint_index), following
    /// the cursor until there are no more pages or <paramref name="maxListings"/>
    /// is reached. <paramref name="maxListings"/> guards against pulling an
    /// unbounded result set during the spike.
    /// </summary>
    /// <param name="defIndex">Weapon definition index to filter on.</param>
    /// <param name="paintIndex">Paint index to filter on.</param>
    /// <param name="sortBy">Value for the API's <c>sort_by</c> parameter.</param>
    /// <param name="maxListings">Upper bound on returned listings; zero or less returns an empty list without calling the API.</param>
    /// <param name="type">Listing type filter; null or empty includes every type.</param>
    /// <param name="ct">Cancels the in-flight request.</param>
    /// <returns>At most <paramref name="maxListings"/> listings, in API order.</returns>
    /// <exception cref="CsFloatApiException">CSFloat returned a non-success status.</exception>
    public async Task<IReadOnlyList<CsFloatListing>> GetListingsAsync(
        int defIndex,
        int paintIndex,
        string sortBy = "lowest_price",
        int maxListings = 50,
        string? type = "buy_now",
        CancellationToken ct = default)
    {
        var results = new List<CsFloatListing>();

        // FetchPageAsync clamps the page size up to 1, so without this guard a
        // request for zero listings would still spend a call and return one.
        if (maxListings <= 0)
        {
            return results;
        }

        string? cursor = null;
        do
        {
            var remaining = maxListings - results.Count;
            var limit = Math.Min(MaxLimit, remaining);
            var page = await FetchPageAsync(defIndex, paintIndex, sortBy, limit, cursor, type, ct);

            // The API is expected to honour `limit`; Take() keeps the documented
            // cap even if a page comes back larger than requested.
            results.AddRange(page.Listings.Take(remaining));
            _logger.LogInformation(
                "Fetched {PageCount} listings (total {Total}); cursor {Cursor}.",
                page.Listings.Count, results.Count, string.IsNullOrEmpty(page.Cursor) ? "—" : "present");

            cursor = page.Cursor;
            // Stop when the API signals the end (no cursor) or returns an empty page.
            if (string.IsNullOrEmpty(cursor) || page.Listings.Count == 0)
            {
                break;
            }
        }
        while (results.Count < maxListings);

        return results;
    }

    /// <summary>
    /// Fetches a single page of listings and the cursor for the next page. The
    /// sweep runner drives this directly so it can decide — between pages — when
    /// to stop (already-seen listings) or pace (rate-limit headers). Filters are
    /// optional: omit def_index/paint_index for a global sweep across all items.
    /// </summary>
    /// <param name="defIndex">Optional weapon definition index filter.</param>
    /// <param name="paintIndex">Optional paint index filter.</param>
    /// <param name="sortBy">Value for the API's <c>sort_by</c> parameter.</param>
    /// <param name="limit">Requested page size; clamped to 1..50.</param>
    /// <param name="cursor">Cursor from the previous page; null or empty for the first page.</param>
    /// <param name="type">Listing type filter; null or empty includes every type.</param>
    /// <param name="ct">Cancels the in-flight request.</param>
    /// <exception cref="CsFloatApiException">CSFloat returned a non-success status.</exception>
    public Task<ListingsPageResult> FetchPageAsync(
        int? defIndex,
        int? paintIndex,
        string sortBy,
        int limit,
        string? cursor,
        string? type = "buy_now",
        CancellationToken ct = default)
    {
        var query = new List<string>
        {
            $"sort_by={Uri.EscapeDataString(sortBy)}",
            $"limit={Math.Clamp(limit, 1, MaxLimit)}",
        };

        // Default to fixed-price listings only; auctions have no firm sale price
        // and aren't wanted. Pass type=null to include everything.
        if (!string.IsNullOrEmpty(type))
        {
            query.Add($"type={Uri.EscapeDataString(type)}");
        }

        if (defIndex is { } def)
        {
            query.Add($"def_index={def}");
        }

        if (paintIndex is { } paint)
        {
            query.Add($"paint_index={paint}");
        }

        if (!string.IsNullOrEmpty(cursor))
        {
            query.Add($"cursor={Uri.EscapeDataString(cursor)}");
        }

        return SendPageAsync(query, ct);
    }

    // Sends one GET with the assembled query, records rate-limit state, and maps
    // the payload. Throws CsFloatApiException (body truncated to 500 chars) on
    // any non-success status.
    private async Task<ListingsPageResult> SendPageAsync(List<string> query, CancellationToken ct)
    {
        var url = $"{BaseUrl}?{string.Join('&', query)}";
        using var request = new HttpRequestMessage(HttpMethod.Get, url);
        // CSFloat expects the raw key in the Authorization header (no scheme).
        request.Headers.TryAddWithoutValidation("Authorization", _apiKey);

        using var response = await _http.SendAsync(request, ct);
        var body = await response.Content.ReadAsStringAsync(ct);

        // Always record rate-limit state, even on failure — a 429 is exactly when
        // these headers (and Retry-After) matter most.
        LastRateLimit = ParseRateLimit(response);
        _logger.LogInformation("{RateLimit}", LastRateLimit);

        if (!response.IsSuccessStatusCode)
        {
            throw new CsFloatApiException(response.StatusCode, Truncate(body));
        }

        var page = Parse(body);
        return new ListingsPageResult(page.Data.Select(Map).ToList(), page.Cursor);
    }

    // Pull rate-limit info from response headers without assuming exact names:
    // collect every header containing "ratelimit"/"rate-limit" (case-insensitive)
    // plus Retry-After, then best-effort map the common remaining/limit/reset
    // fields. The full set is kept in Raw so the spike reveals the real names.
    private static CsFloatRateLimit ParseRateLimit(HttpResponseMessage response)
    {
        var raw = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        // Scan both response and content headers — servers split them either way.
        var all = response.Headers.AsEnumerable();
        if (response.Content is not null)
        {
            all = all.Concat(response.Content.Headers);
        }

        foreach (var header in all)
        {
            var name = header.Key;
            var isRateLimit = name.Contains("ratelimit", StringComparison.OrdinalIgnoreCase)
                || name.Contains("rate-limit", StringComparison.OrdinalIgnoreCase)
                || name.Equals("Retry-After", StringComparison.OrdinalIgnoreCase);
            if (isRateLimit)
            {
                raw[name] = string.Join(",", header.Value);
            }
        }

        if (raw.Count == 0)
        {
            return CsFloatRateLimit.None;
        }

        return new CsFloatRateLimit(
            Limit: FindInt(raw, "limit"),
            Remaining: FindInt(raw, "remaining"),
            Reset: Find(raw, "reset"),
            // Retry-After may be delta-seconds or an HTTP-date; the typed header
            // covers the date form that a plain integer parse would drop.
            RetryAfter: FindInt(raw, "retry-after") ?? RetryAfterSeconds(response),
            Raw: raw);
    }

    // Seconds to wait according to the typed Retry-After header: the delta when
    // given, otherwise the distance from now to the date (never negative).
    // Null when the header is absent or unparseable.
    private static int? RetryAfterSeconds(HttpResponseMessage response)
    {
        var retryAfter = response.Headers.RetryAfter;
        if (retryAfter is null)
        {
            return null;
        }

        var wait = retryAfter.Delta ?? (retryAfter.Date - DateTimeOffset.UtcNow);
        if (wait is not { } span)
        {
            return null;
        }

        return (int)Math.Clamp(Math.Ceiling(span.TotalSeconds), 0, int.MaxValue);
    }

    // Returns the value of the first header whose METRIC part contains the
    // token. Matching on the whole name is wrong for "limit": every
    // "X-RateLimit-*" header contains it via the "RateLimit" prefix, so the
    // reset header could be reported as the limit depending on header order.
    private static string? Find(IReadOnlyDictionary<string, string> raw, string token)
    {
        foreach (var (name, value) in raw)
        {
            var metric = MetricPart(name);
            if (!metric.Contains(token, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // "remaining" must not match when looking for "limit".
            if (token == "limit" && metric.Contains("remaining", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            return value;
        }

        return null;
    }

    // Strips everything up to and including the last "rate-limit"/"ratelimit"
    // marker: "X-RateLimit-Reset" → "-Reset". Names without a marker (e.g.
    // "Retry-After") are returned unchanged.
    private static string MetricPart(string headerName)
    {
        foreach (var marker in RateLimitMarkers)
        {
            var at = headerName.LastIndexOf(marker, StringComparison.OrdinalIgnoreCase);
            if (at >= 0)
            {
                return headerName[(at + marker.Length)..];
            }
        }

        return headerName;
    }

    // Integer form of Find; null when the header is missing or not an integer.
    // Parsed with the invariant culture because header values are machine data.
    private static int? FindInt(IReadOnlyDictionary<string, string> raw, string token) =>
        int.TryParse(
            Find(raw, token),
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var v)
            ? v
            : null;

    // The endpoint may return either a bare array of listings or an object with
    // { data, cursor }. Detect which by the first non-whitespace character so the
    // spike works regardless of which shape the live API uses. The returned page
    // always has a non-null Data list, even when the payload is the JSON literal
    // null or an object without "data".
    private static ListingsPage Parse(string body)
    {
        var trimmed = body.TrimStart();
        if (trimmed.StartsWith('['))
        {
            var array = JsonSerializer.Deserialize<List<ListingDto>>(body, Options) ?? [];
            return new ListingsPage(array, null);
        }

        var page = JsonSerializer.Deserialize<ListingsPage>(body, Options);
        return new ListingsPage(page?.Data ?? [], page?.Cursor);
    }

    // Flattens the listing+item DTO into the public read model, substituting
    // defaults for anything the payload omitted.
    private static CsFloatListing Map(ListingDto dto)
    {
        var item = dto.Item ?? new ItemDto();
        return new CsFloatListing(
            ListingId: dto.Id ?? "",
            CreatedAt: dto.CreatedAt ?? default,
            Type: dto.Type ?? "buy_now",
            // CSFloat prices are integer cents.
            Price: dto.Price / 100m,
            MarketHashName: item.MarketHashName ?? "Unknown",
            DefIndex: item.DefIndex,
            PaintIndex: item.PaintIndex,
            PaintSeed: item.PaintSeed,
            FloatValue: item.FloatValue,
            WearName: item.WearName,
            // Either JSON spelling of the flag counts (see ItemDto).
            IsStatTrak: item.IsStatTrak || item.IsStattrak,
            IsSouvenir: item.IsSouvenir,
            StickerCount: item.Stickers?.Count ?? 0,
            SellerSteamId: dto.Seller?.SteamId,
            InspectLink: item.InspectLink,
            AssetId: item.AssetId);
    }

    // Caps error bodies at 500 characters so exception messages stay readable.
    private static string Truncate(string s) => s.Length <= 500 ? s : s[..500];

    private sealed record ListingsPage(
        [property: JsonPropertyName("data")] List<ListingDto> Data,
        [property: JsonPropertyName("cursor")] string? Cursor);

    private sealed record ListingDto(
        string? Id,
        DateTimeOffset? CreatedAt,
        string? Type,
        long Price,
        SellerDto? Seller,
        ItemDto? Item);

    private sealed record SellerDto(string? SteamId);

    private sealed record ItemDto
    {
        public string? MarketHashName { get; init; }
        public int DefIndex { get; init; }
        public int PaintIndex { get; init; }
        public int PaintSeed { get; init; }
        public decimal FloatValue { get; init; }
        public string? WearName { get; init; }

        // The naming policy maps this property to "is_stat_trak".
        public bool IsStatTrak { get; init; }

        // CSFloat's documented sample payload appears to spell the flag
        // "is_stattrak" (one word), which the policy-derived name above never
        // matches. Both spellings are accepted and OR-ed in Map.
        // NOTE(review): confirm the spelling against a live capture.
        [JsonPropertyName("is_stattrak")]
        public bool IsStattrak { get; init; }

        public bool IsSouvenir { get; init; }
        public string? InspectLink { get; init; }
        public string? AssetId { get; init; }
        public List<StickerDto>? Stickers { get; init; }
    }

    // Only the sticker count is consumed today.
    // NOTE(review): the individual field names are unverified against the live payload.
    private sealed record StickerDto(int StickerId, int Slot, string? Name);
}

View File

@@ -0,0 +1,33 @@
namespace BlueLaminate.Scraper.CsFloat;
/// <summary>
/// Snapshot of the rate-limit headers on a CSFloat API response. The official
/// docs don't pin down the exact header names, so the values are filled in
/// generically (any header whose name contains "ratelimit"/"rate-limit", plus
/// "retry-after") and the <see cref="Raw"/> map is kept so the real names
/// surface during the spike. A future catalog sweep uses
/// <see cref="Remaining"/>/<see cref="Reset"/> to pace requests and avoid 429s.
/// </summary>
/// <param name="Limit">Max requests allowed in the current window, if reported.</param>
/// <param name="Remaining">Requests left in the current window, if reported.</param>
/// <param name="Reset">Raw reset value as sent (epoch seconds or seconds-until — unverified).</param>
/// <param name="RetryAfter">Seconds to wait, from a Retry-After header (typically on 429).</param>
/// <param name="Raw">Every rate-limit-related header, verbatim, for inspection.</param>
public sealed record CsFloatRateLimit(
    int? Limit,
    int? Remaining,
    string? Reset,
    int? RetryAfter,
    IReadOnlyDictionary<string, string> Raw)
{
    /// <summary>Placeholder with no values and an empty header map.</summary>
    public static readonly CsFloatRateLimit None = new(
        Limit: null,
        Remaining: null,
        Reset: null,
        RetryAfter: null,
        Raw: new Dictionary<string, string>());

    /// <summary>True when the API reports zero (or fewer) requests remaining; false when unreported.</summary>
    public bool IsExhausted => Remaining.HasValue && Remaining.Value <= 0;

    /// <summary>Renders the raw headers as "rate-limit: name=value, …", or a marker when there are none.</summary>
    public override string ToString()
    {
        if (Raw.Count == 0)
        {
            return "rate-limit: (no headers)";
        }

        var pairs = Raw.Select(header => header.Key + "=" + header.Value);
        return $"rate-limit: {string.Join(", ", pairs)}";
    }
}