using BlueLaminate.EFCore.Data; using BlueLaminate.EFCore.Entities; using BlueLaminate.Scraper.CsFloat; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Logging; namespace BlueLaminate.Cli; /// How many API pages were fetched. /// Total listings returned across those pages. /// New listings inserted. /// Existing listings refreshed (price/last-seen/etc.). /// Listings flagged Removed (only on a complete pass). /// Listings resolved to a catalogue skin by def/paint. /// Why the sweep ended. public sealed record ListingSweepResult( int Pages, int Seen, int Inserted, int Updated, int Removed, int Linked, string StoppedReason); /// Catalogue skins fully paged this run. /// Skins left untouched (e.g. request budget ran out). public sealed record CatalogSweepResult( int SkinsCovered, int SkinsSkipped, int Pages, int Seen, int Inserted, int Updated, int Removed, string StoppedReason); /// /// Global incremental sweep of CSFloat active listings into the database. Pages /// sort_by=most_recent with no item filter, so it captures every listing — /// including items not in our catalogue. Each listing is upserted by its stable /// CSFloat id; / /// bound the observation window. /// /// Two things keep it safe against the 200-request rate limit and partial runs: /// /// Pacing. After each page it inspects the client's rate-limit /// headers; when remaining is low it sleeps until the reset epoch rather than /// risking a 429. /// Removed-tracking only on a complete pass. Marking unseen listings /// as Removed is only valid when the whole market was covered. A capped or /// incremental run that stops early must not do it, or it would falsely "sell" /// everything it didn't reach. /// /// public sealed class ListingSweepService { public const string Source = "listings"; public const string CatalogSource = "listings-catalog"; // Pace before the bucket is fully empty so a slightly-stale counter can't tip // us into a 429. private const int RateLimitSafetyMargin = 2; private readonly SkinTrackerDbContext _db; private readonly CsFloatListingsClient _client; private readonly ILogger _logger; public ListingSweepService( SkinTrackerDbContext db, CsFloatListingsClient client, ILogger logger) { _db = db; _client = client; _logger = logger; } /// Hard cap on API pages this run (rate-limit budget). /// Hard cap on listings ingested this run. /// /// Stop once a whole page is already-known listings (cheap daily delta). When /// false, keep paging until the cursor or a cap is exhausted (cold pass). /// /// Optional courtesy delay between pages. public async Task SweepAsync( int maxRequests = 4, int maxListings = 200, bool incremental = true, TimeSpan? delayBetweenPages = null, CancellationToken ct = default) { var now = DateTimeOffset.UtcNow; var pages = 0; var seen = 0; var inserted = 0; var updated = 0; var linked = 0; string? cursor = null; string stoppedReason = "cursor exhausted"; var completePass = true; // Catalogue lookup for best-effort skin linking, built once per run. var skinByIndex = await _db.Skins .Where(s => s.DefIndex != null && s.PaintIndex != null) .Select(s => new { s.Id, s.DefIndex, s.PaintIndex }) .ToDictionaryAsync(s => (s.DefIndex!.Value, s.PaintIndex!.Value), s => s.Id, ct); // Track which listing ids we touched this run, so a complete pass can flag // the rest as Removed. var touchedIds = new HashSet(); var touchedInstanceIds = new HashSet(); while (true) { if (pages >= maxRequests) { stoppedReason = $"hit max-requests cap ({maxRequests})"; completePass = false; break; } if (seen >= maxListings) { stoppedReason = $"hit max-listings cap ({maxListings})"; completePass = false; break; } ListingsPageResult page; try { page = await _client.FetchPageAsync( defIndex: null, paintIndex: null, sortBy: "most_recent", limit: 50, cursor: cursor, ct: ct); } catch (CsFloatApiException ex) { _logger.LogError("Sweep aborted: {Message}", ex.Message); stoppedReason = $"API error: {ex.Status}"; completePass = false; break; } pages++; seen += page.Listings.Count; var (ins, upd, link, allKnown) = await IngestPageAsync( page.Listings, skinByIndex, touchedIds, touchedInstanceIds, now, ct); inserted += ins; updated += upd; linked += link; _logger.LogInformation( "Page {Page}: {Count} listings ({Ins} new, {Upd} updated); {Rate}", pages, page.Listings.Count, ins, upd, _client.LastRateLimit); cursor = page.Cursor; // End of the market. if (string.IsNullOrEmpty(cursor) || page.Listings.Count == 0) { stoppedReason = "cursor exhausted"; break; } // Incremental short-circuit: a full page we already knew means we've // caught up to the previous sweep. This is a partial pass by design. if (incremental && allKnown) { stoppedReason = "reached already-seen listings (incremental)"; completePass = false; break; } await PaceAsync(delayBetweenPages, ct); } // Persist inserts/updates before computing Removed so the touched set is durable. await _db.SaveChangesAsync(ct); var removed = 0; if (completePass) removed = await MarkRemovedAsync(touchedIds, now, ct); else _logger.LogInformation("Partial pass — skipping Removed-tracking to avoid false sales."); await FlagDupesAsync(touchedInstanceIds, now, ct); await _db.ScrapeRuns.AddAsync( new ScrapeRun { Source = Source, RanAt = now, ItemCount = seen }, ct); await _db.SaveChangesAsync(ct); return new ListingSweepResult(pages, seen, inserted, updated, removed, linked, stoppedReason); } /// /// Catalogue-driven sweep: walk skins that have def/paint indexes and query /// each one's listings with a server-side def_index+paint_index filter. The /// API returns only that skin's listings, so no rate-limit budget is wasted on /// stickers/cases/agents — every request is productive weapon data. Because /// each skin is paged to completion, Removed-tracking is accurate per skin /// even when the overall run is capped: a skin we fully covered but whose old /// listing is now absent is genuinely gone. /// /// Hard cap on API pages across the whole run. /// Safety cap on pages-worth per skin. /// Optional courtesy delay between pages. public async Task SweepCatalogAsync( int maxRequests = 50, int maxListingsPerSkin = 500, TimeSpan? delayBetweenPages = null, CancellationToken ct = default) { var now = DateTimeOffset.UtcNow; // Least-recently-swept first (never-swept skins sort first because null // orders before any timestamp ascending). This is the cross-run resume: // a capped run always continues from where the previous one stopped, and // the stalest data refreshes first. var skins = await _db.Skins .Where(s => s.DefIndex != null && s.PaintIndex != null) .OrderBy(s => s.ListingsSweptAt) .Select(s => new { s.Id, Def = s.DefIndex!.Value, Paint = s.PaintIndex!.Value }) .ToListAsync(ct); var pages = 0; var seen = 0; var inserted = 0; var updated = 0; var removed = 0; var covered = 0; var stoppedReason = "all catalogue skins covered"; foreach (var skin in skins) { if (pages >= maxRequests) { stoppedReason = $"hit max-requests cap ({maxRequests})"; break; } // One-entry lookup so IngestPageAsync resolves SkinId to this skin. var lookup = new Dictionary<(int, int), int> { [(skin.Def, skin.Paint)] = skin.Id }; var touchedIds = new HashSet(); var touchedInstanceIds = new HashSet(); string? cursor = null; var skinComplete = true; var skinSeen = 0; while (true) { if (pages >= maxRequests) { stoppedReason = $"hit max-requests cap ({maxRequests})"; skinComplete = false; break; } ListingsPageResult page; try { page = await _client.FetchPageAsync( defIndex: skin.Def, paintIndex: skin.Paint, sortBy: "lowest_price", limit: 50, cursor: cursor, ct: ct); } catch (CsFloatApiException ex) { _logger.LogError("Catalogue sweep aborted on skin {SkinId}: {Message}", skin.Id, ex.Message); await _db.SaveChangesAsync(ct); return Finish($"API error: {ex.Status}"); } pages++; seen += page.Listings.Count; skinSeen += page.Listings.Count; var (ins, upd, _, _) = await IngestPageAsync( page.Listings, lookup, touchedIds, touchedInstanceIds, now, ct); inserted += ins; updated += upd; cursor = page.Cursor; if (string.IsNullOrEmpty(cursor) || page.Listings.Count == 0) break; if (skinSeen >= maxListingsPerSkin) { skinComplete = false; // didn't reach the end; don't mark Removed break; } await PaceAsync(delayBetweenPages, ct); } // Per-skin Removed-tracking + resume stamp: only when this skin was // paged to the end. A partial skin (hit the per-skin cap) is left with // its old ListingsSweptAt so the next run revisits it first. if (skinComplete) { removed += await MarkRemovedForSkinAsync(skin.Id, touchedIds, now, ct); await _db.Skins .Where(s => s.Id == skin.Id) .ExecuteUpdateAsync( setters => setters.SetProperty(s => s.ListingsSweptAt, now), ct); covered++; } // Persist this skin's listings/instances before dupe analysis so the // asset-id grouping query sees them. await _db.SaveChangesAsync(ct); await FlagDupesAsync(touchedInstanceIds, now, ct); await _db.SaveChangesAsync(ct); await PaceAsync(delayBetweenPages, ct); } await _db.ScrapeRuns.AddAsync( new ScrapeRun { Source = CatalogSource, RanAt = now, ItemCount = seen }, ct); await _db.SaveChangesAsync(ct); return Finish(stoppedReason); CatalogSweepResult Finish(string reason) => new(covered, skins.Count - covered, pages, seen, inserted, updated, removed, reason); } // Flag this skin's once-Active listings that we didn't see this run as Removed. private async Task MarkRemovedForSkinAsync( int skinId, HashSet touchedIds, DateTimeOffset now, CancellationToken ct) { return await _db.Listings .Where(l => l.SkinId == skinId && l.Status == ListingStatus.Active && !touchedIds.Contains(l.CsFloatListingId)) .ExecuteUpdateAsync( setters => setters .SetProperty(l => l.Status, ListingStatus.Removed) .SetProperty(l => l.RemovedAt, now), ct); } // Upsert a page of listings. Returns counts plus whether every listing on the // page already existed (the incremental stop signal). Also resolves each // listing to a SkinInstance (the physical item, by fingerprint) and records // the touched instance ids so the caller can run dupe detection over them. private async Task<(int Inserted, int Updated, int Linked, bool AllKnown)> IngestPageAsync( IReadOnlyList listings, IReadOnlyDictionary<(int, int), int> skinByIndex, HashSet touchedIds, HashSet touchedInstanceIds, DateTimeOffset now, CancellationToken ct) { if (listings.Count == 0) return (0, 0, 0, true); var ids = listings.Select(l => l.ListingId).ToList(); var existing = await _db.Listings .Where(l => ids.Contains(l.CsFloatListingId)) .ToDictionaryAsync(l => l.CsFloatListingId, ct); var inserted = 0; var updated = 0; var linked = 0; var allKnown = true; foreach (var l in listings) { touchedIds.Add(l.ListingId); int? skinId = skinByIndex.TryGetValue((l.DefIndex, l.PaintIndex), out var id) ? id : null; if (skinId is not null) linked++; // Resolve the physical item only when we know the skin — the // fingerprint is meaningless without it. var instance = skinId is { } sid ? await ResolveInstanceAsync(sid, l, now, ct) : null; if (instance is not null) touchedInstanceIds.Add(instance.Id); if (existing.TryGetValue(l.ListingId, out var row)) { // Refresh mutable fields. Price can change; a re-appeared listing // returns to Active. row.Price = l.Price; row.LastSeenAt = now; row.Status = ListingStatus.Active; row.RemovedAt = null; row.SkinId = skinId; row.AssetId = l.AssetId; row.SkinInstance = instance; updated++; } else { allKnown = false; var entity = MapToEntity(l, skinId, now); entity.SkinInstance = instance; _db.Listings.Add(entity); inserted++; } } return (inserted, updated, linked, allKnown); } // Find the SkinInstance matching this listing's fingerprint, or create one. // The fingerprint is (skin, full-precision float, seed, stattrak, souvenir). // It is deliberately NOT unique — duped copies share it — so a match may // already represent more than one physical item; dupe detection runs later. private async Task ResolveInstanceAsync( int skinId, CsFloatListing l, DateTimeOffset now, CancellationToken ct) { var seed = l.PaintSeed.ToString(); // Check the change-tracker first (an instance just added earlier this page // isn't queryable yet), then the database. var tracked = _db.ChangeTracker.Entries() .Select(e => e.Entity) .FirstOrDefault(i => i.SkinId == skinId && i.FloatValue == l.FloatValue && i.PaintSeed == seed && i.StatTrak == l.IsStatTrak && i.Souvenir == l.IsSouvenir); if (tracked is not null) { tracked.LastSeenAt = now; return tracked; } var instance = await _db.SkinInstances.FirstOrDefaultAsync( i => i.SkinId == skinId && i.FloatValue == l.FloatValue && i.PaintSeed == seed && i.StatTrak == l.IsStatTrak && i.Souvenir == l.IsSouvenir, ct); if (instance is not null) { instance.LastSeenAt = now; return instance; } instance = new SkinInstance { SkinId = skinId, FloatValue = l.FloatValue, PaintSeed = seed, StatTrak = l.IsStatTrak, Souvenir = l.IsSouvenir, FirstSeenAt = now, LastSeenAt = now, }; _db.SkinInstances.Add(instance); return instance; } private static Listing MapToEntity(CsFloatListing l, int? skinId, DateTimeOffset now) => new() { CsFloatListingId = l.ListingId, Type = l.Type, Price = l.Price, ListedAt = l.CreatedAt, AssetId = l.AssetId, DefIndex = l.DefIndex, PaintIndex = l.PaintIndex, MarketHashName = l.MarketHashName, WearName = l.WearName, FloatValue = l.FloatValue, PaintSeed = l.PaintSeed, IsStatTrak = l.IsStatTrak, IsSouvenir = l.IsSouvenir, StickerCount = l.StickerCount, SellerSteamId = l.SellerSteamId, InspectLink = l.InspectLink, SkinId = skinId, FirstSeenAt = now, LastSeenAt = now, Status = ListingStatus.Active, }; // Flag every currently-Active listing we did NOT see this run as Removed. // Only called after a complete pass. Done in a single set-based update to // avoid loading the whole table. private async Task MarkRemovedAsync( HashSet touchedIds, DateTimeOffset now, CancellationToken ct) { return await _db.Listings .Where(l => l.Status == ListingStatus.Active && !touchedIds.Contains(l.CsFloatListingId)) .ExecuteUpdateAsync( setters => setters .SetProperty(l => l.Status, ListingStatus.Removed) .SetProperty(l => l.RemovedAt, now), ct); } // Dupe detection. For each instance touched this run, count the DISTINCT // asset ids among its currently-Active listings. Two or more means the same // fingerprint (skin+float+seed+ST+souvenir) is live under multiple Steam // assets at once — the signature of a duplicated item, as opposed to an // ordinary trade (which retires the old listing before the new one appears, // leaving a single active asset). Flags freshly-detected dupes and stamps // when first seen, enabling "alert on fresh duping" downstream. private async Task FlagDupesAsync( HashSet instanceIds, DateTimeOffset now, CancellationToken ct) { if (instanceIds.Count == 0) return; // Instances (among those touched) with 2+ distinct active asset ids. var dupeInstanceIds = await _db.Listings .Where(l => l.SkinInstanceId != null && instanceIds.Contains(l.SkinInstanceId!.Value) && l.Status == ListingStatus.Active && l.AssetId != null) .GroupBy(l => l.SkinInstanceId!.Value) .Where(g => g.Select(l => l.AssetId).Distinct().Count() >= 2) .Select(g => g.Key) .ToListAsync(ct); if (dupeInstanceIds.Count == 0) return; // Flag only those not already flagged, stamping first-seen once. Instances // already marked stay marked (they're excluded by the !SuspectedDupe filter). var newlyFlagged = await _db.SkinInstances .Where(i => dupeInstanceIds.Contains(i.Id) && !i.SuspectedDupe) .ExecuteUpdateAsync( setters => setters .SetProperty(i => i.SuspectedDupe, true) .SetProperty(i => i.DupeFirstSeenAt, now), ct); if (newlyFlagged > 0) _logger.LogWarning( "Dupe detection: {Count} instance(s) newly flagged as suspected dupes.", newlyFlagged); } // Pace requests against the rate limit: if the bucket is nearly empty, sleep // until the reset epoch. Otherwise apply only the optional courtesy delay. private async Task PaceAsync(TimeSpan? delay, CancellationToken ct) { var rate = _client.LastRateLimit; if (rate.Remaining is { } remaining && remaining <= RateLimitSafetyMargin && long.TryParse(rate.Reset, out var resetEpoch)) { var resetAt = DateTimeOffset.FromUnixTimeSeconds(resetEpoch); var wait = resetAt - DateTimeOffset.UtcNow; if (wait > TimeSpan.Zero) { _logger.LogWarning( "Rate limit nearly exhausted ({Remaining} left); sleeping {Seconds:0}s until reset.", remaining, wait.TotalSeconds); await Task.Delay(wait, ct); return; } } if (delay is { } d && d > TimeSpan.Zero) await Task.Delay(d, ct); } }