using System.Collections.Concurrent;
using System.Collections.Generic;
using BlueLaminate.Core.CsMoney;
using BlueLaminate.EFCore.Data;
using Microsoft.EntityFrameworkCore;

namespace BlueLaminate.C2;

/// <summary>
/// Hands out scrape jobs to workers, one skin+wear at a time, driven directly by the
/// catalogue's per-band checkpoints (<c>SkinCondition.ListingsSweptAt</c>) rather than
/// a pre-built queue. Each claim picks the stalest band (never-swept first), leases it
/// in memory so two workers can't get the same one, and builds a free-text search. On
/// completion the ingest stamps <c>ListingsSweptAt</c>, so the band drops to the back —
/// the sweep loops the whole catalogue continuously and resumes cleanly after restarts.
/// <para>
/// A re-sweep floor (the <c>minResweepInterval</c> constructor argument) keeps a band
/// from being re-handed-out until its data is at least that stale. Without it the queue
/// re-scrapes the whole catalogue as fast as the workers run, which on a metered
/// residential proxy is the dominant cost; the floor trades a little price-freshness for
/// a roughly linear bandwidth cut (a 6h floor vs. continuous ≈ 6× less, if a full pass
/// takes ~1h). When every band is fresher than the floor the queue hands out nothing
/// (workers idle) until one ages past it.
/// </para>
/// </summary>
public sealed class JobQueue
{
    // A leased condition can't be re-handed-out until released or the lease expires
    // (so a crashed worker's band returns to the pool instead of stalling forever).
    private static readonly TimeSpan LeaseTtl = TimeSpan.FromMinutes(15);

    // How many of the stalest eligible bands are fetched per claim attempt.
    private const int CandidateBatch = 100;

    private readonly TimeSpan _minResweepInterval;

    // Serialises claims so the "find an unleased band, then lease it" step is atomic.
    private readonly SemaphoreSlim _gate = new(1, 1);

    private readonly ConcurrentDictionary<int, Lease> _leases = new();           // conditionId -> current lease
    private readonly ConcurrentDictionary<string, JobMapping> _inFlight = new(); // jobId -> mapping

    /// <summary>Creates a queue with the given re-sweep floor.</summary>
    /// <param name="minResweepInterval">
    /// How stale a band's <c>ListingsSweptAt</c> must be before it's eligible again.
    /// <see cref="TimeSpan.Zero"/> disables the floor (continuous re-sweep).
    /// </param>
    public JobQueue(TimeSpan minResweepInterval)
    {
        _minResweepInterval = minResweepInterval;
    }

    /// <summary>
    /// Leases the stalest eligible band that no live lease covers and describes it as a job.
    /// </summary>
    /// <param name="db">Context used to query the skin-condition catalogue.</param>
    /// <param name="maxPages">Page budget copied verbatim into the returned job.</param>
    /// <param name="ct">Cancels the wait for the claim gate and the catalogue query.</param>
    /// <returns>
    /// The job to run, or <c>null</c> when no band in the stalest batch is both eligible
    /// and free of a live lease.
    /// </returns>
    public async Task<ScrapeJobDto?> ClaimNextAsync(SkinTrackerDbContext db, int maxPages, CancellationToken ct)
    {
        await _gate.WaitAsync(ct);
        try
        {
            // Leases stamped before this instant count as expired. Expired entries are not
            // deleted eagerly: they stay in _leases so that, when the band is re-leased
            // below, the abandoned job's mapping can be found and discarded.
            var leaseCutoff = DateTimeOffset.UtcNow - LeaseTtl;

            // Only consider bands that are never-swept or stale past the re-sweep floor,
            // then stalest first (never-swept null sorts before any timestamp). With the
            // floor in place a fully-fresh catalogue yields no candidates, so workers idle
            // instead of needlessly re-pulling ~1MB pages on the metered proxy.
            var freshCutoff = DateTimeOffset.UtcNow - _minResweepInterval;
            var candidates = await db.SkinConditions
                .Where(c => c.ListingsSweptAt == null || c.ListingsSweptAt <= freshCutoff)
                .OrderBy(c => c.ListingsSweptAt.HasValue)
                .ThenBy(c => c.ListingsSweptAt)
                .Select(c => new Candidate(
                    c.Id,
                    c.SkinId,
                    c.Skin.Weapon.Name,
                    c.Skin.Name,
                    c.Condition))
                .Take(CandidateBatch)
                .ToListAsync(ct);

            var pick = candidates.FirstOrDefault(c => !HasLiveLease(c.ConditionId, leaseCutoff));
            if (pick is null)
            {
                return null; // everything in the stalest batch is already in flight
            }

            // An entry still present here is an expired lease. The job that held it is being
            // superseded, so drop its mapping: otherwise a crashed worker's mapping would
            // stay in _inFlight forever and a late post could release the new job's lease.
            if (_leases.TryGetValue(pick.ConditionId, out var superseded))
            {
                _inFlight.TryRemove(superseded.JobId, out _);
            }

            var jobId = Guid.NewGuid().ToString("N");
            _leases[pick.ConditionId] = new Lease(jobId, DateTimeOffset.UtcNow);
            _inFlight[jobId] = new JobMapping(pick.SkinId, pick.ConditionId);

            // Fall back to the raw condition text when Wear.ToCode yields null.
            var code = Wear.ToCode(pick.Condition) ?? pick.Condition;
            var search = $"{pick.Weapon} {pick.SkinName} {code}".Trim();
            return new ScrapeJobDto(jobId, pick.SkinId, pick.ConditionId, search, maxPages);
        }
        finally
        {
            _gate.Release();
        }
    }

    /// <summary>Resolve a posted job to its skin+condition and release its lease.</summary>
    /// <param name="jobId">Identifier previously returned by <see cref="ClaimNextAsync"/>.</param>
    /// <returns>
    /// The job's mapping, or <c>null</c> when the id is unknown (never issued by this
    /// instance, already completed, or superseded after its lease expired).
    /// </returns>
    public JobMapping? Complete(string jobId)
    {
        if (!_inFlight.TryRemove(jobId, out var mapping))
        {
            return null;
        }

        // Release the lease only if this job still owns it. The key+value overload removes
        // atomically, so a lease re-issued to another job in the meantime is left alone.
        if (_leases.TryGetValue(mapping.ConditionId, out var lease) && lease.JobId == jobId)
        {
            _leases.TryRemove(KeyValuePair.Create(mapping.ConditionId, lease));
        }

        return mapping;
    }

    /// <summary>Number of issued jobs that have been neither completed nor superseded.</summary>
    public int InFlight => _inFlight.Count;

    // True when the band has a lease that has not yet outlived LeaseTtl.
    private bool HasLiveLease(int conditionId, DateTimeOffset leaseCutoff) =>
        _leases.TryGetValue(conditionId, out var lease) && lease.LeasedAt >= leaseCutoff;

    /// <summary>The skin and condition a job id refers to.</summary>
    public sealed record JobMapping(int SkinId, int ConditionId);

    // In-memory lease on one band: which job holds it and when it was granted.
    private sealed record Lease(string JobId, DateTimeOffset LeasedAt);

    // Projection of the catalogue row fields needed to lease a band and build its search text.
    private sealed record Candidate(int ConditionId, int SkinId, string Weapon, string SkinName, string Condition);
}