Files
Operation-Blue-Laminate-v2/BlueLaminate/BlueLaminate.C2/JobQueue.cs
2026-06-01 10:52:06 -05:00

137 lines
6.2 KiB
C#

using System.Collections.Concurrent;
using BlueLaminate.EFCore.Data;
using Microsoft.EntityFrameworkCore;
namespace BlueLaminate.C2;
/// <summary>
/// Hands out scrape jobs to workers, one skin+wear at a time, driven directly by the
/// catalogue's per-band, per-site checkpoints (the rows in <c>skin_condition_sweeps</c>
/// for this queue's <see cref="_source"/>) rather than a pre-built queue. Each claim picks
/// the stalest band (never-swept first), leases it in memory so two workers can't get the
/// same one, and builds the work target. On completion the ingest stamps the band's
/// checkpoint, so it drops to the back — the sweep loops the whole catalogue continuously
/// and resumes cleanly after restarts. Because the checkpoint is per-site, a band one
/// market just swept is still due on another.
/// <para>
/// The queue is source-agnostic: it's constructed with the checkpoint
/// <see cref="_source"/> and a <see cref="_targetBuilder"/> that turns a band into the
/// thing a worker needs — a free-text search for cs.money, a market URL for skin.land — so
/// one class drives every market. Register one instance per source.
/// </para>
/// <para>
/// A <see cref="_minResweepInterval"/> floor keeps a band from being re-handed-out until
/// its data is at least that stale. Without it the queue re-scrapes the whole catalogue as
/// fast as the workers run, which on a metered residential proxy is the dominant cost; the
/// floor trades a little price-freshness for a roughly linear bandwidth cut. When every
/// band is fresher than the floor the queue hands out nothing (workers idle) until one ages.
/// </para>
/// <para>
/// Leases remember which job holds them. A job that posts after its lease expired and the
/// band was re-issued therefore cannot release the newer holder's lease.
/// </para>
/// </summary>
public sealed class JobQueue
{
    // A leased condition can't be re-handed-out until released or the lease expires (so a
    // crashed worker's band returns to the pool instead of stalling forever).
    private static readonly TimeSpan LeaseTtl = TimeSpan.FromMinutes(15);

    private readonly string _source;
    private readonly TimeSpan _minResweepInterval;
    private readonly Func<Candidate, string> _targetBuilder;

    // Serialises claims: leases are only ever ADDED while this gate is held.
    private readonly SemaphoreSlim _gate = new(1, 1);
    private readonly ConcurrentDictionary<int, Lease> _leases = new();           // conditionId -> lease
    private readonly ConcurrentDictionary<string, JobMapping> _inFlight = new(); // jobId -> mapping

    /// <summary>Creates a queue bound to one sweep source.</summary>
    /// <param name="source">
    /// The <c>skin_condition_sweeps.Source</c> this queue reads/leases on (a
    /// <c>SweepSource</c> value, e.g. "csmoney" / "skinland").
    /// </param>
    /// <param name="minResweepInterval">
    /// How stale a band's checkpoint must be before it's eligible again.
    /// <see cref="TimeSpan.Zero"/> disables the floor (continuous re-sweep).
    /// </param>
    /// <param name="targetBuilder">Turns a claimed band into the worker's target string.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="targetBuilder"/> is <c>null</c>.
    /// </exception>
    public JobQueue(string source, TimeSpan minResweepInterval, Func<Candidate, string> targetBuilder)
    {
        // Fail at registration time. A null source would match no checkpoint row, so every
        // band would look never-swept and the re-sweep floor would silently stop applying.
        ArgumentNullException.ThrowIfNull(source);
        ArgumentNullException.ThrowIfNull(targetBuilder);

        _source = source;
        _minResweepInterval = minResweepInterval;
        _targetBuilder = targetBuilder;
    }

    /// <summary>Number of claimed jobs that have not been completed yet.</summary>
    public int InFlight => _inFlight.Count;

    /// <summary>
    /// Claims the stalest eligible band that is not currently leased, leases it, and
    /// returns the job describing it.
    /// </summary>
    /// <param name="db">Context used to read the bands and this source's checkpoints.</param>
    /// <param name="maxPages">Copied unchanged into the returned <see cref="ClaimedJob"/>.</param>
    /// <param name="ct">Cancels the wait for the claim gate and the database query.</param>
    /// <returns>
    /// The claimed job, or <c>null</c> when no band is both due (never swept, or swept at
    /// or before the re-sweep floor) and unleased.
    /// </returns>
    public async Task<ClaimedJob?> ClaimNextAsync(SkinTrackerDbContext db, int maxPages, CancellationToken ct)
    {
        await _gate.WaitAsync(ct);
        try
        {
            var now = DateTimeOffset.UtcNow;
            ReclaimExpiredLeases(now);

            // Only consider bands that are never-swept or stale past the re-sweep floor,
            // then stalest first (never-swept null sorts before any timestamp). The
            // checkpoint is read for THIS queue's source only (a correlated subquery over
            // the per-site sweep rows), so a band another market just swept is still
            // never-swept here. With the floor in place a fully-fresh catalogue yields no
            // candidates, so workers idle instead of needlessly re-pulling on the proxy.
            var source = _source;
            var freshCutoff = now - _minResweepInterval;

            // At most _leases.Count of the due bands can be leased, so one row more than
            // that always contains an unleased band if any exists. A fixed batch size
            // starves the queue once that many of the stalest bands are in flight.
            // Leases can only shrink while we hold the gate, so the bound stays valid.
            var batchSize = _leases.Count + 1;

            var candidates = await db.SkinConditions
                .Select(c => new
                {
                    Candidate = new Candidate(c.Id, c.SkinId, c.Skin.Weapon.Name, c.Skin.Name, c.Condition),
                    SweptAt = c.Sweeps
                        .Where(s => s.Source == source)
                        .Select(s => (DateTimeOffset?)s.SweptAt)
                        .FirstOrDefault(),
                })
                .Where(x => x.SweptAt == null || x.SweptAt <= freshCutoff)
                .OrderBy(x => x.SweptAt.HasValue)
                .ThenBy(x => x.SweptAt)
                .Take(batchSize)
                .Select(x => x.Candidate)
                .ToListAsync(ct);

            var pick = candidates.FirstOrDefault(c => !_leases.ContainsKey(c.ConditionId));
            if (pick is null)
            {
                return null; // every due band is already in flight (or nothing is due)
            }

            // Build the target BEFORE recording anything: if the builder throws, no lease
            // or in-flight mapping is left behind to block the band until the TTL expires.
            var target = _targetBuilder(pick);

            var jobId = Guid.NewGuid().ToString("N");
            _leases[pick.ConditionId] = new Lease(jobId, DateTimeOffset.UtcNow);
            _inFlight[jobId] = new JobMapping(pick.SkinId, pick.ConditionId);
            return new ClaimedJob(jobId, pick.SkinId, pick.ConditionId, target, maxPages);
        }
        finally
        {
            _gate.Release();
        }
    }

    /// <summary>Resolve a posted job to its skin+condition and release its lease.</summary>
    /// <param name="jobId">The <see cref="ClaimedJob.JobId"/> handed out by <see cref="ClaimNextAsync"/>.</param>
    /// <returns>
    /// The job's mapping, or <c>null</c> if the id is unknown or was already completed.
    /// </returns>
    public JobMapping? Complete(string jobId)
    {
        if (!_inFlight.TryRemove(jobId, out var mapping))
        {
            return null;
        }

        // Release the lease only if THIS job still owns it. If the lease expired and the
        // band was re-issued, the current lease belongs to another job and must survive,
        // otherwise the band could be handed out again while that job is still running.
        if (_leases.TryGetValue(mapping.ConditionId, out var lease) && lease.JobId == jobId)
        {
            _leases.TryRemove(new KeyValuePair<int, Lease>(mapping.ConditionId, lease));
        }

        return mapping;
    }

    /// <summary>Drops every lease taken more than <see cref="LeaseTtl"/> before <paramref name="now"/>.</summary>
    private void ReclaimExpiredLeases(DateTimeOffset now)
    {
        var cutoff = now - LeaseTtl;
        foreach (var entry in _leases)
        {
            if (entry.Value.LeasedAt < cutoff)
            {
                // Compare-and-remove: only drops the exact lease we inspected.
                // NOTE(review): the expired job's _inFlight mapping is kept on purpose so a
                // late post still resolves; mappings of jobs that never post are therefore
                // never pruned. Confirm whether late results should be rejected instead.
                _leases.TryRemove(entry);
            }
        }
    }

    /// <summary>An in-memory lease on one condition: the job holding it and when it was taken.</summary>
    private readonly record struct Lease(string JobId, DateTimeOffset LeasedAt);

    /// <summary>The skin+condition a job id resolves to.</summary>
    public sealed record JobMapping(int SkinId, int ConditionId);

    /// <summary>A claimed band ready to hand to a worker: its ids + built target string.</summary>
    public sealed record ClaimedJob(string JobId, int SkinId, int ConditionId, string Target, int MaxPages);

    /// <summary>A band as read from the catalogue; the input to the target builder.</summary>
    public sealed record Candidate(int ConditionId, int SkinId, string Weapon, string SkinName, string Condition);
}