Files
Operation-Blue-Laminate-v2/BlueLaminate/BlueLaminate.C2/JobQueue.cs
bob 8b0eb0db78 Cut metered-proxy bandwidth: re-sweep floor + wire-size logging
JobQueue now skips bands swept within MinResweepHours (config, default 6h) instead of re-scraping the whole catalogue continuously — the dominant cost on the metered residential proxy. Roughly linear savings with no data loss (full pagination retained); 0 disables it. Worker logs the real compressed transferSize per job (what the proxy bills) rather than the ~6.5x-larger decompressed length, so spend is visible.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 15:27:37 -05:00

112 lines
4.8 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using System.Collections.Concurrent;
using BlueLaminate.Core.CsMoney;
using BlueLaminate.EFCore.Data;
using Microsoft.EntityFrameworkCore;
namespace BlueLaminate.C2;
/// <summary>
/// Hands out scrape jobs to workers, one skin+wear band at a time, driven directly by the
/// catalogue's per-band checkpoints (<c>SkinCondition.ListingsSweptAt</c>) rather than
/// a pre-built queue. Each claim picks the stalest eligible band (never-swept first),
/// leases it in memory so two workers can't get the same one, and builds a free-text
/// search. On completion the ingest stamps <c>ListingsSweptAt</c>, so the band drops to
/// the back and the sweep resumes cleanly after restarts.
/// <para>
/// A <see cref="_minResweepInterval"/> floor keeps a band from being re-handed-out until
/// its data is at least that stale. Without it the queue re-scrapes the whole catalogue
/// as fast as the workers run, which on a metered residential proxy is the dominant cost;
/// the floor trades a little price-freshness for a roughly linear bandwidth cut (a 6h
/// floor vs. continuous is about 6x less, if a full pass takes ~1h). When every band is
/// fresher than the floor the queue hands out nothing (workers idle) until one ages past it.
/// </para>
/// </summary>
public sealed class JobQueue
{
    // A leased condition can't be re-handed-out until released or the lease expires
    // (so a crashed worker's band returns to the pool instead of stalling forever).
    private static readonly TimeSpan LeaseTtl = TimeSpan.FromMinutes(15);

    private readonly TimeSpan _minResweepInterval;

    // Serialises claims so the "query, pick, lease" sequence is atomic across workers.
    private readonly SemaphoreSlim _gate = new(1, 1);

    private readonly ConcurrentDictionary<int, Lease> _leases = new(); // conditionId -> current lease
    private readonly ConcurrentDictionary<string, JobMapping> _inFlight = new(); // jobId -> mapping

    /// <param name="minResweepInterval">
    /// How stale a band's <c>ListingsSweptAt</c> must be before it's eligible again.
    /// <see cref="TimeSpan.Zero"/> disables the floor (continuous re-sweep).
    /// </param>
    public JobQueue(TimeSpan minResweepInterval)
    {
        _minResweepInterval = minResweepInterval;
    }

    /// <summary>
    /// Leases the stalest eligible, currently-unleased band and returns a job for it.
    /// </summary>
    /// <param name="db">Context used to query <c>SkinConditions</c>.</param>
    /// <param name="maxPages">Passed through unchanged into the returned job.</param>
    /// <param name="ct">Cancels both the wait for the claim gate and the database query.</param>
    /// <returns>
    /// The job to run, or <c>null</c> when no band is both eligible (never swept, or stale
    /// past the re-sweep floor) and free of a live lease.
    /// </returns>
    public async Task<ScrapeJobDto?> ClaimNextAsync(SkinTrackerDbContext db, int maxPages, CancellationToken ct)
    {
        await _gate.WaitAsync(ct);
        try
        {
            var now = DateTimeOffset.UtcNow;

            // Reclaim expired leases first. Removing entries while enumerating is safe on a
            // ConcurrentDictionary. The matching _inFlight mapping is deliberately kept so a
            // slow worker's late post can still be resolved by Complete.
            // NOTE(review): mappings for workers that never post back are never purged here.
            var leaseCutoff = now - LeaseTtl;
            foreach (var (conditionId, lease) in _leases)
            {
                if (lease.LeasedAt < leaseCutoff)
                {
                    _leases.TryRemove(conditionId, out _);
                }
            }

            // Fetch one more row than there are live leases: even if every lease sits at the
            // stale end of the ordering, at least one returned row is then unleased whenever
            // an unleased eligible band exists. A fixed-size batch would report "nothing to
            // do" as soon as that many of the stalest bands were in flight at once.
            var fetchCount = _leases.Count + 1;

            // Only consider bands that are never-swept or stale past the re-sweep floor,
            // then stalest first (never-swept null sorts before any timestamp). With the
            // floor in place a fully-fresh catalogue yields no candidates, so workers idle
            // instead of needlessly re-pulling ~1MB pages on the metered proxy.
            var freshCutoff = now - _minResweepInterval;
            var candidates = await db.SkinConditions
                .Where(c => c.ListingsSweptAt == null || c.ListingsSweptAt <= freshCutoff)
                .OrderBy(c => c.ListingsSweptAt.HasValue)
                .ThenBy(c => c.ListingsSweptAt)
                .Select(c => new Candidate(
                    c.Id, c.SkinId, c.Skin.Weapon.Name, c.Skin.Name, c.Condition))
                .Take(fetchCount)
                .ToListAsync(ct);

            var pick = candidates.FirstOrDefault(c => !_leases.ContainsKey(c.ConditionId));
            if (pick is null)
            {
                return null; // every eligible band is already in flight (or none is eligible)
            }

            // Build the search text before registering anything, so a failure here cannot
            // leave a lease behind with no job attached to it.
            var code = Wear.ToCode(pick.Condition) ?? pick.Condition;
            var search = $"{pick.Weapon} {pick.SkinName} {code}".Trim();

            var jobId = Guid.NewGuid().ToString("N");
            _inFlight[jobId] = new JobMapping(pick.SkinId, pick.ConditionId);
            _leases[pick.ConditionId] = new Lease(jobId, DateTimeOffset.UtcNow);

            return new ScrapeJobDto(jobId, pick.SkinId, pick.ConditionId, search, maxPages);
        }
        finally
        {
            _gate.Release();
        }
    }

    /// <summary>Resolve a posted job to its skin+condition and release its lease.</summary>
    /// <param name="jobId">The id previously returned inside a claimed job.</param>
    /// <returns>
    /// The job's mapping, or <c>null</c> when the id is null, unknown, or already completed.
    /// </returns>
    public JobMapping? Complete(string jobId)
    {
        // Job ids arrive from workers; treat a missing id like an unknown one instead of
        // letting the dictionary throw on a null key.
        if (jobId is null)
        {
            return null;
        }

        if (!_inFlight.TryRemove(jobId, out var mapping))
        {
            return null;
        }

        // Release the lease only if this job still owns it. If the lease expired and the
        // band was handed to another job in the meantime, that newer lease must survive a
        // late completion of the old job. The key+value overload removes atomically, and
        // only while the stored lease is still the one observed here.
        if (_leases.TryGetValue(mapping.ConditionId, out var lease) && lease.JobId == jobId)
        {
            _leases.TryRemove(
                new System.Collections.Generic.KeyValuePair<int, Lease>(mapping.ConditionId, lease));
        }

        return mapping;
    }

    /// <summary>
    /// Number of job mappings currently tracked, i.e. claimed and not yet completed.
    /// This includes jobs whose lease has already expired.
    /// </summary>
    public int InFlight => _inFlight.Count;

    /// <summary>The skin and condition (band) a claimed job refers to.</summary>
    public sealed record JobMapping(int SkinId, int ConditionId);

    // Row shape projected by the candidate query.
    private sealed record Candidate(int ConditionId, int SkinId, string Weapon, string SkinName, string Condition);

    // An in-memory lease: which job holds the band and when the lease was taken.
    private sealed record Lease(string JobId, DateTimeOffset LeasedAt);
}