Add cs.money worker stack with per-worker IPRoyal residential proxy
Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
329
BlueLaminate/BlueLaminate.Core/CsMoney/CsMoneyIngestService.cs
Normal file
329
BlueLaminate/BlueLaminate.Core/CsMoney/CsMoneyIngestService.cs
Normal file
@@ -0,0 +1,329 @@
|
||||
using BlueLaminate.EFCore.Data;
|
||||
using BlueLaminate.EFCore.Entities;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace BlueLaminate.Core.CsMoney;
|
||||
|
||||
/// <summary>Outcome of ingesting one skin+wear scrape job's results.</summary>
/// <param name="Matched">Scraped items that passed the skin-name and wear filter.</param>
/// <param name="Inserted">Matched items stored as new listing rows.</param>
/// <param name="Updated">Matched items that refreshed an already-stored listing row.</param>
/// <param name="Removed">Previously active listings marked Removed; stays 0 on a partial sweep.</param>
/// <param name="Skipped">Input items that were not matched (all of them when the skin is unknown).</param>
public sealed record CsMoneyIngestResult(
int Matched, int Inserted, int Updated, int Removed, int Skipped);
|
||||
|
||||
/// <summary>
|
||||
/// Persists the listings the worker scraped for one targeted skin+wear job into the
|
||||
/// <c>cs_money_listings</c> table. Mirrors the CSFloat <c>ListingSweepService</c>
|
||||
/// patterns — upsert by natural key, resolve each listing to a market-agnostic
|
||||
/// <see cref="SkinInstance"/> by fingerprint, soft-track Removed, flag dupes — but
|
||||
/// scoped to the one skin+condition the job targeted (so it's the per-band unit, and
|
||||
/// Removed-tracking is exact). cs.money's free-text search is fuzzy, so results are
|
||||
/// filtered to the intended skin (by name) and wear (by quality) before persisting.
|
||||
/// </summary>
|
||||
public sealed class CsMoneyIngestService
|
||||
{
|
||||
public const string Source = "csmoney";
|
||||
|
||||
private readonly SkinTrackerDbContext _db;
|
||||
private readonly ILogger<CsMoneyIngestService> _logger;
|
||||
|
||||
/// <summary>Creates the ingest service over the given database context and logger.</summary>
/// <param name="db">Context used for every read and write this service performs.</param>
/// <param name="logger">Logger for ingest summaries and warnings.</param>
public CsMoneyIngestService(SkinTrackerDbContext db, ILogger<CsMoneyIngestService> logger)
    => (_db, _logger) = (db, logger);
|
||||
|
||||
/// <summary>
/// Filters the scraped <paramref name="items"/> down to the job's skin (and wear band, when
/// one is targeted), upserts them as cs.money listings, and — on a complete sweep only —
/// marks unseen listings Removed, records a price point and stamps the swept-checkpoint.
/// </summary>
/// <param name="skinId">Id of the skin the job targeted.</param>
/// <param name="conditionId">Id of the targeted wear band, or <c>null</c> when the job is not wear-scoped.</param>
/// <param name="items">Raw items the worker scraped; may include fuzzy-search noise.</param>
/// <param name="complete">
/// True only when the worker walked the whole skin+wear (stoppedReason "completed").
/// On a partial sweep we upsert what we saw but skip Removed-marking, the price
/// point, and the swept-checkpoint — unseen listings may just be unfetched, so the
/// band stays un-stamped and gets re-queued rather than being wrongly pruned.
/// </param>
/// <param name="ct">Cancellation token passed to every database call.</param>
/// <returns>Counts of matched, inserted, updated, removed and skipped items.</returns>
public async Task<CsMoneyIngestResult> IngestAsync(
    int skinId, int? conditionId, IReadOnlyList<CsMoneyItem> items, bool complete, CancellationToken ct = default)
{
    var now = DateTimeOffset.UtcNow;

    var skin = await _db.Skins
        .Where(s => s.Id == skinId)
        .Select(s => new { s.Id, s.Name, Weapon = s.Weapon.Name })
        .FirstOrDefaultAsync(ct);
    if (skin is null)
    {
        _logger.LogWarning("Ingest skipped: skin {SkinId} not found.", skinId);
        return new CsMoneyIngestResult(0, 0, 0, 0, items.Count);
    }

    string? conditionName = null;
    if (conditionId is { } cid)
    {
        conditionName = await _db.SkinConditions
            .Where(c => c.Id == cid).Select(c => c.Condition).FirstOrDefaultAsync(ct);
    }

    var expectedShort = Normalize($"{skin.Weapon} | {skin.Name}");
    var expectedQuality = Wear.ToCode(conditionName);

    // cs.money search is fuzzy — keep only items that are actually this skin (by
    // name) and, when the job targets a wear band, this wear (by quality).
    var matched = items.Where(it =>
    {
        var a = it.Asset;
        if (a?.Names?.Short is null)
        {
            return false;
        }

        if (Normalize(a.Names.Short) != expectedShort)
        {
            return false;
        }

        return expectedQuality is null
            || string.Equals(a.Quality, expectedQuality, StringComparison.OrdinalIgnoreCase);
    }).ToList();

    var skipped = items.Count - matched.Count;
    if (matched.Count == 0)
    {
        // Nothing for this skin+wear. If the sweep was complete this is genuine
        // (none listed, or a name mismatch) — stamp the checkpoint so it advances.
        // If it was partial (e.g. challenged before any item), leave it un-stamped
        // so the band is retried.
        if (complete)
        {
            await StampCheckpointAsync(conditionId, now, ct);
            await _db.SaveChangesAsync(ct);
        }

        return new CsMoneyIngestResult(0, 0, 0, 0, skipped);
    }

    var sellOrderIds = matched.Select(it => it.Id).ToList();
    var existing = await _db.CsMoneyListings
        .Where(l => sellOrderIds.Contains(l.SellOrderId))
        .ToDictionaryAsync(l => l.SellOrderId, ct);

    var inserted = 0;
    var updated = 0;
    var touched = new HashSet<long>();
    var touchedInstanceIds = new HashSet<int>();

    foreach (var it in matched)
    {
        // If the payload repeats a sell order id, handle it once. A second pass would
        // add another row with the same SellOrderId, which the by-SellOrderId
        // dictionary lookup above could not load on the next run.
        if (!touched.Add(it.Id))
        {
            continue;
        }

        var instance = await ResolveInstanceAsync(skinId, conditionId, it, now, ct);
        if (instance is not null)
        {
            touchedInstanceIds.Add(instance.Id);
        }

        if (existing.TryGetValue(it.Id, out var row))
        {
            // Known listing: refresh the volatile fields and revive it if it had
            // previously been marked Removed.
            row.Price = it.Pricing?.Default ?? row.Price;
            row.PriceBeforeDiscount = it.Pricing?.PriceBeforeDiscount;
            row.ComputedPrice = it.Pricing?.Computed;
            row.AssetId = it.Asset?.Id?.ToString();
            row.LastSeenAt = now;
            row.Status = ListingStatus.Active;
            row.RemovedAt = null;
            row.ConditionId = conditionId;
            row.SkinInstance = instance;
            updated++;
        }
        else
        {
            var entity = Map(it, skinId, conditionId, now);
            entity.SkinInstance = instance;
            _db.CsMoneyListings.Add(entity);
            inserted++;
        }
    }

    // Persist inserts/updates before the set-based Removed/dupe queries run.
    await _db.SaveChangesAsync(ct);

    await FlagDupesAsync(touchedInstanceIds, now, ct);

    // The following only hold if we saw the FULL skin+wear set. On a partial sweep,
    // listings we didn't fetch are not gone (so don't mark them Removed), the
    // cheapest item may be among the unfetched (so don't record a price point), and
    // the band isn't fully swept (so don't stamp the checkpoint — let it re-queue).
    var removed = 0;
    if (complete)
    {
        removed = await MarkRemovedAsync(skinId, conditionId, touched, now, ct);

        // Record a price point (the cheapest live listing) for this skin+wear.
        if (conditionId is { } condId)
        {
            var prices = matched
                .Where(m => m.Pricing is not null)
                .Select(m => m.Pricing!.Default)
                .ToList();

            // Min() over an empty sequence of non-nullable values throws; with no priced
            // item there is simply no price point to record, and the sweep must
            // still reach the checkpoint stamp below.
            if (prices.Count > 0)
            {
                await _db.PriceHistories.AddAsync(new PriceHistory
                {
                    SkinId = skinId,
                    ConditionId = condId,
                    Price = prices.Min(),
                    Currency = "USD",
                    RecordedAt = now,
                    Source = Source,
                }, ct);
            }
        }

        await StampCheckpointAsync(conditionId, now, ct);
    }

    await _db.SaveChangesAsync(ct);

    _logger.LogInformation(
        "cs.money ingest {Weapon} | {Skin} ({Wear}): {Matched} matched ({Ins} new, {Upd} upd, "
        + "{Rem} removed), {Skipped} skipped by filter{Partial}.",
        skin.Weapon, skin.Name, conditionName ?? "all", matched.Count, inserted, updated, removed, skipped,
        complete ? "" : " [PARTIAL — not pruned/checkpointed]");

    return new CsMoneyIngestResult(matched.Count, inserted, updated, removed, skipped);
}
|
||||
|
||||
// Resolves the listing's fingerprint (skin, float, paint seed, StatTrak, Souvenir) to a
// SkinInstance, creating one when none exists yet. Instances are shared with CSFloat
// listings, so a copy seen on both markets maps to a single instance. Items without a
// float or pattern (non-skin items) have no meaningful fingerprint and yield null.
private async Task<SkinInstance?> ResolveInstanceAsync(
    int skinId, int? conditionId, CsMoneyItem it, DateTimeOffset now, CancellationToken ct)
{
    var asset = it.Asset;
    if (asset?.Float is not { } wearFloat || asset.Pattern is not { } paintPattern)
    {
        return null;
    }

    var paintSeed = paintPattern.ToString();
    var isStatTrak = asset.IsStatTrak;
    var isSouvenir = asset.IsSouvenir;

    // Look at entities the context already tracks first, so an instance added earlier
    // in this batch (and not yet saved) is reused; only then ask the database.
    var known = _db.ChangeTracker.Entries<SkinInstance>()
        .Select(entry => entry.Entity)
        .FirstOrDefault(i => i.SkinId == skinId && i.FloatValue == wearFloat
            && i.PaintSeed == paintSeed && i.StatTrak == isStatTrak && i.Souvenir == isSouvenir)
        ?? await _db.SkinInstances.FirstOrDefaultAsync(
            i => i.SkinId == skinId && i.FloatValue == wearFloat
                && i.PaintSeed == paintSeed && i.StatTrak == isStatTrak && i.Souvenir == isSouvenir, ct);

    if (known is not null)
    {
        known.LastSeenAt = now;
        return known;
    }

    var created = new SkinInstance
    {
        SkinId = skinId,
        ConditionId = conditionId,
        FloatValue = wearFloat,
        PaintSeed = paintSeed,
        StatTrak = isStatTrak,
        Souvenir = isSouvenir,
        FirstSeenAt = now,
        LastSeenAt = now,
    };
    _db.SkinInstances.Add(created);
    return created;
}
|
||||
|
||||
// Marks as Removed every Active listing of this skin+wear whose sell order id was not
// seen in the current run, and returns how many rows were changed.
private async Task<int> MarkRemovedAsync(
    int skinId, int? conditionId, HashSet<long> touched, DateTimeOffset now, CancellationToken ct)
{
    var unseenActive = _db.CsMoneyListings
        .Where(l => l.SkinId == skinId
            && l.ConditionId == conditionId
            && l.Status == ListingStatus.Active
            && !touched.Contains(l.SellOrderId));

    return await unseenActive.ExecuteUpdateAsync(
        update => update
            .SetProperty(l => l.Status, ListingStatus.Removed)
            .SetProperty(l => l.RemovedAt, now),
        ct);
}
|
||||
|
||||
// Dupe signal shared with CSFloat: one fingerprint that is live under two or more
// distinct asset ids at the same time. Only cs.money listings are examined here
// (cross-market dupe analysis is later). Instances already flagged are left alone.
private async Task FlagDupesAsync(HashSet<int> instanceIds, DateTimeOffset now, CancellationToken ct)
{
    if (instanceIds.Count == 0)
    {
        return;
    }

    // Active listings, with a known asset id, that point at one of the touched instances.
    var liveListings = _db.CsMoneyListings
        .Where(l => l.SkinInstanceId != null
            && instanceIds.Contains(l.SkinInstanceId!.Value)
            && l.Status == ListingStatus.Active
            && l.AssetId != null);

    var suspectIds = await liveListings
        .GroupBy(l => l.SkinInstanceId!.Value)
        .Where(g => g.Select(l => l.AssetId).Distinct().Count() >= 2)
        .Select(g => g.Key)
        .ToListAsync(ct);

    if (suspectIds.Count == 0)
    {
        return;
    }

    var notYetFlagged = _db.SkinInstances
        .Where(i => suspectIds.Contains(i.Id) && !i.SuspectedDupe);

    var flaggedNow = await notYetFlagged.ExecuteUpdateAsync(
        update => update
            .SetProperty(i => i.SuspectedDupe, true)
            .SetProperty(i => i.DupeFirstSeenAt, now),
        ct);

    if (flaggedNow > 0)
    {
        _logger.LogWarning("cs.money dupe detection: {Count} instance(s) newly flagged.", flaggedNow);
    }
}
|
||||
|
||||
// Sets ListingsSweptAt on the targeted wear band to `now`. Does nothing when the job
// was not scoped to a wear band (conditionId is null).
private async Task StampCheckpointAsync(int? conditionId, DateTimeOffset now, CancellationToken ct)
{
    if (conditionId is not { } bandId)
    {
        return;
    }

    await _db.SkinConditions
        .Where(c => c.Id == bandId)
        .ExecuteUpdateAsync(update => update.SetProperty(c => c.ListingsSweptAt, now), ct);
}
|
||||
|
||||
// Builds a new Active listing row from a scraped item. Missing asset/pricing parts fall
// back to null, false, 0 or "" as appropriate; first/last-seen are both set to `now`.
private static CsMoneyListing Map(CsMoneyItem it, int skinId, int? conditionId, DateTimeOffset now)
{
    var asset = it.Asset;
    var names = asset?.Names;
    var pricing = it.Pricing;

    return new CsMoneyListing
    {
        SellOrderId = it.Id,
        AssetId = asset?.Id?.ToString(),
        SkinId = skinId,
        ConditionId = conditionId,
        MarketHashName = names?.Full ?? names?.Short ?? "",
        Quality = asset?.Quality,
        FloatValue = asset?.Float,
        PaintSeed = asset?.Pattern,
        Phase = asset?.Phase,
        IsStatTrak = asset?.IsStatTrak ?? false,
        IsSouvenir = asset?.IsSouvenir ?? false,
        // Only non-null sticker slots count.
        StickerCount = it.Stickers?.Count(s => s is not null) ?? 0,
        Price = pricing?.Default ?? 0m,
        PriceBeforeDiscount = pricing?.PriceBeforeDiscount,
        ComputedPrice = pricing?.Computed,
        Currency = "USD",
        InspectLink = it.Links?.InspectLink,
        FirstSeenAt = now,
        LastSeenAt = now,
        Status = ListingStatus.Active,
    };
}
|
||||
|
||||
// Normalize a market name for matching: drop the StatTrak/Souvenir/★ adornments,
// collapse every run of whitespace to one space, lowercase. So
// "StatTrak™ M4A4 | Cyber Security" and the catalogue's "M4A4 | Cyber Security"
// compare equal.
private static string Normalize(string name)
{
    var stripped = name
        .Replace("★", " ", StringComparison.Ordinal)
        .Replace("StatTrak™", " ", StringComparison.OrdinalIgnoreCase)
        .Replace("Souvenir", " ", StringComparison.OrdinalIgnoreCase);

    // A null separator makes Split break on every Unicode white-space character, so
    // tabs and non-breaking spaces in scraped names collapse too, not just U+0020.
    // The cast picks the char[] overload over the string one.
    var words = stripped.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);

    return string.Join(' ', words).ToLowerInvariant();
}
|
||||
}
|
||||
Reference in New Issue
Block a user