Files
Operation-Blue-Laminate-v2/worker/csmoney_worker.py
2026-06-01 10:52:06 -05:00

130 lines
5.3 KiB
Python

"""cs.money scrape worker (pull model).
A thin strategy over blworker.Worker: it supplies only the cs.money-specific bits — the
consent banner steps and how to scrape one skin+wear's sell-orders. The warm session, the
poll/scrape/post loop, the IPRoyal proxy and IP rotation, logging and shutdown all live in
the shared runtime. Env knobs are documented in worker/README.md.
cs.money is an Astro SSR app: the free-text market search filters server-side and the
resulting listings are embedded in the page as a __page-params JSON blob. The
/2.0/market/sell-orders API rejects a `search` param (HTTP 400), so we fetch the PAGE for
a search and read the embedded items — same item shape as the API.
A page returns at most 60 items and `offset` is ignored, so we paginate with a FORWARD CURSOR on
float: cs.money honors `order=asc&sort=float` + `minFloat`, and float is full-precision and
effectively unique per item. We grab the 60 lowest-float items at/above `lo`, advance `lo`
to the highest float returned, and repeat until a page is under the cap. (The old
minPrice/maxPrice bisection silently truncated cheap skins: >60 listings can share a
sub-$0.02 reference band, which no price window can split — floats almost never tie, so the
cursor always makes progress.)

Run (Windows PowerShell):
    cd worker
    .venv\\Scripts\\Activate.ps1
    pip install -r requirements.txt
    python csmoney_worker.py
"""
import json
import re
import urllib.parse
from blworker import ScrapeResult, Worker, click, page_fetch, run
# URL template for one SSR market-search page, sorted by float ascending. `search` is
# substituted verbatim, so callers must URL-encode it first; `lo` is the float cursor and is
# rendered with 12 decimal places as the minFloat bound.
PAGE = ("https://cs.money/market/buy/?search={search}"
        "&order=asc&sort=float&minFloat={lo:.12f}&maxFloat=1")
# Maximum number of items a single SSR page carries; a page with fewer items than this is
# treated as the last page of a scrape.
PAGE_CAP = 60  # items per SSR page
# Captures the body of the <script id="__page-params"> element. re.S lets `.` span newlines
# so a pretty-printed (multi-line) JSON blob still matches.
PAGE_PARAMS_RE = re.compile(
    r'<script\b[^>]*id="__page-params"[^>]*>(.*?)</script>', re.S)


def extract_items(html: str) -> list:
    """Pull ``inventory.items`` out of the page's ``__page-params`` JSON blob.

    Args:
        html: Full HTML text of a market page.

    Returns:
        The list stored at ``inventory.items``. An empty list is returned when the script
        tag is absent, its content is not valid JSON, or the decoded structure does not
        have the expected object -> object -> list shape (including explicit ``null``
        values at any level). This function never raises on a malformed blob.
    """
    match = PAGE_PARAMS_RE.search(html)
    if not match:
        return []
    try:
        params = json.loads(match.group(1))
    except json.JSONDecodeError:
        return []
    # `dict.get(key, default)` only applies the default when the key is MISSING; an explicit
    # JSON null comes back as None. Check each level's type instead of chaining `.get`, so
    # an unexpected shape yields "no items" rather than an AttributeError.
    if not isinstance(params, dict):
        return []
    inventory = params.get("inventory")
    if not isinstance(inventory, dict):
        return []
    items = inventory.get("items")
    return items if isinstance(items, list) else []
class CsMoneyWorker(Worker):
    """cs.money strategy for the shared ``blworker.Worker`` runtime.

    Supplies only the site-specific pieces: a label for a job, the cookie-consent banner
    steps, and the scrape of every sell-order listing for one search string.
    """

    # Class-level settings read by the shared runtime (their exact use lives in blworker).
    name = "csmoney"
    jobs_path = "/jobs"
    default_market_url = "https://cs.money/market/buy/"

    def describe_job(self, job) -> str:
        """Return a short human-readable label for *job*: its search string, repr-quoted."""
        return f"search {job['search']!r}"

    async def dismiss_consent(self, page) -> str | None:
        """Dismiss the cookie banner the privacy-preserving way.

        The banner only offers 'Accept all' / 'Manage cookies'; the Reject-all control
        lives inside the Manage window. So: Manage -> Reject all -> Confirm. (The data path
        reads SSR __page-params regardless, but this keeps the session honest and unblocks
        any future interaction.)

        Returns:
            A comma-separated summary of the steps that were actually clicked, or ``None``
            when nothing was clicked.
        """
        steps = []
        # Try the full button label first, then the shorter variant.
        if await click(page, "Manage cookies") or await click(page, "Manage"):
            await page.sleep(1)  # give the Manage window a moment to open
            if await click(page, "Reject all"):
                steps.append("reject-all")
            # The confirm button's wording varies; the first label that clicks wins.
            for c in ("Confirm my choice", "Confirm", "Save"):
                if await click(page, c):
                    steps.append(f"confirm:{c}")
                    break
        return ", ".join(steps) if steps else None

    async def scrape_job(self, page, job) -> ScrapeResult:
        """Scrape ALL listings for one skin+wear via a forward float cursor.

        Grab the 60 lowest-float items at/above `lo`, advance `lo` to the highest float on
        the page, repeat until a page is under the cap. The boundary item is re-fetched
        (minFloat is inclusive) and dropped by the id dedup.

        Args:
            page: Browser page/session handle passed through to ``page_fetch``.
            job: Mapping with a required ``"search"`` string and an optional ``"maxPages"``
                fetch cap (default 40; an explicit ``None`` also means the default).

        Returns:
            A ``ScrapeResult`` built positionally from (deduplicated items, number of page
            fetches, reason, accumulated wire bytes). ``reason`` is one of ``"completed"``,
            ``"challenged"``, ``"stuck-float-tie"`` or ``"fetch-cap"``.

        Raises:
            KeyError: if *job* has no ``"search"`` key.
        """
        search = urllib.parse.quote_plus(job["search"])
        # Safety cap on page fetches per job. `job.get("maxPages", 40)` would return None
        # for an explicit null in the payload and make the loop condition raise TypeError,
        # so None is mapped to the default explicitly.
        max_fetches = job.get("maxPages")
        if max_fetches is None:
            max_fetches = 40
        seen: dict = {}  # item id -> item; dedups the re-fetched boundary items
        fetches = 0
        wire = 0
        lo = 0.0
        reason = "completed"
        while fetches < max_fetches:
            _status, body, wbytes = await page_fetch(page, PAGE.format(search=search, lo=lo))
            fetches += 1
            # Only positive byte counts are accumulated (non-positive presumably means
            # "unknown" — confirm against page_fetch).
            if wbytes > 0:
                wire += wbytes
            # Anti-bot interstitial markers: give up and return what we have so far.
            if "Just a moment" in body or "challenge-platform" in body:
                return ScrapeResult(list(seen.values()), fetches, "challenged", wire)
            items = extract_items(body)
            floats = []
            for it in items:
                if not isinstance(it, dict):
                    # Malformed entry: skip it rather than let AttributeError abort the
                    # job. It still counts toward len(items) for the page-cap test below.
                    continue
                if it.get("id") is not None:
                    seen[it["id"]] = it
                fl = (it.get("asset") or {}).get("float")
                if isinstance(fl, (int, float)):
                    floats.append(fl)
            if len(items) < PAGE_CAP:
                break  # last page — fewer than the cap means we've seen everything
            # Advance the cursor past the highest float on this page. Items at exactly that
            # float are re-fetched next round (minFloat is inclusive) and deduped by id.
            nxt = max(floats) if floats else None
            if nxt is None or nxt <= lo:
                # Cursor can't advance: >60 listings share a single float value, or the
                # items carry no float. Bail loudly rather than spin — a flagged gap beats
                # a silent one (this is the failure the price-window version hid).
                reason = "stuck-float-tie"
                break
            lo = nxt
            # Pause between page fetches; `_pace` is not defined in this class (presumably
            # inherited from Worker).
            await self._pace(page)
        else:
            # Loop condition went false without a break: the fetch cap was exhausted while
            # pages were still coming back full.
            reason = "fetch-cap"
        return ScrapeResult(list(seen.values()), fetches, reason, wire)
# Script entry point: when executed directly (not imported), hand the strategy class to
# blworker's `run`.
if __name__ == "__main__":
    run(CsMoneyWorker)