"""cs.money scrape worker (pull model). A thin strategy over blworker.Worker: it supplies only the cs.money-specific bits — the consent banner steps and how to scrape one skin+wear's sell-orders. The warm session, the poll/scrape/post loop, the IPRoyal proxy and IP rotation, logging and shutdown all live in the shared runtime. Env knobs are documented in worker/README.md. cs.money is an Astro SSR app: the free-text market search filters server-side and the resulting listings are embedded in the page as a __page-params JSON blob. The /2.0/market/sell-orders API rejects a `search` param (HTTP 400), so we fetch the PAGE for a search and read the embedded items — same item shape as the API. A page returns at most 60 and offset is ignored, so we paginate with a FORWARD CURSOR on float: cs.money honors `order=asc&sort=float` + `minFloat`, and float is full-precision and effectively unique per item. We grab the 60 lowest-float items at/above `lo`, advance `lo` to the highest float returned, and repeat until a page is under the cap. (The old minPrice/maxPrice bisection silently truncated cheap skins: >60 listings can share a sub-$0.02 reference band, which no price window can split — floats almost never tie, so the cursor always makes progress.) cd worker .venv\\Scripts\\Activate.ps1 pip install -r requirements.txt python csmoney_worker.py """ import json import re import urllib.parse from blworker import ScrapeResult, Worker, click, page_fetch, run PAGE = ("https://cs.money/market/buy/?search={search}" "&order=asc&sort=float&minFloat={lo:.12f}&maxFloat=1") PAGE_CAP = 60 # items per SSR page PAGE_PARAMS_RE = re.compile( r']*id="__page-params"[^>]*>(.*?)', re.S) def extract_items(html: str) -> list: """Pull inventory.items out of the page's __page-params JSON blob.""" m = PAGE_PARAMS_RE.search(html) if not m: return [] try: return json.loads(m.group(1)).get("inventory", {}).get("items", []) or [] except json.JSONDecodeError: return [] class CsMoneyWorker(Worker): name = "csmoney" jobs_path = "/jobs" default_market_url = "https://cs.money/market/buy/" def describe_job(self, job) -> str: return f"search {job['search']!r}" async def dismiss_consent(self, page) -> str | None: """Privacy-preserving. The banner only offers 'Accept all' / 'Manage cookies'; the Reject-all control lives inside the Manage window. So: Manage -> Reject all -> Confirm. (The data path reads SSR __page-params regardless, but this keeps the session honest and unblocks any future interaction.)""" steps = [] if await click(page, "Manage cookies") or await click(page, "Manage"): await page.sleep(1) if await click(page, "Reject all"): steps.append("reject-all") for c in ("Confirm my choice", "Confirm", "Save"): if await click(page, c): steps.append(f"confirm:{c}") break return ", ".join(steps) if steps else None async def scrape_job(self, page, job) -> ScrapeResult: """Scrape ALL listings for one skin+wear via a forward float cursor. Grab the 60 lowest-float items at/above `lo`, advance `lo` to the highest float on the page, repeat until a page is under the cap. The boundary item is re-fetched (minFloat is inclusive) and dropped by the id dedup.""" search = urllib.parse.quote_plus(job["search"]) max_fetches = job.get("maxPages", 40) # safety cap on page fetches per job seen: dict = {} fetches = 0 wire = 0 lo = 0.0 reason = "completed" while fetches < max_fetches: _status, body, wbytes = await page_fetch(page, PAGE.format(search=search, lo=lo)) fetches += 1 if wbytes > 0: wire += wbytes if "Just a moment" in body or "challenge-platform" in body: return ScrapeResult(list(seen.values()), fetches, "challenged", wire) items = extract_items(body) floats = [] for it in items: if it.get("id") is not None: seen[it["id"]] = it fl = (it.get("asset") or {}).get("float") if isinstance(fl, (int, float)): floats.append(fl) if len(items) < PAGE_CAP: break # last page — fewer than the cap means we've seen everything # Advance the cursor past the highest float on this page. Items at exactly that # float are re-fetched next round (minFloat is inclusive) and deduped by id. nxt = max(floats) if floats else None if nxt is None or nxt <= lo: # Cursor can't advance: >60 listings share a single float value, or the # items carry no float. Bail loudly rather than spin — a flagged gap beats # a silent one (this is the failure the price-window version hid). reason = "stuck-float-tie" break lo = nxt await self._pace(page) else: reason = "fetch-cap" return ScrapeResult(list(seen.values()), fetches, reason, wire) if __name__ == "__main__": run(CsMoneyWorker)