"""
Discover how cs.money paginates a filtered search past the initial ~60 SSR items.

Tests two hypotheses against a high-result search (default "ak-47 redline",
which has well over 60 listings):

  A. Does the SSR page honor offset/limit in the URL? Fetch ?search=...&offset=60
     and ?search=...&limit=120 and compare item ids to page 1. If disjoint/larger,
     we can paginate cheaply by re-fetching the page.
  B. The real client "load more": scroll hard to trigger lazy-load and capture any
     cs.money /2.0/ XHR via Resource Timing — that request carries the structured
     filter params + offset, i.e. a lighter direct-API pagination path.

Findings are printed and saved to captures/_pagination.txt.

    cd worker; .venv\\Scripts\\Activate.ps1
    python discover_pagination.py
    $env:SEARCH="ak-47 redline"; python discover_pagination.py   # override the search
"""
import json
import os
import pathlib
import re

import nodriver as uc
from nodriver import cdp

# Run-time configuration, all overridable through environment variables.
SEARCH = os.environ.get("SEARCH", "ak-47 redline")
SOLVE_SECONDS = int(os.environ.get("SOLVE_SECONDS", "30"))
BROWSER_PATH = os.environ.get("BROWSER_PATH")
PROXY = os.environ.get("PROXY")
BASE = "https://cs.money/market/buy/"

# Captures the JSON payload of the element carrying id="__page-params".
# The pattern needs a closing anchor: a lazy group at the very end of a pattern
# always matches the empty string, which made every page parse as "no items".
# NOTE(review): assumes the payload lives in a <script id="__page-params"> element —
# confirm against a captured page.
PAGE_PARAMS_RE = re.compile(
    r'<script[^>]*id="__page-params"[^>]*>(.*?)</script>', re.S)
OUT = pathlib.Path(__file__).parent / "captures"
CONSENT = ["Reject all", "Only necessary", "Reject", "Decline", "Deny"]

# Aggressive scroll: window + every scrollable container (the grid scrolls in a div,
# which is why a plain window.scrollTo didn't trigger lazy-load before).
SCROLL_JS = (
    "window.scrollTo(0, document.body.scrollHeight);"
    "document.querySelectorAll('*').forEach(e=>{"
    " if (e.scrollHeight > e.clientHeight + 80) e.scrollTop = e.scrollHeight;});")

# JS expression listing the URL of every Resource Timing entry recorded so far.
_RESOURCE_NAMES_JS = "performance.getEntriesByType('resource').map(e=>e.name)"


async def js(page, expr):
    """Evaluate the JS expression `expr` in `page` and return its JSON-decoded value.

    The expression is wrapped in JSON.stringify so the result crosses the
    browser boundary as a plain string. Returns None when the evaluation does
    not yield a string or the string is not valid JSON.
    """
    raw = await page.evaluate(f"JSON.stringify({expr})")
    if not isinstance(raw, str):
        return None
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return None


async def fetch_text(page, url):
    """Fetch `url` from inside the page (cookies included); return (status, body).

    Returns (None, "") when the in-page result cannot be decoded into the
    expected {status, body} object.
    """
    # json.dumps (not Python repr) produces a literal that is always valid JS.
    expr = (f"fetch({json.dumps(url)},{{credentials:'include'}}).then(async r=>"
            "JSON.stringify({status:r.status, body:await r.text()}))")
    raw = await page.evaluate(expr, await_promise=True)
    try:
        payload = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        return None, ""
    if not isinstance(payload, dict):
        return None, ""
    return payload.get("status"), payload.get("body", "")


def page_item_ids(html):
    """Return the `id` of every item in the page-params payload found in `html`.

    Reads payload["inventory"]["items"]. Returns [] when the payload is
    missing, is not valid JSON, or does not have that shape.
    """
    found = PAGE_PARAMS_RE.search(html or "")
    if not found:
        return []
    try:
        params = json.loads(found.group(1))
    except json.JSONDecodeError:
        return []
    # Guard each level: a null/non-object level must not crash the whole run.
    inventory = params.get("inventory") if isinstance(params, dict) else None
    items = inventory.get("items") if isinstance(inventory, dict) else None
    if not isinstance(items, list):
        return []
    return [item.get("id") for item in items if isinstance(item, dict)]


async def click_visible(page, pattern):
    """Click the first VISIBLE element whose trimmed text matches `pattern`
    (case-insensitive); return True if something was clicked.

    nodriver's find() was matching hidden/duplicate nodes; restricting to
    offsetParent!=null + short text hits the real button.
    """
    expr = ("JSON.stringify((()=>{"
            "const re=new RegExp(" + json.dumps(pattern) + ",'i');"
            "const els=[...document.querySelectorAll('button,a,[role=\"button\"],span,div')];"
            "const b=els.find(e=>e.offsetParent!==null && (e.textContent||'').trim().length<40 "
            "&& re.test((e.textContent||'').trim()));"
            "if(b){b.click();return true}return false})())")
    r = await page.evaluate(expr)
    return isinstance(r, str) and "true" in r


async def banner_present(page):
    """Return True while the page text still contains the cookie-banner wording."""
    r = await page.evaluate(
        "JSON.stringify(/Manage cookies|Accept all/i.test(document.body.innerText||''))")
    return isinstance(r, str) and "true" in r


async def dismiss(page):
    """Dismiss the cookie banner and return a comma-separated log of the steps taken.

    Privacy-preserving first (Manage -> Reject all -> Confirm); if the banner is
    still up, fall back to Accept all so the page becomes interactive (discovery
    needs scrolling to work).
    """
    steps = []
    if await click_visible(page, "manage cookies|^manage$"):
        steps.append("manage")
        await page.sleep(1.2)
    if await click_visible(page, "reject all"):
        steps.append("reject-all")
        await page.sleep(0.4)
    for label in ("confirm my choice", "^confirm$", "^save$"):
        if await click_visible(page, label):
            steps.append("confirm")
            break
    await page.sleep(1)
    if await banner_present(page):
        accepted = await click_visible(page, "accept all|^accept$")
        steps.append("still-up->accept" if accepted else "still-up")
        await page.sleep(0.5)
    steps.append("gone" if not await banner_present(page) else "STILL-PRESENT")
    return ", ".join(steps)


def _is_market_request(url):
    """True for cs.money resource URLs that are not metrics/traces telemetry."""
    return "cs.money" in url and "metrics." not in url and "traces." not in url


async def _probe_url_params(page, q):
    """Hypothesis A: check whether the SSR page honors offset/limit; return findings."""
    _, h0 = await fetch_text(page, f"{BASE}?search={q}")
    _, h1 = await fetch_text(page, f"{BASE}?search={q}&offset=60")
    _, h2 = await fetch_text(page, f"{BASE}?search={q}&limit=120")
    a, b, c = page_item_ids(h0), page_item_ids(h1), page_item_ids(h2)
    overlap = len(set(a) & set(b))
    findings = [
        f"page1 ids={len(a)} offset=60 ids={len(b)} (overlap with page1={overlap}) limit=120 ids={len(c)}"]
    if not a:
        # With no page-1 ids the overlap is vacuously 0, so "disjoint" proves nothing.
        findings.append(" -> page1 yielded no ids (payload not parsed?) — offset check inconclusive")
    findings.append(f" -> offset works? {'YES (disjoint)' if a and b and overlap == 0 else 'no/ignored'}")
    findings.append(f" -> limit works? {'YES (>60)' if len(c) > 60 else 'no/ignored'}")
    return findings


async def _probe_lazy_load(page):
    """Hypothesis B: wheel-scroll gradually and report new cs.money requests."""
    # Infinite scroll only fires on GRADUAL downward scrolling — jumping to the
    # bottom skips the trigger. So step down in small wheel increments and watch
    # the item count grow.
    before = set(await js(page, _RESOURCE_NAMES_JS) or [])

    async def card_count():
        return await page.evaluate(
            "JSON.stringify(document.querySelectorAll('[href*=\"/item/\"],[class*=\"item\" i]').length)")

    async def new_requests():
        names = await js(page, _RESOURCE_NAMES_JS) or []
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(
            u for u in names if u not in before and _is_market_request(u)))

    print(f" cards before scroll: {await card_count()}")
    wheel_error = None
    for step in range(60):
        try:
            await page.send(cdp.input_.dispatch_mouse_event(
                type_="mouseWheel", x=720, y=450, delta_x=0, delta_y=500))
        except Exception as exc:  # best-effort: keep scrolling, but say so once
            if wheel_error is None:
                wheel_error = exc
                print(f" wheel dispatch failed at step {step + 1}: {exc!r}")
        await page.sleep(0.7)
        if step % 15 == 14:
            now = await new_requests()
            print(f" step {step+1}: cards={await card_count()} new cs.money reqs={len(now)}")

    new_xhrs = await new_requests()
    findings = [f"\nclient requests after scrolling ({len(new_xhrs)} new cs.money):"]
    findings.extend(f" {u}" for u in new_xhrs)
    if not new_xhrs:
        findings.append(" (none — grid may not lazy-load via XHR, or scroll didn't reach the trigger)")
    if wheel_error is not None:
        findings.append(f" (wheel events failed, first error: {wheel_error!r})")
    return findings


async def main():
    """Run both pagination probes in a real browser, then print and save the findings."""
    from urllib.parse import quote_plus

    OUT.mkdir(exist_ok=True)
    args = [f"--proxy-server={PROXY}"] if PROXY else []
    args.append("--blink-settings=imagesEnabled=false")
    q = quote_plus(SEARCH)
    browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH,
                             browser_args=args)
    try:
        url0 = f"{BASE}?search={q}"
        page = await browser.get(url0)
        print(f"Warming on {url0} ({SOLVE_SECONDS}s for Cloudflare)...")
        await page.sleep(SOLVE_SECONDS)
        print(f"Consent: {await dismiss(page)}")

        # --- A. URL offset/limit on the SSR page ---
        findings = await _probe_url_params(page, q)
        # --- B. Trigger client load-more, capture cs.money /2.0/ XHRs ---
        findings += await _probe_lazy_load(page)

        report = "\n".join(findings)
        print("\n=== FINDINGS ===\n" + report)
        (OUT / "_pagination.txt").write_text(f"search: {SEARCH}\n\n{report}\n", encoding="utf-8")
        print(f"\nsaved to {OUT / '_pagination.txt'}")
    finally:
        browser.stop()


if __name__ == "__main__":
    uc.loop().run_until_complete(main())