Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
80 lines
2.9 KiB
Python
80 lines
2.9 KiB
Python
"""
|
|
Validate the float-cursor scrape by walking the float axis in BOTH directions and
|
|
comparing the recovered sell-order id sets. If ascending (lowest float first) and
|
|
descending (highest float first) independently land on the same listings, the
|
|
cursor is exhaustive and order-independent — i.e. the count is real, not an artifact
|
|
of walk direction or boundary double-counting.
|
|
|
|
python verify_crosscheck.py "Glock-18 Candy Apple mw"
|
|
"""
|
|
|
|
import asyncio
import sys
import urllib.parse
from decimal import ROUND_CEILING, ROUND_FLOOR, Decimal

import nodriver as uc

import worker
|
|
|
|
# Page size taken from the worker module; walk() treats a response with fewer
# items than this as the last page.
CAP = worker.PAGE_CAP

# Market-search prefix shared by both walk directions; {q} is the URL-encoded
# search string.
_SEARCH_URL = "https://cs.money/market/buy/?search={q}"

# Ascending walk: sorted lowest float first, {cur} is the moving lower bound.
ASC = _SEARCH_URL + "&order=asc&sort=float&minFloat={cur:.12f}&maxFloat=1"

# Descending walk: sorted highest float first, {cur} is the moving upper bound.
DESC = _SEARCH_URL + "&order=desc&sort=float&minFloat=0&maxFloat={cur:.12f}"
|
|
|
|
|
|
def _cursor_bound(cur, ascending):
    """Round *cur* to 12 decimals toward the side that keeps it inclusive.

    The URL templates print the cursor with ``{cur:.12f}``, which rounds to
    nearest. Rounding a lower bound up (or an upper bound down) would move the
    bound past the boundary float and drop listings tied at exactly that float,
    so the value is floored for ascending walks and ceiled for descending ones
    before it is formatted.
    """
    rounding = ROUND_FLOOR if ascending else ROUND_CEILING
    # Decimal(float) is exact, so the floor/ceiling is taken on the true value.
    bounded = Decimal(cur).quantize(Decimal("1e-12"), rounding=rounding)
    # Back to float so any template format spec keeps working; the 12-decimal
    # value survives the round trip when printed with .12f.
    return float(bounded)


async def walk(page, q, template, ascending, max_fetches=60):
    """Walk the float axis in one direction, collecting listings by id.

    Args:
        page: browser page handed to ``worker.fetch_json``; its ``sleep`` is
            awaited between fetches.
        q: URL-encoded search string substituted into ``template``.
        template: URL template with ``{q}`` and ``{cur}`` placeholders.
        ascending: True to start at 0.0 and move the cursor up, False to start
            at 1.0 and move it down.
        max_fetches: upper limit on the number of requests.

    Returns:
        A ``(seen, fetches, reason)`` tuple: ``seen`` maps listing id to the
        item dict, ``fetches`` is the number of requests made, and ``reason``
        is one of ``"challenged"``, ``"completed"``, ``"stuck"`` or
        ``"fetch-cap"``.
    """
    seen = {}
    cur = 0.0 if ascending else 1.0
    fetches = 0
    while fetches < max_fetches:
        url = template.format(q=q, cur=_cursor_bound(cur, ascending))
        # NOTE(review): the status is not inspected here; an error response
        # whose body yields fewer than CAP items would be reported as
        # "completed" — confirm against worker.fetch_json's contract.
        _status, body = await worker.fetch_json(page, url)
        fetches += 1

        # Challenge-page markers in the body mean this response is not data.
        if "Just a moment" in body or "challenge-platform" in body:
            return seen, fetches, "challenged"

        items = worker.extract_items(body)
        floats = []
        for it in items:
            # Keyed by id, so boundary items re-fetched on the next page are
            # de-duplicated rather than double-counted.
            if it.get("id") is not None:
                seen[it["id"]] = it
            fl = (it.get("asset") or {}).get("float")
            if isinstance(fl, (int, float)):
                floats.append(fl)

        # A short page is taken as the end of the axis.
        if len(items) < CAP:
            return seen, fetches, "completed"

        # The next cursor is the furthest float reached on this page.
        nxt = (max(floats) if ascending else min(floats)) if floats else None
        # No usable float, or no movement past the current cursor: another
        # request would just repeat this one.
        if nxt is None or (ascending and nxt <= cur) or (not ascending and nxt >= cur):
            return seen, fetches, "stuck"

        cur = nxt
        await page.sleep(worker.DELAY)
    return seen, fetches, "fetch-cap"
|
|
|
|
|
|
async def main():
    """Run the ascending and descending walks and print the cross-check.

    The search string comes from the command-line arguments joined by spaces,
    falling back to "Glock-18 Candy Apple mw". A non-headless browser is
    started with image loading disabled, ``worker.warm`` is awaited on a blank
    page, then both walks run on that same page. The browser is stopped even
    if a walk raises.

    The verdict is AGREE or DISAGREE only when both walks returned
    "completed"; any other outcome is reported as INCONCLUSIVE, because a
    partial id set cannot confirm or refute the count.
    """
    search = " ".join(sys.argv[1:]) or "Glock-18 Candy Apple mw"
    # Call the stdlib directly instead of reaching through worker.urllib,
    # which only works as long as the worker module happens to import it.
    q = urllib.parse.quote_plus(search)
    browser = await uc.start(
        headless=False,
        browser_args=["--blink-settings=imagesEnabled=false"],
    )
    try:
        page = await browser.get("about:blank")
        await worker.warm(page)

        asc, fa, ra = await walk(page, q, ASC, ascending=True)
        print(f"ASC : {len(asc):4d} ids {fa} fetches {ra}")
        desc, fd, rd = await walk(page, q, DESC, ascending=False)
        print(f"DESC: {len(desc):4d} ids {fd} fetches {rd}")

        a, d = set(asc), set(desc)
        union = a | d
        print("\n=== cross-check ===")
        print(f" ASC only: {len(a - d)}")
        print(f" DESC only: {len(d - a)}")
        print(f" in both: {len(a & d)}")
        print(f" UNION (distinct):{len(union)}")

        # Two walks that both ended early (e.g. challenged with empty sets)
        # compare equal, so the set comparison only means something when both
        # walks ran to completion.
        if ra != "completed" or rd != "completed":
            agree = f"INCONCLUSIVE — walk did not complete (ASC: {ra}, DESC: {rd})"
        elif a == d:
            agree = "AGREE — count is solid"
        else:
            agree = "DISAGREE — one walk missed listings"
        print(f" verdict: {agree}")
    finally:
        browser.stop()
|
|
|
|
|
|
# Script entry point: drive main() to completion on the loop returned by
# nodriver's uc.loop().
if __name__ == "__main__":
    event_loop = uc.loop()
    event_loop.run_until_complete(main())
|