Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
78 lines
2.6 KiB
Python
78 lines
2.6 KiB
Python
"""
|
|
One-off count verification: scrape a single skin+wear search from cs.money and
|
|
report how many distinct sell-orders come back, reusing the production worker's
|
|
warm-session + price-window bisection logic (worker.scrape_job).
|
|
|
|
Use it to sanity-check that our pagination actually recovers the FULL listing
|
|
count cs.money shows on the site (the known ground truth) for one query.
|
|
|
|
cd worker
|
|
.venv\\Scripts\\Activate.ps1
|
|
python verify_count.py "Desert Eagle Bronze Deco fn"
|
|
|
|
Env knobs (same meaning as worker.py): SOLVE_SECONDS, DELAY, JITTER, PROXY,
BROWSER_PATH, LOAD_IMAGES. MAX_FETCHES caps window fetches (default 80).
CHROME_NO_SANDBOX=1 adds --no-sandbox and --disable-dev-shm-usage to the
browser arguments.
|
|
"""
|
|
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
from collections import Counter
|
|
|
|
import nodriver as uc
|
|
|
|
import worker
|
|
|
|
# Value passed to worker.scrape_job as the job's "maxPages"; read from the
# MAX_FETCHES environment variable, defaulting to 80 when it is unset.
MAX_FETCHES = int(os.getenv("MAX_FETCHES", "80"))
|
|
|
|
|
|
async def main():
    """Scrape one skin+wear search and report how many distinct sell-orders return.

    The search text is the command-line arguments joined by spaces, falling
    back to a built-in sample query when none are given. A browser is started
    through nodriver, the page is warmed with ``worker.warm`` and the search
    is run through ``worker.scrape_job``. The returned items are then tallied
    by full name, by wear (quality code) and by StatTrak flag, and the
    summary is printed to stdout. The browser is stopped on the way out, also
    when warming or scraping raises.
    """
    query = " ".join(sys.argv[1:])
    if not query:
        query = "Desert Eagle Bronze Deco fn"

    # Browser command-line flags, assembled from worker settings and the
    # CHROME_NO_SANDBOX environment variable.
    chrome_flags = []
    if worker.PROXY:
        chrome_flags.append(f"--proxy-server={worker.PROXY}")
    if not worker.LOAD_IMAGES:
        chrome_flags.append("--blink-settings=imagesEnabled=false")
    if os.environ.get("CHROME_NO_SANDBOX") == "1":
        chrome_flags.extend(["--no-sandbox", "--disable-dev-shm-usage"])

    print(f"Verifying count for search {query!r} (proxy={worker.PROXY or 'own IP'})")
    browser = await uc.start(
        headless=False,
        browser_executable_path=worker.BROWSER_PATH,
        browser_args=chrome_flags,
    )
    try:
        page = await browser.get("about:blank")
        await worker.warm(page)

        items, fetches, reason = await worker.scrape_job(
            page, {"search": query, "maxPages": MAX_FETCHES}
        )

        print("\n=== result ===")
        print(f" search: {query}")
        print(f" stopped: {reason}")
        print(f" fetches: {fetches}")
        print(f" DISTINCT sell-orders (deduped by id): {len(items)}")

        # Break the result set down by name and wear so an inflated count
        # caused by off-target names/wears is visible (the C2's name+wear
        # filter would drop those later).
        assets = [entry.get("asset") or {} for entry in items]
        name_counts = Counter((a.get("names") or {}).get("full") for a in assets)
        wear_counts = Counter(a.get("quality") for a in assets)
        stattrak_total = sum(1 for a in assets if a.get("isStatTrak"))

        print(f" StatTrak in set: {stattrak_total}")
        print(" by name:")
        for full_name, count in name_counts.most_common():
            print(f" {count:4d} {full_name}")
        print(" by wear (quality code):")
        for quality, count in wear_counts.most_common():
            print(f" {count:4d} {quality}")
    finally:
        browser.stop()
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run main() to completion on the event loop
    # returned by nodriver's uc.loop().
    event_loop = uc.loop()
    event_loop.run_until_complete(main())
|