Add cs.money worker stack with per-worker IPRoyal residential proxy
Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
96
worker/discover_price_param.py
Normal file
96
worker/discover_price_param.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""
|
||||
Find cs.money's price-filter URL param (the basis for price-bucket pagination).
|
||||
|
||||
The market has a Price from/to filter in the sidebar. `search=` works via the URL and
|
||||
the page SSRs the filtered listings into __page-params, so a price param likely works
|
||||
the same way. We baseline the cheapest set, then try candidate param names with a high
|
||||
floor and check whether the returned listings actually shift above it.
|
||||
|
||||
cd worker; .venv\\Scripts\\Activate.ps1
|
||||
python discover_price_param.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
from urllib.parse import quote_plus
|
||||
|
||||
import nodriver as uc
|
||||
|
||||
# --- Runtime knobs, each overridable through the environment ---------------
# Search phrase sent to the market page.
SEARCH: str = os.getenv("SEARCH", "ak-47 redline")
# Price floor recorded in the report header.
FLOOR: float = float(os.getenv("FLOOR", "200"))
# Seconds to wait on the first page load before issuing any probe.
SOLVE_SECONDS: int = int(os.getenv("SOLVE_SECONDS", "30"))
# Optional explicit browser executable; None when the variable is unset.
BROWSER_PATH = os.getenv("BROWSER_PATH")

# --- Fixed values -----------------------------------------------------------
BASE = "https://cs.money/market/buy/"
# Captures the contents of the <script id="__page-params"> tag; DOTALL lets
# the captured payload span multiple lines.
PP = re.compile(
    r'<script\b[^>]*id="__page-params"[^>]*>(.*?)</script>',
    re.DOTALL,
)
# Directory (next to this script) that receives the report file.
OUT = pathlib.Path(__file__).parent.joinpath("captures")

# Param-name variants for a price floor (and a couple of from/to pairs).
CANDIDATES = [
    "minPrice",
    "priceFrom",
    "price_from",
    "priceMin",
    "min_price",
    "priceGte",
    "from",
    "price_min",
    "minprice",
    "price.gte",
    "pricegte",
]
|
||||
|
||||
|
||||
async def fetch_prices(page, url):
    """Fetch *url* from inside the page and return the listings' pricing dicts.

    The request is issued through the browser's own ``fetch`` with
    credentials included. Listings are read from the JSON embedded in the
    response's ``__page-params`` script tag, under ``inventory.items``.

    Args:
        page: Object exposing an awaitable
            ``evaluate(expr, await_promise=True)`` that runs JavaScript in
            the page and returns the resolved value.
        url: URL to request.

    Returns:
        A list holding the ``pricing`` dict of every item that has a
        non-empty one (the list may be empty), or ``None`` when the response
        could not be decoded, carries no ``__page-params`` tag, or its JSON
        does not have the expected shape.
    """
    # json.dumps yields a valid JavaScript string literal for any URL, whereas
    # Python's repr() can emit escapes (e.g. \U0001f600) that JS misreads.
    expr = (f"fetch({json.dumps(url)},{{credentials:'include'}}).then(async r=>"
            "JSON.stringify({status:r.status, body:await r.text()}))")
    raw = await page.evaluate(expr, await_promise=True)

    try:
        envelope = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers evaluate() handing back something other than text.
        return None
    body = envelope.get("body") if isinstance(envelope, dict) else None
    if not isinstance(body, str):
        return None

    match = PP.search(body)
    if match is None:
        return None
    try:
        params = json.loads(match.group(1))
    except json.JSONDecodeError:
        return None

    # Validate each level so an unexpected payload (e.g. "inventory": null)
    # takes the same "return None" path as the other failures instead of
    # raising AttributeError/TypeError.
    if not isinstance(params, dict):
        return None
    inventory = params.get("inventory", {})
    if not isinstance(inventory, dict):
        return None
    items = inventory.get("items", [])
    if not isinstance(items, list):
        return None

    prices = []
    for item in items:
        pricing = item.get("pricing") if isinstance(item, dict) else None
        # Keep only non-empty dicts: callers read fields with pricing.get().
        if pricing and isinstance(pricing, dict):
            prices.append(pricing)
    return prices
|
||||
|
||||
|
||||
def _price_range(prices, field):
    """Return ``(min, max)`` of numeric *field* over *prices*, else ``(None, None)``."""
    vals = [p.get(field) for p in prices if isinstance(p.get(field), (int, float))]
    return (min(vals), max(vals)) if vals else (None, None)


def _fmt_range(label, bounds):
    """Render ``label[lo,hi]`` with two decimals, or ``label[n/a]`` if no range."""
    lo, hi = bounds
    if lo is None or hi is None:
        return f"{label}[n/a]"
    return f"{label}[{lo:.2f},{hi:.2f}]"


async def main():
    """Probe whether ``minPrice``/``maxPrice`` URL params filter the listings.

    Opens the search page in a visible browser, waits ``SOLVE_SECONDS`` for
    the page to settle, then fetches a baseline URL and several
    ``minPrice``/``maxPrice`` variants. For each one it records the item count
    and the min/max of the ``default``, ``computed`` and ``basePrice`` pricing
    fields. The lines are printed as they are produced and written to
    ``captures/_price_param.txt``.

    Only ``minPrice``/``maxPrice`` are tested here; the ``CANDIDATES`` list is
    not consulted by this function.
    """
    OUT.mkdir(exist_ok=True)
    q = quote_plus(SEARCH)
    lines = []
    browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH,
                             browser_args=["--blink-settings=imagesEnabled=false"])
    try:
        page = await browser.get(f"{BASE}?search={q}")
        print(f"Warming ({SOLVE_SECONDS}s)...")
        await page.sleep(SOLVE_SECONDS)

        # Test minPrice/maxPrice semantics directly (old cs.money API used these).
        tests = [
            ("baseline", f"{BASE}?search={q}"),
            ("maxPrice=200", f"{BASE}?search={q}&maxPrice=200"),
            ("minPrice=300", f"{BASE}?search={q}&minPrice=300"),
            ("minPrice=300&maxPrice=400", f"{BASE}?search={q}&minPrice=300&maxPrice=400"),
            ("minPrice=500&maxPrice=1000", f"{BASE}?search={q}&minPrice=500&maxPrice=1000"),
        ]

        for name, url in tests:
            pr = await fetch_prices(page, url)
            if not pr:
                lines.append(f"{name:28} -> no items")
            else:
                # A field missing from every listing yields (None, None);
                # _fmt_range prints "n/a" for it rather than letting the
                # ":.2f" format raise TypeError and abort the whole run.
                lines.append(
                    f"{name:28} -> n={len(pr)} "
                    f"{_fmt_range('default', _price_range(pr, 'default'))} "
                    f"{_fmt_range('computed', _price_range(pr, 'computed'))} "
                    f"{_fmt_range('base', _price_range(pr, 'basePrice'))}"
                )
            print(lines[-1])

        (OUT / "_price_param.txt").write_text(
            f"search={SEARCH} floor={FLOOR}\n\n" + "\n".join(lines), encoding="utf-8")
        print(f"\nsaved to {OUT/'_price_param.txt'}")
    finally:
        # Always shut the browser down, even when a probe raised.
        browser.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: drive main() to completion on the loop from uc.loop().
    event_loop = uc.loop()
    event_loop.run_until_complete(main())
|
||||
Reference in New Issue
Block a user