Add cs.money worker stack with per-worker IPRoyal residential proxy
Brings up the pull-model scraper: the .NET C2 hands skin+wear jobs to Python nodriver workers that scrape cs.money and post results back, plus the supporting Core/EFCore data model, migrations, and docker-compose orchestration. IPRoyal proxying lets workers scale horizontally with a distinct residential exit IP each: every worker process mints its own sticky session at startup, and an in-process forwarding proxy injects the gateway auth so Chromium talks only to an auth-free localhost endpoint (zero CDP). On a Cloudflare challenge a worker rotates to a fresh session/IP and re-warms. Verified end-to-end against live IPRoyal: distinct US residential exits per worker and IP rotation on demand. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
71
worker/diag_consent.py
Normal file
71
worker/diag_consent.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
Diagnose the cs.money cookie-consent banner so we can dismiss it programmatically.
It's likely a Shadow DOM web component (CookieConsentSystem), which is why
document.querySelectorAll-based clicks miss the real buttons.

Saves:
  captures/_consent.png - screenshot (so we can SEE the banner + button positions)
  captures/_consent.txt - shadow-host tags + every consent-like button found by
                          piercing shadow roots, with center coordinates.

Usage:
  cd worker; .venv\\Scripts\\Activate.ps1
  python diag_consent.py
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
import nodriver as uc
|
||||
|
||||
# Page to load; override with the URL environment variable.
URL = os.environ.get("URL", "https://cs.money/market/buy/?search=ak-47+redline")
# Seconds main() sleeps after navigation before capturing (its log message
# describes this as the Cloudflare wait). Override with SOLVE_SECONDS.
SOLVE_SECONDS = int(os.environ.get("SOLVE_SECONDS", "30"))
# Optional browser executable, passed to uc.start() as browser_executable_path;
# None when BROWSER_PATH is unset.
BROWSER_PATH = os.environ.get("BROWSER_PATH")
# Output directory: "captures" next to this script.
OUT = pathlib.Path(__file__).parent / "captures"
|
||||
|
||||
# JavaScript evaluated in the page. It walks the document and every open
# shadow root it encounters, and returns a JSON string of the form
#   {"shadowHosts": [<unique lowercased host tag names>],
#    "buttons": [{"tag", "text", "x", "y"}, ...]}
# where each "button" is a visible element (non-zero bounding box) whose
# trimmed text is under 40 characters and matches a consent-like phrase, and
# x/y are the rounded viewport coordinates of the element's center.
# NOTE(review): the match is on textContent, so a wrapper element whose only
# text is the button label is reported alongside the button itself.
DEEP_FIND = r"""
JSON.stringify((()=>{
  const hits=[], hosts=[];
  function walk(root){
    root.querySelectorAll('*').forEach(e=>{
      if(e.shadowRoot){ hosts.push(e.tagName.toLowerCase()); walk(e.shadowRoot); }
      const t=(e.textContent||'').trim();
      if(t.length<40 && /accept all|manage cookies|reject all|confirm my choice|^accept$|^manage$/i.test(t)){
        const r=e.getBoundingClientRect();
        if(r.width>0&&r.height>0)
          hits.push({tag:e.tagName, text:t, x:Math.round(r.x+r.width/2), y:Math.round(r.y+r.height/2)});
      }
    });
  }
  walk(document);
  return {shadowHosts:[...new Set(hosts)], buttons:hits};
})())
"""
|
||||
|
||||
|
||||
async def main() -> None:
    """Open URL in a visible browser and dump consent-banner diagnostics.

    Steps:
      1. Ensure the OUT directory exists and start a non-headless browser
         (using BROWSER_PATH when set).
      2. Navigate to URL and sleep SOLVE_SECONDS before capturing.
      3. Save a screenshot to OUT/_consent.png.
      4. Evaluate DEEP_FIND in the page, write the result as indented JSON to
         OUT/_consent.txt, and print the shadow hosts and matched buttons.

    The browser is stopped in a ``finally`` clause, so it is shut down even
    when navigation, capture, or evaluation raises.
    """
    OUT.mkdir(exist_ok=True)
    browser = await uc.start(headless=False, browser_executable_path=BROWSER_PATH)
    try:
        page = await browser.get(URL)
        print(f"Loaded {URL}; waiting {SOLVE_SECONDS}s for Cloudflare...")
        await page.sleep(SOLVE_SECONDS)

        png = str(OUT / "_consent.png")
        await page.save_screenshot(png)
        print(f"screenshot -> {png}")

        raw = await page.evaluate(DEEP_FIND)
        # DEEP_FIND yields a JSON string; anything else (e.g. an error object
        # from the evaluation) is recorded verbatim instead of being parsed.
        info = json.loads(raw) if isinstance(raw, str) else {"error": repr(raw)}
        (OUT / "_consent.txt").write_text(json.dumps(info, indent=2), encoding="utf-8")
        print("shadow hosts:", info.get("shadowHosts"))
        print("consent buttons found:")
        for b in info.get("buttons", []):
            print(f" {b}")
    finally:
        browser.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the diagnostic to completion on the event loop returned by uc.loop().
    uc.loop().run_until_complete(main())
|
||||
Reference in New Issue
Block a user