# Operation-Blue-Laminate-v2/WeaponGrabber/WeaponScraper.py

"""Print every CS2 weapon listed on the Counter-Strike wiki.

Requires: pip install curl_cffi beautifulsoup4

Uses curl_cffi instead of requests because the wiki sits behind Cloudflare,
which blocks Python's default TLS fingerprint with a 403 even when the
User-Agent header looks like a browser.
"""
import re

from bs4 import BeautifulSoup
from curl_cffi import requests

# Wiki page that cs2_weapons() downloads and scrapes.
URL = "https://counterstrike.fandom.com/wiki/Weapons"
# Value compared against each tab's `data-hash` attribute to pick the tab
# whose content is scraped.
TAB_HASH = "Global_Offensive_&_Counter-Strike_2"
# Matches a trailing "(CT)" or "(T)" annotation, plus surrounding whitespace,
# at the end of a caption (presumably a team marker -- confirm against the page).
ANNOTATION_RE = re.compile(r"\s*\((?:CT|T)\)\s*$")
# Matches a leading "Stock" word and the whitespace after it.
STOCK_PREFIX_RE = re.compile(r"^Stock\s+")


def _owned_by(tabber, nodes):
    """Keep only the nodes whose nearest enclosing ``div.tabber`` is *tabber*."""
    return [
        node
        for node in nodes
        if node.find_parent("div", class_="tabber") is tabber
    ]


def _parse_weapons(html):
    """Extract the de-duplicated weapon names from the wiki page's HTML.

    For every ``div.tabber`` that has a tab whose ``data-hash`` equals
    ``TAB_HASH``, the pane at the same position is scanned for
    ``div.lightbox-caption`` elements. Each caption's text is cleaned by
    removing a trailing ``(CT)``/``(T)`` annotation and a leading ``Stock``
    prefix.

    Args:
        html: Markup of the page as a string.

    Returns:
        A list of cleaned names (str) in first-seen order, without duplicates
        and without empty entries.
    """
    soup = BeautifulSoup(html, "html.parser")

    weapons, seen = [], set()
    for tabber in soup.select("div.tabber"):
        # Tabs and panes are paired by position. Both lookups search all
        # descendants, so a tabber nested inside one of this tabber's panes
        # would inject its own tabs/panes and shift the indices; restrict both
        # lists to the elements that belong to this tabber itself.
        tabs = _owned_by(tabber, tabber.select("li.wds-tabs__tab"))
        idx = next(
            (i for i, t in enumerate(tabs) if t.get("data-hash") == TAB_HASH),
            None,
        )
        if idx is None:
            continue
        contents = _owned_by(
            tabber, tabber.find_all("div", class_="wds-tab__content")
        )
        if idx >= len(contents):
            # Fewer panes than tabs: no pane to pair with the matched tab.
            continue
        for cap in contents[idx].select("div.lightbox-caption"):
            name = cap.get_text(" ", strip=True)
            name = ANNOTATION_RE.sub("", name)
            name = STOCK_PREFIX_RE.sub("", name).strip()
            if not name:
                continue
            if name not in seen:
                seen.add(name)
                weapons.append(name)
    return weapons


def cs2_weapons():
    """Download the wiki page at ``URL`` and return its weapon names.

    The request is made through ``curl_cffi`` with Chrome impersonation and a
    30-second timeout.

    Returns:
        A list of weapon names (str) in page order, without duplicates.

    Raises:
        Whatever ``requests.get`` raises for a failed request, and whatever
        ``raise_for_status()`` raises for an error HTTP status.
    """
    resp = requests.get(URL, impersonate="chrome", timeout=30)
    resp.raise_for_status()
    return _parse_weapons(resp.text)


if __name__ == "__main__":
    # Script entry point: emit one weapon name per line, then the total count.
    names = cs2_weapons()
    if names:
        print("\n".join(names))

    print(len(names))