Files
Operation-Blue-Laminate-v2/WeaponGrabber/WeaponScraper.py
2026-05-29 12:21:42 -05:00

54 lines
1.6 KiB
Python

"""Print every CS2 weapon listed on the Counter-Strike wiki.
Requires: pip install curl_cffi beautifulsoup4
Uses curl_cffi instead of requests because the wiki sits behind Cloudflare,
which blocks Python's default TLS fingerprint with a 403 even when the
User-Agent header looks like a browser.
"""
import re
from bs4 import BeautifulSoup
from curl_cffi import requests
# Wiki page fetched by cs2_weapons().
URL = "https://counterstrike.fandom.com/wiki/Weapons"
# `data-hash` attribute value identifying the tab whose content is scraped.
TAB_HASH = "Global_Offensive_&_Counter-Strike_2"
# Trailing "(CT)" or "(T)" annotation (presumably a team marker), including
# any whitespace around it, anchored at the end of the caption.
ANNOTATION_RE = re.compile(r"\s*\((?:CT|T)\)\s*$")
# Leading "Stock" followed by at least one whitespace character.
STOCK_PREFIX_RE = re.compile(r"^Stock\s+")
def cs2_weapons():
    """Return the weapon names found under the wiki's CS2 tab.

    Downloads ``URL`` while impersonating Chrome, then inspects every
    ``div.tabber`` on the page.  For each tabber that has a tab whose
    ``data-hash`` equals ``TAB_HASH``, the content panel at the same position
    is scanned for ``div.lightbox-caption`` elements.  Each caption's text has
    a trailing ``(CT)``/``(T)`` annotation and a leading ``Stock`` prefix
    removed before it is recorded.

    Returns:
        A list of unique, non-empty names in the order they were first seen.

    Raises:
        Any exception raised by the HTTP request itself or by
        ``raise_for_status()`` on an error response is propagated.
    """
    page = requests.get(URL, impersonate="chrome", timeout=30)
    page.raise_for_status()
    document = BeautifulSoup(page.text, "html.parser")

    # Dict keys keep first-insertion order, which gives ordered de-duplication.
    ordered = {}
    for tabber in document.select("div.tabber"):
        # NOTE(review): both lookups below search all descendants, so a tabber
        # nested inside another one could skew the tab/panel pairing --
        # confirm against the live page markup.
        hashes = [tab.get("data-hash") for tab in tabber.select("li.wds-tabs__tab")]
        if TAB_HASH not in hashes:
            continue
        position = hashes.index(TAB_HASH)

        panels = tabber.find_all("div", class_="wds-tab__content")
        if position >= len(panels):
            # Fewer panels than tabs: nothing to pair the tab with.
            continue

        for caption in panels[position].select("div.lightbox-caption"):
            label = caption.get_text(" ", strip=True)
            # Drop the trailing annotation first, then the leading prefix.
            label = STOCK_PREFIX_RE.sub("", ANNOTATION_RE.sub("", label)).strip()
            if label:
                ordered.setdefault(label, None)
    return list(ordered)
if __name__ == "__main__":
    # Script mode: print one weapon name per line, then the total count.
    names = cs2_weapons()
    if names:
        print("\n".join(names))
    print(len(names))