init efcore
This commit is contained in:
54
WeaponGrabber/WeaponScraper.py
Normal file
54
WeaponGrabber/WeaponScraper.py
Normal file
@@ -0,0 +1,54 @@
"""Print every CS2 weapon listed on the Counter-Strike wiki.

Requires: pip install curl_cffi beautifulsoup4

Uses curl_cffi instead of requests because the wiki sits behind Cloudflare,
which blocks Python's default TLS fingerprint with a 403 even when the
User-Agent header looks like a browser.

When run as a script, prints one weapon name per line followed by the
total number of names.
"""
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from curl_cffi import requests
|
||||
|
||||
# Wiki page that cs2_weapons() downloads and scrapes.
URL = "https://counterstrike.fandom.com/wiki/Weapons"
|
||||
# Value of the ``data-hash`` attribute on the tab whose content pane is read
# in each tabber; tabbers without a tab carrying this value are skipped.
TAB_HASH = "Global_Offensive_&_Counter-Strike_2"
|
||||
# Matches a trailing "(CT)" or "(T)" marker, plus any whitespace around it, at
# the end of a caption so it can be removed from the weapon name.
# NOTE(review): presumably a team-availability annotation -- confirm on the wiki.
ANNOTATION_RE = re.compile(r"\s*\((?:CT|T)\)\s*$")
|
||||
# Matches a leading "Stock" word and the whitespace after it, so the prefix
# can be removed from the weapon name.
STOCK_PREFIX_RE = re.compile(r"^Stock\s+")
|
||||
|
||||
|
||||
def cs2_weapons():
    """Return the weapon names found under the CS2 tab of the wiki page.

    Downloads ``URL`` with a Chrome-impersonating request and inspects every
    ``div.tabber`` on the page. Within a tabber, the position of the tab whose
    ``data-hash`` equals ``TAB_HASH`` selects the ``wds-tab__content`` pane at
    the same position; tabbers that lack such a tab, or that have too few
    panes, are skipped. Each ``div.lightbox-caption`` inside the selected pane
    contributes one name after ``ANNOTATION_RE`` and ``STOCK_PREFIX_RE``
    matches are removed.

    Returns:
        list[str]: unique, non-empty names in the order first encountered.

    Raises:
        Propagates whatever ``requests.get`` or ``raise_for_status()`` raise
        when the download fails or returns an error status.
    """
    response = requests.get(URL, impersonate="chrome", timeout=30)
    response.raise_for_status()
    page = BeautifulSoup(response.text, "html.parser")

    # Dict keys preserve insertion order and ignore repeats, so a single dict
    # provides both the ordered result and the de-duplication.
    found = {}
    for tabber in page.select("div.tabber"):
        # Locate the first tab carrying the wanted hash.
        position = None
        for i, tab in enumerate(tabber.select("li.wds-tabs__tab")):
            if tab.get("data-hash") == TAB_HASH:
                position = i
                break
        if position is None:
            continue

        panes = tabber.find_all("div", class_="wds-tab__content")
        if position >= len(panes):
            continue

        for caption in panes[position].select("div.lightbox-caption"):
            raw = caption.get_text(" ", strip=True)
            # Drop the trailing annotation first, then the leading prefix.
            cleaned = STOCK_PREFIX_RE.sub("", ANNOTATION_RE.sub("", raw)).strip()
            if cleaned:
                found.setdefault(cleaned, None)

    return list(found)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    names = cs2_weapons()
    # One weapon name per line, with the total count as the final line.
    print("\n".join([*names, str(len(names))]))
|
||||
Reference in New Issue
Block a user