# utils/twscrape_fix.py
"""
Monkey patch for twscrape to fix "Failed to parse scripts" error.

Twitter started returning malformed JSON with unquoted keys.
See: https://github.com/vladkens/twscrape/issues/284

Usage (bot/utils/twitter_fetcher.py) -- run the patch ahead of the module's
own ``from twscrape import ...`` line:

    # Apply twscrape fix BEFORE importing twscrape
    from utils.twscrape_fix import apply_twscrape_fix
    apply_twscrape_fix()

    from twscrape import API, gather, Account
"""

import json
import re
from typing import Iterator

# Markers that surround the chunk-name -> hash object literal in the page text.
_SCRIPTS_START = 'e=>e+"."+'
_SCRIPTS_END = '[e]+"a.js"'

# Alternative 1 consumes a complete double-quoted string so that nothing inside
# a string is ever rewritten.  Alternative 2 captures a bare (unquoted) object
# key: a "{" or "," delimiter, the key itself, and the ":" that follows it.
_STRING_OR_BARE_KEY = re.compile(r'"(?:[^"\\]|\\.)*"|([{,]\s*)([\w$]+)(\s*:)')


def script_url(k: str, v: str) -> str:
    """Return the abs.twimg.com client-web URL for chunk name *k* and hash *v*."""
    return f"https://abs.twimg.com/responsive-web/client-web/{k}.{v}.js"


def _quote_bare_keys(scripts: str) -> str:
    """Wrap every unquoted object key in *scripts* in double quotes.

    Double-quoted strings are copied through unchanged, so text inside a
    string that merely looks like ``,key:`` is left alone.
    """

    def _fix(match: "re.Match[str]") -> str:
        if match.group(2) is None:
            # The string alternative matched: keep it verbatim.
            return match.group(0)
        return f'{match.group(1)}"{match.group(2)}"{match.group(3)}'

    return _STRING_OR_BARE_KEY.sub(_fix, scripts)


def patched_get_scripts_list(text: str) -> Iterator[str]:
    """Yield script URLs parsed from the chunk map embedded in *text*.

    Fixed version that handles unquoted keys in Twitter's JSON response.
    The object literal between the two markers is parsed as strict JSON
    first; if that fails, bare keys are quoted and it is parsed once more.

    Args:
        text: Page/script text containing the ``e=>e+"."+{...}[e]+"a.js"``
            fragment.

    Yields:
        One URL per entry, built from the key and the value with ``"a"``
        appended.

    Raises:
        IndexError: if the start marker is not present in *text*.
        json.JSONDecodeError: if the literal is still not valid JSON after
            the bare keys have been quoted.
    """
    scripts = text.split(_SCRIPTS_START)[1].split(_SCRIPTS_END)[0]

    # Only the parse is guarded; yielding happens outside the try block so
    # unrelated errors are never mistaken for a parse failure.
    try:
        chunks = json.loads(scripts)
    except json.JSONDecodeError:
        # Fix unquoted keys like:
        # node_modules_pnpm_ws_8_18_0_node_modules_ws_browser_js
        chunks = json.loads(_quote_bare_keys(scripts))

    for k, v in chunks.items():
        yield script_url(k, f"{v}a")


def apply_twscrape_fix():
    """Apply the monkey patch to twscrape.

    Rebinds ``twscrape.xclid.get_scripts_list`` to
    ``patched_get_scripts_list``.  This is best-effort: any failure (for
    example twscrape not being importable) is reported on stdout and
    swallowed so the importing module can still load.

    NOTE(review): this function imports ``twscrape`` itself, so the patch
    presumably only needs to run before twscrape first calls
    ``get_scripts_list`` -- confirm against twscrape's ``xclid`` module.
    """
    try:
        from twscrape import xclid
        xclid.get_scripts_list = patched_get_scripts_list
        print("✅ Applied twscrape monkey patch for 'Failed to parse scripts' fix")
    except Exception as e:  # deliberate best-effort boundary
        print(f"⚠️ Failed to apply twscrape monkey patch: {e}")