Twitter changed their JavaScript response format to include unquoted keys in JSON objects, which breaks twscrape's parser. This fix applies a monkey patch that uses regex to quote the unquoted keys before parsing. This resolves the issue preventing figurine notifications from being sent for the past several days. Reference: https://github.com/vladkens/twscrape/issues/284
42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
# utils/twscrape_fix.py
"""
Monkey patch for twscrape to fix the "Failed to parse scripts" error.

Twitter started returning malformed JSON with unquoted keys.
See: https://github.com/vladkens/twscrape/issues/284
"""
|
|
|
|
import json
|
|
import re
|
|
|
|
|
|
def script_url(k: str, v: str) -> str:
    """Build the URL of a client-web script on abs.twimg.com.

    Args:
        k: First dotted component of the script file name.
        v: Second dotted component of the script file name.

    Returns:
        ``https://abs.twimg.com/responsive-web/client-web/{k}.{v}.js``.
    """
    return f"https://abs.twimg.com/responsive-web/client-web/{k}.{v}.js"
|
|
|
|
|
|
# Matches either a complete double-quoted string (consumed so that its
# contents are never rewritten) or a bare object key that directly follows
# "{" or "," and is followed by ":".
_UNQUOTED_KEY_RE = re.compile(r'"(?:[^"\\]|\\.)*"|([{,]\s*)([\w$]+)(\s*:)')


def _quote_unquoted_keys(scripts: str) -> str:
    """Wrap bare object keys in double quotes so ``json.loads`` accepts them."""

    def _fix(match):
        # Group 2 is only set for the bare-key alternative; a quoted string
        # matched by the first alternative is returned unchanged.
        if match.group(2) is None:
            return match.group(0)
        return f'{match.group(1)}"{match.group(2)}"{match.group(3)}'

    return _UNQUOTED_KEY_RE.sub(_fix, scripts)


def patched_get_scripts_list(text: str):
    """Fixed version that handles unquoted keys in Twitter's JSON response.

    Extracts the object literal located between the ``e=>e+"."+`` and
    ``[e]+"a.js"`` markers in ``text`` and parses it as JSON. If strict
    parsing fails, bare keys are quoted first and parsing is retried.

    Args:
        text: Script text containing the two markers.

    Yields:
        ``script_url(k, f"{v}a")`` for every ``k: v`` entry of the object.

    Raises:
        IndexError: If the ``e=>e+"."+`` marker is not present in ``text``.
        json.decoder.JSONDecodeError: If the object is still not valid JSON
            after quoting bare keys.
    """
    scripts = text.split('e=>e+"."+')[1].split('[e]+"a.js"')[0]

    try:
        mapping = json.loads(scripts)
    except json.decoder.JSONDecodeError:
        # Fix unquoted keys like:
        # node_modules_pnpm_ws_8_18_0_node_modules_ws_browser_js
        # Any bare key is quoted, not only those containing an underscore.
        mapping = json.loads(_quote_unquoted_keys(scripts))

    for k, v in mapping.items():
        yield script_url(k, f"{v}a")
|
|
|
|
|
|
def apply_twscrape_fix() -> None:
    """Apply the monkey patch to twscrape.

    Replaces ``twscrape.xclid.get_scripts_list`` with
    ``patched_get_scripts_list``. The operation is best-effort: a failure to
    import or patch is reported on stdout instead of being raised.
    """
    try:
        # Imported lazily so that this module can be loaded even when
        # twscrape is not importable.
        from twscrape import xclid

        xclid.get_scripts_list = patched_get_scripts_list
    except Exception as e:
        # Deliberately broad: a failed patch must not take the caller down.
        print(f"⚠️ Failed to apply twscrape monkey patch: {e}")
    else:
        # Reported outside the try body so that a problem while printing is
        # not misreported as a failure to apply the patch.
        print("✅ Applied twscrape monkey patch for 'Failed to parse scripts' fix")
|