# utils/twitter_fetcher.py
"""Search X/Twitter for recent Miku tweets and scrape their image URLs.

Uses twscrape (authenticated via an exported cookie file) to run the search,
then drives a headless Firefox via Playwright to visit each tweet page and
collect full-size ``pbs.twimg.com`` media URLs.
"""
import asyncio
import json
from pathlib import Path
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

from twscrape import API, gather, Account
from playwright.async_api import async_playwright

# Browser-exported cookie dump (JSON list of {name, value, ...} objects)
# expected to sit next to this module.
COOKIE_PATH = Path(__file__).parent / "x.com.cookies.json"

# Requests blocked during scraping purely to speed up page loads.
_BLOCKED_RESOURCE_TYPES = frozenset({"font", "stylesheet"})
_BLOCKED_URL_FRAGMENTS = ("analytics", "googletagmanager", "ads-twitter")


def _as_large(src: str) -> str:
    """Return *src* with its ``name`` query parameter forced to ``large``.

    Robust replacement for the old ``src.split("&name=")[0] + "&name=large"``
    hack, which silently produced a duplicate ``name`` parameter whenever
    ``name`` was the URL's *first* query parameter (``?name=small``) and
    depended on parameter order in general.
    """
    scheme, netloc, path, query, fragment = urlsplit(src)
    # Drop any existing 'name' value, keep everything else in order.
    params = [(k, v) for k, v in parse_qsl(query) if k != "name"]
    params.append(("name", "large"))
    return urlunsplit((scheme, netloc, path, urlencode(params), fragment))


async def _filter_route(route, request):
    """Abort requests for blocked resource types / tracking hosts; else pass."""
    blocked = request.resource_type in _BLOCKED_RESOURCE_TYPES or any(
        fragment in request.url for fragment in _BLOCKED_URL_FRAGMENTS
    )
    if blocked:
        await route.abort()
    else:
        await route.continue_()


async def extract_media_urls(page, tweet_url):
    """Visit *tweet_url* on *page* and return its de-duplicated media URLs.

    Parameters
    ----------
    page : playwright.async_api.Page
        An already-open Playwright page, reused across tweets.
    tweet_url : str
        Canonical URL of the tweet to scrape.

    Returns
    -------
    list[str]
        Unique media image URLs rewritten to the ``name=large`` variant.
        Empty on any navigation/scraping error — a single bad tweet page
        must not abort the whole run (best-effort by design).
    """
    print(f"๐Ÿ” Visiting tweet page: {tweet_url}")
    try:
        await page.goto(tweet_url, timeout=15000)
        # Brief settle time so lazily-loaded <img> elements appear in the DOM.
        await page.wait_for_timeout(1000)
        media_elements = await page.query_selector_all(
            "img[src*='pbs.twimg.com/media']"
        )
        urls = set()
        for element in media_elements:
            src = await element.get_attribute("src")
            if src:
                urls.add(_as_large(src))
        print(f"๐Ÿ–ผ๏ธ Found {len(urls)} media URLs on tweet: {tweet_url}")
        return list(urls)
    except Exception as e:
        # Deliberate broad catch: log and continue with the next tweet.
        print(f"โŒ Playwright error on {tweet_url}: {e}")
        return []


async def fetch_miku_tweets(limit=5):
    """Search for Miku tweets and return those with scrapeable media.

    Parameters
    ----------
    limit : int, optional
        Maximum number of tweets requested from the search (default 5).

    Returns
    -------
    list[dict]
        One dict per tweet that yielded at least one media URL, with keys
        ``username``, ``text``, ``url`` and ``media`` (list of image URLs).
    """
    # Flatten the exported cookie list into a single Cookie header string.
    with open(COOKIE_PATH, "r", encoding="utf-8") as f:
        cookie_list = json.load(f)
    cookie_header = "; ".join(f"{c['name']}={c['value']}" for c in cookie_list)

    # Register a cookie-authenticated account with twscrape; the password /
    # email fields are placeholders and are not used for cookie auth.
    api = API()
    await api.pool.add_account(
        username="HSankyuu39",
        password="x",  # placeholder (won't be used)
        email="x",  # optional
        email_password="x",  # optional
        cookies=cookie_header,
    )
    await api.pool.login_all()

    print(f"๐Ÿ”Ž Searching for Miku tweets (limit={limit})...")
    # NOTE(review): 'has:images' and 'after:2025' look like API-v2 operators;
    # the web-search equivalents are 'filter:images' / 'since:'. Left as-is
    # pending confirmation against twscrape's search backend.
    query = 'Hatsune Miku OR ๅˆ้ŸณใƒŸใ‚ฏ has:images after:2025'
    tweets = await gather(api.search(query, limit=limit, kv={"product": "Top"}))
    print(f"๐Ÿ“„ Found {len(tweets)} tweets, launching browser...")

    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=True)
        try:
            context = await browser.new_context()
            # Block fonts/stylesheets and tracker hosts to speed up loads.
            await context.route("**/*", _filter_route)
            page = await context.new_page()

            results = []
            for i, tweet in enumerate(tweets, 1):
                username = tweet.user.username
                tweet_url = f"https://twitter.com/{username}/status/{tweet.id}"
                print(f"๐Ÿงต Processing tweet {i}/{len(tweets)} from @{username}")
                media_urls = await extract_media_urls(page, tweet_url)
                if media_urls:
                    results.append({
                        "username": username,
                        "text": tweet.rawContent,
                        "url": tweet_url,
                        "media": media_urls,
                    })
        finally:
            # Ensure the browser dies even if context/page setup raised;
            # the original skipped close() on any such failure.
            await browser.close()

    print(f"โœ… Finished! Returning {len(results)} tweet(s) with media.")
    return results