- Remove Ollama-specific files (Dockerfile.ollama, entrypoint.sh) - Replace all query_ollama imports and calls with query_llama - Remove langchain-ollama dependency from requirements.txt - Update all utility files (autonomous, kindness, image_generation, etc.) - Update README.md documentation references - Maintain backward compatibility alias in llm.py
403 lines
18 KiB
Python
403 lines
18 KiB
Python
"""
|
|
Image Generation System for Miku Bot
|
|
Natural language detection and ComfyUI integration
|
|
"""
|
|
|
|
import aiohttp
|
|
import asyncio
|
|
import glob
|
|
import json
|
|
import os
|
|
import re
|
|
import tempfile
|
|
import time
|
|
from typing import Optional, Tuple
|
|
import globals
|
|
from utils.llm import query_llama
|
|
|
|
# Regex patterns that signal a user is asking for an image, grouped by the
# kind of request they capture. The pattern strings themselves are part of
# runtime behavior and must not be altered.
IMAGE_REQUEST_PATTERNS = [
    # Direct requests
    r'\b(?:draw|generate|create|make|show me|paint|sketch|illustrate)\b.*\b(?:image|picture|art|artwork|drawing|painting|illustration)\b',
    r'\b(?:i\s+(?:want|would like|need)\s+(?:to see|an?\s+)?(?:image|picture|art|artwork|drawing|painting|illustration))\b',
    r'\b(?:can you|could you|please)\s+(?:draw|generate|create|make|show me|paint|sketch|illustrate)\b',
    r'\b(?:image|picture|art|artwork|drawing|painting|illustration)\s+of\b',

    # Visual requests about Miku
    r'\b(?:show me|let me see)\s+(?:you|miku|yourself)\b',
    r'\b(?:what do you look like|how do you look)\b',
    r'\b(?:i\s+(?:want|would like)\s+to see)\s+(?:you|miku|yourself)\b',
    r'\bsee\s+(?:you|miku|yourself)(?:\s+(?:in|with|doing|wearing))?\b',

    # Activity-based visual requests
    r'\b(?:you|miku|yourself)\s+(?:swimming|dancing|singing|playing|wearing|in|with|doing)\b.*\b(?:pool|water|stage|outfit|clothes|dress)\b',
    r'\b(?:visualize|envision|imagine)\s+(?:you|miku|yourself)\b',

    # Artistic requests
    r'\b(?:artistic|art|visual)\s+(?:representation|depiction|version)\s+of\s+(?:you|miku|yourself)\b',
]

# Case-insensitive, pre-compiled versions so per-message detection never
# pays the recompilation cost.
COMPILED_PATTERNS = [
    re.compile(expr, re.IGNORECASE) for expr in IMAGE_REQUEST_PATTERNS
]
async def detect_image_request(message_content: str) -> Tuple[bool, Optional[str]]:
    """
    Decide whether a message is asking for image generation.

    Matches the lowercased message against the pre-compiled natural-language
    patterns; on a hit, a cleaned generation prompt is extracted from the
    original text.

    Returns:
        Tuple[bool, Optional[str]]: (is_image_request, extracted_prompt)
    """
    normalized = message_content.lower().strip()

    # Messages this short cannot contain a meaningful image request.
    if len(normalized) < 5:
        return False, None

    if any(pattern.search(normalized) for pattern in COMPILED_PATTERNS):
        # Build the prompt from the original (non-lowercased) text.
        return True, extract_image_prompt(message_content)

    return False, None
def extract_image_prompt(message_content: str) -> str:
    """
    Turn a natural-language request into an image-generation prompt.

    Strips conversational lead-ins (greetings, politeness, "draw me a
    picture of" phrasing), falls back to a plain "Hatsune Miku" prompt when
    too little text remains, and rewrites "you"/"yourself" so the generator
    knows the subject is Miku.
    """
    original = message_content.strip()

    # Anchored prefixes that carry no visual information; applied in order,
    # once each. The pattern strings are runtime behavior — unchanged.
    leading_noise = [
        r'^(?:hey\s+)?miku,?\s*',
        r'^(?:can you|could you|please|would you)\s*',
        r'^(?:i\s+(?:want|would like|need)\s+(?:to see|you to|an?)?)\s*',
        r'^(?:show me|let me see)\s*',
        r'^(?:draw|generate|create|make|paint|sketch|illustrate)\s*(?:me\s*)?(?:an?\s*)?(?:image|picture|art|artwork|drawing|painting|illustration)?\s*(?:of\s*)?',
    ]

    prompt = original
    for noise in leading_noise:
        prompt = re.sub(noise, '', prompt, flags=re.IGNORECASE).strip()

    # Too short or purely self-referential -> default subject.
    if len(prompt) < 10 or prompt.lower() in ['you', 'yourself', 'miku']:
        prompt = "Hatsune Miku"

    # If the user addressed Miku as "you" and she is not already named in
    # the prompt, substitute her name for the pronoun (not just prepend it).
    if re.search(r'\b(?:you|yourself)\b', original, re.IGNORECASE) and not re.search(r'\bmiku\b', prompt, re.IGNORECASE):
        prompt = re.sub(r'\byou\b', 'Hatsune Miku', prompt, flags=re.IGNORECASE)
        prompt = re.sub(r'\byourself\b', 'Hatsune Miku', prompt, flags=re.IGNORECASE)

    return prompt
def find_latest_generated_image(prompt_id: str, expected_filename: Optional[str] = None) -> Optional[str]:
    """
    Locate the most recently generated image in the ComfyUI output directory.

    Handles cases where the filename reported by the ComfyUI API does not
    match what is actually on disk (e.g. because of Docker volume mounts).

    Args:
        prompt_id: ComfyUI prompt id for this generation. Currently unused;
            kept so callers don't break and for future per-prompt matching.
        expected_filename: Exact basename to prefer when it is known.

    Returns:
        Optional[str]: Path to the matching image file, or None if nothing
        suitable was found.
    """
    # Candidate output locations: host-relative and container-absolute.
    output_dirs = [
        "ComfyUI/output",
        "/app/ComfyUI/output"
    ]

    for output_dir in output_dirs:
        if not os.path.isdir(output_dir):
            continue

        try:
            # Collect every image file in this directory.
            image_extensions = ['.png', '.jpg', '.jpeg', '.webp']
            all_files = []
            for ext in image_extensions:
                all_files.extend(glob.glob(os.path.join(output_dir, f"*{ext}")))

            if not all_files:
                continue

            # Sort by modification time, most recent first.
            all_files.sort(key=os.path.getmtime, reverse=True)

            # Prefer the exact filename the API reported, if present.
            if expected_filename:
                for file_path in all_files:
                    if os.path.basename(file_path) == expected_filename:
                        return file_path

            # Otherwise take the newest image, but only if it was written
            # recently enough to plausibly be this generation's output.
            recent_threshold = time.time() - 600  # 10 minutes
            for file_path in all_files:
                if os.path.getmtime(file_path) > recent_threshold:
                    print(f"🎨 Found recent image: {file_path}")
                    return file_path

        except Exception as e:
            # Best-effort search: log and move on to the next candidate dir.
            print(f"⚠️ Error searching in {output_dir}: {e}")
            continue

    return None
async def generate_image_with_comfyui(prompt: str) -> Optional[str]:
    """
    Generate an image via ComfyUI's HTTP API using the workflow template.

    Loads Miku_BasicWorkflow.json, substitutes the positive prompt, submits
    the workflow to the first reachable ComfyUI instance, then polls the
    history endpoint until the output image appears on disk (or times out).

    Args:
        prompt: The image generation prompt.

    Returns:
        Optional[str]: Path to the generated image file, or None if failed.
    """
    try:
        # Load the workflow template.
        workflow_path = "Miku_BasicWorkflow.json"
        if not os.path.exists(workflow_path):
            print(f"❌ Workflow template not found: {workflow_path}")
            return None

        with open(workflow_path, 'r') as f:
            workflow_data = json.load(f)

        # Replace the prompt placeholder. The substitution happens inside a
        # serialized JSON document, so the prompt must be JSON-escaped first;
        # otherwise quotes/backslashes in user text corrupt the document and
        # json.loads below raises.
        escaped_prompt = json.dumps(prompt)[1:-1]
        workflow_json = json.dumps(workflow_data)
        workflow_json = workflow_json.replace("_POSITIVEPROMPT_", escaped_prompt)
        workflow_data = json.loads(workflow_json)

        # Prepare the request payload.
        payload = {"prompt": workflow_data}

        # Candidate endpoints for different Docker networking setups.
        comfyui_urls = [
            "http://host.docker.internal:8188",  # Docker Desktop
            "http://172.17.0.1:8188",            # Default Docker bridge gateway
            "http://localhost:8188"              # Fallback (if network_mode: host)
        ]

        # Probe each URL until one responds.
        comfyui_url = None
        for url in comfyui_urls:
            try:
                async with aiohttp.ClientSession() as test_session:
                    timeout = aiohttp.ClientTimeout(total=2)
                    async with test_session.get(f"{url}/system_stats", timeout=timeout) as test_response:
                        if test_response.status == 200:
                            comfyui_url = url
                            print(f"✅ ComfyUI found at: {url}")
                            break
            except Exception:
                # Narrowed from a bare except so asyncio.CancelledError is
                # not swallowed; any connection error means "try next URL".
                continue

        if not comfyui_url:
            print(f"❌ ComfyUI not reachable at any of: {comfyui_urls}")
            return None

        async with aiohttp.ClientSession() as session:
            # Submit the generation request.
            async with session.post(f"{comfyui_url}/prompt", json=payload) as response:
                if response.status != 200:
                    print(f"❌ ComfyUI request failed: {response.status}")
                    return None

                result = await response.json()
                prompt_id = result.get("prompt_id")

            if not prompt_id:
                print("❌ No prompt_id received from ComfyUI")
                return None

            print(f"🎨 ComfyUI generation started with prompt_id: {prompt_id}")

            # Poll for completion (timeout after 5 minutes).
            timeout = 300  # 5 minutes
            start_time = time.time()

            while time.time() - start_time < timeout:
                # Check if generation is complete.
                async with session.get(f"{comfyui_url}/history/{prompt_id}") as hist_response:
                    if hist_response.status == 200:
                        history = await hist_response.json()

                        if prompt_id in history:
                            # Generation complete; find the output image.
                            outputs = history[prompt_id].get("outputs", {})

                            # Image outputs live in nodes carrying an "images" key.
                            for node_id, node_output in outputs.items():
                                if "images" in node_output:
                                    images = node_output["images"]
                                    if images:
                                        # Use the first reported image.
                                        image_info = images[0]
                                        filename = image_info["filename"]
                                        subfolder = image_info.get("subfolder", "")

                                        # Construct the full path (adjust for Docker mount).
                                        if subfolder:
                                            image_path = os.path.join("ComfyUI", "output", subfolder, filename)
                                        else:
                                            image_path = os.path.join("ComfyUI", "output", filename)

                                        # Verify the file exists before returning.
                                        if os.path.exists(image_path):
                                            print(f"✅ Image generated successfully: {image_path}")
                                            return image_path

                                        # Alternative mount location inside the container.
                                        alt_path = os.path.join("/app/ComfyUI/output", filename)
                                        if os.path.exists(alt_path):
                                            print(f"✅ Image generated successfully: {alt_path}")
                                            return alt_path

                                        print(f"⚠️ Generated image not found at expected paths: {image_path} or {alt_path}")
                                        continue

                            # API says it finished but no file was found —
                            # fall back to scanning the output directory.
                            print("🔍 Image not found via API, trying fallback method...")
                            fallback_image = find_latest_generated_image(prompt_id)
                            if fallback_image:
                                return fallback_image

                # Wait before polling again.
                await asyncio.sleep(2)

            print("❌ ComfyUI generation timed out")

            # Final fallback: newest image on disk.
            print("🔍 Trying final fallback: most recent image...")
            fallback_image = find_latest_generated_image(prompt_id)
            if fallback_image:
                print(f"✅ Found image via fallback method: {fallback_image}")
                return fallback_image

            return None

    except Exception as e:
        print(f"❌ Error in generate_image_with_comfyui: {e}")
        return None
async def handle_image_generation_request(message, prompt: str) -> bool:
    """
    Run the complete image-generation workflow for one user request.

    Sends an in-character acknowledgement, generates the image via ComfyUI,
    then either uploads the image with a follow-up comment or apologizes on
    failure. DM conversations are logged.

    Args:
        message: Discord message object that triggered the request.
        prompt: Extracted image prompt.

    Returns:
        bool: True if the image was successfully generated and sent.
    """
    try:
        is_dm = message.guild is None
        guild_id = message.guild.id if message.guild else None
        user_id = str(message.author.id)

        # Acknowledge the request before the (slow) generation starts.
        response_prompt = f"A user asked you to create an image with this description: '{prompt}'. Respond enthusiastically that you're creating this image for them. Keep it short and excited!"

        response_type = "dm_response" if is_dm else "server_response"
        initial_response = await query_llama(response_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)

        # Send initial response.
        await message.channel.send(initial_response)

        # Show a typing indicator while the image renders.
        async with message.channel.typing():
            print(f"🎨 Starting image generation for prompt: {prompt}")
            image_path = await generate_image_with_comfyui(prompt)

            if image_path and os.path.exists(image_path):
                import discord

                # Keep the handle open until the upload completes —
                # discord.File reads from it when the message is sent.
                with open(image_path, 'rb') as f:
                    file = discord.File(f, filename=f"miku_generated_{int(time.time())}.png")

                    # Follow-up comment about the completed artwork.
                    completion_prompt = f"You just finished creating an image based on '{prompt}'. Make a short, excited comment about the completed artwork!"
                    completion_response = await query_llama(completion_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)

                    await message.channel.send(completion_response, file=file)

                print(f"✅ Image sent successfully to {message.author.display_name}")

                # Record the exchange in DM history when applicable.
                if is_dm:
                    from utils.dm_logger import dm_logger
                    dm_logger.log_conversation(user_id, message.content, f"{initial_response}\n[Generated image: {prompt}]", attachments=["generated_image.png"])

                return True
            else:
                # Image generation failed.
                error_prompt = "You tried to create an image but something went wrong with the generation process. Apologize briefly and suggest they try again later."
                error_response = await query_llama(error_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
                await message.channel.send(error_response)

                print(f"❌ Image generation failed for prompt: {prompt}")
                return False

    except Exception as e:
        print(f"❌ Error in handle_image_generation_request: {e}")

        # Best-effort user-facing apology; the channel itself may be broken.
        try:
            await message.channel.send("Sorry, I had trouble creating that image. Please try again later!")
        except Exception:
            pass

        return False
async def check_comfyui_status() -> dict:
    """
    Report whether image generation is ready to use.

    Checks that the workflow template exists on disk and probes the known
    ComfyUI endpoints for a live instance.

    Returns:
        dict: Keys 'workflow_template_exists', 'comfyui_running',
            'comfyui_url', 'ready'; plus 'error' if the check itself failed.
    """
    try:
        # The workflow template must exist for generation to work at all.
        workflow_exists = os.path.exists("Miku_BasicWorkflow.json")

        comfyui_running = False
        comfyui_url = "http://host.docker.internal:8188"  # Default

        # Candidate endpoints for different Docker networking setups.
        comfyui_urls = [
            "http://host.docker.internal:8188",  # Docker Desktop
            "http://172.17.0.1:8188",            # Default Docker bridge gateway
            "http://localhost:8188"              # Fallback (if network_mode: host)
        ]

        for url in comfyui_urls:
            try:
                # Module-level aiohttp import is used; the old redundant
                # function-local import was removed.
                async with aiohttp.ClientSession() as session:
                    timeout = aiohttp.ClientTimeout(total=3)
                    async with session.get(f"{url}/system_stats", timeout=timeout) as response:
                        if response.status == 200:
                            comfyui_running = True
                            comfyui_url = url
                            break
            except Exception:
                # Narrowed from a bare except so asyncio.CancelledError is
                # not swallowed; connection errors mean "try the next URL".
                continue

        return {
            "workflow_template_exists": workflow_exists,
            "comfyui_running": comfyui_running,
            "comfyui_url": comfyui_url,
            "ready": workflow_exists and comfyui_running
        }

    except Exception as e:
        return {
            "workflow_template_exists": False,
            "comfyui_running": False,
            "comfyui_url": "http://localhost:8188",
            "ready": False,
            "error": str(e)
        }