Initial commit: Miku Discord Bot

2025-12-07 17:15:09 +02:00
commit 8c74ad5260
206 changed files with 50125 additions and 0 deletions
--- a/bot/utils/image_generation.py
+++ b/bot/utils/image_generation.py
@@ -0,0 +1,402 @@
+"""
+Image Generation System for Miku Bot
+Natural language detection and ComfyUI integration
+"""
+
+import aiohttp
+import asyncio
+import glob
+import json
+import os
+import re
+import tempfile
+import time
+from typing import Optional, Tuple
+import globals
+from utils.llm import query_ollama
+
+# Regular expressions that flag a message as a natural-language image request.
+# Each one is applied with search() (not anchored) and compiled
+# case-insensitively below; a single hit is enough.
+IMAGE_REQUEST_PATTERNS = [
+    # Direct requests
+    r'\b(?:draw|generate|create|make|show me|paint|sketch|illustrate)\b.*\b(?:image|picture|art|artwork|drawing|painting|illustration)\b',
+    # "to see" and the article are separate optional parts, each followed by
+    # whitespace, so "I want to see a picture" matches as well as
+    # "I want a picture" and "I want to see art".
+    r'\b(?:i\s+(?:want|would like|need)\s+(?:to see\s+)?(?:an?\s+)?(?:image|picture|art|artwork|drawing|painting|illustration))\b',
+    r'\b(?:can you|could you|please)\s+(?:draw|generate|create|make|show me|paint|sketch|illustrate)\b',
+    r'\b(?:image|picture|art|artwork|drawing|painting|illustration)\s+of\b',
+
+    # Visual requests about Miku
+    r'\b(?:show me|let me see)\s+(?:you|miku|yourself)\b',
+    r'\b(?:what do you look like|how do you look)\b',
+    r'\b(?:i\s+(?:want|would like)\s+to see)\s+(?:you|miku|yourself)\b',
+    r'\bsee\s+(?:you|miku|yourself)(?:\s+(?:in|with|doing|wearing))?\b',
+
+    # Activity-based visual requests
+    r'\b(?:you|miku|yourself)\s+(?:swimming|dancing|singing|playing|wearing|in|with|doing)\b.*\b(?:pool|water|stage|outfit|clothes|dress)\b',
+    r'\b(?:visualize|envision|imagine)\s+(?:you|miku|yourself)\b',
+
+    # Artistic requests
+    r'\b(?:artistic|art|visual)\s+(?:representation|depiction|version)\s+of\s+(?:you|miku|yourself)\b',
+]
+
+# Compile once at import time so each incoming message only pays for search()
+COMPILED_PATTERNS = [re.compile(pattern, re.IGNORECASE) for pattern in IMAGE_REQUEST_PATTERNS]
+
+async def detect_image_request(message_content: str) -> Tuple[bool, Optional[str]]:
+    """
+    Decide whether a chat message is asking for an image.
+
+    The message is lower-cased and stripped, rejected outright when fewer
+    than 5 characters remain, and otherwise searched with every entry of
+    COMPILED_PATTERNS.
+
+    Returns:
+        Tuple[bool, Optional[str]]: (True, prompt) as soon as any pattern
+        matches, where prompt is extract_image_prompt() applied to the
+        original message; (False, None) otherwise.
+    """
+    normalized = message_content.lower().strip()
+
+    # Very short messages are never treated as requests
+    too_short = len(normalized) < 5
+    if too_short or not any(rx.search(normalized) for rx in COMPILED_PATTERNS):
+        return False, None
+
+    # The prompt is built from the untouched message, not the lowered copy
+    return True, extract_image_prompt(message_content)
+
+# Leading phrases stripped (in this order, at most once each) before the text
+# is used as a prompt. Every alternative ends on a word boundary so that only
+# whole words are removed: "draw apples" must not become "pples" and
+# "draw an artichoke" must not become "ichoke".
+_PROMPT_PREFIX_PATTERNS = tuple(
+    re.compile(pattern, re.IGNORECASE)
+    for pattern in (
+        r'^(?:hey\s+)?miku\b,?\s*',
+        r'^(?:can you|could you|please|would you)\b\s*',
+        r'^i\s+(?:want|would like|need)\s+(?:(?:to see|you to|an?)\b\s*)?',
+        r'^(?:show me|let me see)\b\s*',
+        r'^(?:draw|generate|create|make|paint|sketch|illustrate)\b\s*'
+        r'(?:me\b\s*)?(?:an?\b\s*)?'
+        r'(?:(?:image|picture|art|artwork|drawing|painting|illustration)\b\s*)?'
+        r'(?:of\b\s*)?',
+    )
+)
+
+
+def extract_image_prompt(message_content: str) -> str:
+    """
+    Turn a conversational image request into an image generation prompt.
+
+    Greeting and request phrases ("hey miku,", "can you", "i want to see",
+    "show me", "draw me a picture of", ...) are removed from the start of
+    the message. If fewer than 10 characters remain, the prompt falls back to
+    "Hatsune Miku". When the original message addressed the bot as "you" or
+    "yourself" and the result does not already mention Miku, those words are
+    replaced with "Hatsune Miku".
+
+    Args:
+        message_content: The raw user message.
+
+    Returns:
+        str: The cleaned prompt.
+    """
+    content = message_content.strip()
+
+    cleaned = content
+    for prefix_re in _PROMPT_PREFIX_PATTERNS:
+        # Patterns are anchored at the start, so at most one removal happens
+        cleaned = prefix_re.sub('', cleaned, count=1).strip()
+
+    # If the cleaned prompt is too short or generic, fall back to the default subject
+    if len(cleaned) < 10 or cleaned.lower() in ('you', 'yourself', 'miku'):
+        cleaned = "Hatsune Miku"
+
+    # "you"/"yourself" in the request refers to the bot: name it explicitly
+    addressed_bot = re.search(r'\b(?:you|yourself)\b', content, re.IGNORECASE)
+    if addressed_bot and not re.search(r'\bmiku\b', cleaned, re.IGNORECASE):
+        cleaned = re.sub(r'\b(?:you|yourself)\b', 'Hatsune Miku', cleaned, flags=re.IGNORECASE)
+
+    return cleaned
+
+def find_latest_generated_image(prompt_id: str, expected_filename: str = None) -> Optional[str]:
+    """
+    Locate a generated image in the ComfyUI output directories.
+
+    "ComfyUI/output" and "/app/ComfyUI/output" are scanned in that order
+    (top level only) for .png, .jpg, .jpeg and .webp files. Within a
+    directory, a file named exactly expected_filename wins; failing that, the
+    newest file modified during the last 10 minutes is returned. Errors
+    while scanning a directory are printed and that directory is skipped.
+
+    Args:
+        prompt_id: Accepted for the caller's convenience; not consulted here.
+        expected_filename: Optional bare file name to prefer.
+
+    Returns:
+        Optional[str]: Path of the chosen image, or None when nothing fits.
+    """
+    suffixes = ('.png', '.jpg', '.jpeg', '.webp')
+
+    for output_dir in ("ComfyUI/output", "/app/ComfyUI/output"):
+        if not os.path.exists(output_dir):
+            continue
+
+        try:
+            candidates = [
+                path
+                for suffix in suffixes
+                for path in glob.glob(os.path.join(output_dir, f"*{suffix}"))
+            ]
+            if not candidates:
+                continue
+
+            # Newest first
+            newest_first = sorted(candidates, key=os.path.getmtime, reverse=True)
+
+            # An exact name match takes priority over recency
+            if expected_filename:
+                exact = next(
+                    (path for path in newest_first if os.path.basename(path) == expected_filename),
+                    None,
+                )
+                if exact is not None:
+                    return exact
+
+            # Otherwise accept the newest file touched within the last 10 minutes
+            cutoff = time.time() - 600
+            file_path = next(
+                (path for path in newest_first if os.path.getmtime(path) > cutoff),
+                None,
+            )
+            if file_path is not None:
+                print(f"🎨 Found recent image: {file_path}")
+                return file_path
+
+        except Exception as e:
+            print(f"⚠️ Error searching in {output_dir}: {e}")
+            continue
+
+    return None
+
+def _inject_prompt(workflow_data, prompt: str):
+    """Return a copy of the workflow with every _POSITIVEPROMPT_ replaced by prompt."""
+    workflow_json = json.dumps(workflow_data)
+    # The substitution happens inside serialized JSON, so the prompt has to be
+    # escaped as the body of a JSON string (json.dumps adds the surrounding
+    # quotes, which are sliced off). Without this, a quote, backslash or
+    # newline in the user's text corrupts the document.
+    escaped_prompt = json.dumps(prompt)[1:-1]
+    return json.loads(workflow_json.replace("_POSITIVEPROMPT_", escaped_prompt))
+
+
+async def _find_comfyui_url(candidate_urls) -> Optional[str]:
+    """Return the first base URL whose /system_stats answers 200, or None."""
+    for url in candidate_urls:
+        try:
+            async with aiohttp.ClientSession() as test_session:
+                probe_timeout = aiohttp.ClientTimeout(total=2)
+                async with test_session.get(f"{url}/system_stats", timeout=probe_timeout) as test_response:
+                    if test_response.status == 200:
+                        print(f"✅ ComfyUI found at: {url}")
+                        return url
+        except Exception:
+            # Unreachable candidate: try the next one. `except Exception`
+            # (not a bare except) lets task cancellation propagate.
+            continue
+    return None
+
+
+def _resolve_output_image(image_info) -> Optional[str]:
+    """Map one image entry from ComfyUI's history to an existing local file, or None."""
+    filename = image_info["filename"]
+    subfolder = image_info.get("subfolder", "")
+    relative = os.path.join(subfolder, filename) if subfolder else filename
+
+    # Same relative location under both possible mount points
+    image_path = os.path.join("ComfyUI", "output", relative)
+    alt_path = os.path.join("/app/ComfyUI/output", relative)
+
+    for candidate in (image_path, alt_path):
+        if os.path.exists(candidate):
+            print(f"✅ Image generated successfully: {candidate}")
+            return candidate
+
+    print(f"⚠️ Generated image not found at expected paths: {image_path} or {alt_path}")
+    return None
+
+
+async def generate_image_with_comfyui(prompt: str) -> Optional[str]:
+    """
+    Generate an image using ComfyUI with the provided prompt.
+
+    The workflow template "Miku_BasicWorkflow.json" is loaded, its
+    _POSITIVEPROMPT_ placeholder is filled in, and the result is posted to
+    the first reachable ComfyUI instance. The /history endpoint is then
+    polled every 2 seconds for up to 5 minutes. When the reported file cannot
+    be found on disk, find_latest_generated_image() is used as a fallback.
+
+    Args:
+        prompt: The image generation prompt
+
+    Returns:
+        Optional[str]: Path to the generated image file, or None if failed
+        (missing template, ComfyUI unreachable, request rejected, timeout
+        without a usable file, or any unexpected error, which is printed).
+    """
+    try:
+        # Load the workflow template
+        workflow_path = "Miku_BasicWorkflow.json"
+        if not os.path.exists(workflow_path):
+            print(f"❌ Workflow template not found: {workflow_path}")
+            return None
+
+        with open(workflow_path, 'r', encoding='utf-8') as f:
+            workflow_data = json.load(f)
+
+        # Prepare the request payload
+        payload = {"prompt": _inject_prompt(workflow_data, prompt)}
+
+        # Send request to ComfyUI (try different Docker networking options)
+        comfyui_urls = [
+            "http://host.docker.internal:8188",  # Docker Desktop
+            "http://172.17.0.1:8188",            # Default Docker bridge gateway
+            "http://localhost:8188"              # Fallback (if network_mode: host)
+        ]
+
+        comfyui_url = await _find_comfyui_url(comfyui_urls)
+        if not comfyui_url:
+            print(f"❌ ComfyUI not reachable at any of: {comfyui_urls}")
+            return None
+
+        async with aiohttp.ClientSession() as session:
+            # Submit the generation request
+            async with session.post(f"{comfyui_url}/prompt", json=payload) as response:
+                if response.status != 200:
+                    print(f"❌ ComfyUI request failed: {response.status}")
+                    return None
+                result = await response.json()
+
+            prompt_id = result.get("prompt_id")
+            if not prompt_id:
+                print("❌ No prompt_id received from ComfyUI")
+                return None
+
+            print(f"🎨 ComfyUI generation started with prompt_id: {prompt_id}")
+
+            # Poll for completion (timeout after 5 minutes)
+            poll_timeout = 300  # 5 minutes
+            start_time = time.time()
+            # File name reported by ComfyUI, handed to the fallback search so
+            # it can prefer this job's file over any other recent image
+            expected_filename = None
+
+            while time.time() - start_time < poll_timeout:
+                async with session.get(f"{comfyui_url}/history/{prompt_id}") as hist_response:
+                    history = await hist_response.json() if hist_response.status == 200 else {}
+
+                # The prompt id appears in the history once the job has finished
+                if prompt_id in history:
+                    outputs = history[prompt_id].get("outputs", {})
+
+                    # Image outputs live in nodes that carry an "images" list
+                    for node_output in outputs.values():
+                        images = node_output.get("images")
+                        if not images:
+                            continue
+                        # Only the first image of each node is considered
+                        expected_filename = images[0]["filename"]
+                        image_path = _resolve_output_image(images[0])
+                        if image_path:
+                            return image_path
+
+                    # If we couldn't find the image via API, try the fallback method
+                    print("🔍 Image not found via API, trying fallback method...")
+                    fallback_image = find_latest_generated_image(prompt_id, expected_filename)
+                    if fallback_image:
+                        return fallback_image
+
+                # Wait before polling again
+                await asyncio.sleep(2)
+
+            print("❌ ComfyUI generation timed out")
+
+            # Final fallback: look for the most recent image
+            print("🔍 Trying final fallback: most recent image...")
+            fallback_image = find_latest_generated_image(prompt_id, expected_filename)
+            if fallback_image:
+                print(f"✅ Found image via fallback method: {fallback_image}")
+                return fallback_image
+
+            return None
+
+    except Exception as e:
+        print(f"❌ Error in generate_image_with_comfyui: {e}")
+        return None
+
+async def handle_image_generation_request(message, prompt: str) -> bool:
+    """
+    Handle the complete image generation workflow for a user request.
+
+    Sends an LLM-written acknowledgement, generates the image while showing
+    the typing indicator, then posts either the image with an LLM-written
+    comment or an LLM-written apology. Successful DM requests are also
+    recorded through dm_logger.
+
+    Args:
+        message: Discord message object
+        prompt: Extracted image prompt
+
+    Returns:
+        bool: True if image was successfully generated and sent
+    """
+    try:
+        # Generate a contextual response about what we're creating
+        is_dm = message.guild is None
+        guild_id = message.guild.id if message.guild else None
+        user_id = str(message.author.id)
+
+        # Create a response about starting image generation
+        response_prompt = f"A user asked you to create an image with this description: '{prompt}'. Respond enthusiastically that you're creating this image for them. Keep it short and excited!"
+
+        response_type = "dm_response" if is_dm else "server_response"
+        initial_response = await query_ollama(response_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
+
+        # Send initial response
+        await message.channel.send(initial_response)
+
+        # Start typing to show we're working
+        async with message.channel.typing():
+            # Generate the image
+            print(f"🎨 Starting image generation for prompt: {prompt}")
+            image_path = await generate_image_with_comfyui(prompt)
+
+            if not (image_path and os.path.exists(image_path)):
+                # Image generation failed
+                error_prompt = "You tried to create an image but something went wrong with the generation process. Apologize briefly and suggest they try again later."
+                error_response = await query_ollama(error_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
+                await message.channel.send(error_response)
+
+                print(f"❌ Image generation failed for prompt: {prompt}")
+                return False
+
+            import discord
+
+            # Create a follow-up message about the completed image. This is
+            # done before opening the file so the handle is not held open
+            # while waiting on the LLM.
+            completion_prompt = f"You just finished creating an image based on '{prompt}'. Make a short, excited comment about the completed artwork!"
+            completion_response = await query_ollama(completion_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
+
+            # Keep the real extension: the fallback search can return
+            # .jpg/.jpeg/.webp files, which should not be labelled .png
+            extension = os.path.splitext(image_path)[1] or ".png"
+            with open(image_path, 'rb') as f:
+                file = discord.File(f, filename=f"miku_generated_{int(time.time())}{extension}")
+                await message.channel.send(completion_response, file=file)
+
+            print(f"✅ Image sent successfully to {message.author.display_name}")
+
+            # Log to DM history if it's a DM
+            if is_dm:
+                from utils.dm_logger import dm_logger
+                dm_logger.log_conversation(user_id, message.content, f"{initial_response}\n[Generated image: {prompt}]", attachments=["generated_image.png"])
+
+            return True
+
+    except Exception as e:
+        print(f"❌ Error in handle_image_generation_request: {e}")
+
+        # Best-effort apology; a failure here is reported but never raised.
+        # `except Exception` (not a bare except) lets cancellation propagate.
+        try:
+            await message.channel.send("Sorry, I had trouble creating that image. Please try again later!")
+        except Exception as send_error:
+            print(f"⚠️ Could not send error message: {send_error}")
+
+        return False
+
+async def check_comfyui_status() -> dict:
+    """
+    Check the status of ComfyUI and the workflow template.
+
+    Probes /system_stats on each known ComfyUI address (3 second timeout
+    each) and checks that "Miku_BasicWorkflow.json" exists in the working
+    directory.
+
+    Returns:
+        dict: Status information with the keys "workflow_template_exists",
+        "comfyui_running", "comfyui_url" (the address that answered, or the
+        first candidate when none did) and "ready" (both checks passed).
+        An "error" key with the exception text is added if the check itself
+        fails unexpectedly.
+    """
+    try:
+        # Check if ComfyUI workflow template exists
+        workflow_exists = os.path.exists("Miku_BasicWorkflow.json")
+
+        # Check if ComfyUI is running (try different Docker networking options)
+        comfyui_running = False
+        comfyui_url = "http://host.docker.internal:8188"  # Default
+
+        comfyui_urls = [
+            "http://host.docker.internal:8188",  # Docker Desktop
+            "http://172.17.0.1:8188",            # Default Docker bridge gateway
+            "http://localhost:8188"              # Fallback (if network_mode: host)
+        ]
+
+        for url in comfyui_urls:
+            try:
+                async with aiohttp.ClientSession() as session:
+                    timeout = aiohttp.ClientTimeout(total=3)
+                    async with session.get(f"{url}/system_stats", timeout=timeout) as response:
+                        if response.status == 200:
+                            comfyui_running = True
+                            comfyui_url = url
+                            break
+            except Exception:
+                # Unreachable candidate: try the next one. `except Exception`
+                # (not a bare except) lets task cancellation and
+                # KeyboardInterrupt propagate.
+                continue
+
+        return {
+            "workflow_template_exists": workflow_exists,
+            "comfyui_running": comfyui_running,
+            "comfyui_url": comfyui_url,
+            "ready": workflow_exists and comfyui_running
+        }
+
+    except Exception as e:
+        return {
+            "workflow_template_exists": False,
+            "comfyui_running": False,
+            "comfyui_url": "http://localhost:8188",
+            "ready": False,
+            "error": str(e)
+        }