Initial commit: Miku Discord Bot
This commit is contained in:
402
bot/utils/image_generation.py
Normal file
402
bot/utils/image_generation.py
Normal file
@@ -0,0 +1,402 @@
|
||||
"""
|
||||
Image Generation System for Miku Bot
|
||||
Natural language detection and ComfyUI integration
|
||||
"""
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
import time
|
||||
from typing import Optional, Tuple
|
||||
import globals
|
||||
from utils.llm import query_ollama
|
||||
|
||||
# Regular expressions that mark a chat message as an image-generation request.
# A message counts as a request as soon as any one of them matches anywhere in
# the text; all of them are compiled case-insensitively further down.
IMAGE_REQUEST_PATTERNS = [
    # Explicit requests: a creation verb and/or an image noun
    r'\b(?:draw|generate|create|make|show me|paint|sketch|illustrate)\b.*\b(?:image|picture|art|artwork|drawing|painting|illustration)\b',
    r'\b(?:i\s+(?:want|would like|need)\s+(?:to see|an?\s+)?(?:image|picture|art|artwork|drawing|painting|illustration))\b',
    r'\b(?:can you|could you|please)\s+(?:draw|generate|create|make|show me|paint|sketch|illustrate)\b',
    r'\b(?:image|picture|art|artwork|drawing|painting|illustration)\s+of\b',

    # Requests to see the bot itself ("you" / "miku" / "yourself")
    r'\b(?:show me|let me see)\s+(?:you|miku|yourself)\b',
    r'\b(?:what do you look like|how do you look)\b',
    r'\b(?:i\s+(?:want|would like)\s+to see)\s+(?:you|miku|yourself)\b',
    # NOTE(review): the trailing group is optional, so a bare "see you"
    # (e.g. in a farewell) matches as well -- confirm this is intended.
    r'\bsee\s+(?:you|miku|yourself)(?:\s+(?:in|with|doing|wearing))?\b',

    # The bot doing something in a visually described setting
    r'\b(?:you|miku|yourself)\s+(?:swimming|dancing|singing|playing|wearing|in|with|doing)\b.*\b(?:pool|water|stage|outfit|clothes|dress)\b',
    r'\b(?:visualize|envision|imagine)\s+(?:you|miku|yourself)\b',

    # "Artistic representation of you" style phrasing
    r'\b(?:artistic|art|visual)\s+(?:representation|depiction|version)\s+of\s+(?:you|miku|yourself)\b',
]

# Compiled once at import time so each incoming message only pays for matching.
COMPILED_PATTERNS = [
    re.compile(source, re.IGNORECASE)
    for source in IMAGE_REQUEST_PATTERNS
]
|
||||
|
||||
async def detect_image_request(message_content: str) -> Tuple[bool, Optional[str]]:
    """
    Decide whether a chat message is asking for an image to be generated.

    The message is lower-cased and stripped, then tested against every entry
    of COMPILED_PATTERNS. Messages shorter than five characters are rejected
    without running any pattern.

    Returns:
        Tuple[bool, Optional[str]]: (True, prompt) when a pattern matches, where
        prompt is extract_image_prompt() applied to the original (not
        lower-cased) message; (False, None) otherwise.
    """
    normalized = message_content.lower().strip()

    # Too short to be a meaningful request, or nothing matched: not an image request.
    if len(normalized) < 5 or not any(rx.search(normalized) for rx in COMPILED_PATTERNS):
        return False, None

    # Build the prompt from the original text so the user's casing is preserved.
    return True, extract_image_prompt(message_content)
|
||||
|
||||
# Lead-in phrases stripped from the start of a request, applied once each in
# this order. Every keyword is closed with \b so only whole words are removed;
# without the boundary "draw anime girl" lost the "an" of "anime", "make
# mermaid" lost its "me", and "an artwork of" stopped after "art".
_PROMPT_PREFIX_PATTERNS = tuple(
    re.compile(source, re.IGNORECASE)
    for source in (
        r'^(?:hey\s+)?miku\b,?\s*',
        r'^(?:can you|could you|please|would you)\b\s*',
        r'^i\s+(?:want|would like|need)\s+(?:(?:to see|you to|an?)\b)?\s*',
        r'^(?:show me|let me see)\b\s*',
        r'^(?:draw|generate|create|make|paint|sketch|illustrate)\b\s*'
        r'(?:me\b\s*)?(?:an?\b\s*)?'
        r'(?:(?:image|picture|art|artwork|drawing|painting|illustration)\b\s*)?'
        r'(?:of\b\s*)?',
    )
)


def extract_image_prompt(message_content: str) -> str:
    """
    Extract and clean the image prompt from the user's message.

    Conversational lead-ins ("hey miku,", "can you", "I want to see",
    "draw me a picture of", ...) are stripped from the front of the message
    so that only the subject of the image remains.

    Args:
        message_content: The raw text of the user's message.

    Returns:
        str: The cleaned prompt. If fewer than 10 characters remain after
        stripping, the prompt falls back to "Hatsune Miku". If the original
        message addressed the bot as "you"/"yourself" and the cleaned prompt
        does not already mention Miku, those words are replaced with
        "Hatsune Miku".
    """
    content = message_content.strip()

    cleaned = content
    for prefix in _PROMPT_PREFIX_PATTERNS:
        cleaned = prefix.sub('', cleaned).strip()

    # Nothing usable left (or only a pronoun): default to a portrait of Miku.
    if len(cleaned) < 10 or cleaned.lower() in ('you', 'yourself', 'miku'):
        cleaned = "Hatsune Miku"

    # The image model does not know who "you" is, so name the subject explicitly.
    addressed_bot = re.search(r'\b(?:you|yourself)\b', content, re.IGNORECASE)
    if addressed_bot and not re.search(r'\bmiku\b', cleaned, re.IGNORECASE):
        cleaned = re.sub(r'\b(?:you|yourself)\b', 'Hatsune Miku', cleaned, flags=re.IGNORECASE)

    return cleaned
|
||||
|
||||
def find_latest_generated_image(prompt_id: str, expected_filename: str = None) -> Optional[str]:
    """
    Locate a generated image on disk when the path reported by the API is unusable.

    Each known ComfyUI output directory is scanned (top level only) for
    .png/.jpg/.jpeg/.webp files, newest first. Within a directory, a file whose
    basename equals expected_filename wins; otherwise the newest file modified
    within the last 10 minutes is returned.

    Args:
        prompt_id: ComfyUI prompt id of the generation (not used by the search).
        expected_filename: Optional basename to look for before falling back
            to the most recent image.

    Returns:
        Optional[str]: Path of the matching image, or None if no directory
        yields one.
    """
    suffixes = ('.png', '.jpg', '.jpeg', '.webp')

    for output_dir in ("ComfyUI/output", "/app/ComfyUI/output"):
        if not os.path.exists(output_dir):
            continue

        try:
            # Collect every image file directly inside this directory
            candidates = [
                path
                for suffix in suffixes
                for path in glob.glob(os.path.join(output_dir, f"*{suffix}"))
            ]
            if not candidates:
                continue

            newest_first = sorted(candidates, key=os.path.getmtime, reverse=True)

            # An exact filename match takes priority over recency
            if expected_filename:
                for path in newest_first:
                    if os.path.basename(path) == expected_filename:
                        return path

            # Otherwise accept only images written within the last 10 minutes
            cutoff = time.time() - 600
            for path in newest_first:
                if os.path.getmtime(path) > cutoff:
                    print(f"🎨 Found recent image: {path}")
                    return path

        except Exception as e:
            # A failure in one directory must not stop the search in the next
            print(f"⚠️ Error searching in {output_dir}: {e}")
            continue

    return None
|
||||
|
||||
async def generate_image_with_comfyui(prompt: str) -> Optional[str]:
    """
    Generate an image using ComfyUI with the provided prompt.

    Loads the "Miku_BasicWorkflow.json" template, substitutes the prompt for
    the "_POSITIVEPROMPT_" placeholder, submits the workflow to the first
    reachable ComfyUI endpoint and polls its history every 2 seconds (for up
    to 5 minutes) until an output image can be found on disk.

    Args:
        prompt: The image generation prompt

    Returns:
        Optional[str]: Path to the generated image file, or None if failed.
        Any unexpected error is logged and also results in None.
    """
    try:
        # Load the workflow template
        workflow_path = "Miku_BasicWorkflow.json"
        if not os.path.exists(workflow_path):
            print(f"❌ Workflow template not found: {workflow_path}")
            return None

        with open(workflow_path, 'r') as f:
            workflow_data = json.load(f)

        # Replace the prompt placeholder. The substitution happens inside
        # serialized JSON, so the prompt must be JSON-escaped first: a raw
        # quote, backslash or newline in user text would otherwise corrupt
        # (or inject into) the workflow document.
        escaped_prompt = json.dumps(prompt)[1:-1]
        workflow_json = json.dumps(workflow_data).replace("_POSITIVEPROMPT_", escaped_prompt)
        payload = {"prompt": json.loads(workflow_json)}

        # Candidate ComfyUI endpoints (different Docker networking options)
        comfyui_urls = [
            "http://host.docker.internal:8188",  # Docker Desktop
            "http://172.17.0.1:8188",  # Default Docker bridge gateway
            "http://localhost:8188"  # Fallback (if network_mode: host)
        ]

        async with aiohttp.ClientSession() as session:
            # Try each URL until one answers
            comfyui_url = None
            probe_timeout = aiohttp.ClientTimeout(total=2)
            for url in comfyui_urls:
                try:
                    async with session.get(f"{url}/system_stats", timeout=probe_timeout) as probe:
                        if probe.status == 200:
                            comfyui_url = url
                            print(f"✅ ComfyUI found at: {url}")
                            break
                except Exception:
                    # Unreachable endpoint: try the next one. `Exception`
                    # (not a bare except) so task cancellation still propagates.
                    continue

            if not comfyui_url:
                print(f"❌ ComfyUI not reachable at any of: {comfyui_urls}")
                return None

            # Submit the generation request
            async with session.post(f"{comfyui_url}/prompt", json=payload) as response:
                if response.status != 200:
                    print(f"❌ ComfyUI request failed: {response.status}")
                    return None
                result = await response.json()

            prompt_id = result.get("prompt_id")
            if not prompt_id:
                print("❌ No prompt_id received from ComfyUI")
                return None

            print(f"🎨 ComfyUI generation started with prompt_id: {prompt_id}")

            # Poll for completion (timeout after 5 minutes). A monotonic clock
            # keeps the deadline immune to wall-clock adjustments.
            poll_timeout = 300  # 5 minutes
            started = time.monotonic()

            while time.monotonic() - started < poll_timeout:
                async with session.get(f"{comfyui_url}/history/{prompt_id}") as hist_response:
                    history = await hist_response.json() if hist_response.status == 200 else {}

                if prompt_id in history:
                    # Generation complete, find the output image
                    outputs = history[prompt_id].get("outputs", {})

                    # Look for image outputs (nodes carrying an "images" list)
                    for node_output in outputs.values():
                        images = node_output.get("images")
                        if not images:
                            continue

                        # Only the first image of each node is considered
                        image_info = images[0]
                        filename = image_info["filename"]
                        subfolder = image_info.get("subfolder", "")

                        # Relative path first, then the absolute container
                        # mount; both honour the subfolder (an empty
                        # subfolder is dropped by os.path.join).
                        image_path = os.path.join("ComfyUI", "output", subfolder, filename)
                        alt_path = os.path.join("/app/ComfyUI/output", subfolder, filename)

                        for candidate in (image_path, alt_path):
                            if os.path.exists(candidate):
                                print(f"✅ Image generated successfully: {candidate}")
                                return candidate

                        print(f"⚠️ Generated image not found at expected paths: {image_path} or {alt_path}")

                    # If we couldn't find the image via API, try the fallback method
                    print("🔍 Image not found via API, trying fallback method...")
                    fallback_image = find_latest_generated_image(prompt_id)
                    if fallback_image:
                        return fallback_image

                # Wait before polling again
                await asyncio.sleep(2)

            print("❌ ComfyUI generation timed out")

            # Final fallback: look for the most recent image
            print("🔍 Trying final fallback: most recent image...")
            fallback_image = find_latest_generated_image(prompt_id)
            if fallback_image:
                print(f"✅ Found image via fallback method: {fallback_image}")
                return fallback_image

            return None

    except Exception as e:
        print(f"❌ Error in generate_image_with_comfyui: {e}")
        return None
|
||||
|
||||
async def handle_image_generation_request(message, prompt: str) -> bool:
    """
    Handle the complete image generation workflow for a user request.

    Sends an LLM-written acknowledgement, generates the image while showing a
    typing indicator, then posts the image with a second LLM-written comment.
    On failure an apology is sent instead.

    Args:
        message: Discord message object
        prompt: Extracted image prompt

    Returns:
        bool: True if image was successfully generated and sent, False if
        generation failed or any step raised.
    """
    try:
        is_dm = message.guild is None
        guild_id = message.guild.id if message.guild else None
        user_id = str(message.author.id)
        response_type = "dm_response" if is_dm else "server_response"

        # Tell the user we are starting before the (slow) generation begins
        response_prompt = f"A user asked you to create an image with this description: '{prompt}'. Respond enthusiastically that you're creating this image for them. Keep it short and excited!"
        initial_response = await query_ollama(response_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
        await message.channel.send(initial_response)

        # Start typing to show we're working
        async with message.channel.typing():
            print(f"🎨 Starting image generation for prompt: {prompt}")
            image_path = await generate_image_with_comfyui(prompt)

            if not (image_path and os.path.exists(image_path)):
                # Image generation failed
                error_prompt = "You tried to create an image but something went wrong with the generation process. Apologize briefly and suggest they try again later."
                error_response = await query_ollama(error_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)
                await message.channel.send(error_response)

                print(f"❌ Image generation failed for prompt: {prompt}")
                return False

            import discord

            # Create a follow-up message about the completed image
            completion_prompt = f"You just finished creating an image based on '{prompt}'. Make a short, excited comment about the completed artwork!"
            completion_response = await query_ollama(completion_prompt, user_id=user_id, guild_id=guild_id, response_type=response_type)

            # Build the attachment from the path so discord.File owns the
            # handle: no file stays open during the LLM call above and the
            # upload cannot hit an already-closed handle. Keep the real
            # extension so the attachment name matches the image format.
            extension = os.path.splitext(image_path)[1] or ".png"
            file = discord.File(image_path, filename=f"miku_generated_{int(time.time())}{extension}")
            await message.channel.send(completion_response, file=file)

            print(f"✅ Image sent successfully to {message.author.display_name}")

            # Log to DM history if it's a DM
            if is_dm:
                from utils.dm_logger import dm_logger
                dm_logger.log_conversation(user_id, message.content, f"{initial_response}\n[Generated image: {prompt}]", attachments=["generated_image.png"])

            return True

    except Exception as e:
        print(f"❌ Error in handle_image_generation_request: {e}")

        # Best effort: tell the user something went wrong
        try:
            await message.channel.send("Sorry, I had trouble creating that image. Please try again later!")
        except Exception as send_error:
            # Nothing more can be done if even the apology fails; record it
            # rather than swallowing it silently.
            print(f"⚠️ Could not send error message: {send_error}")

        return False
|
||||
|
||||
async def check_comfyui_status() -> dict:
    """
    Check the status of ComfyUI and the workflow template.

    Probes each known ComfyUI endpoint's /system_stats route (3 second timeout
    each) and checks that "Miku_BasicWorkflow.json" exists.

    Returns:
        dict: Status information with the keys
            "workflow_template_exists" (bool),
            "comfyui_running" (bool),
            "comfyui_url" (str; the endpoint that answered, or the first
            candidate if none did),
            "ready" (bool; template present and ComfyUI reachable),
        plus "error" (str) when the check itself raised.
    """
    try:
        # Check if ComfyUI workflow template exists
        workflow_exists = os.path.exists("Miku_BasicWorkflow.json")

        # Check if ComfyUI is running (try different Docker networking options)
        comfyui_running = False
        comfyui_url = "http://host.docker.internal:8188"  # Default

        comfyui_urls = [
            "http://host.docker.internal:8188",  # Docker Desktop
            "http://172.17.0.1:8188",  # Default Docker bridge gateway
            "http://localhost:8188"  # Fallback (if network_mode: host)
        ]

        probe_timeout = aiohttp.ClientTimeout(total=3)
        # One session is enough for all probes
        async with aiohttp.ClientSession() as session:
            for url in comfyui_urls:
                try:
                    async with session.get(f"{url}/system_stats", timeout=probe_timeout) as response:
                        if response.status == 200:
                            comfyui_running = True
                            comfyui_url = url
                            break
                except Exception:
                    # Endpoint unreachable: try the next one. `Exception`
                    # (not a bare except) so task cancellation still propagates.
                    continue

        return {
            "workflow_template_exists": workflow_exists,
            "comfyui_running": comfyui_running,
            "comfyui_url": comfyui_url,
            "ready": workflow_exists and comfyui_running
        }

    except Exception as e:
        return {
            "workflow_template_exists": False,
            "comfyui_running": False,
            "comfyui_url": "http://localhost:8188",
            "ready": False,
            "error": str(e)
        }
|
||||
Reference in New Issue
Block a user