diff --git a/bot/api.py b/bot/api.py
index eb5dd0e..c2b5bda 100644
--- a/bot/api.py
+++ b/bot/api.py
@@ -226,6 +226,54 @@ def calm_miku_endpoint():
     return {"status": "ok", "message": "Miku has been calmed down"}
 
+# ========== Language Mode Management ==========
+@app.get("/language")
+def get_language_mode():
+    """Get current language mode (english or japanese)"""
+    return {
+        "language_mode": globals.LANGUAGE_MODE,
+        "available_languages": ["english", "japanese"],
+        "current_model": globals.JAPANESE_TEXT_MODEL if globals.LANGUAGE_MODE == "japanese" else globals.TEXT_MODEL
+    }
+
+@app.post("/language/toggle")
+def toggle_language_mode():
+    """Toggle between English and Japanese modes"""
+    if globals.LANGUAGE_MODE == "english":
+        globals.LANGUAGE_MODE = "japanese"
+        new_mode = "japanese"
+        model_used = globals.JAPANESE_TEXT_MODEL
+        logger.info("Switched to Japanese mode (using Llama 3.1 Swallow)")
+    else:
+        globals.LANGUAGE_MODE = "english"
+        new_mode = "english"
+        model_used = globals.TEXT_MODEL
+        logger.info("Switched to English mode (using default model)")
+
+    return {
+        "status": "ok",
+        "language_mode": new_mode,
+        "model_now_using": model_used,
+        "message": f"Miku is now speaking in {new_mode.upper()}!"
+    }
+
+@app.post("/language/set")
+def set_language_mode(language: str = "english"):
+    """Set language mode to either 'english' or 'japanese'"""
+    if language.lower() not in ["english", "japanese"]:
+        return {"error": f"Invalid language mode '{language}'. Use 'english' or 'japanese'."}, 400
+
+    globals.LANGUAGE_MODE = language.lower()
+    model_used = globals.JAPANESE_TEXT_MODEL if language.lower() == "japanese" else globals.TEXT_MODEL
+    logger.info(f"Language mode set to {language.lower()} (using {model_used})")
+
+    return {
+        "status": "ok",
+        "language_mode": language.lower(),
+        "model_now_using": model_used,
+        "message": f"Miku is now speaking in {language.upper()}!"
+    }
+
 # ========== Evil Mode Management ==========
 @app.get("/evil-mode")
 def get_evil_mode_status():
diff --git a/bot/globals.py b/bot/globals.py
index b839076..64036bf 100644
--- a/bot/globals.py
+++ b/bot/globals.py
@@ -26,8 +26,12 @@ LLAMA_AMD_URL = os.getenv("LLAMA_AMD_URL", "http://llama-swap-amd:8080") # Seco
 TEXT_MODEL = os.getenv("TEXT_MODEL", "llama3.1")
 VISION_MODEL = os.getenv("VISION_MODEL", "vision")
 EVIL_TEXT_MODEL = os.getenv("EVIL_TEXT_MODEL", "darkidol") # Uncensored model for evil mode
+JAPANESE_TEXT_MODEL = os.getenv("JAPANESE_TEXT_MODEL", "swallow") # Llama 3.1 Swallow model for Japanese
 OWNER_USER_ID = int(os.getenv("OWNER_USER_ID", "209381657369772032")) # Bot owner's Discord user ID for reports
 
+# Language mode for Miku (english or japanese)
+LANGUAGE_MODE = "english" # Can be "english" or "japanese"
+
 # Fish.audio TTS settings
 FISH_API_KEY = os.getenv("FISH_API_KEY", "478d263d8c094e0c8993aae3e9cf9159")
 MIKU_VOICE_ID = os.getenv("MIKU_VOICE_ID", "b28b79555e8c4904ac4d048c36e716b7")
diff --git a/bot/static/index.html b/bot/static/index.html
index ff210d8..91fb852 100644
--- a/bot/static/index.html
+++ b/bot/static/index.html
@@ -660,10 +660,11 @@
-
-
-
-
+
+
+
+
+
@@ -1173,8 +1174,70 @@
-
+
+
+⚙️ Language Model Settings
+Configure language model behavior and language mode.
+
+🌐 Language Mode
+Switch Miku between English and Japanese responses.
+
+Current Language: English
+
+English Mode:
+  • Uses standard Llama 3.1 model
+  • Responds in English only
+
+Japanese Mode (日本語):
+  • Uses Llama 3.1 Swallow model (trained for Japanese)
+  • Responds entirely in Japanese
+
+📊 Current Status
+Language Mode: English
+Active Model: llama3.1
+Available Languages: English, 日本語 (Japanese)
+
+ℹ️ How Language Mode Works
+  • English mode uses your default text model for English responses
+  • Japanese mode switches to Swallow and responds only in 日本語
+  • All personality traits, mood system, and features work in both modes
+  • Language mode is global - affects all servers and DMs
+  • Conversation history is preserved across language switches
+
 
 🎨 Image Generation System
 
 Natural language image generation powered by ComfyUI. Users can ask Miku to create images naturally without commands!
 
@@ -1232,7 +1295,7 @@
-
+

📊 Autonomous V2 Decision Engine Stats

Real-time monitoring of Miku's autonomous decision-making context and mood-based personality stats.

@@ -1250,7 +1313,7 @@
-
+

💬 Chat with LLM

Direct chat interface with the language models. Test responses, experiment with prompts, or just chat with Miku!

@@ -1375,8 +1438,8 @@
-
-
+
+

📞 Initiate Voice Call

Start an automated voice chat session with a user. Miku will automatically manage containers, join voice chat, and send an invitation DM.

@@ -1559,6 +1622,7 @@ document.addEventListener('DOMContentLoaded', function() {
     checkEvilModeStatus(); // Check evil mode on load
     checkBipolarModeStatus(); // Check bipolar mode on load
     checkGPUStatus(); // Check GPU selection on load
+    refreshLanguageStatus(); // Check language mode on load
     console.log('🚀 DOMContentLoaded - initializing figurine subscribers list');
     refreshFigurineSubscribers();
     loadProfilePictureMetadata();
@@ -2251,6 +2315,43 @@ async function calmMiku() {
     }
 }
 
+// ===== Language Mode Functions =====
+async function refreshLanguageStatus() {
+    try {
+        const result = await apiCall('/language');
+        document.getElementById('current-language-display').textContent =
+            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
+        document.getElementById('status-language').textContent =
+            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
+        document.getElementById('status-model').textContent = result.current_model;
+
+        console.log('Language status:', result);
+    } catch (error) {
+        console.error('Failed to get language status:', error);
+        showNotification('Failed to load language status', 'error');
+    }
+}
+
+async function toggleLanguageMode() {
+    try {
+        const result = await apiCall('/language/toggle', 'POST');
+
+        // Update UI
+        document.getElementById('current-language-display').textContent =
+            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
+        document.getElementById('status-language').textContent =
+            result.language_mode === 'japanese' ? '日本語 (Japanese)' : 'English';
+        document.getElementById('status-model').textContent = result.model_now_using;
+
+        // Show notification
+        showNotification(result.message, 'success');
+        console.log('Language toggled:', result);
+    } catch (error) {
+        console.error('Failed to toggle language mode:', error);
+        showNotification('Failed to toggle language mode', 'error');
+    }
+}
+
 // Evil Mode Functions
 async function checkEvilModeStatus() {
     try {
diff --git a/bot/utils/context_manager.py b/bot/utils/context_manager.py
index a461d0b..3edb6f3 100644
--- a/bot/utils/context_manager.py
+++ b/bot/utils/context_manager.py
@@ -3,8 +3,12 @@
 Structured context management for Miku's personality and knowledge.
 Replaces the vector search system with organized, complete context.
 Preserves original content files in their entirety.
+
+When LANGUAGE_MODE is "japanese", appends a Japanese instruction to ensure
+all responses are in Japanese without requiring separate files.
 """
 
+import globals
 from utils.logger import get_logger
 
 logger = get_logger('core')
@@ -40,60 +44,96 @@ def get_original_miku_lyrics() -> str:
         return "## MIKU LYRICS\n[File could not be loaded]"
 
 
+def _get_japanese_instruction() -> str:
+    """
+    Returns the Japanese language instruction to append to context.
+    Ensures all responses are in Japanese when in Japanese mode.
+    """
+    return "\n\n[CRITICAL INSTRUCTION - 重要な指示]\n**YOU MUST RESPOND ENTIRELY IN JAPANESE (日本語). NO ENGLISH ALLOWED.**\nすべての返答は必ず日本語で行ってください。英語での返答は一切禁止されています。\nこれは最優先の指示です。必ず守ってください。"
+
+
 def get_complete_context() -> str:
-    """Returns all essential Miku context using original files in their entirety"""
-    return f"""## MIKU LORE (Complete Original)
-{get_original_miku_lore()}
+    """
+    Returns all essential Miku context using original files in their entirety.
+
+    If LANGUAGE_MODE is "japanese", appends a Japanese instruction to ensure
+    all responses are in Japanese.
+    """
+    lore = get_original_miku_lore()
+    prompt = get_original_miku_prompt()
+    lyrics = get_original_miku_lyrics()
+
+    combined = f"""## MIKU LORE (Complete Original)
+{lore}
 
 ## MIKU PERSONALITY & GUIDELINES (Complete Original)
-{get_original_miku_prompt()}
+{prompt}
 
 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
+
+    # Append Japanese instruction if in Japanese mode
+    if globals.LANGUAGE_MODE == "japanese":
+        combined += _get_japanese_instruction()
+
+    logger.info(f"[core] Context loaded in {globals.LANGUAGE_MODE} mode")
+    return combined
 
 
 def get_context_for_response_type(response_type: str) -> str:
-    """Returns appropriate context based on the type of response being generated"""
+    """
+    Returns appropriate context based on the type of response being generated.
 
-    # Core context always includes the complete original files
+    If LANGUAGE_MODE is "japanese", appends Japanese instruction to all contexts
+    to ensure responses are in Japanese.
+    """
+
+    lore = get_original_miku_lore()
+    prompt = get_original_miku_prompt()
+    lyrics = get_original_miku_lyrics()
+
+    # Build core context (always in English source files)
     core_context = f"""## MIKU LORE (Complete Original)
-{get_original_miku_lore()}
+{lore}
 
 ## MIKU PERSONALITY & GUIDELINES (Complete Original)
-{get_original_miku_prompt()}"""
+{prompt}"""
 
+    # Return context based on response type
     if response_type == "autonomous_general":
-        # For general autonomous messages, include everything
-        return f"""{core_context}
+        context = f"""{core_context}
 
 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
 
     elif response_type == "autonomous_tweet":
-        # For tweet responses, include lyrics for musical context
-        return f"""{core_context}
+        context = f"""{core_context}
 
 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
 
     elif response_type == "dm_response" or response_type == "server_response":
-        # For conversational responses, include everything
-        return f"""{core_context}
+        context = f"""{core_context}
 
 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
 
     elif response_type == "conversation_join":
-        # For joining conversations, include everything
-        return f"""{core_context}
+        context = f"""{core_context}
 
 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
 
     elif response_type == "emoji_selection":
-        # For emoji reactions, no context needed - the prompt has everything
-        return ""
+        # For emoji reactions, minimal context needed
+        context = ""
 
     else:
         # Default: comprehensive context
-        return get_complete_context()
+        context = get_complete_context()
+
+    # Append Japanese instruction if in Japanese mode
+    if globals.LANGUAGE_MODE == "japanese" and context:
+        context += _get_japanese_instruction()
+
+    return context
diff --git a/bot/utils/image_handling.py b/bot/utils/image_handling.py
index 040cf0f..b080d5f 100644
--- a/bot/utils/image_handling.py
+++ b/bot/utils/image_handling.py
@@ -239,7 +239,13 @@ async def analyze_image_with_vision(base64_img):
     Uses OpenAI-compatible chat completions API with image_url.
     Always uses NVIDIA GPU for vision model.
""" - from utils.llm import get_vision_gpu_url + from utils.llm import get_vision_gpu_url, check_vision_endpoint_health + + # Check if vision endpoint is healthy before attempting request + is_healthy, error = await check_vision_endpoint_health() + if not is_healthy: + logger.warning(f"Vision endpoint unhealthy: {error}") + return f"Vision service currently unavailable: {error}" payload = { "model": globals.VISION_MODEL, @@ -269,17 +275,20 @@ async def analyze_image_with_vision(base64_img): async with aiohttp.ClientSession() as session: try: vision_url = get_vision_gpu_url() - async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers) as response: + logger.info(f"Sending vision request to {vision_url} using model: {globals.VISION_MODEL}") + + async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=60)) as response: if response.status == 200: data = await response.json() - return data.get("choices", [{}])[0].get("message", {}).get("content", "No description.") + result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.") + logger.info(f"Vision analysis completed successfully") + return result else: error_text = await response.text() - logger.error(f"Vision API error: {response.status} - {error_text}") + logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})") return f"Error analyzing image: {response.status}" except Exception as e: - logger.error(f"Error in analyze_image_with_vision: {e}") - return f"Error analyzing image: {str(e)}" + logger.error(f"Error in analyze_image_with_vision: {e}", exc_info=True) async def analyze_video_with_vision(video_frames, media_type="video"): @@ -288,6 +297,13 @@ async def analyze_video_with_vision(video_frames, media_type="video"): video_frames: list of base64-encoded frames media_type: "video", "gif", or "tenor_gif" to customize the analysis prompt """ + from utils.llm import get_vision_gpu_url, check_vision_endpoint_health + + # Check if vision endpoint is healthy before attempting request + is_healthy, error = await check_vision_endpoint_health() + if not is_healthy: + logger.warning(f"Vision endpoint unhealthy: {error}") + return f"Vision service currently unavailable: {error}" # Customize prompt based on media type if media_type == "gif": @@ -331,16 +347,20 @@ async def analyze_video_with_vision(video_frames, media_type="video"): async with aiohttp.ClientSession() as session: try: vision_url = get_vision_gpu_url() - async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers) as response: + logger.info(f"Sending video analysis request to {vision_url} using model: {globals.VISION_MODEL} (media_type: {media_type}, frames: {len(video_frames)})") + + async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=120)) as response: if response.status == 200: data = await response.json() - return data.get("choices", [{}])[0].get("message", {}).get("content", "No description.") + result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.") + logger.info(f"Video analysis completed successfully") + return result else: error_text = await response.text() - logger.error(f"Vision API error: {response.status} - {error_text}") + logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})") return f"Error analyzing video: {response.status}" 
         except Exception as e:
-            logger.error(f"Error in analyze_video_with_vision: {e}")
+            logger.error(f"Error in analyze_video_with_vision: {e}", exc_info=True)
             return f"Error analyzing video: {str(e)}"
 
diff --git a/bot/utils/llm.py b/bot/utils/llm.py
index bface66..1126209 100644
--- a/bot/utils/llm.py
+++ b/bot/utils/llm.py
@@ -38,8 +38,47 @@ def get_vision_gpu_url():
 
     Strategy: Always use NVIDIA GPU for vision to avoid unloading/reloading.
     - When NVIDIA is primary: Use NVIDIA for both text and vision
     - When AMD is primary: Use AMD for text, NVIDIA for vision (keeps vision loaded)
+
+    Important: Vision model (MiniCPM-V) is ONLY configured on NVIDIA GPU.
+    This ensures vision inference is always fast and doesn't interfere with
+    AMD text model inference.
     """
-    return globals.LLAMA_URL # Always use NVIDIA for vision
+    current_text_gpu = get_current_gpu_url()
+    nvidia_vision_url = globals.LLAMA_URL
+
+    # Vision ALWAYS uses NVIDIA, regardless of which GPU is primary for text
+    # Log this decision when GPU switching is active (primary text GPU is AMD)
+    if current_text_gpu == globals.LLAMA_AMD_URL:
+        logger.debug(f"Primary GPU is AMD for text, but using NVIDIA for vision model")
+
+    return nvidia_vision_url # Always use NVIDIA for vision
+
+async def check_vision_endpoint_health():
+    """
+    Check if NVIDIA GPU vision endpoint is healthy and responsive.
+    This is important when AMD is the primary GPU to ensure vision still works.
+
+    Returns:
+        Tuple of (is_healthy: bool, error_message: Optional[str])
+    """
+    import asyncio  # for asyncio.TimeoutError below
+    import aiohttp
+    vision_url = get_vision_gpu_url()
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{vision_url}/health", timeout=aiohttp.ClientTimeout(total=5)) as response:
+                is_healthy = response.status == 200
+                if is_healthy:
+                    logger.info(f"Vision endpoint ({vision_url}) health check: OK")
+                else:
+                    logger.warning(f"Vision endpoint ({vision_url}) health check failed: status {response.status}")
+                return is_healthy, None if is_healthy else f"Status {response.status}"
+    except asyncio.TimeoutError:
+        logger.error(f"Vision endpoint ({vision_url}) health check: timeout")
+        return False, "Endpoint timeout"
+    except Exception as e:
+        logger.error(f"Vision endpoint ({vision_url}) health check error: {e}")
+        return False, str(e)
 
 def _strip_surrounding_quotes(text):
     """
@@ -108,8 +147,12 @@ async def query_llama(user_prompt, user_id, guild_id=None, response_type="dm_res
     if evil_mode:
         model = globals.EVIL_TEXT_MODEL # Use DarkIdol uncensored model
         logger.info(f"Using evil model: {model}")
+    elif globals.LANGUAGE_MODE == "japanese":
+        model = globals.JAPANESE_TEXT_MODEL # Use Swallow for Japanese
+        logger.info(f"Using Japanese model: {model}")
     else:
         model = globals.TEXT_MODEL
+        logger.info(f"Using default model: {model}")
 
     # Determine channel_id for conversation history
     # For servers, use guild_id; for DMs, use user_id
diff --git a/llama-swap-config.yaml b/llama-swap-config.yaml
index 7da2c87..e445f7f 100644
--- a/llama-swap-config.yaml
+++ b/llama-swap-config.yaml
@@ -18,6 +18,15 @@ models:
       - darkidol
       - evil-model
      - uncensored
+
+  # Japanese language model (Llama 3.1 Swallow - Japanese optimized)
+  swallow:
+    cmd: /app/llama-server --port ${PORT} --model /models/Llama-3.1-Swallow-8B-Instruct-v0.5-Q4_K_M.gguf -ngl 99 -nkvo -c 16384 --host 0.0.0.0 --no-warmup
+    ttl: 1800 # Unload after 30 minutes of inactivity
+    aliases:
+      - swallow
+      - japanese
+      - japanese-model
 
   # Vision/Multimodal model (MiniCPM-V-4.5 - supports images, video, and GIFs)
   vision:
diff --git a/llama-swap-rocm-config.yaml b/llama-swap-rocm-config.yaml
index c5c573f..885b21e 100644
--- a/llama-swap-rocm-config.yaml
+++ b/llama-swap-rocm-config.yaml
@@ -19,6 +19,15 @@ models:
       - darkidol
       - evil-model
       - uncensored
+
+  # Japanese language model (Llama 3.1 Swallow - Japanese optimized)
+  swallow:
+    cmd: /app/llama-server --port ${PORT} --model /models/Llama-3.1-Swallow-8B-Instruct-v0.5-Q4_K_M.gguf -ngl 99 -nkvo -c 16384 --host 0.0.0.0 --no-warmup
+    ttl: 1800 # Unload after 30 minutes of inactivity
+    aliases:
+      - swallow
+      - japanese
+      - japanese-model
 
 # Server configuration
 # llama-swap will listen on this address
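
As a quick smoke test of the new endpoints, something like the sketch below should work. The base URL is an assumption (whatever host/port serves bot/api.py is not shown in this diff), and the script is a hypothetical helper, not part of the change:

    # language_smoke_test.py - hypothetical helper, not part of this diff
    import requests

    BASE = "http://localhost:8000"  # assumption: address where the bot API is served

    # Current mode, plus the model that text requests will route to
    print(requests.get(f"{BASE}/language").json())

    # Flip english <-> japanese and report the newly active model
    print(requests.post(f"{BASE}/language/toggle").json())

    # Set an explicit mode; 'language' is a query parameter per set_language_mode()
    print(requests.post(f"{BASE}/language/set", params={"language": "japanese"}).json())

Note that toggling does not reload anything eagerly: the endpoint only flips the LANGUAGE_MODE global, and llama-swap loads Swallow on demand at the next completion request, unloading it again after the configured 30-minute TTL.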
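Because llama-swap exposes an OpenAI-compatible API and routes on the "model" field, the new Swallow entry can also be exercised directly through any of its aliases. A minimal sketch, with the endpoint taken from the LLAMA_AMD_URL default above (substitute the LLAMA_URL address for the NVIDIA side) and an illustrative prompt:

    # swallow_probe.py - hypothetical helper, not part of this diff
    import asyncio
    import aiohttp

    async def main():
        payload = {
            "model": "swallow",  # "japanese" and "japanese-model" also resolve via the aliases
            "messages": [{"role": "user", "content": "こんにちは、ミクさん！"}],
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(
                "http://llama-swap-amd:8080/v1/chat/completions",  # assumption: reachable inside the compose network
                json=payload,
                timeout=aiohttp.ClientTimeout(total=120),  # first request may block while the model loads
            ) as resp:
                data = await resp.json()
                print(data["choices"][0]["message"]["content"])

    asyncio.run(main())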