Implemented a new Japanese-only text mode with a WebUI toggle, using a Llama 3.1 Swallow model. Next up is Japanese TTS.
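The WebUI toggle itself is not part of this diff; all the backend reads is globals.LANGUAGE_MODE. Below is a minimal sketch of what such a toggle handler might look like, assuming an aiohttp-based web route (the route, handler name, and the "english" fallback value are assumptions, not code from this commit):

# Hypothetical WebUI handler: flips the language mode that query_llama and
# the context builders read from globals.LANGUAGE_MODE.
from aiohttp import web
import globals

async def toggle_language_mode(request: web.Request) -> web.Response:
    body = await request.json()
    # "japanese" enables the Swallow model and the Japanese-only instruction;
    # any other value falls back to the default text model (assumed "english").
    globals.LANGUAGE_MODE = "japanese" if body.get("japanese") else "english"
    return web.json_response({"language_mode": globals.LANGUAGE_MODE})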
@@ -3,8 +3,12 @@
 Structured context management for Miku's personality and knowledge.
 Replaces the vector search system with organized, complete context.
 Preserves original content files in their entirety.
+
+When LANGUAGE_MODE is "japanese", appends a Japanese instruction to ensure
+all responses are in Japanese without requiring separate files.
 """

 import globals
 from utils.logger import get_logger

 logger = get_logger('core')
@@ -40,60 +44,96 @@ def get_original_miku_lyrics() -> str:
         return "## MIKU LYRICS\n[File could not be loaded]"


+def _get_japanese_instruction() -> str:
+    """
+    Returns the Japanese language instruction to append to context.
+    Ensures all responses are in Japanese when in Japanese mode.
+    """
+    return "\n\n[CRITICAL INSTRUCTION - 重要な指示]\n**YOU MUST RESPOND ENTIRELY IN JAPANESE (日本語). NO ENGLISH ALLOWED.**\nすべての返答は必ず日本語で行ってください。英語での返答は一切禁止されています。\nこれは最優先の指示です。必ず守ってください。"
+
+
 def get_complete_context() -> str:
-    """Returns all essential Miku context using original files in their entirety"""
-    return f"""## MIKU LORE (Complete Original)
-{get_original_miku_lore()}
+    """
+    Returns all essential Miku context using original files in their entirety.
+
+    If LANGUAGE_MODE is "japanese", appends a Japanese instruction to ensure
+    all responses are in Japanese.
+    """
+    lore = get_original_miku_lore()
+    prompt = get_original_miku_prompt()
+    lyrics = get_original_miku_lyrics()
+
+    combined = f"""## MIKU LORE (Complete Original)
+{lore}

 ## MIKU PERSONALITY & GUIDELINES (Complete Original)
-{get_original_miku_prompt()}
+{prompt}

 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
+
+    # Append Japanese instruction if in Japanese mode
+    if globals.LANGUAGE_MODE == "japanese":
+        combined += _get_japanese_instruction()
+
+    logger.info(f"[core] Context loaded in {globals.LANGUAGE_MODE} mode")
+    return combined


 def get_context_for_response_type(response_type: str) -> str:
-    """Returns appropriate context based on the type of response being generated"""
+    """
+    Returns appropriate context based on the type of response being generated.
+
+    If LANGUAGE_MODE is "japanese", appends Japanese instruction to all contexts
+    to ensure responses are in Japanese.
+    """
+    lore = get_original_miku_lore()
+    prompt = get_original_miku_prompt()
+    lyrics = get_original_miku_lyrics()

-    # Core context always includes the complete original files
+    # Build core context (always in English source files)
     core_context = f"""## MIKU LORE (Complete Original)
-{get_original_miku_lore()}
+{lore}

 ## MIKU PERSONALITY & GUIDELINES (Complete Original)
-{get_original_miku_prompt()}"""
+{prompt}"""

     # Return context based on response type
     if response_type == "autonomous_general":
         # For general autonomous messages, include everything
-        return f"""{core_context}
+        context = f"""{core_context}

 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""

     elif response_type == "autonomous_tweet":
         # For tweet responses, include lyrics for musical context
-        return f"""{core_context}
+        context = f"""{core_context}

 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""

     elif response_type == "dm_response" or response_type == "server_response":
         # For conversational responses, include everything
-        return f"""{core_context}
+        context = f"""{core_context}

 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""

     elif response_type == "conversation_join":
         # For joining conversations, include everything
-        return f"""{core_context}
+        context = f"""{core_context}

 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""

     elif response_type == "emoji_selection":
-        # For emoji reactions, no context needed - the prompt has everything
-        return ""
+        # For emoji reactions, minimal context needed
+        context = ""

     else:
         # Default: comprehensive context
-        return get_complete_context()
+        context = get_complete_context()
+
+    # Append Japanese instruction if in Japanese mode
+    if globals.LANGUAGE_MODE == "japanese" and context:
+        context += _get_japanese_instruction()
+
+    return context
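A minimal sanity check of the new context behavior (a sketch, not part of the commit; the module name `core` is an assumption based on the get_logger('core') call):

# Sketch: the Japanese instruction is appended to normal contexts, while
# emoji_selection still returns an empty string (module name 'core' assumed).
import globals
import core

globals.LANGUAGE_MODE = "japanese"
assert "[CRITICAL INSTRUCTION" in core.get_context_for_response_type("dm_response")
assert core.get_context_for_response_type("emoji_selection") == ""

globals.LANGUAGE_MODE = "english"
assert "[CRITICAL INSTRUCTION" not in core.get_complete_context()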
@@ -239,7 +239,13 @@ async def analyze_image_with_vision(base64_img):
     Uses OpenAI-compatible chat completions API with image_url.
     Always uses NVIDIA GPU for vision model.
     """
-    from utils.llm import get_vision_gpu_url
+    from utils.llm import get_vision_gpu_url, check_vision_endpoint_health

+    # Check if vision endpoint is healthy before attempting request
+    is_healthy, error = await check_vision_endpoint_health()
+    if not is_healthy:
+        logger.warning(f"Vision endpoint unhealthy: {error}")
+        return f"Vision service currently unavailable: {error}"
+
     payload = {
         "model": globals.VISION_MODEL,
@@ -269,17 +275,20 @@ async def analyze_image_with_vision(base64_img):
     async with aiohttp.ClientSession() as session:
         try:
             vision_url = get_vision_gpu_url()
-            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers) as response:
+            logger.info(f"Sending vision request to {vision_url} using model: {globals.VISION_MODEL}")
+
+            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=60)) as response:
                 if response.status == 200:
                     data = await response.json()
-                    return data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    logger.info(f"Vision analysis completed successfully")
+                    return result
                 else:
                     error_text = await response.text()
-                    logger.error(f"Vision API error: {response.status} - {error_text}")
+                    logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})")
                     return f"Error analyzing image: {response.status}"
         except Exception as e:
-            logger.error(f"Error in analyze_image_with_vision: {e}")
+            logger.error(f"Error in analyze_image_with_vision: {e}", exc_info=True)
             return f"Error analyzing image: {str(e)}"


 async def analyze_video_with_vision(video_frames, media_type="video"):
@@ -288,6 +297,13 @@ async def analyze_video_with_vision(video_frames, media_type="video"):
     video_frames: list of base64-encoded frames
     media_type: "video", "gif", or "tenor_gif" to customize the analysis prompt
     """
+    from utils.llm import get_vision_gpu_url, check_vision_endpoint_health
+
+    # Check if vision endpoint is healthy before attempting request
+    is_healthy, error = await check_vision_endpoint_health()
+    if not is_healthy:
+        logger.warning(f"Vision endpoint unhealthy: {error}")
+        return f"Vision service currently unavailable: {error}"

     # Customize prompt based on media type
     if media_type == "gif":
@@ -331,16 +347,20 @@ async def analyze_video_with_vision(video_frames, media_type="video"):
     async with aiohttp.ClientSession() as session:
         try:
             vision_url = get_vision_gpu_url()
-            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers) as response:
+            logger.info(f"Sending video analysis request to {vision_url} using model: {globals.VISION_MODEL} (media_type: {media_type}, frames: {len(video_frames)})")
+
+            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=120)) as response:
                 if response.status == 200:
                     data = await response.json()
-                    return data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    logger.info(f"Video analysis completed successfully")
+                    return result
                 else:
                     error_text = await response.text()
-                    logger.error(f"Vision API error: {response.status} - {error_text}")
+                    logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})")
                     return f"Error analyzing video: {response.status}"
         except Exception as e:
-            logger.error(f"Error in analyze_video_with_vision: {e}")
+            logger.error(f"Error in analyze_video_with_vision: {e}", exc_info=True)
             return f"Error analyzing video: {str(e)}"
@@ -38,8 +38,47 @@ def get_vision_gpu_url():
     Strategy: Always use NVIDIA GPU for vision to avoid unloading/reloading.
     - When NVIDIA is primary: Use NVIDIA for both text and vision
     - When AMD is primary: Use AMD for text, NVIDIA for vision (keeps vision loaded)
+
+    Important: Vision model (MiniCPM-V) is ONLY configured on NVIDIA GPU.
+    This ensures vision inference is always fast and doesn't interfere with
+    AMD text model inference.
     """
-    return globals.LLAMA_URL  # Always use NVIDIA for vision
+    current_text_gpu = get_current_gpu_url()
+    nvidia_vision_url = globals.LLAMA_URL
+
+    # Vision ALWAYS uses NVIDIA, regardless of which GPU is primary for text
+    # Log this decision when GPU switching is active (primary text GPU is AMD)
+    if current_text_gpu == globals.LLAMA_AMD_URL:
+        logger.debug(f"Primary GPU is AMD for text, but using NVIDIA for vision model")
+
+    return nvidia_vision_url  # Always use NVIDIA for vision
+
+
+async def check_vision_endpoint_health():
+    """
+    Check if NVIDIA GPU vision endpoint is healthy and responsive.
+    This is important when AMD is the primary GPU to ensure vision still works.
+
+    Returns:
+        Tuple of (is_healthy: bool, error_message: Optional[str])
+    """
+    import aiohttp
+    vision_url = get_vision_gpu_url()
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{vision_url}/health", timeout=aiohttp.ClientTimeout(total=5)) as response:
+                is_healthy = response.status == 200
+                if is_healthy:
+                    logger.info(f"Vision endpoint ({vision_url}) health check: OK")
+                else:
+                    logger.warning(f"Vision endpoint ({vision_url}) health check failed: status {response.status}")
+                return is_healthy, None if is_healthy else f"Status {response.status}"
+    except asyncio.TimeoutError:
+        logger.error(f"Vision endpoint ({vision_url}) health check: timeout")
+        return False, "Endpoint timeout"
+    except Exception as e:
+        logger.error(f"Vision endpoint ({vision_url}) health check error: {e}")
+        return False, str(e)

 def _strip_surrounding_quotes(text):
     """
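check_vision_endpoint_health returns an (is_healthy, error_message) tuple, so callers can degrade gracefully instead of waiting on a dead endpoint, as the vision handlers above now do. A quick standalone usage sketch (the utils.llm import path comes from the diff; running it as a one-off script is just for illustration):

# Ad-hoc check of the NVIDIA vision endpoint before sending real requests.
import asyncio
from utils.llm import check_vision_endpoint_health

async def main():
    is_healthy, error = await check_vision_endpoint_health()
    if is_healthy:
        print("Vision endpoint OK")
    else:
        print(f"Vision endpoint unavailable: {error}")

asyncio.run(main())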
@@ -108,8 +147,12 @@ async def query_llama(user_prompt, user_id, guild_id=None, response_type="dm_res
     if evil_mode:
         model = globals.EVIL_TEXT_MODEL  # Use DarkIdol uncensored model
         logger.info(f"Using evil model: {model}")
+    elif globals.LANGUAGE_MODE == "japanese":
+        model = globals.JAPANESE_TEXT_MODEL  # Use Swallow for Japanese
+        logger.info(f"Using Japanese model: {model}")
     else:
         model = globals.TEXT_MODEL
+        logger.info(f"Using default model: {model}")

     # Determine channel_id for conversation history
     # For servers, use guild_id; for DMs, use user_id
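The model selection above assumes globals exposes the new language settings. A sketch of the corresponding globals.py entries (the names LANGUAGE_MODE and JAPANESE_TEXT_MODEL appear in the diff; the default value and the exact Swallow model identifier are assumptions):

# globals.py additions assumed by this commit (values are illustrative).
LANGUAGE_MODE = "english"  # toggled to "japanese" from the WebUI
JAPANESE_TEXT_MODEL = "llama-3.1-swallow-8b-instruct"  # hypothetical alias for the Llama 3.1 Swallow build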