Implemented a new Japanese-only text mode with a WebUI toggle, using a Llama 3.1 Swallow model. Next up is Japanese TTS.
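The WebUI toggle itself is not part of this diff; all the backend reads is globals.LANGUAGE_MODE. Below is a minimal sketch of what such a toggle handler might look like, assuming an aiohttp-based web route (the route, handler name, and the "english" fallback value are assumptions, not code from this commit):

# Hypothetical WebUI handler: flips the language mode that query_llama and
# the context builders read from globals.LANGUAGE_MODE.
from aiohttp import web
import globals

async def toggle_language_mode(request: web.Request) -> web.Response:
    body = await request.json()
    # "japanese" enables the Swallow model and the Japanese-only instruction;
    # any other value falls back to the default text model (assumed "english").
    globals.LANGUAGE_MODE = "japanese" if body.get("japanese") else "english"
    return web.json_response({"language_mode": globals.LANGUAGE_MODE})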
@@ -3,8 +3,12 @@
 Structured context management for Miku's personality and knowledge.
 Replaces the vector search system with organized, complete context.
 Preserves original content files in their entirety.
+
+When LANGUAGE_MODE is "japanese", appends a Japanese instruction to ensure
+all responses are in Japanese without requiring separate files.
 """

 import globals
 from utils.logger import get_logger

 logger = get_logger('core')
@@ -40,60 +44,96 @@ def get_original_miku_lyrics() -> str:
         return "## MIKU LYRICS\n[File could not be loaded]"


+def _get_japanese_instruction() -> str:
+    """
+    Returns the Japanese language instruction to append to context.
+    Ensures all responses are in Japanese when in Japanese mode.
+    """
+    return "\n\n[CRITICAL INSTRUCTION - 重要な指示]\n**YOU MUST RESPOND ENTIRELY IN JAPANESE (日本語). NO ENGLISH ALLOWED.**\nすべての返答は必ず日本語で行ってください。英語での返答は一切禁止されています。\nこれは最優先の指示です。必ず守ってください。"
+
+
 def get_complete_context() -> str:
-    """Returns all essential Miku context using original files in their entirety"""
-    return f"""## MIKU LORE (Complete Original)
-{get_original_miku_lore()}
+    """
+    Returns all essential Miku context using original files in their entirety.
+
+    If LANGUAGE_MODE is "japanese", appends a Japanese instruction to ensure
+    all responses are in Japanese.
+    """
+    lore = get_original_miku_lore()
+    prompt = get_original_miku_prompt()
+    lyrics = get_original_miku_lyrics()
+
+    combined = f"""## MIKU LORE (Complete Original)
+{lore}

 ## MIKU PERSONALITY & GUIDELINES (Complete Original)
-{get_original_miku_prompt()}
+{prompt}

 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""
+
+    # Append Japanese instruction if in Japanese mode
+    if globals.LANGUAGE_MODE == "japanese":
+        combined += _get_japanese_instruction()
+
+    logger.info(f"[core] Context loaded in {globals.LANGUAGE_MODE} mode")
+    return combined


 def get_context_for_response_type(response_type: str) -> str:
-    """Returns appropriate context based on the type of response being generated"""
+    """
+    Returns appropriate context based on the type of response being generated.
+
+    If LANGUAGE_MODE is "japanese", appends Japanese instruction to all contexts
+    to ensure responses are in Japanese.
+    """
+    lore = get_original_miku_lore()
+    prompt = get_original_miku_prompt()
+    lyrics = get_original_miku_lyrics()

-    # Core context always includes the complete original files
+    # Build core context (always in English source files)
     core_context = f"""## MIKU LORE (Complete Original)
-{get_original_miku_lore()}
+{lore}

 ## MIKU PERSONALITY & GUIDELINES (Complete Original)
-{get_original_miku_prompt()}"""
+{prompt}"""

     # Return context based on response type
     if response_type == "autonomous_general":
         # For general autonomous messages, include everything
-        return f"""{core_context}
+        context = f"""{core_context}

 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""

     elif response_type == "autonomous_tweet":
         # For tweet responses, include lyrics for musical context
-        return f"""{core_context}
+        context = f"""{core_context}

 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""

     elif response_type == "dm_response" or response_type == "server_response":
         # For conversational responses, include everything
-        return f"""{core_context}
+        context = f"""{core_context}

 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""

     elif response_type == "conversation_join":
         # For joining conversations, include everything
-        return f"""{core_context}
+        context = f"""{core_context}

 ## MIKU SONG LYRICS (Complete Original)
-{get_original_miku_lyrics()}"""
+{lyrics}"""

     elif response_type == "emoji_selection":
-        # For emoji reactions, no context needed - the prompt has everything
-        return ""
+        # For emoji reactions, minimal context needed
+        context = ""

     else:
         # Default: comprehensive context
-        return get_complete_context()
+        context = get_complete_context()
+
+    # Append Japanese instruction if in Japanese mode
+    if globals.LANGUAGE_MODE == "japanese" and context:
+        context += _get_japanese_instruction()
+
+    return context
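A minimal sanity check of the new context behavior (a sketch, not part of the commit; the module name `core` is an assumption based on the get_logger('core') call):

# Sketch: the Japanese instruction is appended to normal contexts, while
# emoji_selection still returns an empty string (module name 'core' assumed).
import globals
import core

globals.LANGUAGE_MODE = "japanese"
assert "[CRITICAL INSTRUCTION" in core.get_context_for_response_type("dm_response")
assert core.get_context_for_response_type("emoji_selection") == ""

globals.LANGUAGE_MODE = "english"
assert "[CRITICAL INSTRUCTION" not in core.get_complete_context()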
@@ -239,7 +239,13 @@ async def analyze_image_with_vision(base64_img):
     Uses OpenAI-compatible chat completions API with image_url.
     Always uses NVIDIA GPU for vision model.
     """
-    from utils.llm import get_vision_gpu_url
+    from utils.llm import get_vision_gpu_url, check_vision_endpoint_health

+    # Check if vision endpoint is healthy before attempting request
+    is_healthy, error = await check_vision_endpoint_health()
+    if not is_healthy:
+        logger.warning(f"Vision endpoint unhealthy: {error}")
+        return f"Vision service currently unavailable: {error}"
+
     payload = {
         "model": globals.VISION_MODEL,
@@ -269,17 +275,20 @@ async def analyze_image_with_vision(base64_img):
     async with aiohttp.ClientSession() as session:
         try:
             vision_url = get_vision_gpu_url()
-            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers) as response:
+            logger.info(f"Sending vision request to {vision_url} using model: {globals.VISION_MODEL}")
+
+            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=60)) as response:
                 if response.status == 200:
                     data = await response.json()
-                    return data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    logger.info(f"Vision analysis completed successfully")
+                    return result
                 else:
                     error_text = await response.text()
-                    logger.error(f"Vision API error: {response.status} - {error_text}")
+                    logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})")
                     return f"Error analyzing image: {response.status}"
         except Exception as e:
-            logger.error(f"Error in analyze_image_with_vision: {e}")
+            logger.error(f"Error in analyze_image_with_vision: {e}", exc_info=True)
             return f"Error analyzing image: {str(e)}"


 async def analyze_video_with_vision(video_frames, media_type="video"):
@@ -288,6 +297,13 @@ async def analyze_video_with_vision(video_frames, media_type="video"):
     video_frames: list of base64-encoded frames
     media_type: "video", "gif", or "tenor_gif" to customize the analysis prompt
     """
+    from utils.llm import get_vision_gpu_url, check_vision_endpoint_health
+
+    # Check if vision endpoint is healthy before attempting request
+    is_healthy, error = await check_vision_endpoint_health()
+    if not is_healthy:
+        logger.warning(f"Vision endpoint unhealthy: {error}")
+        return f"Vision service currently unavailable: {error}"

     # Customize prompt based on media type
     if media_type == "gif":
@@ -331,16 +347,20 @@ async def analyze_video_with_vision(video_frames, media_type="video"):
     async with aiohttp.ClientSession() as session:
         try:
             vision_url = get_vision_gpu_url()
-            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers) as response:
+            logger.info(f"Sending video analysis request to {vision_url} using model: {globals.VISION_MODEL} (media_type: {media_type}, frames: {len(video_frames)})")
+
+            async with session.post(f"{vision_url}/v1/chat/completions", json=payload, headers=headers, timeout=aiohttp.ClientTimeout(total=120)) as response:
                 if response.status == 200:
                     data = await response.json()
-                    return data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    result = data.get("choices", [{}])[0].get("message", {}).get("content", "No description.")
+                    logger.info(f"Video analysis completed successfully")
+                    return result
                 else:
                     error_text = await response.text()
-                    logger.error(f"Vision API error: {response.status} - {error_text}")
+                    logger.error(f"Vision API error: {response.status} - {error_text} (endpoint: {vision_url})")
                     return f"Error analyzing video: {response.status}"
         except Exception as e:
-            logger.error(f"Error in analyze_video_with_vision: {e}")
+            logger.error(f"Error in analyze_video_with_vision: {e}", exc_info=True)
             return f"Error analyzing video: {str(e)}"
@@ -38,8 +38,47 @@ def get_vision_gpu_url():
     Strategy: Always use NVIDIA GPU for vision to avoid unloading/reloading.
     - When NVIDIA is primary: Use NVIDIA for both text and vision
     - When AMD is primary: Use AMD for text, NVIDIA for vision (keeps vision loaded)
+
+    Important: Vision model (MiniCPM-V) is ONLY configured on NVIDIA GPU.
+    This ensures vision inference is always fast and doesn't interfere with
+    AMD text model inference.
     """
-    return globals.LLAMA_URL  # Always use NVIDIA for vision
+    current_text_gpu = get_current_gpu_url()
+    nvidia_vision_url = globals.LLAMA_URL
+
+    # Vision ALWAYS uses NVIDIA, regardless of which GPU is primary for text
+    # Log this decision when GPU switching is active (primary text GPU is AMD)
+    if current_text_gpu == globals.LLAMA_AMD_URL:
+        logger.debug(f"Primary GPU is AMD for text, but using NVIDIA for vision model")
+
+    return nvidia_vision_url  # Always use NVIDIA for vision
+
+
+async def check_vision_endpoint_health():
+    """
+    Check if NVIDIA GPU vision endpoint is healthy and responsive.
+    This is important when AMD is the primary GPU to ensure vision still works.
+
+    Returns:
+        Tuple of (is_healthy: bool, error_message: Optional[str])
+    """
+    import aiohttp
+    vision_url = get_vision_gpu_url()
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{vision_url}/health", timeout=aiohttp.ClientTimeout(total=5)) as response:
+                is_healthy = response.status == 200
+                if is_healthy:
+                    logger.info(f"Vision endpoint ({vision_url}) health check: OK")
+                else:
+                    logger.warning(f"Vision endpoint ({vision_url}) health check failed: status {response.status}")
+                return is_healthy, None if is_healthy else f"Status {response.status}"
+    except asyncio.TimeoutError:
+        logger.error(f"Vision endpoint ({vision_url}) health check: timeout")
+        return False, "Endpoint timeout"
+    except Exception as e:
+        logger.error(f"Vision endpoint ({vision_url}) health check error: {e}")
+        return False, str(e)

 def _strip_surrounding_quotes(text):
     """
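check_vision_endpoint_health returns an (is_healthy, error_message) tuple, so callers can degrade gracefully instead of waiting on a dead endpoint, as the vision handlers above now do. A quick standalone usage sketch (the utils.llm import path comes from the diff; running it as a one-off script is just for illustration):

# Ad-hoc check of the NVIDIA vision endpoint before sending real requests.
import asyncio
from utils.llm import check_vision_endpoint_health

async def main():
    is_healthy, error = await check_vision_endpoint_health()
    if is_healthy:
        print("Vision endpoint OK")
    else:
        print(f"Vision endpoint unavailable: {error}")

asyncio.run(main())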
@@ -108,8 +147,12 @@ async def query_llama(user_prompt, user_id, guild_id=None, response_type="dm_res
     if evil_mode:
         model = globals.EVIL_TEXT_MODEL  # Use DarkIdol uncensored model
         logger.info(f"Using evil model: {model}")
+    elif globals.LANGUAGE_MODE == "japanese":
+        model = globals.JAPANESE_TEXT_MODEL  # Use Swallow for Japanese
+        logger.info(f"Using Japanese model: {model}")
     else:
         model = globals.TEXT_MODEL
+        logger.info(f"Using default model: {model}")

     # Determine channel_id for conversation history
     # For servers, use guild_id; for DMs, use user_id
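The model selection above assumes globals exposes the new language settings. A sketch of the corresponding globals.py entries (the names LANGUAGE_MODE and JAPANESE_TEXT_MODEL appear in the diff; the default value and the exact Swallow model identifier are assumptions):

# globals.py additions assumed by this commit (values are illustrative).
LANGUAGE_MODE = "english"  # toggled to "japanese" from the WebUI
JAPANESE_TEXT_MODEL = "llama-3.1-swallow-8b-instruct"  # hypothetical alias for the Llama 3.1 Swallow build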