# miku-discord/bot/utils/core.py
# (web-viewer paste header removed: "167 lines, 6.6 KiB, Python",
#  snapshot 2025-12-07 17:15:09 +02:00)
# utils/core.py
import asyncio
import aiohttp
import re
import globals
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from utils.logger import get_logger
logger = get_logger('core')
# switch_model() removed - llama-swap handles model switching automatically

# Base names for Miku in different scripts
# (Latin, Cyrillic, Hiragana, Katakana, Kanji).
_BASE_NAMES = [
    'miku', 'мику', 'みく', 'ミク', '未来'
]

# Japanese honorifics - all scripts combined.
# NOTE(review): the empty '' entries in the Hiragana/Katakana rows look like
# characters lost in a copy/paste (probably 'し' / 'シ'); they are kept
# verbatim so matching behavior is unchanged - TODO confirm and restore.
_HONORIFICS = [
    # Latin
    'chan', 'san', 'kun', 'nyan', 'hime', 'tan', 'chin', 'heika',
    'denka', 'kakka', 'shi', 'chama', 'kyun', 'dono', 'sensei', 'senpai', 'jou',
    # Hiragana
    'ちゃん', 'さん', 'くん', 'にゃん', 'ひめ', 'たん', 'ちん', 'へいか',
    'でんか', 'かっか', '', 'ちゃま', 'きゅん', 'どの', 'せんせい', 'せんぱい', 'じょう',
    # Katakana
    'チャン', 'サン', 'クン', 'ニャン', 'ヒメ', 'タン', 'チン', 'ヘイカ',
    'デンカ', 'カッカ', '', 'チャマ', 'キュン', 'ドノ', 'センセイ', 'センパイ', 'ジョウ',
    # Cyrillic
    'чан', 'сан', 'кун', 'нян', 'химе', 'тан', 'чин', 'хейка', 'хеика',
    'денка', 'какка', 'си', 'чама', 'кюн', 'доно', 'сенсэй', 'сенсеи', 'сенпай', 'сенпаи', 'джо'
]

# o- prefix variants.
# NOTE(review): the empty '' entries likewise look like lost characters
# (probably 'お' / 'オ') - kept verbatim; TODO confirm.
_O_PREFIXES = ['o-', 'о-', '', '']

# Punctuation/whitespace class used to delimit a name inside a sentence
# (ASCII + full-width comma, CJK comma, !/?, full stops, whitespace).
_DELIM = r'[,,、!?.。\s]'


def _build_name_patterns():
    """Return the escaped regex fragments matching Miku's name.

    Covers: bare name; name + honorific (optional dash/space between);
    o-prefix + name; o-prefix + name + honorific.  All fragments are
    lowercased so they match against a lowercased message.
    """
    patterns = []
    for base in _BASE_NAMES:
        base_re = re.escape(base.lower())
        # Base name alone.
        patterns.append(base_re)
        # With honorifics (allows optional dash/space between).
        for honorific in _HONORIFICS:
            hon_re = re.escape(honorific.lower())
            patterns.append(base_re + r'[\-\s]*' + hon_re)
        # With o- prefix (optional space after the prefix).
        for prefix in _O_PREFIXES:
            pre_re = re.escape(prefix.lower())
            patterns.append(pre_re + r'\s*' + base_re)
            # o-prefix + base + honorific.
            for honorific in _HONORIFICS:
                hon_re = re.escape(honorific.lower())
                patterns.append(pre_re + r'\s*' + base_re + r'[\-\s]*' + hon_re)
    return patterns


def _compile_address_regexes():
    """Compile the four positional regexes for every name pattern.

    Runs once at import time; the previous implementation rebuilt and
    re-searched ~1800 uncompiled patterns on every incoming message.
    Returns a list of (start, end, middle, alone) compiled-regex tuples.
    """
    compiled = []
    for pattern in _build_name_patterns():
        try:
            compiled.append((
                # Start of message + punctuation/end: "Miku, ..." / "みく!"
                re.compile(r'^' + pattern + r'(?:' + _DELIM + r'+|$)', re.IGNORECASE),
                # End of message (optionally preceded by punctuation): "..., Miku"
                re.compile(r'(?:' + _DELIM + r'+|^)' + pattern + r'[!?.。\s]*$', re.IGNORECASE),
                # Middle, surrounded by punctuation/space: "..., Miku, ..."
                re.compile(_DELIM + r'+' + pattern + _DELIM + r'+', re.IGNORECASE),
                # Just the name alone: "Miku" / "ミクちゃん"
                re.compile(r'^\s*' + pattern + r'[!?.。]*\s*$', re.IGNORECASE),
            ))
        except re.error as e:
            # Fixed: the old handler always logged `start_p` even when a
            # different positional regex failed; report only the offending
            # pattern and skip it.
            logger.error(f"REGEX ERROR - Pattern: '{pattern}' | Error: {e}")
    return compiled


# Built once at module import; read-only afterwards.
_ADDRESS_REGEXES = _compile_address_regexes()


async def is_miku_addressed(message) -> bool:
    """Return True when *message* should be treated as addressed to Miku.

    True for: any DM; a direct @mention of the bot; a reply to one of the
    bot's own messages; or the name "Miku" (any supported script, with an
    optional o- prefix and/or honorific) used vocatively - at the start or
    end of the text, alone, or set off by punctuation in the middle.

    Never raises: fetch failures on the replied-to message are logged and
    treated as "not a reply to Miku".
    """
    # Check if this is a DM (no guild) - in DMs, always respond.
    if message.guild is None:
        return True
    # Safety check: ensure guild and guild.me exist.
    if not message.guild or not message.guild.me:
        logger.warning(f"Invalid guild or guild.me in message from {message.author}")
        return False
    # If message contains a ping for Miku, return true.
    if message.guild.me in message.mentions:
        return True
    # If message is a reply, check the referenced message author.
    if message.reference:
        try:
            referenced_msg = await message.channel.fetch_message(message.reference.message_id)
            if referenced_msg.author == message.guild.me:
                return True
        except Exception as e:
            logger.warning(f"Could not fetch referenced message: {e}")

    cleaned_lower = message.content.strip().lower()
    # Check all patterns - she must be "addressed", not just mentioned.
    for start_re, end_re, middle_re, alone_re in _ADDRESS_REGEXES:
        if (start_re.search(cleaned_lower)
                or end_re.search(cleaned_lower)
                or middle_re.search(cleaned_lower)
                or alone_re.search(cleaned_lower)):
            return True
    return False
# Vectorstore functionality disabled - not needed with current structured context approach
# If you need embeddings in the future, you can use a different embedding provider
# For now, the bot uses structured prompts from context_manager.py
# def load_miku_knowledge():
# with open("miku_lore.txt", "r", encoding="utf-8") as f:
# text = f.read()
#
# from langchain_text_splitters import RecursiveCharacterTextSplitter
#
# text_splitter = RecursiveCharacterTextSplitter(
# chunk_size=520,
# chunk_overlap=50,
# separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
# )
#
# docs = [Document(page_content=chunk) for chunk in text_splitter.split_text(text)]
#
# vectorstore = FAISS.from_documents(docs, embeddings)
# return vectorstore
#
# def load_miku_lyrics():
# with open("miku_lyrics.txt", "r", encoding="utf-8") as f:
# lyrics_text = f.read()
#
# text_splitter = CharacterTextSplitter(chunk_size=520, chunk_overlap=50)
# docs = [Document(page_content=chunk) for chunk in text_splitter.split_text(lyrics_text)]
#
# vectorstore = FAISS.from_documents(docs, embeddings)
# return vectorstore
#
# miku_vectorstore = load_miku_knowledge()
# miku_lyrics_vectorstore = load_miku_lyrics()