Untested Phase 1 (Foundation & Resource management) of voice chat integration

This commit is contained in:
2026-01-16 13:01:08 +02:00
parent 353c9c9583
commit 911f11ee9f
9 changed files with 2288 additions and 0 deletions

View File

@@ -125,6 +125,19 @@ async def on_message(message):
if message.author == globals.client.user:
return
# Check for voice commands first (!miku join, !miku leave, !miku voice-status)
if not isinstance(message.channel, discord.DMChannel) and message.content.strip().lower().startswith('!miku '):
from commands.voice import handle_voice_command
parts = message.content.strip().split()
if len(parts) >= 2:
cmd = parts[1].lower()
args = parts[2:] if len(parts) > 2 else []
if cmd in ['join', 'leave', 'voice-status']:
await handle_voice_command(message, cmd, args)
return
# Skip processing if a bipolar argument is in progress in this channel
if not isinstance(message.channel, discord.DMChannel):
from utils.bipolar_mode import is_argument_in_progress
@@ -196,6 +209,14 @@ async def on_message(message):
logger.error(f"Failed to fetch replied message for context: {e}")
async with message.channel.typing():
# Check if vision model is blocked (voice session active)
if message.attachments and globals.VISION_MODEL_BLOCKED:
await message.channel.send(
"🎤 I can't look at images or videos right now, I'm talking in voice chat! "
"Send it again after I leave the voice channel."
)
return
# If message has an image, video, or GIF attachment
if message.attachments:
for attachment in message.attachments:
@@ -504,6 +525,13 @@ async def on_message(message):
if is_image_request and image_prompt:
logger.info(f"🎨 Image generation request detected: '{image_prompt}' from {message.author.display_name}")
# Block image generation during voice sessions
if globals.IMAGE_GENERATION_BLOCKED:
await message.channel.send(globals.IMAGE_GENERATION_BLOCK_MESSAGE)
await message.add_reaction('🎤')
logger.info("🚫 Image generation blocked - voice session active")
return
# Handle the image generation workflow
success = await handle_image_generation_request(message, image_prompt)
if success:

229
bot/commands/voice.py Normal file
View File

@@ -0,0 +1,229 @@
# voice.py
"""
Voice channel commands for Miku Discord bot.
Handles joining, leaving, and status commands for voice chat sessions.
"""
import discord
from utils.voice_manager import voice_manager
from utils.logger import get_logger
logger = get_logger('voice_commands')
async def handle_voice_command(message, cmd, args):
    """
    Dispatch a `!miku` voice command to its handler.

    Args:
        message: Discord message that carried the command
        cmd: Command name (`join`, `leave` or `voice-status`)
        args: Remaining command arguments; only `join` consumes them
    """
    if cmd == 'join':
        await _handle_join(message, args)
        return

    if cmd == 'leave':
        await _handle_leave(message)
        return

    if cmd == 'voice-status':
        await _handle_status(message)
        return

    # Anything else is reported back to the channel the command came from
    await message.channel.send(f"❌ Unknown voice command: `{cmd}`")
async def _handle_join(message, args):
    """
    Handle the `!miku join` command.

    Resolves the target voice channel (an explicit `<#id>` mention in
    `args[0]`, otherwise the author's current voice channel), verifies the
    bot may connect and speak there, then starts a voice session through
    `voice_manager.start_session`.

    Args:
        message: Discord message that carried the command
        args: Command arguments; `args[0]` may be a channel mention

    Every failure is reported to `message.channel`; nothing is raised to
    the caller.
    """
    voice_channel = None

    if args and args[0].startswith('<#'):
        # Channel mentioned (e.g., !miku join #voice-chat arrives as <#1234>)
        mention = args[0]
        try:
            # Require the closing '>' so a truncated mention such as `<#1234`
            # is rejected instead of silently dropping its last digit.
            if not mention.endswith('>'):
                raise ValueError(f"malformed channel mention: {mention!r}")
            channel_id = int(mention[2:-1])
            # AttributeError covers a message without a guild
            voice_channel = message.guild.get_channel(channel_id)
        except (ValueError, AttributeError):
            await message.channel.send("❌ Invalid channel!")
            return

        if not isinstance(voice_channel, discord.VoiceChannel):
            await message.channel.send("❌ That's not a voice channel!")
            return
    else:
        # Use user's current voice channel
        if message.author.voice and message.author.voice.channel:
            voice_channel = message.author.voice.channel
        else:
            await message.channel.send(
                "❌ You must be in a voice channel! "
                "Or mention a voice channel like `!miku join #voice-chat`"
            )
            return

    # Check permissions (resolved once, both checks use the same snapshot)
    bot_permissions = voice_channel.permissions_for(message.guild.me)
    if not bot_permissions.connect:
        await message.channel.send(f"❌ I don't have permission to join {voice_channel.mention}!")
        return
    if not bot_permissions.speak:
        await message.channel.send(f"❌ I don't have permission to speak in {voice_channel.mention}!")
        return

    # Start session
    try:
        await message.channel.send(f"🎤 Joining {voice_channel.mention}...")
        await voice_manager.start_session(
            message.guild.id,
            voice_channel,
            message.channel  # Use current text channel for prompts
        )

        embed = discord.Embed(
            title="🎤 Voice Chat Active",
            description=f"I've joined {voice_channel.mention}!",
            color=discord.Color.from_rgb(134, 206, 203)  # Miku teal
        )
        embed.add_field(
            name="How to use",
            value=f"Send messages in {message.channel.mention} to make me speak!",
            inline=False
        )
        embed.add_field(
            name="⚠️ Resource Mode",
            value=(
                "• Text inference on AMD GPU only\n"
                "• Vision model disabled\n"
                "• Image generation disabled\n"
                "• Other text channels paused"
            ),
            inline=False
        )
        embed.set_footer(text="Use !miku leave to end the session")

        await message.channel.send(embed=embed)
        logger.info(f"Voice session started by {message.author} in {voice_channel.name}")
    except Exception as e:
        # Command boundary: surface the failure to the user and keep the traceback in the log
        await message.channel.send(f"❌ Failed to join voice: {e}")
        logger.error(f"Failed to start voice session: {e}", exc_info=True)
async def _handle_leave(message):
    """
    Handle the `!miku leave` command.

    Ends the active voice session via `voice_manager.end_session` and posts
    a confirmation embed. Refuses when there is no session, or when the
    session belongs to a different guild than the one the command came from.
    """
    session = voice_manager.active_session

    if not session:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # Only the guild that owns the session may end it
    if session.guild_id != message.guild.id:
        await message.channel.send("❌ I'm in a voice channel in a different server!")
        return

    try:
        # Grab the name first: the session object is dropped by end_session()
        left_channel_name = session.voice_channel.name

        await message.channel.send("👋 Leaving voice channel...")
        await voice_manager.end_session()

        released_summary = (
            "• Vision model available\n"
            "• Image generation available\n"
            "• Text channels resumed\n"
            "• All features restored"
        )
        farewell = discord.Embed(
            title="👋 Voice Chat Ended",
            description=f"Left {left_channel_name}",
            color=discord.Color.from_rgb(134, 206, 203)
        )
        farewell.add_field(name="✅ Resources Released", value=released_summary, inline=False)

        await message.channel.send(embed=farewell)
        logger.info(f"Voice session ended by {message.author}")
    except Exception as e:
        await message.channel.send(f"⚠️ Error leaving voice: {e}")
        logger.error(f"Failed to end voice session: {e}", exc_info=True)
async def _handle_status(message):
    """
    Handle the `!miku voice-status` command.

    Posts one of three replies to `message.channel`: a "no session" embed,
    a plain notice when the session lives in another guild, or an embed
    describing the active session and the features it blocks.
    """
    session = voice_manager.active_session

    if not session:
        idle_embed = discord.Embed(
            title="🔇 No Active Voice Session",
            description="I'm not currently in a voice channel.",
            color=discord.Color.greyple()
        )
        idle_embed.add_field(
            name="To start",
            value="Use `!miku join` while in a voice channel",
            inline=False
        )
        await message.channel.send(embed=idle_embed)
        return

    # Check if in same guild
    if session.guild_id != message.guild.id:
        await message.channel.send(" I'm in a voice channel in a different server.")
        return

    resource_lines = [
        "**GPU Usage:**",
        "• AMD RX 6800: Text model + RVC",
        "• GTX 1660: Soprano TTS only",
        "",
        "**Blocked Features:**",
        "• ❌ Vision model",
        "• ❌ Image generation",
        "• ❌ Bipolar mode",
        "• ❌ Profile picture changes",
        "• ⏸️ Autonomous engine",
        "• ⏸️ Scheduled events",
        "• 📦 Other text channels (queued)",
    ]

    # (name, value, inline) for every field, in display order
    status_fields = (
        ("Voice Channel", session.voice_channel.mention, True),
        ("Prompt Channel", session.text_channel.mention, True),
        ("📊 Resource Allocation", "\n".join(resource_lines), False),
    )

    status_embed = discord.Embed(
        title="🎤 Voice Session Active",
        description="Currently in voice chat",
        color=discord.Color.from_rgb(134, 206, 203)
    )
    for field_name, field_value, is_inline in status_fields:
        status_embed.add_field(name=field_name, value=field_value, inline=is_inline)
    status_embed.set_footer(text="Use !miku leave to end the session")

    await message.channel.send(embed=status_embed)

View File

@@ -96,3 +96,12 @@ LAST_FULL_PROMPT = ""
# Persona Dialogue System (conversations between Miku and Evil Miku)
LAST_PERSONA_DIALOGUE_TIME = 0 # Timestamp of last dialogue for cooldown
# Voice Chat Session State
# Set to True by the voice manager when it pauses text channels and back to
# False when it resumes them.
VOICE_SESSION_ACTIVE = False
# Queue for messages received during voice session. The voice manager resets
# it when a session starts and currently discards its contents on resume.
TEXT_MESSAGE_QUEUE = []
# Feature Blocking Flags (set during voice session)
# Checked by the message handler before processing attachments.
VISION_MODEL_BLOCKED = False
# Checked by the message handler before starting image generation.
IMAGE_GENERATION_BLOCKED = False
# User-facing text sent while image generation is blocked; None when unblocked.
IMAGE_GENERATION_BLOCK_MESSAGE = None

View File

@@ -20,3 +20,5 @@ numpy
scikit-learn
transformers
torch
PyNaCl>=1.5.0
websockets>=12.0

View File

@@ -17,12 +17,34 @@ logger = get_logger('autonomous')
_last_action_execution = {} # guild_id -> timestamp
_MIN_ACTION_INTERVAL = 30 # Minimum 30 seconds between autonomous actions
# Pause state for voice sessions
_autonomous_paused = False
def pause_autonomous_system():
    """
    Pause autonomous message generation (called during voice sessions).

    Sets the module-level `_autonomous_paused` flag; while it is set,
    `autonomous_tick_v2` returns before doing any work.
    """
    global _autonomous_paused
    _autonomous_paused = True
    logger.info("Autonomous system paused")
def resume_autonomous_system():
    """
    Resume autonomous message generation (called after voice sessions).

    Clears the module-level `_autonomous_paused` flag so that
    `autonomous_tick_v2` proceeds past its pause check again.
    """
    global _autonomous_paused
    _autonomous_paused = False
    logger.info("Autonomous system resumed")
async def autonomous_tick_v2(guild_id: int):
"""
New autonomous tick that uses context-aware decision making.
Replaces the random 10% chance with intelligent decision.
"""
# Check if autonomous is paused (voice session)
if _autonomous_paused:
logger.debug(f"[V2] Autonomous system paused (voice session active)")
return
# Rate limiting check
now = time.time()
if guild_id in _last_action_execution:

View File

@@ -28,6 +28,31 @@ MIN_EXCHANGES = 4 # Minimum number of back-and-forth exchanges before ending ca
ARGUMENT_TRIGGER_CHANCE = 0.15 # 15% chance for the other Miku to break through
DELAY_BETWEEN_MESSAGES = (2.0, 5.0) # Random delay between argument messages (seconds)
# Pause state for voice sessions
_bipolar_interactions_paused = False
# ============================================================================
# VOICE SESSION PAUSE/RESUME
# ============================================================================
def pause_bipolar_interactions():
    """
    Pause all bipolar interactions (called during voice sessions).

    Sets the module-level `_bipolar_interactions_paused` flag, which
    `is_bipolar_paused()` reports to callers.
    """
    global _bipolar_interactions_paused
    _bipolar_interactions_paused = True
    logger.info("Bipolar interactions paused")
def resume_bipolar_interactions():
    """
    Resume bipolar interactions (called after voice sessions).

    Clears the module-level `_bipolar_interactions_paused` flag, which
    `is_bipolar_paused()` reports to callers.
    """
    global _bipolar_interactions_paused
    _bipolar_interactions_paused = False
    logger.info("Bipolar interactions resumed")
def is_bipolar_paused():
    """
    Check if bipolar interactions are currently paused.

    Returns:
        The current value of the module-level `_bipolar_interactions_paused`
        flag (True between a pause and the matching resume).
    """
    return _bipolar_interactions_paused
# ============================================================================
# STATE PERSISTENCE
# ============================================================================
@@ -1039,6 +1064,11 @@ async def maybe_trigger_argument(channel: discord.TextChannel, client, context:
if not globals.BIPOLAR_MODE:
return False
# Check if bipolar interactions are paused (voice session)
if is_bipolar_paused():
logger.debug("Bipolar argument blocked (voice session active)")
return False
if is_argument_in_progress(channel.id):
return False

View File

@@ -47,6 +47,17 @@ class ProfilePictureManager:
def __init__(self):
    """Create the profile picture directories and start with switching unlocked."""
    self._ensure_directories()
    # Lock for voice session; toggled by lock_switching() / unlock_switching()
    self.switching_locked = False
def lock_switching(self):
    """
    Lock profile picture changes during voice session.

    Sets `self.switching_locked`; the profile picture change path checks
    this flag and returns a failure result while it is set.
    """
    self.switching_locked = True
    logger.info("Profile picture switching locked")
def unlock_switching(self):
    """
    Unlock profile picture changes after voice session.

    Clears `self.switching_locked` so profile picture changes are no
    longer rejected by the lock check.
    """
    self.switching_locked = False
    logger.info("Profile picture switching unlocked")
def _ensure_directories(self):
"""Ensure profile picture directory exists"""
@@ -247,6 +258,16 @@ class ProfilePictureManager:
Returns:
Dict with status and metadata
"""
# Check if switching is locked (voice session active)
if self.switching_locked:
logger.info("Profile picture change blocked (voice session active)")
return {
"success": False,
"source": None,
"error": "Profile picture switching locked during voice session",
"metadata": {}
}
result = {
"success": False,
"source": None,

358
bot/utils/voice_manager.py Normal file
View File

@@ -0,0 +1,358 @@
# voice_manager.py
"""
Voice session manager for Miku Discord bot.
Handles Discord voice channel connections, resource locking, and feature blocking during voice sessions.
During a voice session:
- GPU switches to AMD for text inference only
- Vision model is blocked (keeps GTX 1660 for TTS)
- Image generation is blocked
- Bipolar mode interactions are disabled
- Profile picture switching is locked
- Autonomous engine is paused
- Scheduled events are paused
- Text channels are paused (messages queued)
"""
import asyncio
import json
import os
from typing import Optional
import discord
import globals
from utils.logger import get_logger
logger = get_logger('voice_manager')
class VoiceSessionManager:
    """
    Singleton manager for voice chat sessions.

    Ensures only one voice session is active at a time and owns every
    resource lock taken for it: the feature flags in `globals`, the
    scheduler, and the pause switches of the bipolar, profile picture and
    autonomous subsystems.
    """

    _instance = None

    def __new__(cls):
        # Every construction returns the same object; `_initialized` lets
        # __init__ tell the first construction apart from later ones.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return

        self.active_session: Optional['VoiceSession'] = None
        # Serializes start_session() / end_session()
        self.session_lock = asyncio.Lock()
        self._initialized = True

        logger.info("VoiceSessionManager initialized")

    async def start_session(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
        """
        Start a voice session with full resource locking.

        Args:
            guild_id: Discord guild ID
            voice_channel: Voice channel to join
            text_channel: Text channel for voice prompts

        Raises:
            Exception: If session already active or resources can't be locked
        """
        async with self.session_lock:
            if self.active_session:
                raise Exception("Voice session already active")

            logger.info(f"Starting voice session in {voice_channel.name} (guild {guild_id})")

            try:
                # 1. Switch to AMD GPU for text inference
                await self._switch_to_amd_gpu()
                # 2. Block vision model loading
                await self._block_vision_model()
                # 3. Disable image generation (ComfyUI)
                await self._disable_image_generation()
                # 4. Pause text channel inference (queue messages)
                await self._pause_text_channels()
                # 5. Disable bipolar mode interactions (Miku/Evil Miku arguments)
                await self._disable_bipolar_mode()
                # 6. Disable profile picture switching
                await self._disable_profile_picture_switching()
                # 7. Pause autonomous engine
                await self._pause_autonomous_engine()
                # 8. Pause scheduled events
                await self._pause_scheduled_events()
                # 9. Pause figurine notifier
                await self._pause_figurine_notifier()

                # 10. Create and connect voice session
                self.active_session = VoiceSession(guild_id, voice_channel, text_channel)
                # Note: Actual voice connection will be implemented in Phase 2

                logger.info("✓ Voice session started successfully")
            except Exception as e:
                logger.error(f"Failed to start voice session: {e}", exc_info=True)
                # Cleanup on failure
                await self._cleanup_failed_start()
                raise

    async def end_session(self):
        """
        End voice session and release all resources.

        Every release step is attempted even when an earlier one fails, so a
        single failure cannot leave the remaining features blocked. The
        active session is always cleared.

        Raises:
            Exception: The first error raised by a release step, re-raised
                after all steps have run.
        """
        async with self.session_lock:
            if not self.active_session:
                logger.warning("No active voice session to end")
                return

            logger.info("Ending voice session...")

            # TODO: disconnect from voice here (Phase 2 implementation)

            # NOTE(review): _switch_to_amd_gpu() has no counterpart on this
            # path, so memory/gpu_state.json keeps "current_gpu": "amd" after
            # the session ends - confirm whether another component resets it.
            try:
                failures = await self._release_all_resources()
            finally:
                # Force clear session even on error
                self.active_session = None

            if failures:
                logger.error(f"Error during session cleanup: {failures[0]}")
                raise failures[0]

            logger.info("✓ Voice session ended successfully, all resources released")

    async def _release_all_resources(self):
        """
        Run every release step independently.

        Returns:
            List of exceptions raised by the steps, in step order (empty when
            everything succeeded). Each failure is logged with its traceback.
        """
        release_steps = (
            self._resume_text_channels,
            self._unblock_vision_model,
            self._enable_image_generation,
            self._enable_bipolar_mode,
            self._enable_profile_picture_switching,
            self._resume_autonomous_engine,
            self._resume_scheduled_events,
            self._resume_figurine_notifier,
        )

        failures = []
        for step in release_steps:
            try:
                await step()
            except Exception as e:
                # Keep going: the remaining resources still have to be released
                logger.error(f"Release step {step.__name__} failed: {e}", exc_info=True)
                failures.append(e)
        return failures

    # ==================== Resource Locking Methods ====================

    async def _switch_to_amd_gpu(self):
        """
        Switch text inference to AMD GPU (RX 6800).

        Records the switch by writing memory/gpu_state.json; re-raises any
        error so that start_session() aborts.
        """
        try:
            gpu_state_file = os.path.join("memory", "gpu_state.json")
            os.makedirs("memory", exist_ok=True)
            with open(gpu_state_file, "w", encoding="utf-8") as f:
                json.dump({"current_gpu": "amd", "reason": "voice_session"}, f)
            logger.info("✓ Switched to AMD GPU for text inference")
        except Exception as e:
            logger.error(f"Failed to switch GPU: {e}")
            raise

    async def _block_vision_model(self):
        """Prevent vision model from loading during voice session"""
        globals.VISION_MODEL_BLOCKED = True
        logger.info("✓ Vision model blocked")

    async def _unblock_vision_model(self):
        """Allow vision model to load after voice session"""
        globals.VISION_MODEL_BLOCKED = False
        logger.info("✓ Vision model unblocked")

    async def _disable_image_generation(self):
        """Block ComfyUI image generation during voice session"""
        globals.IMAGE_GENERATION_BLOCKED = True
        globals.IMAGE_GENERATION_BLOCK_MESSAGE = (
            "🎤 I can't draw right now, I'm talking in voice chat! "
            "Ask me again after I leave the voice channel."
        )
        logger.info("✓ Image generation disabled")

    async def _enable_image_generation(self):
        """Re-enable image generation after voice session"""
        globals.IMAGE_GENERATION_BLOCKED = False
        globals.IMAGE_GENERATION_BLOCK_MESSAGE = None
        logger.info("✓ Image generation re-enabled")

    async def _pause_text_channels(self):
        """Queue text messages instead of processing during voice session"""
        globals.VOICE_SESSION_ACTIVE = True
        globals.TEXT_MESSAGE_QUEUE = []
        logger.info("✓ Text channels paused (messages will be queued)")

    async def _resume_text_channels(self):
        """
        Resume text channels after voice session.

        Queued messages are currently discarded (with a warning); replaying
        them is planned for Phase 2.
        """
        globals.VOICE_SESSION_ACTIVE = False
        queued_count = len(globals.TEXT_MESSAGE_QUEUE)

        if queued_count > 0:
            logger.info(f"Resuming text channels, {queued_count} messages queued")
            # TODO: Process queue in Phase 2 (need message handler integration)
            # For now, just clear the queue
            globals.TEXT_MESSAGE_QUEUE = []
            logger.warning(f"Discarded {queued_count} queued messages (queue processing not yet implemented)")
        else:
            logger.info("✓ Text channels resumed (no queued messages)")

    async def _disable_bipolar_mode(self):
        """Prevent Miku/Evil Miku arguments during voice session"""
        try:
            from utils.bipolar_mode import pause_bipolar_interactions
            pause_bipolar_interactions()
            logger.info("✓ Bipolar mode interactions disabled")
        except ImportError:
            logger.warning("bipolar_mode module not found, skipping")
        except AttributeError:
            logger.warning("pause_bipolar_interactions not implemented yet, skipping")

    async def _enable_bipolar_mode(self):
        """Re-enable Miku/Evil Miku arguments after voice session"""
        try:
            from utils.bipolar_mode import resume_bipolar_interactions
            resume_bipolar_interactions()
            logger.info("✓ Bipolar mode interactions re-enabled")
        except ImportError:
            logger.warning("bipolar_mode module not found, skipping")
        except AttributeError:
            logger.warning("resume_bipolar_interactions not implemented yet, skipping")

    async def _disable_profile_picture_switching(self):
        """Lock profile picture during voice session"""
        try:
            from utils.profile_picture_manager import profile_picture_manager
            if hasattr(profile_picture_manager, 'lock_switching'):
                profile_picture_manager.lock_switching()
                logger.info("✓ Profile picture switching disabled")
            else:
                logger.warning("profile_picture_manager.lock_switching not implemented yet, skipping")
        except ImportError:
            logger.warning("profile_picture_manager module not found, skipping")

    async def _enable_profile_picture_switching(self):
        """Unlock profile picture after voice session"""
        try:
            from utils.profile_picture_manager import profile_picture_manager
            if hasattr(profile_picture_manager, 'unlock_switching'):
                profile_picture_manager.unlock_switching()
                logger.info("✓ Profile picture switching re-enabled")
            else:
                logger.warning("profile_picture_manager.unlock_switching not implemented yet, skipping")
        except ImportError:
            logger.warning("profile_picture_manager module not found, skipping")

    async def _pause_autonomous_engine(self):
        """Pause autonomous message generation during voice session"""
        try:
            from utils.autonomous import pause_autonomous_system
            pause_autonomous_system()
            logger.info("✓ Autonomous engine paused")
        except ImportError:
            logger.warning("autonomous module not found, skipping")
        except AttributeError:
            logger.warning("pause_autonomous_system not implemented yet, skipping")

    async def _resume_autonomous_engine(self):
        """Resume autonomous message generation after voice session"""
        try:
            from utils.autonomous import resume_autonomous_system
            resume_autonomous_system()
            logger.info("✓ Autonomous engine resumed")
        except ImportError:
            logger.warning("autonomous module not found, skipping")
        except AttributeError:
            logger.warning("resume_autonomous_system not implemented yet, skipping")

    async def _pause_scheduled_events(self):
        """Pause all scheduled jobs during voice session (best effort, errors are logged)"""
        try:
            globals.scheduler.pause()
            logger.info("✓ Scheduled events paused")
        except Exception as e:
            logger.error(f"Failed to pause scheduler: {e}")

    async def _resume_scheduled_events(self):
        """Resume scheduled jobs after voice session (best effort, errors are logged)"""
        try:
            globals.scheduler.resume()
            logger.info("✓ Scheduled events resumed")
        except Exception as e:
            logger.error(f"Failed to resume scheduler: {e}")

    async def _pause_figurine_notifier(self):
        """Pause figurine notifications during voice session"""
        try:
            # Assuming figurine notifier is a scheduled job
            globals.scheduler.pause_job('figurine_notifier')
            logger.info("✓ Figurine notifier paused")
        except Exception as e:
            # Job might not exist, that's okay
            logger.debug(f"Could not pause figurine notifier (may not exist): {e}")

    async def _resume_figurine_notifier(self):
        """Resume figurine notifications after voice session"""
        try:
            globals.scheduler.resume_job('figurine_notifier')
            logger.info("✓ Figurine notifier resumed")
        except Exception as e:
            # Job might not exist, that's okay
            logger.debug(f"Could not resume figurine notifier (may not exist): {e}")

    async def _cleanup_failed_start(self):
        """
        Cleanup resources if session start fails.

        Best effort: every release step runs, failures are logged and never
        raised, so the original start error is the one callers see.
        """
        logger.warning("Cleaning up after failed session start...")
        try:
            failures = await self._release_all_resources()
            if failures:
                logger.error(f"Error during cleanup: {failures[0]}")
        except Exception as e:
            logger.error(f"Error during cleanup: {e}")
class VoiceSession:
    """
    State holder for one active voice chat session.

    Phase 1 only records which guild and channels the session is bound to;
    connecting to voice is left for Phase 2.
    """

    def __init__(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
        # Where the session lives
        self.guild_id = guild_id
        self.voice_channel, self.text_channel = voice_channel, text_channel

        # Connection state: nothing is connected yet in Phase 1
        self.voice_client: Optional[discord.VoiceClient] = None
        self.active = False

        logger.info(f"VoiceSession created for {voice_channel.name} in guild {guild_id}")

    # Phase 2: Implement voice connection, audio streaming, TTS integration
# Global singleton instance, created at import time. VoiceSessionManager.__new__
# always returns this same object, so importing `voice_manager` anywhere yields
# the one shared manager.
voice_manager = VoiceSessionManager()