Untested Phase 1 (Foundation & Resource management) of voice chat integration

2026-01-16 13:01:08 +02:00
parent 353c9c9583
commit 911f11ee9f
9 changed files with 2288 additions and 0 deletions
--- a/bot/utils/autonomous.py
+++ b/bot/utils/autonomous.py
@@ -17,12 +17,34 @@ logger = get_logger('autonomous')
 _last_action_execution = {}  # guild_id -> timestamp
 _MIN_ACTION_INTERVAL = 30  # Minimum 30 seconds between autonomous actions

+# Pause state for voice sessions.
+# Single module-wide flag (not per guild): while it is True,
+# autonomous_tick_v2() returns before doing any work. It is flipped by
+# pause_autonomous_system() and resume_autonomous_system().
+_autonomous_paused = False
+
+
+def pause_autonomous_system():
+    """
+    Suspend autonomous message generation (used while a voice session runs).
+
+    Raises the module-level pause flag; autonomous_tick_v2() checks that flag
+    and returns early for as long as it stays set.
+    """
+    global _autonomous_paused
+
+    _autonomous_paused = True
+    logger.info("Autonomous system paused")
+
+
+def resume_autonomous_system():
+    """
+    Re-enable autonomous message generation (used once a voice session ends).
+
+    Clears the module-level pause flag so autonomous_tick_v2() runs its
+    normal logic again.
+    """
+    global _autonomous_paused
+
+    _autonomous_paused = False
+    logger.info("Autonomous system resumed")
+

 async def autonomous_tick_v2(guild_id: int):
    """
    New autonomous tick that uses context-aware decision making.
    Replaces the random 10% chance with intelligent decision.
    """
+    # Check if autonomous is paused (voice session)
+    if _autonomous_paused:
+        logger.debug(f"[V2] Autonomous system paused (voice session active)")
+        return
+    
    # Rate limiting check
    now = time.time()
    if guild_id in _last_action_execution:
--- a/bot/utils/bipolar_mode.py
+++ b/bot/utils/bipolar_mode.py
@@ -28,6 +28,31 @@ MIN_EXCHANGES = 4  # Minimum number of back-and-forth exchanges before ending ca
 ARGUMENT_TRIGGER_CHANCE = 0.15  # 15% chance for the other Miku to break through
 DELAY_BETWEEN_MESSAGES = (2.0, 5.0)  # Random delay between argument messages (seconds)

+# Pause state for voice sessions.
+# While True, maybe_trigger_argument() returns False before starting an
+# argument. Toggled by pause_bipolar_interactions() and
+# resume_bipolar_interactions(); read through is_bipolar_paused().
+_bipolar_interactions_paused = False
+
+# ============================================================================
+# VOICE SESSION PAUSE/RESUME
+# ============================================================================
+
+def pause_bipolar_interactions():
+    """
+    Suspend all bipolar interactions (used while a voice session runs).
+
+    Sets the module-level pause flag that is_bipolar_paused() reports.
+    """
+    global _bipolar_interactions_paused
+
+    _bipolar_interactions_paused = True
+    logger.info("Bipolar interactions paused")
+
+
+def resume_bipolar_interactions():
+    """
+    Re-enable bipolar interactions (used once a voice session ends).
+
+    Clears the module-level pause flag that is_bipolar_paused() reports.
+    """
+    global _bipolar_interactions_paused
+
+    _bipolar_interactions_paused = False
+    logger.info("Bipolar interactions resumed")
+
+
+def is_bipolar_paused():
+    """
+    Report whether bipolar interactions are currently suspended.
+
+    Returns the module-level pause flag: True between a call to
+    pause_bipolar_interactions() and the next resume_bipolar_interactions().
+    """
+    return _bipolar_interactions_paused
+
 # ============================================================================
 # STATE PERSISTENCE
 # ============================================================================
@@ -1039,6 +1064,11 @@ async def maybe_trigger_argument(channel: discord.TextChannel, client, context:
    if not globals.BIPOLAR_MODE:
        return False
    
+    # Check if bipolar interactions are paused (voice session)
+    if is_bipolar_paused():
+        logger.debug("Bipolar argument blocked (voice session active)")
+        return False
+    
    if is_argument_in_progress(channel.id):
        return False
    
--- a/bot/utils/profile_picture_manager.py
+++ b/bot/utils/profile_picture_manager.py
@@ -47,6 +47,17 @@ class ProfilePictureManager:
    
    def __init__(self):
        self._ensure_directories()
+        # Flipped by lock_switching() / unlock_switching(). While True, the
+        # profile picture change routine returns a failure result instead of
+        # switching (voice session active).
+        self.switching_locked = False  # Lock for voice session
+    
+    def lock_switching(self):
+        """
+        Forbid profile picture changes (used while a voice session runs).
+
+        Sets self.switching_locked, which the change routine checks before
+        doing any work.
+        """
+        self.switching_locked = True
+        logger.info("Profile picture switching locked")
+        
+    def unlock_switching(self):
+        """
+        Allow profile picture changes again (used once a voice session ends).
+
+        Clears self.switching_locked so the change routine proceeds normally.
+        """
+        self.switching_locked = False
+        logger.info("Profile picture switching unlocked")
    
    def _ensure_directories(self):
        """Ensure profile picture directory exists"""
@@ -247,6 +258,16 @@ class ProfilePictureManager:
        Returns:
            Dict with status and metadata
        """
+        # Check if switching is locked (voice session active)
+        if self.switching_locked:
+            logger.info("Profile picture change blocked (voice session active)")
+            return {
+                "success": False,
+                "source": None,
+                "error": "Profile picture switching locked during voice session",
+                "metadata": {}
+            }
+        
        result = {
            "success": False,
            "source": None,
--- a/bot/utils/voice_manager.py
+++ b/bot/utils/voice_manager.py
@@ -0,0 +1,358 @@
+# voice_manager.py
+"""
+Voice session manager for Miku Discord bot.
+Handles Discord voice channel connections, resource locking, and feature blocking during voice sessions.
+
+During a voice session:
+- GPU switches to AMD for text inference only
+- Vision model is blocked (keeps GTX 1660 for TTS)
+- Image generation is blocked
+- Bipolar mode interactions are disabled
+- Profile picture switching is locked
+- Autonomous engine is paused
+- Scheduled events are paused
+- Text channels are paused (messages are queued; in Phase 1 the queue is discarded on resume)
+- Figurine notifier is paused
+"""
+
+import asyncio
+import json
+import os
+from typing import Optional
+import discord
+import globals
+from utils.logger import get_logger
+
+logger = get_logger('voice_manager')
+
+
+class VoiceSessionManager:
+    """
+    Singleton manager for voice chat sessions.
+
+    Ensures only one voice session is active at a time and owns every resource
+    lock taken for it. Locks are applied in start_session() and released in
+    end_session(). Release steps are run independently of each other so that a
+    single failing step cannot leave the remaining features locked.
+    """
+
+    _instance = None
+
+    def __new__(cls):
+        # Build the instance once; every later construction returns the same object.
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+
+    def __init__(self):
+        # __init__ runs on every VoiceSessionManager() call; only set up once.
+        if self._initialized:
+            return
+
+        self.active_session: Optional['VoiceSession'] = None
+        # Serialises start_session() and end_session() so they cannot interleave.
+        self.session_lock = asyncio.Lock()
+        self._initialized = True
+        logger.info("VoiceSessionManager initialized")
+
+    async def start_session(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
+        """
+        Start a voice session with full resource locking.
+
+        Args:
+            guild_id: Discord guild ID
+            voice_channel: Voice channel to join
+            text_channel: Text channel for voice prompts
+
+        Raises:
+            RuntimeError: If a session is already active (nothing is changed).
+            Exception: Re-raised from any locking step that fails, after the
+                locks have been rolled back on a best-effort basis.
+        """
+        async with self.session_lock:
+            if self.active_session:
+                # RuntimeError is still caught by callers using `except Exception`.
+                raise RuntimeError("Voice session already active")
+
+            logger.info(f"Starting voice session in {voice_channel.name} (guild {guild_id})")
+
+            try:
+                # 1. Switch to AMD GPU for text inference
+                await self._switch_to_amd_gpu()
+
+                # 2. Block vision model loading
+                await self._block_vision_model()
+
+                # 3. Disable image generation (ComfyUI)
+                await self._disable_image_generation()
+
+                # 4. Pause text channel inference (queue messages)
+                await self._pause_text_channels()
+
+                # 5. Disable bipolar mode interactions (Miku/Evil Miku arguments)
+                await self._disable_bipolar_mode()
+
+                # 6. Disable profile picture switching
+                await self._disable_profile_picture_switching()
+
+                # 7. Pause autonomous engine
+                await self._pause_autonomous_engine()
+
+                # 8. Pause scheduled events
+                await self._pause_scheduled_events()
+
+                # 9. Pause figurine notifier
+                await self._pause_figurine_notifier()
+
+                # 10. Create and connect voice session
+                self.active_session = VoiceSession(guild_id, voice_channel, text_channel)
+                # Note: Actual voice connection will be implemented in Phase 2
+
+                logger.info("✓ Voice session started successfully")
+
+            except Exception as e:
+                logger.error(f"Failed to start voice session: {e}", exc_info=True)
+                # Roll back whatever was locked before the failure
+                await self._cleanup_failed_start()
+                raise
+
+    async def end_session(self):
+        """
+        End voice session and release all resources.
+
+        Every release step is attempted even if an earlier one fails, and the
+        active session is always cleared.
+
+        Raises:
+            Exception: The first error raised by a release step, re-raised
+                after all remaining steps have been attempted.
+        """
+        async with self.session_lock:
+            if not self.active_session:
+                logger.warning("No active voice session to end")
+                return
+
+            logger.info("Ending voice session...")
+
+            # 1. Disconnect from voice (Phase 2 implementation)
+            # await self.active_session.disconnect()
+
+            try:
+                # 2-9. Release every lock taken by start_session()
+                errors = await self._release_all_resources()
+            finally:
+                # 10. Clear active session, even if releasing was interrupted
+                self.active_session = None
+
+            if errors:
+                logger.error(f"Error during session cleanup: {errors[0]}")
+                raise errors[0]
+
+            logger.info("✓ Voice session ended successfully, all resources released")
+
+    async def _release_all_resources(self) -> list:
+        """
+        Run every release step, isolating failures from one another.
+
+        A step that raises is logged and recorded, and the remaining steps
+        still run, so one broken subsystem cannot keep the others locked.
+
+        Returns:
+            List of exceptions raised by the steps, in execution order
+            (empty when everything was released cleanly).
+        """
+        release_steps = (
+            self._resume_text_channels,
+            self._unblock_vision_model,
+            self._enable_image_generation,
+            self._enable_bipolar_mode,
+            self._enable_profile_picture_switching,
+            self._resume_autonomous_engine,
+            self._resume_scheduled_events,
+            self._resume_figurine_notifier,
+        )
+
+        errors = []
+        for step in release_steps:
+            try:
+                await step()
+            except Exception as e:
+                logger.error(f"Release step {step.__name__} failed: {e}", exc_info=True)
+                errors.append(e)
+        return errors
+
+    # ==================== Resource Locking Methods ====================
+
+    async def _switch_to_amd_gpu(self):
+        """
+        Switch text inference to AMD GPU (RX 6800).
+
+        Records the selection by writing memory/gpu_state.json.
+
+        NOTE(review): nothing in this class restores the previous GPU
+        selection when the session ends - confirm whether that is intended.
+
+        Raises:
+            Exception: Re-raised if the state file cannot be written.
+        """
+        try:
+            state_dir = "memory"
+            os.makedirs(state_dir, exist_ok=True)
+            gpu_state_file = os.path.join(state_dir, "gpu_state.json")
+
+            with open(gpu_state_file, "w") as f:
+                json.dump({"current_gpu": "amd", "reason": "voice_session"}, f)
+
+            logger.info("✓ Switched to AMD GPU for text inference")
+        except Exception as e:
+            logger.error(f"Failed to switch GPU: {e}")
+            raise
+
+    async def _block_vision_model(self):
+        """Prevent vision model from loading during voice session"""
+        globals.VISION_MODEL_BLOCKED = True
+        logger.info("✓ Vision model blocked")
+
+    async def _unblock_vision_model(self):
+        """Allow vision model to load after voice session"""
+        globals.VISION_MODEL_BLOCKED = False
+        logger.info("✓ Vision model unblocked")
+
+    async def _disable_image_generation(self):
+        """Block ComfyUI image generation during voice session"""
+        globals.IMAGE_GENERATION_BLOCKED = True
+        # User-facing explanation stored alongside the block flag
+        globals.IMAGE_GENERATION_BLOCK_MESSAGE = (
+            "🎤 I can't draw right now, I'm talking in voice chat! "
+            "Ask me again after I leave the voice channel."
+        )
+        logger.info("✓ Image generation disabled")
+
+    async def _enable_image_generation(self):
+        """Re-enable image generation after voice session"""
+        globals.IMAGE_GENERATION_BLOCKED = False
+        globals.IMAGE_GENERATION_BLOCK_MESSAGE = None
+        logger.info("✓ Image generation re-enabled")
+
+    async def _pause_text_channels(self):
+        """Queue text messages instead of processing during voice session"""
+        globals.VOICE_SESSION_ACTIVE = True
+        globals.TEXT_MESSAGE_QUEUE = []
+        logger.info("✓ Text channels paused (messages will be queued)")
+
+    async def _resume_text_channels(self):
+        """
+        Resume text channels after voice session.
+
+        Phase 1 does not process the queue yet: any queued messages are
+        discarded with a warning.
+        """
+        globals.VOICE_SESSION_ACTIVE = False
+        # The queue only exists once _pause_text_channels() has run; this method
+        # is also used to roll back a start that failed before that point.
+        queued_count = len(getattr(globals, "TEXT_MESSAGE_QUEUE", None) or [])
+
+        if queued_count > 0:
+            logger.info(f"Resuming text channels, {queued_count} messages queued")
+            # TODO: Process queue in Phase 2 (need message handler integration)
+            # For now, just clear the queue
+            globals.TEXT_MESSAGE_QUEUE = []
+            logger.warning(f"Discarded {queued_count} queued messages (queue processing not yet implemented)")
+        else:
+            logger.info("✓ Text channels resumed (no queued messages)")
+
+    async def _disable_bipolar_mode(self):
+        """Prevent Miku/Evil Miku arguments during voice session (best effort)"""
+        try:
+            from utils.bipolar_mode import pause_bipolar_interactions
+            pause_bipolar_interactions()
+            logger.info("✓ Bipolar mode interactions disabled")
+        except ImportError:
+            logger.warning("bipolar_mode module not found, skipping")
+        except AttributeError:
+            logger.warning("pause_bipolar_interactions not implemented yet, skipping")
+
+    async def _enable_bipolar_mode(self):
+        """Re-enable Miku/Evil Miku arguments after voice session (best effort)"""
+        try:
+            from utils.bipolar_mode import resume_bipolar_interactions
+            resume_bipolar_interactions()
+            logger.info("✓ Bipolar mode interactions re-enabled")
+        except ImportError:
+            logger.warning("bipolar_mode module not found, skipping")
+        except AttributeError:
+            logger.warning("resume_bipolar_interactions not implemented yet, skipping")
+
+    async def _disable_profile_picture_switching(self):
+        """Lock profile picture during voice session (best effort)"""
+        try:
+            from utils.profile_picture_manager import profile_picture_manager
+            if hasattr(profile_picture_manager, 'lock_switching'):
+                profile_picture_manager.lock_switching()
+                logger.info("✓ Profile picture switching disabled")
+            else:
+                logger.warning("profile_picture_manager.lock_switching not implemented yet, skipping")
+        except ImportError:
+            logger.warning("profile_picture_manager module not found, skipping")
+
+    async def _enable_profile_picture_switching(self):
+        """Unlock profile picture after voice session (best effort)"""
+        try:
+            from utils.profile_picture_manager import profile_picture_manager
+            if hasattr(profile_picture_manager, 'unlock_switching'):
+                profile_picture_manager.unlock_switching()
+                logger.info("✓ Profile picture switching re-enabled")
+            else:
+                logger.warning("profile_picture_manager.unlock_switching not implemented yet, skipping")
+        except ImportError:
+            logger.warning("profile_picture_manager module not found, skipping")
+
+    async def _pause_autonomous_engine(self):
+        """Pause autonomous message generation during voice session (best effort)"""
+        try:
+            from utils.autonomous import pause_autonomous_system
+            pause_autonomous_system()
+            logger.info("✓ Autonomous engine paused")
+        except ImportError:
+            logger.warning("autonomous module not found, skipping")
+        except AttributeError:
+            logger.warning("pause_autonomous_system not implemented yet, skipping")
+
+    async def _resume_autonomous_engine(self):
+        """Resume autonomous message generation after voice session (best effort)"""
+        try:
+            from utils.autonomous import resume_autonomous_system
+            resume_autonomous_system()
+            logger.info("✓ Autonomous engine resumed")
+        except ImportError:
+            logger.warning("autonomous module not found, skipping")
+        except AttributeError:
+            logger.warning("resume_autonomous_system not implemented yet, skipping")
+
+    async def _pause_scheduled_events(self):
+        """Pause all scheduled jobs during voice session (failure is logged, not raised)"""
+        try:
+            globals.scheduler.pause()
+            logger.info("✓ Scheduled events paused")
+        except Exception as e:
+            logger.error(f"Failed to pause scheduler: {e}")
+
+    async def _resume_scheduled_events(self):
+        """Resume scheduled jobs after voice session (failure is logged, not raised)"""
+        try:
+            globals.scheduler.resume()
+            logger.info("✓ Scheduled events resumed")
+        except Exception as e:
+            logger.error(f"Failed to resume scheduler: {e}")
+
+    async def _pause_figurine_notifier(self):
+        """Pause figurine notifications during voice session"""
+        try:
+            # Assuming figurine notifier is a scheduled job
+            globals.scheduler.pause_job('figurine_notifier')
+            logger.info("✓ Figurine notifier paused")
+        except Exception as e:
+            # Job might not exist, that's okay
+            logger.debug(f"Could not pause figurine notifier (may not exist): {e}")
+
+    async def _resume_figurine_notifier(self):
+        """Resume figurine notifications after voice session"""
+        try:
+            globals.scheduler.resume_job('figurine_notifier')
+            logger.info("✓ Figurine notifier resumed")
+        except Exception as e:
+            # Job might not exist, that's okay
+            logger.debug(f"Could not resume figurine notifier (may not exist): {e}")
+
+    async def _cleanup_failed_start(self):
+        """
+        Roll back resource locks if session start fails.
+
+        Best effort: every release step is attempted and failures are only
+        logged, so the original start-up error is the one that reaches the
+        caller.
+        """
+        logger.warning("Cleaning up after failed session start...")
+        errors = await self._release_all_resources()
+        if errors:
+            logger.error(f"Error during cleanup: {errors[0]}")
+
+
+class VoiceSession:
+    """
+    State holder for one active voice chat session.
+
+    Phase 1 only records where the session takes place; the voice
+    connection itself is added in Phase 2.
+    """
+
+    def __init__(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
+        # Nothing is connected yet: no voice client, session not marked active.
+        self.active = False
+        self.voice_client: Optional[discord.VoiceClient] = None
+
+        # Where the session takes place.
+        self.text_channel = text_channel
+        self.voice_channel = voice_channel
+        self.guild_id = guild_id
+
+        logger.info(f"VoiceSession created for {voice_channel.name} in guild {guild_id}")
+
+    # Phase 2: Implement voice connection, audio streaming, TTS integration
+
+
+# Global singleton instance
+# Created at import time. VoiceSessionManager.__new__ hands back this same
+# object for any later VoiceSessionManager() call.
+voice_manager = VoiceSessionManager()