Phase 2 implemented and tested. Added a warmup step to the pipeline; Miku now queues tokens while the pipeline is warming up.

2026-01-16 23:37:34 +02:00
parent b0066f3525
commit 9943cecdec
9 changed files with 631 additions and 15 deletions
--- a/bot/utils/voice_manager.py
+++ b/bot/utils/voice_manager.py
@@ -107,6 +107,14 @@ class VoiceSessionManager:
                    logger.error(f"Failed to connect to voice channel: {e}", exc_info=True)
                    raise
                
+                # 12. Start audio streaming (Phase 2)
+                try:
+                    await self.active_session.start_audio_streaming()
+                    logger.info(f"✓ Audio streaming started")
+                except Exception as e:
+                    logger.error(f"Failed to start audio streaming: {e}", exc_info=True)
+                    # Continue anyway - audio streaming is optional for Phase 2 testing
+                
                logger.info(f"✓ Voice session started successfully")
                
            except Exception as e:
@@ -127,7 +135,14 @@ class VoiceSessionManager:
            logger.info("Ending voice session...")
            
            try:
-                # 1. Disconnect from voice channel
+                # 1. Stop audio streaming
+                if self.active_session:
+                    try:
+                        await self.active_session.stop_audio_streaming()
+                    except Exception as e:
+                        logger.error(f"Error stopping audio streaming: {e}")
+                
+                # 2. Disconnect from voice channel
                if self.active_session.voice_client:
                    try:
                        await self.active_session.voice_client.disconnect()
@@ -135,28 +150,28 @@ class VoiceSessionManager:
                    except Exception as e:
                        logger.error(f"Error disconnecting from voice: {e}")
                
-                # 2. Resume text channel inference
+                # 3. Resume text channel inference
                await self._resume_text_channels()
                
-                # 3. Unblock vision model
+                # 4. Unblock vision model
                await self._unblock_vision_model()
                
-                # 4. Re-enable image generation
+                # 5. Re-enable image generation
                await self._enable_image_generation()
                
-                # 5. Re-enable bipolar mode interactions
+                # 6. Re-enable bipolar mode interactions
                await self._enable_bipolar_mode()
                
-                # 6. Re-enable profile picture switching
+                # 7. Re-enable profile picture switching
                await self._enable_profile_picture_switching()
                
-                # 7. Resume autonomous engine
+                # 8. Resume autonomous engine
                await self._resume_autonomous_engine()
                
-                # 8. Resume scheduled events
+                # 9. Resume scheduled events
                await self._resume_scheduled_events()
                
-                # 9. Resume figurine notifier
+                # 10. Resume figurine notifier
                await self._resume_figurine_notifier()
                
-                # 10. Clear active session
+                # 11. Clear active session
@@ -362,8 +377,7 @@ class VoiceSessionManager:

 class VoiceSession:
    """
-    Represents an active voice chat session.
-    Phase 1: Basic structure only, voice connection in Phase 2.
+    Represents an active voice chat session with audio streaming.
    """
    
    def __init__(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
@@ -371,11 +385,54 @@ class VoiceSession:
        self.voice_channel = voice_channel
        self.text_channel = text_channel
        self.voice_client: Optional[discord.VoiceClient] = None
+        self.audio_source: Optional['MikuVoiceSource'] = None  # Forward reference
+        self.tts_streamer: Optional['TTSTokenStreamer'] = None  # Forward reference
        self.active = False
        
        logger.info(f"VoiceSession created for {voice_channel.name} in guild {guild_id}")
    
-    # Phase 2: Implement voice connection, audio streaming, TTS integration
+    async def start_audio_streaming(self):
+        """
+        Start audio streaming from TTS WebSocket to Discord voice (logs and re-raises on failure).
+        Call after voice_client is connected; playback is skipped if it is unset or already playing.
+        """
+        from utils.voice_audio import MikuVoiceSource  # function-local import; presumably avoids a circular import - TODO confirm
+        
+        try:
+            # Create and connect the audio source (per the original note, it both sends tokens and receives audio)
+            self.audio_source = MikuVoiceSource()
+            await self.audio_source.connect()
+            
+            # tts_streamer is only an alias for the same object as audio_source,
+            # kept so existing callers that use tts_streamer keep working
+            self.tts_streamer = self.audio_source
+            
+            # Play only when a voice client exists and is idle. NOTE(review): otherwise the source stays connected but unplayed, with no log
+            if self.voice_client and not self.voice_client.is_playing():
+                self.voice_client.play(self.audio_source)
+                logger.info("✓ Started audio streaming to Discord")
+            
+        except Exception as e:
+            logger.error(f"Failed to start audio streaming: {e}", exc_info=True)
+            raise  # propagate to the caller; NOTE(review): audio_source/tts_streamer are left set on this path
+    
+    async def stop_audio_streaming(self):
+        """Stop playback and disconnect the audio source; any Exception is logged and not re-raised."""
+        try:
+            # Halt Discord playback first, and only if something is currently playing
+            if self.voice_client and self.voice_client.is_playing():
+                self.voice_client.stop()
+            
+            # Disconnect the audio source (the same object tts_streamer points to when set by start_audio_streaming)
+            if self.audio_source:
+                await self.audio_source.disconnect()  # NOTE(review): if this raises, the two references below are not cleared
+                self.audio_source = None
+                self.tts_streamer = None  # Clear reference since it pointed to audio_source
+            
+            logger.info("✓ Stopped audio streaming")
+            
+        except Exception as e:
+            logger.error(f"Error stopping audio streaming: {e}", exc_info=True)  # best-effort cleanup: swallow after logging


 # Global singleton instance