Phase 2 implemented and tested. Added a warmup step to the pipeline; Miku now queues tokens while the pipeline is warming up.

This commit is contained in:
2026-01-16 23:37:34 +02:00
parent b0066f3525
commit 9943cecdec
9 changed files with 631 additions and 15 deletions

View File

@@ -107,6 +107,14 @@ class VoiceSessionManager:
logger.error(f"Failed to connect to voice channel: {e}", exc_info=True)
raise
# 12. Start audio streaming (Phase 2)
try:
await self.active_session.start_audio_streaming()
logger.info(f"✓ Audio streaming started")
except Exception as e:
logger.error(f"Failed to start audio streaming: {e}", exc_info=True)
# Continue anyway - audio streaming is optional for Phase 2 testing
logger.info(f"✓ Voice session started successfully")
except Exception as e:
@@ -127,7 +135,14 @@ class VoiceSessionManager:
logger.info("Ending voice session...")
try:
# 1. Disconnect from voice channel
# 1. Stop audio streaming
if self.active_session:
try:
await self.active_session.stop_audio_streaming()
except Exception as e:
logger.error(f"Error stopping audio streaming: {e}")
# 2. Disconnect from voice channel
if self.active_session.voice_client:
try:
await self.active_session.voice_client.disconnect()
@@ -135,28 +150,28 @@ class VoiceSessionManager:
except Exception as e:
logger.error(f"Error disconnecting from voice: {e}")
# 2. Resume text channel inference
# 3. Resume text channel inference
await self._resume_text_channels()
# 3. Unblock vision model
# 4. Unblock vision model
await self._unblock_vision_model()
# 4. Re-enable image generation
# 5. Re-enable image generation
await self._enable_image_generation()
# 5. Re-enable bipolar mode interactions
# 6. Re-enable bipolar mode interactions
await self._enable_bipolar_mode()
# 6. Re-enable profile picture switching
# 7. Re-enable profile picture switching
await self._enable_profile_picture_switching()
# 7. Resume autonomous engine
# 8. Resume autonomous engine
await self._resume_autonomous_engine()
# 8. Resume scheduled events
# 9. Resume scheduled events
await self._resume_scheduled_events()
# 9. Resume figurine notifier
# 10. Resume figurine notifier
await self._resume_figurine_notifier()
# 10. Clear active session
@@ -362,8 +377,7 @@ class VoiceSessionManager:
class VoiceSession:
"""
Represents an active voice chat session.
Phase 1: Basic structure only, voice connection in Phase 2.
Represents an active voice chat session with audio streaming.
"""
def __init__(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
@@ -371,11 +385,54 @@ class VoiceSession:
self.voice_channel = voice_channel
self.text_channel = text_channel
self.voice_client: Optional[discord.VoiceClient] = None
self.audio_source: Optional['MikuVoiceSource'] = None # Forward reference
self.tts_streamer: Optional['TTSTokenStreamer'] = None # Forward reference
self.active = False
logger.info(f"VoiceSession created for {voice_channel.name} in guild {guild_id}")
# Phase 2: Implement voice connection, audio streaming, TTS integration
async def start_audio_streaming(self):
    """
    Open the TTS audio source and begin playing it on the voice client.

    Call this only once ``voice_client`` has been connected. The same
    ``MikuVoiceSource`` object is stored in both ``audio_source`` and
    ``tts_streamer``: it receives audio and also sends tokens, and the
    second attribute is kept for backwards compatibility.

    Playback is started only when a voice client is present and is not
    already playing; otherwise the source is left connected but idle.

    Raises:
        Exception: any error from connecting or starting playback is
            logged and then re-raised to the caller.
    """
    from utils.voice_audio import MikuVoiceSource

    try:
        # One object acts as both the Discord audio source and the token sender.
        source = MikuVoiceSource()
        self.audio_source = source
        await source.connect()

        # Alias kept so existing code that uses tts_streamer keeps working.
        self.tts_streamer = self.audio_source

        # Nothing to play on: no voice client, or it is already busy.
        voice = self.voice_client
        if not voice or voice.is_playing():
            return

        voice.play(self.audio_source)
        logger.info("✓ Started audio streaming to Discord")
    except Exception as e:
        logger.error(f"Failed to start audio streaming: {e}", exc_info=True)
        raise
async def stop_audio_streaming(self):
    """
    Stop Discord playback and release the TTS audio source.

    This is best-effort cleanup: errors are logged and never propagated,
    so callers can continue tearing down the rest of the session.

    The two steps are isolated from each other so that a failure while
    stopping playback does not skip disconnecting the audio source, and
    ``audio_source`` / ``tts_streamer`` are always cleared even if the
    disconnect itself fails. The success message is logged only when
    every step completed without error.
    """
    succeeded = True

    # Step 1: stop Discord audio playback.
    try:
        if self.voice_client and self.voice_client.is_playing():
            self.voice_client.stop()
    except Exception as e:
        succeeded = False
        logger.error(f"Error stopping audio streaming: {e}", exc_info=True)

    # Step 2: disconnect the audio source (which also handles token streaming).
    source = self.audio_source
    if source:
        # Drop both references before awaiting so the session never keeps
        # pointing at a half-closed source, even if disconnect() fails or
        # this coroutine is cancelled mid-await. tts_streamer is cleared
        # because it is an alias of audio_source.
        self.audio_source = None
        self.tts_streamer = None
        try:
            await source.disconnect()
        except Exception as e:
            succeeded = False
            logger.error(f"Error stopping audio streaming: {e}", exc_info=True)

    if succeeded:
        logger.info("✓ Stopped audio streaming")
# Global singleton instance