Phase 2 implemented and tested. Added a warmup step to the pipeline; Miku now queues tokens while the pipeline is warming up.
This commit is contained in:
@@ -107,6 +107,14 @@ class VoiceSessionManager:
|
||||
logger.error(f"Failed to connect to voice channel: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
# 12. Start audio streaming (Phase 2)
|
||||
try:
|
||||
await self.active_session.start_audio_streaming()
|
||||
logger.info(f"✓ Audio streaming started")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to start audio streaming: {e}", exc_info=True)
|
||||
# Continue anyway - audio streaming is optional for Phase 2 testing
|
||||
|
||||
logger.info(f"✓ Voice session started successfully")
|
||||
|
||||
except Exception as e:
|
||||
@@ -127,7 +135,14 @@ class VoiceSessionManager:
|
||||
logger.info("Ending voice session...")
|
||||
|
||||
try:
|
||||
# 1. Disconnect from voice channel
|
||||
# 1. Stop audio streaming
|
||||
if self.active_session:
|
||||
try:
|
||||
await self.active_session.stop_audio_streaming()
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping audio streaming: {e}")
|
||||
|
||||
# 2. Disconnect from voice channel
|
||||
if self.active_session.voice_client:
|
||||
try:
|
||||
await self.active_session.voice_client.disconnect()
|
||||
@@ -135,28 +150,28 @@ class VoiceSessionManager:
|
||||
except Exception as e:
|
||||
logger.error(f"Error disconnecting from voice: {e}")
|
||||
|
||||
# 2. Resume text channel inference
|
||||
# 3. Resume text channel inference
|
||||
await self._resume_text_channels()
|
||||
|
||||
# 3. Unblock vision model
|
||||
# 4. Unblock vision model
|
||||
await self._unblock_vision_model()
|
||||
|
||||
# 4. Re-enable image generation
|
||||
# 5. Re-enable image generation
|
||||
await self._enable_image_generation()
|
||||
|
||||
# 5. Re-enable bipolar mode interactions
|
||||
# 6. Re-enable bipolar mode interactions
|
||||
await self._enable_bipolar_mode()
|
||||
|
||||
# 6. Re-enable profile picture switching
|
||||
# 7. Re-enable profile picture switching
|
||||
await self._enable_profile_picture_switching()
|
||||
|
||||
# 7. Resume autonomous engine
|
||||
# 8. Resume autonomous engine
|
||||
await self._resume_autonomous_engine()
|
||||
|
||||
# 8. Resume scheduled events
|
||||
# 9. Resume scheduled events
|
||||
await self._resume_scheduled_events()
|
||||
|
||||
# 9. Resume figurine notifier
|
||||
# 10. Resume figurine notifier
|
||||
await self._resume_figurine_notifier()
|
||||
|
||||
# 10. Clear active session
|
||||
@@ -362,8 +377,7 @@ class VoiceSessionManager:
|
||||
|
||||
class VoiceSession:
|
||||
"""
|
||||
Represents an active voice chat session.
|
||||
Phase 1: Basic structure only, voice connection in Phase 2.
|
||||
Represents an active voice chat session with audio streaming.
|
||||
"""
|
||||
|
||||
def __init__(self, guild_id: int, voice_channel: discord.VoiceChannel, text_channel: discord.TextChannel):
|
||||
@@ -371,11 +385,54 @@ class VoiceSession:
|
||||
self.voice_channel = voice_channel
|
||||
self.text_channel = text_channel
|
||||
self.voice_client: Optional[discord.VoiceClient] = None
|
||||
self.audio_source: Optional['MikuVoiceSource'] = None # Forward reference
|
||||
self.tts_streamer: Optional['TTSTokenStreamer'] = None # Forward reference
|
||||
self.active = False
|
||||
|
||||
logger.info(f"VoiceSession created for {voice_channel.name} in guild {guild_id}")
|
||||
|
||||
# Phase 2: Implement voice connection, audio streaming, TTS integration
|
||||
async def start_audio_streaming(self):
    """
    Start audio streaming from TTS WebSocket to Discord voice.

    This should be called after voice_client is connected. A MikuVoiceSource
    is created and connected, stored as both ``audio_source`` and
    ``tts_streamer``, and handed to the voice client for playback when the
    client exists and is not already playing.

    Raises:
        Exception: Whatever creating/connecting the source or starting
            playback raised, re-raised after logging. The source created by
            this call is disconnected and the session attributes pointing at
            it are reset first, so a failed start leaves nothing half-open.
    """
    from utils.voice_audio import MikuVoiceSource

    source = None
    try:
        # Create and connect audio source (handles both sending tokens and receiving audio)
        source = MikuVoiceSource()
        await source.connect()

        # Publish the source only once it is connected, so the session never
        # exposes a source whose connect() failed.
        # The audio source serves as both the audio source AND the token sender;
        # tts_streamer points at it for backwards compatibility.
        self.audio_source = source
        self.tts_streamer = source

        # Start playing audio to Discord
        if self.voice_client and not self.voice_client.is_playing():
            self.voice_client.play(source)
            logger.info("✓ Started audio streaming to Discord")
        else:
            # Previously silent: the source is connected but nothing plays it.
            logger.warning(
                "Audio source connected but playback not started "
                "(voice client missing or already playing)"
            )

    except Exception as e:
        logger.error(f"Failed to start audio streaming: {e}", exc_info=True)

        # Best-effort release of the source created by this call; a cleanup
        # failure must not mask the original error.
        if source is not None:
            try:
                await source.disconnect()
            except Exception as cleanup_error:
                logger.error(f"Error cleaning up audio source after failed start: {cleanup_error}")

        # Only reset attributes that refer to the source from this call.
        if self.audio_source is source:
            self.audio_source = None
        if self.tts_streamer is source:
            self.tts_streamer = None
        raise
|
||||
|
||||
async def stop_audio_streaming(self):
    """
    Stop audio streaming and cleanup resources.

    Teardown is best-effort: stopping Discord playback and disconnecting the
    audio source run independently, and failures are logged rather than
    raised. A failure while stopping playback therefore no longer prevents
    the audio source from being disconnected, and ``audio_source`` /
    ``tts_streamer`` are cleared even when ``disconnect()`` fails.
    """
    clean = True

    # Stop Discord audio playback
    try:
        if self.voice_client and self.voice_client.is_playing():
            self.voice_client.stop()
    except Exception as e:
        clean = False
        logger.error(f"Error stopping audio streaming: {e}", exc_info=True)

    # Disconnect audio source (which also handles token streaming)
    source = self.audio_source
    if source:
        try:
            await source.disconnect()
        except Exception as e:
            clean = False
            logger.error(f"Error stopping audio streaming: {e}", exc_info=True)
        finally:
            # Always drop the references so no caller keeps using a source
            # that has been (or failed to be) torn down.
            self.audio_source = None
            self.tts_streamer = None  # Clear reference since it pointed to audio_source

    if clean:
        logger.info("✓ Stopped audio streaming")
|
||||
|
||||
|
||||
# Global singleton instance
|
||||
|
||||
Reference in New Issue
Block a user