Phase 4 STT pipeline implemented — Silero VAD + faster-whisper — still not working well at all

2026-01-17 03:14:40 +02:00
parent 3e59e5d2f6
commit d1e6b21508
30 changed files with 156595 additions and 8 deletions
--- a/bot/commands/voice.py
+++ b/bot/commands/voice.py
@@ -39,6 +39,12 @@ async def handle_voice_command(message, cmd, args):
    elif cmd == 'say':
        await _handle_say(message, args)
    
+    elif cmd == 'listen':
+        await _handle_listen(message, args)
+    
+    elif cmd == 'stop-listening':
+        await _handle_stop_listening(message, args)
+    
    else:
        await message.channel.send(f"❌ Unknown voice command: `{cmd}`")

@@ -366,8 +372,97 @@ Keep responses short (1-3 sentences) since they will be spoken aloud."""
                await message.channel.send(f"🎤 Miku: *\"{full_response.strip()}\"*")
                logger.info(f"✓ Voice say complete: {full_response.strip()}")
                await message.add_reaction("✅")
-                
+    
    except Exception as e:
-        logger.error(f"Voice say failed: {e}", exc_info=True)
-        await message.channel.send(f"❌ Voice say failed: {str(e)}")
+        logger.error(f"Failed to generate voice response: {e}", exc_info=True)
+        await message.channel.send(f"❌ Error generating voice response: {e}")
+
+
+async def _handle_listen(message, args):
+    """
+    Handle !miku listen command.
+    Start listening to a user's voice for STT.
+
+    Usage:
+        !miku listen - Start listening to command author
+        !miku listen @user - Start listening to mentioned user
+
+    Args:
+        message: The Discord message that invoked the command; replies and
+            reactions are sent back through it.
+        args: Command arguments. When non-empty and the message carries
+            mentions, the first mentioned user becomes the target.
+
+    Errors raised while starting to listen are logged and reported to the
+    channel instead of propagating.
+    """
+    session = voice_manager.active_session
+
+    # Miku must be connected to a voice channel before she can listen.
+    if not session or not session.voice_client or not session.voice_client.is_connected():
+        await message.channel.send("❌ I'm not in a voice channel! Use `!miku join` first.")
+        return
+
+    # An explicit mention wins; otherwise listen to whoever issued the command.
+    target_user = message.mentions[0] if args and message.mentions else message.author
+
+    # Outside a guild the author is a plain User object, which has no `voice`
+    # attribute; read it defensively so the user gets the normal error reply
+    # instead of an unhandled AttributeError.
+    voice_state = getattr(target_user, "voice", None)
+    if not voice_state or not voice_state.channel:
+        await message.channel.send(f"❌ {target_user.mention} is not in a voice channel!")
+        return
+
+    # The target has to share Miku's current voice channel.
+    if voice_state.channel.id != session.voice_client.channel.id:
+        await message.channel.send(
+            f"❌ {target_user.mention} must be in the same voice channel as me!"
+        )
+        return
+
+    try:
+        await session.start_listening(target_user)
+        await message.channel.send(
+            f"👂 Now listening to {target_user.mention}'s voice! "
+            f"Speak to me and I'll respond. Use `!miku stop-listening` to stop."
+        )
+        await message.add_reaction("👂")
+        logger.info(f"Started listening to user {target_user.id} ({target_user.name})")
+
+    except Exception as e:
+        logger.error(f"Failed to start listening: {e}", exc_info=True)
+        await message.channel.send(f"❌ Failed to start listening: {e}")
+
+
+async def _handle_stop_listening(message, args):
+    """
+    Handle !miku stop-listening command.
+    End voice listening for a single user.
+
+    Usage:
+        !miku stop-listening - Stop listening to command author
+        !miku stop-listening @user - Stop listening to mentioned user
+
+    Args:
+        message: The Discord message that invoked the command.
+        args: Command arguments. When non-empty and the message carries
+            mentions, the first mentioned user becomes the target.
+    """
+    session = voice_manager.active_session
+
+    # Without an active voice session there is nothing to stop.
+    if not session:
+        await message.channel.send("❌ I'm not in a voice channel!")
+        return
+
+    # First mention is the target when arguments were given; author otherwise.
+    target_user = message.mentions[0] if args and message.mentions else message.author
+
+    try:
+        await session.stop_listening(target_user.id)
+        await message.channel.send(f"🔇 Stopped listening to {target_user.mention}.")
+        await message.add_reaction("🔇")
+        logger.info(f"Stopped listening to user {target_user.id} ({target_user.name})")
+
+    except Exception as exc:
+        # Log with traceback, then surface the failure to the channel.
+        logger.error(f"Failed to stop listening: {exc}", exc_info=True)
+        await message.channel.send(f"❌ Failed to stop listening: {exc}")