Phase 4 STT pipeline implemented — Silero VAD + faster-whisper — still not working well at all

This commit is contained in:
2026-01-17 03:14:40 +02:00
parent 3e59e5d2f6
commit d1e6b21508
30 changed files with 156595 additions and 8 deletions

View File

@@ -39,6 +39,12 @@ async def handle_voice_command(message, cmd, args):
elif cmd == 'say':
await _handle_say(message, args)
elif cmd == 'listen':
await _handle_listen(message, args)
elif cmd == 'stop-listening':
await _handle_stop_listening(message, args)
else:
await message.channel.send(f"❌ Unknown voice command: `{cmd}`")
@@ -366,8 +372,97 @@ Keep responses short (1-3 sentences) since they will be spoken aloud."""
await message.channel.send(f"🎤 Miku: *\"{full_response.strip()}\"*")
logger.info(f"✓ Voice say complete: {full_response.strip()}")
await message.add_reaction("")
except Exception as e:
logger.error(f"Voice say failed: {e}", exc_info=True)
await message.channel.send(f"Voice say failed: {str(e)}")
logger.error(f"Failed to generate voice response: {e}", exc_info=True)
await message.channel.send(f"Error generating voice response: {e}")
async def _handle_listen(message, args):
    """
    Handle the `!miku listen` command.

    Asks the active voice session to start listening to one user's voice.
    The target is the first mentioned user when both arguments and at least
    one mention are present; otherwise it is the author of the command.

    Usage:
        !miku listen          - listen to the command author
        !miku listen @user    - listen to the mentioned user

    Replies in the message's channel and returns early when there is no
    connected voice session, when the target is not in a voice channel, or
    when the target's voice channel differs from the session's. Exceptions
    raised while starting or confirming are logged and reported to the
    channel instead of being propagated.
    """
    # Guard: a session with a connected voice client is required.
    active = voice_manager.active_session
    voice_client = active.voice_client if active else None
    if not voice_client or not voice_client.is_connected():
        await message.channel.send("❌ I'm not in a voice channel! Use `!miku join` first.")
        return

    # Pick the speaker: fall back to the author unless a mention was supplied.
    if not args or len(message.mentions) == 0:
        speaker = message.author
    else:
        speaker = message.mentions[0]

    # Guard: the speaker has to be connected to some voice channel.
    voice_state = speaker.voice
    if not (voice_state and voice_state.channel):
        await message.channel.send(f"{speaker.mention} is not in a voice channel!")
        return

    # Guard: the speaker's channel has to be the one the session is in.
    if voice_state.channel.id != voice_client.channel.id:
        await message.channel.send(
            f"{speaker.mention} must be in the same voice channel as me!"
        )
        return

    try:
        await active.start_listening(speaker)

        confirmation = (
            f"👂 Now listening to {speaker.mention}'s voice! "
            "Speak to me and I'll respond. Use `!miku stop-listening` to stop."
        )
        await message.channel.send(confirmation)
        await message.add_reaction("👂")

        logger.info(f"Started listening to user {speaker.id} ({speaker.name})")
    except Exception as e:
        # Command boundary: log with traceback and tell the user what failed.
        logger.error(f"Failed to start listening: {e}", exc_info=True)
        await message.channel.send(f"❌ Failed to start listening: {e!s}")
async def _handle_stop_listening(message, args):
    """
    Handle the `!miku stop-listening` command.

    Asks the active voice session to stop listening to one user, identified
    by id. The target is the first mentioned user when both arguments and at
    least one mention are present; otherwise it is the command author.

    Usage:
        !miku stop-listening          - stop listening to the command author
        !miku stop-listening @user    - stop listening to the mentioned user

    Replies in the message's channel and returns early when there is no
    active session. Exceptions raised while stopping or confirming are
    logged and reported to the channel instead of being propagated.
    """
    # Guard: nothing to stop without an active session.
    active = voice_manager.active_session
    if not active:
        await message.channel.send("❌ I'm not in a voice channel!")
        return

    # A mention only counts when the command was given arguments.
    use_mention = args and len(message.mentions) > 0
    speaker = message.mentions[0] if use_mention else message.author

    try:
        await active.stop_listening(speaker.id)

        await message.channel.send(f"🔇 Stopped listening to {speaker.mention}.")
        await message.add_reaction("🔇")

        logger.info(f"Stopped listening to user {speaker.id} ({speaker.name})")
    except Exception as e:
        # Command boundary: log with traceback and tell the user what failed.
        logger.error(f"Failed to stop listening: {e}", exc_info=True)
        await message.channel.send(f"❌ Failed to stop listening: {e!s}")