Files
miku-discord/stt-parakeet/test_vad_client.py

126 lines
4.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Test client for VAD-enabled server
Simulates Discord bot audio streaming with speech detection
"""
import asyncio
import websockets
import json
import numpy as np
import soundfile as sf
import sys
def _print_server_message(result):
    """Print one decoded server message (a dict) in human-readable form.

    Handles the message types 'vad_status', 'transcript', 'info' and
    'error'; any other type is ignored.
    """
    msg_type = result.get('type')
    if msg_type == 'vad_status':
        if result.get('is_speech'):
            print("\n🎤 VAD: Speech detected\n")
        else:
            print("\n🛑 VAD: Speech ended\n")
    elif msg_type == 'transcript':
        text = result.get('text', '')
        # A present-but-null duration would break the :.2f format below.
        duration = result.get('duration') or 0
        if result.get('is_final', False):
            print(f"\n{'='*70}")
            print(f"✅ FINAL TRANSCRIPTION ({duration:.2f}s):")
            print(f" \"{text}\"")
            print(f"{'='*70}\n")
        else:
            print(f"📝 PARTIAL ({duration:.2f}s): {text}")
    elif msg_type == 'info':
        print(f" {result.get('message')}")
    elif msg_type == 'error':
        print(f"❌ Error: {result.get('message')}")


async def test_vad_server(audio_file="test.wav"):
    """Stream an audio file to the VAD server and print its responses.

    Connects to ws://localhost:8766, prints the server's welcome message,
    then sends the first channel of the file as raw int16 PCM in 100 ms
    chunks while a background task prints every message the server sends
    back. After streaming it waits, sends a ``force_transcribe`` command,
    waits again and shuts the listener down.

    NOTE(review): the audio is sent at the file's native sample rate; no
    resampling is done here -- confirm this matches what the server expects.

    Args:
        audio_file: Path to an audio file readable by ``soundfile``.

    Returns:
        0 if the whole exchange completed, 1 if any step raised (the error
        is printed rather than propagated).
    """
    uri = "ws://localhost:8766"
    print(f"Connecting to {uri}...")
    try:
        async with websockets.connect(uri) as websocket:
            print("✓ Connected!\n")

            # Receive welcome message
            data = json.loads(await websocket.recv())
            print(f"Server says: {data.get('message')}")
            print(f"VAD enabled: {data.get('vad_enabled')}\n")

            # Load audio file
            print(f"Loading audio: {audio_file}")
            audio, sr = sf.read(audio_file, dtype='float32')
            if audio.ndim > 1:
                audio = audio[:, 0]  # Keep only the first channel (mono)
            print(f"Duration: {len(audio)/sr:.2f}s")
            print(f"Sample rate: {sr} Hz\n")

            # Convert to int16. Clip first: float samples outside [-1, 1]
            # would otherwise overflow the int16 range when scaled and cast.
            audio_int16 = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

            async def receive_messages():
                """Receive and display server messages until cancelled."""
                try:
                    while True:
                        response = await websocket.recv()
                        try:
                            result = json.loads(response)
                        except ValueError as exc:
                            # Covers JSONDecodeError and UnicodeDecodeError;
                            # one bad frame should not end the listener.
                            print(f"❌ Error: unparseable server message: {exc}")
                            continue
                        if not isinstance(result, dict):
                            print(f"❌ Error: unexpected server message: {result!r}")
                            continue
                        _print_server_message(result)
                except websockets.ConnectionClosed:
                    print("Connection closed by server.")
                except Exception as exc:
                    # Best-effort listener: report the failure instead of
                    # hiding it, but do not abort the sending side.
                    print(f"❌ Error: listener stopped: {exc}")

            # Listen for responses in the background while we stream.
            listen_task = asyncio.create_task(receive_messages())
            try:
                # Send audio in small chunks (simulate streaming). max()
                # guards against a zero step for tiny sample rates.
                chunk_size = max(1, int(sr * 0.1))  # 100ms chunks
                print("Streaming audio...\n")
                for start in range(0, len(audio_int16), chunk_size):
                    chunk = audio_int16[start:start + chunk_size]
                    await websocket.send(chunk.tobytes())
                    # Pace the stream: 100 ms of audio every 50 ms, i.e.
                    # roughly twice real time.
                    await asyncio.sleep(0.05)
                print("\nAll audio sent. Waiting for final transcription...")

                # Wait for processing
                await asyncio.sleep(3.0)

                # Force transcribe any remaining buffer
                print("Sending force_transcribe command...\n")
                await websocket.send(json.dumps({"type": "force_transcribe"}))

                # Wait a bit more
                await asyncio.sleep(2.0)
            finally:
                # Always stop the listener, even if streaming failed.
                listen_task.cancel()
                try:
                    await listen_task
                except asyncio.CancelledError:
                    pass
            print("\n✓ Test completed!")
    except Exception as e:
        # Top-level boundary of the script: report and signal failure via
        # the return code instead of a traceback.
        print(f"❌ Error: {e}")
        return 1
    return 0
if __name__ == "__main__":
    # Script entry point: the optional first command-line argument is the
    # audio file to stream (default "test.wav"); the coroutine's return value
    # becomes the process exit status.
    cli_args = sys.argv[1:]
    audio_file = cli_args[0] if cli_args else "test.wav"
    sys.exit(asyncio.run(test_vad_server(audio_file)))