Files
miku-discord/stt-parakeet/test_vad_client.py

126 lines
4.4 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Test client for VAD-enabled server
Simulates Discord bot audio streaming with speech detection
"""
import asyncio
import websockets
import json
import numpy as np
import soundfile as sf
import sys
def _print_server_message(result):
    """Pretty-print one decoded JSON message received from the server.

    Recognised ``type`` values are ``vad_status``, ``transcript``, ``info``
    and ``error``; a message with any other type prints nothing.

    Args:
        result: The decoded message as a dict.
    """
    msg_type = result.get('type')
    if msg_type == 'vad_status':
        if result.get('is_speech'):
            print("\n🎤 VAD: Speech detected\n")
        else:
            print("\n🛑 VAD: Speech ended\n")
    elif msg_type == 'transcript':
        text = result.get('text', '')
        duration = result.get('duration', 0)
        if result.get('is_final', False):
            print(f"\n{'='*70}")
            print(f"✅ FINAL TRANSCRIPTION ({duration:.2f}s):")
            print(f" \"{text}\"")
            print(f"{'='*70}\n")
        else:
            print(f"📝 PARTIAL ({duration:.2f}s): {text}")
    elif msg_type == 'info':
        print(f" {result.get('message')}")
    elif msg_type == 'error':
        print(f"❌ Error: {result.get('message')}")


async def _receive_messages(websocket):
    """Print server messages until the connection closes or the task is cancelled.

    Failures are reported on stdout instead of being swallowed, so a dropped
    connection or a malformed message is visible in the test output.
    Cancellation is not intercepted here; it propagates to whoever awaits
    the task.
    """
    try:
        while True:
            response = await websocket.recv()
            try:
                result = json.loads(response)
            except ValueError as e:
                # json.JSONDecodeError is a ValueError; skip the bad frame
                # and keep listening.
                print(f"⚠️ Ignoring non-JSON message from server: {e}")
                continue
            if not isinstance(result, dict):
                print(f"⚠️ Ignoring unexpected message from server: {result!r}")
                continue
            _print_server_message(result)
    except websockets.ConnectionClosed:
        print("Connection closed by server.")
    except Exception as e:
        print(f"⚠️ Listener stopped unexpectedly: {e}")


async def test_vad_server(audio_file="test.wav"):
    """Stream an audio file to the VAD server and print what it sends back.

    Connects to ``ws://localhost:8766``, prints the welcome message, reads
    ``audio_file``, keeps only the first channel, converts it to 16-bit PCM
    and sends it as binary frames of about 100 ms each. A background task
    prints every server message while streaming is in progress. After the
    last chunk the client waits, sends a ``force_transcribe`` command, waits
    again and then stops the listener.

    Args:
        audio_file: Path of the audio file to stream.

    Returns:
        0 when the whole sequence completed, 1 if any step raised (the error
        is printed rather than propagated).
    """
    uri = "ws://localhost:8766"
    print(f"Connecting to {uri}...")
    try:
        async with websockets.connect(uri) as websocket:
            print("✓ Connected!\n")

            # Receive welcome message
            message = await websocket.recv()
            data = json.loads(message)
            print(f"Server says: {data.get('message')}")
            print(f"VAD enabled: {data.get('vad_enabled')}\n")

            # Load audio file
            print(f"Loading audio: {audio_file}")
            audio, sr = sf.read(audio_file, dtype='float32')
            if audio.ndim > 1:
                audio = audio[:, 0]  # Mono: keep the first channel only
            print(f"Duration: {len(audio)/sr:.2f}s")
            print(f"Sample rate: {sr} Hz\n")
            # NOTE(review): audio is sent at the file's native sample rate
            # with no resampling — confirm this matches what the server expects.

            # Convert to int16. Clip first: float samples outside [-1, 1]
            # would otherwise wrap around when cast to int16.
            audio_int16 = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

            # Listen for responses in background
            listen_task = asyncio.create_task(_receive_messages(websocket))
            try:
                # Send audio in small chunks (simulate streaming).
                # max(1, ...) keeps the range() step valid for tiny sample rates.
                chunk_size = max(1, int(sr * 0.1))  # 100ms chunks
                print("Streaming audio...\n")
                for start in range(0, len(audio_int16), chunk_size):
                    chunk = audio_int16[start:start + chunk_size]
                    await websocket.send(chunk.tobytes())
                    # 50 ms pause per 100 ms chunk, i.e. roughly 2x real-time.
                    await asyncio.sleep(0.05)
                print("\nAll audio sent. Waiting for final transcription...")

                # Wait for processing
                await asyncio.sleep(3.0)

                # Force transcribe any remaining buffer
                print("Sending force_transcribe command...\n")
                await websocket.send(json.dumps({"type": "force_transcribe"}))

                # Wait a bit more
                await asyncio.sleep(2.0)
            finally:
                # Always stop the listener, even when streaming failed, so
                # no task is left pending when the connection is torn down.
                listen_task.cancel()
                try:
                    await listen_task
                except asyncio.CancelledError:
                    pass
            print("\n✓ Test completed!")
    except Exception as e:
        # Top-level boundary of the script: report and map to an exit code.
        print(f"❌ Error: {e}")
        return 1
    return 0
if __name__ == "__main__":
    # The first command-line argument, when present, names the audio file to
    # stream; without one the client falls back to "test.wav".
    cli_args = sys.argv[1:]
    wav_path = cli_args[0] if cli_args else "test.wav"
    # The coroutine's return value becomes the process exit status.
    sys.exit(asyncio.run(test_vad_server(wav_path)))