126 lines
4.4 KiB
Python
126 lines
4.4 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
Test client for VAD-enabled server.

Simulates Discord bot audio streaming with speech detection.
"""
|
|||
|
|
import asyncio
|
|||
|
|
import websockets
|
|||
|
|
import json
|
|||
|
|
import numpy as np
|
|||
|
|
import soundfile as sf
|
|||
|
|
import sys
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _display_server_message(result):
    """Print one decoded server message in a human-readable form.

    ``result`` is the dict decoded from a server JSON frame; its ``type``
    key selects the output format. Messages whose ``type`` is not one of
    ``vad_status``, ``transcript``, ``info`` or ``error`` are ignored.
    """
    msg_type = result.get('type')

    if msg_type == 'vad_status':
        if result.get('is_speech'):
            print("\n🎤 VAD: Speech detected\n")
        else:
            print("\n🛑 VAD: Speech ended\n")

    elif msg_type == 'transcript':
        text = result.get('text', '')
        duration = result.get('duration', 0)

        if result.get('is_final', False):
            print(f"\n{'='*70}")
            print(f"✅ FINAL TRANSCRIPTION ({duration:.2f}s):")
            print(f" \"{text}\"")
            print(f"{'='*70}\n")
        else:
            print(f"📝 PARTIAL ({duration:.2f}s): {text}")

    elif msg_type == 'info':
        print(f"ℹ️ {result.get('message')}")

    elif msg_type == 'error':
        print(f"❌ Error: {result.get('message')}")


async def test_vad_server(audio_file: str = "test.wav") -> int:
    """Stream an audio file to the VAD server and print its responses.

    Connects to ``ws://localhost:8766``, reads the welcome message, loads
    ``audio_file`` with soundfile, and sends the first channel as int16 PCM
    in 100 ms chunks while a background task prints every server message.
    After the last chunk it waits, sends a ``force_transcribe`` command,
    waits again, and shuts the listener down.

    Args:
        audio_file: Path of the audio file to stream.

    Returns:
        0 when the run completes, 1 when any exception is raised (the
        error is printed rather than propagated).
    """
    uri = "ws://localhost:8766"

    print(f"Connecting to {uri}...")

    try:
        async with websockets.connect(uri) as websocket:
            print("✓ Connected!\n")

            # Receive welcome message
            message = await websocket.recv()
            data = json.loads(message)
            print(f"Server says: {data.get('message')}")
            print(f"VAD enabled: {data.get('vad_enabled')}\n")

            # Load audio file
            print(f"Loading audio: {audio_file}")
            audio, sr = sf.read(audio_file, dtype='float32')

            if audio.ndim > 1:
                audio = audio[:, 0]  # Mono: keep only the first channel

            print(f"Duration: {len(audio)/sr:.2f}s")
            print(f"Sample rate: {sr} Hz\n")

            # Convert to int16. Clip first: samples outside [-1, 1] would
            # otherwise overflow the int16 range during the cast.
            audio_int16 = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

            async def receive_messages():
                """Receive and display server messages until stopped."""
                try:
                    while True:
                        response = await websocket.recv()
                        try:
                            result = json.loads(response)
                        except ValueError:
                            # One malformed frame should not end the listener.
                            print("❌ Error: received a message that is not valid JSON")
                            continue
                        if not isinstance(result, dict):
                            continue
                        _display_server_message(result)
                except websockets.ConnectionClosed:
                    print("ℹ️ Connection closed; listener stopped")
                except Exception as exc:
                    # Report instead of silently swallowing the failure.
                    print(f"❌ Error: listener stopped: {exc}")

            # Start listener
            listen_task = asyncio.create_task(receive_messages())

            try:
                # Send audio in small chunks (simulate streaming).
                # max(1, ...) keeps range() valid for very low sample rates.
                chunk_size = max(1, int(sr * 0.1))  # 100ms chunks
                print("Streaming audio...\n")

                for start in range(0, len(audio_int16), chunk_size):
                    chunk = audio_int16[start:start + chunk_size]
                    await websocket.send(chunk.tobytes())
                    await asyncio.sleep(0.05)  # Pace the stream

                print("\nAll audio sent. Waiting for final transcription...")

                # Wait for processing
                await asyncio.sleep(3.0)

                # Force transcribe any remaining buffer
                print("Sending force_transcribe command...\n")
                await websocket.send(json.dumps({"type": "force_transcribe"}))

                # Wait a bit more
                await asyncio.sleep(2.0)
            finally:
                # Always stop the listener, even when streaming failed, so
                # the task is never left pending. gather() collects the
                # child's CancelledError instead of raising it here.
                listen_task.cancel()
                await asyncio.gather(listen_task, return_exceptions=True)

            print("\n✓ Test completed!")

    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print(f"❌ Error: {e}")
        return 1

    return 0
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # The optional first command-line argument names the audio file to
    # stream; without it the client falls back to "test.wav".
    cli_args = sys.argv[1:]
    wav_path = cli_args[0] if cli_args else "test.wav"

    # The coroutine's return value (0 or 1) becomes the process exit status.
    sys.exit(asyncio.run(test_vad_server(wav_path)))
|