Decided on Parakeet ONNX Runtime. Works pretty great. Realtime voice chat possible now. UX lacking.
This commit is contained in:
88
stt-parakeet/test_client.py
Executable file
88
stt-parakeet/test_client.py
Executable file
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple WebSocket client to test the ASR server
|
||||
Sends a test audio file to the server
|
||||
"""
|
||||
import asyncio
|
||||
import websockets
|
||||
import json
|
||||
import sys
|
||||
import soundfile as sf
|
||||
import numpy as np
|
||||
|
||||
|
||||
def _float_to_pcm16(samples):
    """Convert float samples (nominally in [-1.0, 1.0]) to int16 PCM."""
    # Clip before scaling: anything outside [-1, 1] would overflow the int16
    # cast and turn into garbage samples instead of saturating.
    return (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)


async def test_connection(audio_file="test.wav", uri="ws://localhost:8766"):
    """Stream an audio file to the ASR server and print its transcripts.

    The exchange performed is:
      1. connect and read one JSON welcome message,
      2. send the audio as raw int16 bytes in 0.5 second chunks,
      3. send the JSON command ``{"type": "final"}``,
      4. print every ``transcript`` message until one is marked
         ``is_final`` or three consecutive 2 second receive timeouts occur.

    Args:
        audio_file: Path of the audio file to load with ``soundfile``.
            Only the first channel of multi-channel audio is sent.
        uri: WebSocket URI of the ASR server.

    Returns:
        0 when the exchange ran to the end, 1 when any exception occurred
        (the error is printed, not raised).
    """
    # NOTE(review): the audio is sent at the file's native sample rate with no
    # resampling — confirm the server accepts that rate.
    max_timeouts = 3        # consecutive silent periods tolerated
    recv_timeout = 2.0      # seconds per receive attempt

    print(f"Connecting to {uri}...")

    try:
        async with websockets.connect(uri) as websocket:
            print("Connected!")

            # Receive welcome message
            message = await websocket.recv()
            data = json.loads(message)
            print(f"Server: {data}")

            # Load audio file
            print(f"\nLoading audio file: {audio_file}")
            audio, sr = sf.read(audio_file, dtype='float32')

            if audio.ndim > 1:
                audio = audio[:, 0]  # Keep the first channel only

            print(f"Sample rate: {sr} Hz")
            print(f"Duration: {len(audio)/sr:.2f} seconds")

            # Convert to int16 for sending
            audio_int16 = _float_to_pcm16(audio)

            # 0.5 second chunks; never let the step reach 0, which would
            # make range() raise for a degenerate sample rate.
            chunk_size = max(1, int(sr * 0.5))

            print("\nSending audio...")

            # Send all audio chunks
            for chunk_no, start in enumerate(
                range(0, len(audio_int16), chunk_size), start=1
            ):
                chunk = audio_int16[start:start + chunk_size]
                await websocket.send(chunk.tobytes())
                print(f"Sent chunk {chunk_no}", end='\r')

            print("\nAll chunks sent. Sending final command...")

            # Send final command
            await websocket.send(json.dumps({"type": "final"}))

            # Now receive ALL responses
            print("\nWaiting for transcriptions...\n")
            timeout_count = 0
            got_final = False
            while timeout_count < max_timeouts:
                try:
                    response = await asyncio.wait_for(
                        websocket.recv(), timeout=recv_timeout
                    )
                except asyncio.TimeoutError:
                    timeout_count += 1
                    continue

                result = json.loads(response)
                if result.get('type') != 'transcript':
                    continue

                text = result.get('text', '')
                is_final = result.get('is_final', False)
                prefix = "→ FINAL:" if is_final else "→ Progressive:"
                print(f"{prefix} {text}\n")
                timeout_count = 0  # Reset timeout counter when we get a message
                if is_final:
                    got_final = True
                    break

            if not got_final:
                # Make a silent server visible instead of looking like success.
                print("Warning: no final transcript received before timeout.")

            print("\nTest completed!")

    except Exception as e:
        # Top-level boundary of a test script: report and map to exit code 1.
        print(f"Error: {e}")
        return 1

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The first CLI argument, when given, overrides the default audio file.
    cli_args = sys.argv[1:]
    wav_path = cli_args[0] if cli_args else "test.wav"
    # Propagate the coroutine's result (0 or 1) as the process exit status.
    sys.exit(asyncio.run(test_connection(wav_path)))
|
||||
Reference in New Issue
Block a user