Decided on Parakeet with ONNX Runtime. It works very well, and real-time voice chat is now possible, although the UX is still lacking.
This commit is contained in:
219
stt-parakeet/tools/diagnose.py
Normal file
219
stt-parakeet/tools/diagnose.py
Normal file
@@ -0,0 +1,219 @@
|
||||
"""
|
||||
System diagnostics for ASR setup
|
||||
"""
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
|
||||
def print_section(title):
    """Write a banner for one diagnostics section to stdout.

    The banner is a blank line, a rule of 80 ``=`` characters, the title
    prefixed with a space, a second rule, and a trailing blank line.

    Args:
        title: Text shown between the two rules.
    """
    rule = "=" * 80
    banner_lines = (
        f"\n{rule}",
        f" {title}",
        f"{rule}\n",
    )
    for line in banner_lines:
        print(line)
|
||||
|
||||
|
||||
def check_python():
    """Print the running interpreter's version string and executable path."""
    print_section("Python Version")
    interpreter_facts = (
        ("Python", sys.version),
        ("Executable", sys.executable),
    )
    for label, value in interpreter_facts:
        print(f"{label}: {value}")
|
||||
|
||||
|
||||
def check_packages():
    """Report the installed version of each distribution the ASR tools use.

    Versions are looked up by *distribution* name in the installed package
    metadata instead of importing each module. This matters because:

    * ``onnxruntime`` and ``onnxruntime-gpu`` are distinct distributions, so
      they must be looked up by distribution name to be told apart; a check
      based on ``import onnxruntime`` succeeds for either one.
    * every package produces exactly one status line;
    * nothing is imported, so a package that is installed but fails while
      importing cannot abort the remaining diagnostics.

    Prints ``✓ <name>: <version>`` for installed distributions and
    ``✗ <name>: Not installed`` otherwise.
    """
    # Function-scope import keeps this block self-contained (Python 3.8+).
    from importlib import metadata

    print_section("Installed Packages")

    packages = (
        "onnx-asr",
        "onnxruntime",
        "onnxruntime-gpu",
        "numpy",
        "websockets",
        "sounddevice",
        "soundfile",
    )

    for package in packages:
        try:
            version = metadata.version(package)
        except metadata.PackageNotFoundError:
            print(f"✗ {package}: Not installed")
        else:
            print(f"✓ {package}: {version}")
|
||||
|
||||
|
||||
def _report_tool_output(command, heading, missing_message, timeout=30):
    """Run an external tool and print its stdout under *heading*.

    Args:
        command: Argument list passed to ``subprocess.run`` (no shell).
        heading: Line printed before the tool's stdout on success.
        missing_message: Line printed when the executable cannot be found.
        timeout: Seconds to wait before giving up on the tool.
    """
    tool = command[0]
    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except FileNotFoundError:
        print(missing_message)
        return
    except subprocess.TimeoutExpired:
        # A stalled tool must not block the remaining diagnostics.
        print(f"✗ {tool} did not finish within {timeout} seconds")
        return
    except OSError as exc:
        # e.g. the file exists but is not executable.
        print(f"✗ Could not run {tool}: {exc}")
        return

    if result.returncode != 0:
        # The tool exists but reported failure: say so instead of printing
        # the success heading, and surface whatever explanation it gave.
        print(f"✗ {tool} exited with code {result.returncode}")
        details = (result.stderr or result.stdout).strip()
        if details:
            print(details)
        return

    print(heading)
    print(result.stdout)


def check_cuda():
    """Print CUDA toolkit and NVIDIA driver information.

    Runs ``nvcc --version`` and ``nvidia-smi`` and prints their output.
    A missing executable, a non-zero exit status, a launch error, or a
    timeout is reported as a ``✗`` line; none of them raises.
    """
    print_section("CUDA Information")

    # Check nvcc
    _report_tool_output(
        ["nvcc", "--version"],
        "NVCC (CUDA Compiler):",
        "✗ nvcc not found - CUDA may not be installed",
    )

    # Check nvidia-smi
    _report_tool_output(
        ["nvidia-smi"],
        "NVIDIA GPU Information:",
        "✗ nvidia-smi not found - NVIDIA drivers may not be installed",
    )
|
||||
|
||||
|
||||
def check_onnxruntime():
    """List ONNX Runtime execution providers and report CUDA availability.

    Prints every provider returned by ``get_available_providers()``, states
    whether ``CUDAExecutionProvider`` is among them, and prints the
    ``onnxruntime`` version. If the import fails, prints a single
    "not installed" line instead.
    """
    print_section("ONNX Runtime Providers")

    try:
        import onnxruntime

        print("Available providers:")
        for provider_name in onnxruntime.get_available_providers():
            print(f" ✓ {provider_name}")

        # GPU acceleration is judged solely by the CUDA provider being listed.
        if "CUDAExecutionProvider" not in onnxruntime.get_available_providers():
            print("\n✗ GPU acceleration NOT available")
            print(" Make sure onnxruntime-gpu is installed and CUDA is working")
        else:
            print("\n✓ GPU acceleration available via CUDA")

        print(f"\nONNX Runtime version: {onnxruntime.__version__}")

    except ImportError:
        print("✗ onnxruntime not installed")
|
||||
|
||||
|
||||
def check_audio_devices():
    """List the audio input devices reported by ``sounddevice``.

    For every device with at least one input channel, prints its index and
    name (tagged ``[DEFAULT]`` when the index equals
    ``sounddevice.default.device[0]``), its channel count, and its default
    sample rate. Import failures and any other error raised while querying
    are reported as a ``✗`` line rather than propagated.
    """
    print_section("Audio Devices")

    try:
        import sounddevice

        all_devices = sounddevice.query_devices()

        print("Input devices:")
        for index, info in enumerate(all_devices):
            # Skip devices that cannot record.
            if not info['max_input_channels'] > 0:
                continue

            if index == sounddevice.default.device[0]:
                marker = " [DEFAULT]"
            else:
                marker = ""

            print(f" [{index}] {info['name']}{marker}")
            print(f" Channels: {info['max_input_channels']}")
            print(f" Sample rate: {info['default_samplerate']} Hz")

    except ImportError:
        print("✗ sounddevice not installed")
    except Exception as err:
        print(f"✗ Error querying audio devices: {err}")
|
||||
|
||||
|
||||
def check_model_files():
    """Report which expected model files exist under ``models/parakeet``.

    The directory is resolved relative to the current working directory.
    If it does not exist, prints a notice and returns; otherwise prints
    each expected file with its size in MB, or marks it as missing.
    """
    print_section("Model Files")

    from pathlib import Path

    model_dir = Path("models/parakeet")
    required_names = (
        "config.json",
        "encoder-parakeet-tdt-0.6b-v3.onnx",
        "decoder_joint-parakeet-tdt-0.6b-v3.onnx",
        "vocab.txt",
    )

    if not model_dir.exists():
        print(f"✗ Model directory not found: {model_dir}")
        print(" Models will be downloaded on first run")
        return

    print(f"Model directory: {model_dir.absolute()}")
    print("\nExpected files:")

    for name in required_names:
        candidate = model_dir / name
        if not candidate.exists():
            print(f" ✗ {name} (missing)")
            continue

        size_mb = candidate.stat().st_size / (1024 * 1024)
        print(f" ✓ {name} ({size_mb:.1f} MB)")
|
||||
|
||||
|
||||
def test_onnx_asr():
    """Confirm that ``onnx_asr`` can be imported and print its version.

    Only the import is exercised; no model is loaded or downloaded. The
    version falls back to ``unknown`` when the module has no
    ``__version__`` attribute. Failures are printed, not raised.
    """
    print_section("onnx-asr Test")

    try:
        import onnx_asr

        detected_version = getattr(onnx_asr, '__version__', 'unknown')
        print("✓ onnx-asr imported successfully")
        print(f" Version: {detected_version}")

        # Nothing further is probed here; transcription is covered by
        # the separate offline test script named below.
        print("\n✓ onnx-asr is ready to use")
        print(" Run test_offline.py to download models and test transcription")

    except ImportError as err:
        print(f"✗ Failed to import onnx-asr: {err}")
    except Exception as err:
        print(f"✗ Error testing onnx-asr: {err}")
|
||||
|
||||
|
||||
def main():
    """Run every diagnostic check in order, framed by header and footer banners."""
    rule = "=" * 80

    print("\n" + rule)
    print(" ASR System Diagnostics")
    print(rule)

    # Order matters only for readability of the report.
    checks = (
        check_python,
        check_packages,
        check_cuda,
        check_onnxruntime,
        check_audio_devices,
        check_model_files,
        test_onnx_asr,
    )
    for run_check in checks:
        run_check()

    print("\n" + rule)
    print(" Diagnostics Complete")
    print(rule + "\n")


if __name__ == "__main__":
    main()
|
||||
114
stt-parakeet/tools/test_offline.py
Normal file
114
stt-parakeet/tools/test_offline.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""
|
||||
Test offline ASR pipeline with onnx-asr
|
||||
"""
|
||||
import soundfile as sf
|
||||
import numpy as np
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from asr.asr_pipeline import ASRPipeline
|
||||
|
||||
|
||||
def test_transcription(audio_file: str, use_vad: bool = False, quantization: str = None):
    """Transcribe one audio file with the ASR pipeline and print a report.

    Builds an ``ASRPipeline`` for ``nemo-parakeet-tdt-0.6b-v3``, reads the
    file as float32 with ``soundfile``, reduces multi-channel audio to its
    first channel, transcribes it, and prints the transcription followed by
    basic sample statistics.

    Args:
        audio_file: Path to the audio file to transcribe.
        use_vad: Passed to the pipeline as ``use_vad``. When true and the
            result is a list, each element is printed as a numbered segment.
        quantization: Optional quantization name (e.g. "int8") passed to
            the pipeline; ``None`` leaves it unset.

    Returns:
        Whatever ``pipeline.transcribe`` returns.

    Raises:
        ValueError: If the audio file contains no samples.
    """
    rule = "=" * 80

    print(f"\n{rule}")
    print("Testing ASR Pipeline with onnx-asr")
    print(rule)
    print(f"Audio file: {audio_file}")
    print(f"Use VAD: {use_vad}")
    print(f"Quantization: {quantization}")
    print(f"{rule}\n")

    # Initialize pipeline
    print("Initializing ASR pipeline...")
    pipeline = ASRPipeline(
        model_name="nemo-parakeet-tdt-0.6b-v3",
        quantization=quantization,
        use_vad=use_vad,
    )
    print("Pipeline initialized successfully!\n")

    # Read audio file
    print(f"Reading audio file: {audio_file}")
    audio, sr = sf.read(audio_file, dtype="float32")
    print(f"Sample rate: {sr} Hz")
    print(f"Audio shape: {audio.shape}")

    # Fail early and clearly: with no samples there is nothing to transcribe,
    # and the min/max statistics printed later cannot be computed.
    if len(audio) == 0:
        raise ValueError(f"Audio file contains no samples: {audio_file}")

    print(f"Audio duration: {len(audio) / sr:.2f} seconds")

    # Ensure mono
    if audio.ndim > 1:
        print("Converting stereo to mono...")
        # Keeps only the first channel; the other channels are discarded,
        # not mixed in.
        audio = audio[:, 0]

    # Verify sample rate (warning only; the real rate is still passed on)
    if sr != 16000:
        print(f"WARNING: Sample rate is {sr} Hz, expected 16000 Hz")
        print("Consider resampling the audio file")

    print(f"\n{rule}")
    print("Transcribing...")
    print(f"{rule}\n")

    # Transcribe
    result = pipeline.transcribe(audio, sample_rate=sr)

    # Display results
    if use_vad and isinstance(result, list):
        print("TRANSCRIPTION (with VAD):")
        print("-" * 80)
        for i, segment in enumerate(result, 1):
            print(f"Segment {i}: {segment}")
        print("-" * 80)
    else:
        print("TRANSCRIPTION:")
        print("-" * 80)
        print(result)
        print("-" * 80)

    # Audio statistics
    print("\nAUDIO STATISTICS:")
    print(f" dtype: {audio.dtype}")
    print(f" min: {audio.min():.6f}")
    print(f" max: {audio.max():.6f}")
    print(f" mean: {audio.mean():.6f}")
    print(f" std: {audio.std():.6f}")

    print(f"\n{rule}")
    print("Test completed successfully!")
    print(f"{rule}\n")

    return result
|
||||
|
||||
|
||||
def main():
    """Parse command-line arguments and run the offline transcription test.

    Exits with status 1 when the audio path does not exist or when the
    transcription test raises; in the latter case the error message and a
    traceback are printed first.
    """
    cli = argparse.ArgumentParser(description="Test offline ASR transcription")
    cli.add_argument("audio_file", help="Path to audio file (WAV format)")
    cli.add_argument("--use-vad", action="store_true", help="Enable VAD")
    cli.add_argument(
        "--quantization",
        default=None,
        choices=["int8", "fp16"],
        help="Model quantization",
    )
    options = cli.parse_args()

    # Bail out before any model loading if the input path is absent.
    audio_path = Path(options.audio_file)
    if not audio_path.exists():
        print(f"ERROR: Audio file not found: {options.audio_file}")
        sys.exit(1)

    try:
        test_transcription(options.audio_file, options.use_vad, options.quantization)
    except Exception as err:
        print(f"\nERROR: {err}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user