Decided on Parakeet with ONNX Runtime. It works very well, and real-time voice chat is now possible, but the UX is still lacking.

This commit is contained in:
2026-01-19 00:29:44 +02:00
parent 0a8910fff8
commit 362108f4b0
34 changed files with 4593 additions and 73 deletions

View File

@@ -78,20 +78,18 @@ services:
miku-stt:
build:
context: ./stt
dockerfile: Dockerfile.stt
context: ./stt-parakeet
dockerfile: Dockerfile
container_name: miku-stt
runtime: nvidia
environment:
- NVIDIA_VISIBLE_DEVICES=0 # GTX 1660 (same as Soprano)
- NVIDIA_VISIBLE_DEVICES=0 # GTX 1660
- CUDA_VISIBLE_DEVICES=0
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
- LD_LIBRARY_PATH=/usr/local/lib/python3.10/dist-packages/nvidia/cudnn/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
volumes:
- ./stt:/app
- ./stt/models:/models
- ./stt-parakeet/models:/app/models # Persistent model storage
ports:
- "8001:8000"
- "8766:8766" # WebSocket port
networks:
- miku-voice
deploy:
@@ -102,6 +100,7 @@ services:
device_ids: ['0'] # GTX 1660
capabilities: [gpu]
restart: unless-stopped
command: ["python3.11", "-m", "server.ws_server", "--host", "0.0.0.0", "--port", "8766", "--model", "nemo-parakeet-tdt-0.6b-v3"]
anime-face-detector:
build: ./face-detector