Phase 4 STT pipeline implemented (Silero VAD + faster-whisper); still not working well at all

This commit is contained in:
2026-01-17 03:14:40 +02:00
parent 3e59e5d2f6
commit d1e6b21508
30 changed files with 156595 additions and 8 deletions

View File

@@ -76,6 +76,33 @@ services:
- miku-voice # Connect to voice network for RVC/TTS
restart: unless-stopped
# Speech-to-text service. Per the commit message, this hosts the
# Silero VAD + faster-whisper pipeline.
miku-stt:
  build:
    context: ./stt
    dockerfile: Dockerfile.stt
  container_name: miku-stt
  # NOTE(review): GPU access is requested both here and via the device
  # reservation under `deploy` below; confirm both are needed on the target host.
  runtime: nvidia
  environment:
    # GPU index 0 is the GTX 1660 (same as Soprano).
    - NVIDIA_VISIBLE_DEVICES=0
    - CUDA_VISIBLE_DEVICES=0
    - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    # Lists the cuDNN directory under the Python 3.10 dist-packages tree
    # first, followed by the /usr/local/nvidia library directories.
    # NOTE(review): the path hard-codes python3.10; confirm it matches the
    # interpreter installed by Dockerfile.stt.
    - LD_LIBRARY_PATH=/usr/local/lib/python3.10/dist-packages/nvidia/cudnn/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
  volumes:
    # Bind-mounts ./stt (also the build context) at /app.
    # Presumably for live code edits without a rebuild; confirm.
    - ./stt:/app
    - ./stt/models:/models
  ports:
    # Host port 8001 maps to container port 8000.
    - "8001:8000"
  networks:
    - miku-voice
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            # GTX 1660
            device_ids: ['0']
            capabilities: [gpu]
  restart: unless-stopped
anime-face-detector:
build: ./face-detector
container_name: anime-face-detector