Phase 4 STT pipeline implemented — Silero VAD + faster-whisper — still not working well at all

2026-01-17 03:14:40 +02:00
parent 3e59e5d2f6
commit d1e6b21508
30 changed files with 156595 additions and 8 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -76,6 +76,33 @@ services:
      - miku-voice  # Connect to voice network for RVC/TTS
    restart: unless-stopped

+  miku-stt:  # Speech-to-text service (Silero VAD + faster-whisper, per the commit message)
+    build:
+      context: ./stt  # Build context is the local ./stt directory
+      dockerfile: Dockerfile.stt  # Resolved relative to the build context
+    container_name: miku-stt
+    runtime: nvidia  # Run under the NVIDIA container runtime
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=0  # GTX 1660 (same as Soprano)
+      - CUDA_VISIBLE_DEVICES=0  # CUDA device index visible to the process; matches the GPU selected above
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - LD_LIBRARY_PATH=/usr/local/lib/python3.10/dist-packages/nvidia/cudnn/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64  # cuDNN dir from the Python 3.10 dist-packages is searched first; NOTE(review): confirm the image ships Python 3.10
+    volumes:
+      - ./stt:/app  # Bind-mounts the same directory used as build context over /app
+      - ./stt/models:/models  # Also reachable as /app/models via the mount above; presumably model weights (confirm)
+    ports:
+      - "8001:8000"  # Host port 8001 -> container port 8000
+    networks:
+      - miku-voice  # Same voice network the preceding service joins for RVC/TTS
+    deploy:
+      resources:
+        reservations:
+          devices:  # Compose device reservation, in addition to `runtime: nvidia` above
+            - driver: nvidia
+              device_ids: ['0']  # GTX 1660
+              capabilities: [gpu]
+    restart: unless-stopped  # Restart automatically unless explicitly stopped
+
  anime-face-detector:
    build: ./face-detector
    container_name: anime-face-detector