# RealtimeSTT Container
#
# Speech-to-text server image:
#   - Faster-Whisper with CUDA for GPU-accelerated inference
#   - Dual VAD (WebRTC + Silero) for robust voice detection
#
# The server (stt_server.py) listens on port 8766.

FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu22.04

# DEBIAN_FRONTEND=noninteractive keeps apt from prompting during the build.
# PYTHONUNBUFFERED=1 makes Python write stdout/stderr immediately, so server
# logs show up in `docker logs` without buffering delays.
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1

# All subsequent relative paths (COPY targets, CMD script) resolve under /app.
WORKDIR /app

# System dependencies: Python 3.11 toolchain, a compiler for wheels that build
# from source, and the audio libraries (ffmpeg, libsndfile, PortAudio).
# --no-install-recommends keeps optional packages out of the image;
# ca-certificates is listed explicitly so HTTPS for curl/git does not depend
# on it being pulled in as a recommended package.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        python3.11 \
        python3.11-venv \
        python3.11-dev \
        python3-pip \
        build-essential \
        ffmpeg \
        libsndfile1 \
        libportaudio2 \
        portaudio19-dev \
        git \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip (no cache kept, consistent with the install steps below).
RUN python3.11 -m pip install --no-cache-dir --upgrade pip

# Copy requirements on their own first so the dependency layers are only
# rebuilt when requirements.txt changes, not on every source edit.
COPY requirements.txt .

# Install Python dependencies.
RUN python3.11 -m pip install --no-cache-dir -r requirements.txt

# Install PyTorch built against CUDA 12.1 (compatible with the CUDA 12.6 base
# image). Running this after the requirements step guarantees these pinned
# builds are the ones left in the final image.
# NOTE(review): if requirements.txt pulls in torch transitively, the previous
# layer has presumably already downloaded a default PyPI build that this step
# replaces, inflating the image. requirements.txt is not visible here, so
# confirm before reordering these two steps.
RUN python3.11 -m pip install --no-cache-dir \
        torch==2.5.1+cu121 \
        torchaudio==2.5.1+cu121 \
        --index-url https://download.pytorch.org/whl/cu121

# Copy application code last: it changes most often.
COPY stt_server.py .

# Create the Hugging Face cache directory (models will be downloaded on first
# run). Mount a volume here to keep downloaded models across container restarts.
RUN mkdir -p /root/.cache/huggingface

# Expose WebSocket port.
EXPOSE 8766

# Health check: open a TCP connection to the server port with a short Python
# one-liner. Any failure (refused, timeout) raises, and `|| exit 1` reports the
# container as unhealthy. The 120s start period leaves room for model loading
# before failures start counting.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD python3.11 -c "import socket; s=socket.socket(); s.settimeout(2); s.connect(('localhost', 8766)); s.close()" || exit 1

# Run the server.
CMD ["python3.11", "stt_server.py"]