# Parakeet ONNX ASR STT Container
# Uses ONNX Runtime with CUDA for GPU-accelerated inference
# Optimized for NVIDIA GTX 1660 and similar GPUs

# Using CUDA 12.6 with cuDNN 9 for ONNX Runtime GPU support
FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu22.04

# Prevent interactive prompts during build
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    python3.11 \
    python3.11-venv \
    python3.11-dev \
    python3-pip \
    build-essential \
    ffmpeg \
    libsndfile1 \
    libportaudio2 \
    portaudio19-dev \
    git \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip to the exact version used in requirements
RUN python3.11 -m pip install --upgrade pip==25.3

# Copy requirements first (for Docker layer caching)
COPY requirements-stt.txt .

# Install Python dependencies
RUN python3.11 -m pip install --no-cache-dir -r requirements-stt.txt

# Copy application code
COPY asr/ ./asr/
COPY server/ ./server/
COPY vad/ ./vad/
COPY client/ ./client/

# Create models directory (models will be downloaded on first run)
RUN mkdir -p models/parakeet

# Expose WebSocket port
EXPOSE 8766

# Set GPU visibility (default to GPU 0)
ENV CUDA_VISIBLE_DEVICES=0

# Health check: confirm ONNX Runtime can see the CUDA execution provider
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python3.11 -c "import onnxruntime as ort; assert 'CUDAExecutionProvider' in ort.get_available_providers()" || exit 1

# Run the WebSocket server
CMD ["python3.11", "-m", "server.ws_server"]
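
# Minimal build/run sketch for reference. The image tag "parakeet-stt" and the
# host-side models path are illustrative assumptions, not defined by this file;
# the port and models directory match EXPOSE 8766 and /app/models above, and
# mounting the models directory is optional (it just persists the first-run
# model download across container restarts):
#
#   docker build -t parakeet-stt .
#   docker run --rm --gpus all -p 8766:8766 \
#       -v "$(pwd)/models:/app/models" \
#       parakeet-stt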