# Parakeet ONNX ASR STT Container
# Uses ONNX Runtime with CUDA for GPU-accelerated inference
# Optimized for NVIDIA GTX 1660 and similar GPUs

# Using CUDA 12.6 with cuDNN 9 for ONNX Runtime GPU support
FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu22.04

# Prevent interactive prompts during build
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    python3.11 \
    python3.11-venv \
    python3.11-dev \
    python3-pip \
    build-essential \
    ffmpeg \
    libsndfile1 \
    libportaudio2 \
    portaudio19-dev \
    git \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip to the exact version used in requirements
RUN python3.11 -m pip install --upgrade pip==25.3

# Copy requirements first (for Docker layer caching)
COPY requirements-stt.txt .

# Install Python dependencies
RUN python3.11 -m pip install --no-cache-dir -r requirements-stt.txt

# Copy application code
COPY asr/ ./asr/
COPY server/ ./server/
COPY vad/ ./vad/
COPY client/ ./client/

# Create models directory (models will be downloaded on first run)
RUN mkdir -p models/parakeet

# Expose WebSocket port
EXPOSE 8766

# Set GPU visibility (default to GPU 0)
ENV CUDA_VISIBLE_DEVICES=0

# Health check: confirm ONNX Runtime can see the CUDA execution provider
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python3.11 -c "import onnxruntime as ort; assert 'CUDAExecutionProvider' in ort.get_available_providers()" || exit 1

# Run the WebSocket server
CMD ["python3.11", "-m", "server.ws_server"]
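
# Minimal build/run sketch for reference. The image tag "parakeet-stt" and the
# host-side models path are illustrative assumptions, not defined by this file;
# the port and models directory match EXPOSE 8766 and /app/models above, and
# mounting the models directory is optional (it just persists the first-run
# model download across container restarts):
#
#   docker build -t parakeet-stt .
#   docker run --rm --gpus all -p 8766:8766 \
#       -v "$(pwd)/models:/app/models" \
#       parakeet-stt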