# syntax=docker/dockerfile:1

# llama-swap base image with an updated llama-server binary (CUDA build).
# NOTE(review): `:cuda` is a moving tag — consider pinning by digest
# (ghcr.io/mostlygeek/llama-swap:cuda@sha256:…) for reproducible builds.
FROM ghcr.io/mostlygeek/llama-swap:cuda

# Root is needed only to write into /usr/local/bin; dropped again below.
USER root

# Download and install the llama-server binary (CUDA version), pinned to
# llama.cpp release b4183.
# NOTE(review): a bare `ADD <url>` is unverified — supply the release
# asset's digest via `--checksum=sha256:<digest>` (from the GitHub release
# page) so the fetch is tamper-evident and cache-stable. TODO: add digest.
ADD --chmod=755 https://github.com/ggml-org/llama.cpp/releases/download/b4183/llama-server-cuda /usr/local/bin/llama-server

# Verify the binary actually landed and is executable. We deliberately do
# NOT execute `llama-server --version` here: the build host may lack the
# CUDA runtime, and the previous `… || echo "installed successfully"`
# fallback masked every failure (and printed a success message on failure).
# `test -x` fails the build if the download is missing or not executable.
RUN test -x /usr/local/bin/llama-server

# Drop privileges for runtime; matches the base image's non-root convention.
USER 1000:1000