# syntax=docker/dockerfile:1

# llama-swap base image with an updated llama-server binary (CUDA build).
# NOTE(review): `:cuda` is a moving tag — consider pinning by digest
# (ghcr.io/mostlygeek/llama-swap:cuda@sha256:…) for reproducible builds.
FROM ghcr.io/mostlygeek/llama-swap:cuda

# Root is needed only to write into /usr/local/bin; dropped again below.
USER root

# Download and install the llama-server binary (CUDA version), pinned to
# llama.cpp release b4183.
# NOTE(review): a bare `ADD <url>` is unverified — supply the release
# asset's digest via `--checksum=sha256:<digest>` (from the GitHub release
# page) so the fetch is tamper-evident and cache-stable. TODO: add digest.
ADD --chmod=755 https://github.com/ggml-org/llama.cpp/releases/download/b4183/llama-server-cuda /usr/local/bin/llama-server

# Verify the binary actually landed and is executable. We deliberately do
# NOT execute `llama-server --version` here: the build host may lack the
# CUDA runtime, and the previous `… || echo "installed successfully"`
# fallback masked every failure (and printed a success message on failure).
# `test -x` fails the build if the download is missing or not executable.
RUN test -x /usr/local/bin/llama-server

# Drop privileges for runtime; matches the base image's non-root convention.
USER 1000:1000