FROM ghcr.io/mostlygeek/llama-swap:cuda

USER root

# Download and install llama-server binary (CUDA version),
# using the official pre-built binary from llama.cpp releases.
# Note: ADD --chmod requires building with BuildKit.
ADD --chmod=755 https://github.com/ggml-org/llama.cpp/releases/download/b4183/llama-server-cuda /usr/local/bin/llama-server
# Verify the binary runs; the fallback tolerates a failure here, since
# CUDA driver libraries may not be available at build time.
RUN llama-server --version || echo "warning: llama-server --version failed (CUDA driver may be unavailable at build time)"

USER 1000:1000
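
# A minimal sketch of building and running this image. The host port
# mapping, config path (/app/config.yaml), and models path below are
# assumptions based on llama-swap's documented defaults, not part of
# this file:
#
#   docker build -t llama-swap-cuda .
#   docker run --rm --gpus all -p 8080:8080 \
#     -v /path/to/models:/models \
#     -v /path/to/config.yaml:/app/config.yaml \
#     llama-swap-cuda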