# Multi-stage build for llama-swap with ROCm support

# Stage 1: Build llama.cpp with ROCm (requires ROCm 6.1+)
FROM rocm/dev-ubuntu-22.04:6.2.4 AS llama-builder

WORKDIR /build

# Install build dependencies including ROCm/HIP development libraries.
# --no-install-recommends keeps the (already large) ROCm layer smaller.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    build-essential \
    cmake \
    wget \
    libcurl4-openssl-dev \
    hip-dev \
    hipblas-dev \
    rocblas-dev \
    && rm -rf /var/lib/apt/lists/*

# Clone and build llama.cpp with HIP/ROCm support (gfx1030 = RX 6800).
# --depth 1 skips the full git history (much faster, smaller layer).
# NOTE(review): the clone is unpinned, so rebuilds track upstream master and
# are not reproducible — consider cloning a release tag (--branch <tag>).
# HIPCXX/HIP_PATH follow the upstream llama.cpp HIP build instructions.
# Shared libraries (libggml*, etc.) are collected alongside llama-server so
# the runtime stage can copy them in one pass.
RUN git clone --depth 1 https://github.com/ggml-org/llama.cpp.git && \
    cd llama.cpp && \
    HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
    cmake -S . -B build -DGGML_HIP=ON -DGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release && \
    cmake --build build --config Release -- -j"$(nproc)" && \
    cp build/bin/llama-server /build/llama-server && \
    find build -name "*.so*" -exec cp {} /build/ \;

# Stage 2: Build the llama-swap web UI
FROM node:22-alpine AS ui-builder

WORKDIR /build

RUN apk add --no-cache git

# NOTE(review): unpinned clone — consider a release tag for reproducibility.
RUN git clone --depth 1 https://github.com/mostlygeek/llama-swap.git

# Build the UI assets (embedded into the Go binary in the next stage)
WORKDIR /build/llama-swap/ui
RUN npm install && npm run build

# Stage 3: Build the llama-swap binary
FROM golang:1.23-alpine AS swap-builder

WORKDIR /build

RUN apk add --no-cache git

# Copy llama-swap source (with the built UI) from the previous stage
COPY --from=ui-builder /build/llama-swap /build/llama-swap

# GOTOOLCHAIN=auto lets Go download a newer toolchain if go.mod requires
# one beyond the 1.23 image default.
WORKDIR /build/llama-swap
RUN GOTOOLCHAIN=auto go build -o /build/llama-swap-binary .
# Stage 4: Final runtime image
FROM rocm/dev-ubuntu-22.04:6.2.4

WORKDIR /app

# Runtime dependencies: rocm-libs supplies the ROCm runtime libraries that
# llama-server links against; curl is needed for the HEALTHCHECK below.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    ca-certificates \
    rocm-libs \
    && rm -rf /var/lib/apt/lists/*

# Copy built binaries and shared libraries from the build stages
COPY --from=llama-builder /build/llama-server /app/llama-server
COPY --from=llama-builder /build/*.so* /app/
COPY --from=swap-builder /build/llama-swap-binary /app/llama-swap

RUN chmod +x /app/llama-server /app/llama-swap

# Create an unprivileged user and add it to the GPU access groups.
# GID 187 = render group on host, GID 989 = video/kfd group on host — these
# must match the host GIDs owning /dev/kfd and /dev/dri for GPU access.
RUN groupadd -g 187 hostrender && \
    groupadd -g 989 hostvideo && \
    useradd -m -u 1000 -G hostrender,hostvideo llamaswap && \
    chown -R llamaswap:llamaswap /app

# ROCm environment (RX 6800 is gfx1030, i.e. GFX version 10.3.0)
ENV HSA_OVERRIDE_GFX_VERSION=10.3.0
ENV ROCM_PATH=/opt/rocm
ENV HIP_VISIBLE_DEVICES=0
# ${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} only appends ':<old value>' when the
# variable is already set. The previous ':$LD_LIBRARY_PATH' left a trailing
# ':' when it was unset, and an empty path entry makes the dynamic loader
# search the current working directory.
ENV LD_LIBRARY_PATH=/opt/rocm/lib:/app${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}

USER llamaswap

# Expose the llama-swap listen port
EXPOSE 8080

# Health check against llama-swap's health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl -f http://localhost:8080/health || exit 1

# /app/config.yaml is NOT baked into the image — bind-mount it at runtime,
# e.g. `-v ./config.yaml:/app/config.yaml:ro`.
CMD ["/app/llama-swap", "-config", "/app/config.yaml", "-listen", "0.0.0.0:8080"]