# llama-swap configuration for AMD RX 6800 (ROCm)
# This manages automatic model switching and unloading for the secondary GPU.
# The vision model stays on the NVIDIA GPU; AMD only handles text models.

models:
  # Main text generation model (same name as on the NVIDIA side for uniform switching)
  llama3.1:
    cmd: |
      /app/llama-server --port ${PORT}
      --model /models/Llama-3.1-8B-Instruct-UD-Q4_K_XL.gguf
      -ngl 99 -nkvo -c 16384 --host 0.0.0.0 --no-warmup
    ttl: 1800  # Unload after 30 minutes (1800 seconds) of inactivity
    aliases:
      - text-model

  # Evil/Uncensored model (same name as on the NVIDIA side for uniform switching)
  darkidol:
    cmd: |
      /app/llama-server --port ${PORT}
      --model /models/DarkIdol-Llama-3.1-8B-Instruct-1.3-Uncensored_Q4_K_M.gguf
      -ngl 99 -nkvo -c 16384 --host 0.0.0.0 --no-warmup
    ttl: 1800  # Unload after 30 minutes of inactivity
    aliases:
      - evil-model
      - uncensored

# Server configuration
# The listen address is not set in this file; llama-swap takes it from its
# --listen flag at startup. Inside Docker we bind to 0.0.0.0 so the bot
# container can connect.
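#
# A minimal launch sketch (the config path and port here are assumptions for
# illustration, not values taken from this file):
#
#   llama-swap --config /app/config.yaml --listen 0.0.0.0:8080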
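#
# Once llama-swap is up, a swap is triggered simply by naming a model (or one
# of its aliases) in a request to its OpenAI-compatible endpoint. A sketch,
# assuming the bot reaches llama-swap at the Docker service name llama-swap:8080:
#
#   curl http://llama-swap:8080/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "text-model", "messages": [{"role": "user", "content": "hi"}]}'
#
# A later request for "darkidol" (or "uncensored") stops llama3.1 and loads the
# other model; the ttl values above handle unloading after idle periods.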