# llama-swap configuration for AMD RX 6800 (ROCm)
# This manages automatic model switching and unloading for the secondary GPU
# Vision model stays on NVIDIA GPU - AMD only handles text models

models:
  # Main text generation model (same name as NVIDIA for uniform switching)
  llama3.1:
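    # Flag notes (per llama.cpp / llama-swap docs): ${PORT} is a macro that
    # llama-swap replaces with the port it assigns; -ngl 99 offloads all
    # layers to the GPU; -nkvo keeps the KV cache in system RAM rather than
    # VRAM; -c 16384 sets the context size; --no-warmup skips the warmup
    # pass so a swapped-in model starts serving sooner.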
    cmd: /app/llama-server --port ${PORT} --model /models/Llama-3.1-8B-Instruct-UD-Q4_K_XL.gguf -ngl 99 -nkvo -c 16384 --host 0.0.0.0 --no-warmup
    ttl: 1800  # Unload after 30 minutes of inactivity (1800 seconds)
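    # Requests that use any of the names below are routed to this model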
    aliases:
      - llama3.1
      - text-model

  # Evil/Uncensored model (same name as NVIDIA for uniform switching)
  darkidol:
    cmd: /app/llama-server --port ${PORT} --model /models/DarkIdol-Llama-3.1-8B-Instruct-1.3-Uncensored_Q4_K_M.gguf -ngl 99 -nkvo -c 16384 --host 0.0.0.0 --no-warmup
    ttl: 1800  # Unload after 30 minutes of inactivity
    aliases:
      - darkidol
      - evil-model
      - uncensored

# Server configuration
# llama-swap will listen on this address
# Inside Docker, we bind to 0.0.0.0 to allow the bot container to connect
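#
# Illustrative usage (hostname and port are assumptions for this setup; use
# whatever address llama-swap actually listens on): the bot sends standard
# OpenAI-style requests to llama-swap, and the "model" field - a model name
# or alias from above - decides which llama-server instance is swapped in:
#
#   curl http://llama-swap:8080/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "text-model", "messages": [{"role": "user", "content": "Hello"}]}'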