diff --git a/bot/globals.py b/bot/globals.py
index 3a7db0e..788d37d 100644
--- a/bot/globals.py
+++ b/bot/globals.py
@@ -26,6 +26,7 @@ VISION_MODEL = os.getenv("VISION_MODEL", "vision")
 EVIL_TEXT_MODEL = os.getenv("EVIL_TEXT_MODEL", "darkidol")  # Uncensored model for evil mode
 JAPANESE_TEXT_MODEL = os.getenv("JAPANESE_TEXT_MODEL", "swallow")  # Llama 3.1 Swallow model for Japanese
 OWNER_USER_ID = int(os.getenv("OWNER_USER_ID", "209381657369772032"))  # Bot owner's Discord user ID for reports
+PREFER_AMD_GPU = os.getenv("PREFER_AMD_GPU", "false").lower() == "true"  # Runtime-overridable via config API
 
 # Cheshire Cat AI integration (Phase 3)
 CHESHIRE_CAT_URL = os.getenv("CHESHIRE_CAT_URL", "http://cheshire-cat:80")
diff --git a/bot/utils/gpu_router.py b/bot/utils/gpu_router.py
index 1ea4059..7214b89 100644
--- a/bot/utils/gpu_router.py
+++ b/bot/utils/gpu_router.py
@@ -37,7 +37,8 @@ MODEL_TO_GPU = {
 }
 
 # Configuration
-PREFER_AMD_GPU = os.getenv("PREFER_AMD_GPU", "false").lower() == "true"
+# PREFER_AMD_GPU lives in globals so the config API can update it at runtime.
+# We read globals.PREFER_AMD_GPU in functions below instead of a frozen local.
 AMD_MODELS_ENABLED = os.getenv("AMD_MODELS_ENABLED", "true").lower() == "true"
 
 
@@ -101,7 +102,7 @@ def get_llama_url_with_load_balancing(
         return globals.LLAMA_URL, "llama3.1"
 
     # AMD enabled - implement load balancing
-    use_amd = prefer_amd or PREFER_AMD_GPU or (random.random() < 0.5)
+    use_amd = prefer_amd or globals.PREFER_AMD_GPU or (random.random() < 0.5)
 
     if task_type == "evil":
         # Evil/uncensored models