diff --git a/bot/globals.py b/bot/globals.py
index 3a7db0e..788d37d 100644
--- a/bot/globals.py
+++ b/bot/globals.py
@@ -26,6 +26,7 @@ VISION_MODEL = os.getenv("VISION_MODEL", "vision")
 EVIL_TEXT_MODEL = os.getenv("EVIL_TEXT_MODEL", "darkidol")  # Uncensored model for evil mode
 JAPANESE_TEXT_MODEL = os.getenv("JAPANESE_TEXT_MODEL", "swallow")  # Llama 3.1 Swallow model for Japanese
 OWNER_USER_ID = int(os.getenv("OWNER_USER_ID", "209381657369772032"))  # Bot owner's Discord user ID for reports
+PREFER_AMD_GPU = os.getenv("PREFER_AMD_GPU", "false").lower() == "true"  # Runtime-overridable via config API
 
 # Cheshire Cat AI integration (Phase 3)
 CHESHIRE_CAT_URL = os.getenv("CHESHIRE_CAT_URL", "http://cheshire-cat:80")
diff --git a/bot/utils/gpu_router.py b/bot/utils/gpu_router.py
index 1ea4059..7214b89 100644
--- a/bot/utils/gpu_router.py
+++ b/bot/utils/gpu_router.py
@@ -37,7 +37,8 @@ MODEL_TO_GPU = {
 }
 
 # Configuration
-PREFER_AMD_GPU = os.getenv("PREFER_AMD_GPU", "false").lower() == "true"
+# PREFER_AMD_GPU lives in globals so the config API can update it at runtime.
+# We read globals.PREFER_AMD_GPU in functions below instead of a frozen local.
 AMD_MODELS_ENABLED = os.getenv("AMD_MODELS_ENABLED", "true").lower() == "true"
 
 
@@ -101,7 +102,7 @@ def get_llama_url_with_load_balancing(
         return globals.LLAMA_URL, "llama3.1"
 
     # AMD enabled - implement load balancing
-    use_amd = prefer_amd or PREFER_AMD_GPU or (random.random() < 0.5)
+    use_amd = prefer_amd or globals.PREFER_AMD_GPU or (random.random() < 0.5)
 
     if task_type == "evil":
         # Evil/uncensored models