miku-discord/bot/utils/gpu_preload.py
"""
GPU Model Preloading Utility
Preloads models on AMD GPU to take advantage of 16GB VRAM
"""
import aiohttp
import asyncio
import json
import globals
async def preload_amd_models():
"""
Preload both text and vision models on AMD GPU
Since AMD RX 6800 has 16GB VRAM, we can keep both loaded simultaneously
"""
print("🔧 Preloading models on AMD GPU...")
# Preload text model
try:
async with aiohttp.ClientSession() as session:
payload = {
"model": "llama3.1",
"messages": [{"role": "user", "content": "Hi"}],
"max_tokens": 1
}
async with session.post(
f"{globals.LLAMA_AMD_URL}/v1/chat/completions",
json=payload,
timeout=aiohttp.ClientTimeout(total=60)
) as response:
if response.status == 200:
print("✅ Text model (llama3.1) preloaded on AMD GPU")
else:
print(f"⚠️ Text model preload returned status {response.status}")
except Exception as e:
print(f"⚠️ Failed to preload text model on AMD: {e}")
# Preload vision model
try:
async with aiohttp.ClientSession() as session:
# Create a minimal test image (1x1 white pixel)
import base64
test_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="
payload = {
"model": "vision",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": "What do you see?"},
{"type": "image_url", "image_url": {"url": f"data:image/png;base64,{test_image}"}}
]
}
],
"max_tokens": 1
}
async with session.post(
f"{globals.LLAMA_AMD_URL}/v1/chat/completions",
json=payload,
timeout=aiohttp.ClientTimeout(total=120)
) as response:
if response.status == 200:
print("✅ Vision model preloaded on AMD GPU")
else:
print(f"⚠️ Vision model preload returned status {response.status}")
except Exception as e:
print(f"⚠️ Failed to preload vision model on AMD: {e}")
print("✅ AMD GPU preload complete - both models ready")