---
# NOTE(review): `version` is obsolete and ignored by Compose v2; kept only for
# compatibility with legacy docker-compose installs.
version: '3.9'

services:
  # llama-swap: model-swapping proxy serving GGUF models over an OpenAI-style API.
  llama-swap:
    image: ghcr.io/mostlygeek/llama-swap:cuda
    container_name: llama-swap
    ports:
      - "8090:8080"  # Map host port 8090 to container port 8080
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-config.yaml:/app/config.yaml  # llama-swap configuration
    runtime: nvidia
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s  # Give more time for initial model loading
    environment:
      - NVIDIA_VISIBLE_DEVICES=all

  # miku-bot: Discord bot; waits for llama-swap to report healthy before starting.
  miku-bot:
    build: ./bot
    container_name: miku-bot
    volumes:
      - ./bot/memory:/app/memory
      - /home/koko210Serve/ComfyUI/output:/app/ComfyUI/output:ro
      - /var/run/docker.sock:/var/run/docker.sock  # Allow container management
    depends_on:
      llama-swap:
        condition: service_healthy
    environment:
      # SECURITY: a live bot token was previously committed here in plain text.
      # That token must be considered compromised — rotate it in the Discord
      # developer portal, then supply the new value via a .env file or the
      # shell environment. Never commit the literal token.
      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:?set DISCORD_BOT_TOKEN in .env or the environment}
      - LLAMA_URL=http://llama-swap:8080
      - TEXT_MODEL=llama3.1
      - VISION_MODEL=vision
      - OWNER_USER_ID=209381657369772032  # Your Discord user ID for DM analysis reports
    ports:
      - "3939:3939"
    restart: unless-stopped

  # anime-face-detector: on-demand GPU tool; opt in with `--profile tools`.
  anime-face-detector:
    build: ./face-detector
    container_name: anime-face-detector
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
    volumes:
      - ./face-detector/api:/app/api
      - ./face-detector/images:/app/images
    ports:
      - "7860:7860"  # Gradio UI
      - "6078:6078"  # FastAPI API
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    restart: "no"  # Don't auto-restart - only run on-demand
    profiles:
      - tools  # Don't start by default