# miku-discord/docker-compose.yml
# Provenance (from scraped file-listing header): 64 lines, 1.9 KiB, YAML,
# last modified 2025-12-07 17:15:09 +02:00
# NOTE: the top-level `version` key is obsolete (ignored with a warning by
# Compose v2) but is kept for compatibility with older docker-compose v1.
version: '3.9'

services:
  # llama-swap: GPU-backed proxy that hot-swaps llama.cpp model backends.
  llama-swap:
    image: ghcr.io/mostlygeek/llama-swap:cuda
    container_name: llama-swap
    ports:
      - "8090:8080"  # map host port 8090 to container port 8080
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-config.yaml:/app/config.yaml  # llama-swap configuration
    runtime: nvidia
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s  # give more time for initial model loading
    environment:
      - NVIDIA_VISIBLE_DEVICES=all

  # miku-bot: the Discord bot; waits for llama-swap to report healthy.
  miku-bot:
    build: ./bot
    container_name: miku-bot
    volumes:
      - ./bot/memory:/app/memory
      - /home/koko210Serve/ComfyUI/output:/app/ComfyUI/output:ro
      # NOTE(review): mounting the Docker socket allows container management
      # but grants this container root-equivalent access to the host.
      - /var/run/docker.sock:/var/run/docker.sock
    depends_on:
      llama-swap:
        condition: service_healthy
    environment:
      # SECURITY: never commit a live bot token to version control. Supply it
      # via the shell environment or a git-ignored .env file; Compose reads
      # both automatically for ${VAR} interpolation. The previously committed
      # token must be revoked/regenerated in the Discord developer portal.
      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN}
      - LLAMA_URL=http://llama-swap:8080
      - TEXT_MODEL=llama3.1
      - VISION_MODEL=vision
      - OWNER_USER_ID=209381657369772032  # Your Discord user ID for DM analysis reports
    ports:
      - "3939:3939"
    restart: unless-stopped

  # anime-face-detector: on-demand GPU tool, started only via the "tools" profile.
  anime-face-detector:
    build: ./face-detector
    container_name: anime-face-detector
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
    volumes:
      - ./face-detector/api:/app/api
      - ./face-detector/images:/app/images
    ports:
      - "7860:7860"  # Gradio UI
      - "6078:6078"  # FastAPI API
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    restart: "no"  # don't auto-restart - only run on-demand
    profiles:
      - tools  # not started by default; run with `docker compose --profile tools up`