---
# NOTE(review): `version` is obsolete and ignored by Compose v2; kept only for
# compatibility with legacy docker-compose installs.
version: '3.9'

services:
  # llama-swap: model-swapping proxy serving GGUF models over an OpenAI-style API.
  llama-swap:
    image: ghcr.io/mostlygeek/llama-swap:cuda
    container_name: llama-swap
    ports:
      - "8090:8080"  # Map host port 8090 to container port 8080
    volumes:
      - ./models:/models  # GGUF model files
      - ./llama-swap-config.yaml:/app/config.yaml  # llama-swap configuration
    runtime: nvidia
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s  # Give more time for initial model loading
    environment:
      - NVIDIA_VISIBLE_DEVICES=all

  # miku-bot: Discord bot; waits for llama-swap to report healthy before starting.
  miku-bot:
    build: ./bot
    container_name: miku-bot
    volumes:
      - ./bot/memory:/app/memory
      - /home/koko210Serve/ComfyUI/output:/app/ComfyUI/output:ro
      - /var/run/docker.sock:/var/run/docker.sock  # Allow container management
    depends_on:
      llama-swap:
        condition: service_healthy
    environment:
      # SECURITY: a live bot token was previously committed here in plain text.
      # That token must be considered compromised — rotate it in the Discord
      # developer portal, then supply the new value via a .env file or the
      # shell environment. Never commit the literal token.
      - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN:?set DISCORD_BOT_TOKEN in .env or the environment}
      - LLAMA_URL=http://llama-swap:8080
      - TEXT_MODEL=llama3.1
      - VISION_MODEL=vision
      - OWNER_USER_ID=209381657369772032  # Your Discord user ID for DM analysis reports
    ports:
      - "3939:3939"
    restart: unless-stopped

  # anime-face-detector: on-demand GPU tool; opt in with `--profile tools`.
  anime-face-detector:
    build: ./face-detector
    container_name: anime-face-detector
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]
    volumes:
      - ./face-detector/api:/app/api
      - ./face-detector/images:/app/images
    ports:
      - "7860:7860"  # Gradio UI
      - "6078:6078"  # FastAPI API
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    restart: "no"  # Don't auto-restart - only run on-demand
    profiles:
      - tools  # Don't start by default