# llama-swap-stack/docker-compose.yml


services:
  # llama-swap container; host port 9292 is published to container port 8080.
  llama:
    container_name: llama
    # image: ghcr.io/mostlygeek/llama-swap:cuda
    image: llama-swap:mtp # Change this to vulkan, cpu etc.
    ports:
      - '9292:8080'
    restart: unless-stopped
    environment:
      # Both cache variables point at the same directory, which lives inside
      # the ./models bind mount declared under `volumes` below.
      LLAMA_CACHE: /models/hf
      HF_HUB_CACHE: /models/hf
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every available GPU for this container.
            - capabilities:
                - gpu
              count: all
              driver: nvidia # Remove this line if using AMD/Vulkan.
    # Alternative to the config bind mount below (currently disabled):
    # configs:
    #   - source: llama-swap-config # Takes the content of the llama-swap-config variable
    #     target: /app/config.yaml  # and writes it to this file.
    volumes:
      # NOTE(review): the Docker socket and CLI binary are bind-mounted from
      # the host, which lets this container drive the host's Docker daemon.
      # Confirm this is intended.
      - /var/run/docker.sock:/var/run/docker.sock
      - /usr/bin/docker:/usr/bin/docker
      - ./models:/models
      - ./llama-swap-config.yml:/etc/llama-swap/config/config.yaml
    networks:
      - nerd-network

  # Open WebUI container; host port 3000 is published to container port 8080.
  # Indented to the same level as `llama` so it parses as a sibling service.
  webui:
    container_name: webui
    image: ghcr.io/open-webui/open-webui:main
    restart: unless-stopped
    ports:
      - '3000:8080'
    volumes:
      # Application data is kept on the host under /srv/webui/data.
      - /srv/webui/data:/app/backend/data
    networks:
      - nerd-network

# Both services join "nerd-network". It is declared external, so it must
# already exist before this stack is started.
networks:
  nerd-network:
    external: true
    name: nerd-network