---
# Docker Compose stack: a llama-swap model server ("llama") and an Open WebUI
# front end ("webui"), both attached to the pre-existing "nerd-network".
services:
  llama:
    container_name: llama
    # Upstream image, kept for reference. Change the tag to vulkan, cpu etc.
    # image: ghcr.io/mostlygeek/llama-swap:cuda
    # NOTE(review): presumably a locally built image -- confirm it exists on
    # the host before running `docker compose up`.
    image: llama-swap:mtp
    ports:
      # host:container -- quoted so the mapping is always read as a string.
      - '9292:8080'
    restart: unless-stopped
    environment:
      # Both cache variables point at the same directory, which lives under
      # the ./models bind mount declared below.
      LLAMA_CACHE: /models/hf
      HF_HUB_CACHE: /models/hf
    deploy:
      resources:
        reservations:
          devices:
            - capabilities:
                - gpu
              count: all
              driver: nvidia  # Remove this line if using AMD/Vulkan.
    # Alternative: inject the config via a Compose `configs` entry instead of
    # the bind mount used under `volumes`.
    # configs:
    #   - source: llama-swap-config  # Takes the content of the llama-swap-config variable
    #     target: /app/config.yaml   # and writes it to this file.
    volumes:
      # NOTE(review): mounting the Docker socket and CLI gives this container
      # access to the host's Docker daemon -- confirm this is required.
      - /var/run/docker.sock:/var/run/docker.sock
      - /usr/bin/docker:/usr/bin/docker
      - ./models:/models
      - ./llama-swap-config.yml:/etc/llama-swap/config/config.yaml
    networks:
      - nerd-network

  webui:
    container_name: webui
    image: ghcr.io/open-webui/open-webui:main
    restart: unless-stopped
    ports:
      # Quoted for consistency with the llama service's port mapping.
      - '3000:8080'
    volumes:
      - /srv/webui/data:/app/backend/data
    networks:
      - nerd-network

networks:
  # Declared external: Compose will not create it, so the network must
  # already exist (e.g. `docker network create nerd-network`).
  nerd-network:
    name: nerd-network
    external: true