Initial commit

This commit is contained in:
Marker689
2026-05-12 01:34:16 +03:00
commit 55d0ae4437
5 changed files with 617 additions and 0 deletions

64
llama-swap-config.yml Normal file
View File

@@ -0,0 +1,64 @@
# llama-swap configuration: global settings first, then one entry per model
# under `models:`. Each model's `cmd` is the command line used to start its
# server; ${PORT} and ${MODEL_ID} are placeholders filled in by llama-swap.

# Set to one hour (3600) so model downloads don't stop halfway through.
healthCheckTimeout: 3600

# NOTE(review): 262144 matches the --fit-ctx value used by Qwen3.6-Opus below;
# presumably kept here as a context-size reference -- confirm or remove.
# 262144

models:
  # GLM-4.7 Flash (MXFP4 MoE GGUF) started through llama-server.
  # Also addressable by the alias "glm-coder".
  GLM47:
    aliases:
      - "glm-coder"
    cmd: >
      llama-server
      --port ${PORT}
      -m /models/GLM-4.7-Flash-MXFP4_MOE.gguf
      --fit-ctx 230000
      --temp 0.7 --top-p 1.0 --min-p 0.01

  # Qwen3.6 35B-A3B (MXFP4 MoE GGUF) started through llama-server with an
  # additional --mmproj file. Also addressable by the alias "qwen-omni".
  Qwen3.6-35B-A3B:
    aliases:
      - "qwen-omni"
    cmd: >
      llama-server
      --port ${PORT}
      -m /models/Qwen3.6-35B-A3B-MXFP4_MOE.gguf
      --mmproj /models/Qwen-mmproj-F16.gguf
      --fit-ctx 230000
      --fit-target 2048
      --temp 0.6 --top-p 0.95 --top-k 20 --presence-penalty 0.0 --min-p 0.00 --no-mmap

  # Qwen3.6-Opus GGUF started through llama-server with the speculative
  # decoding flags (--spec-type mtp, --spec-draft-n-max 3) and -np 1.
  # Also addressable by the alias "qwen-opus".
  Qwen3.6-Opus:
    aliases:
      - "qwen-opus"
    cmd: >
      llama-server
      --port ${PORT}
      --fit-ctx 262144
      -m /models/Qwen3.6-Opus.gguf
      --fit-target 2048
      --temp 0.6 --top-p 0.95 --top-k 20 --presence-penalty 0.0 --min-p 0.00
      --spec-type mtp --spec-draft-n-max 3 -np 1 --no-mmap

  # Kokoro TTS, run as a Docker container instead of a local binary.
  # The container is named ${MODEL_ID} and attached to the "nerd-network"
  # Docker network; `proxy` addresses it by that name on port 8880, and
  # `cmdStop` stops it with `docker stop`.
  # NOTE(review): the proxy URL relies on llama-swap being able to resolve the
  # container name (presumably by running on the same Docker network) -- confirm.
  # NOTE(review): the image tag is ":latest" (unpinned); consider pinning a
  # version if reproducible restarts matter.
  kokoro-tts:
    proxy: "http://${MODEL_ID}:8880"
    name: "kokoro TTS"
    useModelName: "tts-1"
    checkEndpoint: /health
    cmd: |
      docker run --rm --name ${MODEL_ID} --network nerd-network
      --gpus 'device=0'
      --env 'API_LOG_LEVEL=INFO'
      ghcr.io/remsky/kokoro-fastapi-gpu:latest
    cmdStop: docker stop ${MODEL_ID}

  # Qwen-Image (Q4_K_M diffusion model) started through sd-server, together
  # with its Qwen2.5-VL text encoder, vision projector and VAE files.
  # Health is checked against "/". Also addressable by the alias "qwen-image".
  Qwen-Image:
    proxy: "http://127.0.0.1:${PORT}"
    checkEndpoint: /
    aliases:
      - "qwen-image"
    cmd: >
      sd-server
      --listen-port ${PORT} --fa --offload-to-cpu
      --diffusion-model /models/sd/unet/qwen-image-2512-Q4_K_M.gguf
      --llm /models/sd/text_encoders/Qwen2.5-VL-7B-Instruct-UD-Q4_K_XL.gguf
      --llm_vision /models/sd/text_encoders/Qwen2.5-VL-7B-Instruct-mmproj-BF16.gguf
      --vae /models/sd/vae/qwen_image_vae.safetensors