Initial commit
This commit is contained in:
64
llama-swap-config.yml
Normal file
64
llama-swap-config.yml
Normal file
@@ -0,0 +1,64 @@
|
||||
# The llama-swap configuration starts here.
|
||||
# Health-check timeout of one hour (3600), chosen so that model downloads
# are not cut off halfway through.
healthCheckTimeout: 3600
|
||||
# Reference value: 262144 (= 256 * 1024), as passed to --fit-ctx for Qwen3.6-Opus.
|
||||
# Model entries. Each key is a model ID; `cmd` is the command line used to
# start its server, and `aliases` lists alternative names for the same entry.
models:
  # GLM-4.7 Flash (MXFP4 MoE GGUF) run through llama-server.
  GLM47:
    aliases:
      - "glm-coder"
    cmd: >
      llama-server
      --port ${PORT}
      -m /models/GLM-4.7-Flash-MXFP4_MOE.gguf
      --fit-ctx 230000
      --temp 0.7
      --top-p 1.0
      --min-p 0.01

  # Qwen3.6 35B-A3B (MXFP4 MoE GGUF) run through llama-server, with an
  # additional --mmproj file supplied on the command line.
  Qwen3.6-35B-A3B:
    aliases:
      - "qwen-omni"
    cmd: >
      llama-server
      --port ${PORT}
      -m /models/Qwen3.6-35B-A3B-MXFP4_MOE.gguf
      --mmproj /models/Qwen-mmproj-F16.gguf
      --fit-ctx 230000
      --fit-target 2048
      --temp 0.6
      --top-p 0.95
      --top-k 20
      --presence-penalty 0.0
      --min-p 0.00
      --no-mmap

  # Qwen3.6-Opus GGUF run through llama-server; this entry also passes
  # speculative-decoding flags (--spec-type / --spec-draft-n-max).
  Qwen3.6-Opus:
    aliases:
      - "qwen-opus"
    cmd: >
      llama-server
      --port ${PORT}
      --fit-ctx 262144
      -m /models/Qwen3.6-Opus.gguf
      --fit-target 2048
      --temp 0.6
      --top-p 0.95
      --top-k 20
      --presence-penalty 0.0
      --min-p 0.00
      --spec-type mtp
      --spec-draft-n-max 3
      -np 1
      --no-mmap

  # Kokoro TTS, started as a Docker container named ${MODEL_ID} on the
  # nerd-network network; requests are proxied to that name on port 8880.
  kokoro-tts:
    proxy: http://${MODEL_ID}:8880
    name: "kokoro TTS"
    useModelName: "tts-1"
    checkEndpoint: /health
    cmd: |
      docker run --rm --name ${MODEL_ID} --network nerd-network
      --gpus 'device=0'
      --env 'API_LOG_LEVEL=INFO'
      ghcr.io/remsky/kokoro-fastapi-gpu:latest
    # Stops the container by the same ${MODEL_ID} name given to `docker run`.
    cmdStop: docker stop ${MODEL_ID}

  # Qwen-Image run through sd-server, proxied on the local loopback address.
  Qwen-Image:
    proxy: http://127.0.0.1:${PORT}
    checkEndpoint: /
    aliases:
      - "qwen-image"
    cmd: >
      sd-server
      --listen-port ${PORT}
      --fa
      --offload-to-cpu
      --diffusion-model /models/sd/unet/qwen-image-2512-Q4_K_M.gguf
      --llm /models/sd/text_encoders/Qwen2.5-VL-7B-Instruct-UD-Q4_K_XL.gguf
      --llm_vision /models/sd/text_encoders/Qwen2.5-VL-7B-Instruct-mmproj-BF16.gguf
      --vae /models/sd/vae/qwen_image_vae.safetensors
|
||||
Reference in New Issue
Block a user