# Docker Compose file (YAML; 125 lines, 2.8 KiB as originally listed)
# Audio-model inference services. Both are built from the same Dockerfile and
# share one image tag, one cache directory and one bridge network. Each has
# its own profile, and both also join the "audio-compare" profile, so they can
# be started individually or together.
#
# VLLM_API_KEY is mandatory: the ":?" interpolation form makes Compose abort
# with the given message when the variable is unset or empty, instead of
# silently substituting an empty string. Because interpolation covers the
# whole file, every `docker compose` command needs the variable set.
services:
  qwen-audio:
    build:
      context: .
      dockerfile: vllm-audio.Dockerfile
    image: vllm-audio:local
    container_name: qwen-audio
    profiles:
      - qwen-audio
      - audio-compare
    restart: unless-stopped
    # Use the host IPC namespace (presumably for framework shared memory;
    # TODO confirm it is still required).
    ipc: host
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      HUGGING_FACE_HUB_TOKEN: "${HF_TOKEN}"
      VLLM_API_KEY: "${VLLM_API_KEY:?VLLM_API_KEY must be set to a non-empty value}"
      # Points the Hugging Face cache at the bind-mounted volume below.
      HF_HOME: /cache
    volumes:
      - ./data/vllm-cache:/cache
    networks:
      - audio-models
    ports:
      # Host 10.2.3.5:8003 -> container port 8000 (not published on other
      # host interfaces).
      - "10.2.3.5:8003:8000"
    command:
      - "--model"
      - "Qwen/Qwen2-Audio-7B-Instruct"
      - "--served-model-name"
      - "Qwen/Qwen2-Audio-7B-Instruct"
      - "--trust-remote-code"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8000"
      - "--max-model-len"
      - "8192"
      # NOTE(review): 0.25 here plus 0.62 on voxtral-small totals 0.87, which
      # presumably lets both share a GPU under "audio-compare" - confirm.
      - "--gpu-memory-utilization"
      - "0.25"
      # NOTE(review): the key is also passed through the VLLM_API_KEY
      # environment variable above; passing it as an argument exposes it in
      # the container's process arguments. Confirm the server reads the
      # variable and drop this flag if so.
      - "--api-key"
      - "${VLLM_API_KEY:?VLLM_API_KEY must be set to a non-empty value}"
      - "--max-num-seqs"
      - "4"
      - "--max-num-batched-tokens"
      - "4096"
    healthcheck:
      # Succeeds when GET /health on the container-local port returns without
      # raising; urlopen raises on HTTP error statuses.
      test:
        - "CMD"
        - "python3"
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
      interval: 30s
      timeout: 5s
      retries: 5
      # Failures during the first 15 minutes do not count towards retries.
      start_period: 900s

  voxtral-small:
    build:
      context: .
      dockerfile: vllm-audio.Dockerfile
    image: vllm-audio:local
    container_name: voxtral-small
    profiles:
      - voxtral-small
      - audio-compare
    restart: unless-stopped
    # Use the host IPC namespace (presumably for framework shared memory;
    # TODO confirm it is still required).
    ipc: host
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      HUGGING_FACE_HUB_TOKEN: "${HF_TOKEN}"
      VLLM_API_KEY: "${VLLM_API_KEY:?VLLM_API_KEY must be set to a non-empty value}"
      # Points the Hugging Face cache at the bind-mounted volume below.
      HF_HOME: /cache
    volumes:
      - ./data/vllm-cache:/cache
    networks:
      - audio-models
    ports:
      # Host 10.2.3.5:8004 -> container port 8000 (not published on other
      # host interfaces).
      - "10.2.3.5:8004:8000"
    command:
      - "--model"
      - "mistralai/Voxtral-Small-24B-2507"
      - "--served-model-name"
      - "mistralai/Voxtral-Small-24B-2507"
      - "--tokenizer-mode"
      - "mistral"
      - "--config-format"
      - "mistral"
      - "--load-format"
      - "mistral"
      - "--tool-call-parser"
      - "mistral"
      - "--enable-auto-tool-choice"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8000"
      - "--max-model-len"
      - "32768"
      # NOTE(review): 0.62 here plus 0.25 on qwen-audio totals 0.87, which
      # presumably lets both share a GPU under "audio-compare" - confirm.
      - "--gpu-memory-utilization"
      - "0.62"
      # NOTE(review): the key is also passed through the VLLM_API_KEY
      # environment variable above; passing it as an argument exposes it in
      # the container's process arguments. Confirm the server reads the
      # variable and drop this flag if so.
      - "--api-key"
      - "${VLLM_API_KEY:?VLLM_API_KEY must be set to a non-empty value}"
      - "--max-num-seqs"
      - "2"
      - "--max-num-batched-tokens"
      - "8192"
    healthcheck:
      # Succeeds when GET /health on the container-local port returns without
      # raising; urlopen raises on HTTP error statuses.
      test:
        - "CMD"
        - "python3"
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
      interval: 30s
      timeout: 5s
      retries: 5
      # Failures during the first 20 minutes do not count towards retries.
      start_period: 1200s

# Bridge network that both audio-model services attach to.
networks:
  audio-models:
    driver: bridge