---
# Docker Compose stack that runs two audio-capable models with the
# vllm/vllm-openai image. Each service sits behind its own profile, and both
# also belong to the shared "audio-compare" profile so they can be started
# together.
#
# Variables interpolated by Compose:
#   HF_TOKEN        Hugging Face access token forwarded to the containers.
#   VLLM_API_KEY    API key forwarded to the server (env var and --api-key).
#   VLLM_IMAGE_TAG  Optional image tag; defaults to "latest" when unset/empty.
#
# NOTE(review): when VLLM_API_KEY is unset, Compose substitutes an empty
# string. Confirm the server still enforces authentication in that case.
services:
  qwen-audio:
    # Set VLLM_IMAGE_TAG to pin a specific release for reproducible deploys.
    image: "vllm/vllm-openai:${VLLM_IMAGE_TAG:-latest}"
    container_name: qwen-audio
    profiles:
      - qwen-audio
      - audio-compare
    restart: unless-stopped
    ipc: host
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      # HF_TOKEN is the current huggingface_hub variable name; the older
      # HUGGING_FACE_HUB_TOKEN is kept so existing image versions still work.
      HF_TOKEN: "${HF_TOKEN}"
      HUGGING_FACE_HUB_TOKEN: "${HF_TOKEN}"
      VLLM_API_KEY: "${VLLM_API_KEY}"
      # Point the Hugging Face cache at the mounted volume below.
      HF_HOME: /cache
    volumes:
      # Same host directory as voxtral-small, mounted at /cache.
      - ./data/vllm-cache:/cache
    networks:
      - audio-models
    ports:
      # Host interface 10.2.3.5, host port 8003 -> container port 8000.
      - "10.2.3.5:8003:8000"
    command:
      - "--model"
      - "Qwen/Qwen2-Audio-7B-Instruct"
      - "--served-model-name"
      - "Qwen/Qwen2-Audio-7B-Instruct"
      - "--trust-remote-code"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8000"
      - "--max-model-len"
      - "8192"
      # NOTE(review): 0.25 here plus 0.62 on voxtral-small totals 0.87,
      # presumably so both fit on one GPU under audio-compare - confirm.
      - "--gpu-memory-utilization"
      - "0.25"
      - "--api-key"
      - "${VLLM_API_KEY}"
      - "--max-num-seqs"
      - "4"
      - "--max-num-batched-tokens"
      - "4096"
    healthcheck:
      # Healthy when GET /health on the container's own port 8000 succeeds;
      # urlopen raises on connection failure or an HTTP error status.
      test:
        - CMD
        - python3
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
      interval: 30s
      timeout: 5s
      retries: 5
      # NOTE(review): long grace period, presumably to cover model download
      # and load time - confirm.
      start_period: 900s

  voxtral-small:
    # Set VLLM_IMAGE_TAG to pin a specific release for reproducible deploys.
    image: "vllm/vllm-openai:${VLLM_IMAGE_TAG:-latest}"
    container_name: voxtral-small
    profiles:
      - voxtral-small
      - audio-compare
    restart: unless-stopped
    ipc: host
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      # HF_TOKEN is the current huggingface_hub variable name; the older
      # HUGGING_FACE_HUB_TOKEN is kept so existing image versions still work.
      HF_TOKEN: "${HF_TOKEN}"
      HUGGING_FACE_HUB_TOKEN: "${HF_TOKEN}"
      VLLM_API_KEY: "${VLLM_API_KEY}"
      # Point the Hugging Face cache at the mounted volume below.
      HF_HOME: /cache
    volumes:
      # Same host directory as qwen-audio, mounted at /cache.
      - ./data/vllm-cache:/cache
    networks:
      - audio-models
    ports:
      # Host interface 10.2.3.5, host port 8004 -> container port 8000.
      - "10.2.3.5:8004:8000"
    command:
      - "--model"
      - "mistralai/Voxtral-Small-24B-2507"
      - "--served-model-name"
      - "mistralai/Voxtral-Small-24B-2507"
      - "--tokenizer-mode"
      - "mistral"
      - "--config-format"
      - "mistral"
      - "--load-format"
      - "mistral"
      - "--tool-call-parser"
      - "mistral"
      - "--enable-auto-tool-choice"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8000"
      - "--max-model-len"
      - "32768"
      # NOTE(review): 0.62 here plus 0.25 on qwen-audio totals 0.87,
      # presumably so both fit on one GPU under audio-compare - confirm.
      - "--gpu-memory-utilization"
      - "0.62"
      - "--api-key"
      - "${VLLM_API_KEY}"
      - "--max-num-seqs"
      - "2"
      - "--max-num-batched-tokens"
      - "8192"
    healthcheck:
      # Healthy when GET /health on the container's own port 8000 succeeds;
      # urlopen raises on connection failure or an HTTP error status.
      test:
        - CMD
        - python3
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
      interval: 30s
      timeout: 5s
      retries: 5
      # NOTE(review): longer grace period than qwen-audio (900s), presumably
      # because this model is larger - confirm.
      start_period: 1200s

networks:
  # Bridge network that both model services attach to.
  audio-models:
    driver: bridge