Files
ai-service/deploy/ai-server/docker-compose.audio.yml
Grendgi bde56978d6
Some checks failed
CI / test (push) Failing after 8s
Build and Deploy / build-and-deploy (push) Successful in 18s
Fix Whisper large v3 audio compose
2026-06-10 10:15:24 +03:00

53 lines
1.2 KiB
YAML

# Compose definition for one speech-to-text service that serves the
# openai/whisper-large-v3 model on container port 8000.
services:
  whisper-large-v3:
    # The image is built locally from vllm-audio.Dockerfile in this directory
    # and tagged vllm-audio:local.
    build:
      context: .
      dockerfile: vllm-audio.Dockerfile
    image: vllm-audio:local
    container_name: whisper-large-v3
    # Started only when the "whisper-large-v3" profile is enabled.
    profiles:
      - whisper-large-v3
    restart: unless-stopped
    ipc: host
    # GPU access is requested both through the nvidia runtime and through a
    # device reservation covering all GPUs.
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      # Empty default: an unset HF_TOKEN still produces an empty value, but
      # Compose no longer emits a "variable is not set" warning for it.
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN:-}
      VLLM_API_KEY: ${VLLM_API_KEY}
      # Cache root inside the container; /cache is the bind mount below.
      HF_HOME: /cache
    volumes:
      - ./data/vllm-cache:/cache
    networks:
      - audio-models
    ports:
      # Published on host address 10.2.3.5 only: host 8004 -> container 8000.
      - "10.2.3.5:8004:8000"
    # Arguments only. NOTE(review): the executable presumably comes from the
    # image's entrypoint in vllm-audio.Dockerfile - confirm.
    command:
      - "--model"
      - "openai/whisper-large-v3"
      - "--served-model-name"
      - "openai/whisper-large-v3"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8000"
      - "--gpu-memory-utilization"
      - "0.55"
      # NOTE(review): the same key is already exported as VLLM_API_KEY in the
      # environment above. If the entrypoint reads that variable, this flag is
      # redundant and puts the secret in the process arguments - confirm
      # before removing.
      - "--api-key"
      - "${VLLM_API_KEY}"
    healthcheck:
      # Requests /health on the container-local port using Python's urllib.
      test:
        - "CMD"
        - "python3"
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
      interval: 30s
      timeout: 5s
      retries: 5
      # 1200s = 20 minutes of start-up grace. NOTE(review): presumably sized
      # for model download and load time - confirm.
      start_period: 1200s
# Bridge network referenced by the whisper-large-v3 service's `networks` list.
networks:
  audio-models:
    driver: bridge