Switch transcription to Whisper large v3
Some checks failed
CI / test (push) Failing after 10s
Build and Deploy / build-and-deploy (push) Successful in 24s

This commit is contained in:
Grendgi
2026-06-10 10:10:13 +03:00
parent 1b63dcdbf5
commit 8d6cd84403
12 changed files with 85 additions and 93 deletions

View File

@@ -1,12 +1,12 @@
services:
voxtral-small:
whisper-large-v3:
build:
context: .
dockerfile: vllm-audio.Dockerfile
image: vllm-audio:local
container_name: voxtral-small
container_name: whisper-large-v3
profiles:
- voxtral-small
- whisper-large-v3
restart: unless-stopped
ipc: host
runtime: nvidia
@@ -29,32 +29,19 @@ services:
- "10.2.3.5:8004:8000"
command:
- "--model"
- "mistralai/Voxtral-Small-24B-2507"
- "openai/whisper-large-v3"
- "--served-model-name"
- "mistralai/Voxtral-Small-24B-2507"
- "--tokenizer-mode"
- "mistral"
- "--config-format"
- "mistral"
- "--load-format"
- "mistral"
- "--tool-call-parser"
- "mistral"
- "--enable-auto-tool-choice"
- "openai/whisper-large-v3"
- "--task"
- "transcription"
- "--host"
- "0.0.0.0"
- "--port"
- "8000"
- "--max-model-len"
- "16384"
- "--gpu-memory-utilization"
- "0.55"
- "--api-key"
- "${VLLM_API_KEY}"
- "--max-num-seqs"
- "1"
- "--max-num-batched-tokens"
- "4096"
healthcheck:
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
interval: 30s