Switch transcription to Whisper large v3

2026-06-10 10:10:13 +03:00
parent 1b63dcdbf5
commit 8d6cd84403
12 changed files with 85 additions and 93 deletions
--- a/deploy/ai-server/docker-compose.audio.yml
+++ b/deploy/ai-server/docker-compose.audio.yml
@@ -1,12 +1,12 @@
 services:
-  voxtral-small:
+  whisper-large-v3:
    build:
      context: .
      dockerfile: vllm-audio.Dockerfile
    image: vllm-audio:local
-    container_name: voxtral-small
+    container_name: whisper-large-v3
    profiles:
-      - voxtral-small
+      - whisper-large-v3
    restart: unless-stopped
    ipc: host
    runtime: nvidia
@@ -29,32 +29,19 @@ services:
      - "10.2.3.5:8004:8000"
    command:
      - "--model"
-      - "mistralai/Voxtral-Small-24B-2507"
+      - "openai/whisper-large-v3"
      - "--served-model-name"
-      - "mistralai/Voxtral-Small-24B-2507"
-      - "--tokenizer-mode"
-      - "mistral"
-      - "--config-format"
-      - "mistral"
-      - "--load-format"
-      - "mistral"
-      - "--tool-call-parser"
-      - "mistral"
-      - "--enable-auto-tool-choice"
+      - "openai/whisper-large-v3"
+      - "--task"
+      - "transcription"
      - "--host"
      - "0.0.0.0"
      - "--port"
      - "8000"
-      - "--max-model-len"
-      - "16384"
      - "--gpu-memory-utilization"
      - "0.55"
      - "--api-key"
      - "${VLLM_API_KEY}"
-      - "--max-num-seqs"
-      - "1"
-      - "--max-num-batched-tokens"
-      - "4096"
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s