Switch transcription comparison to Voxtral

2026-06-09 15:11:52 +03:00
parent add15f1385
commit 94e0d03580
2 changed files with 7 additions and 7 deletions
--- a/deploy/ai-server/docker-compose.audio.yml
+++ b/deploy/ai-server/docker-compose.audio.yml
@@ -103,15 +103,15 @@ services:
      - "--port"
      - "8000"
      - "--max-model-len"
-      - "32768"
+      - "16384"
      - "--gpu-memory-utilization"
-      - "0.62"
+      - "0.55"
      - "--api-key"
      - "${VLLM_API_KEY}"
      - "--max-num-seqs"
-      - "2"
+      - "1"
      - "--max-num-batched-tokens"
-      - "8192"
+      - "4096"
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
      interval: 30s
--- a/k8s/configmap.yaml
+++ b/k8s/configmap.yaml
@@ -11,16 +11,16 @@ data:
  LLM_BASE_URL: "http://10.2.3.5:8002"
  LLM_MODEL: "qwen2.5-14b"
  LLM_TIMEOUT: "5m"
-  TRANSCRIPTION_PROVIDERS: "whisperx,qwen2-audio,voxtral-small"
+  TRANSCRIPTION_PROVIDERS: "voxtral-small"
  WHISPERX_URL: "http://10.2.3.5:8001"
  WHISPERX_TIMEOUT: "10m"
  WHISPERX_LEAD_SILENCE: "800ms"
  # Fill in QWEN_AUDIO_BASE_URL and VOXTRAL_BASE_URL once Qwen2-Audio and
  # Voxtral are exposed as OpenAI-compatible chat-completions endpoints on
  # the AI server.
-  QWEN_AUDIO_BASE_URL: "http://10.2.3.5:8003"
+  QWEN_AUDIO_BASE_URL: ""
  QWEN_AUDIO_MODEL: "Qwen/Qwen2-Audio-7B-Instruct"
  QWEN_AUDIO_TIMEOUT: "10m"
-  VOXTRAL_BASE_URL: ""
+  VOXTRAL_BASE_URL: "http://10.2.3.5:8004"
  VOXTRAL_MODEL: "mistralai/Voxtral-Small-24B-2507"
  VOXTRAL_TIMEOUT: "10m"
  AUDIO_LLM_MAX_TOKENS: "4096"