From 94e0d035808e40dd79bd039797117c304464c46c Mon Sep 17 00:00:00 2001
From: Grendgi
Date: Tue, 9 Jun 2026 15:11:52 +0300
Subject: [PATCH] Switch transcription comparison to Voxtral

---
Notes (ignored by `git am`; not part of the commit message):

  * Effect of this patch, as shown by the hunks below:
      - TRANSCRIPTION_PROVIDERS is reduced from
        "whisperx,qwen2-audio,voxtral-small" to "voxtral-small".
      - QWEN_AUDIO_BASE_URL is cleared and VOXTRAL_BASE_URL is set to
        "http://10.2.3.5:8004".
      - The server flags in docker-compose.audio.yml are lowered:
        --max-model-len 32768 -> 16384, --gpu-memory-utilization 0.62 -> 0.55,
        --max-num-seqs 2 -> 1, --max-num-batched-tokens 8192 -> 4096.
  * NOTE(review): this copy of the patch was restored from a version whose
    line breaks and leading whitespace had been lost. Line counts match the
    hunk headers and the diffstat, but the indentation of the context lines
    is assumed (6/4 spaces in the Compose file, 2 spaces under `data:`) --
    TODO confirm against the target files before applying.

 deploy/ai-server/docker-compose.audio.yml | 8 ++++----
 k8s/configmap.yaml                        | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/deploy/ai-server/docker-compose.audio.yml b/deploy/ai-server/docker-compose.audio.yml
index e9b5233..a5d286f 100644
--- a/deploy/ai-server/docker-compose.audio.yml
+++ b/deploy/ai-server/docker-compose.audio.yml
@@ -103,15 +103,15 @@ services:
       - "--port"
       - "8000"
       - "--max-model-len"
-      - "32768"
+      - "16384"
       - "--gpu-memory-utilization"
-      - "0.62"
+      - "0.55"
       - "--api-key"
       - "${VLLM_API_KEY}"
       - "--max-num-seqs"
-      - "2"
+      - "1"
       - "--max-num-batched-tokens"
-      - "8192"
+      - "4096"
     healthcheck:
       test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
       interval: 30s
diff --git a/k8s/configmap.yaml b/k8s/configmap.yaml
index 37699d9..6c61319 100644
--- a/k8s/configmap.yaml
+++ b/k8s/configmap.yaml
@@ -11,16 +11,16 @@ data:
   LLM_BASE_URL: "http://10.2.3.5:8002"
   LLM_MODEL: "qwen2.5-14b"
   LLM_TIMEOUT: "5m"
-  TRANSCRIPTION_PROVIDERS: "whisperx,qwen2-audio,voxtral-small"
+  TRANSCRIPTION_PROVIDERS: "voxtral-small"
   WHISPERX_URL: "http://10.2.3.5:8001"
   WHISPERX_TIMEOUT: "10m"
   WHISPERX_LEAD_SILENCE: "800ms"
   # Fill these after Qwen2-Audio and Voxtral are exposed as OpenAI-compatible
   # chat-completions endpoints on the AI server.
-  QWEN_AUDIO_BASE_URL: "http://10.2.3.5:8003"
+  QWEN_AUDIO_BASE_URL: ""
   QWEN_AUDIO_MODEL: "Qwen/Qwen2-Audio-7B-Instruct"
   QWEN_AUDIO_TIMEOUT: "10m"
-  VOXTRAL_BASE_URL: ""
+  VOXTRAL_BASE_URL: "http://10.2.3.5:8004"
   VOXTRAL_MODEL: "mistralai/Voxtral-Small-24B-2507"
   VOXTRAL_TIMEOUT: "10m"
   AUDIO_LLM_MAX_TOKENS: "4096"