Make Voxtral the only transcription provider

2026-06-09 16:54:54 +03:00
parent 5c965be8c9
commit 9bd6d726f0
15 changed files with 128 additions and 900 deletions
--- a/deploy/ai-server/docker-compose.audio.yml
+++ b/deploy/ai-server/docker-compose.audio.yml
@@ -1,60 +1,4 @@
 services:
-  qwen-audio:
-    build:
-      context: .
-      dockerfile: vllm-audio.Dockerfile
-    image: vllm-audio:local
-    container_name: qwen-audio
-    profiles:
-      - qwen-audio
-      - audio-compare
-    restart: unless-stopped
-    ipc: host
-    runtime: nvidia
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    environment:
-      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
-      VLLM_API_KEY: ${VLLM_API_KEY}
-      HF_HOME: /cache
-    volumes:
-      - ./data/vllm-cache:/cache
-    networks:
-      - audio-models
-    ports:
-      - "10.2.3.5:8003:8000"
-    command:
-      - "--model"
-      - "Qwen/Qwen2-Audio-7B-Instruct"
-      - "--served-model-name"
-      - "Qwen/Qwen2-Audio-7B-Instruct"
-      - "--trust-remote-code"
-      - "--host"
-      - "0.0.0.0"
-      - "--port"
-      - "8000"
-      - "--max-model-len"
-      - "8192"
-      - "--gpu-memory-utilization"
-      - "0.25"
-      - "--api-key"
-      - "${VLLM_API_KEY}"
-      - "--max-num-seqs"
-      - "4"
-      - "--max-num-batched-tokens"
-      - "4096"
-    healthcheck:
-      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
-      interval: 30s
-      timeout: 5s
-      retries: 5
-      start_period: 900s
-
  voxtral-small:
    build:
      context: .
@@ -63,7 +7,6 @@ services:
    container_name: voxtral-small
    profiles:
      - voxtral-small
-      - audio-compare
    restart: unless-stopped
    ipc: host
    runtime: nvidia