Switch transcription to Whisper large v3
Some checks failed
CI / test (push) Failing after 10s
Build and Deploy / build-and-deploy (push) Successful in 24s

This commit is contained in:
Grendgi
2026-06-10 10:10:13 +03:00
parent 1b63dcdbf5
commit 8d6cd84403
12 changed files with 85 additions and 93 deletions

View File

@@ -15,7 +15,7 @@ The service is intentionally domain-agnostic:
`beeline/{call_id}` or `channel/{message_id}`. `beeline/{call_id}` or `channel/{message_id}`.
- `task_type` describes the technical task class, for example - `task_type` describes the technical task class, for example
`transcribe`, `call_analysis`, `tg_analysis`, `pf_competitor_analysis`. `transcribe`, `call_analysis`, `tg_analysis`, `pf_competitor_analysis`.
- `model_profile` selects a runtime profile, for example `voxtral-small`, - `model_profile` selects a runtime profile, for example `whisper-large-v3`,
`qwen2.5-14b`, `vision`, or a future provider profile. `qwen2.5-14b`, `vision`, or a future provider profile.
- `input` and `result` are JSON payloads owned by the caller and worker. - `input` and `result` are JSON payloads owned by the caller and worker.
@@ -46,23 +46,22 @@ or compact `system` / `user` fields. The completed job result contains
domain metadata fields in `input`, but the worker only reads chat fields such as domain metadata fields in `input`, but the worker only reads chat fields such as
`system`, `user`, `messages`, `max_tokens` and `response_format`. `system`, `user`, `messages`, `max_tokens` and `response_format`.
`transcription` jobs are processed only by Voxtral Small `transcription` jobs are processed only by Whisper Large v3
(`mistralai/Voxtral-Small-24B-2507`) through an OpenAI-compatible (`openai/whisper-large-v3`) through an OpenAI-compatible
`/v1/audio/transcriptions` endpoint. The returned `segments` field stays `/v1/audio/transcriptions` endpoint. The returned `segments` field stays
compatible with telephony. If the provider returns one long segment, AI Service compatible with telephony. If the provider returns one long segment, AI Service
splits it into smaller transcript segments and adds heuristic speaker labels splits it into smaller transcript segments without inventing speaker labels.
when diarization is requested.
AI-server compose snippet for Voxtral lives in AI-server compose snippet for Whisper Large v3 lives in
`deploy/ai-server/docker-compose.audio.yml`: `deploy/ai-server/docker-compose.audio.yml`:
- Voxtral endpoint: `http://10.2.3.5:8004` - Whisper endpoint: `http://10.2.3.5:8004`
- Start Voxtral: - Start Whisper:
`docker compose -f docker-compose.yml -f docker-compose.audio.yml --profile voxtral-small up -d voxtral-small` `docker compose -f docker-compose.yml -f docker-compose.audio.yml --profile whisper-large-v3 up -d whisper-large-v3`
In Kubernetes the dedicated transcription worker may claim more than one In Kubernetes the dedicated transcription worker may claim more than one
`voxtral-small` job at a time. This keeps download/upload/wait overhead from `whisper-large-v3` job at a time. This keeps download/upload/wait overhead from
serializing the queue while Voxtral/vLLM still controls the actual GPU serializing the queue while Whisper/vLLM still controls the actual GPU
scheduling. scheduling.
## API ## API
@@ -102,11 +101,11 @@ for Kubernetes probes.
- `LLM_API_KEY`, primary LLM API key - `LLM_API_KEY`, primary LLM API key
- `LLM_MODEL`, default `qwen2.5-14b` - `LLM_MODEL`, default `qwen2.5-14b`
- `LLM_TIMEOUT`, default `5m` - `LLM_TIMEOUT`, default `5m`
- `VOXTRAL_BASE_URL`, OpenAI-compatible endpoint for Voxtral - `AUDIO_TRANSCRIPTION_BASE_URL`, OpenAI-compatible transcription endpoint
- `VOXTRAL_MODEL`, default `mistralai/Voxtral-Small-24B-2507` - `AUDIO_TRANSCRIPTION_MODEL`, default `openai/whisper-large-v3`
- `VOXTRAL_API_KEY`, optional bearer token for Voxtral; falls back to - `AUDIO_TRANSCRIPTION_API_KEY`, optional bearer token; falls back to
`AUDIO_LLM_API_KEY`, then `LLM_API_KEY` `AUDIO_LLM_API_KEY`, then `LLM_API_KEY`
- `AUDIO_LLM_PROMPT`, transcription instruction for Voxtral - `AUDIO_TRANSCRIPTION_PROMPT`, transcription instruction
- `WORKER_ID`, default hostname - `WORKER_ID`, default hostname
- `WORKER_HTTP_HOST`, default `0.0.0.0` - `WORKER_HTTP_HOST`, default `0.0.0.0`
- `WORKER_HTTP_PORT`, default `8081` - `WORKER_HTTP_PORT`, default `8081`

View File

@@ -49,11 +49,11 @@ func main() {
llmClient := llm.New(cfg.LLMBaseURL, cfg.LLMAPIKey, cfg.LLMModel, cfg.LLMTimeout) llmClient := llm.New(cfg.LLMBaseURL, cfg.LLMAPIKey, cfg.LLMModel, cfg.LLMTimeout)
transcriber := transcription.NewWithOptions(transcription.Options{ transcriber := transcription.NewWithOptions(transcription.Options{
VoxtralBaseURL: cfg.VoxtralBaseURL, AudioBaseURL: cfg.AudioBaseURL,
VoxtralAPIKey: cfg.VoxtralAPIKey, AudioAPIKey: cfg.AudioAPIKey,
VoxtralModel: cfg.VoxtralModel, AudioModel: cfg.AudioModel,
VoxtralTimeout: cfg.VoxtralTimeout, AudioTimeout: cfg.AudioTimeout,
AudioLLMPrompt: cfg.AudioLLMPrompt, AudioPrompt: cfg.AudioPrompt,
}) })
w := worker.New(db, llmClient, transcriber, cfg.WorkerID, cfg.LLMModel, cfg.WorkerTaskTypes, cfg.WorkerModelProfiles, cfg.WorkerPollInterval, cfg.WorkerLeaseTimeout, cfg.WorkerClaimLimit) w := worker.New(db, llmClient, transcriber, cfg.WorkerID, cfg.LLMModel, cfg.WorkerTaskTypes, cfg.WorkerModelProfiles, cfg.WorkerPollInterval, cfg.WorkerLeaseTimeout, cfg.WorkerClaimLimit)
healthSrv := startHealthServer(ctx, db, cfg) healthSrv := startHealthServer(ctx, db, cfg)
@@ -62,8 +62,8 @@ func main() {
"worker_id", cfg.WorkerID, "worker_id", cfg.WorkerID,
"model", cfg.LLMModel, "model", cfg.LLMModel,
"transcription_enabled", transcriber != nil, "transcription_enabled", transcriber != nil,
"transcription_provider", "voxtral-small", "transcription_provider", transcription.ProviderWhisperLargeV3,
"transcription_model", cfg.VoxtralModel, "transcription_model", cfg.AudioModel,
"task_types", cfg.WorkerTaskTypes, "task_types", cfg.WorkerTaskTypes,
"model_profiles", cfg.WorkerModelProfiles, "model_profiles", cfg.WorkerModelProfiles,
"poll_interval", cfg.WorkerPollInterval.String(), "poll_interval", cfg.WorkerPollInterval.String(),
@@ -134,8 +134,8 @@ func (h workerHealth) ServeHTTP(w http.ResponseWriter, r *http.Request) {
"worker_id": h.cfg.WorkerID, "worker_id": h.cfg.WorkerID,
"task_types": h.cfg.WorkerTaskTypes, "task_types": h.cfg.WorkerTaskTypes,
"model_profiles": h.cfg.WorkerModelProfiles, "model_profiles": h.cfg.WorkerModelProfiles,
"transcription_provider": "voxtral-small", "transcription_provider": transcription.ProviderWhisperLargeV3,
"transcription_model": h.cfg.VoxtralModel, "transcription_model": h.cfg.AudioModel,
"claim_limit": h.cfg.WorkerClaimLimit, "claim_limit": h.cfg.WorkerClaimLimit,
"poll_interval": h.cfg.WorkerPollInterval.String(), "poll_interval": h.cfg.WorkerPollInterval.String(),
"lease_timeout": h.cfg.WorkerLeaseTimeout.String(), "lease_timeout": h.cfg.WorkerLeaseTimeout.String(),

View File

@@ -1,12 +1,12 @@
services: services:
voxtral-small: whisper-large-v3:
build: build:
context: . context: .
dockerfile: vllm-audio.Dockerfile dockerfile: vllm-audio.Dockerfile
image: vllm-audio:local image: vllm-audio:local
container_name: voxtral-small container_name: whisper-large-v3
profiles: profiles:
- voxtral-small - whisper-large-v3
restart: unless-stopped restart: unless-stopped
ipc: host ipc: host
runtime: nvidia runtime: nvidia
@@ -29,32 +29,19 @@ services:
- "10.2.3.5:8004:8000" - "10.2.3.5:8004:8000"
command: command:
- "--model" - "--model"
- "mistralai/Voxtral-Small-24B-2507" - "openai/whisper-large-v3"
- "--served-model-name" - "--served-model-name"
- "mistralai/Voxtral-Small-24B-2507" - "openai/whisper-large-v3"
- "--tokenizer-mode" - "--task"
- "mistral" - "transcription"
- "--config-format"
- "mistral"
- "--load-format"
- "mistral"
- "--tool-call-parser"
- "mistral"
- "--enable-auto-tool-choice"
- "--host" - "--host"
- "0.0.0.0" - "0.0.0.0"
- "--port" - "--port"
- "8000" - "8000"
- "--max-model-len"
- "16384"
- "--gpu-memory-utilization" - "--gpu-memory-utilization"
- "0.55" - "0.55"
- "--api-key" - "--api-key"
- "${VLLM_API_KEY}" - "${VLLM_API_KEY}"
- "--max-num-seqs"
- "1"
- "--max-num-batched-tokens"
- "4096"
healthcheck: healthcheck:
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"] test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
interval: 30s interval: 30s

View File

@@ -18,11 +18,11 @@ type Config struct {
LLMAPIKey string LLMAPIKey string
LLMModel string LLMModel string
LLMTimeout time.Duration LLMTimeout time.Duration
VoxtralBaseURL string AudioBaseURL string
VoxtralAPIKey string AudioAPIKey string
VoxtralModel string AudioModel string
VoxtralTimeout time.Duration AudioTimeout time.Duration
AudioLLMPrompt string AudioPrompt string
AIStatsSidecarURL string AIStatsSidecarURL string
AIStatsTimeout time.Duration AIStatsTimeout time.Duration
@@ -48,11 +48,11 @@ func Load() Config {
LLMAPIKey: envString("LLM_API_KEY", ""), LLMAPIKey: envString("LLM_API_KEY", ""),
LLMModel: envString("LLM_MODEL", "qwen2.5-14b"), LLMModel: envString("LLM_MODEL", "qwen2.5-14b"),
LLMTimeout: envDuration("LLM_TIMEOUT", 5*time.Minute), LLMTimeout: envDuration("LLM_TIMEOUT", 5*time.Minute),
VoxtralBaseURL: envString("VOXTRAL_BASE_URL", envString("AUDIO_LLM_BASE_URL", "")), AudioBaseURL: envString("AUDIO_TRANSCRIPTION_BASE_URL", envString("AUDIO_LLM_BASE_URL", "")),
VoxtralAPIKey: envString("VOXTRAL_API_KEY", envString("AUDIO_LLM_API_KEY", envString("LLM_API_KEY", ""))), AudioAPIKey: envString("AUDIO_TRANSCRIPTION_API_KEY", envString("AUDIO_LLM_API_KEY", envString("LLM_API_KEY", ""))),
VoxtralModel: envString("VOXTRAL_MODEL", "mistralai/Voxtral-Small-24B-2507"), AudioModel: envString("AUDIO_TRANSCRIPTION_MODEL", "openai/whisper-large-v3"),
VoxtralTimeout: envDuration("VOXTRAL_TIMEOUT", envDuration("AUDIO_LLM_TIMEOUT", 10*time.Minute)), AudioTimeout: envDuration("AUDIO_TRANSCRIPTION_TIMEOUT", envDuration("AUDIO_LLM_TIMEOUT", 10*time.Minute)),
AudioLLMPrompt: envString("AUDIO_LLM_PROMPT", defaultAudioLLMPrompt()), AudioPrompt: envString("AUDIO_TRANSCRIPTION_PROMPT", envString("AUDIO_LLM_PROMPT", defaultAudioPrompt())),
AIStatsSidecarURL: envString("AI_STATS_SIDECAR_URL", ""), AIStatsSidecarURL: envString("AI_STATS_SIDECAR_URL", ""),
AIStatsTimeout: envDuration("AI_STATS_TIMEOUT", 8*time.Second), AIStatsTimeout: envDuration("AI_STATS_TIMEOUT", 8*time.Second),
@@ -132,7 +132,7 @@ func envCSVDefault(key string, fallback []string) []string {
return fallback return fallback
} }
func defaultAudioLLMPrompt() string { func defaultAudioPrompt() string {
return "Расшифруй речь из аудио максимально точно. Сохрани русский язык, имена, телефоны, суммы и смысловые паузы. Не добавляй комментарии, анализ, Markdown или JSON. Верни только чистый текст расшифровки." return "Расшифруй речь из аудио максимально точно. Сохрани русский язык, имена, телефоны, суммы и смысловые паузы. Не добавляй комментарии, анализ, Markdown или JSON. Верни только чистый текст расшифровки."
} }

View File

@@ -5,6 +5,7 @@ import (
"time" "time"
"ai-service/internal/model" "ai-service/internal/model"
"ai-service/internal/transcription"
) )
type dashboardResponse struct { type dashboardResponse struct {
@@ -51,7 +52,7 @@ func (s *Server) handleDashboard(w http.ResponseWriter, r *http.Request) {
At: now, At: now,
Providers: []providerStatus{ Providers: []providerStatus{
s.checkLLM(ctx), s.checkLLM(ctx),
s.checkAudioLLM(ctx, "voxtral-small", s.cfg.VoxtralBaseURL, s.cfg.VoxtralAPIKey, s.cfg.VoxtralModel, s.cfg.VoxtralTimeout), s.checkAudioLLM(ctx, transcription.ProviderWhisperLargeV3, s.cfg.AudioBaseURL, s.cfg.AudioAPIKey, s.cfg.AudioModel, s.cfg.AudioTimeout),
}, },
}, },
Infra: loadInfraSnapshot(r, s.cfg), Infra: loadInfraSnapshot(r, s.cfg),

View File

@@ -8,6 +8,8 @@ import (
"net/http" "net/http"
"strings" "strings"
"time" "time"
"ai-service/internal/transcription"
) )
type providerStatus struct { type providerStatus struct {
@@ -42,7 +44,7 @@ func (s *Server) handleProviderStatus(w http.ResponseWriter, r *http.Request) {
At: time.Now().UTC(), At: time.Now().UTC(),
Providers: []providerStatus{ Providers: []providerStatus{
s.checkLLM(ctx), s.checkLLM(ctx),
s.checkAudioLLM(ctx, "voxtral-small", s.cfg.VoxtralBaseURL, s.cfg.VoxtralAPIKey, s.cfg.VoxtralModel, s.cfg.VoxtralTimeout), s.checkAudioLLM(ctx, transcription.ProviderWhisperLargeV3, s.cfg.AudioBaseURL, s.cfg.AudioAPIKey, s.cfg.AudioModel, s.cfg.AudioTimeout),
}, },
} }
writeJSON(w, http.StatusOK, resp) writeJSON(w, http.StatusOK, resp)

View File

@@ -19,16 +19,19 @@ type Client struct {
http *http.Client http *http.Client
} }
const ProviderVoxtral = "voxtral-small" const (
ProviderWhisperLargeV3 = "whisper-large-v3"
defaultWhisperModel = "openai/whisper-large-v3"
)
var speakerLabelPattern = regexp.MustCompile(`(?i)(?:^|[\n\r ]+)((?:speaker|спикер|говорящий)\s*\d+)\s*[:-]`) var speakerLabelPattern = regexp.MustCompile(`(?i)(?:^|[\n\r ]+)((?:speaker|спикер|говорящий)\s*\d+)\s*[:-]`)
type Options struct { type Options struct {
VoxtralBaseURL string AudioBaseURL string
VoxtralAPIKey string AudioAPIKey string
VoxtralModel string AudioModel string
VoxtralTimeout time.Duration AudioTimeout time.Duration
AudioLLMPrompt string AudioPrompt string
} }
type ProviderConfig struct { type ProviderConfig struct {
@@ -102,17 +105,17 @@ type audioTranscriptionSegment struct {
func New(baseURL string, timeout time.Duration, ffmpegPath string, leadSilence time.Duration) *Client { func New(baseURL string, timeout time.Duration, ffmpegPath string, leadSilence time.Duration) *Client {
return NewWithOptions(Options{ return NewWithOptions(Options{
VoxtralBaseURL: baseURL, AudioBaseURL: baseURL,
VoxtralTimeout: timeout, AudioTimeout: timeout,
}) })
} }
func NewWithOptions(opts Options) *Client { func NewWithOptions(opts Options) *Client {
audioLLMPrompt := strings.TrimSpace(opts.AudioLLMPrompt) audioPrompt := strings.TrimSpace(opts.AudioPrompt)
if audioLLMPrompt == "" { if audioPrompt == "" {
audioLLMPrompt = "Transcribe the audio exactly. Return only the transcript text." audioPrompt = "Transcribe the audio exactly. Return only the transcript text."
} }
provider := buildVoxtralProvider(opts, audioLLMPrompt) provider := buildAudioProvider(opts, audioPrompt)
if provider.BaseURL == "" { if provider.BaseURL == "" {
return nil return nil
} }
@@ -122,18 +125,18 @@ func NewWithOptions(opts Options) *Client {
} }
} }
func buildVoxtralProvider(opts Options, prompt string) ProviderConfig { func buildAudioProvider(opts Options, prompt string) ProviderConfig {
baseURL := strings.TrimRight(strings.TrimSpace(opts.VoxtralBaseURL), "/") baseURL := strings.TrimRight(strings.TrimSpace(opts.AudioBaseURL), "/")
if baseURL == "" { if baseURL == "" {
return ProviderConfig{} return ProviderConfig{}
} }
model := firstNonEmpty(opts.VoxtralModel, "mistralai/Voxtral-Small-24B-2507") model := firstNonEmpty(opts.AudioModel, defaultWhisperModel)
return ProviderConfig{ return ProviderConfig{
Name: ProviderVoxtral, Name: ProviderWhisperLargeV3,
BaseURL: baseURL, BaseURL: baseURL,
APIKey: strings.TrimSpace(opts.VoxtralAPIKey), APIKey: strings.TrimSpace(opts.AudioAPIKey),
Model: model, Model: model,
Timeout: defaultDuration(opts.VoxtralTimeout, 10*time.Minute), Timeout: defaultDuration(opts.AudioTimeout, 10*time.Minute),
Prompt: prompt, Prompt: prompt,
} }
} }
@@ -147,7 +150,7 @@ func defaultDuration(v, fallback time.Duration) time.Duration {
func (c *Client) Transcribe(ctx context.Context, in Input) (*Result, error) { func (c *Client) Transcribe(ctx context.Context, in Input) (*Result, error) {
if c == nil || c.provider.BaseURL == "" { if c == nil || c.provider.BaseURL == "" {
return nil, fmt.Errorf("voxtral transcription provider not configured") return nil, fmt.Errorf("audio transcription provider not configured")
} }
if strings.TrimSpace(in.AudioURL) == "" { if strings.TrimSpace(in.AudioURL) == "" {
return nil, fmt.Errorf("audio_url is required") return nil, fmt.Errorf("audio_url is required")

View File

@@ -7,22 +7,22 @@ import (
"testing" "testing"
) )
func TestNewWithOptionsBuildsVoxtralProvider(t *testing.T) { func TestNewWithOptionsBuildsWhisperProvider(t *testing.T) {
client := NewWithOptions(Options{ client := NewWithOptions(Options{
VoxtralBaseURL: "http://voxtral", AudioBaseURL: "http://whisper",
}) })
if client == nil { if client == nil {
t.Fatal("client is nil") t.Fatal("client is nil")
} }
if client.provider.Name != ProviderVoxtral { if client.provider.Name != ProviderWhisperLargeV3 {
t.Fatalf("provider = %q, want %q", client.provider.Name, ProviderVoxtral) t.Fatalf("provider = %q, want %q", client.provider.Name, ProviderWhisperLargeV3)
} }
if client.provider.Model != "mistralai/Voxtral-Small-24B-2507" { if client.provider.Model != "openai/whisper-large-v3" {
t.Fatalf("model = %q", client.provider.Model) t.Fatalf("model = %q", client.provider.Model)
} }
} }
func TestVoxtralUsesAudioTranscriptionsEndpoint(t *testing.T) { func TestWhisperUsesAudioTranscriptionsEndpoint(t *testing.T) {
audioSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { audioSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte("fake audio")) _, _ = w.Write([]byte("fake audio"))
})) }))
@@ -50,8 +50,8 @@ func TestVoxtralUsesAudioTranscriptionsEndpoint(t *testing.T) {
defer providerSrv.Close() defer providerSrv.Close()
client := NewWithOptions(Options{ client := NewWithOptions(Options{
VoxtralBaseURL: providerSrv.URL, AudioBaseURL: providerSrv.URL,
VoxtralModel: "mistralai/Voxtral-Small-24B-2507", AudioModel: "openai/whisper-large-v3",
}) })
if client == nil { if client == nil {
t.Fatal("client is nil") t.Fatal("client is nil")
@@ -63,7 +63,7 @@ func TestVoxtralUsesAudioTranscriptionsEndpoint(t *testing.T) {
if gotPath != "/v1/audio/transcriptions" { if gotPath != "/v1/audio/transcriptions" {
t.Fatalf("path = %q, want /v1/audio/transcriptions", gotPath) t.Fatalf("path = %q, want /v1/audio/transcriptions", gotPath)
} }
if gotModel != "mistralai/Voxtral-Small-24B-2507" { if gotModel != "openai/whisper-large-v3" {
t.Fatalf("model = %q", gotModel) t.Fatalf("model = %q", gotModel)
} }
if gotResponseFormat != "json" { if gotResponseFormat != "json" {

View File

@@ -20,7 +20,7 @@ const (
TaskCallAnalysis = "call_analysis" TaskCallAnalysis = "call_analysis"
TaskTranscription = "transcription" TaskTranscription = "transcription"
TranscriptionProfile = "voxtral-small" TranscriptionProfile = "whisper-large-v3"
) )
type Worker struct { type Worker struct {

View File

@@ -11,11 +11,11 @@ data:
LLM_BASE_URL: "http://10.2.3.5:8002" LLM_BASE_URL: "http://10.2.3.5:8002"
LLM_MODEL: "qwen2.5-14b" LLM_MODEL: "qwen2.5-14b"
LLM_TIMEOUT: "5m" LLM_TIMEOUT: "5m"
# Voxtral Small is the only transcription provider. It is exposed on the AI # Whisper Large v3 is exposed on the AI server through an OpenAI-compatible
# server through an OpenAI-compatible /v1/audio/transcriptions endpoint. # /v1/audio/transcriptions endpoint.
VOXTRAL_BASE_URL: "http://10.2.3.5:8004" AUDIO_TRANSCRIPTION_BASE_URL: "http://10.2.3.5:8004"
VOXTRAL_MODEL: "mistralai/Voxtral-Small-24B-2507" AUDIO_TRANSCRIPTION_MODEL: "openai/whisper-large-v3"
VOXTRAL_TIMEOUT: "30m" AUDIO_TRANSCRIPTION_TIMEOUT: "30m"
AI_STATS_SIDECAR_URL: "http://10.2.3.5:9090" AI_STATS_SIDECAR_URL: "http://10.2.3.5:9090"
AI_STATS_TIMEOUT: "8s" AI_STATS_TIMEOUT: "8s"
WORKER_POLL_INTERVAL: "2s" WORKER_POLL_INTERVAL: "2s"

View File

@@ -18,5 +18,5 @@ type: Opaque
stringData: stringData:
DATABASE_URL: "postgres://ai_service:ai_service@postgres:5432/ai_service?sslmode=disable" DATABASE_URL: "postgres://ai_service:ai_service@postgres:5432/ai_service?sslmode=disable"
LLM_API_KEY: "sk-111f838ccec43406e078cd9094b6797307cb895236179f32" LLM_API_KEY: "sk-111f838ccec43406e078cd9094b6797307cb895236179f32"
VOXTRAL_API_KEY: "sk-111f838ccec43406e078cd9094b6797307cb895236179f32" AUDIO_TRANSCRIPTION_API_KEY: "sk-111f838ccec43406e078cd9094b6797307cb895236179f32"
AI_SERVICE_TOKEN: "d18bcacf9e02bae1806ee6b6eeda62b95be6a915c0a22936d9a700128b275442" AI_SERVICE_TOKEN: "d18bcacf9e02bae1806ee6b6eeda62b95be6a915c0a22936d9a700128b275442"

View File

@@ -98,7 +98,7 @@ spec:
- name: WORKER_TASK_TYPES - name: WORKER_TASK_TYPES
value: "transcription" value: "transcription"
- name: WORKER_MODEL_PROFILES - name: WORKER_MODEL_PROFILES
value: "voxtral-small" value: "whisper-large-v3"
- name: WORKER_CLAIM_LIMIT - name: WORKER_CLAIM_LIMIT
value: "2" value: "2"
- name: WORKER_LEASE_TIMEOUT - name: WORKER_LEASE_TIMEOUT