Add transcription provider comparison chain

This commit is contained in:
Grendgi
2026-06-09 12:34:08 +03:00
parent 562fad6f87
commit aaecbb1bed
9 changed files with 600 additions and 57 deletions

View File

@@ -14,16 +14,27 @@ type Config struct {
MigrateOnStart bool
APIAuthToken string
LLMBaseURL string
LLMAPIKey string
LLMModel string
LLMTimeout time.Duration
WhisperXURL string
WhisperXTimeout time.Duration
WhisperXLeadSilence time.Duration
FfmpegPath string
AIStatsSidecarURL string
AIStatsTimeout time.Duration
LLMBaseURL string
LLMAPIKey string
LLMModel string
LLMTimeout time.Duration
TranscriptionProviders []string
WhisperXURL string
WhisperXTimeout time.Duration
WhisperXLeadSilence time.Duration
QwenAudioBaseURL string
QwenAudioAPIKey string
QwenAudioModel string
QwenAudioTimeout time.Duration
VoxtralBaseURL string
VoxtralAPIKey string
VoxtralModel string
VoxtralTimeout time.Duration
AudioLLMMaxTokens int
AudioLLMPrompt string
FfmpegPath string
AIStatsSidecarURL string
AIStatsTimeout time.Duration
WorkerID string
WorkerHTTPHost string
@@ -43,16 +54,27 @@ func Load() Config {
MigrateOnStart: envBool("MIGRATE_ON_START", true),
APIAuthToken: envString("AI_SERVICE_TOKEN", ""),
LLMBaseURL: envString("LLM_BASE_URL", ""),
LLMAPIKey: envString("LLM_API_KEY", ""),
LLMModel: envString("LLM_MODEL", "qwen2.5-14b"),
LLMTimeout: envDuration("LLM_TIMEOUT", 5*time.Minute),
WhisperXURL: envString("WHISPERX_URL", ""),
WhisperXTimeout: envDuration("WHISPERX_TIMEOUT", 10*time.Minute),
WhisperXLeadSilence: envDuration("WHISPERX_LEAD_SILENCE", 800*time.Millisecond),
FfmpegPath: envString("FFMPEG_PATH", "/usr/bin/ffmpeg"),
AIStatsSidecarURL: envString("AI_STATS_SIDECAR_URL", ""),
AIStatsTimeout: envDuration("AI_STATS_TIMEOUT", 8*time.Second),
LLMBaseURL: envString("LLM_BASE_URL", ""),
LLMAPIKey: envString("LLM_API_KEY", ""),
LLMModel: envString("LLM_MODEL", "qwen2.5-14b"),
LLMTimeout: envDuration("LLM_TIMEOUT", 5*time.Minute),
TranscriptionProviders: envCSVDefault("TRANSCRIPTION_PROVIDERS", []string{"whisperx"}),
WhisperXURL: envString("WHISPERX_URL", ""),
WhisperXTimeout: envDuration("WHISPERX_TIMEOUT", 10*time.Minute),
WhisperXLeadSilence: envDuration("WHISPERX_LEAD_SILENCE", 800*time.Millisecond),
QwenAudioBaseURL: envString("QWEN_AUDIO_BASE_URL", envString("AUDIO_LLM_BASE_URL", "")),
QwenAudioAPIKey: envString("QWEN_AUDIO_API_KEY", envString("AUDIO_LLM_API_KEY", "")),
QwenAudioModel: envString("QWEN_AUDIO_MODEL", "Qwen/Qwen2-Audio-7B-Instruct"),
QwenAudioTimeout: envDuration("QWEN_AUDIO_TIMEOUT", envDuration("AUDIO_LLM_TIMEOUT", 10*time.Minute)),
VoxtralBaseURL: envString("VOXTRAL_BASE_URL", envString("AUDIO_LLM_BASE_URL", "")),
VoxtralAPIKey: envString("VOXTRAL_API_KEY", envString("AUDIO_LLM_API_KEY", "")),
VoxtralModel: envString("VOXTRAL_MODEL", "mistralai/Voxtral-Small-24B-2507"),
VoxtralTimeout: envDuration("VOXTRAL_TIMEOUT", envDuration("AUDIO_LLM_TIMEOUT", 10*time.Minute)),
AudioLLMMaxTokens: envInt("AUDIO_LLM_MAX_TOKENS", 4096),
AudioLLMPrompt: envString("AUDIO_LLM_PROMPT", defaultAudioLLMPrompt()),
FfmpegPath: envString("FFMPEG_PATH", "/usr/bin/ffmpeg"),
AIStatsSidecarURL: envString("AI_STATS_SIDECAR_URL", ""),
AIStatsTimeout: envDuration("AI_STATS_TIMEOUT", 8*time.Second),
WorkerID: envString("WORKER_ID", hostname()),
WorkerHTTPHost: envString("WORKER_HTTP_HOST", "0.0.0.0"),
@@ -123,6 +145,17 @@ func envCSV(key string) []string {
return out
}
func envCSVDefault(key string, fallback []string) []string {
if values := envCSV(key); len(values) > 0 {
return values
}
return fallback
}
func defaultAudioLLMPrompt() string {
return "Расшифруй речь из аудио максимально точно. Сохрани русский язык, имена, телефоны, суммы и смысловые паузы. Не добавляй комментарии, анализ, Markdown или JSON. Верни только чистый текст расшифровки."
}
func hostname() string {
h, err := os.Hostname()
if err != nil || h == "" {