package transcription import ( "encoding/json" "math" "net/http" "net/http/httptest" "testing" "time" ) func TestAdjustLeadSilence(t *testing.T) { got := adjustLeadSilence([]Segment{ {Start: 0.2, End: 1.1, Text: "first"}, {Start: 1.4, End: 2.0, Text: "second"}, }, 800*time.Millisecond) if got[0].Start != 0 { t.Fatalf("first start = %v, want 0", got[0].Start) } if !near(got[0].End, 0.3) { t.Fatalf("first end = %v, want 0.3", got[0].End) } if !near(got[1].Start, 0.6) { t.Fatalf("second start = %v, want 0.6", got[1].Start) } } func TestNormalizeProviderOrder(t *testing.T) { got := normalizeProviderOrder([]string{"whisperx", "qwen", "voxtral", "qwen2-audio"}) want := []string{ProviderWhisperX, ProviderQwenAudio, ProviderVoxtral} if len(got) != len(want) { t.Fatalf("providers = %#v, want %#v", got, want) } for i := range want { if got[i] != want[i] { t.Fatalf("providers = %#v, want %#v", got, want) } } } func TestNewWithOptionsBuildsComparisonProviders(t *testing.T) { client := NewWithOptions(Options{ Providers: []string{"whisperx", "qwen2-audio", "voxtral-small"}, WhisperXURL: "http://whisperx", QwenAudioBaseURL: "http://qwen", VoxtralBaseURL: "http://voxtral", }) if client == nil { t.Fatal("client is nil") } got := make([]string, 0, len(client.providers)) for _, provider := range client.providers { got = append(got, provider.Name) } want := []string{ProviderWhisperX, ProviderQwenAudio, ProviderVoxtral} for i := range want { if got[i] != want[i] { t.Fatalf("providers = %#v, want %#v", got, want) } } } func TestAudioDataURLUsesVLLMAudioURLFormat(t *testing.T) { got := audioDataURL([]byte("abc"), "call.wav") want := "data:audio/wav;base64,YWJj" if got != want { t.Fatalf("audio data url = %q, want %q", got, want) } } func TestVoxtralUsesAudioTranscriptionsEndpoint(t *testing.T) { audioSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _, _ = w.Write([]byte("fake audio")) })) defer audioSrv.Close() var gotPath, gotModel string providerSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { gotPath = r.URL.Path if err := r.ParseMultipartForm(16 << 20); err != nil { t.Fatalf("ParseMultipartForm: %v", err) } gotModel = r.FormValue("model") if _, _, err := r.FormFile("file"); err != nil { t.Fatalf("FormFile: %v", err) } _ = json.NewEncoder(w).Encode(map[string]string{"text": "Алло, тест."}) })) defer providerSrv.Close() client := NewWithOptions(Options{ Providers: []string{"voxtral-small"}, VoxtralBaseURL: providerSrv.URL, VoxtralModel: "mistralai/Voxtral-Small-24B-2507", }) if client == nil { t.Fatal("client is nil") } got, err := client.Transcribe(t.Context(), Input{AudioURL: audioSrv.URL, Filename: "call.mp3"}) if err != nil { t.Fatalf("Transcribe: %v", err) } if gotPath != "/v1/audio/transcriptions" { t.Fatalf("path = %q, want /v1/audio/transcriptions", gotPath) } if gotModel != "mistralai/Voxtral-Small-24B-2507" { t.Fatalf("model = %q", gotModel) } if len(got.Segments) != 1 || got.Segments[0].Text != "Алло, тест." { t.Fatalf("segments = %#v", got.Segments) } } func near(got, want float64) bool { return math.Abs(got-want) < 0.000001 }