141 lines
4.2 KiB
Go
141 lines
4.2 KiB
Go
package transcription
|
|
|
|
import (
|
|
"encoding/json"
|
|
"math"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
func TestAdjustLeadSilence(t *testing.T) {
|
|
got := adjustLeadSilence([]Segment{
|
|
{Start: 0.2, End: 1.1, Text: "first"},
|
|
{Start: 1.4, End: 2.0, Text: "second"},
|
|
}, 800*time.Millisecond)
|
|
|
|
if got[0].Start != 0 {
|
|
t.Fatalf("first start = %v, want 0", got[0].Start)
|
|
}
|
|
if !near(got[0].End, 0.3) {
|
|
t.Fatalf("first end = %v, want 0.3", got[0].End)
|
|
}
|
|
if !near(got[1].Start, 0.6) {
|
|
t.Fatalf("second start = %v, want 0.6", got[1].Start)
|
|
}
|
|
}
|
|
|
|
func TestNormalizeProviderOrder(t *testing.T) {
|
|
got := normalizeProviderOrder([]string{"whisperx", "qwen", "voxtral", "qwen2-audio"})
|
|
want := []string{ProviderWhisperX, ProviderQwenAudio, ProviderVoxtral}
|
|
if len(got) != len(want) {
|
|
t.Fatalf("providers = %#v, want %#v", got, want)
|
|
}
|
|
for i := range want {
|
|
if got[i] != want[i] {
|
|
t.Fatalf("providers = %#v, want %#v", got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestNewWithOptionsBuildsComparisonProviders(t *testing.T) {
|
|
client := NewWithOptions(Options{
|
|
Providers: []string{"whisperx", "qwen2-audio", "voxtral-small"},
|
|
WhisperXURL: "http://whisperx",
|
|
QwenAudioBaseURL: "http://qwen",
|
|
VoxtralBaseURL: "http://voxtral",
|
|
})
|
|
if client == nil {
|
|
t.Fatal("client is nil")
|
|
}
|
|
got := make([]string, 0, len(client.providers))
|
|
for _, provider := range client.providers {
|
|
got = append(got, provider.Name)
|
|
}
|
|
want := []string{ProviderWhisperX, ProviderQwenAudio, ProviderVoxtral}
|
|
for i := range want {
|
|
if got[i] != want[i] {
|
|
t.Fatalf("providers = %#v, want %#v", got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestAudioDataURLUsesVLLMAudioURLFormat(t *testing.T) {
|
|
got := audioDataURL([]byte("abc"), "call.wav")
|
|
want := "data:audio/wav;base64,YWJj"
|
|
if got != want {
|
|
t.Fatalf("audio data url = %q, want %q", got, want)
|
|
}
|
|
}
|
|
|
|
func TestVoxtralUsesAudioTranscriptionsEndpoint(t *testing.T) {
|
|
audioSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = w.Write([]byte("fake audio"))
|
|
}))
|
|
defer audioSrv.Close()
|
|
|
|
var gotPath, gotModel, gotResponseFormat string
|
|
providerSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
gotPath = r.URL.Path
|
|
if err := r.ParseMultipartForm(16 << 20); err != nil {
|
|
t.Fatalf("ParseMultipartForm: %v", err)
|
|
}
|
|
gotModel = r.FormValue("model")
|
|
gotResponseFormat = r.FormValue("response_format")
|
|
if _, _, err := r.FormFile("file"); err != nil {
|
|
t.Fatalf("FormFile: %v", err)
|
|
}
|
|
_ = json.NewEncoder(w).Encode(map[string]any{
|
|
"text": "Алло, тест. Да, слышно.",
|
|
"segments": []map[string]any{
|
|
{"start": 0, "end": 1.2, "text": "Алло, тест."},
|
|
{"start": 1.2, "end": 2.4, "text": "Да, слышно."},
|
|
},
|
|
})
|
|
}))
|
|
defer providerSrv.Close()
|
|
|
|
client := NewWithOptions(Options{
|
|
Providers: []string{"voxtral-small"},
|
|
VoxtralBaseURL: providerSrv.URL,
|
|
VoxtralModel: "mistralai/Voxtral-Small-24B-2507",
|
|
})
|
|
if client == nil {
|
|
t.Fatal("client is nil")
|
|
}
|
|
got, err := client.Transcribe(t.Context(), Input{AudioURL: audioSrv.URL, Filename: "call.mp3"})
|
|
if err != nil {
|
|
t.Fatalf("Transcribe: %v", err)
|
|
}
|
|
if gotPath != "/v1/audio/transcriptions" {
|
|
t.Fatalf("path = %q, want /v1/audio/transcriptions", gotPath)
|
|
}
|
|
if gotModel != "mistralai/Voxtral-Small-24B-2507" {
|
|
t.Fatalf("model = %q", gotModel)
|
|
}
|
|
if gotResponseFormat != "verbose_json" {
|
|
t.Fatalf("response_format = %q, want verbose_json", gotResponseFormat)
|
|
}
|
|
if len(got.Segments) != 2 || got.Segments[0].Text != "Алло, тест." || got.Segments[1].Start != 1.2 {
|
|
t.Fatalf("segments = %#v", got.Segments)
|
|
}
|
|
}
|
|
|
|
func TestSegmentTranscriptTextAddsHeuristicSpeakers(t *testing.T) {
|
|
got := segmentTranscriptText("Алло, добрый день. Да, слушаю. Скажите, квартира продается? Да, продается.", true)
|
|
if len(got) < 2 {
|
|
t.Fatalf("segments = %#v, want multiple", got)
|
|
}
|
|
if got[0].Speaker != "SPEAKER_00" || got[1].Speaker != "SPEAKER_01" {
|
|
t.Fatalf("speakers = %q/%q", got[0].Speaker, got[1].Speaker)
|
|
}
|
|
if got[1].Start <= got[0].Start {
|
|
t.Fatalf("segment times did not advance: %#v", got)
|
|
}
|
|
}
|
|
|
|
// near reports whether got and want differ by strictly less than 1e-6 in
// absolute value.
func near(got, want float64) bool {
	const tolerance = 0.000001

	delta := math.Abs(got - want)
	return delta < tolerance
}
|