diff --git a/internal/transcription/client.go b/internal/transcription/client.go index 3e3563e..ebc95b9 100644 --- a/internal/transcription/client.go +++ b/internal/transcription/client.go @@ -120,14 +120,13 @@ type audioLLMChatMessage struct { } type audioLLMContentPart struct { - Type string `json:"type"` - Text string `json:"text,omitempty"` - InputAudio *audioLLMAudio `json:"input_audio,omitempty"` + Type string `json:"type"` + Text string `json:"text,omitempty"` + AudioURL *audioLLMURLRef `json:"audio_url,omitempty"` } -type audioLLMAudio struct { - Data string `json:"data"` - Format string `json:"format,omitempty"` +type audioLLMURLRef struct { + URL string `json:"url"` } type audioLLMChatResponse struct { @@ -567,11 +566,8 @@ func (c *Client) transcribeAudioLLM(ctx context.Context, provider ProviderConfig Content: []audioLLMContentPart{ {Type: "text", Text: prompt}, { - Type: "input_audio", - InputAudio: &audioLLMAudio{ - Data: base64.StdEncoding.EncodeToString(audio), - Format: audioFormat(filename), - }, + Type: "audio_url", + AudioURL: &audioLLMURLRef{URL: audioDataURL(audio, filename)}, }, }, }, @@ -634,6 +630,10 @@ func audioFormat(filename string) string { } } +func audioDataURL(audio []byte, filename string) string { + return "data:audio/" + audioFormat(filename) + ";base64," + base64.StdEncoding.EncodeToString(audio) +} + func firstNonEmpty(values ...string) string { for _, value := range values { if strings.TrimSpace(value) != "" { diff --git a/internal/transcription/client_test.go b/internal/transcription/client_test.go index 6f44e94..8f9fb61 100644 --- a/internal/transcription/client_test.go +++ b/internal/transcription/client_test.go @@ -58,6 +58,14 @@ func TestNewWithOptionsBuildsComparisonProviders(t *testing.T) { } } +func TestAudioDataURLUsesVLLMAudioURLFormat(t *testing.T) { + got := audioDataURL([]byte("abc"), "call.wav") + want := "data:audio/wav;base64,YWJj" + if got != want { + t.Fatalf("audio data url = %q, want %q", got, want) + } +} + 
// near reports whether got and want are equal within an absolute tolerance
// of 1e-6. The comparison is strict, so a difference of exactly the
// tolerance is not considered near.
func near(got, want float64) bool {
	const tolerance = 0.000001
	delta := got - want
	return math.Abs(delta) < tolerance
}