Split single Voxtral transcript segments
All checks were successful
CI / test (push) Successful in 13s
Build and Deploy / build-and-deploy (push) Successful in 23s

This commit is contained in:
Grendgi
2026-06-09 16:20:52 +03:00
parent 64bf40b3ba
commit 5c965be8c9
2 changed files with 61 additions and 4 deletions

View File

@@ -377,10 +377,7 @@ func (c *Client) transcribeWithProvider(ctx context.Context, provider ProviderCo
return nil, attempt, err
}
text := strings.TrimSpace(resp.Text)
segments := resp.Segments
if len(segments) == 0 {
segments = segmentTranscriptText(text, in.Diarize)
}
segments := normalizeAudioLLMSegments(resp.Segments, text, in.Diarize)
attempt.Status = "ok"
attempt.Model = resp.Model
attempt.Text = text
@@ -749,6 +746,42 @@ func convertAudioSegments(in []audioTranscriptionSegment) []Segment {
return out
}
// normalizeAudioLLMSegments prepares the segments for a transcription result.
//
// The transcript text is trimmed first. When at most one segment was supplied
// and the trimmed text is non-empty, the text is re-split with
// segmentTranscriptText; that split replaces the supplied segments only if it
// contains more entries than they do. Whichever slice is chosen is then passed
// through ensureHeuristicSpeakers together with the diarize flag.
func normalizeAudioLLMSegments(segments []Segment, text string, diarize bool) []Segment {
	trimmed := strings.TrimSpace(text)

	// Two or more supplied segments, or no text to split: keep what we have.
	if trimmed == "" || len(segments) > 1 {
		return ensureHeuristicSpeakers(segments, diarize)
	}

	// Prefer the text-derived split only when it is strictly finer-grained.
	if split := segmentTranscriptText(trimmed, diarize); len(split) > len(segments) {
		return ensureHeuristicSpeakers(split, diarize)
	}
	return ensureHeuristicSpeakers(segments, diarize)
}
// ensureHeuristicSpeakers fills in alternating speaker labels when none exist.
//
// The input slice is returned unchanged when diarize is false, when there are
// fewer than two segments, or when segmentsHaveSpeakers reports that at least
// one segment already carries a speaker. Otherwise a copy of the slice is
// returned in which even-indexed segments are labelled "SPEAKER_00" and
// odd-indexed segments "SPEAKER_01"; the caller's slice is not modified.
func ensureHeuristicSpeakers(segments []Segment, diarize bool) []Segment {
	needsLabels := diarize && len(segments) >= 2 && !segmentsHaveSpeakers(segments)
	if !needsLabels {
		return segments
	}

	// Indexed by position parity: even -> first label, odd -> second label.
	labels := [2]string{"SPEAKER_00", "SPEAKER_01"}

	// Work on a copy so the caller's backing array stays untouched.
	labeled := append(make([]Segment, 0, len(segments)), segments...)
	for idx := range labeled {
		labeled[idx].Speaker = labels[idx%2]
	}
	return labeled
}
// segmentsHaveSpeakers reports whether any segment has a Speaker value that is
// non-empty after surrounding whitespace is trimmed. It returns false for an
// empty or nil slice.
func segmentsHaveSpeakers(segments []Segment) bool {
	for i := range segments {
		if speaker := strings.TrimSpace(segments[i].Speaker); speaker != "" {
			return true
		}
	}
	return false
}
func segmentTranscriptText(text string, diarize bool) []Segment {
parts := splitTranscriptSentences(text)
out := make([]Segment, 0, len(parts))