Split single Voxtral transcript segments
This commit is contained in:
@@ -377,10 +377,7 @@ func (c *Client) transcribeWithProvider(ctx context.Context, provider ProviderCo
|
||||
return nil, attempt, err
|
||||
}
|
||||
text := strings.TrimSpace(resp.Text)
|
||||
segments := resp.Segments
|
||||
if len(segments) == 0 {
|
||||
segments = segmentTranscriptText(text, in.Diarize)
|
||||
}
|
||||
segments := normalizeAudioLLMSegments(resp.Segments, text, in.Diarize)
|
||||
attempt.Status = "ok"
|
||||
attempt.Model = resp.Model
|
||||
attempt.Text = text
|
||||
@@ -749,6 +746,42 @@ func convertAudioSegments(in []audioTranscriptionSegment) []Segment {
|
||||
return out
|
||||
}
|
||||
|
||||
func normalizeAudioLLMSegments(segments []Segment, text string, diarize bool) []Segment {
|
||||
text = strings.TrimSpace(text)
|
||||
if len(segments) <= 1 && text != "" {
|
||||
heuristic := segmentTranscriptText(text, diarize)
|
||||
if len(heuristic) > len(segments) {
|
||||
segments = heuristic
|
||||
}
|
||||
}
|
||||
return ensureHeuristicSpeakers(segments, diarize)
|
||||
}
|
||||
|
||||
func ensureHeuristicSpeakers(segments []Segment, diarize bool) []Segment {
|
||||
if !diarize || len(segments) < 2 || segmentsHaveSpeakers(segments) {
|
||||
return segments
|
||||
}
|
||||
out := make([]Segment, len(segments))
|
||||
copy(out, segments)
|
||||
for i := range out {
|
||||
if i%2 == 0 {
|
||||
out[i].Speaker = "SPEAKER_00"
|
||||
} else {
|
||||
out[i].Speaker = "SPEAKER_01"
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func segmentsHaveSpeakers(segments []Segment) bool {
|
||||
for _, segment := range segments {
|
||||
if strings.TrimSpace(segment.Speaker) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func segmentTranscriptText(text string, diarize bool) []Segment {
|
||||
parts := splitTranscriptSentences(text)
|
||||
out := make([]Segment, 0, len(parts))
|
||||
|
||||
Reference in New Issue
Block a user