From 63553fba33e2a89bf74c7e5d1f3396f7d72b1617 Mon Sep 17 00:00:00 2001 From: Grendgi Date: Wed, 17 Jun 2026 16:46:03 +0300 Subject: [PATCH] feat: version ai result schemas --- README.md | 20 ++++++++++++- internal/llm/client.go | 20 ++++++++----- internal/llm/client_test.go | 43 +++++++++++++++++++++++++++ internal/transcription/client.go | 30 +++++++++++-------- internal/transcription/client_test.go | 3 ++ 5 files changed, 94 insertions(+), 22 deletions(-) create mode 100644 internal/llm/client_test.go diff --git a/README.md b/README.md index 5128c42..834f436 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,8 @@ Input can be either explicit messages: ``` or compact `system` / `user` fields. The completed job result contains -`content`, `model`, `usage` and `duration_ms`. +`schema_version=ai.chat_result.v1`, `content`, `model`, `usage` and +`duration_ms`. `call_analysis` and `transcript_summary` use the same input contract as `llm_chat`; callers may include domain metadata fields in `input`, but the @@ -55,6 +56,9 @@ worker only reads chat fields such as `system`, `user`, `messages`, `/v1/audio/transcriptions` endpoint. The returned `segments` field stays compatible with telephony. If the provider returns one long segment, AI Service splits it into smaller transcript segments without inventing speaker labels. +The completed job result contains +`schema_version=ai.transcription_result.v1`, `provider`, `model`, `language`, +`segments`, optional provider `attempts` and `duration_ms`. AI-server compose snippet for Whisper Large v3 lives in `deploy/ai-server/docker-compose.audio.yml`: @@ -110,6 +114,20 @@ explicitly retryable categories while attempts remain. Domain services may still expose manual retry for terminal errors after the underlying data or prompt is corrected. +## Result schemas + +AI Service result payloads are versioned with `schema_version`. Consumers should +ignore unknown fields and reject only unsupported major schema names. + +Current schemas: + +- `ai.chat_result.v1`: `{schema_version, content, model, usage?, duration_ms}`. +- `ai.transcription_result.v1`: + `{schema_version, provider?, model?, attempts?, language, segments, duration_ms}`. + +New optional fields may be added to a `v1` schema without a breaking change. +Breaking shape changes require a new schema name. + ## Configuration - `HTTP_HOST`, default `0.0.0.0` diff --git a/internal/llm/client.go b/internal/llm/client.go index 9de6fd5..7629daa 100644 --- a/internal/llm/client.go +++ b/internal/llm/client.go @@ -38,11 +38,14 @@ type Usage struct { TotalTokens int `json:"total_tokens"` } +const ChatResultSchemaVersion = "ai.chat_result.v1" + type ChatResult struct { - Content string `json:"content"` - Model string `json:"model"` - Usage *Usage `json:"usage,omitempty"` - DurationMS int64 `json:"duration_ms"` + SchemaVersion string `json:"schema_version"` + Content string `json:"content"` + Model string `json:"model"` + Usage *Usage `json:"usage,omitempty"` + DurationMS int64 `json:"duration_ms"` } type chatRequest struct { @@ -137,10 +140,11 @@ func (c *Client) Chat(ctx context.Context, in ChatInput) (*ChatResult, error) { modelName = c.model } return &ChatResult{ - Content: out.Choices[0].Message.Content, - Model: modelName, - Usage: out.Usage, - DurationMS: duration.Milliseconds(), + SchemaVersion: ChatResultSchemaVersion, + Content: out.Choices[0].Message.Content, + Model: modelName, + Usage: out.Usage, + DurationMS: duration.Milliseconds(), }, nil } diff --git a/internal/llm/client_test.go b/internal/llm/client_test.go new file mode 100644 index 0000000..8f39bf1 --- /dev/null +++ b/internal/llm/client_test.go @@ -0,0 +1,43 @@ +package llm + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" +) + +func TestChatResultIncludesSchemaVersion(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v1/chat/completions" { + t.Fatalf("path = %q, want /v1/chat/completions", r.URL.Path) + } + _ = json.NewEncoder(w).Encode(map[string]any{ + "model": "qwen2.5-14b", + "choices": []map[string]any{ + {"message": map[string]string{"role": "assistant", "content": `{"ok":true}`}}, + }, + "usage": map[string]int{ + "prompt_tokens": 10, + "completion_tokens": 2, + "total_tokens": 12, + }, + }) + })) + defer server.Close() + + client := New(server.URL, "", "fallback-model", 0) + got, err := client.Chat(t.Context(), ChatInput{User: "test", MaxTokens: 32}) + if err != nil { + t.Fatalf("Chat: %v", err) + } + if got.SchemaVersion != ChatResultSchemaVersion { + t.Fatalf("schema_version = %q, want %q", got.SchemaVersion, ChatResultSchemaVersion) + } + if got.Content != `{"ok":true}` { + t.Fatalf("content = %q", got.Content) + } + if got.Usage == nil || got.Usage.TotalTokens != 12 { + t.Fatalf("usage = %#v", got.Usage) + } +} diff --git a/internal/transcription/client.go b/internal/transcription/client.go index c6a7f52..1a872f2 100644 --- a/internal/transcription/client.go +++ b/internal/transcription/client.go @@ -60,15 +60,18 @@ type Segment struct { Speaker string `json:"speaker,omitempty"` } +const ResultSchemaVersion = "ai.transcription_result.v1" + type Result struct { - Provider string `json:"provider,omitempty"` - Model string `json:"model,omitempty"` - Attempts []Attempt `json:"attempts,omitempty"` - Language string `json:"language"` - Segments []Segment `json:"segments"` - DiarizeError *string `json:"diarize_error,omitempty"` - AlignError *string `json:"align_error,omitempty"` - DurationMS int64 `json:"duration_ms"` + SchemaVersion string `json:"schema_version"` + Provider string `json:"provider,omitempty"` + Model string `json:"model,omitempty"` + Attempts []Attempt `json:"attempts,omitempty"` + Language string `json:"language"` + Segments []Segment `json:"segments"` + DiarizeError *string `json:"diarize_error,omitempty"` + AlignError *string `json:"align_error,omitempty"` + DurationMS int64 `json:"duration_ms"` } type Attempt struct { @@ -199,11 +202,12 @@ func (c *Client) transcribeWithProvider(ctx context.Context, provider ProviderCo attempt.Text = text attempt.Segments = segments return &Result{ - Provider: provider.Name, - Model: resp.Model, - Language: firstNonEmpty(resp.Language, in.Language, "unknown"), - Segments: segments, - DurationMS: duration.Milliseconds(), + SchemaVersion: ResultSchemaVersion, + Provider: provider.Name, + Model: resp.Model, + Language: firstNonEmpty(resp.Language, in.Language, "unknown"), + Segments: segments, + DurationMS: duration.Milliseconds(), }, attempt, nil } diff --git a/internal/transcription/client_test.go b/internal/transcription/client_test.go index 555f85b..b11d90a 100644 --- a/internal/transcription/client_test.go +++ b/internal/transcription/client_test.go @@ -84,6 +84,9 @@ func TestWhisperUsesAudioTranscriptionsEndpoint(t *testing.T) { if len(got.Segments) != 2 || got.Segments[0].Text != "Алло, тест." || got.Segments[1].Start != 1.2 { t.Fatalf("segments = %#v", got.Segments) } + if got.SchemaVersion != ResultSchemaVersion { + t.Fatalf("schema_version = %q, want %q", got.SchemaVersion, ResultSchemaVersion) + } } func TestWhisperFallsBackToJSONWhenVerboseJSONUnsupported(t *testing.T) {