Add transcription comparison stats

2026-06-09 14:59:08 +03:00
parent 88e7c86836
commit 35c60f0e0e
3 changed files with 118 additions and 8 deletions
--- a/internal/httpapi/dashboard.go
+++ b/internal/httpapi/dashboard.go
@@ -8,12 +8,13 @@ import (
 )

 type dashboardResponse struct {
-	At        time.Time               `json:"at"`
-	Summary   dashboardSummary        `json:"summary"`
-	Stats     *model.Stats            `json:"stats"`
-	Providers providersStatusResponse `json:"providers"`
-	Infra     infraStatusResponse     `json:"infra"`
-	Jobs      []*model.JobSummary     `json:"jobs"`
+	At                      time.Time                           `json:"at"`
+	Summary                 dashboardSummary                    `json:"summary"`
+	Stats                   *model.Stats                        `json:"stats"`
+	Providers               providersStatusResponse             `json:"providers"`
+	Infra                   infraStatusResponse                 `json:"infra"`
+	TranscriptionComparison []model.TranscriptionComparisonStat `json:"transcription_comparison"` // per-provider transcription stats, filled from s.store.TranscriptionComparison
+	Jobs                    []*model.JobSummary                 `json:"jobs"`
 }

 type dashboardSummary struct {
@@ -43,6 +44,11 @@ func (s *Server) handleDashboard(w http.ResponseWriter, r *http.Request) {
 		writeError(w, http.StatusInternalServerError, err.Error())
 		return
 	}
+	comparison, err := s.store.TranscriptionComparison(ctx)
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}

 	resp := dashboardResponse{
 		At:      now,
@@ -57,8 +63,9 @@ func (s *Server) handleDashboard(w http.ResponseWriter, r *http.Request) {
 				s.checkAudioLLM(ctx, "voxtral-small", s.cfg.VoxtralBaseURL, s.cfg.VoxtralAPIKey, s.cfg.VoxtralModel, s.cfg.VoxtralTimeout),
 			},
 		},
-		Infra: loadInfraSnapshot(r, s.cfg),
-		Jobs:  jobs,
+		Infra:                   loadInfraSnapshot(r, s.cfg),
+		TranscriptionComparison: comparison,
+		Jobs:                    jobs,
 	}
 	writeJSON(w, http.StatusOK, resp)
 }
--- a/internal/model/job.go
+++ b/internal/model/job.go
@@ -119,6 +119,22 @@ type ErrorStat struct {
 	Last24h      int64  `json:"last_24h"`
 }

+type TranscriptionComparisonStat struct { // TranscriptionComparisonStat is one per-provider row of aggregated transcription attempt statistics.
+	Provider        string     `json:"provider"`          // provider name taken from each attempt; rows are grouped by it
+	Model           string     `json:"model,omitempty"`   // MAX of the attempts' model strings, or "" when none is set
+	Attempts        int64      `json:"attempts"`          // total attempts recorded for this provider
+	Success         int64      `json:"success"`           // attempts whose status is "ok"
+	Failed          int64      `json:"failed"`            // attempts with a non-"ok" status
+	SuccessRate     float64    `json:"success_rate"`      // Success / Attempts as a fraction (0 when there are no attempts)
+	Wins            int64      `json:"wins"`              // attempts whose provider equals the job result's top-level provider
+	Last24hAttempts int64      `json:"last_24h_attempts"` // attempts from jobs completed within the last 24 hours
+	Last24hSuccess  int64      `json:"last_24h_success"`  // "ok" attempts from jobs completed within the last 24 hours
+	AvgDurationMS   int64      `json:"avg_duration_ms"`   // mean of the attempts' duration_ms, rounded to a whole number
+	P50DurationMS   int64      `json:"p50_duration_ms"`   // median (percentile_cont 0.5) of duration_ms, rounded
+	AvgTextChars    int64      `json:"avg_text_chars"`    // mean length of the attempts' text (missing text counts as 0), rounded
+	LastAt          *time.Time `json:"last_at,omitempty"` // latest completed_at among the contributing jobs
+}
+
 type OwnerStat struct {
 	OwnerService string `json:"owner_service"`
 	TaskType     string `json:"task_type"`
--- a/internal/store/store.go
+++ b/internal/store/store.go
@@ -2,6 +2,7 @@ package store

 import (
 	"context"
+	"database/sql"
 	"encoding/json"
 	"errors"
 	"fmt"
@@ -621,6 +622,92 @@ ORDER BY owner_service, last_24h DESC, total DESC
 	return out, errorRows.Err()
 }

+func (s *Store) TranscriptionComparison(ctx context.Context) ([]model.TranscriptionComparisonStat, error) { // TranscriptionComparison aggregates per-provider attempt statistics from completed transcription jobs; it returns a nil slice when no rows match.
+	const q = `
+WITH done_jobs AS (
+  SELECT result, completed_at, result->>'provider' AS winner_provider
+  FROM ai_jobs
+  WHERE task_type = 'transcription'
+    AND status = 'done'
+    AND jsonb_typeof(result->'attempts') = 'array'
+),
+attempts AS (
+  SELECT
+    completed_at,
+    winner_provider,
+    item
+  FROM done_jobs
+  CROSS JOIN LATERAL jsonb_array_elements(result->'attempts') AS item
+)
+SELECT
+  item->>'provider' AS provider,
+  COALESCE(NULLIF(MAX(item->>'model'), ''), '') AS model,
+  count(*) AS attempts,
+  count(*) FILTER (WHERE item->>'status' = 'ok') AS success,
+  count(*) FILTER (WHERE item->>'status' IS DISTINCT FROM 'ok') AS failed,
+  COALESCE(
+    count(*) FILTER (WHERE item->>'status' = 'ok')::double precision / NULLIF(count(*), 0),
+    0
+  ) AS success_rate,
+  count(*) FILTER (WHERE item->>'provider' = winner_provider) AS wins,
+  count(*) FILTER (WHERE completed_at > NOW() - INTERVAL '24 hours') AS last_24h_attempts,
+  count(*) FILTER (WHERE completed_at > NOW() - INTERVAL '24 hours' AND item->>'status' = 'ok') AS last_24h_success,
+  COALESCE(avg(NULLIF(item->>'duration_ms', '')::bigint), 0) AS avg_duration_ms,
+  COALESCE(percentile_cont(0.5) WITHIN GROUP (ORDER BY NULLIF(item->>'duration_ms', '')::bigint), 0) AS p50_duration_ms,
+  COALESCE(avg(length(COALESCE(item->>'text', ''))), 0) AS avg_text_chars,
+  max(completed_at) AS last_at
+FROM attempts
+WHERE COALESCE(item->>'provider', '') <> ''
+GROUP BY item->>'provider'
+ORDER BY wins DESC, success DESC, item->>'provider'
+` // The jsonb_typeof guard skips jobs whose "attempts" is missing, JSON null, or not an array (jsonb_array_elements would otherwise fail the whole query); IS DISTINCT FROM makes attempts with a missing status count as failed so attempts = success + failed.
+	rows, err := s.pool.Query(ctx, q)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close() // release the result set on every return path
+
+	var out []model.TranscriptionComparisonStat
+	for rows.Next() {
+		var stat model.TranscriptionComparisonStat
+		var avgDuration sql.NullFloat64 // the three aggregate columns are scanned as nullable floats and rounded below
+		var p50Duration sql.NullFloat64
+		var avgText sql.NullFloat64
+		if err := rows.Scan( // destination order must match the SELECT column order in q
+			&stat.Provider,
+			&stat.Model,
+			&stat.Attempts,
+			&stat.Success,
+			&stat.Failed,
+			&stat.SuccessRate,
+			&stat.Wins,
+			&stat.Last24hAttempts,
+			&stat.Last24hSuccess,
+			&avgDuration,
+			&p50Duration,
+			&avgText,
+			&stat.LastAt,
+		); err != nil {
+			return nil, err
+		}
+		stat.AvgDurationMS = roundedInt64(avgDuration) // fractional aggregates become whole numbers for the API
+		stat.P50DurationMS = roundedInt64(p50Duration)
+		stat.AvgTextChars = roundedInt64(avgText)
+		out = append(out, stat)
+	}
+	return out, rows.Err() // rows.Err reports any error that ended iteration early
+}
+
+func roundedInt64(v sql.NullFloat64) int64 { // roundedInt64 converts a nullable float into a rounded, non-negative int64.
+	if !v.Valid { // SQL NULL maps to 0
+		return 0
+	}
+	if v.Float64 < 0 { // negative values are clamped to 0 instead of being rounded
+		return 0
+	}
+	return int64(v.Float64 + 0.5) // adding 0.5 before the truncating conversion rounds a non-negative value to the nearest integer (halves go up)
+}
+
 func scanJobSummary(row pgx.Row) (*model.JobSummary, error) {
 	var job model.JobSummary
 	err := row.Scan(