Add AI queue backlog metrics
Some checks failed
CI / test (push) Failing after 8s
Build and Deploy / build-and-deploy (push) Successful in 24s

This commit is contained in:
Grendgi
2026-06-10 16:36:35 +03:00
parent 631a45aff3
commit 837acf2f00
2 changed files with 57 additions and 6 deletions

View File

@@ -129,6 +129,16 @@ type StageStat struct {
Retried24h int64 `json:"retried_24h"` Retried24h int64 `json:"retried_24h"`
} }
// BacklogStat is one row of the queue backlog report: the number of waiting
// and in-flight jobs for a single (owner service, task type, model profile)
// combination, plus how long the oldest waiting job has been queued.
type BacklogStat struct {
	// Grouping key of the row.
	OwnerService string `json:"owner_service"`
	TaskType     string `json:"task_type"`
	ModelProfile string `json:"model_profile"`

	// Job counts by status within the group.
	Pending int64 `json:"pending"`
	Running int64 `json:"running"`

	// OldestPendingAgeSeconds is the age, in whole seconds, of the oldest
	// pending job in the group.
	OldestPendingAgeSeconds int64 `json:"oldest_pending_age_seconds"`

	// OldestPendingScheduledAt is the scheduled time of the oldest pending
	// job, carried as a preformatted timestamp string. It is left empty, and
	// therefore omitted from the JSON output, when there is no such job.
	OldestPendingScheduledAt string `json:"oldest_pending_scheduled_at,omitempty"`
}
type OwnerStat struct { type OwnerStat struct {
OwnerService string `json:"owner_service"` OwnerService string `json:"owner_service"`
TaskType string `json:"task_type"` TaskType string `json:"task_type"`
@@ -143,4 +153,5 @@ type Stats struct {
Owners []OwnerStat `json:"owners,omitempty"` Owners []OwnerStat `json:"owners,omitempty"`
Errors []ErrorStat `json:"errors,omitempty"` Errors []ErrorStat `json:"errors,omitempty"`
Stages []StageStat `json:"stages,omitempty"` Stages []StageStat `json:"stages,omitempty"`
Backlog []BacklogStat `json:"backlog,omitempty"`
} }

View File

@@ -713,7 +713,47 @@ ORDER BY owner_service, task_type, model_profile
} }
out.Stages = append(out.Stages, stat) out.Stages = append(out.Stages, stat)
} }
return out, stageRows.Err() if err := stageRows.Err(); err != nil {
return nil, err
}
backlogRows, err := s.pool.Query(ctx, `
SELECT owner_service,
task_type,
model_profile,
count(*) FILTER (WHERE status = 'pending') AS pending,
count(*) FILTER (WHERE status = 'running') AS running,
COALESCE(EXTRACT(EPOCH FROM (NOW() - MIN(scheduled_at) FILTER (WHERE status = 'pending')))::bigint, 0) AS oldest_pending_age_seconds,
MIN(scheduled_at) FILTER (WHERE status = 'pending') AS oldest_pending_scheduled_at
FROM ai_jobs
WHERE status IN ('pending', 'running')
GROUP BY owner_service, task_type, model_profile
ORDER BY pending DESC, running DESC, owner_service, task_type, model_profile
`)
if err != nil {
return nil, err
}
defer backlogRows.Close()
for backlogRows.Next() {
var stat model.BacklogStat
var oldestPendingScheduledAt *time.Time
if err := backlogRows.Scan(
&stat.OwnerService,
&stat.TaskType,
&stat.ModelProfile,
&stat.Pending,
&stat.Running,
&stat.OldestPendingAgeSeconds,
&oldestPendingScheduledAt,
); err != nil {
return nil, err
}
if oldestPendingScheduledAt != nil {
stat.OldestPendingScheduledAt = oldestPendingScheduledAt.UTC().Format(time.RFC3339)
}
out.Backlog = append(out.Backlog, stat)
}
return out, backlogRows.Err()
} }
func scanJobSummary(row pgx.Row) (*model.JobSummary, error) { func scanJobSummary(row pgx.Row) (*model.JobSummary, error) {