Expose AI infrastructure status
All checks were successful
CI / test (push) Successful in 16s
Build and Deploy / build-and-deploy (push) Successful in 23s

This commit is contained in:
Grendgi
2026-06-08 16:35:08 +03:00
parent 04e463d03f
commit c9435612ec
5 changed files with 85 additions and 12 deletions

View File

@@ -61,6 +61,8 @@ domain metadata fields in `input`, but the worker only reads chat fields such as
- `GET /api/v1/stats` returns queue and error counters. - `GET /api/v1/stats` returns queue and error counters.
- `GET /api/v1/providers/status` checks configured AI providers without - `GET /api/v1/providers/status` checks configured AI providers without
returning secrets. returning secrets.
- `GET /api/v1/infra/status` returns AI-server sidecar telemetry
(GPU, containers, vLLM and WhisperX live metrics) when configured.
- `GET /healthz` returns process health. - `GET /healthz` returns process health.
- `GET /readyz` checks PostgreSQL readiness. - `GET /readyz` checks PostgreSQL readiness.

View File

@@ -14,12 +14,14 @@ type Config struct {
MigrateOnStart bool MigrateOnStart bool
APIAuthToken string APIAuthToken string
LLMBaseURL string LLMBaseURL string
LLMAPIKey string LLMAPIKey string
LLMModel string LLMModel string
LLMTimeout time.Duration LLMTimeout time.Duration
WhisperXURL string WhisperXURL string
WhisperXTimeout time.Duration WhisperXTimeout time.Duration
AIStatsSidecarURL string
AIStatsTimeout time.Duration
WorkerID string WorkerID string
WorkerPollInterval time.Duration WorkerPollInterval time.Duration
@@ -37,12 +39,14 @@ func Load() Config {
MigrateOnStart: envBool("MIGRATE_ON_START", true), MigrateOnStart: envBool("MIGRATE_ON_START", true),
APIAuthToken: envString("AI_SERVICE_TOKEN", ""), APIAuthToken: envString("AI_SERVICE_TOKEN", ""),
LLMBaseURL: envString("LLM_BASE_URL", ""), LLMBaseURL: envString("LLM_BASE_URL", ""),
LLMAPIKey: envString("LLM_API_KEY", ""), LLMAPIKey: envString("LLM_API_KEY", ""),
LLMModel: envString("LLM_MODEL", "qwen2.5-14b"), LLMModel: envString("LLM_MODEL", "qwen2.5-14b"),
LLMTimeout: envDuration("LLM_TIMEOUT", 5*time.Minute), LLMTimeout: envDuration("LLM_TIMEOUT", 5*time.Minute),
WhisperXURL: envString("WHISPERX_URL", ""), WhisperXURL: envString("WHISPERX_URL", ""),
WhisperXTimeout: envDuration("WHISPERX_TIMEOUT", 10*time.Minute), WhisperXTimeout: envDuration("WHISPERX_TIMEOUT", 10*time.Minute),
AIStatsSidecarURL: envString("AI_STATS_SIDECAR_URL", ""),
AIStatsTimeout: envDuration("AI_STATS_TIMEOUT", 8*time.Second),
WorkerID: envString("WORKER_ID", hostname()), WorkerID: envString("WORKER_ID", hostname()),
WorkerPollInterval: envDuration("WORKER_POLL_INTERVAL", 2*time.Second), WorkerPollInterval: envDuration("WORKER_POLL_INTERVAL", 2*time.Second),

63
internal/httpapi/infra.go Normal file
View File

@@ -0,0 +1,63 @@
package httpapi
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// infraStatusResponse is the JSON body written by handleInfraStatus.
// Sidecar and SidecarError are both tagged omitempty, so each appears in the
// output only when it is non-empty.
type infraStatusResponse struct {
// At is the UTC time at which the handler started building the response.
At time.Time `json:"at"`
// Sidecar is the JSON object decoded from the sidecar's /stats endpoint,
// passed through without interpretation.
Sidecar map[string]any `json:"sidecar,omitempty"`
// SidecarError is a human-readable reason why no sidecar data is present
// (sidecar not configured, or the fetch failed).
SidecarError string `json:"sidecar_error,omitempty"`
}
// handleInfraStatus serves the AI-server sidecar telemetry.
//
// Every path passes http.StatusOK to writeJSON: a missing or failing sidecar
// is reported through the sidecar_error field of the body rather than through
// the status argument.
func (s *Server) handleInfraStatus(w http.ResponseWriter, r *http.Request) {
	payload := infraStatusResponse{At: time.Now().UTC()}

	// Normalise the configured URL: surrounding whitespace and trailing
	// slashes are dropped so the "/stats" suffix can be appended verbatim.
	endpoint := strings.TrimSpace(s.cfg.AIStatsSidecarURL)
	endpoint = strings.TrimRight(endpoint, "/")
	if endpoint == "" {
		payload.SidecarError = "AI stats sidecar is not configured"
		writeJSON(w, http.StatusOK, payload)
		return
	}

	// A non-positive configured timeout falls back to eight seconds.
	limit := s.cfg.AIStatsTimeout
	if limit <= 0 {
		limit = 8 * time.Second
	}

	// NOTE(review): contextWithTimeout is defined elsewhere; presumably it
	// derives a deadline-bound context from the request — confirm.
	ctx, cancel := contextWithTimeout(r, limit)
	defer cancel()

	if stats, err := fetchAIStatsSidecar(ctx, endpoint, limit); err == nil {
		payload.Sidecar = stats
	} else {
		payload.SidecarError = err.Error()
	}
	writeJSON(w, http.StatusOK, payload)
}
// fetchAIStatsSidecar performs GET baseURL+"/stats" and decodes the reply as
// a JSON object.
//
// ctx bounds the request; timeout is additionally applied as the HTTP client
// timeout. baseURL is used as given, so the caller is expected to have removed
// any trailing slash.
//
// It returns an error when the request cannot be built or sent, when the
// sidecar answers with a status of 300 or above (the message carries the
// status code and up to 2048 bytes of the body), when the body is larger than
// 4 MiB, or when the body is not a JSON object. Underlying errors are wrapped
// with %w so callers can still inspect them with errors.Is / errors.As.
func fetchAIStatsSidecar(ctx context.Context, baseURL string, timeout time.Duration) (map[string]any, error) {
	// Upper bound on a successful payload, so a misbehaving sidecar cannot
	// make this process buffer an arbitrarily large document.
	const maxSidecarBody = 4 << 20

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/stats", nil)
	if err != nil {
		return nil, fmt.Errorf("build sidecar request: %w", err)
	}

	// A client without an explicit Transport uses http.DefaultTransport, so
	// connections are still pooled across calls.
	client := &http.Client{Timeout: timeout}
	res, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("query sidecar: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode >= 300 {
		// Best effort: the body only enriches the message, so a read failure
		// here is deliberately ignored and the status code is reported alone.
		body, _ := io.ReadAll(io.LimitReader(res.Body, 2048))
		return nil, fmt.Errorf("sidecar HTTP %d: %s", res.StatusCode, strings.TrimSpace(string(body)))
	}

	// Allow one byte past the cap so "exactly at the cap" and "over the cap"
	// can be told apart after a failed decode.
	limited := &io.LimitedReader{R: res.Body, N: maxSidecarBody + 1}
	var out map[string]any
	if err := json.NewDecoder(limited).Decode(&out); err != nil {
		if limited.N <= 0 {
			return nil, fmt.Errorf("sidecar response exceeds %d bytes", maxSidecarBody)
		}
		return nil, fmt.Errorf("decode sidecar response: %w", err)
	}
	return out, nil
}

View File

@@ -67,6 +67,8 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
s.handleStats(w, r) s.handleStats(w, r)
case r.Method == http.MethodGet && path == "/api/v1/providers/status": case r.Method == http.MethodGet && path == "/api/v1/providers/status":
s.handleProviderStatus(w, r) s.handleProviderStatus(w, r)
case r.Method == http.MethodGet && path == "/api/v1/infra/status":
s.handleInfraStatus(w, r)
default: default:
writeError(w, http.StatusNotFound, "not found") writeError(w, http.StatusNotFound, "not found")
} }

View File

@@ -13,6 +13,8 @@ data:
LLM_TIMEOUT: "5m" LLM_TIMEOUT: "5m"
WHISPERX_URL: "http://10.2.3.5:8001" WHISPERX_URL: "http://10.2.3.5:8001"
WHISPERX_TIMEOUT: "10m" WHISPERX_TIMEOUT: "10m"
AI_STATS_SIDECAR_URL: "http://10.2.3.5:9090"
AI_STATS_TIMEOUT: "8s"
WORKER_POLL_INTERVAL: "2s" WORKER_POLL_INTERVAL: "2s"
WORKER_CLAIM_LIMIT: "4" WORKER_CLAIM_LIMIT: "4"
WORKER_LEASE_TIMEOUT: "15m" WORKER_LEASE_TIMEOUT: "15m"