Expose AI infrastructure status
This commit is contained in:
@@ -61,6 +61,8 @@ domain metadata fields in `input`, but the worker only reads chat fields such as
|
||||
- `GET /api/v1/stats` returns queue and error counters.
|
||||
- `GET /api/v1/providers/status` checks configured AI providers without
|
||||
returning secrets.
|
||||
- `GET /api/v1/infra/status` returns AI-server sidecar telemetry
|
||||
(GPU, containers, vLLM and WhisperX live metrics) when configured.
|
||||
- `GET /healthz` returns process health.
|
||||
- `GET /readyz` checks PostgreSQL readiness.
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ type Config struct {
|
||||
LLMTimeout time.Duration
|
||||
WhisperXURL string
|
||||
WhisperXTimeout time.Duration
|
||||
AIStatsSidecarURL string
|
||||
AIStatsTimeout time.Duration
|
||||
|
||||
WorkerID string
|
||||
WorkerPollInterval time.Duration
|
||||
@@ -43,6 +45,8 @@ func Load() Config {
|
||||
LLMTimeout: envDuration("LLM_TIMEOUT", 5*time.Minute),
|
||||
WhisperXURL: envString("WHISPERX_URL", ""),
|
||||
WhisperXTimeout: envDuration("WHISPERX_TIMEOUT", 10*time.Minute),
|
||||
AIStatsSidecarURL: envString("AI_STATS_SIDECAR_URL", ""),
|
||||
AIStatsTimeout: envDuration("AI_STATS_TIMEOUT", 8*time.Second),
|
||||
|
||||
WorkerID: envString("WORKER_ID", hostname()),
|
||||
WorkerPollInterval: envDuration("WORKER_POLL_INTERVAL", 2*time.Second),
|
||||
|
||||
63
internal/httpapi/infra.go
Normal file
63
internal/httpapi/infra.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package httpapi
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// infraStatusResponse is the JSON document written by handleInfraStatus.
type infraStatusResponse struct {
	// At is the moment the response was assembled; the handler sets it in UTC.
	At time.Time `json:"at"`

	// Sidecar carries the decoded sidecar payload as-is. It is left out of
	// the encoded output when empty.
	Sidecar map[string]any `json:"sidecar,omitempty"`

	// SidecarError explains why no sidecar payload is available (sidecar not
	// configured, or the fetch failed). It is left out of the encoded output
	// when empty.
	SidecarError string `json:"sidecar_error,omitempty"`
}
|
||||
|
||||
func (s *Server) handleInfraStatus(w http.ResponseWriter, r *http.Request) {
|
||||
resp := infraStatusResponse{At: time.Now().UTC()}
|
||||
baseURL := strings.TrimRight(strings.TrimSpace(s.cfg.AIStatsSidecarURL), "/")
|
||||
if baseURL == "" {
|
||||
resp.SidecarError = "AI stats sidecar is not configured"
|
||||
writeJSON(w, http.StatusOK, resp)
|
||||
return
|
||||
}
|
||||
|
||||
timeout := s.cfg.AIStatsTimeout
|
||||
if timeout <= 0 {
|
||||
timeout = 8 * time.Second
|
||||
}
|
||||
ctx, cancel := contextWithTimeout(r, timeout)
|
||||
defer cancel()
|
||||
sidecar, err := fetchAIStatsSidecar(ctx, baseURL, timeout)
|
||||
if err != nil {
|
||||
resp.SidecarError = err.Error()
|
||||
} else {
|
||||
resp.Sidecar = sidecar
|
||||
}
|
||||
writeJSON(w, http.StatusOK, resp)
|
||||
}
|
||||
|
||||
// fetchAIStatsSidecar issues GET baseURL+"/stats" and decodes the JSON object
// the sidecar returns.
//
// ctx bounds the request; timeout is additionally set as the HTTP client
// timeout. baseURL is used verbatim, so the caller is expected to have removed
// any trailing slash.
//
// A non-nil error is returned when:
//   - the request cannot be built or sent (the cause is wrapped, so
//     errors.Is/errors.As still see e.g. context cancellation);
//   - the sidecar replies with a status of 300 or above (the message includes
//     up to 2048 bytes of the response body);
//   - the response body is larger than 4 MiB;
//   - the response body cannot be decoded into a map.
//
// On error the returned map is nil.
func fetchAIStatsSidecar(ctx context.Context, baseURL string, timeout time.Duration) (map[string]any, error) {
	// Cap on a successful response body so a misbehaving sidecar cannot make
	// this process buffer an arbitrarily large document.
	const maxBodyBytes = 4 << 20

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/stats", nil)
	if err != nil {
		return nil, fmt.Errorf("building sidecar request: %w", err)
	}

	client := &http.Client{Timeout: timeout}
	res, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("requesting sidecar stats: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode >= 300 {
		// Best effort: the status code alone is still worth reporting if the
		// body cannot be read, so the read error is deliberately ignored.
		body, _ := io.ReadAll(io.LimitReader(res.Body, 2048))
		return nil, fmt.Errorf("sidecar HTTP %d: %s", res.StatusCode, strings.TrimSpace(string(body)))
	}

	// Allow one byte past the cap so "too large" can be told apart from
	// ordinary malformed JSON when decoding fails.
	limited := &io.LimitedReader{R: res.Body, N: maxBodyBytes + 1}

	var out map[string]any
	if err := json.NewDecoder(limited).Decode(&out); err != nil {
		if limited.N <= 0 {
			return nil, fmt.Errorf("sidecar response exceeds %d bytes", maxBodyBytes)
		}
		return nil, fmt.Errorf("decoding sidecar response: %w", err)
	}
	return out, nil
}
|
||||
@@ -67,6 +67,8 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
s.handleStats(w, r)
|
||||
case r.Method == http.MethodGet && path == "/api/v1/providers/status":
|
||||
s.handleProviderStatus(w, r)
|
||||
case r.Method == http.MethodGet && path == "/api/v1/infra/status":
|
||||
s.handleInfraStatus(w, r)
|
||||
default:
|
||||
writeError(w, http.StatusNotFound, "not found")
|
||||
}
|
||||
|
||||
@@ -13,6 +13,8 @@ data:
|
||||
LLM_TIMEOUT: "5m"
|
||||
WHISPERX_URL: "http://10.2.3.5:8001"
|
||||
WHISPERX_TIMEOUT: "10m"
|
||||
AI_STATS_SIDECAR_URL: "http://10.2.3.5:9090"
|
||||
AI_STATS_TIMEOUT: "8s"
|
||||
WORKER_POLL_INTERVAL: "2s"
|
||||
WORKER_CLAIM_LIMIT: "4"
|
||||
WORKER_LEASE_TIMEOUT: "15m"
|
||||
|
||||
Reference in New Issue
Block a user