diff --git a/README.md b/README.md index 147e237..a384181 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,8 @@ domain metadata fields in `input`, but the worker only reads chat fields such as - `GET /api/v1/stats` returns queue and error counters. - `GET /api/v1/providers/status` checks configured AI providers without returning secrets. +- `GET /api/v1/infra/status` returns AI-server sidecar telemetry + (GPU, containers, vLLM and WhisperX live metrics) when configured. - `GET /healthz` returns process health. - `GET /readyz` checks PostgreSQL readiness. diff --git a/internal/config/config.go b/internal/config/config.go index bae8f7c..d56da79 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -14,12 +14,14 @@ type Config struct { MigrateOnStart bool APIAuthToken string - LLMBaseURL string - LLMAPIKey string - LLMModel string - LLMTimeout time.Duration - WhisperXURL string - WhisperXTimeout time.Duration + LLMBaseURL string + LLMAPIKey string + LLMModel string + LLMTimeout time.Duration + WhisperXURL string + WhisperXTimeout time.Duration + AIStatsSidecarURL string + AIStatsTimeout time.Duration WorkerID string WorkerPollInterval time.Duration @@ -37,12 +39,14 @@ func Load() Config { MigrateOnStart: envBool("MIGRATE_ON_START", true), APIAuthToken: envString("AI_SERVICE_TOKEN", ""), - LLMBaseURL: envString("LLM_BASE_URL", ""), - LLMAPIKey: envString("LLM_API_KEY", ""), - LLMModel: envString("LLM_MODEL", "qwen2.5-14b"), - LLMTimeout: envDuration("LLM_TIMEOUT", 5*time.Minute), - WhisperXURL: envString("WHISPERX_URL", ""), - WhisperXTimeout: envDuration("WHISPERX_TIMEOUT", 10*time.Minute), + LLMBaseURL: envString("LLM_BASE_URL", ""), + LLMAPIKey: envString("LLM_API_KEY", ""), + LLMModel: envString("LLM_MODEL", "qwen2.5-14b"), + LLMTimeout: envDuration("LLM_TIMEOUT", 5*time.Minute), + WhisperXURL: envString("WHISPERX_URL", ""), + WhisperXTimeout: envDuration("WHISPERX_TIMEOUT", 10*time.Minute), + AIStatsSidecarURL: envString("AI_STATS_SIDECAR_URL", ""), + AIStatsTimeout: envDuration("AI_STATS_TIMEOUT", 8*time.Second), WorkerID: envString("WORKER_ID", hostname()), WorkerPollInterval: envDuration("WORKER_POLL_INTERVAL", 2*time.Second), diff --git a/internal/httpapi/infra.go b/internal/httpapi/infra.go new file mode 100644 index 0000000..4fd92e5 --- /dev/null +++ b/internal/httpapi/infra.go @@ -0,0 +1,63 @@ +package httpapi + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +type infraStatusResponse struct { + At time.Time `json:"at"` + Sidecar map[string]any `json:"sidecar,omitempty"` + SidecarError string `json:"sidecar_error,omitempty"` +} + +func (s *Server) handleInfraStatus(w http.ResponseWriter, r *http.Request) { + resp := infraStatusResponse{At: time.Now().UTC()} + baseURL := strings.TrimRight(strings.TrimSpace(s.cfg.AIStatsSidecarURL), "/") + if baseURL == "" { + resp.SidecarError = "AI stats sidecar is not configured" + writeJSON(w, http.StatusOK, resp) + return + } + + timeout := s.cfg.AIStatsTimeout + if timeout <= 0 { + timeout = 8 * time.Second + } + ctx, cancel := contextWithTimeout(r, timeout) + defer cancel() + sidecar, err := fetchAIStatsSidecar(ctx, baseURL, timeout) + if err != nil { + resp.SidecarError = err.Error() + } else { + resp.Sidecar = sidecar + } + writeJSON(w, http.StatusOK, resp) +} + +func fetchAIStatsSidecar(ctx context.Context, baseURL string, timeout time.Duration) (map[string]any, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/stats", nil) + if err != nil { + return nil, err + } + client := &http.Client{Timeout: timeout} + res, err := client.Do(req) + if err != nil { + return nil, err + } + defer res.Body.Close() + if res.StatusCode >= 300 { + body, _ := io.ReadAll(io.LimitReader(res.Body, 2048)) + return nil, fmt.Errorf("sidecar HTTP %d: %s", res.StatusCode, strings.TrimSpace(string(body))) + } + var out map[string]any + if err := json.NewDecoder(res.Body).Decode(&out); err != nil { + return nil, err + } + return out, nil +} diff --git a/internal/httpapi/server.go b/internal/httpapi/server.go index d81b866..7a9168b 100644 --- a/internal/httpapi/server.go +++ b/internal/httpapi/server.go @@ -67,6 +67,8 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { s.handleStats(w, r) case r.Method == http.MethodGet && path == "/api/v1/providers/status": s.handleProviderStatus(w, r) + case r.Method == http.MethodGet && path == "/api/v1/infra/status": + s.handleInfraStatus(w, r) default: writeError(w, http.StatusNotFound, "not found") } diff --git a/k8s/configmap.yaml b/k8s/configmap.yaml index 6ed7baa..b4b32c6 100644 --- a/k8s/configmap.yaml +++ b/k8s/configmap.yaml @@ -13,6 +13,8 @@ data: LLM_TIMEOUT: "5m" WHISPERX_URL: "http://10.2.3.5:8001" WHISPERX_TIMEOUT: "10m" + AI_STATS_SIDECAR_URL: "http://10.2.3.5:9090" + AI_STATS_TIMEOUT: "8s" WORKER_POLL_INTERVAL: "2s" WORKER_CLAIM_LIMIT: "4" WORKER_LEASE_TIMEOUT: "15m"