diff --git a/README.md b/README.md index ca6a1c1..ed0fdcc 100644 --- a/README.md +++ b/README.md @@ -41,9 +41,23 @@ service. - `HTTP_PORT`, default `8080` - `DATABASE_URL`, required - `MIGRATE_ON_START`, default `true` +- `LLM_BASE_URL`, primary OpenAI-compatible LLM endpoint +- `LLM_API_KEY`, primary LLM API key +- `LLM_MODEL`, default `qwen2.5-14b` +- `LLM_TIMEOUT`, Go duration string such as `90s` or `5m`, default `5m` (unparsable values fall back to the default) +- `WHISPERX_URL`, WhisperX endpoint for transcription jobs +- `OPENCLAW_URL`, optional OpenClaw gateway URL if we route through OpenClaw + instead of direct vLLM ## Next integration step `telephony` should first mirror low-risk analysis jobs into this service while continuing local processing. Remote execution can then be enabled by feature flag per task type. + +## OpenClaw note + +Current Portal services call the local AI server directly: vLLM for LLM tasks +and WhisperX for transcription. OpenClaw is not required for the current +`ai-service` queue deployment. It becomes useful if we want centralized model +routing, provider fallback, request policy and cross-model gateway behavior. 
diff --git a/internal/config/config.go b/internal/config/config.go index c24140b..37cb844 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -3,6 +3,7 @@ package config import ( "os" "strconv" + "time" ) type Config struct { @@ -10,6 +11,13 @@ type Config struct { HTTPPort int DatabaseURL string MigrateOnStart bool + + LLMBaseURL string + LLMAPIKey string + LLMModel string + LLMTimeout time.Duration + WhisperXURL string + OpenClawURL string } func Load() Config { @@ -18,6 +26,13 @@ func Load() Config { HTTPPort: envInt("HTTP_PORT", 8080), DatabaseURL: envString("DATABASE_URL", ""), MigrateOnStart: envBool("MIGRATE_ON_START", true), + + LLMBaseURL: envString("LLM_BASE_URL", ""), + LLMAPIKey: envString("LLM_API_KEY", ""), + LLMModel: envString("LLM_MODEL", "qwen2.5-14b"), + LLMTimeout: envDuration("LLM_TIMEOUT", 5*time.Minute), + WhisperXURL: envString("WHISPERX_URL", ""), + OpenClawURL: envString("OPENCLAW_URL", ""), } } @@ -51,3 +66,19 @@ func envBool(key string, fallback bool) bool { } return v } + +// envDuration returns the environment variable key parsed by +// time.ParseDuration (for example "90s" or "5m"). It returns fallback when the +// variable is unset or empty, or when parsing fails; the parse error is +// discarded, so a malformed value silently yields the fallback. +func envDuration(key string, fallback time.Duration) time.Duration { + raw := os.Getenv(key) + if raw == "" { + return fallback + } + v, err := time.ParseDuration(raw) + if err != nil { + return fallback + } + return v +} diff --git a/k8s/configmap.yaml b/k8s/configmap.yaml index 3f24174..c4b9285 100644 --- a/k8s/configmap.yaml +++ b/k8s/configmap.yaml @@ -7,3 +7,10 @@ data: HTTP_HOST: "0.0.0.0" HTTP_PORT: "8080" MIGRATE_ON_START: "true" + # Default direct AI endpoints. OpenClaw can replace LLM_BASE_URL later when + # we decide to route model traffic through a gateway instead of direct vLLM. 
+ LLM_BASE_URL: "http://10.2.3.5:8002" + LLM_MODEL: "qwen2.5-14b" + LLM_TIMEOUT: "5m" + WHISPERX_URL: "http://10.2.3.5:8001" + OPENCLAW_URL: "" diff --git a/k8s/secrets.yaml b/k8s/secrets.yaml index 9de00a2..41b1a07 100644 --- a/k8s/secrets.yaml +++ b/k8s/secrets.yaml @@ -17,3 +17,5 @@ metadata: type: Opaque stringData: DATABASE_URL: "postgres://ai_service:ai_service@postgres:5432/ai_service?sslmode=disable" + # Placeholder only: supply the real key at deploy time and never commit it. + LLM_API_KEY: "CHANGE_ME"