Route monitoring TG classification through AI service
Some checks failed
Build and Deploy / build-and-deploy (push) Failing after 5s

This commit is contained in:
Grendgi
2026-06-08 15:47:42 +03:00
parent a924cd832b
commit 8259a01a88
6 changed files with 281 additions and 75 deletions

View File

@@ -4,8 +4,8 @@ Backend-сервис мониторинга Telegram-каналов для Porta
AI-классификация работают на Go, Python оставлен только как внутренний AI-классификация работают на Go, Python оставлен только как внутренний
MTProto/Telethon-адаптер для авторизации, опроса каналов и дозагрузки медиа. MTProto/Telethon-адаптер для авторизации, опроса каналов и дозагрузки медиа.
Сервис сохраняет сообщения в Postgres, раскладывает каналы по Сервис сохраняет сообщения в Postgres, раскладывает каналы по
вертикалям/подразделам и выполняет AI-анализ через OpenAI-compatible endpoint, вертикалям/подразделам и выполняет AI-анализ через общий `ai-service`,
общий с другими сервисами портала. который уже сам обращается к OpenAI-compatible backend.
Пользовательский UI живёт в `portal/frontend/src/app/features/monitoring-tg`. Пользовательский UI живёт в `portal/frontend/src/app/features/monitoring-tg`.
Этот сервис не отдаёт отдельные HTML-страницы и работает как API/worker за Этот сервис не отдаёт отдельные HTML-страницы и работает как API/worker за
@@ -33,7 +33,7 @@ MTProto/Telethon-адаптер для авторизации, опроса ка
## Запуск в k8s ## Запуск в k8s
Манифесты лежат в `k8s/`. Перед применением нужно заполнить `k8s/secrets.yaml` Манифесты лежат в `k8s/`. Перед применением нужно заполнить `k8s/secrets.yaml`
реальными Telegram-кредами и, при необходимости, `LLM_API_KEY`. реальными Telegram-кредами и `AI_SERVICE_TOKEN`.
```bash ```bash
kubectl apply -k k8s kubectl apply -k k8s

View File

@@ -1,14 +1,11 @@
package main package main
import ( import (
"bytes"
"context" "context"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"io"
"log/slog" "log/slog"
"net/http"
"net/url" "net/url"
"os" "os"
"os/signal" "os/signal"
@@ -17,6 +14,8 @@ import (
"syscall" "syscall"
"time" "time"
"monitoring-tg/internal/aiservice"
"github.com/jackc/pgx/v5/pgxpool" "github.com/jackc/pgx/v5/pgxpool"
) )
@@ -33,14 +32,14 @@ type config struct {
PostgresPort int PostgresPort int
LLMEnabled bool LLMEnabled bool
LLMBaseURL string
LLMAPIKey string
LLMModel string LLMModel string
LLMTimeout time.Duration LLMTimeout time.Duration
LLMMaxTokens int LLMMaxTokens int
LLMMinTextLength int LLMMinTextLength int
ClassifyInterval time.Duration ClassifyInterval time.Duration
ClassifyBatchSize int ClassifyBatchSize int
AIServiceURL string
AIServiceToken string
} }
type pendingMessage struct { type pendingMessage struct {
@@ -52,29 +51,10 @@ type pendingMessage struct {
Extracted map[string]any Extracted map[string]any
} }
type chatRequest struct {
Model string `json:"model"`
Messages []chatMessage `json:"messages"`
Temperature float64 `json:"temperature"`
MaxTokens int `json:"max_tokens"`
ResponseFormat responseFmt `json:"response_format"`
}
type chatMessage struct {
Role string `json:"role"`
Content string `json:"content"`
}
type responseFmt struct { type responseFmt struct {
Type string `json:"type"` Type string `json:"type"`
} }
type chatResponse struct {
Choices []struct {
Message chatMessage `json:"message"`
} `json:"choices"`
}
func main() { func main() {
cfg := loadConfig() cfg := loadConfig()
logger := slog.New(slog.NewJSONHandler(os.Stdout, nil)) logger := slog.New(slog.NewJSONHandler(os.Stdout, nil))
@@ -95,7 +75,11 @@ func main() {
} }
defer pool.Close() defer pool.Close()
worker := &classifier{cfg: cfg, db: pool, http: &http.Client{Timeout: cfg.LLMTimeout}} worker := &classifier{
cfg: cfg,
db: pool,
ai: aiservice.New(cfg.AIServiceURL, cfg.AIServiceToken, cfg.LLMTimeout),
}
slog.Info( slog.Info(
"classifier_started", "classifier_started",
"interval", cfg.ClassifyInterval.String(), "interval", cfg.ClassifyInterval.String(),
@@ -126,7 +110,7 @@ func main() {
type classifier struct { type classifier struct {
cfg config cfg config
db *pgxpool.Pool db *pgxpool.Pool
http *http.Client ai *aiservice.Client
} }
func (c *classifier) runOnce(ctx context.Context) (int, error) { func (c *classifier) runOnce(ctx context.Context) (int, error) {
@@ -218,50 +202,53 @@ func (c *classifier) classify(ctx context.Context, msg pendingMessage) (json.Raw
return nil, err return nil, err
} }
payload := chatRequest{ responseFormat, _ := json.Marshal(responseFmt{Type: "json_object"})
Model: c.cfg.LLMModel, payload := aiservice.ChatInput{
Messages: []chatMessage{ Messages: []aiservice.Message{
{Role: "system", Content: systemPrompt}, {Role: "system", Content: systemPrompt},
{Role: "user", Content: buildUserPrompt(msg.Text)}, {Role: "user", Content: buildUserPrompt(msg.Text)},
}, },
Temperature: 0.1, Temperature: 0.1,
MaxTokens: c.cfg.LLMMaxTokens, MaxTokens: c.cfg.LLMMaxTokens,
ResponseFormat: responseFmt{Type: "json_object"}, ResponseFormat: responseFormat,
} }
body, err := json.Marshal(payload) body, err := json.Marshal(payload)
if err != nil { if err != nil {
return nil, err return nil, err
} }
req, err := http.NewRequestWithContext(ctx, http.MethodPost, strings.TrimRight(c.cfg.LLMBaseURL, "/")+"/v1/chat/completions", bytes.NewReader(body)) job, err := c.ai.CreateJob(ctx, aiservice.CreateJobRequest{
OwnerService: "monitoring-tg",
OwnerRef: fmt.Sprintf("%d", msg.ID),
TaskType: "telegram_classification",
ModelProfile: c.cfg.LLMModel,
Priority: 5,
MaxAttempts: 2,
Input: body,
IdempotencyKey: fmt.Sprintf("monitoring-tg:telegram_classification:%d", msg.ID),
})
if err != nil { if err != nil {
return nil, err return nil, err
} }
req.Header.Set("Content-Type", "application/json") waitCtx, cancel := context.WithTimeout(ctx, c.cfg.LLMTimeout)
if c.cfg.LLMAPIKey != "" { defer cancel()
req.Header.Set("Authorization", "Bearer "+c.cfg.LLMAPIKey) job, err = c.ai.WaitJob(waitCtx, job.ID, 2*time.Second)
}
resp, err := c.http.Do(req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer resp.Body.Close() if job.Status != "done" {
msg := "ai-service job " + job.Status
if resp.StatusCode < 200 || resp.StatusCode >= 300 { if job.ErrorMessage != nil && *job.ErrorMessage != "" {
b, _ := io.ReadAll(io.LimitReader(resp.Body, 512)) msg += ": " + *job.ErrorMessage
return nil, fmt.Errorf("llm http %d: %s", resp.StatusCode, strings.TrimSpace(string(b)))
} }
return nil, errors.New(msg)
var parsed chatResponse }
if err := json.NewDecoder(resp.Body).Decode(&parsed); err != nil { var parsed aiservice.ChatResult
if err := json.Unmarshal(job.Result, &parsed); err != nil {
return nil, err return nil, err
} }
if len(parsed.Choices) == 0 {
return nil, errors.New("llm returned no choices")
}
raw := strings.TrimSpace(parsed.Choices[0].Message.Content) raw := strings.TrimSpace(parsed.Content)
if raw == "" { if raw == "" {
return nil, errors.New("llm returned empty content") return nil, errors.New("llm returned empty content")
} }
@@ -425,14 +412,14 @@ func loadConfig() config {
PostgresHost: env("POSTGRES_HOST", "db"), PostgresHost: env("POSTGRES_HOST", "db"),
PostgresPort: envInt("POSTGRES_PORT", 5432), PostgresPort: envInt("POSTGRES_PORT", 5432),
LLMEnabled: envBool("LLM_ENABLED", true), LLMEnabled: envBool("LLM_ENABLED", true),
LLMBaseURL: env("LLM_BASE_URL", "http://10.2.3.5:8002"),
LLMAPIKey: env("LLM_API_KEY", ""),
LLMModel: env("LLM_MODEL", "qwen2.5-14b"), LLMModel: env("LLM_MODEL", "qwen2.5-14b"),
LLMTimeout: time.Duration(envInt("LLM_TIMEOUT_SECONDS", 120)) * time.Second, LLMTimeout: time.Duration(envInt("LLM_TIMEOUT_SECONDS", 120)) * time.Second,
LLMMaxTokens: envInt("LLM_MAX_TOKENS", 600), LLMMaxTokens: envInt("LLM_MAX_TOKENS", 600),
LLMMinTextLength: envInt("LLM_MIN_TEXT_LENGTH", 20), LLMMinTextLength: envInt("LLM_MIN_TEXT_LENGTH", 20),
ClassifyInterval: time.Duration(envInt("LLM_CLASSIFY_INTERVAL_SECONDS", 20)) * time.Second, ClassifyInterval: time.Duration(envInt("LLM_CLASSIFY_INTERVAL_SECONDS", 20)) * time.Second,
ClassifyBatchSize: envInt("LLM_CLASSIFY_BATCH_SIZE", 5), ClassifyBatchSize: envInt("LLM_CLASSIFY_BATCH_SIZE", 5),
AIServiceURL: env("AI_SERVICE_URL", ""),
AIServiceToken: env("AI_SERVICE_TOKEN", ""),
} }
} }

View File

@@ -20,6 +20,8 @@ import (
"syscall" "syscall"
"time" "time"
"monitoring-tg/internal/aiservice"
"github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool" "github.com/jackc/pgx/v5/pgxpool"
"github.com/minio/minio-go/v7" "github.com/minio/minio-go/v7"
@@ -43,10 +45,10 @@ type config struct {
PostgresPort int PostgresPort int
PollIntervalSeconds int PollIntervalSeconds int
LLMEnabled bool LLMEnabled bool
LLMBaseURL string
LLMAPIKey string
LLMModel string LLMModel string
LLMTimeout time.Duration LLMTimeout time.Duration
AIServiceURL string
AIServiceToken string
MinioEndpoint string MinioEndpoint string
MinioAccessKey string MinioAccessKey string
MinioSecretKey string MinioSecretKey string
@@ -62,6 +64,7 @@ type app struct {
http *http.Client http *http.Client
python *http.Client python *http.Client
minio *minio.Client minio *minio.Client
ai *aiservice.Client
} }
type accessScope struct { type accessScope struct {
@@ -149,6 +152,7 @@ func main() {
http: &http.Client{Timeout: cfg.LLMTimeout}, http: &http.Client{Timeout: cfg.LLMTimeout},
python: &http.Client{Timeout: 15 * time.Minute}, python: &http.Client{Timeout: 15 * time.Minute},
minio: minioClient, minio: minioClient,
ai: aiservice.New(cfg.AIServiceURL, cfg.AIServiceToken, cfg.LLMTimeout),
} }
server := &http.Server{ server := &http.Server{
@@ -1118,24 +1122,32 @@ func (a *app) handleStats(ctx context.Context, w http.ResponseWriter, r *http.Re
func (a *app) handleLLMStatus(ctx context.Context, w http.ResponseWriter) { func (a *app) handleLLMStatus(ctx context.Context, w http.ResponseWriter) {
ready := false ready := false
var providerError string
model := a.cfg.LLMModel
if a.cfg.LLMEnabled { if a.cfg.LLMEnabled {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, strings.TrimRight(a.cfg.LLMBaseURL, "/")+"/v1/models", nil) status, err := a.ai.ProvidersStatus(ctx)
if err == nil { if err != nil {
if a.cfg.LLMAPIKey != "" { providerError = err.Error()
req.Header.Set("Authorization", "Bearer "+a.cfg.LLMAPIKey) } else {
for _, provider := range status.Providers {
if provider.Name == "llm" {
ready = provider.Configured && provider.OK
providerError = provider.Error
if provider.Model != "" {
model = provider.Model
}
break
} }
resp, err := a.http.Do(req)
if err == nil {
ready = resp.StatusCode >= 200 && resp.StatusCode < 300
_ = resp.Body.Close()
} }
} }
} }
writeJSON(w, http.StatusOK, map[string]any{ writeJSON(w, http.StatusOK, map[string]any{
"enabled": a.cfg.LLMEnabled, "enabled": a.cfg.LLMEnabled,
"ready": ready, "ready": ready,
"base_url": a.cfg.LLMBaseURL, "base_url": a.cfg.AIServiceURL,
"model": a.cfg.LLMModel, "model": model,
"provider": "ai-service",
"provider_error": providerError,
}) })
} }
@@ -1777,10 +1789,10 @@ func loadConfig() config {
PostgresPort: envInt("POSTGRES_PORT", 5432), PostgresPort: envInt("POSTGRES_PORT", 5432),
PollIntervalSeconds: envInt("POLL_INTERVAL_SECONDS", 60), PollIntervalSeconds: envInt("POLL_INTERVAL_SECONDS", 60),
LLMEnabled: envBool("LLM_ENABLED", true), LLMEnabled: envBool("LLM_ENABLED", true),
LLMBaseURL: env("LLM_BASE_URL", "http://10.2.3.5:8002"),
LLMAPIKey: env("LLM_API_KEY", ""),
LLMModel: env("LLM_MODEL", "qwen2.5-14b"), LLMModel: env("LLM_MODEL", "qwen2.5-14b"),
LLMTimeout: time.Duration(envInt("LLM_TIMEOUT_SECONDS", 120)) * time.Second, LLMTimeout: time.Duration(envInt("LLM_TIMEOUT_SECONDS", 120)) * time.Second,
AIServiceURL: env("AI_SERVICE_URL", ""),
AIServiceToken: env("AI_SERVICE_TOKEN", ""),
MinioEndpoint: env("MINIO_ENDPOINT", ""), MinioEndpoint: env("MINIO_ENDPOINT", ""),
MinioAccessKey: env("MINIO_ACCESS_KEY", ""), MinioAccessKey: env("MINIO_ACCESS_KEY", ""),
MinioSecretKey: env("MINIO_SECRET_KEY", ""), MinioSecretKey: env("MINIO_SECRET_KEY", ""),

View File

@@ -0,0 +1,207 @@
package aiservice
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"
)
// Client is an HTTP client for the ai-service API (job creation, job lookup
// and provider status).
//
// A nil *Client is safe to call: New returns nil when no base URL is
// configured, and CreateJob, GetJob and ProvidersStatus return an error in
// that case instead of dereferencing the receiver.
type Client struct {
// baseURL is the service root; New strips surrounding whitespace and trailing slashes.
baseURL string
// token is sent as "Authorization: Bearer <token>" when non-empty.
token string
// http performs the requests; New sets its Timeout.
http *http.Client
}
// Message is a single chat message: a role (for example "system" or "user")
// and its text content.
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
}
// ChatInput is the JSON payload describing a chat request: the messages plus
// generation options. It is meant to be marshalled and placed into
// CreateJobRequest.Input.
// NOTE(review): the exact schema expected by ai-service is not visible here — confirm.
type ChatInput struct {
Messages []Message `json:"messages"`
// Temperature has no omitempty, so a zero value is still serialized.
Temperature float64 `json:"temperature"`
// MaxTokens is omitted from the JSON when zero.
MaxTokens int `json:"max_tokens,omitempty"`
// ResponseFormat is pre-encoded JSON passed through verbatim; omitted when empty.
ResponseFormat json.RawMessage `json:"response_format,omitempty"`
}
// CreateJobRequest is the JSON body that CreateJob POSTs to /api/v1/jobs.
// NOTE(review): the meaning of Priority, MaxAttempts and ModelProfile is
// defined by ai-service and is not visible in this package — confirm there.
type CreateJobRequest struct {
OwnerService string `json:"owner_service"`
OwnerRef string `json:"owner_ref"`
TaskType string `json:"task_type"`
ModelProfile string `json:"model_profile"`
Priority int `json:"priority"`
MaxAttempts int `json:"max_attempts"`
// Input is pre-encoded JSON embedded verbatim into the request body.
Input json.RawMessage `json:"input"`
// IdempotencyKey is omitted from the JSON when empty.
IdempotencyKey string `json:"idempotency_key,omitempty"`
}
// Job is the job representation decoded from ai-service responses by
// CreateJob and GetJob. WaitJob treats the statuses "done", "failed" and
// "cancelled" as terminal.
type Job struct {
ID string `json:"id"`
Status string `json:"status"`
// Result is kept as raw JSON so callers can decode it into a task-specific type.
Result json.RawMessage `json:"result,omitempty"`
// ErrorCode and ErrorMessage are pointers, so an absent or null field stays nil.
ErrorCode *string `json:"error_code,omitempty"`
ErrorMessage *string `json:"error_message,omitempty"`
}
// ChatResult is the shape callers decode Job.Result into for chat-style jobs.
type ChatResult struct {
Content string `json:"content"`
Model string `json:"model"`
// DurationMS is presumably the processing time in milliseconds — confirm against ai-service.
DurationMS int64 `json:"duration_ms"`
}
// ProvidersStatus is the response body of GET /api/v1/providers/status:
// a timestamp and one entry per provider.
type ProvidersStatus struct {
At time.Time `json:"at"`
Providers []ProviderStatus `json:"providers"`
}
// ProviderStatus describes one provider entry inside ProvidersStatus.
// Callers select an entry by Name (for example "llm").
type ProviderStatus struct {
Name string `json:"name"`
Configured bool `json:"configured"`
OK bool `json:"ok"`
URL string `json:"url,omitempty"`
Model string `json:"model,omitempty"`
// LatencyMS is presumably the probe latency in milliseconds — confirm against ai-service.
LatencyMS int64 `json:"latency_ms,omitempty"`
Error string `json:"error,omitempty"`
}
// New builds a Client for the ai-service reachable at baseURL.
//
// baseURL is trimmed of surrounding whitespace and trailing slashes; when the
// result is empty New returns nil, which the Client methods report as an
// error rather than panicking. token is trimmed of whitespace. A non-positive
// timeout falls back to two minutes.
func New(baseURL, token string, timeout time.Duration) *Client {
	endpoint := strings.TrimRight(strings.TrimSpace(baseURL), "/")
	if endpoint == "" {
		return nil
	}

	httpTimeout := timeout
	if httpTimeout <= 0 {
		httpTimeout = 2 * time.Minute
	}

	client := &Client{
		baseURL: endpoint,
		token:   strings.TrimSpace(token),
		http:    &http.Client{Timeout: httpTimeout},
	}
	return client
}
// CreateJob POSTs req as JSON to /api/v1/jobs and decodes the response body
// into a Job.
//
// It returns an error when the client is nil, when req cannot be marshalled,
// when the HTTP call fails, when the status is outside 2xx (the message
// includes a short excerpt of the body), or when the response cannot be
// decoded.
func (c *Client) CreateJob(ctx context.Context, req CreateJobRequest) (*Job, error) {
	if c == nil {
		return nil, fmt.Errorf("ai-service not configured")
	}

	payload, marshalErr := json.Marshal(req)
	if marshalErr != nil {
		return nil, fmt.Errorf("marshal ai job: %w", marshalErr)
	}

	httpReq, buildErr := c.request(ctx, http.MethodPost, "/api/v1/jobs", payload)
	if buildErr != nil {
		return nil, buildErr
	}

	resp, doErr := c.http.Do(httpReq)
	if doErr != nil {
		return nil, fmt.Errorf("create ai job: %w", doErr)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code < 200 || code >= 300 {
		return nil, fmt.Errorf("create ai job: http %d: %s", code, readSmall(resp.Body))
	}

	created := new(Job)
	if err := json.NewDecoder(resp.Body).Decode(created); err != nil {
		return nil, fmt.Errorf("decode ai job: %w", err)
	}
	return created, nil
}
// GetJob fetches the job identified by id via GET /api/v1/jobs/{id} and
// decodes the response body into a Job.
//
// It returns an error when the client is nil ("ai-service not configured",
// matching CreateJob and ProvidersStatus), when id is blank, when the HTTP
// call fails, when the status is outside 2xx (the message includes a short
// excerpt of the body), or when the response cannot be decoded.
func (c *Client) GetJob(ctx context.Context, id string) (*Job, error) {
	// Report a missing client separately from a missing id so the caller sees
	// the real cause.
	if c == nil {
		return nil, fmt.Errorf("ai-service not configured")
	}
	if strings.TrimSpace(id) == "" {
		return nil, fmt.Errorf("ai job id is required")
	}
	// Escape the id so characters such as '/' or '?' cannot alter the request path.
	req, err := c.request(ctx, http.MethodGet, "/api/v1/jobs/"+url.PathEscape(id), nil)
	if err != nil {
		return nil, err
	}
	resp, err := c.http.Do(req)
	if err != nil {
		return nil, fmt.Errorf("get ai job: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, fmt.Errorf("get ai job: http %d: %s", resp.StatusCode, readSmall(resp.Body))
	}
	var out Job
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return nil, fmt.Errorf("decode ai job: %w", err)
	}
	return &out, nil
}
// WaitJob polls GetJob until the job reaches a terminal status ("done",
// "failed" or "cancelled") and returns that job.
//
// The first lookup happens immediately; later lookups follow a ticker with
// period pollInterval (two seconds when pollInterval is not positive). Any
// GetJob error ends the wait and is returned as is. When ctx is done while
// waiting between polls, ctx.Err() is returned.
func (c *Client) WaitJob(ctx context.Context, id string, pollInterval time.Duration) (*Job, error) {
	interval := pollInterval
	if interval <= 0 {
		interval = 2 * time.Second
	}

	tick := time.NewTicker(interval)
	defer tick.Stop()

	for {
		current, err := c.GetJob(ctx, id)
		if err != nil {
			return nil, err
		}
		if s := current.Status; s == "done" || s == "failed" || s == "cancelled" {
			return current, nil
		}

		select {
		case <-tick.C:
			// Next poll.
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
}
// ProvidersStatus calls GET /api/v1/providers/status and decodes the response
// body into a ProvidersStatus value.
//
// It returns an error when the client is nil, when the HTTP call fails, when
// the status is outside 2xx (the message includes a short excerpt of the
// body), or when the response cannot be decoded.
func (c *Client) ProvidersStatus(ctx context.Context) (*ProvidersStatus, error) {
	if c == nil {
		return nil, fmt.Errorf("ai-service not configured")
	}

	httpReq, buildErr := c.request(ctx, http.MethodGet, "/api/v1/providers/status", nil)
	if buildErr != nil {
		return nil, buildErr
	}

	resp, doErr := c.http.Do(httpReq)
	if doErr != nil {
		return nil, fmt.Errorf("ai providers status: %w", doErr)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code < 200 || code >= 300 {
		return nil, fmt.Errorf("ai providers status: http %d: %s", code, readSmall(resp.Body))
	}

	status := new(ProvidersStatus)
	if err := json.NewDecoder(resp.Body).Decode(status); err != nil {
		return nil, fmt.Errorf("decode ai providers status: %w", err)
	}
	return status, nil
}
// request builds an HTTP request for baseURL+path bound to ctx.
//
// A non-nil body is sent as the request payload with Content-Type
// application/json; a nil body produces a request without payload or
// Content-Type. The Authorization header is added only when a token is set.
func (c *Client) request(ctx context.Context, method, path string, body []byte) (*http.Request, error) {
	hasBody := body != nil

	// Keep payload as a nil interface when there is no body, so
	// NewRequestWithContext sees "no body" rather than a typed nil reader.
	var payload io.Reader
	if hasBody {
		payload = bytes.NewReader(body)
	}

	out, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, payload)
	if err != nil {
		return nil, err
	}

	if hasBody {
		out.Header.Set("Content-Type", "application/json")
	}
	if token := c.token; token != "" {
		out.Header.Set("Authorization", "Bearer "+token)
	}
	return out, nil
}
// readSmall reads at most 1024 bytes from r and returns them as a string
// with surrounding whitespace trimmed. It is used to embed a short excerpt
// of a response body in error messages. If reading fails, the read error's
// text is returned instead of the data.
func readSmall(r io.Reader) string {
	limited := io.LimitReader(r, 1024)
	data, readErr := io.ReadAll(limited)
	if readErr == nil {
		return strings.TrimSpace(string(data))
	}
	return readErr.Error()
}

View File

@@ -22,7 +22,7 @@ data:
POLL_INTERVAL_SECONDS: "60" POLL_INTERVAL_SECONDS: "60"
POLL_HISTORY_LIMIT: "50" POLL_HISTORY_LIMIT: "50"
LLM_ENABLED: "1" LLM_ENABLED: "1"
LLM_BASE_URL: "http://10.2.3.5:8002"
LLM_MODEL: "qwen2.5-14b" LLM_MODEL: "qwen2.5-14b"
LLM_MAX_TOKENS: "600" LLM_MAX_TOKENS: "600"
LLM_CLASSIFIER_OWNER: "go" LLM_CLASSIFIER_OWNER: "go"
AI_SERVICE_URL: "http://ai-service.ai-service.svc.cluster.local:8080"

View File

@@ -10,7 +10,7 @@ stringData:
TG_PHONE: "+971524994695" TG_PHONE: "+971524994695"
TG_SESSION_STRING: "" TG_SESSION_STRING: ""
POSTGRES_PASSWORD: "parser" POSTGRES_PASSWORD: "parser"
LLM_API_KEY: "sk-111f838ccec43406e078cd9094b6797307cb895236179f32" AI_SERVICE_TOKEN: "d18bcacf9e02bae1806ee6b6eeda62b95be6a915c0a22936d9a700128b275442"
MINIO_ACCESS_KEY: "admjn" MINIO_ACCESS_KEY: "admjn"
MINIO_SECRET_KEY: "TropicalMacaw9Fantasize" MINIO_SECRET_KEY: "TropicalMacaw9Fantasize"
--- ---