Batch enqueue TG AI classifications
All checks were successful
CI / go (push) Successful in 18s
CI / python (push) Successful in 2s
Build and Deploy / build-and-deploy (push) Successful in 29s

This commit is contained in:
Grendgi
2026-06-12 09:01:41 +03:00
parent fc696c9e13
commit fd1ee0611b
2 changed files with 145 additions and 48 deletions

View File

@@ -92,11 +92,11 @@ func main() {
defer ticker.Stop() defer ticker.Stop()
for { for {
updated, err := worker.runOnce(ctx) updated, enqueued, err := worker.runOnce(ctx)
if err != nil { if err != nil {
slog.Error("classify_batch_failed", "error", err) slog.Error("classify_batch_failed", "error", err)
} else if updated > 0 { } else if updated > 0 || enqueued > 0 {
slog.Info("classify_batch_done", "updated", updated) slog.Info("classify_batch_done", "updated", updated, "enqueued", enqueued)
} }
select { select {
@@ -114,42 +114,92 @@ type classifier struct {
ai *aiservice.Client ai *aiservice.Client
} }
func (c *classifier) runOnce(ctx context.Context) (int, error) { func (c *classifier) runOnce(ctx context.Context) (int, int, error) {
rows, err := c.loadPending(ctx) rows, err := c.loadPending(ctx)
if err != nil { if err != nil {
return 0, err return 0, 0, err
} }
if len(rows) == 0 { if len(rows) == 0 {
return 0, nil return 0, 0, nil
} }
byRef := make(map[string]pendingMessage, len(rows))
jobs := make([]aiservice.CreateJobRequest, 0, len(rows))
updated := 0 updated := 0
for _, msg := range rows { for _, msg := range rows {
key := verdictKey(msg.Vertical) key := verdictKey(msg.Vertical)
if _, ok := msg.Extracted[key]; ok { if _, ok := msg.Extracted[key]; ok {
continue continue
} }
if len(strings.TrimSpace(msg.Text)) < c.cfg.LLMMinTextLength {
verdict, err := c.classify(ctx, msg) verdict, err := marshalRaw(negativeVerdict(msg.Vertical))
if err != nil { if err != nil {
slog.Warn("llm_classify_failed", "message_id", msg.ID, "vertical", msg.Vertical, "error", err) slog.Warn("negative_verdict_failed", "message_id", msg.ID, "section_id", msg.SectionID, "error", err)
continue continue
} }
if len(verdict) == 0 {
verdict, err = marshalRaw(negativeVerdict(msg.Vertical))
if err != nil {
slog.Warn("negative_verdict_failed", "message_id", msg.ID, "error", err)
continue
}
}
if err := c.saveVerdict(ctx, msg, key, verdict); err != nil { if err := c.saveVerdict(ctx, msg, key, verdict); err != nil {
slog.Warn("save_verdict_failed", "message_id", msg.ID, "error", err) slog.Warn("save_verdict_failed", "message_id", msg.ID, "section_id", msg.SectionID, "error", err)
continue continue
} }
updated++ updated++
continue
} }
return updated, nil
req, err := c.buildJobRequest(ctx, msg)
if err != nil {
slog.Warn("build_classify_job_failed", "message_id", msg.ID, "section_id", msg.SectionID, "vertical", msg.Vertical, "error", err)
continue
}
byRef[req.OwnerRef] = msg
jobs = append(jobs, req)
}
if len(jobs) == 0 {
return updated, 0, nil
}
created, err := c.ai.CreateJobs(ctx, aiservice.CreateJobsRequest{
OwnerService: "monitoring-tg",
TaskType: "telegram_classification",
ModelProfile: c.cfg.LLMModel,
Priority: 5,
MaxAttempts: 2,
Jobs: jobs,
})
if err != nil {
return updated, 0, err
}
for _, job := range created {
msg, ok := byRef[job.OwnerRef]
if !ok {
continue
}
switch job.Status {
case "done":
verdict, err := c.verdictFromJob(job, msg.Vertical)
if err != nil {
slog.Warn("parse_classify_job_failed", "message_id", msg.ID, "section_id", msg.SectionID, "job_id", job.ID, "error", err)
continue
}
if err := c.saveVerdict(ctx, msg, verdictKey(msg.Vertical), verdict); err != nil {
slog.Warn("save_verdict_failed", "message_id", msg.ID, "section_id", msg.SectionID, "job_id", job.ID, "error", err)
continue
}
updated++
case "failed", "cancelled":
verdict, err := marshalRaw(negativeVerdict(msg.Vertical))
if err != nil {
slog.Warn("negative_verdict_failed", "message_id", msg.ID, "section_id", msg.SectionID, "job_id", job.ID, "error", err)
continue
}
if err := c.saveVerdict(ctx, msg, verdictKey(msg.Vertical), verdict); err != nil {
slog.Warn("save_failed_job_verdict_failed", "message_id", msg.ID, "section_id", msg.SectionID, "job_id", job.ID, "error", err)
continue
}
updated++
slog.Warn("classify_job_failed_marked_negative", "message_id", msg.ID, "section_id", msg.SectionID, "job_id", job.ID, "status", job.Status, "error", derefString(job.ErrorMessage))
}
}
return updated, len(created), nil
} }
func (c *classifier) loadPending(ctx context.Context) ([]pendingMessage, error) { func (c *classifier) loadPending(ctx context.Context) ([]pendingMessage, error) {
@@ -192,14 +242,10 @@ func (c *classifier) loadPending(ctx context.Context) ([]pendingMessage, error)
return out, rows.Err() return out, rows.Err()
} }
func (c *classifier) classify(ctx context.Context, msg pendingMessage) (json.RawMessage, error) { func (c *classifier) buildJobRequest(ctx context.Context, msg pendingMessage) (aiservice.CreateJobRequest, error) {
if len(strings.TrimSpace(msg.Text)) < c.cfg.LLMMinTextLength {
return marshalRaw(negativeVerdict(msg.Vertical))
}
systemPrompt, err := c.resolvePrompt(ctx, msg.Vertical, msg.DepartmentID, msg.SectionSlug) systemPrompt, err := c.resolvePrompt(ctx, msg.Vertical, msg.DepartmentID, msg.SectionSlug)
if err != nil { if err != nil {
return nil, err return aiservice.CreateJobRequest{}, err
} }
systemPrompt = promptWithVerticalGuard(msg.Vertical, systemPrompt) systemPrompt = promptWithVerticalGuard(msg.Vertical, systemPrompt)
@@ -215,27 +261,24 @@ func (c *classifier) classify(ctx context.Context, msg pendingMessage) (json.Raw
} }
body, err := json.Marshal(payload) body, err := json.Marshal(payload)
if err != nil { if err != nil {
return nil, err return aiservice.CreateJobRequest{}, err
} }
job, err := c.ai.CreateJob(ctx, aiservice.CreateJobRequest{ ownerRef := classifyOwnerRef(msg)
return aiservice.CreateJobRequest{
OwnerService: "monitoring-tg", OwnerService: "monitoring-tg",
OwnerRef: fmt.Sprintf("%d", msg.ID), OwnerRef: ownerRef,
TaskType: "telegram_classification", TaskType: "telegram_classification",
ModelProfile: c.cfg.LLMModel, ModelProfile: c.cfg.LLMModel,
Priority: 5, Priority: 5,
MaxAttempts: 2, MaxAttempts: 2,
Input: body, Input: body,
}) // Classification is section-specific because prompts are section-specific.
if err != nil { IdempotencyKey: "monitoring-tg:telegram_classification:" + ownerRef,
return nil, err }, nil
}
waitCtx, cancel := context.WithTimeout(ctx, c.cfg.LLMTimeout)
defer cancel()
job, err = c.ai.WaitJob(waitCtx, job.ID, 2*time.Second)
if err != nil {
return nil, err
} }
func (c *classifier) verdictFromJob(job *aiservice.Job, vertical string) (json.RawMessage, error) {
if job.Status != "done" { if job.Status != "done" {
msg := "ai-service job " + job.Status msg := "ai-service job " + job.Status
if job.ErrorMessage != nil && *job.ErrorMessage != "" { if job.ErrorMessage != nil && *job.ErrorMessage != "" {
@@ -256,7 +299,7 @@ func (c *classifier) classify(ctx context.Context, msg pendingMessage) (json.Raw
if err != nil { if err != nil {
return nil, err return nil, err
} }
normalized, err := normalizeVerdict(msg.Vertical, block) normalized, err := normalizeVerdict(vertical, block)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -323,6 +366,10 @@ func verdictKey(vertical string) string {
return "lead" return "lead"
} }
func classifyOwnerRef(msg pendingMessage) string {
return fmt.Sprintf("%d:%d", msg.ID, msg.SectionID)
}
func buildUserPrompt(text string) string { func buildUserPrompt(text string) string {
return "Текст сообщения:\n```\n" + text + "\n```\nВерни JSON." return "Текст сообщения:\n```\n" + text + "\n```\nВерни JSON."
} }
@@ -408,6 +455,13 @@ func asFloat(v any) (float64, bool) {
} }
} }
func derefString(v *string) string {
if v == nil {
return ""
}
return *v
}
func defaultPrompt(vertical string) string { func defaultPrompt(vertical string) string {
if vertical == verticalHR { if vertical == verticalHR {
return defaultHRPrompt return defaultHRPrompt
@@ -437,8 +491,8 @@ func loadConfig() config {
LLMTimeout: time.Duration(envInt("LLM_TIMEOUT_SECONDS", 120)) * time.Second, LLMTimeout: time.Duration(envInt("LLM_TIMEOUT_SECONDS", 120)) * time.Second,
LLMMaxTokens: envInt("LLM_MAX_TOKENS", 600), LLMMaxTokens: envInt("LLM_MAX_TOKENS", 600),
LLMMinTextLength: envInt("LLM_MIN_TEXT_LENGTH", 20), LLMMinTextLength: envInt("LLM_MIN_TEXT_LENGTH", 20),
ClassifyInterval: time.Duration(envInt("LLM_CLASSIFY_INTERVAL_SECONDS", 20)) * time.Second, ClassifyInterval: time.Duration(envInt("LLM_CLASSIFY_INTERVAL_SECONDS", 5)) * time.Second,
ClassifyBatchSize: envInt("LLM_CLASSIFY_BATCH_SIZE", 5), ClassifyBatchSize: envInt("LLM_CLASSIFY_BATCH_SIZE", 200),
AIServiceURL: env("AI_SERVICE_URL", ""), AIServiceURL: env("AI_SERVICE_URL", ""),
AIServiceToken: env("AI_SERVICE_TOKEN", ""), AIServiceToken: env("AI_SERVICE_TOKEN", ""),
} }

View File

@@ -40,12 +40,26 @@ type CreateJobRequest struct {
IdempotencyKey string `json:"idempotency_key,omitempty"` IdempotencyKey string `json:"idempotency_key,omitempty"`
} }
type CreateJobsRequest struct {
OwnerService string `json:"owner_service,omitempty"`
TaskType string `json:"task_type,omitempty"`
ModelProfile string `json:"model_profile,omitempty"`
Priority int `json:"priority,omitempty"`
MaxAttempts int `json:"max_attempts,omitempty"`
Jobs []CreateJobRequest `json:"jobs"`
}
type Job struct { type Job struct {
ID string `json:"id"` ID string `json:"id"`
OwnerService string `json:"owner_service,omitempty"`
OwnerRef string `json:"owner_ref,omitempty"`
TaskType string `json:"task_type,omitempty"`
ModelProfile string `json:"model_profile,omitempty"`
Status string `json:"status"` Status string `json:"status"`
Result json.RawMessage `json:"result,omitempty"` Result json.RawMessage `json:"result,omitempty"`
ErrorCode *string `json:"error_code,omitempty"` ErrorCode *string `json:"error_code,omitempty"`
ErrorMessage *string `json:"error_message,omitempty"` ErrorMessage *string `json:"error_message,omitempty"`
IdempotencyKey *string `json:"idempotency_key,omitempty"`
} }
type ChatResult struct { type ChatResult struct {
@@ -111,6 +125,35 @@ func (c *Client) CreateJob(ctx context.Context, req CreateJobRequest) (*Job, err
return &out, nil return &out, nil
} }
func (c *Client) CreateJobs(ctx context.Context, req CreateJobsRequest) ([]*Job, error) {
if c == nil {
return nil, fmt.Errorf("ai-service not configured")
}
body, err := json.Marshal(req)
if err != nil {
return nil, fmt.Errorf("marshal ai jobs: %w", err)
}
httpReq, err := c.request(ctx, http.MethodPost, "/api/v1/jobs/batch", body)
if err != nil {
return nil, err
}
resp, err := c.http.Do(httpReq)
if err != nil {
return nil, fmt.Errorf("create ai jobs: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return nil, fmt.Errorf("create ai jobs: http %d: %s", resp.StatusCode, readSmall(resp.Body))
}
var out struct {
Jobs []*Job `json:"jobs"`
}
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
return nil, fmt.Errorf("decode ai jobs: %w", err)
}
return out.Jobs, nil
}
func (c *Client) GetJob(ctx context.Context, id string) (*Job, error) { func (c *Client) GetJob(ctx context.Context, id string) (*Job, error) {
if c == nil || strings.TrimSpace(id) == "" { if c == nil || strings.TrimSpace(id) == "" {
return nil, fmt.Errorf("ai job id is required") return nil, fmt.Errorf("ai job id is required")