Requeue stale AI jobs
All checks were successful
CI / test (push) Successful in 12s
Build and Deploy / build-and-deploy (push) Successful in 27s

This commit is contained in:
Grendgi
2026-06-08 13:54:07 +03:00
parent 24c5d89c7b
commit 59e1073d96
6 changed files with 55 additions and 2 deletions

View File

@@ -24,12 +24,16 @@ type Worker struct {
modelProfile string
pollInterval time.Duration
claimLimit int
leaseTimeout time.Duration
}
func New(store *store.Store, llmClient *llm.Client, workerID, modelProfile string, pollInterval time.Duration, claimLimit int) *Worker {
func New(store *store.Store, llmClient *llm.Client, workerID, modelProfile string, pollInterval, leaseTimeout time.Duration, claimLimit int) *Worker {
if pollInterval <= 0 {
pollInterval = 2 * time.Second
}
if leaseTimeout <= 0 {
leaseTimeout = 15 * time.Minute
}
if claimLimit <= 0 {
claimLimit = 4
}
@@ -43,6 +47,7 @@ func New(store *store.Store, llmClient *llm.Client, workerID, modelProfile strin
modelProfile: modelProfile,
pollInterval: pollInterval,
claimLimit: claimLimit,
leaseTimeout: leaseTimeout,
}
}
@@ -60,6 +65,11 @@ func (w *Worker) Run(ctx context.Context) {
}
func (w *Worker) tick(ctx context.Context) {
if reset, err := w.store.RequeueStaleRunning(ctx, w.leaseTimeout, 100); err != nil {
slog.Error("requeue stale jobs failed", "error", err)
} else if reset > 0 {
slog.Warn("requeued stale jobs", "count", reset)
}
jobs, err := w.store.ClaimJobs(ctx, model.ClaimJobs{
WorkerID: w.workerID,
TaskTypes: []string{TaskLLMChat, TaskChatCompletion},