2
0
Files
logikonline 43adbaeffe
Some checks failed
Build and Release / Build Binaries (amd64, darwin, macos) (push) Has been cancelled
Build and Release / Build Binaries (amd64, linux, linux-latest) (push) Has been cancelled
Build and Release / Build Binaries (amd64, windows, windows-latest) (push) Has been cancelled
Build and Release / Build Binaries (arm64, darwin, macos) (push) Has been cancelled
Build and Release / Build Binary (linux/arm64) (push) Has been cancelled
Build and Release / Integration Tests (PostgreSQL) (push) Has been cancelled
Build and Release / Create Release (push) Has been cancelled
Build and Release / Unit Tests (push) Has been cancelled
Build and Release / Lint (push) Has been cancelled
feat(actions): add stuck job rescue mechanism
Introduce a cron task that rescues waiting jobs stuck due to version-sync issues by bumping the task version for affected scopes. Also bump version after each successful job pick to ensure runners re-poll for remaining waiting jobs. Configurable via STUCK_JOB_TIMEOUT (default: 5 minutes).
2026-01-27 09:00:36 -05:00

142 lines
3.9 KiB
Go

// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package actions
import (
"context"
"errors"
"fmt"
actions_model "code.gitcaddy.com/server/v3/models/actions"
"code.gitcaddy.com/server/v3/models/db"
secret_model "code.gitcaddy.com/server/v3/models/secret"
"code.gitcaddy.com/server/v3/modules/log"
notify_service "code.gitcaddy.com/server/v3/services/notify"
runnerv1 "code.gitea.io/actions-proto-go/runner/v1"
"google.golang.org/protobuf/types/known/structpb"
)
// PickTask tries to create a task for the given runner and package it for
// delivery to that runner.
//
// It returns (task, true, nil) when a task was created. It returns
// (nil, false, nil) when CreateTaskForRunner reports nothing to pick, or when
// an ephemeral runner still has a task in waiting, running or blocked state.
// For an ephemeral runner whose recorded task is in any other state, the
// runner row is deleted and an error is returned.
//
// Task creation and the loading of its attributes, secrets, variables, needs
// and context all run inside a single db.WithTx call; any failure there is
// returned wrapped with the name of the failing step.
func PickTask(ctx context.Context, runner *actions_model.ActionRunner) (*runnerv1.Task, bool, error) {
	if runner.Ephemeral {
		var prior actions_model.ActionTask
		found, err := db.GetEngine(ctx).Where("runner_id = ?", runner.ID).Get(&prior)
		if err != nil {
			// Surface the error so the runner retries; never fall through to picking.
			return nil, false, err
		}
		if found {
			switch prior.Status {
			case actions_model.StatusWaiting, actions_model.StatusRunning, actions_model.StatusBlocked:
				// The runner's task is still pending or in progress: hand out nothing.
				return nil, false, nil
			}
			// The recorded task is in some other state, so drop the runner itself.
			if _, err := db.DeleteByID[actions_model.ActionRunner](ctx, runner.ID); err != nil {
				return nil, false, err
			}
			return nil, false, errors.New("runner has been removed")
		}
	}

	var (
		picked     *runnerv1.Task
		pickedJob  *actions_model.ActionRunJob
		pickedTask *actions_model.ActionTask
	)

	// assemble creates the task and gathers everything the runner needs to
	// execute it. The outer variables are only written once every step has
	// succeeded.
	assemble := func(ctx context.Context) error {
		created, ok, err := actions_model.CreateTaskForRunner(ctx, runner)
		if err != nil {
			return fmt.Errorf("CreateTaskForRunner: %w", err)
		}
		if !ok {
			return nil
		}

		if err = created.LoadAttributes(ctx); err != nil {
			return fmt.Errorf("task LoadAttributes: %w", err)
		}

		secrets, err := secret_model.GetSecretsOfTask(ctx, created)
		if err != nil {
			return fmt.Errorf("GetSecretsOfTask: %w", err)
		}

		vars, err := actions_model.GetVariablesOfRun(ctx, created.Job.Run)
		if err != nil {
			return fmt.Errorf("GetVariablesOfRun: %w", err)
		}

		needs, err := findTaskNeeds(ctx, created.Job)
		if err != nil {
			return fmt.Errorf("findTaskNeeds: %w", err)
		}

		taskContext, err := generateTaskContext(created)
		if err != nil {
			return fmt.Errorf("generateTaskContext: %w", err)
		}

		pickedJob = created.Job
		pickedTask = created
		picked = &runnerv1.Task{
			Id:              created.ID,
			WorkflowPayload: created.Job.WorkflowPayload,
			Context:         taskContext,
			Secrets:         secrets,
			Vars:            vars,
			Needs:           needs,
		}
		return nil
	}

	if err := db.WithTx(ctx, assemble); err != nil {
		return nil, false, err
	}
	if picked == nil {
		return nil, false, nil
	}

	// A successful pick moves a job from Waiting to Running without changing
	// the tasks version. Runners already synced to that version would never
	// poll again, leaving other waiting jobs unpicked, so bump the version
	// here. This is best-effort: a failure is only logged.
	if err := actions_model.IncreaseTaskVersion(ctx, pickedJob.OwnerID, pickedJob.RepoID); err != nil {
		log.Error("PickTask: failed to increase task version after pick: %v", err)
	}

	run := pickedJob.Run
	CreateCommitStatusForRunJobs(ctx, run, pickedJob)
	notify_service.WorkflowJobStatusUpdate(ctx, run.Repo, run.TriggerUser, pickedJob, pickedTask)

	return picked, true, nil
}
// generateTaskContext builds the context struct sent to the runner for task t.
//
// It starts from the value produced by GenerateGiteaContext for the task's
// run and job, then adds two entries: "token" (t.Token) and
// "gitea_runtime_token" (a token from CreateAuthorizationToken for this
// task, run and job). An error from token creation or from the structpb
// conversion is returned unchanged.
func generateTaskContext(t *actions_model.ActionTask) (*structpb.Struct, error) {
	runtimeToken, tokenErr := CreateAuthorizationToken(t.ID, t.Job.RunID, t.JobID)
	if tokenErr != nil {
		return nil, tokenErr
	}

	job := t.Job
	payload := GenerateGiteaContext(job.Run, job)
	payload["token"] = t.Token
	payload["gitea_runtime_token"] = runtimeToken

	return structpb.NewStruct(payload)
}
// findTaskNeeds looks up the needs of taskJob via FindTaskNeeds and converts
// each entry to its runnerv1.TaskNeed form, copying the outputs and
// converting the result value. Map keys are kept exactly as FindTaskNeeds
// returned them. The returned map is non-nil even when there are no needs.
func findTaskNeeds(ctx context.Context, taskJob *actions_model.ActionRunJob) (map[string]*runnerv1.TaskNeed, error) {
	found, err := FindTaskNeeds(ctx, taskJob)
	if err != nil {
		return nil, err
	}

	converted := make(map[string]*runnerv1.TaskNeed, len(found))
	for key, need := range found {
		entry := &runnerv1.TaskNeed{
			Outputs: need.Outputs,
			Result:  runnerv1.Result(need.Result),
		}
		converted[key] = entry
	}
	return converted, nil
}