gitcaddy-server/models/actions/task.go

// Copyright 2022 The Gitea Authors. All rights reserved.
// Copyright 2026 MarketAlly. All rights reserved.
// SPDX-License-Identifier: MIT

package actions

import (
	"context"
	"crypto/subtle"
	"errors"
	"fmt"
	"time"

	auth_model "code.gitcaddy.com/server/v3/models/auth"
	"code.gitcaddy.com/server/v3/models/db"
	"code.gitcaddy.com/server/v3/models/unit"
	"code.gitcaddy.com/server/v3/modules/log"
	"code.gitcaddy.com/server/v3/modules/optional"
	"code.gitcaddy.com/server/v3/modules/setting"
	"code.gitcaddy.com/server/v3/modules/timeutil"
	"code.gitcaddy.com/server/v3/modules/util"

	runnerv1 "code.gitea.io/actions-proto-go/runner/v1"
	lru "github.com/hashicorp/golang-lru/v2"
	"google.golang.org/protobuf/types/known/timestamppb"
	"xorm.io/builder"
)

// ActionTask represents a distribution of job
type ActionTask struct {
	ID       int64
	JobID    int64
	Job      *ActionRunJob     `xorm:"-"`
	Steps    []*ActionTaskStep `xorm:"-"`
	Attempt  int64
	RunnerID int64              `xorm:"index"`
	Status   Status             `xorm:"index"`
	Started  timeutil.TimeStamp `xorm:"index"`
	Stopped  timeutil.TimeStamp `xorm:"index(stopped_log_expired)"`

	RepoID            int64  `xorm:"index"`
	OwnerID           int64  `xorm:"index"`
	CommitSHA         string `xorm:"index"`
	IsForkPullRequest bool

	Token          string `xorm:"-"`
	TokenHash      string `xorm:"UNIQUE"` // sha256 of token
	TokenSalt      string
	TokenLastEight string `xorm:"index token_last_eight"`

	LogFilename  string     // file name of log
	LogInStorage bool       // read log from database or from storage
	LogLength    int64      // lines count
	LogSize      int64      // blob size
	LogIndexes   LogIndexes `xorm:"LONGBLOB"`                   // line number to offset
	LogExpired   bool       `xorm:"index(stopped_log_expired)"` // files that are too old will be deleted

	Created timeutil.TimeStamp `xorm:"created"`
	Updated timeutil.TimeStamp `xorm:"updated index"`
}

var successfulTokenTaskCache *lru.Cache[string, any]

func init() {
	db.RegisterModel(new(ActionTask), func() error {
		if setting.SuccessfulTokensCacheSize > 0 {
			var err error
			successfulTokenTaskCache, err = lru.New[string, any](setting.SuccessfulTokensCacheSize)
			if err != nil {
				return fmt.Errorf("unable to allocate Task cache: %v", err)
			}
		} else {
			successfulTokenTaskCache = nil
		}
		return nil
	})
}

func (task *ActionTask) Duration() time.Duration {
	return calculateDuration(task.Started, task.Stopped, task.Status)
}

func (task *ActionTask) IsStopped() bool {
	return task.Stopped > 0
}

func (task *ActionTask) GetRunLink() string {
	if task.Job == nil || task.Job.Run == nil {
		return ""
	}
	return task.Job.Run.Link()
}

func (task *ActionTask) GetCommitLink() string {
	if task.Job == nil || task.Job.Run == nil || task.Job.Run.Repo == nil {
		return ""
	}
	return task.Job.Run.Repo.CommitLink(task.CommitSHA)
}

func (task *ActionTask) GetRepoName() string {
	if task.Job == nil || task.Job.Run == nil || task.Job.Run.Repo == nil {
		return ""
	}
	return task.Job.Run.Repo.FullName()
}

func (task *ActionTask) GetRepoLink() string {
	if task.Job == nil || task.Job.Run == nil || task.Job.Run.Repo == nil {
		return ""
	}
	return task.Job.Run.Repo.Link()
}

func (task *ActionTask) LoadJob(ctx context.Context) error {
	if task.Job == nil {
		job, err := GetRunJobByID(ctx, task.JobID)
		if err != nil {
			return err
		}
		task.Job = job
	}
	return nil
}

// LoadAttributes load Job Steps if not loaded
func (task *ActionTask) LoadAttributes(ctx context.Context) error {
	if task == nil {
		return nil
	}
	if err := task.LoadJob(ctx); err != nil {
		return err
	}

	if err := task.Job.LoadAttributes(ctx); err != nil {
		return err
	}

	if task.Steps == nil { // be careful, an empty slice (not nil) also means loaded
		steps, err := GetTaskStepsByTaskID(ctx, task.ID)
		if err != nil {
			return err
		}
		task.Steps = steps
	}

	return nil
}

func (task *ActionTask) GenerateToken() (err error) {
	task.Token, task.TokenSalt, task.TokenHash, task.TokenLastEight, err = generateSaltedToken()
	return err
}

func GetTaskByID(ctx context.Context, id int64) (*ActionTask, error) {
	var task ActionTask
	has, err := db.GetEngine(ctx).Where("id=?", id).Get(&task)
	if err != nil {
		return nil, err
	} else if !has {
		return nil, fmt.Errorf("task with id %d: %w", id, util.ErrNotExist)
	}

	return &task, nil
}

func GetRunningTaskByToken(ctx context.Context, token string) (*ActionTask, error) {
	errNotExist := fmt.Errorf("task with token %q: %w", token, util.ErrNotExist)
	if token == "" {
		return nil, errNotExist
	}
	// A token is defined as being SHA1 sum these are 40 hexadecimal bytes long
	if len(token) != 40 {
		return nil, errNotExist
	}
	for _, x := range []byte(token) {
		if x < '0' || (x > '9' && x < 'a') || x > 'f' {
			return nil, errNotExist
		}
	}

	lastEight := token[len(token)-8:]

	if id := getTaskIDFromCache(token); id > 0 {
		task := &ActionTask{
			TokenLastEight: lastEight,
		}
		// Re-get the task from the db in case it has been deleted in the intervening period
		has, err := db.GetEngine(ctx).ID(id).Get(task)
		if err != nil {
			return nil, err
		}
		if has {
			return task, nil
		}
		successfulTokenTaskCache.Remove(token)
	}

	var tasks []*ActionTask
	err := db.GetEngine(ctx).Where("token_last_eight = ? AND status = ?", lastEight, StatusRunning).Find(&tasks)
	if err != nil {
		return nil, err
	} else if len(tasks) == 0 {
		return nil, errNotExist
	}

	for _, t := range tasks {
		tempHash := auth_model.HashToken(token, t.TokenSalt)
		if subtle.ConstantTimeCompare([]byte(t.TokenHash), []byte(tempHash)) == 1 {
			if successfulTokenTaskCache != nil {
				successfulTokenTaskCache.Add(token, t.ID)
			}
			return t, nil
		}
	}
	return nil, errNotExist
}

// CreateTaskForRunner creates a task for a runner.
// It checks runner health before assigning jobs and skips unhealthy runners.
func CreateTaskForRunner(ctx context.Context, runner *ActionRunner) (*ActionTask, bool, error) {
	// Check runner health (but don't reject yet - we may be the only option)
	var runnerUnhealthy bool
	var healthStatus *RunnerHealthStatus
	if setting.Actions.RunnerHealthCheck.Enabled {
		healthStatus = runner.GetHealthStatus()
		if !healthStatus.Healthy {
			runnerUnhealthy = true
			log.Warn("Runner %s (ID: %d) is unhealthy: %s (disk: %.1f%% used, latency: %.0fms)",
				runner.Name, runner.ID, healthStatus.Reason,
				healthStatus.DiskUsedPercent, healthStatus.LatencyMs)

			// Request cleanup if cooldown allows
			if healthStatus.NeedsCleanup {
				canCleanup, err := CanRequestCleanup(ctx, runner.ID)
				if err != nil {
					log.Error("Failed to check cleanup cooldown for runner %s: %v", runner.Name, err)
				} else if canCleanup {
					if _, err := CreateCleanupRequest(ctx, runner.ID); err != nil {
						log.Error("Failed to create cleanup request for runner %s: %v", runner.Name, err)
					} else {
						log.Info("Requested cleanup for unhealthy runner %s", runner.Name)
					}
				}
			}

			// Request bandwidth recheck if needed (bandwidth is tested infrequently)
			if healthStatus.NeedsBandwidthCheck {
				if err := RequestBandwidthCheck(ctx, runner.ID); err != nil {
					log.Error("Failed to request bandwidth check for runner %s: %v", runner.Name, err)
				} else {
					log.Info("Requested bandwidth recheck for runner %s (current: %.1f Mbps)", runner.Name, healthStatus.BandwidthMbps)
				}
			}
		}
	}

	ctx, committer, err := db.TxContext(ctx)
	if err != nil {
		return nil, false, err
	}
	defer committer.Close()

	e := db.GetEngine(ctx)

	jobCond := builder.NewCond()
	if runner.RepoID != 0 {
		jobCond = builder.Eq{"repo_id": runner.RepoID}
	} else if runner.OwnerID != 0 {
		jobCond = builder.In("repo_id", builder.Select("`repository`.id").From("repository").
			Join("INNER", "repo_unit", "`repository`.id = `repo_unit`.repo_id").
			Where(builder.Eq{"`repository`.owner_id": runner.OwnerID, "`repo_unit`.type": unit.TypeActions}))
	}
	if jobCond.IsValid() {
		jobCond = builder.In("run_id", builder.Select("id").From("action_run").Where(jobCond))
	}

	var jobs []*ActionRunJob
	if err := e.Where("task_id=? AND status=?", 0, StatusWaiting).And(jobCond).Asc("updated", "id").Find(&jobs); err != nil {
		return nil, false, err
	}

	// Bandwidth-aware job assignment
	var job *ActionRunJob
	log.Trace("runner labels: %v", runner.AgentLabels)
	for _, v := range jobs {
		if runner.CanMatchLabels(v.RunsOn) {
			// If runner is unhealthy, only skip if a healthy idle alternative exists
			if runnerUnhealthy {
				if hasHealthyIdleAlternative(ctx, runner, v) {
					log.Trace("Skipping job %d for unhealthy runner %s: healthy idle alternative available", v.ID, runner.Name)
					continue
				}
				log.Info("Assigning job %d to unhealthy runner %s (no healthy idle alternative)", v.ID, runner.Name)
				job = v
				break
			}

			// Check if this runner should get this job based on bandwidth
			shouldAssign, reason := ShouldAssignJobToRunner(ctx, runner, v)
			if shouldAssign {
				log.Trace("Assigning job %d to runner %s: %s", v.ID, runner.Name, reason)
				job = v
				break
			}
			log.Trace("Skipping job %d for runner %s: %s", v.ID, runner.Name, reason)
		}
	}
	if job == nil {
		return nil, false, nil
	}
	if err := job.LoadAttributes(ctx); err != nil {
		return nil, false, err
	}

	now := timeutil.TimeStampNow()
	job.Attempt++
	job.Started = now
	job.Status = StatusRunning

	task := &ActionTask{
		JobID:             job.ID,
		Attempt:           job.Attempt,
		RunnerID:          runner.ID,
		Started:           now,
		Status:            StatusRunning,
		RepoID:            job.RepoID,
		OwnerID:           job.OwnerID,
		CommitSHA:         job.CommitSHA,
		IsForkPullRequest: job.IsForkPullRequest,
	}
	if err := task.GenerateToken(); err != nil {
		return nil, false, err
	}

	workflowJob, err := job.ParseJob()
	if err != nil {
		return nil, false, fmt.Errorf("load job %d: %w", job.ID, err)
	}

	if _, err := e.Insert(task); err != nil {
		return nil, false, err
	}

	task.LogFilename = logFileName(job.Run.Repo.FullName(), task.ID)
	if err := UpdateTask(ctx, task, "log_filename"); err != nil {
		return nil, false, err
	}

	if len(workflowJob.Steps) > 0 {
		steps := make([]*ActionTaskStep, len(workflowJob.Steps))
		for i, v := range workflowJob.Steps {
			name := util.EllipsisDisplayString(v.String(), 255)
			steps[i] = &ActionTaskStep{
				Name:   name,
				TaskID: task.ID,
				Index:  int64(i),
				RepoID: task.RepoID,
				Status: StatusWaiting,
			}
		}
		if _, err := e.Insert(steps); err != nil {
			return nil, false, err
		}
		task.Steps = steps
	}

	job.TaskID = task.ID
	if n, err := UpdateRunJob(ctx, job, builder.Eq{"task_id": 0}); err != nil {
		return nil, false, err
	} else if n != 1 {
		return nil, false, nil
	}

	task.Job = job

	if err := committer.Commit(); err != nil {
		return nil, false, err
	}

	return task, true, nil
}

func UpdateTask(ctx context.Context, task *ActionTask, cols ...string) error {
	sess := db.GetEngine(ctx).ID(task.ID)
	if len(cols) > 0 {
		sess.Cols(cols...)
	}
	_, err := sess.Update(task)

	// Automatically delete the ephemeral runner if the task is done
	if err == nil && task.Status.IsDone() && util.SliceContainsString(cols, "status") {
		return DeleteEphemeralRunner(ctx, task.RunnerID)
	}
	return err
}

// UpdateTaskByState updates the task by the state.
// It will always update the task if the state is not final, even there is no change.
// So it will update ActionTask.Updated to avoid the task being judged as a zombie task.
func UpdateTaskByState(ctx context.Context, runnerID int64, state *runnerv1.TaskState) (*ActionTask, error) {
	stepStates := map[int64]*runnerv1.StepState{}
	for _, v := range state.Steps {
		stepStates[v.Id] = v
	}

	return db.WithTx2(ctx, func(ctx context.Context) (*ActionTask, error) {
		e := db.GetEngine(ctx)

		task := &ActionTask{}
		if has, err := e.ID(state.Id).Get(task); err != nil {
			return nil, err
		} else if !has {
			return nil, util.ErrNotExist
		} else if runnerID != task.RunnerID {
			return nil, errors.New("invalid runner for task")
		}

		if task.Status.IsDone() {
			// the state is final, do nothing
			return task, nil
		}

		// state.Result is not unspecified means the task is finished
		if state.Result != runnerv1.Result_RESULT_UNSPECIFIED {
			task.Status = Status(state.Result)
			task.Stopped = timeutil.TimeStamp(state.StoppedAt.AsTime().Unix())
			if err := UpdateTask(ctx, task, "status", "stopped"); err != nil {
				return nil, err
			}
			if _, err := UpdateRunJob(ctx, &ActionRunJob{
				ID:      task.JobID,
				Status:  task.Status,
				Stopped: task.Stopped,
			}, nil); err != nil {
				return nil, err
			}
		} else {
			// Force update ActionTask.Updated to avoid the task being judged as a zombie task
			task.Updated = timeutil.TimeStampNow()
			if err := UpdateTask(ctx, task, "updated"); err != nil {
				return nil, err
			}
		}

		if err := task.LoadAttributes(ctx); err != nil {
			return nil, err
		}

		for _, step := range task.Steps {
			var result runnerv1.Result
			if v, ok := stepStates[step.Index]; ok {
				result = v.Result
				step.LogIndex = v.LogIndex
				step.LogLength = v.LogLength
				step.Started = convertTimestamp(v.StartedAt)
				step.Stopped = convertTimestamp(v.StoppedAt)
			}
			if result != runnerv1.Result_RESULT_UNSPECIFIED {
				step.Status = Status(result)
			} else if step.Started != 0 {
				step.Status = StatusRunning
			}
			if _, err := e.ID(step.ID).Update(step); err != nil {
				return nil, err
			}
		}

		return task, nil
	})
}

func StopTask(ctx context.Context, taskID int64, status Status) error {
	if !status.IsDone() {
		return fmt.Errorf("cannot stop task with status %v", status)
	}
	e := db.GetEngine(ctx)

	task := &ActionTask{}
	if has, err := e.ID(taskID).Get(task); err != nil {
		return err
	} else if !has {
		return util.ErrNotExist
	}
	if task.Status.IsDone() {
		return nil
	}

	now := timeutil.TimeStampNow()
	task.Status = status
	task.Stopped = now
	if _, err := UpdateRunJob(ctx, &ActionRunJob{
		ID:      task.JobID,
		Status:  task.Status,
		Stopped: task.Stopped,
	}, nil); err != nil {
		return err
	}

	if err := UpdateTask(ctx, task, "status", "stopped"); err != nil {
		return err
	}

	if err := task.LoadAttributes(ctx); err != nil {
		return err
	}

	for _, step := range task.Steps {
		if !step.Status.IsDone() {
			step.Status = status
			if step.Started == 0 {
				step.Started = now
			}
			step.Stopped = now
		}
		if _, err := e.ID(step.ID).Update(step); err != nil {
			return err
		}
	}

	return nil
}

func FindOldTasksToExpire(ctx context.Context, olderThan timeutil.TimeStamp, limit int) ([]*ActionTask, error) {
	e := db.GetEngine(ctx)

	tasks := make([]*ActionTask, 0, limit)
	// Check "stopped > 0" to avoid deleting tasks that are still running
	return tasks, e.Where("stopped > 0 AND stopped < ? AND log_expired = ?", olderThan, false).
		Limit(limit).
		Find(&tasks)
}

func convertTimestamp(timestamp *timestamppb.Timestamp) timeutil.TimeStamp {
	if timestamp.GetSeconds() == 0 && timestamp.GetNanos() == 0 {
		return timeutil.TimeStamp(0)
	}
	return timeutil.TimeStamp(timestamp.AsTime().Unix())
}

func logFileName(repoFullName string, taskID int64) string {
	ret := fmt.Sprintf("%s/%02x/%d.log", repoFullName, taskID%256, taskID)

	if setting.Actions.LogCompression.IsZstd() {
		ret += ".zst"
	}

	return ret
}

func getTaskIDFromCache(token string) int64 {
	if successfulTokenTaskCache == nil {
		return 0
	}
	tInterface, ok := successfulTokenTaskCache.Get(token)
	if !ok {
		return 0
	}
	t, ok := tInterface.(int64)
	if !ok {
		return 0
	}
	return t
}

// GetCurrentTaskByRunnerID returns the currently running task for a runner
func GetCurrentTaskByRunnerID(ctx context.Context, runnerID int64) (*ActionTask, error) {
	var task ActionTask
	has, err := db.GetEngine(ctx).Where("runner_id = ? AND status = ?", runnerID, StatusRunning).Get(&task)
	if err != nil {
		return nil, err
	}
	if !has {
		return nil, nil
	}
	return &task, nil
}

// GetCurrentTasksForRunners returns a map of runner ID to their current task
func GetCurrentTasksForRunners(ctx context.Context, runnerIDs []int64) (map[int64]*ActionTask, error) {
	if len(runnerIDs) == 0 {
		return make(map[int64]*ActionTask), nil
	}

	var tasks []*ActionTask
	err := db.GetEngine(ctx).Where("status = ?", StatusRunning).In("runner_id", runnerIDs).Find(&tasks)
	if err != nil {
		return nil, err
	}

	result := make(map[int64]*ActionTask)
	for _, task := range tasks {
		result[task.RunnerID] = task
	}
	return result, nil
}

// hasHealthyIdleAlternative checks if there is another online, healthy, and idle
// runner that can handle this job. Used to decide whether an unhealthy runner
// should skip a job: only skip if a healthy idle alternative actually exists.
// If all alternatives are also unhealthy or busy, the unhealthy runner takes the job.
func hasHealthyIdleAlternative(ctx context.Context, runner *ActionRunner, job *ActionRunJob) bool {
	runners, err := db.Find[ActionRunner](ctx, FindRunnerOptions{
		IsOnline: optional.Some(true),
	})
	if err != nil {
		log.Error("Failed to find runners for job %d: %v", job.ID, err)
		return false // No alternative found, assign to current runner
	}

	for _, r := range runners {
		if r.ID == runner.ID {
			continue
		}
		if !r.IsOnline() {
			continue
		}
		// Only count as an alternative if it can match labels, is healthy, and is idle
		if r.CanMatchLabels(job.RunsOn) && r.IsHealthy() && isRunnerIdle(ctx, r) {
			return true
		}
	}

	return false
}