Some checks failed
Build and Release / Unit Tests (push) Successful in 3m9s
Build and Release / Create Release (push) Successful in 0s
Build and Release / Integration Tests (PostgreSQL) (push) Successful in 5m1s
Build and Release / Lint (push) Successful in 5m18s
Build and Release / Build Binaries (amd64, linux, linux-latest) (push) Successful in 3m2s
Build and Release / Build Binaries (amd64, windows, windows-latest) (push) Successful in 9h4m47s
Build and Release / Build Binaries (amd64, darwin, macos) (push) Successful in 11m9s
Build and Release / Build Binary (linux/arm64) (push) Has been cancelled
Build and Release / Build Binaries (arm64, darwin, macos) (push) Has been cancelled
Adds bandwidth monitoring to runner health checks with critical threshold of 8 Mbps (1 MB/s). Runners below this threshold are blocked from job assignment and trigger automatic bandwidth rechecks. Also refines health check logic: disk/CPU now only block at 95%+ (critical), and latency is informational only. Includes new RunnerBandwidthCheckRequest model to track recheck requests.
183 lines
5.3 KiB
Go
183 lines
5.3 KiB
Go
// Copyright 2026 MarketAlly. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package actions
|
|
|
|
import (
|
|
"context"
|
|
"math"
|
|
"sort"
|
|
|
|
"code.gitcaddy.com/server/v3/models/db"
|
|
"code.gitcaddy.com/server/v3/modules/log"
|
|
"code.gitcaddy.com/server/v3/modules/optional"
|
|
"code.gitcaddy.com/server/v3/modules/setting"
|
|
)
|
|
|
|
// RunnerBandwidthScore calculates a routing score for a runner
|
|
// Higher score = better for job assignment
|
|
// Score considers: bandwidth (primary), latency (secondary)
|
|
func RunnerBandwidthScore(runner *ActionRunner) float64 {
|
|
caps := runner.GetCapabilities()
|
|
if caps == nil {
|
|
return 50.0 // Default middling score if no capabilities
|
|
}
|
|
|
|
bw := caps.Bandwidth
|
|
if bw == nil || bw.DownloadMbps <= 0 {
|
|
// No bandwidth data - give a default middling score
|
|
return 50.0
|
|
}
|
|
|
|
// Base score from bandwidth (log scale to prevent huge gaps)
|
|
// 1 Mbps = 0, 10 Mbps = 33, 100 Mbps = 66, 1000 Mbps = 100
|
|
bandwidthScore := 0.0
|
|
if bw.DownloadMbps > 0 {
|
|
// Log10 scale: log10(1)=0, log10(10)=1, log10(100)=2, log10(1000)=3
|
|
logVal := math.Log10(bw.DownloadMbps)
|
|
if logVal < 0 {
|
|
logVal = 0
|
|
}
|
|
bandwidthScore = logVal * 33.3 // Scale to 0-100
|
|
}
|
|
|
|
// Latency penalty (subtract up to 20 points for high latency)
|
|
latencyPenalty := 0.0
|
|
if bw.LatencyMs > 10 {
|
|
// 0-10ms = no penalty, 10-50ms = small penalty, 50-200ms = bigger penalty
|
|
latencyPenalty = (bw.LatencyMs - 10) / 10.0
|
|
if latencyPenalty > 20 {
|
|
latencyPenalty = 20
|
|
}
|
|
}
|
|
|
|
return bandwidthScore - latencyPenalty
|
|
}
|
|
|
|
// ShouldAssignJobToRunner determines if a job should be assigned to this runner
|
|
// considering bandwidth-aware routing.
|
|
//
|
|
// IMPORTANT: This function should NEVER leave a valid runner idle when there are
|
|
// waiting jobs. Bandwidth routing is a preference, not a hard block.
|
|
// Returns: (shouldAssign bool, reason string)
|
|
func ShouldAssignJobToRunner(ctx context.Context, runner *ActionRunner, job *ActionRunJob) (bool, string) {
|
|
if !setting.Actions.BandwidthAwareRouting {
|
|
return true, "bandwidth routing disabled"
|
|
}
|
|
|
|
// Always assign if this runner is the only option
|
|
// (e.g., macos-only jobs when only mac runner available)
|
|
competingRunners := findCompetingRunners(ctx, runner, job)
|
|
if len(competingRunners) == 0 {
|
|
return true, "only matching runner"
|
|
}
|
|
|
|
// Calculate scores
|
|
myScore := RunnerBandwidthScore(runner)
|
|
|
|
// Find the best competing score among IDLE runners only
|
|
bestIdleScore := 0.0
|
|
var bestIdleCompetitor *ActionRunner
|
|
allCompetitorsBusy := true
|
|
|
|
for _, r := range competingRunners {
|
|
if isRunnerIdle(ctx, r) {
|
|
allCompetitorsBusy = false
|
|
score := RunnerBandwidthScore(r)
|
|
if score > bestIdleScore {
|
|
bestIdleScore = score
|
|
bestIdleCompetitor = r
|
|
}
|
|
}
|
|
}
|
|
|
|
// If all competing runners are busy, always assign to this runner
|
|
// We should never leave a valid idle runner sitting when jobs are waiting
|
|
if allCompetitorsBusy {
|
|
return true, "all competing runners busy"
|
|
}
|
|
|
|
// If this runner is within threshold of best idle runner, allow assignment
|
|
threshold := setting.Actions.BandwidthScoreThreshold // default 20
|
|
if myScore >= bestIdleScore-threshold {
|
|
return true, "within threshold of best idle runner"
|
|
}
|
|
|
|
// Only defer if there's actually a better idle runner available
|
|
// Give the better runner a brief window to claim it
|
|
if bestIdleCompetitor != nil {
|
|
log.Debug("Runner %s (score: %.1f) deferred job to faster idle runner %s (score: %.1f)",
|
|
runner.Name, myScore, bestIdleCompetitor.Name, bestIdleScore)
|
|
return false, "faster idle runner available"
|
|
}
|
|
|
|
// Default: always assign rather than leave runner idle
|
|
return true, "default assignment"
|
|
}
|
|
|
|
// findCompetingRunners finds other online runners that could handle this job
|
|
func findCompetingRunners(ctx context.Context, excludeRunner *ActionRunner, job *ActionRunJob) []*ActionRunner {
|
|
runners, err := db.Find[ActionRunner](ctx, FindRunnerOptions{
|
|
IsOnline: optional.Some(true),
|
|
})
|
|
if err != nil {
|
|
log.Error("Failed to find competing runners: %v", err)
|
|
return nil
|
|
}
|
|
|
|
var competing []*ActionRunner
|
|
for _, r := range runners {
|
|
// Skip the requesting runner
|
|
if r.ID == excludeRunner.ID {
|
|
continue
|
|
}
|
|
// Skip offline runners
|
|
if !r.IsOnline() {
|
|
continue
|
|
}
|
|
// Check if this runner can handle the job
|
|
if r.CanMatchLabels(job.RunsOn) {
|
|
competing = append(competing, r)
|
|
}
|
|
}
|
|
|
|
return competing
|
|
}
|
|
|
|
// isRunnerIdle checks if a runner currently has no active tasks
|
|
func isRunnerIdle(ctx context.Context, runner *ActionRunner) bool {
|
|
count, err := db.GetEngine(ctx).
|
|
Where("runner_id = ? AND status = ?", runner.ID, StatusRunning).
|
|
Count(&ActionTask{})
|
|
if err != nil {
|
|
log.Error("Failed to check if runner %s is idle: %v", runner.Name, err)
|
|
return false
|
|
}
|
|
return count == 0
|
|
}
|
|
|
|
// GetRunnersForJobByBandwidth returns runners sorted by bandwidth score (best first)
|
|
func GetRunnersForJobByBandwidth(ctx context.Context, job *ActionRunJob) []*ActionRunner {
|
|
runners, err := db.Find[ActionRunner](ctx, FindRunnerOptions{
|
|
IsOnline: optional.Some(true),
|
|
})
|
|
if err != nil {
|
|
log.Error("Failed to find runners for job: %v", err)
|
|
return nil
|
|
}
|
|
|
|
var matching []*ActionRunner
|
|
for _, r := range runners {
|
|
if r.CanMatchLabels(job.RunsOn) {
|
|
matching = append(matching, r)
|
|
}
|
|
}
|
|
|
|
// Sort by bandwidth score (highest first)
|
|
sort.Slice(matching, func(i, j int) bool {
|
|
return RunnerBandwidthScore(matching[i]) > RunnerBandwidthScore(matching[j])
|
|
})
|
|
|
|
return matching
|
|
}
|