// gitcaddy-server/models/actions/runner_routing.go

// Copyright 2026 MarketAlly. All rights reserved.
// SPDX-License-Identifier: MIT

package actions

import (
	"context"
	"math"
	"sort"

	"code.gitcaddy.com/server/v3/models/db"
	"code.gitcaddy.com/server/v3/modules/log"
	"code.gitcaddy.com/server/v3/modules/optional"
	"code.gitcaddy.com/server/v3/modules/setting"
)

// RunnerBandwidthScore computes the routing score used to rank a runner for
// job assignment; a higher score means a more attractive runner.
//
// Download bandwidth dominates the score and latency acts as a secondary
// penalty:
//   - A runner with no capabilities, no bandwidth data, or a non-positive
//     download speed receives the neutral score 50.
//   - The bandwidth part is log10(DownloadMbps) * 33.3, floored at 0, so
//     1 Mbps gives 0, 10 Mbps about 33, 100 Mbps about 67 and 1000 Mbps
//     about 100. It is not capped, so faster links score above 100.
//   - Latency above 10 ms costs one point per additional 10 ms, capped at 20.
//
// The result can therefore be negative for slow, high-latency runners.
func RunnerBandwidthScore(runner *ActionRunner) float64 {
	const (
		neutralScore      = 50.0 // returned when nothing is known about the link
		pointsPerDecade   = 33.3 // score gained per tenfold increase in Mbps
		freeLatencyMs     = 10.0 // latency up to this value is not penalized
		maxLatencyPenalty = 20.0 // upper bound on the latency deduction
	)

	caps := runner.GetCapabilities()
	if caps == nil || caps.Bandwidth == nil || caps.Bandwidth.DownloadMbps <= 0 {
		return neutralScore
	}
	link := caps.Bandwidth

	// Sub-1 Mbps links have a negative logarithm; treat them as zero decades.
	decades := math.Max(math.Log10(link.DownloadMbps), 0)
	speedScore := decades * pointsPerDecade

	penalty := 0.0
	if link.LatencyMs > freeLatencyMs {
		penalty = math.Min((link.LatencyMs-freeLatencyMs)/10.0, maxLatencyPenalty)
	}

	return speedScore - penalty
}

// ShouldAssignJobToRunner determines if a job should be assigned to this runner
// considering bandwidth-aware routing.
//
// IMPORTANT: This function should NEVER leave a valid runner idle when there are
// waiting jobs. Bandwidth routing is a preference, not a hard block.
//
// Decision order:
//  1. Routing disabled in settings -> assign.
//  2. No other online runner matches the job's labels -> assign.
//  3. Every competing runner is busy -> assign.
//  4. This runner's score is within BandwidthScoreThreshold of the best idle
//     competitor -> assign.
//  5. Otherwise a clearly better idle runner exists -> defer to it.
//
// Returns: (shouldAssign bool, reason string)
func ShouldAssignJobToRunner(ctx context.Context, runner *ActionRunner, job *ActionRunJob) (bool, string) {
	if !setting.Actions.BandwidthAwareRouting {
		return true, "bandwidth routing disabled"
	}

	// Always assign if this runner is the only option
	// (e.g., macos-only jobs when only mac runner available).
	// A failed lookup also yields no competitors, so errors fail open.
	competingRunners := findCompetingRunners(ctx, runner, job)
	if len(competingRunners) == 0 {
		return true, "only matching runner"
	}

	myScore := RunnerBandwidthScore(runner)

	// Find the best score among IDLE competitors only. The running maximum
	// starts at -Inf rather than 0 because RunnerBandwidthScore can be zero or
	// negative (slow link plus latency penalty); seeding with 0 would silently
	// ignore such competitors and skip a deferral that should happen.
	bestIdleScore := math.Inf(-1)
	var bestIdleCompetitor *ActionRunner
	allCompetitorsBusy := true

	for _, r := range competingRunners {
		if !isRunnerIdle(ctx, r) {
			continue
		}
		allCompetitorsBusy = false
		if score := RunnerBandwidthScore(r); score > bestIdleScore {
			bestIdleScore = score
			bestIdleCompetitor = r
		}
	}

	// If all competing runners are busy, always assign to this runner.
	// We should never leave a valid idle runner sitting when jobs are waiting.
	if allCompetitorsBusy {
		return true, "all competing runners busy"
	}

	// If this runner is within threshold of best idle runner, allow assignment.
	// NOTE(review): the default threshold is reportedly 20 — confirm in settings.
	threshold := setting.Actions.BandwidthScoreThreshold
	if myScore >= bestIdleScore-threshold {
		return true, "within threshold of best idle runner"
	}

	// Only defer if there's actually a better idle runner available.
	// Give the better runner a brief window to claim it.
	if bestIdleCompetitor != nil {
		log.Debug("Runner %s (score: %.1f) deferred job to faster idle runner %s (score: %.1f)",
			runner.Name, myScore, bestIdleCompetitor.Name, bestIdleScore)
		return false, "faster idle runner available"
	}

	// Default: always assign rather than leave runner idle.
	return true, "default assignment"
}

// findCompetingRunners returns the online runners, other than excludeRunner,
// whose labels match job.RunsOn. If the lookup fails the error is logged and
// nil is returned, which callers see as "no competitors".
func findCompetingRunners(ctx context.Context, excludeRunner *ActionRunner, job *ActionRunJob) []*ActionRunner {
	online, err := db.Find[ActionRunner](ctx, FindRunnerOptions{
		IsOnline: optional.Some(true),
	})
	if err != nil {
		log.Error("Failed to find competing runners: %v", err)
		return nil
	}

	var rivals []*ActionRunner
	for _, candidate := range online {
		switch {
		case candidate.ID == excludeRunner.ID:
			// The requesting runner does not compete with itself.
		case !candidate.IsOnline():
			// Re-checked on the loaded record in addition to the query filter.
		case candidate.CanMatchLabels(job.RunsOn):
			rivals = append(rivals, candidate)
		}
	}

	return rivals
}

// isRunnerIdle reports whether the runner has no ActionTask rows in
// StatusRunning. If the count query fails the error is logged and the runner
// is treated as busy (false).
func isRunnerIdle(ctx context.Context, runner *ActionRunner) bool {
	runningTasks := db.GetEngine(ctx).
		Where("runner_id = ? AND status = ?", runner.ID, StatusRunning)

	active, err := runningTasks.Count(&ActionTask{})
	if err != nil {
		log.Error("Failed to check if runner %s is idle: %v", runner.Name, err)
		return false
	}

	return active == 0
}

// GetRunnersForJobByBandwidth returns the online runners whose labels match
// job.RunsOn, sorted by bandwidth score (best first).
//
// Each runner is scored exactly once before sorting, so the comparator does
// not recompute RunnerBandwidthScore on every comparison. The sort is stable:
// runners with equal scores keep the order in which the lookup returned them.
//
// If the lookup fails the error is logged and nil is returned; nil is also
// returned when no runner matches.
func GetRunnersForJobByBandwidth(ctx context.Context, job *ActionRunJob) []*ActionRunner {
	runners, err := db.Find[ActionRunner](ctx, FindRunnerOptions{
		IsOnline: optional.Some(true),
	})
	if err != nil {
		log.Error("Failed to find runners for job: %v", err)
		return nil
	}

	// Pair every matching runner with its score up front.
	type scoredRunner struct {
		runner *ActionRunner
		score  float64
	}
	scored := make([]scoredRunner, 0, len(runners))
	for _, r := range runners {
		if r.CanMatchLabels(job.RunsOn) {
			scored = append(scored, scoredRunner{runner: r, score: RunnerBandwidthScore(r)})
		}
	}
	if len(scored) == 0 {
		// Keep the nil result callers got before when nothing matched.
		return nil
	}

	// Sort by bandwidth score (highest first).
	sort.SliceStable(scored, func(i, j int) bool {
		return scored[i].score > scored[j].score
	})

	matching := make([]*ActionRunner, len(scored))
	for i := range scored {
		matching[i] = scored[i].runner
	}
	return matching
}