Some checks failed
Build and Release / Create Release (push) Successful in 0s
Trigger Vault Plugin Rebuild / Trigger Vault Rebuild (push) Successful in 0s
Build and Release / Integration Tests (PostgreSQL) (push) Successful in 2m48s
Build and Release / Lint (push) Failing after 5m2s
Build and Release / Build Binaries (amd64, windows, windows-latest) (push) Has been skipped
Build and Release / Build Binaries (amd64, darwin, linux-latest) (push) Has been skipped
Build and Release / Build Binaries (amd64, linux, linux-latest) (push) Has been skipped
Build and Release / Build Binaries (arm64, darwin, linux-latest) (push) Has been skipped
Build and Release / Build Binaries (arm64, linux, linux-latest) (push) Has been skipped
Build and Release / Unit Tests (push) Successful in 5m37s
Go's semantic import versioning requires v2+ modules to include the major version in the module path. This enables using proper version tags (v3.x.x) instead of pseudo-versions. Updated module path: code.gitcaddy.com/server/v3
121 lines
3.4 KiB
Go
121 lines
3.4 KiB
Go
// Copyright 2026 MarketAlly. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package actions
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"time"
|
|
|
|
actions_model "code.gitcaddy.com/server/v3/models/actions"
|
|
"code.gitcaddy.com/server/v3/modules/graceful"
|
|
"code.gitcaddy.com/server/v3/modules/log"
|
|
"code.gitcaddy.com/server/v3/modules/setting"
|
|
"code.gitcaddy.com/server/v3/services/mailer"
|
|
)
|
|
|
|
// alertCooldown is the minimum time between two alerts for the same runner,
// so that a runner that stays unhealthy does not cause alert spam.
var alertCooldown = time.Hour

var (
	// alertedRunnersMu guards alertedRunners.
	alertedRunnersMu sync.Mutex

	// alertedRunners maps a runner ID to the time the last alert for that
	// runner was recorded.
	alertedRunners = map[int64]time.Time{}
)
|
|
|
|
// StartRunnerHealthMonitor starts a background goroutine that monitors runner health
|
|
func StartRunnerHealthMonitor(ctx context.Context) {
|
|
if !setting.Actions.RunnerHealthCheck.Enabled {
|
|
log.Info("Runner health monitoring disabled")
|
|
return
|
|
}
|
|
|
|
go runHealthMonitor(graceful.GetManager().ShutdownContext())
|
|
}
|
|
|
|
func runHealthMonitor(ctx context.Context) {
|
|
log.Info("Starting runner health monitor")
|
|
ticker := time.NewTicker(5 * time.Minute)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
log.Info("Runner health monitor stopped")
|
|
return
|
|
case <-ticker.C:
|
|
checkRunnerHealth(ctx)
|
|
}
|
|
}
|
|
}
|
|
|
|
// checkRunnerHealth checks all online runners and alerts on unhealthy ones
|
|
func checkRunnerHealth(ctx context.Context) {
|
|
unhealthyRunners, err := actions_model.GetUnhealthyRunners(ctx)
|
|
if err != nil {
|
|
log.Error("Failed to get unhealthy runners: %v", err)
|
|
return
|
|
}
|
|
|
|
for _, runner := range unhealthyRunners {
|
|
healthStatus := runner.GetHealthStatus()
|
|
|
|
// Check if we should send an alert
|
|
if shouldAlert(runner.ID) {
|
|
log.Warn("Runner %s (ID: %d) is unhealthy: %s", runner.Name, runner.ID, healthStatus.Reason)
|
|
|
|
// Send email alert
|
|
mailer.SendRunnerHealthAlert(runner, healthStatus)
|
|
|
|
// Mark as alerted
|
|
markAlerted(runner.ID)
|
|
|
|
// Request cleanup if not recently requested
|
|
if healthStatus.NeedsCleanup {
|
|
canCleanup, err := actions_model.CanRequestCleanup(ctx, runner.ID)
|
|
if err != nil {
|
|
log.Error("Failed to check cleanup cooldown for runner %s: %v", runner.Name, err)
|
|
} else if canCleanup {
|
|
if _, err := actions_model.CreateCleanupRequest(ctx, runner.ID); err != nil {
|
|
log.Error("Failed to create cleanup request for runner %s: %v", runner.Name, err)
|
|
} else {
|
|
log.Info("Requested cleanup for unhealthy runner %s", runner.Name)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// shouldAlert checks if we should send an alert for this runner
|
|
func shouldAlert(runnerID int64) bool {
|
|
alertedRunnersMu.Lock()
|
|
defer alertedRunnersMu.Unlock()
|
|
|
|
lastAlert, exists := alertedRunners[runnerID]
|
|
if !exists {
|
|
return true
|
|
}
|
|
return time.Since(lastAlert) > alertCooldown
|
|
}
|
|
|
|
// markAlerted marks a runner as alerted
|
|
func markAlerted(runnerID int64) {
|
|
alertedRunnersMu.Lock()
|
|
defer alertedRunnersMu.Unlock()
|
|
alertedRunners[runnerID] = time.Now()
|
|
}
|
|
|
|
// ClearRunnerAlert clears the alert status for a runner (call when runner becomes healthy)
|
|
func ClearRunnerAlert(runnerID int64) {
|
|
alertedRunnersMu.Lock()
|
|
defer alertedRunnersMu.Unlock()
|
|
delete(alertedRunners, runnerID)
|
|
}
|
|
|
|
// GetPendingCleanupRequests returns cleanup requests that haven't been completed
|
|
func GetPendingCleanupRequests(ctx context.Context) ([]*actions_model.RunnerCleanupRequest, error) {
|
|
// This will be used by the runner when it polls for tasks
|
|
// to check if it should perform cleanup
|
|
return nil, nil // TODO: implement
|
|
}
|