2
0
Files
logikonline 12f4ea03a8
Some checks failed
Build and Release / Create Release (push) Successful in 0s
Trigger Vault Plugin Rebuild / Trigger Vault Rebuild (push) Successful in 0s
Build and Release / Integration Tests (PostgreSQL) (push) Successful in 2m48s
Build and Release / Lint (push) Failing after 5m2s
Build and Release / Build Binaries (amd64, windows, windows-latest) (push) Has been skipped
Build and Release / Build Binaries (amd64, darwin, linux-latest) (push) Has been skipped
Build and Release / Build Binaries (amd64, linux, linux-latest) (push) Has been skipped
Build and Release / Build Binaries (arm64, darwin, linux-latest) (push) Has been skipped
Build and Release / Build Binaries (arm64, linux, linux-latest) (push) Has been skipped
Build and Release / Unit Tests (push) Successful in 5m37s
refactor: add /v3 suffix to module path for proper Go semver
Go's semantic import versioning requires v2+ modules to include the
major version in the module path. This enables using proper version
tags (v3.x.x) instead of pseudo-versions.

Updated module path: code.gitcaddy.com/server/v3
2026-01-17 17:53:59 -05:00

121 lines
3.4 KiB
Go

// Copyright 2026 MarketAlly. All rights reserved.
// SPDX-License-Identifier: MIT
package actions
import (
"context"
"sync"
"time"
actions_model "code.gitcaddy.com/server/v3/models/actions"
"code.gitcaddy.com/server/v3/modules/graceful"
"code.gitcaddy.com/server/v3/modules/log"
"code.gitcaddy.com/server/v3/modules/setting"
"code.gitcaddy.com/server/v3/services/mailer"
)
// Alert de-duplication state used by the runner health monitor.
var (
	// alertedRunnersMu is held around every access to alertedRunners.
	alertedRunnersMu sync.Mutex

	// alertedRunners remembers, per runner ID, when the last alert was
	// recorded so that the same runner is not reported over and over.
	alertedRunners = map[int64]time.Time{}

	// alertCooldown is the minimum time that has to pass before a runner
	// that was already alerted about may be alerted about again.
	alertCooldown = 60 * time.Minute
)
// StartRunnerHealthMonitor launches the background goroutine that periodically
// checks runner health.
//
// When setting.Actions.RunnerHealthCheck.Enabled is false it only logs that
// monitoring is disabled and returns without starting anything.
//
// NOTE: the ctx argument is not used by the current implementation; the
// monitor goroutine is bound to the graceful manager's shutdown context.
func StartRunnerHealthMonitor(ctx context.Context) {
	if setting.Actions.RunnerHealthCheck.Enabled {
		shutdownCtx := graceful.GetManager().ShutdownContext()
		go runHealthMonitor(shutdownCtx)
		return
	}

	log.Info("Runner health monitoring disabled")
}
// runHealthMonitor is the monitor loop: it runs checkRunnerHealth on every
// tick of a five-minute ticker and returns once ctx is done. The first check
// happens after the first tick, not immediately on start.
func runHealthMonitor(ctx context.Context) {
	const checkInterval = 5 * time.Minute

	log.Info("Starting runner health monitor")

	tick := time.NewTicker(checkInterval)
	defer tick.Stop()

	done := ctx.Done()
	for {
		select {
		case <-tick.C:
			checkRunnerHealth(ctx)
		case <-done:
			log.Info("Runner health monitor stopped")
			return
		}
	}
}
// checkRunnerHealth performs one monitoring pass over the runners returned by
// actions_model.GetUnhealthyRunners.
//
// For every runner that passes the alert cooldown (see shouldAlert) it logs a
// warning, sends the health alert mail and records the alert time. If the
// runner's health status has NeedsCleanup set, it additionally creates a
// cleanup request, provided actions_model.CanRequestCleanup allows it.
// Cleanup is only considered for runners that were alerted in this pass.
//
// Errors are logged and never returned; a failure for one runner does not
// stop processing of the remaining ones.
func checkRunnerHealth(ctx context.Context) {
	runners, err := actions_model.GetUnhealthyRunners(ctx)
	if err != nil {
		log.Error("Failed to get unhealthy runners: %v", err)
		return
	}

	for _, r := range runners {
		status := r.GetHealthStatus()

		// Still inside the cooldown window: nothing to do for this runner.
		if !shouldAlert(r.ID) {
			continue
		}

		log.Warn("Runner %s (ID: %d) is unhealthy: %s", r.Name, r.ID, status.Reason)
		mailer.SendRunnerHealthAlert(r, status)
		markAlerted(r.ID)

		if !status.NeedsCleanup {
			continue
		}

		// Only ask for a cleanup when the model layer says one may be requested.
		allowed, err := actions_model.CanRequestCleanup(ctx, r.ID)
		if err != nil {
			log.Error("Failed to check cleanup cooldown for runner %s: %v", r.Name, err)
			continue
		}
		if !allowed {
			continue
		}

		if _, err := actions_model.CreateCleanupRequest(ctx, r.ID); err != nil {
			log.Error("Failed to create cleanup request for runner %s: %v", r.Name, err)
			continue
		}
		log.Info("Requested cleanup for unhealthy runner %s", r.Name)
	}
}
// shouldAlert reports whether an alert may be sent for the given runner now.
// It returns true when no alert has been recorded for runnerID, or when more
// than alertCooldown has elapsed since the recorded one.
func shouldAlert(runnerID int64) bool {
	alertedRunnersMu.Lock()
	defer alertedRunnersMu.Unlock()

	if previous, seen := alertedRunners[runnerID]; seen {
		return time.Since(previous) > alertCooldown
	}
	return true
}
// markAlerted records the current time as the moment the given runner was
// last alerted about, replacing any earlier record for that runner.
func markAlerted(runnerID int64) {
	alertedRunnersMu.Lock()
	stamp := time.Now()
	alertedRunners[runnerID] = stamp
	alertedRunnersMu.Unlock()
}
// ClearRunnerAlert forgets the recorded alert for the given runner, so the
// next unhealthy report for it is alerted on without waiting for the cooldown.
// Intended to be called when a runner becomes healthy again. Clearing a runner
// that has no recorded alert is a no-op.
func ClearRunnerAlert(runnerID int64) {
	alertedRunnersMu.Lock()
	delete(alertedRunners, runnerID)
	alertedRunnersMu.Unlock()
}
// GetPendingCleanupRequests is meant to return the cleanup requests that have
// not been completed yet, so that a runner polling for tasks can find out
// whether it should perform a cleanup.
//
// It is currently a stub: ctx is unused and the function always returns a nil
// slice together with a nil error.
func GetPendingCleanupRequests(ctx context.Context) ([]*actions_model.RunnerCleanupRequest, error) {
	// TODO: implement
	var pending []*actions_model.RunnerCleanupRequest
	return pending, nil
}