feat: Add auto-cleanup and fix container CPU detection
Some checks failed
CI / build-and-test (push) Failing after 37s
Some checks failed
CI / build-and-test (push) Failing after 37s
- Add automatic disk cleanup when usage exceeds 85% - Fix false CPU readings in LXC containers (was showing host load) - Add cross-platform cache cleanup (Linux, macOS, Windows) - Extend temp file patterns for go-build, node-compile-cache, etc. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,7 @@ import (
|
|||||||
"slices"
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"connectrpc.com/connect"
|
"connectrpc.com/connect"
|
||||||
@@ -23,6 +24,7 @@ import (
|
|||||||
|
|
||||||
"gitea.com/gitea/act_runner/internal/app/poll"
|
"gitea.com/gitea/act_runner/internal/app/poll"
|
||||||
"gitea.com/gitea/act_runner/internal/app/run"
|
"gitea.com/gitea/act_runner/internal/app/run"
|
||||||
|
"gitea.com/gitea/act_runner/internal/pkg/cleanup"
|
||||||
"gitea.com/gitea/act_runner/internal/pkg/client"
|
"gitea.com/gitea/act_runner/internal/pkg/client"
|
||||||
"gitea.com/gitea/act_runner/internal/pkg/config"
|
"gitea.com/gitea/act_runner/internal/pkg/config"
|
||||||
"gitea.com/gitea/act_runner/internal/pkg/envcheck"
|
"gitea.com/gitea/act_runner/internal/pkg/envcheck"
|
||||||
@@ -35,6 +37,10 @@ const (
|
|||||||
DiskSpaceWarningThreshold = 85.0
|
DiskSpaceWarningThreshold = 85.0
|
||||||
// DiskSpaceCriticalThreshold is the percentage at which to log critical warnings
|
// DiskSpaceCriticalThreshold is the percentage at which to log critical warnings
|
||||||
DiskSpaceCriticalThreshold = 95.0
|
DiskSpaceCriticalThreshold = 95.0
|
||||||
|
// DiskSpaceAutoCleanupThreshold is the percentage at which to trigger automatic cleanup
|
||||||
|
DiskSpaceAutoCleanupThreshold = 85.0
|
||||||
|
// CleanupCooldown is the minimum time between automatic cleanups
|
||||||
|
CleanupCooldown = 10 * time.Minute
|
||||||
// CapabilitiesUpdateInterval is how often to update capabilities (including disk space)
|
// CapabilitiesUpdateInterval is how often to update capabilities (including disk space)
|
||||||
CapabilitiesUpdateInterval = 5 * time.Minute
|
CapabilitiesUpdateInterval = 5 * time.Minute
|
||||||
// BandwidthTestInterval is how often to run bandwidth tests (hourly)
|
// BandwidthTestInterval is how often to run bandwidth tests (hourly)
|
||||||
@@ -44,6 +50,13 @@ const (
|
|||||||
// Global bandwidth manager - accessible for triggering manual tests
|
// Global bandwidth manager - accessible for triggering manual tests
|
||||||
var bandwidthManager *envcheck.BandwidthManager
|
var bandwidthManager *envcheck.BandwidthManager
|
||||||
|
|
||||||
|
// Global cleanup state
|
||||||
|
var (
|
||||||
|
lastCleanupTime time.Time
|
||||||
|
cleanupMutex sync.Mutex
|
||||||
|
globalConfig *config.Config
|
||||||
|
)
|
||||||
|
|
||||||
func runDaemon(ctx context.Context, daemArgs *daemonArgs, configFile *string) func(cmd *cobra.Command, args []string) error {
|
func runDaemon(ctx context.Context, daemArgs *daemonArgs, configFile *string) func(cmd *cobra.Command, args []string) error {
|
||||||
return func(cmd *cobra.Command, args []string) error {
|
return func(cmd *cobra.Command, args []string) error {
|
||||||
cfg, err := config.LoadDefault(*configFile)
|
cfg, err := config.LoadDefault(*configFile)
|
||||||
@@ -51,6 +64,9 @@ func runDaemon(ctx context.Context, daemArgs *daemonArgs, configFile *string) fu
|
|||||||
return fmt.Errorf("invalid configuration: %w", err)
|
return fmt.Errorf("invalid configuration: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Store config globally for auto-cleanup
|
||||||
|
globalConfig = cfg
|
||||||
|
|
||||||
initLogging(cfg)
|
initLogging(cfg)
|
||||||
log.Infoln("Starting runner daemon")
|
log.Infoln("Starting runner daemon")
|
||||||
|
|
||||||
@@ -170,7 +186,7 @@ func runDaemon(ctx context.Context, daemArgs *daemonArgs, configFile *string) fu
|
|||||||
log.Infof("detected capabilities: %s", capabilitiesJson)
|
log.Infof("detected capabilities: %s", capabilitiesJson)
|
||||||
|
|
||||||
// Check disk space and warn if low
|
// Check disk space and warn if low
|
||||||
checkDiskSpaceWarnings(capabilities)
|
checkDiskSpaceAndCleanup(ctx, capabilities)
|
||||||
|
|
||||||
// declare the labels of the runner before fetching tasks
|
// declare the labels of the runner before fetching tasks
|
||||||
resp, err := runner.Declare(ctx, ls.Names(), capabilitiesJson)
|
resp, err := runner.Declare(ctx, ls.Names(), capabilitiesJson)
|
||||||
@@ -236,8 +252,8 @@ func runDaemon(ctx context.Context, daemArgs *daemonArgs, configFile *string) fu
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkDiskSpaceWarnings logs warnings if disk space is low
|
// checkDiskSpaceAndCleanup logs warnings if disk space is low and triggers cleanup if needed
|
||||||
func checkDiskSpaceWarnings(capabilities *envcheck.RunnerCapabilities) {
|
func checkDiskSpaceAndCleanup(ctx context.Context, capabilities *envcheck.RunnerCapabilities) {
|
||||||
if capabilities.Disk == nil {
|
if capabilities.Disk == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -247,11 +263,51 @@ func checkDiskSpaceWarnings(capabilities *envcheck.RunnerCapabilities) {
|
|||||||
|
|
||||||
if usedPercent >= DiskSpaceCriticalThreshold {
|
if usedPercent >= DiskSpaceCriticalThreshold {
|
||||||
log.Errorf("CRITICAL: Disk space critically low! %.1f%% used, only %.2f GB free. Runner may fail to execute jobs!", usedPercent, freeGB)
|
log.Errorf("CRITICAL: Disk space critically low! %.1f%% used, only %.2f GB free. Runner may fail to execute jobs!", usedPercent, freeGB)
|
||||||
|
// Always try cleanup at critical level
|
||||||
|
triggerAutoCleanup(ctx)
|
||||||
|
} else if usedPercent >= DiskSpaceAutoCleanupThreshold {
|
||||||
|
log.Warnf("WARNING: Disk space at %.1f%% used (%.2f GB free). Triggering automatic cleanup.", usedPercent, freeGB)
|
||||||
|
triggerAutoCleanup(ctx)
|
||||||
} else if usedPercent >= DiskSpaceWarningThreshold {
|
} else if usedPercent >= DiskSpaceWarningThreshold {
|
||||||
log.Warnf("WARNING: Disk space running low. %.1f%% used, %.2f GB free. Consider cleaning up disk space.", usedPercent, freeGB)
|
log.Warnf("WARNING: Disk space running low. %.1f%% used, %.2f GB free. Consider cleaning up disk space.", usedPercent, freeGB)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// triggerAutoCleanup runs cleanup if cooldown has passed
|
||||||
|
func triggerAutoCleanup(ctx context.Context) {
|
||||||
|
cleanupMutex.Lock()
|
||||||
|
defer cleanupMutex.Unlock()
|
||||||
|
|
||||||
|
// Check cooldown (except for first run)
|
||||||
|
if !lastCleanupTime.IsZero() && time.Since(lastCleanupTime) < CleanupCooldown {
|
||||||
|
log.Debugf("Skipping auto-cleanup, cooldown not expired (last cleanup: %s ago)", time.Since(lastCleanupTime))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if globalConfig == nil {
|
||||||
|
log.Warn("Cannot run auto-cleanup: config not available")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info("Starting automatic disk cleanup...")
|
||||||
|
lastCleanupTime = time.Now()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
result, err := cleanup.RunCleanup(ctx, globalConfig)
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(err).Error("Auto-cleanup failed")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Infof("Auto-cleanup completed: freed %d bytes, deleted %d files in %s",
|
||||||
|
result.BytesFreed, result.FilesDeleted, result.Duration)
|
||||||
|
if len(result.Errors) > 0 {
|
||||||
|
for _, e := range result.Errors {
|
||||||
|
log.WithError(e).Warn("Cleanup error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
// periodicCapabilitiesUpdate periodically updates capabilities including disk space and bandwidth
|
// periodicCapabilitiesUpdate periodically updates capabilities including disk space and bandwidth
|
||||||
func periodicCapabilitiesUpdate(ctx context.Context, runner *run.Runner, labelNames []string, dockerHost string, workingDir string) {
|
func periodicCapabilitiesUpdate(ctx context.Context, runner *run.Runner, labelNames []string, dockerHost string, workingDir string) {
|
||||||
ticker := time.NewTicker(CapabilitiesUpdateInterval)
|
ticker := time.NewTicker(CapabilitiesUpdateInterval)
|
||||||
@@ -277,7 +333,7 @@ func periodicCapabilitiesUpdate(ctx context.Context, runner *run.Runner, labelNa
|
|||||||
capabilitiesJson := capabilities.ToJSON()
|
capabilitiesJson := capabilities.ToJSON()
|
||||||
|
|
||||||
// Check for disk space warnings
|
// Check for disk space warnings
|
||||||
checkDiskSpaceWarnings(capabilities)
|
checkDiskSpaceAndCleanup(ctx, capabilities)
|
||||||
|
|
||||||
// Send updated capabilities to server
|
// Send updated capabilities to server
|
||||||
_, err := runner.Declare(ctx, labelNames, capabilitiesJson)
|
_, err := runner.Declare(ctx, labelNames, capabilitiesJson)
|
||||||
|
|||||||
@@ -208,7 +208,7 @@ func cleanTempDir(maxAge time.Duration) (int64, int, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Only clean files/dirs that look like runner/act artifacts
|
// Only clean files/dirs that look like runner/act artifacts
|
||||||
runnerPatterns := []string{"act-", "runner-", "gitea-", "workflow-"}
|
runnerPatterns := []string{"act-", "runner-", "gitea-", "workflow-", "go-build", "go-link", "node-compile-cache", "npm-", "yarn-", "pnpm-"}
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
name := entry.Name()
|
name := entry.Name()
|
||||||
isRunner := false
|
isRunner := false
|
||||||
@@ -265,6 +265,9 @@ func dirSize(path string) int64 {
|
|||||||
// These are cleaned more aggressively (files older than 7 days) since they can grow very large
|
// These are cleaned more aggressively (files older than 7 days) since they can grow very large
|
||||||
func cleanBuildCaches(maxAge time.Duration) (int64, int, error) {
|
func cleanBuildCaches(maxAge time.Duration) (int64, int, error) {
|
||||||
home := os.Getenv("HOME")
|
home := os.Getenv("HOME")
|
||||||
|
if home == "" {
|
||||||
|
home = os.Getenv("USERPROFILE") // Windows
|
||||||
|
}
|
||||||
if home == "" {
|
if home == "" {
|
||||||
home = "/root" // fallback for runners typically running as root
|
home = "/root" // fallback for runners typically running as root
|
||||||
}
|
}
|
||||||
@@ -278,6 +281,7 @@ func cleanBuildCaches(maxAge time.Duration) (int64, int, error) {
|
|||||||
path string
|
path string
|
||||||
desc string
|
desc string
|
||||||
}{
|
}{
|
||||||
|
// Linux paths
|
||||||
{filepath.Join(home, ".cache", "go-build"), "Go build cache"},
|
{filepath.Join(home, ".cache", "go-build"), "Go build cache"},
|
||||||
{filepath.Join(home, ".cache", "golangci-lint"), "golangci-lint cache"},
|
{filepath.Join(home, ".cache", "golangci-lint"), "golangci-lint cache"},
|
||||||
{filepath.Join(home, ".npm", "_cacache"), "npm cache"},
|
{filepath.Join(home, ".npm", "_cacache"), "npm cache"},
|
||||||
@@ -289,6 +293,18 @@ func cleanBuildCaches(maxAge time.Duration) (int64, int, error) {
|
|||||||
{filepath.Join(home, ".cache", "pip"), "pip cache"},
|
{filepath.Join(home, ".cache", "pip"), "pip cache"},
|
||||||
{filepath.Join(home, ".cargo", "registry", "cache"), "Cargo cache"},
|
{filepath.Join(home, ".cargo", "registry", "cache"), "Cargo cache"},
|
||||||
{filepath.Join(home, ".rustup", "tmp"), "Rustup temp"},
|
{filepath.Join(home, ".rustup", "tmp"), "Rustup temp"},
|
||||||
|
// macOS paths (Library/Caches)
|
||||||
|
{filepath.Join(home, "Library", "Caches", "go-build"), "Go build cache (macOS)"},
|
||||||
|
{filepath.Join(home, "Library", "Caches", "Yarn"), "Yarn cache (macOS)"},
|
||||||
|
{filepath.Join(home, "Library", "Caches", "pip"), "pip cache (macOS)"},
|
||||||
|
{filepath.Join(home, "Library", "Caches", "Homebrew"), "Homebrew cache (macOS)"},
|
||||||
|
// Windows paths (LOCALAPPDATA)
|
||||||
|
{filepath.Join(os.Getenv("LOCALAPPDATA"), "go-build"), "Go build cache (Windows)"},
|
||||||
|
{filepath.Join(os.Getenv("LOCALAPPDATA"), "npm-cache"), "npm cache (Windows)"},
|
||||||
|
{filepath.Join(os.Getenv("LOCALAPPDATA"), "pnpm"), "pnpm cache (Windows)"},
|
||||||
|
{filepath.Join(os.Getenv("LOCALAPPDATA"), "Yarn", "Cache"), "Yarn cache (Windows)"},
|
||||||
|
{filepath.Join(os.Getenv("LOCALAPPDATA"), "NuGet", "v3-cache"), "NuGet cache (Windows)"},
|
||||||
|
{filepath.Join(os.Getenv("LOCALAPPDATA"), "pip", "Cache"), "pip cache (Windows)"},
|
||||||
}
|
}
|
||||||
|
|
||||||
cutoff := time.Now().Add(-maxAge)
|
cutoff := time.Now().Add(-maxAge)
|
||||||
|
|||||||
@@ -910,7 +910,24 @@ func detectCPULoad() *CPUInfo {
|
|||||||
|
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
case "linux":
|
case "linux":
|
||||||
// Read from /proc/loadavg
|
// Check if running in a container (LXC/Docker)
|
||||||
|
// Containers share /proc/loadavg with host, giving inaccurate readings
|
||||||
|
inContainer := isInContainer()
|
||||||
|
|
||||||
|
if inContainer {
|
||||||
|
// Try to get CPU usage from cgroups (more accurate for containers)
|
||||||
|
if cgroupCPU := getContainerCPUUsage(); cgroupCPU >= 0 {
|
||||||
|
info.LoadPercent = cgroupCPU
|
||||||
|
info.LoadAvg1m = cgroupCPU * float64(numCPU) / 100.0
|
||||||
|
return info
|
||||||
|
}
|
||||||
|
// If cgroup reading failed, report 0 - better than host's load
|
||||||
|
info.LoadPercent = 0
|
||||||
|
info.LoadAvg1m = 0
|
||||||
|
return info
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not in container - use traditional /proc/loadavg
|
||||||
data, err := os.ReadFile("/proc/loadavg")
|
data, err := os.ReadFile("/proc/loadavg")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return info
|
return info
|
||||||
@@ -979,6 +996,67 @@ func detectCPULoad() *CPUInfo {
|
|||||||
return info
|
return info
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isInContainer reports whether the process appears to be running inside a
// container (Docker, LXC, or similar). The probes are tried in order and the
// first positive one wins; unreadable or missing files simply count as "no".
func isInContainer() bool {
	// pathExists reports whether os.Stat succeeds for the given path.
	pathExists := func(path string) bool {
		_, err := os.Stat(path)
		return err == nil
	}

	// fileMentions reports whether the file can be read and contains at least
	// one of the given substrings.
	fileMentions := func(path string, needles ...string) bool {
		raw, err := os.ReadFile(path)
		if err != nil {
			return false
		}
		text := string(raw)
		for _, needle := range needles {
			if strings.Contains(text, needle) {
				return true
			}
		}
		return false
	}

	switch {
	case pathExists("/.dockerenv"):
		// Marker file present in Docker containers.
		return true
	case fileMentions("/proc/1/environ", "container=lxc", "container=docker"):
		// PID 1's environment (NUL-separated) names the container type;
		// this is what identifies LXC on Proxmox.
		return true
	case fileMentions("/proc/1/cgroup", "/lxc/", "/docker/"):
		// cgroup v1 paths embed the container manager's name.
		return true
	case os.Getenv("container") != "":
		// The same marker variable, but in our own environment.
		return true
	case pathExists("/run/.containerenv"):
		// Marker file created by some container runtimes (e.g. Podman).
		return true
	}
	return false
}
|
||||||
|
|
||||||
|
// getContainerCPUUsage reports the container's current CPU usage as a
// percentage, or -1 when it cannot be determined. Callers must treat a
// negative result as "unknown".
//
// It currently always returns -1. The counters that could be consulted here,
// usage_usec in /sys/fs/cgroup/cpu.stat (cgroup v2) and utime/stime in
// /proc/self/stat, are cumulative totals rather than instantaneous readings.
// A percentage can only be derived from the difference between two samples,
// which requires remembering a previous value. Until that sampling exists,
// the files are deliberately not read: doing so would be I/O with no effect
// on the result.
func getContainerCPUUsage() float64 {
	return -1 // Unable to determine - caller should handle
}
|
||||||
|
|
||||||
// parseFloat parses a string to float64
|
// parseFloat parses a string to float64
|
||||||
func parseFloat(s string) (float64, error) {
|
func parseFloat(s string) (float64, error) {
|
||||||
s = strings.TrimSpace(s)
|
s = strings.TrimSpace(s)
|
||||||
|
|||||||
Reference in New Issue
Block a user