diff --git a/internal/app/cmd/daemon.go b/internal/app/cmd/daemon.go
index 7197804..e3d2333 100644
--- a/internal/app/cmd/daemon.go
+++ b/internal/app/cmd/daemon.go
@@ -14,6 +14,7 @@ import (
 	"slices"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"connectrpc.com/connect"
@@ -23,6 +24,7 @@ import (
 
 	"gitea.com/gitea/act_runner/internal/app/poll"
 	"gitea.com/gitea/act_runner/internal/app/run"
+	"gitea.com/gitea/act_runner/internal/pkg/cleanup"
 	"gitea.com/gitea/act_runner/internal/pkg/client"
 	"gitea.com/gitea/act_runner/internal/pkg/config"
 	"gitea.com/gitea/act_runner/internal/pkg/envcheck"
@@ -35,6 +37,10 @@ const (
 	DiskSpaceWarningThreshold = 85.0
 	// DiskSpaceCriticalThreshold is the percentage at which to log critical warnings
 	DiskSpaceCriticalThreshold = 95.0
+	// DiskSpaceAutoCleanupThreshold is the percentage at which to trigger automatic cleanup (must stay above DiskSpaceWarningThreshold so the warn-only branch is reachable)
+	DiskSpaceAutoCleanupThreshold = 90.0
+	// CleanupCooldown is the minimum time between automatic cleanups
+	CleanupCooldown = 10 * time.Minute
 	// CapabilitiesUpdateInterval is how often to update capabilities (including disk space)
 	CapabilitiesUpdateInterval = 5 * time.Minute
 	// BandwidthTestInterval is how often to run bandwidth tests (hourly)
@@ -44,6 +50,13 @@ const (
 
 // Global bandwidth manager - accessible for triggering manual tests
 var bandwidthManager *envcheck.BandwidthManager
+// Global cleanup state
+var (
+	lastCleanupTime time.Time
+	cleanupMutex    sync.Mutex
+	globalConfig    *config.Config
+)
+
 func runDaemon(ctx context.Context, daemArgs *daemonArgs, configFile *string) func(cmd *cobra.Command, args []string) error {
 	return func(cmd *cobra.Command, args []string) error {
 		cfg, err := config.LoadDefault(*configFile)
@@ -51,6 +64,9 @@ func runDaemon(ctx context.Context, daemArgs *daemonArgs, configFile *string) fu
 			return fmt.Errorf("invalid configuration: %w", err)
 		}
 
+		// Store config globally for auto-cleanup
+		globalConfig = cfg
+
 		initLogging(cfg)
 
 		log.Infoln("Starting runner daemon")
@@ -170,7 +186,7 @@ func runDaemon(ctx context.Context, daemArgs *daemonArgs, configFile *string) fu
 		log.Infof("detected capabilities: %s", capabilitiesJson)
 
 		// Check disk space and warn if low
-		checkDiskSpaceWarnings(capabilities)
+		checkDiskSpaceAndCleanup(ctx, capabilities)
 
 		// declare the labels of the runner before fetching tasks
 		resp, err := runner.Declare(ctx, ls.Names(), capabilitiesJson)
@@ -236,8 +252,8 @@ func runDaemon(ctx context.Context, daemArgs *daemonArgs, configFile *string) fu
 	}
 }
 
-// checkDiskSpaceWarnings logs warnings if disk space is low
-func checkDiskSpaceWarnings(capabilities *envcheck.RunnerCapabilities) {
+// checkDiskSpaceAndCleanup logs warnings if disk space is low and triggers cleanup if needed
+func checkDiskSpaceAndCleanup(ctx context.Context, capabilities *envcheck.RunnerCapabilities) {
 	if capabilities.Disk == nil {
 		return
 	}
@@ -247,11 +263,51 @@ func checkDiskSpaceWarnings(capabilities *envcheck.RunnerCapabilities) {
 
 	if usedPercent >= DiskSpaceCriticalThreshold {
 		log.Errorf("CRITICAL: Disk space critically low! %.1f%% used, only %.2f GB free. Runner may fail to execute jobs!", usedPercent, freeGB)
+		// Always try cleanup at critical level
+		triggerAutoCleanup(ctx)
+	} else if usedPercent >= DiskSpaceAutoCleanupThreshold {
+		log.Warnf("WARNING: Disk space at %.1f%% used (%.2f GB free). Triggering automatic cleanup.", usedPercent, freeGB)
+		triggerAutoCleanup(ctx)
 	} else if usedPercent >= DiskSpaceWarningThreshold {
 		log.Warnf("WARNING: Disk space running low. %.1f%% used, %.2f GB free. Consider cleaning up disk space.", usedPercent, freeGB)
 	}
 }
 
+// triggerAutoCleanup runs cleanup if cooldown has passed
+func triggerAutoCleanup(ctx context.Context) {
+	cleanupMutex.Lock()
+	defer cleanupMutex.Unlock()
+
+	// Check cooldown (except for first run)
+	if !lastCleanupTime.IsZero() && time.Since(lastCleanupTime) < CleanupCooldown {
+		log.Debugf("Skipping auto-cleanup, cooldown not expired (last cleanup: %s ago)", time.Since(lastCleanupTime))
+		return
+	}
+
+	if globalConfig == nil {
+		log.Warn("Cannot run auto-cleanup: config not available")
+		return
+	}
+
+	// Stamp the cooldown before launching so a concurrent caller within the
+	// window cannot start an overlapping cleanup while this one is running.
+	log.Info("Starting automatic disk cleanup...")
+	lastCleanupTime = time.Now()
+
+	go func() {
+		result, err := cleanup.RunCleanup(ctx, globalConfig)
+		if err != nil {
+			log.WithError(err).Error("Auto-cleanup failed")
+			return
+		}
+		log.Infof("Auto-cleanup completed: freed %d bytes, deleted %d files in %s",
+			result.BytesFreed, result.FilesDeleted, result.Duration)
+		if len(result.Errors) > 0 {
+			for _, e := range result.Errors {
+				log.WithError(e).Warn("Cleanup error")
+			}
+		}
+	}()
+}
+
 // periodicCapabilitiesUpdate periodically updates capabilities including disk space and bandwidth
 func periodicCapabilitiesUpdate(ctx context.Context, runner *run.Runner, labelNames []string, dockerHost string, workingDir string) {
 	ticker := time.NewTicker(CapabilitiesUpdateInterval)
@@ -277,7 +333,7 @@ func periodicCapabilitiesUpdate(ctx context.Context, runner *run.Runner, labelNa
 			capabilitiesJson := capabilities.ToJSON()
 
 			// Check for disk space warnings
-			checkDiskSpaceWarnings(capabilities)
+			checkDiskSpaceAndCleanup(ctx, capabilities)
 
 			// Send updated capabilities to server
 			_, err := runner.Declare(ctx, labelNames, capabilitiesJson)
diff --git a/internal/pkg/cleanup/cleanup.go b/internal/pkg/cleanup/cleanup.go
index 121e2d4..c63b804 100644
--- a/internal/pkg/cleanup/cleanup.go
+++ b/internal/pkg/cleanup/cleanup.go
@@ -208,7 +208,7 @@ func cleanTempDir(maxAge time.Duration) (int64, int, error) {
 	}
 
 	// Only clean files/dirs that look like runner/act artifacts
-	runnerPatterns := []string{"act-", "runner-", "gitea-", "workflow-"}
+	runnerPatterns := []string{"act-", "runner-", "gitea-", "workflow-", "go-build", "go-link", "node-compile-cache", "npm-", "yarn-", "pnpm-"}
 	for _, entry := range entries {
 		name := entry.Name()
 		isRunner := false
@@ -265,6 +269,16 @@ func dirSize(path string) int64 {
 // These are cleaned more aggressively (files older than 7 days) since they can grow very large
 func cleanBuildCaches(maxAge time.Duration) (int64, int, error) {
 	home := os.Getenv("HOME")
+	if home == "" {
+		home = os.Getenv("USERPROFILE") // Windows
+	}
 	if home == "" {
 		home = "/root" // fallback for runners typically running as root
 	}
+	// LOCALAPPDATA is only set on Windows. Substitute an absolute path that
+	// cannot exist elsewhere, so filepath.Join never produces relative paths
+	// (e.g. "go-build") that could match directories in the working dir.
+	localAppData := os.Getenv("LOCALAPPDATA")
+	if localAppData == "" {
+		localAppData = filepath.Join(string(os.PathSeparator), "nonexistent", "localappdata")
+	}
@@ -278,6 +281,7 @@ func cleanBuildCaches(maxAge time.Duration) (int64, int, error) {
 		path string
 		desc string
 	}{
+		// Linux paths
 		{filepath.Join(home, ".cache", "go-build"), "Go build cache"},
 		{filepath.Join(home, ".cache", "golangci-lint"), "golangci-lint cache"},
 		{filepath.Join(home, ".npm", "_cacache"), "npm cache"},
@@ -289,6 +293,18 @@ func cleanBuildCaches(maxAge time.Duration) (int64, int, error) {
 		{filepath.Join(home, ".cache", "pip"), "pip cache"},
 		{filepath.Join(home, ".cargo", "registry", "cache"), "Cargo cache"},
 		{filepath.Join(home, ".rustup", "tmp"), "Rustup temp"},
+		// macOS paths (Library/Caches)
+		{filepath.Join(home, "Library", "Caches", "go-build"), "Go build cache (macOS)"},
+		{filepath.Join(home, "Library", "Caches", "Yarn"), "Yarn cache (macOS)"},
+		{filepath.Join(home, "Library", "Caches", "pip"), "pip cache (macOS)"},
+		{filepath.Join(home, "Library", "Caches", "Homebrew"), "Homebrew cache (macOS)"},
+		// Windows paths (localAppData is absolute-but-nonexistent off Windows)
+		{filepath.Join(localAppData, "go-build"), "Go build cache (Windows)"},
+		{filepath.Join(localAppData, "npm-cache"), "npm cache (Windows)"},
+		{filepath.Join(localAppData, "pnpm"), "pnpm cache (Windows)"},
+		{filepath.Join(localAppData, "Yarn", "Cache"), "Yarn cache (Windows)"},
+		{filepath.Join(localAppData, "NuGet", "v3-cache"), "NuGet cache (Windows)"},
+		{filepath.Join(localAppData, "pip", "Cache"), "pip cache (Windows)"},
 	}
 
 	cutoff := time.Now().Add(-maxAge)
diff --git a/internal/pkg/envcheck/capabilities.go b/internal/pkg/envcheck/capabilities.go
index 408d807..abe06d2 100644
--- a/internal/pkg/envcheck/capabilities.go
+++ b/internal/pkg/envcheck/capabilities.go
@@ -910,7 +910,24 @@ func detectCPULoad() *CPUInfo {
 
 	switch runtime.GOOS {
 	case "linux":
-		// Read from /proc/loadavg
+		// Check if running in a container (LXC/Docker)
+		// Containers share /proc/loadavg with host, giving inaccurate readings
+		inContainer := isInContainer()
+
+		if inContainer {
+			// Try to get CPU usage from cgroups (more accurate for containers)
+			if cgroupCPU := getContainerCPUUsage(); cgroupCPU >= 0 {
+				info.LoadPercent = cgroupCPU
+				info.LoadAvg1m = cgroupCPU * float64(numCPU) / 100.0
+				return info
+			}
+			// If cgroup reading failed, report 0 - better than host's load
+			info.LoadPercent = 0
+			info.LoadAvg1m = 0
+			return info
+		}
+
+		// Not in container - use traditional /proc/loadavg
 		data, err := os.ReadFile("/proc/loadavg")
 		if err != nil {
 			return info
@@ -979,6 +996,52 @@ func detectCPULoad() *CPUInfo {
 	return info
 }
 
+// isInContainer checks if we're running inside a container (LXC/Docker)
+func isInContainer() bool {
+	// Check for Docker
+	if _, err := os.Stat("/.dockerenv"); err == nil {
+		return true
+	}
+	// Check PID 1's environment for container type (works for LXC on Proxmox)
+	if data, err := os.ReadFile("/proc/1/environ"); err == nil {
+		// environ uses null bytes as separators
+		content := string(data)
+		if strings.Contains(content, "container=lxc") || strings.Contains(content, "container=docker") {
+			return true
+		}
+	}
+	// Check for LXC/Docker in cgroup path (cgroup v1)
+	if data, err := os.ReadFile("/proc/1/cgroup"); err == nil {
+		content := string(data)
+		if strings.Contains(content, "/lxc/") || strings.Contains(content, "/docker/") {
+			return true
+		}
+	}
+	// Check for container environment variable in current process
+	if os.Getenv("container") != "" {
+		return true
+	}
+	// Check for systemd-nspawn or other containers
+	if _, err := os.Stat("/run/.containerenv"); err == nil {
+		return true
+	}
+	return false
+}
+
+// getContainerCPUUsage tries to get CPU usage from cgroups.
+// Returns -1 if unable to determine.
+//
+// The cgroup v2 cpu.stat file only exposes cumulative CPU time
+// (usage_usec), and /proc/self/stat only covers this process, so a
+// single one-shot read cannot yield a container-wide utilization
+// percentage; that needs two samples spaced apart with state kept
+// between calls. Rather than report a misleading number, signal
+// "unknown" so the caller falls back to a neutral value instead of
+// echoing the host's load average.
+func getContainerCPUUsage() float64 {
+	return -1
+}
+
 // parseFloat parses a string to float64
 func parseFloat(s string) (float64, error) {
 	s = strings.TrimSpace(s)