2
0
Files
gitcaddy-server/modules/plugins/health.go
logikonline f42c6c39f9
All checks were successful
Build and Release / Create Release (push) Has been skipped
Build and Release / Unit Tests (push) Successful in 6m49s
Build and Release / Integration Tests (PostgreSQL) (push) Successful in 7m6s
Build and Release / Lint (push) Successful in 7m15s
Build and Release / Build Binaries (amd64, windows, windows-latest) (push) Has been skipped
Build and Release / Build Binaries (amd64, darwin, macos) (push) Has been skipped
Build and Release / Build Binaries (amd64, linux, linux-latest) (push) Has been skipped
Build and Release / Build Binaries (arm64, darwin, macos) (push) Has been skipped
Build and Release / Build Binary (linux/arm64) (push) Has been skipped
feat(ai-service): complete ai production readiness tasks
Implement critical production readiness features for AI integration: per-request provider config, admin dashboard, workflow inspection, and plugin framework foundation.

Per-Request Provider Config:
- Add ProviderConfig struct to all AI request types
- Update queue to resolve provider/model/API key from cascade (repo > org > system)
- Pass resolved config to AI sidecar on every request
- Fixes multi-tenant issue where all orgs shared sidecar's hardcoded config

Admin AI Dashboard:
- Add /admin/ai page with sidecar health status
- Display global operation stats (total, 24h, success/fail/escalated counts)
- Show operations by tier, top 5 repos, token usage
- Recent operations table with repo, operation, status, duration
- Add GetGlobalOperationStats model method

Workflow Inspection:
- Add InspectWorkflow client method and types
- Implement workflow-inspect queue handler
- Add notifier trigger on workflow file push
- Analyzes YAML for syntax errors, security issues, best practices
- Returns structured issues with line numbers and suggested fixes

Plugin Framework (Phase 5 Foundation):
- Add external plugin config loading from app.ini
- Define ExternalPlugin interface and manager
- Add plugin.proto contract (Initialize, Shutdown, HealthCheck, OnEvent, HandleHTTP)
- Implement health monitoring with auto-restart for managed plugins
- Add event routing to subscribed plugins
- HTTP proxy support for plugin-served routes

This completes Tasks 1-4 from the production readiness plan and establishes the foundation for managed plugin lifecycle.
2026-02-13 01:16:58 -05:00

136 lines
3.2 KiB
Go

// Copyright 2026 MarketAlly. All rights reserved.
// SPDX-License-Identifier: MIT
package plugins
import (
"context"
"maps"
"time"
"code.gitcaddy.com/server/v3/modules/graceful"
"code.gitcaddy.com/server/v3/modules/log"
pluginv1 "code.gitcaddy.com/server/v3/modules/plugins/pluginv1"
)
// maxConsecutiveFailures is the number of back-to-back failed health checks
// after which a plugin is marked offline and, if it is a managed plugin with a
// running process, scheduled for a restart.
const maxConsecutiveFailures = 3
// StartHealthMonitoring runs the periodic health-check loop for all external
// plugins. The loop is handed to the graceful manager through
// RunWithShutdownContext and ticks every m.config.HealthCheckInterval, falling
// back to 30 seconds when that value is not positive. The loop ends as soon as
// either the shutdown context or the manager's own context is done.
//
// NOTE(review): whether this call returns immediately or blocks until the loop
// ends depends on how RunWithShutdownContext invokes its callback — confirm
// against the graceful package and the call site.
func (m *ExternalPluginManager) StartHealthMonitoring() {
	period := m.config.HealthCheckInterval
	if period <= 0 {
		period = 30 * time.Second
	}

	monitor := func(shutdownCtx context.Context) {
		tick := time.NewTicker(period)
		defer tick.Stop()

		for {
			select {
			case <-tick.C:
				m.checkAllPlugins(shutdownCtx)
			case <-m.ctx.Done():
				return
			case <-shutdownCtx.Done():
				return
			}
		}
	}

	graceful.GetManager().RunWithShutdownContext(monitor)
}
// checkAllPlugins health-checks every registered plugin once. The plugin map
// is snapshotted under the manager's read lock so that the lock is not held
// while the individual checks run. A warning is logged for each plugin whose
// check returns an error.
func (m *ExternalPluginManager) checkAllPlugins(ctx context.Context) {
	m.mu.RLock()
	snapshot := maps.Clone(m.plugins)
	m.mu.RUnlock()

	for pluginName, managed := range snapshot {
		err := m.checkPlugin(ctx, pluginName, managed)
		if err == nil {
			continue
		}
		log.Warn("Health check failed for plugin %s: %v", pluginName, err)
	}
}
// checkPlugin performs one health-check RPC against a single plugin and
// updates the plugin's tracked state while holding mp.mu.
//
// When the RPC fails, failCount is incremented. Once it reaches
// maxConsecutiveFailures the plugin is marked PluginStatusOffline and, if it is
// a managed plugin with a non-nil process, a restart is launched in a separate
// goroutine. The RPC error is returned to the caller.
//
// When the RPC succeeds, failCount is reset and lastSeen is refreshed. The
// plugin is marked PluginStatusOnline only if the response reports it healthy;
// otherwise it is marked PluginStatusError. nil is returned in both cases.
func (m *ExternalPluginManager) checkPlugin(ctx context.Context, name string, mp *ManagedPlugin) error {
	// context.WithTimeout with a non-positive duration returns a context that
	// is already expired, which would make every health check fail. Fall back
	// to a sane default, mirroring the interval fallback used by the monitor.
	const fallbackHealthTimeout = 5 * time.Second
	timeout := mp.config.HealthTimeout
	if timeout <= 0 {
		timeout = fallbackHealthTimeout
	}

	healthCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	resp := &pluginv1.HealthCheckResponse{}
	err := m.callRPCWithContext(healthCtx, mp, "health-check", &pluginv1.HealthCheckRequest{}, resp)

	// The RPC is issued before taking the lock so a slow plugin does not block
	// other users of mp.mu for the duration of the call.
	mp.mu.Lock()
	defer mp.mu.Unlock()

	if err != nil {
		mp.failCount++
		if mp.failCount >= maxConsecutiveFailures {
			if mp.status != PluginStatusOffline {
				log.Error("Plugin %s is now offline after %d consecutive health check failures", name, mp.failCount)
				mp.status = PluginStatusOffline
			}
			// Auto-restart managed plugins.
			// NOTE(review): this fires on every failing check once the
			// threshold is reached; if a restart is still in flight when the
			// next tick fails, a second restart may be spawned — confirm
			// whether restartManagedPlugin needs a single-flight guard.
			if mp.config.IsManaged() && mp.process != nil {
				log.Info("Attempting to restart managed plugin %s", name)
				go m.restartManagedPlugin(name, mp)
			}
		}
		return err
	}

	// The plugin answered, so the consecutive-failure streak is over even if
	// it reports itself unhealthy.
	mp.failCount = 0
	mp.lastSeen = time.Now()

	if !resp.Healthy {
		// Reachable but unhealthy: do not announce it as "back online" and do
		// not pass through PluginStatusOnline on the way to the error state.
		log.Warn("Plugin %s reports unhealthy: %s", name, resp.Status)
		mp.status = PluginStatusError
		return nil
	}

	if mp.status != PluginStatusOnline {
		log.Info("Plugin %s is back online", name)
		mp.status = PluginStatusOnline
	}
	return nil
}
// restartManagedPlugin kills the plugin's current process (if any), starts a
// new one and re-initializes it. mp.status is set to PluginStatusStarting while
// the restart is in progress, to PluginStatusError if starting or initializing
// fails, and to PluginStatusOnline (with failCount reset and lastSeen
// refreshed) on success. checkPlugin launches it in its own goroutine, so it
// must not assume any lock is held on entry.
func (m *ExternalPluginManager) restartManagedPlugin(name string, mp *ManagedPlugin) {
	// checkPlugin reads mp.process while holding mp.mu, so the old process is
	// torn down under the same lock to avoid an unsynchronized read/write of
	// that field from two goroutines.
	mp.mu.Lock()
	if mp.process != nil {
		// Best effort: the error is deliberately ignored. A failure here most
		// likely means the process has already exited — TODO confirm.
		_ = mp.process.Kill()
		mp.process = nil
	}
	mp.status = PluginStatusStarting
	mp.mu.Unlock()

	// The lock is released while starting and initializing so health checks
	// and other users of mp.mu are not blocked for the duration of the restart.
	if err := m.startManagedPlugin(mp); err != nil {
		log.Error("Failed to restart managed plugin %s: %v", name, err)
		mp.mu.Lock()
		mp.status = PluginStatusError
		mp.mu.Unlock()
		return
	}

	if err := m.initializePlugin(mp); err != nil {
		log.Error("Failed to re-initialize managed plugin %s: %v", name, err)
		mp.mu.Lock()
		mp.status = PluginStatusError
		mp.mu.Unlock()
		return
	}

	mp.mu.Lock()
	mp.status = PluginStatusOnline
	mp.lastSeen = time.Now()
	mp.failCount = 0
	mp.mu.Unlock()

	log.Info("Managed plugin %s restarted successfully", name)
}