Health Checks

Monitor application health with Forge's comprehensive health check system

Health Checks

Forge provides a comprehensive health check system that monitors the health of your application and its dependencies. The system includes automatic discovery, smart aggregation, and detailed reporting.

Health Manager Interface

The health manager is responsible for coordinating health checks across your application:

// HealthManager coordinates health checks across the application. Checks are
// registered under a name, can be run all together or one at a time, and are
// summarized into a single overall status.
type HealthManager interface {
    // Health check registration
    //
    // Register associates check with name; opts customize that registration.
    Register(name string, check HealthCheck, opts ...HealthCheckOption) error
    
    // Health check execution
    //
    // Check produces a report covering the registered checks, while
    // CheckService produces the result for the single check called name.
    Check(ctx context.Context) HealthReport
    CheckService(ctx context.Context, name string) HealthResult
    
    // Health check management
    //
    // NOTE(review): presumably Unregister fails for an unknown name; confirm
    // against the implementation.
    Unregister(name string) error
    IsRegistered(name string) bool
    
    // Health check information
    //
    // RegisteredChecks lists check names; HealthStatus reports the aggregated
    // status of the application.
    RegisteredChecks() []string
    HealthStatus() HealthStatus
}

Health Check Types

Basic Health Checks

Simple health checks that return a status and optional message.

// Register a basic health check: the database is healthy when a ping succeeds.
app.HealthManager().Register("database", func(ctx context.Context) HealthResult {
    pingErr := db.PingContext(ctx)
    if pingErr == nil {
        return HealthResult{
            Status:  HealthStatusHealthy,
            Message: "database is healthy",
        }
    }

    // Surface the underlying error so the report explains the failure.
    return HealthResult{
        Status:  HealthStatusUnhealthy,
        Message: "database connection failed",
        Error:   pingErr,
    }
})

Example:

// databaseHealthCheck pings the database and reports the outcome together
// with the connection's host, port and database name.
func databaseHealthCheck(ctx context.Context) HealthResult {
    // Test database connection
    pingErr := db.PingContext(ctx)

    // The same connection details accompany both outcomes.
    connInfo := map[string]interface{}{
        "host":     db.Host(),
        "port":     db.Port(),
        "database": db.Database(),
    }

    if pingErr == nil {
        return HealthResult{
            Status:  HealthStatusHealthy,
            Message: "database is healthy",
            Details: connInfo,
        }
    }

    return HealthResult{
        Status:  HealthStatusUnhealthy,
        Message: "database connection failed",
        Error:   pingErr,
        Details: connInfo,
    }
}

Service Health Checks

Health checks for registered services with automatic discovery.

// Register service with health check
app.RegisterService("cache", func(container Container) (interface{}, error) {
    return &CacheService{}, nil
}, forge.WithHealthCheck())

// Service implements health check
type CacheService struct {
    client redis.Client
}

// Health pings the cache client and returns a wrapped error when the ping
// fails, or nil when it succeeds.
func (c *CacheService) Health(ctx context.Context) error {
    pingErr := c.client.Ping(ctx).Err()
    if pingErr == nil {
        return nil
    }
    return fmt.Errorf("cache ping failed: %w", pingErr)
}

Example:

type UserService struct {
    db     DatabaseService
    cache  CacheService
    logger Logger
}

// Health reports the first failing probe among the database dependency, the
// cache dependency and the service's own check, in that order. It returns nil
// when all three pass.
func (u *UserService) Health(ctx context.Context) error {
    // Probes run in order; the first failure short-circuits the rest.
    probes := []struct {
        failure string
        run     func(context.Context) error
    }{
        {"database dependency unhealthy", u.db.Health},
        {"cache dependency unhealthy", u.cache.Health},
        {"service health check failed", u.checkServiceHealth},
    }

    for _, probe := range probes {
        if err := probe.run(ctx); err != nil {
            return fmt.Errorf("%s: %w", probe.failure, err)
        }
    }

    return nil
}

Dependency Health Checks

Health checks that monitor external dependencies and services.

// Register dependency health check
app.HealthManager().Register("external-api", func(ctx context.Context) HealthResult {
    // Bind the request to ctx so the probe is abandoned when the health
    // check's context is cancelled or its deadline (if any) expires, rather
    // than hanging on a slow dependency.
    req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.example.com/health", nil)
    if err != nil {
        return HealthResult{
            Status:  HealthStatusUnhealthy,
            Message: "external API unavailable",
            Error:   err,
        }
    }

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        return HealthResult{
            Status:  HealthStatusUnhealthy,
            Message: "external API unavailable",
            Error:   err,
        }
    }
    defer resp.Body.Close()

    // The API is reachable but not reporting success: degraded, not down.
    if resp.StatusCode != http.StatusOK {
        return HealthResult{
            Status:  HealthStatusDegraded,
            Message: "external API responding with errors",
            Details: map[string]interface{}{
                "status_code": resp.StatusCode,
            },
        }
    }

    return HealthResult{
        Status:  HealthStatusHealthy,
        Message: "external API is healthy",
    }
})

Example:

// externalServiceHealthCheck probes a fixed list of external services and
// records a per-service outcome in the result details. The result is healthy
// only when every probe succeeds; otherwise it is degraded.
func externalServiceHealthCheck(ctx context.Context) HealthResult {
    // Check multiple external services
    targets := []string{"auth-service", "payment-service", "notification-service"}
    outcomes := make(map[string]interface{})
    failures := 0

    for _, target := range targets {
        err := checkServiceHealth(ctx, target)
        if err == nil {
            outcomes[target] = "healthy"
            continue
        }
        // Keep the error text so the report shows why the service failed.
        outcomes[target] = err.Error()
        failures++
    }

    if failures > 0 {
        return HealthResult{
            Status:  HealthStatusDegraded,
            Message: "some external services are unhealthy",
            Details: outcomes,
        }
    }

    return HealthResult{
        Status:  HealthStatusHealthy,
        Message: "all external services are healthy",
        Details: outcomes,
    }
}

Health Status Levels

Forge defines four health status levels:

const (
    HealthStatusHealthy   = "healthy"   // All checks passing
    HealthStatusDegraded  = "degraded"  // Some checks failing, but service functional
    HealthStatusUnhealthy = "unhealthy" // Critical checks failing
    HealthStatusUnknown   = "unknown"   // Status cannot be determined
)

Status Aggregation

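The health manager automatically aggregates statuses from multiple checks. If no checks are registered, the overall status is unknown; if any check is unhealthy, the overall status is unhealthy; otherwise, if any check is degraded, it is degraded; otherwise it is healthy: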

// HealthStatus runs every check and folds the results into one status.
// It returns unknown when there are no checks, unhealthy when any check is
// unhealthy, degraded when any check is degraded, and healthy otherwise.
// Any other per-check status (including unknown) does not affect the result.
func (hm *HealthManager) HealthStatus() HealthStatus {
    report := hm.Check(context.Background())

    if len(report.Checks) == 0 {
        return HealthStatusUnknown
    }

    sawDegraded := false
    for _, result := range report.Checks {
        // Unhealthy outranks everything else, so the scan can stop here.
        if result.Status == HealthStatusUnhealthy {
            return HealthStatusUnhealthy
        }
        if result.Status == HealthStatusDegraded {
            sawDegraded = true
        }
    }

    if sawDegraded {
        return HealthStatusDegraded
    }
    return HealthStatusHealthy
}

Health Check Configuration

Basic Configuration

app := forge.NewApp(forge.AppConfig{
    HealthConfig: forge.HealthConfig{
        Enabled:                true,
        CheckInterval:          30 * time.Second,
        ReportInterval:         60 * time.Second,
        EnableAutoDiscovery:    true,
        MaxConcurrentChecks:    10,
        DefaultTimeout:         5 * time.Second,
        EnableSmartAggregation: true,
        HistorySize:            100,
    },
})

Advanced Configuration

healthConfig := forge.HealthConfig{
    // Basic settings
    Enabled:             true,
    CheckInterval:       30 * time.Second,
    ReportInterval:      60 * time.Second,
    
    // Auto-discovery
    EnableAutoDiscovery: true,
    
    // Concurrency
    MaxConcurrentChecks: 10,
    
    // Timeouts
    DefaultTimeout:      5 * time.Second,
    
    // Aggregation
    EnableSmartAggregation: true,
    
    // History
    HistorySize: 100,
    
    // Custom settings
    CustomSettings: map[string]interface{}{
        "critical_checks": []string{"database", "cache"},
        "warning_checks":  []string{"external-api"},
    },
}

Health Check Options

Check Options

// Register health check with options
app.HealthManager().Register("database", healthCheck,
    forge.WithCheckTimeout(10*time.Second),
    forge.WithCheckInterval(30*time.Second),
    forge.WithCheckTags("critical", "database"),
    forge.WithCheckMetadata("service", "postgres"),
)

Check Categories

// Critical checks (affect overall health)
app.HealthManager().Register("database", healthCheck,
    forge.WithCheckCategory("critical"),
)

// Warning checks (don't affect overall health)
app.HealthManager().Register("external-api", healthCheck,
    forge.WithCheckCategory("warning"),
)

// Info checks (informational only)
app.HealthManager().Register("metrics", healthCheck,
    forge.WithCheckCategory("info"),
)

Health Check Endpoints

Built-in Endpoints

Forge automatically provides health check endpoints:

# Overall health status
GET /_/health

# Detailed health report
GET /_/health/detailed

# Specific service health
GET /_/health/service/{name}

# Health check history
GET /_/health/history

Custom Health Endpoints

// Custom health endpoint: returns the report plus a per-status tally.
app.Router().GET("/health/custom", func(ctx forge.Context) error {
    // Run the checks using the incoming request's context.
    report := app.HealthManager().Check(ctx.Request().Context())

    // Count how many checks fall into each status.
    tally := map[string]int{
        "total":     len(report.Checks),
        "healthy":   countByStatus(report.Checks, HealthStatusHealthy),
        "degraded":  countByStatus(report.Checks, HealthStatusDegraded),
        "unhealthy": countByStatus(report.Checks, HealthStatusUnhealthy),
    }

    // Custom response format
    return ctx.JSON(200, map[string]interface{}{
        "status":    report.Status,
        "timestamp": report.Timestamp,
        "checks":    report.Checks,
        "summary":   tally,
    })
})

Health Check Middleware

Health Check Middleware

// Add health check middleware: reject requests with 503 while the application
// is unhealthy, except for the health endpoints themselves.
app.Router().Use(forge.HealthCheckMiddleware(func(ctx forge.Context) error {
    // Exempt the whole built-in health subtree (/_/health, /_/health/detailed,
    // /_/health/service/{name}, /_/health/history). Matching only the exact
    // "/_/health" path would block the diagnostic endpoints exactly when the
    // service is unhealthy and they are needed most.
    path := ctx.Path()
    if path == "/_/health" || strings.HasPrefix(path, "/_/health/") {
        return nil // Skip health check for health endpoints
    }

    // Check overall health
    status := app.HealthManager().HealthStatus()
    if status == HealthStatusUnhealthy {
        return forge.ServiceUnavailable("service is unhealthy")
    }

    return nil
}))

Circuit Breaker Integration

// Integrate with circuit breaker: refuse requests while any critical service
// reports an unhealthy status.
app.Router().Use(forge.CircuitBreakerMiddleware(func(ctx forge.Context) error {
    // Check health before allowing requests
    report := app.HealthManager().Check(ctx.Request().Context())

    for service, result := range report.Checks {
        // Only critical services can open the circuit.
        if !isCriticalService(service) {
            continue
        }
        if result.Status == HealthStatusUnhealthy {
            return forge.ServiceUnavailable("critical service unavailable")
        }
    }

    return nil
}))

Health Check Monitoring

Metrics Integration

// Health check metrics: a failing metrics subsystem degrades the application
// rather than marking it unhealthy.
app.HealthManager().Register("metrics", func(ctx context.Context) HealthResult {
    // Check metrics collection
    metricsErr := app.Metrics().Health()
    if metricsErr == nil {
        return HealthResult{
            Status:  HealthStatusHealthy,
            Message: "metrics are healthy",
        }
    }

    return HealthResult{
        Status:  HealthStatusDegraded,
        Message: "metrics collection issues",
        Error:   metricsErr,
    }
})

Logging Integration

// Health check logging: a failing logging subsystem degrades the application
// rather than marking it unhealthy.
app.HealthManager().Register("logging", func(ctx context.Context) HealthResult {
    // Check logging system
    loggerErr := app.Logger().Health()
    if loggerErr == nil {
        return HealthResult{
            Status:  HealthStatusHealthy,
            Message: "logging is healthy",
        }
    }

    return HealthResult{
        Status:  HealthStatusDegraded,
        Message: "logging system issues",
        Error:   loggerErr,
    }
})

Health Check Testing

Unit Testing

// TestHealthChecks registers an always-healthy check and verifies that it
// appears in the report and that the overall status is healthy.
func TestHealthChecks(t *testing.T) {
    app := forge.NewTestApp(forge.TestAppConfig{
        Name: "test-app",
    })

    // Register test health check. A failed registration must stop the test
    // here; otherwise the assertions below fail for a misleading reason.
    err := app.HealthManager().Register("test-service", func(ctx context.Context) HealthResult {
        return HealthResult{
            Status:  HealthStatusHealthy,
            Message: "test service is healthy",
        }
    })
    require.NoError(t, err)

    // Test health check
    report := app.HealthManager().Check(context.Background())
    assert.Equal(t, HealthStatusHealthy, report.Status)
    assert.Contains(t, report.Checks, "test-service")
}

Integration Testing

// TestHealthEndpoints starts the application, waits for the built-in health
// endpoint to answer, and verifies that it reports a healthy status.
func TestHealthEndpoints(t *testing.T) {
    app := forge.NewTestApp(forge.TestAppConfig{
        Name: "test-app",
    })

    // Start application
    go func() {
        app.Run()
    }()
    defer app.Stop(context.Background())

    // Test health endpoint. The server starts asynchronously, so retry until
    // it accepts connections or the deadline passes; a single immediate
    // request races with startup and fails intermittently.
    const healthURL = "http://localhost:8080/_/health"
    var (
        resp *http.Response
        err  error
    )
    deadline := time.Now().Add(5 * time.Second)
    for {
        resp, err = http.Get(healthURL)
        if err == nil || time.Now().After(deadline) {
            break
        }
        time.Sleep(50 * time.Millisecond)
    }
    require.NoError(t, err)
    defer resp.Body.Close()

    assert.Equal(t, http.StatusOK, resp.StatusCode)

    var health map[string]interface{}
    err = json.NewDecoder(resp.Body).Decode(&health)
    require.NoError(t, err)

    assert.Equal(t, "healthy", health["status"])
}

Best Practices

  1. Comprehensive Coverage: Check all critical dependencies
  2. Appropriate Timeouts: Set reasonable timeouts for health checks
  3. Error Handling: Handle health check errors gracefully
  4. Status Aggregation: Use appropriate status levels
  5. Monitoring: Integrate with monitoring systems
  6. Testing: Test health checks thoroughly
  7. Documentation: Document health check requirements
  8. Performance: Keep health checks lightweight

For more information about monitoring and observability, see the Observability documentation.

How is this guide?

Last updated on