AI SDK

API Server

Deploy REST API for the SDK

API Server

Optional REST API server for the SDK with authentication and monitoring.

The API server uses Forge's router system and can be mounted optionally.

Quick Setup

package main

import (
    "context"

    "github.com/xraph/forge"
    "github.com/xraph/forge/extensions/ai"
    "github.com/xraph/forge/extensions/ai/sdk"
    "github.com/xraph/forge/extensions/ai/sdk/server"
)

func main() {
    app := forge.NewApp()
    
    // Register AI extension
    app.RegisterExtension(ai.NewExtension())
    
    // Get dependencies
    container := app.Container()
    llmManager, _ := container.Resolve("llmManager")
    logger, _ := container.Resolve("logger")
    metrics, _ := container.Resolve("metrics")
    
    // Create SDK server
    sdkServer := server.NewServer(
        llmManager.(sdk.LLMManager),
        logger.(forge.Logger),
        metrics.(forge.Metrics),
        server.ServerConfig{
            BasePath:     "/api/sdk",
            EnableAuth:   true,
            RateLimitRPS: 100,
        },
    )
    
    // Mount routes
    sdkServer.MountRoutes(app.Router())
    
    // Start
    app.Start(context.Background())
}

API Endpoints

Generate Text

POST /api/sdk/generate
Content-Type: application/json

{
  "prompt": "Explain quantum computing",
  "model": "gpt-4",
  "temperature": 0.7,
  "max_tokens": 500
}

Response:

{
  "content": "Quantum computing is...",
  "model": "gpt-4",
  "usage": {
    "input_tokens": 10,
    "output_tokens": 245,
    "total_tokens": 255
  }
}

Stream Generation

POST /api/sdk/generate/stream
Content-Type: application/json

{
  "prompt": "Write a story",
  "model": "gpt-4",
  "stream": true
}

Response: Server-Sent Events

data: {"token": "Once"}
data: {"token": " upon"}
data: {"token": " a"}
data: {"token": " time"}
data: {"done": true, "usage": {"total_tokens": 150}}

Structured Output

POST /api/sdk/generate/object
Content-Type: application/json

{
  "prompt": "Extract: John Doe, 30, john@example.com",
  "schema": {
    "type": "object",
    "properties": {
      "name": {"type": "string"},
      "age": {"type": "integer"},
      "email": {"type": "string"}
    }
  }
}

Response:

{
  "name": "John Doe",
  "age": 30,
  "email": "john@example.com"
}

Multi-Modal

POST /api/sdk/multimodal
Content-Type: multipart/form-data

image: (binary)
text: What's in this image?
model: gpt-4-vision

Response:

{
  "text": "The image shows a landscape with mountains and a lake...",
  "usage": {
    "total_tokens": 150
  }
}

Agent Management

Create Agent

POST /api/sdk/agents
Content-Type: application/json

{
  "id": "support-agent",
  "model": "gpt-4",
  "system_message": "You are a helpful support agent",
  "max_history": 50
}

Chat with Agent

POST /api/sdk/agents/:id/chat
Content-Type: application/json

{
  "message": "How can I reset my password?"
}

Response:

{
  "response": "To reset your password...",
  "tokens": 120
}

Get Agent State

GET /api/sdk/agents/:id

Response:

{
  "id": "support-agent",
  "model": "gpt-4",
  "history_count": 15,
  "state": {
    "user_id": "12345",
    "preferences": {...}
  }
}

RAG

POST /api/sdk/rag/query
Content-Type: application/json

{
  "query": "How do I deploy to production?",
  "top_k": 5
}

Response:

{
  "content": "To deploy to production...",
  "sources": [
    {
      "document_id": "deployment-guide",
      "score": 0.95
    }
  ],
  "tokens": 200
}

Cost Tracking

GET /api/sdk/cost

Response:

{
  "total": 450.25,
  "by_model": {
    "gpt-4": 320.50,
    "gpt-3.5-turbo": 129.75
  },
  "budget": {
    "limit": 1000.00,
    "used": 450.25,
    "remaining": 549.75,
    "percentage": 45.0
  }
}

Health Check

GET /api/sdk/health

Response:

{
  "status": "healthy",
  "services": {
    "llm": "healthy",
    "vector_store": "healthy",
    "cache": "healthy"
  },
  "uptime": "3d 12h 45m"
}

Metrics

GET /api/sdk/metrics

Response: Prometheus format

# HELP ai_sdk_requests_total Total requests
# TYPE ai_sdk_requests_total counter
ai_sdk_requests_total{model="gpt-4"} 12500

# HELP ai_sdk_tokens_total Total tokens used
# TYPE ai_sdk_tokens_total counter
ai_sdk_tokens_total{model="gpt-4",type="input"} 1250000

Configuration

srv := server.NewServer(llm, logger, metrics, server.ServerConfig{
    // Base path for all routes
    BasePath: "/api/sdk",
    
    // Authentication
    EnableAuth:   true,
    AuthProvider: customAuthProvider,
    
    // Rate limiting
    RateLimitRPS:   100,
    RateLimitBurst: 200,
    
    // CORS
    EnableCORS: true,
    CORSOrigins: []string{"https://example.com"},
    
    // Timeouts
    RequestTimeout:  30 * time.Second,
    StreamTimeout:   5 * time.Minute,
    
    // Limits
    MaxRequestSize: 10 * 1024 * 1024,  // 10MB
})

Authentication

API Key Authentication

authProvider := server.NewAPIKeyAuth(map[string]string{
    "client-1": "sk-1234567890",
    "client-2": "sk-0987654321",
})

srv := server.NewServer(llm, logger, metrics, server.ServerConfig{
    EnableAuth:   true,
    AuthProvider: authProvider,
})

Usage:

curl -H "Authorization: Bearer sk-1234567890" \
     -X POST https://api.example.com/api/sdk/generate \
     -d '{"prompt":"Hello"}'

JWT Authentication

authProvider := server.NewJWTAuth(server.JWTConfig{
    Secret:     os.Getenv("JWT_SECRET"),
    Expiration: 24 * time.Hour,
})

srv := server.NewServer(llm, logger, metrics, server.ServerConfig{
    EnableAuth:   true,
    AuthProvider: authProvider,
})

Custom Authentication

type CustomAuth struct{}

func (a *CustomAuth) Authenticate(ctx context.Context, token string) (string, error) {
    // Your auth logic
    userID, err := validateToken(token)
    return userID, err
}

srv := server.NewServer(llm, logger, metrics, server.ServerConfig{
    EnableAuth:   true,
    AuthProvider: &CustomAuth{},
})

Rate Limiting

// Per-endpoint rate limits
srv := server.NewServer(llm, logger, metrics, server.ServerConfig{
    RateLimitRPS: 100,  // Global
    
    // Per-endpoint overrides
    EndpointLimits: map[string]int{
        "/generate":        50,   // 50 RPS
        "/generate/stream": 20,   // 20 RPS
        "/generate/object": 30,   // 30 RPS
    },
})

Middleware

// Custom middleware
authMiddleware := func(next forge.HandlerFunc) forge.HandlerFunc {
    return func(c forge.Context) error {
        token := c.Request().Header.Get("Authorization")
        if !isValid(token) {
            return c.JSON(401, map[string]string{"error": "unauthorized"})
        }
        return next(c)
    }
}

// Apply to SDK routes
router := app.Router()
sdkServer.MountRoutes(router, forge.WithMiddleware(authMiddleware))

Error Handling

All errors follow this format:

{
  "error": {
    "code": "invalid_request",
    "message": "Missing required field: prompt",
    "details": {
      "field": "prompt"
    }
  }
}

Error Codes:

  • invalid_request - Malformed request
  • unauthorized - Authentication failed
  • rate_limited - Rate limit exceeded
  • budget_exceeded - Cost budget exceeded
  • model_unavailable - Model not available
  • internal_error - Server error

Client SDKs

Go Client

import "github.com/xraph/forge/extensions/ai/sdk/client"

client := client.NewSDKClient(
    "https://api.example.com",
    client.WithAPIKey("sk-1234567890"),
)

result, err := client.Generate(ctx, client.GenerateRequest{
    Prompt:      "Hello",
    Model:       "gpt-4",
    Temperature: 0.7,
})

JavaScript Client

import { SDKClient } from '@xraph/forge-ai-sdk';

const client = new SDKClient({
  baseURL: 'https://api.example.com',
  apiKey: 'sk-1234567890',
});

const result = await client.generate({
  prompt: 'Hello',
  model: 'gpt-4',
  temperature: 0.7,
});

Python Client

from forge_ai_sdk import SDKClient

client = SDKClient(
    base_url="https://api.example.com",
    api_key="sk-1234567890"
)

result = client.generate(
    prompt="Hello",
    model="gpt-4",
    temperature=0.7
)

Deployment

Docker

FROM golang:1.23-alpine AS builder
WORKDIR /app
COPY . .
RUN go build -o server ./cmd/server

FROM alpine:latest
COPY --from=builder /app/server /server
EXPOSE 8080
CMD ["/server"]
Build and run the image:

docker build -t ai-sdk-server .
docker run -p 8080:8080 ai-sdk-server

Kubernetes

apiVersion: apps/v1
kind: Deployment
metadata:
  name: ai-sdk-server
spec:
  replicas: 3
  selector:
    matchLabels:
      app: ai-sdk-server
  template:
    metadata:
      labels:
        app: ai-sdk-server
    spec:
      containers:
      - name: server
        image: ai-sdk-server:latest
        ports:
        - containerPort: 8080
        env:
        - name: OPENAI_API_KEY
          valueFrom:
            secretKeyRef:
              name: ai-secrets
              key: openai-key

Next Steps

How is this guide?

Last updated on