API Server
An optional REST API server for the SDK with authentication and monitoring. The server uses Forge's router system and is mounted onto your application only when you enable it.
Quick Setup
package main

import (
    "context"

    "github.com/xraph/forge"
    "github.com/xraph/forge/extensions/ai"
    "github.com/xraph/forge/extensions/ai/sdk"
    "github.com/xraph/forge/extensions/ai/sdk/server"
)

func main() {
    app := forge.NewApp()

    // Register AI extension
    app.RegisterExtension(ai.NewExtension())

    // Get dependencies
    container := app.Container()
    llmManager, _ := container.Resolve("llmManager")
    logger, _ := container.Resolve("logger")
    metrics, _ := container.Resolve("metrics")

    // Create SDK server
    sdkServer := server.NewServer(
        llmManager.(sdk.LLMManager),
        logger.(forge.Logger),
        metrics.(forge.Metrics),
        server.ServerConfig{
            BasePath:     "/api/sdk",
            EnableAuth:   true,
            RateLimitRPS: 100,
        },
    )

    // Mount routes
    sdkServer.MountRoutes(app.Router())

    // Start
    app.Start(context.Background())
}
API Endpoints
Generate Text
POST /api/sdk/generate
Content-Type: application/json
{
  "prompt": "Explain quantum computing",
  "model": "gpt-4",
  "temperature": 0.7,
  "max_tokens": 500
}
Response:
{
  "content": "Quantum computing is...",
  "model": "gpt-4",
  "usage": {
    "input_tokens": 10,
    "output_tokens": 245,
    "total_tokens": 255
  }
}
Stream Generation
POST /api/sdk/generate/stream
Content-Type: application/json
{
  "prompt": "Write a story",
  "model": "gpt-4",
  "stream": true
}
Response: Server-Sent Events
data: {"token": "Once"}
data: {"token": " upon"}
data: {"token": " a"}
data: {"token": " time"}
data: {"done": true, "usage": {"total_tokens": 150}}Structured Output
Structured Output
POST /api/sdk/generate/object
Content-Type: application/json
{
  "prompt": "Extract: John Doe, 30, john@example.com",
  "schema": {
    "type": "object",
    "properties": {
      "name": {"type": "string"},
      "age": {"type": "integer"},
      "email": {"type": "string"}
    }
  }
}
Response:
{
  "name": "John Doe",
  "age": 30,
  "email": "john@example.com"
}
Multi-Modal
POST /api/sdk/multimodal
Content-Type: multipart/form-data
image: (binary)
text: What's in this image?
model: gpt-4-vision
Response:
{
  "text": "The image shows a landscape with mountains and a lake...",
  "usage": {
    "total_tokens": 150
  }
}
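To send this request from Go, the form can be built with the standard mime/multipart package. A minimal sketch, assuming the field names above and the base URL and API key used elsewhere in this guide:
package main

import (
    "bytes"
    "fmt"
    "io"
    "mime/multipart"
    "net/http"
    "os"
)

func main() {
    var buf bytes.Buffer
    w := multipart.NewWriter(&buf)

    // "image" field: the binary file
    f, err := os.Open("photo.jpg")
    if err != nil {
        panic(err)
    }
    defer f.Close()
    part, _ := w.CreateFormFile("image", "photo.jpg")
    io.Copy(part, f)

    // "text" and "model" fields
    w.WriteField("text", "What's in this image?")
    w.WriteField("model", "gpt-4-vision")
    w.Close()

    req, _ := http.NewRequest("POST", "https://api.example.com/api/sdk/multimodal", &buf)
    req.Header.Set("Content-Type", w.FormDataContentType())
    req.Header.Set("Authorization", "Bearer sk-1234567890")

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    out, _ := io.ReadAll(resp.Body)
    fmt.Println(string(out))
}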
Agent Management
Create Agent
POST /api/sdk/agents
Content-Type: application/json
{
  "id": "support-agent",
  "model": "gpt-4",
  "system_message": "You are a helpful support agent",
  "max_history": 50
}
Chat with Agent
POST /api/sdk/agents/:id/chat
Content-Type: application/json
{
  "message": "How can I reset my password?"
}
Response:
{
  "response": "To reset your password...",
  "tokens": 120
}
Get Agent State
GET /api/sdk/agents/:id
Response:
{
  "id": "support-agent",
  "model": "gpt-4",
  "history_count": 15,
  "state": {
    "user_id": "12345",
    "preferences": {...}
  }
}
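Putting the agent endpoints together: create an agent once, then chat with it by ID. A minimal sketch, assuming the base URL and API key used elsewhere in this guide; the request bodies mirror the examples above:
package main

import (
    "bytes"
    "fmt"
    "io"
    "net/http"
)

const baseURL = "https://api.example.com/api/sdk"

// post sends a JSON body to the SDK server and returns the raw response body.
func post(path, body string) (string, error) {
    req, _ := http.NewRequest("POST", baseURL+path, bytes.NewBufferString(body))
    req.Header.Set("Content-Type", "application/json")
    req.Header.Set("Authorization", "Bearer sk-1234567890")

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()
    out, _ := io.ReadAll(resp.Body)
    return string(out), nil
}

func main() {
    // Create the agent
    if _, err := post("/agents", `{"id": "support-agent", "model": "gpt-4", "system_message": "You are a helpful support agent", "max_history": 50}`); err != nil {
        panic(err)
    }

    // Chat with it by ID
    reply, err := post("/agents/support-agent/chat", `{"message": "How can I reset my password?"}`)
    if err != nil {
        panic(err)
    }
    fmt.Println(reply)
}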
RAG
POST /api/sdk/rag/query
Content-Type: application/json
{
  "query": "How do I deploy to production?",
  "top_k": 5
}
Response:
{
  "content": "To deploy to production...",
  "sources": [
    {
      "document_id": "deployment-guide",
      "score": 0.95
    }
  ],
  "tokens": 200
}
Cost Tracking
GET /api/sdk/cost
Response:
{
  "total": 450.25,
  "by_model": {
    "gpt-4": 320.50,
    "gpt-3.5-turbo": 129.75
  },
  "budget": {
    "limit": 1000.00,
    "used": 450.25,
    "remaining": 549.75,
    "percentage": 45.0
  }
}
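Because the response reports budget usage as a percentage, a client can poll this endpoint and warn before the limit is hit. A minimal sketch, assuming the base URL and API key used elsewhere in this guide and an arbitrary 80% threshold:
package main

import (
    "encoding/json"
    "fmt"
    "net/http"
)

// costReport mirrors the fields of the /cost response used here.
type costReport struct {
    Total  float64 `json:"total"`
    Budget struct {
        Limit      float64 `json:"limit"`
        Used       float64 `json:"used"`
        Percentage float64 `json:"percentage"`
    } `json:"budget"`
}

func main() {
    req, _ := http.NewRequest("GET", "https://api.example.com/api/sdk/cost", nil)
    req.Header.Set("Authorization", "Bearer sk-1234567890")

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    var report costReport
    if err := json.NewDecoder(resp.Body).Decode(&report); err != nil {
        panic(err)
    }

    // Warn once more than 80% of the budget is spent.
    if report.Budget.Percentage > 80 {
        fmt.Printf("WARNING: %.1f%% of budget used ($%.2f of $%.2f)\n",
            report.Budget.Percentage, report.Budget.Used, report.Budget.Limit)
    }
}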
Health Check
GET /api/sdk/health
Response:
{
  "status": "healthy",
  "services": {
    "llm": "healthy",
    "vector_store": "healthy",
    "cache": "healthy"
  },
  "uptime": "3d 12h 45m"
}
Metrics
GET /api/sdk/metrics
Response: Prometheus format
# HELP ai_sdk_requests_total Total requests
# TYPE ai_sdk_requests_total counter
ai_sdk_requests_total{model="gpt-4"} 12500
# HELP ai_sdk_tokens_total Total tokens used
# TYPE ai_sdk_tokens_total counter
ai_sdk_tokens_total{model="gpt-4",type="input"} 1250000
Configuration
server := server.NewServer(llm, logger, metrics, server.ServerConfig{
    // Base path for all routes
    BasePath: "/api/sdk",

    // Authentication
    EnableAuth:   true,
    AuthProvider: customAuthProvider,

    // Rate limiting
    RateLimitRPS:   100,
    RateLimitBurst: 200,

    // CORS
    EnableCORS:  true,
    CORSOrigins: []string{"https://example.com"},

    // Timeouts
    RequestTimeout: 30 * time.Second,
    StreamTimeout:  5 * time.Minute,

    // Limits
    MaxRequestSize: 10 * 1024 * 1024, // 10MB
})
Authentication
API Key Authentication
authProvider := server.NewAPIKeyAuth(map[string]string{
    "client-1": "sk-1234567890",
    "client-2": "sk-0987654321",
})

server := server.NewServer(llm, logger, metrics, server.ServerConfig{
    EnableAuth:   true,
    AuthProvider: authProvider,
})
Usage:
curl -H "Authorization: Bearer sk-1234567890" \
  -X POST https://api.example.com/api/sdk/generate \
  -d '{"prompt":"Hello"}'
JWT Authentication
authProvider := server.NewJWTAuth(server.JWTConfig{
    Secret:     os.Getenv("JWT_SECRET"),
    Expiration: 24 * time.Hour,
})

server := server.NewServer(llm, logger, metrics, server.ServerConfig{
    EnableAuth:   true,
    AuthProvider: authProvider,
})
Custom Authentication
type CustomAuth struct{}

func (a *CustomAuth) Authenticate(ctx context.Context, token string) (string, error) {
    // Your auth logic
    userID, err := validateToken(token)
    return userID, err
}

server := server.NewServer(llm, logger, metrics, server.ServerConfig{
    EnableAuth:   true,
    AuthProvider: &CustomAuth{},
})
Rate Limiting
// Per-endpoint rate limits
server := server.NewServer(llm, logger, metrics, server.ServerConfig{
    RateLimitRPS: 100, // Global

    // Per-endpoint overrides
    EndpointLimits: map[string]int{
        "/generate":        50, // 50 RPS
        "/generate/stream": 20, // 20 RPS
        "/generate/object": 30, // 30 RPS
    },
})
Middleware
// Custom middleware
authMiddleware := func(next forge.HandlerFunc) forge.HandlerFunc {
    return func(c forge.Context) error {
        token := c.Request().Header.Get("Authorization")
        if !isValid(token) {
            return c.JSON(401, map[string]string{"error": "unauthorized"})
        }
        return next(c)
    }
}

// Apply to SDK routes
router := app.Router()
sdkServer.MountRoutes(router, forge.WithMiddleware(authMiddleware))
Error Handling
All errors follow this format:
{
  "error": {
    "code": "invalid_request",
    "message": "Missing required field: prompt",
    "details": {
      "field": "prompt"
    }
  }
}
Error Codes:
- invalid_request - Malformed request
- unauthorized - Authentication failed
- rate_limited - Rate limit exceeded
- budget_exceeded - Cost budget exceeded
- model_unavailable - Model not available
- internal_error - Server error
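On the client side, the envelope can be decoded into a struct and branched on by code. A sketch of one approach; the struct fields mirror the format above, and the per-code reactions are only suggestions:
package example

import (
    "encoding/json"
    "fmt"
    "net/http"
)

// apiError mirrors the error envelope returned by the SDK server.
type apiError struct {
    Error struct {
        Code    string         `json:"code"`
        Message string         `json:"message"`
        Details map[string]any `json:"details"`
    } `json:"error"`
}

// handleErrorResponse decodes a non-2xx response and decides what to do next.
func handleErrorResponse(resp *http.Response) error {
    var e apiError
    if err := json.NewDecoder(resp.Body).Decode(&e); err != nil {
        return fmt.Errorf("unexpected error body: %w", err)
    }
    switch e.Error.Code {
    case "rate_limited":
        // Back off and retry later.
    case "budget_exceeded":
        // Stop issuing requests until the budget resets.
    case "model_unavailable":
        // Fall back to a different model.
    }
    return fmt.Errorf("%s: %s", e.Error.Code, e.Error.Message)
}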
Client SDKs
Go Client
import "github.com/xraph/forge/extensions/ai/sdk/client"
client := client.NewSDKClient(
    "https://api.example.com",
    client.WithAPIKey("sk-1234567890"),
)

result, err := client.Generate(ctx, client.GenerateRequest{
    Prompt:      "Hello",
    Model:       "gpt-4",
    Temperature: 0.7,
})
JavaScript Client
import { SDKClient } from '@xraph/forge-ai-sdk';
const client = new SDKClient({
  baseURL: 'https://api.example.com',
  apiKey: 'sk-1234567890',
});

const result = await client.generate({
  prompt: 'Hello',
  model: 'gpt-4',
  temperature: 0.7,
});
Python Client
from forge_ai_sdk import SDKClient
client = SDKClient(
    base_url="https://api.example.com",
    api_key="sk-1234567890"
)

result = client.generate(
    prompt="Hello",
    model="gpt-4",
    temperature=0.7
)
Deployment
Docker
FROM golang:1.23-alpine AS builder
WORKDIR /app
COPY . .
RUN go build -o server ./cmd/server
FROM alpine:latest
COPY --from=builder /app/server /server
EXPOSE 8080
CMD ["/server"]docker build -t ai-sdk-server .
docker run -p 8080:8080 ai-sdk-serverKubernetes
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ai-sdk-server
spec:
  replicas: 3
  selector:
    matchLabels:
      app: ai-sdk-server
  template:
    metadata:
      labels:
        app: ai-sdk-server
    spec:
      containers:
        - name: server
          image: ai-sdk-server:latest
          ports:
            - containerPort: 8080
          env:
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ai-secrets
                  key: openai-key
Next Steps
- Dashboard - Web UI for monitoring
- Integration - Full integration guide
- Examples - API usage examples