{"name":"Shared AI Gateway","version":"4.0.0","description":"4-Tier LLM + 2-Tier Embedding GPU Fallback System with Anthropic Claude + Redis Caching","backends":{"tier1_localGpu":{"configured":true,"model":"llama3.2:3b-instruct-q4_K_M","url":"https://gpu.el-jefe.me","description":"Local GPU Ollama via Cloudflare tunnel (fastest, free)"},"tier2_vpsCpu":{"model":"llama-3.2-3b-instruct","description":"Llama 3.2 3B on VPS CPU (always available)"},"tier3_huggingface":{"configured":true,"model":"meta-llama/Llama-3.1-8B-Instruct","description":"HuggingFace Inference API (fast, reliable)"},"tier4_runpod":{"configured":true,"model":"meta-llama/Llama-3.1-8B-Instruct","description":"RTX 4090 via RunPod Serverless (paid cloud fallback)"},"anthropic":{"configured":true,"model":"claude-sonnet-4-20250514","description":"Claude API for complex reasoning (request with backend: \"anthropic\")"}},"preference":"auto","embedding":{"tier1_localGpu":{"configured":true,"model":"bge_embeddings","url":"https://embeddings.el-jefe.me","description":"Local GPU Triton (primary)"},"tier2_vpsCpu":{"model":"bge_embeddings","url":"http://triton-embeddings:8000","description":"VPS CPU Triton (always-available fallback)"}},"endpoints":{"POST /api/ai/generate":"General text generation","POST /api/ai/tags":"Generate bookmark tags (keyword or AI)","POST /api/ai/explain-code":"Explain code snippets","POST /api/ai/flashcard":"Generate flashcards","POST /api/ai/quiz":"Generate quiz questions","POST /api/ai/chat":"Multi-turn conversational chat (with context)","POST /api/ai/describe":"Generate bookmark descriptions","POST /api/ai/embed":"Generate text embeddings (single or batch)","GET /health":"Health check with backend status","GET /metrics":"Prometheus metrics endpoint"},"usage":{"backend_param":"Add \"backend\": \"localGpu|local|huggingface|runpod|anthropic|auto\" to force a specific backend","auto_mode":"Default \"auto\" tries backends in order: localGpu → local (VPS CPU) → huggingface → runpod","anthropic_mode":"Use \"backend\": \"anthropic\" or \"claude\" for complex reasoning tasks (K8s analysis, debugging)"}}