ensemble/
├── cmd/ensemble/ # Main entry point (CLI: serve, migrate, etc.)
├── internal/
│ ├── config/ # YAML config structures, hot-reload, validation cache
│ ├── server/ # HTTP server, route handlers, middleware
│ ├── proxy/ # Provider request/response proxying
│ ├── router/ # Routing engine (decisions based on cache, rate limits, and cost)
│ ├── ratelimit/ # Local-first rate limiting with Redis sync
│ ├── storage/ # EmbeddedStore (SQLite + Redis), API keys, customers
│ ├── providers/ # Provider adapters (Anthropic, OpenAI, Gemini, xAI, OpenRouter)
│ ├── streaming/ # SSE/WebSocket stream handling, stall detection
│ ├── batch/ # Batch processing (Anthropic batch API)
│ ├── otel/ # OpenTelemetry tracing and metrics
│ └── async_inference/ # Response persistence (S3), status tracking
├── pkg/
│ ├── types/ # Shared types (InferenceRequest, EventBlock, ToolDefinition, etc.)
│ └── logging/ # Async batched logger (65K ring buffer)
├── client-go/ # Go client library
├── client-python/ # Python client library
├── client-typescript/ # TypeScript client library
└── config/ # Example YAML configurations