API reference
Public API — functions, the LMRotate class, configuration models, and data types.
Everything below is exported from the top-level llm_rotate package.
from llm_rotate import (
configure, lm, LMRotate, LMRotateConfig,
configure_from_dict, configure_from_file,
AcquireContext, HealthReport, ContentPart, GenerateContentRequest,
ConfigurationError, LMRotateError, NoAvailableKeyError, __version__,
)
configure()
Initialise the module-level lm singleton. Must be called once, before the
first use of lm.
def configure(
use_keys: list[str] | None = None,
*,
registry: dict | None = None,
config: LMRotateConfig | None = None,
strategy: str | None = None,
priorities: dict[str, int] | None = None,
fallback_chains: dict[str, list[dict[str, str]]] | None = None,
) -> None
Provide either config= or registry= + use_keys=. Built-in providers are
merged automatically. Calling configure() a second time raises ConfigurationError.
lm
A proxy to the configured singleton LMRotate instance. Exposes the same
methods as LMRotate (chat, chat_stream, chat_sync, generate_content,
health, usage_summary, …).
configure_from_dict()
Build an LMRotateConfig from a registry dict without touching the singleton.
def configure_from_dict(
registry: dict,
use_keys: list[str],
*,
strategy: str | None = None,
priorities: dict[str, int] | None = None,
fallback_chains: dict[str, list[dict[str, str]]] | None = None,
) -> LMRotateConfig
configure_from_file()
Build an LMRotateConfig from a JSON or YAML file (YAML needs the yaml
extra). use_keys may live in the file or be passed in. See
Configuration.
def configure_from_file(
path: str | Path,
*,
use_keys: list[str] | None = None,
) -> LMRotateConfig
class LMRotate
The orchestrator. Construct it with an LMRotateConfig.
def __init__(self, config: LMRotateConfig) -> None
chat()
async def chat(
self,
model: str,
messages: list[ChatMessage],
*,
provider: str | None = None,
max_tokens: int | None = None,
temperature: float | None = None,
max_retries: int | None = None,
**kwargs,
) -> ChatResponse
chat_stream()
async def chat_stream(
self, model: str, messages: list[ChatMessage], *, ...
) -> AsyncIterator[StreamChunk]
chat_sync()
def chat_sync(
self, model: str, messages: list[ChatMessage], *, ...
) -> ChatResponse
generate_content()
Google AI Studio + Vertex only. See Google multimodal.
async def generate_content(
self,
model: str,
contents: list,
*,
system_instruction: str | None = None,
response_mime_type: str | None = None,
max_output_tokens: int | None = None,
temperature: float | None = None,
top_p: float | None = None,
seed: int | None = None,
thinking_budget: int | None = None,
disable_automatic_function_calling: bool = True,
max_retries: int | None = None,
provider: str | None = None,
**kwargs,
) -> ChatResponse
async def generate_content_stream(...) -> AsyncIterator[StreamChunk]
acquire()
Low-level escape hatch — borrow a raw key to use a vendor SDK directly while still reporting outcomes back to the health machine. See Advanced.
@asynccontextmanager
async def acquire(
self, provider: str, *, model: str | None = None
) -> AsyncIterator[AcquireContext]
Lifecycle & observability
async def health(self) -> HealthReport
async def usage_summary(self) -> dict
def reload_config(self, config: LMRotateConfig) -> None
async def close(self) -> None
Configuration models
class DefaultsConfig(BaseModel):
selection_strategy: str = "health_aware" # round_robin|priority|weighted|health_aware
max_retries: int = 3
cooldown_seconds: int = 60
quarantine_seconds: int = 300
max_consecutive_failures: int = 5
class FallbackEntry(BaseModel):
provider: str
upstream: str | None = None
class StateStoreConfig(BaseModel):
backend: str = "memory" # "memory" | "redis"
redis_url: str = "redis://localhost:6379/0"
redis_namespace: str = "llmrotate"
class LMRotateConfig(BaseModel):
defaults: DefaultsConfig
providers: dict[str, ProviderConfig]
keys: list[KeyConfig]
fallback_chains: dict[str, list[FallbackEntry]]
state_store: StateStoreConfig
KeyConfig and ProviderConfig fields are documented in Configuration and
Providers.
Data types
ChatResponse
| Field | Description |
|---|---|
| content | The generated text. |
| provider | Provider that served the call. |
| model | Model used. |
| key_id | The key that served it (masked in logs). |
| usage | Token usage. |
| latency_ms | End-to-end latency. |
| finish_reason | Why generation stopped. |
| raw | The underlying provider response. |
StreamChunk
| Field | Description |
|---|---|
| delta | Incremental text for this chunk. |
| finish_reason | Set on the final chunk. |
| usage | Set on the final chunk. |
| provider / model | Origin of the stream. |
ContentPart
A multimodal input part. type is one of text, pdf_bytes, image_bytes,
file; the other fields (text, data, file_uri, mime_type) apply by
type. See Google multimodal.
AcquireContext
Returned by acquire(). Fields: key_value, provider, key_id, metadata.
Methods: report_success(upstream_provider=None), report_error(exc).
HealthReport
Snapshot of per-key / per-provider health returned by health().