API reference

Public API — functions, the LMRotate class, configuration models, and data types.

Everything below is exported from the top-level llm_rotate package.

from llm_rotate import (
    configure, lm, LMRotate, LMRotateConfig,
    configure_from_dict, configure_from_file,
    AcquireContext, HealthReport, ContentPart, GenerateContentRequest,
    ConfigurationError, LMRotateError, NoAvailableKeyError, __version__,
)

configure()

Initialise the module-level lm singleton. Must be called once, before the first use of lm.

def configure(
    use_keys: list[str] | None = None,
    *,
    registry: dict | None = None,
    config: LMRotateConfig | None = None,
    strategy: str | None = None,
    priorities: dict[str, int] | None = None,
    fallback_chains: dict[str, list[dict[str, str]]] | None = None,
) -> None

Pass either config=, or registry= together with use_keys=. Built-in providers are merged in automatically. Calling configure() a second time raises ConfigurationError.

lm

A proxy to the configured singleton LMRotate instance. Exposes the same methods as LMRotate (chat, chat_stream, chat_sync, generate_content, health, usage_summary, …).

configure_from_dict()

Build an LMRotateConfig from a registry dict without touching the singleton.

def configure_from_dict(
    registry: dict,
    use_keys: list[str],
    *,
    strategy: str | None = None,
    priorities: dict[str, int] | None = None,
    fallback_chains: dict[str, list[dict[str, str]]] | None = None,
) -> LMRotateConfig

configure_from_file()

Build an LMRotateConfig from a JSON or YAML file (YAML support requires the optional yaml extra). use_keys may be specified in the file or passed as an argument. See Configuration.

def configure_from_file(
    path: str | Path,
    *,
    use_keys: list[str] | None = None,
) -> LMRotateConfig

class LMRotate

The orchestrator. Construct it with an LMRotateConfig.

def __init__(self, config: LMRotateConfig) -> None

chat()

async def chat(
    self,
    model: str,
    messages: list[ChatMessage],
    *,
    provider: str | None = None,
    max_tokens: int | None = None,
    temperature: float | None = None,
    max_retries: int | None = None,
    **kwargs,
) -> ChatResponse

chat_stream()

async def chat_stream(
    self, model: str, messages: list[ChatMessage], *, ...
) -> AsyncIterator[StreamChunk]

chat_sync()

def chat_sync(
    self, model: str, messages: list[ChatMessage], *, ...
) -> ChatResponse

generate_content()

Available only with the Google AI Studio and Vertex providers. See Google multimodal.

async def generate_content(
    self,
    model: str,
    contents: list,
    *,
    system_instruction: str | None = None,
    response_mime_type: str | None = None,
    max_output_tokens: int | None = None,
    temperature: float | None = None,
    top_p: float | None = None,
    seed: int | None = None,
    thinking_budget: int | None = None,
    disable_automatic_function_calling: bool = True,
    max_retries: int | None = None,
    provider: str | None = None,
    **kwargs,
) -> ChatResponse
 
async def generate_content_stream(...) -> AsyncIterator[StreamChunk]

acquire()

Low-level escape hatch — borrow a raw key so you can call a vendor SDK directly, while still reporting each call's outcome (via report_success() / report_error()) back to the key-health tracking. See Advanced.

@asynccontextmanager
async def acquire(
    self, provider: str, *, model: str | None = None
) -> AsyncIterator[AcquireContext]

Lifecycle & observability

async def health(self) -> HealthReport
async def usage_summary(self) -> dict
def reload_config(self, config: LMRotateConfig) -> None
async def close(self) -> None

Configuration models

class DefaultsConfig(BaseModel):
    selection_strategy: str = "health_aware"  # round_robin|priority|weighted|health_aware
    max_retries: int = 3
    cooldown_seconds: int = 60
    quarantine_seconds: int = 300
    max_consecutive_failures: int = 5
 
class FallbackEntry(BaseModel):
    provider: str
    upstream: str | None = None
 
class StateStoreConfig(BaseModel):
    backend: str = "memory"  # "memory" | "redis"
    redis_url: str = "redis://localhost:6379/0"
    redis_namespace: str = "llmrotate"
 
class LMRotateConfig(BaseModel):
    defaults: DefaultsConfig
    providers: dict[str, ProviderConfig]
    keys: list[KeyConfig]
    fallback_chains: dict[str, list[FallbackEntry]]
    state_store: StateStoreConfig

KeyConfig and ProviderConfig fields are documented in Configuration and Providers.

Data types

ChatResponse

Field	Description
`content`	The generated text.
`provider`	Provider that served the call.
`model`	Model used.
`key_id`	The key that served it (masked in logs).
`usage`	Token usage.
`latency_ms`	End-to-end latency.
`finish_reason`	Why generation stopped.
`raw`	The underlying provider response.

StreamChunk

Field	Description
`delta`	Incremental text for this chunk.
`finish_reason`	Set on the final chunk.
`usage`	Set on the final chunk.
`provider` / `model`	Origin of the stream.

ContentPart

A multimodal input part. type is one of text, pdf_bytes, image_bytes, or file; which of the remaining fields (text, data, file_uri, mime_type) are used depends on type. See Google multimodal.

AcquireContext

Yielded by the acquire() async context manager. Fields: key_value, provider, key_id, metadata. Methods: report_success(upstream_provider=None), report_error(exc).

HealthReport

Snapshot of per-key / per-provider health returned by health().

On this page:

- configure()
- lm
- configure_from_dict()
- configure_from_file()
- class LMRotate
- chat()
- chat_stream()
- chat_sync()
- generate_content()
- acquire()
- Lifecycle & observability
- Configuration models
- Data types
- ChatResponse
- StreamChunk
- ContentPart
- AcquireContext
- HealthReport