Source code for models.anthropic_ollama_api_model
"""
Script
------
anthropic_ollama_api_model.py
Path
----
models/anthropic_ollama_api_model.py
Purpose
-------
Anthropic models via Ollama's Anthropic-compatible API (Messages API).
Supports both local and cloud Ollama models:
- Local: ANTHROPIC_AUTH_TOKEN=ollama + ANTHROPIC_BASE_URL=http://localhost:11434
- Cloud: ANTHROPIC_AUTH_TOKEN=<your_api_key> + ANTHROPIC_BASE_URL=<cloud_endpoint>
Uses Anthropic Messages API for consistency with other Claude models.
No subprocess CLI calls - pure HTTP API orchestration.
Inputs
------
model_name (str): Ollama model identifier (e.g., "minimax-m2:cloud", "glm-4.7:cloud")
messages (list): Conversation messages in Anthropic format
max_tokens (int): Maximum response length
system (str): Optional system prompt
temperature (float): Sampling temperature
Outputs
-------
Dictionary: {output, model, tokens_used, provider}
Compliance
----------
API-based orchestration compliant with provider ToS.
Requires proper API key authentication via environment variables.
Parameters
----------
timeout: Default 600s for model call completion
max_retries: Retry transient failures (default 2)
Failure Modes
-------------
- Ollama not running error dict with details
- Model not available error dict
- Timeout waiting for response error dict
- Invalid API key 401 error
Author: Julen Gamboa <julen.gamboa.ds@gmail.com>
Created
-------
2026-02-13
Last Edited
-----------
2026-02-14
"""
from __future__ import annotations
import logging
import os
from typing import Any
import requests
logger = logging.getLogger(__name__)
[docs]
class AnthropicOllamaAPIModel:
"""Anthropic models via Ollama's Anthropic-compatible API."""
# Valid models (synced with provider_registry.default.json)
VALID_MODELS = {
"devstral-2:123b-cloud",
"gpt-oss:120b-cloud",
"gemini-3-flash-preview:cloud",
"minimax-m2.5:cloud",
"mistral-large-3:675b-cloud",
}
[docs]
def __init__(
self,
model_name: str = "minimax-m2.5:cloud",
base_url: str | None = None,
api_key: str | None = None,
max_retries: int = 2
):
"""
Initialize Anthropic Ollama API provider.
Args:
model_name: Ollama model identifier (local or cloud)
base_url: Ollama endpoint URL (defaults to env var ANTHROPIC_BASE_URL or localhost)
api_key: API key for authentication (defaults to env var ANTHROPIC_AUTH_TOKEN)
max_retries: Number of retries for transient failures
"""
self.model_name = model_name
self.max_retries = max_retries
# Get configuration from env or params
self.base_url = base_url or os.getenv(
"ANTHROPIC_BASE_URL",
"http://localhost:11434"
)
self.api_key = api_key or os.getenv("ANTHROPIC_AUTH_TOKEN", "ollama")
self.timeout = 600
# Set up headers for Anthropic Messages API
self.headers = {
"Content-Type": "application/json",
}
if self.api_key != "ollama":
# Cloud API key
self.headers["Authorization"] = f"Bearer {self.api_key}"
[docs]
def call(self, prompt: str, **kwargs) -> dict[str, Any]:
"""
Call model via Ollama's Anthropic-compatible API.
Args:
prompt: Input prompt text
**kwargs: Additional parameters (max_tokens, temperature, system, etc.)
Returns:
Dictionary with response and metadata
"""
try:
# Build messages in Anthropic format
system = kwargs.pop("system", None)
max_tokens = kwargs.pop("max_tokens", 4096)
temperature = kwargs.pop("temperature", 0.00000073)
messages = [{"role": "user", "content": prompt}]
# Prepare request for Messages API
payload = {
"model": self.model_name,
"messages": messages,
"max_tokens": max_tokens,
"temperature": temperature,
}
if system:
payload["system"] = system
# Add any extra kwargs
payload.update(kwargs)
# Make HTTP request to Ollama Messages API endpoint
url = f"{self.base_url}/v1/messages"
response = requests.post(
url,
json=payload,
headers=self.headers,
timeout=self.timeout
)
response.raise_for_status()
# Parse response
result = response.json()
# Extract text content from response (may have thinking blocks first)
text_output = ""
for content_item in result.get("content", []):
if content_item.get("type") == "text":
text_output = content_item.get("text", "")
break
return {
"output": text_output,
"model": self.model_name,
"tokens_used": result.get("usage", {}).get("output_tokens", 0),
"provider": "anthropic_ollama_api"
}
except requests.exceptions.RequestException as e:
logger.error(f"Ollama API error: {str(e)}")
return {
"output": None,
"error": str(e),
"model": self.model_name,
"provider": "anthropic_ollama_api"
}