"""Direct Chat Completion Interface for Multi-Provider LLM Access.
Provides immediate access to LLM inference across multiple providers without the
overhead of structured generation frameworks. This module handles direct chat completion
requests with support for advanced features like extended thinking, structured outputs,
and automatic TypedDict to Pydantic model conversion.
Key capabilities include:
- Direct inference access for simple use cases
- Extended thinking support for Anthropic and Google models
- Structured output generation with Pydantic models or TypedDict
- Automatic TypedDict to Pydantic conversion for seamless integration
- HTTP proxy support for enterprise environments
- Provider-specific optimization and error handling
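
A minimal quickstart (illustrative; provider credentials are assumed to be
configured via :mod:`configs.config`)::

    from framework.models import get_chat_completion

    answer = get_chat_completion(
        message="Summarize the benefits of unit testing.",
        provider="openai",
        model_id="gpt-4",
        max_tokens=200,
    )
    print(answer)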
.. note::
This module is optimized for direct inference and simple integration scenarios.
For complex structured generation workflows with agents, consider using
:func:`~factory.get_model` with PydanticAI agents instead.
.. seealso::
:func:`get_chat_completion` : Main chat completion interface
:func:`~factory.get_model` : Model factory for structured generation
:mod:`configs.config` : Provider configuration management
"""
import logging
import os
from typing import Optional, Union, Type
from typing_extensions import TypedDict
from urllib.parse import urlparse
from pydantic import BaseModel, create_model, Field
import anthropic
import openai
import ollama
import httpx
from google import genai
from google.genai import types as genai_types
from configs.config import get_provider_config
def _is_typed_dict(cls) -> bool:
"""Check if a class is a TypedDict by examining its attributes.
TypedDict classes have specific attributes that distinguish them from regular
    classes and Pydantic models. This function performs a lightweight attribute
    check rather than relying on typing-module introspection helpers.
:param cls: Class to check for TypedDict characteristics
:type cls: type
:return: True if the class appears to be a TypedDict, False otherwise
:rtype: bool
.. note::
This check is based on the presence of __annotations__ and __total__
attributes which are characteristic of TypedDict classes.
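
    Example (illustrative; ``Point`` and ``Plain`` are hypothetical classes)::

        >>> from typing_extensions import TypedDict
        >>> class Point(TypedDict):
        ...     x: int
        ...     y: int
        >>> _is_typed_dict(Point)
        True
        >>> class Plain:
        ...     value: int
        >>> _is_typed_dict(Plain)
        False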
"""
return hasattr(cls, '__annotations__') and hasattr(cls, '__total__')
def _convert_typed_dict_to_pydantic(typed_dict_cls) -> Type[BaseModel]:
"""Convert a TypedDict class to a dynamically created Pydantic BaseModel.
This function enables seamless integration between TypedDict-based type hints
and Pydantic-based structured output generation. It preserves field names and
types while adding Pydantic validation and serialization capabilities.
The conversion process:
1. Extracts field annotations from the TypedDict
2. Creates Pydantic field definitions with descriptions
3. Dynamically generates a new BaseModel class
4. Preserves type information for validation
:param typed_dict_cls: TypedDict class to convert to Pydantic model
:type typed_dict_cls: type
:raises ValueError: If the provided class is not a valid TypedDict
:return: Dynamically created Pydantic BaseModel with equivalent structure
:rtype: Type[BaseModel]
.. note::
All fields in the generated Pydantic model include basic descriptions.
The original TypedDict class name is preserved with a "Pydantic" suffix.
.. seealso::
:func:`_is_typed_dict` : TypedDict detection utility
:func:`_handle_output_conversion` : Convert results back to dict format
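
    Example (illustrative; field descriptions are auto-generated)::

        >>> from typing_extensions import TypedDict
        >>> class Answer(TypedDict):
        ...     text: str
        ...     score: float
        >>> Model = _convert_typed_dict_to_pydantic(Answer)
        >>> Model.__name__
        'AnswerPydantic'
        >>> Model(text="ok", score=0.9).model_dump()
        {'text': 'ok', 'score': 0.9}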
"""
if not _is_typed_dict(typed_dict_cls):
raise ValueError(f"Expected TypedDict, got {type(typed_dict_cls)}")
# Get the annotations from the TypedDict
annotations = getattr(typed_dict_cls, '__annotations__', {})
# Convert to Pydantic field definitions
field_definitions = {}
for field_name, field_type in annotations.items():
        # Create required Pydantic fields with basic auto-generated descriptions
        field_definitions[field_name] = (field_type, Field(description=f"Field {field_name}"))
# Create the Pydantic model dynamically
model_name = f"{typed_dict_cls.__name__}Pydantic"
pydantic_model = create_model(model_name, **field_definitions)
return pydantic_model
def _handle_output_conversion(result, is_typed_dict_output: bool):
"""Convert Pydantic model results back to dictionary format when appropriate.
This function handles the final step of TypedDict integration by converting
Pydantic model instances back to plain dictionaries when the original
output_model parameter was a TypedDict. This maintains API consistency
and expected return types for users.
:param result: Model inference result, potentially a Pydantic model instance
:type result: Any
:param is_typed_dict_output: Whether original output_model was a TypedDict
:type is_typed_dict_output: bool
:return: Result converted to dict if needed, otherwise unchanged
:rtype: Any
.. note::
Only Pydantic BaseModel instances are converted to dictionaries.
Other result types (strings, lists) are returned unchanged.
.. seealso::
:func:`_convert_typed_dict_to_pydantic` : Initial TypedDict conversion
:meth:`pydantic.BaseModel.model_dump` : Pydantic serialization method
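
    Example (illustrative)::

        >>> from pydantic import BaseModel
        >>> class Item(BaseModel):
        ...     name: str
        >>> _handle_output_conversion(Item(name="x"), is_typed_dict_output=True)
        {'name': 'x'}
        >>> _handle_output_conversion("plain text", is_typed_dict_output=True)
        'plain text'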
"""
if is_typed_dict_output and isinstance(result, BaseModel):
return result.model_dump()
return result
logger = logging.getLogger(__name__)
def _validate_proxy_url(proxy_url: str) -> bool:
"""Validate HTTP proxy URL format and accessibility.
Performs basic validation of proxy URL format to ensure it follows
standard HTTP/HTTPS proxy URL patterns. This helps catch common
configuration errors early and provides clear feedback.
:param proxy_url: Proxy URL to validate
:type proxy_url: str
:return: True if proxy URL appears valid, False otherwise
:rtype: bool
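
    Example (illustrative)::

        >>> _validate_proxy_url("http://proxy.example.com:8080")
        True
        >>> _validate_proxy_url("proxy.example.com:8080")
        False
        >>> _validate_proxy_url("")
        False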
"""
if not proxy_url:
return False
try:
parsed = urlparse(proxy_url)
# Check for valid scheme and netloc (host:port)
if parsed.scheme not in ('http', 'https'):
return False
if not parsed.netloc:
return False
return True
except Exception:
return False
def _get_ollama_fallback_urls(base_url: str) -> list[str]:
"""Generate fallback URLs for Ollama based on the current base URL.
This helper function generates appropriate fallback URLs to handle
common development scenarios where the execution context (container vs local)
doesn't match the configured Ollama URL.
:param base_url: Current configured Ollama base URL
:type base_url: str
:return: List of fallback URLs to try in order
:rtype: list[str]
.. note::
Fallback URLs are generated based on common patterns:
- host.containers.internal -> localhost (container to local)
- localhost -> host.containers.internal (local to container)
- Generic fallbacks for other scenarios
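
    Example (illustrative hosts and ports)::

        >>> _get_ollama_fallback_urls("http://localhost:8888")
        ['http://host.containers.internal:8888', 'http://host.containers.internal:11434']
        >>> _get_ollama_fallback_urls("http://ollama.internal:9000")
        ['http://localhost:11434', 'http://host.containers.internal:11434']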
"""
fallback_urls = []
if "host.containers.internal" in base_url:
# Running in container but Ollama might be on localhost
fallback_urls = [
base_url.replace("host.containers.internal", "localhost"),
"http://localhost:11434"
]
elif "localhost" in base_url:
# Running locally but Ollama might be in container context
fallback_urls = [
base_url.replace("localhost", "host.containers.internal"),
"http://host.containers.internal:11434"
]
else:
# Generic fallbacks for other scenarios
fallback_urls = [
"http://localhost:11434",
"http://host.containers.internal:11434"
]
return fallback_urls
def get_chat_completion(
message: str,
max_tokens: int = 1024,
model_config: Optional[dict] = None,
provider: Optional[str] = None,
model_id: Optional[str] = None,
budget_tokens: int | None = None,
enable_thinking: bool = False,
output_model: Optional[Type[BaseModel]] = None,
    base_url: Optional[str] = None,  # custom endpoint; used by the Ollama, OpenAI, and CBORG clients
) -> Union[str, BaseModel, list]:
"""Execute direct chat completion requests across multiple AI providers.
This function provides immediate access to LLM model inference with support for
advanced features including extended thinking, structured outputs, and automatic
TypedDict conversion. It handles provider-specific API differences, credential
management, and HTTP proxy configuration transparently.
The function supports multiple interaction patterns:
- Simple text-to-text completion for basic use cases
- Structured output generation with Pydantic models or TypedDict
- Extended thinking workflows for complex reasoning tasks
- Enterprise proxy and timeout configuration
Provider-specific features:
- **Anthropic**: Extended thinking with budget_tokens, content block responses
- **Google**: Thinking configuration for enhanced reasoning
- **OpenAI**: Structured outputs with beta chat completions API
- **Ollama**: Local model inference with JSON schema validation
- **CBORG**: OpenAI-compatible API with custom endpoints (LBNL-provided service)
:param message: Input prompt or message for the LLM model
:type message: str
:param max_tokens: Maximum tokens to generate in the response
:type max_tokens: int
:param model_config: Configuration dictionary with provider and model settings
:type model_config: dict, optional
:param provider: AI provider name ('anthropic', 'google', 'openai', 'ollama', 'cborg')
:type provider: str, optional
:param model_id: Specific model identifier recognized by the provider
:type model_id: str, optional
:param budget_tokens: Thinking budget for Anthropic/Google extended reasoning
:type budget_tokens: int, optional
:param enable_thinking: Enable extended thinking capabilities where supported
:type enable_thinking: bool
:param output_model: Pydantic model or TypedDict for structured output validation
:type output_model: Type[BaseModel], optional
    :param base_url: Custom API endpoint; required for the OpenAI, Ollama, and CBORG
        providers when not supplied by the provider configuration
    :type base_url: str, optional
:raises ValueError: If required provider, model_id, api_key, or base_url are missing
:raises ValueError: If budget_tokens >= max_tokens or other invalid parameter combinations
:raises pydantic.ValidationError: If output_model validation fails for structured outputs
:raises anthropic.APIError: For Anthropic API-specific errors
:raises openai.APIError: For OpenAI API-specific errors
:raises ollama.ResponseError: For Ollama API-specific errors
:return: Model response in format determined by provider and output_model settings
:rtype: Union[str, BaseModel, list]
.. note::
Extended thinking is currently supported by Anthropic (with budget_tokens)
and Google (with thinking_config). Other providers will log warnings if
thinking parameters are provided.
    .. warning::
        When using structured outputs, ensure your prompt guides the model toward
        generating the expected structure. Not all models handle schema constraints
        equally well. Note that ``output_model`` is currently applied only by the
        OpenAI, CBORG, and Ollama code paths; Anthropic and Google requests return
        plain text or content blocks regardless.
Examples:
Simple text completion::
>>> from framework.models import get_chat_completion
>>> response = get_chat_completion(
... message="Explain quantum computing in simple terms",
... provider="anthropic",
... model_id="claude-3-sonnet-20240229",
... max_tokens=500
... )
>>> print(response)
Extended thinking with Anthropic::
>>> response = get_chat_completion(
... message="Solve this complex reasoning problem...",
... provider="anthropic",
... model_id="claude-3-sonnet-20240229",
... enable_thinking=True,
... budget_tokens=1000,
... max_tokens=2000
... )
>>> # Response includes thinking process and final answer
Structured output with Pydantic model::
>>> from pydantic import BaseModel
>>> class AnalysisResult(BaseModel):
... summary: str
... confidence: float
... recommendations: list[str]
>>>
>>> result = get_chat_completion(
... message="Analyze this data and provide structured results",
... provider="openai",
... model_id="gpt-4",
... output_model=AnalysisResult
... )
>>> print(f"Confidence: {result.confidence}")
Using configuration dictionary::
>>> config = {
... "provider": "ollama",
... "model_id": "llama3.1:8b",
... "max_tokens": 1000
... }
>>> response = get_chat_completion(
... message="Hello, how are you?",
... model_config=config,
... base_url="http://localhost:11434"
... )
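
        Structured output with a TypedDict (illustrative; the TypedDict is converted
        to a Pydantic model internally and the result is returned as a plain dict)::

            >>> from typing_extensions import TypedDict
            >>> class Sentiment(TypedDict):
            ...     label: str
            ...     score: float
            >>>
            >>> result = get_chat_completion(
            ...     message="Classify the sentiment of: 'Great product!'",
            ...     provider="openai",
            ...     model_id="gpt-4",
            ...     output_model=Sentiment
            ... )
            >>> print(result["label"])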
.. seealso::
:func:`~factory.get_model` : Create model instances for PydanticAI agents
:func:`configs.config.get_provider_config` : Provider configuration loading
:class:`pydantic.BaseModel` : Base class for structured output models
:doc:`/developer-guides/01_understanding-the-framework/02_convention-over-configuration` : Complete model configuration and usage guide
"""
# Handle TypedDict to Pydantic conversion automatically
original_output_model = output_model
is_typed_dict_output = False
if output_model is not None and _is_typed_dict(output_model):
is_typed_dict_output = True
output_model = _convert_typed_dict_to_pydantic(output_model)
# Configuration setup - handle both model_config set and not set cases
if model_config is not None:
provider = model_config.get("provider", provider)
model_id = model_config.get("model_id", model_id)
max_tokens = model_config.get("max_tokens", max_tokens)
        # Get provider config after provider is determined
        provider_config = get_provider_config(provider) if provider else {}
        # An explicitly passed base_url takes precedence over the provider config
        if base_url is None:
            base_url = provider_config.get("base_url")
        api_key = provider_config.get("api_key")
else:
# Set defaults when model_config is not provided
if not provider:
raise ValueError("Provider must be specified either directly or via model_config")
provider_config = get_provider_config(provider)
if not model_id:
model_id = provider_config.get("default_model_id")
if base_url is None:
base_url = provider_config.get("base_url")
api_key = provider_config.get("api_key")
# Define provider requirements
provider_requirements = {
"google": {"model_id": True, "api_key": True, "base_url": False, "use_proxy": True},
"anthropic": {"model_id": True, "api_key": True, "base_url": False, "use_proxy": True},
"openai": {"model_id": True, "api_key": True, "base_url": True, "use_proxy": True},
"ollama": {"model_id": True, "api_key": False, "base_url": True, "use_proxy": False},
"cborg": {"model_id": True, "api_key": True, "base_url": True, "use_proxy": True},
}
if provider not in provider_requirements:
raise ValueError(f"Invalid provider: {provider}. Must be 'anthropic', 'cborg', 'google', 'ollama', or 'openai'.")
requirements = provider_requirements[provider]
# Common validation
if requirements["model_id"] and not model_id:
raise ValueError(f"Model ID for {provider} not provided.")
if requirements["api_key"] and not api_key:
raise ValueError(f"No API key provided for {provider}.")
if requirements["base_url"] and not base_url:
raise ValueError(f"No base URL provided for {provider}.")
    # Set up an HTTP client with proxy support if needed; otherwise http_client
    # stays None and the provider SDKs use their default transport.
    proxy_url = os.environ.get("HTTP_PROXY")
    http_client = None
    if requirements["use_proxy"] and proxy_url:
        if _validate_proxy_url(proxy_url):
            http_client = httpx.Client(proxy=proxy_url)
        else:
            logger.warning(f"Invalid HTTP_PROXY URL format '{proxy_url}', ignoring proxy configuration")
# Provider-specific logic (validation already done above)
if provider == "anthropic":
client = anthropic.Anthropic(
api_key=api_key,
http_client=http_client,
)
request_params = {
"model": model_id,
"max_tokens": max_tokens,
"messages": [{"role": "user", "content": message}],
}
if enable_thinking and budget_tokens is not None:
if budget_tokens >= max_tokens:
raise ValueError("budget_tokens must be less than max_tokens.")
request_params["thinking"] = {
"type": "enabled",
"budget_tokens": budget_tokens
}
message_response = client.messages.create(**request_params)
if enable_thinking and "thinking" in request_params:
return message_response.content # Returns List[ContentBlock]
else:
# Concatenate text from all TextBlock instances
text_parts = [
block.text for block in message_response.content
if isinstance(block, anthropic.types.TextBlock)
]
return "\n".join(text_parts)
# ----- GEMINI ------
elif provider == "google":
        client = genai.Client(api_key=api_key)
        if not enable_thinking or budget_tokens is None:
            # Default to a zero thinking budget when thinking is not requested or unbudgeted
            budget_tokens = 0
        if budget_tokens >= max_tokens:  # max_tokens is the overall output limit
            raise ValueError("budget_tokens must be less than max_tokens.")
response = client.models.generate_content(
model=model_id,
            contents=[message],
            config=genai_types.GenerateContentConfig(
                thinking_config=genai_types.ThinkingConfig(thinking_budget=budget_tokens),
max_output_tokens=max_tokens
)
)
return response.text # Returns str
# ----- OPENAI ------
elif provider == "openai":
        if enable_thinking or budget_tokens is not None:
            logger.warning("enable_thinking and budget_tokens are not used for the OpenAI provider.")
client = openai.OpenAI(
api_key=api_key,
base_url=base_url,
http_client=http_client,
)
if output_model is not None:
# Use structured outputs with Pydantic model (recommended approach)
response = client.beta.chat.completions.parse(
model=model_id,
messages=[{"role": "user", "content": message}],
max_tokens=max_tokens,
response_format=output_model,
)
if not response.choices:
raise ValueError("OpenAI API returned empty choices list")
result = response.choices[0].message.parsed
return _handle_output_conversion(result, is_typed_dict_output)
else:
# Regular text completion
response = client.chat.completions.create(
model=model_id,
messages=[{"role": "user", "content": message}],
max_tokens=max_tokens,
)
if not response.choices:
raise ValueError("OpenAI API returned empty choices list")
return response.choices[0].message.content
# ----- OLLAMA ------
elif provider == "ollama":
        if enable_thinking or budget_tokens is not None:
            # Extended thinking is not supported by Ollama's basic chat API
            logger.warning("enable_thinking and budget_tokens are not used for the Ollama provider.")
chat_messages = [{'role': 'user', 'content': message}]
options = {}
if max_tokens is not None: # Default is 1024
options['num_predict'] = max_tokens
# Other options like temperature, top_p could be added if needed
request_args = {
"model": model_id,
"messages": chat_messages,
}
if options: # Only add options if there are any
request_args["options"] = options
if output_model is not None:
# Instruct Ollama to use the Pydantic model's JSON schema for the output format.
request_args["format"] = output_model.model_json_schema()
# The user's prompt ('message') should ideally also guide the model
# towards generating the desired structured output.
# Ollama connection with graceful fallback for development workflows
client = None
used_fallback = False
try:
# First attempt: Use configured base_url
client = ollama.Client(host=base_url)
# Test connection with a simple health check
client.list() # This will fail if Ollama is not accessible
logger.debug(f"Successfully connected to Ollama at {base_url}")
except Exception as e:
logger.debug(f"Failed to connect to Ollama at {base_url}: {e}")
# Determine fallback URLs based on current base_url
fallback_urls = _get_ollama_fallback_urls(base_url)
# Try fallback URLs
for fallback_url in fallback_urls:
try:
logger.debug(f"Attempting fallback connection to Ollama at {fallback_url}")
client = ollama.Client(host=fallback_url)
client.list() # Test connection
used_fallback = True
logger.warning(
f"⚠️ Ollama connection fallback: configured URL '{base_url}' failed, "
f"using fallback '{fallback_url}'. Consider updating your configuration "
f"for your current execution environment."
)
break
except Exception as fallback_e:
logger.debug(f"Fallback attempt failed for {fallback_url}: {fallback_e}")
continue
if client is None:
# All connection attempts failed
raise ValueError(
f"Failed to connect to Ollama at configured URL '{base_url}' "
f"and all fallback URLs {fallback_urls}. Please ensure Ollama is running "
f"and accessible, or update your configuration."
)
try:
response = client.chat(**request_args)
except Exception as e:
            # Provide helpful error context; the originally configured host is reported
            fallback_note = ", connected via a fallback URL" if used_fallback else ""
            raise ValueError(
                f"Ollama chat request failed (configured host '{base_url}'{fallback_note}). "
                f"Error: {e}. Please verify the model '{model_id}' is available."
            ) from e
# response is a dict, e.g.:
# {'model': 'llama3.1', 'created_at': ...,
# 'message': {'role': 'assistant', 'content': '...'}, ...}
ollama_content_str = response['message']['content']
if output_model is not None:
# Validate the JSON string from Ollama against the Pydantic model
result = output_model.model_validate_json(ollama_content_str.strip())
return _handle_output_conversion(result, is_typed_dict_output)
else:
# If no output_model was specified, return the raw string content
return ollama_content_str
# ----- CBORG ------
elif provider == "cborg":
        if enable_thinking or budget_tokens is not None:
            logger.warning("enable_thinking and budget_tokens are not used for the CBORG provider.")
client = openai.OpenAI(
api_key=api_key,
base_url=base_url,
http_client=http_client,
)
if output_model is not None:
# Use structured outputs with Pydantic model (same as OpenAI implementation)
response = client.beta.chat.completions.parse(
model=model_id,
messages=[{"role": "user", "content": message}],
max_tokens=max_tokens,
response_format=output_model,
)
if not response.choices:
raise ValueError("CBORG API returned empty choices list")
result = response.choices[0].message.parsed
return _handle_output_conversion(result, is_typed_dict_output)
else:
# Regular text completion
response = client.chat.completions.create(
model=model_id,
messages=[{"role": "user", "content": message}],
max_tokens=max_tokens,
)
if not response.choices:
raise ValueError("CBORG API returned empty choices list")
return response.choices[0].message.content