Documentation Index
Fetch the complete documentation index at: https://docs.helicone.ai/llms.txt
Use this file to discover all available pages before exploring further.
Python Manual Logger
Logging calls to custom models is supported via the Helicone Python SDK.
Install the Helicone helpers package
pip install helicone-helpers
Set `HELICONE_API_KEY` as an environment variable
export HELICONE_API_KEY=sk-<your-api-key>
You can also set the Helicone API Key in your code (See below)
Create a new HeliconeManualLogger instance
from openai import OpenAI
from helicone_helpers import HeliconeManualLogger
from helicone_helpers.manual_logger import HeliconeResultRecorder

# Initialize the logger.
# The api_key can instead be supplied via the HELICONE_API_KEY
# environment variable (see the setup step above).
logger = HeliconeManualLogger(
    api_key="your-helicone-api-key",
    headers={}  # default Helicone headers applied to every logged request
)

# Initialize OpenAI client
client = OpenAI(
    api_key="your-openai-api-key"
)
Define your operation and make the request
def chat_completion_operation(result_recorder: HeliconeResultRecorder):
    """Run the chat completion and record its payload for Helicone.

    Args:
        result_recorder: Recorder that carries the request dict passed to
            ``log_request``; whatever is appended here is what gets logged.

    Returns:
        The raw OpenAI response object.
    """
    import json  # hoisted to the top of the function (was mid-body)

    # Use the request stored on the recorder so the logged request always
    # matches the one actually sent to the API.
    response = client.chat.completions.create(**result_recorder.request)

    # Serialize the SDK response object into a plain dict for logging.
    result_recorder.append_results(json.loads(response.to_json()))
    return response
# Define your request
request = {
    "model": "gpt-4o-mini",
    "messages": [{"role": "user", "content": "Hello, world!"}]
}

# Make the request with logging: log_request runs the operation with a
# HeliconeResultRecorder, sends the recorded request/response pair to
# Helicone, and returns whatever the operation returned.
result = logger.log_request(
    provider="openai",  # Specify the provider
    request=request,
    operation=chat_completion_operation,
    additional_headers={
        "Helicone-Session-Id": "1234567890"  # Optional session tracking
    }
)

print(result)
API Reference
HeliconeManualLogger
# API reference (signature only -- implementation lives in the SDK).
class HeliconeManualLogger:
    def __init__(
        self,
        api_key: str,        # Helicone API key
        headers: dict = {},  # default headers attached to every log
        logging_endpoint: str = "https://api.worker.helicone.ai"  # log ingestion endpoint
    )
LoggingOptions
# Timing/metadata options for send_log; total=False makes every key optional.
class LoggingOptions(TypedDict, total=False):
    # Request start, seconds since epoch (as produced by time.time()).
    start_time: float
    # Request end, seconds since epoch.
    end_time: float
    # Extra Helicone headers for this single log.
    additional_headers: Dict[str, str]
    # Latency to the first streamed token, in milliseconds.
    time_to_first_token_ms: Optional[float]
log_request
# API reference (signature only). Runs `operation` with a
# HeliconeResultRecorder holding `request`, logs the recorded results,
# and returns the operation's return value.
def log_request(
    self,
    request: dict,  # request parameters to log
    operation: Callable[[HeliconeResultRecorder], T],  # performs the call and records results
    additional_headers: dict = {},  # per-call Helicone headers
    provider: Optional[Union[Literal["openai", "anthropic"], str]] = None,  # None for custom models
) -> T
Parameters
request: A dictionary containing the request parameters
operation: A callable that takes a HeliconeResultRecorder and returns a result
additional_headers: Optional dictionary of additional headers
provider: Optional provider specification ("openai", "anthropic", or None for custom)
send_log
# API reference (signature only). Directly sends one request/response
# pair to Helicone with timing supplied via LoggingOptions -- no
# operation callback involved.
def send_log(
    self,
    provider: Optional[str],     # "openai", "anthropic", or None for custom
    request: dict,               # request parameters to log
    response: Union[dict, str],  # response payload, structured or raw string
    options: LoggingOptions      # timing info and extra headers
)
Parameters
provider: Optional provider specification ("openai", "anthropic", or None for custom)
request: A dictionary containing the request parameters
response: Either a dictionary or string response to log
options: A LoggingOptions dictionary with timing information
HeliconeResultRecorder
# API reference stub: collects the results an operation wants logged.
class HeliconeResultRecorder:
    def __init__(self, request: dict):
        """Initialize the recorder with the request data to be logged."""

    def append_results(self, data: dict):
        """Append response data that Helicone should log for this call."""

    def get_results(self) -> dict:
        """Get all results recorded so far."""
Advanced Usage Examples
Direct Logging with String Response
For direct logging of string responses:
import time
from helicone_helpers import HeliconeManualLogger, LoggingOptions

# Create the logger
manual_logger = HeliconeManualLogger(api_key="your-helicone-api-key")

# Note when the work began
started_at = time.time()

# The request parameters you want logged
request_data = {
    "model": "custom-model",
    "prompt": "Tell me a joke"
}

# The model's answer, as a plain string
response_text = "Why did the chicken cross the road? To get to the other side!"

# Note when the work finished
finished_at = time.time()

# Ship the request, response, and timing to Helicone in a single call
manual_logger.send_log(
    provider=None,  # Custom provider
    request=request_data,
    response=response_text,  # String response
    options=LoggingOptions(
        start_time=started_at,
        end_time=finished_at,
        additional_headers={"Helicone-User-Id": "user-123"},
        time_to_first_token_ms=150  # Optional time to first token in milliseconds
    )
)
Streaming Responses
For streaming responses with Python, you can use the log_request method with time to first token tracking:
from helicone_helpers import HeliconeManualLogger, LoggingOptions
import openai
import time

# Initialize the logger
helicone = HeliconeManualLogger(api_key="your-helicone-api-key")
client = openai.OpenAI(api_key="your-openai-api-key")

# Define your request; "stream": True makes the OpenAI SDK return an
# iterator of chunks instead of a single response object.
request = {
    "model": "gpt-4o-mini",
    "messages": [{"role": "user", "content": "Write a story about a robot."}],
    "stream": True
}
def stream_operation(result_recorder):
    """Stream a chat completion, recording the chunks and time-to-first-token.

    Args:
        result_recorder: HeliconeResultRecorder carrying the request dict
            passed to ``log_request``.

    Returns:
        The list of raw stream chunks.
    """
    start_time = time.time()
    first_token_time = None

    # Use the request stored on the recorder so the logged request always
    # matches the one actually sent (previously read the module-level
    # `request`, which could drift from what log_request was given).
    response = client.chat.completions.create(**result_recorder.request)

    # Consume the stream, noting when the first chunk arrives.
    collected_chunks = []
    for chunk in response:
        if first_token_time is None:
            first_token_time = time.time()
        collected_chunks.append(chunk)
        # You can process each chunk here if needed

    # Convert the first-token latency to milliseconds (None if no chunks).
    time_to_first_token = None
    if first_token_time is not None:
        time_to_first_token = (first_token_time - start_time) * 1000

    # Record the results with timing information for Helicone to log.
    result_recorder.append_results({
        "chunks": [c.model_dump() for c in collected_chunks],
        "time_to_first_token_ms": time_to_first_token
    })

    # Return the collected chunks or process them as needed
    return collected_chunks
# Log the streaming request; log_request returns whatever stream_operation
# returns (here, the collected chunk list).
result = helicone.log_request(
    provider="openai",
    request=request,
    operation=stream_operation,
    additional_headers={"Helicone-User-Id": "user-123"}
)
Using with Anthropic
from helicone_helpers import HeliconeManualLogger
import anthropic

# Initialize the logger
helicone = HeliconeManualLogger(api_key="your-helicone-api-key")
client = anthropic.Anthropic(api_key="your-anthropic-api-key")

# Define your request (parameters are forwarded to client.messages.create)
request = {
    "model": "claude-3-opus-20240229",
    "messages": [{"role": "user", "content": "Explain quantum computing"}],
    "max_tokens": 1000
}
def anthropic_operation(result_recorder):
    """Call Anthropic's Messages API and record a JSON-safe response dict.

    Args:
        result_recorder: HeliconeResultRecorder carrying the request dict
            passed to ``log_request``.

    Returns:
        The raw Anthropic response object.
    """
    # Use the request stored on the recorder so the logged request always
    # matches the one actually sent (previously read the module-level
    # `request`, which could drift from what log_request was given).
    response = client.messages.create(**result_recorder.request)

    # Convert to a plain dictionary for logging -- the SDK response object
    # is not directly serializable.
    response_dict = {
        "id": response.id,
        "content": [{"text": block.text, "type": block.type} for block in response.content],
        "model": response.model,
        "role": response.role,
        "usage": {
            "input_tokens": response.usage.input_tokens,
            "output_tokens": response.usage.output_tokens
        }
    }

    # Record the results
    result_recorder.append_results(response_dict)
    return response
# Log the request with Anthropic provider specified; returns the raw
# response object that anthropic_operation returned.
result = helicone.log_request(
    provider="anthropic",
    request=request,
    operation=anthropic_operation
)
Custom Model Integration
For custom models that don't have a specific provider integration:
from helicone_helpers import HeliconeManualLogger
import requests

# Initialize the logger
helicone = HeliconeManualLogger(api_key="your-helicone-api-key")

# Define your request; for a custom model this dict can hold whatever
# parameters your API understands.
request = {
    "model": "custom-model-name",
    "prompt": "Generate a poem about nature",
    "temperature": 0.7
}
def custom_model_operation(result_recorder):
    """POST the request to a custom model API and record its JSON response.

    Args:
        result_recorder: HeliconeResultRecorder carrying the request dict
            passed to ``log_request``.

    Returns:
        The decoded JSON response body (dict).

    Raises:
        requests.HTTPError: if the API returns an error status.
    """
    # Use the request stored on the recorder so the logged request always
    # matches the one actually sent (previously read the module-level
    # `request`, which could drift from what log_request was given).
    response = requests.post(
        "https://your-custom-model-api.com/generate",
        json=result_recorder.request,
        headers={"Authorization": "Bearer your-api-key"}
    )
    # Surface HTTP errors instead of silently recording an error body.
    response.raise_for_status()

    # Parse and record the results
    response_data = response.json()
    result_recorder.append_results(response_data)
    return response_data
# Log the request with no specific provider; provider=None tells Helicone
# this is a custom model integration.
result = helicone.log_request(
    provider=None,  # No specific provider
    request=request,
    operation=custom_model_operation
)
For more examples and detailed usage, check out our Manual Logger with Streaming cookbook.
Direct Stream Logging
For direct control over streaming responses, you can use the send_log method to manually track time to first token:
import time
from helicone_helpers import HeliconeManualLogger, LoggingOptions
import openai

# Initialize the logger and client
helicone_logger = HeliconeManualLogger(api_key="your-helicone-api-key")
client = openai.OpenAI(api_key="your-openai-api-key")

# Define your request
request_body = {
    "model": "gpt-4o-mini",
    "messages": [{"role": "user", "content": "Write a story about a robot"}],
    "stream": True,
    "stream_options": {
        "include_usage": True
    }
}

# Start the clock before sending the request so time-to-first-token
# includes connection/setup latency (matches the log_request example,
# which previously measured from after create() returned).
start_time = time.time()

# Create the streaming response
stream = client.chat.completions.create(**request_body)

# Track time to first token
chunks = []
time_to_first_token_ms = None

# Process the stream
for chunk in stream:
    # Record time to first token on the first chunk. Compare against None
    # explicitly: the previous falsy check (`not time_to_first_token_ms`)
    # would wrongly re-trigger on a legitimate 0.0 ms value.
    if time_to_first_token_ms is None:
        time_to_first_token_ms = (time.time() - start_time) * 1000
    # Store chunks (you might want to process them differently)
    chunks.append(chunk.model_dump_json())

# Log the complete interaction with timing information
helicone_logger.send_log(
    provider="openai",
    request=request_body,
    response="\n".join(chunks),  # Join chunks or process as needed
    options=LoggingOptions(
        start_time=start_time,
        end_time=time.time(),
        additional_headers={"Helicone-User-Id": "user-123"},
        time_to_first_token_ms=time_to_first_token_ms
    )
)
This approach gives you complete control over the streaming process while still capturing important metrics like time to first token.