The Portkey Python SDK provides a native, fully-typed interface to access 250+ LLMs through a unified API with production-grade routing, fallbacks, and observability.
Overview
The Python SDK offers:
OpenAI-Compatible API: Drop-in replacement for the OpenAI SDK
250+ LLMs: Access any LLM through a unified interface
Type Safety: Full type hints and IDE autocomplete
Advanced Routing: Fallbacks, load balancing, and conditional routing
Production Features: Caching, retries, timeouts, and guardrails
Observability: Built-in logging, tracing, and analytics
Installation
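Install the SDK from PyPI (the package name assumed here is portkey-ai, matching the portkey_ai import used throughout this page):
pip install portkey-ai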
Quick Start
Get Your API Keys
Sign up at Portkey and get your API key. Add your provider API keys as Virtual Keys.
Import and Initialize
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-openai-virtual-key"
)
Make Your First Request
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello!"}]
)
print(response.choices[0].message.content)
Basic Usage
Chat Completions
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-openai-virtual-key"
)

response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain quantum computing in simple terms."}
    ],
    temperature=0.7,
    max_tokens=500
)
print(response.choices[0].message.content)
Streaming Responses
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-openai-virtual-key"
)

stream = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Write a story"}],
    stream=True
)
for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
Completions (Legacy)
# Uses the client initialized above
response = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Once upon a time",
    max_tokens=100
)
print(response.choices[0].text)
Using Different Providers
Switch between providers by changing the provider or virtual_key. The example below uses OpenAI; Anthropic, Google Gemini, and Azure OpenAI are selected the same way (an Anthropic sketch follows the OpenAI example).
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    provider="openai",
    Authorization="your-openai-api-key"
)

response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello!"}]
)
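A sketch of the same pattern pointed at Anthropic; the provider value, the Authorization argument carrying the Anthropic key, and the Claude model name are assumptions mirroring the OpenAI example above:
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    provider="anthropic",                    # assumed provider slug
    Authorization="your-anthropic-api-key"   # assumed to carry the provider key, as above
)

response = client.chat.completions.create(
    model="claude-3-opus-20240229",          # illustrative Claude model name
    messages=[{"role": "user", "content": "Hello!"}]
)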
Advanced Routing with Configs
Fallback Strategy
Automatically fall back to backup providers:
from portkey_ai import Portkey

config = {
    "strategy": {"mode": "fallback"},
    "targets": [
        {"virtual_key": "openai-virtual-key"},
        {"virtual_key": "anthropic-virtual-key"},
        {"virtual_key": "together-virtual-key"}
    ]
}

client = Portkey(
    api_key="your-portkey-api-key",
    config=config
)

# Will automatically fall back if OpenAI fails
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello!"}]
)
Load Balancing
Distribute traffic across multiple providers:
config = {
    "strategy": {"mode": "loadbalance"},
    "targets": [
        {"virtual_key": "openai-key-1", "weight": 0.7},
        {"virtual_key": "openai-key-2", "weight": 0.3}
    ]
}

client = Portkey(
    api_key="your-portkey-api-key",
    config=config
)
Automatic Retries
config = {
    "retry": {
        "attempts": 5,
        "on_status_codes": [429, 500, 502, 503]
    }
}

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-virtual-key",
    config=config
)
Request Timeouts
config = {
    "request_timeout": 30000  # 30 seconds
}

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-virtual-key",
    config=config
)
Caching
Enable caching to reduce costs and latency:
Simple Caching
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-virtual-key"
)

# Enable cache for this request
response = client.with_options(
    cache="simple",
    cache_force_refresh=False
).chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "What is 2+2?"}]
)
Semantic Caching
config = {
    "cache": {
        "mode": "semantic",
        "max_age": 3600  # 1 hour
    }
}

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-virtual-key",
    config=config
)

# Semantically similar queries will hit the cache
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "What's two plus two?"}]
)
Guardrails
Add input/output guardrails:
config = {
    "output_guardrails": [
        {
            "default.contains": {
                "operator": "none",
                "words": ["inappropriate", "offensive"]
            },
            "deny": True
        }
    ]
}

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-virtual-key",
    config=config
)
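Input guardrails follow the same shape; a minimal sketch, assuming an input_guardrails key that mirrors output_guardrails (the key name and check are illustrative, not verified here):
config = {
    "input_guardrails": [
        {
            # assumed to mirror the output guardrail shape above
            "default.contains": {
                "operator": "none",
                "words": ["password", "credit card"]
            },
            "deny": True
        }
    ]
}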
Metadata and Tracing
Add custom metadata and trace IDs for better observability:
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-virtual-key"
)

response = client.with_options(
    metadata={
        "user_id": "user_123",
        "session_id": "session_456",
        "environment": "production"
    },
    trace_id="custom-trace-id"
).chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello!"}]
)
Embeddings
Generate embeddings:
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-openai-virtual-key"
)

response = client.embeddings.create(
    model="text-embedding-3-small",
    input="The quick brown fox jumps over the lazy dog"
)
print(response.data[0].embedding)
Image Generation
Generate images:
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-openai-virtual-key"
)

response = client.images.generate(
    model="dall-e-3",
    prompt="A serene landscape with mountains",
    n=1,
    size="1024x1024"
)
print(response.data[0].url)
Audio
Speech to Text
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-openai-virtual-key"
)

with open("speech.mp3", "rb") as audio_file:
    response = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file
    )
print(response.text)
Text to Speech
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-openai-virtual-key"
)

response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Hello, this is a test."
)

with open("output.mp3", "wb") as f:
    f.write(response.content)
Function Calling
from portkey_ai import Portkey
import json

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-openai-virtual-key"
)

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "City name"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"]
                    }
                },
                "required": ["location"]
            }
        }
    }
]

response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="auto"
)

if response.choices[0].message.tool_calls:
    tool_call = response.choices[0].message.tool_calls[0]
    print(f"Function: {tool_call.function.name}")
    print(f"Arguments: {tool_call.function.arguments}")
Vision
Analyze images:
from portkey_ai import Portkey

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-openai-virtual-key"
)

response = client.chat.completions.create(
    model="gpt-4-vision-preview",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://example.com/image.jpg"}
                }
            ]
        }
    ],
    max_tokens=300
)
print(response.choices[0].message.content)
Async Support
Use async/await for concurrent requests:
import asyncio
from portkey_ai import AsyncPortkey

async def main():
    client = AsyncPortkey(
        api_key="your-portkey-api-key",
        virtual_key="your-openai-virtual-key"
    )
    response = await client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    print(response.choices[0].message.content)

asyncio.run(main())
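Because the client is asynchronous, several requests can run concurrently. A minimal sketch using asyncio.gather (the prompts are illustrative):
import asyncio
from portkey_ai import AsyncPortkey

async def ask(client, prompt):
    response = await client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

async def main():
    client = AsyncPortkey(
        api_key="your-portkey-api-key",
        virtual_key="your-openai-virtual-key"
    )
    # Fire the requests concurrently instead of one after another
    answers = await asyncio.gather(
        ask(client, "Summarize photosynthesis in one line."),
        ask(client, "Summarize gravity in one line.")
    )
    for answer in answers:
        print(answer)

asyncio.run(main())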
Async Streaming
import asyncio
from portkey_ai import AsyncPortkey

async def main():
    client = AsyncPortkey(
        api_key="your-portkey-api-key",
        virtual_key="your-openai-virtual-key"
    )
    stream = await client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": "Write a story"}],
        stream=True
    )
    async for chunk in stream:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)

asyncio.run(main())
Error Handling
from portkey_ai import Portkey
from portkey_ai.exceptions import (
    PortkeyError,
    APIError,
    RateLimitError,
    APIConnectionError
)

client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-virtual-key"
)

try:
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": "Hello!"}]
    )
except RateLimitError as e:
    print(f"Rate limit exceeded: {e}")
except APIConnectionError as e:
    print(f"Connection error: {e}")
except APIError as e:
    print(f"API error: {e}")
except PortkeyError as e:
    print(f"Portkey error: {e}")
Best Practices
Store provider API keys as Virtual Keys in Portkey for better security:
client = Portkey(
    api_key="your-portkey-api-key",
    virtual_key="your-virtual-key"  # Secure and rotatable
)
Always configure fallback providers for production:
config = {"strategy": {"mode": "fallback"}, "targets": [...]}
Use caching for repeated queries to reduce costs:
config = {"cache": {"mode": "semantic", "max_age": 3600}}
Use Async for High Throughput
Use AsyncPortkey for applications with many concurrent requests.
Complete Example
from portkey_ai import Portkey
import os

# Configure with fallbacks and caching
config = {
    "strategy": {"mode": "fallback"},
    "targets": [
        {"virtual_key": "openai-key"},
        {"virtual_key": "anthropic-key"}
    ],
    "retry": {"attempts": 3},
    "cache": {"mode": "semantic", "max_age": 3600}
}

client = Portkey(
    api_key=os.environ["PORTKEY_API_KEY"],
    config=config
)

# Make request with metadata
response = client.with_options(
    metadata={
        "user_id": "user_123",
        "feature": "chat",
        "environment": "production"
    },
    trace_id="chat-session-001"
).chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain AI in simple terms."}
    ],
    temperature=0.7,
    max_tokens=500
)
print(response.choices[0].message.content)

# View analytics at https://app.portkey.ai/
Resources