OpenAI API Integration: GPT-4, GPT-4o, o1, o3
OpenAI provides several model families with different characteristics. GPT-4o is the optimal choice for most tasks: multimodal, good quality-to-cost ratio. o1/o3 are for tasks requiring deep reasoning (mathematics, code, logic). GPT-4o-mini is for high-load scenarios with simple tasks.
Basic Integration
from openai import OpenAI, AsyncOpenAI
from pydantic import BaseModel
# Clients read OPENAI_API_KEY (and optional OPENAI_BASE_URL) from the environment.
client = OpenAI() # Uses OPENAI_API_KEY from environment
async_client = AsyncOpenAI()
# Synchronous call
def chat(prompt: str, model: str = "gpt-4o") -> str:
    """Send a single user prompt and return the assistant's reply text.

    Low temperature (0.1) keeps answers close to deterministic.
    """
    conversation = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model=model,
        messages=conversation,
        temperature=0.1,
    )
    return completion.choices[0].message.content
# Structured output
class Extraction(BaseModel):
    """Schema for structured extraction: a named monetary amount.

    Used as ``response_format`` so the API returns validated JSON.
    """

    name: str
    amount: float
    currency: str
def extract_structured(text: str) -> Extraction:
    """Extract a validated ``Extraction`` record from free-form text.

    Uses the SDK's ``parse`` helper, which enforces the Pydantic schema
    on the model's output.
    """
    prompt = f"Extract data from: {text}"
    completion = client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        response_format=Extraction,
    )
    return completion.choices[0].message.parsed
# Streaming
def stream_response(prompt: str):
    """Yield the assistant's reply incrementally as text fragments.

    NOTE: the original used ``client.chat.completions.stream(...)`` with a
    ``text_stream`` attribute — that is the Anthropic SDK's streaming helper,
    not OpenAI's. The OpenAI Python SDK streams by passing ``stream=True`` to
    ``create``, which returns an iterator of chunks whose deltas carry the
    text.
    """
    stream = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    for chunk in stream:
        # Some chunks (e.g. the final usage chunk) may have no choices,
        # and a delta's content can be None (role-only deltas).
        if chunk.choices:
            delta = chunk.choices[0].delta.content
            if delta is not None:
                yield delta
# Vision (GPT-4o)
def analyze_image(image_url: str, question: str) -> str:
    """Ask a question about an image URL using GPT-4o's vision input.

    The user message carries two content parts: the image, then the text.
    """
    image_part = {"type": "image_url", "image_url": {"url": image_url}}
    text_part = {"type": "text", "text": question}
    message = {"role": "user", "content": [image_part, text_part]}
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[message],
    )
    return completion.choices[0].message.content
o1/o3 for Reasoning Tasks
# Reasoning models (o1 family) do not support system prompts, temperature,
# or streaming — send a single user message only.
def reason_with_o1(problem: str) -> str:
    """Solve a reasoning-heavy problem with a reasoning model (o3-mini here)."""
    completion = client.chat.completions.create(
        model="o3-mini",
        messages=[{"role": "user", "content": problem}],
        # o3-mini accepts reasoning_effort: "low" | "medium" | "high"
        reasoning_effort="high",
    )
    return completion.choices[0].message.content
# Reasoning models shine at: mathematical proofs, algorithmic problems,
# multi-step code with constraints, logic puzzles.
Cost Management
# Approximate costs (per 1M tokens, 2025):
# gpt-4o: $2.50 input / $10.00 output
# gpt-4o-mini: $0.15 input / $0.60 output
# o3-mini: $1.10 input / $4.40 output
def estimate_cost(text: str, model: str = "gpt-4o") -> float:
    """Return a rough input-cost estimate in USD before making an API call.

    Uses the ~1.3-tokens-per-whitespace-word rule of thumb; unknown models
    fall back to the gpt-4o rate.
    """
    approx_tokens = len(text.split()) * 1.3
    # USD per 1M input tokens (approximate, 2025)
    per_million = {"gpt-4o": 2.50, "gpt-4o-mini": 0.15, "o3-mini": 1.10}
    rate = per_million.get(model, 2.50)
    return approx_tokens / 1_000_000 * rate
Embeddings and Semantic Search
def get_embeddings(texts: list[str]) -> list[list[float]]:
    """Embed a batch of texts and return one vector per input, in order.

    text-embedding-3-small: 1536 dimensions, ~$0.02 per 1M tokens.
    """
    result = client.embeddings.create(
        model="text-embedding-3-small", # 1536 dims, $0.02/1M tokens
        input=texts,
    )
    vectors = []
    for item in result.data:
        vectors.append(item.embedding)
    return vectors
Timeline
- Basic integration with chat completions: 0.5–1 day
- Structured outputs + tools: 2–3 days
- Retry logic + cost management: 1–2 days







