Setting up AI query cost tracking
LLM cost tracking means recording spend broken down by project, user, and request type. Without it, the AI infrastructure budget cannot be managed.
Cost calculation
# Prices as of early 2025 -- verify against the providers' current rates.
# All figures are USD per 1M tokens.
MODEL_PRICING = {
    "gpt-4o": {"input": 2.50, "output": 10.00},
    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
    "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00},
    "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
    # Local model: no per-token price, only GPU cost.
    "llama-3-8b-local": {"input": 0.0, "output": 0.0},
}
def calculate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Return the USD cost of a single LLM request.

    Prices are looked up in ``MODEL_PRICING``, whose values are expressed in
    USD per 1M tokens.

    Args:
        model: Model identifier used as the key into ``MODEL_PRICING``.
        prompt_tokens: Number of input (prompt) tokens.
        completion_tokens: Number of output (completion) tokens.

    Returns:
        The request cost in USD, or ``0.0`` when ``model`` has no pricing
        entry (a warning is logged in that case).
    """
    import logging

    pricing = MODEL_PRICING.get(model)
    if pricing is None:
        # Keep the best-effort contract (unknown model -> 0.0), but make the
        # gap visible: silently pricing unknown models at zero hides spend.
        logging.getLogger(__name__).warning(
            "No pricing configured for model %r; recording cost as 0.0", model
        )
        return 0.0

    weighted_tokens = (
        prompt_tokens * pricing["input"] + completion_tokens * pricing["output"]
    )
    # Prices are per 1M tokens, so scale down to a per-token cost.
    return weighted_tokens / 1_000_000
Aggregation by dimensions
class CostTracker:
    """Persist per-request LLM costs and aggregate them by dimension.

    NOTE(review): every method uses ``self.db``, which this class never
    assigns -- presumably it is attached by the code that creates the
    tracker; confirm. It must expose ``insert(row)`` and
    ``query(sql, params)``.
    """

    def record(self, request: "LLMRequest", cost_usd: float):
        """Insert one cost row describing ``request``.

        Args:
            request: The request to record; its timestamp, model, project,
                user, feature tag and token counts are copied into the row.
            cost_usd: Cost of the request in USD.
        """
        self.db.insert({
            "timestamp": request.timestamp,
            "cost_usd": cost_usd,
            "model": request.model,
            "project_id": request.project_id,
            "user_id": request.user_id,
            "feature": request.feature_tag,  # "chat", "rag", "classification"
            "prompt_tokens": request.prompt_tokens,
            "completion_tokens": request.completion_tokens,
        })

    def get_daily_by_project(self, days: int = 30) -> dict:
        """Return daily cost and token totals per project.

        Args:
            days: Size of the look-back window in days.

        Returns:
            Whatever ``self.db.query`` returns for the aggregation query:
            one group per (project_id, date) with ``total_cost`` and
            ``total_tokens``, ordered by date and then by descending cost.
        """
        # total_tokens must cover prompt AND completion tokens; summing only
        # prompt_tokens under-reports usage.
        return self.db.query("""
            SELECT project_id, DATE(timestamp) as date,
                   SUM(cost_usd) as total_cost,
                   SUM(prompt_tokens + completion_tokens) as total_tokens
            FROM llm_costs
            WHERE timestamp > NOW() - INTERVAL %s DAY
            GROUP BY project_id, date
            ORDER BY date, total_cost DESC
        """, (days,))
Budget alerts
class BudgetGuard:
    """Check a project's LLM spend against daily and hourly USD limits."""

    def __init__(self, limits: dict, tracker=None):
        """Store the budget limits and the spend source.

        Args:
            limits: Mapping with ``"daily"`` and ``"hourly"`` keys holding
                the respective limits in USD.
            tracker: Object exposing ``get_spend(project_id, hours=...)``.
                Optional so existing ``BudgetGuard(limits)`` calls keep
                working; it can also be assigned to ``self.tracker`` later,
                but must be set before ``check_budget`` is called.

        Raises:
            KeyError: If ``limits`` lacks the ``"daily"`` or ``"hourly"`` key.
        """
        self.daily_limit_usd = limits["daily"]
        self.hourly_limit_usd = limits["hourly"]
        self.tracker = tracker

    def check_budget(self, project_id: str) -> "BudgetStatus":
        """Compare recent spend for ``project_id`` with the configured limits.

        Args:
            project_id: Project whose spend is checked.

        Returns:
            A ``BudgetStatus`` carrying the 24-hour spend, the list of alert
            messages, and whether throttling is enabled (only when the daily
            limit is exceeded).
        """
        daily_spend = self.tracker.get_spend(project_id, hours=24)
        hourly_spend = self.tracker.get_spend(project_id, hours=1)
        daily_exceeded = daily_spend > self.daily_limit_usd

        alerts = []
        if daily_spend > self.daily_limit_usd * 0.8:
            alerts.append(f"80% of daily budget consumed: ${daily_spend:.2f}/${self.daily_limit_usd:.2f}")
        if daily_exceeded:
            alerts.append("DAILY BUDGET EXCEEDED — throttling enabled")
        # The hourly limit was configured but never checked; surface it as an
        # alert. Throttling stays tied to the daily limit only.
        if hourly_spend > self.hourly_limit_usd:
            alerts.append(f"Hourly budget exceeded: ${hourly_spend:.2f}/${self.hourly_limit_usd:.2f}")

        return BudgetStatus(daily_spend=daily_spend, alerts=alerts,
                            throttle_enabled=daily_exceeded)
Cost Dashboard
Key charts: daily cost by project, top 10 most expensive features, cost per request by model, and month-over-month trend. Anomalies: a day-over-day cost increase of more than 50% without a matching increase in traffic may indicate prompt injection or a bug.







