# Voice Agent Development on Retell AI

Retell AI is a platform for production-grade voice agents with a focus on low latency and conversation state management. Its key differentiator from competitors is built-in support for stateful dialogs via LLM WebSocket streaming and its own conversational context orchestrator.

### Architecture and Key Features

Retell uses a bidirectional WebSocket between its infrastructure and the developer's LLM backend. This means you control the conversation logic on your server, while Retell handles telephony, STT, TTS, and audio management.

```python
import asyncio
import json
import websockets
from typing import AsyncGenerator
class RetellAgentServer:
    """Custom LLM server for Retell AI.

    Retell connects over a WebSocket and expects a streamed response:
    a sequence of partial-content messages followed by one final
    message with ``content_complete=True``.
    """

    def __init__(self, openai_client, system_prompt: str):
        # openai_client: AsyncOpenAI-compatible client — must expose
        # ``chat.completions.create(..., stream=True)``.
        self.openai = openai_client
        self.system_prompt = system_prompt

    async def handle_connection(self, websocket, path=None):
        """Handle one WebSocket session from Retell.

        ``path`` defaults to None: websockets < 11 passes a path to the
        handler, newer versions invoke it with the connection only, so
        the default keeps the handler usable with both.
        """
        async for message in websocket:
            data = json.loads(message)

            if data.get("interaction_type") == "call_details":
                # Call start — Retell sends call metadata first.
                call_info = data.get("call", {})
                print(f"New call: {call_info.get('call_id')}")
                continue

            if data.get("interaction_type") in ("response_required", "reminder_required"):
                # The user said something, or a silence timeout fired.
                transcript = data.get("transcript", [])
                response_id = data.get("response_id")

                # Stream the answer chunk by chunk via OpenAI streaming.
                async for chunk in self._generate_response(transcript):
                    await websocket.send(json.dumps({
                        "response_id": response_id,
                        "content": chunk,
                        "content_complete": False,
                        "end_call": False,
                    }))

                # Final empty chunk signals the response is complete.
                await websocket.send(json.dumps({
                    "response_id": response_id,
                    "content": "",
                    "content_complete": True,
                    "end_call": False,
                }))

    async def _generate_response(self, transcript: list) -> AsyncGenerator[str, None]:
        """Stream an answer from OpenAI for the given Retell transcript.

        Yields non-empty text deltas only.
        """
        messages = [{"role": "system", "content": self.system_prompt}]
        for turn in transcript:
            # Retell labels agent turns "agent"; OpenAI expects "assistant".
            # .get() keeps a malformed turn from killing the whole call.
            role = "assistant" if turn.get("role") == "agent" else "user"
            messages.append({"role": role, "content": turn.get("content", "")})

        stream = await self.openai.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            stream=True,
            temperature=0.7,
        )
        async for chunk in stream:
            delta = chunk.choices[0].delta.content
            if delta:  # skip keep-alive / empty deltas
                yield delta
class RetellAPIClient:
    """Manage agents and outbound calls through the Retell REST API."""

    def __init__(self, api_key: str, timeout: float = 30.0):
        """
        api_key: Retell API key, sent as a Bearer token.
        timeout: per-request timeout in seconds — without it, requests
            would block forever on a stalled connection.
        """
        import requests  # local import keeps the module importable without requests

        self.api_key = api_key
        self.base_url = "https://api.retellai.com"
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        })

    def _post(self, endpoint: str, payload: dict) -> dict:
        """POST ``payload`` to ``endpoint`` and return the decoded JSON body.

        Raises requests.HTTPError on a non-2xx response instead of
        silently JSON-decoding an error page.
        """
        response = self.session.post(
            f"{self.base_url}{endpoint}",
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def create_agent(self, agent_name: str,
                     llm_websocket_url: str,
                     voice_id: str = "11labs-Adrian",
                     language: str = "russian") -> dict:
        """Create an agent with a custom LLM backend.

        llm_websocket_url: your WebSocket server that drives the dialog.
        NOTE(review): current Retell API versions expect locale codes
        (e.g. "ru-RU") for ``language`` — confirm against the API docs.
        """
        payload = {
            "agent_name": agent_name,
            "llm_websocket_url": llm_websocket_url,
            "voice_id": voice_id,
            "language": language,
            "response_engine": {
                "type": "retell-llm",  # or "custom-llm"
            },
            "responsiveness": 1.0,  # 0-1, how quickly the agent reacts
            "interruption_sensitivity": 1.0,
            "enable_backchannel": True,  # "uh-huh" / "yes" during pauses
            "backchannel_frequency": 0.9,
            "end_call_after_silence_ms": 600000,
            "max_call_duration_ms": 3600000,
        }
        return self._post("/create-agent", payload)

    def create_phone_call(self, from_number: str,
                          to_number: str,
                          agent_id: str,
                          retell_llm_dynamic_variables: dict = None) -> dict:
        """Start an outbound phone call handled by ``agent_id``.

        retell_llm_dynamic_variables: optional per-call variables that
            Retell injects into the agent's prompt templates.
        """
        payload = {
            "from_number": from_number,
            "to_number": to_number,
            "agent_id": agent_id,
        }
        if retell_llm_dynamic_variables:
            payload["retell_llm_dynamic_variables"] = retell_llm_dynamic_variables
        return self._post("/create-phone-call", payload)
### Managing the State of Complex Conversations

The advantage of Retell's architecture with a custom LLM is complete control over the conversation state on your server. You can store history, load data from your CRM in real time, and manage branching logic.

```python
class ConversationStateManager:
    """Tracks per-call dialog state for a Retell conversation."""

    def __init__(self, call_id: str, customer_id: str):
        self.call_id = call_id
        self.customer_id = customer_id
        # Dialog starts at the greeting step; collected_data accumulates
        # answers gathered over the course of the call.
        self.state = "greeting"
        self.collected_data = {}
        self.escalation_triggers = ["оператор", "жалоба", "претензия", "руководитель"]

    def should_escalate(self, user_message: str) -> bool:
        """Return True when the message should be routed to a human operator."""
        lowered = user_message.lower()
        for trigger in self.escalation_triggers:
            if trigger in lowered:
                return True
        return False

    def get_context_prompt(self) -> str:
        """Build the dynamic prompt fragment for the current dialog step."""
        parts = [f"Текущий шаг: {self.state}. Уже собрано: {self.collected_data}."]
        if self.state == "qualification":
            parts.append(" Узнай: есть ли бюджет, сроки принятия решения, лицо принимающее решение.")
        elif self.state == "scheduling":
            parts.append(" Предложи 3 слота для встречи на следующей неделе.")
        return "".join(parts)







