# Developing an AI transcription system with an API

The Transcription API is a B2B service for developers: it accepts audio or video and returns structured JSON containing the transcript, timestamps, and speaker diarization. It is suitable for embedding in third-party applications.

### REST API specification

```python
import uuid
from typing import Optional

from fastapi import Depends, FastAPI, HTTPException, UploadFile
from pydantic import BaseModel
# FastAPI application instance on which the endpoints below are registered.
app = FastAPI(
    title="Transcription API",
    version="1.0.0",
)
class TranscriptionRequest(BaseModel):
    """Options a client may send along with a transcription request."""

    # URL of the audio file
    url: Optional[str] = None
    # auto | ru | en | ...
    language: Optional[str] = "auto"
    # number of speakers
    speakers: Optional[int] = None
    # word-level timestamps
    word_timestamps: bool = False
    punctuate: bool = True
    diarize: bool = False
class TranscriptionSegment(BaseModel):
    """One timed piece of a transcript."""

    # Segment boundaries (presumably seconds from the start of the audio;
    # TODO confirm the unit).
    start: float
    end: float
    # Transcribed text of this segment.
    text: str
    # Optional speaker label and confidence value; both default to None.
    speaker: Optional[str] = None
    confidence: Optional[float] = None
class TranscriptionResponse(BaseModel):
    """Response body describing a transcription job and, once ready, its result.

    Only ``job_id`` and ``status`` are required. Every other field defaults
    to ``None`` explicitly so that a response can be built for a job that has
    no result yet (for example a freshly queued one). Without explicit
    defaults, Pydantic v2 treats ``Optional[...]`` fields as required and
    rejects such a response.
    """

    job_id: str
    status: str  # queued | processing | completed | failed
    language: Optional[str] = None
    duration: Optional[float] = None  # presumably seconds; TODO confirm
    segments: Optional[list[TranscriptionSegment]] = None
    full_text: Optional[str] = None
    error: Optional[str] = None
@app.post("/v1/transcribe", response_model=TranscriptionResponse)
async def transcribe(
    file: Optional[UploadFile] = None,
    request: Optional[TranscriptionRequest] = None,
    api_key: str = Depends(verify_api_key),
):
    """Start a transcription from an uploaded file or from an audio URL.

    The audio is stored via ``save_upload`` (uploaded file) or
    ``download_audio`` (``request.url``). If ``get_duration`` reports less
    than 300 seconds, the audio is transcribed within this request and the
    completed result is returned. Otherwise a ``transcribe_async`` task is
    sent through ``celery.send_task`` and a ``queued`` response is returned.

    Args:
        file: Uploaded audio/video file; takes precedence over ``request.url``.
        request: Transcription options. May be omitted when ``file`` is
            given, in which case default options are used.
        api_key: Value produced by the ``verify_api_key`` dependency; it is
            not otherwise used in this function.

    Returns:
        A ``TranscriptionResponse`` with status ``completed`` (short audio)
        or ``queued`` (long audio).

    Raises:
        HTTPException: 400 when neither a file nor a URL is provided.
    """
    # NOTE(review): FastAPI may not accept a multipart file upload and a JSON
    # body model in the same request -- confirm how clients are expected to
    # send options together with a file.
    job_id = str(uuid.uuid4())

    if file:
        audio_path = await save_upload(file, job_id)
    elif request and request.url:
        # NOTE(review): request.url is caller-supplied; confirm that
        # download_audio restricts schemes/hosts to prevent SSRF.
        audio_path = await download_audio(request.url, job_id)
    else:
        raise HTTPException(400, "Provide file or url")

    # A file upload may arrive without an options body. Fall back to the
    # default options so the code below never dereferences None.
    options = request if request is not None else TranscriptionRequest()

    # Short files (< 5 min) are handled synchronously.
    if await get_duration(audio_path) < 300:
        result = await transcribe_sync(audio_path, options)
        return TranscriptionResponse(
            job_id=job_id,
            status="completed",
            **result,
        )

    # Long files are processed asynchronously; the client polls for the result.
    celery.send_task("transcribe_async", args=[job_id, audio_path, options.dict()])
    return TranscriptionResponse(job_id=job_id, status="queued")
@app.get("/v1/transcribe/{job_id}", response_model=TranscriptionResponse)
async def get_job(job_id: str, api_key: str = Depends(verify_api_key)):
    """Return the stored state of a transcription job.

    Looks the job up with ``db.get_job`` and expands the stored record into
    a ``TranscriptionResponse``.

    Raises:
        HTTPException: 404 when no job is found for ``job_id``.
    """
    # NOTE(review): the lookup does not involve api_key -- confirm whether
    # jobs should be restricted to the key that created them.
    record = await db.get_job(job_id)
    if record:
        return TranscriptionResponse(**record)
    raise HTTPException(404, "Job not found")
```

### Webhook notifications

```python
@app.post("/v1/webhooks")
async def register_webhook(webhook_url: str, api_key: str = Depends(verify_api_key)):
    """Store the webhook URL for the authenticated API key.

    The URL is validated first because it comes from the client and is later
    used as the target of an outgoing POST request.

    Args:
        webhook_url: Absolute ``http`` or ``https`` URL to notify.
        api_key: Value produced by the ``verify_api_key`` dependency; the
            webhook is stored under this key.

    Raises:
        HTTPException: 400 when ``webhook_url`` is not an absolute
            http(s) URL.
    """
    from urllib.parse import urlparse

    try:
        parsed = urlparse(webhook_url)
    except ValueError:
        # urlparse raises ValueError for some malformed inputs.
        parsed = None

    if parsed is None or parsed.scheme not in ("http", "https") or not parsed.netloc:
        raise HTTPException(400, "webhook_url must be an absolute http(s) URL")

    # NOTE(review): this does not block internal/private hosts -- confirm
    # whether additional SSRF protection is applied elsewhere.
    await db.webhooks.upsert(api_key=api_key, url=webhook_url)
async def notify_webhook(api_key: str, job: dict):
    """POST a job payload as JSON to the webhook registered for ``api_key``.

    Does nothing when ``db.webhooks.get`` returns no webhook for the key.
    The HTTP status of the receiver's reply is not inspected; connection
    errors and timeouts propagate to the caller.

    Args:
        api_key: Key whose registered webhook should be notified.
        job: Payload sent as the JSON request body.
    """
    import aiohttp

    webhook = await db.webhooks.get(api_key=api_key)
    if not webhook:
        return

    # aiohttp has no module-level ``post``; requests go through a
    # ClientSession. The timeout is bounded so that a slow receiver cannot
    # stall the caller for aiohttp's much longer default.
    timeout = aiohttp.ClientTimeout(total=10)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.post(webhook.url, json=job):
            pass
```

### Rate limiting and billing

```python
from slowapi import Limiter
from slowapi.util import get_remote_address
# Rate limiter built with slowapi's get_remote_address as its key function.
# NOTE(review): presumably this keys limits on the client address rather than
# on the API key -- confirm that this matches the intended per-tier limits.
limiter = Limiter(key_func=get_remote_address)
# Illustrative fragment: shows the limit decorator stacked under the
# /v1/transcribe route decorator. The signature and body are elided ("...").
# NOTE(review): slowapi's documentation appears to require a
# `request: Request` parameter on decorated endpoints -- confirm when
# filling in the real signature.
@app.post("/v1/transcribe")
@limiter.limit("100/minute") # Paid tier
async def transcribe(...):
...
# Track consumed minutes for billing
async def track_usage(api_key: str, duration_seconds: float):
    """Add a processed duration, converted to minutes, to a key's usage.

    Args:
        api_key: Key whose usage is incremented.
        duration_seconds: Processed duration in seconds.
    """
    minutes_used = duration_seconds / 60
    billing_period = get_current_billing_period()
    await db.usage.increment(
        api_key=api_key,
        minutes=minutes_used,
        period=billing_period,
    )
```

### SDK examples

```python
# Python SDK
import transcription_sdk
# Create a client with your API key.
client = transcription_sdk.Client(api_key="YOUR_KEY")

# Transcribe "meeting.mp3" with language "ru" and diarization enabled.
result = client.transcribe(
    "meeting.mp3",
    language="ru",
    diarize=True,
)

# Print the transcript text.
print(result.full_text)
```

Timeline: basic API – 1 week; billing, SDK, and documentation – 1 month.