# Implementing automatic video subtitle generation

Automatic subtitles make video content accessible to deaf and hard-of-hearing viewers, improve SEO, and increase viewership (85% of viewers watch videos on social media without sound). Whisper large-v3 generates subtitles for Russian-language videos with 90–95% accuracy.

### Subtitle generation via Whisper

```python
import os
import subprocess
import tempfile

from faster_whisper import WhisperModel
# Module-level Whisper "large-v3" model, created once at import time and
# reused by generate_subtitles(); configured for CUDA with the
# "int8_float16" compute type.
model = WhisperModel("large-v3", device="cuda", compute_type="int8_float16")
def generate_subtitles(video_path: str, output_format: str = "srt") -> str:
    """Transcribe a video's audio track and return subtitles as text.

    The audio is extracted with ffmpeg into a 16 kHz mono WAV file, transcribed
    by the module-level ``model`` (``language="ru"``, VAD filter enabled), and
    the resulting segments are rendered in the requested subtitle format.

    Args:
        video_path: Path to the input video file.
        output_format: One of ``"srt"``, ``"vtt"`` or ``"ass"``.

    Returns:
        The subtitle file contents as a string.

    Raises:
        ValueError: If ``output_format`` is not a supported format.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    supported_formats = ("srt", "vtt", "ass")
    if output_format not in supported_formats:
        # Fail before the expensive extraction/transcription instead of
        # silently returning None at the very end.
        raise ValueError(
            f"Unsupported output_format {output_format!r}; "
            f"expected one of {', '.join(supported_formats)}"
        )

    # A private temporary directory keeps concurrent calls from overwriting
    # each other's audio file and guarantees the file is removed afterwards.
    with tempfile.TemporaryDirectory(prefix="subtitles_") as tmp_dir:
        # Extract the audio track
        audio_path = os.path.join(tmp_dir, "audio.wav")
        subprocess.run([
            "ffmpeg", "-y", "-loglevel", "error",
            "-i", video_path, "-vn", "-ar", "16000",
            "-ac", "1", audio_path,
        ], check=True)

        # Transcribe with segment-level timestamps
        segments, _ = model.transcribe(
            audio_path,
            language="ru",
            vad_filter=True,
            word_timestamps=False,
        )
        # Materialize inside the block so the audio file still exists in case
        # the returned segments are produced lazily.
        segment_list = list(segments)

    # Converters are resolved only for the requested format.
    if output_format == "srt":
        return segments_to_srt(segment_list)
    if output_format == "vtt":
        return segments_to_vtt(segment_list)
    return segments_to_ass(segment_list)
def segments_to_srt(segments) -> str:
    """Render transcription segments as SRT-formatted text.

    Each segment becomes one numbered cue (numbering starts at 1) made of the
    cue index, a ``start --> end`` timing line and the stripped segment text.
    Text longer than 80 characters is passed through ``wrap_subtitle_text``.

    Args:
        segments: Iterable of objects exposing ``start``, ``end`` and ``text``.

    Returns:
        All cues joined by newlines, i.e. separated by one blank line.
    """
    cues = []
    for number, segment in enumerate(segments, start=1):
        timing = " --> ".join(
            (format_srt_time(segment.start), format_srt_time(segment.end))
        )
        caption = segment.text.strip()
        # Keep over-long captions within the subtitle line-length limit
        caption = wrap_subtitle_text(caption) if len(caption) > 80 else caption
        cues.append(f"{number}\n{timing}\n{caption}\n")
    return "\n".join(cues)
def format_srt_time(seconds: float) -> str:
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond once, before being split
    into fields. Truncating the fractional part separately loses a millisecond
    to floating-point error (e.g. ``1.001`` would become ``00:00:01,000``).

    Args:
        seconds: Offset from the start of the media, in seconds. Negative
            values are clamped to zero.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"``.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
```

### Embedding subtitles in video

```python
def _escape_ffmpeg_filter_value(value: str) -> str:
    """Escape *value* for use as an option value inside an ffmpeg filtergraph."""
    # First level: characters special inside a filter option value.
    # The backslash must be handled first so added escapes are not re-escaped.
    for char in ("\\", "'", ":"):
        value = value.replace(char, "\\" + char)
    # Second level: characters special to the filtergraph parser itself.
    for char in ("\\", "'", "[", "]", ",", ";"):
        value = value.replace(char, "\\" + char)
    return value


def burn_subtitles(video_path: str, srt_path: str, output_path: str):
    """Burn subtitles directly into the video frames (hard subtitles).

    Runs ffmpeg with the ``subtitles`` video filter and a fixed style
    (Arial 24, white text, black outline of width 2); the audio stream is
    copied without re-encoding. An existing ``output_path`` is overwritten.

    Args:
        video_path: Path to the input video.
        srt_path: Path to the subtitle file to render.
        output_path: Path of the video file to write.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # The path is embedded in a filtergraph string, so characters such as
    # ':' (Windows drive letters), quotes, commas and brackets must be escaped
    # or ffmpeg will misparse the filter.
    escaped_srt = _escape_ffmpeg_filter_value(str(srt_path))
    subprocess.run([
        "ffmpeg", "-y", "-i", video_path,
        "-vf", f"subtitles={escaped_srt}:force_style='FontName=Arial,FontSize=24,PrimaryColour=&HFFFFFF,OutlineColour=&H000000,Outline=2'",
        "-c:a", "copy",
        output_path,
    ], check=True)
def add_soft_subtitles(video_path: str, srt_path: str, output_path: str):
    """Add the subtitles as a separate, switchable track (soft subtitles).

    Video and audio streams are copied without re-encoding; the subtitle file
    is encoded as ``mov_text`` and tagged with language ``rus``. An existing
    ``output_path`` is overwritten.

    Args:
        video_path: Path to the input video.
        srt_path: Path to the subtitle file to attach.
        output_path: Path of the video file to write.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    subprocess.run([
        "ffmpeg", "-y", "-i", video_path, "-i", srt_path,
        # Map streams explicitly: ffmpeg's default selection keeps at most one
        # subtitle stream, so the new track could be dropped in favour of one
        # already present in the video. '?' makes the audio mapping optional
        # for videos without an audio stream.
        "-map", "0:v", "-map", "0:a?", "-map", "1:0",
        "-c", "copy", "-c:s", "mov_text",
        "-metadata:s:s:0", "language=rus",
        output_path,
    ], check=True)
```

### Subtitle post-processing

- Maximum 2 lines per subtitle, 42 characters per line
- Minimum duration: 1.5 seconds
- Merging short segments (<0.5 sec)

Timeframe: SRT generation script — 1 day. Web service with video upload — 3–5 days.