Automatic SRT/VTT Subtitle File Generation Implementation

We design and deploy artificial intelligence systems: from prototype to production-ready solutions. Our team combines expertise in machine learning, data engineering and MLOps to make AI work not in the lab, but in real business.
Showing 1 of 1 services | All 1566 services
Automatic SRT/VTT Subtitle File Generation Implementation
Simple
~1 business day
FAQ
AI Development Areas
AI Solution Development Stages
Latest works
  • image_website-b2b-advance_0.png
    B2B ADVANCE company website development
    1212
  • image_web-applications_feedme_466_0.webp
    Development of a web application for FEEDME
    1161
  • image_websites_belfingroup_462_0.webp
    Website development for BELFINGROUP
    852
  • image_ecommerce_furnoro_435_0.webp
    Development of an online store for the company FURNORO
    1041
  • image_logo-advance_0.png
    B2B Advance company logo design
    561
  • image_crm_enviok_479_0.webp
    Development of a web application for Enviok
    822

Implementation of automatic generation of SRT/VTT subtitle files. SRT (SubRip Subtitle) and VTT (WebVTT) are standard subtitle formats. SRT is used in video players and when uploading to YouTube/Vimeo. VTT is used by HTML5 web players and streaming platforms.

### SRT and VTT generation

```python
from faster_whisper import WhisperModel
from datetime import timedelta

# Shared Whisper model instance ("large-v3" checkpoint on the CUDA device),
# created once at module load and reused by the transcription functions in
# this file.
model = WhisperModel("large-v3", device="cuda")

def format_time_srt(seconds: float) -> str:
    """Format a time offset as an SRT timestamp (``HH:MM:SS,mmm``).

    The offset is converted to a whole number of milliseconds first and all
    fields are derived from that integer. This avoids float truncation
    (e.g. ``1.001`` rendering as ``,000``) and makes rounding carry
    correctly into seconds, minutes and hours (``59.9996`` becomes
    ``00:01:00,000``).

    Args:
        seconds: Offset from the start of the media, in seconds. Negative
            values are clamped to zero because a timestamp cannot be
            negative.

    Returns:
        The timestamp string with a comma before the milliseconds. Hours are
        zero-padded to at least two digits and grow as needed.
    """
    total_ms = max(0, round(seconds * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"

def format_time_vtt(seconds: float) -> str:
    """Format a time offset as a WebVTT timestamp (``HH:MM:SS.mmm``).

    Delegates to ``format_time_srt`` and swaps the comma for a dot, since
    VTT uses a dot as the millisecond separator.
    """
    srt_stamp = format_time_srt(seconds)
    return srt_stamp.replace(",", ".")

def generate_srt(audio_path: str, language: str = "ru") -> str:
    """Transcribe an audio file and return its subtitles as SRT text.

    Args:
        audio_path: Path of the audio file passed to ``model.transcribe``.
        language: Language code passed to ``model.transcribe``.

    Returns:
        The SRT document: cues numbered from 1, each made of the index line,
        the ``start --> end`` timing line and the stripped segment text,
        separated from the next cue by a blank line.
    """
    transcript, _info = model.transcribe(
        audio_path,
        language=language,
        vad_filter=True,
    )
    cues = [
        f"{number}\n{format_time_srt(piece.start)} --> {format_time_srt(piece.end)}\n{piece.text.strip()}\n"
        for number, piece in enumerate(transcript, 1)
    ]
    return "\n".join(cues)

def generate_vtt(audio_path: str, language: str = "ru") -> str:
    """Transcribe an audio file and return its subtitles as WebVTT text.

    Args:
        audio_path: Path of the audio file passed to ``model.transcribe``.
        language: Language code passed to ``model.transcribe``.

    Returns:
        The WebVTT document: the ``WEBVTT`` header followed by one cue per
        segment (timing line plus stripped text), cues separated by blank
        lines.
    """
    segments, _ = model.transcribe(
        audio_path,
        language=language,
        vad_filter=True,
    )
    lines = ["WEBVTT\n"]
    for seg in segments:
        start = format_time_vtt(seg.start)
        end = format_time_vtt(seg.end)
        text = seg.text.strip()
        lines.append(f"{start} --> {end}\n{text}\n")
    return "\n".join(lines)


### VTT with advanced capabilities
def generate_vtt_with_styling(segments, speaker_map: dict = None) -> str:
    """Build WebVTT text with cue positioning and optional voice tags.

    Args:
        segments: Iterable of objects exposing ``start``, ``end`` and
            ``text``. A ``speaker`` attribute is optional; segments without
            one are emitted without a voice tag.
        speaker_map: Optional mapping from a segment's ``speaker`` value to
            the display name placed in the ``<v ...>`` voice tag. Speakers
            missing from the mapping get no tag.

    Returns:
        The WebVTT document with cues numbered from 1.
    """
    # Cue settings: line:90% places the cue near the bottom of the frame.
    position = "line:90% position:50% align:center"

    lines = ["WEBVTT\n"]
    for number, seg in enumerate(segments, 1):
        start = format_time_vtt(seg.start)
        end = format_time_vtt(seg.end)

        speaker = ""
        if speaker_map:
            # Transcription segments do not necessarily carry a speaker
            # label, so read it defensively instead of raising
            # AttributeError.
            speaker = speaker_map.get(getattr(seg, "speaker", None), "")
        speaker_tag = f"<v {speaker}>" if speaker else ""

        lines.append(
            f"{number}\n{start} --> {end} {position}\n{speaker_tag}{seg.text.strip()}\n"
        )
    return "\n".join(lines)

### Subtitle post-processing
def optimize_subtitles(
    segments: list,
    max_line_length: int = 42,
    max_duration: float = 7.0,
    min_duration: float = 1.2,
) -> list:
    """Adjust subtitle segments to common broadcast constraints.

    For every segment the text is stripped and, if longer than
    ``max_line_length``, broken into two lines at a space. The display time
    is stretched to at least ``min_duration`` and capped at ``max_duration``
    (the cap wins if the two conflict).

    Args:
        segments: Objects exposing ``start``, ``end`` and ``text``, either
            with an instance ``__dict__`` or NamedTuple-style ``_asdict()``.
        max_line_length: Preferred maximum number of characters in the first
            line. Only one break is inserted, so the second line may still
            exceed it.
        max_duration: Maximum time, in seconds, a cue stays on screen.
        min_duration: Minimum time, in seconds, a cue stays on screen.

    Returns:
        A list of dicts, one per segment, holding the segment's original
        fields with ``text`` and ``end`` replaced by the adjusted values.

    Note:
        Stretching a short cue can make it overlap the following one; this
        function does not look at neighbouring segments.
    """
    optimized = []
    for seg in segments:
        text = seg.text.strip()

        # Limit the line length by breaking at a word boundary.
        if len(text) > max_line_length:
            split_at = text.rfind(" ", 0, max_line_length)
            if split_at == -1:
                # The first word alone exceeds the limit: break after it
                # rather than slicing with -1, which would corrupt the text.
                split_at = text.find(" ", max_line_length)
            if split_at != -1:
                text = text[:split_at] + "\n" + text[split_at + 1:]

        # Enforce the minimum, then the maximum, on-screen duration.
        end = max(seg.end, seg.start + min_duration)
        end = min(end, seg.start + max_duration)

        try:
            fields = dict(vars(seg))
        except TypeError:
            # NamedTuple-based segments have no instance __dict__.
            fields = dict(seg._asdict())
        optimized.append({**fields, "text": text, "end": end})

    return optimized