AI Video Stabilization Implementation

We design and deploy artificial intelligence systems: from prototype to production-ready solutions. Our team combines expertise in machine learning, data engineering and MLOps to make AI work not in the lab, but in real business.
Showing 1 of 1 services. All 1566 services.
AI Video Stabilization Implementation
Simple
~2-3 business days
FAQ
AI Development Areas
AI Solution Development Stages
Latest works
  • image_website-b2b-advance_0.png
    B2B ADVANCE company website development
    1212
  • image_web-applications_feedme_466_0.webp
    Development of a web application for FEEDME
    1161
  • image_websites_belfingroup_462_0.webp
    Website development for BELFINGROUP
    852
  • image_ecommerce_furnoro_435_0.webp
    Development of an online store for the company FURNORO
    1041
  • image_logo-advance_0.png
    B2B Advance company logo design
    561
  • image_crm_enviok_479_0.webp
    Development of a web application for Enviok
    822

AI Development for Video Stabilization

Camera shake is an inevitable artifact of hand-held shooting, drones, and sports cameras. Classical stabilization works through optical flow: estimate motion between frames, smooth the trajectory, and compensate for the shake. AI methods add semantic understanding: they distinguish operator movement from subject movement, handle dynamic scenes better, and can restore "cropped" pixels through inpainting.

Classical Stabilization via Optical Flow

import cv2
import numpy as np
import torch
from scipy.signal import medfilt

class VideoStabilizer:
    """Classical video stabilization via sparse optical flow.

    Pipeline: estimate one partial-affine transform per adjacent frame
    pair, smooth the accumulated translation trajectory with a moving
    average, then warp each frame by the smoothed transform and crop the
    edges to hide the black borders introduced by warping.
    """

    def __init__(self, smoothing_window: int = 30,
                 crop_ratio: float = 0.1):
        # Frames in the moving-average window: larger = smoother motion,
        # but more aggressive correction (and a larger visible crop).
        self.smoothing_window = smoothing_window
        self.crop_ratio = crop_ratio  # fraction of min(w, h) cropped per edge

    def stabilize(self, input_path: str, output_path: str) -> dict:
        """Stabilize the video at input_path and write it to output_path.

        Returns a summary dict with the number of frames written and the
        smoothing window used. Raises ValueError if the input cannot be
        opened or contains fewer than two frames.
        """
        cap = cv2.VideoCapture(input_path)
        if not cap.isOpened():
            raise ValueError(f'cannot open video: {input_path}')
        fps = cap.get(cv2.CAP_PROP_FPS)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Step 1: inter-frame transforms (len == frame_count - 1)
        transforms = self._estimate_transforms(cap)
        cap.release()
        if not transforms:
            raise ValueError('video has fewer than two frames')

        # Step 2: smooth the accumulated camera trajectory
        smoothed = self._smooth_trajectory(transforms)

        # Step 3: re-read the clip and warp every frame
        cap = cv2.VideoCapture(input_path)
        out = cv2.VideoWriter(output_path,
                              cv2.VideoWriter_fourcc(*'mp4v'),
                              fps, (w, h))
        try:
            for orig, smooth in zip(transforms, smoothed):
                ret, frame = cap.read()
                if not ret:
                    break
                out.write(self._apply_transform(frame, orig, smooth, w, h))
            # BUG FIX: the original loop dropped the final frame (there are
            # only N-1 transforms for N frames). Write it with the last
            # smoothed correction so the output keeps the full duration.
            ret, frame = cap.read()
            if ret:
                out.write(self._apply_transform(frame, transforms[-1],
                                                smoothed[-1], w, h))
        finally:
            # Release resources even if warping/writing raises.
            cap.release()
            out.release()
        return {'frames': len(transforms) + 1,
                'smoothing_window': self.smoothing_window}

    def _estimate_transforms(self, cap) -> list[np.ndarray]:
        """Estimate a 2x3 partial-affine transform per adjacent frame pair."""
        ret, prev = cap.read()
        if not ret:  # empty video: no transforms to estimate
            return []
        prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
        transforms = []
        identity = np.eye(2, 3, dtype=np.float64)  # "no motion" fallback

        while True:
            ret, curr = cap.read()
            if not ret:
                break
            curr_gray = cv2.cvtColor(curr, cv2.COLOR_BGR2GRAY)

            # Feature detection and tracking
            prev_pts = cv2.goodFeaturesToTrack(
                prev_gray, maxCorners=200, qualityLevel=0.01,
                minDistance=30, blockSize=3
            )
            # BUG FIX: goodFeaturesToTrack returns None on featureless
            # frames; the original crashed inside calcOpticalFlowPyrLK.
            if prev_pts is None:
                transforms.append(identity.copy())
                prev_gray = curr_gray
                continue
            curr_pts, status, _ = cv2.calcOpticalFlowPyrLK(
                prev_gray, curr_gray, prev_pts, None
            )

            # Keep only points the tracker marked as reliably followed
            valid_prev = prev_pts[status == 1]
            valid_curr = curr_pts[status == 1]

            # Estimate the affine transform; the estimator needs enough
            # point pairs, otherwise fall back to identity.
            m = None
            if len(valid_prev) >= 3:
                m, _ = cv2.estimateAffinePartial2D(valid_prev, valid_curr)
            if m is None:
                m = identity.copy()

            transforms.append(m)
            prev_gray = curr_gray

        return transforms

    def _smooth_trajectory(self, transforms: list) -> list:
        """Moving-average smoothing of the accumulated translation trajectory.

        Only the translation columns are smoothed; the rotation/scale part
        of each transform is kept exactly as estimated.
        """
        trajectory = np.cumsum([m[:, 2] for m in transforms], axis=0)
        half = self.smoothing_window // 2
        smoothed = np.zeros_like(trajectory)

        for i in range(len(trajectory)):
            start = max(0, i - half)
            # BUG FIX: the original used end = i + half, which excludes the
            # future edge and biases the window toward the past; +1 makes
            # the window symmetric around frame i.
            end = min(len(trajectory), i + half + 1)
            smoothed[i] = trajectory[start:end].mean(axis=0)

        # Correction needed to move each frame onto the smooth path
        delta = smoothed - trajectory
        result = []
        for i, m in enumerate(transforms):
            m_smooth = m.copy()
            m_smooth[:, 2] += delta[i]
            result.append(m_smooth)
        return result

    def _apply_transform(self, frame: np.ndarray,
                         orig_m: np.ndarray,
                         smooth_m: np.ndarray,
                         w: int, h: int) -> np.ndarray:
        # orig_m is currently unused but kept in the signature so callers
        # (and future distortion metrics) can compare original vs smoothed.
        stabilized = cv2.warpAffine(frame, smooth_m, (w, h))
        # Crop the borders to hide black edges introduced by warping,
        # then resize back to the original resolution.
        crop = int(min(w, h) * self.crop_ratio)
        if crop > 0:
            stabilized = stabilized[crop:h - crop, crop:w - crop]
        return cv2.resize(stabilized, (w, h))

DUT — Deep Unified Transformer for AI Stabilization

class DeepVideoStabilizer:
    """
    AI approach: a network trained on unstable/stable video pairs.
    Advantage over the classical pipeline: better handles rolling
    shutter, fast motion, and blur.

    NOTE(review): relies on _frames_to_tensor / _tensor_to_frames helpers
    that are not defined in this file — presumably provided by the DUT
    checkout; confirm they exist before deploying.
    """

    def __init__(self, checkpoint_path: str, device: str = 'cuda'):
        # The DUT repo is not pip-installable; it is expected to be
        # checked out under /opt/DUT (hence the sys.path hack).
        import sys
        sys.path.append('/opt/DUT')
        from model import DUTStabilizer
        self.model = DUTStabilizer()
        # BUG FIX: without map_location, a CUDA-trained checkpoint cannot
        # be loaded on a CPU-only host.
        # SECURITY: torch.load unpickles arbitrary objects — only load
        # trusted checkpoints (or pass weights_only=True where available).
        self.model.load_state_dict(
            torch.load(checkpoint_path, map_location=device))
        self.model.eval().to(device)
        self.device = device

    @torch.no_grad()
    def stabilize_clip(self, frames: list[np.ndarray],
                       window_size: int = 16) -> list[np.ndarray]:
        """
        Process the clip in windows of window_size frames with 50% overlap.

        Only the first half of each window's output is kept, so every
        emitted frame was predicted with future frames available as
        context — the key feature of DUT. The tail window is padded by
        repeating its last frame; the final slice trims the padding.
        """
        results = []
        step = window_size // 2  # 50% overlap between consecutive windows
        for i in range(0, len(frames), step):
            window = frames[i:i + window_size]
            if len(window) < window_size:
                # Pad with the last frame so the model always sees a
                # full-sized window.
                window = window + [window[-1]] * (window_size - len(window))

            tensor = self._frames_to_tensor(window)
            stabilized_tensor = self.model(tensor.to(self.device))
            stabilized_frames = self._tensor_to_frames(stabilized_tensor)
            results.extend(stabilized_frames[:step])

        return results[:len(frames)]

Video Stabilization Quality Metrics

def evaluate_stabilization(unstable_frames: list, stable_frames: list) -> dict:
    """
    Stability metrics for a stabilized clip, from dense optical flow.

    Computed here (from stable_frames only):
    - stability_score: std of mean inter-frame flow magnitude (lower = better)
    - mean_motion / max_motion: residual inter-frame motion

    unstable_frames is kept in the signature for cropping-ratio and
    distortion metrics, which need the original clip; this implementation
    does not compute them yet.

    Clips with fewer than two stable frames return all-zero metrics.
    """
    flows = []
    for i in range(1, len(stable_frames)):
        prev_gray = cv2.cvtColor(stable_frames[i - 1], cv2.COLOR_BGR2GRAY)
        curr_gray = cv2.cvtColor(stable_frames[i], cv2.COLOR_BGR2GRAY)
        flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None,
                                            0.5, 3, 15, 3, 5, 1.2, 0)
        flows.append(np.abs(flow).mean())

    # BUG FIX: np.max / np.mean raise on an empty sequence, so the original
    # crashed for clips with fewer than two frames.
    if not flows:
        return {'stability_score': 0.0, 'mean_motion': 0.0, 'max_motion': 0.0}

    return {
        'stability_score': float(np.std(flows)),
        'mean_motion': float(np.mean(flows)),
        'max_motion': float(np.max(flows))
    }
Method Stability↓ Cropping↑ Speed
OpenCV (vidstab) 0.35 0.91 Real-time
DIFRINT 0.18 0.89 5–10 FPS
DUT 0.14 0.87 3–5 FPS
StabNet 0.16 0.90 8 FPS
Task Timeline
Batch stabilization via OpenCV 1 week
AI stabilization with DUT/DIFRINT 4–6 weeks
Real-time stabilization for streams 6–10 weeks