AI Audience Targeting System Based on ML

We design and deploy artificial intelligence systems, from prototype to production-ready solutions. Our team combines expertise in machine learning, data engineering and MLOps so that AI works in real business settings, not just in the lab.
Showing 1 of 1 services · All 1566 services
AI Audience Targeting System Based on ML
Medium
~2-4 weeks
FAQ
AI Development Areas
AI Solution Development Stages
Latest works
  • image_website-b2b-advance_0.png
    B2B ADVANCE company website development
    1212
  • image_web-applications_feedme_466_0.webp
    Development of a web application for FEEDME
    1161
  • image_websites_belfingroup_462_0.webp
    Website development for BELFINGROUP
    852
  • image_ecommerce_furnoro_435_0.webp
    Development of an online store for the company FURNORO
    1041
  • image_logo-advance_0.png
    B2B Advance company logo design
    561
  • image_crm_enviok_479_0.webp
    Development of a web application for Enviok
    822

ML-targeting of audiences in advertising

Machine learning for targeting transforms "show to all women 25-34" into "show to those who are 73%+ likely to convert in the next 7 days." With the same budget, this approach is 3-5 times more effective.

Predictive Targeting: From Segments to Probabilities

import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.cluster import MiniBatchKMeans
from sklearn.preprocessing import LabelEncoder

class PredictiveAudienceBuilder:
    """Build audiences from per-user conversion probabilities."""

    def build_intent_features(self, user_events: pd.DataFrame) -> pd.DataFrame:
        """
        Derive per-user intent features from raw user events.

        user_events columns: user_id, event_type, page_url, timestamp, session_id

        Returns a DataFrame indexed by user_id with session/event counts,
        per-event-type counts, a weighted funnel-depth score, hours since the
        user's last event, event counts for the last and the previous 7-day
        window, and the relative change between those two windows.
        """
        df = user_events.copy()
        df['ts'] = pd.to_datetime(df['timestamp'])

        # Recency of the last activity, measured against the newest event in
        # the data set (not wall-clock time).
        now = df['ts'].max()
        last_seen = df.groupby('user_id')['ts'].max()
        recency = (
            (now - last_seen).dt.total_seconds() / 3600
        ).rename('hours_since_last_event')

        # Behavioural features
        behavior = df.groupby('user_id').agg(
            total_sessions=('session_id', 'nunique'),
            total_events=('event_type', 'count'),
            product_views=('event_type', lambda x: (x == 'product_view').sum()),
            cart_adds=('event_type', lambda x: (x == 'add_to_cart').sum()),
            checkout_starts=('event_type', lambda x: (x == 'checkout_start').sum()),
            search_queries=('event_type', lambda x: (x == 'search').sum()),
        )

        # Conversion funnel score, normalised per session; deeper funnel
        # steps carry a larger weight.
        behavior['funnel_depth'] = (
            behavior['product_views'] * 1 +
            behavior['cart_adds'] * 3 +
            behavior['checkout_starts'] * 7
        ) / behavior['total_sessions'].clip(lower=1)

        # Activity trend: last 7 days vs the 7 days before that.
        # The previous window is half-open [now-14d, now-7d) so that an event
        # exactly on the 7-day boundary is counted in one window only.
        week_ago = now - pd.Timedelta(days=7)
        two_weeks_ago = now - pd.Timedelta(days=14)
        last_7d = df[df['ts'] >= week_ago]
        prev_7d = df[(df['ts'] >= two_weeks_ago) & (df['ts'] < week_ago)]

        activity_last = last_7d.groupby('user_id')['event_type'].count().rename('events_last_7d')
        activity_prev = prev_7d.groupby('user_id')['event_type'].count().rename('events_prev_7d')

        # Users with no events in a window have no row in that window's
        # counts; after the join those gaps become 0.
        result = behavior.join(recency).join(activity_last).join(activity_prev).fillna(0)
        # +1 in the denominator avoids division by zero for users with no
        # activity in the previous window.
        result['activity_trend'] = (
            result['events_last_7d'] - result['events_prev_7d']
        ) / (result['events_prev_7d'] + 1)

        return result

    def score_purchase_propensity(self, features: pd.DataFrame,
                                  model: "lgb.LGBMClassifier") -> pd.DataFrame:
        """
        Score the purchase probability of every user.

        `features` is expected to be indexed by user id; `model` must expose
        `predict_proba` and column 1 of its output is taken as the purchase
        probability.

        Returns a DataFrame with user_id, purchase_probability and
        audience_tier, sorted by probability in descending order.
        """
        scores = model.predict_proba(features)[:, 1]

        result = pd.DataFrame({
            'user_id': features.index,
            'purchase_probability': scores,
            'audience_tier': pd.cut(
                scores,
                bins=[0, 0.1, 0.3, 0.6, 1.0],
                labels=['cold', 'warm', 'hot', 'ready_to_buy'],
                # Without this the first bin is (0, 0.1] and a score of
                # exactly 0.0 would get a NaN tier instead of 'cold'.
                include_lowest=True,
            )
        })

        return result.sort_values('purchase_probability', ascending=False)


class BehavioralClusteringAudience:
    """Unsupervised behavioural segmentation of users."""

    def segment_by_behavior(self, user_features: pd.DataFrame,
                            n_clusters: int = 8) -> "tuple[pd.DataFrame, pd.DataFrame]":
        """
        K-Means clustering to surface hidden audience segments.

        Only numeric columns of `user_features` are used; missing values are
        replaced by 0 and the columns are standardised before clustering.

        Returns a pair `(labelled, profiles)`: a copy of `user_features` with
        an added integer `cluster` column, and a DataFrame of per-cluster
        means of the feature columns.
        """
        from sklearn.preprocessing import StandardScaler

        numeric_cols = user_features.select_dtypes(include=[np.number]).columns
        # A 'cluster' column left over from a previous run is an output label,
        # not a behavioural feature, and would clash with the group key below.
        feature_cols = numeric_cols.drop('cluster', errors='ignore')
        X = user_features[feature_cols].fillna(0)

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        # Fixed random_state keeps the segmentation reproducible between runs.
        kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=42, n_init=10)
        clusters = kmeans.fit_predict(X_scaled)

        # Work on a copy so the caller's frame is not mutated.
        user_features = user_features.copy()
        user_features['cluster'] = clusters

        # Cluster profiles: mean of every feature within each cluster.
        profiles = user_features.groupby('cluster')[feature_cols].mean()

        return user_features, profiles

    def label_clusters(self, cluster_profiles: pd.DataFrame) -> dict:
        """
        Assign a human-readable label to every cluster from its profile.

        Rules are checked in order and the first match wins; clusters that
        match no rule are labelled `segment_<cluster_id>`. Returns a dict
        mapping cluster id to label.
        """
        labels = {}
        for cluster_id, row in cluster_profiles.iterrows():
            # Simplified heuristic labelling.
            # NOTE(review): profiles are per-cluster means, so the exact
            # `cart_adds == 0` test only matches when no user in the cluster
            # added to cart; and a missing 'hours_since_last_event' column
            # defaults to 9999, i.e. is treated as dormant. Confirm both are
            # intended.
            if row.get('checkout_starts', 0) > 2:
                label = 'high_intent_buyers'
            elif row.get('product_views', 0) > 10 and row.get('cart_adds', 0) == 0:
                label = 'browsers_not_buyers'
            elif row.get('total_sessions', 0) > 20:
                label = 'loyal_visitors'
            elif row.get('hours_since_last_event', 9999) > 720:
                label = 'dormant_users'
            else:
                label = f'segment_{cluster_id}'
            labels[cluster_id] = label
        return labels

Contextual targeting without cookies

class ContextualTargetingEngine:
    """ML targeting based on page content (cookieless)."""

    def classify_page_context(self, page_text: str,
                              page_url: str) -> dict:
        """
        Categorise a page into IAB categories for contextual targeting.

        Uses only the page text and URL, no user-level data.

        Returns a dict with:
          - 'primary_category': the category with the highest keyword count
          - 'all_categories': every category with at least one keyword hit
          - 'confidence': the primary category's share of all hits, rounded
            to 2 decimals
          - 'url_signals': content-type hints derived from the URL
        When no keyword matches, the page falls back to 'IAB24' with a
        confidence of 0.5; that result also carries the legacy 'categories'
        key.
        """
        # Key context signals
        url_signals = self._extract_url_signals(page_url)

        # In production: a BERT-based classifier trained on the IAB taxonomy.
        # This is a simplified keyword-based version.
        iab_keywords = {
            'IAB19': ['technology', 'software', 'programming', 'tech'],
            'IAB13': ['finance', 'investment', 'stock', 'crypto', 'money'],
            'IAB7': ['health', 'fitness', 'medical', 'diet'],
            'IAB9': ['hobby', 'crafts', 'games', 'gaming'],
        }

        text_lower = page_text.lower()
        scores = {}
        for iab_cat, keywords in iab_keywords.items():
            # Substring counting: overlapping keywords such as 'tech' inside
            # 'technology' each contribute to the score.
            score = sum(text_lower.count(kw) for kw in keywords)
            if score > 0:
                scores[iab_cat] = score

        if not scores:
            # Same schema as the matched case so callers can read
            # 'primary_category' / 'url_signals' unconditionally;
            # 'categories' is kept for existing callers.
            return {
                'categories': ['IAB24'],
                'primary_category': 'IAB24',
                'all_categories': ['IAB24'],
                'confidence': 0.5,
                'url_signals': url_signals,
            }

        primary_cat = max(scores, key=scores.get)
        total = sum(scores.values())

        return {
            'primary_category': primary_cat,
            'all_categories': list(scores.keys()),
            'confidence': round(scores[primary_cat] / total, 2),
            'url_signals': url_signals,
        }

    def _extract_url_signals(self, url: str) -> list:
        """Return content-type hints found in the URL path (case-sensitive)."""
        signals = []
        if '/news/' in url or '/article/' in url:
            signals.append('editorial_content')
        if '/product/' in url or '/shop/' in url:
            signals.append('ecommerce')
        if '/blog/' in url:
            signals.append('blog_content')
        return signals

Comparison of targeting methods

| Method | CPM | CTR | Conversion | Privacy |
|---|---|---|---|---|
| Demographics (age/gender) | Low | 0.05-0.1% | Low | Safe |
| Behavioral (3rd-party cookies) | High | 0.2-0.5% | Medium | Limited |
| Predictive (ML propensity) | Medium | 0.3-0.8% | High | 1st-party data |
| Lookalike ML | Medium | 0.2-0.6% | Medium | 1st-party data |
| Contextual (cookieless) | Medium | 0.1-0.3% | Medium | Safe |

Predictive targeting based on first-party data is the most robust option in a world without third-party cookies. It requires high-quality event data (at least 50,000 users with conversion history) to train a propensity model with an acceptable AUC (above 0.72).