AI Lead Scoring System Development

We design and deploy artificial intelligence systems, from prototype to production-ready solution. Our team combines expertise in machine learning, data engineering and MLOps so that AI works in real business, not just in the lab.
Showing 1 of 1 services · All 1566 services
AI Lead Scoring System Development
Medium
~1-2 weeks
FAQ
AI Development Areas
AI Solution Development Stages
Latest works
  • image_website-b2b-advance_0.png
    B2B ADVANCE company website development
    1212
  • image_web-applications_feedme_466_0.webp
    Development of a web application for FEEDME
    1161
  • image_websites_belfingroup_462_0.webp
    Website development for BELFINGROUP
    852
  • image_ecommerce_furnoro_435_0.webp
    Development of an online store for the company FURNORO
    1041
  • image_logo-advance_0.png
    B2B Advance company logo design
    561
  • image_crm_enviok_479_0.webp
    Development of a web application for Enviok
    822

AI-система скоринга лидов

Традиционный lead scoring — ручные правила: «посетил pricing page +10 баллов, открыл email +5». ML-подход обучается на исторических данных закрытых сделок и находит нелинейные комбинации сигналов, которые человек никогда не заметит. Разница в конверсии отдела продаж: +25-40% при правильно внедрённом ML-скоринге.

Предиктивная модель вероятности конверсии

import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import StratifiedKFold
import shap

class LeadScoringModel:
    """
    Predictive lead scoring.

    Output: P(lead → closed_won) within a 90-day horizon. The horizon is
    defined by how the training target is labelled; this class does not
    enforce it.

    The estimator is a gradient-boosting classifier wrapped in isotonic
    calibration so that ``predict_proba`` can be read as a probability.
    """

    @staticmethod
    def _make_base_model():
        """Build the unfitted booster shared by the final model and the CV folds."""
        return GradientBoostingClassifier(
            n_estimators=300, learning_rate=0.05,
            max_depth=4, subsample=0.8,
            min_samples_leaf=20, random_state=42
        )

    @staticmethod
    def _column(leads: pd.DataFrame, name: str, default) -> pd.Series:
        """Return ``leads[name]``, or a constant Series of ``default`` if absent.

        ``DataFrame.get(name, default)`` hands back the bare scalar when the
        column is missing, which has none of the Series methods used below.
        """
        if name in leads.columns:
            return leads[name]
        return pd.Series(default, index=leads.index)

    @classmethod
    def _flag(cls, leads: pd.DataFrame, name: str) -> pd.Series:
        """Return a 0/1 integer column; a missing column or NaN counts as 0."""
        # fillna first: astype(int) raises on NaN.
        return cls._column(leads, name, 0).fillna(0).astype(int)

    def __init__(self):
        # Calibration: make the model output usable as a real probability.
        self.model = CalibratedClassifierCV(
            self._make_base_model(), method='isotonic', cv=5
        )
        self.explainer = None
        self.feature_names = []

    def build_features(self, leads: pd.DataFrame) -> pd.DataFrame:
        """
        Build the model's feature matrix from raw lead attributes.

        Feature groups:
        1. Firmographic (who the company is)
        2. Demographic (who the contact is)
        3. Behavioral (what the lead did on the site / in the product)
        4. Temporal (recency plus a composite ``velocity_score``)

        Any input column that is absent is replaced by a per-feature default,
        and remaining NaN values become 0. Also refreshes
        ``self.feature_names`` with the produced column order.

        Args:
            leads: one row per lead; all columns are optional.

        Returns:
            A NaN-free DataFrame indexed like ``leads``.
        """
        col = self._column
        features = pd.DataFrame(index=leads.index)

        # === Firmographic ===
        features['company_size_log'] = np.log1p(col(leads, 'company_employees', 10))
        industry = col(leads, 'industry', '')
        features['industry_tech'] = (industry == 'technology').astype(int)
        features['industry_finance'] = (industry == 'finance').astype(int)
        features['annual_revenue_log'] = np.log1p(col(leads, 'annual_revenue_usd', 0))
        features['is_enterprise'] = (col(leads, 'company_employees', 0) > 500).astype(int)
        features['funding_stage_encoded'] = col(leads, 'funding_stage', 'unknown').map(
            {'seed': 1, 'series_a': 2, 'series_b': 3, 'series_c': 4,
             'public': 5, 'unknown': 0}
        ).fillna(0)  # unmapped stages fall back to 0, same as 'unknown'

        # === Demographic ===
        features['is_decision_maker'] = col(leads, 'seniority', '').isin(
            ['VP', 'Director', 'C-Level', 'Founder']
        ).astype(int)
        department = col(leads, 'department', '')
        features['contact_dept_it'] = (department == 'IT').astype(int)
        features['contact_dept_ops'] = (department == 'Operations').astype(int)

        # === Behavioral (last 30 days, per the input column names) ===
        features['pricing_page_visits'] = col(leads, 'pricing_views_30d', 0).clip(0, 10)
        features['demo_requested'] = self._flag(leads, 'demo_requested')
        features['trial_started'] = self._flag(leads, 'trial_started')
        features['trial_active_days'] = col(leads, 'trial_active_days', 0).clip(0, 30)
        features['trial_key_feature_used'] = self._flag(leads, 'key_feature_used')
        # Denominator is floored at 1 to avoid division by zero.
        features['emails_opened_rate'] = col(leads, 'emails_opened', 0) / np.maximum(
            col(leads, 'emails_sent', 1), 1
        )
        features['content_downloads'] = col(leads, 'content_downloads_30d', 0).clip(0, 5)
        features['webinar_attended'] = self._flag(leads, 'webinar_attended')
        features['support_tickets'] = col(leads, 'support_tickets', 0).clip(0, 10)

        # === Temporal ===
        features['days_since_first_touch'] = col(leads, 'days_since_first_touch', 90).clip(0, 180)
        features['days_since_last_activity'] = col(leads, 'days_since_last_activity', 30).clip(0, 90)

        # Fill gaps BEFORE the composite, so one missing signal does not turn
        # the whole velocity score into NaN (and then into 0).
        features = features.fillna(0)
        features['velocity_score'] = (
            features['pricing_page_visits'] + features['emails_opened_rate'] * 5 +
            features['demo_requested'] * 10 + features['trial_key_feature_used'] * 8
        )

        self.feature_names = list(features.columns)
        return features

    def train(self, leads: pd.DataFrame, target: pd.Series):
        """
        Fit the calibrated model and report a stratified 5-fold CV AUC.

        The CV folds use the same booster hyper-parameters as the final
        model, so the printed AUC describes the configuration that is fitted.
        After fitting, a SHAP tree explainer is attached for ``explain_lead``.

        Args:
            leads: raw lead attributes, one row per lead.
            target: binary outcome per lead, positionally aligned with ``leads``.
        """
        # Local imports keep these dependencies lazy and out of the fold loop.
        from sklearn.metrics import roc_auc_score
        import shap

        X = self.build_features(leads)
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        cv_scores = []

        for train_idx, val_idx in cv.split(X, target):
            X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
            y_train, y_val = target.iloc[train_idx], target.iloc[val_idx]

            fold_model = self._make_base_model()
            fold_model.fit(X_train, y_train)
            cv_scores.append(roc_auc_score(y_val, fold_model.predict_proba(X_val)[:, 1]))

        print(f"CV AUC: {np.mean(cv_scores):.3f} ± {np.std(cv_scores):.3f}")
        self.model.fit(X, target)

        # SHAP for explainability. Only the first calibration fold's booster
        # is explained, so the values approximate the ensemble.
        first_fold = self.model.calibrated_classifiers_[0]
        base_clf = getattr(first_fold, 'estimator', None)
        if base_clf is None:
            # NOTE(review): older scikit-learn releases appear to expose this
            # as `base_estimator` — confirm against the pinned version.
            base_clf = first_fold.base_estimator
        self.explainer = shap.TreeExplainer(base_clf)

    def predict(self, leads: pd.DataFrame) -> pd.DataFrame:
        """
        Score leads.

        Args:
            leads: raw lead attributes; ``lead_id`` is carried through if present.

        Returns:
            DataFrame indexed like ``leads`` with ``conversion_probability``
            (float), ``score`` (probability * 100 truncated to int) and
            ``tier`` (cold / warm / hot / very_hot).
        """
        X = self.build_features(leads)
        probabilities = self.model.predict_proba(X)[:, 1]

        result = leads[['lead_id']].copy() if 'lead_id' in leads.columns else pd.DataFrame(index=leads.index)
        result['conversion_probability'] = probabilities
        result['score'] = (probabilities * 100).astype(int)
        result['tier'] = pd.cut(
            probabilities,
            bins=[0, 0.2, 0.5, 0.75, 1.0],
            labels=['cold', 'warm', 'hot', 'very_hot'],
            # Without this the first bin is (0, 0.2] and a probability of
            # exactly 0.0 would get a NaN tier.
            include_lowest=True
        )
        return result

    def explain_lead(self, lead_features: pd.Series) -> list[dict]:
        """
        Explain one lead's score via SHAP.

        Args:
            lead_features: one row of the matrix produced by ``build_features``.

        Returns:
            Up to five dicts (``feature``, ``value``, ``impact``,
            ``shap_value``) ordered by absolute SHAP value, largest first.
            Empty list if the model has not been trained yet.
        """
        if self.explainer is None:
            return []

        X = pd.DataFrame([lead_features], columns=self.feature_names)
        # NOTE(review): these values come from the uncalibrated booster, so
        # they are presumably not in calibrated-probability units — confirm.
        shap_values = self.explainer.shap_values(X)[0]

        ranked = sorted(
            zip(self.feature_names, shap_values),
            key=lambda pair: abs(pair[1]), reverse=True
        )
        return [
            {
                'feature': feat,
                'value': float(lead_features.get(feat, 0)),
                'impact': '+' if shap_val > 0 else '-',
                'shap_value': round(float(shap_val), 3)
            }
            for feat, shap_val in ranked[:5]
        ]


class LeadRoutingEngine:
    """Routes scored leads to sales managers (account executives)."""

    @staticmethod
    def _pipeline_count(ae: dict) -> int:
        """Return the manager's current pipeline size (0 if not recorded)."""
        return ae.get('current_pipeline_count', 0)

    @classmethod
    def _has_capacity(cls, ae: dict) -> bool:
        """Return True if the manager is below capacity (default capacity 50)."""
        return cls._pipeline_count(ae) < ae.get('capacity', 50)

    def route_lead(self, lead: dict, score: float, sales_team: list[dict]) -> dict:
        """
        Assign a lead to the least-loaded suitable manager.

        Segment rule: more than 500 employees with score > 0.5 goes to
        'enterprise'; otherwise score > 0.75 goes to 'high_velocity';
        everything else goes to 'nurture'.

        Candidate pool, first non-empty wins:
        1. managers of the target segment that are below capacity;
        2. any manager below capacity;
        3. the whole team (everyone is full).

        Args:
            lead: lead attributes; only ``company_employees`` is read.
            score: conversion probability in [0, 1].
            sales_team: manager dicts with ``id`` and optional ``segment``,
                ``current_pipeline_count`` and ``capacity``.

        Returns:
            Dict with ``assigned_to``, ``segment``, ``priority`` and
            ``suggested_action``.

        Raises:
            ValueError: if ``sales_team`` is empty.
        """
        if not sales_team:
            # Previously surfaced as an opaque "min() arg is an empty sequence".
            raise ValueError("sales_team must contain at least one manager")

        # Strategy: enterprise leads → enterprise AE, SMB → velocity AE.
        if lead.get('company_employees', 0) > 500 and score > 0.5:
            target_segment = 'enterprise'
        elif score > 0.75:
            target_segment = 'high_velocity'
        else:
            target_segment = 'nurture'

        # Load balancing: never prefer a full manager while someone has room.
        with_capacity = [ae for ae in sales_team if self._has_capacity(ae)]
        in_segment = [ae for ae in with_capacity
                      if ae.get('segment') == target_segment]
        candidates = in_segment or with_capacity or sales_team

        # Pick the manager with the smallest pipeline.
        assigned = min(candidates, key=self._pipeline_count)

        return {
            'assigned_to': assigned['id'],
            'segment': target_segment,
            'priority': 'high' if score > 0.6 else 'normal',
            'suggested_action': 'call_within_1h' if score > 0.75 else 'email_sequence'
        }

Типичные результаты: AUC 0.78-0.85 на исторических данных CRM (Salesforce/HubSpot), 35-40% рост win rate у менеджеров, фокусирующихся на top-25% скора. Минимальный датасет для обучения: 500+ закрытых сделок (won + lost).