AI-система скоринга лидов
Традиционный lead scoring — ручные правила: «посетил pricing page +10 баллов, открыл email +5». ML-подход обучается на исторических данных закрытых сделок и находит нелинейные комбинации сигналов, которые человек никогда не заметит. Разница в конверсии отдела продаж: +25-40% при правильно внедрённом ML-скоринге.
Предиктивная модель вероятности конверсии
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import StratifiedKFold
import shap
class LeadScoringModel:
    """Predictive lead scoring.

    Output: P(lead -> closed_won) within a 90-day horizon.

    The model is a gradient-boosted classifier wrapped in isotonic
    calibration. Features are derived from a raw leads frame by
    ``build_features``; columns absent from the input are replaced by
    per-feature defaults instead of raising.
    """

    # Single source of truth for the GBM hyperparameters, so that the
    # cross-validation folds in ``train`` evaluate the same configuration
    # that is actually fitted and served.
    _GBM_PARAMS = {
        'n_estimators': 300,
        'learning_rate': 0.05,
        'max_depth': 4,
        'subsample': 0.8,
        'min_samples_leaf': 20,
        'random_state': 42,
    }

    def __init__(self):
        base_model = GradientBoostingClassifier(**self._GBM_PARAMS)
        # Calibration: makes the model output usable as a probability.
        self.model = CalibratedClassifierCV(base_model, method='isotonic', cv=5)
        self.explainer = None
        self.feature_names = []

    @staticmethod
    def _column(leads: pd.DataFrame, name: str, default) -> pd.Series:
        """Return ``leads[name]``, or a constant Series when the column is absent.

        ``DataFrame.get(name, scalar)`` hands back the bare scalar for a
        missing column, which then breaks on ``.clip`` / ``.astype`` / ``.map``.
        Returning a Series aligned to ``leads.index`` keeps every downstream
        expression vectorised.
        """
        if name in leads.columns:
            return leads[name]
        return pd.Series(default, index=leads.index)

    @staticmethod
    def _flag(series: pd.Series) -> pd.Series:
        """Cast a 0/1 (or boolean) column to int, treating missing values as 0."""
        return series.fillna(0).astype(int)

    def build_features(self, leads: pd.DataFrame) -> pd.DataFrame:
        """Build the model's feature matrix from a raw leads frame.

        Feature groups:
        1. Firmographic (who the company is)
        2. Demographic (who the contact is)
        3. Behavioral (what they did on the site / in the product)
        4. Temporal (recency and a hand-crafted velocity score)

        Args:
            leads: One row per lead. Every source column is optional; a
                missing column falls back to the default used below.

        Returns:
            A DataFrame indexed like ``leads`` with no NaN values. As a side
            effect, ``self.feature_names`` is set to the column order.
        """
        col = self._column
        flag = self._flag
        features = pd.DataFrame(index=leads.index)

        # === Firmographic ===
        employees = col(leads, 'company_employees', 10)
        industry = col(leads, 'industry', '')
        features['company_size_log'] = np.log1p(employees)
        features['industry_tech'] = (industry == 'technology').astype(int)
        features['industry_finance'] = (industry == 'finance').astype(int)
        features['annual_revenue_log'] = np.log1p(col(leads, 'annual_revenue_usd', 0))
        features['is_enterprise'] = (col(leads, 'company_employees', 0) > 500).astype(int)
        features['funding_stage_encoded'] = col(leads, 'funding_stage', 'unknown').map(
            {'seed': 1, 'series_a': 2, 'series_b': 3, 'series_c': 4,
             'public': 5, 'unknown': 0}
        ).fillna(0)  # stages outside the mapping are encoded as 0

        # === Demographic ===
        department = col(leads, 'department', '')
        features['is_decision_maker'] = col(leads, 'seniority', '').isin(
            ['VP', 'Director', 'C-Level', 'Founder']
        ).astype(int)
        features['contact_dept_it'] = (department == 'IT').astype(int)
        features['contact_dept_ops'] = (department == 'Operations').astype(int)

        # === Behavioral (last 30 days) ===
        # Counters are clipped so a few extreme leads do not dominate splits.
        features['pricing_page_visits'] = col(leads, 'pricing_views_30d', 0).clip(0, 10)
        features['demo_requested'] = flag(col(leads, 'demo_requested', 0))
        features['trial_started'] = flag(col(leads, 'trial_started', 0))
        features['trial_active_days'] = col(leads, 'trial_active_days', 0).clip(0, 30)
        features['trial_key_feature_used'] = flag(col(leads, 'key_feature_used', 0))
        # Denominator is floored at 1 to avoid division by zero.
        features['emails_opened_rate'] = col(leads, 'emails_opened', 0) / np.maximum(
            col(leads, 'emails_sent', 1), 1
        )
        features['content_downloads'] = col(leads, 'content_downloads_30d', 0).clip(0, 5)
        features['webinar_attended'] = flag(col(leads, 'webinar_attended', 0))
        features['support_tickets'] = col(leads, 'support_tickets', 0).clip(0, 10)

        # === Temporal ===
        features['days_since_first_touch'] = col(leads, 'days_since_first_touch', 90).clip(0, 180)
        features['days_since_last_activity'] = col(leads, 'days_since_last_activity', 30).clip(0, 90)

        # Fill gaps BEFORE deriving the composite score; otherwise one missing
        # component would turn the whole velocity score into NaN (then 0).
        features = features.fillna(0)
        features['velocity_score'] = (
            features['pricing_page_visits'] + features['emails_opened_rate'] * 5 +
            features['demo_requested'] * 10 + features['trial_key_feature_used'] * 8
        )

        self.feature_names = list(features.columns)
        return features

    def train(self, leads: pd.DataFrame, target: pd.Series):
        """Fit the calibrated model and report stratified 5-fold CV AUC.

        Args:
            leads: Raw leads frame accepted by ``build_features``.
            target: Binary outcome per lead, positionally aligned with ``leads``.

        Side effects:
            Prints the mean and standard deviation of the fold AUCs, fits
            ``self.model`` on all data and initialises ``self.explainer``.
        """
        from sklearn.metrics import roc_auc_score

        X = self.build_features(leads)
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        cv_scores = []
        for train_idx, val_idx in cv.split(X, target):
            # Same hyperparameters as the production base model, so the
            # reported AUC describes the model that is actually deployed.
            fold_model = GradientBoostingClassifier(**self._GBM_PARAMS)
            fold_model.fit(X.iloc[train_idx], target.iloc[train_idx])
            val_proba = fold_model.predict_proba(X.iloc[val_idx])[:, 1]
            cv_scores.append(roc_auc_score(target.iloc[val_idx], val_proba))
        print(f"CV AUC: {np.mean(cv_scores):.3f} ± {np.std(cv_scores):.3f}")

        self.model.fit(X, target)

        # SHAP for explainability. NOTE: the explainer is built on the
        # underlying estimator of the first calibration fold only, so the
        # attributions describe that tree model, not the calibrated ensemble.
        import shap
        base_clf = self.model.calibrated_classifiers_[0].estimator
        self.explainer = shap.TreeExplainer(base_clf)

    def predict(self, leads: pd.DataFrame) -> pd.DataFrame:
        """Score leads.

        Args:
            leads: Raw leads frame accepted by ``build_features``.

        Returns:
            A frame with ``lead_id`` (when present in the input),
            ``conversion_probability``, an integer ``score`` (probability
            times 100, truncated) and a categorical ``tier``.
        """
        X = self.build_features(leads)
        probabilities = self.model.predict_proba(X)[:, 1]

        if 'lead_id' in leads.columns:
            result = leads[['lead_id']].copy()
        else:
            result = pd.DataFrame(index=leads.index)

        result['conversion_probability'] = probabilities
        result['score'] = (probabilities * 100).astype(int)
        result['tier'] = pd.cut(
            probabilities,
            bins=[0, 0.2, 0.5, 0.75, 1.0],
            labels=['cold', 'warm', 'hot', 'very_hot'],
            # Without this a probability of exactly 0.0 falls outside the
            # first (0, 0.2] bin and the lead gets a NaN tier.
            include_lowest=True,
        )
        return result

    def explain_lead(self, lead_features: pd.Series) -> list[dict]:
        """Return the five strongest SHAP contributions for a single lead.

        Args:
            lead_features: One row of the matrix produced by ``build_features``.

        Returns:
            Up to five dicts with ``feature``, ``value``, ``impact`` ('+' or
            '-') and ``shap_value``, ordered by absolute SHAP value. An empty
            list when the model has not been trained yet.
        """
        if self.explainer is None:
            return []

        X = pd.DataFrame([lead_features], columns=self.feature_names)
        shap_values = self.explainer.shap_values(X)[0]

        ranked = sorted(
            zip(self.feature_names, shap_values),
            key=lambda pair: abs(pair[1]),
            reverse=True,
        )
        return [
            {
                'feature': feat,
                'value': float(lead_features.get(feat, 0)),
                'impact': '+' if shap_val > 0 else '-',
                'shap_value': round(float(shap_val), 3),
            }
            for feat, shap_val in ranked[:5]
        ]
class LeadRoutingEngine:
    """Routes scored leads to sales managers."""

    def route_lead(self, lead: dict, score: float, sales_team: list[dict]) -> dict:
        """Assign a lead to the least-loaded suitable manager.

        Args:
            lead: Lead attributes; only ``company_employees`` is read.
            score: Conversion probability of the lead.
            sales_team: Manager dicts with ``id`` and optionally ``segment``,
                ``current_pipeline_count`` (default 0) and ``capacity``
                (default 50).

        Returns:
            A dict with ``assigned_to``, ``segment``, ``priority`` and
            ``suggested_action``.

        Raises:
            ValueError: If ``sales_team`` is empty.
        """
        if not sales_team:
            raise ValueError("sales_team must contain at least one manager")

        # A missing or None head count is treated as 0.
        employees = lead.get('company_employees') or 0

        # Strategy: enterprise leads -> enterprise AE, hot SMB -> velocity AE.
        if employees > 500 and score > 0.5:
            target_segment = 'enterprise'
        elif score > 0.75:
            target_segment = 'high_velocity'
        else:
            target_segment = 'nurture'

        def pipeline_count(ae: dict) -> int:
            return ae.get('current_pipeline_count', 0)

        def has_capacity(ae: dict) -> bool:
            return pipeline_count(ae) < ae.get('capacity', 50)

        # Load balancing: prefer managers of the target segment with room left.
        available = [
            ae for ae in sales_team
            if ae.get('segment') == target_segment and has_capacity(ae)
        ]
        if not available:
            # Nobody suitable in the segment: fall back to any manager that
            # still has capacity, so a full pipeline is not picked merely
            # because its absolute count is the lowest.
            available = [ae for ae in sales_team if has_capacity(ae)]
        if not available:
            # Everyone is at capacity: the lead still has to go somewhere.
            available = sales_team

        # Pick the manager with the smallest current pipeline.
        assigned = min(available, key=pipeline_count)
        return {
            'assigned_to': assigned['id'],
            'segment': target_segment,
            'priority': 'high' if score > 0.6 else 'normal',
            'suggested_action': 'call_within_1h' if score > 0.75 else 'email_sequence'
        }
Типичные результаты: AUC 0.78–0.85 на исторических данных CRM (Salesforce/HubSpot), рост win rate на 35–40% у менеджеров, фокусирующихся на лидах из top-25% по скору. Минимальный датасет для обучения: 500+ закрытых сделок (won + lost).







