AI-based account expansion prediction system
Account expansion is the growth of revenue from existing customers through upselling and cross-selling. In B2B SaaS it is measured by Net Revenue Retention (NRR): successful companies have an NRR above 120%, meaning that revenue from the existing customer base grows by 20% or more per year without a single new deal. An AI model identifies which accounts are ready for expansion and why.
Expansion probability model
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
import shap
from anthropic import Anthropic
import json
class AccountExpansionPredictor:
    """Predict how ready an existing account is for expansion.

    Combines a gradient-boosting classifier that scores accounts with an
    Anthropic client that drafts a short brief for the account manager.

    NOTE: nothing in this class calls ``self.model.fit``. The classifier has
    to be trained by the caller (on the columns produced by
    ``build_account_features``, minus ``account_id``) before
    ``predict_expansion_opportunities`` is used.
    """

    def __init__(self):
        self.model = GradientBoostingClassifier(
            n_estimators=200, learning_rate=0.05, max_depth=4, random_state=42
        )
        self.llm = Anthropic()

    @staticmethod
    def _account_column(accounts: pd.DataFrame, column: str, default) -> pd.Series:
        """Return ``accounts[column]`` on a fresh 0..n-1 index.

        When the column is absent, ``default`` is broadcast to every row
        instead of landing on row 0 only.
        """
        if column in accounts.columns:
            return accounts[column].reset_index(drop=True)
        return pd.Series(default, index=pd.RangeIndex(len(accounts)))

    @staticmethod
    def _feature_column(features: pd.DataFrame, column: str, default) -> pd.Series:
        """Return ``features[column]``, or ``default`` broadcast over all rows."""
        if column in features.columns:
            return features[column]
        return pd.Series(default, index=features.index)

    @staticmethod
    def _align_account_column(accounts: pd.DataFrame, features: pd.DataFrame,
                              column: str, default) -> pd.Series:
        """Return ``accounts[column]`` aligned to the rows of ``features``.

        Rows are matched on ``account_id`` when both frames carry it, because
        ``features`` and ``accounts`` do not necessarily share an index.
        Falls back to positional matching (equal lengths) and finally to
        index alignment. A missing column yields ``default`` for every row.
        """
        if column not in accounts.columns:
            return pd.Series(default, index=features.index)
        if 'account_id' in accounts.columns and 'account_id' in features.columns:
            lookup = (
                accounts.drop_duplicates('account_id')
                .set_index('account_id')[column]
            )
            return features['account_id'].map(lookup)
        if len(accounts) == len(features):
            return pd.Series(accounts[column].to_numpy(), index=features.index)
        return accounts[column].reindex(features.index)

    def build_account_features(self, accounts: pd.DataFrame,
                               usage_data: pd.DataFrame,
                               support_data: pd.DataFrame) -> pd.DataFrame:
        """Build the feature table used for expansion prediction.

        Columns read by this method:

        * ``accounts``: ``account_id`` (required); optional
          ``days_since_first_purchase`` (default 180), ``plan_tier``
          (default 1), ``active_users`` / ``licensed_seats`` (default 1 each)
          and ``contract_months_remaining`` (default 12).
        * ``usage_data``: ``account_id``, ``user_id``, ``feature_name``,
          ``session_id``, ``is_advanced_feature``, ``date``, ``sessions_count``.
        * ``support_data``: ``account_id``, ``ticket_id``, ``csat_score``,
          ``priority``.

        Returns:
            One row per row of ``accounts`` (same order, fresh 0..n-1 index)
            with ``account_id`` followed by the feature columns. Any value
            still missing at the end is filled with 0.
        """
        features = accounts[['account_id']].reset_index(drop=True)

        # === Product Usage Signals ===
        # NOTE(review): no time filter is applied here, so
        # `monthly_active_users` is the number of distinct users over all of
        # `usage_data`; presumably the caller passes a one-month window.
        usage = usage_data.groupby('account_id').agg(
            monthly_active_users=('user_id', 'nunique'),
            feature_breadth=('feature_name', 'nunique'),
            sessions_per_user=('session_id', 'count'),
            advanced_features_used=('is_advanced_feature', 'sum'),
        )
        # The aggregation above yields the total session row count; divide by
        # distinct users so the value matches the feature's name. Accounts
        # with zero distinct users become NaN (-> 0 at the end), not inf.
        usage['sessions_per_user'] = (
            usage['sessions_per_user']
            / usage['monthly_active_users'].replace(0, np.nan)
        )
        features = features.merge(usage.reset_index(), on='account_id', how='left')

        # Usage trend: sessions in the last 3 months vs. the 3 months before.
        now = pd.Timestamp.now()
        recent_start = now - pd.DateOffset(months=3)
        older_start = now - pd.DateOffset(months=6)
        dates = usage_data['date']
        recent_counts = (
            usage_data[dates >= recent_start]
            .groupby('account_id')['session_id'].count()
        )
        older_counts = (
            usage_data[(dates < recent_start) & (dates >= older_start)]
            .groupby('account_id')['session_id'].count()
        )
        # An account absent from one window had 0 sessions in it. Subtracting
        # the raw Series would give NaN for such accounts and report a sharp
        # rise or drop as "no change".
        recent = features['account_id'].map(recent_counts).fillna(0)
        older = features['account_id'].map(older_counts).fillna(0)
        # +1 in the denominator avoids division by zero for new accounts.
        features['usage_trend_3m'] = ((recent - older) / (older + 1)).to_numpy()

        # === Account Health ===
        # Values are assigned positionally: `features` has a fresh index, so
        # label-based assignment would misalign when `accounts` has a
        # non-default index.
        features['days_as_customer'] = self._account_column(
            accounts, 'days_since_first_purchase', 180
        ).to_numpy()
        # 1=basic, 2=pro, 3=enterprise
        features['current_plan_tier'] = self._account_column(
            accounts, 'plan_tier', 1
        ).to_numpy()
        active_users = self._account_column(accounts, 'active_users', 1)
        licensed_seats = self._account_column(accounts, 'licensed_seats', 1)
        features['seats_utilization'] = (
            (active_users / licensed_seats).clip(0, 1).to_numpy()
        )
        features['contract_months_remaining'] = self._account_column(
            accounts, 'contract_months_remaining', 12
        ).to_numpy()

        # === Support & Satisfaction ===
        # NOTE(review): every ticket in `support_data` is counted; the "3m"
        # window is presumably applied by the caller - confirm.
        support = support_data.groupby('account_id').agg(
            support_tickets_3m=('ticket_id', 'count'),
            avg_csat=('csat_score', 'mean'),
            # int(...) works whether .any() returns a Python or a NumPy bool.
            has_critical_tickets=('priority', lambda p: int((p == 'critical').any())),
        )
        features = features.merge(support.reset_index(), on='account_id', how='left')
        features['support_tickets_3m'] = features['support_tickets_3m'].fillna(0)
        # Accounts without any CSAT score get a neutral 3.5.
        features['avg_csat'] = features['avg_csat'].fillna(3.5)

        # === Expansion Readiness Signals ===
        features['seats_at_capacity'] = (features['seats_utilization'] > 0.90).astype(int)
        # Power users: distinct users on rows whose `sessions_count` is above
        # the 90th percentile across all of `usage_data`.
        power_threshold = usage_data['sessions_count'].quantile(0.90)
        power_users = (
            usage_data[usage_data['sessions_count'] > power_threshold]
            .groupby('account_id')['user_id'].nunique()
        )
        features['power_user_count'] = (
            features['account_id'].map(power_users).fillna(0).to_numpy()
        )

        return features.fillna(0)

    def predict_expansion_opportunities(self, accounts: pd.DataFrame,
                                        usage_data: pd.DataFrame,
                                        support_data: pd.DataFrame) -> pd.DataFrame:
        """Return accounts ranked by expansion priority.

        Builds the feature table, scores it with ``self.model`` (which must
        already be fitted) and adds ``expansion_probability``,
        ``expansion_potential_usd``, ``recommended_product`` and
        ``priority_score``. The result is sorted by ``priority_score``,
        highest first.
        """
        features = self.build_account_features(accounts, usage_data, support_data)
        # Every column except the identifier is a model input; the list is
        # taken before the output columns below are added.
        feature_cols = [c for c in features.columns if c != 'account_id']
        X = features[feature_cols]

        features['expansion_probability'] = self.model.predict_proba(X)[:, 1]
        features['expansion_potential_usd'] = self._estimate_expansion_value(features, accounts)
        features['recommended_product'] = self._recommend_expansion_product(features)

        # Prioritisation for the sales team: probability weighted by the
        # log of the potential value, so very large deals do not dominate.
        features['priority_score'] = (
            features['expansion_probability']
            * np.log1p(features['expansion_potential_usd'])
        )
        return features.sort_values('priority_score', ascending=False)

    def _estimate_expansion_value(self, features: pd.DataFrame,
                                  accounts: pd.DataFrame) -> pd.Series:
        """Estimate the potential additional ARR from expansion.

        The estimate is the sum of two parts:

        * seat expansion - for accounts at seat capacity, one extra seat per
          power user at $50/seat/month, annualised;
        * plan upgrade - 50% of current ARR for accounts using more than 5
          advanced features while on a tier below 2.

        ``current_arr`` defaults to 10000 when ``accounts`` has no such
        column. The result is indexed like ``features``.
        """
        base_arr = self._align_account_column(accounts, features, 'current_arr', 10000)

        # Seat expansion
        seats_expansion = (
            self._feature_column(features, 'seats_at_capacity', 0)
            * self._feature_column(features, 'power_user_count', 0)
            * 50  # $50/seat/month
        )

        # Plan upgrade
        upgrade_eligible = (
            (self._feature_column(features, 'advanced_features_used', 0) > 5)
            & (self._feature_column(features, 'current_plan_tier', 1) < 2)
        )
        # An unknown ARR contributes 0 here; without the fillna a NaN would
        # also wipe out a valid seat-expansion component in the sum below.
        plan_upgrade_potential = (
            upgrade_eligible.astype(float) * base_arr * 0.5
        ).fillna(0)

        return (seats_expansion * 12 + plan_upgrade_potential).fillna(0)

    def _recommend_expansion_product(self, features: pd.DataFrame) -> pd.Series:
        """Pick the expansion product to recommend for each account.

        Rules are applied in order and a later rule overrides an earlier
        one, so the effective precedence is
        ``plan_upgrade`` > ``feature_add_on`` > ``seat_expansion``;
        accounts matching no rule get ``general_expansion``.

        NOTE(review): the original loop has this "last match wins" order -
        confirm that it is the intended priority.
        """
        rules = [
            (self._feature_column(features, 'seats_at_capacity', 0) > 0, 'seat_expansion'),
            (self._feature_column(features, 'feature_breadth', 0) < 5, 'feature_add_on'),
            (self._feature_column(features, 'current_plan_tier', 1) == 1, 'plan_upgrade'),
        ]
        result = pd.Series(['general_expansion'] * len(features), index=features.index)
        for matches, product in rules:
            result = result.mask(matches, product)
        return result

    def generate_expansion_brief(self, account: dict, *,
                                 model: str = "claude-3-5-sonnet-20241022",
                                 max_tokens: int = 200) -> str:
        """Ask the LLM for a short brief on an account's expansion signals.

        Args:
            account: Mapping with the keys interpolated into the prompt
                (``company_name``, ``current_arr``, ``expansion_probability``,
                ``seats_utilization``, ``usage_trend_3m``,
                ``advanced_features_used``, ``power_user_count``,
                ``recommended_product``, ``expansion_potential_usd``).
                Missing numeric keys are rendered as 0.
            model: Model identifier passed to the Messages API.
            max_tokens: Upper bound on the length of the generated brief.

        Returns:
            The text of the first content block of the response. The prompt
            asks for the brief to be written in Russian.
        """
        response = self.llm.messages.create(
            model=model,
            max_tokens=max_tokens,
            messages=[{
                "role": "user",
                "content": f"""Write a sales brief for account expansion in Russian.
Account: {account.get('company_name')}
Current ARR: ${account.get('current_arr', 0):,.0f}
Expansion probability: {account.get('expansion_probability', 0):.0%}
Key signals:
- Seats utilization: {account.get('seats_utilization', 0):.0%}
- Usage trend: {account.get('usage_trend_3m', 0):+.0%}
- Advanced features used: {account.get('advanced_features_used', 0)}
- Power users: {account.get('power_user_count', 0)}
Recommended expansion: {account.get('recommended_product', '')}
Estimated value: ${account.get('expansion_potential_usd', 0):,.0f} ARR
Write 2-3 sentences: what signals you see, what to propose, and how to frame the conversation."""
            }]
        )
        return response.content[0].text
A predictive approach to account expansion typically increases NRR by 5-15 percentage points: the customer success (CS) team focuses on the top 20% of accounts with real potential instead of spreading its effort evenly across all accounts. As a minimum, the training dataset should contain 200+ expansion events collected over 12+ months.







