AI-based SaaS onboarding optimization system
Onboarding is the most critical stage of the SaaS customer lifecycle: 40-60% of users churn within the first 30 days without ever experiencing the product's core value. The AI system identifies which onboarding steps lead to activation and personalizes each user's journey accordingly.
Predicting activation and churn in onboarding
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from anthropic import Anthropic
import json
class OnboardingActivationPredictor:
    """Predict whether a user activates within 14 days of signup.

    Activation means reaching the product's "Aha moment", which is expressed
    here as a list of event names supplied by the caller.
    """

    def __init__(self, aha_moment_events: list[str]):
        """Store the activation events and create the (unfitted) classifier.

        Args:
            aha_moment_events: Event names that signal activation, e.g.
                ``['first_message_sent', 'channel_created']`` for Slack or
                ``['first_design_shared', 'collaboration_started']`` for Figma.
        """
        self.aha_events = aha_moment_events
        self.model = GradientBoostingClassifier(
            n_estimators=200, learning_rate=0.05, max_depth=4, random_state=42
        )

    def build_features(self, user_events: pd.DataFrame,
                       days_since_signup: int = 7) -> pd.DataFrame:
        """Build one feature row per user from the first N days of onboarding.

        Args:
            user_events: Event log. The columns read here are ``user_id``,
                ``signup_date``, ``event_date``, ``session_id``,
                ``event_name``, ``event_id`` and ``days_to_event``.
            days_since_signup: Length of the observation window in days,
                counted from each user's first ``signup_date`` value.

        Returns:
            DataFrame with a ``user_id`` column plus the aggregated features
            ``sessions_count``, ``unique_events``, ``total_events``,
            ``days_active``, ``key_feature_used``,
            ``onboarding_steps_completed``, ``invited_teammates``,
            ``setup_completed`` and ``setup_speed_days``. Remaining NaN values
            are replaced with 0.
        """
        signup = user_events.groupby('user_id')['signup_date'].first()
        cutoff = signup + pd.Timedelta(days=days_since_signup)
        in_window = user_events['event_date'] <= user_events['user_id'].map(cutoff)
        early_events = user_events[in_window]

        features = early_events.groupby('user_id').agg(
            sessions_count=('session_id', 'nunique'),
            unique_events=('event_name', 'nunique'),
            total_events=('event_id', 'count'),
            days_active=('event_date', lambda x: x.dt.date.nunique()),
            # int(...) works for both Python and NumPy booleans.
            key_feature_used=('event_name', lambda x: int(x.isin(self.aha_events).any())),
            onboarding_steps_completed=('event_name', lambda x: x.str.startswith('onboarding_').sum()),
            invited_teammates=('event_name', lambda x: (x == 'invite_sent').sum()),
            setup_completed=('event_name', lambda x: int((x == 'setup_complete').any())),
        ).reset_index()

        # Progress speed: day of the first 'setup_complete' event per user.
        first_setup_day = (
            early_events.loc[early_events['event_name'] == 'setup_complete']
            .groupby('user_id')['days_to_event']
            .min()
        )
        # `features` has a RangeIndex after reset_index(), so the per-user
        # values must be looked up by the user_id column; assigning the
        # user_id-indexed Series directly would align on the wrong labels.
        # Users who never finished setup get the full window length.
        features['setup_speed_days'] = (
            features['user_id'].map(first_setup_day).fillna(days_since_signup)
        )
        return features.fillna(0)

    def identify_critical_path(self, user_events: pd.DataFrame,
                               activated_users: set,
                               churned_users: set) -> dict:
        """Rank early events by how strongly they are associated with activation.

        Only events with ``days_to_event <= 3`` are considered. For each event
        name the activation rate of users who triggered it is compared with
        the activation rate of users who did not.

        Args:
            user_events: Event log with ``user_id``, ``event_name`` and
                ``days_to_event`` columns.
            activated_users: Ids of users who activated.
            churned_users: Ids of users who churned; they are counted in the
                population used for the "without the event" comparison.

        Returns:
            Dict keyed by event name, ordered by descending lift. Each value
            holds ``activation_rate``, ``lift_vs_without``, ``prevalence``
            (number of users with the event) and ``is_critical``
            (lift above 1.5). Events whose users never activated are omitted.
        """
        early = user_events[user_events['days_to_event'] <= 3]
        # Everyone we know about: users in the log plus both labelled groups.
        population = set(user_events['user_id']) | activated_users | churned_users

        critical_path = {}
        # sort=False keeps first-appearance order, which decides ties in the
        # final (stable) sort.
        for event, user_ids in early.groupby('event_name', sort=False)['user_id']:
            users_with_event = set(user_ids)
            users_without_event = population - users_with_event

            activation_rate_with = (
                len(users_with_event & activated_users) / max(len(users_with_event), 1)
            )
            if activation_rate_with <= 0:
                continue
            activation_rate_without = (
                len(users_without_event & activated_users)
                / max(len(users_without_event), 1)
            )

            # Floor the baseline so a zero rate does not produce an infinite lift.
            lift = activation_rate_with / max(activation_rate_without, 0.01)
            critical_path[event] = {
                'activation_rate': round(activation_rate_with, 3),
                'lift_vs_without': round(lift, 2),
                'prevalence': len(users_with_event),
                'is_critical': lift > 1.5,
            }

        return dict(
            sorted(critical_path.items(), key=lambda item: -item[1]['lift_vs_without'])
        )
class AdaptiveOnboardingOrchestrator:
    """Choose and phrase the next onboarding action for a user."""

    # Ordered onboarding path; each step's successor is the next entry.
    _STEP_ORDER = (
        'profile_completed',
        'invite_teammates',
        'key_feature_setup',
        'aha_moment_action',
        'second_use_case',
    )

    def __init__(self, model: str = "claude-3-5-sonnet-20241022",
                 max_tokens: int = 256):
        """Create the LLM client.

        Args:
            model: Model id passed to the Messages API.
            max_tokens: Output token budget for a generated nudge. The
                default leaves headroom so that a 50-word Russian message is
                not cut off mid-sentence.
        """
        self.llm = Anthropic()
        self.model = model
        self.max_tokens = max_tokens

    @staticmethod
    def _intervention_type(activation_probability: float,
                           days_since_signup: int) -> str:
        """Map activation probability and account age to an intervention tier."""
        # Low activation probability during the first week -> urgent intervention.
        if activation_probability < 0.3 and days_since_signup <= 7:
            return 'urgent'
        if activation_probability < 0.5 and days_since_signup >= 7:
            return 'nudge'
        return 'guide'

    @classmethod
    def _next_step(cls, completed_steps: list[str]) -> str:
        """Return the first unfinished step after the last completed one.

        Steps already present in ``completed_steps`` are skipped so the user
        is never sent back to something they have done. If the last completed
        step is unknown, or nothing unfinished lies ahead of it, the earliest
        unfinished step is returned. When every step is done the final step
        of the path is returned.
        """
        order = cls._STEP_ORDER
        done = set(completed_steps)
        last_completed = completed_steps[-1] if completed_steps else None
        start = order.index(last_completed) + 1 if last_completed in order else 0

        # Look ahead of the last completed step first, then wrap to the start.
        for step in order[start:] + order[:start]:
            if step not in done:
                return step
        return order[-1]

    def determine_next_action(self, user: dict,
                              completed_steps: list[str],
                              days_since_signup: int,
                              activation_probability: float) -> dict:
        """Decide the user's next onboarding action.

        Takes the user's progress and churn risk into account.

        Args:
            user: User attributes; ``first_name``, ``job_title`` and
                ``company`` are read when generating the message.
            completed_steps: Steps finished so far, most recent last.
            days_since_signup: Days elapsed since signup.
            activation_probability: Predicted probability of activation.

        Returns:
            Dict with ``next_action``, ``intervention_type``
            (``'urgent'``, ``'nudge'`` or ``'guide'``), ``channel``
            (``'in_app'`` during the first three days, otherwise ``'email'``),
            the generated ``message`` and ``activation_risk``
            (``'high'`` below 0.3, otherwise ``'low'``).
        """
        intervention_type = self._intervention_type(
            activation_probability, days_since_signup
        )
        next_step = self._next_step(completed_steps)
        return {
            'next_action': next_step,
            'intervention_type': intervention_type,
            'channel': 'in_app' if days_since_signup <= 3 else 'email',
            'message': self._generate_nudge(user, next_step, intervention_type),
            'activation_risk': 'high' if activation_probability < 0.3 else 'low',
        }

    def _generate_nudge(self, user: dict, next_step: str,
                        intervention_type: str) -> str:
        """Ask the LLM for a short onboarding message and return its text.

        Returns an empty string if the response carries no text content.
        """
        prompt = (
            f"Write a {intervention_type} onboarding message in Russian.\n"
            f"User: {user.get('first_name', 'Пользователь')}, "
            f"role: {user.get('job_title', '')}, "
            f"company: {user.get('company', '')}\n"
            f"Next step needed: {next_step}\n"
            f"Urgency: {intervention_type}\n"
            'Max 50 words. Action-oriented, specific, '
            'no generic phrases like "Don\'t miss out".'
        )
        response = self.llm.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        # Join text blocks instead of indexing content[0], which raises on an
        # empty response.
        text = ''.join(getattr(block, 'text', '') for block in response.content)
        return text.strip()
class OnboardingAnalytics:
    """Onboarding metrics."""

    def compute_activation_funnel(self, events: pd.DataFrame,
                                  funnel_steps: list[str]) -> pd.DataFrame:
        """Compute the activation funnel step by step.

        Each step counts the distinct users who have an event with that name.
        Steps are counted independently of each other, so a user is counted
        at a step even without the earlier ones and a drop-off can be
        negative.

        Args:
            events: Event log with ``user_id`` and ``event_name`` columns.
            funnel_steps: Event names in funnel order.

        Returns:
            DataFrame with one row per step and the columns ``step``,
            ``users``, ``conversion_from_start`` (share of all distinct users,
            0.0 when there are no users) and ``drop_off_from_prev`` (share
            lost relative to the previous step, or to all users for the first
            step; 0.0 when the previous step has no users). An empty
            ``funnel_steps`` yields an empty frame with the same columns.
        """
        columns = ['step', 'users', 'conversion_from_start', 'drop_off_from_prev']
        if not funnel_steps:
            return pd.DataFrame(columns=columns)

        total_users = events['user_id'].nunique()
        # One pass over the log instead of re-filtering it for every step.
        users_per_event = events.groupby('event_name')['user_id'].nunique().to_dict()

        funnel = []
        for step in funnel_steps:
            users_at_step = users_per_event.get(step, 0)
            conversion = users_at_step / total_users if total_users else 0.0
            funnel.append({
                'step': step,
                'users': users_at_step,
                'conversion_from_start': round(conversion, 3),
            })
        funnel_df = pd.DataFrame(funnel)

        previous_users = funnel_df['users'].shift(1).fillna(total_users)
        # Mask zero denominators so an empty previous step reports 0.0
        # drop-off instead of inf/NaN.
        retained = funnel_df['users'] / previous_users.where(previous_users != 0)
        funnel_df['drop_off_from_prev'] = (1 - retained).fillna(0.0)
        return funnel_df
Optimizing onboarding through AI personalization increases activation rates by 25-40% and reduces first-month churn by 20-30%. Key point: identifying your product's "Aha moment" has to come before any optimization, and it is a product task rather than a technical one.







