Developing an AI-Based Employee Burnout Monitoring System (Burnout Detection)
Burnout is costly for companies: sick leave, decreased productivity, and employee turnover. Early warning systems analyze the digital footprint in corporate tools—without monitoring the content of emails, only behavioral patterns and aggregated metrics.
Principles and Limitations
What we monitor, what we don’t monitor:
Allowed (behavioral metrics):
✓ Commit frequency and working hours (Git timestamps, no content)
✓ Time of first and last login to systems
✓ Number of meetings in the calendar
✓ Regularity of breaks during the workday
✓ Number of messages (without text) in Slack/Teams
✓ Number of tasks created vs. closed (Jira/Linear)
Prohibited:
✗ Reading correspondence
✗ Screen recording or keystroke logging
✗ Geolocation
✗ Analysis of document contents
GDPR/152-FZ compliance: data is aggregated only at the pattern level, personal data is not stored in raw form, and a specific employee's data is accessible only to HR and the employee's manager, and only with the employee's consent.
Feature Engineering from a Digital Footprint
Calculation of behavioral indicators:
import pandas as pd
import numpy as np
from datetime import time
def extract_burnout_features(employee_id: str,
                             activity_log: pd.DataFrame,
                             calendar_data: pd.DataFrame,
                             task_data: pd.DataFrame) -> dict:
    """Build aggregate behavioural features for one employee.

    Only counts, ratios and spreads are produced; no individual event is
    returned. The normalisation constants (4 weeks, 20 working days) assume
    the caller passes data already restricted to a 4-week window -- this
    function does not filter by date itself.

    Args:
        employee_id: Value matched against ``activity_log['employee_id']``,
            ``calendar_data['employee_id']`` and ``task_data['assignee_id']``.
        activity_log: Events with ``employee_id`` and a datetime-like
            ``timestamp`` column (the ``.dt`` accessor is used on it).
        calendar_data: Meetings with an ``employee_id`` column.
        task_data: Task events with ``assignee_id`` and ``event`` columns;
            ``event`` values ``'created'`` and ``'completed'`` are counted.

    Returns:
        Dict of feature name -> numeric value.
    """
    # Working time.
    # .copy() so the 'hour' column is added to an independent frame rather
    # than to a filtered slice of the caller's DataFrame (chained assignment).
    work_sessions = activity_log[activity_log['employee_id'] == employee_id].copy()
    work_sessions['hour'] = work_sessions['timestamp'].dt.hour

    # "+ 1" in the denominators below avoids division by zero on empty data.
    after_hours_ratio = (
        len(work_sessions[work_sessions['hour'] >= 20]) / (len(work_sessions) + 1)
    )

    weekend_mask = work_sessions['timestamp'].dt.dayofweek >= 5
    weekend_work_days = work_sessions.loc[weekend_mask, 'timestamp'].dt.date.nunique()

    # Session continuity (pauses between consecutive events), in hours.
    # NOTE(review): gaps are taken across the whole sorted log, so overnight
    # and weekend gaps are included as "breaks" -- confirm this is intended.
    sorted_sessions = work_sessions.sort_values('timestamp')
    gaps = sorted_sessions['timestamp'].diff().dt.total_seconds() / 3600
    long_breaks = (gaps > 0.5).sum()  # breaks longer than 30 minutes

    # Spread of non-trivial gaps. Guard the filtered series as well: with
    # more than 5 events but no gap above 0.1 h, np.std would receive an
    # empty array and return NaN (with a RuntimeWarning).
    meaningful_gaps = gaps[gaps > 0.1]
    if len(gaps) > 5 and len(meaningful_gaps) > 0:
        break_regularity = np.std(meaningful_gaps.values)
    else:
        break_regularity = 0.0

    # Meetings.
    employee_meetings = calendar_data[calendar_data['employee_id'] == employee_id]
    meetings_per_week = len(employee_meetings) / 4
    back_to_back_meetings = count_back_to_back(employee_meetings)

    # Productivity.
    tasks = task_data[task_data['assignee_id'] == employee_id]
    tasks_created = len(tasks[tasks['event'] == 'created'])
    tasks_completed = len(tasks[tasks['event'] == 'completed'])
    completion_rate = tasks_completed / (tasks_created + 1)

    # Communications.
    # NOTE(review): this is derived from the same activity log as
    # after_hours_ratio and yields the same value; presumably a separate
    # messaging log was intended -- confirm.
    comm_by_hour = work_sessions.groupby('hour').size()
    comm_evening_ratio = (
        comm_by_hour[comm_by_hour.index >= 20].sum() / (comm_by_hour.sum() + 1)
    )

    return {
        'after_hours_ratio': after_hours_ratio,
        'weekend_work_days_4w': weekend_work_days,
        'break_regularity': break_regularity,
        'meetings_per_week': meetings_per_week,
        # NOTE(review): numerator is whatever count_back_to_back returns for
        # the whole input, denominator is a per-week figure -- verify units.
        'back_to_back_ratio': back_to_back_meetings / (meetings_per_week + 1),
        'task_completion_rate': completion_rate,
        'comm_evening_ratio': comm_evening_ratio,
        'long_breaks_per_day': long_breaks / 20,  # normalised to 20 working days
    }
Burnout Risk Score
Composite Burnout Risk Index:
from sklearn.ensemble import GradientBoostingClassifier
import shap
def _positive_class_shap_row(raw_shap):
    """Return the SHAP values of the first sample for the positive class.

    Handles the three layouts TreeExplainer.shap_values can produce:
    a list of per-class arrays, a 2-D (samples, features) array for
    single-output models, and a 3-D (samples, features, outputs) array.
    """
    if isinstance(raw_shap, list):
        per_class = raw_shap[1] if len(raw_shap) > 1 else raw_shap[0]
        return np.asarray(per_class)[0]
    values = np.asarray(raw_shap)
    if values.ndim == 3:
        class_index = 1 if values.shape[2] > 1 else 0
        return values[0, :, class_index]
    return values[0]


def compute_burnout_risk(features: dict, model, baseline_for_team: dict) -> dict:
    """Combine a rule-based score and an ML probability into one risk score.

    Each feature is z-normalised against the team baseline
    (``<name>_mean`` / ``<name>_std`` keys of ``baseline_for_team``) before
    being passed to the model. A missing mean defaults to the feature's own
    value (normalised value 0); a missing std defaults to 1.0.

    Args:
        features: Feature name -> numeric value. Must contain
            ``after_hours_ratio``, ``weekend_work_days_4w``,
            ``task_completion_rate`` and ``back_to_back_ratio``.
        model: Fitted tree-based classifier exposing ``predict_proba`` and
            supported by ``shap.TreeExplainer``.
        baseline_for_team: Team statistics keyed ``<feature>_mean`` and
            ``<feature>_std``.

    Returns:
        Dict with ``risk_score`` (rounded to 3 decimals), ``risk_level``
        (``'high'`` / ``'medium'`` / ``'low'``), ``top_risk_factors`` (up to
        three feature names) and ``recommended_action``.
    """
    # Normalisation against the team baseline.
    feature_vector = []
    feature_names = []
    for name, value in features.items():
        team_mean = baseline_for_team.get(name + '_mean', value)
        team_std = baseline_for_team.get(name + '_std', 1.0)
        # Epsilon keeps the division defined when the team std is zero.
        feature_vector.append((value - team_mean) / (team_std + 1e-9))
        feature_names.append(name)

    # Rule-based pre-screening on the raw (un-normalised) features.
    # The maximum attainable score is 6, which is the divisor used below.
    rule_score = 0
    if features['after_hours_ratio'] > 0.3:
        rule_score += 2
    if features['weekend_work_days_4w'] > 4:
        rule_score += 2
    if features['task_completion_rate'] < 0.4:
        rule_score += 1  # drop in productivity
    if features['back_to_back_ratio'] > 0.5:
        rule_score += 1  # meeting overload

    # ML score: probability of the positive class for this single sample.
    sample = np.array([feature_vector], dtype=float)
    ml_score = model.predict_proba(sample)[0][1]

    # Explanation. The shape returned by shap_values depends on the model
    # type and the SHAP version, so it is normalised by the helper instead
    # of being indexed as [1][0] (which fails for a single 2-D array).
    explainer = shap.TreeExplainer(model)
    shap_values = _positive_class_shap_row(explainer.shap_values(sample))

    # NOTE(review): ranking is by absolute contribution, so a factor that
    # lowers the risk can also appear here -- confirm this is intended.
    top_risk_factors = sorted(
        zip(feature_names, shap_values),
        key=lambda x: abs(x[1]), reverse=True
    )[:3]

    combined_risk = 0.5 * ml_score + 0.5 * min(1.0, rule_score / 6)

    if combined_risk > 0.7:
        risk_level = 'high'
    elif combined_risk > 0.4:
        risk_level = 'medium'
    else:
        risk_level = 'low'

    return {
        'risk_score': round(combined_risk, 3),
        'risk_level': risk_level,
        'top_risk_factors': [factor for factor, _ in top_risk_factors],
        'recommended_action': determine_action(combined_risk, features)
    }
def determine_action(risk_score, features):
    """Map a combined risk score to a recommended follow-up action.

    Scores above 0.8 and 0.6 escalate directly. Otherwise a meeting-load
    review is suggested when the employee has more than 30 meetings per
    week and the score exceeds 0.4; everything else is just monitored.
    """
    escalation_ladder = (
        (0.8, 'urgent_hr_conversation'),
        (0.6, 'manager_checkin_this_week'),
    )
    for threshold, action in escalation_ladder:
        if risk_score > threshold:
            return action

    meeting_overload = features['meetings_per_week'] > 30
    return 'review_meeting_load' if meeting_overload and risk_score > 0.4 else 'monitor'
Team Level
Aggregated analysis without deanonymization:
def team_burnout_dashboard(team_risk_scores: list, min_group_size: int = 5) -> dict:
    """Aggregate individual risk results into a team-level summary.

    The summary contains only:
    - the percentage of the team in the high and medium risk zones;
    - the three most frequent risk factors across the team;
    - an ``anonymized`` flag set when the team is smaller than
      ``min_group_size``.

    The flag is advisory: the drivers are still returned, so the caller is
    responsible for hiding details of small teams.

    Args:
        team_risk_scores: List of dicts, each with a ``risk_level`` key and
            an optional ``top_risk_factors`` list.
        min_group_size: Teams with fewer members than this are flagged as
            anonymized. Defaults to 5.

    Returns:
        Dict with ``high_risk_pct``, ``medium_risk_pct``,
        ``team_risk_drivers`` (list of ``(factor, count)`` tuples) and
        ``anonymized``. For an empty team both percentages are 0.0.
    """
    from collections import Counter

    total = len(team_risk_scores)
    level_counts = Counter(r['risk_level'] for r in team_risk_scores)

    def as_percentage(count):
        # An empty team would otherwise raise ZeroDivisionError.
        return round(count / total * 100, 1) if total else 0.0

    # Top factors across the team (aggregate only, no per-person mapping).
    driver_counts = Counter()
    for r in team_risk_scores:
        driver_counts.update(r.get('top_risk_factors', []))

    return {
        'high_risk_pct': as_percentage(level_counts['high']),
        'medium_risk_pct': as_percentage(level_counts['medium']),
        'team_risk_drivers': driver_counts.most_common(3),
        'anonymized': total < min_group_size,  # flag: hide details of small teams
    }
Integration: Okta/Azure AD for authorization, Slack API for activity (with consent), GitHub/GitLab timestamps, Google/Outlook Calendar API. Data is aggregated only; raw events are deleted after 24 hours.
Timeframe: Basic behavioral features + risk score + HR dashboard — 4-5 weeks. ML model, team analytics, GDPR-compliant storage, and Slack/Jira/Calendar integration — 2-3 months.







