AI Employee Learning and Development System
AI-driven employee Learning & Development (L&D) is more than a course catalog with recommendations. The system tracks how knowledge is applied in practice, measures the actual business impact of training, and automatically adjusts programs as role requirements change.
Linking Learning to Business Results
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from anthropic import Anthropic
import json
class LearningImpactMeasurer:
    """Measure the impact of training on performance and express it as ROI."""

    def measure_training_impact(self, training_records: pd.DataFrame,
                                performance_data: pd.DataFrame,
                                training_id: str,
                                kpi_column: str,
                                weeks_before: int = 8,
                                weeks_after: int = 12) -> dict:
        """
        Difference-in-differences: compare those who completed training
        with a control group made of all other employees in the data.

        Args:
            training_records: needs the columns 'training_id' and 'employee_id'.
            performance_data: needs the columns 'employee_id',
                'weeks_from_training' and ``kpi_column``.
            training_id: the training whose participants form the treated group.
            kpi_column: name of the KPI column to average.
            weeks_before: only rows with weeks_from_training >= -weeks_before
                are used.
            weeks_after: only rows with weeks_from_training <= weeks_after
                are used.

        Returns:
            A dict with the DiD estimate, the relative improvement in percent
            (relative to the magnitude of the treated group's pre-training
            mean), the treated pre/post means, and 'statistically_meaningful',
            which is only an effect-size heuristic (|improvement_pct| > 5),
            not a significance test. Numeric results are NaN when one of the
            four group/period cells has no rows.
        """
        trained = set(
            training_records.loc[
                training_records['training_id'] == training_id, 'employee_id'
            ]
        )

        # Honour the requested observation window; rows outside it would
        # otherwise dilute or distort the pre/post means.
        weeks = performance_data['weeks_from_training']
        perf = performance_data[weeks.between(-weeks_before, weeks_after)]

        is_treated = perf['employee_id'].isin(trained)
        # Week 0 counts as "pre": only strictly positive weeks are post-training.
        is_post = perf['weeks_from_training'] > 0

        def cell_mean(mask: pd.Series) -> float:
            # float() yields a plain Python float (NaN for an empty cell).
            return float(perf.loc[mask, kpi_column].mean())

        pre_treated = cell_mean(is_treated & ~is_post)
        post_treated = cell_mean(is_treated & is_post)
        pre_control = cell_mean(~is_treated & ~is_post)
        post_control = cell_mean(~is_treated & is_post)

        # DiD estimation
        did_estimate = (post_treated - pre_treated) - (post_control - pre_control)
        # Use the baseline's magnitude so a negative baseline does not fall
        # through to the 1e-9 floor and explode the percentage.
        pct_improvement = did_estimate / max(abs(pre_treated), 1e-9) * 100

        return {
            'training_id': training_id,
            'kpi': kpi_column,
            'treated_n': len(trained),
            'did_estimate': round(did_estimate, 3),
            'improvement_pct': round(pct_improvement, 1),
            'pre_treated_mean': round(pre_treated, 3),
            'post_treated_mean': round(post_treated, 3),
            # bool() keeps the result JSON-serialisable.
            'statistically_meaningful': bool(abs(pct_improvement) > 5)
        }

    def compute_roi(self, impact: dict,
                    training_cost: float,
                    avg_employee_cost_per_week: float,
                    n_employees: int) -> dict:
        """
        Training ROI in monetary terms over a 12-week horizon.

        Args:
            impact: dict carrying 'improvement_pct' (treated as 0 if absent).
            training_cost: total amount invested in the training.
            avg_employee_cost_per_week: average weekly cost of one employee.
            n_employees: number of employees the gain applies to.

        Returns:
            A dict with the investment, the estimated 12-week value gain,
            ROI in percent (0 when training_cost is not positive) and
            'payback_weeks'. 'payback_weeks' is float('inf') when there is
            a positive cost but no positive weekly gain to recover it.
        """
        # Productivity gain per week × 12 weeks × N employees
        weekly_value_gain = (
            impact.get('improvement_pct', 0) / 100 *
            avg_employee_cost_per_week * n_employees
        )
        total_value_12w = weekly_value_gain * 12

        if training_cost > 0:
            roi_pct = (total_value_12w - training_cost) / training_cost * 100
        else:
            roi_pct = 0

        if weekly_value_gain > 0:
            # Divide by the real gain; clamping it to 1 understated payback
            # time whenever the weekly gain was below one currency unit.
            payback_weeks = round(training_cost / weekly_value_gain)
        elif training_cost > 0:
            # Nothing is ever recovered, so the investment never pays back.
            payback_weeks = float('inf')
        else:
            payback_weeks = 0

        return {
            'training_investment': training_cost,
            'estimated_value_gain_12w': round(total_value_12w),
            'roi_pct': round(roi_pct, 1),
            'payback_weeks': payback_weeks
        }
class SkillsMarketIntelligence:
    """Monitoring market trends in skills"""

    def __init__(self):
        self.llm = Anthropic()

    @staticmethod
    def _skill_frequency(postings: pd.DataFrame) -> pd.Series:
        """Return each skill's share of all skill mentions in ``postings``."""
        if 'required_skills' not in postings.columns:
            return pd.Series(dtype=float)
        mentions = []
        for entry in postings['required_skills']:
            if isinstance(entry, str):
                # A bare string is one skill, not a sequence of characters.
                mentions.append(entry)
            elif hasattr(entry, '__iter__'):
                mentions.extend(entry)
            # Anything else (None, NaN, ...) is a missing value: skip it.
        return pd.Series(mentions, dtype=object).value_counts(normalize=True)

    def analyze_job_market_trends(self, job_postings: pd.DataFrame,
                                  months_lookback: int = 6) -> dict:
        """
        Analysis of skill trends from job market postings.

        Postings are split at ``now - months_lookback`` into a recent and an
        older cohort; each skill's share of mentions is compared between them.

        Args:
            job_postings: needs a 'posted_date' column; skills are read from
                'required_skills' (an iterable of skill names per row).
            months_lookback: size of the recent window in months.

        Returns:
            A dict with 'rising_skills' (growth above 20%, at most 10, in
            order of current frequency), 'declining_skills' (growth below
            -20%, at most 5, steepest decline first) and
            'analysis_period_months'. A skill with no mentions in the older
            cohort is assigned a growth of 100.0.
        """
        # One cut-off for both cohorts, so no posting can fall between two
        # slightly different "now" values.
        cutoff = pd.Timestamp.now() - pd.DateOffset(months=months_lookback)
        posted = job_postings['posted_date']
        recent_freq = self._skill_frequency(job_postings[posted >= cutoff])
        older_freq = self._skill_frequency(job_postings[posted < cutoff])

        # Skills that disappeared from recent postings are the strongest
        # decliners, so they must be evaluated as well.
        vanished = [skill for skill in older_freq.index if skill not in recent_freq.index]

        trends = []
        for skill in list(recent_freq.index) + vanished:
            recent_share = float(recent_freq.get(skill, 0.0))
            older_share = float(older_freq.get(skill, 0.0))
            if older_share > 0:
                growth = (recent_share - older_share) / older_share * 100
            else:
                growth = 100.0
            trends.append({
                'skill': skill,
                'current_frequency': round(recent_share, 4),
                'growth_pct': round(growth, 1),
                'trend': 'rising' if growth > 20 else 'declining' if growth < -20 else 'stable'
            })

        rising = [t for t in trends if t['trend'] == 'rising']
        # Steepest decline first, so vanished skills survive the top-5 cut.
        declining = sorted(
            (t for t in trends if t['trend'] == 'declining'),
            key=lambda t: t['growth_pct'],
        )
        return {
            'rising_skills': rising[:10],
            'declining_skills': declining[:5],
            'analysis_period_months': months_lookback
        }

    def generate_l_and_d_priorities(self, company_skills_gaps: dict,
                                    market_trends: dict,
                                    budget_constraint: float) -> str:
        """
        LLM recommendations on L&D budget priorities.

        Builds a prompt from up to 8 skill-gap keys, up to 8 rising and up to
        5 declining skills from ``market_trends`` and the budget, sends it
        through ``self.llm`` and returns the text of the first content item
        of the response.
        """
        response = self.llm.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=400,
            messages=[{
                "role": "user",
                "content": f"""Recommend L&D priorities for a tech company.
Current skill gaps in team: {list(company_skills_gaps.keys())[:8]}
Rising market skills: {[s['skill'] for s in market_trends.get('rising_skills', [])[:8]]}
Declining skills: {[s['skill'] for s in market_trends.get('declining_skills', [])[:5]]}
Annual L&D budget: ${budget_constraint:,.0f}
Provide 4-5 specific recommendations.
For each: skill area, why it's priority, suggested format (bootcamp/course/workshop/mentoring), estimated cost."""
            }]
        )
        return response.content[0].text
class AdaptiveLearningRecommender:
    """Suggest learning content tailored to one employee's skill gaps."""

    def recommend(self, employee: dict,
                  skill_gaps: dict,
                  content_catalog: pd.DataFrame,
                  learning_history: pd.DataFrame) -> list[dict]:
        """
        Pick one catalog item for each of the (at most five) largest skill gaps.

        Content already present in the employee's learning history is left
        out. Within each skill, items one level above the employee's current
        level (±0.5, or with no level set) are preferred, and among those
        the format with the employee's best average completion rate wins;
        'video' is assumed when there is no history to learn from.
        """
        has_history = len(learning_history) > 0

        # Anything already in this employee's history is off the table.
        if has_history:
            seen_ids = set(
                learning_history[learning_history['employee_id'] == employee['id']]['content_id']
            )
        else:
            seen_ids = set()
        candidates = content_catalog[~content_catalog['id'].isin(seen_ids)]

        # Favourite format = the one with the highest mean completion rate.
        favourite_format = 'video'
        if has_history:
            own_rows = learning_history[learning_history['employee_id'] == employee['id']]
            if len(own_rows) > 0:
                per_format = own_rows.groupby('format')['completion_rate'].mean()
                favourite_format = per_format.idxmax()

        def negated_gap(item):
            # Negating the gap makes an ascending sort put the largest gap first.
            return -item[1].get('gap', 0)

        picks = []
        for skill, gap_info in sorted(skill_gaps.items(), key=negated_gap)[:5]:

            def teaches_skill(tags, wanted=skill):
                return wanted in (tags if isinstance(tags, list) else [])

            on_topic = candidates[candidates['skills'].apply(teaches_skill)]
            if on_topic.empty:
                continue

            # Aim one level above where the employee currently stands.
            current_level = gap_info.get('current', 0)
            target = current_level + 1
            lower = max(0, target - 0.5)
            upper = target + 0.5
            level_ok = on_topic['level'].between(lower, upper) | on_topic['level'].isna()
            shortlist = on_topic[level_ok]
            if shortlist.empty:
                shortlist = on_topic

            in_favourite_format = shortlist[shortlist['format'] == favourite_format]
            if in_favourite_format.empty:
                chosen = shortlist.iloc[0]
            else:
                chosen = in_favourite_format.iloc[0]

            required_level = gap_info.get('required', 2)
            picks.append({
                'skill': skill,
                'content_id': chosen['id'],
                'title': chosen['title'],
                'format': chosen.get('format', 'course'),
                'duration_hours': chosen.get('duration_hours', 5),
                'skill_gap_priority': gap_info.get('priority', 'medium'),
                'reason': f"Addresses gap in '{skill}' (level {current_level} → {required_level})"
            })
        return picks
Measuring the actual ROI of training through difference-in-differences (DiD) analysis is the key differentiator from naive approaches. Companies that implement impact measurement increase L&D ROI by 40–60% by reallocating budget from ineffective programs to proven ones.







