AI Systems for Fantasy Sports and Betting
Sports analytics with ML means predicting outcomes by modeling the real-world factors behind them: players' physical condition, team form, head-to-head history, venue (home advantage), and weather. Fantasy platforms use the same models for scoring, squad selection, and player valuation.
Match Outcome Prediction
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.calibration import CalibratedClassifierCV
import json
class MatchOutcomePredictor:
    """
    Predict match outcome probabilities (home win / draw / away win) plus an
    expected-goals (xG) value reported as ``expected_goals_home``.

    Features are built from ELO ratings, team form, home advantage, match
    context and head-to-head statistics. This class contains no training
    code: ``outcome_model`` and ``xg_model`` must be fitted elsewhere before
    ``predict_outcome`` is called.
    """

    ELO_K = 32  # Step size of a single ELO update
    HOME_ADVANTAGE = 100  # ELO points
    DEFAULT_ELO = 1500  # Rating assumed for a team that has no rating yet
    # Outcome vocabulary shared by update_elo and predict_outcome. The order
    # is also the positional column order used as a fallback.
    OUTCOME_LABELS = ('home_win', 'draw', 'away_win')

    def __init__(self):
        # Model for outcome probabilities
        self.outcome_model = CalibratedClassifierCV(
            GradientBoostingClassifier(n_estimators=300, learning_rate=0.05, random_state=42),
            method='isotonic', cv=5
        )
        # Regression model for xG
        self.xg_model = GradientBoostingRegressor(
            n_estimators=200, learning_rate=0.05, random_state=42
        )
        # team name -> current ELO rating
        self.elo_ratings = {}

    def update_elo(self, home_team: str, away_team: str,
                   outcome: str) -> tuple:
        """
        Update both teams' ELO ratings after a match.

        Args:
            home_team: Key of the home side in ``elo_ratings``.
            away_team: Key of the away side in ``elo_ratings``.
            outcome: ``'home_win'`` or ``'away_win'``; any other value is
                scored as a draw.

        Returns:
            ``(new_home_elo, new_away_elo)``; both are also stored in
            ``elo_ratings``.
        """
        home_elo = self.elo_ratings.get(home_team, self.DEFAULT_ELO)
        away_elo = self.elo_ratings.get(away_team, self.DEFAULT_ELO)

        # Home advantage only shifts the expected score; the stored rating
        # itself is updated from its unadjusted value.
        home_elo_adj = home_elo + self.HOME_ADVANTAGE
        p_home_win = 1 / (1 + 10 ** ((away_elo - home_elo_adj) / 400))
        p_away_win = 1 - p_home_win

        if outcome == 'home_win':
            home_result, away_result = 1.0, 0.0
        elif outcome == 'away_win':
            home_result, away_result = 0.0, 1.0
        else:  # draw
            home_result, away_result = 0.5, 0.5

        new_home_elo = home_elo + self.ELO_K * (home_result - p_home_win)
        new_away_elo = away_elo + self.ELO_K * (away_result - p_away_win)
        self.elo_ratings[home_team] = new_home_elo
        self.elo_ratings[away_team] = new_away_elo
        return new_home_elo, new_away_elo

    def build_match_features(self, home_team: str, away_team: str,
                             match_context: dict,
                             team_stats: dict) -> np.ndarray:
        """
        Build the 17-element feature vector for one match.

        Missing teams, stats or context keys fall back to the defaults
        written inline below. The element order is part of the contract
        with the fitted models and must not change.
        """
        home_elo = self.elo_ratings.get(home_team, self.DEFAULT_ELO)
        away_elo = self.elo_ratings.get(away_team, self.DEFAULT_ELO)
        home_stats = team_stats.get(home_team, {})
        away_stats = team_stats.get(away_team, {})
        return np.array([
            # ELO and form
            home_elo - away_elo,  # ELO difference
            home_elo + self.HOME_ADVANTAGE - away_elo,  # Adjusted difference
            home_stats.get('form_5_games', 0.5),  # Form: points/possible last 5
            away_stats.get('form_5_games', 0.5),
            home_stats.get('form_trend', 0),  # Form trend
            # Attack/Defense
            home_stats.get('goals_scored_avg', 1.5),
            home_stats.get('goals_conceded_avg', 1.2),
            away_stats.get('goals_scored_avg', 1.5),
            away_stats.get('goals_conceded_avg', 1.2),
            home_stats.get('xg_for_avg', 1.4),
            home_stats.get('xg_against_avg', 1.1),
            # Context
            int(match_context.get('is_cup', False)),
            int(match_context.get('is_derby', False)),
            match_context.get('home_fatigue_days', 7),  # Days since last match
            away_stats.get('travel_distance_km', 0) / 1000,
            # Historical matchups
            match_context.get('h2h_home_win_rate', 0.4),
            match_context.get('h2h_goals_avg', 2.5),
        ])

    def _outcome_columns(self) -> dict:
        """Map each outcome label to its column index in ``predict_proba``."""
        positional = {label: idx for idx, label in enumerate(self.OUTCOME_LABELS)}
        classes = getattr(self.outcome_model, 'classes_', None)
        if classes is None:
            return positional
        # scikit-learn orders probability columns by sorted class label, so
        # string labels come out as away_win, draw, home_win. Look columns up
        # by name instead of assuming home/draw/away order.
        by_label = {str(cls): idx for idx, cls in enumerate(classes)}
        if all(label in by_label for label in self.OUTCOME_LABELS):
            return by_label
        # Unknown label encoding (e.g. integers): keep the positional order.
        return positional

    def predict_outcome(self, home_team: str, away_team: str,
                        match_context: dict, team_stats: dict) -> dict:
        """
        Return outcome probabilities, the xG prediction and both ELO ratings.

        When the fitted classifier exposes the labels ``'home_win'``,
        ``'draw'`` and ``'away_win'`` in ``classes_``, probabilities are read
        by label; otherwise columns 0/1/2 are taken as home/draw/away.

        Returns:
            Dict with keys ``home_win``, ``draw``, ``away_win`` (rounded to
            3 decimals), ``expected_goals_home`` (rounded to 2 decimals),
            ``home_elo`` and ``away_elo``.
        """
        features = self.build_match_features(home_team, away_team, match_context, team_stats)
        row = features.reshape(1, -1)
        probs = self.outcome_model.predict_proba(row)[0]
        columns = self._outcome_columns()
        return {
            'home_win': round(float(probs[columns['home_win']]), 3),
            'draw': round(float(probs[columns['draw']]), 3),
            'away_win': round(float(probs[columns['away_win']]), 3),
            'expected_goals_home': round(float(self.xg_model.predict(row)[0]), 2),
            'home_elo': self.elo_ratings.get(home_team, self.DEFAULT_ELO),
            'away_elo': self.elo_ratings.get(away_team, self.DEFAULT_ELO)
        }
class PlayerPerformancePredictor:
    """Forecast fantasy scores for individual players."""

    def predict_fantasy_points(self, player: dict,
                               match_context: dict,
                               season_stats: pd.DataFrame) -> dict:
        """
        Estimate the fantasy points a player earns in one specific match.

        The estimate starts from the player's five most recent rows (ordered
        by ``date``) and is scaled by opponent difficulty, recent form and a
        per-position weight.

        Args:
            player: Must contain ``id``; ``name``, ``position`` and
                ``starting_probability`` are optional.
            match_context: May contain ``opponent_defensive_rank`` (1=best,
                default 10).
            season_stats: Per-match rows with ``player_id``, ``date``,
                ``goals``, ``assists``, ``shots``, ``minutes_played`` and
                ``fantasy_points`` columns; ``key_passes`` is optional.

        Returns:
            A dict with the forecast and its components, or
            ``{'expected_points': 0, 'confidence': 'low'}`` when the player
            has no rows in ``season_stats``.
        """
        history = season_stats[season_stats['player_id'] == player['id']]
        if history.empty:
            return {'expected_points': 0, 'confidence': 'low'}

        # Averages over the last five appearances
        last_five = history.sort_values('date').tail(5)
        goals_avg = last_five['goals'].mean()
        assists_avg = last_five['assists'].mean()
        shots_avg = last_five['shots'].mean()
        minutes_avg = last_five['minutes_played'].mean()
        per_game = {
            'goals_per_game': goals_avg,
            'assists_per_game': assists_avg,
            'shots_per_game': shots_avg,
            'minutes_per_game': minutes_avg,
            # Evaluated for completeness; the points formula below does not use it.
            'key_passes': last_five.get('key_passes', pd.Series([0])).mean(),
        }

        # Opponent quality: every rank step away from 10 shifts the multiplier by 0.02
        rank = match_context.get('opponent_defensive_rank', 10)  # 1=best
        difficulty_multiplier = 1.0 + (rank - 10) * 0.02

        # Recent form relative to the season average, limited to [0.7, 1.3]
        season_mean = player_stats_mean = history['fantasy_points'].mean()
        raw_form = last_five['fantasy_points'].mean() / max(player_stats_mean, 0.1)
        form_factor = float(np.clip(raw_form, 0.7, 1.3))

        # Per-position weight; unknown positions get 1.0
        position_weights = {'GK': 0.8, 'DEF': 1.0, 'MID': 1.2, 'FWD': 1.1}
        position_multiplier = position_weights.get(player.get('position', 'MID'), 1.0)

        # Base score from the averaged stats, then the three multipliers
        base_points = (
            per_game['goals_per_game'] * 4 +
            per_game['assists_per_game'] * 3 +
            per_game['shots_per_game'] * 0.3 +
            (per_game['minutes_per_game'] / 90) * 2
        )
        expected_points = base_points * difficulty_multiplier * form_factor * position_multiplier

        # Probability of starting
        starting_prob = player.get('starting_probability', 0.9)

        if len(history) >= 10:
            confidence = 'high'
        else:
            confidence = 'medium'

        return {
            'player_id': player['id'],
            'player_name': player.get('name', ''),
            'position': player.get('position', ''),
            'expected_points': round(expected_points * starting_prob, 2),
            'expected_if_starts': round(expected_points, 2),
            'starting_probability': starting_prob,
            'form_factor': round(form_factor, 2),
            'difficulty_multiplier': round(difficulty_multiplier, 2),
            'confidence': confidence
        }
class FantasyTeamOptimizer:
    """Fantasy team lineup selection."""

    def optimize_lineup(self, player_predictions: list[dict],
                        budget: float,
                        formation: str = '4-3-3') -> dict:
        """
        Pick a lineup with a greedy value-for-money heuristic.

        Players are ranked by ``expected_points / price`` and taken in that
        order while their position still has an open slot and the remaining
        budget covers their price. This is a fast approximation, not an exact
        optimisation: the result may be sub-optimal, and the lineup may be
        incomplete when the pool or the budget cannot fill every slot.

        Args:
            player_predictions: Dicts with ``expected_points`` (required),
                ``price`` (default 5) and ``position`` (default ``'MID'``).
            budget: Total amount available for the lineup.
            formation: ``'4-3-3'``, ``'4-4-2'`` or ``'3-5-2'``. Any other
                value uses the 4-3-3 slot counts, while the returned
                ``formation`` still echoes the value passed in.

        Returns:
            Dict with ``lineup`` (the selected player dicts),
            ``total_expected_points``, ``total_cost``, ``remaining_budget``
            and ``formation``.
        """
        formation_requirements = {
            '4-3-3': {'GK': 1, 'DEF': 4, 'MID': 3, 'FWD': 3},
            '4-4-2': {'GK': 1, 'DEF': 4, 'MID': 4, 'FWD': 2},
            '3-5-2': {'GK': 1, 'DEF': 3, 'MID': 5, 'FWD': 2},
        }
        requirements = formation_requirements.get(formation, formation_requirements['4-3-3'])

        selected = []
        remaining_budget = budget
        # Copy so the per-formation template is never mutated
        position_slots = dict(requirements)

        # Best value first; the price floor of 0.1 avoids division by zero
        sorted_players = sorted(
            player_predictions,
            key=lambda p: p['expected_points'] / max(p.get('price', 5), 0.1),
            reverse=True
        )

        for player in sorted_players:
            pos = player.get('position', 'MID')
            price = player.get('price', 5)
            # Skip positions that are full or not part of the formation
            if position_slots.get(pos, 0) <= 0:
                continue
            if price > remaining_budget:
                continue
            selected.append(player)
            position_slots[pos] -= 1
            remaining_budget -= price
            # Stop as soon as every slot is filled
            if all(v == 0 for v in position_slots.values()):
                break

        total_expected = sum(p['expected_points'] for p in selected)
        total_cost = budget - remaining_budget
        return {
            'lineup': selected,
            'total_expected_points': round(total_expected, 2),
            'total_cost': round(total_cost, 1),
            'remaining_budget': round(remaining_budget, 1),
            'formation': formation
        }
Responsible Gaming
Betting and fantasy systems must integrate RG (Responsible Gambling) tools: monitoring for excessive betting patterns (sharp increases in volume, bets placed right after big losses, late-night sessions), automatic limits, and self-exclusion. This is not optional—it is a regulatory requirement in most jurisdictions.
Typical accuracy of football match outcome prediction is an AUC of 0.72–0.78: betting markets are efficient, so no model has an absolute edge. The value of ML lies in precise probability estimates that identify overvalued or undervalued bets relative to bookmaker lines.







