Implementing AI-Based Cross-Sell Recommendation System for Sales
Cross-sell recommends additional products related to what the customer is already buying or viewing — classic examples are cartridges for a printer or a carrying bag for a laptop. The ML approach discovers non-trivial product pairs through joint purchase analysis (market basket analysis) and adds personalization based on the user's profile.
Market Basket Analysis + Personalization
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
from sklearn.ensemble import GradientBoostingClassifier
class CrossSellRecommender:
def __init__(self, min_support: float = 0.01, min_confidence: float = 0.1):
self.min_support = min_support
self.min_confidence = min_confidence
self.rules = None
self.cross_sell_map = {}
self.personalization_model = None
def fit_association_rules(self, orders_df: pd.DataFrame,
order_col: str = "order_id",
item_col: str = "item_id"):
"""Finding association rules through Apriori"""
# Transaction baskets
baskets = orders_df.groupby(order_col)[item_col].apply(list).tolist()
te = TransactionEncoder()
te_array = te.fit_transform(baskets)
df_encoded = pd.DataFrame(te_array, columns=te.columns_)
# Frequent itemsets
frequent_itemsets = apriori(
df_encoded,
min_support=self.min_support,
use_colnames=True,
max_len=3
)
# Association rules
self.rules = association_rules(
frequent_itemsets,
metric="lift",
min_threshold=1.2
)
self.rules = self.rules[self.rules['confidence'] >= self.min_confidence]
self.rules = self.rules.sort_values('lift', ascending=False)
# Mapping: item → list of recommendations with metrics
for _, rule in self.rules.iterrows():
for antecedent in rule['antecedents']:
if antecedent not in self.cross_sell_map:
self.cross_sell_map[antecedent] = []
for consequent in rule['consequents']:
if antecedent != consequent:
self.cross_sell_map[antecedent].append({
'item_id': consequent,
'confidence': rule['confidence'],
'lift': rule['lift'],
'support': rule['support']
})
# Sort by lift
for item in self.cross_sell_map:
self.cross_sell_map[item].sort(key=lambda x: x['lift'], reverse=True)
def recommend_cross_sell(self, cart_items: list[str],
user_history: list[str] = None,
n: int = 5) -> list[dict]:
"""Cross-sell for current cart"""
candidates = {}
for item_id in cart_items:
related = self.cross_sell_map.get(item_id, [])
for rec in related:
rec_id = rec['item_id']
# Skip items already in cart or history
if rec_id in cart_items:
continue
if user_history and rec_id in user_history:
continue
if rec_id not in candidates:
candidates[rec_id] = {'score': 0, 'triggers': []}
candidates[rec_id]['score'] += rec['lift']
candidates[rec_id]['triggers'].append(item_id)
# Normalization
if not candidates:
return []
sorted_candidates = sorted(
[{'item_id': k, **v} for k, v in candidates.items()],
key=lambda x: x['score'],
reverse=True
)
return sorted_candidates[:n]
def get_complementary_categories(self, category: str) -> list[str]:
"""Complementary categories"""
category_rules = self.rules[
self.rules['antecedents'].apply(lambda x: category in str(x))
]['consequents'].apply(lambda x: list(x)).explode().value_counts()
return category_rules.head(5).index.tolist()
Temporal Patterns: Next Purchase
class NextPurchasePredictor:
    """Predicts repeat purchases from a user's purchase history.

    Models per-item repurchase intervals and scores how likely each item is
    due within roughly a week of the reference date.
    """

    def predict_next_items(self, user_id: str,
                           purchase_history: list[dict],
                           catalog_features: pd.DataFrame,
                           as_of=None) -> list[tuple]:
        """Score items the user is likely to repurchase soon.

        Args:
            user_id: user identifier (kept for interface compatibility; not
                used by the interval model itself).
            purchase_history: chronologically ordered records of the form
                {item_id, date, quantity, category}.
            catalog_features: per-item features (currently unused; reserved
                for a future re-ranking step).
            as_of: reference "today" timestamp (anything accepted by
                pd.to_datetime). Defaults to the current time; passing an
                explicit value makes the prediction deterministic.

        Returns:
            Up to 10 (item_id, probability) tuples, highest probability first.
        """
        # Hoisted out of the per-item loop where the original imported it
        # on every iteration.
        from scipy.stats import norm

        if len(purchase_history) < 3:
            # Too little history to estimate repurchase intervals.
            return []
        # Day gaps between consecutive purchases of the same item.
        item_intervals: dict = {}
        for i in range(1, len(purchase_history)):
            item = purchase_history[i]['item_id']
            prev_same = [h for h in purchase_history[:i] if h['item_id'] == item]
            if prev_same:
                gap = (
                    pd.to_datetime(purchase_history[i]['date']) -
                    pd.to_datetime(prev_same[-1]['date'])
                ).days
                item_intervals.setdefault(item, []).append(gap)
        last_purchase_date = pd.to_datetime(purchase_history[-1]['date'])
        today = pd.Timestamp.now() if as_of is None else pd.to_datetime(as_of)
        days_since_last = (today - last_purchase_date).days
        predictions = []
        for item_id, intervals in item_intervals.items():
            avg_interval = np.mean(intervals)
            # With a single observed gap, assume a 30% relative spread.
            std_interval = np.std(intervals) if len(intervals) > 1 else avg_interval * 0.3
            # Probability mass that the next purchase falls within +/- 7 days
            # of now, under N(avg_interval, std_interval + 1); the +1 avoids
            # a degenerate zero-variance normal.
            prob = (norm.cdf(days_since_last + 7, avg_interval, std_interval + 1) -
                    norm.cdf(days_since_last - 7, avg_interval, std_interval + 1))
            prob = min(max(prob, 0.0), 1.0)
            if prob > 0.1:
                predictions.append((item_id, prob))
        return sorted(predictions, key=lambda p: p[1], reverse=True)[:10]
Association rules with min_support=0.01 and min_confidence=0.1 typically yield 500-5000 significant rules on 100K orders. A lift above 2.0 indicates a strong association. Cross-sell driven by these rules gives an average basket uplift of 15-25%, and combining them with personalization (user history) adds another 5-10% to the acceptance rate.







