AI Travel and Tour Recommendation System
Travel recommendations are more complex than e-commerce recommendations: trips are expensive, the decision takes a long time, and context is critical (a family vacation is not a romantic getaway). The AI system infers these nuances from search history, past bookings, and explicit preferences, and uses them to build a personalized traveler profile.
Traveler Profile and Contextual Recommendations
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from anthropic import Anthropic
import json
class TravelerProfiler:
    """Build a traveler profile from booking and search history."""

    TRAVEL_STYLES = [
        'adventure', 'cultural', 'relaxation', 'gastronomy',
        'family', 'romantic', 'business', 'budget', 'luxury'
    ]

    # Interest label -> substrings that signal it in a lower-cased query.
    # 'hiking' is listed next to 'hike' because "hiking" does not contain
    # the substring "hike", so plain "hiking trip" queries were missed.
    _ACTIVITY_KEYWORDS = {
        'skiing': ('ski', 'snow', 'winter'),
        'beach': ('beach', 'sea', 'ocean', 'resort'),
        'hiking': ('hike', 'hiking', 'trek', 'mountain', 'nature'),
        'museums': ('museum', 'culture', 'history', 'art'),
        'gastronomy': ('food', 'restaurant', 'cuisine', 'wine'),
    }

    def build_profile(self, booking_history: pd.DataFrame,
                      search_history: pd.DataFrame,
                      user_id: str) -> dict:
        """Build the profile of one user from bookings and searches.

        Args:
            booking_history: Bookings of all users; must have a
                ``user_id`` column. Every other column is optional.
            search_history: Searches of all users; must have a
                ``user_id`` column. ``query`` is optional.
            user_id: The user to profile.

        Returns:
            ``{'user_id': ..., 'is_new': True}`` when the user has neither
            bookings nor searches. Otherwise a dict of aggregated
            preferences plus a ``travel_style`` label. Numeric fields fall
            back to their defaults when the column is missing or holds no
            values (e.g. a user who has only searched), so the profile
            never contains NaN.
        """
        bookings = booking_history[booking_history['user_id'] == user_id]
        searches = search_history[search_history['user_id'] == user_id]
        if bookings.empty and searches.empty:
            return {'user_id': user_id, 'is_new': True}

        mean = self._mean_or_default
        profile = {
            'user_id': user_id,
            'is_new': False,
            'total_trips': len(bookings),
            # Price segment
            'avg_budget_per_person': mean(bookings, 'price_per_person', 0),
            'hotel_star_preference': mean(bookings, 'hotel_stars', 3),
            # Destination types
            'preferred_climate': self._infer_climate_preference(bookings),
            'preferred_destination_type': self._infer_destination_type(bookings),
            'international_ratio': mean(bookings, 'is_international', 0),
            # Trip organization
            'avg_trip_duration_days': mean(bookings, 'duration_days', 7),
            'advance_booking_days': mean(bookings, 'days_booked_in_advance', 30),
            'solo_vs_group': mean(bookings, 'travelers_count', 2),
            # Activities from search
            'activity_interests': self._extract_activity_interests(searches),
        }
        # Determine travel style
        profile['travel_style'] = self._classify_travel_style(profile)
        return profile

    @staticmethod
    def _mean_or_default(frame: pd.DataFrame, column: str,
                         default: float) -> float:
        """Return the mean of ``column``, or ``default`` if there is no data.

        "No data" covers both a missing column and a column without any
        non-null value; the latter would otherwise produce NaN.
        """
        if column not in frame.columns:
            return float(default)
        values = frame[column].dropna()
        if values.empty:
            return float(default)
        return float(values.mean())

    @staticmethod
    def _most_frequent(frame: pd.DataFrame, column: str) -> str:
        """Return the most frequent value of ``column``, or ``'mixed'``."""
        if column not in frame.columns:
            return 'mixed'
        counts = frame[column].value_counts()
        return counts.index[0] if len(counts) > 0 else 'mixed'

    def _infer_climate_preference(self, bookings: pd.DataFrame) -> str:
        """Return the most booked ``destination_climate`` or ``'mixed'``."""
        return self._most_frequent(bookings, 'destination_climate')

    def _infer_destination_type(self, bookings: pd.DataFrame) -> str:
        """Return the most booked ``destination_type`` or ``'mixed'``."""
        return self._most_frequent(bookings, 'destination_type')

    def _extract_activity_interests(self, searches: pd.DataFrame) -> list[str]:
        """Derive activity interests from the text of search queries.

        A query signals an interest when it contains any of that interest's
        keywords as a substring (case-insensitive). Null queries are
        skipped. The result is sorted so it is stable across runs.
        """
        if 'query' not in searches.columns:
            return []
        # dropna + astype(str): a null or numeric cell must not reach the
        # substring test, where `kw in <float>` raises TypeError.
        queries = searches['query'].dropna().astype(str).str.lower()
        interests = {
            interest
            for query in queries
            for interest, keywords in self._ACTIVITY_KEYWORDS.items()
            if any(kw in query for kw in keywords)
        }
        return sorted(interests)

    def _classify_travel_style(self, profile: dict) -> str:
        """Map a profile to a coarse travel-style label.

        Rules are checked in order: luxury (stars >= 4.5 or budget > 3000),
        budget (known budget below 500), relaxation (beach interest),
        cultural (city destinations), otherwise ``'mixed'``.
        """
        budget = profile.get('avg_budget_per_person', 0)
        stars = profile.get('hotel_star_preference', 3)
        if stars >= 4.5 or budget > 3000:
            return 'luxury'
        # A budget of 0 is the "no price data" placeholder, not evidence of
        # a low-budget traveler, so it must not trigger the budget rule.
        if 0 < budget < 500:
            return 'budget'
        if 'beach' in profile.get('activity_interests', []):
            return 'relaxation'
        if profile.get('preferred_destination_type') == 'city':
            return 'cultural'
        return 'mixed'
class TourRecommendationEngine:
    """Tour recommendations: semantic search, personalized ranking, pitch."""

    # Fallbacks used when the traveler profile has no usable value.
    _DEFAULT_BUDGET = 1000
    _DEFAULT_DURATION_DAYS = 7

    def __init__(self):
        self.encoder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
        self.llm = Anthropic()

    def semantic_search(self, query: str,
                        tours_catalog: pd.DataFrame,
                        top_k: int = 20) -> pd.DataFrame:
        """Return the ``top_k`` tours most similar to ``query``.

        Args:
            query: Free-text search query.
            tours_catalog: Tours with a ``description`` column, or with a
                pre-computed ``description_embedding`` column. The frame
                passed in is never modified; pre-compute the embedding
                column to avoid re-encoding the catalog on every call.
            top_k: Maximum number of rows to return.

        Returns:
            A copy of the best-matching rows with an added
            ``semantic_score`` column, highest score first. An empty
            catalog yields an empty frame that still has that column.
        """
        # Copy first so the embedding column is not written into the
        # caller's frame.
        catalog = tours_catalog.copy()
        if catalog.empty:
            # np.stack raises on an empty sequence; nothing to rank anyway.
            catalog['semantic_score'] = np.empty(0, dtype=float)
            return catalog

        query_embedding = self.encoder.encode(query, normalize_embeddings=True)
        # Encode tour descriptions (in production: pre-computed index)
        if 'description_embedding' not in catalog.columns:
            descriptions = catalog['description'].astype(str).tolist()
            # One batched call instead of one model call per row.
            embeddings = self.encoder.encode(descriptions, normalize_embeddings=True)
            catalog['description_embedding'] = pd.Series(
                list(embeddings), index=catalog.index
            )

        similarities = cosine_similarity(
            query_embedding.reshape(1, -1),
            np.stack(catalog['description_embedding'].values)
        )[0]
        catalog['semantic_score'] = similarities
        return catalog.nlargest(top_k, 'semantic_score')

    @staticmethod
    def _positive_or_default(value, default: float) -> float:
        """Return ``value`` as a float if finite and > 0, else ``default``."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return float(default)
        if np.isfinite(number) and number > 0:
            return number
        return float(default)

    def personalized_ranking(self, candidates: pd.DataFrame,
                             traveler_profile: dict) -> pd.DataFrame:
        """Re-rank semantic candidates for one traveler.

        Args:
            candidates: Tours with ``semantic_score`` and
                ``price_per_person`` columns. ``tour_style``,
                ``activities`` and ``duration_days`` are optional and
                default to ``'mixed'``, no activities and 7 days.
            traveler_profile: Profile dict; missing, non-finite or
                non-positive budget/duration values fall back to 1000 and
                7 so that scores never become NaN or infinite.

        Returns:
            A copy of ``candidates`` with the component scores and
            ``final_score`` added, sorted by ``final_score`` descending.
        """
        df = candidates.copy()

        # Price match
        avg_budget = self._positive_or_default(
            traveler_profile.get('avg_budget_per_person'), self._DEFAULT_BUDGET
        )
        price_gap = (df['price_per_person'] - avg_budget).abs() / avg_budget
        df['price_fit'] = 1.0 - price_gap.clip(0, 1)

        # Travel style
        travel_style = traveler_profile.get('travel_style', 'mixed')

        def style_score(style) -> float:
            if style == travel_style:
                return 1.0
            return 0.5 if style == 'mixed' else 0.3

        # Defaults are built on df.index: candidates usually keep their
        # catalog index, and a fresh 0..n-1 index would align to NaN.
        if 'tour_style' in df.columns:
            styles = df['tour_style']
        else:
            styles = pd.Series('mixed', index=df.index, dtype=object)
        df['style_match'] = styles.apply(style_score)

        # Activity interests
        user_interests = set(traveler_profile.get('activity_interests') or [])

        def activity_score(activities) -> float:
            if not user_interests:
                return 0.5
            try:
                offered = set(activities)
            except TypeError:
                # Null / non-iterable cell: the tour lists no activities.
                offered = set()
            return len(user_interests & offered) / len(user_interests)

        if 'activities' in df.columns:
            activities = df['activities']
        else:
            activities = pd.Series(
                [[] for _ in range(len(df))], index=df.index, dtype=object
            )
        df['activity_match'] = activities.apply(activity_score)

        # Duration
        preferred_duration = self._positive_or_default(
            traveler_profile.get('avg_trip_duration_days'),
            self._DEFAULT_DURATION_DAYS,
        )
        # A Series default keeps `.clip` available when the column is absent.
        if 'duration_days' in df.columns:
            durations = df['duration_days']
        else:
            durations = pd.Series(
                float(self._DEFAULT_DURATION_DAYS), index=df.index
            )
        duration_gap = (durations - preferred_duration).abs() / 14
        df['duration_fit'] = 1.0 - duration_gap.clip(0, 1)

        df['final_score'] = (
            df['semantic_score'] * 0.30 +
            df['price_fit'] * 0.25 +
            df['style_match'] * 0.20 +
            df['activity_match'] * 0.15 +
            df['duration_fit'] * 0.10
        )
        return df.sort_values('final_score', ascending=False)

    def generate_tour_pitch(self, tour: dict,
                            traveler_profile: dict) -> str:
        """Ask the LLM for a short pitch of ``tour`` tailored to the traveler.

        Args:
            tour: Tour data; ``name``, ``destination`` and ``highlights``
                are read.
            traveler_profile: Profile dict; ``travel_style``,
                ``activity_interests`` and ``avg_budget_per_person`` are
                read.

        Returns:
            The text of the first response block that carries text, or an
            empty string when the response has none.
        """
        prompt = (
            "Write a 3-sentence personalized pitch for this tour.\n"
            f"Tour: {tour.get('name')}, {tour.get('destination')}\n"
            f"Key features: {tour.get('highlights', [])}\n"
            f"Traveler profile: {traveler_profile.get('travel_style')} traveler,\n"
            f"interests: {traveler_profile.get('activity_interests', [])},\n"
            f"typical budget: ${traveler_profile.get('avg_budget_per_person', 1000)}/person.\n"
            "Highlight what's most relevant to THIS specific traveler."
        )
        response = self.llm.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=150,
            messages=[{"role": "user", "content": prompt}]
        )
        # Do not index content[0] blindly: the list may be empty.
        for block in response.content:
            text = getattr(block, 'text', None)
            if text is not None:
                return text
        return ""
Semantic tour search increases search-result click-through rate by 22–35% compared with keyword search. Personalized ranking increases booking conversion by 18–25%. The key cold-start solution: for new users, a 5-question onboarding questionnaire outperforms showing popular tours without any context.







