AI-based system for automatic vehicle valuation
A manual car appraisal takes an expert 20–40 minutes; an AI appraisal takes about 200 milliseconds. Platforms such as Auto.ru, KBB, and CarGurus use machine learning models to estimate market value instantly, with an error of 3–7% — comparable to that of an experienced appraiser.
Price regression model
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import LabelEncoder
import shap
class VehiclePriceEstimator:
    """Estimate the market value of a vehicle.

    A gradient-boosting regressor is fitted on the natural log of the price
    (``price_rub``), and SHAP values are used to explain single predictions.

    Attributes:
        model: The underlying ``GradientBoostingRegressor``.
        label_encoders: Mapping of categorical column name to its fitted
            ``LabelEncoder``; rebuilt on every call to :meth:`train`.
        explainer: ``shap.TreeExplainer`` created by :meth:`train`
            (``None`` until then).
        reference_year: Year against which vehicle age is computed.
    """

    # Ordinal score for the textual ``condition`` column.
    CONDITION_SCORES = {'excellent': 4, 'good': 3, 'fair': 2, 'poor': 1}
    # Multipliers for the reported price range (+/-7% around the estimate).
    PRICE_RANGE_FACTORS = (0.93, 1.07)

    def __init__(self, reference_year: int = 2025):
        """Create an untrained estimator.

        Args:
            reference_year: Year used to derive ``age_years``. Defaults to
                2025, the value that was previously hard-coded.
        """
        self.model = GradientBoostingRegressor(
            n_estimators=500, learning_rate=0.03, max_depth=5,
            subsample=0.8, min_samples_leaf=10, random_state=42
        )
        self.label_encoders = {}
        self.explainer = None
        self.reference_year = reference_year

    @staticmethod
    def _column(df: pd.DataFrame, name: str, default) -> pd.Series:
        """Return ``df[name]``, or a constant Series on ``df.index`` if absent.

        ``DataFrame.get`` with a scalar default returns that bare scalar,
        which has no ``.clip``/``.astype``/``.map``; always handing back a
        Series aligned to ``df.index`` keeps the feature code uniform.
        """
        if name in df.columns:
            return df[name]
        return pd.Series(default, index=df.index)

    @staticmethod
    def _flag(df: pd.DataFrame, name: str, default: bool) -> pd.Series:
        """Return column ``name`` as 0/1 integers.

        A missing column, or a missing value inside the column, falls back
        to ``default`` so that ``astype(int)`` cannot fail on NaN/None.
        """
        col = VehiclePriceEstimator._column(df, name, default)
        filled = np.where(col.notna(), col, default)
        return pd.Series(filled, index=df.index).astype(int)

    def build_features(self, vehicles: pd.DataFrame) -> pd.DataFrame:
        """Build the model's feature matrix from raw vehicle records.

        ``year`` and ``mileage_km`` are required. Every other column is
        optional and replaced by a fixed default when absent. Categorical
        columns are label-encoded; an encoder is fitted the first time a
        column is seen and reused afterwards.

        Args:
            vehicles: One row per vehicle.

        Returns:
            A numeric DataFrame on the same index as ``vehicles``; remaining
            NaNs are replaced by 0.

        Raises:
            KeyError: If ``year`` or ``mileage_km`` is missing.
        """
        df = vehicles.copy()
        col = self._column

        # Start from the input index so every feature is row-aligned.
        features = pd.DataFrame(index=df.index)

        # Core technical characteristics
        features['year'] = df['year']
        features['age_years'] = self.reference_year - df['year']
        features['mileage_km'] = df['mileage_km'].clip(0, 500000)
        # Age is floored at 1 year to avoid division by zero for new cars.
        features['mileage_per_year'] = df['mileage_km'] / features['age_years'].clip(1, 50)
        features['engine_volume_l'] = col(df, 'engine_volume_l', 1.6)
        features['engine_power_hp'] = col(df, 'engine_power_hp', 120)
        fuel_type = col(df, 'fuel_type', 'petrol')
        features['is_electric'] = (fuel_type == 'electric').astype(int)
        features['is_hybrid'] = (fuel_type == 'hybrid').astype(int)

        # Drivetrain and body
        features['transmission_auto'] = (col(df, 'transmission', 'manual') == 'automatic').astype(int)
        features['drive_awd'] = (col(df, 'drive', 'fwd') == 'awd').astype(int)
        features['body_type_encoded'] = self._encode_categorical(col(df, 'body_type', 'sedan'), 'body_type')

        # Brand and model (categorical)
        features['brand_encoded'] = self._encode_categorical(col(df, 'brand', 'toyota'), 'brand')
        features['model_encoded'] = self._encode_categorical(col(df, 'model', 'camry'), 'model')

        # Condition and history
        features['accidents_count'] = col(df, 'accidents_count', 0).clip(0, 5)
        features['owners_count'] = col(df, 'owners_count', 1).clip(1, 10)
        features['service_book'] = self._flag(df, 'has_service_book', True)
        # Unknown condition labels are scored as 'fair' (2).
        features['condition_encoded'] = col(df, 'condition', 'good').map(self.CONDITION_SCORES).fillna(2)

        # Options and trim
        features['has_leather'] = self._flag(df, 'has_leather', False)
        features['has_panoramic'] = self._flag(df, 'has_panoramic_roof', False)
        features['options_count'] = col(df, 'options_count', 5).clip(0, 30)

        # Market conditions
        features['region_demand_index'] = col(df, 'region_demand_index', 1.0)

        return features.fillna(0)

    def _encode_categorical(self, series: pd.Series, name: str) -> pd.Series:
        """Label-encode ``series`` using the encoder registered under ``name``.

        The first call for a given ``name`` fits a new ``LabelEncoder`` on the
        stringified values. Later calls reuse it and map values that were not
        seen during fitting to -1.
        """
        values = series.astype(str)
        if name not in self.label_encoders:
            le = LabelEncoder()
            codes = le.fit_transform(values)
            # Register only after a successful fit so a failure cannot leave
            # an unfitted encoder behind.
            self.label_encoders[name] = le
            return pd.Series(codes, index=series.index)

        le = self.label_encoders[name]
        # A label's code is its position in ``classes_``; a dict lookup gives
        # the same result as ``le.transform`` without one call per row.
        code_by_label = {label: code for code, label in enumerate(le.classes_)}
        return values.map(code_by_label).fillna(-1).astype(int)

    def train(self, vehicles_with_prices: pd.DataFrame):
        """Fit the model and the SHAP explainer.

        Args:
            vehicles_with_prices: Vehicle records including a ``price_rub``
                column. Prices are floored at 50 000 before the log transform.
        """
        # Refit encoders from scratch: reusing ones from an earlier fit would
        # encode categories new to this training set as -1.
        self.label_encoders = {}
        X = self.build_features(vehicles_with_prices)
        y = np.log(vehicles_with_prices['price_rub'].clip(lower=50000))  # log transform
        self.model.fit(X, y)
        self.explainer = shap.TreeExplainer(self.model)

    def predict_price(self, vehicle: dict) -> dict:
        """Estimate the price of one vehicle, with a range and an explanation.

        Args:
            vehicle: Raw attributes of a single vehicle (same keys as the
                training columns). An optional ``market_median_price`` key is
                used for the market-position label.

        Returns:
            A dict with ``estimated_price_rub``, ``price_range`` (low, high),
            ``confidence``, ``price_factors`` (the three features with the
            largest absolute SHAP contribution) and ``market_position``.

        Raises:
            sklearn.exceptions.NotFittedError: If :meth:`train` has not been
                called yet (raised by the underlying model).
        """
        vehicle_df = pd.DataFrame([vehicle])
        X = self.build_features(vehicle_df)

        log_price = self.model.predict(X)[0]
        estimated_price = int(np.exp(log_price))

        # Fixed +/-7% band; it is not derived from the model's uncertainty.
        low_factor, high_factor = self.PRICE_RANGE_FACTORS
        price_low = int(estimated_price * low_factor)
        price_high = int(estimated_price * high_factor)

        # SHAP explanation: contributions are in log-price space, so
        # exp(value) - 1 is the relative effect on the price.
        shap_values = self.explainer.shap_values(X)[0]
        top_factors = sorted(
            zip(X.columns.tolist(), shap_values),
            key=lambda item: abs(item[1]), reverse=True
        )[:3]

        factors = []
        for feat, val in top_factors:
            direction = 'повышает' if val > 0 else 'снижает'
            pct = abs(np.expm1(val)) * 100
            factors.append(f"{feat}: {direction} цену на {pct:.1f}%")

        return {
            'estimated_price_rub': estimated_price,
            'price_range': (price_low, price_high),
            # NOTE(review): confidence is a constant, not computed.
            'confidence': 'high',
            'price_factors': factors,
            'market_position': self._get_market_position(estimated_price, vehicle)
        }

    def _get_market_position(self, price: int, vehicle: dict) -> str:
        """Classify ``price`` against the vehicle's market median.

        Returns ``'below_market'`` when the price is more than 10% under the
        median, ``'above_market'`` when more than 10% over, else
        ``'at_market'``. A missing or ``None`` median falls back to ``price``
        itself, which yields ``'at_market'``.
        """
        market_median = vehicle.get('market_median_price')
        if market_median is None:
            market_median = price
        # Floor the denominator at 1 to avoid division by zero.
        ratio = price / max(market_median, 1)
        if ratio < 0.90:
            return 'below_market'
        if ratio > 1.10:
            return 'above_market'
        return 'at_market'
AI-powered car valuation reduces the mean absolute percentage error (MAPE) to 4–8% on modern datasets with 100,000+ transactions. The main sources of error are rare models and configurations (the cold-start problem), regional price differences, and market drift over time (the model should be retrained quarterly).







