GNN for Graph-Based Recommendation Systems

We design and deploy artificial intelligence systems: from prototype to production-ready solutions. Our team combines expertise in machine learning, data engineering and MLOps to make AI work not in the lab, but in real business.
Showing 1 of 1 services. All 1566 services.
GNN for Graph-Based Recommendation Systems
Complex
~2-4 weeks
FAQ
AI Development Areas
AI Solution Development Stages
Latest works
  • image_website-b2b-advance_0.png
    B2B ADVANCE company website development
    1212
  • image_web-applications_feedme_466_0.webp
    Development of a web application for FEEDME
    1161
  • image_websites_belfingroup_462_0.webp
    Website development for BELFINGROUP
    852
  • image_ecommerce_furnoro_435_0.webp
    Development of an online store for the company FURNORO
    1041
  • image_logo-advance_0.png
    B2B Advance company logo design
    561
  • image_crm_enviok_479_0.webp
    Development of a web application for Enviok
    822

GNN graph-based recommendation system

Graph-based recommender systems outperform classical collaborative filtering methods because they capture multi-order relationships: “user A bought product X, product X was also bought by users B and C, who also bought Y” — a GNN captures such chains directly through message passing.

LightGCN (He et al., 2020) remains a strong state-of-the-art baseline for collaborative-filtering recommendation

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
import numpy as np
import pandas as pd
from typing import Optional

class LightGCNConv(MessagePassing):
    """
    Simplified GCN layer for recommendation: no feature transformation
    and no non-linearity. Only the propagation step is kept -- the key
    finding of LightGCN (He et al., 2020).
    """

    def __init__(self):
        # Sum aggregation: neighbour messages are combined by addition.
        super().__init__(aggr='add')

    def forward(self, x: torch.Tensor, edge_index: torch.Tensor,
                edge_weight: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Propagate embeddings with symmetric normalization D^{-1/2} A D^{-1/2}.

        Args:
            x: node embeddings, shape (n_nodes, dim).
            edge_index: COO edge list, shape (2, n_edges).
            edge_weight: optional per-edge weights, shape (n_edges,).

        Returns:
            Propagated embeddings, same shape as ``x``.
        """
        row, col = edge_index
        deg = degree(col, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        # Isolated nodes have degree 0 -> inf after pow(-0.5); zero them out.
        deg_inv_sqrt[torch.isinf(deg_inv_sqrt)] = 0

        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
        # Bug fix: edge_weight was accepted but silently ignored.
        if edge_weight is not None:
            norm = norm * edge_weight

        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j: torch.Tensor, norm: torch.Tensor) -> torch.Tensor:
        # Scale each neighbour's embedding by its normalized edge coefficient.
        return norm.view(-1, 1) * x_j


class LightGCN(nn.Module):
    """
    LightGCN for user-item recommendation.

    The final embedding of each node is the mean of the embeddings from
    all propagation layers, including the initial embeddings E^0
    ("layer combination", He et al., 2020).
    """

    def __init__(self, n_users: int, n_items: int,
                 embedding_dim: int = 64, n_layers: int = 3):
        super().__init__()
        self.n_users = n_users
        self.n_items = n_items
        self.n_layers = n_layers

        # Embedding tables only -- no feature transformation, per LightGCN.
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.item_embedding = nn.Embedding(n_items, embedding_dim)

        # Xavier initialization stabilizes early training.
        nn.init.xavier_uniform_(self.user_embedding.weight)
        nn.init.xavier_uniform_(self.item_embedding.weight)

        self.conv = LightGCNConv()

    def forward(self, edge_index: torch.Tensor) -> tuple:
        """
        Run `n_layers` propagation steps and combine the layers.

        Args:
            edge_index: edges of the bipartite (users x items) graph,
                with item node ids offset by n_users.
        Returns:
            (user_embeddings, item_embeddings) after layer combination.
        """
        # Initial embeddings: users first, then items, in one tensor.
        x = torch.cat([self.user_embedding.weight, self.item_embedding.weight], dim=0)

        # Keep every layer's output for the layer combination below.
        layer_embeddings = [x]

        for _ in range(self.n_layers):
            x = self.conv(x, edge_index)
            layer_embeddings.append(x)

        # Layer combination: mean over all layers (including E^0).
        final_embeddings = torch.stack(layer_embeddings, dim=1).mean(dim=1)

        users_emb = final_embeddings[:self.n_users]
        items_emb = final_embeddings[self.n_users:]

        return users_emb, items_emb

    def predict(self, users: torch.Tensor,
                items: torch.Tensor,
                edge_index: torch.Tensor) -> torch.Tensor:
        """Dot-product scores for aligned (user, item) index pairs."""
        users_emb, items_emb = self.forward(edge_index)
        return (users_emb[users] * items_emb[items]).sum(dim=-1)

    def recommend_topk(self, user_id: int,
                       edge_index: torch.Tensor,
                       k: int = 10,
                       exclude_known: Optional[set] = None) -> list:
        """Top-K item recommendations for one user.

        Items in `exclude_known` (e.g. training interactions) are masked
        out before ranking.
        """
        self.eval()
        with torch.no_grad():
            users_emb, items_emb = self.forward(edge_index)
            user_emb = users_emb[user_id]

            # Scores against every item at once (dot product).
            scores = torch.matmul(items_emb, user_emb)

            if exclude_known:
                # Vectorized masking instead of a per-item Python loop.
                mask = torch.tensor(sorted(exclude_known), dtype=torch.long,
                                    device=scores.device)
                scores[mask] = float('-inf')

            # Bug fix: requesting more items than exist used to raise in
            # topk; clamp k to the catalogue size.
            k = min(k, scores.numel())
            top_k_scores, top_k_items = scores.topk(k)

        return [
            {'item_id': int(item), 'score': float(score)}
            for item, score in zip(top_k_items, top_k_scores)
        ]


class BPRLoss(nn.Module):
    """
    Bayesian Personalized Ranking loss.

    Encourages observed (positive) interactions to score higher than
    sampled unobserved (negative) ones, with an L2 penalty on the batch
    embeddings.
    """

    def __init__(self, reg_weight: float = 1e-4):
        super().__init__()
        # Strength of the L2 penalty on the embeddings in the batch.
        self.reg_weight = reg_weight

    def forward(self, pos_scores: torch.Tensor,
                neg_scores: torch.Tensor,
                user_embeddings: torch.Tensor,
                pos_item_embeddings: torch.Tensor,
                neg_item_embeddings: torch.Tensor) -> torch.Tensor:
        """Return ranking loss + scaled L2 penalty for one batch."""
        # Maximize the gap between positive and negative scores.
        ranking_term = -F.logsigmoid(pos_scores - neg_scores).mean()

        # Squared L2 norms of every embedding involved, averaged over
        # the batch size.
        squared_norms = sum(
            emb.norm(2).pow(2)
            for emb in (user_embeddings, pos_item_embeddings, neg_item_embeddings)
        )
        penalty = self.reg_weight * squared_norms / len(pos_scores)

        return ranking_term + penalty


class GNNRecommendationTrainer:
    """Trains LightGCN with BPR loss and uniform negative sampling."""

    def __init__(self, model: 'LightGCN', device: str = 'cpu'):
        # The annotation is a string so the class can be defined even if
        # LightGCN is declared later / in another module.
        self.model = model.to(device)
        self.device = device
        self.optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        self.criterion = BPRLoss(reg_weight=1e-4)

    def prepare_training_edges(self, interactions: pd.DataFrame) -> torch.Tensor:
        """Build the bipartite interaction graph used for propagation.

        Item indices are offset by n_users so users and items share one
        node-id space; every edge is added in both directions.
        """
        users = torch.tensor(interactions['user_idx'].values, dtype=torch.long)
        items = torch.tensor(interactions['item_idx'].values + self.model.n_users, dtype=torch.long)

        edge_index = torch.stack([
            torch.cat([users, items]),
            torch.cat([items, users])
        ], dim=0)

        return edge_index.to(self.device)

    def sample_negative_items(self, users: torch.Tensor,
                              n_items: int,
                              known_items: dict,
                              max_tries: int = 100) -> torch.Tensor:
        """Draw one random unobserved item per user.

        Bug fix: the original `while True` rejection loop could hang for
        a user who interacted with (almost) every item; `max_tries`
        bounds the loop and falls back to the last draw.
        """
        neg_items = []
        for user in users.cpu().numpy():
            known = known_items.get(int(user), set())
            neg = np.random.randint(0, n_items)
            for _ in range(max_tries):
                if neg not in known:
                    break
                neg = np.random.randint(0, n_items)
            neg_items.append(neg)
        return torch.tensor(neg_items, dtype=torch.long).to(self.device)

    def train_epoch(self, interactions: pd.DataFrame,
                    edge_index: torch.Tensor,
                    batch_size: int = 2048) -> float:
        """Run one epoch of BPR training; returns the mean batch loss."""
        self.model.train()
        total_loss = 0.0
        n_batches = 0

        # Shuffle interaction order each epoch.
        idx = np.random.permutation(len(interactions))

        known_items = interactions.groupby('user_idx')['item_idx'].apply(set).to_dict()

        for start in range(0, len(interactions), batch_size):
            batch_idx = idx[start:start + batch_size]
            batch = interactions.iloc[batch_idx]

            users = torch.tensor(batch['user_idx'].values, dtype=torch.long).to(self.device)
            pos_items = torch.tensor(batch['item_idx'].values, dtype=torch.long).to(self.device)
            neg_items = self.sample_negative_items(users, self.model.n_items, known_items)

            self.optimizer.zero_grad()

            users_emb, items_emb = self.model(edge_index)

            u_emb = users_emb[users]
            pos_emb = items_emb[pos_items]
            neg_emb = items_emb[neg_items]

            # BPR scores: dot product per (user, item) pair.
            pos_scores = (u_emb * pos_emb).sum(dim=-1)
            neg_scores = (u_emb * neg_emb).sum(dim=-1)

            loss = self.criterion(pos_scores, neg_scores, u_emb, pos_emb, neg_emb)
            loss.backward()
            self.optimizer.step()

            total_loss += float(loss)
            n_batches += 1

        return total_loss / max(n_batches, 1)


class GNNRecommendationEvaluator:
    """Offline quality evaluation for the GNN recommender."""

    # Cap on the number of test users scored per call, for speed.
    MAX_EVAL_USERS = 500

    @staticmethod
    def ndcg_at_k(relevant: set, predicted: list, k: int) -> float:
        """NDCG@K: position-discounted hit quality normalized by the
        ideal ranking. Returns 0.0 when `relevant` is empty."""
        dcg = 0.0
        for i, item in enumerate(predicted[:k]):
            if item in relevant:
                dcg += 1.0 / np.log2(i + 2)

        ideal_dcg = sum(1.0 / np.log2(i + 2) for i in range(min(len(relevant), k)))
        # max(..., 1e-9) avoids division by zero for empty `relevant`.
        return dcg / max(ideal_dcg, 1e-9)

    @staticmethod
    def recall_at_k(relevant: set, predicted: list, k: int) -> float:
        """Recall@K: fraction of relevant items found in the top K."""
        hits = len(set(predicted[:k]) & relevant)
        return hits / max(len(relevant), 1)

    def evaluate_model(self, model: 'LightGCN',
                       test_interactions: pd.DataFrame,
                       edge_index: torch.Tensor,
                       train_interactions: pd.DataFrame,
                       k: int = 20) -> dict:
        """Score the model on held-out interactions.

        Items seen during training are excluded from each user's
        recommendations so only unseen items are judged.
        """
        model.eval()
        ndcgs, recalls = [], []

        # Evaluate at most MAX_EVAL_USERS users for speed.
        eval_users = test_interactions['user_idx'].unique()[:self.MAX_EVAL_USERS]
        train_known = train_interactions.groupby('user_idx')['item_idx'].apply(set).to_dict()

        for user_id in eval_users:
            relevant = set(
                test_interactions[test_interactions['user_idx'] == user_id]['item_idx']
            )
            exclude = train_known.get(user_id, set())

            recommendations = model.recommend_topk(user_id, edge_index, k=k, exclude_known=exclude)
            predicted = [r['item_id'] for r in recommendations]

            ndcgs.append(self.ndcg_at_k(relevant, predicted, k))
            recalls.append(self.recall_at_k(relevant, predicted, k))

        return {
            # Guard against an empty test set: report 0.0, not NaN.
            f'NDCG@{k}': round(float(np.mean(ndcgs)), 4) if ndcgs else 0.0,
            f'Recall@{k}': round(float(np.mean(recalls)), 4) if recalls else 0.0,
            # Bug fix: previously reported the total number of test
            # users even though evaluation was capped at 500.
            'n_evaluated': len(ndcgs)
        }

Extensions: Knowledge Graph and Side Information

class KGEnhancedRecommender(nn.Module):
    """
    Knowledge-graph-enhanced recommender.

    The KG carries item attributes (brand -> belongs_to -> category,
    colour, material). KG edges improve cold-start for new items.
    """

    def __init__(self, n_users: int, n_items: int,
                 n_entities: int, n_relations: int,
                 embedding_dim: int = 64):
        super().__init__()
        # Users and items, as in LightGCN; items are a subset of entities.
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.entity_embedding = nn.Embedding(n_entities, embedding_dim)

        # KG relation table.
        self.relation_embedding = nn.Embedding(n_relations, embedding_dim)

        nn.init.xavier_uniform_(self.user_embedding.weight)
        nn.init.xavier_uniform_(self.entity_embedding.weight)
        # Bug fix: relation embeddings were left at the default N(0, 1)
        # init, inconsistent with the Xavier init of the other tables.
        nn.init.xavier_uniform_(self.relation_embedding.weight)

    def compute_kg_score(self, h: torch.Tensor,
                         r: torch.Tensor,
                         t: torch.Tensor) -> torch.Tensor:
        """TransE scoring: h + r ≈ t, so scores are -||h + r - t||_2
        (higher, i.e. less negative, is better). The original comment
        said TransR, but this formula is TransE."""
        return -(h + r - t).norm(p=2, dim=-1)

    def forward_kg(self, kg_triples: torch.Tensor) -> torch.Tensor:
        """Score a batch of (head, relation, tail) KG triples.

        Args:
            kg_triples: long tensor of shape (batch, 3).
        Returns:
            Tensor of shape (batch,) with TransE scores.
        """
        h_idx, r_idx, t_idx = kg_triples[:, 0], kg_triples[:, 1], kg_triples[:, 2]
        h = self.entity_embedding(h_idx)
        r = self.relation_embedding(r_idx)
        t = self.entity_embedding(t_idx)
        return self.compute_kg_score(h, r, t)

Comparison of approaches to GNN recommendations

Model NDCG@20 (Amazon) Parameters Training (epochs)
MF (baseline) 0.031 n×d ~100
NCF 0.038 n×d + MLP ~50
LightGCN 0.047 n×d ~200
NGCF 0.044 n×d + W ~200
KG-enhanced 0.052 n×d + KG ~300

LightGCN offers the best balance of quality and simplicity for production. KG-enhanced methods provide a 10-15% increase in NDCG on datasets with rich product metadata (fashion, movies), but require building and maintaining a Knowledge Graph.