Automatic Generation of Related Articles Block
A "Related articles" block keeps users on the site and reduces bounce rate. Manual selection doesn't scale—with hundreds of publications, you need an automatic system based on tags, categories, or semantic similarity.
Related Content Selection Strategies
By tags and categories — fast, no ML needed, but shallow.
By TF-IDF — statistical similarity based on term frequency.
By vector embeddings — semantic similarity, best quality, requires pgvector.
Tag-based Approach
// RelatedArticleService
/**
 * Picks "related" articles for a given article.
 *
 * Strategy: rank other published articles by how many tags they share with
 * the source article; when the source article has no tags, fall back to the
 * newest published articles from the same category.
 */
class RelatedArticleService
{
    /**
     * Return up to $limit published articles related to $article.
     *
     * @param Article $article Source article; its `tags` relation and `category_id` are read.
     * @param int     $limit   Maximum number of articles to return.
     * @return Collection Articles other than $article. In the tag branch each
     *                    result carries a `common_tags_count` attribute.
     */
    public function getRelated(Article $article, int $limit = 4): Collection
    {
        if ($article->tags->isEmpty()) {
            // Fallback: newest articles from the same category.
            return Article::published()
                ->where('category_id', $article->category_id)
                ->where('id', '!=', $article->id)
                ->latest()
                ->limit($limit)
                ->get();
        }

        $tagIds = $article->tags->pluck('id');

        // Shared constraint: only tags that the source article also has.
        $sharedTags = function ($q) use ($tagIds) {
            $q->whereIn('tags.id', $tagIds);
        };

        return Article::published()
            ->where('id', '!=', $article->id)
            // Require at least one shared tag via an EXISTS sub-query instead of
            // HAVING on the `common_tags_count` alias: PostgreSQL does not allow
            // select aliases in HAVING (nor HAVING without GROUP BY on `*`).
            ->whereHas('tags', $sharedTags)
            // Count of shared tags, used only for ranking.
            ->withCount(['tags as common_tags_count' => $sharedTags])
            ->orderByDesc('common_tags_count')
            ->orderByDesc('published_at')
            ->limit($limit)
            ->get();
    }
}
Embedding-based Approach with pgvector
// When creating/updating article
/**
 * Keeps article embeddings up to date by queueing GenerateArticleEmbedding
 * when an article is saved.
 */
class ArticleObserver
{
    /**
     * Attributes that GenerateArticleEmbedding turns into the embedding text.
     * Only changes to these make a new embedding necessary.
     */
    private const EMBEDDING_SOURCE_FIELDS = ['title', 'excerpt', 'content'];

    /**
     * Queue an embedding refresh on the "low" queue when the article was just
     * created or one of its embedding source fields changed.
     *
     * The guard matters: the job itself saves the article to store the
     * embedding, which fires `saved` again. Dispatching unconditionally would
     * re-queue the job forever and call the embeddings API on every cycle.
     */
    public function saved(Article $article): void
    {
        $needsEmbedding = $article->wasRecentlyCreated
            || $article->wasChanged(self::EMBEDDING_SOURCE_FIELDS);

        if (! $needsEmbedding) {
            return;
        }

        GenerateArticleEmbedding::dispatch($article)->onQueue('low');
    }
}
/**
 * Queued job: builds a text summary of an article, requests an embedding for
 * it from the OpenAI embeddings endpoint, stores the vector on the article and
 * drops the article's cached related-articles list.
 */
class GenerateArticleEmbedding implements ShouldQueue
{
    // Standard Laravel job traits; `::dispatch()` and `->onQueue()` rely on them.
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** Maximum number of characters of plain-text body sent to the model. */
    private const CONTENT_CHAR_LIMIT = 2000;

    /** @var Article Article whose embedding is (re)generated. */
    protected $article;

    /**
     * @param Article $article Article to embed.
     */
    public function __construct(Article $article)
    {
        $this->article = $article;
    }

    /**
     * Generate and persist the embedding, then invalidate the related cache.
     */
    public function handle(): void
    {
        // Strip markup first, then truncate by characters. Truncating raw HTML
        // with byte-based substr() can cut a tag or a multibyte character in
        // half, leaving tag debris or invalid UTF-8 in the request payload.
        $plainContent = mb_substr(
            strip_tags((string) $this->article->content),
            0,
            self::CONTENT_CHAR_LIMIT
        );

        $text = implode("\n", [
            $this->article->title,
            $this->article->excerpt,
            $plainContent,
        ]);

        // Nothing meaningful to embed; skip the API call.
        if (trim($text) === '') {
            return;
        }

        $embedding = OpenAI::embeddings()->create([
            'model' => 'text-embedding-3-small',
            'input' => $text,
        ])->embeddings[0]->embedding;

        // Stored as a '[v1,v2,...]' literal. Saved quietly so model events
        // (e.g. an observer's `saved` hook that dispatches this job) do not
        // fire and re-queue the job in a loop.
        $this->article
            ->fill(['embedding' => '[' . implode(',', $embedding) . ']'])
            ->saveQuietly();

        // Recalculate related cache for this article on next request.
        Cache::forget("related_articles_{$this->article->id}");
    }
}
// Query related via pgvector
/**
 * Return up to $limit published articles whose embeddings are closest to
 * $article's embedding, ordered by the pgvector `<=>` distance (ascending).
 *
 * Results are cached for 86400 seconds under "related_articles_{id}". The
 * cache entry holds one result set per requested $limit, so calls with
 * different limits never receive each other's (wrong-sized) result, while a
 * single Cache::forget() on that key still invalidates everything.
 *
 * @param Article $article Source article; must already have an embedding.
 * @param int     $limit   Maximum number of articles to return.
 * @return Collection Empty when $article has no embedding yet; otherwise
 *                    articles with an extra `distance` attribute.
 */
public function getSemanticallyRelated(Article $article, int $limit = 4): Collection
{
    $embedding = $article->embedding;
    if (!$embedding) {
        // Empty Eloquent collection, so the declared return type is satisfied
        // whichever Collection class it refers to.
        return $article->newCollection();
    }

    $cacheKey = "related_articles_{$article->id}";

    // Entries written in another shape (e.g. a bare collection) are ignored.
    $byLimit = Cache::get($cacheKey);
    if (!is_array($byLimit)) {
        $byLimit = [];
    }

    if (isset($byLimit[$limit])) {
        return $byLimit[$limit];
    }

    $byLimit[$limit] = Article::published()
        ->where('id', '!=', $article->id)
        ->selectRaw('*, (embedding <=> ?) AS distance', [$embedding])
        ->whereNotNull('embedding')
        ->orderBy('distance')
        ->limit($limit)
        ->get();

    Cache::put($cacheKey, $byLimit, 86400);

    return $byLimit[$limit];
}
React Component with Lazy Loading
// RelatedArticles.tsx
/**
 * One related-article card.
 * NOTE(review): field list is inferred from what the component renders;
 * confirm types against the `/api/articles/:id/related` response.
 */
interface RelatedArticle {
  id: number | string;
  url: string;
  title: string;
  category: string;
  reading_time: number | string;
  image?: string | null;
}

/** Number of placeholder cards shown while the list has not loaded yet. */
const RELATED_SKELETON_COUNT = 4;

/**
 * Fetch the related articles for `articleId`.
 *
 * Rejects on non-2xx responses so the query enters its error state instead of
 * treating an error body as data. Accepts either a bare JSON array or an
 * object wrapping the array in `data`; anything else yields an empty list.
 */
async function fetchRelatedArticles(articleId: number): Promise<RelatedArticle[]> {
  const response = await fetch(`/api/articles/${articleId}/related`);
  if (!response.ok) {
    throw new Error(`Failed to load related articles: HTTP ${response.status}`);
  }

  const payload: unknown = await response.json();
  if (Array.isArray(payload)) {
    return payload as RelatedArticle[];
  }
  if (typeof payload === 'object' && payload !== null && 'data' in payload) {
    const wrapped = (payload as { data: unknown }).data;
    if (Array.isArray(wrapped)) {
      return wrapped as RelatedArticle[];
    }
  }
  return [];
}

/**
 * "Read Also" block that loads related articles lazily: the request is only
 * enabled once the block comes within 200px of the viewport.
 *
 * Renders skeleton cards until data arrives, and renders nothing when the
 * request failed or returned no articles.
 */
export function RelatedArticles({ articleId }: { articleId: number }) {
  const ref = useRef<HTMLDivElement>(null);
  const [inView, setInView] = useState(false);

  // Load only when block enters viewport
  useEffect(() => {
    const node = ref.current;
    if (!node) return;

    // No IntersectionObserver available: load immediately rather than never.
    if (typeof IntersectionObserver === 'undefined') {
      setInView(true);
      return;
    }

    const observer = new IntersectionObserver(
      (entries) => {
        if (entries.some((entry) => entry.isIntersecting)) {
          setInView(true);
          // The flag never goes back to false, so stop observing.
          observer.disconnect();
        }
      },
      { rootMargin: '200px' }
    );
    observer.observe(node);
    return () => observer.disconnect();
  }, []);

  const { data, isError } = useQuery({
    queryKey: ['related', articleId],
    queryFn: () => fetchRelatedArticles(articleId),
    enabled: inView,
    staleTime: 10 * 60 * 1000,
  });

  // Nothing useful to show: hide the whole block, heading included.
  if (isError || (data !== undefined && data.length === 0)) {
    return null;
  }

  return (
    <div ref={ref} className="mt-10">
      <h3 className="text-xl font-bold mb-5">Read Also</h3>
      {data === undefined ? (
        // Same column layout as the loaded grid to avoid a layout shift.
        <div className="grid grid-cols-1 sm:grid-cols-2 gap-4">
          {Array.from({ length: RELATED_SKELETON_COUNT }, (_, i) => (
            <div key={i} className="h-32 bg-gray-100 rounded-lg animate-pulse" />
          ))}
        </div>
      ) : (
        <div className="grid grid-cols-1 sm:grid-cols-2 gap-4">
          {data.map((article) => (
            <a
              key={article.id}
              href={article.url}
              className="group flex gap-4 p-4 border rounded-xl hover:shadow-md transition-shadow"
            >
              {article.image && (
                <img
                  src={article.image}
                  alt=""
                  loading="lazy"
                  className="w-20 h-16 object-cover rounded-lg flex-shrink-0"
                />
              )}
              <div>
                <p className="text-xs text-blue-600 mb-1">{article.category}</p>
                <h4 className="text-sm font-medium group-hover:text-blue-600 transition-colors line-clamp-2">
                  {article.title}
                </h4>
                <p className="text-xs text-gray-400 mt-1">{article.reading_time} min read</p>
              </div>
            </a>
          ))}
        </div>
      )}
    </div>
  );
}
Timeline
Implementing a related-articles system with tag-based and embedding-based selection, plus the lazy-loading React component, takes 3–4 working days.







