AI Influencer Matching and Audience Analytics

We design and deploy artificial intelligence systems, from prototype to production-ready solution. Our team combines expertise in machine learning, data engineering, and MLOps to make AI work in real business settings, not just in the lab.
Showing 1 of 1 services · All 1566 services
AI Influencer Matching and Audience Analytics
Medium
~2-4 weeks
FAQ
AI Development Areas
AI Solution Development Stages
Latest works
  • image_website-b2b-advance_0.png
    B2B ADVANCE company website development
    1212
  • image_web-applications_feedme_466_0.webp
    Development of a web application for FEEDME
    1161
  • image_websites_belfingroup_462_0.webp
    Website development for BELFINGROUP
    852
  • image_ecommerce_furnoro_435_0.webp
    Development of an online store for the company FURNORO
    1041
  • image_logo-advance_0.png
    B2B Advance company logo design
    561
  • image_crm_enviok_479_0.webp
    Development of a web application for Enviok
    822

AI-based influencer matching and audience analytics system

Manually searching for influencers is expensive and unreliable. AI matching analyzes not only the blogger's topic but also the quality of the audience (bots, fake engagement) and its overlap with the brand's target audience, and it predicts the campaign's ROI. Platforms like GRIN, Traackr, and Upfluence use precisely these approaches.

Audience Quality Analytics

import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.cluster import KMeans
import json
from anthropic import Anthropic

class InfluencerAudienceAnalyzer:
    """Analyze the quality and composition of an influencer's audience."""

    # Expected engagement-rate range (min %, max %) for each account size tier.
    _EXPECTED_ER_RANGE = {
        'nano': (5, 10),
        'micro': (3, 6),
        'macro': (1, 3),
        'mega': (0.5, 1.5),
    }

    def compute_authenticity_score(self, account_data: dict) -> dict:
        """
        Compute an audience authenticity score (0-100) from account metrics.

        The score starts at 100 and is reduced by fixed penalties when the
        engagement rate is outside the range expected for the account tier,
        when weekly growth spikes above 50%, or when a video account has a
        low views-to-followers ratio.

        Args:
            account_data: Account metrics. Keys read: 'followers_count',
                'avg_likes', 'avg_comments', 'avg_views',
                'max_weekly_growth_pct' and 'content_type'. All are optional.

        Returns:
            A dict with 'authenticity_score', 'engagement_rate' (percent,
            rounded to 2 decimals), 'tier', 'issues' (human-readable
            findings) and 'estimated_real_followers'.
        """
        # A missing key still defaults to 1; an explicit 0 or None is treated
        # as "no followers" instead of crashing on the divisions below.
        followers = account_data.get('followers_count', 1) or 0
        avg_likes = account_data.get('avg_likes', 0)
        avg_comments = account_data.get('avg_comments', 0)
        avg_views = account_data.get('avg_views', followers)
        has_followers = followers > 0

        # Engagement rate (ER), in percent. Defined as 0 for an account
        # without followers so that no division by zero can occur.
        er = (avg_likes + avg_comments) / followers * 100 if has_followers else 0.0

        # Audience growth (sharp jumps suggest purchased followers).
        growth_spike = account_data.get('max_weekly_growth_pct', 0)

        # Views-per-follower ratio, relevant for video content.
        views_ratio = avg_views / followers if has_followers else 0

        # NOTE: 'following_count' is not used by any scoring rule below.

        score = 100.0
        issues = []

        # ER too low for the tier
        # (norms: nano 5-10%, micro 3-6%, macro 1-3%, mega 0.5-1.5%).
        size_tier = self._get_tier(followers)
        expected_range = self._EXPECTED_ER_RANGE.get(size_tier, (1, 5))

        if er < expected_range[0] * 0.5:
            score -= 30
            issues.append(f'ER {er:.1f}% значительно ниже нормы {expected_range[0]}% для {size_tier}')
        elif er < expected_range[0]:
            # Mildly low ER: smaller penalty and no issue entry.
            score -= 15

        # Abnormally high ER (possible purchased likes).
        if er > expected_range[1] * 3:
            score -= 20
            issues.append('Аномально высокий ER — возможна накрутка')

        # Sharp audience growth.
        if growth_spike > 50:
            score -= 25
            issues.append(f'Резкий рост аудитории +{growth_spike:.0f}% за неделю')

        # Low view ratio; only checked for video accounts.
        if views_ratio < 0.1 and account_data.get('content_type') == 'video':
            score -= 15
            issues.append('Низкий охват видео-контента')

        return {
            'authenticity_score': max(0, round(score)),
            'engagement_rate': round(er, 2),
            'tier': size_tier,
            'issues': issues,
            'estimated_real_followers': int(followers * max(0, score) / 100)
        }

    def _get_tier(self, followers: int) -> str:
        """Map a follower count to a size tier: nano, micro, macro or mega."""
        if followers < 10000:
            return 'nano'
        if followers < 100000:
            return 'micro'
        if followers < 1000000:
            return 'macro'
        return 'mega'

    def analyze_audience_demographics(self, follower_sample: pd.DataFrame,
                                       brand_target_audience: dict) -> dict:
        """
        Measure how well the influencer's audience overlaps the brand's target.

        Each dimension is evaluated only when the matching column exists in
        ``follower_sample`` and the matching key exists in
        ``brand_target_audience``.

        Args:
            follower_sample: Sample of followers; the optional columns used
                are 'gender', 'age_group' and 'country'.
            brand_target_audience: Target description; the optional keys used
                are 'gender', 'age_groups' and 'countries'.

        Returns:
            A dict with any of 'gender_match', 'age_overlap', 'geo_overlap'
            that could be computed, plus 'audience_affinity' (their mean
            rounded to 2 decimals, or 0.5 when nothing could be computed).
        """
        overlaps = {}

        # Gender: share of sampled followers with the brand's target gender.
        if 'gender' in follower_sample.columns and 'gender' in brand_target_audience:
            brand_gender = brand_target_audience['gender']
            influencer_gender_dist = follower_sample['gender'].value_counts(normalize=True).to_dict()
            overlaps['gender_match'] = influencer_gender_dist.get(brand_gender, 0)

        # Age: fraction of target age groups among the sample's top 3 groups.
        if 'age_group' in follower_sample.columns and 'age_groups' in brand_target_audience:
            target_ages = set(brand_target_audience['age_groups'])
            influencer_ages = set(
                follower_sample['age_group'].value_counts(normalize=True)
                .nlargest(3).index.tolist()
            )
            overlaps['age_overlap'] = len(target_ages & influencer_ages) / max(len(target_ages), 1)

        # Geography: fraction of target countries among the sample's top 5.
        if 'country' in follower_sample.columns and 'countries' in brand_target_audience:
            target_countries = set(brand_target_audience['countries'])
            influencer_countries = set(
                follower_sample['country'].value_counts(normalize=True)
                .nlargest(5).index.tolist()
            )
            overlaps['geo_overlap'] = len(target_countries & influencer_countries) / max(len(target_countries), 1)

        # Overall affinity: mean of the computed dimensions; 0.5 when there
        # are none.
        overlaps['audience_affinity'] = round(np.mean(list(overlaps.values())) if overlaps else 0.5, 2)

        return overlaps


class InfluencerMatcher:
    """Match influencers to a brand campaign."""

    def __init__(self):
        # LLM client used for brief generation, and the audience analyzer
        # used for scoring.
        self.llm = Anthropic()
        self.analyzer = InfluencerAudienceAnalyzer()

    def score_influencer(self, influencer: dict,
                          campaign: dict,
                          follower_sample: pd.DataFrame) -> dict:
        """
        Compute a composite score of an influencer for a campaign.

        The total is a weighted sum of audience authenticity (0.30),
        audience affinity (0.35), content category match (0.25) and an
        inverted cost-per-engagement term (0.10).

        Args:
            influencer: Influencer data. Keys read here: 'id', 'handle',
                'followers_count', 'content_categories'. The whole dict is
                also passed to the analyzer's authenticity scoring.
            campaign: Campaign data. Keys read: 'target_audience',
                'relevant_categories', 'budget' (defaults to 10000).
            follower_sample: Follower sample forwarded to the analyzer's
                demographic overlap analysis.

        Returns:
            A dict with the influencer identity, tier, 'total_score',
            component scores, expected engagements, estimated CPE and the
            authenticity issues as 'red_flags'.
        """
        # Audience quality.
        authenticity = self.analyzer.compute_authenticity_score(influencer)

        # Overlap with the brand's target audience.
        audience_match = self.analyzer.analyze_audience_demographics(
            follower_sample, campaign.get('target_audience', {})
        )
        # One neutral default (0.5) is used for both scoring and reporting,
        # so the reported affinity always matches the value that was scored.
        audience_affinity = audience_match.get('audience_affinity', 0.5)

        # Topical fit: share of the brand's categories the influencer covers.
        content_categories = set(influencer.get('content_categories', []))
        brand_categories = set(campaign.get('relevant_categories', []))
        category_match = len(content_categories & brand_categories) / max(len(brand_categories), 1)

        # Forecast CPE (cost per engagement).
        budget_per_influencer = campaign.get('budget', 10000)
        expected_engagements = (
            influencer.get('followers_count', 0) *
            authenticity['engagement_rate'] / 100 *
            authenticity['authenticity_score'] / 100
        )
        # Floor at one engagement to avoid dividing by zero.
        cpe = budget_per_influencer / max(expected_engagements, 1)

        # Invert CPE (lower is better); a CPE of 10 or less gets the full term.
        cpe_component = min(1.0, 10 / max(cpe, 0.1))

        total_score = (
            authenticity['authenticity_score'] / 100 * 0.30 +
            audience_affinity * 0.35 +
            category_match * 0.25 +
            cpe_component * 0.10
        )

        return {
            'influencer_id': influencer.get('id'),
            'handle': influencer.get('handle'),
            'tier': authenticity['tier'],
            'total_score': round(total_score, 3),
            'authenticity': authenticity['authenticity_score'],
            'audience_affinity': audience_affinity,
            'category_match': round(category_match, 2),
            'expected_engagements': int(expected_engagements),
            'estimated_cpe': round(cpe, 2),
            'red_flags': authenticity['issues']
        }

    def generate_campaign_brief(self, influencer: dict,
                                 campaign: dict,
                                 *,
                                 model: str = "claude-3-5-sonnet-20241022",
                                 max_tokens: int = 300) -> str:
        """
        Generate a personalized campaign brief for an influencer via the LLM.

        Args:
            influencer: Influencer data. Keys read: 'handle', 'tier',
                'content_categories', and 'followers_count' (used only to
                derive the tier when 'tier' is absent).
            campaign: Campaign data. Keys read: 'name', 'brand_name',
                'product_description', 'key_message', 'target_audience'.
            model: Model identifier sent with the request.
            max_tokens: Upper bound on generated tokens sent with the request.

        Returns:
            The concatenated text blocks of the model response; an empty
            string when the response contains no text block.
        """
        # Raw influencer dicts may not carry 'tier'; derive it from the
        # follower count so the prompt does not say "None tier".
        tier = influencer.get('tier')
        if tier is None and influencer.get('followers_count') is not None:
            tier = self.analyzer._get_tier(influencer['followers_count'])

        response = self.llm.messages.create(
            model=model,
            max_tokens=max_tokens,
            messages=[{
                "role": "user",
                "content": f"""Write a personalized campaign brief for an influencer in Russian.

Influencer: @{influencer.get('handle')}, {tier} tier, {influencer.get('content_categories', [])} content
Campaign: {campaign.get('name')}, brand: {campaign.get('brand_name')}
Product: {campaign.get('product_description', '')}
Key message: {campaign.get('key_message', '')}
Target audience: {campaign.get('target_audience', {})}

Write a 2-3 paragraph brief that:
1. Explains why this specific influencer was chosen (personalized)
2. Describes the campaign goals and what we want to achieve
3. Gives creative guidelines that fit their style"""
            }]
        )
        # Join the text blocks rather than indexing content[0], so an empty
        # or non-text-first response does not raise.
        return ''.join(
            block.text for block in response.content
            if getattr(block, 'type', 'text') == 'text'
        )

AI-based influencer matching reduces CPE by 25-40% compared to manual selection due to precise audience matching. The main ROI driver is the exclusion of bots: 30-60% of a typical macro-influencer's audience may be inactive or fake accounts.