AI Agent for Legal Document Analysis

We design and deploy artificial intelligence systems, from prototype to production-ready solutions. Our team combines expertise in machine learning, data engineering, and MLOps to make AI work not just in the lab, but in real business settings.
Showing 1 of 1 services. View all 1,566 services.
AI Agent for Legal Document Analysis
Complex
from 1 week to 3 months
FAQ
AI Development Areas
AI Solution Development Stages
Latest works
  • image_website-b2b-advance_0.png
    B2B ADVANCE company website development
    1212
  • image_web-applications_feedme_466_0.webp
    Development of a web application for FEEDME
    1161
  • image_websites_belfingroup_462_0.webp
    Website development for BELFINGROUP
    852
  • image_ecommerce_furnoro_435_0.webp
    Development of an online store for the company FURNORO
    1041
  • image_logo-advance_0.png
    B2B Advance company logo design
    561
  • image_crm_enviok_479_0.webp
    Development of a web application for Enviok
    822

AI Agent for Legal Document Analysis

Lawyers spend 60-70% of their time analyzing documents: reading contracts, identifying risk conditions, checking template compliance, comparing versions. An AI agent doesn't replace legal expertise but performs draft analytical work in minutes instead of hours: finds deviations from standards, identifies missing mandatory conditions, and generates a structured report with specific references to clauses.

Legal Agent Architecture

import json
import operator
import re
from typing import Annotated, TypedDict

from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph

class LegalAnalysisState(TypedDict):
    """Shared state flowing through the LangGraph legal-analysis pipeline.

    Fields annotated with ``operator.add`` are accumulator channels: values
    a node returns for them are concatenated onto the existing list by the
    graph runtime rather than replacing it.
    """

    # Full plain text of the document under review.
    document_text: str
    # Classified contract type (e.g. "supply_contract"), set by the analyze node.
    document_type: str
    # Accumulated tool outputs (parsed JSON dicts); list-append channel.
    analysis_results: Annotated[list, operator.add]
    # Accumulated risk findings; list-append channel.
    risk_flags: Annotated[list, operator.add]
    # Mandatory clauses not found in the document.
    missing_clauses: list[str]
    # Markdown report produced by the report node.
    final_report: str

@tool
def check_mandatory_clauses(document_text: str, doc_type: str) -> str:
    """Check a contract for the mandatory clauses expected for its type.

    Args:
        document_text: Full plain text of the contract.
        doc_type: Contract type key ("supply_contract", "employment_contract",
            or "lease"). Unknown types produce an empty requirement list.

    Returns:
        JSON string with "present_clauses", "missing_clauses", and a
        "completeness_score" in [0, 1].
    """
    mandatory_map = {
        "supply_contract": [
            "subject of contract", "goods price", "payment procedure",
            "delivery time", "goods quality", "liability of parties",
            "dispute resolution", "contract duration"
        ],
        "employment_contract": [
            "place of work", "job function", "work start date",
            "salary conditions", "work time schedule",
            "guarantees and compensation", "working conditions"
        ],
        "lease": [
            "leased property", "rent payment", "lease term",
            "tenant rights and obligations", "landlord rights and obligations",
            "property return procedure"
        ]
    }

    required = mandatory_map.get(doc_type, [])
    text_lower = document_text.lower()

    # Words too generic to signal a clause on their own. The previous check
    # (`any(word in text_lower ...)`) matched raw substrings of EVERY word,
    # so a stopword like "of" (present in almost any text, even inside
    # "offer") marked the clause as present — nearly all clauses passed.
    stopwords = {"of", "and", "the", "to"}

    missing = []
    present = []
    for clause in required:
        keywords = [w for w in clause.split() if w not in stopwords]
        # Fuzzy match: any significant keyword appearing as a whole word.
        if any(re.search(rf"\b{re.escape(word)}\b", text_lower) for word in keywords):
            present.append(clause)
        else:
            missing.append(clause)

    return json.dumps({
        "present_clauses": present,
        "missing_clauses": missing,
        # Unknown doc types have no requirements -> treated as fully complete.
        "completeness_score": len(present) / len(required) if required else 1.0
    })

@tool
def identify_risk_clauses(document_text: str) -> str:
    """Scan contract text for potentially risky wording.

    Args:
        document_text: Full plain text of the contract.

    Returns:
        JSON string {"risks_found": [{"category": ..., "pattern": ...}, ...]},
        one entry per risk phrase found (case-insensitive substring match).
    """
    risk_patterns = {
        "unilateral_termination": [
            "may unilaterally terminate",
            "terminate without notice"
        ],
        "unlimited_liability": [
            "bears full liability",
            "compensates all damages without limits"
        ],
        "auto_renewal": [
            "automatically renews",
            "deemed renewed"
        ],
        "jurisdiction": [
            "court at location of",
            "arbitration court of"
        ]
    }

    # The original left the scan as a stub that always reported no risks,
    # silently masking every pattern above. Perform the actual scan.
    text_lower = document_text.lower()
    risks_found = []
    for category, phrases in risk_patterns.items():
        for phrase in phrases:
            if phrase in text_lower:
                risks_found.append({"category": category, "pattern": phrase})

    return json.dumps({"risks_found": risks_found})

Comparison with Template Contract

class ContractComparator:
    """Compares a counterparty's contract against the company template via an LLM.

    The original class never initialized ``self.llm``, so every call to
    ``compare_with_template`` raised ``AttributeError``; the constructor
    below accepts the client while remaining backward-compatible with code
    that assigns ``comparator.llm`` after construction.
    """

    COMPARISON_PROMPT = """Compare the contract with the company's template.

Template contract:
{template}

Received contract from counterparty:
{received}

Identify:
1. **Deviations in favor of counterparty** (they got better terms)
2. **Deviations against our company** (we bear increased risks)
3. **Neutral changes** (editorial corrections without legal consequences)
4. **Missing conditions** (in template, absent in received)

For each deviation:
- Template clause vs contract clause (quote)
- Legal consequences of change
- Recommendation: accept / insist on template / acceptable compromise

Format: Markdown table + comments."""

    # Both documents are truncated to this many characters to keep the
    # prompt within the model's context window.
    MAX_DOC_CHARS = 3000

    def __init__(self, llm=None):
        # llm: any client exposing `await ainvoke(prompt)` returning an
        # object with a `.content` attribute (e.g. a LangChain chat model).
        self.llm = llm

    async def compare_with_template(
        self,
        template_text: str,
        received_text: str
    ) -> str:
        """Return an LLM-generated Markdown deviation report.

        Args:
            template_text: The company's reference contract text.
            received_text: The counterparty's contract text.

        Returns:
            The model's response content (Markdown table + comments).
        """
        result = await self.llm.ainvoke(
            self.COMPARISON_PROMPT.format(
                template=template_text[:self.MAX_DOC_CHARS],
                received=received_text[:self.MAX_DOC_CHARS]
            )
        )
        return result.content

Analysis Graph (LangGraph)

def build_legal_agent_graph():
    """Build and compile the two-stage legal document analysis graph.

    Pipeline: analyze (classify document, check mandatory clauses, flag
    risks) -> report (LLM writes a structured Markdown report) -> END.

    Returns:
        A compiled LangGraph app; invoke it with a ``LegalAnalysisState``.
    """
    llm = ChatOpenAI(model="gpt-4o", temperature=0)

    # NOTE(review): currently informational only — the tools are invoked
    # directly below rather than bound to the LLM.
    tools = [
        check_mandatory_clauses,
        identify_risk_clauses,
    ]

    def analyze_node(state: LegalAnalysisState) -> dict:
        # Classifies the document and runs both check tools.
        # Step 1: determine document type from the opening text only.
        doc_type_result = llm.invoke(
            f"Determine document type (1-2 words): {state['document_text'][:500]}"
        )
        document_type = doc_type_result.content.strip()

        # Step 2: check mandatory conditions for the detected type.
        mandatory_check = check_mandatory_clauses.invoke({
            "document_text": state["document_text"],
            "doc_type": document_type
        })

        # Step 3: identify risky wording.
        risk_check = identify_risk_clauses.invoke({
            "document_text": state["document_text"]
        })
        risks = json.loads(risk_check)

        # Return ONLY the updated keys. `analysis_results` and `risk_flags`
        # use an `operator.add` reducer, so mutating the state in place AND
        # returning the full state (as the original did) made LangGraph
        # concatenate the already-appended lists again — every finding was
        # duplicated.
        return {
            "document_type": document_type,
            "analysis_results": [json.loads(mandatory_check)],
            "risk_flags": risks.get("risks_found", []),
        }

    def generate_report_node(state: LegalAnalysisState) -> dict:
        # Turns accumulated findings into a lawyer-facing Markdown report.
        prompt = f"""Create a structured report on document analysis for the lawyer.

Document type: {state['document_type']}
Condition check results: {json.dumps(state['analysis_results'], ensure_ascii=False)}
Identified risks: {json.dumps(state['risk_flags'], ensure_ascii=False)}

Report structure:
## Overall Assessment
## Critical Risks (require immediate attention)
## Missing Mandatory Conditions
## Recommendations for Revision
## Summary: recommend for signature / requires revision / decline"""

        # Partial update: only the report field changes in this node.
        return {"final_report": llm.invoke(prompt).content}

    graph = StateGraph(LegalAnalysisState)
    graph.add_node("analyze", analyze_node)
    graph.add_node("report", generate_report_node)
    graph.add_edge("analyze", "report")
    graph.add_edge("report", END)
    graph.set_entry_point("analyze")

    return graph.compile()

Integration with Legal Databases

class LegalDatabaseIntegration:
    """Aggregates counterparty data from the company registry (EGRUL) and
    a risk-scoring service.

    The original class never initialized ``self.egrul_client`` or
    ``self.risk_service``, so ``check_counterparty`` always raised
    ``AttributeError``; the constructor below accepts the clients while
    remaining backward-compatible with code that assigns them after
    construction.
    """

    # Aggregate risk score above which we raise the tax-debt warning flag.
    TAX_DEBT_RISK_THRESHOLD = 60

    def __init__(self, egrul_client=None, risk_service=None):
        # egrul_client: exposes `await get_company(inn) -> dict`.
        # risk_service: exposes `await evaluate(inn) -> int` (0-100).
        self.egrul_client = egrul_client
        self.risk_service = risk_service

    async def check_counterparty(self, inn: str) -> dict:
        """Check a counterparty by tax ID (INN) in the registry and risk databases.

        Args:
            inn: The counterparty's taxpayer identification number.

        Returns:
            Dict with company name, registry status, registration date,
            risk score (0-100, higher = riskier), and boolean warning flags.
        """
        egrul_data = await self.egrul_client.get_company(inn)
        risk_score = await self.risk_service.evaluate(inn)

        return {
            "company_name": egrul_data.get("name"),
            "status": egrul_data.get("status"),  # e.g. active / liquidated
            "registration_date": egrul_data.get("ogrn_date"),
            "risk_score": risk_score,  # 0-100, higher = riskier
            "bankruptcy_flag": egrul_data.get("bankruptcy", False),
            # NOTE(review): derived from the aggregate risk score, not an
            # actual tax-debt lookup — confirm this heuristic is intended.
            "tax_debt_flag": risk_score > self.TAX_DEBT_RISK_THRESHOLD
        }

Case: legal department of a holding company, 200 contracts per month from counterparties. Before implementation: each contract was read fully by lawyer (~45 min per contract). After: AI agent creates report with identified deviations in 90 seconds, lawyer reads report and checks only flags (~10 min). Productivity: 200 contracts per month → 380 contracts with same team. Critical risk detection: +23% (AI sees patterns that humans miss when tired).

Timeline: basic agent with condition checking: 3-4 weeks; template comparison and company registry integration: 6-8 weeks.