AI Agent for Legal Document Analysis
Lawyers spend 60-70% of their time analyzing documents: reading contracts, identifying risky conditions, checking compliance with templates, and comparing versions. An AI agent does not replace legal expertise, but it performs the first-pass analytical work in minutes instead of hours: it finds deviations from standards, identifies missing mandatory conditions, and generates a structured report with references to specific clauses.
Legal Agent Architecture
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_core.tools import tool
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
import operator
import json
class LegalAnalysisState(TypedDict):
    """State dictionary shared by the nodes of the legal-analysis graph."""

    # Text of the contract under analysis; read by the analysis node.
    document_text: str

    # Contract type label written by the analysis node.
    document_type: str

    # Parsed results of the clause checks. The operator.add annotation makes
    # the graph concatenate each node's update onto the existing list.
    analysis_results: Annotated[list, operator.add]

    # Risk findings, accumulated with the same concatenating reducer.
    risk_flags: Annotated[list, operator.add]

    # Names of mandatory clauses not found in the document.
    missing_clauses: list[str]

    # Report text produced by the report node.
    final_report: str
@tool
def check_mandatory_clauses(document_text: str, doc_type: str) -> str:
    """Check which mandatory clauses for a contract type appear in a document.

    The contract type is normalized before lookup: it is lower-cased and any
    run of non-alphanumeric characters becomes a single underscore, so
    "Supply contract" resolves to "supply_contract".

    A clause counts as present only when every significant keyword of its
    name starts some word of the document. Stop words such as "of" and "and"
    are ignored, and matching is anchored at word starts, so "rent" matches
    "rental" but not "parent". This is a keyword heuristic, not a legal
    judgement.

    Args:
        document_text: Full text of the contract.
        doc_type: Contract type, e.g. "supply_contract",
            "employment_contract" or "lease".

    Returns:
        A JSON string with the keys:
        present_clauses: clauses whose keywords were all found.
        missing_clauses: clauses with at least one keyword not found.
        completeness_score: share of required clauses present; 1.0 when the
            contract type has no known requirements.
        doc_type_recognized: False when the contract type is unknown, in
            which case the score carries no information.
    """
    import re

    mandatory_map = {
        "supply_contract": [
            "subject of contract", "goods price", "payment procedure",
            "delivery time", "goods quality", "liability of parties",
            "dispute resolution", "contract duration",
        ],
        "employment_contract": [
            "place of work", "job function", "work start date",
            "salary conditions", "work time schedule",
            "guarantees and compensation", "working conditions",
        ],
        "lease": [
            "leased property", "rent payment", "lease term",
            "tenant rights and obligations", "landlord rights and obligations",
            "property return procedure",
        ],
    }
    # Words that occur in practically any text and therefore prove nothing.
    stop_words = frozenset({"a", "an", "and", "for", "in", "of", "or", "the", "to"})

    normalized_type = re.sub(r"[^a-z0-9]+", "_", (doc_type or "").lower()).strip("_")
    recognized = normalized_type in mandatory_map
    required = mandatory_map.get(normalized_type, [])
    text_lower = document_text.lower()

    def clause_is_present(clause: str) -> bool:
        keywords = [word for word in clause.split() if word not in stop_words]
        # \b anchors the keyword at a word start so inflected forms still
        # match ("price" -> "prices") while embedded substrings do not.
        return all(
            re.search(r"\b" + re.escape(word), text_lower) for word in keywords
        )

    present = []
    missing = []
    for clause in required:
        (present if clause_is_present(clause) else missing).append(clause)

    return json.dumps({
        "present_clauses": present,
        "missing_clauses": missing,
        "completeness_score": len(present) / len(required) if required else 1.0,
        "doc_type_recognized": recognized,
    })
@tool
def identify_risk_clauses(document_text: str) -> str:
    """Identify potentially risky conditions by searching for known phrases.

    Each phrase is matched case-insensitively, and any run of whitespace
    (including line breaks) is accepted between its words. Only the first
    occurrence of each phrase is reported.

    Args:
        document_text: Full text of the contract.

    Returns:
        A JSON string {"risks_found": [...]}. Each entry is an object with
        the keys risk_type (category name), pattern (the phrase that matched)
        and excerpt (the matched text with up to 60 characters of context on
        each side). The list is empty when nothing matches.
    """
    import re

    risk_patterns = {
        "unilateral_termination": [
            "may unilaterally terminate",
            "terminate without notice",
        ],
        "unlimited_liability": [
            "bears full liability",
            "compensates all damages without limits",
        ],
        "auto_renewal": [
            "automatically renews",
            "deemed renewed",
        ],
        "jurisdiction": [
            "court at location of",
            "arbitration court of",
        ],
    }
    context_chars = 60

    risks_found = []
    for risk_type, phrases in risk_patterns.items():
        for phrase in phrases:
            # Search the original text so the excerpt offsets stay valid.
            pattern = r"\s+".join(re.escape(word) for word in phrase.split())
            match = re.search(pattern, document_text, re.IGNORECASE)
            if match is None:
                continue
            start = max(0, match.start() - context_chars)
            end = match.end() + context_chars
            risks_found.append({
                "risk_type": risk_type,
                "pattern": phrase,
                "excerpt": document_text[start:end].strip(),
            })

    return json.dumps({"risks_found": risks_found})
Comparison with Template Contract
class ContractComparator:
    """Compares a counterparty's contract with the company template via an LLM.

    Args:
        llm: Chat model exposing an async ``ainvoke(prompt)`` whose result has
            a ``content`` attribute. When omitted, a ``ChatOpenAI`` gpt-4o
            model with temperature 0 is created on first use.
        max_chars: Maximum number of characters of each document sent to the
            model. ``None`` disables truncation. Defaults to 3000.
    """

    COMPARISON_PROMPT = """Compare the contract with the company's template.
Template contract:
{template}
Received contract from counterparty:
{received}
Identify:
1. **Deviations in favor of counterparty** (they got better terms)
2. **Deviations against our company** (we bear increased risks)
3. **Neutral changes** (editorial corrections without legal consequences)
4. **Missing conditions** (in template, absent in received)
For each deviation:
- Template clause vs contract clause (quote)
- Legal consequences of change
- Recommendation: accept / insist on template / acceptable compromise
Format: Markdown table + comments."""

    DEFAULT_MAX_CHARS = 3000

    # Appended to clipped text so the model does not report the cut-off tail
    # as "missing conditions".
    TRUNCATION_NOTICE = (
        "\n[... truncated: only the first {limit} characters are shown ...]"
    )

    def __init__(self, llm=None, max_chars=DEFAULT_MAX_CHARS):
        self.llm = llm
        self.max_chars = max_chars

    def _clip(self, text: str) -> str:
        """Return text limited to max_chars, marking any truncation."""
        if self.max_chars is None or len(text) <= self.max_chars:
            return text
        notice = self.TRUNCATION_NOTICE.format(limit=self.max_chars)
        return text[: self.max_chars] + notice

    async def compare_with_template(
        self,
        template_text: str,
        received_text: str
    ) -> str:
        """Ask the model to compare the received contract with the template.

        Args:
            template_text: Text of the company's template contract.
            received_text: Text of the contract received from the counterparty.

        Returns:
            The ``content`` of the model's reply.
        """
        if self.llm is None:
            # Same model settings as the analysis graph in this file.
            self.llm = ChatOpenAI(model="gpt-4o", temperature=0)
        prompt = self.COMPARISON_PROMPT.format(
            template=self._clip(template_text),
            received=self._clip(received_text),
        )
        result = await self.llm.ainvoke(prompt)
        return result.content
Analysis Graph (LangGraph)
def build_legal_agent_graph():
    """Build and compile the two-node analysis graph: analyze -> report.

    Each node returns only the keys it updates. ``analysis_results`` and
    ``risk_flags`` are declared with an ``operator.add`` reducer, so the graph
    concatenates a node's returned list onto the stored one. Returning the
    whole (mutated) state would append the existing items again on every step.

    Returns:
        The compiled graph, entered at the "analyze" node.
    """
    llm = ChatOpenAI(model="gpt-4o", temperature=0)

    def analyze_node(state: LegalAnalysisState) -> dict:
        """Classify the document, then run the clause and risk checks."""
        # Step 1: determine document type. The labels are the contract types
        # check_mandatory_clauses knows; a free-form answer would never match
        # them and every document would look complete.
        doc_type_result = llm.invoke(
            "Classify the document as exactly one of: "
            "supply_contract, employment_contract, lease, other. "
            "Answer with the label only.\n\n"
            f"{state['document_text'][:500]}"
        )
        raw_label = doc_type_result.content.strip().strip(".\"'`").lower()
        document_type = "_".join(raw_label.replace("-", " ").split())

        # Step 2: check mandatory conditions
        mandatory_check = json.loads(
            check_mandatory_clauses.invoke({
                "document_text": state["document_text"],
                "doc_type": document_type,
            })
        )

        # Step 3: identify risks
        risks = json.loads(
            identify_risk_clauses.invoke({
                "document_text": state["document_text"],
            })
        )

        return {
            "document_type": document_type,
            "analysis_results": [mandatory_check],
            "risk_flags": risks.get("risks_found", []),
            "missing_clauses": mandatory_check.get("missing_clauses", []),
        }

    def generate_report_node(state: LegalAnalysisState) -> dict:
        """Turn the collected findings into the final report text."""
        prompt = f"""Create a structured report on document analysis for the lawyer.
Document type: {state['document_type']}
Condition check results: {json.dumps(state['analysis_results'], ensure_ascii=False)}
Identified risks: {json.dumps(state['risk_flags'], ensure_ascii=False)}
Report structure:
## Overall Assessment
## Critical Risks (require immediate attention)
## Missing Mandatory Conditions
## Recommendations for Revision
## Summary: recommend for signature / requires revision / decline"""
        return {"final_report": llm.invoke(prompt).content}

    graph = StateGraph(LegalAnalysisState)
    graph.add_node("analyze", analyze_node)
    graph.add_node("report", generate_report_node)
    graph.add_edge("analyze", "report")
    graph.add_edge("report", END)
    graph.set_entry_point("analyze")
    return graph.compile()
Integration with Legal Databases
class LegalDatabaseIntegration:
    """Looks up a counterparty in the company registry and a risk service.

    Args:
        egrul_client: Registry client exposing an async ``get_company(inn)``
            that returns a mapping, or a falsy value when nothing is found.
        risk_service: Scoring client exposing an async ``evaluate(inn)`` that
            returns a numeric score, or ``None`` when no score is available.
    """

    # Risk score above which the tax-debt flag is raised.
    TAX_DEBT_RISK_THRESHOLD = 60

    def __init__(self, egrul_client=None, risk_service=None):
        self.egrul_client = egrul_client
        self.risk_service = risk_service

    async def check_counterparty(self, inn: str) -> dict:
        """Checks counterparty in company registry and risk databases.

        Args:
            inn: Taxpayer identification number of the counterparty.

        Returns:
            A dict with company_name, status, registration_date, risk_score,
            bankruptcy_flag and tax_debt_flag. Registry fields are ``None``
            (and bankruptcy_flag is False) when the registry has no record.
        """
        # A missing record is treated as an empty one instead of crashing.
        egrul_data = (await self.egrul_client.get_company(inn)) or {}
        risk_score = await self.risk_service.evaluate(inn)
        return {
            "company_name": egrul_data.get("name"),
            "status": egrul_data.get("status"),  # active / liquidated
            "registration_date": egrul_data.get("ogrn_date"),
            "risk_score": risk_score,  # 0-100, higher = riskier
            "bankruptcy_flag": egrul_data.get("bankruptcy", False),
            # NOTE(review): derived from the overall risk score, not from tax
            # data; confirm this proxy is intended.
            "tax_debt_flag": (
                risk_score is not None
                and risk_score > self.TAX_DEBT_RISK_THRESHOLD
            ),
        }
Case: the legal department of a holding company receives 200 contracts per month from counterparties. Before implementation, a lawyer read each contract in full (~45 min per contract). After implementation, the AI agent produces a report with the identified deviations in 90 seconds, and the lawyer reads the report and checks only the flagged items (~10 min). Throughput: from 200 to 380 contracts per month with the same team. Critical risk detection: +23% (the AI catches patterns that humans miss when tired).
Timeline: 3-4 weeks for a basic agent with condition checking; 6-8 weeks for template comparison and company registry integration.







