AI System for Legal Legislation and Case Law Search
Legal search is one of the most powerful applications of RAG: the legislative corpus is stable (rules change but don't disappear), documents are structured (articles, sections, paragraphs), and citation accuracy is critically important. AI doesn't interpret law — it searches, structures, and retrieves relevant norms for specific questions.
Architecture of Legal Search System
from anthropic import Anthropic
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pydantic import BaseModel
from typing import Optional
import json
client = Anthropic()
class LegalDocument(BaseModel):
    """A legal source document (statute, decree, or court decision) prepared for indexing."""
    doc_id: str  # unique identifier within the legal database
    title: str  # human-readable document title
    doc_type: str  # "federal_law", "decree", "supreme_court_decision"
    number: str  # official number, e.g. "149-ФЗ", "А40-12345/2023"
    date: str  # document date as a string; NOTE(review): range filters compare strings — presumably ISO format, confirm
    content: str  # full document text; used as a fallback when no per-article breakdown exists
    articles: list[dict] = []  # [{"article": "Art. 10", "text": "..."}] — preferred chunking unit for citation
    tags: list[str] = []  # free-form classification tags
class LegalSearchResult(BaseModel):
    """A single search hit: the source document plus the cited excerpt and scoring."""
    document: LegalDocument  # the document the excerpt was taken from
    relevant_excerpt: str  # verbatim text fragment matched by the query
    article_reference: str  # "Article 10, Part 2"
    relevance_score: float  # similarity score from the vector search
    reasoning: str  # explanation of why this norm is considered relevant
class LegalSearchEngine:
    """Vector-store-backed semantic search over a corpus of legal documents.

    Documents are chunked per article when an article breakdown is available
    (so every hit maps to a citable article); otherwise the raw content is
    split with a recursive splitter that prefers "Article" headings.
    """

    def __init__(self, db_path: str = "./legal_db"):
        """Open (or create) the persistent Chroma collection at *db_path*."""
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        self.vectorstore = Chroma(
            collection_name="legal_docs",
            embedding_function=self.embeddings,
            persist_directory=db_path,
        )
        # Prefer splitting at article boundaries, then paragraphs, lines, words.
        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\nArticle ", "\n\n", "\n", " "],
        )

    def index_document(self, doc: LegalDocument) -> None:
        """Index one document.

        One chunk per article when ``doc.articles`` is populated (accurate
        citation); otherwise the full ``doc.content`` is split by the text
        splitter. A document with neither articles nor content is a no-op.
        """
        chunks: list[str] = []
        metadatas: list[dict] = []
        for article in doc.articles:
            # Each article becomes its own chunk so hits map to citable articles.
            chunk_text = f"{doc.title}\n{article['article']}\n{article['text']}"
            chunks.append(chunk_text)
            metadatas.append({
                "doc_id": doc.doc_id,
                "doc_type": doc.doc_type,
                "title": doc.title,
                "number": doc.number,
                "date": doc.date,
                "article": article["article"],
            })
        if not chunks and doc.content:
            # No article breakdown — fall back to plain text splitting.
            for i, split in enumerate(self.splitter.split_text(doc.content)):
                chunks.append(split)
                metadatas.append({
                    "doc_id": doc.doc_id,
                    "doc_type": doc.doc_type,
                    "title": doc.title,
                    "number": doc.number,
                    "date": doc.date,
                    "article": f"section_{i}",
                })
        # Fix: guard against an empty add — a document with no articles and no
        # content would otherwise call add_texts([]), which Chroma rejects.
        if chunks:
            self.vectorstore.add_texts(texts=chunks, metadatas=metadatas)

    def search(self, query: str, k: int = 10, filters: Optional[dict] = None) -> list[dict]:
        """Semantic search across the legal database.

        *filters* may carry ``doc_type`` (exact match) and/or ``date_from``
        (inclusive lower bound on the stored date string).
        Returns a list of {"content", "metadata", "score"} dicts.
        """
        conditions: list[dict] = []
        if filters:
            if filters.get("doc_type"):
                conditions.append({"doc_type": filters["doc_type"]})
            if filters.get("date_from"):
                conditions.append({"date": {"$gte": filters["date_from"]}})
        # Fix: Chroma requires multiple metadata conditions to be combined
        # explicitly with "$and" — a flat dict with two keys is rejected.
        if not conditions:
            where_filter = None
        elif len(conditions) == 1:
            where_filter = conditions[0]
        else:
            where_filter = {"$and": conditions}
        results = self.vectorstore.similarity_search_with_score(
            query,
            k=k,
            filter=where_filter,
        )
        return [{
            "content": doc.page_content,
            "metadata": doc.metadata,
            "score": score,
        } for doc, score in results]
AI Legal Query Analyst
class LegalAnalyst:
    """Answers legal questions by extracting concepts, searching the corpus,
    and synthesizing a source-grounded analysis with an LLM."""

    def __init__(self, search_engine: LegalSearchEngine):
        # The engine is used for all norm retrieval; the module-level Anthropic
        # client is used for concept extraction and answer synthesis.
        self.search = search_engine

    def analyze_question(self, question: str, jurisdiction: str = "RF") -> dict:
        """Analyze a legal question and return a structured answer with sources."""
        # Step 1: extract legal concepts from the question.
        concepts = self._extract_legal_concepts(question)
        # Step 2: search for relevant norms, one query per concept.
        all_results: list[dict] = []
        for concept in concepts:
            all_results.extend(self.search.search(concept, k=5))
        unique_results = self._dedupe(all_results)
        # Step 3: AI analyzes and structures the answer from the top hits.
        return self._synthesize_answer(question, unique_results[:10], jurisdiction)

    @staticmethod
    def _dedupe(results: list[dict]) -> list[dict]:
        """Drop duplicate (doc_id, article) hits, keeping the first occurrence
        (i.e. the highest-ranked one within its concept query)."""
        seen: set[tuple] = set()
        unique: list[dict] = []
        for r in results:
            key = (r["metadata"]["doc_id"], r["metadata"]["article"])
            if key not in seen:
                seen.add(key)
                unique.append(r)
        return unique

    def _extract_legal_concepts(self, question: str) -> list[str]:
        """Extract key legal concepts for search; falls back to the raw
        question when the model response cannot be parsed as JSON."""
        response = client.messages.create(
            model="claude-haiku-4-5",
            max_tokens=512,
            messages=[{
                "role": "user",
                "content": f"""Extract 3-5 key legal concepts/terms from the question for search in legal database.
Question: {question}
Return JSON: {{"concepts": ["concept 1", "concept 2", ...]}}
Concepts should be legal terms, precise for search."""
            }]
        )
        text = response.content[0].text
        # Fix: the model may return no braces at all (str.find -> -1 produced a
        # nonsense slice) or malformed JSON (json.loads raised, crashing the
        # pipeline). Degrade gracefully to searching with the raw question.
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            return [question]
        try:
            data = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            return [question]
        return data.get("concepts") or [question]

    def _synthesize_answer(self, question: str, results: list[dict], jurisdiction: str) -> dict:
        """Synthesize a structured answer from the found legal norms.

        Returns {"question", "answer", "sources"}; sources list the top 5 hits.
        """
        context = "\n\n".join([
            f"[{r['metadata']['title']}, {r['metadata']['article']}]\n{r['content']}"
            for r in results
        ])
        response = client.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=4096,
            system=f"""You are a legal analyst for {jurisdiction} legislation.
CRITICALLY IMPORTANT:
- Cite ONLY norms from provided documents
- Always specify source: law + article + section
- Don't interpret expansively — only what's written in law
- If norm not found — state it directly
- Distinguish: law establishes / court practices / doctrine considers
Answer structure:
1. Applicable norms (with citations and references)
2. Judicial practice (if available)
3. Conclusion
4. What's not covered by found norms""",
            messages=[{
                "role": "user",
                "content": f"""Question: {question}
Found legal norms:
{context}
Provide structured legal analysis."""
            }]
        )
        return {
            "question": question,
            "answer": response.content[0].text,
            "sources": [
                {
                    "title": r["metadata"]["title"],
                    "number": r["metadata"]["number"],
                    "article": r["metadata"]["article"],
                    "date": r["metadata"]["date"],
                }
                for r in results[:5]
            ],
        }
Case Law Search
class CaseLawSearchEngine:
    """Specialized search for judicial decisions (precedent analysis)."""

    def find_precedents(
        self,
        legal_issue: str,
        court_level: str = "all",  # "supreme", "arbitration", "general_jurisdiction"
        outcome_filter: Optional[str] = None,  # "granted", "denied"
    ) -> dict:
        """Search for relevant judicial precedents on *legal_issue*.

        Returns {"analysis": <model text>}. (Fix: the annotation previously
        claimed ``list[dict]`` while the method has always returned a dict.)
        """
        response = client.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=2048,
            system="""You analyze judicial decisions to search for precedents.
Structure information: dispute essence, court's legal position, norm references, outcome.
IMPORTANT: Don't fabricate case details. Work only with provided documents.""",
            messages=[{
                "role": "user",
                "content": f"""Find precedents for the issue: {legal_issue}
Search parameters:
- Court level: {court_level}
- Outcome: {outcome_filter or "any"}
Based on cases found in database, show:
1. Cases with similar legal issues
2. Court's legal position on each case
3. Trends in judicial practice
4. Key arguments accepted by the court"""
            }]
        )
        return {"analysis": response.content[0].text}
Practical Case: Corporate Legal Department
Context: Legal department of a manufacturing holding (12 lawyers). Main tasks: contract review, labor disputes, tax issues. Legal database: Civil Code, Labor Code, Tax Code, 200+ federal laws, Supreme and Arbitration Court practices.
Implementation:
- Indexing 1800 documents (laws + key Supreme Court decisions)
- Interface in corporate Confluence
- Auto-responder for typical legal questions on HR and contracts
Metrics:
- Time to find applicable norms: 45 min → 8 min (initial search)
- Accuracy of norm references: 94% (6% required manual verification)
- Standard questions (business trips, sick leave, VAT): 70% resolved without lawyer involvement
- Lawyer time savings: ~40%
Key principle: the system always shows its sources and warns that the answer is an analytical memorandum, not legal advice.
Timeline
- Indexing legal database + basic search: 1 week
- AI analyst with answer synthesis: 1 week
- Case law search: 1–2 weeks
- Corporate interface + access rights: 1–2 weeks







