AI-Powered Code Quality Analysis
Static analyzers (ruff, SonarQube, ESLint) find syntax violations and known anti-patterns. AI analysis operates at a higher level: it understands code semantics, sees architectural problems, notices discrepancies between function names and behavior, and discovers hidden dependencies. This isn't a linter replacement — it's the next layer of analysis.
Analyzer Architecture
import ast
import json
import logging
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import Literal

from anthropic import Anthropic
client = Anthropic()
@dataclass
class QualityIssue:
file: str
line: int | None
severity: Literal["critical", "major", "minor", "info"]
category: str
title: str
description: str
recommendation: str
class CodeQualityAnalyzer:
    """Analyze a Python file with static tools (ruff, radon) and an LLM review.

    The static pass and the AI pass are independent: a missing tool or an
    unparsable tool/model response is logged and contributes no issues instead
    of aborting the whole analysis.
    """

    # Model and response budget used for the AI review.
    AI_MODEL = "claude-sonnet-4-5"
    AI_MAX_TOKENS = 4096
    # Only this many leading characters of the file are sent to the model.
    MAX_SOURCE_CHARS = 5000
    # Functions above COMPLEXITY_THRESHOLD are reported; above
    # COMPLEXITY_MAJOR_THRESHOLD they are reported as "major".
    COMPLEXITY_THRESHOLD = 10
    COMPLEXITY_MAJOR_THRESHOLD = 15
    # Severities accepted from the model; anything else is downgraded to "info".
    VALID_SEVERITIES = ("critical", "major", "minor", "info")

    _SYSTEM_PROMPT = """You are a senior code reviewer. Analyze code for:
1. ARCHITECTURAL PROBLEMS: SOLID violations, God Object, Feature Envy
2. HIDDEN BUGS: race conditions, off-by-one, improper None handling
3. SECURITY: SQL injection, XSS, unprotected credentials
4. PERFORMANCE: N+1 queries, blocking operations in async, memory leaks
5. SEMANTICS: mismatch between name and behavior, misleading comments
Return JSON array of issues:
[{
"line": <number or null>,
"severity": "critical|major|minor|info",
"category": "architecture|bug|security|performance|semantics",
"title": "<short title>",
"description": "<what exactly is wrong>",
"recommendation": "<how to fix>"
}]"""

    _log = logging.getLogger(__name__)

    def analyze_file(self, file_path: str) -> "list[QualityIssue]":
        """Run the static pass and then the AI pass on one file.

        Args:
            file_path: Path of the Python file to analyze.

        Returns:
            Static issues followed by AI issues.
        """
        # Python sources are UTF-8 by default; do not depend on the locale.
        # Undecodable bytes are replaced so one odd file cannot stop a run.
        source = Path(file_path).read_text(encoding="utf-8", errors="replace")

        # Level 1: fast static analysis
        static_issues = self._run_static_analysis(file_path, source)
        # Level 2: AI analysis for deep problems
        ai_issues = self._run_ai_analysis(file_path, source)
        return static_issues + ai_issues

    def _run_static_analysis(self, file_path: str, source: str) -> "list[QualityIssue]":
        """Collect ruff findings and radon cyclomatic-complexity findings.

        ``source`` is accepted for signature compatibility; both tools read
        the file from disk themselves.
        """
        ruff_stdout = self._run_tool(
            ["ruff", "check", "--output-format=json", file_path]
        )
        issues = self._parse_ruff_output(file_path, ruff_stdout)

        radon_stdout = self._run_tool(["radon", "cc", "-j", file_path])
        issues.extend(self._parse_radon_output(file_path, radon_stdout))
        return issues

    @classmethod
    def _run_tool(cls, command: "list[str]") -> str:
        """Run an external tool and return its stdout ("" if it cannot start)."""
        try:
            completed = subprocess.run(command, capture_output=True, text=True)
        except OSError as exc:
            # Typically FileNotFoundError: the tool is not installed.
            cls._log.warning("Could not run %s: %s", command[0], exc)
            return ""
        return completed.stdout or ""

    @classmethod
    def _load_json(cls, raw: str, tool: str):
        """Decode tool output as JSON; return None for empty or invalid output."""
        if not raw or not raw.strip():
            return None
        try:
            return json.loads(raw)
        except json.JSONDecodeError as exc:
            cls._log.warning("Ignoring non-JSON output from %s: %s", tool, exc)
            return None

    @classmethod
    def _parse_ruff_output(cls, file_path: str, stdout: str) -> "list[QualityIssue]":
        """Convert ruff's JSON diagnostics into minor "style" issues."""
        diagnostics = cls._load_json(stdout, "ruff")
        if not isinstance(diagnostics, list):
            return []

        issues = []
        for item in diagnostics:
            if not isinstance(item, dict):
                continue
            location = item.get("location")
            row = location.get("row") if isinstance(location, dict) else None
            issues.append(QualityIssue(
                file=file_path,
                line=row,
                severity="minor",
                category="style",
                # The rule code may be absent/null; keep the title a string.
                title=item.get("code") or "ruff",
                description=item.get("message", ""),
                recommendation="See ruff documentation",
            ))
        return issues

    @classmethod
    def _parse_radon_output(cls, file_path: str, stdout: str) -> "list[QualityIssue]":
        """Convert radon's JSON report into "complexity" issues."""
        report = cls._load_json(stdout, "radon")
        if not isinstance(report, dict):
            return []

        issues = []
        for entries in report.values():
            # A per-file value that is not a list (e.g. an {"error": ...}
            # mapping for a file radon could not parse) carries no blocks.
            if not isinstance(entries, list):
                continue
            for func in entries:
                if not isinstance(func, dict):
                    continue
                complexity = func.get("complexity", 0)
                if not isinstance(complexity, (int, float)):
                    continue
                if complexity <= cls.COMPLEXITY_THRESHOLD:
                    continue
                is_major = complexity > cls.COMPLEXITY_MAJOR_THRESHOLD
                issues.append(QualityIssue(
                    file=file_path,
                    line=func.get("lineno"),
                    severity="major" if is_major else "minor",
                    category="complexity",
                    title=f"High complexity: {func.get('name', '<unknown>')}",
                    description=(
                        f"Cyclomatic complexity: {complexity} "
                        f"(threshold: {cls.COMPLEXITY_THRESHOLD})"
                    ),
                    recommendation="Decompose into smaller functions",
                ))
        return issues

    def _run_ai_analysis(self, file_path: str, source: str) -> "list[QualityIssue]":
        """Ask the model for architectural/semantic problems in ``source``.

        Only the first ``MAX_SOURCE_CHARS`` characters are sent. Errors raised
        by the API client propagate; an unusable reply yields an empty list.
        """
        snippet = source[: self.MAX_SOURCE_CHARS]
        response = client.messages.create(
            model=self.AI_MODEL,
            max_tokens=self.AI_MAX_TOKENS,
            system=self._SYSTEM_PROMPT,
            messages=[{
                "role": "user",
                "content": f"Analyze code quality:\n\n```python\n{snippet}\n```"
            }]
        )
        if not response.content:
            self._log.warning("Empty AI response for %s", file_path)
            return []
        return self._parse_ai_response(file_path, response.content[0].text)

    @classmethod
    def _parse_ai_response(cls, file_path: str, text: str) -> "list[QualityIssue]":
        """Extract the JSON array from the model reply and build issues.

        Malformed entries are skipped one by one, so a single bad item does
        not discard the rest of the reply.
        """
        # The reply may wrap the array in prose or a code fence.
        start = text.find("[")
        end = text.rfind("]") + 1
        if start == -1 or end <= start:
            cls._log.warning("No JSON array in AI response for %s", file_path)
            return []
        try:
            payload = json.loads(text[start:end])
        except json.JSONDecodeError as exc:
            cls._log.warning(
                "Could not parse AI response for %s: %s", file_path, exc
            )
            return []
        if not isinstance(payload, list):
            return []

        issues = []
        for item in payload:
            if not isinstance(item, dict):
                continue
            issues.append(QualityIssue(
                file=file_path,
                line=cls._coerce_line(item.get("line")),
                severity=cls._normalize_severity(item.get("severity")),
                category=item.get("category", "general"),
                title=item.get("title", ""),
                description=item.get("description", ""),
                recommendation=item.get("recommendation", ""),
            ))
        return issues

    @classmethod
    def _normalize_severity(cls, value) -> str:
        """Map a model-supplied severity onto the allowed set ("info" fallback)."""
        if isinstance(value, str):
            candidate = value.strip().lower()
            if candidate in cls.VALID_SEVERITIES:
                return candidate
        return "info"

    @staticmethod
    def _coerce_line(value) -> "int | None":
        """Return ``value`` as an int line number, or None if it is not one."""
        if isinstance(value, bool):
            return None
        if isinstance(value, int):
            return value
        if isinstance(value, str) and value.strip().isdigit():
            return int(value.strip())
        return None
Technical Debt Analysis
class TechDebtAnalyzer:
    """Use the model to assess a module's technical debt and plan refactoring."""

    # Model and response budget shared by both requests.
    MODEL = "claude-sonnet-4-5"
    MAX_TOKENS = 2048
    # Only this many leading characters of the module are sent to the model.
    MAX_SOURCE_CHARS = 4000

    def analyze_module(self, module_path: str) -> dict:
        """Assess the technical debt of one module.

        Args:
            module_path: Path of the Python module to assess.

        Returns:
            The JSON object decoded from the model reply. The prompt asks for
            the keys ``debt_score``, ``estimated_hours``, ``top_issues``,
            ``quick_wins`` and ``requires_redesign``; the reply is not
            validated against that shape.

        Raises:
            json.JSONDecodeError: If the reply contains no decodable JSON object.
        """
        # Python sources are UTF-8 by default; do not depend on the locale.
        source = Path(module_path).read_text(encoding="utf-8", errors="replace")
        snippet = source[: self.MAX_SOURCE_CHARS]
        response = client.messages.create(
            model=self.MODEL,
            max_tokens=self.MAX_TOKENS,
            messages=[{
                "role": "user",
                "content": f"""Assess the technical debt of this module.
Return JSON:
{{
"debt_score": <0-100, where 100 = maximum debt>,
"estimated_hours": <estimate hours to refactor>,
"top_issues": [
{{"category": "...", "description": "...", "impact": "high|medium|low"}}
],
"quick_wins": ["<what can be improved in 30 min>"],
"requires_redesign": <true/false>
}}
Code:
```python
{snippet}
```"""
            }]
        )
        return self._parse_debt_report(response.content[0].text)

    @staticmethod
    def _parse_debt_report(text: str) -> dict:
        """Decode the JSON object embedded in a model reply.

        The object is taken as the span from the first ``{`` to the last
        ``}``, so surrounding prose or code fences are tolerated.
        """
        start = text.find("{")
        end = text.rfind("}") + 1
        if start == -1 or end <= start:
            # Fail with an explicit message instead of decoding a meaningless
            # slice; the exception type matches what json.loads would raise.
            raise json.JSONDecodeError(
                "No JSON object found in model response", text, 0
            )
        return json.loads(text[start:end])

    def generate_refactoring_plan(self, module_path: str, debt_report: dict) -> str:
        """Turn a debt report into a prioritized refactoring plan.

        Args:
            module_path: Unused by the request; kept for interface compatibility.
            debt_report: Report to embed in the prompt as JSON.

        Returns:
            The model's reply text.
        """
        response = client.messages.create(
            model=self.MODEL,
            max_tokens=self.MAX_TOKENS,
            messages=[{
                "role": "user",
                "content": f"""Based on technical debt analysis, create a refactoring plan.
Report:
{json.dumps(debt_report, ensure_ascii=False, indent=2)}
Format: prioritized task list with time estimates and expected results.
Group by: Quick Wins (< 2h), Medium Tasks (2–8h), Major Refactoring (> 8h)."""
            }]
        )
        return response.content[0].text
Quality Dashboard Metrics
def _count_severities(issues: list) -> dict:
    """Count critical/major/minor issues in a single pass."""
    counts = {"critical": 0, "major": 0, "minor": 0}
    for issue in issues:
        if issue.severity in counts:
            counts[issue.severity] += 1
    return counts


def generate_quality_report(project_root: str) -> dict:
    """Generate a quality report for every ``*.py`` file under a project.

    Args:
        project_root: Directory that is searched recursively.

    Returns:
        A dict with ``total_issues``, ``by_severity`` (critical/major/minor
        counts), ``by_category`` (issue count per category), ``worst_files``
        (up to ten ``(path, counts)`` pairs, worst first) and ``quality_score``.
    """
    analyzer = CodeQualityAnalyzer()
    root = Path(project_root)
    skip_markers = ("migrations", "__pycache__", ".venv")

    all_issues = []
    file_metrics = {}
    # Sorted traversal keeps the report (and tie order in worst_files) stable.
    for py_file in sorted(root.rglob("*.py")):
        # Match markers against the path relative to the root, so a project
        # that itself lives under e.g. ".venv" or "migrations" is not skipped
        # wholesale.
        relative = py_file.relative_to(root).as_posix()
        if any(marker in relative for marker in skip_markers):
            continue
        issues = analyzer.analyze_file(str(py_file))
        all_issues.extend(issues)
        file_metrics[str(py_file)] = _count_severities(issues)

    # Top problematic files, weighted like the quality score.
    worst_files = sorted(
        file_metrics.items(),
        key=lambda entry: (
            entry[1]["critical"] * 10 + entry[1]["major"] * 3 + entry[1]["minor"]
        ),
        reverse=True,
    )[:10]

    by_category = {}
    for issue in all_issues:
        by_category[issue.category] = by_category.get(issue.category, 0) + 1

    return {
        "total_issues": len(all_issues),
        "by_severity": _count_severities(all_issues),
        "by_category": by_category,
        "worst_files": worst_files,
        "quality_score": calculate_quality_score(all_issues, len(file_metrics)),
    }
def calculate_quality_score(issues: list, file_count: int) -> float:
    """Compute a single code quality score in the range 0-100.

    Each issue adds a penalty by severity (critical 10, major 3, minor 1,
    info or unknown 0). The total penalty is averaged over the number of
    files and subtracted from 100.

    Args:
        issues: Objects exposing a ``severity`` attribute.
        file_count: Number of analyzed files.

    Returns:
        The score rounded to one decimal, always a float. ``100.0`` when
        ``file_count`` is zero or negative.
    """
    if file_count <= 0:
        return 100.0

    weights = {
        "critical": 10,
        "major": 3,
        "minor": 1,
        "info": 0,
    }
    penalty = sum(weights.get(issue.severity, 0) for issue in issues)

    # Normalize by file count; clamp with a float so the result type is stable.
    score = max(0.0, 100 - penalty / file_count)
    return round(score, 1)
Practical Case: Payment Service
Task: Legacy payment service, 15,000 lines of Python, 4 years without refactoring. Before adding new payment providers — quality audit.
AI Analysis Results in 2 hours:
- 3 critical security issues (hardcoded API keys in tests that had leaked into the repository, unparameterized SQL in one place, and card data being logged in debug mode)
- 12 architectural problems (God Object PaymentProcessor with 2,800 lines, circular imports)
- 47 error handling problems
Prioritization:
- Sprint 1: critical security issues (3 days)
- Sprint 2: PaymentProcessor decomposition (2 weeks)
- Sprint 3: error handling + tests (1 week)
Code Quality Before/After: score 31/100 → 72/100 after three sprints.
Without AI analysis, a manual audit would have taken a senior developer 3–5 days.
Timeline
- Basic analyzer (static + AI for one file): 2–3 days
- Project analysis with report: 1 week
- Dashboard with historical metrics: 2 weeks
- CI/CD integration with quality gate: 1 week







