AI System for Technical Debt Management
Technical debt is the accumulation of compromises made under the motto "do it quickly now, rewrite later". The problem is that "later" never comes, because debt is hard to measure and refactoring time is hard to justify to management. An AI system makes technical debt measurable, prioritized, and manageable like a regular backlog.
Debt Detection and Measurement
import ast
import hashlib
import json
import logging
import os
import subprocess
from dataclasses import dataclass, field
from pathlib import Path
from typing import Literal

from anthropic import Anthropic
# Module-level Anthropic client, created once at import time with no explicit
# arguments and used by the code below for every `client.messages.create` call.
# NOTE(review): presumably credentials are picked up from the environment — confirm.
client = Anthropic()
@dataclass
class DebtItem:
    """One technical-debt finding produced by a scanner."""

    # Identifier assigned by the scanner that produced the finding.
    id: str
    # Path of the file the finding is attached to.
    file: str
    # Kind of debt; restricted to the values listed here.
    category: Literal[
        "code_smell",
        "architecture",
        "security",
        "test_coverage",
        "documentation",
        "dependency",
    ]
    # Urgency bucket, from most to least severe.
    severity: Literal[
        "critical",
        "high",
        "medium",
        "low",
    ]
    # Short headline for the finding.
    title: str
    # Longer explanation or recommendation.
    description: str
    # Estimated effort to resolve the finding, in hours.
    estimated_hours: float
    # Human-readable statement of why the finding matters.
    business_impact: str
    # True when the finding is cheap to resolve; defaults to False.
    quick_fix: bool = False
class TechDebtScanner:
    """Collects technical-debt findings from external tools and an AI review.

    ``scan_project`` is the entry point. Every ``_scan_*`` method wraps one
    source of findings and returns ``DebtItem`` objects. The tool-based
    scanners are best-effort: a tool that cannot be started or a report that
    cannot be parsed is logged and contributes no items, so one broken source
    does not abort the whole scan.
    """

    _log = logging.getLogger(__name__)
    # Directories that are never descended into when listing project files.
    _SKIPPED_DIRS = frozenset({".git", "__pycache__", ".venv"})
    # Severity values accepted by DebtItem.
    _SEVERITIES = ("critical", "high", "medium", "low")
    # A file with more lines than this is treated as a potential God Object.
    _LARGE_FILE_LINES = 500
    # Upper bound on the number of large files listed in a single prompt.
    _MAX_LARGE_FILES = 50

    def scan_project(self, project_root: str) -> "list[DebtItem]":
        """Run every scanner against *project_root* and return all findings.

        Args:
            project_root: Directory of the project to analyse.

        Returns:
            Findings in scanner order: dependencies, complexity,
            architecture, comment markers.
        """
        all_items = []
        # 1. Dependencies with vulnerabilities
        all_items.extend(self._scan_dependencies(project_root))
        # 2. Code complexity
        all_items.extend(self._scan_complexity(project_root))
        # 3. AI analysis of architectural issues
        all_items.extend(self._ai_scan_architecture(project_root))
        # 4. TODO/FIXME/HACK comments
        all_items.extend(self._scan_comments(project_root))
        return all_items

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    def _run_tool(self, cmd: "list[str]", cwd: "str | None" = None):
        """Run an external command, returning the completed process or None.

        None is returned (and a warning logged) when the executable cannot be
        started, e.g. because the tool is not installed.
        """
        try:
            return subprocess.run(cmd, capture_output=True, text=True, cwd=cwd)
        except OSError as exc:
            self._log.warning("Cannot run %s: %s", cmd[0], exc)
            return None

    @staticmethod
    def _stable_id(prefix: str, key: str) -> str:
        """Build an id from *key* that is identical across interpreter runs.

        The built-in ``hash()`` of a string is randomized per process, so it
        cannot be used for ids that are compared between scans.
        """
        digest = hashlib.sha256(key.encode("utf-8")).hexdigest()[:12]
        return f"{prefix}_{digest}"

    @staticmethod
    def _extract_json_array(text: str) -> list:
        """Return the JSON array embedded in *text*, or ``[]`` if none parses."""
        start = text.find("[")
        end = text.rfind("]")
        if start == -1 or end <= start:
            return []
        try:
            parsed = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            return []
        return parsed if isinstance(parsed, list) else []

    @staticmethod
    def _dependency_item(package, versions, description, raw) -> "DebtItem":
        """Create the DebtItem for one vulnerable dependency.

        *raw* is the tool's record for the vulnerability; the item is marked
        critical when the word "critical" appears anywhere in it.
        """
        return DebtItem(
            id=f"dep_{package if package is not None else 'unknown'}",
            file="requirements.txt",
            category="security",
            severity="critical" if "critical" in str(raw).lower() else "high",
            title=f"Vulnerability in {package} {versions}",
            description=description,
            estimated_hours=0.5,
            business_impact="Potential security vulnerability",
            quick_fix=True,
        )

    # ------------------------------------------------------------------
    # Dependencies
    # ------------------------------------------------------------------

    def _scan_dependencies(self, project_root: str) -> "list[DebtItem]":
        """Scan dependencies for known vulnerabilities.

        ``safety`` is tried first; ``pip-audit`` is used as the alternative
        when safety cannot be run or its report cannot be read.
        """
        items = None
        safety = self._run_tool(
            ["safety", "check", "--json", "--full-report"], cwd=project_root
        )
        if safety is not None:
            items = self._parse_safety_report(safety.returncode, safety.stdout)
        if items is None:
            audit = self._run_tool(["pip-audit", "--format=json"], cwd=project_root)
            if audit is not None:
                items = self._parse_pip_audit_report(audit.stdout)
        return items or []

    def _parse_safety_report(self, returncode: int, stdout: str):
        """Convert safety's JSON output to DebtItems.

        Returns:
            A list of items (empty when safety exited with status 0), or None
            when the report is unusable and another tool should be tried.
        """
        if returncode == 0:
            # A zero exit status is treated as "nothing reported".
            return []
        if not stdout:
            self._log.warning("safety exited with %s and no report", returncode)
            return None
        try:
            report = json.loads(stdout)
        except json.JSONDecodeError as exc:
            self._log.warning("Unreadable safety report: %s", exc)
            return None
        # NOTE(review): some safety versions appear to wrap the findings in an
        # object under "vulnerabilities" — confirm against the installed version.
        if isinstance(report, dict):
            report = report.get("vulnerabilities", [])
        if not isinstance(report, list):
            self._log.warning("Unexpected safety report shape: %s", type(report).__name__)
            return None
        return [
            self._dependency_item(
                vuln.get("package_name", "unknown"),
                vuln.get("affected_versions"),
                vuln.get("vulnerability", ""),
                vuln,
            )
            for vuln in report
            if isinstance(vuln, dict)  # skip entries that are not key/value records
        ]

    def _parse_pip_audit_report(self, stdout: str) -> "list[DebtItem]":
        """Convert pip-audit's JSON output to DebtItems.

        NOTE(review): expects either a list of dependency objects or an object
        with a "dependencies" list, each entry carrying "name", "version" and
        "vulns" — confirm against the installed pip-audit version.
        """
        if not stdout:
            return []
        try:
            report = json.loads(stdout)
        except json.JSONDecodeError as exc:
            self._log.warning("Unreadable pip-audit report: %s", exc)
            return []
        dependencies = report.get("dependencies", []) if isinstance(report, dict) else report
        if not isinstance(dependencies, list):
            return []
        items = []
        for dep in dependencies:
            if not isinstance(dep, dict):
                continue
            for vuln in dep.get("vulns") or []:
                if not isinstance(vuln, dict):
                    continue
                items.append(
                    self._dependency_item(
                        dep.get("name", "unknown"),
                        dep.get("version"),
                        vuln.get("description", ""),
                        vuln,
                    )
                )
        return items

    # ------------------------------------------------------------------
    # Complexity
    # ------------------------------------------------------------------

    def _scan_complexity(self, project_root: str) -> "list[DebtItem]":
        """Find functions whose cyclomatic complexity is 10 or more (via radon)."""
        proc = self._run_tool(
            ["radon", "cc", "-j", "-n", "C", project_root]  # Only C grade and above
        )
        if proc is None or not proc.stdout:
            return []
        try:
            report = json.loads(proc.stdout)
        except json.JSONDecodeError as exc:
            self._log.warning("Unreadable radon report: %s", exc)
            return []
        if not isinstance(report, dict):
            return []

        items = []
        for file_path, functions in report.items():
            if not isinstance(functions, list):
                # Not a list of function records (e.g. a per-file error entry).
                continue
            for func in functions:
                if not isinstance(func, dict):
                    continue
                complexity = func.get("complexity", 0)
                if complexity < 10:
                    continue
                name = func.get("name", "<unknown>")
                if complexity >= 20:
                    severity = "critical"
                elif complexity >= 15:
                    severity = "high"
                else:
                    severity = "medium"
                items.append(DebtItem(
                    id=f"cc_{file_path}_{name}",
                    file=file_path,
                    category="code_smell",
                    severity=severity,
                    title=f"High complexity: {name} (CC={complexity})",
                    description=f"Cyclomatic complexity {complexity} exceeds threshold of 10. Function is hard to test and maintain.",
                    estimated_hours=complexity * 0.5,  # Rough estimate
                    business_impact="Increases change time, bug risk on modifications",
                ))
        return items

    # ------------------------------------------------------------------
    # Comment markers
    # ------------------------------------------------------------------

    def _scan_comments(self, project_root: str) -> "list[DebtItem]":
        """Find TODO/FIXME/HACK/XXX/BUG comment markers in ``*.py`` files (via grep)."""
        proc = self._run_tool(
            ["grep", "-rn", "--include=*.py", r"#\s*\(TODO\|FIXME\|HACK\|XXX\|BUG\)", project_root]
        )
        if proc is None:
            return []

        items = []
        for line in proc.stdout.splitlines():
            # grep -n prints "path:line:text"; anything else is not a match line.
            parts = line.split(":", 2)
            if len(parts) < 3:
                continue
            file_path, _line_num, comment = parts
            severity = "high" if "HACK" in comment or "FIXME" in comment else "low"
            items.append(DebtItem(
                id=self._stable_id("todo", line),
                file=file_path,
                category="code_smell",
                severity=severity,
                title="Technical marker in code",
                description=comment.strip(),
                estimated_hours=2.0,
                business_impact="Documented technical debt",
                quick_fix=False,
            ))
        return items

    # ------------------------------------------------------------------
    # Architecture (AI)
    # ------------------------------------------------------------------

    def _find_large_files(self, project_root: str) -> "list[tuple[str, int]]":
        """Return ``(path, line_count)`` for Python files over the size threshold.

        Every ``*.py`` file outside the skipped directories is inspected; the
        result is ordered largest first and capped at ``_MAX_LARGE_FILES`` so
        the cap limits what is sent to the model, not what is looked at.
        """
        large_files = []
        for root, dirs, files in os.walk(project_root):
            # Prune in place so os.walk does not descend into these directories.
            dirs[:] = [d for d in dirs if d not in self._SKIPPED_DIRS]
            for name in files:
                if not name.endswith(".py"):
                    continue
                path = Path(root) / name
                try:
                    text = path.read_text(encoding="utf-8", errors="replace")
                except OSError as exc:
                    self._log.warning("Cannot read %s: %s", path, exc)
                    continue
                line_count = len(text.splitlines())
                if line_count > self._LARGE_FILE_LINES:
                    large_files.append((str(path), line_count))
        large_files.sort(key=lambda entry: entry[1], reverse=True)
        return large_files[: self._MAX_LARGE_FILES]

    def _ai_scan_architecture(self, project_root: str) -> "list[DebtItem]":
        """Ask the model to review the project's largest files.

        Returns no items when there are no large files or when the reply
        contains no usable JSON array. Errors raised by the API client itself
        are not caught here.
        """
        large_files = self._find_large_files(project_root)
        if not large_files:
            return []

        prompt = f"""Analyze list of large files for architectural issues.
Files (path, num lines):
{json.dumps(large_files, ensure_ascii=False)}
Return JSON:
[{{
"file": "...",
"issue": "...",
"severity": "high|medium",
"estimated_hours": <number>,
"recommendation": "..."
}}]"""
        response = client.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=2048,
            messages=[{"role": "user", "content": prompt}],
        )
        try:
            text = response.content[0].text
        except (IndexError, AttributeError):
            self._log.warning("Architecture review returned no text content")
            return []

        arch_issues = self._extract_json_array(text)
        if not arch_issues:
            self._log.warning("Architecture review reply contained no JSON array of issues")
            return []

        items = []
        for issue in arch_issues:
            if not isinstance(issue, dict) or not issue.get("file"):
                continue
            file_path = str(issue["file"])
            # The reply is free-form text: keep only values DebtItem allows.
            severity = issue.get("severity", "medium")
            if severity not in self._SEVERITIES:
                severity = "medium"
            try:
                hours = float(issue.get("estimated_hours", 8.0))
            except (TypeError, ValueError):
                hours = 8.0
            items.append(DebtItem(
                id=self._stable_id("arch", file_path),
                file=file_path,
                category="architecture",
                severity=severity,
                title=issue.get("issue", "Architectural issue"),
                description=issue.get("recommendation", ""),
                estimated_hours=hours,
                business_impact="Slows down new feature development",
            ))
        return items
Prioritization and Planning
class TechDebtPlanner:
    """Turns a list of debt items into a capacity-bound plan and Jira tasks."""

    def prioritize(
        self,
        items: "list[DebtItem]",
        available_hours: float,
        team_velocity: float = 0.7,
    ) -> dict:
        """Pick the highest-value items that fit into the available time.

        Each item is scored as ``severity weight / effort`` (the weight is
        doubled for quick fixes, effort is at least 0.5h). Items are then taken
        greedily in score order while they fit into
        ``available_hours * team_velocity``.

        Args:
            items: Debt items to rank.
            available_hours: Hours the team can spend on debt.
            team_velocity: Fraction of ``available_hours`` that is effectively usable.

        Returns:
            A dict with ``selected_items``, ``total_hours``,
            ``debt_reduced_hours``, ``remaining_items`` (everything not
            selected, in score order) and ``sprint_capacity``.
        """
        # Score = impact * urgency / effort
        severity_weights = {"critical": 100, "high": 40, "medium": 10, "low": 2}

        def score(item) -> float:
            # Unknown severities are weighted as "medium" instead of failing.
            weight = severity_weights.get(item.severity, severity_weights["medium"])
            if item.quick_fix:
                weight *= 2  # Quick fixes prioritized
            return weight / max(item.estimated_hours, 0.5)

        # sorted() is stable, so items with equal scores keep their input order.
        ranked = sorted(items, key=score, reverse=True)

        effective_hours = available_hours * team_velocity
        selected = []
        remaining = []
        total_hours = 0.0
        for item in ranked:
            if total_hours + item.estimated_hours <= effective_hours:
                selected.append(item)
                total_hours += item.estimated_hours
            else:
                # Partitioned by position, not by equality: an unselected item
                # that compares equal to a selected one must still be reported.
                remaining.append(item)

        return {
            "selected_items": selected,
            "total_hours": total_hours,
            "debt_reduced_hours": total_hours,
            "remaining_items": remaining,
            "sprint_capacity": available_hours,
        }

    def generate_jira_tickets(self, items: "list[DebtItem]") -> "list[dict]":
        """Ask the model to draft one Jira task per debt item.

        Args:
            items: Debt items to convert; an empty list returns ``[]`` without
                calling the API.

        Returns:
            The JSON array found in the model's reply, parsed into Python objects.

        Raises:
            json.JSONDecodeError: If the reply contains no parsable JSON array.
        """
        if not items:
            return []

        payload = json.dumps(
            [
                {
                    "title": i.title,
                    "description": i.description,
                    "severity": i.severity,
                    "hours": i.estimated_hours,
                    "business_impact": i.business_impact,
                }
                for i in items
            ],
            ensure_ascii=False,
            indent=2,
        )
        prompt = f"""Create Jira tasks for technical debt.
Debt items:
{payload}
For each create Jira task:
{{
"summary": "...",
"description": "...",
"story_points": <1-13>,
"priority": "Highest|High|Medium|Low",
"labels": ["tech-debt", "<category>"],
"acceptance_criteria": ["..."]
}}"""
        response = client.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=4096,
            messages=[{"role": "user", "content": prompt}],
        )
        text = response.content[0].text
        start = text.find("[")
        end = text.rfind("]") + 1
        if start == -1 or end <= start:
            # Same exception type json.loads would raise, with a clearer message.
            raise json.JSONDecodeError("No JSON array found in model response", text, 0)
        return json.loads(text[start:end])
Technical Debt Dashboard
def generate_debt_dashboard(project_root: str) -> dict:
    """Scan *project_root* and return a summary report of its technical debt.

    Args:
        project_root: Directory of the project to analyse.

    Returns:
        The dict built by ``summarize_debt_items`` for the scan results, with
        the debt index computed over all ``*.py`` files under *project_root*.
    """
    scanner = TechDebtScanner()
    items = scanner.scan_project(project_root)
    # Count lazily; the paths themselves are not needed.
    python_file_count = sum(1 for _ in Path(project_root).rglob("*.py"))
    return summarize_debt_items(items, python_file_count)


def summarize_debt_items(items: "list[DebtItem]", python_file_count: int) -> dict:
    """Aggregate debt items into dashboard figures (no I/O).

    Args:
        items: Debt items to summarize.
        python_file_count: Number of Python files in the project; values below
            1 are treated as 1 when computing the debt index.

    Returns:
        A dict with:
        - ``total_items``: number of items;
        - ``total_hours``: sum of ``estimated_hours``;
        - ``debt_index``: ``total_hours`` per Python file, rounded to 2 places;
        - ``by_severity`` / ``by_category``: item counts per value;
        - ``top_10_critical``: up to ten critical/high items, critical first,
          then by estimated hours descending.
    """
    by_severity = {}
    by_category = {}
    for item in items:
        by_severity[item.severity] = by_severity.get(item.severity, 0) + 1
        by_category[item.category] = by_category.get(item.category, 0) + 1

    total_hours = sum(item.estimated_hours for item in items)

    # Rank severity before effort so inexpensive critical items are not pushed
    # out of the top ten by large "high" items.
    severity_rank = {"critical": 0, "high": 1}
    urgent = sorted(
        (item for item in items if item.severity in severity_rank),
        key=lambda item: (severity_rank[item.severity], -item.estimated_hours),
    )

    return {
        "total_items": len(items),
        "total_hours": total_hours,
        "debt_index": round(total_hours / max(python_file_count, 1), 2),
        "by_severity": by_severity,
        "by_category": by_category,
        "top_10_critical": urgent[:10],
    }
Practical Case: 4-Year-Old SaaS Debt
Context: an HR SaaS product with 4 years of development and 3 team rotations. Complaint: any new feature takes 3–4x longer than expected.
Scan Results:
- 847 technical debt items
- Critical: 23 (12 dependency vulnerabilities, 11 God Objects)
- Total estimate: 1340 hours to eliminate
- Debt Index: 8.7 (high — norm < 3.0)
Plan of Attack:
- Sprint 1 (20h): all dependency vulnerabilities — package updates, 0.5h each
- Sprints 2–4 (60h): 4 main God Objects → decomposition
- Sprints 5–8 (80h): test coverage 28% → 70%
Results over 4 months:
- Debt Index: 8.7 → 3.2
- Typical feature implementation time: -41%
- Production bug rate: -38%
ROI Calculation: 160 hours spent on debt repayment saved ~320 hours of slowdown over a quarter — a 2:1 payback in the first quarter.
Timeline
- Basic scanner (complexity + TODO + dependencies): 3–5 days
- AI architectural issue analysis: 1 week
- Prioritization + Jira task generation: 1 week
- Dashboard with historical trends: 2 weeks







