#!/usr/bin/env python3
"""Scan api/app for AI-related symbols (LLM, embedding, langchain JSON helpers).

Run from repo root:
  uv run python api/scripts/ai_touchpoints_scan.py
  uv run python api/scripts/ai_touchpoints_scan.py --markdown api/docs/ai-touchpoints.md

Default prints sorted unique file paths to stdout.
"""

from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path

# A file counts as a touchpoint when its full text matches at least one
# pattern; every pattern that matches contributes its tag to that file.
PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    ("llm_provider", re.compile(r"get_llm_provider(_fast)?\b")),
    ("embedding", re.compile(r"get_embedding_provider\b|EmbeddingProvider\b")),
    (
        "json_llm_helpers",
        re.compile(r"invoke_json_object|ainvoke_json_object|allm_json_call|llm_json_call\b"),
    ),
    ("llm_call_module", re.compile(r"from app\.core\.llm_call|import app\.core\.llm_call")),
    ("langchain", re.compile(r"\blangchain_|from langchain|import langchain")),
    ("ports_ai", re.compile(r"from app\.ports\.(llm|embedding|asr|tts)\b|LLMProvider\b")),
    ("agents_layer", re.compile(r"from app\.agents\.|import app\.agents\.")),
    (
        "memory_ai",
        re.compile(
            r"MemoryService\b|HybridRetriever\b|retrieve_evidence_bundle_|schedule_memory_enrichment\b"
        ),
    ),
]


def app_root_from_script() -> Path:
    """Return the directory two levels above this script file (resolved)."""
    return Path(__file__).resolve().parent.parent


def iter_python_files(root: Path) -> list[Path]:
    """Return every ``*.py`` file under ``root / "app"``, sorted by path.

    Raises:
        SystemExit: if ``root / "app"`` is not a directory.
    """
    base = root / "app"
    if not base.is_dir():
        raise SystemExit(f"expected package dir: {base}")
    return sorted(p for p in base.rglob("*.py") if p.is_file())


def scan_files(files: list[Path]) -> dict[Path, list[str]]:
    """Map each file that matches any entry of ``PATTERNS`` to its sorted tags.

    Files that cannot be read (``OSError``) are skipped. Bytes that are not
    valid UTF-8 are replaced rather than raising, so a single badly encoded
    file cannot abort the scan; the patterns are pure ASCII, so replacement
    characters never affect matching.
    """
    hits: dict[Path, list[str]] = {}
    for path in files:
        try:
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Best-effort scan: an unreadable file is simply not reported.
            continue
        tags = sorted({tag for tag, rx in PATTERNS if rx.search(text)})
        if tags:
            hits[path] = tags
    return hits


def _display_path(path: Path, root: Path) -> Path:
    """Return ``path`` relative to ``root.parent``, or ``path`` unchanged if it is not under it."""
    try:
        return path.relative_to(root.parent)
    except ValueError:
        return path


def main() -> int:
    """Parse CLI arguments, print the touchpoint report, optionally write markdown.

    The plain-text report (a count header followed by ``path<TAB>tags`` rows)
    always goes to stdout. When ``--markdown`` is given, a markdown table with
    the same rows is written to that path, creating parent directories as
    needed, and a confirmation is printed to stderr.

    Returns:
        Process exit code (always 0 on normal completion).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--root",
        type=Path,
        default=None,
        help="api/ directory (default: parent of this script)",
    )
    parser.add_argument(
        "--markdown",
        type=Path,
        default=None,
        help="if set, write markdown report to this path",
    )
    args = parser.parse_args()

    root = args.root or app_root_from_script()
    hits = scan_files(iter_python_files(root))
    ordered = sorted(hits)

    lines = [f"{len(hits)} files with AI touchpoints under {root / 'app'}"]
    for path in ordered:
        rel = _display_path(path, root)
        lines.append(f"{rel}\t{','.join(hits[path])}")
    report = "\n".join(lines) + "\n"
    sys.stdout.write(report)

    if args.markdown:
        md_lines = [
            "# AI touchpoints (generated)",
            "",
            "Regenerate: `uv run python api/scripts/ai_touchpoints_scan.py --markdown api/docs/ai-touchpoints.md`",
            "",
            "| File | Tags |",
            "|------|------|",
        ]
        for path in ordered:
            # Same path rendering as the stdout report, so both outputs agree.
            rel = _display_path(path, root)
            tags = ", ".join(f"`{t}`" for t in hits[path])
            md_lines.append(f"| `{rel}` | {tags} |")
        md_lines.append("")
        args.markdown.parent.mkdir(parents=True, exist_ok=True)
        args.markdown.write_text("\n".join(md_lines), encoding="utf-8")
        print(f"Wrote {args.markdown}", file=sys.stderr)

    return 0


if __name__ == "__main__":
    raise SystemExit(main())