108 lines
3.6 KiB
Python
108 lines
3.6 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""Scan api/app for AI-related symbols (LLM, embedding, langchain JSON helpers).
|
||
|
|
|
||
|
|
Run from repo root:
|
||
|
|
uv run python api/scripts/ai_touchpoints_scan.py
|
||
|
|
uv run python api/scripts/ai_touchpoints_scan.py --markdown api/docs/ai-touchpoints.md
|
||
|
|
|
||
|
|
Prints a summary count line followed by sorted `path<TAB>tags` rows to stdout; --markdown additionally writes the same rows as a Markdown table.
|
||
|
|
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import re
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
# A file counts as a touchpoint when its text matches at least one of these
# patterns; each entry pairs the reported tag with the regex that triggers it.
_PATTERN_SOURCES: tuple[tuple[str, str], ...] = (
    ("llm_provider", r"get_llm_provider(_fast)?\b"),
    ("embedding", r"get_embedding_provider\b|EmbeddingProvider\b"),
    ("json_llm_helpers", r"invoke_json_object|ainvoke_json_object|allm_json_call|llm_json_call\b"),
    ("llm_call_module", r"from app\.core\.llm_call|import app\.core\.llm_call"),
    ("langchain", r"\blangchain_|from langchain|import langchain"),
    ("ports_ai", r"from app\.ports\.(llm|embedding|asr|tts)\b|LLMProvider\b"),
    ("agents_layer", r"from app\.agents\.|import app\.agents\."),
    ("memory_ai", r"MemoryService\b|HybridRetriever\b|retrieve_evidence_bundle_|schedule_memory_enrichment\b"),
)

# Compiled once at import time, in the same order as the source table.
PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    (tag, re.compile(source)) for tag, source in _PATTERN_SOURCES
]
|
||
|
|
|
||
|
|
|
||
|
|
def app_root_from_script() -> Path:
    """Return the grandparent directory of this script's resolved path.

    The path is resolved first, so the result is absolute and symlink-free.
    """
    script_dir = Path(__file__).resolve().parent
    return script_dir.parent
|
||
|
|
|
||
|
|
|
||
|
|
def iter_python_files(root: Path) -> list[Path]:
    """Collect every ``*.py`` file below ``root / "app"``, sorted by path.

    Args:
        root: Directory that is expected to contain an ``app`` sub-directory.

    Returns:
        Sorted list of regular files matching ``*.py`` found recursively.

    Raises:
        SystemExit: If ``root / "app"`` is not a directory.
    """
    package_dir = root / "app"
    if package_dir.is_dir():
        # rglob may also yield directories named ``*.py``; keep regular files only.
        found = [candidate for candidate in package_dir.rglob("*.py") if candidate.is_file()]
        found.sort()
        return found
    raise SystemExit(f"expected package dir: {package_dir}")
|
||
|
|
|
||
|
|
|
||
|
|
def scan_files(
    files: list[Path],
    patterns: list[tuple[str, re.Pattern[str]]] | None = None,
) -> dict[Path, list[str]]:
    """Map each file to the sorted tags of the patterns its text matches.

    Each file is read as UTF-8 and every pattern is searched against the
    whole file text. Files that match nothing are left out of the result.

    Args:
        files: Paths to scan.
        patterns: ``(tag, compiled_regex)`` pairs to search for. Defaults to
            the module-level ``PATTERNS`` when omitted.

    Returns:
        Dict from path to the sorted, de-duplicated list of matching tags.
        Unreadable files (I/O error or not valid UTF-8) are skipped.
    """
    active = PATTERNS if patterns is None else patterns
    hits: dict[Path, list[str]] = {}
    for path in files:
        try:
            text = path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best-effort scan: a single unreadable or non-UTF-8 file must not
            # abort the whole run (UnicodeDecodeError is not an OSError).
            continue
        tags = sorted({tag for tag, rx in active if rx.search(text)})
        if tags:
            hits[path] = tags
    return hits
|
||
|
|
|
||
|
|
|
||
|
|
def main(argv: list[str] | None = None) -> int:
    """Run the scan, print the report, and optionally write a Markdown table.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``.

    Returns:
        Process exit status; always ``0`` when the scan completes.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--root",
        type=Path,
        default=None,
        help="api/ directory (default: parent of this script)",
    )
    parser.add_argument(
        "--markdown",
        type=Path,
        default=None,
        help="if set, write markdown report to this path",
    )
    args = parser.parse_args(argv)
    root = args.root if args.root is not None else app_root_from_script()
    hits = scan_files(iter_python_files(root))

    # Resolve the display path once so stdout and the Markdown table always
    # agree; fall back to the full path if it is not under root's parent.
    display_base = root.parent
    rows: list[tuple[Path, list[str]]] = []
    for path in sorted(hits):
        rel = path.relative_to(display_base) if path.is_relative_to(display_base) else path
        rows.append((rel, hits[path]))

    lines = [f"{len(hits)} files with AI touchpoints under {root / 'app'}"]
    lines.extend(f"{rel}\t{','.join(tags)}" for rel, tags in rows)
    sys.stdout.write("\n".join(lines) + "\n")

    if args.markdown:
        md_lines = [
            "# AI touchpoints (generated)",
            "",
            "Regenerate: `uv run python api/scripts/ai_touchpoints_scan.py --markdown api/docs/ai-touchpoints.md`",
            "",
            "| File | Tags |",
            "|------|------|",
        ]
        for rel, tags in rows:
            tag_cells = ", ".join(f"`{t}`" for t in tags)
            # Forward slashes keep the generated file identical across platforms.
            md_lines.append(f"| `{rel.as_posix()}` | {tag_cells} |")
        md_lines.append("")  # trailing newline after the join
        args.markdown.parent.mkdir(parents=True, exist_ok=True)
        args.markdown.write_text("\n".join(md_lines), encoding="utf-8")
        print(f"Wrote {args.markdown}", file=sys.stderr)

    return 0
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|