Simplify AI memory pipeline

2026-04-30 16:22:55 +08:00
parent 7617ea902c
commit 3234396254
35 changed files with 1002 additions and 579 deletions
--- a/api/app/features/memory/chunker.py
+++ b/api/app/features/memory/chunker.py
@@ -13,10 +13,14 @@ def chunk_transcript(
    text = text.strip()
    if len(text) <= max_chars:
        return [text] if text else []
+    if max_chars <= 0:
+        raise ValueError("max_chars must be positive")
+    if overlap_chars < 0:
+        raise ValueError("overlap_chars cannot be negative")
+    overlap = min(overlap_chars, max_chars - 1)

    chunks: list[str] = []
    start = 0
-    step = max_chars - overlap_chars

    while start < len(text):
        end = start + max_chars
@@ -31,6 +35,12 @@ def chunk_transcript(
                    break
        if chunk.strip():
            chunks.append(chunk.strip())
-        start += len(chunk) if chunk else step
+        if not chunk:
+            start += max_chars - overlap
+            continue
+        next_start = end - overlap
+        if next_start <= start:
+            next_start = start + len(chunk)
+        start = next_start

    return chunks