Simplify AI memory pipeline
This commit is contained in:
@@ -13,10 +13,14 @@ def chunk_transcript(
|
||||
text = text.strip()
|
||||
if len(text) <= max_chars:
|
||||
return [text] if text else []
|
||||
if max_chars <= 0:
|
||||
raise ValueError("max_chars must be positive")
|
||||
if overlap_chars < 0:
|
||||
raise ValueError("overlap_chars cannot be negative")
|
||||
overlap = min(overlap_chars, max_chars - 1)
|
||||
|
||||
chunks: list[str] = []
|
||||
start = 0
|
||||
step = max_chars - overlap_chars
|
||||
|
||||
while start < len(text):
|
||||
end = start + max_chars
|
||||
@@ -31,6 +35,12 @@ def chunk_transcript(
|
||||
break
|
||||
if chunk.strip():
|
||||
chunks.append(chunk.strip())
|
||||
start += len(chunk) if chunk else step
|
||||
if not chunk:
|
||||
start += max_chars - overlap
|
||||
continue
|
||||
next_start = end - overlap
|
||||
if next_start <= start:
|
||||
next_start = start + len(chunk)
|
||||
start = next_start
|
||||
|
||||
return chunks
|
||||
|
||||
Reference in New Issue
Block a user