20 lines
452 B
Python
20 lines
452 B
Python
from __future__ import annotations
|
|
|
|
from typing import List
|
|
|
|
|
|
def chunk_text(text: str, max_chars: int = 1200, overlap: int = 200) -> List[str]:
    """Split *text* into overlapping, whitespace-stripped chunks.

    A window of ``max_chars`` characters slides over ``text``; each new
    window starts ``overlap`` characters before the end of the previous
    one. Every window is stripped of leading/trailing whitespace, and
    windows that are empty after stripping are dropped.

    Args:
        text: The string to split.
        max_chars: Maximum window length in characters. Must be positive.
        overlap: Number of characters shared by consecutive windows. Must
            be smaller than ``max_chars`` whenever ``text`` needs more than
            one window. A negative value is not rejected; it leaves a gap
            of that many characters between consecutive windows.

    Returns:
        The non-empty stripped chunks in order. An empty ``text`` yields
        an empty list.

    Raises:
        ValueError: If ``max_chars`` is not positive, or if ``overlap`` is
            greater than or equal to ``max_chars`` while ``text`` is longer
            than ``max_chars`` (the window could never advance).
    """
    if max_chars <= 0:
        raise ValueError(f"max_chars must be positive, got {max_chars}")

    n = len(text)
    # Each iteration moves the window start by (max_chars - overlap). If that
    # step is not positive and more than one window is needed, the loop would
    # never terminate, so fail loudly instead of hanging.
    if n > max_chars and overlap >= max_chars:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than max_chars ({max_chars})"
        )

    chunks: List[str] = []
    start = 0
    while start < n:
        end = min(start + max_chars, n)
        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)
        if end == n:
            # The window reached the end of the text; nothing left to cover.
            break
        # Step back by `overlap` so consecutive chunks share context. The
        # validation above guarantees this is strictly greater than `start`.
        start = end - overlap
    return chunks
|
|
|