from rag.config import CHUNK_SIZE, CHUNK_OVERLAP, MIN_CHUNK_LENGTH


def split(
    text: str,
    chunk_size: int = CHUNK_SIZE,
    overlap: int = CHUNK_OVERLAP,
) -> list[str]:
    """Split *text* into overlapping chunks.

    The stripped text is cut into pieces of at most *chunk_size* characters,
    preferring blank-line, newline and period-space boundaries and falling
    back to fixed-width cuts. Each piece is stripped and empty pieces are
    dropped. Every chunk after the first is then prefixed with the last
    *overlap* characters of the preceding chunk, so returned chunks can be
    longer than *chunk_size*. Finally, chunks shorter than
    ``MIN_CHUNK_LENGTH`` are discarded.

    NOTE(review): the minimum-length check is applied to the chunk
    *including* its overlap prefix, so a very short piece can survive the
    filter once the prefix is added. Confirm this is intended.

    Args:
        text: Text to split. Empty or whitespace-only text yields ``[]``.
        chunk_size: Maximum length of a piece before overlap is added;
            must be positive.
        overlap: Number of trailing characters of the previous chunk to
            prepend to each chunk. Values of zero or less disable overlap.

    Returns:
        The chunks, in document order.

    Raises:
        ValueError: If *chunk_size* is not positive and *text* is not blank.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return []

    # Fail loudly here: a zero size would otherwise surface as an opaque
    # range() error deep in the helper, and a negative size would silently
    # return an empty or truncated result.
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    pieces = _recursive_split(stripped, chunk_size, ["\n\n", "\n", ". ", ""])
    clean = [trimmed for piece in pieces if (trimmed := piece.strip())]
    overlapped = _add_overlap(clean, overlap)
    return [chunk for chunk in overlapped if len(chunk) >= MIN_CHUNK_LENGTH]


def _recursive_split(text: str, size: int, seps: list[str]) -> list[str]:
    if len(text) <= size:
        return [text]

    sep = seps[0] if seps else ""
    remaining = seps[1:] if seps else []

    if sep == "":
        return [text[i : i + size] for i in range(0, len(text), size)]

    parts = text.split(sep)
    if len(parts) == 1:
        return _recursive_split(text, size, remaining)

    result: list[str] = []
    current = ""

    for part in parts:
        candidate = current + sep + part if current else part
        if len(candidate) <= size:
            current = candidate
        else:
            if current:
                result.append(current)
            if len(part) <= size:
                current = part
            else:
                sub = _recursive_split(part, size, remaining)
                result.extend(sub[:-1])
                current = sub[-1] if sub else ""

    if current:
        result.append(current)

    return result or [text[:size]]


def _add_overlap(chunks: list[str], overlap: int) -> list[str]:
    if overlap <= 0 or len(chunks) <= 1:
        return chunks
    result = [chunks[0]]
    for i in range(1, len(chunks)):
        prefix = chunks[i - 1][-overlap:]
        result.append(prefix + chunks[i])
    return result
