# AI-VideoAssistant/engine/providers/common/streaming_text.py

"""Shared text chunking helpers for streaming TTS."""

from typing import Optional


def is_non_sentence_period(text: str, idx: int) -> bool:
    """Tell whether the '.' at ``idx`` belongs to a number or a "No." abbreviation.

    Two patterns are recognised:

    * a period with a digit directly on both sides (``1.2``, ``v1.2.3``);
    * a period directly preceded by the letters "no" (any case, and not
      preceded by further letters) whose first following non-whitespace
      character is a digit (``No.1``, ``No. 1``).

    Args:
        text: The text being inspected.
        idx: Position of the candidate period in ``text``.

    Returns:
        True when the period matches one of the patterns above. False
        otherwise, including when ``idx`` is out of range or does not point
        at a '.'.
    """
    size = len(text)
    if not 0 <= idx < size or text[idx] != ".":
        return False

    # Decimal / version segment: needs a neighbour on each side, both digits.
    if 0 < idx < size - 1 and text[idx - 1].isdigit() and text[idx + 1].isdigit():
        return True

    # Walk back over the run of letters that ends right before the period.
    word_start = idx
    while word_start > 0 and text[word_start - 1].isalpha():
        word_start -= 1
    if text[word_start:idx].lower() != "no":
        return False

    # "No." only counts as an abbreviation when a digit follows, optionally
    # after whitespace. An empty lookahead yields "" whose isdigit() is False.
    first_visible = next((ch for ch in text[idx + 1:] if not ch.isspace()), "")
    return first_visible.isdigit()


def has_spoken_content(text: str) -> bool:
    """Report whether ``text`` holds at least one alphanumeric character.

    Text made up solely of punctuation, whitespace or other non-alphanumeric
    symbols (and the empty string) yields False.
    """
    for symbol in text:
        if symbol.isalnum():
            return True
    return False


def extract_tts_sentence(
    text_buffer: str,
    *,
    end_chars: frozenset[str],
    trailing_chars: frozenset[str],
    closers: frozenset[str],
    min_split_spoken_chars: int = 0,
    hold_trailing_at_buffer_end: bool = False,
    force: bool = False,
) -> Optional[tuple[str, str]]:
    """Split the leading sentence off ``text_buffer``.

    The buffer is scanned for the first character found in ``end_chars``,
    skipping any '.' that ``is_non_sentence_period`` rejects. The cut point is
    then pushed right over a run of ``trailing_chars`` followed by a run of
    ``closers``.

    Args:
        text_buffer: Accumulated text to split.
        end_chars: Characters that terminate a sentence.
        trailing_chars: Characters absorbed directly after the terminator.
        closers: Characters absorbed after the trailing run.
        min_split_spoken_chars: When positive, a candidate with at least one
            but fewer than this many alphanumeric characters is extended to
            the next terminator, provided text remains after the cut.
        hold_trailing_at_buffer_end: When true, a cut that lands on the end of
            the buffer yields None instead of a sentence.
        force: Disables both the minimum-length extension and the
            hold-at-buffer-end rule. A terminator is still required.

    Returns:
        ``(sentence, remainder)`` where ``sentence`` is the whitespace-stripped
        text up to the cut and ``remainder`` is the untouched rest, or None
        when the buffer is empty, no terminator is found, or the result is
        being held back.
    """
    if not text_buffer:
        return None

    total = len(text_buffer)
    scan_from = 0
    while True:
        # Find the next genuine sentence terminator at or after scan_from.
        for pos in range(scan_from, total):
            symbol = text_buffer[pos]
            if symbol == "." and is_non_sentence_period(text_buffer, pos):
                continue
            if symbol in end_chars:
                break
        else:
            # Ran off the end without hitting a terminator.
            return None

        # Absorb trailing punctuation first, then closing quotes/brackets.
        cut = pos + 1
        for absorbable in (trailing_chars, closers):
            while cut < total and text_buffer[cut] in absorbable:
                cut += 1

        reached_end = cut >= total
        if reached_end and hold_trailing_at_buffer_end and not force:
            return None

        candidate = text_buffer[:cut].strip()

        # A too-short candidate is only extended when more text follows the
        # cut; the spoken count is always taken over the whole prefix.
        if not force and min_split_spoken_chars > 0 and not reached_end:
            spoken = sum(map(str.isalnum, candidate))
            if 0 < spoken < min_split_spoken_chars:
                scan_from = cut
                continue

        return candidate, text_buffer[cut:]