This commit is contained in:
Xin Wang
2026-02-09 16:09:46 +08:00

View File

@@ -53,6 +53,7 @@ class DuplexPipeline:
_SENTENCE_END_CHARS = frozenset({"", "", "", ".", "!", "?", "\n"}) _SENTENCE_END_CHARS = frozenset({"", "", "", ".", "!", "?", "\n"})
_SENTENCE_TRAILING_CHARS = frozenset({"", "", "", ".", "!", "?", "", "~", "", "\n"}) _SENTENCE_TRAILING_CHARS = frozenset({"", "", "", ".", "!", "?", "", "~", "", "\n"})
_SENTENCE_CLOSERS = frozenset({'"', "'", "", "", ")", "]", "}", "", "", "", "", ""}) _SENTENCE_CLOSERS = frozenset({'"', "'", "", "", ")", "]", "}", "", "", "", "", ""})
_MIN_SPLIT_SPOKEN_CHARS = 6
def __init__( def __init__(
self, self,
@@ -628,8 +629,11 @@ class DuplexPipeline:
if not text_buffer: if not text_buffer:
return None return None
search_start = 0
while True:
split_idx = -1 split_idx = -1
for idx, char in enumerate(text_buffer): for idx in range(search_start, len(text_buffer)):
char = text_buffer[idx]
if char == "." and self._is_non_sentence_period(text_buffer, idx): if char == "." and self._is_non_sentence_period(text_buffer, idx):
continue continue
if char in self._SENTENCE_END_CHARS: if char in self._SENTENCE_END_CHARS:
@@ -651,6 +655,17 @@ class DuplexPipeline:
return None return None
sentence = text_buffer[:end_idx].strip() sentence = text_buffer[:end_idx].strip()
spoken_chars = sum(1 for ch in sentence if ch.isalnum())
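# Keep short utterances (e.g. "好。", "OK.") merged with the following text:
# unless forced, a candidate with fewer than _MIN_SPLIT_SPOKEN_CHARS
# alphanumeric characters that is followed by more buffered text is not
# emitted on its own; the boundary search resumes from end_idx instead.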
if (
not force
and 0 < spoken_chars < self._MIN_SPLIT_SPOKEN_CHARS
and end_idx < len(text_buffer)
):
search_start = end_idx
continue
remainder = text_buffer[end_idx:] remainder = text_buffer[end_idx:]
return sentence, remainder return sentence, remainder