Keep short sentences merged with following text when splitting for TTS

This commit is contained in:
Xin Wang
2026-02-09 16:09:26 +08:00
parent ed044bd8ad
commit 11016c04da

View File

@@ -53,6 +53,7 @@ class DuplexPipeline:
# Characters that mark a candidate sentence boundary when scanning the text buffer.
# NOTE(review): several entries in these sets render as empty strings here —
# possibly non-ASCII punctuation lost in extraction; confirm against the repository.
_SENTENCE_END_CHARS = frozenset({"", "", "", ".", "!", "?", "\n"})
# Punctuation absorbed into the same segment directly after the boundary character.
_SENTENCE_TRAILING_CHARS = frozenset({"", "", "", ".", "!", "?", "", "~", "", "\n"})
# Quote/bracket closers absorbed into the same segment after trailing punctuation.
_SENTENCE_CLOSERS = frozenset({'"', "'", "", "", ")", "]", "}", "", "", "", "", ""})
# Minimum number of alphanumeric characters a candidate sentence needs before it
# is split off on its own; shorter candidates stay merged with the text that
# follows them unless splitting is forced.
_MIN_SPLIT_SPOKEN_CHARS = 6
def __init__(
self,
@@ -628,31 +629,45 @@ class DuplexPipeline:
if not text_buffer:
return None
split_idx = -1
for idx, char in enumerate(text_buffer):
if char == "." and self._is_non_sentence_period(text_buffer, idx):
search_start = 0
while True:
split_idx = -1
for idx in range(search_start, len(text_buffer)):
char = text_buffer[idx]
if char == "." and self._is_non_sentence_period(text_buffer, idx):
continue
if char in self._SENTENCE_END_CHARS:
split_idx = idx
break
if split_idx == -1:
return None
end_idx = split_idx + 1
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_TRAILING_CHARS:
end_idx += 1
# Include trailing quote/bracket closers in the same segment.
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_CLOSERS:
end_idx += 1
if not force and end_idx >= len(text_buffer):
return None
sentence = text_buffer[:end_idx].strip()
spoken_chars = sum(1 for ch in sentence if ch.isalnum())
# Keep short utterances (e.g. "好。", "OK.") merged with following text.
if (
not force
and 0 < spoken_chars < self._MIN_SPLIT_SPOKEN_CHARS
and end_idx < len(text_buffer)
):
search_start = end_idx
continue
if char in self._SENTENCE_END_CHARS:
split_idx = idx
break
if split_idx == -1:
return None
end_idx = split_idx + 1
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_TRAILING_CHARS:
end_idx += 1
# Include trailing quote/bracket closers in the same segment.
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_CLOSERS:
end_idx += 1
if not force and end_idx >= len(text_buffer):
return None
sentence = text_buffer[:end_idx].strip()
remainder = text_buffer[end_idx:]
return sentence, remainder
remainder = text_buffer[end_idx:]
return sentence, remainder
def _has_spoken_content(self, text: str) -> bool:
"""Check whether text contains pronounceable content (not punctuation-only)."""