Merge short sentences into the following text instead of sending them to TTS on their own
This commit is contained in:
@@ -53,6 +53,7 @@ class DuplexPipeline:
|
||||
# Punctuation tables used when splitting streamed text into sentences.
#
# The fullwidth CJK forms are written as \uXXXX escapes on purpose: they look
# almost identical to their ASCII twins, and if one gets folded to ASCII the
# set literal merely contains a harmless-looking duplicate while the fullwidth
# character silently stops being recognised.

# Characters that terminate a sentence.
_SENTENCE_END_CHARS = frozenset({
    "。",       # ideographic full stop
    "\uff01",  # fullwidth exclamation mark
    "\uff1f",  # fullwidth question mark
    ".",
    "!",
    "?",
    "\n",
})

# Characters absorbed into the segment when they directly follow a terminator
# (repeated terminators, ellipsis, tildes). Built from the terminator set so
# the two tables cannot drift apart.
_SENTENCE_TRAILING_CHARS = _SENTENCE_END_CHARS | frozenset({
    "…",
    "\uff5e",  # fullwidth tilde
    "~",
})

# Closing quotes/brackets that stay attached to the sentence they close.
_SENTENCE_CLOSERS = frozenset({
    '"',
    "'",
    "”",
    "’",
    "\uff09",  # fullwidth right parenthesis
    "]",
    "}",
    ")",
    "】",
    "」",
    "』",
    "》",
})

# A candidate sentence with fewer alphanumeric characters than this is not
# split off on its own (unless forced); it stays merged with the text after it.
_MIN_SPLIT_SPOKEN_CHARS = 6
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -628,31 +629,45 @@ class DuplexPipeline:
|
||||
if not text_buffer:
|
||||
return None
|
||||
|
||||
split_idx = -1
|
||||
for idx, char in enumerate(text_buffer):
|
||||
if char == "." and self._is_non_sentence_period(text_buffer, idx):
|
||||
search_start = 0
|
||||
while True:
|
||||
split_idx = -1
|
||||
for idx in range(search_start, len(text_buffer)):
|
||||
char = text_buffer[idx]
|
||||
if char == "." and self._is_non_sentence_period(text_buffer, idx):
|
||||
continue
|
||||
if char in self._SENTENCE_END_CHARS:
|
||||
split_idx = idx
|
||||
break
|
||||
|
||||
if split_idx == -1:
|
||||
return None
|
||||
|
||||
end_idx = split_idx + 1
|
||||
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_TRAILING_CHARS:
|
||||
end_idx += 1
|
||||
|
||||
# Include trailing quote/bracket closers in the same segment.
|
||||
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_CLOSERS:
|
||||
end_idx += 1
|
||||
|
||||
if not force and end_idx >= len(text_buffer):
|
||||
return None
|
||||
|
||||
sentence = text_buffer[:end_idx].strip()
|
||||
spoken_chars = sum(1 for ch in sentence if ch.isalnum())
|
||||
|
||||
# Keep short utterances (e.g. "好。", "OK.") merged with following text.
|
||||
if (
|
||||
not force
|
||||
and 0 < spoken_chars < self._MIN_SPLIT_SPOKEN_CHARS
|
||||
and end_idx < len(text_buffer)
|
||||
):
|
||||
search_start = end_idx
|
||||
continue
|
||||
if char in self._SENTENCE_END_CHARS:
|
||||
split_idx = idx
|
||||
break
|
||||
|
||||
if split_idx == -1:
|
||||
return None
|
||||
|
||||
end_idx = split_idx + 1
|
||||
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_TRAILING_CHARS:
|
||||
end_idx += 1
|
||||
|
||||
# Include trailing quote/bracket closers in the same segment.
|
||||
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_CLOSERS:
|
||||
end_idx += 1
|
||||
|
||||
if not force and end_idx >= len(text_buffer):
|
||||
return None
|
||||
|
||||
sentence = text_buffer[:end_idx].strip()
|
||||
remainder = text_buffer[end_idx:]
|
||||
return sentence, remainder
|
||||
remainder = text_buffer[end_idx:]
|
||||
return sentence, remainder
|
||||
|
||||
def _has_spoken_content(self, text: str) -> bool:
|
||||
"""Check whether text contains pronounceable content (not punctuation-only)."""
|
||||
|
||||
Reference in New Issue
Block a user