Merge branch 'master' of https://gitea.xiaowang.eu.org/wx44wx/AI-VideoAssistant
This commit is contained in:
@@ -53,6 +53,7 @@ class DuplexPipeline:
|
|||||||
_SENTENCE_END_CHARS = frozenset({"。", "!", "?", ".", "!", "?", "\n"})
|
_SENTENCE_END_CHARS = frozenset({"。", "!", "?", ".", "!", "?", "\n"})
|
||||||
_SENTENCE_TRAILING_CHARS = frozenset({"。", "!", "?", ".", "!", "?", "…", "~", "~", "\n"})
|
_SENTENCE_TRAILING_CHARS = frozenset({"。", "!", "?", ".", "!", "?", "…", "~", "~", "\n"})
|
||||||
_SENTENCE_CLOSERS = frozenset({'"', "'", "”", "’", ")", "]", "}", ")", "】", "」", "』", "》"})
|
_SENTENCE_CLOSERS = frozenset({'"', "'", "”", "’", ")", "]", "}", ")", "】", "」", "』", "》"})
|
||||||
|
_MIN_SPLIT_SPOKEN_CHARS = 6
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -628,31 +629,45 @@ class DuplexPipeline:
|
|||||||
if not text_buffer:
|
if not text_buffer:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
split_idx = -1
|
search_start = 0
|
||||||
for idx, char in enumerate(text_buffer):
|
while True:
|
||||||
if char == "." and self._is_non_sentence_period(text_buffer, idx):
|
split_idx = -1
|
||||||
|
for idx in range(search_start, len(text_buffer)):
|
||||||
|
char = text_buffer[idx]
|
||||||
|
if char == "." and self._is_non_sentence_period(text_buffer, idx):
|
||||||
|
continue
|
||||||
|
if char in self._SENTENCE_END_CHARS:
|
||||||
|
split_idx = idx
|
||||||
|
break
|
||||||
|
|
||||||
|
if split_idx == -1:
|
||||||
|
return None
|
||||||
|
|
||||||
|
end_idx = split_idx + 1
|
||||||
|
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_TRAILING_CHARS:
|
||||||
|
end_idx += 1
|
||||||
|
|
||||||
|
# Include trailing quote/bracket closers in the same segment.
|
||||||
|
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_CLOSERS:
|
||||||
|
end_idx += 1
|
||||||
|
|
||||||
|
if not force and end_idx >= len(text_buffer):
|
||||||
|
return None
|
||||||
|
|
||||||
|
sentence = text_buffer[:end_idx].strip()
|
||||||
|
spoken_chars = sum(1 for ch in sentence if ch.isalnum())
|
||||||
|
|
||||||
|
# Keep short utterances (e.g. "好。", "OK.") merged with following text.
|
||||||
|
if (
|
||||||
|
not force
|
||||||
|
and 0 < spoken_chars < self._MIN_SPLIT_SPOKEN_CHARS
|
||||||
|
and end_idx < len(text_buffer)
|
||||||
|
):
|
||||||
|
search_start = end_idx
|
||||||
continue
|
continue
|
||||||
if char in self._SENTENCE_END_CHARS:
|
|
||||||
split_idx = idx
|
|
||||||
break
|
|
||||||
|
|
||||||
if split_idx == -1:
|
remainder = text_buffer[end_idx:]
|
||||||
return None
|
return sentence, remainder
|
||||||
|
|
||||||
end_idx = split_idx + 1
|
|
||||||
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_TRAILING_CHARS:
|
|
||||||
end_idx += 1
|
|
||||||
|
|
||||||
# Include trailing quote/bracket closers in the same segment.
|
|
||||||
while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_CLOSERS:
|
|
||||||
end_idx += 1
|
|
||||||
|
|
||||||
if not force and end_idx >= len(text_buffer):
|
|
||||||
return None
|
|
||||||
|
|
||||||
sentence = text_buffer[:end_idx].strip()
|
|
||||||
remainder = text_buffer[end_idx:]
|
|
||||||
return sentence, remainder
|
|
||||||
|
|
||||||
def _has_spoken_content(self, text: str) -> bool:
|
def _has_spoken_content(self, text: str) -> bool:
|
||||||
"""Check whether text contains pronounceable content (not punctuation-only)."""
|
"""Check whether text contains pronounceable content (not punctuation-only)."""
|
||||||
|
|||||||
Reference in New Issue
Block a user