Merge branch 'master' of https://gitea.xiaowang.eu.org/wx44wx/AI-VideoAssistant
This commit is contained in:
@@ -53,6 +53,7 @@ class DuplexPipeline:
|
||||
# Characters that terminate a sentence when scanning buffered text for a
# split point. Covers CJK and ASCII punctuation plus newline. The fullwidth
# forms are written as escapes because they are visually near-identical to
# their ASCII counterparts and are silently lost if the file is ever
# width-normalized (which leaves harmless-looking duplicate ASCII entries).
_SENTENCE_END_CHARS = frozenset({
    "。",       # ideographic full stop
    "\uff01",   # fullwidth exclamation mark
    "\uff1f",   # fullwidth question mark
    ".",
    "!",
    "?",
    "\n",
})
|
||||
# Punctuation that may follow a sentence terminator (repeated end marks,
# ellipsis, tildes, newline).
# NOTE(review): the code consuming this set is not visible here; presumably
# these characters are absorbed into the preceding sentence — confirm.
# Fullwidth forms are written as escapes so they cannot be collapsed into
# duplicate ASCII entries by width normalization.
_SENTENCE_TRAILING_CHARS = frozenset({
    "。",       # ideographic full stop
    "\uff01",   # fullwidth exclamation mark
    "\uff1f",   # fullwidth question mark
    ".",
    "!",
    "?",
    "…",
    "~",
    "\uff5e",   # fullwidth tilde
    "\n",
})
|
||||
# Closing quotes and brackets, ASCII and CJK.
# NOTE(review): the code consuming this set is not visible here; presumably
# closers directly after a sentence terminator stay with that sentence —
# confirm.
# The fullwidth right parenthesis is written as an escape so it cannot be
# collapsed into a duplicate ASCII ")" by width normalization.
_SENTENCE_CLOSERS = frozenset({
    '"',
    "'",
    "”",
    "’",
    ")",
    "]",
    "}",
    "\uff09",   # fullwidth right parenthesis
    "】",
    "」",
    "』",
    "》",
})
|
||||
# Minimum number of alphanumeric ("spoken") characters a candidate sentence
# needs before it is split off by itself. When a split is not forced and more
# text follows in the buffer, a shorter candidate (e.g. "好。", "OK.") is
# kept merged with the following text and the search for a split resumes
# after it.
_MIN_SPLIT_SPOKEN_CHARS = 6
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -628,8 +629,11 @@ class DuplexPipeline:
|
||||
if not text_buffer:
|
||||
return None
|
||||
|
||||
search_start = 0
|
||||
while True:
|
||||
split_idx = -1
|
||||
for idx, char in enumerate(text_buffer):
|
||||
for idx in range(search_start, len(text_buffer)):
|
||||
char = text_buffer[idx]
|
||||
if char == "." and self._is_non_sentence_period(text_buffer, idx):
|
||||
continue
|
||||
if char in self._SENTENCE_END_CHARS:
|
||||
@@ -651,6 +655,17 @@ class DuplexPipeline:
|
||||
return None
|
||||
|
||||
sentence = text_buffer[:end_idx].strip()
|
||||
spoken_chars = sum(1 for ch in sentence if ch.isalnum())
|
||||
|
||||
# Keep short utterances (e.g. "好。", "OK.") merged with following text.
|
||||
if (
|
||||
not force
|
||||
and 0 < spoken_chars < self._MIN_SPLIT_SPOKEN_CHARS
|
||||
and end_idx < len(text_buffer)
|
||||
):
|
||||
search_start = end_idx
|
||||
continue
|
||||
|
||||
remainder = text_buffer[end_idx:]
|
||||
return sentence, remainder
|
||||
|
||||
|
||||
Reference in New Issue
Block a user