From 11016c04dac880c61a4df0ac87bf8e975c7401a3 Mon Sep 17 00:00:00 2001
From: Xin Wang <wx44wx@XindeMac-mini.local>
Date: Mon, 9 Feb 2026 16:09:26 +0800
Subject: [PATCH] Merge short sentences into following text for TTS

---
 engine/core/duplex_pipeline.py | 61 +++++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 23 deletions(-)

diff --git a/engine/core/duplex_pipeline.py b/engine/core/duplex_pipeline.py
index dfe3ef0..64a7012 100644
--- a/engine/core/duplex_pipeline.py
+++ b/engine/core/duplex_pipeline.py
@@ -53,6 +53,7 @@ class DuplexPipeline:
     _SENTENCE_END_CHARS = frozenset({"。", "！", "？", ".", "!", "?", "\n"})
     _SENTENCE_TRAILING_CHARS = frozenset({"。", "！", "？", ".", "!", "?", "…", "~", "～", "\n"})
     _SENTENCE_CLOSERS = frozenset({'"', "'", "”", "’", ")", "]", "}", "）", "】", "」", "』", "》"})
+    _MIN_SPLIT_SPOKEN_CHARS = 6
 
     def __init__(
         self,
@@ -628,31 +629,45 @@ class DuplexPipeline:
         if not text_buffer:
             return None
 
-        split_idx = -1
-        for idx, char in enumerate(text_buffer):
-            if char == "." and self._is_non_sentence_period(text_buffer, idx):
+        search_start = 0
+        while True:
+            split_idx = -1
+            for idx in range(search_start, len(text_buffer)):
+                char = text_buffer[idx]
+                if char == "." and self._is_non_sentence_period(text_buffer, idx):
+                    continue
+                if char in self._SENTENCE_END_CHARS:
+                    split_idx = idx
+                    break
+
+            if split_idx == -1:
+                return None
+
+            end_idx = split_idx + 1
+            while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_TRAILING_CHARS:
+                end_idx += 1
+
+            # Include trailing quote/bracket closers in the same segment.
+            while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_CLOSERS:
+                end_idx += 1
+
+            if not force and end_idx >= len(text_buffer):
+                return None
+
+            sentence = text_buffer[:end_idx].strip()
+            spoken_chars = sum(1 for ch in sentence if ch.isalnum())
+
+            # Keep short utterances (e.g. "好。", "OK.") merged with following text.
+            if (
+                not force
+                and 0 < spoken_chars < self._MIN_SPLIT_SPOKEN_CHARS
+                and end_idx < len(text_buffer)
+            ):
+                search_start = end_idx
                 continue
-            if char in self._SENTENCE_END_CHARS:
-                split_idx = idx
-                break
 
-        if split_idx == -1:
-            return None
-
-        end_idx = split_idx + 1
-        while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_TRAILING_CHARS:
-            end_idx += 1
-
-        # Include trailing quote/bracket closers in the same segment.
-        while end_idx < len(text_buffer) and text_buffer[end_idx] in self._SENTENCE_CLOSERS:
-            end_idx += 1
-
-        if not force and end_idx >= len(text_buffer):
-            return None
-
-        sentence = text_buffer[:end_idx].strip()
-        remainder = text_buffer[end_idx:]
-        return sentence, remainder
+            remainder = text_buffer[end_idx:]
+            return sentence, remainder
 
     def _has_spoken_content(self, text: str) -> bool:
         """Check whether text contains pronounceable content (not punctuation-only)."""