From 65aab79fb6fe80cd921ac26968450877d3645c25 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Mon, 9 Feb 2026 15:11:35 +0800 Subject: [PATCH] Improve delimiter --- engine/core/duplex_pipeline.py | 25 +++++++++++++++++++++++++ engine/services/siliconflow_tts.py | 25 +++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/engine/core/duplex_pipeline.py b/engine/core/duplex_pipeline.py index 2c7c7b6..0df9c39 100644 --- a/engine/core/duplex_pipeline.py +++ b/engine/core/duplex_pipeline.py @@ -632,6 +632,8 @@ class DuplexPipeline: split_idx = -1 for idx, char in enumerate(text_buffer): + if char == "." and self._is_non_sentence_period(text_buffer, idx): + continue if char in self._SENTENCE_END_CHARS: split_idx = idx break @@ -658,6 +660,29 @@ class DuplexPipeline: """Check whether text contains pronounceable content (not punctuation-only).""" return any(char.isalnum() for char in text) + def _is_non_sentence_period(self, text: str, idx: int) -> bool: + """Check whether '.' should NOT be treated as a sentence delimiter.""" + if text[idx] != ".": + return False + + # Decimal/version segment: 1.2, v1.2.3 + if idx > 0 and idx < len(text) - 1 and text[idx - 1].isdigit() and text[idx + 1].isdigit(): + return True + + # Number abbreviations: No.1 / No. 1 + left_start = idx - 1 + while left_start >= 0 and text[left_start].isalpha(): + left_start -= 1 + left_token = text[left_start + 1:idx].lower() + if left_token == "no": + j = idx + 1 + while j < len(text) and text[j].isspace(): + j += 1 + if j < len(text) and text[j].isdigit(): + return True + + return False + async def _speak_sentence(self, text: str, fade_in_ms: int = 2, fade_out_ms: int = 8) -> None: """ Synthesize and send a single sentence. diff --git a/engine/services/siliconflow_tts.py b/engine/services/siliconflow_tts.py index 15d71ef..3974015 100644 --- a/engine/services/siliconflow_tts.py +++ b/engine/services/siliconflow_tts.py @@ -212,6 +212,29 @@ class StreamingTTSAdapter: self._buffer = "" self._cancel_event = asyncio.Event() self._is_speaking = False + + def _is_non_sentence_period(self, text: str, idx: int) -> bool: + """Check whether '.' should NOT be treated as a sentence delimiter.""" + if text[idx] != ".": + return False + + # Decimal/version segment: 1.2, v1.2.3 + if idx > 0 and idx < len(text) - 1 and text[idx - 1].isdigit() and text[idx + 1].isdigit(): + return True + + # Number abbreviations: No.1 / No. 1 + left_start = idx - 1 + while left_start >= 0 and text[left_start].isalpha(): + left_start -= 1 + left_token = text[left_start + 1:idx].lower() + if left_token == "no": + j = idx + 1 + while j < len(text) and text[j].isspace(): + j += 1 + if j < len(text) and text[j].isdigit(): + return True + + return False async def process_text_chunk(self, text_chunk: str) -> None: """ @@ -229,6 +252,8 @@ class StreamingTTSAdapter: while True: split_idx = -1 for i, char in enumerate(self._buffer): + if char == "." and self._is_non_sentence_period(self._buffer, i): + continue if char in self.SENTENCE_ENDS: split_idx = i break