Improve delimiter
This commit is contained in:
@@ -632,6 +632,8 @@ class DuplexPipeline:
|
|||||||
|
|
||||||
split_idx = -1
|
split_idx = -1
|
||||||
for idx, char in enumerate(text_buffer):
|
for idx, char in enumerate(text_buffer):
|
||||||
|
if char == "." and self._is_non_sentence_period(text_buffer, idx):
|
||||||
|
continue
|
||||||
if char in self._SENTENCE_END_CHARS:
|
if char in self._SENTENCE_END_CHARS:
|
||||||
split_idx = idx
|
split_idx = idx
|
||||||
break
|
break
|
||||||
@@ -658,6 +660,29 @@ class DuplexPipeline:
|
|||||||
"""Check whether text contains pronounceable content (not punctuation-only)."""
|
"""Check whether text contains pronounceable content (not punctuation-only)."""
|
||||||
return any(char.isalnum() for char in text)
|
return any(char.isalnum() for char in text)
|
||||||
|
|
||||||
|
def _is_non_sentence_period(self, text: str, idx: int) -> bool:
|
||||||
|
"""Check whether '.' should NOT be treated as a sentence delimiter."""
|
||||||
|
if text[idx] != ".":
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Decimal/version segment: 1.2, v1.2.3
|
||||||
|
if idx > 0 and idx < len(text) - 1 and text[idx - 1].isdigit() and text[idx + 1].isdigit():
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Number abbreviations: No.1 / No. 1
|
||||||
|
left_start = idx - 1
|
||||||
|
while left_start >= 0 and text[left_start].isalpha():
|
||||||
|
left_start -= 1
|
||||||
|
left_token = text[left_start + 1:idx].lower()
|
||||||
|
if left_token == "no":
|
||||||
|
j = idx + 1
|
||||||
|
while j < len(text) and text[j].isspace():
|
||||||
|
j += 1
|
||||||
|
if j < len(text) and text[j].isdigit():
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
async def _speak_sentence(self, text: str, fade_in_ms: int = 2, fade_out_ms: int = 8) -> None:
|
async def _speak_sentence(self, text: str, fade_in_ms: int = 2, fade_out_ms: int = 8) -> None:
|
||||||
"""
|
"""
|
||||||
Synthesize and send a single sentence.
|
Synthesize and send a single sentence.
|
||||||
|
|||||||
@@ -213,6 +213,29 @@ class StreamingTTSAdapter:
|
|||||||
self._cancel_event = asyncio.Event()
|
self._cancel_event = asyncio.Event()
|
||||||
self._is_speaking = False
|
self._is_speaking = False
|
||||||
|
|
||||||
|
def _is_non_sentence_period(self, text: str, idx: int) -> bool:
|
||||||
|
"""Check whether '.' should NOT be treated as a sentence delimiter."""
|
||||||
|
if text[idx] != ".":
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Decimal/version segment: 1.2, v1.2.3
|
||||||
|
if idx > 0 and idx < len(text) - 1 and text[idx - 1].isdigit() and text[idx + 1].isdigit():
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Number abbreviations: No.1 / No. 1
|
||||||
|
left_start = idx - 1
|
||||||
|
while left_start >= 0 and text[left_start].isalpha():
|
||||||
|
left_start -= 1
|
||||||
|
left_token = text[left_start + 1:idx].lower()
|
||||||
|
if left_token == "no":
|
||||||
|
j = idx + 1
|
||||||
|
while j < len(text) and text[j].isspace():
|
||||||
|
j += 1
|
||||||
|
if j < len(text) and text[j].isdigit():
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
async def process_text_chunk(self, text_chunk: str) -> None:
|
async def process_text_chunk(self, text_chunk: str) -> None:
|
||||||
"""
|
"""
|
||||||
Process a text chunk from LLM and trigger TTS when sentence is complete.
|
Process a text chunk from LLM and trigger TTS when sentence is complete.
|
||||||
@@ -229,6 +252,8 @@ class StreamingTTSAdapter:
|
|||||||
while True:
|
while True:
|
||||||
split_idx = -1
|
split_idx = -1
|
||||||
for i, char in enumerate(self._buffer):
|
for i, char in enumerate(self._buffer):
|
||||||
|
if char == "." and self._is_non_sentence_period(self._buffer, i):
|
||||||
|
continue
|
||||||
if char in self.SENTENCE_ENDS:
|
if char in self.SENTENCE_ENDS:
|
||||||
split_idx = i
|
split_idx = i
|
||||||
break
|
break
|
||||||
|
|||||||
Reference in New Issue
Block a user