Improve tts with codex extra high

This commit is contained in:
Xin Wang
2026-02-09 15:04:34 +08:00
parent a42dd4c712
commit cd68ebe306
3 changed files with 164 additions and 56 deletions

View File

@@ -134,6 +134,7 @@ class SiliconFlowTTSService(BaseTTSService):
# Stream audio chunks
chunk_size = self.sample_rate * 2 // 10 # 100ms chunks
buffer = b""
pending_chunk = None
async for chunk in response.content.iter_any():
if self._cancel_event.is_set():
@@ -146,14 +147,34 @@ class SiliconFlowTTSService(BaseTTSService):
while len(buffer) >= chunk_size:
audio_chunk = buffer[:chunk_size]
buffer = buffer[chunk_size:]
# Hold back the most recent full chunk instead of yielding it immediately, so that
# when the stream length is an exact multiple of chunk_size (no leftover tail) the
# last full chunk can still be emitted with is_final=True.
if pending_chunk is not None:
yield TTSChunk(
audio=pending_chunk,
sample_rate=self.sample_rate,
is_final=False
)
pending_chunk = audio_chunk
# Flush pending chunk(s) and remaining tail.
if pending_chunk is not None:
if buffer:
yield TTSChunk(
audio=audio_chunk,
audio=pending_chunk,
sample_rate=self.sample_rate,
is_final=False
)
# Yield remaining buffer
pending_chunk = None
else:
yield TTSChunk(
audio=pending_chunk,
sample_rate=self.sample_rate,
is_final=True
)
pending_chunk = None
if buffer:
yield TTSChunk(
audio=buffer,
@@ -182,7 +203,7 @@ class StreamingTTSAdapter:
"""
# Sentence delimiters
SENTENCE_ENDS = {'。', '！', '？', '\n'}
SENTENCE_ENDS = {'。', '！', '？', '.', '!', '?', '\n'}
def __init__(self, tts_service: BaseTTSService, transport, session_id: str):
self.tts_service = tts_service
@@ -205,15 +226,24 @@ class StreamingTTSAdapter:
self._buffer += text_chunk
# Check for sentence completion
for i, char in enumerate(self._buffer):
if char in self.SENTENCE_ENDS:
# Found sentence end, synthesize up to this point
sentence = self._buffer[:i+1].strip()
self._buffer = self._buffer[i+1:]
if sentence:
await self._speak_sentence(sentence)
while True:
split_idx = -1
for i, char in enumerate(self._buffer):
if char in self.SENTENCE_ENDS:
split_idx = i
break
if split_idx < 0:
break
end_idx = split_idx + 1
while end_idx < len(self._buffer) and self._buffer[end_idx] in self.SENTENCE_ENDS:
end_idx += 1
sentence = self._buffer[:end_idx].strip()
self._buffer = self._buffer[end_idx:]
if sentence and any(ch.isalnum() for ch in sentence):
await self._speak_sentence(sentence)
async def flush(self) -> None:
"""Flush remaining buffer."""