Make the mechanism of adding spaces when concatenating TTS (or speech-to-speech LLM) output text explicit and deterministic, rather than heuristic-based.

This fixes a bug where spaces were sometimes missing from assistant messages in context.
This commit is contained in:
Paul Kompfner
2025-11-10 12:28:40 -05:00
parent c2ce143e6c
commit 913194844e
10 changed files with 113 additions and 44 deletions

View File

@@ -438,17 +438,22 @@ class TestUserTranscriptProcessor(unittest.IsolatedAsyncioTestCase):
received_updates.append(frame)
# Test the specific pattern shared
def make_tts_text_frame(text: str) -> TTSTextFrame:
    """Build a TTSTextFrame for ``text`` with inter-frame spaces flagged.

    The flag is assigned after construction rather than passed to the
    constructor.

    NOTE(review): presumably ``includes_inter_frame_spaces = True`` tells the
    consumer that each chunk already carries its own spacing (e.g. " there"),
    so chunks are concatenated verbatim -- confirm against TTSTextFrame.

    Args:
        text: The text chunk to wrap.

    Returns:
        The new frame with ``includes_inter_frame_spaces`` set to True.
    """
    tts_frame = TTSTextFrame(text=text)
    tts_frame.includes_inter_frame_spaces = True
    return tts_frame
frames_to_send = [
BotStartedSpeakingFrame(),
SleepFrame(),
TTSTextFrame(text="Hello"),
TTSTextFrame(text=" there"),
TTSTextFrame(text="!"),
TTSTextFrame(text=" How"),
TTSTextFrame(text="'s"),
TTSTextFrame(text=" it"),
TTSTextFrame(text=" going"),
TTSTextFrame(text="?"),
make_tts_text_frame("Hello"),
make_tts_text_frame(" there"),
make_tts_text_frame("!"),
make_tts_text_frame(" How"),
make_tts_text_frame("'s"),
make_tts_text_frame(" it"),
make_tts_text_frame(" going"),
make_tts_text_frame("?"),
BotStoppedSpeakingFrame(),
]