Simplify the TranscriptProcessor _emit_aggregated_text logic

This commit is contained in:
Mark Backman
2025-03-17 16:30:46 -04:00
parent acd0660f66
commit 6885d07e88
2 changed files with 50 additions and 284 deletions

View File

@@ -235,8 +235,7 @@ class TestUserTranscriptProcessor(unittest.IsolatedAsyncioTestCase):
BotStartedSpeakingFrame(),
SleepFrame(sleep=0.1),
TTSTextFrame(text="Hello"),
TTSTextFrame(text="world"),
TTSTextFrame(text="!"),
TTSTextFrame(text="world!"),
SleepFrame(sleep=0.1),
StartInterruptionFrame(), # User interrupts here
BotStartedSpeakingFrame(),
@@ -251,8 +250,7 @@ class TestUserTranscriptProcessor(unittest.IsolatedAsyncioTestCase):
expected_down_frames = [
BotStartedSpeakingFrame,
TTSTextFrame, # "Hello"
TTSTextFrame, # "world"
TTSTextFrame, # "!"
TTSTextFrame, # "world!"
TranscriptionUpdateFrame, # First message (emitted due to interruption)
StartInterruptionFrame, # Interruption frame comes after the update
BotStartedSpeakingFrame,
@@ -480,245 +478,3 @@ class TestUserTranscriptProcessor(unittest.IsolatedAsyncioTestCase):
self.assertEqual(message.role, "assistant")
# Should be properly joined without extra spaces
self.assertEqual(message.content, "Hello there! How's it going?")
async def test_mixed_spacing_styles(self):
"""Test handling mixed word-by-word and pre-spaced fragments"""
processor = AssistantTranscriptProcessor()
received_updates = []
@processor.event_handler("on_transcript_update")
async def handle_update(proc, frame: TranscriptionUpdateFrame):
received_updates.append(frame)
# Mix of spacing styles within the same utterance
frames_to_send = [
BotStartedSpeakingFrame(),
SleepFrame(sleep=0.1),
# Word-by-word style
TTSTextFrame(text="First"),
TTSTextFrame(text="style."),
# Pre-spaced style
TTSTextFrame(text=" Second"),
TTSTextFrame(text=" style"),
TTSTextFrame(text="!"),
BotStoppedSpeakingFrame(),
]
expected_down_frames = [
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TranscriptionUpdateFrame,
]
await run_test(
processor,
frames_to_send=frames_to_send,
expected_down_frames=expected_down_frames,
)
self.assertEqual(len(received_updates), 1)
message = received_updates[0].messages[0]
self.assertEqual(message.content, "First style. Second style!")
async def test_punctuation_handling(self):
"""Test handling of various punctuation patterns"""
processor = AssistantTranscriptProcessor()
received_updates = []
@processor.event_handler("on_transcript_update")
async def handle_update(proc, frame: TranscriptionUpdateFrame):
received_updates.append(frame)
# Test various punctuation types
frames_to_send = [
BotStartedSpeakingFrame(),
SleepFrame(sleep=0.1),
TTSTextFrame(text="Commas"),
TTSTextFrame(text=","),
TTSTextFrame(text="colons"),
TTSTextFrame(text=":"),
TTSTextFrame(text="semicolons"),
TTSTextFrame(text=";"),
TTSTextFrame(text="quotes"),
TTSTextFrame(text="'"),
TTSTextFrame(text="and"),
TTSTextFrame(text='"'),
TTSTextFrame(text="double quotes"),
TTSTextFrame(text="!"),
BotStoppedSpeakingFrame(),
]
expected_down_frames = [
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TranscriptionUpdateFrame,
]
await run_test(
processor,
frames_to_send=frames_to_send,
expected_down_frames=expected_down_frames,
)
self.assertEqual(len(received_updates), 1)
message = received_updates[0].messages[0]
self.assertEqual(
message.content, "Commas, colons: semicolons; quotes' and\" double quotes!"
)
async def test_complex_mixed_case(self):
"""Test a complex mix of patterns to ensure robustness"""
processor = AssistantTranscriptProcessor()
received_updates = []
@processor.event_handler("on_transcript_update")
async def handle_update(proc, frame: TranscriptionUpdateFrame):
received_updates.append(frame)
# Complex mixed case with various patterns
frames_to_send = [
BotStartedSpeakingFrame(),
SleepFrame(sleep=0.1),
# Pre-spaced fragments
TTSTextFrame(text="Hello"),
TTSTextFrame(text=" there"),
TTSTextFrame(text="!"),
# Sentence boundary
TTSTextFrame(text=" I'm"),
TTSTextFrame(text=" testing"),
TTSTextFrame(text=" spacing"),
TTSTextFrame(text="."),
# Word-by-word fragments
TTSTextFrame(text="Does"),
TTSTextFrame(text="this"),
TTSTextFrame(text="work"),
TTSTextFrame(text="correctly"),
TTSTextFrame(text="?"),
# Mixed punctuation and spacing
TTSTextFrame(text=" Let's"),
TTSTextFrame(text=" see:"),
TTSTextFrame(text="commas"),
TTSTextFrame(text=","),
TTSTextFrame(text=" semicolons"),
TTSTextFrame(text=";"),
TTSTextFrame(text=" and"),
TTSTextFrame(text=" quotes"),
TTSTextFrame(text="'"),
TTSTextFrame(text="!"),
BotStoppedSpeakingFrame(),
]
expected_down_frames = [
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TranscriptionUpdateFrame,
]
await run_test(
processor,
frames_to_send=frames_to_send,
expected_down_frames=expected_down_frames,
)
self.assertEqual(len(received_updates), 1)
message = received_updates[0].messages[0]
expected = "Hello there! I'm testing spacing. Does this work correctly? Let's see: commas, semicolons; and quotes'!"
self.assertEqual(message.content, expected)
async def test_multiple_consecutive_punctuation(self):
"""Test handling of multiple consecutive punctuation marks"""
processor = AssistantTranscriptProcessor()
received_updates = []
@processor.event_handler("on_transcript_update")
async def handle_update(proc, frame: TranscriptionUpdateFrame):
received_updates.append(frame)
frames_to_send = [
BotStartedSpeakingFrame(),
SleepFrame(sleep=0.1),
TTSTextFrame(text="Wow"),
TTSTextFrame(text="!"),
TTSTextFrame(text="!"),
TTSTextFrame(text="!"),
TTSTextFrame(text=" That's"),
TTSTextFrame(text=" amazing"),
TTSTextFrame(text="..."),
TTSTextFrame(text=" Don't"),
TTSTextFrame(text=" you"),
TTSTextFrame(text=" think"),
TTSTextFrame(text="?"),
TTSTextFrame(text="?"),
BotStoppedSpeakingFrame(),
]
expected_down_frames = [
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TTSTextFrame,
TranscriptionUpdateFrame,
]
await run_test(
processor,
frames_to_send=frames_to_send,
expected_down_frames=expected_down_frames,
)
self.assertEqual(len(received_updates), 1)
message = received_updates[0].messages[0]
self.assertEqual(message.content, "Wow!!! That's amazing... Don't you think??")