Fix "bot-llm-text" not firing when using OpenAI Realtime
This commit is contained in:
@@ -724,10 +724,26 @@ class OpenAIRealtimeLLMService(LLMService):
|
||||
# We receive audio transcript deltas (as opposed to text deltas) when
|
||||
# the output modality is "audio" (the default)
|
||||
if evt.delta:
|
||||
frame = TTSTextFrame(evt.delta, aggregated_by=AggregationType.SENTENCE)
|
||||
# OpenAI Realtime text already includes any necessary inter-chunk spaces
|
||||
frame.includes_inter_frame_spaces = True
|
||||
await self.push_frame(frame)
|
||||
await self._push_output_transcript_text_frames(evt.delta)
|
||||
|
||||
async def _push_output_transcript_text_frames(self, text: str):
    """Emit a chunk of output transcript as both LLM text and TTS text.

    A speech-to-speech service such as OpenAI Realtime plays the role of
    both the LLM and the TTS. In a classic "cascade" pipeline the
    LLMTextFrames stop at the TTS service, which would suggest this
    service has no reason to emit them. RTVI, however, uses LLMTextFrames
    as the trigger for its "bot-llm-text" event, so one is emitted here
    alongside the TTSTextFrame.

    Args:
        text: The transcript chunk to forward.
    """
    # LLM-side frame first. It is flagged so that it is not appended to
    # the context, which would otherwise duplicate the context message.
    rtvi_trigger_frame = LLMTextFrame(text)
    rtvi_trigger_frame.append_to_context = False
    await self.push_frame(rtvi_trigger_frame)

    # TTS-side frame second. OpenAI Realtime text already carries any
    # spacing needed between consecutive chunks.
    spoken_text_frame = TTSTextFrame(text, aggregated_by=AggregationType.SENTENCE)
    spoken_text_frame.includes_inter_frame_spaces = True
    await self.push_frame(spoken_text_frame)
|
||||
|
||||
async def _handle_evt_function_call_arguments_done(self, evt):
|
||||
"""Handle completion of function call arguments.
|
||||
|
||||
Reference in New Issue
Block a user