diff --git a/src/dailyai/services/base_transport_service.py b/src/dailyai/services/base_transport_service.py index 113fe93dc..a13c63498 100644 --- a/src/dailyai/services/base_transport_service.py +++ b/src/dailyai/services/base_transport_service.py @@ -176,13 +176,19 @@ class BaseTransportService(): self._logger.info("🎬 Starting frame consumer thread") b = bytearray() smallest_write_size = 3200 + largest_write_size = 8000 all_audio_frames = bytearray() while True: try: frames_or_frame: QueueFrame | list[QueueFrame] = ( self._threadsafe_send_queue.get() ) - if isinstance(frames_or_frame, QueueFrame): + if isinstance(frames_or_frame, AudioQueueFrame) and len(frames_or_frame.data) > largest_write_size: + # subdivide large audio frames to enable interruption + frames = [] + for i in range(0, len(frames_or_frame.data), largest_write_size): + frames.append(AudioQueueFrame(frames_or_frame.data[i : i+largest_write_size])) + elif isinstance(frames_or_frame, QueueFrame): frames: list[QueueFrame] = [frames_or_frame] elif isinstance(frames_or_frame, list): frames: list[QueueFrame] = frames_or_frame @@ -190,6 +196,7 @@ class BaseTransportService(): raise Exception("Unknown type in output queue") for frame in frames: + print(f"got frame of type: {type(frame)}") if isinstance(frame, EndStreamQueueFrame): self._logger.info("Stopping frame consumer thread") self._threadsafe_send_queue.task_done() diff --git a/src/examples/foundational/06b-patient-intake.py b/src/examples/foundational/06b-patient-intake.py index a9726e14a..e648c34f5 100644 --- a/src/examples/foundational/06b-patient-intake.py +++ b/src/examples/foundational/06b-patient-intake.py @@ -7,6 +7,7 @@ from typing import AsyncGenerator from dailyai.services.daily_transport_service import DailyTransportService from dailyai.services.azure_ai_services import AzureLLMService, AzureTTSService from dailyai.services.open_ai_services import OpenAILLMService +from dailyai.services.deepgram_ai_services import DeepgramTTSService from dailyai.services.elevenlabs_ai_service import ElevenLabsTTSService from dailyai.queue_aggregators import LLMAssistantContextAggregator, LLMContextAggregator, LLMUserContextAggregator from support.runner import configure @@ -261,7 +262,8 @@ async def main(room_url: str, token): # tts = AzureTTSService(api_key=os.getenv( # "AZURE_SPEECH_API_KEY"), region=os.getenv("AZURE_SPEECH_REGION")) tts = ElevenLabsTTSService(aiohttp_session=session, api_key=os.getenv( - "ELEVENLABS_API_KEY"), voice_id="EXAVITQu4vr4xnSDxMaL") + "ELEVENLABS_API_KEY"), voice_id="XrExE9yKIg1WjnnlVkGX") # matilda + # tts = DeepgramTTSService(aiohttp_session=session, api_key=os.getenv("DEEPGRAM_API_KEY"), voice=os.getenv("DEEPGRAM_VOICE")) messages = [ ] tma_in = LLMUserContextAggregator(