From 293677588deb2c289aa6de460a3157b211f6e023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 19 Feb 2025 21:39:00 -0800 Subject: [PATCH 1/2] tts: make stop_frame_timeout_s default to 2.0s --- src/pipecat/services/ai_services.py | 2 +- src/pipecat/services/elevenlabs.py | 1 - src/pipecat/services/rime.py | 4 ---- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/pipecat/services/ai_services.py b/src/pipecat/services/ai_services.py index 6b739163a..05c637126 100644 --- a/src/pipecat/services/ai_services.py +++ b/src/pipecat/services/ai_services.py @@ -209,7 +209,7 @@ class TTSService(AIService): # if True, TTSService will push TTSStoppedFrames, otherwise subclass must do it push_stop_frames: bool = False, # if push_stop_frames is True, wait for this idle period before pushing TTSStoppedFrame - stop_frame_timeout_s: float = 1.0, + stop_frame_timeout_s: float = 2.0, # if True, TTSService will push silence audio frames after TTSStoppedFrame push_silence_after_stop: bool = False, # if push_silence_after_stop is True, send this amount of audio silence diff --git a/src/pipecat/services/elevenlabs.py b/src/pipecat/services/elevenlabs.py index 3c8cbb3ed..5c312e7fe 100644 --- a/src/pipecat/services/elevenlabs.py +++ b/src/pipecat/services/elevenlabs.py @@ -191,7 +191,6 @@ class ElevenLabsTTSService(WordTTSService, WebsocketService): aggregate_sentences=True, push_text_frames=False, push_stop_frames=True, - stop_frame_timeout_s=2.0, pause_frame_processing=True, sample_rate=sample_rate, **kwargs, diff --git a/src/pipecat/services/rime.py b/src/pipecat/services/rime.py index f87405a63..e12844a96 100644 --- a/src/pipecat/services/rime.py +++ b/src/pipecat/services/rime.py @@ -14,16 +14,13 @@ from loguru import logger from pydantic import BaseModel from pipecat.frames.frames import ( - BotStoppedSpeakingFrame, CancelFrame, EndFrame, ErrorFrame, Frame, - LLMFullResponseEndFrame, StartFrame, StartInterruptionFrame, TTSAudioRawFrame, 
- TTSSpeakFrame, TTSStartedFrame, TTSStoppedFrame, ) @@ -100,7 +97,6 @@ class RimeTTSService(AudioContextWordTTSService, WebsocketService): aggregate_sentences=True, push_text_frames=False, push_stop_frames=True, - stop_frame_timeout_s=2.0, pause_frame_processing=True, sample_rate=sample_rate, **kwargs, From 6e3f96aa834394dbcb5850ae7e32edd84362618e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 19 Feb 2025 21:39:36 -0800 Subject: [PATCH 2/2] fish: automatically send TTSStoppedFrame after timeout --- CHANGELOG.md | 3 +++ src/pipecat/services/fish.py | 10 ++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fe52e1da..5e5137f07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,9 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general")) ### Fixed +- Fixed a `FishAudioTTSService` issue where `TTSStoppedFrame` was not being + pushed. + - Fixed an issue that `start_callback` was not invoked for some LLM services. 
- Fixed an issue that would cause `DeepgramSTTService` to stop working after an diff --git a/src/pipecat/services/fish.py b/src/pipecat/services/fish.py index 94475aca7..e2a75bdb2 100644 --- a/src/pipecat/services/fish.py +++ b/src/pipecat/services/fish.py @@ -11,16 +11,13 @@ from loguru import logger from pydantic import BaseModel from pipecat.frames.frames import ( - BotStoppedSpeakingFrame, CancelFrame, EndFrame, ErrorFrame, Frame, - LLMFullResponseEndFrame, StartFrame, StartInterruptionFrame, TTSAudioRawFrame, - TTSSpeakFrame, TTSStartedFrame, TTSStoppedFrame, ) @@ -60,7 +57,12 @@ class FishAudioTTSService(TTSService, WebsocketService): params: InputParams = InputParams(), **kwargs, ): - super().__init__(pause_frame_processing=True, sample_rate=sample_rate, **kwargs) + super().__init__( + push_stop_frames=True, + pause_frame_processing=True, + sample_rate=sample_rate, + **kwargs, + ) self._api_key = api_key self._base_url = "wss://api.fish.audio/v1/tts/live"