Merge pull request #1251 from pipecat-ai/aleix/fish-tts-service-push-stop-frame

FishAudioTTSService should push TTSStoppedFrame
2025-02-20 07:32:05 -08:00
parent 98259af54e 6e3f96aa83
commit d9a67164f6
5 changed files with 10 additions and 10 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,9 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))

 ### Fixed

+- Fixed a `FishAudioTTSService` issue where `TTSStoppedFrame` was not being
+  pushed.
+
 - Fixed an issue that `start_callback` was not invoked for some LLM services.

 - Fixed an issue that would cause `DeepgramSTTService` to stop working after an
--- a/src/pipecat/services/ai_services.py
+++ b/src/pipecat/services/ai_services.py
@@ -209,7 +209,7 @@ class TTSService(AIService):
        # if True, TTSService will push TTSStoppedFrames, otherwise subclass must do it
        push_stop_frames: bool = False,
        # if push_stop_frames is True, wait for this idle period before pushing TTSStoppedFrame
-        stop_frame_timeout_s: float = 1.0,
+        stop_frame_timeout_s: float = 2.0,
        # if True, TTSService will push silence audio frames after TTSStoppedFrame
        push_silence_after_stop: bool = False,
        # if push_silence_after_stop is True, send this amount of audio silence
--- a/src/pipecat/services/elevenlabs.py
+++ b/src/pipecat/services/elevenlabs.py
@@ -191,7 +191,6 @@ class ElevenLabsTTSService(WordTTSService, WebsocketService):
            aggregate_sentences=True,
            push_text_frames=False,
            push_stop_frames=True,
-            stop_frame_timeout_s=2.0,
            pause_frame_processing=True,
            sample_rate=sample_rate,
            **kwargs,
--- a/src/pipecat/services/fish.py
+++ b/src/pipecat/services/fish.py
@@ -11,16 +11,13 @@ from loguru import logger
 from pydantic import BaseModel

 from pipecat.frames.frames import (
-    BotStoppedSpeakingFrame,
    CancelFrame,
    EndFrame,
    ErrorFrame,
    Frame,
-    LLMFullResponseEndFrame,
    StartFrame,
    StartInterruptionFrame,
    TTSAudioRawFrame,
-    TTSSpeakFrame,
    TTSStartedFrame,
    TTSStoppedFrame,
 )
@@ -60,7 +57,12 @@ class FishAudioTTSService(TTSService, WebsocketService):
        params: InputParams = InputParams(),
        **kwargs,
    ):
-        super().__init__(pause_frame_processing=True, sample_rate=sample_rate, **kwargs)
+        super().__init__(
+            push_stop_frames=True,
+            pause_frame_processing=True,
+            sample_rate=sample_rate,
+            **kwargs,
+        )

        self._api_key = api_key
        self._base_url = "wss://api.fish.audio/v1/tts/live"
--- a/src/pipecat/services/rime.py
+++ b/src/pipecat/services/rime.py
@@ -14,16 +14,13 @@ from loguru import logger
 from pydantic import BaseModel

 from pipecat.frames.frames import (
-    BotStoppedSpeakingFrame,
    CancelFrame,
    EndFrame,
    ErrorFrame,
    Frame,
-    LLMFullResponseEndFrame,
    StartFrame,
    StartInterruptionFrame,
    TTSAudioRawFrame,
-    TTSSpeakFrame,
    TTSStartedFrame,
    TTSStoppedFrame,
 )
@@ -100,7 +97,6 @@ class RimeTTSService(AudioContextWordTTSService, WebsocketService):
            aggregate_sentences=True,
            push_text_frames=False,
            push_stop_frames=True,
-            stop_frame_timeout_s=2.0,
            pause_frame_processing=True,
            sample_rate=sample_rate,
            **kwargs,