Add TranslationFrame and use in GladiaSTTService; add 13c-gladia-translation.py

2025-04-17 16:22:33 -04:00
parent 2fb85941d3
commit d1086914fe
4 changed files with 117 additions and 1 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -49,6 +49,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added 04 foundational examples for client/server transports. Also, renamed
  `29-livekit-audio-chat.py` to `04b-transports-livekit.py`.

+- Added foundational example `13c-gladia-translation.py` showing how to use
+  `TranscriptionFrame` and `TranslationFrame`.
+
 ## [0.0.65] - 2025-04-23 "Sant Jordi's release" 🌹📕

 https://en.wikipedia.org/wiki/Saint_George%27s_Day_in_Catalonia
@@ -108,6 +111,9 @@ https://en.wikipedia.org/wiki/Saint_George%27s_Day_in_Catalonia

 - Added word/timestamp pairs to `ElevenLabsHttpTTSService`.

+- Added `TranslationFrame`, a new frame type that contains a translated
+  transcription.
+
 - It is now possible to disable `SoundfileMixer` when created. You can then use
  `MixerEnableFrame` to dynamically enable it when necessary.

@@ -129,6 +135,9 @@ https://en.wikipedia.org/wiki/Saint_George%27s_Day_in_Catalonia
 - `OpenAILLMService` and `OpenPipeLLMService` now use `gpt-4.1` as their
  default model.

+- Updated `GladiaSTTService` to output a `TranslationFrame` (instead of a
+  `TranscriptionFrame`) when `translation` and `translation_config` are set.
+
 - `SoundfileMixer` constructor arguments need to be keywords.

 ### Deprecated
--- a/examples/foundational/13c-gladia-translation.py
+++ b/examples/foundational/13c-gladia-translation.py
@@ -0,0 +1,90 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import os
+
+from dotenv import load_dotenv
+from loguru import logger
+
+from pipecat.frames.frames import Frame, TranscriptionFrame, TranslationFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.services.gladia.config import (
+    GladiaInputParams,
+    LanguageConfig,
+    RealtimeProcessingConfig,
+    TranslationConfig,
+)
+from pipecat.services.gladia.stt import GladiaSTTService
+from pipecat.transcriptions.language import Language
+from pipecat.transports.base_transport import TransportParams
+from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
+from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
+
+load_dotenv(override=True)
+
+
+class TranscriptionLogger(FrameProcessor):
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
+        if isinstance(frame, TranscriptionFrame):
+            print(f"Transcription ({frame.language}): {frame.text}")
+        elif isinstance(frame, TranslationFrame):
+            print(f"Translation ({frame.language}): {frame.text}")
+
+
+async def run_bot(webrtc_connection: SmallWebRTCConnection):
+    logger.info(f"Starting bot")
+
+    transport = SmallWebRTCTransport(
+        webrtc_connection=webrtc_connection,
+        params=TransportParams(audio_in_enabled=True),
+    )
+
+    stt = GladiaSTTService(
+        api_key=os.getenv("GLADIA_API_KEY"),
+        params=GladiaInputParams(
+            language_config=LanguageConfig(
+                languages=[Language.EN],  # Input in English
+                code_switching=False,
+            ),
+            realtime_processing=RealtimeProcessingConfig(
+                translation=True,  # Enable translation
+                translation_config=TranslationConfig(
+                    target_languages=[Language.ES],  # Translate to Spanish
+                    model="enhanced",  # Use the enhanced translation model
+                ),
+            ),
+        ),
+    )
+
+    tl = TranscriptionLogger()
+
+    pipeline = Pipeline([transport.input(), stt, tl])
+
+    task = PipelineTask(pipeline)
+
+    @transport.event_handler("on_client_disconnected")
+    async def on_client_disconnected(transport, client):
+        logger.info(f"Client disconnected")
+
+    @transport.event_handler("on_client_closed")
+    async def on_client_closed(transport, client):
+        logger.info(f"Client closed connection")
+        await task.cancel()
+
+    runner = PipelineRunner(handle_sigint=False)
+
+    await runner.run(task)
+
+
+if __name__ == "__main__":
+    from run import main
+
+    main()
--- a/src/pipecat/frames/frames.py
+++ b/src/pipecat/frames/frames.py
@@ -256,6 +256,22 @@ class InterimTranscriptionFrame(TextFrame):
        return f"{self.name}(user: {self.user_id}, text: [{self.text}], language: {self.language}, timestamp: {self.timestamp})"


+@dataclass
+class TranslationFrame(TextFrame):
+    """A text frame with translated transcription data.
+
+    Will be placed in the transport's receive queue when a participant speaks.
+
+    """
+
+    user_id: str
+    timestamp: str
+    language: Optional[Language] = None
+
+    def __str__(self):
+        return f"{self.name}(user: {self.user_id}, text: [{self.text}], language: {self.language}, timestamp: {self.timestamp})"
+
+
 @dataclass
 class OpenAILLMContextAssistantTimestampFrame(DataFrame):
    """Timestamp information for assistant message in LLM context."""
--- a/src/pipecat/services/gladia/stt.py
+++ b/src/pipecat/services/gladia/stt.py
@@ -20,6 +20,7 @@ from pipecat.frames.frames import (
    InterimTranscriptionFrame,
    StartFrame,
    TranscriptionFrame,
+    TranslationFrame,
 )
 from pipecat.services.gladia.config import GladiaInputParams
 from pipecat.services.stt_service import STTService
@@ -405,7 +406,7 @@ class GladiaSTTService(STTService):
                    translation = translated_utterance["text"]
                    if translated_language != original_language and confidence >= self._confidence:
                        await self.push_frame(
-                            TranscriptionFrame(
+                            TranslationFrame(
                                translation, "", time_now_iso8601(), translated_language
                            )
                        )