Add TranslationFrame and use in GladiaSTTService; add 13c-gladia-translation.py

This commit is contained in:
Mark Backman
2025-04-17 16:22:33 -04:00
parent 2fb85941d3
commit d1086914fe
4 changed files with 117 additions and 1 deletions

View File

@@ -49,6 +49,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added 04 foundational examples for client/server transports. Also, renamed
`29-livekit-audio-chat.py` to `04b-transports-livekit.py`.
- Added foundational example `13c-gladia-translation.py` showing how to use
`TranscriptionFrame` and `TranslationFrame`.
## [0.0.65] - 2025-04-23 "Sant Jordi's release" 🌹📕
https://en.wikipedia.org/wiki/Saint_George%27s_Day_in_Catalonia
@@ -108,6 +111,9 @@ https://en.wikipedia.org/wiki/Saint_George%27s_Day_in_Catalonia
- Added word/timestamp pairs to `ElevenLabsHttpTTSService`.
- Added `TranslationFrame`, a new frame type that contains a translated
transcription.
- It is now possible to disable `SoundfileMixer` when created. You can then use
`MixerEnableFrame` to dynamically enable it when necessary.
@@ -129,6 +135,9 @@ https://en.wikipedia.org/wiki/Saint_George%27s_Day_in_Catalonia
- `OpenAILLMService` and `OpenPipeLLMService` now use `gpt-4.1` as their
default model.
- Updated `GladiaSTTService` to output a `TranslationFrame` when `translation`
is enabled and a `translation_config` is specified.
- `SoundfileMixer` constructor arguments need to be keywords.
### Deprecated

View File

@@ -0,0 +1,90 @@
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.frames.frames import Frame, TranscriptionFrame, TranslationFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.gladia.config import (
GladiaInputParams,
LanguageConfig,
RealtimeProcessingConfig,
TranslationConfig,
)
from pipecat.services.gladia.stt import GladiaSTTService
from pipecat.transcriptions.language import Language
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
# Load settings (e.g. GLADIA_API_KEY, read further down) from a local .env
# file; override=True lets values from the file replace variables that are
# already set in the process environment.
load_dotenv(override=True)
class TranscriptionLogger(FrameProcessor):
    """Print transcriptions and translations that pass through the pipeline."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Print the text of transcription/translation frames, then forward.

        Args:
            frame: The frame handed to this processor.
            direction: The direction in which the frame is travelling.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, TranscriptionFrame):
            print(f"Transcription ({frame.language}): {frame.text}")
        elif isinstance(frame, TranslationFrame):
            print(f"Translation ({frame.language}): {frame.text}")

        # Forward every frame, not only the ones printed above. Without this
        # the processor silently consumes all traffic, including control
        # frames that the rest of the pipeline relies on seeing.
        await self.push_frame(frame, direction)
async def run_bot(webrtc_connection: SmallWebRTCConnection):
    """Run a pipeline that transcribes English audio and translates it to Spanish.

    Audio received over the WebRTC connection is sent to Gladia, and the
    resulting frames are handed to a ``TranscriptionLogger``.

    Args:
        webrtc_connection: The established WebRTC connection to read audio from.
    """
    logger.info("Starting bot")

    # Audio input only: this example never sends audio back to the client.
    transport = SmallWebRTCTransport(
        webrtc_connection=webrtc_connection,
        params=TransportParams(audio_in_enabled=True),
    )

    source_language = LanguageConfig(
        languages=[Language.EN],  # Input in English
        code_switching=False,
    )
    translation_settings = TranslationConfig(
        target_languages=[Language.ES],  # Translate to Spanish
        model="enhanced",  # Use the enhanced translation model
    )
    realtime_settings = RealtimeProcessingConfig(
        translation=True,  # Enable translation
        translation_config=translation_settings,
    )

    stt = GladiaSTTService(
        api_key=os.getenv("GLADIA_API_KEY"),
        params=GladiaInputParams(
            language_config=source_language,
            realtime_processing=realtime_settings,
        ),
    )

    tl = TranscriptionLogger()

    pipeline = Pipeline([transport.input(), stt, tl])

    task = PipelineTask(pipeline)

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")

    @transport.event_handler("on_client_closed")
    async def on_client_closed(transport, client):
        logger.info("Client closed connection")
        # The connection is gone for good, so stop the pipeline.
        await task.cancel()

    # NOTE(review): handle_sigint=False presumably leaves Ctrl-C handling to
    # the process that hosts this bot - confirm against the `run` module.
    runner = PipelineRunner(handle_sigint=False)

    await runner.run(task)
if __name__ == "__main__":
    # Executed only when this file is run as a script: hand control to the
    # entry point provided by the sibling ``run`` module.
    import run

    run.main()

View File

@@ -256,6 +256,22 @@ class InterimTranscriptionFrame(TextFrame):
return f"{self.name}(user: {self.user_id}, text: [{self.text}], language: {self.language}, timestamp: {self.timestamp})"
@dataclass
class TranslationFrame(TextFrame):
    """A text frame whose text is a translated transcription.

    ``GladiaSTTService`` pushes one of these, in place of a
    ``TranscriptionFrame``, for translated utterances.
    """

    # Identifier of the user the text belongs to (GladiaSTTService passes "").
    user_id: str
    # Timestamp string for the frame; GladiaSTTService passes the result of
    # time_now_iso8601() - presumably an ISO 8601 string, confirm.
    timestamp: str
    # Language of the translated text, when known.
    language: Optional[Language] = None

    def __str__(self):
        details = (
            f"user: {self.user_id}, text: [{self.text}], "
            f"language: {self.language}, timestamp: {self.timestamp}"
        )
        return f"{self.name}({details})"
@dataclass
class OpenAILLMContextAssistantTimestampFrame(DataFrame):
"""Timestamp information for assistant message in LLM context."""

View File

@@ -20,6 +20,7 @@ from pipecat.frames.frames import (
InterimTranscriptionFrame,
StartFrame,
TranscriptionFrame,
TranslationFrame,
)
from pipecat.services.gladia.config import GladiaInputParams
from pipecat.services.stt_service import STTService
@@ -405,7 +406,7 @@ class GladiaSTTService(STTService):
translation = translated_utterance["text"]
if translated_language != original_language and confidence >= self._confidence:
await self.push_frame(
TranscriptionFrame(
TranslationFrame(
translation, "", time_now_iso8601(), translated_language
)
)