Add TranslationFrame and use in GladiaSTTService; add 13c-gladia-translation.py
This commit is contained in:
@@ -49,6 +49,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Added 04 foundational examples for client/server transports. Also, renamed
|
||||
`29-livekit-audio-chat.py` to `04b-transports-livekit.py`.
|
||||
|
||||
- Added foundational example `13c-gladia-translation.py` showing how to use
|
||||
`TranscriptionFrame` and `TranslationFrame`.
|
||||
|
||||
## [0.0.65] - 2025-04-23 "Sant Jordi's release" 🌹📕
|
||||
|
||||
https://en.wikipedia.org/wiki/Saint_George%27s_Day_in_Catalonia
|
||||
@@ -108,6 +111,9 @@ https://en.wikipedia.org/wiki/Saint_George%27s_Day_in_Catalonia
|
||||
|
||||
- Added word/timestamp pairs to `ElevenLabsHttpTTSService`.
|
||||
|
||||
- Added `TranslationFrame`, a new frame type that contains a translated
|
||||
transcription.
|
||||
|
||||
- It is now possible to disable `SoundfileMixer` when created. You can then use
|
||||
`MixerEnableFrame` to dynamically enable it when necessary.
|
||||
|
||||
@@ -129,6 +135,9 @@ https://en.wikipedia.org/wiki/Saint_George%27s_Day_in_Catalonia
|
||||
- `OpenAILLMService` and `OpenPipeLLMService` now use `gpt-4.1` as their
|
||||
default model.
|
||||
|
||||
- Updated `GladiaSTTService` to output a `TranslationFrame` when `translation`
|
||||
is enabled and a `translation_config` is provided.
|
||||
|
||||
- `SoundfileMixer` constructor arguments need to be keywords.
|
||||
|
||||
### Deprecated
|
||||
|
||||
90
examples/foundational/13c-gladia-translation.py
Normal file
90
examples/foundational/13c-gladia-translation.py
Normal file
@@ -0,0 +1,90 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-2-Clause
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame, TranslationFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.gladia.config import (
|
||||
GladiaInputParams,
|
||||
LanguageConfig,
|
||||
RealtimeProcessingConfig,
|
||||
TranslationConfig,
|
||||
)
|
||||
from pipecat.services.gladia.stt import GladiaSTTService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||
from pipecat.transports.network.webrtc_connection import SmallWebRTCConnection
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
    """Print transcription and translation frames as they pass through.

    Every frame is forwarded unchanged after inspection, so control frames
    and any processors placed after this one keep receiving traffic.
    """

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Log transcription/translation text and pass the frame along.

        Args:
            frame: The frame received by this processor.
            direction: The direction in which the frame is travelling.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, TranscriptionFrame):
            print(f"Transcription ({frame.language}): {frame.text}")
        elif isinstance(frame, TranslationFrame):
            print(f"Translation ({frame.language}): {frame.text}")

        # Push all frames through; without this the frame would be dropped
        # here instead of continuing in its original direction.
        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
async def run_bot(webrtc_connection: SmallWebRTCConnection):
    """Run a transcription + translation pipeline for one WebRTC connection.

    Builds a pipeline of transport input -> Gladia STT -> TranscriptionLogger
    and runs it until the pipeline task finishes or is cancelled.

    Args:
        webrtc_connection: The connection the transport is created on.
    """
    logger.info("Starting bot")

    transport = SmallWebRTCTransport(
        webrtc_connection=webrtc_connection,
        params=TransportParams(audio_in_enabled=True),
    )

    # Input is English only, with code switching turned off.
    language_config = LanguageConfig(
        languages=[Language.EN],
        code_switching=False,
    )

    # Translate to Spanish using the "enhanced" translation model.
    translation_config = TranslationConfig(
        target_languages=[Language.ES],
        model="enhanced",
    )

    realtime_processing = RealtimeProcessingConfig(
        translation=True,
        translation_config=translation_config,
    )

    stt = GladiaSTTService(
        api_key=os.getenv("GLADIA_API_KEY"),
        params=GladiaInputParams(
            language_config=language_config,
            realtime_processing=realtime_processing,
        ),
    )

    transcription_logger = TranscriptionLogger()

    task = PipelineTask(Pipeline([transport.input(), stt, transcription_logger]))

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")

    @transport.event_handler("on_client_closed")
    async def on_client_closed(transport, client):
        logger.info("Client closed connection")
        # Cancel the pipeline task once the client has closed the connection.
        await task.cancel()

    await PipelineRunner(handle_sigint=False).run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Imported only when executed as a script.
    # NOTE(review): `run` is presumably a helper module shipped next to the
    # examples that provides the entry point - confirm.
    from run import main

    main()
|
||||
@@ -256,6 +256,22 @@ class InterimTranscriptionFrame(TextFrame):
|
||||
return f"{self.name}(user: {self.user_id}, text: [{self.text}], language: {self.language}, timestamp: {self.timestamp})"
|
||||
|
||||
|
||||
@dataclass
class TranslationFrame(TextFrame):
    """A text frame that carries a translated transcription.

    Attributes:
        user_id: Identifier of the user the translated text belongs to.
        timestamp: Timestamp associated with the translation, as a string.
        language: Language of the translated text, or None when not provided.
    """

    user_id: str
    timestamp: str
    language: Optional[Language] = None

    def __str__(self):
        details = (
            f"user: {self.user_id}, text: [{self.text}], "
            f"language: {self.language}, timestamp: {self.timestamp}"
        )
        return f"{self.name}({details})"
|
||||
|
||||
|
||||
@dataclass
|
||||
class OpenAILLMContextAssistantTimestampFrame(DataFrame):
|
||||
"""Timestamp information for assistant message in LLM context."""
|
||||
|
||||
@@ -20,6 +20,7 @@ from pipecat.frames.frames import (
|
||||
InterimTranscriptionFrame,
|
||||
StartFrame,
|
||||
TranscriptionFrame,
|
||||
TranslationFrame,
|
||||
)
|
||||
from pipecat.services.gladia.config import GladiaInputParams
|
||||
from pipecat.services.stt_service import STTService
|
||||
@@ -405,7 +406,7 @@ class GladiaSTTService(STTService):
|
||||
translation = translated_utterance["text"]
|
||||
if translated_language != original_language and confidence >= self._confidence:
|
||||
await self.push_frame(
|
||||
TranscriptionFrame(
|
||||
TranslationFrame(
|
||||
translation, "", time_now_iso8601(), translated_language
|
||||
)
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user