Compare commits

...

1 Commit

Author SHA1 Message Date
James Hush
be02b73797 Japanese example 2025-01-02 18:05:20 +08:00

View File

@@ -16,15 +16,14 @@ from runner import configure
from pipecat.frames.frames import Frame, LLMMessagesFrame, TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
from pipecat.processors.aggregators.sentence import SentenceAggregator
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.azure import AzureTTSService
from pipecat.services.azure import AzureSTTService, AzureTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import (
DailyParams,
DailyTranscriptionSettings,
DailyTransport,
DailyTransportMessageFrame,
)
@@ -44,18 +43,20 @@ It also isn't saving what the user or bot says into the context object for use i
# We need to use a custom service here to yield LLM frames without saving
# any context
class TranslationProcessor(FrameProcessor):
def __init__(self, language):
def __init__(self, source_language, language):
super().__init__()
self._language = language
self._source_language = source_language
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if isinstance(frame, TextFrame):
logger.debug(f"Translating {self._source_language}: {frame.text} to {self._language}")
context = [
{
"role": "system",
"content": f"You will be provided with a sentence in English, and your task is to translate it into {self._language}.",
"content": f"You will be provided with a sentence in {self._source_language}, and your task is to only translate it into {self._language}.",
},
{"role": "user", "content": frame.text},
]
@@ -79,7 +80,8 @@ class TranslationSubtitles(FrameProcessor):
await super().process_frame(frame, direction)
if isinstance(frame, TextFrame):
message = {"language": self._language, "text": frame.text}
print(f"TranslationSubtitles: {frame.text}")
message = {"event": "translation", "language": self._language, "text": frame.text}
await self.push_frame(DailyTransportMessageFrame(message))
await self.push_frame(frame)
@@ -92,34 +94,54 @@ async def main():
transport = DailyTransport(
room_url,
token,
"Translator",
"Translator bot",
DailyParams(
audio_out_enabled=True,
transcription_enabled=True,
transcription_settings=DailyTranscriptionSettings(extra={"interim_results": False}),
vad_enabled=True,
vad_audio_passthrough=True,
),
)
stt = AzureSTTService(
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
region=os.getenv("AZURE_SPEECH_REGION"),
language="ja-JP",
)
tts = AzureTTSService(
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
region=os.getenv("AZURE_SPEECH_REGION"),
voice="es-ES-AlvaroNeural",
# Use a Japanese voice from Azure; see the list of supported voices:
# https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#text-to-speech
voice="ja-JP-KeitaNeural",
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
sa = SentenceAggregator()
tp = TranslationProcessor("Spanish")
tp = TranslationProcessor(source_language="English", language="Japanese")
lfra = LLMFullResponseAggregator()
ts = TranslationSubtitles("spanish")
ts = TranslationSubtitles("japanese")
pipeline = Pipeline([transport.input(), sa, tp, llm, lfra, ts, tts, transport.output()])
pipeline = Pipeline(
[
transport.input(),
stt,
sa,
tp,
llm,
lfra,
ts,
tts,
transport.output(),
]
)
task = PipelineTask(pipeline)
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
logger.info("First participant joined")
runner = PipelineRunner()