Add foundational example 35
This commit is contained in:
192
examples/foundational/35-voice-switching.py
Normal file
192
examples/foundational/35-voice-switching.py
Normal file
@@ -0,0 +1,192 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.utils.text.pattern_pair_aggregator import PatternMatch, PatternPairAggregator
|
||||
|
||||
# Pull settings from a .env file before anything reads os.environ.
load_dotenv(override=True)

# Drop logger handler 0 and log to stderr at DEBUG level instead.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")

# Voice ids handed to the TTS service, keyed by the role name that appears
# between <voice> and </voice> in the LLM output:
#   narrator - default narration voice
#   female   - female character dialogue
#   male     - male character dialogue
VOICE_IDS = {
    "narrator": "c45bc5ec-dc68-4feb-8829-6e6b2748095d",
    "female": "71a7ad14-091c-4e8e-a314-022ece01c121",
    "male": "7cf0e2b1-8daf-4fe4-89ad-f6039398f359",
}
|
||||
|
||||
|
||||
async def main():
    """Run the storytelling bot in a Daily room until the participant leaves.

    Wires a Daily transport, an OpenAI LLM and a Cartesia TTS service into a
    single pipeline. The TTS service is given a PatternPairAggregator with a
    ``voice_tag`` pattern (``<voice>`` ... ``</voice>``); whenever that
    pattern matches, the tag content is looked up in ``VOICE_IDS`` and the
    TTS voice is switched accordingly.
    """
    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        # Transport: audio out, transcription and Silero-based VAD enabled.
        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )
        transport = DailyTransport(room_url, token, "Storytelling Bot", daily_params)

        # The bot starts out speaking with the narrator voice.
        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id=VOICE_IDS["narrator"],
        )

        def on_voice_tag(match: PatternMatch):
            """Switch the TTS voice to the one named inside a voice tag."""
            voice_name = match.content.strip().lower()
            voice_id = VOICE_IDS.get(voice_name)
            if voice_id is None:
                # Tag content is not one of the configured roles; keep the
                # current voice and just report it.
                logger.warning(f"Unknown voice: {voice_name}")
                return
            tts.set_voice(voice_id)
            logger.info(f"Switched to {voice_name} voice")

        # Aggregator that recognises <voice>...</voice> pairs and routes them
        # to the handler above.
        # NOTE(review): remove_match=True presumably strips the tag from the
        # text sent on to speech synthesis - confirm against the aggregator.
        pattern_aggregator = PatternPairAggregator()
        pattern_aggregator.add_pattern_pair(
            pattern_id="voice_tag",
            start_pattern="<voice>",
            end_pattern="</voice>",
            remove_match=True,
        )
        pattern_aggregator.on_pattern_match("voice_tag", on_voice_tag)

        # Install the aggregator on the TTS service (private attribute).
        tts._text_aggregator = pattern_aggregator

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        # Instructions that teach the LLM the voice-tag protocol.
        system_prompt = """You are an engaging storyteller that uses different voices to bring stories to life.

You have three voices to use, but each has a specific purpose:

<voice>narrator</voice>
This is the default narrator voice. Use this for all narration, descriptions, and non-dialogue text.

<voice>female</voice>
Use this ONLY for direct speech by female characters (just the quoted text).

<voice>male</voice>
Use this ONLY for direct speech by male characters (just the quoted text).

IMPORTANT: Switch back to narrator voice immediately after character dialogue.

Here's an EXAMPLE of correct voice usage:

<voice>narrator</voice>
Sarah spotted her old friend across the café. She couldn't believe her eyes.

<voice>female</voice>
"Jacob! It's been so long!"

<voice>narrator</voice>
Sarah exclaimed, jumping up from her seat with a radiant smile.

<voice>male</voice>
"Sarah, is it really you? I can't believe it!"

<voice>narrator</voice>
Jacob replied, grinning widely as he walked over to her. The two friends embraced warmly, as if trying to make up for all the years spent apart.

<voice>female</voice>
"What are you doing in town? Last I heard you were in Seattle."

<voice>narrator</voice>
She asked, gesturing for him to join her at the table.

FOLLOW THESE RULES:
1. Always begin with the narrator voice
2. Only use character voices for the EXACT words they speak (in quotes)
3. SWITCH BACK to narrator voice for speech tags and all other text
4. Begin by asking what kind of story the user would like to hear
5. Create engaging dialogue with distinct characters

Remember: Use narrator voice for EVERYTHING except the actual quoted dialogue."""

        # Conversation context seeded with only the system message.
        context = OpenAILLMContext([{"role": "system", "content": system_prompt}])
        context_aggregator = llm.create_context_aggregator(context)

        # Processors in the order frames pass through them.
        processors = [
            transport.input(),
            context_aggregator.user(),
            llm,
            tts,  # carries the voice-switching aggregator
            transport.output(),
            context_aggregator.assistant(),
        ]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            report_only_initial_ttfb=True,
        )
        task = PipelineTask(pipeline, params=task_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            logger.info(f"First participant joined: {participant['id']}")
            await transport.capture_participant_transcription(participant["id"])

            # Kick things off with the bare context so the LLM opens the
            # conversation from the system instructions alone.
            await task.queue_frames([context_aggregator.user().get_context_frame()])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            logger.info(f"Participant left: {participant['id']}")
            # Cancel the pipeline task once the participant has left.
            await task.cancel()

        logger.info(f"Starting storytelling bot at: {room_url}")
        logger.info("Join the room to interact with the bot!")

        await PipelineRunner().run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Executed as a script: run the async entry point to completion.
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user