Add foundational example 35

2025-03-17 23:05:22 -04:00
parent e731a0d41f
commit ddcc1fbb2f
1 changed files with 192 additions and 0 deletions
--- a/examples/foundational/35-voice-switching.py
+++ b/examples/foundational/35-voice-switching.py
@@ -0,0 +1,192 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import os
+import sys
+
+import aiohttp
+from dotenv import load_dotenv
+from loguru import logger
+from runner import configure
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.services.cartesia import CartesiaTTSService
+from pipecat.services.openai import OpenAILLMService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+from pipecat.utils.text.pattern_pair_aggregator import PatternMatch, PatternPairAggregator
+
+# Load settings from a .env file. override=True asks python-dotenv to let the
+# file's values replace variables that are already set in the environment.
+load_dotenv(override=True)
+
+# Remove the handler with id 0 (the sink loguru installs by default) and log
+# to stderr at DEBUG level instead.
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+# Voice names the LLM may place inside <voice>...</voice> tags, mapped to the
+# voice IDs handed to the TTS service. main() looks names up here after
+# lower-casing them, and uses "narrator" as the initial TTS voice.
+VOICE_IDS = {
+    "narrator": "c45bc5ec-dc68-4feb-8829-6e6b2748095d",  # Narrator voice
+    "female": "71a7ad14-091c-4e8e-a314-022ece01c121",  # Female character voice
+    "male": "7cf0e2b1-8daf-4fe4-89ad-f6039398f359",  # Male character voice
+}
+
+
+async def main():
+    async with aiohttp.ClientSession() as session:
+        (room_url, token) = await configure(session)
+
+        transport = DailyTransport(
+            room_url,
+            token,
+            "Storytelling Bot",
+            DailyParams(
+                audio_out_enabled=True,
+                transcription_enabled=True,
+                vad_enabled=True,
+                vad_analyzer=SileroVADAnalyzer(),
+            ),
+        )
+
+        # Initialize TTS with narrator voice as default
+        tts = CartesiaTTSService(
+            api_key=os.getenv("CARTESIA_API_KEY"),
+            voice_id=VOICE_IDS["narrator"],
+        )
+
+        # Create pattern pair aggregator for voice switching
+        pattern_aggregator = PatternPairAggregator()
+
+        # Add pattern for voice switching
+        pattern_aggregator.add_pattern_pair(
+            pattern_id="voice_tag",
+            start_pattern="<voice>",
+            end_pattern="</voice>",
+            remove_match=True,
+        )
+
+        # Register handler for voice switching
+        def on_voice_tag(match: PatternMatch):
+            voice_name = match.content.strip().lower()
+            if voice_name in VOICE_IDS:
+                voice_id = VOICE_IDS[voice_name]
+                tts.set_voice(voice_id)
+                logger.info(f"Switched to {voice_name} voice")
+            else:
+                logger.warning(f"Unknown voice: {voice_name}")
+
+        pattern_aggregator.on_pattern_match("voice_tag", on_voice_tag)
+
+        # Set the pattern aggregator on the TTS service
+        tts._text_aggregator = pattern_aggregator
+
+        # Initialize LLM
+        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
+
+        # System prompt for storytelling with voice switching
+        system_prompt = """You are an engaging storyteller that uses different voices to bring stories to life.
+
+You have three voices to use, but each has a specific purpose:
+
+<voice>narrator</voice>
+This is the default narrator voice. Use this for all narration, descriptions, and non-dialogue text.
+
+<voice>female</voice>
+Use this ONLY for direct speech by female characters (just the quoted text).
+
+<voice>male</voice>
+Use this ONLY for direct speech by male characters (just the quoted text).
+
+IMPORTANT: Switch back to narrator voice immediately after character dialogue.
+
+Here's an EXAMPLE of correct voice usage:
+
+<voice>narrator</voice>
+Sarah spotted her old friend across the café. She couldn't believe her eyes.
+
+<voice>female</voice>
+"Jacob! It's been so long!"
+
+<voice>narrator</voice>
+Sarah exclaimed, jumping up from her seat with a radiant smile.
+
+<voice>male</voice>
+"Sarah, is it really you? I can't believe it!"
+
+<voice>narrator</voice>
+Jacob replied, grinning widely as he walked over to her. The two friends embraced warmly, as if trying to make up for all the years spent apart.
+
+<voice>female</voice>
+"What are you doing in town? Last I heard you were in Seattle."
+
+<voice>narrator</voice>
+She asked, gesturing for him to join her at the table.
+
+FOLLOW THESE RULES:
+1. Always begin with the narrator voice
+2. Only use character voices for the EXACT words they speak (in quotes)
+3. SWITCH BACK to narrator voice for speech tags and all other text
+4. Begin by asking what kind of story the user would like to hear
+5. Create engaging dialogue with distinct characters
+
+Remember: Use narrator voice for EVERYTHING except the actual quoted dialogue."""
+
+        # Set up LLM context
+        messages = [
+            {
+                "role": "system",
+                "content": system_prompt,
+            },
+        ]
+
+        context = OpenAILLMContext(messages)
+        context_aggregator = llm.create_context_aggregator(context)
+
+        # Create pipeline
+        pipeline = Pipeline(
+            [
+                transport.input(),
+                context_aggregator.user(),
+                llm,
+                tts,  # TTS with pattern aggregator
+                transport.output(),
+                context_aggregator.assistant(),
+            ]
+        )
+
+        task = PipelineTask(
+            pipeline,
+            params=PipelineParams(
+                allow_interruptions=True,
+                enable_metrics=True,
+                enable_usage_metrics=True,
+                report_only_initial_ttfb=True,
+            ),
+        )
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            logger.info(f"First participant joined: {participant['id']}")
+            await transport.capture_participant_transcription(participant["id"])
+
+            # Start conversation - empty prompt to let LLM follow system instructions
+            await task.queue_frames([context_aggregator.user().get_context_frame()])
+
+        @transport.event_handler("on_participant_left")
+        async def on_participant_left(transport, participant, reason):
+            logger.info(f"Participant left: {participant['id']}")
+            await task.cancel()
+
+        logger.info(f"Starting storytelling bot at: {room_url}")
+        logger.info("Join the room to interact with the bot!")
+
+        runner = PipelineRunner()
+        await runner.run(task)
+
+
+# Start the bot only when this file is executed as a script.
+if __name__ == "__main__":
+    asyncio.run(main())