logging

Switch questions
Better recreation
2024-11-27 19:38:37 +08:00 · 2024-11-27 15:10:50 +08:00 · 2024-11-27 14:08:01 +08:00 · 2024-11-27 12:21:45 +08:00 · 2024-11-27 11:50:28 +08:00 · 2024-11-27 11:36:28 +08:00
5 changed files with 221 additions and 1 deletions
--- a/examples/foundational/07-interruptible.py
+++ b/examples/foundational/07-interruptible.py
@@ -10,11 +10,12 @@ import os
 import sys

 from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.frames.frames import LLMMessagesFrame
+from pipecat.frames.frames import BotSpeakingFrame, Frame, InputAudioRawFrame, LLMMessagesFrame, TTSAudioRawFrame, TextFrame, UserStoppedSpeakingFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.services.cartesia import CartesiaTTSService
 from pipecat.services.openai import OpenAILLMService
 from pipecat.transports.services.daily import DailyParams, DailyTransport
@@ -30,6 +31,22 @@ load_dotenv(override=True)
 logger.remove(0)
 logger.add(sys.stderr, level="DEBUG")

+class DebugProcessor(FrameProcessor):
+    def __init__(self, name, **kwargs):
+        self._name = name
+        super().__init__(**kwargs)
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+        if not (
+            isinstance(frame, InputAudioRawFrame)
+            or isinstance(frame, BotSpeakingFrame)
+            or isinstance(frame, TTSAudioRawFrame)
+            or isinstance(frame, TextFrame)
+        ):
+            logger.debug(f"--- {self._name}: {frame} {direction}")
+        await self.push_frame(frame, direction)
+

 async def main():
    async with aiohttp.ClientSession() as session:
@@ -63,11 +80,14 @@ async def main():

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)
+        
+        dp = DebugProcessor("dp")

        pipeline = Pipeline(
            [
                transport.input(),  # Transport user input
                context_aggregator.user(),  # User responses
+                dp,
                llm,  # LLM
                tts,  # TTS
                transport.output(),  # Transport bot output
--- a/examples/foundational/race_bot.py
+++ b/examples/foundational/race_bot.py
@@ -0,0 +1,191 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import os
+import sys
+import time
+
+import aiohttp
+from loguru import logger
+from runner import configure
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import (
+    BotSpeakingFrame,
+    EndFrame,
+    Frame,
+    InputAudioRawFrame,
+    StartInterruptionFrame,
+    StopInterruptionFrame,
+    TextFrame,
+    TranscriptionFrame,
+    TTSAudioRawFrame,
+    UserStartedSpeakingFrame,
+    UserStoppedSpeakingFrame,
+)
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.services.cartesia import CartesiaTTSService
+from pipecat.services.openai import OpenAILLMService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+class DebugProcessor(FrameProcessor):
+    def __init__(self, name, **kwargs):
+        self._name = name
+        super().__init__(**kwargs)
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+        if not (
+            isinstance(frame, InputAudioRawFrame)
+            or isinstance(frame, BotSpeakingFrame)
+            or isinstance(frame, UserStoppedSpeakingFrame)
+            or isinstance(frame, TTSAudioRawFrame)
+            or isinstance(frame, TextFrame)
+        ):
+            logger.debug(f"--- {self._name}: {frame} {direction}")
+        await self.push_frame(frame, direction)
+
+
+async def main():
+    async with aiohttp.ClientSession() as session:
+        (room_url, _) = await configure(session)
+
+        transport = DailyTransport(
+            room_url,
+            None,
+            "AI Bot",
+            DailyParams(
+                audio_out_enabled=True,
+                transcription_enabled=True,
+                vad_enabled=True,
+                vad_analyzer=SileroVADAnalyzer(),
+            ),
+        )
+        tts = CartesiaTTSService(
+            api_key=os.getenv("CARTESIA_API_KEY"),
+            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
+        )
+
+        llm = OpenAILLMService(api_key=os.environ["OPENAI_API_KEY"], model="gpt-4o")
+
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+            },
+        ]
+
+        dp = DebugProcessor("dp")
+
+        context = OpenAILLMContext(messages)
+        context_aggregator = llm.create_context_aggregator(context)
+
+        runner = PipelineRunner()
+
+        task = PipelineTask(
+            Pipeline(
+                [
+                    # transport.input(),
+                    context_aggregator.user(),
+                    llm,
+                    dp,
+                    tts,
+                    transport.output(),
+                    context_aggregator.assistant(),
+                ]
+            ),
+            PipelineParams(
+                allow_interruptions=True,
+            ),
+        )
+
+        # Register an event handler so we can play the audio when the
+        # participant joins.
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            participant_id = participant.get("info", {}).get("participantId", "")
+
+            # Create frames for 600 seconds
+            start_time = time.time()
+            while time.time() - start_time < 300:
+                elapsed_time = round(time.time() - start_time)
+                logger.info(f"Running for {elapsed_time} seconds")
+                await task.queue_frame(
+                    StartInterruptionFrame(),
+                )
+                await asyncio.sleep(1)
+
+                await task.queue_frame(
+                    UserStartedSpeakingFrame(),
+                )
+
+                await asyncio.sleep(1)
+
+                await task.queue_frame(
+                    TranscriptionFrame("Tell me more about your company.", participant_id, time.time()),
+                )
+
+                await asyncio.sleep(1)
+
+                await task.queue_frame(
+                    StopInterruptionFrame(),
+                )
+
+                await asyncio.sleep(1)
+
+                await task.queue_frame(
+                    UserStoppedSpeakingFrame(),
+                )
+
+                await asyncio.sleep(5)
+
+                await task.queue_frame(StartInterruptionFrame())
+                await asyncio.sleep(1)
+
+                await task.queue_frame(
+                    UserStartedSpeakingFrame(),
+                )
+
+                await asyncio.sleep(1)
+
+                await task.queue_frame(
+                    TranscriptionFrame("Give me a list of appointment dates.", participant_id, time.time()),
+                )
+
+                await asyncio.sleep(1)
+
+                await task.queue_frames(
+                    StopInterruptionFrame(),
+                )
+
+                await asyncio.sleep(1)
+                await task.queue_frame(
+                    UserStoppedSpeakingFrame(),
+                )
+                await asyncio.sleep(5)
+            await task.queue_frame(EndFrame())
+
+        # @transport.event_handler("on_first_participant_joined")
+        # async def on_first_participant_joined(transport, participant):
+        #     await transport.capture_participant_transcription(participant["id"])
+        #     # Kick off the conversation.
+        #     messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+        #     await task.queue_frames([LLMMessagesFrame(messages)])
+
+        await runner.run(task)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/src/pipecat/services/cartesia.py
+++ b/src/pipecat/services/cartesia.py
@@ -7,6 +7,7 @@
 import asyncio
 import base64
 import json
+import random
 import uuid
 from typing import AsyncGenerator, List, Optional, Union

@@ -222,6 +223,10 @@ class CartesiaTTSService(WordTTSService):
    async def _receive_task_handler(self):
        try:
            async for message in self._get_websocket():
+                # Randomly cancel the asyncio task 1% of the time
+                if random.random() < 0.01:
+                    logger.info(f"Cancelling task for {self} due to random chance")
+                    asyncio.current_task().cancel()
                msg = json.loads(message)
                if not msg or msg["context_id"] != self._context_id:
                    continue
@@ -256,6 +261,7 @@ class CartesiaTTSService(WordTTSService):
                    logger.error(f"Cartesia error, unknown message type: {msg}")
        except asyncio.CancelledError:
            pass
+            # await self.push_error(ErrorFrame(f"{self} cancelled", True))
        except Exception as e:
            logger.error(f"{self} exception: {e}")

--- a/src/pipecat/transports/base_input.py
+++ b/src/pipecat/transports/base_input.py
@@ -71,6 +71,7 @@ class BaseInputTransport(FrameProcessor):
        return self._params.vad_analyzer

    async def push_audio_frame(self, frame: InputAudioRawFrame):
+        logger.info(f"Pushing audio qsize: {self._audio_in_queue.qsize()}")
        if self._params.audio_in_enabled or self._params.vad_enabled:
            await self._audio_in_queue.put(frame)

@@ -167,6 +168,7 @@ class BaseInputTransport(FrameProcessor):
        return vad_state

    async def _audio_task_handler(self):
+        logger.info("_audio_task_handler started")
        vad_state: VADState = VADState.QUIET
        while True:
            try:
--- a/src/pipecat/transports/network/websocket_server.py
+++ b/src/pipecat/transports/network/websocket_server.py
@@ -106,6 +106,7 @@ class WebsocketServerInputTransport(BaseInputTransport):
                continue

            if isinstance(frame, AudioRawFrame):
+                logger.info("websocket_server")
                await self.push_audio_frame(
                    InputAudioRawFrame(
                        audio=frame.audio,
Author	SHA1	Message	Date
James Hush	1884ff3f09	logging	2024-11-27 19:38:37 +08:00
James Hush	f34e6bce94	Switch questions	2024-11-27 15:10:50 +08:00
James Hush	909bb30517	Better recreation	2024-11-27 14:08:01 +08:00
James Hush	632bae7eee	Interrupted?	2024-11-27 12:21:45 +08:00
James Hush	cedccdcbc0	Add interruptions	2024-11-27 11:50:28 +08:00
James Hush	1893784b89	Save race bot	2024-11-27 11:36:28 +08:00
James Hush	e2384e2484	fix: add logging and error handling for issue #721	2024-11-26 11:22:58 +08:00