diff --git a/examples/freeze-test/client/src/app.ts b/examples/freeze-test/client/src/app.ts index 2e8315539..7b565ea71 100644 --- a/examples/freeze-test/client/src/app.ts +++ b/examples/freeze-test/client/src/app.ts @@ -191,7 +191,17 @@ class WebsocketClientApp { const startTime = Date.now(); this.recordingSerializer = new RecordingSerializer() - const transport = this.ENABLE_RECORDING_MODE ? new WebSocketTransport({serializer: this.recordingSerializer}) : new WebSocketTransport(); + const transport = this.ENABLE_RECORDING_MODE ? + new WebSocketTransport({ + serializer: this.recordingSerializer, + recorderSampleRate: 8000, + playerSampleRate:8000 + }) : + new WebSocketTransport({ + serializer: new ProtobufFrameSerializer(), + recorderSampleRate: 8000, + playerSampleRate:8000 + }); this.websocketTransport = transport const RTVIConfig: RTVIClientOptions = { diff --git a/examples/freeze-test/env.example b/examples/freeze-test/env.example new file mode 100644 index 000000000..7a94b2eee --- /dev/null +++ b/examples/freeze-test/env.example @@ -0,0 +1,4 @@ +SENTRY_DSN= +DEEPGRAM_API_KEY= +CARTESIA_API_KEY= +OPENAI_API_KEY= \ No newline at end of file diff --git a/examples/freeze-test/freeze_test_bot.py b/examples/freeze-test/freeze_test_bot.py index 52ef5fc89..8ce3df17a 100644 --- a/examples/freeze-test/freeze_test_bot.py +++ b/examples/freeze-test/freeze_test_bot.py @@ -18,7 +18,6 @@ from fastapi import FastAPI, Request, WebSocket from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import RedirectResponse from loguru import logger -from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import ( @@ -27,11 +26,13 @@ from pipecat.frames.frames import ( Frame, InterimTranscriptionFrame, LLMFullResponseEndFrame, + LLMMessagesFrame, StartFrame, StartInterruptionFrame, StopFrame, StopInterruptionFrame, TranscriptionFrame, + TTSSpeakFrame, UserStartedSpeakingFrame, UserStoppedSpeakingFrame, ) @@ -47,6 +48,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIProcessor from pipecat.processors.metrics.sentry import SentryMetrics +from pipecat.processors.user_idle_processor import UserIdleProcessor from pipecat.serializers.protobuf import ProtobufFrameSerializer from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService @@ -78,9 +80,6 @@ app.add_middleware( allow_headers=["*"], ) -# Mount the frontend at / -app.mount("/client", SmallWebRTCPrebuiltUI) - class SimulateFreezeInput(FrameProcessor): def __init__( @@ -188,6 +187,37 @@ async def run_example(websocket_client): stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + async def handle_user_idle(user_idle: UserIdleProcessor, retry_count: int) -> bool: + if retry_count == 1: + # First attempt: Add a gentle prompt to the conversation + messages.append( + { + "role": "system", + "content": "The user has been quiet. Politely and briefly ask if they're still there.", + } + ) + await user_idle.push_frame(LLMMessagesFrame(messages)) + return True + elif retry_count == 2: + # Second attempt: More direct prompt + messages.append( + { + "role": "system", + "content": "The user is still inactive. Ask if they'd like to continue our conversation.", + } + ) + await user_idle.push_frame(LLMMessagesFrame(messages)) + return True + else: + # Third attempt: End the conversation + await user_idle.push_frame( + TTSSpeakFrame("It seems like you're busy right now. Have a nice day!") + ) + await task.queue_frame(EndFrame()) + return False + + user_idle = UserIdleProcessor(callback=handle_user_idle, timeout=10.0) + tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady @@ -222,6 +252,7 @@ async def run_example(websocket_client): stt, ], ), + user_idle, rtvi, context_aggregator.user(), # User responses llm, # LLM @@ -238,6 +269,8 @@ async def run_example(websocket_client): enable_metrics=True, enable_usage_metrics=True, report_only_initial_ttfb=True, + audio_in_sample_rate=8000, + audio_out_sample_rate=8000, ), idle_timeout_secs=120, observers=[ @@ -249,6 +282,10 @@ async def run_example(websocket_client): # LLMTextFrame: None, OpenAILLMContextFrame: None, LLMFullResponseEndFrame: None, + UserStartedSpeakingFrame: None, + UserStoppedSpeakingFrame: None, + StartInterruptionFrame: None, + StopInterruptionFrame: None, }, exclude_fields={ "result", diff --git a/examples/freeze-test/requirements.txt b/examples/freeze-test/requirements.txt new file mode 100644 index 000000000..8e3d7f12f --- /dev/null +++ b/examples/freeze-test/requirements.txt @@ -0,0 +1,4 @@ +python-dotenv +fastapi[all] +uvicorn +pipecat-ai[silero,websocket,openai, deepgram, cartesia, sentry]