From 8319aea642c8a717c93550f4c5502d10466b01d4 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Fri, 29 May 2026 12:55:56 +0800 Subject: [PATCH 1/2] Fix idle prompt handling --- engine/pipeline.py | 90 +++++++++++----------------------------------- engine/services.py | 1 + 2 files changed, 22 insertions(+), 69 deletions(-) diff --git a/engine/pipeline.py b/engine/pipeline.py index 0c64139..6dd88eb 100644 --- a/engine/pipeline.py +++ b/engine/pipeline.py @@ -1,6 +1,5 @@ from __future__ import annotations -import asyncio import uuid from loguru import logger @@ -8,9 +7,6 @@ from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.frames.frames import ( - BotStartedSpeakingFrame, - BotStoppedSpeakingFrame, - Frame, LLMRunFrame, OutputTransportMessageUrgentFrame, TTSSpeakFrame, @@ -150,6 +146,7 @@ async def run_pipeline_with_serializer( user_params=LLMUserAggregatorParams( vad_analyzer=SileroVADAnalyzer(params=vad_params), user_turn_strategies=user_turn_strategies, + user_idle_timeout=config.turn.idle_prompt_timeout_sec, ), ) @@ -192,59 +189,7 @@ async def run_pipeline_with_serializer( ), idle_timeout_secs=config.session.inactivity_timeout_sec, ) - task.set_reached_upstream_filter((BotStartedSpeakingFrame, BotStoppedSpeakingFrame)) idle_prompt_count = 0 - idle_prompt_speaking = False - idle_prompt_task: asyncio.Task | None = None - - async def cancel_idle_prompt_timer() -> None: - nonlocal idle_prompt_task - timer = idle_prompt_task - idle_prompt_task = None - if timer and not timer.done(): - await task.cancel_task(timer) - - async def run_idle_prompt_timer() -> None: - nonlocal idle_prompt_count, idle_prompt_speaking, idle_prompt_task - try: - await asyncio.sleep(config.turn.idle_prompt_timeout_sec) - - text = config.turn.idle_prompt_text.strip() - if not text or config.turn.idle_prompt_max_count <= 0: - return - if idle_prompt_count >= config.turn.idle_prompt_max_count: - return - - idle_prompt_count += 1 - logger.info( - "User idle prompt triggered " - f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}" - ) - idle_prompt_speaking = True - await task.queue_frames([TTSSpeakFrame(text)]) - finally: - if idle_prompt_task is asyncio.current_task(): - idle_prompt_task = None - - async def arm_idle_prompt_timer() -> None: - nonlocal idle_prompt_task - await cancel_idle_prompt_timer() - - text = config.turn.idle_prompt_text.strip() - if ( - config.turn.idle_prompt_timeout_sec <= 0 - or config.turn.idle_prompt_max_count <= 0 - or not text - or idle_prompt_count >= config.turn.idle_prompt_max_count - ): - return - - logger.debug( - "Arming user idle prompt timer " - f"timeout={config.turn.idle_prompt_timeout_sec}s " - f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}" - ) - idle_prompt_task = task.create_task(run_idle_prompt_timer()) @transport.event_handler("on_client_connected") async def on_client_connected(_transport, _client): @@ -264,26 +209,17 @@ async def run_pipeline_with_serializer( @transport.event_handler("on_client_disconnected") async def on_client_disconnected(_transport, _client): logger.info(f"{client_label} websocket client disconnected") - await cancel_idle_prompt_timer() await task.cancel() @transport.event_handler("on_session_timeout") async def on_session_timeout(_transport, _client): logger.info(f"{client_label} websocket session timed out") - await cancel_idle_prompt_timer() await task.cancel() - @task.event_handler("on_frame_reached_upstream") - async def on_frame_reached_upstream(_task, frame: Frame): - nonlocal idle_prompt_count, idle_prompt_speaking - if isinstance(frame, BotStartedSpeakingFrame): - await cancel_idle_prompt_timer() - elif isinstance(frame, BotStoppedSpeakingFrame): - if idle_prompt_speaking: - idle_prompt_speaking = False - else: - idle_prompt_count = 0 - await arm_idle_prompt_timer() + @user_aggregator.event_handler("on_user_turn_started") + async def on_user_turn_started(_aggregator, _strategy): + nonlocal idle_prompt_count + idle_prompt_count = 0 @user_aggregator.event_handler("on_user_turn_stopped") async def on_user_turn_stopped(_aggregator, _strategy, message: UserTurnStoppedMessage): @@ -302,6 +238,22 @@ async def run_pipeline_with_serializer( ) ) + @user_aggregator.event_handler("on_user_turn_idle") + async def on_user_turn_idle(aggregator): + nonlocal idle_prompt_count + text = config.turn.idle_prompt_text.strip() + if not text or config.turn.idle_prompt_max_count <= 0: + return + if idle_prompt_count >= config.turn.idle_prompt_max_count: + return + + idle_prompt_count += 1 + logger.info( + "User idle prompt triggered " + f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}" + ) + await aggregator.push_frame(TTSSpeakFrame(text)) + # NOTE: assistant turn started/final events are emitted by # ProductTextStreamProcessor, upstream of TTS, so text streams to the # client ahead of audio. This logger is kept for server-side visibility. diff --git a/engine/services.py b/engine/services.py index ae8652c..1446e30 100644 --- a/engine/services.py +++ b/engine/services.py @@ -107,6 +107,7 @@ def create_tts_service(config: TTSConfig, audio: AudioConfig): volume=config.volume, pitch=config.pitch, timeout=config.timeout_sec, + push_stop_frames=True, ) if config.provider in ("xfyun_super", "xfyun_super_tts"): From b6cb72eda780425da58be26d10d6863ad6a1ea0e Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Fri, 29 May 2026 12:56:31 +0800 Subject: [PATCH 2/2] Remove unused config --- config/openai.example.json | 43 -------------------------------------- 1 file changed, 43 deletions(-) delete mode 100644 config/openai.example.json diff --git a/config/openai.example.json b/config/openai.example.json deleted file mode 100644 index 23207f7..0000000 --- a/config/openai.example.json +++ /dev/null @@ -1,43 +0,0 @@ -{ - "server": { - "host": "0.0.0.0", - "port": 8000, - "cors_origins": ["*"] - }, - "audio": { - "sample_rate_hz": 16000, - "channels": 1, - "frame_ms": 20 - }, - "session": { - "inactivity_timeout_sec": 60 - }, - "agent": { - "system_prompt": "You are a concise voice assistant.", - "greeting": "Please say hello in one short sentence.", - "greeting_mode": "generated" - }, - "services": { - "stt": { - "provider": "openai", - "api_key": "", - "base_url": null, - "model": "gpt-4o-mini-transcribe", - "language": "en" - }, - "llm": { - "provider": "openai", - "api_key": "", - "base_url": null, - "model": "gpt-4o-mini", - "temperature": 0.7 - }, - "tts": { - "provider": "openai", - "api_key": "", - "base_url": null, - "model": "gpt-4o-mini-tts", - "voice": "alloy" - } - } -}