From 8319aea642c8a717c93550f4c5502d10466b01d4 Mon Sep 17 00:00:00 2001
From: Xin Wang <wangx@XindeMac-mini-2.local>
Date: Fri, 29 May 2026 12:55:56 +0800
Subject: [PATCH 1/2] Fix idle prompt handling

---
 engine/pipeline.py | 90 +++++++++++-----------------------------------
 engine/services.py |  1 +
 2 files changed, 22 insertions(+), 69 deletions(-)

diff --git a/engine/pipeline.py b/engine/pipeline.py
index 0c64139..6dd88eb 100644
--- a/engine/pipeline.py
+++ b/engine/pipeline.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import asyncio
 import uuid
 
 from loguru import logger
@@ -8,9 +7,6 @@ from loguru import logger
 from pipecat.audio.vad.silero import SileroVADAnalyzer
 from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.frames.frames import (
-    BotStartedSpeakingFrame,
-    BotStoppedSpeakingFrame,
-    Frame,
     LLMRunFrame,
     OutputTransportMessageUrgentFrame,
     TTSSpeakFrame,
@@ -150,6 +146,7 @@ async def run_pipeline_with_serializer(
         user_params=LLMUserAggregatorParams(
             vad_analyzer=SileroVADAnalyzer(params=vad_params),
             user_turn_strategies=user_turn_strategies,
+            user_idle_timeout=config.turn.idle_prompt_timeout_sec,
         ),
     )
 
@@ -192,59 +189,7 @@ async def run_pipeline_with_serializer(
         ),
         idle_timeout_secs=config.session.inactivity_timeout_sec,
     )
-    task.set_reached_upstream_filter((BotStartedSpeakingFrame, BotStoppedSpeakingFrame))
     idle_prompt_count = 0
-    idle_prompt_speaking = False
-    idle_prompt_task: asyncio.Task | None = None
-
-    async def cancel_idle_prompt_timer() -> None:
-        nonlocal idle_prompt_task
-        timer = idle_prompt_task
-        idle_prompt_task = None
-        if timer and not timer.done():
-            await task.cancel_task(timer)
-
-    async def run_idle_prompt_timer() -> None:
-        nonlocal idle_prompt_count, idle_prompt_speaking, idle_prompt_task
-        try:
-            await asyncio.sleep(config.turn.idle_prompt_timeout_sec)
-
-            text = config.turn.idle_prompt_text.strip()
-            if not text or config.turn.idle_prompt_max_count <= 0:
-                return
-            if idle_prompt_count >= config.turn.idle_prompt_max_count:
-                return
-
-            idle_prompt_count += 1
-            logger.info(
-                "User idle prompt triggered "
-                f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}"
-            )
-            idle_prompt_speaking = True
-            await task.queue_frames([TTSSpeakFrame(text)])
-        finally:
-            if idle_prompt_task is asyncio.current_task():
-                idle_prompt_task = None
-
-    async def arm_idle_prompt_timer() -> None:
-        nonlocal idle_prompt_task
-        await cancel_idle_prompt_timer()
-
-        text = config.turn.idle_prompt_text.strip()
-        if (
-            config.turn.idle_prompt_timeout_sec <= 0
-            or config.turn.idle_prompt_max_count <= 0
-            or not text
-            or idle_prompt_count >= config.turn.idle_prompt_max_count
-        ):
-            return
-
-        logger.debug(
-            "Arming user idle prompt timer "
-            f"timeout={config.turn.idle_prompt_timeout_sec}s "
-            f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}"
-        )
-        idle_prompt_task = task.create_task(run_idle_prompt_timer())
 
     @transport.event_handler("on_client_connected")
     async def on_client_connected(_transport, _client):
@@ -264,26 +209,17 @@ async def run_pipeline_with_serializer(
     @transport.event_handler("on_client_disconnected")
     async def on_client_disconnected(_transport, _client):
         logger.info(f"{client_label} websocket client disconnected")
-        await cancel_idle_prompt_timer()
         await task.cancel()
 
     @transport.event_handler("on_session_timeout")
     async def on_session_timeout(_transport, _client):
         logger.info(f"{client_label} websocket session timed out")
-        await cancel_idle_prompt_timer()
         await task.cancel()
 
-    @task.event_handler("on_frame_reached_upstream")
-    async def on_frame_reached_upstream(_task, frame: Frame):
-        nonlocal idle_prompt_count, idle_prompt_speaking
-        if isinstance(frame, BotStartedSpeakingFrame):
-            await cancel_idle_prompt_timer()
-        elif isinstance(frame, BotStoppedSpeakingFrame):
-            if idle_prompt_speaking:
-                idle_prompt_speaking = False
-            else:
-                idle_prompt_count = 0
-            await arm_idle_prompt_timer()
+    @user_aggregator.event_handler("on_user_turn_started")
+    async def on_user_turn_started(_aggregator, _strategy):
+        nonlocal idle_prompt_count
+        idle_prompt_count = 0
 
     @user_aggregator.event_handler("on_user_turn_stopped")
     async def on_user_turn_stopped(_aggregator, _strategy, message: UserTurnStoppedMessage):
@@ -302,6 +238,22 @@ async def run_pipeline_with_serializer(
             )
         )
 
+    @user_aggregator.event_handler("on_user_turn_idle")
+    async def on_user_turn_idle(aggregator):
+        nonlocal idle_prompt_count
+        text = config.turn.idle_prompt_text.strip()
+        if not text or config.turn.idle_prompt_max_count <= 0:
+            return
+        if idle_prompt_count >= config.turn.idle_prompt_max_count:
+            return
+
+        idle_prompt_count += 1
+        logger.info(
+            "User idle prompt triggered "
+            f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}"
+        )
+        await aggregator.push_frame(TTSSpeakFrame(text))
+
     # NOTE: assistant turn started/final events are emitted by
     # ProductTextStreamProcessor, upstream of TTS, so text streams to the
     # client ahead of audio. This logger is kept for server-side visibility.
diff --git a/engine/services.py b/engine/services.py
index ae8652c..1446e30 100644
--- a/engine/services.py
+++ b/engine/services.py
@@ -107,6 +107,7 @@ def create_tts_service(config: TTSConfig, audio: AudioConfig):
             volume=config.volume,
             pitch=config.pitch,
             timeout=config.timeout_sec,
+            push_stop_frames=True,
         )
 
     if config.provider in ("xfyun_super", "xfyun_super_tts"):

From b6cb72eda780425da58be26d10d6863ad6a1ea0e Mon Sep 17 00:00:00 2001
From: Xin Wang <wangx@XindeMac-mini-2.local>
Date: Fri, 29 May 2026 12:56:31 +0800
Subject: [PATCH 2/2] Remove unused config

---
 config/openai.example.json | 43 --------------------------------------
 1 file changed, 43 deletions(-)
 delete mode 100644 config/openai.example.json

diff --git a/config/openai.example.json b/config/openai.example.json
deleted file mode 100644
index 23207f7..0000000
--- a/config/openai.example.json
+++ /dev/null
@@ -1,43 +0,0 @@
-{
-  "server": {
-    "host": "0.0.0.0",
-    "port": 8000,
-    "cors_origins": ["*"]
-  },
-  "audio": {
-    "sample_rate_hz": 16000,
-    "channels": 1,
-    "frame_ms": 20
-  },
-  "session": {
-    "inactivity_timeout_sec": 60
-  },
-  "agent": {
-    "system_prompt": "You are a concise voice assistant.",
-    "greeting": "Please say hello in one short sentence.",
-    "greeting_mode": "generated"
-  },
-  "services": {
-    "stt": {
-      "provider": "openai",
-      "api_key": "",
-      "base_url": null,
-      "model": "gpt-4o-mini-transcribe",
-      "language": "en"
-    },
-    "llm": {
-      "provider": "openai",
-      "api_key": "",
-      "base_url": null,
-      "model": "gpt-4o-mini",
-      "temperature": 0.7
-    },
-    "tts": {
-      "provider": "openai",
-      "api_key": "",
-      "base_url": null,
-      "model": "gpt-4o-mini-tts",
-      "voice": "alloy"
-    }
-  }
-}