This commit is contained in:
Xin Wang
2026-05-29 12:56:47 +08:00
3 changed files with 22 additions and 112 deletions

View File

@@ -1,43 +0,0 @@
{
"server": {
"host": "0.0.0.0",
"port": 8000,
"cors_origins": ["*"]
},
"audio": {
"sample_rate_hz": 16000,
"channels": 1,
"frame_ms": 20
},
"session": {
"inactivity_timeout_sec": 60
},
"agent": {
"system_prompt": "You are a concise voice assistant.",
"greeting": "Please say hello in one short sentence.",
"greeting_mode": "generated"
},
"services": {
"stt": {
"provider": "openai",
"api_key": "",
"base_url": null,
"model": "gpt-4o-mini-transcribe",
"language": "en"
},
"llm": {
"provider": "openai",
"api_key": "",
"base_url": null,
"model": "gpt-4o-mini",
"temperature": 0.7
},
"tts": {
"provider": "openai",
"api_key": "",
"base_url": null,
"model": "gpt-4o-mini-tts",
"voice": "alloy"
}
}
}

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import asyncio
import uuid
from loguru import logger
@@ -8,9 +7,6 @@ from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
Frame,
LLMRunFrame,
OutputTransportMessageUrgentFrame,
TTSSpeakFrame,
@@ -150,6 +146,7 @@ async def run_pipeline_with_serializer(
user_params=LLMUserAggregatorParams(
vad_analyzer=SileroVADAnalyzer(params=vad_params),
user_turn_strategies=user_turn_strategies,
user_idle_timeout=config.turn.idle_prompt_timeout_sec,
),
)
@@ -192,59 +189,7 @@ async def run_pipeline_with_serializer(
),
idle_timeout_secs=config.session.inactivity_timeout_sec,
)
task.set_reached_upstream_filter((BotStartedSpeakingFrame, BotStoppedSpeakingFrame))
idle_prompt_count = 0
idle_prompt_speaking = False
idle_prompt_task: asyncio.Task | None = None
async def cancel_idle_prompt_timer() -> None:
nonlocal idle_prompt_task
timer = idle_prompt_task
idle_prompt_task = None
if timer and not timer.done():
await task.cancel_task(timer)
async def run_idle_prompt_timer() -> None:
nonlocal idle_prompt_count, idle_prompt_speaking, idle_prompt_task
try:
await asyncio.sleep(config.turn.idle_prompt_timeout_sec)
text = config.turn.idle_prompt_text.strip()
if not text or config.turn.idle_prompt_max_count <= 0:
return
if idle_prompt_count >= config.turn.idle_prompt_max_count:
return
idle_prompt_count += 1
logger.info(
"User idle prompt triggered "
f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}"
)
idle_prompt_speaking = True
await task.queue_frames([TTSSpeakFrame(text)])
finally:
if idle_prompt_task is asyncio.current_task():
idle_prompt_task = None
async def arm_idle_prompt_timer() -> None:
nonlocal idle_prompt_task
await cancel_idle_prompt_timer()
text = config.turn.idle_prompt_text.strip()
if (
config.turn.idle_prompt_timeout_sec <= 0
or config.turn.idle_prompt_max_count <= 0
or not text
or idle_prompt_count >= config.turn.idle_prompt_max_count
):
return
logger.debug(
"Arming user idle prompt timer "
f"timeout={config.turn.idle_prompt_timeout_sec}s "
f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}"
)
idle_prompt_task = task.create_task(run_idle_prompt_timer())
@transport.event_handler("on_client_connected")
async def on_client_connected(_transport, _client):
@@ -264,26 +209,17 @@ async def run_pipeline_with_serializer(
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(_transport, _client):
logger.info(f"{client_label} websocket client disconnected")
await cancel_idle_prompt_timer()
await task.cancel()
@transport.event_handler("on_session_timeout")
async def on_session_timeout(_transport, _client):
logger.info(f"{client_label} websocket session timed out")
await cancel_idle_prompt_timer()
await task.cancel()
@task.event_handler("on_frame_reached_upstream")
async def on_frame_reached_upstream(_task, frame: Frame):
nonlocal idle_prompt_count, idle_prompt_speaking
if isinstance(frame, BotStartedSpeakingFrame):
await cancel_idle_prompt_timer()
elif isinstance(frame, BotStoppedSpeakingFrame):
if idle_prompt_speaking:
idle_prompt_speaking = False
else:
idle_prompt_count = 0
await arm_idle_prompt_timer()
@user_aggregator.event_handler("on_user_turn_started")
async def on_user_turn_started(_aggregator, _strategy):
nonlocal idle_prompt_count
idle_prompt_count = 0
@user_aggregator.event_handler("on_user_turn_stopped")
async def on_user_turn_stopped(_aggregator, _strategy, message: UserTurnStoppedMessage):
@@ -302,6 +238,22 @@ async def run_pipeline_with_serializer(
)
)
@user_aggregator.event_handler("on_user_turn_idle")
async def on_user_turn_idle(aggregator):
nonlocal idle_prompt_count
text = config.turn.idle_prompt_text.strip()
if not text or config.turn.idle_prompt_max_count <= 0:
return
if idle_prompt_count >= config.turn.idle_prompt_max_count:
return
idle_prompt_count += 1
logger.info(
"User idle prompt triggered "
f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}"
)
await aggregator.push_frame(TTSSpeakFrame(text))
# NOTE: assistant turn started/final events are emitted by
# ProductTextStreamProcessor, upstream of TTS, so text streams to the
# client ahead of audio. This logger is kept for server-side visibility.

View File

@@ -107,6 +107,7 @@ def create_tts_service(config: TTSConfig, audio: AudioConfig):
volume=config.volume,
pitch=config.pitch,
timeout=config.timeout_sec,
push_stop_frames=True,
)
if config.provider in ("xfyun_super", "xfyun_super_tts"):