Enhance voice configuration with idle prompt features and new TTS settings
- Added idle prompt timeout, maximum count, and text to multiple voice configuration files to improve user interaction during idle periods.
- Updated greeting mode to 'fastgpt_opener' in relevant configurations for a more dynamic greeting experience.
- Introduced a new voice configuration file for xfyun TTS, including detailed service settings and parameters.
- Refactored the pipeline to handle idle prompts and user turn events, ensuring smoother interaction flow.
- Adjusted the VAD and turn configurations to accommodate new idle prompt features.
This commit is contained in:
@@ -42,12 +42,15 @@
|
||||
"你好",
|
||||
"在吗"
|
||||
],
|
||||
"user_speech_timeout_sec": 0.2
|
||||
"user_speech_timeout_sec": 0.2,
|
||||
"idle_prompt_timeout_sec": 3.0,
|
||||
"idle_prompt_max_count": 3,
|
||||
"idle_prompt_text": "你好,请问还在吗?"
|
||||
},
|
||||
"agent": {
|
||||
"system_prompt": "FastGPT app owns the system prompt when send_system_prompt is false.",
|
||||
"greeting": "您好,这里是无锡交警,我将为您远程处理交通事故。请将人员撤离至路侧安全区域,开启危险报警双闪灯、放置三角警告牌、做好安全防护,谨防二次事故伤害。若您已经准备好了,请点击继续办理,如需人工服务,请说转人工。",
|
||||
"greeting_mode": "fixed",
|
||||
"greeting_mode": "fastgpt_opener",
|
||||
"response_state": {
|
||||
"enabled": true,
|
||||
"tag": "state",
|
||||
|
||||
99
config/voice-fastgpt-state-xfyunTTS.json
Normal file
99
config/voice-fastgpt-state-xfyunTTS.json
Normal file
@@ -0,0 +1,99 @@
|
||||
{
|
||||
"server": {
|
||||
"host": "0.0.0.0",
|
||||
"port": 8000,
|
||||
"cors_origins": ["*"]
|
||||
},
|
||||
"audio": {
|
||||
"sample_rate_hz": 16000,
|
||||
"channels": 1,
|
||||
"frame_ms": 20
|
||||
},
|
||||
"session": {
|
||||
"inactivity_timeout_sec": 60
|
||||
},
|
||||
"turn": {
|
||||
"vad": {
|
||||
"confidence": 0.8,
|
||||
"start_secs": 0.4,
|
||||
"stop_secs": 0.2,
|
||||
"min_volume": 0.8
|
||||
},
|
||||
"interruption_min_chars": 3,
|
||||
"interruption_use_interim": true,
|
||||
"interruption_short_replies": [
|
||||
"是",
|
||||
"是的",
|
||||
"对",
|
||||
"对的",
|
||||
"嗯",
|
||||
"好",
|
||||
"好的",
|
||||
"行",
|
||||
"可以",
|
||||
"没问题",
|
||||
"不是",
|
||||
"不",
|
||||
"不行",
|
||||
"不用",
|
||||
"不要",
|
||||
"没有",
|
||||
"否",
|
||||
"你好",
|
||||
"在吗"
|
||||
],
|
||||
"user_speech_timeout_sec": 0.2,
|
||||
"idle_prompt_timeout_sec": 3.0,
|
||||
"idle_prompt_max_count": 3,
|
||||
"idle_prompt_text": "你好,请问还在吗?"
|
||||
},
|
||||
"agent": {
|
||||
"greeting_mode": "fastgpt_opener",
|
||||
"response_state": {
|
||||
"enabled": true,
|
||||
"tag": "state",
|
||||
"event_type": "response.state",
|
||||
"max_prefix_chars": 256
|
||||
}
|
||||
},
|
||||
"services": {
|
||||
"stt": {
|
||||
"provider": "xfyun",
|
||||
"app_id": "416ce125",
|
||||
"api_key": "c65342fe603126c3610031d8429bb36d",
|
||||
"api_secret": "MzkyYmI5OWEyODQzN2FiN2VhN2UzYzU4",
|
||||
"base_url": "wss://iat-api.xfyun.cn/v2/iat",
|
||||
"language": "zh_cn",
|
||||
"domain": "iat",
|
||||
"accent": "mandarin",
|
||||
"encoding": "raw",
|
||||
"frame_size": 1280,
|
||||
"timeout_sec": 10.0
|
||||
},
|
||||
"llm": {
|
||||
"provider": "fastgpt",
|
||||
"api_key": "fastgpt-zlLjYtWZWN0uhQHs3ZOFHG4KLGMIdr2CkbZLCSfqGm5vcdx5xIZbp",
|
||||
"base_url": "http://localhost:3030",
|
||||
"model": "my-voice-app",
|
||||
"app_id": "691eddaa53e3f8d9f25f1370",
|
||||
"chat_id": null,
|
||||
"variables": {},
|
||||
"detail": false,
|
||||
"timeout_sec": 60.0
|
||||
},
|
||||
"tts": {
|
||||
"provider": "xfyun",
|
||||
"app_id": "416ce125",
|
||||
"api_key": "c65342fe603126c3610031d8429bb36d",
|
||||
"api_secret": "MzkyYmI5OWEyODQzN2FiN2VhN2UzYzU4",
|
||||
"base_url": "wss://tts-api.xfyun.cn/v2/tts",
|
||||
"voice": "x4_xiaoyan",
|
||||
"aue": "raw",
|
||||
"tte": "UTF8",
|
||||
"speed": 50,
|
||||
"volume": 50,
|
||||
"pitch": 50,
|
||||
"source_sample_rate_hz": 16000
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -42,12 +42,15 @@
|
||||
"你好",
|
||||
"在吗"
|
||||
],
|
||||
"user_speech_timeout_sec": 0.2
|
||||
"user_speech_timeout_sec": 0.2,
|
||||
"idle_prompt_timeout_sec": 3.0,
|
||||
"idle_prompt_max_count": 3,
|
||||
"idle_prompt_text": "你好,请问还在吗?"
|
||||
},
|
||||
"agent": {
|
||||
"system_prompt": "FastGPT app owns the system prompt when send_system_prompt is false.",
|
||||
"greeting": "您好,这里是无锡交警,我将为您远程处理交通事故。请将人员撤离至路侧安全区域,开启危险报警双闪灯、放置三角警告牌、做好安全防护,谨防二次事故伤害。若您已经准备好了,请点击继续办理,如需人工服务,请说转人工。",
|
||||
"greeting_mode": "fixed",
|
||||
"greeting_mode": "fastgpt_opener",
|
||||
"response_state": {
|
||||
"enabled": true,
|
||||
"tag": "state",
|
||||
|
||||
@@ -42,12 +42,15 @@
|
||||
"你好",
|
||||
"在吗"
|
||||
],
|
||||
"user_speech_timeout_sec": 0.2
|
||||
"user_speech_timeout_sec": 0.2,
|
||||
"idle_prompt_timeout_sec": 3.0,
|
||||
"idle_prompt_max_count": 3,
|
||||
"idle_prompt_text": "你好,请问还在吗?"
|
||||
},
|
||||
"agent": {
|
||||
"system_prompt": "FastGPT app owns the system prompt when send_system_prompt is false.",
|
||||
"greeting": "您好,这里是无锡交警,我将为您远程处理交通事故。请将人员撤离至路侧安全区域,开启危险报警双闪灯、放置三角警告牌、做好安全防护,谨防二次事故伤害。若您已经准备好了,请点击继续办理,如需人工服务,请说转人工。",
|
||||
"greeting_mode": "fixed",
|
||||
"greeting_mode": "fastgpt_opener",
|
||||
"response_state": {
|
||||
"enabled": true,
|
||||
"tag": "state",
|
||||
@@ -82,20 +85,18 @@
|
||||
"send_system_prompt": false
|
||||
},
|
||||
"tts": {
|
||||
"provider": "xfyun_super",
|
||||
"provider": "xfyun",
|
||||
"app_id": "416ce125",
|
||||
"api_key": "c65342fe603126c3610031d8429bb36d",
|
||||
"api_secret": "MzkyYmI5OWEyODQzN2FiN2VhN2UzYzU4",
|
||||
"base_url": "wss://cbm01.cn-huabei-1.xf-yun.com/v1/private/mcd9m97e6",
|
||||
"voice": "x5_lingxiaoxuan_flow",
|
||||
"base_url": "wss://tts-api.xfyun.cn/v2/tts",
|
||||
"voice": "x4_xiaoyan",
|
||||
"aue": "raw",
|
||||
"tte": "UTF8",
|
||||
"speed": 50,
|
||||
"volume": 50,
|
||||
"pitch": 50,
|
||||
"oral_level": "mid",
|
||||
"source_sample_rate_hz": 24000,
|
||||
"text_aggregation_mode": "token",
|
||||
"timeout_sec": 30.0
|
||||
"source_sample_rate_hz": 16000
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,7 +40,10 @@
|
||||
"没有",
|
||||
"否"
|
||||
],
|
||||
"user_speech_timeout_sec": 0.2
|
||||
"user_speech_timeout_sec": 0.2,
|
||||
"idle_prompt_timeout_sec": 3.0,
|
||||
"idle_prompt_max_count": 3,
|
||||
"idle_prompt_text": "你好,请问还在吗?"
|
||||
},
|
||||
"agent": {
|
||||
"system_prompt": "# 角色 你是一个高度集成、安全第一的交警AI接警员。正在收集事故人员伤亡情况,时间,地点,事故原因,事故车辆数量,收集完成之后和用户说再见",
|
||||
|
||||
@@ -42,7 +42,10 @@
|
||||
"没有",
|
||||
"否"
|
||||
],
|
||||
"user_speech_timeout_sec": 0.8
|
||||
"user_speech_timeout_sec": 0.8,
|
||||
"idle_prompt_timeout_sec": 3.0,
|
||||
"idle_prompt_max_count": 3,
|
||||
"idle_prompt_text": "你好,请问还在吗?"
|
||||
},
|
||||
"agent": {
|
||||
"system_prompt": "You are a helpful, friendly voice assistant. Keep responses concise and natural for spoken conversation.",
|
||||
|
||||
@@ -67,6 +67,12 @@ class VADConfig:
|
||||
class TurnConfig:
|
||||
vad: VADConfig = field(default_factory=VADConfig)
|
||||
user_speech_timeout_sec: float = 1.0
|
||||
idle_prompt_timeout_sec: float = 0.0
|
||||
idle_prompt_max_count: int = 1
|
||||
idle_prompt_text: str = (
|
||||
"我先停在这里。你可以继续说你的想法,"
|
||||
"或者让我根据刚才的内容帮你整理下一步。"
|
||||
)
|
||||
interruption_min_chars: int = 3
|
||||
interruption_use_interim: bool = True
|
||||
interruption_short_replies: list[str] = field(
|
||||
@@ -209,8 +215,10 @@ def config_from_dict(data: dict) -> EngineConfig:
|
||||
agent = _dict(data.get("agent"))
|
||||
if agent.get("greeting") == "":
|
||||
agent["greeting"] = None
|
||||
if agent.get("greeting_mode") not in (None, "generated", "fixed", "off"):
|
||||
raise ValueError("agent.greeting_mode must be one of: generated, fixed, off")
|
||||
if agent.get("greeting_mode") not in (None, "generated", "fixed", "off", "fastgpt_opener"):
|
||||
raise ValueError(
|
||||
"agent.greeting_mode must be one of: generated, fixed, off, fastgpt_opener"
|
||||
)
|
||||
response_state = ResponseStateConfig(**_dict(agent.pop("response_state")))
|
||||
if response_state.max_prefix_chars < 1:
|
||||
raise ValueError("agent.response_state.max_prefix_chars must be greater than 0")
|
||||
@@ -231,6 +239,10 @@ def config_from_dict(data: dict) -> EngineConfig:
|
||||
llm["app_id"] = None
|
||||
if not isinstance(llm.get("variables"), dict):
|
||||
llm["variables"] = {}
|
||||
if agent.get("greeting_mode") == "fastgpt_opener" and llm["provider"] != "fastgpt":
|
||||
raise ValueError(
|
||||
"agent.greeting_mode='fastgpt_opener' requires services.llm.provider='fastgpt'"
|
||||
)
|
||||
|
||||
turn = _dict(data.get("turn"))
|
||||
vad = _dict(turn.get("vad"))
|
||||
@@ -244,6 +256,15 @@ def config_from_dict(data: dict) -> EngineConfig:
|
||||
user_speech_timeout_sec=float(
|
||||
turn.get("user_speech_timeout_sec", TurnConfig().user_speech_timeout_sec)
|
||||
),
|
||||
idle_prompt_timeout_sec=float(
|
||||
turn.get("idle_prompt_timeout_sec", TurnConfig().idle_prompt_timeout_sec)
|
||||
),
|
||||
idle_prompt_max_count=int(
|
||||
turn.get("idle_prompt_max_count", TurnConfig().idle_prompt_max_count)
|
||||
),
|
||||
idle_prompt_text=str(
|
||||
turn.get("idle_prompt_text", TurnConfig().idle_prompt_text)
|
||||
),
|
||||
interruption_min_chars=int(
|
||||
turn.get("interruption_min_chars", TurnConfig().interruption_min_chars)
|
||||
),
|
||||
|
||||
@@ -126,6 +126,7 @@ async def run_pipeline_with_serializer(
|
||||
user_params=LLMUserAggregatorParams(
|
||||
vad_analyzer=SileroVADAnalyzer(params=vad_params),
|
||||
user_turn_strategies=user_turn_strategies,
|
||||
user_idle_timeout=config.turn.idle_prompt_timeout_sec,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -167,21 +168,26 @@ async def run_pipeline_with_serializer(
|
||||
),
|
||||
idle_timeout_secs=config.session.inactivity_timeout_sec,
|
||||
)
|
||||
idle_prompt_count = 0
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(_transport, _client):
|
||||
logger.info(f"{client_label} websocket client connected")
|
||||
if config.agent.greeting_mode == "fixed" and config.agent.greeting:
|
||||
await task.queue_frames([TTSSpeakFrame(config.agent.greeting)])
|
||||
elif config.agent.greeting_mode == "generated":
|
||||
elif config.agent.greeting_mode == "fastgpt_opener":
|
||||
if isinstance(llm, FastGPTLLMService):
|
||||
welcome = await llm.fetch_welcome_text()
|
||||
if welcome:
|
||||
await task.queue_frames([TTSSpeakFrame(welcome)])
|
||||
else:
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
logger.warning("FastGPT opener requested but no opener text was returned")
|
||||
else:
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
raise RuntimeError(
|
||||
"agent.greeting_mode='fastgpt_opener' requires FastGPT LLM service"
|
||||
)
|
||||
elif config.agent.greeting_mode == "generated":
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(_transport, _client):
|
||||
@@ -193,6 +199,27 @@ async def run_pipeline_with_serializer(
|
||||
logger.info(f"{client_label} websocket session timed out")
|
||||
await task.cancel()
|
||||
|
||||
@user_aggregator.event_handler("on_user_turn_started")
|
||||
async def on_user_turn_started(_aggregator, _strategy):
|
||||
nonlocal idle_prompt_count
|
||||
idle_prompt_count = 0
|
||||
|
||||
@user_aggregator.event_handler("on_user_turn_idle")
|
||||
async def on_user_turn_idle(aggregator):
|
||||
nonlocal idle_prompt_count
|
||||
text = config.turn.idle_prompt_text.strip()
|
||||
if not text or config.turn.idle_prompt_max_count <= 0:
|
||||
return
|
||||
if idle_prompt_count >= config.turn.idle_prompt_max_count:
|
||||
return
|
||||
|
||||
idle_prompt_count += 1
|
||||
logger.info(
|
||||
"User idle prompt triggered "
|
||||
f"count={idle_prompt_count}/{config.turn.idle_prompt_max_count}"
|
||||
)
|
||||
await aggregator.push_frame(TTSSpeakFrame(text))
|
||||
|
||||
@user_aggregator.event_handler("on_user_turn_stopped")
|
||||
async def on_user_turn_stopped(_aggregator, _strategy, message: UserTurnStoppedMessage):
|
||||
logger.info(f"User: {message.content}")
|
||||
|
||||
@@ -2,7 +2,13 @@ from __future__ import annotations
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import Frame, InputTransportMessageFrame, LLMMessagesAppendFrame
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputTransportMessageFrame,
|
||||
LLMMessagesAppendFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
|
||||
@@ -25,6 +31,8 @@ class ProductTextInputProcessor(FrameProcessor):
|
||||
if not text:
|
||||
return
|
||||
|
||||
await self.broadcast_frame(UserStartedSpeakingFrame)
|
||||
|
||||
if message.get("interrupt", True):
|
||||
logger.info("Text input interrupting current response")
|
||||
await self.broadcast_interruption()
|
||||
@@ -36,3 +44,4 @@ class ProductTextInputProcessor(FrameProcessor):
|
||||
),
|
||||
FrameDirection.DOWNSTREAM,
|
||||
)
|
||||
await self.broadcast_frame(UserStoppedSpeakingFrame)
|
||||
|
||||
Reference in New Issue
Block a user