Modify the initial step to avoid the previous STT-failure issue

parent ff24ccf5f0
commit f74604ef21
@@ -38,6 +38,7 @@ from livekit.agents.voice.io import PlaybackFinishedEvent
 from livekit.agents.voice.room_io import ATTRIBUTE_PUBLISH_ON_BEHALF
 from livekit.plugins import silero
 from livekit.plugins import openai, azure, minimax, aliyun, volcengine
+# from livekit.plugins.turn_detector.multilingual import MultilingualModel
 
 from datetime import datetime
 
@@ -86,10 +87,12 @@ DEFAULT_INSTRUCTIONS = """# 角色
 - 完成之后进入后续办理提示阶段
 
 ## 后续办理提示阶段
-- 在后续办理提示阶段:使用ask_important_question显示已经提取的交通事故信息,提示用户点击转人工继续处理。
+- 在后续办理提示阶段:使用ask_important_question显示已经提取的交通事故信息,提示用户点击转人工继续处理,用户点击之后调用enter_hand_off_to_human_mode工具转人工。
 
 # 要求
+- 在通话开始
 - 你会在ask_image_capture的prompt参数中告诉用户拍摄的目标,所以避免在对话中重复描述需要用户拍摄什么
+- 使用get_mobile_by_plate和get_id_card_by_plate的时候不要告诉用户正在查询,执行工具查看结果即可
 
 # 回复风格
 - 使用第一人称,语言简洁
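Note: the prompt edits in this hunk tell the model to call the enter_hand_off_to_human_mode tool once the user taps "转人工" (transfer to human) in the follow-up stage, and add two requirements, including running get_mobile_by_plate / get_id_card_by_plate silently instead of announcing the lookup. Those names refer to function tools defined elsewhere in agent.py; as a rough, hedged illustration only of how such a tool is exposed to the LLM in livekit-agents (the real signatures live in agent.py, and the logger name and return value here are assumptions):

```python
import logging

from livekit.agents import Agent, RunContext, function_tool

logger = logging.getLogger("wuxi-traffic-agent")  # assumed logger name
DEFAULT_INSTRUCTIONS = "..."  # stands in for the prompt string edited in the hunk above


class HandOffSketchAgent(Agent):
    def __init__(self) -> None:
        super().__init__(instructions=DEFAULT_INSTRUCTIONS)

    @function_tool
    async def enter_hand_off_to_human_mode(self, context: RunContext) -> str:
        """Hand the session over to a human operator after the user confirms."""
        # The real tool presumably flags the session for transfer; this sketch only logs.
        logger.info("hand-off to human requested")
        return "已转人工"
```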
@@ -99,34 +102,6 @@ DEFAULT_INSTRUCTIONS = """# 角色
 - 你已经说过下面的开场白所以不需要重复说:“您好,这里是无锡交警,我将为您远程处理交通事故。请将人员撤离至路侧安全区域,开启危险报警双闪灯、放置三角警告牌、做好安全防护,谨防二次事故伤害。若您已经准备好了,请点击继续办理,如需人工服务,请说转人工。”
 """
 
-backup = """
-
-#回复要求
-你主动对话并推进事故信息采集的完成。语言简洁,一次询问一个问题。
-不要在你的回复中使用 emojis, asterisks, markdown, 或其他特殊字符。
-你可以对一个用户回复多次调用工具,比如askImageCapture没有得到合适的照片的时候,可以继续调用askImageCapture工具让用户重新拍摄,直到得到合适的照片为止。
-只有在我要求使用askImportantQuestion的时候才去调用,否则禁止使用askImportantQuestion工具。
-
-你已经说过下面的开场白所以不需要重复说:
-“您好,这里是无锡交警,我将为您远程处理交通事故。请将人员撤离至路侧安全区域,开启危险报警双闪灯、放置三角警告牌、做好安全防护,谨防二次事故伤害。若您已经准备好了,请点击继续办理,如需人工服务,请说转人工。”
-
-#任务
-##事故初审阶段
-禁止在事故初审阶段使用askImageCapture工具。只有在完成事故信息采集后,才能进入现场证据拍照阶段并使用askImageCapture。
-
-##现场证据拍照阶段
-使用askImageCapture工具引导用户依次拍摄事故现场照片,驾驶员正脸照片,车牌号
-每次拍摄完成后立即判断是否符合要求:符合则直接进入下一步;不符合则立即再次调用askImageCapture让用户重新拍摄,并明确指出问题与改进要求
-ask_image_capture的分析结果中只要target_found为false或者quality_ok为false,就必须向用户解释问题(结合quality_notes或缺失的目标),并立刻再次调用askImageCapture给出更具体的改进提示;在获得有效照片之前禁止进入下一步
-拍完需要的照片后,复述车牌号并让用户确认或修正;确认后进入驾驶员信息核实。
-
-##驾驶员信息核实阶段
-询问司机的姓名。
-之后根据车牌号查询驾驶员手机号,如果查询到则用手机号后四位向用户确认,如果未查询到或者用户告知手机号后四位错误,则询问用户手机号。
-接着根据车牌号查询驾驶员身份证号,如果查询到则用身份证号后四位向用户确认,如果未查询到或者用户告知身份证号后四位错误,则询问用户身份证号。
-之后告诉用户采集完成,显示重要消息显示已经采集的信息,提示用户点击转人工继续处理。
-"""
-
 # ## 黄金对话路径示例 (GOLDEN_CONVERSATION_PATH)
 
 # ```
@@ -203,6 +178,9 @@ class MyAgent(Agent):
         )
 
     async def on_enter(self):
+        self.session.generate_reply(
+            instructions="调用ask_important_question,message=\"您好,这里是无锡交警,我将为您远程处理交通事故。请将人员撤离至路侧安全区域,开启危险报警双闪灯、放置三角警告牌、做好安全防护,谨防二次事故伤害。若您已经准备好了,请点击继续办理。\",options=[\"继续办理\"]", allow_interruptions=False)
+
         # Register byte stream handler for image uploads from frontend
         def _image_received_handler(reader, participant_identity):
             task = asyncio.create_task(
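Note: together with the removal in the next hunk, this is the substance of the fix named in the commit message. The opening ask_important_question is now requested through session.generate_reply() with explicit instructions, so the greeting flows through the normal agent pipeline instead of the previous hand-rolled session.say() plus manual RPC sequence. A consolidated, hedged sketch of on_enter after the change (reassembled from this hunk and its unchanged context; _handle_image_stream is an assumed name for the existing upload consumer, and the greeting text is abbreviated):

```python
import asyncio

from livekit.agents import Agent, get_job_context


class MyAgentSketch(Agent):
    async def on_enter(self):
        # Ask the LLM to issue the opening ask_important_question itself.
        self.session.generate_reply(
            instructions=(
                "调用ask_important_question,"
                'message="您好,这里是无锡交警,我将为您远程处理交通事故。…请点击继续办理。",'
                'options=["继续办理"]'
            ),
            allow_interruptions=False,
        )

        # Register byte stream handler for image uploads from frontend
        def _image_received_handler(reader, participant_identity):
            asyncio.create_task(self._handle_image_stream(reader, participant_identity))

        # Add the handler when the agent joins
        get_job_context().room.register_byte_stream_handler("image", _image_received_handler)
```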
@@ -214,61 +192,6 @@ class MyAgent(Agent):
         # Add the handler when the agent joins
         get_job_context().room.register_byte_stream_handler("image", _image_received_handler)
 
-        # Proactively ask the user the initial important question via askImportantQuestion
-        # using the greeting message from instructions (lines 59-60)
-        initial_question = (
-            "您好,这里是无锡交警,我将为您远程处理交通事故。请将人员撤离至路侧安全区域,"
-            "开启危险报警双闪灯、放置三角警告牌、做好安全防护,谨防二次事故伤害。若您已经准备好了,"
-            "请点击继续办理。"
-        )
-        options = ["继续办理"]
-
-        async def _ask_initial():
-            try:
-                room = get_job_context().room
-                if not room.remote_participants:
-                    logger.warning("No remote participants yet; skipping initial askImportantQuestion.")
-                    return
-                participant_identity = next(iter(room.remote_participants))
-
-                # Speak the initial question so the user hears it
-                try:
-                    self.session.say(initial_question, allow_interruptions=False)
-                except Exception as e:
-                    logger.error(f"Failed to speak initial question: {e}")
-
-                payload_data = {
-                    "message": initial_question,
-                    "options": options,
-                }
-                # Log tool call in chat
-                await self._send_chat_message(
-                    f"🔨 Call: ask_important_question\n • message: \"{initial_question}\"\n • options: {options}"
-                )
-                response = await room.local_participant.perform_rpc(
-                    destination_identity=participant_identity,
-                    method="askImportantQuestion",
-                    payload=json.dumps(payload_data),
-                    response_timeout=60.0,
-                )
-                logger.info(f"Initial important question response: {response}")
-                try:
-                    response_data = json.loads(response)
-                    selection = response_data.get("selection", "")
-                    await self._send_chat_message(
-                        f"✅ Result: ask_important_question\n • selection: \"{selection}\""
-                    )
-                except json.JSONDecodeError:
-                    logger.error(f"Failed to parse initial response: {response}")
-            except Exception as e:
-                logger.error(f"Failed to ask initial important question: {e}")
-
-        # when the agent is added to the session, it'll generate a reply
-        # according to its instructions
-        self.session.generate_reply()
-
-        asyncio.create_task(_ask_initial())
-
     async def _send_chat_message(self, message: str):
         """Helper to send a chat message to the room."""
         try:
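Note: the block removed above was the old initial step: speak the greeting with session.say(), then drive askImportantQuestion over a manual perform_rpc call from a background task. That whole path is gone; only the byte-stream registration for image uploads remains in on_enter. For reference, one possible shape of the consumer that registration hands off to (not part of the diff; the function name, the chunked-iteration pattern and the downstream processing are all assumptions, and in agent.py this would be a method on MyAgent):

```python
import logging

logger = logging.getLogger("wuxi-traffic-agent")  # assumed logger name


async def handle_image_stream(reader, participant_identity: str) -> None:
    """Assumed consumer for the "image" byte stream registered in on_enter."""
    chunks: list[bytes] = []
    async for chunk in reader:  # read the upload chunk by chunk as it arrives
        chunks.append(chunk)
    image_bytes = b"".join(chunks)
    logger.info("received image upload: %d bytes from %s", len(image_bytes), participant_identity)
    # ...hand image_bytes to the vision / ask_image_capture pipeline here...
```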
@@ -987,6 +910,7 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
 
     session = AgentSession(
         vad=ctx.proc.userdata["vad"],
+        # turn_detection=MultilingualModel(),
         # any combination of STT, LLM, TTS, or realtime API can be used
         # stt = aliyun.STT(model="paraformer-realtime-v2"),
         stt = volcengine.BigModelSTT(
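Note: this hunk only adds the turn-detector line as a comment, i.e. the multilingual turn-detection plugin stays disabled and the session keeps using volcengine.BigModelSTT for speech recognition. For orientation, a minimal sketch of how an AgentSession of this shape is assembled; only the vad line and the volcengine STT choice come from the diff, while the openai LLM/TTS entries below are placeholders and agent.py passes its own constructor arguments to BigModelSTT:

```python
from livekit.agents import AgentSession
from livekit.plugins import openai, silero

session = AgentSession(
    vad=silero.VAD.load(),        # agent.py reuses the prewarmed ctx.proc.userdata["vad"]
    # turn_detection=MultilingualModel(),  # still commented out after this commit
    stt=openai.STT(),             # stand-in; agent.py uses volcengine.BigModelSTT(...) here
    llm=openai.LLM(),             # placeholder LLM choice
    tts=openai.TTS(),             # placeholder TTS choice
)
```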