From f74604ef21eaba926597e68e277be67ae57e3c38 Mon Sep 17 00:00:00 2001
From: Xin Wang <wangxin19930411@163.com>
Date: Mon, 15 Dec 2025 16:34:27 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=88=9D=E5=A7=8B=E6=AD=A5?=
 =?UTF-8?q?=E9=AA=A4=EF=BC=8C=E9=81=BF=E5=85=8D=E4=BA=86=E5=8E=9F=E6=9D=A5?=
 =?UTF-8?q?stt=E5=A4=B1=E6=95=88=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 agents/my_basic_agent_1_2_9.py | 92 +++-------------------------------
 1 file changed, 8 insertions(+), 84 deletions(-)

diff --git a/agents/my_basic_agent_1_2_9.py b/agents/my_basic_agent_1_2_9.py
index 60fba80..71dd818 100644
--- a/agents/my_basic_agent_1_2_9.py
+++ b/agents/my_basic_agent_1_2_9.py
@@ -38,6 +38,7 @@ from livekit.agents.voice.io import PlaybackFinishedEvent
 from livekit.agents.voice.room_io import ATTRIBUTE_PUBLISH_ON_BEHALF
 from livekit.plugins import silero
 from livekit.plugins import openai, azure, minimax, aliyun, volcengine
+# from livekit.plugins.turn_detector.multilingual import MultilingualModel
 
 from datetime import datetime
 
@@ -86,10 +87,12 @@ DEFAULT_INSTRUCTIONS = """# 角色
 - 完成之后进入后续办理提示阶段
 
 ## 后续办理提示阶段
-- 在后续办理提示阶段：使用ask_important_question显示已经提取的交通事故信息，提示用户点击转人工继续处理。
+- 在后续办理提示阶段：使用ask_important_question显示已经提取的交通事故信息，提示用户点击转人工继续处理，用户点击之后调用enter_hand_off_to_human_mode工具转人工。
 
 # 要求
+- 在通话开始
 - 你会在ask_image_capture的prompt参数中告诉用户拍摄的目标，所以避免在对话中重复描述需要用户拍摄什么
+- 使用get_mobile_by_plate和get_id_card_by_plate的时候不要告诉用户正在查询，执行工具查看结果即可
 
 # 回复风格
 - 使用第一人称，语言简洁
@@ -99,34 +102,6 @@ DEFAULT_INSTRUCTIONS = """# 角色
 - 你已经说过下面的开场白所以不需要重复说：“您好，这里是无锡交警，我将为您远程处理交通事故。请将人员撤离至路侧安全区域，开启危险报警双闪灯、放置三角警告牌、做好安全防护，谨防二次事故伤害。若您已经准备好了，请点击继续办理，如需人工服务，请说转人工。”
 """
 
-backup = """
-
-#回复要求
-你主动对话并推进事故信息采集的完成。语言简洁，一次询问一个问题。
-不要在你的回复中使用 emojis, asterisks, markdown, 或其他特殊字符。
-你可以对一个用户回复多次调用工具，比如askImageCapture没有得到合适的照片的时候，可以继续调用askImageCapture工具让用户重新拍摄，直到得到合适的照片为止。
-只有在我要求使用askImportantQuestion的时候才去调用，否则禁止使用askImportantQuestion工具。
-
-你已经说过下面的开场白所以不需要重复说：
-“您好，这里是无锡交警，我将为您远程处理交通事故。请将人员撤离至路侧安全区域，开启危险报警双闪灯、放置三角警告牌、做好安全防护，谨防二次事故伤害。若您已经准备好了，请点击继续办理，如需人工服务，请说转人工。”
-
-#任务
-##事故初审阶段
-禁止在事故初审阶段使用askImageCapture工具。只有在完成事故信息采集后，才能进入现场证据拍照阶段并使用askImageCapture。
-
-##现场证据拍照阶段
-使用askImageCapture工具引导用户依次拍摄事故现场照片，驾驶员正脸照片，车牌号
-每次拍摄完成后立即判断是否符合要求：符合则直接进入下一步；不符合则立即再次调用askImageCapture让用户重新拍摄，并明确指出问题与改进要求
-ask_image_capture的分析结果中只要target_found为false或者quality_ok为false，就必须向用户解释问题（结合quality_notes或缺失的目标），并立刻再次调用askImageCapture给出更具体的改进提示；在获得有效照片之前禁止进入下一步
-拍完需要的照片后，复述车牌号并让用户确认或修正；确认后进入驾驶员信息核实。
-
-##驾驶员信息核实阶段
-询问司机的姓名。
-之后根据车牌号查询驾驶员手机号，如果查询到则用手机号后四位向用户确认，如果未查询到或者用户告知手机号后四位错误，则询问用户手机号。
-接着根据车牌号查询驾驶员身份证号，如果查询到则用身份证号后四位向用户确认，如果未查询到或者用户告知身份证号后四位错误，则询问用户身份证号。
-之后告诉用户采集完成，显示重要消息显示已经采集的信息，提示用户点击转人工继续处理。
-"""
-
 # ## 黄金对话路径示例 （GOLDEN_CONVERSATION_PATH）
 
 # ```
@@ -203,6 +178,9 @@ class MyAgent(Agent):
         )
 
     async def on_enter(self):
+        self.session.generate_reply(
+            instructions="调用ask_important_question，message=\"您好，这里是无锡交警，我将为您远程处理交通事故。请将人员撤离至路侧安全区域，开启危险报警双闪灯、放置三角警告牌、做好安全防护，谨防二次事故伤害。若您已经准备好了，请点击继续办理。\"，options=[\"继续办理\"]", allow_interruptions=False)
+
         # Register byte stream handler for image uploads from frontend
         def _image_received_handler(reader, participant_identity):
             task = asyncio.create_task(
@@ -214,61 +192,6 @@ class MyAgent(Agent):
         # Add the handler when the agent joins
         get_job_context().room.register_byte_stream_handler("image", _image_received_handler)
 
-        # Proactively ask the user the initial important question via askImportantQuestion
-        # using the greeting message from instructions (lines 59-60)
-        initial_question = (
-            "您好，这里是无锡交警，我将为您远程处理交通事故。请将人员撤离至路侧安全区域，"
-            "开启危险报警双闪灯、放置三角警告牌、做好安全防护，谨防二次事故伤害。若您已经准备好了，"
-            "请点击继续办理。"
-        )
-        options = ["继续办理"]
-
-        async def _ask_initial():
-            try:
-                room = get_job_context().room
-                if not room.remote_participants:
-                    logger.warning("No remote participants yet; skipping initial askImportantQuestion.")
-                    return
-                participant_identity = next(iter(room.remote_participants))
-
-                # Speak the initial question so the user hears it
-                try:
-                    self.session.say(initial_question, allow_interruptions=False)
-                except Exception as e:
-                    logger.error(f"Failed to speak initial question: {e}")
-
-                payload_data = {
-                    "message": initial_question,
-                    "options": options,
-                }
-                # Log tool call in chat
-                await self._send_chat_message(
-                    f"🔨 Call: ask_important_question\n  • message: \"{initial_question}\"\n  • options: {options}"
-                )
-                response = await room.local_participant.perform_rpc(
-                    destination_identity=participant_identity,
-                    method="askImportantQuestion",
-                    payload=json.dumps(payload_data),
-                    response_timeout=60.0,
-                )
-                logger.info(f"Initial important question response: {response}")
-                try:
-                    response_data = json.loads(response)
-                    selection = response_data.get("selection", "")
-                    await self._send_chat_message(
-                        f"✅ Result: ask_important_question\n  • selection: \"{selection}\""
-                    )
-                except json.JSONDecodeError:
-                    logger.error(f"Failed to parse initial response: {response}")
-            except Exception as e:
-                logger.error(f"Failed to ask initial important question: {e}")
-                
-            # when the agent is added to the session, it'll generate a reply
-            # according to its instructions
-            self.session.generate_reply()
-
-        asyncio.create_task(_ask_initial())
-
     async def _send_chat_message(self, message: str):
         """Helper to send a chat message to the room."""
         try:
@@ -987,6 +910,7 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
 
     session = AgentSession(
         vad=ctx.proc.userdata["vad"],
+        # turn_detection=MultilingualModel(),
         # any combination of STT, LLM, TTS, or realtime API can be used
         # stt = aliyun.STT(model="paraformer-realtime-v2"),
         stt = volcengine.BigModelSTT(