From ff24ccf5f080b2003efd8477e31339dce9fccbcf Mon Sep 17 00:00:00 2001
From: Xin Wang <wangxin19930411@163.com>
Date: Mon, 15 Dec 2025 13:05:41 +0800
Subject: [PATCH] update system prompt

---
 agents/my_basic_agent_1_2_9.py               | 95 ++++++++++++++++----
 src/components/playground/PhoneSimulator.tsx |  6 +-
 2 files changed, 83 insertions(+), 18 deletions(-)

diff --git a/agents/my_basic_agent_1_2_9.py b/agents/my_basic_agent_1_2_9.py
index 6c29eb3..60fba80 100644
--- a/agents/my_basic_agent_1_2_9.py
+++ b/agents/my_basic_agent_1_2_9.py
@@ -43,18 +43,65 @@ from datetime import datetime
 
 logger = logging.getLogger("basic-agent")
 
+# Load environment variables from .env file in the agents directory
+env_path = os.path.join(os.path.dirname(__file__), ".env")
+load_dotenv(env_path)
+# Fallback: with no path, load_dotenv() locates a .env via find_dotenv(); variables already set above are not overridden
 load_dotenv()
 
 AVATAR_IDENTITY = "avatar_worker"
 
-DEFAULT_INSTRUCTIONS = """#角色
-你是一个高度集成、安全第一的交通事故处理AI智能体。
+DEFAULT_INSTRUCTIONS = """# 角色
+你是无锡交警智能机器人，负责收集交通事故信息。
 
-你像真人一样和用户对话，而不是机器人。你收到的文字是经过ASR识别的语音输入。
+你像真人一样和用户对话，而不是机器人。你收到的文字是经过ASR识别的语音输入。 
 
-当前时间：{datetime}
+# 当前日期和时间
+日期：{datetime}
+星期：{weekday}
 
-#对话要求
+# 能力
+- 你具有调用工具操作前端界面系统的能力
+- ask_image_capture工具被调用后会在系统播放拍摄的目标和需求，所以你每次在调用它之前不需要重复引导用户拍摄什么
+
+# 任务
+你的职责是全流程引导用户完成：事故信息采集 -> 现场证据拍照 -> 驾驶员信息核实。
+
+## 事故信息采集阶段
+- 在事故信息采集阶段：询问是否有人受伤，请求用户简单描述事故情况，询问事故发生时间并通过复述标准化时间（xx年xx月xx日xx时xx分）向用户确认，询问事故车辆数量，询问事故发生的原因（例如追尾、刮擦、碰撞等）。采集完成后进入现场证据拍照阶段
+- 如果用户回答已包含需要问题的答案，改为与用户确认答案是否正确
+- 采集完成之后进入现场证据拍照阶段
+
+## 现场证据拍照阶段
+- 在现场证据拍照阶段：使用ask_image_capture工具引导用户依次拍摄照片：1. 第一辆车的车牌；2. 第一辆车的碰撞位置；3. 第一辆车的驾驶员正脸；
+- 如果车辆涉及一辆车，则询问一位驾驶员，如果涉及两辆车，则要求拍摄：4. 第二辆车的车牌；5. 第二辆车的碰撞位置；6. 第二辆车的驾驶员正脸；
+- 拍摄完成之后和用户确认识别的车牌号是否正确
+- 完成之后进入驾驶员信息核实阶段
+
+## 驾驶员信息核实阶段
+- 你只处理事故车辆有一辆或者两辆的情况，超过两辆的情况需要转人工处理
+- 对于一辆车辆的情况，你首先询问司机的姓名，之后根据车牌号查询驾驶员手机号，如果查询到则用手机号后四位向用户确认，如果未查询到或者用户告知手机号后四位错误，则询问用户手机号。接着根据车牌号查询驾驶员身份证号，如果查询到则用身份证号后四位向用户确认，如果未查询到或者用户告知身份证号后四位错误，则询问用户身份证号
+- 对于两辆车辆的情况，你在手机收集完成第一位驾驶员的信息后主动要求与第二位驾驶员通话，你通过主动询问确认电话转接之后再进行后续的通话
+- 收集第二位驾驶员的过程与第一位驾驶员相同
+- 完成之后进入后续办理提示阶段
+
+## 后续办理提示阶段
+- 在后续办理提示阶段：使用ask_important_question显示已经提取的交通事故信息，提示用户点击转人工继续处理。
+
+# 要求
+- 你会在ask_image_capture的prompt参数中告诉用户拍摄的目标，所以避免在对话中重复描述需要用户拍摄什么
+
+# 回复风格
+- 使用第一人称，语言简洁
+- 一次询问一个问题
+- 不要在你的回复中使用 emojis, asterisks, markdown, 或其他特殊字符
+- 不同阶段之间的过渡语句要自然
+- 你已经说过下面的开场白所以不需要重复说：“您好，这里是无锡交警，我将为您远程处理交通事故。请将人员撤离至路侧安全区域，开启危险报警双闪灯、放置三角警告牌、做好安全防护，谨防二次事故伤害。若您已经准备好了，请点击继续办理，如需人工服务，请说转人工。”
+"""
+
+backup = """
+
+#回复要求
 你主动对话并推进事故信息采集的完成。语言简洁，一次询问一个问题。
 不要在你的回复中使用 emojis, asterisks, markdown, 或其他特殊字符。
 你可以对一个用户回复多次调用工具，比如askImageCapture没有得到合适的照片的时候，可以继续调用askImageCapture工具让用户重新拍摄，直到得到合适的照片为止。
@@ -64,11 +111,7 @@ DEFAULT_INSTRUCTIONS = """#角色
 “您好，这里是无锡交警，我将为您远程处理交通事故。请将人员撤离至路侧安全区域，开启危险报警双闪灯、放置三角警告牌、做好安全防护，谨防二次事故伤害。若您已经准备好了，请点击继续办理，如需人工服务，请说转人工。”
 
 #任务
-你的职责是全流程引导用户完成：事故情况初审 -> 现场证据拍照 -> 驾驶员信息核实。
-
 ##事故初审阶段
-在事故情况初审阶段：询问是否有人受伤，请求用户简单描述事故情况，询问事故发生时间并通过复述标准化时间向用户确认，询问事故车辆数量，询问事故发生的原因（例如追尾、刮擦、碰撞等）。采集完成后进入现场证据拍照阶段。
-如果用户回答已包含需要问题的答案，改为与用户确认答案是否正确。
 禁止在事故初审阶段使用askImageCapture工具。只有在完成事故信息采集后，才能进入现场证据拍照阶段并使用askImageCapture。
 
 ##现场证据拍照阶段
@@ -176,9 +219,9 @@ class MyAgent(Agent):
         initial_question = (
             "您好，这里是无锡交警，我将为您远程处理交通事故。请将人员撤离至路侧安全区域，"
             "开启危险报警双闪灯、放置三角警告牌、做好安全防护，谨防二次事故伤害。若您已经准备好了，"
-            "请点击继续办理，如需人工服务，请说转人工。"
+            "请点击继续办理。"
         )
-        options = ["继续办理", "转人工"]
+        options = ["继续办理"]
 
         async def _ask_initial():
             try:
@@ -586,10 +629,16 @@ class MyAgent(Agent):
                         notes = parsed_analysis.get("quality_notes") or []
                         problems.extend(notes)
 
+                # Format the full analysis output for chat
+                analysis_display = str(raw_text)
+                if isinstance(parsed_analysis, dict):
+                    # Pretty print JSON for better readability
+                    analysis_display = json.dumps(parsed_analysis, ensure_ascii=False, indent=2)
+                
                 await self._send_chat_message(
                     "┌─✅ Result: ask_image_capture\n"
                     f"│ valid: {valid}, problems: {problems}\n"
-                    f"│ analysis: \"{str(raw_text)[:50]}...\"\n"
+                    f"│ analysis:\n{analysis_display}\n"
                     "└───────────────"
                 )
 
@@ -889,6 +938,19 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
     current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     logger.info(f"Current time: {current_time}")
 
+    # Calculate weekday in Chinese
+    weekday_map = {
+        0: "星期一",
+        1: "星期二",
+        2: "星期三",
+        3: "星期四",
+        4: "星期五",
+        5: "星期六",
+        6: "星期日"
+    }
+    current_weekday = weekday_map[datetime.now().weekday()]
+    logger.info(f"Current weekday: {current_weekday}")
+
     initial_voice_id = "BV001_streaming" # Female voice
     if participant.attributes.get("voice"):
         initial_voice_id = participant.attributes.get("voice")
@@ -905,8 +967,9 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
         initial_instructions = participant.attributes.get("instructions")
         logger.info(f"User selected instructions: {initial_instructions}")
 
-    # Only replace the datetime placeholder to avoid KeyError from other braces in the prompt
+    # Substitute the datetime and weekday placeholders with str.replace (not str.format) so other braces in the prompt cannot raise KeyError
     initial_instructions = initial_instructions.replace("{datetime}", current_time)
+    initial_instructions = initial_instructions.replace("{weekday}", current_weekday)
     logger.info(f"Initial instructions: {initial_instructions}")
 
     if llm_backend == "dashscope":
@@ -914,12 +977,12 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
         llm = openai.LLM(
             base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
             api_key=os.getenv("DASHSCOPE_API_KEY"),
-            model="deepseek-v3.2"
+            model="deepseek-v3.2",
         )
     else:
         logger.info("Using default DeepSeek backend")
         llm = openai.LLM.with_deepseek(
-            model='deepseek-chat'
+            model='deepseek-chat',
         )
 
     session = AgentSession(
@@ -948,6 +1011,8 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
         # when it's detected, you may resume the agent's speech
         resume_false_interruption=True,
         false_interruption_timeout=1.0,
+        # Increase the maximum number of function calls per turn to avoid hitting the limit
+        max_tool_steps=15,
     )
 
     # log metrics as they are emitted, and total usage after session is over
diff --git a/src/components/playground/PhoneSimulator.tsx b/src/components/playground/PhoneSimulator.tsx
index 224c935..24305ea 100644
--- a/src/components/playground/PhoneSimulator.tsx
+++ b/src/components/playground/PhoneSimulator.tsx
@@ -417,9 +417,9 @@ export function PhoneSimulator({
           : `请上传${MAX_UPLOAD_MB}MB以内的图片文件。`;
         showErrorToast(msg);
       } else {
-        onCapture(file);
-        setProcessingImage(URL.createObjectURL(file));
-        setProcessingSource("upload");
+      onCapture(file);
+      setProcessingImage(URL.createObjectURL(file));
+      setProcessingSource("upload");
       }
     }
     // Reset input so the same file can be selected again