Compare commits

..

24 Commits

Author SHA1 Message Date
950d1ab3d4 Update agents/README.md 2026-02-02 15:28:28 +00:00
28b9a16c4e make chat message overlay right 2025-12-19 10:55:13 +08:00
f1b331d923 try to fix fast reload 2025-12-18 09:41:42 +08:00
739c019404 set chat message overlay draggable 2025-12-18 09:22:01 +08:00
da11561f47 Bug fixed 2025-12-17 23:13:42 +08:00
853e1558b1 fix realtime mode need push to talk once 2025-12-17 22:40:11 +08:00
6652a5cd43 update logic of switch_ptt_and_rt 2025-12-17 22:17:44 +08:00
d942222f11 update endcall button postion 2025-12-17 21:41:36 +08:00
5be6ab12f3 add chat message overlay 2025-12-17 18:59:17 +08:00
eeeed36494 add mic on push-to-talk mode, iphone open in phone tab default 2025-12-17 18:03:37 +08:00
3e0276d6c0 hide nextjs icon 2025-12-17 16:19:28 +08:00
4a6a6619df optimized layout in iphone 2025-12-17 16:17:33 +08:00
1f0365e716 Merge branch 'phone-interface' 2025-12-17 12:04:10 +08:00
7fbb9a5431 fix color theme change bug 2025-12-17 12:04:01 +08:00
a6b98e4100 Merge branch 'phone-interface' 2025-12-17 11:36:58 +08:00
48cb450208 add gitignore 2025-12-17 11:36:27 +08:00
800aa700f9 make endcall button keeps when connection fail 2025-12-17 11:33:44 +08:00
2decf208b4 remove few components on frontend 2025-12-17 11:03:36 +08:00
b75fd71bc7 does not allow interrupt in important stage 2025-12-17 09:26:56 +08:00
e8ef7c6da7 bug fixed 2025-12-16 17:54:37 +08:00
f2fcbe485f return random phone number and id card number 2025-12-16 17:31:17 +08:00
e09e4b6930 a better push to talk layout 2025-12-16 15:56:46 +08:00
1774f550dd first version push to talk 2025-12-16 15:11:55 +08:00
9f05f067a6 fix end call bug 2025-12-16 11:41:06 +08:00
12 changed files with 1423 additions and 156 deletions

58
.gitignore vendored
View File

@@ -6,27 +6,46 @@
.pnp.js
.yarn/install-state.gz
# pnpm
.pnpm-store/
.pnpm-debug.log*
# testing
/coverage
*.lcov
.nyc_output
# next.js
/.next/
/out/
.next/
out/
# production
/build
dist/
# misc
.DS_Store
*.pem
*.log
*.swp
*.swo
*~
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
# local env files
.env
.env*.local
.env.local
.env.development.local
.env.test.local
.env.production.local
# vercel
.vercel
@@ -34,3 +53,42 @@ yarn-error.log*
# typescript
*.tsbuildinfo
next-env.d.ts
# IDE
.vscode/
.idea/
*.sublime-project
*.sublime-workspace
*.code-workspace
# Python (for agents directory)
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
ENV/
.venv
pip-log.txt
pip-delete-this-directory.txt
.pytest_cache/
.coverage
htmlcov/
*.egg-info/
dist/
build/
*.egg
# OS
Thumbs.db
.DS_Store
.AppleDouble
.LSOverride
._*
# Temporary files
*.tmp
*.temp
.cache/

View File

@@ -0,0 +1 @@
use livekit-plugins-volcengine==1.2.9

View File

@@ -4,6 +4,7 @@ import base64
import json
import logging
import os
import random
import sys
import re
from dataclasses import asdict, dataclass
@@ -31,6 +32,7 @@ from livekit.agents import (
cli,
get_job_context,
metrics,
RoomIO
)
from livekit.agents.llm import ImageContent, ToolError, function_tool
from typing import Any, List, Optional
@@ -64,7 +66,7 @@ DEFAULT_INSTRUCTIONS = """# 角色
# 能力
- 你具有调用工具操作前端界面系统的能力
- ask_image_capture工具被调用后会在系统播放拍摄的目标和需求所以你每次在调用它之前不需要重复引导用户拍摄什么
- ask_image_capture工具被调用后会在系统播放拍摄的目标和需求所以你每次在调用它之前不需要重复引导用户拍摄什么而是使用ask_image_capture来传递拍摄需求
# 任务
你的职责是全流程引导用户完成:事故信息采集 -> 现场证据拍照 -> 驾驶员信息核实。
@@ -73,6 +75,7 @@ DEFAULT_INSTRUCTIONS = """# 角色
- 在事故信息采集阶段询问是否有人受伤请求用户简单描述事故情况询问事故发生时间并通过复述标准化时间xx年xx月xx日xx时xx分向用户确认询问事故车辆数量询问事故发生的原因例如追尾、刮擦、碰撞等。采集完成后进入现场证据拍照阶段
- 如果用户回答已包含需要问题的答案,改为与用户确认答案是否正确
- 采集完成之后进入现场证据拍照阶段
- 这个阶段不使用ask_important_question和ask_image_capture工具
## 现场证据拍照阶段
- 在现场证据拍照阶段使用askImageCapture工具引导用户依次拍摄照片1. 第一辆车的车牌2. 第一辆车的碰撞位置3. 第一辆车的驾驶员正脸;
@@ -100,9 +103,11 @@ DEFAULT_INSTRUCTIONS = """# 角色
- 一次询问一个问题
- 不要在你的回复中使用 emojis, asterisks, markdown, 或其他特殊字符
- 不同阶段直接的过渡语句自然
- 你已经说过下面的开场白所以不需要重复说:“您好,这里是无锡交警,我将为您远程处理交通事故。请将人员撤离至路侧安全区域,开启危险报警双闪灯、放置三角警告牌、做好安全防护,谨防二次事故伤害。若您已经准备好了,请点击继续办理,如需人工服务,请说转人工。”
- 你已经说过下面的开场白,用户点击继续办理说明已经认可,所以不需要重复说:“您好,这里是无锡交警,我将为您远程处理交通事故。请将人员撤离至路侧安全区域,开启危险报警双闪灯、放置三角警告牌、做好安全防护,谨防二次事故伤害。若您已经准备好了,请点击继续办理,如需人工服务,请说转人工。”
"""
DEFAULT_TALKING_MODE = 'push_to_talk'
# ## 黄金对话路径示例 GOLDEN_CONVERSATION_PATH
# ```
@@ -477,7 +482,7 @@ class MyAgent(Agent):
self._image_event.clear()
# Speak the capture prompt so the user hears what to do
self.session.say(prompt, allow_interruptions=True)
self.session.say(prompt, allow_interruptions=False)
# Ask for image capture and wait for user to capture/upload
response = await room.local_participant.perform_rpc(
@@ -605,12 +610,25 @@ class MyAgent(Agent):
f"│ plate: \"{normalized_plate}\"\n"
"└───────────────"
)
# Dummy fixed response (placeholder backend)
return {
# Generate random mobile number (11 digits: 1[3-9] + 9 random digits)
mobile_prefix = random.choice(['13', '14', '15', '16', '17', '18', '19'])
mobile_suffix = ''.join([str(random.randint(0, 9)) for _ in range(9)])
random_mobile = f"{mobile_prefix}{mobile_suffix}"
result = {
"success": True,
"plate": normalized_plate,
"mobile": "13800001234",
"mobile": random_mobile,
}
await self._send_chat_message(
"┌─✅ Result: get_mobile_by_plate\n"
f"│ plate: \"{normalized_plate}\"\n"
f"│ mobile: \"{random_mobile}\"\n"
"└───────────────"
)
return result
@function_tool()
async def get_id_card_by_plate(
@@ -630,12 +648,34 @@ class MyAgent(Agent):
f"│ plate: \"{normalized_plate}\"\n"
"└───────────────"
)
# Dummy fixed response (placeholder backend)
return {
# Generate random ID card number (18 digits: 6-digit area code + 8-digit birth date + 3-digit sequence + 1 check digit)
# Area code: random 6 digits (typically 110000-659999 for Chinese ID cards)
area_code = random.randint(110000, 659999)
# Birth date: random date between 1950-01-01 and 2000-12-31
year = random.randint(1950, 2000)
month = random.randint(1, 12)
day = random.randint(1, 28) # Use 28 to avoid month-specific day issues
birth_date = f"{year:04d}{month:02d}{day:02d}"
# Sequence number: 3 random digits
sequence = random.randint(100, 999)
# Check digit: random digit or X (10% chance of X)
check_digit = 'X' if random.random() < 0.1 else str(random.randint(0, 9))
random_id_card = f"{area_code}{birth_date}{sequence}{check_digit}"
result = {
"success": True,
"plate": normalized_plate,
"id_card": "320101198001011234",
"id_card": random_id_card,
}
await self._send_chat_message(
"┌─✅ Result: get_id_card_by_plate\n"
f"│ plate: \"{normalized_plate}\"\n"
f"│ id_card: \"{random_id_card}\"\n"
"└───────────────"
)
return result
@function_tool()
async def validate_mobile_number(
@@ -657,17 +697,33 @@ class MyAgent(Agent):
)
is_valid = bool(re.fullmatch(r"1[3-9]\\d{9}", normalized))
if is_valid:
return {
result = {
"success": True,
"valid": True,
"mobile": normalized,
}
return {
await self._send_chat_message(
"┌─✅ Result: validate_mobile_number\n"
f"│ mobile: \"{normalized}\"\n"
f"│ valid: true\n"
"└───────────────"
)
return result
result = {
"success": True,
"valid": False,
"mobile": normalized,
"error": "手机号格式不正确应为1[3-9]开头的11位数字",
}
await self._send_chat_message(
"┌─✅ Result: validate_mobile_number\n"
f"│ mobile: \"{normalized}\"\n"
f"│ valid: false\n"
f"│ error: \"{result['error']}\"\n"
"└───────────────"
)
return result
@function_tool()
async def validate_id_card_number(
@@ -689,25 +745,44 @@ class MyAgent(Agent):
)
is_valid = bool(re.fullmatch(r"(\\d{17}[\\dX]|\\d{15})", normalized))
if is_valid:
return {
result = {
"success": True,
"valid": True,
"id_card": normalized,
}
return {
await self._send_chat_message(
"┌─✅ Result: validate_id_card_number\n"
f"│ id_card: \"{normalized}\"\n"
f"│ valid: true\n"
"└───────────────"
)
return result
result = {
"success": True,
"valid": False,
"id_card": normalized,
"error": "身份证格式不正确应为18位末位可为X或15位数字",
}
await self._send_chat_message(
"┌─✅ Result: validate_id_card_number\n"
f"│ id_card: \"{normalized}\"\n"
f"│ valid: false\n"
f"│ error: \"{result['error']}\"\n"
"└───────────────"
)
return result
@function_tool()
async def enter_hand_off_to_human_mode(
self,
context: RunContext,
):
"""切换到“转人工”模式(前端电话界面进入人工处理)。返回成功/失败。"""
await self._send_chat_message("🔨 Call: enter_hand_off_to_human_mode")
"""切换到"转人工"模式(前端电话界面进入人工处理)。返回成功/失败。"""
await self._send_chat_message(
"┌─🔨 Call: enter_hand_off_to_human_mode\n"
"└───────────────"
)
try:
room = get_job_context().room
participant_identity = next(iter(room.remote_participants))
@@ -718,10 +793,21 @@ class MyAgent(Agent):
response_timeout=5.0,
)
logger.info(f"Entered hand off to human mode: {response}")
await self._send_chat_message(f"✅ Result: enter_hand_off_to_human_mode\n • status: success")
await self._send_chat_message(
"┌─✅ Result: enter_hand_off_to_human_mode\n"
f"│ status: success\n"
f"│ response: {response}\n"
"└───────────────"
)
return response
except Exception as e:
logger.error(f"Failed to enter hand off to human mode: {e}")
await self._send_chat_message(
"┌─❌ Result: enter_hand_off_to_human_mode\n"
f"│ status: error\n"
f"│ error: \"{str(e)}\"\n"
"└───────────────"
)
raise ToolError(f"Unable to enter hand off to human mode: {str(e)}")
@function_tool()
@@ -730,7 +816,10 @@ class MyAgent(Agent):
context: RunContext,
):
"""挂断当前通话(结束会话),返回成功/失败。"""
await self._send_chat_message("🔨 Call: hang_up_call")
await self._send_chat_message(
"┌─🔨 Call: hang_up_call\n"
"└───────────────"
)
try:
room = get_job_context().room
participant_identity = next(iter(room.remote_participants))
@@ -741,14 +830,25 @@ class MyAgent(Agent):
response_timeout=5.0,
)
logger.info(f"Hung up call: {response}")
await self._send_chat_message(f"✅ Result: hang_up_call\n • status: disconnected")
await self._send_chat_message(
"┌─✅ Result: hang_up_call\n"
f"│ status: disconnected\n"
f"│ response: {response}\n"
"└───────────────"
)
return response
except Exception as e:
logger.error(f"Failed to hang up call: {e}")
await self._send_chat_message(
"┌─❌ Result: hang_up_call\n"
f"│ status: error\n"
f"│ error: \"{str(e)}\"\n"
"└───────────────"
)
raise ToolError(f"Unable to hang up call: {str(e)}")
@function_tool()
async def ask_important_question(self, context: RunContext, message: str, options: Optional[List[str]] = None):
async def ask_important_question(self, context: RunContext, message: str, options: Optional[List[str]] | str = None):
"""询问关键问题并等待用户选择选项,返回用户的选择结果。
参数:
@@ -758,7 +858,12 @@ class MyAgent(Agent):
返回:
str: 用户选择的文本内容。
"""
await self._send_chat_message(f"🔨 Call: ask_important_question\n • message: \"{message}\"\n • options: {options}")
await self._send_chat_message(
"┌─🔨 Call: ask_important_question\n"
f"│ message: \"{message}\"\n"
f"│ options: {options}\n"
"└───────────────"
)
try:
room = get_job_context().room
participant_identity = next(iter(room.remote_participants))
@@ -781,7 +886,7 @@ class MyAgent(Agent):
payload_data["options"] = options
# Speak the message
speech_handle = self.session.say(message, allow_interruptions=True)
speech_handle = self.session.say(message, allow_interruptions=False)
# Wait for user selection with longer timeout since user needs time to respond
response = await room.local_participant.perform_rpc(
@@ -793,8 +898,12 @@ class MyAgent(Agent):
# Interrupt speech if user makes a selection while agent is speaking
if speech_handle and hasattr(speech_handle, "interrupt"):
speech_handle.interrupt()
logger.info("Interrupted speech due to user selection")
try:
speech_handle.interrupt()
except Exception as e:
logger.error(f"Failed to interrupt speech: {e}")
else:
logger.info("Interrupted speech due to user selection")
logger.info(f"User made selection: {response}")
@@ -804,7 +913,11 @@ class MyAgent(Agent):
user_selection = response_data.get("selection", "确认")
logger.info(f"User selected: {user_selection}")
await self._send_chat_message(f"✅ Result: ask_important_question\n • selection: \"{user_selection}\"")
await self._send_chat_message(
"┌─✅ Result: ask_important_question\n"
f"│ selection: \"{user_selection}\"\n"
"└───────────────"
)
return f"用户选择了: {user_selection}"
except json.JSONDecodeError:
logger.error(f"Failed to parse response: {response}")
@@ -905,6 +1018,16 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
initial_instructions = participant.attributes.get("instructions")
logger.info(f"User selected instructions: {initial_instructions}")
# Read talking_mode from frontend state
initial_talking_mode = DEFAULT_TALKING_MODE
if participant.attributes.get("talking_mode"):
frontend_talking_mode = participant.attributes.get("talking_mode")
if frontend_talking_mode in ["push_to_talk", "realtime"]:
initial_talking_mode = frontend_talking_mode
logger.info(f"Initializing talking_mode from frontend: {initial_talking_mode}")
else:
logger.warning(f"Invalid talking_mode from frontend: {frontend_talking_mode}, using default: {initial_talking_mode}")
# Replace the datetime and weekday placeholders to avoid KeyError from other braces in the prompt
initial_instructions = initial_instructions.replace("{datetime}", current_time)
initial_instructions = initial_instructions.replace("{weekday}", current_weekday)
@@ -921,6 +1044,7 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
logger.info("Using default DeepSeek backend")
llm = openai.LLM.with_deepseek(
model='deepseek-chat',
temperature=0.1
)
session = AgentSession(
@@ -953,6 +1077,8 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
# Increase the maximum number of function calls per turn to avoid hitting the limit
max_tool_steps=15,
)
room_io = RoomIO(session, room=ctx.room)
await room_io.start()
# log metrics as they are emitted, and total usage after session is over
usage_collector = metrics.UsageCollector()
@@ -1011,6 +1137,90 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
room_output_options=RoomOutputOptions(transcription_enabled=True),
)
# disable input audio at the start
_talking_mode = initial_talking_mode
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
else:
session.input.set_audio_enabled(True)
@ctx.room.local_participant.register_rpc_method("start_turn")
async def start_turn(data: rtc.RpcInvocationData):
    """RPC (push-to-talk): begin a user turn.

    Interrupts any in-progress agent speech, clears the pending user turn,
    routes room input to the calling participant, and unmutes audio input.
    Returns a JSON string: {"success": bool, "message"?: str}.
    """
    try:
        # session.interrupt() raises RuntimeError when the current speech
        # cannot be interrupted; report failure to the frontend instead of
        # surfacing an RPC error.
        session.interrupt()
    except RuntimeError as e:
        logger.info(f"Cannot interrupt session (agent is speaking): {e}")
        # Return a message instead of raising an error
        return json.dumps({"success": False, "message": "不能打断"})
    session.clear_user_turn()
    # listen to the caller if multi-user
    room_io.set_participant(data.caller_identity)
    session.input.set_audio_enabled(True)
    return json.dumps({"success": True})
@ctx.room.local_participant.register_rpc_method("end_turn")
async def end_turn(data: rtc.RpcInvocationData):
    """RPC (push-to-talk): finish the user turn.

    Mutes audio input and commits the captured speech so the agent
    generates a response.
    """
    session.input.set_audio_enabled(False)
    session.commit_user_turn(
        # the timeout for the final transcript to be received after committing the user turn
        # increase this value if the STT is slow to respond
        transcript_timeout=10.0,
        # the duration of the silence to be appended to the STT to make it generate the final transcript
        stt_flush_duration=2.0,
    )
@ctx.room.local_participant.register_rpc_method("cancel_turn")
async def cancel_turn(data: rtc.RpcInvocationData):
    """RPC (push-to-talk): abort the in-progress user turn.

    Mutes audio input and discards any captured speech without sending it
    to the agent.
    """
    session.input.set_audio_enabled(False)
    session.clear_user_turn()
    logger.info("cancel turn")
@ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
async def switch_ptt_and_rt(data: rtc.RpcInvocationData):
    """RPC: switch between push-to-talk and realtime talking modes.

    Payload (JSON): {"mode": "push_to_talk" | "realtime"}. An invalid or
    unparseable payload falls back to toggling the current mode. Applies
    the mode by enabling/disabling audio input and routing input to the
    caller. Returns a JSON string: {"success": true, "mode": <mode>}.
    """
    nonlocal _talking_mode
    try:
        # Parse the payload to get the target mode
        payload = json.loads(data.payload) if data.payload else {}
        target_mode = payload.get("mode")
        # Validate and set the mode
        if target_mode in ["push_to_talk", "realtime"]:
            _talking_mode = target_mode
            logger.info(f"Switching talking mode to: {_talking_mode}")
        else:
            # If invalid mode, toggle from current state
            logger.warning(f"Invalid mode '{target_mode}', toggling from current state")
            _talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
            logger.info(f"Toggled talking mode to: {_talking_mode}")
        # Apply the mode settings
        room_io.set_participant(data.caller_identity)
        if _talking_mode == "push_to_talk":
            session.input.set_audio_enabled(False)
            logger.info("Setting audio enabled to False (PTT mode)")
        else:
            # When switching to realtime mode, clear user turn state to ensure proper initialization
            session.clear_user_turn()
            session.input.set_audio_enabled(True)
            logger.info("Setting audio enabled to True (realtime mode)")
        return json.dumps({"success": True, "mode": _talking_mode})
    except json.JSONDecodeError:
        # NOTE(review): this fallback duplicates the apply-mode logic above;
        # consider extracting a helper so the two paths cannot drift apart.
        logger.error(f"Failed to parse switch_ptt_and_rt payload: {data.payload}")
        # Fallback to toggle behavior
        _talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
        room_io.set_participant(data.caller_identity)
        if _talking_mode == "push_to_talk":
            session.input.set_audio_enabled(False)
        else:
            # When switching to realtime mode, clear user turn state
            session.clear_user_turn()
            session.input.set_audio_enabled(True)
        return json.dumps({"success": True, "mode": _talking_mode})
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--avatar-url", type=str, default=None, help="Avatar dispatcher URL")

View File

@@ -4,6 +4,10 @@ const withNextPluginPreval = createNextPluginPreval();
/** @type {import('next').NextConfig} */
// Next.js configuration, wrapped by next-plugin-preval before export.
const nextConfig = {
  // NOTE(review): strict mode is disabled — confirm this is intentional
  // (strict mode double-invokes effects in development).
  reactStrictMode: false,
  // Explicitly allow znjj.wangxin93.eu.org for Dev Origin, per future Next.js requirement.
  allowedDevOrigins: [
    "znjj.wangxin93.eu.org",
  ],
};
module.exports = withNextPluginPreval(nextConfig);

View File

@@ -0,0 +1,214 @@
"use client";
import { TranscriptionTile } from "@/transcriptions/TranscriptionTile";
import { TrackReferenceOrPlaceholder } from "@livekit/components-react";
import { useCallback, useEffect, useRef, useState } from "react";
/**
 * Props for the draggable chat overlay rendered inside the phone container.
 */
export interface ChatOverlayProps {
  /** Agent audio track forwarded to TranscriptionTile (optional). */
  agentAudioTrack?: TrackReferenceOrPlaceholder;
  /** Accent color passed through to the transcription UI. */
  accentColor: string;
  /** When true, the transcription input is disabled. */
  inputDisabled?: boolean;
  /** Whether the overlay is currently shown (it stays mounted either way). */
  isVisible: boolean;
  /** Overlay position in container-relative pixels; owned by the parent. */
  position: { x: number; y: number };
  /** Called with the new position while dragging / on auto-centering. */
  onPositionChange: (position: { x: number; y: number }) => void;
  /** Ref to the container element the overlay is positioned within. */
  containerRef: React.RefObject<HTMLDivElement | null>;
  /** Invoked when the close button is clicked. */
  onToggle: () => void;
}
/**
 * Draggable chat overlay shown on top of the phone simulator.
 *
 * Renders a TranscriptionTile inside a floating panel. The panel's position
 * is controlled by the parent (`position` / `onPositionChange`); its size is
 * derived from the container element, tracked with a ResizeObserver. The
 * panel can be dragged by its header (mouse and touch) and is clamped to the
 * container bounds. On first show at origin (0,0) it auto-centers, shifted
 * up by 15% of the container height.
 */
export function ChatOverlay({
  agentAudioTrack,
  accentColor,
  inputDisabled,
  isVisible,
  position,
  onPositionChange,
  containerRef,
  onToggle,
}: ChatOverlayProps) {
  const overlayRef = useRef<HTMLDivElement>(null);
  const headerRef = useRef<HTMLDivElement>(null);
  const [isDragging, setIsDragging] = useState(false);
  // Offset between the pointer and the overlay's top-left at drag start;
  // kept in a ref so drag-move handlers don't need to re-bind per frame.
  const dragOffset = useRef({ x: 0, y: 0 });
  // Responsive sizing based on container size
  const [containerSize, setContainerSize] = useState({ width: 360, height: 500 });
  useEffect(() => {
    const updateSize = () => {
      if (containerRef.current) {
        const rect = containerRef.current.getBoundingClientRect();
        setContainerSize({ width: rect.width, height: rect.height });
      }
    };
    // Measure once immediately, then keep tracking container resizes.
    updateSize();
    const resizeObserver = new ResizeObserver(updateSize);
    if (containerRef.current) {
      resizeObserver.observe(containerRef.current);
    }
    return () => {
      resizeObserver.disconnect();
    };
  }, [containerRef]);
  // Calculate overlay size as percentage of container, with min/max constraints
  // Width: larger (up to 95% of container)
  const overlayWidth = Math.min(
    Math.max(containerSize.width * 0.9, 280),
    containerSize.width * 0.95
  );
  // Height: smaller (reduced from 60-85% to 40-60%)
  const overlayHeight = Math.min(
    Math.max(containerSize.height * 0.4, 250),
    containerSize.height * 0.6
  );
  // Position overlay at center (slightly moved up) when first shown
  const hasPositionedRef = useRef(false);
  useEffect(() => {
    if (isVisible && containerRef.current && containerSize.width > 0 && overlayWidth > 0 && overlayHeight > 0) {
      // Calculate center position, moved up by 15% of container height
      const centerX = (containerSize.width - overlayWidth) / 2;
      const centerY = (containerSize.height - overlayHeight) / 2 - (containerSize.height * 0.15);
      // Only auto-position on first show (when position is at origin)
      if (!hasPositionedRef.current && position.x === 0 && position.y === 0) {
        onPositionChange({ x: Math.max(0, centerX), y: Math.max(0, centerY) });
        hasPositionedRef.current = true;
      }
    }
  }, [isVisible, containerSize.width, containerSize.height, overlayWidth, overlayHeight, containerRef, position, onPositionChange]);
  // Begin dragging when the pointer goes down on the header (not on the
  // close button or its icon). Records the pointer-to-panel offset.
  const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
    if (!overlayRef.current || !headerRef.current) return;
    // Only allow dragging from the header, but not from buttons
    const target = e.target as HTMLElement;
    if (!headerRef.current.contains(target)) return;
    // Don't drag if clicking on the close button
    if (target.closest('button') || target.closest('svg')) return;
    e.preventDefault();
    e.stopPropagation();
    setIsDragging(true);
    const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
    const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
    dragOffset.current = {
      x: clientX - position.x,
      y: clientY - position.y,
    };
  };
  // Window-level move handler: computes the new position from the pointer
  // and clamps it inside the container before reporting it to the parent.
  const handleDragMove = useCallback((e: MouseEvent | TouchEvent) => {
    if (!isDragging || !containerRef.current || !overlayRef.current) return;
    e.preventDefault();
    e.stopPropagation();
    const containerRect = containerRef.current.getBoundingClientRect();
    const overlayRect = overlayRef.current.getBoundingClientRect();
    const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
    const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
    let newX = clientX - dragOffset.current.x;
    let newY = clientY - dragOffset.current.y;
    // Constrain within container bounds
    const maxX = containerRect.width - overlayRect.width;
    const maxY = containerRect.height - overlayRect.height;
    const minY = 0; // Allow dragging to top
    newX = Math.max(0, Math.min(newX, maxX));
    newY = Math.max(minY, Math.min(newY, maxY));
    onPositionChange({ x: newX, y: newY });
  }, [isDragging, containerRef, overlayRef, onPositionChange]);
  const handleDragEnd = useCallback(() => {
    setIsDragging(false);
  }, []);
  // Attach window listeners only while a drag is active; touchmove is
  // non-passive so preventDefault can suppress page scrolling.
  useEffect(() => {
    if (isDragging) {
      window.addEventListener("mousemove", handleDragMove);
      window.addEventListener("mouseup", handleDragEnd);
      window.addEventListener("touchmove", handleDragMove, { passive: false });
      window.addEventListener("touchend", handleDragEnd);
    }
    return () => {
      window.removeEventListener("mousemove", handleDragMove);
      window.removeEventListener("mouseup", handleDragEnd);
      window.removeEventListener("touchmove", handleDragMove);
      window.removeEventListener("touchend", handleDragEnd);
    };
  }, [isDragging, handleDragMove, handleDragEnd]);
  return (
    <div
      ref={overlayRef}
      className="absolute z-40 rounded-lg border border-white/20 shadow-2xl backdrop-blur-md transition-all duration-300 flex flex-col"
      style={{
        left: `${position.x}px`,
        top: `${position.y}px`,
        width: `${overlayWidth}px`,
        height: `${overlayHeight}px`,
        backgroundColor: 'rgba(0, 0, 0, 0.85)',
        cursor: isDragging ? 'grabbing' : 'default',
        // Hidden via display rather than unmounting, so chat state persists.
        display: isVisible ? 'flex' : 'none',
      }}
      onMouseDown={handleDragStart}
      onTouchStart={handleDragStart}
    >
      {/* Header with drag handle and close button */}
      <div
        ref={headerRef}
        className="flex items-center justify-between px-4 py-2 border-b border-white/10 cursor-move select-none"
        style={{ backgroundColor: 'rgba(0, 0, 0, 0.3)' }}
      >
        <div className="flex items-center gap-2">
          <div className="w-2 h-2 rounded-full bg-white/40"></div>
          <span className="text-white text-xs font-medium">Chat</span>
        </div>
        <button
          onClick={(e) => {
            e.stopPropagation();
            e.preventDefault();
            onToggle();
          }}
          className="text-white hover:text-white transition-colors p-2 rounded hover:bg-white/10 flex items-center justify-center"
          aria-label="Close chat overlay"
          style={{ minWidth: '32px', minHeight: '32px' }}
        >
          <svg
            className="w-5 h-5"
            fill="none"
            stroke="currentColor"
            viewBox="0 0 24 24"
            strokeWidth={2.5}
          >
            <path
              strokeLinecap="round"
              strokeLinejoin="round"
              d="M6 18L18 6M6 6l12 12"
            />
          </svg>
        </button>
      </div>
      {/* Chat content with padding */}
      <div className="overflow-hidden flex flex-col px-2 py-2" style={{ height: `calc(100% - 40px)` }}>
        <TranscriptionTile
          agentAudioTrack={agentAudioTrack}
          accentColor={accentColor}
          inputDisabled={inputDisabled}
        />
      </div>
    </div>
  );
}

View File

@@ -5,15 +5,17 @@ import {
BarVisualizer,
useConnectionState,
useLocalParticipant,
useParticipantAttributes,
useRoomContext,
useTracks,
useVoiceAssistant,
VideoTrack,
} from "@livekit/components-react";
import { ConnectionState, Track, LocalParticipant, Room } from "livekit-client";
import { useEffect, useMemo, useState, useRef } from "react";
import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon } from "./icons";
import { useEffect, useMemo, useState, useRef, useCallback } from "react";
import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon, ChatIcon } from "./icons";
import { useToast } from "@/components/toast/ToasterProvider";
import { ChatOverlay } from "@/components/chat/ChatOverlay";
export interface PhoneSimulatorProps {
onConnect: () => void;
@@ -43,6 +45,9 @@ export function PhoneSimulator({
const { localParticipant, isMicrophoneEnabled: isMicEnabled } = useLocalParticipant();
const tracks = useTracks();
const voiceAssistant = useVoiceAssistant();
const agentAttributes = useParticipantAttributes({
participant: voiceAssistant.agent,
});
const fileInputRef = useRef<HTMLInputElement>(null);
const phoneContainerRef = useRef<HTMLDivElement>(null);
const visualizerRef = useRef<HTMLDivElement>(null);
@@ -59,6 +64,17 @@ export function PhoneSimulator({
const isAgentSpeaking = voiceAssistant.state === "speaking";
const wasMicEnabledRef = useRef(false);
const lastPhoneMode = useRef(phoneMode);
const [isPushToTalkActive, setIsPushToTalkActive] = useState(false);
const [interruptRejected, setInterruptRejected] = useState(false);
const [isPushToTalkMode, setIsPushToTalkMode] = useState(true); // false = realtime mode, true = PTT mode (default)
const pushToTalkButtonRef = useRef<HTMLButtonElement>(null);
const [showChatOverlay, setShowChatOverlay] = useState(false);
const [chatOverlayPosition, setChatOverlayPosition] = useState({ x: 0, y: 0 }); // Will be positioned at top-right by ChatOverlay component
const [chatTogglePosition, setChatTogglePosition] = useState<{ x?: number; right?: number; y: number }>({ right: 16, y: 56 }); // Initial position on the right
const [isDraggingChatToggle, setIsDraggingChatToggle] = useState(false);
const chatToggleRef = useRef<HTMLButtonElement>(null);
const chatToggleDragOffset = useRef({ x: 0, y: 0 });
const chatToggleHasDragged = useRef(false);
useEffect(() => {
const voiceAttr = config.settings.attributes?.find(a => a.key === "voice");
@@ -67,6 +83,47 @@ export function PhoneSimulator({
}
}, [config.settings.attributes]);
// Set talking_mode attribute when connected or when mode changes
const lastTalkingModeRef = useRef<string | null>(null);
const configAttributesRef = useRef(config.settings.attributes);
// Update config attributes ref when it changes
useEffect(() => {
configAttributesRef.current = config.settings.attributes;
}, [config.settings.attributes]);
useEffect(() => {
if (roomState === ConnectionState.Connected && localParticipant) {
const talkingMode = isPushToTalkMode ? "push_to_talk" : "realtime";
// Only update if the mode actually changed
if (lastTalkingModeRef.current === talkingMode) {
return;
}
lastTalkingModeRef.current = talkingMode;
try {
// Get current attributes from config to preserve them
const attributesToSet: Record<string, string> = {};
const configAttributes = configAttributesRef.current || [];
configAttributes.forEach(attr => {
if (attr.key && attr.value) {
attributesToSet[attr.key] = attr.value;
}
});
// Add talking_mode
attributesToSet.talking_mode = talkingMode;
localParticipant.setAttributes(attributesToSet);
} catch (error) {
console.error("Failed to set talking_mode attribute:", error);
}
} else if (roomState === ConnectionState.Disconnected) {
// Reset ref when disconnected
lastTalkingModeRef.current = null;
}
}, [roomState, localParticipant, isPushToTalkMode]);
const [currentTime, setCurrentTime] = useState("");
const [visualizerPosition, setVisualizerPosition] = useState({
@@ -76,30 +133,41 @@ export function PhoneSimulator({
const [isDragging, setIsDragging] = useState(false);
const dragOffset = useRef({ x: 0, y: 0 });
const handleDragStart = (e: React.MouseEvent) => {
const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault();
setIsDragging(true);
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
dragOffset.current = {
x: e.clientX - visualizerPosition.x,
y: e.clientY - visualizerPosition.y,
x: clientX - visualizerPosition.x,
y: clientY - visualizerPosition.y,
};
};
const handleDragMove = (e: MouseEvent) => {
const handleDragMove = (e: MouseEvent | TouchEvent) => {
if (!isDragging || !phoneContainerRef.current || !visualizerRef.current) return;
e.preventDefault();
const containerRect = phoneContainerRef.current.getBoundingClientRect();
const visualizerRect = visualizerRef.current.getBoundingClientRect();
let newX = e.clientX - dragOffset.current.x;
let newY = e.clientY - dragOffset.current.y;
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
let newX = clientX - dragOffset.current.x;
let newY = clientY - dragOffset.current.y;
// Constrain within container
const maxX = containerRect.width - visualizerRect.width;
const maxY = containerRect.height - visualizerRect.height;
const statusBarHeight = 48; // h-12 = 48px
// On mobile (width < 768px), status bar is hidden, so allow dragging to top (y=0)
// On desktop, keep status bar height constraint (48px)
const isMobile = typeof window !== 'undefined' && window.innerWidth < 768;
const minY = isMobile ? 0 : 48; // statusBarHeight = 48px
newX = Math.max(0, Math.min(newX, maxX));
newY = Math.max(statusBarHeight, Math.min(newY, maxY));
newY = Math.max(minY, Math.min(newY, maxY));
setVisualizerPosition({
x: newX,
@@ -115,13 +183,107 @@ export function PhoneSimulator({
if (isDragging) {
window.addEventListener("mouseup", handleDragEnd);
window.addEventListener("mousemove", handleDragMove);
window.addEventListener("touchend", handleDragEnd);
window.addEventListener("touchmove", handleDragMove, { passive: false });
}
return () => {
window.removeEventListener("mouseup", handleDragEnd);
window.removeEventListener("mousemove", handleDragMove);
window.removeEventListener("touchend", handleDragEnd);
window.removeEventListener("touchmove", handleDragMove);
};
}, [isDragging]);
// Chat toggle button drag handlers
const handleChatToggleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault();
e.stopPropagation(); // Prevent triggering the button click
setIsDraggingChatToggle(true);
chatToggleHasDragged.current = false;
if (!phoneContainerRef.current || !chatToggleRef.current) return;
const containerRect = phoneContainerRef.current.getBoundingClientRect();
const buttonRect = chatToggleRef.current.getBoundingClientRect();
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
// If using right positioning, convert to x for dragging
if (chatTogglePosition.right !== undefined && chatTogglePosition.x === undefined) {
const currentX = containerRect.width - chatTogglePosition.right - buttonRect.width;
setChatTogglePosition({ x: currentX, y: chatTogglePosition.y });
chatToggleDragOffset.current = {
x: clientX - containerRect.left - currentX,
y: clientY - containerRect.top - chatTogglePosition.y,
};
} else {
// Already using x positioning
const currentX = chatTogglePosition.x ?? 0;
chatToggleDragOffset.current = {
x: clientX - containerRect.left - currentX,
y: clientY - containerRect.top - chatTogglePosition.y,
};
}
};
// Tracks pointer movement while the chat toggle is being dragged and updates
// its position, clamped to the phone container. Marks the drag flag so the
// button's onClick can distinguish a drag release from a tap.
const handleChatToggleDragMove = (e: MouseEvent | TouchEvent) => {
  if (!isDraggingChatToggle || !phoneContainerRef.current || !chatToggleRef.current) return;
  e.preventDefault();
  chatToggleHasDragged.current = true; // a real drag happened — suppress the click
  const containerRect = phoneContainerRef.current.getBoundingClientRect();
  const buttonRect = chatToggleRef.current.getBoundingClientRect();
  const point = 'touches' in e ? e.touches[0] : e;
  const rawX = point.clientX - containerRect.left - chatToggleDragOffset.current.x;
  const rawY = point.clientY - containerRect.top - chatToggleDragOffset.current.y;
  // Keep the button fully inside the container.
  const maxX = containerRect.width - buttonRect.width;
  const maxY = containerRect.height - buttonRect.height;
  // Mobile (< 768px) hides the status bar so the button may reach y = 0;
  // desktop reserves the 48px status bar at the top.
  const isMobile = typeof window !== 'undefined' && window.innerWidth < 768;
  const minY = isMobile ? 0 : 48; // statusBarHeight = 48px
  setChatTogglePosition({
    x: Math.max(0, Math.min(rawX, maxX)),
    y: Math.max(minY, Math.min(rawY, maxY)),
  });
};
// Ends a chat-toggle drag. The "has dragged" flag is cleared on a short delay
// so the click handler that fires right after release can still observe it
// and skip toggling the overlay.
const handleChatToggleDragEnd = () => {
  setIsDraggingChatToggle(false);
  setTimeout(() => {
    chatToggleHasDragged.current = false;
  }, 100);
};
// While a chat-toggle drag is active, listen on `window` so the drag keeps
// tracking even when the pointer leaves the button. `touchmove` is registered
// non-passive so the move handler may call preventDefault and stop scrolling.
useEffect(() => {
  if (isDraggingChatToggle) {
    window.addEventListener("mouseup", handleChatToggleDragEnd);
    window.addEventListener("mousemove", handleChatToggleDragMove);
    window.addEventListener("touchend", handleChatToggleDragEnd);
    window.addEventListener("touchmove", handleChatToggleDragMove, { passive: false });
  }
  // Cleanup removes all listeners; removing ones that were never added is a
  // harmless no-op.
  return () => {
    window.removeEventListener("mouseup", handleChatToggleDragEnd);
    window.removeEventListener("mousemove", handleChatToggleDragMove);
    window.removeEventListener("touchend", handleChatToggleDragEnd);
    window.removeEventListener("touchmove", handleChatToggleDragMove);
  };
  // NOTE(review): the handlers are re-created each render but intentionally
  // omitted from the deps; this works because the effect only re-runs when
  // isDraggingChatToggle flips — confirm against the exhaustive-deps lint rule.
}, [isDraggingChatToggle]);
// Initialize chat toggle button position - keep it on the right using 'right' CSS property
// Only convert to 'x' (left positioning) when user drags it
useEffect(() => {
if (showCameraMenu) {
Room.getLocalDevices("videoinput").then(setCameras);
@@ -156,35 +318,53 @@ export function PhoneSimulator({
const enteringMode = (mode: typeof phoneMode) =>
phoneMode === mode && lastPhoneMode.current !== mode;
// Entering important message / capture / hand_off: remember mic state and mute if needed
if (enteringMode("important_message") || enteringMode("capture") || enteringMode("hand_off")) {
wasMicEnabledRef.current = isMicEnabled;
if (isMicEnabled) {
localParticipant.setMicrophoneEnabled(false);
}
}
// Exiting important message mode or hand off mode or capture mode
else if (
(phoneMode !== "important_message" && lastPhoneMode.current === "important_message") ||
(phoneMode !== "hand_off" && lastPhoneMode.current === "hand_off") ||
(phoneMode !== "capture" && lastPhoneMode.current === "capture")
) {
// Restore mic to previous state
localParticipant.setMicrophoneEnabled(wasMicEnabledRef.current);
// Only proceed if connected and localParticipant is available
if (roomState !== ConnectionState.Connected || !localParticipant) return;
// If exiting capture mode, clear processing image
if (lastPhoneMode.current === "capture") {
setProcessingImage(null);
setProcessingSource(null);
const updateMicState = async () => {
// Entering important message / capture / hand_off: remember mic state and mute if needed
if (enteringMode("important_message") || enteringMode("capture") || enteringMode("hand_off")) {
wasMicEnabledRef.current = isMicEnabled;
if (isMicEnabled) {
try {
await localParticipant.setMicrophoneEnabled(false);
} catch (error) {
console.error("Failed to disable microphone:", error);
}
}
}
}
// Enforce mic off in important message mode, hand off mode, or capture mode
else if ((phoneMode === "important_message" || phoneMode === "hand_off" || phoneMode === "capture") && isMicEnabled) {
localParticipant.setMicrophoneEnabled(false);
}
// Exiting important message mode or hand off mode or capture mode
else if (
(phoneMode !== "important_message" && lastPhoneMode.current === "important_message") ||
(phoneMode !== "hand_off" && lastPhoneMode.current === "hand_off") ||
(phoneMode !== "capture" && lastPhoneMode.current === "capture")
) {
// Restore mic to previous state
try {
await localParticipant.setMicrophoneEnabled(wasMicEnabledRef.current);
} catch (error) {
console.error("Failed to restore microphone:", error);
}
// If exiting capture mode, clear processing image
if (lastPhoneMode.current === "capture") {
setProcessingImage(null);
setProcessingSource(null);
}
}
// Enforce mic off in important message mode, hand off mode, or capture mode
else if ((phoneMode === "important_message" || phoneMode === "hand_off" || phoneMode === "capture") && isMicEnabled) {
try {
await localParticipant.setMicrophoneEnabled(false);
} catch (error) {
console.error("Failed to disable microphone:", error);
}
}
};
updateMicState();
lastPhoneMode.current = phoneMode;
}, [phoneMode, isMicEnabled, localParticipant]);
}, [phoneMode, isMicEnabled, localParticipant, roomState]);
useEffect(() => {
const updateTime = () => {
@@ -210,15 +390,36 @@ export function PhoneSimulator({
);
const handleMicToggle = async () => {
if (isMicEnabled) {
await localParticipant.setMicrophoneEnabled(false);
} else {
await localParticipant.setMicrophoneEnabled(true);
if (roomState !== ConnectionState.Connected || !localParticipant) return;
try {
if (isMicEnabled) {
await localParticipant.setMicrophoneEnabled(false);
} else {
await localParticipant.setMicrophoneEnabled(true);
}
} catch (error) {
console.error("Failed to toggle microphone:", error);
// Silently handle the error to avoid disrupting user experience
}
};
const handleDisconnect = () => {
onDisconnect();
try {
// Only disconnect if we're actually connected
if (roomState === ConnectionState.Connected || roomState === ConnectionState.Connecting) {
onDisconnect();
}
} catch (error) {
// Silently handle any errors during disconnect
console.warn("Error during disconnect:", error);
// Still try to call onDisconnect to ensure cleanup
try {
onDisconnect();
} catch (e) {
// Ignore secondary errors
}
}
};
const validateImageFile = (file: File) => {
@@ -407,6 +608,221 @@ export function PhoneSimulator({
setShowVoiceMenu(!showVoiceMenu);
};
// Toggles the agent between push-to-talk and realtime conversation via the
// `switch_ptt_and_rt` RPC. Local UI state follows the mode the server
// confirms; if the response cannot be parsed, we optimistically assume the
// mode we requested was applied.
const handleModeSwitch = async () => {
  if (!room || !voiceAssistant.agent) return;
  // Determine the target mode (toggle from current state)
  const targetMode = isPushToTalkMode ? "realtime" : "push_to_talk";
  try {
    const response = await room.localParticipant.performRpc({
      destinationIdentity: voiceAssistant.agent.identity,
      method: "switch_ptt_and_rt",
      payload: JSON.stringify({ mode: targetMode }),
    });
    try {
      // Prefer the mode the server says it actually applied.
      const responseData = JSON.parse(response);
      const confirmedMode = responseData.mode;
      setIsPushToTalkMode(confirmedMode === "push_to_talk");
    } catch (parseError) {
      // Non-JSON response: fall back to the mode we sent.
      console.warn("Failed to parse mode switch response, using sent mode:", parseError);
      setIsPushToTalkMode(targetMode === "push_to_talk");
    }
  } catch (error) {
    // Was `catch (error: any)`: the value is only logged, so no annotation is
    // needed — mode switching is best-effort and we avoid surfacing a toast.
    console.error("Failed to switch mode:", error);
  }
};
// Check if agent supports push-to-talk (optional check, button will show regardless)
// True when the connected agent advertises push-to-talk support through its
// participant attributes (attribute "push-to-talk" set to "1"). Used only for
// the button tooltip — the button renders regardless.
const supportsPushToTalk = useMemo(() => {
  const attrs = agentAttributes.attributes;
  if (!voiceAssistant.agent || !attrs) return false;
  return attrs["push-to-talk"] === "1";
}, [voiceAssistant.agent, agentAttributes.attributes]);
// Starts a push-to-talk turn via the `start_turn` RPC.
// - A JSON `{success: false, message: "不能打断"}` response means the agent is
//   in a non-interruptible phase: show the rejection hint for 3 seconds.
// - A non-JSON response is treated as success (backward compatibility).
// - "Method not supported" RPC errors are silently ignored: the agent
//   registers the PTT methods lazily, so the first call may race it.
const handlePushToTalkStart = async () => {
  if (!room || !voiceAssistant.agent || isPushToTalkActive) return;
  // Reset interrupt rejection state
  setInterruptRejected(false);
  try {
    const response = await room.localParticipant.performRpc({
      destinationIdentity: voiceAssistant.agent.identity,
      method: "start_turn",
      payload: "",
    });
    try {
      const responseData = JSON.parse(response);
      if (responseData.success === false) {
        // Interrupt was rejected, show message
        if (responseData.message === "不能打断") {
          setInterruptRejected(true);
          // Clear the rejection message after 3 seconds
          setTimeout(() => setInterruptRejected(false), 3000);
          if (process.env.NODE_ENV === 'development') {
            console.log("Interrupt rejected (cannot interrupt):", responseData.message);
          }
          return;
        }
      } else if (responseData.success === true) {
        // Successfully started turn
        setIsPushToTalkActive(true);
        setInterruptRejected(false);
      }
    } catch (parseError) {
      // If response is not JSON, assume success (backward compatibility)
      setIsPushToTalkActive(true);
      setInterruptRejected(false);
    }
  } catch (error) {
    // Was `catch (error: any)`: narrow the unknown error instead of trusting
    // arbitrary property access.
    setIsPushToTalkActive(false);
    const errorMessage = error instanceof Error ? error.message : "";
    const errorCode =
      typeof error === "object" && error !== null && "code" in error
        ? (error as { code?: unknown }).code
        : undefined;
    // "Method not supported at destination" happens when the RPC methods are
    // not registered yet (agent not fully ready) — silently ignore.
    if (errorMessage.includes("Method not supported at destination") ||
        errorMessage.includes("method not found") ||
        errorCode === 12) { // METHOD_NOT_FOUND
      console.log("RPC method not ready yet, will be available after first turn");
      return;
    }
    // Only log and show a toast for unexpected errors.
    console.error("Unexpected error in push-to-talk:", error);
    const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered.";
    setToastMessage({ message: defaultErrorMessage, type: "error" });
  }
};
// Ends the current push-to-talk turn via the `end_turn` RPC. The active flag
// is cleared in `finally` (was duplicated in both the try and catch paths),
// and RPC failures are only logged — this may legitimately fire during
// teardown, so no error toast is shown.
const handlePushToTalkEnd = useCallback(async () => {
  // Always clear interrupt rejection state when button is released
  setInterruptRejected(false);
  if (!room || !voiceAssistant.agent || !isPushToTalkActive) return;
  try {
    await room.localParticipant.performRpc({
      destinationIdentity: voiceAssistant.agent.identity,
      method: "end_turn",
      payload: "",
    });
  } catch (error) {
    // Don't show error toast on end_turn failure as it might be called during cleanup
    console.error("Failed to end turn:", error);
  } finally {
    setIsPushToTalkActive(false);
  }
}, [room, voiceAssistant.agent, isPushToTalkActive]);
// Cancels the in-progress push-to-talk turn (window blur / Escape) via the
// `cancel_turn` RPC. The active flag is always cleared, whether or not the
// RPC succeeds.
const handlePushToTalkCancel = useCallback(async () => {
  // Cancelling always clears any pending interrupt-rejection notice.
  setInterruptRejected(false);
  if (!room || !voiceAssistant.agent || !isPushToTalkActive) return;
  try {
    await room.localParticipant.performRpc({
      destinationIdentity: voiceAssistant.agent.identity,
      method: "cancel_turn",
      payload: "",
    });
  } catch (error) {
    console.error("Failed to cancel turn:", error);
  } finally {
    setIsPushToTalkActive(false);
  }
}, [room, voiceAssistant.agent, isPushToTalkActive]);
// Handle mouse events for push-to-talk.
// preventDefault stops focus/selection side effects while the button is held;
// the start/end calls are fire-and-forget (errors are handled inside them).
const handlePushToTalkMouseDown = (e: React.MouseEvent) => {
  e.preventDefault();
  handlePushToTalkStart();
};
const handlePushToTalkMouseUp = (e: React.MouseEvent) => {
  e.preventDefault();
  handlePushToTalkEnd();
};
// Handle touch events for push-to-talk (same contract as the mouse pair).
const handlePushToTalkTouchStart = (e: React.TouchEvent) => {
  e.preventDefault();
  handlePushToTalkStart();
};
const handlePushToTalkTouchEnd = (e: React.TouchEvent) => {
  e.preventDefault();
  handlePushToTalkEnd();
};
// Handle window blur, escape key, and global mouse/touch events to cancel/end push-to-talk.
// Registered only while a turn is active; blur/Escape CANCEL the turn, while a
// global pointer release ENDS it (so releasing outside the button still commits
// the utterance).
useEffect(() => {
  if (!isPushToTalkActive) return;
  const handleBlur = () => {
    handlePushToTalkCancel();
  };
  const handleKeyDown = (e: KeyboardEvent) => {
    if (e.key === "Escape") {
      handlePushToTalkCancel();
    }
  };
  // Handle global mouseup/touchend to end push-to-talk even if released outside button
  const handleGlobalMouseUp = () => {
    // Clear interrupt rejection state immediately when button is released
    setInterruptRejected(false);
    handlePushToTalkEnd();
  };
  const handleGlobalTouchEnd = () => {
    // Clear interrupt rejection state immediately when button is released
    setInterruptRejected(false);
    handlePushToTalkEnd();
  };
  window.addEventListener("blur", handleBlur);
  window.addEventListener("keydown", handleKeyDown);
  window.addEventListener("mouseup", handleGlobalMouseUp);
  window.addEventListener("touchend", handleGlobalTouchEnd);
  return () => {
    window.removeEventListener("blur", handleBlur);
    window.removeEventListener("keydown", handleKeyDown);
    window.removeEventListener("mouseup", handleGlobalMouseUp);
    window.removeEventListener("touchend", handleGlobalTouchEnd);
  };
  // Cancel/End are useCallbacks, so listing them keeps the listeners fresh
  // without re-subscribing on every render.
}, [isPushToTalkActive, handlePushToTalkCancel, handlePushToTalkEnd]);
// Clean up push-to-talk state on disconnect: a turn cannot outlive the room
// connection, so drop both the active flag and any rejection notice.
useEffect(() => {
  if (roomState === ConnectionState.Disconnected && isPushToTalkActive) {
    setIsPushToTalkActive(false);
    setInterruptRejected(false);
  }
}, [roomState, isPushToTalkActive]);
// Reset interrupt rejection when agent stops speaking — once the agent is
// silent there is nothing left to interrupt, so the notice is stale.
useEffect(() => {
  if (!isAgentSpeaking && interruptRejected) {
    // Small delay so the message doesn't vanish the instant speech ends.
    const timer = setTimeout(() => setInterruptRejected(false), 1000);
    return () => clearTimeout(timer);
  }
}, [isAgentSpeaking, interruptRejected]);
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0];
if (file && onCapture) {
@@ -449,7 +865,7 @@ export function PhoneSimulator({
>
<PhoneIcon className="w-8 h-8" />
</div>
<span className="font-medium text-white">Call Agent</span>
<span className="font-medium text-white"></span>
</button>
<div className="relative">
@@ -459,7 +875,7 @@ export function PhoneSimulator({
>
<VoiceIcon className="w-3 h-3" />
<span>
{currentVoiceId === "BV001_streaming" ? "Female Voice" : "Male Voice"}
{currentVoiceId === "BV001_streaming" ? "女性声音" : "男性声音"}
</span>
</button>
{showVoiceMenu && (
@@ -479,7 +895,7 @@ export function PhoneSimulator({
: "text-white"
}`}
>
<span>Female Voice</span>
<span></span>
{currentVoiceId === "BV001_streaming" && <CheckIcon />}
</button>
<button
@@ -494,7 +910,7 @@ export function PhoneSimulator({
: "text-white"
}`}
>
<span>Male Voice</span>
<span></span>
{currentVoiceId === "BV002_streaming" && (
<CheckIcon />
)}
@@ -526,7 +942,7 @@ export function PhoneSimulator({
})();
return (
<div className="w-auto max-w-full h-full aspect-[9/19.5] max-h-full bg-black rounded-[40px] border-[12px] border-gray-900 overflow-hidden relative shadow-2xl flex flex-col shrink-0">
<div className="absolute inset-0 w-full h-full bg-black rounded-none border-0 overflow-hidden flex flex-col shrink-0 md:relative md:w-auto md:max-w-full md:h-full md:aspect-[9/19.5] md:max-h-full md:rounded-[40px] md:border-[12px] md:border-gray-900 md:shadow-2xl">
<style jsx global>{`
.mirror-video video {
transform: scaleX(-1);
@@ -558,7 +974,12 @@ export function PhoneSimulator({
}
`}</style>
{/* Status Bar */}
<div className="h-12 w-full bg-black/20 backdrop-blur-sm absolute top-0 left-0 z-50 flex items-center justify-between px-6 text-white text-xs font-medium">
<div className="hidden md:flex w-full bg-black/20 backdrop-blur-sm absolute top-0 left-0 z-50 items-center justify-between px-6 text-white text-xs font-medium"
style={{
paddingTop: 'max(env(safe-area-inset-top, 0px), 0.5rem)',
paddingBottom: '0.75rem',
minHeight: '3rem',
}}>
<span>{currentTime}</span>
<div className="flex items-center gap-2">
<WifiIcon className="w-4 h-4" />
@@ -566,8 +987,42 @@ export function PhoneSimulator({
</div>
</div>
{/* Chat Toggle Button - Top Right, aligned with audio visualizer (Draggable) */}
{roomState === ConnectionState.Connected &&
voiceAssistant.agent &&
phoneMode !== "important_message" &&
phoneMode !== "capture" && (
<button
ref={chatToggleRef}
className={`absolute z-50 p-3 rounded-full backdrop-blur-md transition-colors shadow-lg cursor-move select-none touch-none ${
showChatOverlay
? "bg-blue-500/80 text-white"
: "bg-gray-800/70 text-white hover:bg-gray-800/90"
}`}
onClick={(e) => {
// Only toggle if we didn't just drag
if (!chatToggleHasDragged.current) {
setShowChatOverlay(!showChatOverlay);
}
}}
onMouseDown={handleChatToggleDragStart}
onTouchStart={handleChatToggleDragStart}
title={showChatOverlay ? "Hide chat (drag to move)" : "Show chat (drag to move)"}
style={{
...(chatTogglePosition.x !== undefined ? { left: chatTogglePosition.x } : {}),
...(chatTogglePosition.right !== undefined ? { right: chatTogglePosition.right } : {}),
top: chatTogglePosition.y,
}}
>
<ChatIcon className="w-5 h-5 md:w-6 md:h-6" />
</button>
)}
{/* Main Content */}
<div ref={phoneContainerRef} className="flex-grow relative bg-gray-950 w-full h-full overflow-hidden">
<div ref={phoneContainerRef} className="flex-grow relative bg-gray-950 w-full h-full overflow-hidden"
style={{
paddingBottom: 'env(safe-area-inset-bottom, 0px)',
}}>
<div className={`h-full w-full transition-all duration-500 ease-in-out transform ${
phoneMode === "hand_off" && roomState === ConnectionState.Connected
? "blur-md scale-105"
@@ -694,12 +1149,13 @@ export function PhoneSimulator({
{roomState === ConnectionState.Connected && voiceAssistant.audioTrack && phoneMode !== "hand_off" && (
<div
ref={visualizerRef}
className="absolute z-50 p-2 bg-black/40 backdrop-blur-md rounded-lg border border-white/10 shadow-lg cursor-move select-none"
className="absolute z-50 p-2 bg-black/40 backdrop-blur-md rounded-lg border border-white/10 shadow-lg cursor-move select-none touch-none"
style={{
left: visualizerPosition.x,
top: visualizerPosition.y,
}}
onMouseDown={handleDragStart}
onTouchStart={handleDragStart}
>
<div className="h-8 w-24 flex items-center justify-center [--lk-va-bar-width:3px] [--lk-va-bar-gap:2px] [--lk-fg:white]">
<BarVisualizer
@@ -712,10 +1168,30 @@ export function PhoneSimulator({
</div>
)}
{/* Chat Overlay - Hidden during capture and important_message modes */}
{roomState === ConnectionState.Connected &&
voiceAssistant.agent &&
phoneMode !== "capture" &&
phoneMode !== "important_message" && (
<ChatOverlay
agentAudioTrack={voiceAssistant.audioTrack}
accentColor={config.settings.theme_color}
inputDisabled={phoneMode === "hand_off"}
isVisible={showChatOverlay}
position={chatOverlayPosition}
onPositionChange={setChatOverlayPosition}
containerRef={phoneContainerRef}
onToggle={() => setShowChatOverlay(!showChatOverlay)}
/>
)}
{/* Call Controls Overlay */}
{roomState === ConnectionState.Connected && (
phoneMode === "capture" ? (
<div className="absolute top-0 left-0 w-full h-full flex flex-col justify-end pb-[5%] px-[8%] z-40">
<div className="absolute top-0 left-0 w-full h-full flex flex-col justify-end px-[8%] z-40"
style={{
paddingBottom: 'calc(5% + env(safe-area-inset-bottom, 0px))',
}}>
{/* Camera Controls Row */}
<div className="w-full flex items-center justify-evenly mb-8">
{/* Left: Upload */}
@@ -792,31 +1268,205 @@ export function PhoneSimulator({
</div>
</div>
) : (
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-40">
<div className="w-full flex items-center justify-center gap-8">
{phoneMode !== "important_message" && phoneMode !== "hand_off" && (
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-40"
style={{
paddingBottom: 'max(env(safe-area-inset-bottom, 0px), 0px)',
bottom: 'calc(5% + env(safe-area-inset-bottom, 0px))',
}}>
<div className="w-full flex flex-col items-center justify-center gap-4">
{/* Mode Toggle Switch */}
{phoneMode !== "important_message" && phoneMode !== "hand_off" && voiceAssistant.agent && (
<div className="flex items-center gap-3 mb-2">
<span className={`text-xs font-medium transition-colors ${isPushToTalkMode ? "text-white" : "text-gray-400"}`}>
</span>
<button
onClick={handleModeSwitch}
className={`relative inline-flex h-6 w-11 items-center rounded-full transition-colors focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2 ${
!isPushToTalkMode ? "bg-blue-500" : "bg-gray-600"
}`}
role="switch"
aria-checked={!isPushToTalkMode}
title={isPushToTalkMode ? "切换到实时对话模式" : "切换到按下说话模式"}
>
<span
className={`inline-block h-4 w-4 transform rounded-full bg-white transition-transform ${
!isPushToTalkMode ? "translate-x-6" : "translate-x-1"
}`}
/>
</button>
<span className={`text-xs font-medium transition-colors ${!isPushToTalkMode ? "text-white" : "text-gray-400"}`}>
</span>
</div>
)}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
{/* Push-to-Talk Mode Layout */}
{isPushToTalkMode && phoneMode !== "hand_off" && voiceAssistant.agent && (
<>
{/* Important Message Mode - Centered End Call Button */}
{phoneMode === "important_message" ? (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
) : (
<div className="w-full flex items-center justify-between gap-8">
{/* Left side: Mic Toggle and Camera Switch Buttons */}
<div className="flex flex-col items-center gap-2">
{/* Mic Toggle Button */}
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-800/50 text-white hover:bg-gray-800/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
{/* Camera Switch Button */}
<div className="relative">
<button
className="p-4 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleSwitchCamera}
>
<SwitchCameraIcon className="w-6 h-6" />
</button>
{showCameraMenu && (
<div className="absolute bottom-full mb-2 left-0 bg-gray-900 border border-gray-800 rounded-lg shadow-xl py-2 w-48 z-50">
{cameras.length === 0 ? (
<div className="px-4 py-2 text-gray-500 text-sm">
No cameras found
</div>
) : (
cameras.map((device) => (
<button
key={device.deviceId}
onClick={() => handleSelectCamera(device.deviceId)}
className="w-full text-left px-4 py-2 text-sm text-white hover:bg-gray-800 transition-colors truncate"
>
{device.label ||
`Camera ${cameras.indexOf(device) + 1}`}
</button>
))
)}
</div>
)}
</div>
</div>
{/* Center: Large Push-to-Talk Button */}
<button
ref={pushToTalkButtonRef}
className={`w-24 h-24 rounded-full backdrop-blur-md transition-all flex flex-col items-center justify-center gap-2 aspect-square select-none ${
interruptRejected
? "bg-red-500/70 text-white"
: isPushToTalkActive
? "bg-green-500 text-white scale-110 shadow-lg shadow-green-500/50"
: "bg-blue-500/70 text-white hover:bg-blue-500/90"
}`}
style={{ borderRadius: '50%' }}
onMouseDown={handlePushToTalkMouseDown}
onMouseUp={handlePushToTalkMouseUp}
onTouchStart={handlePushToTalkTouchStart}
onTouchEnd={handlePushToTalkTouchEnd}
title={supportsPushToTalk ? "Push to Talk" : "Push to Talk (may not be supported by this agent)"}
>
<MicIcon className="w-8 h-8" />
<span className="text-xs font-medium">
{interruptRejected ? "不允许打断" : "按住说话"}
</span>
</button>
{/* Right side: End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</>
)}
{/* Realtime Mode Layout */}
{!isPushToTalkMode && phoneMode !== "hand_off" && (
<>
{/* Important Message Mode - Centered End Call Button */}
{phoneMode === "important_message" ? (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
) : (
<div className="w-full flex items-center justify-center gap-4">
{/* Mic Toggle */}
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
{/* End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</>
)}
{/* Hand Off Mode - Show only End Call Button */}
{phoneMode === "hand_off" && (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
{/* Fallback: Show End Call Button when in push-to-talk mode but no agent/audio */}
{phoneMode === "normal" &&
isPushToTalkMode &&
!voiceAssistant.agent && (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</div>
</div>
)

View File

@@ -75,11 +75,69 @@ export default function Playground({
const [rpcMethod, setRpcMethod] = useState("");
const [rpcPayload, setRpcPayload] = useState("");
const [showRpc, setShowRpc] = useState(false);
const [qrCodeUrl, setQrCodeUrl] = useState<string>("");
// Clean up RPC resolvers before disconnecting to prevent errors.
// Pending agent RPCs (important-message ack and image capture) hold resolver
// callbacks in refs; resolve them with sentinel payloads so the agent side is
// not left waiting, then null the refs so late responses are ignored.
const cleanupRpcResolvers = useCallback(() => {
  // Clean up any pending important message RPC
  if (importantMessageResolverRef.current) {
    // Take the resolver out of the ref BEFORE calling it, so a re-entrant
    // call cannot resolve it twice.
    const resolver = importantMessageResolverRef.current;
    importantMessageResolverRef.current = null;
    try {
      // Only resolve if room is still connected to avoid RPC errors.
      // NOTE(review): when roomState is not Connected the resolver is dropped
      // without ever being called — confirm the agent side tolerates that.
      if (roomState === ConnectionState.Connected) {
        resolver("disconnected");
      }
    } catch (error) {
      // Ignore errors during cleanup - room might be disconnecting
    }
  }
  // Clean up any pending image capture RPC (same take-then-call pattern)
  if (imageCaptureResolverRef.current) {
    const resolver = imageCaptureResolverRef.current;
    imageCaptureResolverRef.current = null;
    try {
      // Only resolve if room is still connected to avoid RPC errors
      if (roomState === ConnectionState.Connected) {
        resolver(JSON.stringify({ error: "disconnected" }));
      }
    } catch (error) {
      // Ignore errors during cleanup - room might be disconnecting
    }
  }
}, [roomState]);
// Wrapper for disconnect that cleans up RPC resolvers first, so pending agent
// RPCs are settled before the room connection is torn down via onConnect(false).
const handleDisconnect = useCallback(() => {
  cleanupRpcResolvers();
  try {
    onConnect(false);
  } catch (error) {
    // Silently handle any errors during disconnect — tearing down an already
    // failing connection should never crash the UI.
    console.warn("Error during disconnect:", error);
  }
}, [onConnect, cleanupRpcResolvers]);
useEffect(() => {
if (roomState === ConnectionState.Connected) {
localParticipant.setCameraEnabled(config.settings.inputs.camera);
localParticipant.setMicrophoneEnabled(config.settings.inputs.mic);
if (roomState === ConnectionState.Connected && localParticipant) {
try {
localParticipant.setCameraEnabled(config.settings.inputs.camera);
localParticipant.setMicrophoneEnabled(config.settings.inputs.mic);
} catch (error) {
console.error("Failed to set camera/microphone:", error);
// Retry after a short delay if connection might not be fully ready
const retryTimeout = setTimeout(() => {
if (roomState === ConnectionState.Connected && localParticipant) {
try {
localParticipant.setCameraEnabled(config.settings.inputs.camera);
localParticipant.setMicrophoneEnabled(config.settings.inputs.mic);
} catch (retryError) {
console.error("Failed to set camera/microphone on retry:", retryError);
}
}
}, 500);
return () => clearTimeout(retryTimeout);
}
}
}, [config.settings.inputs.camera, config.settings.inputs.mic, localParticipant, roomState]);
@@ -145,7 +203,7 @@ export default function Playground({
'hangUpCall',
async () => {
// Disconnect the call
onConnect(false);
handleDisconnect();
return JSON.stringify({ success: true });
}
);
@@ -179,7 +237,7 @@ export default function Playground({
});
}
);
}, [localParticipant, roomState, onConnect]);
}, [localParticipant, roomState, handleDisconnect]);
useEffect(() => {
if (roomState === ConnectionState.Connected) {
@@ -349,6 +407,7 @@ export default function Playground({
}, [agentVideoTrack, config, roomState]);
useEffect(() => {
if (typeof document !== "undefined") {
document.body.style.setProperty(
"--lk-theme-color",
// @ts-ignore
@@ -358,8 +417,15 @@ export default function Playground({
"--lk-drop-shadow",
`var(--lk-theme-color) 0px 0px 18px`,
);
}
}, [config.settings.theme_color]);
// Capture the page URL after mount (guarded so SSR never touches `window`);
// the value feeds the QRCodeSVG rendered in the settings tile.
useEffect(() => {
  if (typeof window !== "undefined") {
    setQrCodeUrl(window.location.href);
  }
}, []);
const audioTileContent = useMemo(() => {
const disconnectedContent = (
<div className="flex flex-col items-center justify-center gap-2 text-gray-700 text-center w-full">
@@ -422,6 +488,7 @@ export default function Playground({
]);
const instructionsContent = (
<>
<ConfigurationPanelItem title="Instructions">
<textarea
className="w-full bg-gray-950 text-white text-sm p-3 rounded-md border border-gray-800 focus:border-gray-600 focus:outline-none transition-colors resize-none disabled:opacity-50 disabled:cursor-not-allowed"
@@ -437,6 +504,18 @@ export default function Playground({
disabled={roomState !== ConnectionState.Disconnected}
/>
</ConfigurationPanelItem>
<ConfigurationPanelItem title="Color">
<ColorPicker
colors={themeColors}
selectedColor={config.settings.theme_color}
onSelect={(color) => {
const userSettings = { ...config.settings };
userSettings.theme_color = color;
setUserSettings(userSettings);
}}
/>
</ConfigurationPanelItem>
</>
);
const handleRpcCall = useCallback(async () => {
@@ -459,13 +538,13 @@ export default function Playground({
const settingsTileContent = useMemo(() => {
return (
<div className="flex flex-col h-full w-full items-start overflow-y-auto">
{config.description && (
{/* {config.description && (
<ConfigurationPanelItem title="Description">
{config.description}
</ConfigurationPanelItem>
)}
)} */}
<ConfigurationPanelItem title="Room">
{/* <ConfigurationPanelItem title="Room">
<div className="flex flex-col gap-2">
<EditableNameValueRow
name="Room name"
@@ -499,9 +578,9 @@ export default function Playground({
}
/>
</div>
</ConfigurationPanelItem>
</ConfigurationPanelItem> */}
<ConfigurationPanelItem title="Agent">
{/* <ConfigurationPanelItem title="Agent">
<div className="flex flex-col gap-2">
<EditableNameValueRow
name="Agent name"
@@ -564,9 +643,9 @@ export default function Playground({
.
</p>
</div>
</ConfigurationPanelItem>
</ConfigurationPanelItem> */}
<ConfigurationPanelItem title="User">
{/* <ConfigurationPanelItem title="User">
<div className="flex flex-col gap-2">
<EditableNameValueRow
name="Name"
@@ -618,7 +697,7 @@ export default function Playground({
connectionState={roomState}
/>
</div>
</ConfigurationPanelItem>
</ConfigurationPanelItem> */}
{roomState === ConnectionState.Connected &&
config.settings.inputs.screen && (
@@ -668,30 +747,16 @@ export default function Playground({
<AudioInputTile trackRef={localMicTrack} />
</ConfigurationPanelItem>
)}
<div className="w-full">
<ConfigurationPanelItem title="Color">
<ColorPicker
colors={themeColors}
selectedColor={config.settings.theme_color}
onSelect={(color) => {
const userSettings = { ...config.settings };
userSettings.theme_color = color;
setUserSettings(userSettings);
}}
/>
</ConfigurationPanelItem>
</div>
{config.show_qr && (
{config.show_qr && qrCodeUrl && (
<div className="w-full">
<ConfigurationPanelItem title="QR Code">
<QRCodeSVG value={window.location.href} width="128" />
<QRCodeSVG value={qrCodeUrl} width="128" />
</ConfigurationPanelItem>
</div>
)}
</div>
);
}, [
config.description,
config.settings,
config.show_qr,
localParticipant,
@@ -721,7 +786,7 @@ export default function Playground({
>
<PhoneSimulator
onConnect={() => onConnect(true)}
onDisconnect={() => onConnect(false)}
onDisconnect={handleDisconnect}
phoneMode={phoneMode}
capturePrompt={capturePrompt}
importantMessage={importantMessage}
@@ -785,26 +850,30 @@ export default function Playground({
return (
<>
<PlaygroundHeader
{/* <PlaygroundHeader
title={config.title}
logo={logo}
githubLink={config.github_link}
height={headerHeight}
accentColor={config.settings.theme_color}
connectionState={roomState}
onConnectClicked={() =>
onConnect(roomState === ConnectionState.Disconnected)
}
/>
onConnectClicked={() => {
if (roomState === ConnectionState.Disconnected) {
onConnect(true);
} else {
handleDisconnect();
}
}}
/> */}
<div
className={`flex gap-4 py-4 grow w-full selection:bg-${config.settings.theme_color}-900`}
style={{ height: `calc(100% - ${headerHeight}px)` }}
style={{ height: `100%` }}
>
<div className="flex flex-col grow basis-1/2 gap-4 h-full lg:hidden">
<PlaygroundTabbedTile
className="h-full"
tabs={mobileTabs}
initialTab={mobileTabs.length - 1}
initialTab={0}
/>
</div>
<div
@@ -821,7 +890,7 @@ export default function Playground({
>
<PhoneSimulator
onConnect={() => onConnect(true)}
onDisconnect={() => onConnect(false)}
onDisconnect={handleDisconnect}
phoneMode={phoneMode}
capturePrompt={capturePrompt}
importantMessage={importantMessage}
@@ -865,14 +934,14 @@ export default function Playground({
</PlaygroundTile>
</div>
)}
<PlaygroundTile
{/* <PlaygroundTile
padding={false}
backgroundColor="gray-950"
className="h-full w-full basis-1/4 items-start overflow-y-auto hidden max-w-[480px] lg:flex"
childrenClassName="h-full grow items-start"
>
{settingsTileContent}
</PlaygroundTile>
</PlaygroundTile> */}
</div>
</>
);

View File

@@ -45,7 +45,7 @@ export const PlaygroundTile: React.FC<PlaygroundTileProps> = ({
</div>
)}
<div
className={`flex flex-col items-center grow w-full ${childrenClassName}`}
className={`flex flex-col items-center grow w-full relative ${childrenClassName}`}
style={{
height: `calc(100% - ${title ? titleHeight + "px" : "0px"})`,
padding: `${contentPadding * 4}px`,
@@ -74,7 +74,7 @@ export const PlaygroundTabbedTile: React.FC<PlaygroundTabbedTileProps> = ({
className={`flex flex-col h-full border rounded-sm border-gray-800 text-gray-500 bg-${backgroundColor} ${className}`}
>
<div
className="flex items-center justify-start text-xs uppercase border-b border-b-gray-800 tracking-wider"
className="flex items-center justify-start text-xs uppercase border-b border-b-gray-800 tracking-wider relative z-[100] bg-gray-950"
style={{
height: `${titleHeight}px`,
}}
@@ -100,7 +100,18 @@ export const PlaygroundTabbedTile: React.FC<PlaygroundTabbedTileProps> = ({
padding: `${contentPadding * 4}px`,
}}
>
{tabs[activeTab].content}
{tabs.map((tab, index) => (
<div
key={index}
style={{
display: index === activeTab ? 'block' : 'none',
height: '100%',
width: '100%',
}}
>
{tab.content}
</div>
))}
</div>
</div>
);

View File

@@ -207,3 +207,20 @@ export const VoiceIcon = ({ className }: { className?: string }) => (
<line x1="12" y1="19" x2="12" y2="22" />
</svg>
);
export const ChatIcon = ({ className }: { className?: string }) => (
<svg
xmlns="http://www.w3.org/2000/svg"
width="24"
height="24"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
strokeWidth="2"
strokeLinecap="round"
strokeLinejoin="round"
className={className}
>
<path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path>
</svg>
);

View File

@@ -108,6 +108,16 @@ export function HomeInner() {
token={token}
connect={shouldConnect}
onError={(e) => {
// Filter out expected errors from push-to-talk interrupt failures
// These are handled gracefully in the PhoneSimulator component
if (e.message?.includes("Application error in method handler") ||
e.message?.includes("Method not supported at destination")) {
// Silently ignore - these are expected and handled in PhoneSimulator
if (process.env.NODE_ENV === 'development') {
console.log("Filtered expected error:", e.message);
}
return;
}
setToastMessage({ message: e.message, type: "error" });
console.error(e);
}}

View File

@@ -51,3 +51,12 @@ body {
opacity: 1;
}
}
/* Hide Next.js floating dev indicator */
nextjs-portal,
#__next-build-watcher,
[data-nextjs-dialog],
[data-nextjs-toast],
div[style*="position: fixed"][style*="bottom"][style*="right"] {
display: none !important;
}

View File

@@ -11,7 +11,7 @@ import {
Track,
TranscriptionSegment,
} from "livekit-client";
import { useEffect, useState } from "react";
import { useEffect, useState, useRef } from "react";
export function TranscriptionTile({
agentAudioTrack,
@@ -30,39 +30,51 @@ export function TranscriptionTile({
participant: localParticipant.localParticipant,
});
const [transcripts, setTranscripts] = useState<Map<string, ChatMessageType>>(
new Map(),
);
const [messages, setMessages] = useState<ChatMessageType[]>([]);
const { chatMessages, send: sendChat } = useChat();
const transcriptMapRef = useRef<Map<string, ChatMessageType>>(new Map());
// store transcripts
// Build messages from segments and chat - always rebuild from current state
useEffect(() => {
const transcriptMap = transcriptMapRef.current;
// Process agent segments - update existing or add new
if (agentAudioTrack) {
agentMessages.segments.forEach((s) =>
transcripts.set(
agentMessages.segments.forEach((s) => {
const existing = transcriptMap.get(s.id);
transcriptMap.set(
s.id,
segmentToChatMessage(
s,
transcripts.get(s.id),
existing,
agentAudioTrack.participant,
),
),
);
);
});
}
localMessages.segments.forEach((s) =>
transcripts.set(
// Process local segments - update existing or add new
localMessages.segments.forEach((s) => {
const existing = transcriptMap.get(s.id);
transcriptMap.set(
s.id,
segmentToChatMessage(
s,
transcripts.get(s.id),
existing,
localParticipant.localParticipant,
),
),
);
);
});
const allMessages = Array.from(transcripts.values());
// Build all messages
const allMessages: ChatMessageType[] = [];
// Add all transcript messages
transcriptMap.forEach((msg) => {
allMessages.push(msg);
});
// Add chat messages
for (const msg of chatMessages) {
const isAgent = agentAudioTrack
? msg.from?.identity === agentAudioTrack.participant?.identity
@@ -79,6 +91,7 @@ export function TranscriptionTile({
name = "Unknown";
}
}
allMessages.push({
name,
message: msg.message,
@@ -86,10 +99,11 @@ export function TranscriptionTile({
isSelf: isSelf,
});
}
// Sort by timestamp
allMessages.sort((a, b) => a.timestamp - b.timestamp);
setMessages(allMessages);
}, [
transcripts,
chatMessages,
localParticipant.localParticipant,
agentAudioTrack?.participant,