Compare commits

..

24 Commits

Author SHA1 Message Date
950d1ab3d4 Update agents/README.md 2026-02-02 15:28:28 +00:00
28b9a16c4e make chat message overlay right 2025-12-19 10:55:13 +08:00
f1b331d923 try to fix fast reload 2025-12-18 09:41:42 +08:00
739c019404 set chat message overlay draggable 2025-12-18 09:22:01 +08:00
da11561f47 Bug fixed 2025-12-17 23:13:42 +08:00
853e1558b1 fix realtime mode need push to talk once 2025-12-17 22:40:11 +08:00
6652a5cd43 update logic of switch_ptt_and_rt 2025-12-17 22:17:44 +08:00
d942222f11 update endcall button postion 2025-12-17 21:41:36 +08:00
5be6ab12f3 add chat message overlay 2025-12-17 18:59:17 +08:00
eeeed36494 add mic on push-to-talk mode, iphone open in phone tab default 2025-12-17 18:03:37 +08:00
3e0276d6c0 hide nextjs icon 2025-12-17 16:19:28 +08:00
4a6a6619df optimized layout in iphone 2025-12-17 16:17:33 +08:00
1f0365e716 Merge branch 'phone-interface' 2025-12-17 12:04:10 +08:00
7fbb9a5431 fix color theme change bug 2025-12-17 12:04:01 +08:00
a6b98e4100 Merge branch 'phone-interface' 2025-12-17 11:36:58 +08:00
48cb450208 add gitignore 2025-12-17 11:36:27 +08:00
800aa700f9 make endcall button keeps when connection fail 2025-12-17 11:33:44 +08:00
2decf208b4 remove few components on frontend 2025-12-17 11:03:36 +08:00
b75fd71bc7 does not allow interrupt in important stage 2025-12-17 09:26:56 +08:00
e8ef7c6da7 bug fixed 2025-12-16 17:54:37 +08:00
f2fcbe485f return random phone number and id card number 2025-12-16 17:31:17 +08:00
e09e4b6930 a better push to talk layout 2025-12-16 15:56:46 +08:00
1774f550dd first version push to talk 2025-12-16 15:11:55 +08:00
9f05f067a6 fix end call bug 2025-12-16 11:41:06 +08:00
12 changed files with 1423 additions and 156 deletions

58
.gitignore vendored
View File

@@ -6,27 +6,46 @@
.pnp.js .pnp.js
.yarn/install-state.gz .yarn/install-state.gz
# pnpm
.pnpm-store/
.pnpm-debug.log*
# testing # testing
/coverage /coverage
*.lcov
.nyc_output
# next.js # next.js
/.next/ /.next/
/out/ /out/
.next/
out/
# production # production
/build /build
dist/
# misc # misc
.DS_Store .DS_Store
*.pem *.pem
*.log
*.swp
*.swo
*~
# debug # debug
npm-debug.log* npm-debug.log*
yarn-debug.log* yarn-debug.log*
yarn-error.log* yarn-error.log*
pnpm-debug.log*
# local env files # local env files
.env
.env*.local .env*.local
.env.local
.env.development.local
.env.test.local
.env.production.local
# vercel # vercel
.vercel .vercel
@@ -34,3 +53,42 @@ yarn-error.log*
# typescript # typescript
*.tsbuildinfo *.tsbuildinfo
next-env.d.ts next-env.d.ts
# IDE
.vscode/
.idea/
*.sublime-project
*.sublime-workspace
*.code-workspace
# Python (for agents directory)
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
ENV/
.venv
pip-log.txt
pip-delete-this-directory.txt
.pytest_cache/
.coverage
htmlcov/
*.egg-info/
dist/
build/
*.egg
# OS
Thumbs.db
.DS_Store
.AppleDouble
.LSOverride
._*
# Temporary files
*.tmp
*.temp
.cache/

View File

@@ -0,0 +1 @@
use livekit-plugins-volcengine==1.2.9

View File

@@ -4,6 +4,7 @@ import base64
import json import json
import logging import logging
import os import os
import random
import sys import sys
import re import re
from dataclasses import asdict, dataclass from dataclasses import asdict, dataclass
@@ -31,6 +32,7 @@ from livekit.agents import (
cli, cli,
get_job_context, get_job_context,
metrics, metrics,
RoomIO
) )
from livekit.agents.llm import ImageContent, ToolError, function_tool from livekit.agents.llm import ImageContent, ToolError, function_tool
from typing import Any, List, Optional from typing import Any, List, Optional
@@ -64,7 +66,7 @@ DEFAULT_INSTRUCTIONS = """# 角色
# 能力 # 能力
- 你具有调用工具操作前端界面系统的能力 - 你具有调用工具操作前端界面系统的能力
- ask_image_capture工具被调用后会在系统播放拍摄的目标和需求所以你每次在调用它之前不需要重复引导用户拍摄什么 - ask_image_capture工具被调用后会在系统播放拍摄的目标和需求所以你每次在调用它之前不需要重复引导用户拍摄什么而是使用ask_image_capture来传递拍摄需求
# 任务 # 任务
你的职责是全流程引导用户完成:事故信息采集 -> 现场证据拍照 -> 驾驶员信息核实。 你的职责是全流程引导用户完成:事故信息采集 -> 现场证据拍照 -> 驾驶员信息核实。
@@ -73,6 +75,7 @@ DEFAULT_INSTRUCTIONS = """# 角色
- 在事故信息采集阶段询问是否有人受伤请求用户简单描述事故情况询问事故发生时间并通过复述标准化时间xx年xx月xx日xx时xx分向用户确认询问事故车辆数量询问事故发生的原因例如追尾、刮擦、碰撞等。采集完成后进入现场证据拍照阶段 - 在事故信息采集阶段询问是否有人受伤请求用户简单描述事故情况询问事故发生时间并通过复述标准化时间xx年xx月xx日xx时xx分向用户确认询问事故车辆数量询问事故发生的原因例如追尾、刮擦、碰撞等。采集完成后进入现场证据拍照阶段
- 如果用户回答已包含需要问题的答案,改为与用户确认答案是否正确 - 如果用户回答已包含需要问题的答案,改为与用户确认答案是否正确
- 采集完成之后进入现场证据拍照阶段 - 采集完成之后进入现场证据拍照阶段
- 这个阶段不使用ask_important_question和ask_image_capture工具
## 现场证据拍照阶段 ## 现场证据拍照阶段
- 在现场证据拍照阶段使用askImageCapture工具引导用户依次拍摄照片1. 第一辆车的车牌2. 第一辆车的碰撞位置3. 第一辆车的驾驶员正脸; - 在现场证据拍照阶段使用askImageCapture工具引导用户依次拍摄照片1. 第一辆车的车牌2. 第一辆车的碰撞位置3. 第一辆车的驾驶员正脸;
@@ -100,9 +103,11 @@ DEFAULT_INSTRUCTIONS = """# 角色
- 一次询问一个问题 - 一次询问一个问题
- 不要在你的回复中使用 emojis, asterisks, markdown, 或其他特殊字符 - 不要在你的回复中使用 emojis, asterisks, markdown, 或其他特殊字符
- 不同阶段直接的过渡语句自然 - 不同阶段直接的过渡语句自然
- 你已经说过下面的开场白所以不需要重复说:“您好,这里是无锡交警,我将为您远程处理交通事故。请将人员撤离至路侧安全区域,开启危险报警双闪灯、放置三角警告牌、做好安全防护,谨防二次事故伤害。若您已经准备好了,请点击继续办理,如需人工服务,请说转人工。” - 你已经说过下面的开场白,用户点击继续办理说明已经认可,所以不需要重复说:“您好,这里是无锡交警,我将为您远程处理交通事故。请将人员撤离至路侧安全区域,开启危险报警双闪灯、放置三角警告牌、做好安全防护,谨防二次事故伤害。若您已经准备好了,请点击继续办理,如需人工服务,请说转人工。”
""" """
DEFAULT_TALKING_MODE = 'push_to_talk'
# ## 黄金对话路径示例 GOLDEN_CONVERSATION_PATH # ## 黄金对话路径示例 GOLDEN_CONVERSATION_PATH
# ``` # ```
@@ -477,7 +482,7 @@ class MyAgent(Agent):
self._image_event.clear() self._image_event.clear()
# Speak the capture prompt so the user hears what to do # Speak the capture prompt so the user hears what to do
self.session.say(prompt, allow_interruptions=True) self.session.say(prompt, allow_interruptions=False)
# Ask for image capture and wait for user to capture/upload # Ask for image capture and wait for user to capture/upload
response = await room.local_participant.perform_rpc( response = await room.local_participant.perform_rpc(
@@ -605,13 +610,26 @@ class MyAgent(Agent):
f"│ plate: \"{normalized_plate}\"\n" f"│ plate: \"{normalized_plate}\"\n"
"└───────────────" "└───────────────"
) )
# Dummy fixed response (placeholder backend) # Generate random mobile number (11 digits: 1[3-9] + 9 random digits)
return { mobile_prefix = random.choice(['13', '14', '15', '16', '17', '18', '19'])
mobile_suffix = ''.join([str(random.randint(0, 9)) for _ in range(9)])
random_mobile = f"{mobile_prefix}{mobile_suffix}"
result = {
"success": True, "success": True,
"plate": normalized_plate, "plate": normalized_plate,
"mobile": "13800001234", "mobile": random_mobile,
} }
await self._send_chat_message(
"┌─✅ Result: get_mobile_by_plate\n"
f"│ plate: \"{normalized_plate}\"\n"
f"│ mobile: \"{random_mobile}\"\n"
"└───────────────"
)
return result
@function_tool() @function_tool()
async def get_id_card_by_plate( async def get_id_card_by_plate(
self, self,
@@ -630,13 +648,35 @@ class MyAgent(Agent):
f"│ plate: \"{normalized_plate}\"\n" f"│ plate: \"{normalized_plate}\"\n"
"└───────────────" "└───────────────"
) )
# Dummy fixed response (placeholder backend) # Generate random ID card number (18 digits: 6-digit area code + 8-digit birth date + 3-digit sequence + 1 check digit)
return { # Area code: random 6 digits (typically 110000-659999 for Chinese ID cards)
area_code = random.randint(110000, 659999)
# Birth date: random date between 1950-01-01 and 2000-12-31
year = random.randint(1950, 2000)
month = random.randint(1, 12)
day = random.randint(1, 28) # Use 28 to avoid month-specific day issues
birth_date = f"{year:04d}{month:02d}{day:02d}"
# Sequence number: 3 random digits
sequence = random.randint(100, 999)
# Check digit: random digit or X (10% chance of X)
check_digit = 'X' if random.random() < 0.1 else str(random.randint(0, 9))
random_id_card = f"{area_code}{birth_date}{sequence}{check_digit}"
result = {
"success": True, "success": True,
"plate": normalized_plate, "plate": normalized_plate,
"id_card": "320101198001011234", "id_card": random_id_card,
} }
await self._send_chat_message(
"┌─✅ Result: get_id_card_by_plate\n"
f"│ plate: \"{normalized_plate}\"\n"
f"│ id_card: \"{random_id_card}\"\n"
"└───────────────"
)
return result
@function_tool() @function_tool()
async def validate_mobile_number( async def validate_mobile_number(
self, self,
@@ -657,17 +697,33 @@ class MyAgent(Agent):
) )
is_valid = bool(re.fullmatch(r"1[3-9]\\d{9}", normalized)) is_valid = bool(re.fullmatch(r"1[3-9]\\d{9}", normalized))
if is_valid: if is_valid:
return { result = {
"success": True, "success": True,
"valid": True, "valid": True,
"mobile": normalized, "mobile": normalized,
} }
return { await self._send_chat_message(
"┌─✅ Result: validate_mobile_number\n"
f"│ mobile: \"{normalized}\"\n"
f"│ valid: true\n"
"└───────────────"
)
return result
result = {
"success": True, "success": True,
"valid": False, "valid": False,
"mobile": normalized, "mobile": normalized,
"error": "手机号格式不正确应为1[3-9]开头的11位数字", "error": "手机号格式不正确应为1[3-9]开头的11位数字",
} }
await self._send_chat_message(
"┌─✅ Result: validate_mobile_number\n"
f"│ mobile: \"{normalized}\"\n"
f"│ valid: false\n"
f"│ error: \"{result['error']}\"\n"
"└───────────────"
)
return result
@function_tool() @function_tool()
async def validate_id_card_number( async def validate_id_card_number(
@@ -689,25 +745,44 @@ class MyAgent(Agent):
) )
is_valid = bool(re.fullmatch(r"(\\d{17}[\\dX]|\\d{15})", normalized)) is_valid = bool(re.fullmatch(r"(\\d{17}[\\dX]|\\d{15})", normalized))
if is_valid: if is_valid:
return { result = {
"success": True, "success": True,
"valid": True, "valid": True,
"id_card": normalized, "id_card": normalized,
} }
return { await self._send_chat_message(
"┌─✅ Result: validate_id_card_number\n"
f"│ id_card: \"{normalized}\"\n"
f"│ valid: true\n"
"└───────────────"
)
return result
result = {
"success": True, "success": True,
"valid": False, "valid": False,
"id_card": normalized, "id_card": normalized,
"error": "身份证格式不正确应为18位末位可为X或15位数字", "error": "身份证格式不正确应为18位末位可为X或15位数字",
} }
await self._send_chat_message(
"┌─✅ Result: validate_id_card_number\n"
f"│ id_card: \"{normalized}\"\n"
f"│ valid: false\n"
f"│ error: \"{result['error']}\"\n"
"└───────────────"
)
return result
@function_tool() @function_tool()
async def enter_hand_off_to_human_mode( async def enter_hand_off_to_human_mode(
self, self,
context: RunContext, context: RunContext,
): ):
"""切换到“转人工”模式(前端电话界面进入人工处理)。返回成功/失败。""" """切换到"转人工"模式(前端电话界面进入人工处理)。返回成功/失败。"""
await self._send_chat_message("🔨 Call: enter_hand_off_to_human_mode") await self._send_chat_message(
"┌─🔨 Call: enter_hand_off_to_human_mode\n"
"└───────────────"
)
try: try:
room = get_job_context().room room = get_job_context().room
participant_identity = next(iter(room.remote_participants)) participant_identity = next(iter(room.remote_participants))
@@ -718,10 +793,21 @@ class MyAgent(Agent):
response_timeout=5.0, response_timeout=5.0,
) )
logger.info(f"Entered hand off to human mode: {response}") logger.info(f"Entered hand off to human mode: {response}")
await self._send_chat_message(f"✅ Result: enter_hand_off_to_human_mode\n • status: success") await self._send_chat_message(
"┌─✅ Result: enter_hand_off_to_human_mode\n"
f"│ status: success\n"
f"│ response: {response}\n"
"└───────────────"
)
return response return response
except Exception as e: except Exception as e:
logger.error(f"Failed to enter hand off to human mode: {e}") logger.error(f"Failed to enter hand off to human mode: {e}")
await self._send_chat_message(
"┌─❌ Result: enter_hand_off_to_human_mode\n"
f"│ status: error\n"
f"│ error: \"{str(e)}\"\n"
"└───────────────"
)
raise ToolError(f"Unable to enter hand off to human mode: {str(e)}") raise ToolError(f"Unable to enter hand off to human mode: {str(e)}")
@function_tool() @function_tool()
@@ -730,7 +816,10 @@ class MyAgent(Agent):
context: RunContext, context: RunContext,
): ):
"""挂断当前通话(结束会话),返回成功/失败。""" """挂断当前通话(结束会话),返回成功/失败。"""
await self._send_chat_message("🔨 Call: hang_up_call") await self._send_chat_message(
"┌─🔨 Call: hang_up_call\n"
"└───────────────"
)
try: try:
room = get_job_context().room room = get_job_context().room
participant_identity = next(iter(room.remote_participants)) participant_identity = next(iter(room.remote_participants))
@@ -741,14 +830,25 @@ class MyAgent(Agent):
response_timeout=5.0, response_timeout=5.0,
) )
logger.info(f"Hung up call: {response}") logger.info(f"Hung up call: {response}")
await self._send_chat_message(f"✅ Result: hang_up_call\n • status: disconnected") await self._send_chat_message(
"┌─✅ Result: hang_up_call\n"
f"│ status: disconnected\n"
f"│ response: {response}\n"
"└───────────────"
)
return response return response
except Exception as e: except Exception as e:
logger.error(f"Failed to hang up call: {e}") logger.error(f"Failed to hang up call: {e}")
await self._send_chat_message(
"┌─❌ Result: hang_up_call\n"
f"│ status: error\n"
f"│ error: \"{str(e)}\"\n"
"└───────────────"
)
raise ToolError(f"Unable to hang up call: {str(e)}") raise ToolError(f"Unable to hang up call: {str(e)}")
@function_tool() @function_tool()
async def ask_important_question(self, context: RunContext, message: str, options: Optional[List[str]] = None): async def ask_important_question(self, context: RunContext, message: str, options: Optional[List[str]] | str = None):
"""询问关键问题并等待用户选择选项,返回用户的选择结果。 """询问关键问题并等待用户选择选项,返回用户的选择结果。
参数: 参数:
@@ -758,7 +858,12 @@ class MyAgent(Agent):
返回: 返回:
str: 用户选择的文本内容。 str: 用户选择的文本内容。
""" """
await self._send_chat_message(f"🔨 Call: ask_important_question\n • message: \"{message}\"\n • options: {options}") await self._send_chat_message(
"┌─🔨 Call: ask_important_question\n"
f"│ message: \"{message}\"\n"
f"│ options: {options}\n"
"└───────────────"
)
try: try:
room = get_job_context().room room = get_job_context().room
participant_identity = next(iter(room.remote_participants)) participant_identity = next(iter(room.remote_participants))
@@ -781,7 +886,7 @@ class MyAgent(Agent):
payload_data["options"] = options payload_data["options"] = options
# Speak the message # Speak the message
speech_handle = self.session.say(message, allow_interruptions=True) speech_handle = self.session.say(message, allow_interruptions=False)
# Wait for user selection with longer timeout since user needs time to respond # Wait for user selection with longer timeout since user needs time to respond
response = await room.local_participant.perform_rpc( response = await room.local_participant.perform_rpc(
@@ -793,8 +898,12 @@ class MyAgent(Agent):
# Interrupt speech if user makes a selection while agent is speaking # Interrupt speech if user makes a selection while agent is speaking
if speech_handle and hasattr(speech_handle, "interrupt"): if speech_handle and hasattr(speech_handle, "interrupt"):
speech_handle.interrupt() try:
logger.info("Interrupted speech due to user selection") speech_handle.interrupt()
except Exception as e:
logger.error(f"Failed to interrupt speech: {e}")
else:
logger.info("Interrupted speech due to user selection")
logger.info(f"User made selection: {response}") logger.info(f"User made selection: {response}")
@@ -804,7 +913,11 @@ class MyAgent(Agent):
user_selection = response_data.get("selection", "确认") user_selection = response_data.get("selection", "确认")
logger.info(f"User selected: {user_selection}") logger.info(f"User selected: {user_selection}")
await self._send_chat_message(f"✅ Result: ask_important_question\n • selection: \"{user_selection}\"") await self._send_chat_message(
"┌─✅ Result: ask_important_question\n"
f"│ selection: \"{user_selection}\"\n"
"└───────────────"
)
return f"用户选择了: {user_selection}" return f"用户选择了: {user_selection}"
except json.JSONDecodeError: except json.JSONDecodeError:
logger.error(f"Failed to parse response: {response}") logger.error(f"Failed to parse response: {response}")
@@ -905,6 +1018,16 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
initial_instructions = participant.attributes.get("instructions") initial_instructions = participant.attributes.get("instructions")
logger.info(f"User selected instructions: {initial_instructions}") logger.info(f"User selected instructions: {initial_instructions}")
# Read talking_mode from frontend state
initial_talking_mode = DEFAULT_TALKING_MODE
if participant.attributes.get("talking_mode"):
frontend_talking_mode = participant.attributes.get("talking_mode")
if frontend_talking_mode in ["push_to_talk", "realtime"]:
initial_talking_mode = frontend_talking_mode
logger.info(f"Initializing talking_mode from frontend: {initial_talking_mode}")
else:
logger.warning(f"Invalid talking_mode from frontend: {frontend_talking_mode}, using default: {initial_talking_mode}")
# Replace the datetime and weekday placeholders to avoid KeyError from other braces in the prompt # Replace the datetime and weekday placeholders to avoid KeyError from other braces in the prompt
initial_instructions = initial_instructions.replace("{datetime}", current_time) initial_instructions = initial_instructions.replace("{datetime}", current_time)
initial_instructions = initial_instructions.replace("{weekday}", current_weekday) initial_instructions = initial_instructions.replace("{weekday}", current_weekday)
@@ -921,6 +1044,7 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
logger.info("Using default DeepSeek backend") logger.info("Using default DeepSeek backend")
llm = openai.LLM.with_deepseek( llm = openai.LLM.with_deepseek(
model='deepseek-chat', model='deepseek-chat',
temperature=0.1
) )
session = AgentSession( session = AgentSession(
@@ -953,6 +1077,8 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
# Increase the maximum number of function calls per turn to avoid hitting the limit # Increase the maximum number of function calls per turn to avoid hitting the limit
max_tool_steps=15, max_tool_steps=15,
) )
room_io = RoomIO(session, room=ctx.room)
await room_io.start()
# log metrics as they are emitted, and total usage after session is over # log metrics as they are emitted, and total usage after session is over
usage_collector = metrics.UsageCollector() usage_collector = metrics.UsageCollector()
@@ -1011,6 +1137,90 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
room_output_options=RoomOutputOptions(transcription_enabled=True), room_output_options=RoomOutputOptions(transcription_enabled=True),
) )
# disable input audio at the start
_talking_mode = initial_talking_mode
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
else:
session.input.set_audio_enabled(True)
@ctx.room.local_participant.register_rpc_method("start_turn")
async def start_turn(data: rtc.RpcInvocationData):
    """RPC handler "start_turn": begin a user turn for the calling participant.

    The session is interrupted first. If ``session.interrupt()`` raises
    ``RuntimeError`` the request is refused with a JSON failure payload rather
    than an RPC error. Otherwise the pending user turn is cleared, audio input
    is bound to the caller and enabled.

    Args:
        data: RPC invocation data; only ``caller_identity`` is read.

    Returns:
        str: JSON ``{"success": true}`` on success, or
        ``{"success": false, "message": ...}`` when the interrupt was refused.
    """
    try:
        session.interrupt()
    except RuntimeError as interrupt_error:
        logger.info(f"Cannot interrupt session (agent is speaking): {interrupt_error}")
        # Report the refusal in the payload instead of raising.
        rejection = {"success": False, "message": "不能打断"}
        return json.dumps(rejection)
    else:
        session.clear_user_turn()

        # Listen to the caller (matters when several users are in the room).
        room_io.set_participant(data.caller_identity)
        session.input.set_audio_enabled(True)

        accepted = {"success": True}
        return json.dumps(accepted)
@ctx.room.local_participant.register_rpc_method("end_turn")
async def end_turn(data: rtc.RpcInvocationData):
    """RPC handler "end_turn": stop listening and commit the user's turn.

    Disables audio input, then commits the current user turn. ``data`` is not
    read, and nothing is returned explicitly.
    """
    session.input.set_audio_enabled(False)

    commit_options = {
        # Timeout for the final transcript to arrive after committing the
        # turn; increase this value if the STT is slow to respond.
        "transcript_timeout": 10.0,
        # Duration of silence appended to the STT so that it generates the
        # final transcript.
        "stt_flush_duration": 2.0,
    }
    session.commit_user_turn(**commit_options)
@ctx.room.local_participant.register_rpc_method("cancel_turn")
async def cancel_turn(data: rtc.RpcInvocationData):
    """RPC handler "cancel_turn": abandon the user's current turn.

    Disables audio input and clears the pending user turn without committing
    it. ``data`` is not read, and nothing is returned explicitly.
    """
    session.input.set_audio_enabled(False)
    session.clear_user_turn()
    logger.info("cancel turn")
@ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
async def switch_ptt_and_rt(data: rtc.RpcInvocationData):
nonlocal _talking_mode
try:
# Parse the payload to get the target mode
payload = json.loads(data.payload) if data.payload else {}
target_mode = payload.get("mode")
# Validate and set the mode
if target_mode in ["push_to_talk", "realtime"]:
_talking_mode = target_mode
logger.info(f"Switching talking mode to: {_talking_mode}")
else:
# If invalid mode, toggle from current state
logger.warning(f"Invalid mode '{target_mode}', toggling from current state")
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
logger.info(f"Toggled talking mode to: {_talking_mode}")
# Apply the mode settings
room_io.set_participant(data.caller_identity)
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
logger.info("Setting audio enabled to False (PTT mode)")
else:
# When switching to realtime mode, clear user turn state to ensure proper initialization
session.clear_user_turn()
session.input.set_audio_enabled(True)
logger.info("Setting audio enabled to True (realtime mode)")
return json.dumps({"success": True, "mode": _talking_mode})
except json.JSONDecodeError:
logger.error(f"Failed to parse switch_ptt_and_rt payload: {data.payload}")
# Fallback to toggle behavior
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
room_io.set_participant(data.caller_identity)
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
else:
# When switching to realtime mode, clear user turn state
session.clear_user_turn()
session.input.set_audio_enabled(True)
return json.dumps({"success": True, "mode": _talking_mode})
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--avatar-url", type=str, default=None, help="Avatar dispatcher URL") parser.add_argument("--avatar-url", type=str, default=None, help="Avatar dispatcher URL")

View File

@@ -4,6 +4,10 @@ const withNextPluginPreval = createNextPluginPreval();
/** @type {import('next').NextConfig} */ /** @type {import('next').NextConfig} */
const nextConfig = { const nextConfig = {
reactStrictMode: false, reactStrictMode: false,
// Explicitly allow znjj.wangxin93.eu.org for Dev Origin, per future Next.js requirement.
allowedDevOrigins: [
"znjj.wangxin93.eu.org",
],
}; };
module.exports = withNextPluginPreval(nextConfig); module.exports = withNextPluginPreval(nextConfig);

View File

@@ -0,0 +1,214 @@
"use client";
import { TranscriptionTile } from "@/transcriptions/TranscriptionTile";
import { TrackReferenceOrPlaceholder } from "@livekit/components-react";
import { useCallback, useEffect, useRef, useState } from "react";
/**
 * Props accepted by the `ChatOverlay` component.
 */
export interface ChatOverlayProps {
  /** Agent audio track; passed through unchanged to `TranscriptionTile`. */
  agentAudioTrack?: TrackReferenceOrPlaceholder;
  /** Accent color; passed through unchanged to `TranscriptionTile`. */
  accentColor: string;
  /** Passed through unchanged to `TranscriptionTile` as its `inputDisabled` prop. */
  inputDisabled?: boolean;
  /** When false the overlay stays mounted but is rendered with `display: none`. */
  isVisible: boolean;
  /** Overlay offset in pixels, applied as CSS `left` / `top`. */
  position: { x: number; y: number };
  /** Called with the new offset while dragging and once for the initial auto-placement. */
  onPositionChange: (position: { x: number; y: number }) => void;
  /** Element whose bounding box is used to size the overlay and to clamp drag movement. */
  containerRef: React.RefObject<HTMLDivElement | null>;
  /** Called when the close button in the overlay header is clicked. */
  onToggle: () => void;
}
/** Pointer-style events handled by the drag logic (React synthetic or native DOM). */
type DragPointerEvent =
  | React.MouseEvent
  | React.TouchEvent
  | MouseEvent
  | TouchEvent;

/**
 * Reads the viewport coordinates of the active pointer from a mouse or touch event.
 *
 * @returns The client coordinates, or `null` when a touch event has no entry in `touches`.
 */
function getPointerPoint(e: DragPointerEvent): { x: number; y: number } | null {
  if ("touches" in e) {
    const touch = e.touches[0];
    return touch ? { x: touch.clientX, y: touch.clientY } : null;
  }
  return { x: e.clientX, y: e.clientY };
}

/**
 * Floating chat panel that wraps `TranscriptionTile`.
 *
 * The panel is sized relative to the element behind `containerRef`, can be
 * dragged by its header (mouse or touch) within that element's bounds, and is
 * hidden with `display: none` when `isVisible` is false. Position is owned by
 * the parent: every move is reported through `onPositionChange`.
 */
export function ChatOverlay({
  agentAudioTrack,
  accentColor,
  inputDisabled,
  isVisible,
  position,
  onPositionChange,
  containerRef,
  onToggle,
}: ChatOverlayProps) {
  const overlayRef = useRef<HTMLDivElement>(null);
  const headerRef = useRef<HTMLDivElement>(null);
  const [isDragging, setIsDragging] = useState(false);
  // Pointer position minus overlay position at drag start, so the overlay
  // keeps the same grab point under the pointer while it moves.
  const dragOffset = useRef({ x: 0, y: 0 });

  // Responsive sizing based on container size
  const [containerSize, setContainerSize] = useState({ width: 360, height: 500 });

  useEffect(() => {
    const updateSize = () => {
      if (containerRef.current) {
        const rect = containerRef.current.getBoundingClientRect();
        setContainerSize({ width: rect.width, height: rect.height });
      }
    };

    updateSize();
    const resizeObserver = new ResizeObserver(updateSize);
    if (containerRef.current) {
      resizeObserver.observe(containerRef.current);
    }

    return () => {
      resizeObserver.disconnect();
    };
  }, [containerRef]);

  // Width: 90% of the container with a 280px floor, never more than 95% of
  // the container (the cap wins over the floor on narrow containers).
  const overlayWidth = Math.min(
    Math.max(containerSize.width * 0.9, 280),
    containerSize.width * 0.95
  );
  // Height: 40% of the container with a 250px floor, never more than 60%.
  const overlayHeight = Math.min(
    Math.max(containerSize.height * 0.4, 250),
    containerSize.height * 0.6
  );

  // Position overlay at center (slightly moved up) when first shown
  const hasPositionedRef = useRef(false);
  useEffect(() => {
    if (isVisible && containerRef.current && containerSize.width > 0 && overlayWidth > 0 && overlayHeight > 0) {
      // Calculate center position, moved up by 15% of container height
      const centerX = (containerSize.width - overlayWidth) / 2;
      const centerY = (containerSize.height - overlayHeight) / 2 - (containerSize.height * 0.15);

      // Only auto-position on first show (when position is at origin)
      if (!hasPositionedRef.current && position.x === 0 && position.y === 0) {
        onPositionChange({ x: Math.max(0, centerX), y: Math.max(0, centerY) });
        hasPositionedRef.current = true;
      }
    }
  }, [isVisible, containerSize.width, containerSize.height, overlayWidth, overlayHeight, containerRef, position, onPositionChange]);

  const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
    if (!overlayRef.current || !headerRef.current) return;

    // Only allow dragging from the header, but not from buttons
    const target = e.target as HTMLElement;
    if (!headerRef.current.contains(target)) return;

    // Don't drag if clicking on the close button
    if (target.closest('button') || target.closest('svg')) return;

    const point = getPointerPoint(e);
    if (!point) return;

    e.preventDefault();
    e.stopPropagation();
    setIsDragging(true);

    dragOffset.current = {
      x: point.x - position.x,
      y: point.y - position.y,
    };
  };

  const handleDragMove = useCallback((e: MouseEvent | TouchEvent) => {
    if (!isDragging || !containerRef.current || !overlayRef.current) return;

    e.preventDefault();
    e.stopPropagation();

    const point = getPointerPoint(e);
    if (!point) return;

    const containerRect = containerRef.current.getBoundingClientRect();
    const overlayRect = overlayRef.current.getBoundingClientRect();

    let newX = point.x - dragOffset.current.x;
    let newY = point.y - dragOffset.current.y;

    // Constrain within container bounds
    const maxX = containerRect.width - overlayRect.width;
    const maxY = containerRect.height - overlayRect.height;
    const minY = 0; // Allow dragging to top

    newX = Math.max(0, Math.min(newX, maxX));
    newY = Math.max(minY, Math.min(newY, maxY));

    onPositionChange({ x: newX, y: newY });
  }, [isDragging, containerRef, overlayRef, onPositionChange]);

  const handleDragEnd = useCallback(() => {
    setIsDragging(false);
  }, []);

  useEffect(() => {
    if (isDragging) {
      window.addEventListener("mousemove", handleDragMove);
      window.addEventListener("mouseup", handleDragEnd);
      window.addEventListener("touchmove", handleDragMove, { passive: false });
      window.addEventListener("touchend", handleDragEnd);
      // A cancelled touch must end the drag as well.
      window.addEventListener("touchcancel", handleDragEnd);
    }

    return () => {
      window.removeEventListener("mousemove", handleDragMove);
      window.removeEventListener("mouseup", handleDragEnd);
      window.removeEventListener("touchmove", handleDragMove);
      window.removeEventListener("touchend", handleDragEnd);
      window.removeEventListener("touchcancel", handleDragEnd);
    };
  }, [isDragging, handleDragMove, handleDragEnd]);

  return (
    <div
      ref={overlayRef}
      className="absolute z-40 rounded-lg border border-white/20 shadow-2xl backdrop-blur-md transition-all duration-300 flex flex-col"
      style={{
        left: `${position.x}px`,
        top: `${position.y}px`,
        width: `${overlayWidth}px`,
        height: `${overlayHeight}px`,
        backgroundColor: 'rgba(0, 0, 0, 0.85)',
        cursor: isDragging ? 'grabbing' : 'default',
        display: isVisible ? 'flex' : 'none',
        // The class list includes `transition-all`, which would also animate
        // left/top; switch transitions off while dragging so the overlay
        // tracks the pointer directly.
        transition: isDragging ? 'none' : undefined,
      }}
      onMouseDown={handleDragStart}
      onTouchStart={handleDragStart}
    >
      {/* Header with drag handle and close button */}
      <div
        ref={headerRef}
        className="flex items-center justify-between px-4 py-2 border-b border-white/10 cursor-move select-none"
        style={{ backgroundColor: 'rgba(0, 0, 0, 0.3)' }}
      >
        <div className="flex items-center gap-2">
          <div className="w-2 h-2 rounded-full bg-white/40"></div>
          <span className="text-white text-xs font-medium">Chat</span>
        </div>
        <button
          onClick={(e) => {
            e.stopPropagation();
            e.preventDefault();
            onToggle();
          }}
          className="text-white hover:text-white transition-colors p-2 rounded hover:bg-white/10 flex items-center justify-center"
          aria-label="Close chat overlay"
          style={{ minWidth: '32px', minHeight: '32px' }}
        >
          <svg
            className="w-5 h-5"
            fill="none"
            stroke="currentColor"
            viewBox="0 0 24 24"
            strokeWidth={2.5}
          >
            <path
              strokeLinecap="round"
              strokeLinejoin="round"
              d="M6 18L18 6M6 6l12 12"
            />
          </svg>
        </button>
      </div>

      {/* Chat content with padding */}
      <div className="overflow-hidden flex flex-col px-2 py-2" style={{ height: `calc(100% - 40px)` }}>
        <TranscriptionTile
          agentAudioTrack={agentAudioTrack}
          accentColor={accentColor}
          inputDisabled={inputDisabled}
        />
      </div>
    </div>
  );
}

View File

@@ -5,15 +5,17 @@ import {
BarVisualizer, BarVisualizer,
useConnectionState, useConnectionState,
useLocalParticipant, useLocalParticipant,
useParticipantAttributes,
useRoomContext, useRoomContext,
useTracks, useTracks,
useVoiceAssistant, useVoiceAssistant,
VideoTrack, VideoTrack,
} from "@livekit/components-react"; } from "@livekit/components-react";
import { ConnectionState, Track, LocalParticipant, Room } from "livekit-client"; import { ConnectionState, Track, LocalParticipant, Room } from "livekit-client";
import { useEffect, useMemo, useState, useRef } from "react"; import { useEffect, useMemo, useState, useRef, useCallback } from "react";
import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon } from "./icons"; import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon, ChatIcon } from "./icons";
import { useToast } from "@/components/toast/ToasterProvider"; import { useToast } from "@/components/toast/ToasterProvider";
import { ChatOverlay } from "@/components/chat/ChatOverlay";
export interface PhoneSimulatorProps { export interface PhoneSimulatorProps {
onConnect: () => void; onConnect: () => void;
@@ -43,6 +45,9 @@ export function PhoneSimulator({
const { localParticipant, isMicrophoneEnabled: isMicEnabled } = useLocalParticipant(); const { localParticipant, isMicrophoneEnabled: isMicEnabled } = useLocalParticipant();
const tracks = useTracks(); const tracks = useTracks();
const voiceAssistant = useVoiceAssistant(); const voiceAssistant = useVoiceAssistant();
const agentAttributes = useParticipantAttributes({
participant: voiceAssistant.agent,
});
const fileInputRef = useRef<HTMLInputElement>(null); const fileInputRef = useRef<HTMLInputElement>(null);
const phoneContainerRef = useRef<HTMLDivElement>(null); const phoneContainerRef = useRef<HTMLDivElement>(null);
const visualizerRef = useRef<HTMLDivElement>(null); const visualizerRef = useRef<HTMLDivElement>(null);
@@ -59,6 +64,17 @@ export function PhoneSimulator({
const isAgentSpeaking = voiceAssistant.state === "speaking"; const isAgentSpeaking = voiceAssistant.state === "speaking";
const wasMicEnabledRef = useRef(false); const wasMicEnabledRef = useRef(false);
const lastPhoneMode = useRef(phoneMode); const lastPhoneMode = useRef(phoneMode);
const [isPushToTalkActive, setIsPushToTalkActive] = useState(false);
const [interruptRejected, setInterruptRejected] = useState(false);
const [isPushToTalkMode, setIsPushToTalkMode] = useState(true); // false = realtime mode, true = PTT mode (default)
const pushToTalkButtonRef = useRef<HTMLButtonElement>(null);
const [showChatOverlay, setShowChatOverlay] = useState(false);
const [chatOverlayPosition, setChatOverlayPosition] = useState({ x: 0, y: 0 }); // Will be positioned at top-right by ChatOverlay component
const [chatTogglePosition, setChatTogglePosition] = useState<{ x?: number; right?: number; y: number }>({ right: 16, y: 56 }); // Initial position on the right
const [isDraggingChatToggle, setIsDraggingChatToggle] = useState(false);
const chatToggleRef = useRef<HTMLButtonElement>(null);
const chatToggleDragOffset = useRef({ x: 0, y: 0 });
const chatToggleHasDragged = useRef(false);
useEffect(() => { useEffect(() => {
const voiceAttr = config.settings.attributes?.find(a => a.key === "voice"); const voiceAttr = config.settings.attributes?.find(a => a.key === "voice");
@@ -67,6 +83,47 @@ export function PhoneSimulator({
} }
}, [config.settings.attributes]); }, [config.settings.attributes]);
// Publish the current talking mode ("push_to_talk" or "realtime") as the local
// participant's `talking_mode` attribute when the room is connected or the
// mode changes.

// Last talking mode that was published; null means nothing has been published
// for the current connection.
const lastTalkingModeRef = useRef<string | null>(null);
// Latest configured attributes, kept in a ref so the publishing effect can read
// them without depending on them.
const configAttributesRef = useRef(config.settings.attributes);

// Keep the ref in sync with the configured attributes.
useEffect(() => {
  configAttributesRef.current = config.settings.attributes;
}, [config.settings.attributes]);

useEffect(() => {
  if (roomState === ConnectionState.Disconnected) {
    // Forget the published mode so it is sent again on the next connection.
    lastTalkingModeRef.current = null;
    return;
  }
  if (roomState !== ConnectionState.Connected || !localParticipant) return;

  const talkingMode = isPushToTalkMode ? "push_to_talk" : "realtime";

  // Only publish when the mode actually changed.
  if (lastTalkingModeRef.current === talkingMode) return;
  lastTalkingModeRef.current = talkingMode;

  // Start from the configured attributes so they are preserved; entries with an
  // empty key or value are skipped.
  const attributesToSet: Record<string, string> = {};
  for (const attr of configAttributesRef.current || []) {
    if (attr.key && attr.value) {
      attributesToSet[attr.key] = attr.value;
    }
  }
  attributesToSet.talking_mode = talkingMode;

  // The call is awaited so an asynchronous rejection is caught here too; a
  // plain try/catch around the un-awaited call only sees synchronous throws.
  const publishAttributes = async () => {
    try {
      await localParticipant.setAttributes(attributesToSet);
    } catch (error) {
      console.error("Failed to set talking_mode attribute:", error);
      // Allow a later run of this effect to retry the same mode.
      if (lastTalkingModeRef.current === talkingMode) {
        lastTalkingModeRef.current = null;
      }
    }
  };
  void publishAttributes();
}, [roomState, localParticipant, isPushToTalkMode]);
const [currentTime, setCurrentTime] = useState(""); const [currentTime, setCurrentTime] = useState("");
const [visualizerPosition, setVisualizerPosition] = useState({ const [visualizerPosition, setVisualizerPosition] = useState({
@@ -76,30 +133,41 @@ export function PhoneSimulator({
const [isDragging, setIsDragging] = useState(false); const [isDragging, setIsDragging] = useState(false);
const dragOffset = useRef({ x: 0, y: 0 }); const dragOffset = useRef({ x: 0, y: 0 });
const handleDragStart = (e: React.MouseEvent) => { const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault();
setIsDragging(true); setIsDragging(true);
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
dragOffset.current = { dragOffset.current = {
x: e.clientX - visualizerPosition.x, x: clientX - visualizerPosition.x,
y: e.clientY - visualizerPosition.y, y: clientY - visualizerPosition.y,
}; };
}; };
const handleDragMove = (e: MouseEvent) => { const handleDragMove = (e: MouseEvent | TouchEvent) => {
if (!isDragging || !phoneContainerRef.current || !visualizerRef.current) return; if (!isDragging || !phoneContainerRef.current || !visualizerRef.current) return;
e.preventDefault();
const containerRect = phoneContainerRef.current.getBoundingClientRect(); const containerRect = phoneContainerRef.current.getBoundingClientRect();
const visualizerRect = visualizerRef.current.getBoundingClientRect(); const visualizerRect = visualizerRef.current.getBoundingClientRect();
let newX = e.clientX - dragOffset.current.x; const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
let newY = e.clientY - dragOffset.current.y; const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
let newX = clientX - dragOffset.current.x;
let newY = clientY - dragOffset.current.y;
// Constrain within container // Constrain within container
const maxX = containerRect.width - visualizerRect.width; const maxX = containerRect.width - visualizerRect.width;
const maxY = containerRect.height - visualizerRect.height; const maxY = containerRect.height - visualizerRect.height;
const statusBarHeight = 48; // h-12 = 48px // On mobile (width < 768px), status bar is hidden, so allow dragging to top (y=0)
// On desktop, keep status bar height constraint (48px)
const isMobile = typeof window !== 'undefined' && window.innerWidth < 768;
const minY = isMobile ? 0 : 48; // statusBarHeight = 48px
newX = Math.max(0, Math.min(newX, maxX)); newX = Math.max(0, Math.min(newX, maxX));
newY = Math.max(statusBarHeight, Math.min(newY, maxY)); newY = Math.max(minY, Math.min(newY, maxY));
setVisualizerPosition({ setVisualizerPosition({
x: newX, x: newX,
@@ -115,13 +183,107 @@ export function PhoneSimulator({
if (isDragging) { if (isDragging) {
window.addEventListener("mouseup", handleDragEnd); window.addEventListener("mouseup", handleDragEnd);
window.addEventListener("mousemove", handleDragMove); window.addEventListener("mousemove", handleDragMove);
window.addEventListener("touchend", handleDragEnd);
window.addEventListener("touchmove", handleDragMove, { passive: false });
} }
return () => { return () => {
window.removeEventListener("mouseup", handleDragEnd); window.removeEventListener("mouseup", handleDragEnd);
window.removeEventListener("mousemove", handleDragMove); window.removeEventListener("mousemove", handleDragMove);
window.removeEventListener("touchend", handleDragEnd);
window.removeEventListener("touchmove", handleDragMove);
}; };
}, [isDragging]); }, [isDragging]);
// Chat toggle button drag handlers

/**
 * Begins a drag of the chat toggle button (mouse or touch).
 *
 * Marks the drag as started, clears the "has dragged" flag, and records the
 * pointer offset relative to the button position inside the phone container.
 * A button still positioned via `right` is switched to `x` positioning first.
 */
const handleChatToggleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
  e.preventDefault();
  e.stopPropagation(); // Prevent triggering the button click
  setIsDraggingChatToggle(true);
  chatToggleHasDragged.current = false;

  const container = phoneContainerRef.current;
  const button = chatToggleRef.current;
  if (!container || !button) return;

  const containerRect = container.getBoundingClientRect();
  const buttonRect = button.getBoundingClientRect();

  // Touch events carry coordinates on the first touch point.
  const pointer = 'touches' in e ? e.touches[0] : e;

  let startX: number;
  if (chatTogglePosition.right !== undefined && chatTogglePosition.x === undefined) {
    // Anchored via `right`: convert to an `x` position for dragging.
    startX = containerRect.width - chatTogglePosition.right - buttonRect.width;
    setChatTogglePosition({ x: startX, y: chatTogglePosition.y });
  } else {
    // Already using x positioning.
    startX = chatTogglePosition.x ?? 0;
  }

  chatToggleDragOffset.current = {
    x: pointer.clientX - containerRect.left - startX,
    y: pointer.clientY - containerRect.top - chatTogglePosition.y,
  };
};
/**
 * Moves the chat toggle button with the pointer while a drag is in progress,
 * keeping it inside the phone container.
 */
const handleChatToggleDragMove = (e: MouseEvent | TouchEvent) => {
  const container = phoneContainerRef.current;
  const button = chatToggleRef.current;
  if (!isDraggingChatToggle || !container || !button) return;

  e.preventDefault();
  chatToggleHasDragged.current = true; // Mark that we've actually dragged

  const bounds = container.getBoundingClientRect();
  const size = button.getBoundingClientRect();
  const pointer = 'touches' in e ? e.touches[0] : e;

  // Below 768px viewport width the top limit is 0; otherwise the button stays
  // under the 48px status bar.
  const isNarrowViewport = typeof window !== 'undefined' && window.innerWidth < 768;
  const topLimit = isNarrowViewport ? 0 : 48;

  const clamp = (value: number, lower: number, upper: number) =>
    Math.max(lower, Math.min(value, upper));

  // Position is relative to the container, corrected by the offset captured at drag start.
  setChatTogglePosition({
    x: clamp(
      pointer.clientX - bounds.left - chatToggleDragOffset.current.x,
      0,
      bounds.width - size.width,
    ),
    y: clamp(
      pointer.clientY - bounds.top - chatToggleDragOffset.current.y,
      topLimit,
      bounds.height - size.height,
    ),
  });
};
/**
 * Ends a chat toggle drag. The "has dragged" flag is cleared only after a short
 * delay so the button's onClick can still see it and skip toggling.
 */
const handleChatToggleDragEnd = () => {
  setIsDraggingChatToggle(false);

  const clearDraggedFlag = () => {
    chatToggleHasDragged.current = false;
  };
  setTimeout(clearDraggedFlag, 100);
};
// While the chat toggle is being dragged, track the pointer at window level so
// the drag continues when the pointer leaves the button.
useEffect(() => {
  const detachDragListeners = () => {
    window.removeEventListener("mouseup", handleChatToggleDragEnd);
    window.removeEventListener("mousemove", handleChatToggleDragMove);
    window.removeEventListener("touchend", handleChatToggleDragEnd);
    window.removeEventListener("touchmove", handleChatToggleDragMove);
  };

  if (isDraggingChatToggle) {
    window.addEventListener("mouseup", handleChatToggleDragEnd);
    window.addEventListener("mousemove", handleChatToggleDragMove);
    window.addEventListener("touchend", handleChatToggleDragEnd);
    // Registered as non-passive because the move handler calls preventDefault().
    window.addEventListener("touchmove", handleChatToggleDragMove, { passive: false });
  }

  return detachDragListeners;
}, [isDraggingChatToggle]);
// Initialize chat toggle button position - keep it on the right using 'right' CSS property
// Only convert to 'x' (left positioning) when user drags it
useEffect(() => { useEffect(() => {
if (showCameraMenu) { if (showCameraMenu) {
Room.getLocalDevices("videoinput").then(setCameras); Room.getLocalDevices("videoinput").then(setCameras);
@@ -156,35 +318,53 @@ export function PhoneSimulator({
const enteringMode = (mode: typeof phoneMode) => const enteringMode = (mode: typeof phoneMode) =>
phoneMode === mode && lastPhoneMode.current !== mode; phoneMode === mode && lastPhoneMode.current !== mode;
// Entering important message / capture / hand_off: remember mic state and mute if needed // Only proceed if connected and localParticipant is available
if (enteringMode("important_message") || enteringMode("capture") || enteringMode("hand_off")) { if (roomState !== ConnectionState.Connected || !localParticipant) return;
wasMicEnabledRef.current = isMicEnabled;
if (isMicEnabled) {
localParticipant.setMicrophoneEnabled(false);
}
}
// Exiting important message mode or hand off mode or capture mode
else if (
(phoneMode !== "important_message" && lastPhoneMode.current === "important_message") ||
(phoneMode !== "hand_off" && lastPhoneMode.current === "hand_off") ||
(phoneMode !== "capture" && lastPhoneMode.current === "capture")
) {
// Restore mic to previous state
localParticipant.setMicrophoneEnabled(wasMicEnabledRef.current);
// If exiting capture mode, clear processing image const updateMicState = async () => {
if (lastPhoneMode.current === "capture") { // Entering important message / capture / hand_off: remember mic state and mute if needed
setProcessingImage(null); if (enteringMode("important_message") || enteringMode("capture") || enteringMode("hand_off")) {
setProcessingSource(null); wasMicEnabledRef.current = isMicEnabled;
if (isMicEnabled) {
try {
await localParticipant.setMicrophoneEnabled(false);
} catch (error) {
console.error("Failed to disable microphone:", error);
}
}
} }
} // Exiting important message mode or hand off mode or capture mode
// Enforce mic off in important message mode, hand off mode, or capture mode else if (
else if ((phoneMode === "important_message" || phoneMode === "hand_off" || phoneMode === "capture") && isMicEnabled) { (phoneMode !== "important_message" && lastPhoneMode.current === "important_message") ||
localParticipant.setMicrophoneEnabled(false); (phoneMode !== "hand_off" && lastPhoneMode.current === "hand_off") ||
} (phoneMode !== "capture" && lastPhoneMode.current === "capture")
) {
// Restore mic to previous state
try {
await localParticipant.setMicrophoneEnabled(wasMicEnabledRef.current);
} catch (error) {
console.error("Failed to restore microphone:", error);
}
// If exiting capture mode, clear processing image
if (lastPhoneMode.current === "capture") {
setProcessingImage(null);
setProcessingSource(null);
}
}
// Enforce mic off in important message mode, hand off mode, or capture mode
else if ((phoneMode === "important_message" || phoneMode === "hand_off" || phoneMode === "capture") && isMicEnabled) {
try {
await localParticipant.setMicrophoneEnabled(false);
} catch (error) {
console.error("Failed to disable microphone:", error);
}
}
};
updateMicState();
lastPhoneMode.current = phoneMode; lastPhoneMode.current = phoneMode;
}, [phoneMode, isMicEnabled, localParticipant]); }, [phoneMode, isMicEnabled, localParticipant, roomState]);
useEffect(() => { useEffect(() => {
const updateTime = () => { const updateTime = () => {
@@ -210,15 +390,36 @@ export function PhoneSimulator({
); );
const handleMicToggle = async () => { const handleMicToggle = async () => {
if (isMicEnabled) { if (roomState !== ConnectionState.Connected || !localParticipant) return;
await localParticipant.setMicrophoneEnabled(false);
} else { try {
await localParticipant.setMicrophoneEnabled(true); if (isMicEnabled) {
await localParticipant.setMicrophoneEnabled(false);
} else {
await localParticipant.setMicrophoneEnabled(true);
}
} catch (error) {
console.error("Failed to toggle microphone:", error);
// Silently handle the error to avoid disrupting user experience
} }
}; };
const handleDisconnect = () => { const handleDisconnect = () => {
onDisconnect(); try {
// Only disconnect if we're actually connected
if (roomState === ConnectionState.Connected || roomState === ConnectionState.Connecting) {
onDisconnect();
}
} catch (error) {
// Silently handle any errors during disconnect
console.warn("Error during disconnect:", error);
// Still try to call onDisconnect to ensure cleanup
try {
onDisconnect();
} catch (e) {
// Ignore secondary errors
}
}
}; };
const validateImageFile = (file: File) => { const validateImageFile = (file: File) => {
@@ -407,6 +608,221 @@ export function PhoneSimulator({
setShowVoiceMenu(!showVoiceMenu); setShowVoiceMenu(!showVoiceMenu);
}; };
/**
 * Toggles between push-to-talk and realtime mode by calling the agent's
 * `switch_ptt_and_rt` RPC, then updates local state to the mode the agent
 * confirmed.
 *
 * If the reply cannot be parsed, or does not contain a recognised `mode`, the
 * requested mode is used instead. If the RPC itself fails, the current mode is
 * left unchanged and the error is only logged.
 */
const handleModeSwitch = async () => {
  if (!room || !voiceAssistant.agent) return;

  // Determine the target mode (toggle from current state)
  const targetMode = isPushToTalkMode ? "realtime" : "push_to_talk";

  try {
    const response = await room.localParticipant.performRpc({
      destinationIdentity: voiceAssistant.agent.identity,
      method: "switch_ptt_and_rt",
      payload: JSON.stringify({ mode: targetMode }),
    });

    let confirmedMode: unknown;
    try {
      // Optional chaining keeps a `null` JSON reply from throwing here.
      confirmedMode = JSON.parse(response)?.mode;
    } catch (parseError) {
      console.warn("Failed to parse mode switch response, using sent mode:", parseError);
      confirmedMode = targetMode;
    }

    // A reply without a recognised mode must not silently flip the UI to
    // realtime; fall back to the mode that was requested.
    if (confirmedMode !== "push_to_talk" && confirmedMode !== "realtime") {
      console.warn("Mode switch response had no recognised mode, using sent mode:", confirmedMode);
      confirmedMode = targetMode;
    }

    setIsPushToTalkMode(confirmedMode === "push_to_talk");
  } catch (error: unknown) {
    // Don't show error toast for mode switch failures, just log
    console.error("Failed to switch mode:", error);
  }
};
// Whether the agent advertises push-to-talk support through its "push-to-talk"
// participant attribute (only used for the button tooltip; the button is shown
// regardless).
const supportsPushToTalk = useMemo(() => {
  const attrs = agentAttributes.attributes;
  if (voiceAssistant.agent && attrs) {
    return attrs["push-to-talk"] === "1";
  }
  return false;
}, [voiceAssistant.agent, agentAttributes.attributes]);
/**
 * Starts a push-to-talk turn by calling the agent's `start_turn` RPC.
 *
 * - A JSON reply with `success: true` marks the turn as active.
 * - A JSON reply with `success: false` and the "cannot interrupt" message shows
 *   the rejection state for 3 seconds.
 * - A reply that is not valid JSON is treated as success (backward compatibility).
 * - "Method not found" style RPC errors are ignored; other errors are logged and
 *   shown as an error toast.
 *
 * NOTE(review): if the button is released before this RPC resolves,
 * handlePushToTalkEnd still sees `isPushToTalkActive === false` and sends no
 * `end_turn`, after which this handler marks the turn active — confirm whether
 * that case needs handling.
 */
const handlePushToTalkStart = async () => {
  if (!room || !voiceAssistant.agent || isPushToTalkActive) return;

  // Reset interrupt rejection state
  setInterruptRejected(false);

  try {
    const response = await room.localParticipant.performRpc({
      destinationIdentity: voiceAssistant.agent.identity,
      method: "start_turn",
      payload: "",
    });

    // Parse the response to check for success/failure
    try {
      const responseData: { success?: unknown; message?: unknown } = JSON.parse(response);

      if (responseData.success === false) {
        if (responseData.message === "不能打断") {
          // Interrupt was rejected: show the message, then clear it after 3 seconds.
          setInterruptRejected(true);
          setTimeout(() => setInterruptRejected(false), 3000);
          if (process.env.NODE_ENV === 'development') {
            console.log("Interrupt rejected (cannot interrupt):", responseData.message);
          }
          return;
        }
        // Any other refusal used to be dropped silently; leave a trace of it.
        console.warn("start_turn was refused by the agent:", responseData.message);
      } else if (responseData.success === true) {
        // Successfully started turn
        setIsPushToTalkActive(true);
        setInterruptRejected(false);
      }
    } catch {
      // If response is not JSON, assume success (backward compatibility)
      setIsPushToTalkActive(true);
      setInterruptRejected(false);
    }
  } catch (error: unknown) {
    // Handle RPC errors (method not found, etc.)
    setIsPushToTalkActive(false);

    // Narrow the unknown error before reading fields, so a non-string `message`
    // can never reach `.includes` and throw inside this handler.
    const details: { message?: unknown; code?: unknown } =
      typeof error === "object" && error !== null ? error : {};
    const errorMessage = typeof details.message === "string" ? details.message : "";
    const errorCode = details.code;

    // "Method not supported at destination" happens when RPC methods aren't
    // registered yet (e.g. before the agent is fully ready), so it is ignored.
    const methodNotReady =
      errorMessage.includes("Method not supported at destination") ||
      errorMessage.includes("method not found") ||
      errorCode === 12; // METHOD_NOT_FOUND
    if (methodNotReady) {
      console.log("RPC method not ready yet, will be available after first turn");
      return;
    }

    // Only log and show error for unexpected errors
    console.error("Unexpected error in push-to-talk:", error);
    const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered.";
    setToastMessage({ message: defaultErrorMessage, type: "error" });
  }
};
// True while an `end_turn` RPC is in flight. A single release can reach
// handlePushToTalkEnd twice (from the button's own mouseup/touchend handler and
// from the window-level listener) before `isPushToTalkActive` has re-rendered
// to false; this guard keeps the second call from sending a duplicate RPC.
const endTurnInFlightRef = useRef(false);

/**
 * Ends the active push-to-talk turn by calling the agent's `end_turn` RPC.
 *
 * The interrupt-rejection state is always cleared. The RPC is only sent while a
 * turn is active and no other `end_turn` call is in flight. The turn is marked
 * inactive whether or not the RPC succeeds.
 */
const handlePushToTalkEnd = useCallback(async () => {
  // Always clear interrupt rejection state when button is released
  setInterruptRejected(false);

  if (!room || !voiceAssistant.agent || !isPushToTalkActive) return;
  if (endTurnInFlightRef.current) return;
  endTurnInFlightRef.current = true;

  try {
    await room.localParticipant.performRpc({
      destinationIdentity: voiceAssistant.agent.identity,
      method: "end_turn",
      payload: "",
    });
  } catch (error: unknown) {
    // Don't show error toast on end_turn failure as it might be called during cleanup
    console.error("Failed to end turn:", error);
  } finally {
    endTurnInFlightRef.current = false;
    setIsPushToTalkActive(false);
  }
}, [room, voiceAssistant.agent, isPushToTalkActive]);
/**
 * Cancels the active push-to-talk turn by calling the agent's `cancel_turn` RPC.
 *
 * The interrupt-rejection state is always cleared; the RPC is only sent while a
 * turn is active. The turn is marked inactive whether or not the RPC succeeds.
 */
const handlePushToTalkCancel = useCallback(async () => {
  // Always clear interrupt rejection state when button is cancelled
  setInterruptRejected(false);

  const agent = voiceAssistant.agent;
  if (!room || !agent || !isPushToTalkActive) return;

  try {
    await room.localParticipant.performRpc({
      destinationIdentity: agent.identity,
      method: "cancel_turn",
      payload: "",
    });
  } catch (error) {
    console.error("Failed to cancel turn:", error);
  } finally {
    setIsPushToTalkActive(false);
  }
}, [room, voiceAssistant.agent, isPushToTalkActive]);
// Pointer handlers for the push-to-talk button: pressing starts a turn and
// releasing ends it. Each suppresses the browser default for the event first.

// Mouse
const handlePushToTalkMouseDown = (e: React.MouseEvent) => {
  e.preventDefault();
  void handlePushToTalkStart();
};

const handlePushToTalkMouseUp = (e: React.MouseEvent) => {
  e.preventDefault();
  void handlePushToTalkEnd();
};

// Touch
const handlePushToTalkTouchStart = (e: React.TouchEvent) => {
  e.preventDefault();
  void handlePushToTalkStart();
};

const handlePushToTalkTouchEnd = (e: React.TouchEvent) => {
  e.preventDefault();
  void handlePushToTalkEnd();
};
// While a push-to-talk turn is active, listen at window level:
//  - window blur or the Escape key cancels the turn;
//  - mouseup / touchend anywhere ends it, even when released outside the button.
useEffect(() => {
  if (!isPushToTalkActive) return;

  const cancelOnBlur = () => {
    handlePushToTalkCancel();
  };

  const cancelOnEscape = (e: KeyboardEvent) => {
    if (e.key !== "Escape") return;
    handlePushToTalkCancel();
  };

  // Shared by mouseup and touchend.
  const endOnRelease = () => {
    // Clear interrupt rejection state immediately when button is released
    setInterruptRejected(false);
    handlePushToTalkEnd();
  };

  window.addEventListener("blur", cancelOnBlur);
  window.addEventListener("keydown", cancelOnEscape);
  window.addEventListener("mouseup", endOnRelease);
  window.addEventListener("touchend", endOnRelease);

  return () => {
    window.removeEventListener("blur", cancelOnBlur);
    window.removeEventListener("keydown", cancelOnEscape);
    window.removeEventListener("mouseup", endOnRelease);
    window.removeEventListener("touchend", endOnRelease);
  };
}, [isPushToTalkActive, handlePushToTalkCancel, handlePushToTalkEnd]);
// When the room disconnects during an active push-to-talk turn, drop the local
// turn and rejection state.
useEffect(() => {
  const isDisconnected = roomState === ConnectionState.Disconnected;
  if (!isDisconnected || !isPushToTalkActive) return;

  setIsPushToTalkActive(false);
  setInterruptRejected(false);
}, [roomState, isPushToTalkActive]);
// While the agent is not speaking and a rejection is still shown, clear the
// rejection after one second. The timer is cancelled if either input changes
// first.
useEffect(() => {
  if (isAgentSpeaking || !interruptRejected) return;

  const timer = setTimeout(() => setInterruptRejected(false), 1000);
  return () => clearTimeout(timer);
}, [isAgentSpeaking, interruptRejected]);
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => { const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0]; const file = event.target.files?.[0];
if (file && onCapture) { if (file && onCapture) {
@@ -449,7 +865,7 @@ export function PhoneSimulator({
> >
<PhoneIcon className="w-8 h-8" /> <PhoneIcon className="w-8 h-8" />
</div> </div>
<span className="font-medium text-white">Call Agent</span> <span className="font-medium text-white"></span>
</button> </button>
<div className="relative"> <div className="relative">
@@ -459,7 +875,7 @@ export function PhoneSimulator({
> >
<VoiceIcon className="w-3 h-3" /> <VoiceIcon className="w-3 h-3" />
<span> <span>
{currentVoiceId === "BV001_streaming" ? "Female Voice" : "Male Voice"} {currentVoiceId === "BV001_streaming" ? "女性声音" : "男性声音"}
</span> </span>
</button> </button>
{showVoiceMenu && ( {showVoiceMenu && (
@@ -479,7 +895,7 @@ export function PhoneSimulator({
: "text-white" : "text-white"
}`} }`}
> >
<span>Female Voice</span> <span></span>
{currentVoiceId === "BV001_streaming" && <CheckIcon />} {currentVoiceId === "BV001_streaming" && <CheckIcon />}
</button> </button>
<button <button
@@ -494,7 +910,7 @@ export function PhoneSimulator({
: "text-white" : "text-white"
}`} }`}
> >
<span>Male Voice</span> <span></span>
{currentVoiceId === "BV002_streaming" && ( {currentVoiceId === "BV002_streaming" && (
<CheckIcon /> <CheckIcon />
)} )}
@@ -526,7 +942,7 @@ export function PhoneSimulator({
})(); })();
return ( return (
<div className="w-auto max-w-full h-full aspect-[9/19.5] max-h-full bg-black rounded-[40px] border-[12px] border-gray-900 overflow-hidden relative shadow-2xl flex flex-col shrink-0"> <div className="absolute inset-0 w-full h-full bg-black rounded-none border-0 overflow-hidden flex flex-col shrink-0 md:relative md:w-auto md:max-w-full md:h-full md:aspect-[9/19.5] md:max-h-full md:rounded-[40px] md:border-[12px] md:border-gray-900 md:shadow-2xl">
<style jsx global>{` <style jsx global>{`
.mirror-video video { .mirror-video video {
transform: scaleX(-1); transform: scaleX(-1);
@@ -558,7 +974,12 @@ export function PhoneSimulator({
} }
`}</style> `}</style>
{/* Status Bar */} {/* Status Bar */}
<div className="h-12 w-full bg-black/20 backdrop-blur-sm absolute top-0 left-0 z-50 flex items-center justify-between px-6 text-white text-xs font-medium"> <div className="hidden md:flex w-full bg-black/20 backdrop-blur-sm absolute top-0 left-0 z-50 items-center justify-between px-6 text-white text-xs font-medium"
style={{
paddingTop: 'max(env(safe-area-inset-top, 0px), 0.5rem)',
paddingBottom: '0.75rem',
minHeight: '3rem',
}}>
<span>{currentTime}</span> <span>{currentTime}</span>
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
<WifiIcon className="w-4 h-4" /> <WifiIcon className="w-4 h-4" />
@@ -566,8 +987,42 @@ export function PhoneSimulator({
</div> </div>
</div> </div>
{/* Chat Toggle Button - Top Right, aligned with audio visualizer (Draggable) */}
{roomState === ConnectionState.Connected &&
voiceAssistant.agent &&
phoneMode !== "important_message" &&
phoneMode !== "capture" && (
<button
ref={chatToggleRef}
className={`absolute z-50 p-3 rounded-full backdrop-blur-md transition-colors shadow-lg cursor-move select-none touch-none ${
showChatOverlay
? "bg-blue-500/80 text-white"
: "bg-gray-800/70 text-white hover:bg-gray-800/90"
}`}
onClick={(e) => {
// Only toggle if we didn't just drag
if (!chatToggleHasDragged.current) {
setShowChatOverlay(!showChatOverlay);
}
}}
onMouseDown={handleChatToggleDragStart}
onTouchStart={handleChatToggleDragStart}
title={showChatOverlay ? "Hide chat (drag to move)" : "Show chat (drag to move)"}
style={{
...(chatTogglePosition.x !== undefined ? { left: chatTogglePosition.x } : {}),
...(chatTogglePosition.right !== undefined ? { right: chatTogglePosition.right } : {}),
top: chatTogglePosition.y,
}}
>
<ChatIcon className="w-5 h-5 md:w-6 md:h-6" />
</button>
)}
{/* Main Content */} {/* Main Content */}
<div ref={phoneContainerRef} className="flex-grow relative bg-gray-950 w-full h-full overflow-hidden"> <div ref={phoneContainerRef} className="flex-grow relative bg-gray-950 w-full h-full overflow-hidden"
style={{
paddingBottom: 'env(safe-area-inset-bottom, 0px)',
}}>
<div className={`h-full w-full transition-all duration-500 ease-in-out transform ${ <div className={`h-full w-full transition-all duration-500 ease-in-out transform ${
phoneMode === "hand_off" && roomState === ConnectionState.Connected phoneMode === "hand_off" && roomState === ConnectionState.Connected
? "blur-md scale-105" ? "blur-md scale-105"
@@ -694,12 +1149,13 @@ export function PhoneSimulator({
{roomState === ConnectionState.Connected && voiceAssistant.audioTrack && phoneMode !== "hand_off" && ( {roomState === ConnectionState.Connected && voiceAssistant.audioTrack && phoneMode !== "hand_off" && (
<div <div
ref={visualizerRef} ref={visualizerRef}
className="absolute z-50 p-2 bg-black/40 backdrop-blur-md rounded-lg border border-white/10 shadow-lg cursor-move select-none" className="absolute z-50 p-2 bg-black/40 backdrop-blur-md rounded-lg border border-white/10 shadow-lg cursor-move select-none touch-none"
style={{ style={{
left: visualizerPosition.x, left: visualizerPosition.x,
top: visualizerPosition.y, top: visualizerPosition.y,
}} }}
onMouseDown={handleDragStart} onMouseDown={handleDragStart}
onTouchStart={handleDragStart}
> >
<div className="h-8 w-24 flex items-center justify-center [--lk-va-bar-width:3px] [--lk-va-bar-gap:2px] [--lk-fg:white]"> <div className="h-8 w-24 flex items-center justify-center [--lk-va-bar-width:3px] [--lk-va-bar-gap:2px] [--lk-fg:white]">
<BarVisualizer <BarVisualizer
@@ -712,10 +1168,30 @@ export function PhoneSimulator({
</div> </div>
)} )}
{/* Chat Overlay - Hidden during capture and important_message modes */}
{roomState === ConnectionState.Connected &&
voiceAssistant.agent &&
phoneMode !== "capture" &&
phoneMode !== "important_message" && (
<ChatOverlay
agentAudioTrack={voiceAssistant.audioTrack}
accentColor={config.settings.theme_color}
inputDisabled={phoneMode === "hand_off"}
isVisible={showChatOverlay}
position={chatOverlayPosition}
onPositionChange={setChatOverlayPosition}
containerRef={phoneContainerRef}
onToggle={() => setShowChatOverlay(!showChatOverlay)}
/>
)}
{/* Call Controls Overlay */} {/* Call Controls Overlay */}
{roomState === ConnectionState.Connected && ( {roomState === ConnectionState.Connected && (
phoneMode === "capture" ? ( phoneMode === "capture" ? (
<div className="absolute top-0 left-0 w-full h-full flex flex-col justify-end pb-[5%] px-[8%] z-40"> <div className="absolute top-0 left-0 w-full h-full flex flex-col justify-end px-[8%] z-40"
style={{
paddingBottom: 'calc(5% + env(safe-area-inset-bottom, 0px))',
}}>
{/* Camera Controls Row */} {/* Camera Controls Row */}
<div className="w-full flex items-center justify-evenly mb-8"> <div className="w-full flex items-center justify-evenly mb-8">
{/* Left: Upload */} {/* Left: Upload */}
@@ -792,31 +1268,205 @@ export function PhoneSimulator({
</div> </div>
</div> </div>
) : ( ) : (
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-40"> <div className="absolute bottom-[5%] left-0 w-full px-[8%] z-40"
<div className="w-full flex items-center justify-center gap-8"> style={{
{phoneMode !== "important_message" && phoneMode !== "hand_off" && ( paddingBottom: 'max(env(safe-area-inset-bottom, 0px), 0px)',
<button bottom: 'calc(5% + env(safe-area-inset-bottom, 0px))',
className={`p-4 rounded-full backdrop-blur-md transition-colors ${ }}>
!isMicEnabled <div className="w-full flex flex-col items-center justify-center gap-4">
? "bg-white text-black" {/* Mode Toggle Switch */}
: "bg-gray-600/50 text-white hover:bg-gray-600/70" {phoneMode !== "important_message" && phoneMode !== "hand_off" && voiceAssistant.agent && (
}`} <div className="flex items-center gap-3 mb-2">
onClick={handleMicToggle} <span className={`text-xs font-medium transition-colors ${isPushToTalkMode ? "text-white" : "text-gray-400"}`}>
>
{isMicEnabled ? ( </span>
<MicIcon className="w-6 h-6" /> <button
) : ( onClick={handleModeSwitch}
<MicOffIcon className="w-6 h-6" /> className={`relative inline-flex h-6 w-11 items-center rounded-full transition-colors focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2 ${
)} !isPushToTalkMode ? "bg-blue-500" : "bg-gray-600"
</button> }`}
role="switch"
aria-checked={!isPushToTalkMode}
title={isPushToTalkMode ? "切换到实时对话模式" : "切换到按下说话模式"}
>
<span
className={`inline-block h-4 w-4 transform rounded-full bg-white transition-transform ${
!isPushToTalkMode ? "translate-x-6" : "translate-x-1"
}`}
/>
</button>
<span className={`text-xs font-medium transition-colors ${!isPushToTalkMode ? "text-white" : "text-gray-400"}`}>
</span>
</div>
)} )}
<button {/* Push-to-Talk Mode Layout */}
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors" {isPushToTalkMode && phoneMode !== "hand_off" && voiceAssistant.agent && (
onClick={handleDisconnect} <>
> {/* Important Message Mode - Centered End Call Button */}
<PhoneOffIcon className="w-6 h-6" /> {phoneMode === "important_message" ? (
</button> <div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
) : (
<div className="w-full flex items-center justify-between gap-8">
{/* Left side: Mic Toggle and Camera Switch Buttons */}
<div className="flex flex-col items-center gap-2">
{/* Mic Toggle Button */}
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-800/50 text-white hover:bg-gray-800/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
{/* Camera Switch Button */}
<div className="relative">
<button
className="p-4 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleSwitchCamera}
>
<SwitchCameraIcon className="w-6 h-6" />
</button>
{showCameraMenu && (
<div className="absolute bottom-full mb-2 left-0 bg-gray-900 border border-gray-800 rounded-lg shadow-xl py-2 w-48 z-50">
{cameras.length === 0 ? (
<div className="px-4 py-2 text-gray-500 text-sm">
No cameras found
</div>
) : (
cameras.map((device) => (
<button
key={device.deviceId}
onClick={() => handleSelectCamera(device.deviceId)}
className="w-full text-left px-4 py-2 text-sm text-white hover:bg-gray-800 transition-colors truncate"
>
{device.label ||
`Camera ${cameras.indexOf(device) + 1}`}
</button>
))
)}
</div>
)}
</div>
</div>
{/* Center: Large Push-to-Talk Button */}
<button
ref={pushToTalkButtonRef}
className={`w-24 h-24 rounded-full backdrop-blur-md transition-all flex flex-col items-center justify-center gap-2 aspect-square select-none ${
interruptRejected
? "bg-red-500/70 text-white"
: isPushToTalkActive
? "bg-green-500 text-white scale-110 shadow-lg shadow-green-500/50"
: "bg-blue-500/70 text-white hover:bg-blue-500/90"
}`}
style={{ borderRadius: '50%' }}
onMouseDown={handlePushToTalkMouseDown}
onMouseUp={handlePushToTalkMouseUp}
onTouchStart={handlePushToTalkTouchStart}
onTouchEnd={handlePushToTalkTouchEnd}
title={supportsPushToTalk ? "Push to Talk" : "Push to Talk (may not be supported by this agent)"}
>
<MicIcon className="w-8 h-8" />
<span className="text-xs font-medium">
{interruptRejected ? "不允许打断" : "按住说话"}
</span>
</button>
{/* Right side: End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</>
)}
{/* Realtime Mode Layout */}
{!isPushToTalkMode && phoneMode !== "hand_off" && (
<>
{/* Important Message Mode - Centered End Call Button */}
{phoneMode === "important_message" ? (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
) : (
<div className="w-full flex items-center justify-center gap-4">
{/* Mic Toggle */}
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
{/* End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</>
)}
{/* Hand Off Mode - Show only End Call Button */}
{phoneMode === "hand_off" && (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
{/* Fallback: Show End Call Button when in push-to-talk mode but no agent/audio */}
{phoneMode === "normal" &&
isPushToTalkMode &&
!voiceAssistant.agent && (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</div> </div>
</div> </div>
) )

View File

@@ -75,11 +75,69 @@ export default function Playground({
const [rpcMethod, setRpcMethod] = useState(""); const [rpcMethod, setRpcMethod] = useState("");
const [rpcPayload, setRpcPayload] = useState(""); const [rpcPayload, setRpcPayload] = useState("");
const [showRpc, setShowRpc] = useState(false); const [showRpc, setShowRpc] = useState(false);
const [qrCodeUrl, setQrCodeUrl] = useState<string>("");
// Detach any pending RPC resolvers before a disconnect. Each resolver is
// removed from its ref first and is only invoked while the room still reports
// Connected (to avoid RPC errors); otherwise it is dropped without being called.
const cleanupRpcResolvers = useCallback(() => {
  const stillConnected = roomState === ConnectionState.Connected;

  // Pending important-message RPC.
  const pendingImportantMessage = importantMessageResolverRef.current;
  if (pendingImportantMessage) {
    importantMessageResolverRef.current = null;
    if (stillConnected) {
      try {
        pendingImportantMessage("disconnected");
      } catch (error) {
        // Deliberately ignored during cleanup - the room might be disconnecting.
      }
    }
  }

  // Pending image-capture RPC: same sequence, answered with a JSON error payload.
  const pendingImageCapture = imageCaptureResolverRef.current;
  if (pendingImageCapture) {
    imageCaptureResolverRef.current = null;
    if (stillConnected) {
      try {
        pendingImageCapture(JSON.stringify({ error: "disconnected" }));
      } catch (error) {
        // Deliberately ignored during cleanup - the room might be disconnecting.
      }
    }
  }
}, [roomState]);
// Disconnect entry point: runs cleanupRpcResolvers() first, then calls
// onConnect(false).
const handleDisconnect = useCallback(() => {
  cleanupRpcResolvers();
  try {
    onConnect(false);
  } catch (disconnectError) {
    // A synchronous failure here is not rethrown; it is reported as a console warning.
    console.warn("Error during disconnect:", disconnectError);
  }
}, [onConnect, cleanupRpcResolvers]);
useEffect(() => { useEffect(() => {
if (roomState === ConnectionState.Connected) { if (roomState === ConnectionState.Connected && localParticipant) {
localParticipant.setCameraEnabled(config.settings.inputs.camera); try {
localParticipant.setMicrophoneEnabled(config.settings.inputs.mic); localParticipant.setCameraEnabled(config.settings.inputs.camera);
localParticipant.setMicrophoneEnabled(config.settings.inputs.mic);
} catch (error) {
console.error("Failed to set camera/microphone:", error);
// Retry after a short delay if connection might not be fully ready
const retryTimeout = setTimeout(() => {
if (roomState === ConnectionState.Connected && localParticipant) {
try {
localParticipant.setCameraEnabled(config.settings.inputs.camera);
localParticipant.setMicrophoneEnabled(config.settings.inputs.mic);
} catch (retryError) {
console.error("Failed to set camera/microphone on retry:", retryError);
}
}
}, 500);
return () => clearTimeout(retryTimeout);
}
} }
}, [config.settings.inputs.camera, config.settings.inputs.mic, localParticipant, roomState]); }, [config.settings.inputs.camera, config.settings.inputs.mic, localParticipant, roomState]);
@@ -145,7 +203,7 @@ export default function Playground({
'hangUpCall', 'hangUpCall',
async () => { async () => {
// Disconnect the call // Disconnect the call
onConnect(false); handleDisconnect();
return JSON.stringify({ success: true }); return JSON.stringify({ success: true });
} }
); );
@@ -179,7 +237,7 @@ export default function Playground({
}); });
} }
); );
}, [localParticipant, roomState, onConnect]); }, [localParticipant, roomState, handleDisconnect]);
useEffect(() => { useEffect(() => {
if (roomState === ConnectionState.Connected) { if (roomState === ConnectionState.Connected) {
@@ -349,6 +407,7 @@ export default function Playground({
}, [agentVideoTrack, config, roomState]); }, [agentVideoTrack, config, roomState]);
useEffect(() => { useEffect(() => {
if (typeof document !== "undefined") {
document.body.style.setProperty( document.body.style.setProperty(
"--lk-theme-color", "--lk-theme-color",
// @ts-ignore // @ts-ignore
@@ -358,8 +417,15 @@ export default function Playground({
"--lk-drop-shadow", "--lk-drop-shadow",
`var(--lk-theme-color) 0px 0px 18px`, `var(--lk-theme-color) 0px 0px 18px`,
); );
}
}, [config.settings.theme_color]); }, [config.settings.theme_color]);
// Store the current page URL in qrCodeUrl. The empty dependency list means
// this effect runs once, after the component mounts.
useEffect(() => {
  if (typeof window === "undefined") {
    return;
  }
  setQrCodeUrl(window.location.href);
}, []);
const audioTileContent = useMemo(() => { const audioTileContent = useMemo(() => {
const disconnectedContent = ( const disconnectedContent = (
<div className="flex flex-col items-center justify-center gap-2 text-gray-700 text-center w-full"> <div className="flex flex-col items-center justify-center gap-2 text-gray-700 text-center w-full">
@@ -422,6 +488,7 @@ export default function Playground({
]); ]);
const instructionsContent = ( const instructionsContent = (
<>
<ConfigurationPanelItem title="Instructions"> <ConfigurationPanelItem title="Instructions">
<textarea <textarea
className="w-full bg-gray-950 text-white text-sm p-3 rounded-md border border-gray-800 focus:border-gray-600 focus:outline-none transition-colors resize-none disabled:opacity-50 disabled:cursor-not-allowed" className="w-full bg-gray-950 text-white text-sm p-3 rounded-md border border-gray-800 focus:border-gray-600 focus:outline-none transition-colors resize-none disabled:opacity-50 disabled:cursor-not-allowed"
@@ -437,6 +504,18 @@ export default function Playground({
disabled={roomState !== ConnectionState.Disconnected} disabled={roomState !== ConnectionState.Disconnected}
/> />
</ConfigurationPanelItem> </ConfigurationPanelItem>
<ConfigurationPanelItem title="Color">
<ColorPicker
colors={themeColors}
selectedColor={config.settings.theme_color}
onSelect={(color) => {
const userSettings = { ...config.settings };
userSettings.theme_color = color;
setUserSettings(userSettings);
}}
/>
</ConfigurationPanelItem>
</>
); );
const handleRpcCall = useCallback(async () => { const handleRpcCall = useCallback(async () => {
@@ -459,13 +538,13 @@ export default function Playground({
const settingsTileContent = useMemo(() => { const settingsTileContent = useMemo(() => {
return ( return (
<div className="flex flex-col h-full w-full items-start overflow-y-auto"> <div className="flex flex-col h-full w-full items-start overflow-y-auto">
{config.description && ( {/* {config.description && (
<ConfigurationPanelItem title="Description"> <ConfigurationPanelItem title="Description">
{config.description} {config.description}
</ConfigurationPanelItem> </ConfigurationPanelItem>
)} )} */}
<ConfigurationPanelItem title="Room"> {/* <ConfigurationPanelItem title="Room">
<div className="flex flex-col gap-2"> <div className="flex flex-col gap-2">
<EditableNameValueRow <EditableNameValueRow
name="Room name" name="Room name"
@@ -499,9 +578,9 @@ export default function Playground({
} }
/> />
</div> </div>
</ConfigurationPanelItem> </ConfigurationPanelItem> */}
<ConfigurationPanelItem title="Agent"> {/* <ConfigurationPanelItem title="Agent">
<div className="flex flex-col gap-2"> <div className="flex flex-col gap-2">
<EditableNameValueRow <EditableNameValueRow
name="Agent name" name="Agent name"
@@ -564,9 +643,9 @@ export default function Playground({
. .
</p> </p>
</div> </div>
</ConfigurationPanelItem> </ConfigurationPanelItem> */}
<ConfigurationPanelItem title="User"> {/* <ConfigurationPanelItem title="User">
<div className="flex flex-col gap-2"> <div className="flex flex-col gap-2">
<EditableNameValueRow <EditableNameValueRow
name="Name" name="Name"
@@ -618,7 +697,7 @@ export default function Playground({
connectionState={roomState} connectionState={roomState}
/> />
</div> </div>
</ConfigurationPanelItem> </ConfigurationPanelItem> */}
{roomState === ConnectionState.Connected && {roomState === ConnectionState.Connected &&
config.settings.inputs.screen && ( config.settings.inputs.screen && (
@@ -668,30 +747,16 @@ export default function Playground({
<AudioInputTile trackRef={localMicTrack} /> <AudioInputTile trackRef={localMicTrack} />
</ConfigurationPanelItem> </ConfigurationPanelItem>
)} )}
<div className="w-full"> {config.show_qr && qrCodeUrl && (
<ConfigurationPanelItem title="Color">
<ColorPicker
colors={themeColors}
selectedColor={config.settings.theme_color}
onSelect={(color) => {
const userSettings = { ...config.settings };
userSettings.theme_color = color;
setUserSettings(userSettings);
}}
/>
</ConfigurationPanelItem>
</div>
{config.show_qr && (
<div className="w-full"> <div className="w-full">
<ConfigurationPanelItem title="QR Code"> <ConfigurationPanelItem title="QR Code">
<QRCodeSVG value={window.location.href} width="128" /> <QRCodeSVG value={qrCodeUrl} width="128" />
</ConfigurationPanelItem> </ConfigurationPanelItem>
</div> </div>
)} )}
</div> </div>
); );
}, [ }, [
config.description,
config.settings, config.settings,
config.show_qr, config.show_qr,
localParticipant, localParticipant,
@@ -721,7 +786,7 @@ export default function Playground({
> >
<PhoneSimulator <PhoneSimulator
onConnect={() => onConnect(true)} onConnect={() => onConnect(true)}
onDisconnect={() => onConnect(false)} onDisconnect={handleDisconnect}
phoneMode={phoneMode} phoneMode={phoneMode}
capturePrompt={capturePrompt} capturePrompt={capturePrompt}
importantMessage={importantMessage} importantMessage={importantMessage}
@@ -785,26 +850,30 @@ export default function Playground({
return ( return (
<> <>
<PlaygroundHeader {/* <PlaygroundHeader
title={config.title} title={config.title}
logo={logo} logo={logo}
githubLink={config.github_link} githubLink={config.github_link}
height={headerHeight} height={headerHeight}
accentColor={config.settings.theme_color} accentColor={config.settings.theme_color}
connectionState={roomState} connectionState={roomState}
onConnectClicked={() => onConnectClicked={() => {
onConnect(roomState === ConnectionState.Disconnected) if (roomState === ConnectionState.Disconnected) {
} onConnect(true);
/> } else {
handleDisconnect();
}
}}
/> */}
<div <div
className={`flex gap-4 py-4 grow w-full selection:bg-${config.settings.theme_color}-900`} className={`flex gap-4 py-4 grow w-full selection:bg-${config.settings.theme_color}-900`}
style={{ height: `calc(100% - ${headerHeight}px)` }} style={{ height: `100%` }}
> >
<div className="flex flex-col grow basis-1/2 gap-4 h-full lg:hidden"> <div className="flex flex-col grow basis-1/2 gap-4 h-full lg:hidden">
<PlaygroundTabbedTile <PlaygroundTabbedTile
className="h-full" className="h-full"
tabs={mobileTabs} tabs={mobileTabs}
initialTab={mobileTabs.length - 1} initialTab={0}
/> />
</div> </div>
<div <div
@@ -821,7 +890,7 @@ export default function Playground({
> >
<PhoneSimulator <PhoneSimulator
onConnect={() => onConnect(true)} onConnect={() => onConnect(true)}
onDisconnect={() => onConnect(false)} onDisconnect={handleDisconnect}
phoneMode={phoneMode} phoneMode={phoneMode}
capturePrompt={capturePrompt} capturePrompt={capturePrompt}
importantMessage={importantMessage} importantMessage={importantMessage}
@@ -865,14 +934,14 @@ export default function Playground({
</PlaygroundTile> </PlaygroundTile>
</div> </div>
)} )}
<PlaygroundTile {/* <PlaygroundTile
padding={false} padding={false}
backgroundColor="gray-950" backgroundColor="gray-950"
className="h-full w-full basis-1/4 items-start overflow-y-auto hidden max-w-[480px] lg:flex" className="h-full w-full basis-1/4 items-start overflow-y-auto hidden max-w-[480px] lg:flex"
childrenClassName="h-full grow items-start" childrenClassName="h-full grow items-start"
> >
{settingsTileContent} {settingsTileContent}
</PlaygroundTile> </PlaygroundTile> */}
</div> </div>
</> </>
); );

View File

@@ -45,7 +45,7 @@ export const PlaygroundTile: React.FC<PlaygroundTileProps> = ({
</div> </div>
)} )}
<div <div
className={`flex flex-col items-center grow w-full ${childrenClassName}`} className={`flex flex-col items-center grow w-full relative ${childrenClassName}`}
style={{ style={{
height: `calc(100% - ${title ? titleHeight + "px" : "0px"})`, height: `calc(100% - ${title ? titleHeight + "px" : "0px"})`,
padding: `${contentPadding * 4}px`, padding: `${contentPadding * 4}px`,
@@ -74,7 +74,7 @@ export const PlaygroundTabbedTile: React.FC<PlaygroundTabbedTileProps> = ({
className={`flex flex-col h-full border rounded-sm border-gray-800 text-gray-500 bg-${backgroundColor} ${className}`} className={`flex flex-col h-full border rounded-sm border-gray-800 text-gray-500 bg-${backgroundColor} ${className}`}
> >
<div <div
className="flex items-center justify-start text-xs uppercase border-b border-b-gray-800 tracking-wider" className="flex items-center justify-start text-xs uppercase border-b border-b-gray-800 tracking-wider relative z-[100] bg-gray-950"
style={{ style={{
height: `${titleHeight}px`, height: `${titleHeight}px`,
}} }}
@@ -100,7 +100,18 @@ export const PlaygroundTabbedTile: React.FC<PlaygroundTabbedTileProps> = ({
padding: `${contentPadding * 4}px`, padding: `${contentPadding * 4}px`,
}} }}
> >
{tabs[activeTab].content} {tabs.map((tab, index) => (
<div
key={index}
style={{
display: index === activeTab ? 'block' : 'none',
height: '100%',
width: '100%',
}}
>
{tab.content}
</div>
))}
</div> </div>
</div> </div>
); );

View File

@@ -207,3 +207,20 @@ export const VoiceIcon = ({ className }: { className?: string }) => (
<line x1="12" y1="19" x2="12" y2="22" /> <line x1="12" y1="19" x2="12" y2="22" />
</svg> </svg>
); );
/**
 * Single-path outline icon drawn on a 24x24 viewBox.
 *
 * The stroke uses `currentColor`, so the icon takes the surrounding text color.
 *
 * @param className - Optional class name forwarded to the root `<svg>` element.
 */
export const ChatIcon = ({ className }: { className?: string }) => (
  <svg
    xmlns="http://www.w3.org/2000/svg"
    width="24"
    height="24"
    viewBox="0 0 24 24"
    fill="none"
    stroke="currentColor"
    strokeWidth="2"
    strokeLinecap="round"
    strokeLinejoin="round"
    className={className}
  >
    <path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z" />
  </svg>
);

View File

@@ -108,6 +108,16 @@ export function HomeInner() {
token={token} token={token}
connect={shouldConnect} connect={shouldConnect}
onError={(e) => { onError={(e) => {
// Filter out expected errors from push-to-talk interrupt failures
// These are handled gracefully in the PhoneSimulator component
if (e.message?.includes("Application error in method handler") ||
e.message?.includes("Method not supported at destination")) {
// Silently ignore - these are expected and handled in PhoneSimulator
if (process.env.NODE_ENV === 'development') {
console.log("Filtered expected error:", e.message);
}
return;
}
setToastMessage({ message: e.message, type: "error" }); setToastMessage({ message: e.message, type: "error" });
console.error(e); console.error(e);
}} }}

View File

@@ -51,3 +51,12 @@ body {
opacity: 1; opacity: 1;
} }
} }
/*
 * Hide the Next.js floating dev indicator: every selector listed below is
 * forced to display: none.
 * NOTE(review): the first selector matches ANY div whose inline style contains
 * "position: fixed", "bottom" and "right" - confirm it does not also hide
 * application elements positioned that way.
 */
div[style*="position: fixed"][style*="bottom"][style*="right"],
[data-nextjs-toast],
[data-nextjs-dialog],
#__next-build-watcher,
nextjs-portal {
  display: none !important;
}

View File

@@ -11,7 +11,7 @@ import {
Track, Track,
TranscriptionSegment, TranscriptionSegment,
} from "livekit-client"; } from "livekit-client";
import { useEffect, useState } from "react"; import { useEffect, useState, useRef } from "react";
export function TranscriptionTile({ export function TranscriptionTile({
agentAudioTrack, agentAudioTrack,
@@ -30,39 +30,51 @@ export function TranscriptionTile({
participant: localParticipant.localParticipant, participant: localParticipant.localParticipant,
}); });
const [transcripts, setTranscripts] = useState<Map<string, ChatMessageType>>(
new Map(),
);
const [messages, setMessages] = useState<ChatMessageType[]>([]); const [messages, setMessages] = useState<ChatMessageType[]>([]);
const { chatMessages, send: sendChat } = useChat(); const { chatMessages, send: sendChat } = useChat();
const transcriptMapRef = useRef<Map<string, ChatMessageType>>(new Map());
// store transcripts // Build messages from segments and chat - always rebuild from current state
useEffect(() => { useEffect(() => {
const transcriptMap = transcriptMapRef.current;
// Process agent segments - update existing or add new
if (agentAudioTrack) { if (agentAudioTrack) {
agentMessages.segments.forEach((s) => agentMessages.segments.forEach((s) => {
transcripts.set( const existing = transcriptMap.get(s.id);
transcriptMap.set(
s.id, s.id,
segmentToChatMessage( segmentToChatMessage(
s, s,
transcripts.get(s.id), existing,
agentAudioTrack.participant, agentAudioTrack.participant,
), ),
), );
); });
} }
localMessages.segments.forEach((s) => // Process local segments - update existing or add new
transcripts.set( localMessages.segments.forEach((s) => {
const existing = transcriptMap.get(s.id);
transcriptMap.set(
s.id, s.id,
segmentToChatMessage( segmentToChatMessage(
s, s,
transcripts.get(s.id), existing,
localParticipant.localParticipant, localParticipant.localParticipant,
), ),
), );
); });
const allMessages = Array.from(transcripts.values()); // Build all messages
const allMessages: ChatMessageType[] = [];
// Add all transcript messages
transcriptMap.forEach((msg) => {
allMessages.push(msg);
});
// Add chat messages
for (const msg of chatMessages) { for (const msg of chatMessages) {
const isAgent = agentAudioTrack const isAgent = agentAudioTrack
? msg.from?.identity === agentAudioTrack.participant?.identity ? msg.from?.identity === agentAudioTrack.participant?.identity
@@ -79,6 +91,7 @@ export function TranscriptionTile({
name = "Unknown"; name = "Unknown";
} }
} }
allMessages.push({ allMessages.push({
name, name,
message: msg.message, message: msg.message,
@@ -86,10 +99,11 @@ export function TranscriptionTile({
isSelf: isSelf, isSelf: isSelf,
}); });
} }
// Sort by timestamp
allMessages.sort((a, b) => a.timestamp - b.timestamp); allMessages.sort((a, b) => a.timestamp - b.timestamp);
setMessages(allMessages); setMessages(allMessages);
}, [ }, [
transcripts,
chatMessages, chatMessages,
localParticipant.localParticipant, localParticipant.localParticipant,
agentAudioTrack?.participant, agentAudioTrack?.participant,