update logic of switch_ptt_and_rt

This commit is contained in:
2025-12-17 22:17:44 +08:00
parent d942222f11
commit 6652a5cd43
2 changed files with 90 additions and 63 deletions

View File

@@ -898,8 +898,12 @@ class MyAgent(Agent):
# Interrupt speech if user makes a selection while agent is speaking
if speech_handle and hasattr(speech_handle, "interrupt"):
speech_handle.interrupt()
logger.info("Interrupted speech due to user selection")
try:
speech_handle.interrupt()
except Exception as e:
logger.error(f"Failed to interrupt speech: {e}")
else:
logger.info("Interrupted speech due to user selection")
logger.info(f"User made selection: {response}")
@@ -1135,19 +1139,17 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
try:
session.interrupt()
except RuntimeError as e:
logger.error(f"Failed to interrupt session: {e}")
# Raise RPC error so client can detect interrupt failure
# Use ERROR_INTERNAL (code 13) to indicate application error
raise rtc.RpcError(
code=13, # ERROR_INTERNAL
message="Application error in method handler"
)
logger.info(f"Cannot interrupt session (agent is speaking): {e}")
# Return a message instead of raising an error
return json.dumps({"success": False, "message": "不能打断"})
session.clear_user_turn()
# listen to the caller if multi-user
room_io.set_participant(data.caller_identity)
session.input.set_audio_enabled(True)
return json.dumps({"success": True})
@ctx.room.local_participant.register_rpc_method("end_turn")
async def end_turn(data: rtc.RpcInvocationData):
@@ -1169,12 +1171,41 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
@ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
async def switch_ptt_and_rt(data: rtc.RpcInvocationData):
nonlocal _talking_mode
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
else:
session.input.set_audio_enabled(True)
return json.dumps({"success": True, "mode": _talking_mode})
try:
# Parse the payload to get the target mode
payload = json.loads(data.payload) if data.payload else {}
target_mode = payload.get("mode")
# Validate and set the mode
if target_mode in ["push_to_talk", "realtime"]:
_talking_mode = target_mode
logger.info(f"Switching talking mode to: {_talking_mode}")
else:
# If invalid mode, toggle from current state
logger.warning(f"Invalid mode '{target_mode}', toggling from current state")
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
logger.info(f"Toggled talking mode to: {_talking_mode}")
# Apply the mode settings
room_io.set_participant(data.caller_identity)
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
logger.info("Setting audio enabled to False (PTT mode)")
else:
session.input.set_audio_enabled(True)
logger.info("Setting audio enabled to True (realtime mode)")
return json.dumps({"success": True, "mode": _talking_mode})
except json.JSONDecodeError:
logger.error(f"Failed to parse switch_ptt_and_rt payload: {data.payload}")
# Fallback to toggle behavior
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
room_io.set_participant(data.caller_identity)
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
else:
session.input.set_audio_enabled(True)
return json.dumps({"success": True, "mode": _talking_mode})
if __name__ == "__main__":
parser = argparse.ArgumentParser()