From 6652a5cd43fe62c2fa12a6f89719a6446d41a66a Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Wed, 17 Dec 2025 22:17:44 +0800 Subject: [PATCH] update logic of switch_ptt_and_rt --- agents/my_basic_agent_1_2_9.py | 61 +++++++++---- src/components/playground/PhoneSimulator.tsx | 92 ++++++++++---------- 2 files changed, 90 insertions(+), 63 deletions(-) diff --git a/agents/my_basic_agent_1_2_9.py b/agents/my_basic_agent_1_2_9.py index 0931ef8..caaeca9 100644 --- a/agents/my_basic_agent_1_2_9.py +++ b/agents/my_basic_agent_1_2_9.py @@ -898,8 +898,12 @@ class MyAgent(Agent): # Interrupt speech if user makes a selection while agent is speaking if speech_handle and hasattr(speech_handle, "interrupt"): - speech_handle.interrupt() - logger.info("Interrupted speech due to user selection") + try: + speech_handle.interrupt() + except Exception as e: + logger.error(f"Failed to interrupt speech: {e}") + else: + logger.info("Interrupted speech due to user selection") logger.info(f"User made selection: {response}") @@ -1135,19 +1139,17 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_ try: session.interrupt() except RuntimeError as e: - logger.error(f"Failed to interrupt session: {e}") - # Raise RPC error so client can detect interrupt failure - # Use ERROR_INTERNAL (code 13) to indicate application error - raise rtc.RpcError( - code=13, # ERROR_INTERNAL - message="Application error in method handler" - ) + logger.info(f"Cannot interrupt session (agent is speaking): {e}") + # Return a message instead of raising an error + return json.dumps({"success": False, "message": "不能打断"}) session.clear_user_turn() # listen to the caller if multi-user room_io.set_participant(data.caller_identity) session.input.set_audio_enabled(True) + + return json.dumps({"success": True}) @ctx.room.local_participant.register_rpc_method("end_turn") async def end_turn(data: rtc.RpcInvocationData): @@ -1169,12 +1171,41 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_ @ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt") async def switch_ptt_and_rt(data: rtc.RpcInvocationData): nonlocal _talking_mode - _talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime" - if _talking_mode == "push_to_talk": - session.input.set_audio_enabled(False) - else: - session.input.set_audio_enabled(True) - return json.dumps({"success": True, "mode": _talking_mode}) + try: + # Parse the payload to get the target mode + payload = json.loads(data.payload) if data.payload else {} + target_mode = payload.get("mode") + + # Validate and set the mode + if target_mode in ["push_to_talk", "realtime"]: + _talking_mode = target_mode + logger.info(f"Switching talking mode to: {_talking_mode}") + else: + # If invalid mode, toggle from current state + logger.warning(f"Invalid mode '{target_mode}', toggling from current state") + _talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime" + logger.info(f"Toggled talking mode to: {_talking_mode}") + + # Apply the mode settings + room_io.set_participant(data.caller_identity) + if _talking_mode == "push_to_talk": + session.input.set_audio_enabled(False) + logger.info("Setting audio enabled to False (PTT mode)") + else: + session.input.set_audio_enabled(True) + logger.info("Setting audio enabled to True (realtime mode)") + + return json.dumps({"success": True, "mode": _talking_mode}) + except json.JSONDecodeError: + logger.error(f"Failed to parse switch_ptt_and_rt payload: {data.payload}") + # Fallback to toggle behavior + _talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime" + room_io.set_participant(data.caller_identity) + if _talking_mode == "push_to_talk": + session.input.set_audio_enabled(False) + else: + session.input.set_audio_enabled(True) + return json.dumps({"success": True, "mode": _talking_mode}) if __name__ == "__main__": parser = argparse.ArgumentParser() diff --git a/src/components/playground/PhoneSimulator.tsx b/src/components/playground/PhoneSimulator.tsx index 3a7ea9a..c9e71be 100644 --- a/src/components/playground/PhoneSimulator.tsx +++ b/src/components/playground/PhoneSimulator.tsx @@ -475,14 +475,27 @@ export function PhoneSimulator({ const handleModeSwitch = async () => { if (!room || !voiceAssistant.agent) return; + // Determine the target mode (toggle from current state) + const targetMode = isPushToTalkMode ? "realtime" : "push_to_talk"; + try { - await room.localParticipant.performRpc({ + const response = await room.localParticipant.performRpc({ destinationIdentity: voiceAssistant.agent.identity, method: "switch_ptt_and_rt", - payload: "", + payload: JSON.stringify({ mode: targetMode }), }); - // Toggle mode on success - setIsPushToTalkMode(prev => !prev); + + // Parse the response to confirm the mode was set + try { + const responseData = JSON.parse(response); + const confirmedMode = responseData.mode; + // Update state based on server response + setIsPushToTalkMode(confirmedMode === "push_to_talk"); + } catch (parseError) { + // If parsing fails, update state based on what we sent + console.warn("Failed to parse mode switch response, using sent mode:", parseError); + setIsPushToTalkMode(targetMode === "push_to_talk"); + } } catch (error: any) { console.error("Failed to switch mode:", error); // Don't show error toast for mode switch failures, just log @@ -502,16 +515,38 @@ export function PhoneSimulator({ setInterruptRejected(false); try { - await room.localParticipant.performRpc({ + const response = await room.localParticipant.performRpc({ destinationIdentity: voiceAssistant.agent.identity, method: "start_turn", payload: "", }); - setIsPushToTalkActive(true); - setInterruptRejected(false); + + // Parse the response to check for success/failure + try { + const responseData = JSON.parse(response); + if (responseData.success === false) { + // Interrupt was rejected, show message + if (responseData.message === "不能打断") { + setInterruptRejected(true); + // Clear the rejection message after 3 seconds + setTimeout(() => setInterruptRejected(false), 3000); + if (process.env.NODE_ENV === 'development') { + console.log("Interrupt rejected (cannot interrupt):", responseData.message); + } + return; + } + } else if (responseData.success === true) { + // Successfully started turn + setIsPushToTalkActive(true); + setInterruptRejected(false); + } + } catch (parseError) { + // If response is not JSON, assume success (backward compatibility) + setIsPushToTalkActive(true); + setInterruptRejected(false); + } } catch (error: any) { - // Prevent error from propagating to React error boundary - // by handling all expected errors here + // Handle RPC errors (method not found, etc.) setIsPushToTalkActive(false); const errorMessage = error?.message || ""; @@ -527,45 +562,6 @@ export function PhoneSimulator({ return; } - // Check for "Application error in method handler" - this indicates interrupt failed - // This error is raised when session.interrupt() fails in the agent - // We handle this gracefully by showing "不允许打断" on the button, so we don't log it as an error - if (errorMessage.includes("Application error in method handler") || - errorMessage.includes("Application error") || - errorCode === 13 || // ERROR_INTERNAL (RpcErrorCode.ERROR_INTERNAL) - (isAgentSpeaking && errorMessage.includes("interrupt"))) { - // Suppress error logging for expected interrupt failures - // Only log at debug level to avoid error popups - if (process.env.NODE_ENV === 'development') { - console.log("Interrupt rejected (expected behavior):", errorMessage); - } - setInterruptRejected(true); - // Clear the rejection message after 3 seconds - setTimeout(() => setInterruptRejected(false), 3000); - // Explicitly prevent error from propagating - error.preventDefault?.(); - error.stopPropagation?.(); - return; - } - - // Check if agent is speaking and the error suggests interruption was rejected - if (isAgentSpeaking) { - // Check for common rejection indicators - if (errorMessage.includes("reject") || - errorMessage.includes("not allowed") || - errorCode === 403 || // Forbidden - errorCode === 409) { // Conflict - // Suppress error logging for expected rejections - if (process.env.NODE_ENV === 'development') { - console.log("Interrupt rejected:", errorMessage); - } - setInterruptRejected(true); - // Clear the rejection message after 3 seconds - setTimeout(() => setInterruptRejected(false), 3000); - return; - } - } - // Only log and show error for unexpected errors console.error("Unexpected error in push-to-talk:", error); const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered.";