update logic of switch_ptt_and_rt

2025-12-17 22:17:44 +08:00
parent d942222f11
commit 6652a5cd43
2 changed files with 90 additions and 63 deletions
--- a/agents/my_basic_agent_1_2_9.py
+++ b/agents/my_basic_agent_1_2_9.py
@@ -898,8 +898,12 @@ class MyAgent(Agent):
            
            # Interrupt speech if user makes a selection while agent is speaking
            if speech_handle and hasattr(speech_handle, "interrupt"):
-                speech_handle.interrupt()
-                logger.info("Interrupted speech due to user selection")
+                try:
+                    speech_handle.interrupt()
+                except Exception as e:
+                    logger.error(f"Failed to interrupt speech: {e}")
+                else:
+                    logger.info("Interrupted speech due to user selection")
                
            logger.info(f"User made selection: {response}")
            
@@ -1135,19 +1139,17 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
        try:
            session.interrupt()
        except RuntimeError as e:
-            logger.error(f"Failed to interrupt session: {e}")
-            # Raise RPC error so client can detect interrupt failure
-            # Use ERROR_INTERNAL (code 13) to indicate application error
-            raise rtc.RpcError(
-                code=13,  # ERROR_INTERNAL
-                message="Application error in method handler"
-            )
+            logger.info(f"Cannot interrupt session (agent is speaking): {e}")
+            # Return a message instead of raising an error
+            return json.dumps({"success": False, "message": "不能打断"})

        session.clear_user_turn()

        # listen to the caller if multi-user
        room_io.set_participant(data.caller_identity)
        session.input.set_audio_enabled(True)
+        
+        return json.dumps({"success": True})

    @ctx.room.local_participant.register_rpc_method("end_turn")
    async def end_turn(data: rtc.RpcInvocationData):
@@ -1169,12 +1171,41 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
    @ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
    async def switch_ptt_and_rt(data: rtc.RpcInvocationData):
        nonlocal _talking_mode
-        _talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
-        if _talking_mode == "push_to_talk":
-            session.input.set_audio_enabled(False)
-        else:
-            session.input.set_audio_enabled(True)
-        return json.dumps({"success": True, "mode": _talking_mode})
+        try:
+            # Parse the payload to get the target mode
+            payload = json.loads(data.payload) if data.payload else {}
+            target_mode = payload.get("mode")
+            
+            # Validate and set the mode
+            if target_mode in ["push_to_talk", "realtime"]:
+                _talking_mode = target_mode
+                logger.info(f"Switching talking mode to: {_talking_mode}")
+            else:
+                # Missing or unrecognized mode: toggle from the current state
+                logger.warning(f"Invalid mode '{target_mode}', toggling from current state")
+                _talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
+                logger.info(f"Toggled talking mode to: {_talking_mode}")
+            
+            # Apply the mode settings
+            room_io.set_participant(data.caller_identity)
+            if _talking_mode == "push_to_talk":
+                session.input.set_audio_enabled(False)
+                logger.info("Setting audio enabled to False (PTT mode)")
+            else:
+                session.input.set_audio_enabled(True)
+                logger.info("Setting audio enabled to True (realtime mode)")
+            
+            return json.dumps({"success": True, "mode": _talking_mode})
+        except json.JSONDecodeError:
+            logger.error(f"Failed to parse switch_ptt_and_rt payload: {data.payload}")
+            # Fallback to toggle behavior
+            _talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
+            room_io.set_participant(data.caller_identity)
+            if _talking_mode == "push_to_talk":
+                session.input.set_audio_enabled(False)
+            else:
+                session.input.set_audio_enabled(True)
+            return json.dumps({"success": True, "mode": _talking_mode})

 if __name__ == "__main__":
    parser = argparse.ArgumentParser()