A better push-to-talk layout

2025-12-16 15:56:46 +08:00
parent 1774f550dd
commit e09e4b6930
2 changed files with 156 additions and 48 deletions
--- a/agents/my_basic_agent_1_2_9.py
+++ b/agents/my_basic_agent_1_2_9.py
@@ -1016,6 +1016,9 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_

     # disable input audio at the start
    session.input.set_audio_enabled(False)
+    
+    # Track current audio state for mode switching
+    _audio_enabled_state = False

    @ctx.room.local_participant.register_rpc_method("start_turn")
    async def start_turn(data: rtc.RpcInvocationData):
@@ -1053,6 +1056,16 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
        session.clear_user_turn()
        logger.info("cancel turn")

+    @ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
+    async def switch_ptt_and_rt(data: rtc.RpcInvocationData):  # RPC handler; `data` is not read in this body
+        nonlocal _audio_enabled_state  # rebinds the flag defined in the enclosing function scope
+        # Flip the tracked flag, then apply the new value to the session's audio input.
+        _audio_enabled_state = not _audio_enabled_state  # NOTE(review): in the visible diff only this handler updates the flag — confirm no other handler changes audio input without updating it
+        session.input.set_audio_enabled(_audio_enabled_state)
+        mode = "push-to-talk" if not _audio_enabled_state else "realtime"  # audio off => "push-to-talk", audio on => "realtime"
+        logger.info(f"Switched to {mode} mode (audio enabled: {_audio_enabled_state})")
+        return json.dumps({"success": True, "mode": mode, "audio_enabled": _audio_enabled_state})  # reply payload is a JSON string
+
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--avatar-url", type=str, default=None, help="Avatar dispatcher URL")