a better push to talk layout

This commit is contained in:
Xin Wang 2025-12-16 15:56:46 +08:00
parent 1774f550dd
commit e09e4b6930
2 changed files with 156 additions and 48 deletions

View File

@ -1016,6 +1016,9 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
# disable input audio at the start
session.input.set_audio_enabled(False)
# Track current audio state for mode switching
_audio_enabled_state = False
@ctx.room.local_participant.register_rpc_method("start_turn")
async def start_turn(data: rtc.RpcInvocationData):
@ -1053,6 +1056,16 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
session.clear_user_turn()
logger.info("cancel turn")
@ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
async def switch_ptt_and_rt(data: rtc.RpcInvocationData):
nonlocal _audio_enabled_state
# Toggle audio input state
_audio_enabled_state = not _audio_enabled_state
session.input.set_audio_enabled(_audio_enabled_state)
mode = "push-to-talk" if not _audio_enabled_state else "realtime"
logger.info(f"Switched to {mode} mode (audio enabled: {_audio_enabled_state})")
return json.dumps({"success": True, "mode": mode, "audio_enabled": _audio_enabled_state})
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--avatar-url", type=str, default=None, help="Avatar dispatcher URL")

View File

@ -65,6 +65,7 @@ export function PhoneSimulator({
const lastPhoneMode = useRef(phoneMode);
const [isPushToTalkActive, setIsPushToTalkActive] = useState(false);
const [interruptRejected, setInterruptRejected] = useState(false);
const [isPushToTalkMode, setIsPushToTalkMode] = useState(true); // false = realtime mode, true = PTT mode (default)
const pushToTalkButtonRef = useRef<HTMLButtonElement>(null);
useEffect(() => {
@ -428,6 +429,23 @@ export function PhoneSimulator({
setShowVoiceMenu(!showVoiceMenu);
};
/**
 * Asks the agent to switch between push-to-talk and realtime mode via the
 * "switch_ptt_and_rt" RPC and updates the local mode flag to match.
 *
 * Does nothing when there is no room or no agent. Failures are only logged;
 * the local mode flag is left untouched in that case.
 */
const handleModeSwitch = async () => {
  if (!room || !voiceAssistant.agent) return;

  try {
    const response = await room.localParticipant.performRpc({
      destinationIdentity: voiceAssistant.agent.identity,
      method: "switch_ptt_and_rt",
      payload: "",
    });

    // The agent-side handler replies with JSON that includes the mode it
    // ended up in ("push-to-talk" or "realtime"). Prefer that over a blind
    // local toggle so the UI cannot stay inverted relative to the agent.
    let reportedMode: unknown;
    try {
      const parsed: unknown = JSON.parse(response);
      if (typeof parsed === "object" && parsed !== null && "mode" in parsed) {
        reportedMode = (parsed as { mode?: unknown }).mode;
      }
    } catch {
      // Reply was not JSON; fall through to the toggle below.
    }

    if (reportedMode === "push-to-talk") {
      setIsPushToTalkMode(true);
    } else if (reportedMode === "realtime") {
      setIsPushToTalkMode(false);
    } else {
      // No usable mode in the reply: keep the previous behavior and toggle.
      setIsPushToTalkMode((prev) => !prev);
    }
  } catch (error: unknown) {
    console.error("Failed to switch mode:", error);
    // Don't show error toast for mode switch failures, just log
  }
};
// Check if agent supports push-to-talk (optional check, button will show regardless)
const supportsPushToTalk = useMemo(() => {
if (!voiceAssistant.agent || !agentAttributes.attributes) return false;
@ -513,6 +531,9 @@ export function PhoneSimulator({
};
const handlePushToTalkEnd = useCallback(async () => {
// Always clear interrupt rejection state when button is released
setInterruptRejected(false);
if (!room || !voiceAssistant.agent || !isPushToTalkActive) return;
try {
@ -522,16 +543,17 @@ export function PhoneSimulator({
payload: "",
});
setIsPushToTalkActive(false);
setInterruptRejected(false);
} catch (error: any) {
console.error("Failed to end turn:", error);
// Don't show error toast on end_turn failure as it might be called during cleanup
setIsPushToTalkActive(false);
setInterruptRejected(false);
}
}, [room, voiceAssistant.agent, isPushToTalkActive]);
const handlePushToTalkCancel = useCallback(async () => {
// Always clear interrupt rejection state when button is cancelled
setInterruptRejected(false);
if (!room || !voiceAssistant.agent || !isPushToTalkActive) return;
try {
@ -541,11 +563,9 @@ export function PhoneSimulator({
payload: "",
});
setIsPushToTalkActive(false);
setInterruptRejected(false);
} catch (error) {
console.error("Failed to cancel turn:", error);
setIsPushToTalkActive(false);
setInterruptRejected(false);
}
}, [room, voiceAssistant.agent, isPushToTalkActive]);
@ -587,10 +607,14 @@ export function PhoneSimulator({
// Handle global mouseup/touchend to end push-to-talk even if released outside button
// Global mouse release: drop the "interrupt rejected" indicator right away,
// then run the normal push-to-talk end handling.
const handleGlobalMouseUp = (): void => {
  setInterruptRejected(false);
  void handlePushToTalkEnd();
};
// Global touch release: drop the "interrupt rejected" indicator right away,
// then run the normal push-to-talk end handling.
const handleGlobalTouchEnd = (): void => {
  setInterruptRejected(false);
  void handlePushToTalkEnd();
};
@ -1011,56 +1035,127 @@ export function PhoneSimulator({
) : (
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-40">
<div className="w-full flex flex-col items-center justify-center gap-4">
{/* Push-to-Talk Button - Centered and Bigger */}
{/* Mode Toggle Switch */}
{phoneMode !== "important_message" && phoneMode !== "hand_off" && voiceAssistant.agent && (
<button
ref={pushToTalkButtonRef}
className={`w-24 h-24 rounded-full backdrop-blur-md transition-all flex flex-col items-center justify-center gap-2 ${
interruptRejected
? "bg-red-500/70 text-white"
: isPushToTalkActive
? "bg-green-500 text-white scale-110 shadow-lg shadow-green-500/50"
: "bg-blue-500/70 text-white hover:bg-blue-500/90"
}`}
onMouseDown={handlePushToTalkMouseDown}
onMouseUp={handlePushToTalkMouseUp}
onTouchStart={handlePushToTalkTouchStart}
onTouchEnd={handlePushToTalkTouchEnd}
title={supportsPushToTalk ? "Push to Talk" : "Push to Talk (may not be supported by this agent)"}
>
<MicIcon className="w-8 h-8" />
<span className="text-xs font-medium">
{interruptRejected ? "不允许打断" : "按住说话"}
<div className="flex items-center gap-3 mb-2">
<span className={`text-xs font-medium transition-colors ${isPushToTalkMode ? "text-white" : "text-gray-400"}`}>
</span>
</button>
<button
onClick={handleModeSwitch}
className={`relative inline-flex h-6 w-11 items-center rounded-full transition-colors focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2 ${
!isPushToTalkMode ? "bg-blue-500" : "bg-gray-600"
}`}
role="switch"
aria-checked={!isPushToTalkMode}
title={isPushToTalkMode ? "切换到实时对话模式" : "切换到按下说话模式"}
>
<span
className={`inline-block h-4 w-4 transform rounded-full bg-white transition-transform ${
!isPushToTalkMode ? "translate-x-6" : "translate-x-1"
}`}
/>
</button>
<span className={`text-xs font-medium transition-colors ${!isPushToTalkMode ? "text-white" : "text-gray-400"}`}>
</span>
</div>
)}
{/* Other Controls */}
<div className="w-full flex items-center justify-center gap-8">
{phoneMode !== "important_message" && phoneMode !== "hand_off" && (
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
{/* Push-to-Talk Mode Layout */}
{isPushToTalkMode && phoneMode !== "important_message" && phoneMode !== "hand_off" && voiceAssistant.agent && (
<div className="w-full flex items-center justify-center gap-8">
{/* Camera Switch Button - Left */}
<div className="relative">
<button
className="p-4 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleSwitchCamera}
>
<SwitchCameraIcon className="w-6 h-6" />
</button>
{showCameraMenu && (
<div className="absolute bottom-full mb-2 left-0 bg-gray-900 border border-gray-800 rounded-lg shadow-xl py-2 w-48 z-50">
{cameras.length === 0 ? (
<div className="px-4 py-2 text-gray-500 text-sm">
No cameras found
</div>
) : (
cameras.map((device) => (
<button
key={device.deviceId}
onClick={() => handleSelectCamera(device.deviceId)}
className="w-full text-left px-4 py-2 text-sm text-white hover:bg-gray-800 transition-colors truncate"
>
{device.label ||
`Camera ${cameras.indexOf(device) + 1}`}
</button>
))
)}
</div>
)}
</button>
)}
</div>
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
{/* Large Push-to-Talk Button - Center */}
<button
ref={pushToTalkButtonRef}
className={`w-24 h-24 rounded-full backdrop-blur-md transition-all flex flex-col items-center justify-center gap-2 ${
interruptRejected
? "bg-red-500/70 text-white"
: isPushToTalkActive
? "bg-green-500 text-white scale-110 shadow-lg shadow-green-500/50"
: "bg-blue-500/70 text-white hover:bg-blue-500/90"
}`}
onMouseDown={handlePushToTalkMouseDown}
onMouseUp={handlePushToTalkMouseUp}
onTouchStart={handlePushToTalkTouchStart}
onTouchEnd={handlePushToTalkTouchEnd}
title={supportsPushToTalk ? "Push to Talk" : "Push to Talk (may not be supported by this agent)"}
>
<MicIcon className="w-8 h-8" />
<span className="text-xs font-medium">
{interruptRejected ? "不允许打断" : "按住说话"}
</span>
</button>
{/* End Call Button - Right */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
{/* Realtime Mode Layout */}
{!isPushToTalkMode && (
<div className="w-full flex items-center justify-center gap-8">
{phoneMode !== "important_message" && phoneMode !== "hand_off" && (
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
)}
{/* End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</div>
</div>
)