diff --git a/agents/my_basic_agent_1_2_9.py b/agents/my_basic_agent_1_2_9.py
index 7b9b8ed..f81ea89 100644
--- a/agents/my_basic_agent_1_2_9.py
+++ b/agents/my_basic_agent_1_2_9.py
@@ -31,6 +31,7 @@ from livekit.agents import (
     cli,
     get_job_context,
     metrics,
+    RoomIO,
 )
 from livekit.agents.llm import ImageContent, ToolError, function_tool
 from typing import Any, List, Optional
@@ -953,6 +954,8 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
         # Increase the maximum number of function calls per turn to avoid hitting the limit
         max_tool_steps=15,
     )
+    room_io = RoomIO(session, room=ctx.room)
+    await room_io.start()

     # log metrics as they are emitted, and total usage after session is over
     usage_collector = metrics.UsageCollector()
@@ -1011,6 +1014,45 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
         room_output_options=RoomOutputOptions(transcription_enabled=True),
     )

+    # Disable input audio at the start; it is only enabled while a turn is open.
+    session.input.set_audio_enabled(False)
+
+    @ctx.room.local_participant.register_rpc_method("start_turn")
+    async def start_turn(data: rtc.RpcInvocationData):
+        try:
+            session.interrupt()
+        except RuntimeError as e:
+            logger.error(f"Failed to interrupt session: {e}")
+            # Raise an RPC error so the client can detect the failed interrupt.
+            # Code 13 is a custom code; the client matches on it and on the message.
+            raise rtc.RpcError(
+                code=13,
+                message="Application error in method handler",
+            )
+
+        session.clear_user_turn()
+
+        # route input audio to the caller that invoked the RPC (multi-user rooms)
+        room_io.set_participant(data.caller_identity)
+        session.input.set_audio_enabled(True)
+
+    @ctx.room.local_participant.register_rpc_method("end_turn")
+    async def end_turn(data: rtc.RpcInvocationData):
+        session.input.set_audio_enabled(False)
+        session.commit_user_turn(
+            # how long to wait for the final transcript after committing the turn;
+            # increase this value if the STT is slow to respond
+            transcript_timeout=10.0,
+            # duration of silence appended to the STT stream to force a final transcript
+            stt_flush_duration=2.0,
+        )
+
+    @ctx.room.local_participant.register_rpc_method("cancel_turn")
+    async def cancel_turn(data: rtc.RpcInvocationData):
+        session.input.set_audio_enabled(False)
+        session.clear_user_turn()
+        logger.info("cancel turn")
+

 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--avatar-url", type=str, default=None, help="Avatar dispatcher URL")
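Note: clear_user_turn() and commit_user_turn() are the manual turn-control APIs, and in the livekit-agents push-to-talk pattern they are normally paired with a session created with turn_detection="manual". The AgentSession constructor call is outside this hunk, so the snippet below is only a sketch of the assumed session setup, not code from this patch.

from livekit.agents import AgentSession

# Sketch only (assumed, not shown in the diff): a session wired for push-to-talk.
# With turn_detection="manual", the agent replies only after commit_user_turn()
# is called from the "end_turn" RPC handler above.
def build_push_to_talk_session() -> AgentSession:
    return AgentSession(
        turn_detection="manual",   # no automatic end-of-turn detection
        max_tool_steps=15,         # mirrors the value already passed in entrypoint()
    )
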
diff --git a/src/components/playground/PhoneSimulator.tsx b/src/components/playground/PhoneSimulator.tsx
index 1773209..68e51e8 100644
--- a/src/components/playground/PhoneSimulator.tsx
+++ b/src/components/playground/PhoneSimulator.tsx
@@ -5,13 +5,14 @@
 import {
   BarVisualizer,
   useConnectionState,
   useLocalParticipant,
+  useParticipantAttributes,
   useRoomContext,
   useTracks,
   useVoiceAssistant,
   VideoTrack,
 } from "@livekit/components-react";
 import { ConnectionState, Track, LocalParticipant, Room } from "livekit-client";
-import { useEffect, useMemo, useState, useRef } from "react";
+import { useEffect, useMemo, useState, useRef, useCallback } from "react";
 import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon } from "./icons";
 import { useToast } from "@/components/toast/ToasterProvider";
@@ -43,6 +44,9 @@ export function PhoneSimulator({
   const { localParticipant, isMicrophoneEnabled: isMicEnabled } = useLocalParticipant();
   const tracks = useTracks();
   const voiceAssistant = useVoiceAssistant();
+  const agentAttributes = useParticipantAttributes({
+    participant: voiceAssistant.agent,
+  });
   const fileInputRef = useRef(null);
   const phoneContainerRef = useRef(null);
   const visualizerRef = useRef(null);
@@ -59,6 +63,9 @@ export function PhoneSimulator({
   const isAgentSpeaking = voiceAssistant.state === "speaking";
   const wasMicEnabledRef = useRef(false);
   const lastPhoneMode = useRef(phoneMode);
+  const [isPushToTalkActive, setIsPushToTalkActive] = useState(false);
+  const [interruptRejected, setInterruptRejected] = useState(false);
+  const pushToTalkButtonRef = useRef(null);

   useEffect(() => {
     const voiceAttr = config.settings.attributes?.find(a => a.key === "voice");
@@ -421,6 +428,202 @@ export function PhoneSimulator({
     setShowVoiceMenu(!showVoiceMenu);
   };

+  // Check whether the agent supports push-to-talk (optional check, the button shows regardless)
+  const supportsPushToTalk = useMemo(() => {
+    if (!voiceAssistant.agent || !agentAttributes.attributes) return false;
+    return agentAttributes.attributes["push-to-talk"] === "1";
+  }, [voiceAssistant.agent, agentAttributes.attributes]);
+
+  const handlePushToTalkStart = async () => {
+    if (!room || !voiceAssistant.agent || isPushToTalkActive) return;
+
+    // Reset interrupt rejection state
+    setInterruptRejected(false);
+
+    try {
+      await room.localParticipant.performRpc({
+        destinationIdentity: voiceAssistant.agent.identity,
+        method: "start_turn",
+        payload: "",
+      });
+      setIsPushToTalkActive(true);
+      setInterruptRejected(false);
+    } catch (error: any) {
+      // Handle all expected errors here so they never reach the React error boundary
+      setIsPushToTalkActive(false);
+
+      const errorMessage = error?.message || "";
+      const errorCode = error?.code;
+
+      // "Method not supported at destination" happens when the RPC methods aren't registered yet.
+      // This can occur on the first call before the agent is fully ready, so we silently ignore it.
+      if (errorMessage.includes("Method not supported at destination") ||
+          errorMessage.includes("method not found") ||
+          errorCode === 12) { // METHOD_NOT_FOUND
+        console.log("RPC method not ready yet, will be available after first turn");
+        return;
+      }
+
+      // "Application error in method handler" means the interrupt failed:
+      // it is raised when session.interrupt() fails in the agent's start_turn handler.
+      // We handle it by showing "不允许打断" ("interruption not allowed") on the button
+      // instead of logging it as an error.
+      if (errorMessage.includes("Application error in method handler") ||
+          errorMessage.includes("Application error") ||
+          errorCode === 13 || // custom code raised by the agent's start_turn handler
+          (isAgentSpeaking && errorMessage.includes("interrupt"))) {
+        // Log at debug level only, to avoid error popups for an expected failure
+        if (process.env.NODE_ENV === 'development') {
+          console.log("Interrupt rejected (expected behavior):", errorMessage);
+        }
+        setInterruptRejected(true);
+        // Clear the rejection message after 3 seconds
+        setTimeout(() => setInterruptRejected(false), 3000);
+        return;
+      }
+
+      // If the agent is speaking, check for common rejection indicators
+      if (isAgentSpeaking) {
+        if (errorMessage.includes("reject") ||
+            errorMessage.includes("not allowed") ||
+            errorCode === 403 || // Forbidden
+            errorCode === 409) { // Conflict
+          // Suppress error logging for expected rejections
+          if (process.env.NODE_ENV === 'development') {
+            console.log("Interrupt rejected:", errorMessage);
+          }
+          setInterruptRejected(true);
+          // Clear the rejection message after 3 seconds
+          setTimeout(() => setInterruptRejected(false), 3000);
+          return;
+        }
+      }
+
+      // Only log and surface truly unexpected errors
+      console.error("Unexpected error in push-to-talk:", error);
+      const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered.";
+      setToastMessage({ message: defaultErrorMessage, type: "error" });
+    }
+  };
+
+  const handlePushToTalkEnd = useCallback(async () => {
+    if (!room || !voiceAssistant.agent || !isPushToTalkActive) return;
+
+    try {
+      await room.localParticipant.performRpc({
+        destinationIdentity: voiceAssistant.agent.identity,
+        method: "end_turn",
+        payload: "",
+      });
+      setIsPushToTalkActive(false);
+      setInterruptRejected(false);
+    } catch (error: any) {
+      console.error("Failed to end turn:", error);
+      // Don't show an error toast here; end_turn may be called during cleanup
+      setIsPushToTalkActive(false);
+      setInterruptRejected(false);
+    }
+  }, [room, voiceAssistant.agent, isPushToTalkActive]);
+
+  const handlePushToTalkCancel = useCallback(async () => {
+    if (!room || !voiceAssistant.agent || !isPushToTalkActive) return;
+
+    try {
+      await room.localParticipant.performRpc({
+        destinationIdentity: voiceAssistant.agent.identity,
+        method: "cancel_turn",
+        payload: "",
+      });
+      setIsPushToTalkActive(false);
+      setInterruptRejected(false);
+    } catch (error) {
+      console.error("Failed to cancel turn:", error);
+      setIsPushToTalkActive(false);
+      setInterruptRejected(false);
+    }
+  }, [room, voiceAssistant.agent, isPushToTalkActive]);
+
+  // Mouse events for push-to-talk
+  const handlePushToTalkMouseDown = (e: React.MouseEvent) => {
+    e.preventDefault();
+    handlePushToTalkStart();
+  };
+
+  const handlePushToTalkMouseUp = (e: React.MouseEvent) => {
+    e.preventDefault();
+    handlePushToTalkEnd();
+  };
+
+  // Touch events for push-to-talk
+  const handlePushToTalkTouchStart = (e: React.TouchEvent) => {
+    e.preventDefault();
+    handlePushToTalkStart();
+  };
+
+  const handlePushToTalkTouchEnd = (e: React.TouchEvent) => {
+    e.preventDefault();
+    handlePushToTalkEnd();
+  };
+
+  // Window blur and Escape cancel the turn; global mouseup/touchend end it,
+  // even when the pointer is released outside the button.
+  useEffect(() => {
+    if (!isPushToTalkActive) return;
+
+    const handleBlur = () => {
+      handlePushToTalkCancel();
+    };
+
+    const handleKeyDown = (e: KeyboardEvent) => {
+      if (e.key === "Escape") {
+        handlePushToTalkCancel();
+      }
+    };
+
+    const handleGlobalMouseUp = () => {
+      handlePushToTalkEnd();
+    };
+
+    const handleGlobalTouchEnd = () => {
+      handlePushToTalkEnd();
+    };
+
+    window.addEventListener("blur", handleBlur);
+    window.addEventListener("keydown", handleKeyDown);
+    window.addEventListener("mouseup", handleGlobalMouseUp);
+    window.addEventListener("touchend", handleGlobalTouchEnd);
+
+    return () => {
+      window.removeEventListener("blur", handleBlur);
+      window.removeEventListener("keydown", handleKeyDown);
+      window.removeEventListener("mouseup", handleGlobalMouseUp);
+      window.removeEventListener("touchend", handleGlobalTouchEnd);
+    };
+  }, [isPushToTalkActive, handlePushToTalkCancel, handlePushToTalkEnd]);
+
+  // Clean up push-to-talk state on disconnect
+  useEffect(() => {
+    if (roomState === ConnectionState.Disconnected && isPushToTalkActive) {
+      setIsPushToTalkActive(false);
+      setInterruptRejected(false);
+    }
+  }, [roomState, isPushToTalkActive]);
+
+  // Reset interrupt rejection shortly after the agent stops speaking
+  useEffect(() => {
+    if (!isAgentSpeaking && interruptRejected) {
+      const timer = setTimeout(() => setInterruptRejected(false), 1000);
+      return () => clearTimeout(timer);
+    }
+  }, [isAgentSpeaking, interruptRejected]);
+
   const handleFileChange = (event: React.ChangeEvent) => {
     const file = event.target.files?.[0];
     if (file && onCapture) {
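The supportsPushToTalk memo above keys off a "push-to-talk" participant attribute on the agent, which the agent-side diff never sets. A hypothetical helper (name and placement are mine, not part of this patch) that would make that check pass:

from livekit import rtc

# Hypothetical, not in this diff: advertise push-to-talk support as a participant
# attribute so the client-side supportsPushToTalk check evaluates to true.
# In the agent this would be called from entrypoint() with ctx.room.
async def advertise_push_to_talk(room: rtc.Room) -> None:
    await room.local_participant.set_attributes({"push-to-talk": "1"})
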
@@ -807,30 +1010,57 @@ export function PhoneSimulator({
 [JSX markup not recoverable from this copy of the diff. Surviving structure: the old bottom control row is removed and replaced by a centered, larger push-to-talk button, rendered only when phoneMode is neither "important_message" nor "hand_off" and voiceAssistant.agent is present, followed by an "Other Controls" row that keeps the previous buttons behind the same phoneMode check.]
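Because the turn protocol is just three RPCs, it can also be exercised without the browser UI. Below is a rough Python smoke test, not part of this patch; URL, token, agent identity, and the sleep (standing in for actual speech) are placeholders.

import asyncio

from livekit import rtc

# Rough smoke test, not part of this diff: drive the agent's push-to-talk RPCs
# from a plain LiveKit client. A real test would also publish microphone audio
# between start_turn and end_turn; the sleep is only a stand-in.
async def push_to_talk_once(url: str, token: str, agent_identity: str) -> None:
    room = rtc.Room()
    await room.connect(url, token)
    try:
        await room.local_participant.perform_rpc(
            destination_identity=agent_identity, method="start_turn", payload=""
        )
        await asyncio.sleep(3.0)
        await room.local_participant.perform_rpc(
            destination_identity=agent_identity, method="end_turn", payload=""
        )
    except rtc.RpcError as exc:
        # start_turn raises code 13 when the agent refuses to be interrupted
        print(f"push-to-talk RPC failed: {exc}")
    finally:
        await room.disconnect()
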
diff --git a/src/pages/index.tsx b/src/pages/index.tsx
index 3aa2822..a581108 100644
--- a/src/pages/index.tsx
+++ b/src/pages/index.tsx
@@ -108,6 +108,16 @@ export function HomeInner() {
         token={token}
         connect={shouldConnect}
         onError={(e) => {
+          // Filter out expected errors from push-to-talk interrupt failures;
+          // these are handled gracefully in the PhoneSimulator component.
+          if (e.message?.includes("Application error in method handler") ||
+              e.message?.includes("Method not supported at destination")) {
+            // Silently ignore - expected and already handled in PhoneSimulator
+            if (process.env.NODE_ENV === 'development') {
+              console.log("Filtered expected error:", e.message);
+            }
+            return;
+          }
           setToastMessage({ message: e.message, type: "error" });
           console.error(e);
         }}