Compare commits

...

2 Commits

Author SHA1 Message Date
6652a5cd43 update logic of switch_ptt_and_rt 2025-12-17 22:17:44 +08:00
d942222f11 update end call button position 2025-12-17 21:41:36 +08:00
3 changed files with 96 additions and 69 deletions

View File

@ -898,8 +898,12 @@ class MyAgent(Agent):
# Interrupt speech if user makes a selection while agent is speaking
if speech_handle and hasattr(speech_handle, "interrupt"):
speech_handle.interrupt()
logger.info("Interrupted speech due to user selection")
try:
speech_handle.interrupt()
except Exception as e:
logger.error(f"Failed to interrupt speech: {e}")
else:
logger.info("Interrupted speech due to user selection")
logger.info(f"User made selection: {response}")
@ -1135,19 +1139,17 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
try:
session.interrupt()
except RuntimeError as e:
logger.error(f"Failed to interrupt session: {e}")
# Raise RPC error so client can detect interrupt failure
# Use ERROR_INTERNAL (code 13) to indicate application error
raise rtc.RpcError(
code=13, # ERROR_INTERNAL
message="Application error in method handler"
)
logger.info(f"Cannot interrupt session (agent is speaking): {e}")
# Return a message instead of raising an error
return json.dumps({"success": False, "message": "不能打断"})
session.clear_user_turn()
# listen to the caller if multi-user
room_io.set_participant(data.caller_identity)
session.input.set_audio_enabled(True)
return json.dumps({"success": True})
@ctx.room.local_participant.register_rpc_method("end_turn")
async def end_turn(data: rtc.RpcInvocationData):
@ -1169,12 +1171,41 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
@ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
async def switch_ptt_and_rt(data: rtc.RpcInvocationData):
nonlocal _talking_mode
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
else:
session.input.set_audio_enabled(True)
return json.dumps({"success": True, "mode": _talking_mode})
try:
# Parse the payload to get the target mode
payload = json.loads(data.payload) if data.payload else {}
target_mode = payload.get("mode")
# Validate and set the mode
if target_mode in ["push_to_talk", "realtime"]:
_talking_mode = target_mode
logger.info(f"Switching talking mode to: {_talking_mode}")
else:
# If invalid mode, toggle from current state
logger.warning(f"Invalid mode '{target_mode}', toggling from current state")
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
logger.info(f"Toggled talking mode to: {_talking_mode}")
# Apply the mode settings
room_io.set_participant(data.caller_identity)
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
logger.info("Setting audio enabled to False (PTT mode)")
else:
session.input.set_audio_enabled(True)
logger.info("Setting audio enabled to True (realtime mode)")
return json.dumps({"success": True, "mode": _talking_mode})
except json.JSONDecodeError:
logger.error(f"Failed to parse switch_ptt_and_rt payload: {data.payload}")
# Fallback to toggle behavior
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
room_io.set_participant(data.caller_identity)
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
else:
session.input.set_audio_enabled(True)
return json.dumps({"success": True, "mode": _talking_mode})
if __name__ == "__main__":
parser = argparse.ArgumentParser()

View File

@ -64,13 +64,13 @@ export function ChatOverlay({
containerSize.height * 0.6
);
// Position overlay at center when first shown
// Position overlay at center (slightly moved up) when first shown
const hasPositionedRef = useRef(false);
useEffect(() => {
if (isVisible && containerRef.current && containerSize.width > 0 && overlayWidth > 0 && overlayHeight > 0) {
// Calculate center position
// Calculate center position, moved up by 15% of container height
const centerX = (containerSize.width - overlayWidth) / 2;
const centerY = (containerSize.height - overlayHeight) / 2;
const centerY = (containerSize.height - overlayHeight) / 2 - (containerSize.height * 0.15);
// Only auto-position on first show (when position is at origin)
if (!hasPositionedRef.current && position.x === 0 && position.y === 0) {

View File

@ -475,14 +475,27 @@ export function PhoneSimulator({
const handleModeSwitch = async () => {
if (!room || !voiceAssistant.agent) return;
// Determine the target mode (toggle from current state)
const targetMode = isPushToTalkMode ? "realtime" : "push_to_talk";
try {
await room.localParticipant.performRpc({
const response = await room.localParticipant.performRpc({
destinationIdentity: voiceAssistant.agent.identity,
method: "switch_ptt_and_rt",
payload: "",
payload: JSON.stringify({ mode: targetMode }),
});
// Toggle mode on success
setIsPushToTalkMode(prev => !prev);
// Parse the response to confirm the mode was set
try {
const responseData = JSON.parse(response);
const confirmedMode = responseData.mode;
// Update state based on server response
setIsPushToTalkMode(confirmedMode === "push_to_talk");
} catch (parseError) {
// If parsing fails, update state based on what we sent
console.warn("Failed to parse mode switch response, using sent mode:", parseError);
setIsPushToTalkMode(targetMode === "push_to_talk");
}
} catch (error: any) {
console.error("Failed to switch mode:", error);
// Don't show error toast for mode switch failures, just log
@ -502,16 +515,38 @@ export function PhoneSimulator({
setInterruptRejected(false);
try {
await room.localParticipant.performRpc({
const response = await room.localParticipant.performRpc({
destinationIdentity: voiceAssistant.agent.identity,
method: "start_turn",
payload: "",
});
setIsPushToTalkActive(true);
setInterruptRejected(false);
// Parse the response to check for success/failure
try {
const responseData = JSON.parse(response);
if (responseData.success === false) {
// Interrupt was rejected, show message
if (responseData.message === "不能打断") {
setInterruptRejected(true);
// Clear the rejection message after 3 seconds
setTimeout(() => setInterruptRejected(false), 3000);
if (process.env.NODE_ENV === 'development') {
console.log("Interrupt rejected (cannot interrupt):", responseData.message);
}
return;
}
} else if (responseData.success === true) {
// Successfully started turn
setIsPushToTalkActive(true);
setInterruptRejected(false);
}
} catch (parseError) {
// If response is not JSON, assume success (backward compatibility)
setIsPushToTalkActive(true);
setInterruptRejected(false);
}
} catch (error: any) {
// Prevent error from propagating to React error boundary
// by handling all expected errors here
// Handle RPC errors (method not found, etc.)
setIsPushToTalkActive(false);
const errorMessage = error?.message || "";
@ -527,45 +562,6 @@ export function PhoneSimulator({
return;
}
// Check for "Application error in method handler" - this indicates interrupt failed
// This error is raised when session.interrupt() fails in the agent
// We handle this gracefully by showing "不允许打断" on the button, so we don't log it as an error
if (errorMessage.includes("Application error in method handler") ||
errorMessage.includes("Application error") ||
errorCode === 13 || // ERROR_INTERNAL (RpcErrorCode.ERROR_INTERNAL)
(isAgentSpeaking && errorMessage.includes("interrupt"))) {
// Suppress error logging for expected interrupt failures
// Only log at debug level to avoid error popups
if (process.env.NODE_ENV === 'development') {
console.log("Interrupt rejected (expected behavior):", errorMessage);
}
setInterruptRejected(true);
// Clear the rejection message after 3 seconds
setTimeout(() => setInterruptRejected(false), 3000);
// Explicitly prevent error from propagating
error.preventDefault?.();
error.stopPropagation?.();
return;
}
// Check if agent is speaking and the error suggests interruption was rejected
if (isAgentSpeaking) {
// Check for common rejection indicators
if (errorMessage.includes("reject") ||
errorMessage.includes("not allowed") ||
errorCode === 403 || // Forbidden
errorCode === 409) { // Conflict
// Suppress error logging for expected rejections
if (process.env.NODE_ENV === 'development') {
console.log("Interrupt rejected:", errorMessage);
}
setInterruptRejected(true);
// Clear the rejection message after 3 seconds
setTimeout(() => setInterruptRejected(false), 3000);
return;
}
}
// Only log and show error for unexpected errors
console.error("Unexpected error in push-to-talk:", error);
const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered.";
@ -1272,8 +1268,8 @@ export function PhoneSimulator({
</button>
</div>
) : (
<div className="w-full flex items-center justify-between gap-8">
{/* Left side: Mic Toggle */}
<div className="w-full flex items-center justify-center gap-4">
{/* Mic Toggle */}
{phoneMode !== "hand_off" && (
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
@ -1291,7 +1287,7 @@ export function PhoneSimulator({
</button>
)}
{/* Right side: End Call Button */}
{/* End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}