Compare commits
2 Commits
5be6ab12f3
...
6652a5cd43
| Author | SHA1 | Date | |
|---|---|---|---|
| 6652a5cd43 | |||
| d942222f11 |
@ -898,8 +898,12 @@ class MyAgent(Agent):
|
||||
|
||||
# Interrupt speech if user makes a selection while agent is speaking
|
||||
if speech_handle and hasattr(speech_handle, "interrupt"):
|
||||
speech_handle.interrupt()
|
||||
logger.info("Interrupted speech due to user selection")
|
||||
try:
|
||||
speech_handle.interrupt()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to interrupt speech: {e}")
|
||||
else:
|
||||
logger.info("Interrupted speech due to user selection")
|
||||
|
||||
logger.info(f"User made selection: {response}")
|
||||
|
||||
@ -1135,13 +1139,9 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
|
||||
try:
|
||||
session.interrupt()
|
||||
except RuntimeError as e:
|
||||
logger.error(f"Failed to interrupt session: {e}")
|
||||
# Raise RPC error so client can detect interrupt failure
|
||||
# Use ERROR_INTERNAL (code 13) to indicate application error
|
||||
raise rtc.RpcError(
|
||||
code=13, # ERROR_INTERNAL
|
||||
message="Application error in method handler"
|
||||
)
|
||||
logger.info(f"Cannot interrupt session (agent is speaking): {e}")
|
||||
# Return a message instead of raising an error
|
||||
return json.dumps({"success": False, "message": "不能打断"})
|
||||
|
||||
session.clear_user_turn()
|
||||
|
||||
@ -1149,6 +1149,8 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
|
||||
room_io.set_participant(data.caller_identity)
|
||||
session.input.set_audio_enabled(True)
|
||||
|
||||
return json.dumps({"success": True})
|
||||
|
||||
@ctx.room.local_participant.register_rpc_method("end_turn")
|
||||
async def end_turn(data: rtc.RpcInvocationData):
|
||||
session.input.set_audio_enabled(False)
|
||||
@ -1169,12 +1171,41 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
|
||||
@ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
|
||||
async def switch_ptt_and_rt(data: rtc.RpcInvocationData):
|
||||
nonlocal _talking_mode
|
||||
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
|
||||
if _talking_mode == "push_to_talk":
|
||||
session.input.set_audio_enabled(False)
|
||||
else:
|
||||
session.input.set_audio_enabled(True)
|
||||
return json.dumps({"success": True, "mode": _talking_mode})
|
||||
try:
|
||||
# Parse the payload to get the target mode
|
||||
payload = json.loads(data.payload) if data.payload else {}
|
||||
target_mode = payload.get("mode")
|
||||
|
||||
# Validate and set the mode
|
||||
if target_mode in ["push_to_talk", "realtime"]:
|
||||
_talking_mode = target_mode
|
||||
logger.info(f"Switching talking mode to: {_talking_mode}")
|
||||
else:
|
||||
# If invalid mode, toggle from current state
|
||||
logger.warning(f"Invalid mode '{target_mode}', toggling from current state")
|
||||
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
|
||||
logger.info(f"Toggled talking mode to: {_talking_mode}")
|
||||
|
||||
# Apply the mode settings
|
||||
room_io.set_participant(data.caller_identity)
|
||||
if _talking_mode == "push_to_talk":
|
||||
session.input.set_audio_enabled(False)
|
||||
logger.info("Setting audio enabled to False (PTT mode)")
|
||||
else:
|
||||
session.input.set_audio_enabled(True)
|
||||
logger.info("Setting audio enabled to True (realtime mode)")
|
||||
|
||||
return json.dumps({"success": True, "mode": _talking_mode})
|
||||
except json.JSONDecodeError:
|
||||
logger.error(f"Failed to parse switch_ptt_and_rt payload: {data.payload}")
|
||||
# Fallback to toggle behavior
|
||||
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
|
||||
room_io.set_participant(data.caller_identity)
|
||||
if _talking_mode == "push_to_talk":
|
||||
session.input.set_audio_enabled(False)
|
||||
else:
|
||||
session.input.set_audio_enabled(True)
|
||||
return json.dumps({"success": True, "mode": _talking_mode})
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
@ -64,13 +64,13 @@ export function ChatOverlay({
|
||||
containerSize.height * 0.6
|
||||
);
|
||||
|
||||
// Position overlay at center when first shown
|
||||
// Position overlay at center (slightly moved up) when first shown
|
||||
const hasPositionedRef = useRef(false);
|
||||
useEffect(() => {
|
||||
if (isVisible && containerRef.current && containerSize.width > 0 && overlayWidth > 0 && overlayHeight > 0) {
|
||||
// Calculate center position
|
||||
// Calculate center position, moved up by 15% of container height
|
||||
const centerX = (containerSize.width - overlayWidth) / 2;
|
||||
const centerY = (containerSize.height - overlayHeight) / 2;
|
||||
const centerY = (containerSize.height - overlayHeight) / 2 - (containerSize.height * 0.15);
|
||||
|
||||
// Only auto-position on first show (when position is at origin)
|
||||
if (!hasPositionedRef.current && position.x === 0 && position.y === 0) {
|
||||
|
||||
@ -475,14 +475,27 @@ export function PhoneSimulator({
|
||||
const handleModeSwitch = async () => {
|
||||
if (!room || !voiceAssistant.agent) return;
|
||||
|
||||
// Determine the target mode (toggle from current state)
|
||||
const targetMode = isPushToTalkMode ? "realtime" : "push_to_talk";
|
||||
|
||||
try {
|
||||
await room.localParticipant.performRpc({
|
||||
const response = await room.localParticipant.performRpc({
|
||||
destinationIdentity: voiceAssistant.agent.identity,
|
||||
method: "switch_ptt_and_rt",
|
||||
payload: "",
|
||||
payload: JSON.stringify({ mode: targetMode }),
|
||||
});
|
||||
// Toggle mode on success
|
||||
setIsPushToTalkMode(prev => !prev);
|
||||
|
||||
// Parse the response to confirm the mode was set
|
||||
try {
|
||||
const responseData = JSON.parse(response);
|
||||
const confirmedMode = responseData.mode;
|
||||
// Update state based on server response
|
||||
setIsPushToTalkMode(confirmedMode === "push_to_talk");
|
||||
} catch (parseError) {
|
||||
// If parsing fails, update state based on what we sent
|
||||
console.warn("Failed to parse mode switch response, using sent mode:", parseError);
|
||||
setIsPushToTalkMode(targetMode === "push_to_talk");
|
||||
}
|
||||
} catch (error: any) {
|
||||
console.error("Failed to switch mode:", error);
|
||||
// Don't show error toast for mode switch failures, just log
|
||||
@ -502,16 +515,38 @@ export function PhoneSimulator({
|
||||
setInterruptRejected(false);
|
||||
|
||||
try {
|
||||
await room.localParticipant.performRpc({
|
||||
const response = await room.localParticipant.performRpc({
|
||||
destinationIdentity: voiceAssistant.agent.identity,
|
||||
method: "start_turn",
|
||||
payload: "",
|
||||
});
|
||||
setIsPushToTalkActive(true);
|
||||
setInterruptRejected(false);
|
||||
|
||||
// Parse the response to check for success/failure
|
||||
try {
|
||||
const responseData = JSON.parse(response);
|
||||
if (responseData.success === false) {
|
||||
// Interrupt was rejected, show message
|
||||
if (responseData.message === "不能打断") {
|
||||
setInterruptRejected(true);
|
||||
// Clear the rejection message after 3 seconds
|
||||
setTimeout(() => setInterruptRejected(false), 3000);
|
||||
if (process.env.NODE_ENV === 'development') {
|
||||
console.log("Interrupt rejected (cannot interrupt):", responseData.message);
|
||||
}
|
||||
return;
|
||||
}
|
||||
} else if (responseData.success === true) {
|
||||
// Successfully started turn
|
||||
setIsPushToTalkActive(true);
|
||||
setInterruptRejected(false);
|
||||
}
|
||||
} catch (parseError) {
|
||||
// If response is not JSON, assume success (backward compatibility)
|
||||
setIsPushToTalkActive(true);
|
||||
setInterruptRejected(false);
|
||||
}
|
||||
} catch (error: any) {
|
||||
// Prevent error from propagating to React error boundary
|
||||
// by handling all expected errors here
|
||||
// Handle RPC errors (method not found, etc.)
|
||||
setIsPushToTalkActive(false);
|
||||
|
||||
const errorMessage = error?.message || "";
|
||||
@ -527,45 +562,6 @@ export function PhoneSimulator({
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for "Application error in method handler" - this indicates interrupt failed
|
||||
// This error is raised when session.interrupt() fails in the agent
|
||||
// We handle this gracefully by showing "不允许打断" on the button, so we don't log it as an error
|
||||
if (errorMessage.includes("Application error in method handler") ||
|
||||
errorMessage.includes("Application error") ||
|
||||
errorCode === 13 || // ERROR_INTERNAL (RpcErrorCode.ERROR_INTERNAL)
|
||||
(isAgentSpeaking && errorMessage.includes("interrupt"))) {
|
||||
// Suppress error logging for expected interrupt failures
|
||||
// Only log at debug level to avoid error popups
|
||||
if (process.env.NODE_ENV === 'development') {
|
||||
console.log("Interrupt rejected (expected behavior):", errorMessage);
|
||||
}
|
||||
setInterruptRejected(true);
|
||||
// Clear the rejection message after 3 seconds
|
||||
setTimeout(() => setInterruptRejected(false), 3000);
|
||||
// Explicitly prevent error from propagating
|
||||
error.preventDefault?.();
|
||||
error.stopPropagation?.();
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if agent is speaking and the error suggests interruption was rejected
|
||||
if (isAgentSpeaking) {
|
||||
// Check for common rejection indicators
|
||||
if (errorMessage.includes("reject") ||
|
||||
errorMessage.includes("not allowed") ||
|
||||
errorCode === 403 || // Forbidden
|
||||
errorCode === 409) { // Conflict
|
||||
// Suppress error logging for expected rejections
|
||||
if (process.env.NODE_ENV === 'development') {
|
||||
console.log("Interrupt rejected:", errorMessage);
|
||||
}
|
||||
setInterruptRejected(true);
|
||||
// Clear the rejection message after 3 seconds
|
||||
setTimeout(() => setInterruptRejected(false), 3000);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Only log and show error for unexpected errors
|
||||
console.error("Unexpected error in push-to-talk:", error);
|
||||
const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered.";
|
||||
@ -1272,8 +1268,8 @@ export function PhoneSimulator({
|
||||
</button>
|
||||
</div>
|
||||
) : (
|
||||
<div className="w-full flex items-center justify-between gap-8">
|
||||
{/* Left side: Mic Toggle */}
|
||||
<div className="w-full flex items-center justify-center gap-4">
|
||||
{/* Mic Toggle */}
|
||||
{phoneMode !== "hand_off" && (
|
||||
<button
|
||||
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
|
||||
@ -1291,7 +1287,7 @@ export function PhoneSimulator({
|
||||
</button>
|
||||
)}
|
||||
|
||||
{/* Right side: End Call Button */}
|
||||
{/* End Call Button */}
|
||||
<button
|
||||
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
|
||||
onClick={handleDisconnect}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user