Compare commits
2 Commits
5be6ab12f3
...
6652a5cd43
| Author | SHA1 | Date | |
|---|---|---|---|
| 6652a5cd43 | |||
| d942222f11 |
@ -898,8 +898,12 @@ class MyAgent(Agent):
|
|||||||
|
|
||||||
# Interrupt speech if user makes a selection while agent is speaking
|
# Interrupt speech if user makes a selection while agent is speaking
|
||||||
if speech_handle and hasattr(speech_handle, "interrupt"):
|
if speech_handle and hasattr(speech_handle, "interrupt"):
|
||||||
speech_handle.interrupt()
|
try:
|
||||||
logger.info("Interrupted speech due to user selection")
|
speech_handle.interrupt()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to interrupt speech: {e}")
|
||||||
|
else:
|
||||||
|
logger.info("Interrupted speech due to user selection")
|
||||||
|
|
||||||
logger.info(f"User made selection: {response}")
|
logger.info(f"User made selection: {response}")
|
||||||
|
|
||||||
@ -1135,13 +1139,9 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
|
|||||||
try:
|
try:
|
||||||
session.interrupt()
|
session.interrupt()
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
logger.error(f"Failed to interrupt session: {e}")
|
logger.info(f"Cannot interrupt session (agent is speaking): {e}")
|
||||||
# Raise RPC error so client can detect interrupt failure
|
# Return a message instead of raising an error
|
||||||
# Use ERROR_INTERNAL (code 13) to indicate application error
|
return json.dumps({"success": False, "message": "不能打断"})
|
||||||
raise rtc.RpcError(
|
|
||||||
code=13, # ERROR_INTERNAL
|
|
||||||
message="Application error in method handler"
|
|
||||||
)
|
|
||||||
|
|
||||||
session.clear_user_turn()
|
session.clear_user_turn()
|
||||||
|
|
||||||
@ -1149,6 +1149,8 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
|
|||||||
room_io.set_participant(data.caller_identity)
|
room_io.set_participant(data.caller_identity)
|
||||||
session.input.set_audio_enabled(True)
|
session.input.set_audio_enabled(True)
|
||||||
|
|
||||||
|
return json.dumps({"success": True})
|
||||||
|
|
||||||
@ctx.room.local_participant.register_rpc_method("end_turn")
|
@ctx.room.local_participant.register_rpc_method("end_turn")
|
||||||
async def end_turn(data: rtc.RpcInvocationData):
|
async def end_turn(data: rtc.RpcInvocationData):
|
||||||
session.input.set_audio_enabled(False)
|
session.input.set_audio_enabled(False)
|
||||||
@ -1169,12 +1171,41 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
|
|||||||
@ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
|
@ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
|
||||||
async def switch_ptt_and_rt(data: rtc.RpcInvocationData):
|
async def switch_ptt_and_rt(data: rtc.RpcInvocationData):
|
||||||
nonlocal _talking_mode
|
nonlocal _talking_mode
|
||||||
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
|
try:
|
||||||
if _talking_mode == "push_to_talk":
|
# Parse the payload to get the target mode
|
||||||
session.input.set_audio_enabled(False)
|
payload = json.loads(data.payload) if data.payload else {}
|
||||||
else:
|
target_mode = payload.get("mode")
|
||||||
session.input.set_audio_enabled(True)
|
|
||||||
return json.dumps({"success": True, "mode": _talking_mode})
|
# Validate and set the mode
|
||||||
|
if target_mode in ["push_to_talk", "realtime"]:
|
||||||
|
_talking_mode = target_mode
|
||||||
|
logger.info(f"Switching talking mode to: {_talking_mode}")
|
||||||
|
else:
|
||||||
|
# If invalid mode, toggle from current state
|
||||||
|
logger.warning(f"Invalid mode '{target_mode}', toggling from current state")
|
||||||
|
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
|
||||||
|
logger.info(f"Toggled talking mode to: {_talking_mode}")
|
||||||
|
|
||||||
|
# Apply the mode settings
|
||||||
|
room_io.set_participant(data.caller_identity)
|
||||||
|
if _talking_mode == "push_to_talk":
|
||||||
|
session.input.set_audio_enabled(False)
|
||||||
|
logger.info("Setting audio enabled to False (PTT mode)")
|
||||||
|
else:
|
||||||
|
session.input.set_audio_enabled(True)
|
||||||
|
logger.info("Setting audio enabled to True (realtime mode)")
|
||||||
|
|
||||||
|
return json.dumps({"success": True, "mode": _talking_mode})
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.error(f"Failed to parse switch_ptt_and_rt payload: {data.payload}")
|
||||||
|
# Fallback to toggle behavior
|
||||||
|
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
|
||||||
|
room_io.set_participant(data.caller_identity)
|
||||||
|
if _talking_mode == "push_to_talk":
|
||||||
|
session.input.set_audio_enabled(False)
|
||||||
|
else:
|
||||||
|
session.input.set_audio_enabled(True)
|
||||||
|
return json.dumps({"success": True, "mode": _talking_mode})
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
|||||||
@ -64,13 +64,13 @@ export function ChatOverlay({
|
|||||||
containerSize.height * 0.6
|
containerSize.height * 0.6
|
||||||
);
|
);
|
||||||
|
|
||||||
// Position overlay at center when first shown
|
// Position overlay at center (slightly moved up) when first shown
|
||||||
const hasPositionedRef = useRef(false);
|
const hasPositionedRef = useRef(false);
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (isVisible && containerRef.current && containerSize.width > 0 && overlayWidth > 0 && overlayHeight > 0) {
|
if (isVisible && containerRef.current && containerSize.width > 0 && overlayWidth > 0 && overlayHeight > 0) {
|
||||||
// Calculate center position
|
// Calculate center position, moved up by 15% of container height
|
||||||
const centerX = (containerSize.width - overlayWidth) / 2;
|
const centerX = (containerSize.width - overlayWidth) / 2;
|
||||||
const centerY = (containerSize.height - overlayHeight) / 2;
|
const centerY = (containerSize.height - overlayHeight) / 2 - (containerSize.height * 0.15);
|
||||||
|
|
||||||
// Only auto-position on first show (when position is at origin)
|
// Only auto-position on first show (when position is at origin)
|
||||||
if (!hasPositionedRef.current && position.x === 0 && position.y === 0) {
|
if (!hasPositionedRef.current && position.x === 0 && position.y === 0) {
|
||||||
|
|||||||
@ -475,14 +475,27 @@ export function PhoneSimulator({
|
|||||||
const handleModeSwitch = async () => {
|
const handleModeSwitch = async () => {
|
||||||
if (!room || !voiceAssistant.agent) return;
|
if (!room || !voiceAssistant.agent) return;
|
||||||
|
|
||||||
|
// Determine the target mode (toggle from current state)
|
||||||
|
const targetMode = isPushToTalkMode ? "realtime" : "push_to_talk";
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await room.localParticipant.performRpc({
|
const response = await room.localParticipant.performRpc({
|
||||||
destinationIdentity: voiceAssistant.agent.identity,
|
destinationIdentity: voiceAssistant.agent.identity,
|
||||||
method: "switch_ptt_and_rt",
|
method: "switch_ptt_and_rt",
|
||||||
payload: "",
|
payload: JSON.stringify({ mode: targetMode }),
|
||||||
});
|
});
|
||||||
// Toggle mode on success
|
|
||||||
setIsPushToTalkMode(prev => !prev);
|
// Parse the response to confirm the mode was set
|
||||||
|
try {
|
||||||
|
const responseData = JSON.parse(response);
|
||||||
|
const confirmedMode = responseData.mode;
|
||||||
|
// Update state based on server response
|
||||||
|
setIsPushToTalkMode(confirmedMode === "push_to_talk");
|
||||||
|
} catch (parseError) {
|
||||||
|
// If parsing fails, update state based on what we sent
|
||||||
|
console.warn("Failed to parse mode switch response, using sent mode:", parseError);
|
||||||
|
setIsPushToTalkMode(targetMode === "push_to_talk");
|
||||||
|
}
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.error("Failed to switch mode:", error);
|
console.error("Failed to switch mode:", error);
|
||||||
// Don't show error toast for mode switch failures, just log
|
// Don't show error toast for mode switch failures, just log
|
||||||
@ -502,16 +515,38 @@ export function PhoneSimulator({
|
|||||||
setInterruptRejected(false);
|
setInterruptRejected(false);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await room.localParticipant.performRpc({
|
const response = await room.localParticipant.performRpc({
|
||||||
destinationIdentity: voiceAssistant.agent.identity,
|
destinationIdentity: voiceAssistant.agent.identity,
|
||||||
method: "start_turn",
|
method: "start_turn",
|
||||||
payload: "",
|
payload: "",
|
||||||
});
|
});
|
||||||
setIsPushToTalkActive(true);
|
|
||||||
setInterruptRejected(false);
|
// Parse the response to check for success/failure
|
||||||
|
try {
|
||||||
|
const responseData = JSON.parse(response);
|
||||||
|
if (responseData.success === false) {
|
||||||
|
// Interrupt was rejected, show message
|
||||||
|
if (responseData.message === "不能打断") {
|
||||||
|
setInterruptRejected(true);
|
||||||
|
// Clear the rejection message after 3 seconds
|
||||||
|
setTimeout(() => setInterruptRejected(false), 3000);
|
||||||
|
if (process.env.NODE_ENV === 'development') {
|
||||||
|
console.log("Interrupt rejected (cannot interrupt):", responseData.message);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else if (responseData.success === true) {
|
||||||
|
// Successfully started turn
|
||||||
|
setIsPushToTalkActive(true);
|
||||||
|
setInterruptRejected(false);
|
||||||
|
}
|
||||||
|
} catch (parseError) {
|
||||||
|
// If response is not JSON, assume success (backward compatibility)
|
||||||
|
setIsPushToTalkActive(true);
|
||||||
|
setInterruptRejected(false);
|
||||||
|
}
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
// Prevent error from propagating to React error boundary
|
// Handle RPC errors (method not found, etc.)
|
||||||
// by handling all expected errors here
|
|
||||||
setIsPushToTalkActive(false);
|
setIsPushToTalkActive(false);
|
||||||
|
|
||||||
const errorMessage = error?.message || "";
|
const errorMessage = error?.message || "";
|
||||||
@ -527,45 +562,6 @@ export function PhoneSimulator({
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for "Application error in method handler" - this indicates interrupt failed
|
|
||||||
// This error is raised when session.interrupt() fails in the agent
|
|
||||||
// We handle this gracefully by showing "不允许打断" on the button, so we don't log it as an error
|
|
||||||
if (errorMessage.includes("Application error in method handler") ||
|
|
||||||
errorMessage.includes("Application error") ||
|
|
||||||
errorCode === 13 || // ERROR_INTERNAL (RpcErrorCode.ERROR_INTERNAL)
|
|
||||||
(isAgentSpeaking && errorMessage.includes("interrupt"))) {
|
|
||||||
// Suppress error logging for expected interrupt failures
|
|
||||||
// Only log at debug level to avoid error popups
|
|
||||||
if (process.env.NODE_ENV === 'development') {
|
|
||||||
console.log("Interrupt rejected (expected behavior):", errorMessage);
|
|
||||||
}
|
|
||||||
setInterruptRejected(true);
|
|
||||||
// Clear the rejection message after 3 seconds
|
|
||||||
setTimeout(() => setInterruptRejected(false), 3000);
|
|
||||||
// Explicitly prevent error from propagating
|
|
||||||
error.preventDefault?.();
|
|
||||||
error.stopPropagation?.();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if agent is speaking and the error suggests interruption was rejected
|
|
||||||
if (isAgentSpeaking) {
|
|
||||||
// Check for common rejection indicators
|
|
||||||
if (errorMessage.includes("reject") ||
|
|
||||||
errorMessage.includes("not allowed") ||
|
|
||||||
errorCode === 403 || // Forbidden
|
|
||||||
errorCode === 409) { // Conflict
|
|
||||||
// Suppress error logging for expected rejections
|
|
||||||
if (process.env.NODE_ENV === 'development') {
|
|
||||||
console.log("Interrupt rejected:", errorMessage);
|
|
||||||
}
|
|
||||||
setInterruptRejected(true);
|
|
||||||
// Clear the rejection message after 3 seconds
|
|
||||||
setTimeout(() => setInterruptRejected(false), 3000);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only log and show error for unexpected errors
|
// Only log and show error for unexpected errors
|
||||||
console.error("Unexpected error in push-to-talk:", error);
|
console.error("Unexpected error in push-to-talk:", error);
|
||||||
const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered.";
|
const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered.";
|
||||||
@ -1272,8 +1268,8 @@ export function PhoneSimulator({
|
|||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
) : (
|
) : (
|
||||||
<div className="w-full flex items-center justify-between gap-8">
|
<div className="w-full flex items-center justify-center gap-4">
|
||||||
{/* Left side: Mic Toggle */}
|
{/* Mic Toggle */}
|
||||||
{phoneMode !== "hand_off" && (
|
{phoneMode !== "hand_off" && (
|
||||||
<button
|
<button
|
||||||
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
|
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
|
||||||
@ -1291,7 +1287,7 @@ export function PhoneSimulator({
|
|||||||
</button>
|
</button>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Right side: End Call Button */}
|
{/* End Call Button */}
|
||||||
<button
|
<button
|
||||||
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
|
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
|
||||||
onClick={handleDisconnect}
|
onClick={handleDisconnect}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user