Compare commits

..

14 Commits

Author SHA1 Message Date
950d1ab3d4 Update agents/README.md 2026-02-02 15:28:28 +00:00
28b9a16c4e make chat message overlay right 2025-12-19 10:55:13 +08:00
f1b331d923 try to fix fast reload 2025-12-18 09:41:42 +08:00
739c019404 set chat message overlay draggable 2025-12-18 09:22:01 +08:00
da11561f47 Bug fixed 2025-12-17 23:13:42 +08:00
853e1558b1 fix realtime mode need push to talk once 2025-12-17 22:40:11 +08:00
6652a5cd43 update logic of switch_ptt_and_rt 2025-12-17 22:17:44 +08:00
d942222f11 update endcall button postion 2025-12-17 21:41:36 +08:00
5be6ab12f3 add chat message overlay 2025-12-17 18:59:17 +08:00
eeeed36494 add mic on push-to-talk mode, iphone open in phone tab default 2025-12-17 18:03:37 +08:00
3e0276d6c0 hide nextjs icon 2025-12-17 16:19:28 +08:00
4a6a6619df optimized layout in iphone 2025-12-17 16:17:33 +08:00
1f0365e716 Merge branch 'phone-interface' 2025-12-17 12:04:10 +08:00
7fbb9a5431 fix color theme change bug 2025-12-17 12:04:01 +08:00
9 changed files with 737 additions and 183 deletions

View File

@@ -0,0 +1 @@
use livekit-plugins-volcengine==1.2.9

View File

@@ -898,8 +898,12 @@ class MyAgent(Agent):
# Interrupt speech if user makes a selection while agent is speaking
if speech_handle and hasattr(speech_handle, "interrupt"):
speech_handle.interrupt()
logger.info("Interrupted speech due to user selection")
try:
speech_handle.interrupt()
except Exception as e:
logger.error(f"Failed to interrupt speech: {e}")
else:
logger.info("Interrupted speech due to user selection")
logger.info(f"User made selection: {response}")
@@ -1014,6 +1018,16 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
initial_instructions = participant.attributes.get("instructions")
logger.info(f"User selected instructions: {initial_instructions}")
# Read talking_mode from frontend state
initial_talking_mode = DEFAULT_TALKING_MODE
if participant.attributes.get("talking_mode"):
frontend_talking_mode = participant.attributes.get("talking_mode")
if frontend_talking_mode in ["push_to_talk", "realtime"]:
initial_talking_mode = frontend_talking_mode
logger.info(f"Initializing talking_mode from frontend: {initial_talking_mode}")
else:
logger.warning(f"Invalid talking_mode from frontend: {frontend_talking_mode}, using default: {initial_talking_mode}")
# Replace the datetime and weekday placeholders to avoid KeyError from other braces in the prompt
initial_instructions = initial_instructions.replace("{datetime}", current_time)
initial_instructions = initial_instructions.replace("{weekday}", current_weekday)
@@ -1124,7 +1138,7 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
)
# disable input audio at the start
_talking_mode = DEFAULT_TALKING_MODE
_talking_mode = initial_talking_mode
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
else:
@@ -1135,19 +1149,17 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
try:
session.interrupt()
except RuntimeError as e:
logger.error(f"Failed to interrupt session: {e}")
# Raise RPC error so client can detect interrupt failure
# Use ERROR_INTERNAL (code 13) to indicate application error
raise rtc.RpcError(
code=13, # ERROR_INTERNAL
message="Application error in method handler"
)
logger.info(f"Cannot interrupt session (agent is speaking): {e}")
# Return a message instead of raising an error
return json.dumps({"success": False, "message": "不能打断"})
session.clear_user_turn()
# listen to the caller if multi-user
room_io.set_participant(data.caller_identity)
session.input.set_audio_enabled(True)
return json.dumps({"success": True})
@ctx.room.local_participant.register_rpc_method("end_turn")
async def end_turn(data: rtc.RpcInvocationData):
@@ -1169,12 +1181,45 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
@ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
async def switch_ptt_and_rt(data: rtc.RpcInvocationData):
nonlocal _talking_mode
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
else:
session.input.set_audio_enabled(True)
return json.dumps({"success": True, "mode": _talking_mode})
try:
# Parse the payload to get the target mode
payload = json.loads(data.payload) if data.payload else {}
target_mode = payload.get("mode")
# Validate and set the mode
if target_mode in ["push_to_talk", "realtime"]:
_talking_mode = target_mode
logger.info(f"Switching talking mode to: {_talking_mode}")
else:
# If invalid mode, toggle from current state
logger.warning(f"Invalid mode '{target_mode}', toggling from current state")
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
logger.info(f"Toggled talking mode to: {_talking_mode}")
# Apply the mode settings
room_io.set_participant(data.caller_identity)
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
logger.info("Setting audio enabled to False (PTT mode)")
else:
# When switching to realtime mode, clear user turn state to ensure proper initialization
session.clear_user_turn()
session.input.set_audio_enabled(True)
logger.info("Setting audio enabled to True (realtime mode)")
return json.dumps({"success": True, "mode": _talking_mode})
except json.JSONDecodeError:
logger.error(f"Failed to parse switch_ptt_and_rt payload: {data.payload}")
# Fallback to toggle behavior
_talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
room_io.set_participant(data.caller_identity)
if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False)
else:
# When switching to realtime mode, clear user turn state
session.clear_user_turn()
session.input.set_audio_enabled(True)
return json.dumps({"success": True, "mode": _talking_mode})
if __name__ == "__main__":
parser = argparse.ArgumentParser()

View File

@@ -4,6 +4,10 @@ const withNextPluginPreval = createNextPluginPreval();
/** @type {import('next').NextConfig} */
const nextConfig = {
reactStrictMode: false,
// Explicitly allow znjj.wangxin93.eu.org for Dev Origin, per future Next.js requirement.
allowedDevOrigins: [
"znjj.wangxin93.eu.org",
],
};
module.exports = withNextPluginPreval(nextConfig);

View File

@@ -0,0 +1,214 @@
"use client";
import { TranscriptionTile } from "@/transcriptions/TranscriptionTile";
import { TrackReferenceOrPlaceholder } from "@livekit/components-react";
import { useCallback, useEffect, useRef, useState } from "react";
/** Props accepted by the draggable `ChatOverlay` panel. */
export interface ChatOverlayProps {
/** Agent audio track; forwarded unchanged to `TranscriptionTile`. */
agentAudioTrack?: TrackReferenceOrPlaceholder;
/** Accent color; forwarded unchanged to `TranscriptionTile`. */
accentColor: string;
/** Forwarded unchanged to `TranscriptionTile` as its `inputDisabled` prop. */
inputDisabled?: boolean;
/** When false the overlay stays mounted but is rendered with `display: none`. */
isVisible: boolean;
/** Overlay offset in pixels, applied as CSS `left` / `top`. */
position: { x: number; y: number };
/** Called with the new offset on initial auto-centering and on every drag move. */
onPositionChange: (position: { x: number; y: number }) => void;
/** Element whose bounding box sizes the overlay and bounds dragging. */
containerRef: React.RefObject<HTMLDivElement | null>;
/** Called when the close button in the overlay header is clicked. */
onToggle: () => void;
}
/**
 * Read the pointer coordinates from a mouse or touch event.
 *
 * Returns `null` when a touch event carries no active touch point, so callers
 * never dereference an undefined `touches[0]`.
 */
function getPointerPosition(
  e: MouseEvent | TouchEvent | React.MouseEvent | React.TouchEvent
): { x: number; y: number } | null {
  if ("touches" in e) {
    const touch = e.touches[0];
    return touch ? { x: touch.clientX, y: touch.clientY } : null;
  }
  return { x: e.clientX, y: e.clientY };
}

/**
 * Floating chat panel that renders a `TranscriptionTile` inside a draggable
 * overlay.
 *
 * The overlay is sized relative to `containerRef`, is auto-centered the first
 * time it becomes visible while `position` is still `{0, 0}`, and can be
 * dragged by its header (mouse or touch) within the container's bounds.
 * Position state is owned by the parent and reported via `onPositionChange`.
 */
export function ChatOverlay({
  agentAudioTrack,
  accentColor,
  inputDisabled,
  isVisible,
  position,
  onPositionChange,
  containerRef,
  onToggle,
}: ChatOverlayProps) {
  const overlayRef = useRef<HTMLDivElement>(null);
  const headerRef = useRef<HTMLDivElement>(null);
  const [isDragging, setIsDragging] = useState(false);
  // Pointer position minus overlay position, captured when the drag starts.
  const dragOffset = useRef({ x: 0, y: 0 });

  // Responsive sizing based on container size
  const [containerSize, setContainerSize] = useState({ width: 360, height: 500 });

  useEffect(() => {
    const updateSize = () => {
      if (containerRef.current) {
        const rect = containerRef.current.getBoundingClientRect();
        setContainerSize({ width: rect.width, height: rect.height });
      }
    };

    updateSize();
    const resizeObserver = new ResizeObserver(updateSize);
    if (containerRef.current) {
      resizeObserver.observe(containerRef.current);
    }

    return () => {
      resizeObserver.disconnect();
    };
  }, [containerRef]);

  // Width: 90% of the container, at least 280px, capped at 95% of the container.
  const overlayWidth = Math.min(
    Math.max(containerSize.width * 0.9, 280),
    containerSize.width * 0.95
  );
  // Height: 40% of the container, at least 250px, capped at 60% of the container.
  const overlayHeight = Math.min(
    Math.max(containerSize.height * 0.4, 250),
    containerSize.height * 0.6
  );

  // Position overlay at center (slightly moved up) when first shown
  const hasPositionedRef = useRef(false);
  useEffect(() => {
    if (isVisible && containerRef.current && containerSize.width > 0 && overlayWidth > 0 && overlayHeight > 0) {
      // Calculate center position, moved up by 15% of container height
      const centerX = (containerSize.width - overlayWidth) / 2;
      const centerY = (containerSize.height - overlayHeight) / 2 - (containerSize.height * 0.15);

      // Only auto-position on first show (when position is at origin)
      if (!hasPositionedRef.current && position.x === 0 && position.y === 0) {
        onPositionChange({ x: Math.max(0, centerX), y: Math.max(0, centerY) });
        hasPositionedRef.current = true;
      }
    }
  }, [isVisible, containerSize.width, containerSize.height, overlayWidth, overlayHeight, containerRef, position, onPositionChange]);

  const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
    if (!overlayRef.current || !headerRef.current) return;

    // Only allow dragging from the header, but not from buttons
    const target = e.target as HTMLElement;
    if (!headerRef.current.contains(target)) return;

    // Don't drag if clicking on the close button
    if (target.closest('button') || target.closest('svg')) return;

    // Ignore touch events that carry no active touch point.
    const pointer = getPointerPosition(e);
    if (!pointer) return;

    e.preventDefault();
    e.stopPropagation();
    setIsDragging(true);

    dragOffset.current = {
      x: pointer.x - position.x,
      y: pointer.y - position.y,
    };
  };

  const handleDragMove = useCallback((e: MouseEvent | TouchEvent) => {
    if (!isDragging || !containerRef.current || !overlayRef.current) return;

    const pointer = getPointerPosition(e);
    if (!pointer) return;

    e.preventDefault();
    e.stopPropagation();

    const containerRect = containerRef.current.getBoundingClientRect();
    const overlayRect = overlayRef.current.getBoundingClientRect();

    // Constrain within container bounds (y may reach 0, i.e. the very top).
    const maxX = containerRect.width - overlayRect.width;
    const maxY = containerRect.height - overlayRect.height;

    const newX = Math.max(0, Math.min(pointer.x - dragOffset.current.x, maxX));
    const newY = Math.max(0, Math.min(pointer.y - dragOffset.current.y, maxY));

    onPositionChange({ x: newX, y: newY });
  }, [isDragging, containerRef, onPositionChange]);

  const handleDragEnd = useCallback(() => {
    setIsDragging(false);
  }, []);

  useEffect(() => {
    if (isDragging) {
      window.addEventListener("mousemove", handleDragMove);
      window.addEventListener("mouseup", handleDragEnd);
      window.addEventListener("touchmove", handleDragMove, { passive: false });
      window.addEventListener("touchend", handleDragEnd);
      // A cancelled touch never fires touchend; end the drag so the overlay
      // does not stay in dragging state.
      window.addEventListener("touchcancel", handleDragEnd);
    }

    return () => {
      window.removeEventListener("mousemove", handleDragMove);
      window.removeEventListener("mouseup", handleDragEnd);
      window.removeEventListener("touchmove", handleDragMove);
      window.removeEventListener("touchend", handleDragEnd);
      window.removeEventListener("touchcancel", handleDragEnd);
    };
  }, [isDragging, handleDragMove, handleDragEnd]);

  return (
    <div
      ref={overlayRef}
      className="absolute z-40 rounded-lg border border-white/20 shadow-2xl backdrop-blur-md transition-all duration-300 flex flex-col"
      style={{
        left: `${position.x}px`,
        top: `${position.y}px`,
        width: `${overlayWidth}px`,
        height: `${overlayHeight}px`,
        backgroundColor: 'rgba(0, 0, 0, 0.85)',
        cursor: isDragging ? 'grabbing' : 'default',
        display: isVisible ? 'flex' : 'none',
        // `transition-all` would ease every left/top update over 300ms and make
        // the overlay trail the pointer, so turn transitions off while dragging.
        transition: isDragging ? 'none' : undefined,
      }}
      onMouseDown={handleDragStart}
      onTouchStart={handleDragStart}
    >
      {/* Header with drag handle and close button */}
      <div
        ref={headerRef}
        className="flex items-center justify-between px-4 py-2 border-b border-white/10 cursor-move select-none"
        style={{ backgroundColor: 'rgba(0, 0, 0, 0.3)' }}
      >
        <div className="flex items-center gap-2">
          <div className="w-2 h-2 rounded-full bg-white/40"></div>
          <span className="text-white text-xs font-medium">Chat</span>
        </div>
        <button
          onClick={(e) => {
            e.stopPropagation();
            e.preventDefault();
            onToggle();
          }}
          className="text-white hover:text-white transition-colors p-2 rounded hover:bg-white/10 flex items-center justify-center"
          aria-label="Close chat overlay"
          style={{ minWidth: '32px', minHeight: '32px' }}
        >
          <svg
            className="w-5 h-5"
            fill="none"
            stroke="currentColor"
            viewBox="0 0 24 24"
            strokeWidth={2.5}
          >
            <path
              strokeLinecap="round"
              strokeLinejoin="round"
              d="M6 18L18 6M6 6l12 12"
            />
          </svg>
        </button>
      </div>

      {/* Chat content with padding */}
      <div className="overflow-hidden flex flex-col px-2 py-2" style={{ height: `calc(100% - 40px)` }}>
        <TranscriptionTile
          agentAudioTrack={agentAudioTrack}
          accentColor={accentColor}
          inputDisabled={inputDisabled}
        />
      </div>
    </div>
  );
}

View File

@@ -13,8 +13,9 @@ import {
} from "@livekit/components-react";
import { ConnectionState, Track, LocalParticipant, Room } from "livekit-client";
import { useEffect, useMemo, useState, useRef, useCallback } from "react";
import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon } from "./icons";
import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon, ChatIcon } from "./icons";
import { useToast } from "@/components/toast/ToasterProvider";
import { ChatOverlay } from "@/components/chat/ChatOverlay";
export interface PhoneSimulatorProps {
onConnect: () => void;
@@ -67,6 +68,13 @@ export function PhoneSimulator({
const [interruptRejected, setInterruptRejected] = useState(false);
const [isPushToTalkMode, setIsPushToTalkMode] = useState(true); // false = realtime mode, true = PTT mode (default)
const pushToTalkButtonRef = useRef<HTMLButtonElement>(null);
const [showChatOverlay, setShowChatOverlay] = useState(false);
const [chatOverlayPosition, setChatOverlayPosition] = useState({ x: 0, y: 0 }); // Will be positioned at top-right by ChatOverlay component
const [chatTogglePosition, setChatTogglePosition] = useState<{ x?: number; right?: number; y: number }>({ right: 16, y: 56 }); // Initial position on the right
const [isDraggingChatToggle, setIsDraggingChatToggle] = useState(false);
const chatToggleRef = useRef<HTMLButtonElement>(null);
const chatToggleDragOffset = useRef({ x: 0, y: 0 });
const chatToggleHasDragged = useRef(false);
useEffect(() => {
const voiceAttr = config.settings.attributes?.find(a => a.key === "voice");
@@ -75,6 +83,47 @@ export function PhoneSimulator({
}
}, [config.settings.attributes]);
// Set talking_mode attribute when connected or when mode changes
const lastTalkingModeRef = useRef<string | null>(null);
const configAttributesRef = useRef(config.settings.attributes);
// Update config attributes ref when it changes
useEffect(() => {
configAttributesRef.current = config.settings.attributes;
}, [config.settings.attributes]);
useEffect(() => {
if (roomState === ConnectionState.Connected && localParticipant) {
const talkingMode = isPushToTalkMode ? "push_to_talk" : "realtime";
// Only update if the mode actually changed
if (lastTalkingModeRef.current === talkingMode) {
return;
}
lastTalkingModeRef.current = talkingMode;
try {
// Get current attributes from config to preserve them
const attributesToSet: Record<string, string> = {};
const configAttributes = configAttributesRef.current || [];
configAttributes.forEach(attr => {
if (attr.key && attr.value) {
attributesToSet[attr.key] = attr.value;
}
});
// Add talking_mode
attributesToSet.talking_mode = talkingMode;
localParticipant.setAttributes(attributesToSet);
} catch (error) {
console.error("Failed to set talking_mode attribute:", error);
}
} else if (roomState === ConnectionState.Disconnected) {
// Reset ref when disconnected
lastTalkingModeRef.current = null;
}
}, [roomState, localParticipant, isPushToTalkMode]);
const [currentTime, setCurrentTime] = useState("");
const [visualizerPosition, setVisualizerPosition] = useState({
@@ -84,30 +133,41 @@ export function PhoneSimulator({
const [isDragging, setIsDragging] = useState(false);
const dragOffset = useRef({ x: 0, y: 0 });
const handleDragStart = (e: React.MouseEvent) => {
const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault();
setIsDragging(true);
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
dragOffset.current = {
x: e.clientX - visualizerPosition.x,
y: e.clientY - visualizerPosition.y,
x: clientX - visualizerPosition.x,
y: clientY - visualizerPosition.y,
};
};
const handleDragMove = (e: MouseEvent) => {
const handleDragMove = (e: MouseEvent | TouchEvent) => {
if (!isDragging || !phoneContainerRef.current || !visualizerRef.current) return;
e.preventDefault();
const containerRect = phoneContainerRef.current.getBoundingClientRect();
const visualizerRect = visualizerRef.current.getBoundingClientRect();
let newX = e.clientX - dragOffset.current.x;
let newY = e.clientY - dragOffset.current.y;
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
let newX = clientX - dragOffset.current.x;
let newY = clientY - dragOffset.current.y;
// Constrain within container
const maxX = containerRect.width - visualizerRect.width;
const maxY = containerRect.height - visualizerRect.height;
const statusBarHeight = 48; // h-12 = 48px
// On mobile (width < 768px), status bar is hidden, so allow dragging to top (y=0)
// On desktop, keep status bar height constraint (48px)
const isMobile = typeof window !== 'undefined' && window.innerWidth < 768;
const minY = isMobile ? 0 : 48; // statusBarHeight = 48px
newX = Math.max(0, Math.min(newX, maxX));
newY = Math.max(statusBarHeight, Math.min(newY, maxY));
newY = Math.max(minY, Math.min(newY, maxY));
setVisualizerPosition({
x: newX,
@@ -123,13 +183,107 @@ export function PhoneSimulator({
if (isDragging) {
window.addEventListener("mouseup", handleDragEnd);
window.addEventListener("mousemove", handleDragMove);
window.addEventListener("touchend", handleDragEnd);
window.addEventListener("touchmove", handleDragMove, { passive: false });
}
return () => {
window.removeEventListener("mouseup", handleDragEnd);
window.removeEventListener("mousemove", handleDragMove);
window.removeEventListener("touchend", handleDragEnd);
window.removeEventListener("touchmove", handleDragMove);
};
}, [isDragging]);
// Chat toggle button drag handlers
const handleChatToggleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault();
e.stopPropagation(); // Prevent triggering the button click
setIsDraggingChatToggle(true);
chatToggleHasDragged.current = false;
if (!phoneContainerRef.current || !chatToggleRef.current) return;
const containerRect = phoneContainerRef.current.getBoundingClientRect();
const buttonRect = chatToggleRef.current.getBoundingClientRect();
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
// If using right positioning, convert to x for dragging
if (chatTogglePosition.right !== undefined && chatTogglePosition.x === undefined) {
const currentX = containerRect.width - chatTogglePosition.right - buttonRect.width;
setChatTogglePosition({ x: currentX, y: chatTogglePosition.y });
chatToggleDragOffset.current = {
x: clientX - containerRect.left - currentX,
y: clientY - containerRect.top - chatTogglePosition.y,
};
} else {
// Already using x positioning
const currentX = chatTogglePosition.x ?? 0;
chatToggleDragOffset.current = {
x: clientX - containerRect.left - currentX,
y: clientY - containerRect.top - chatTogglePosition.y,
};
}
};
const handleChatToggleDragMove = (e: MouseEvent | TouchEvent) => {
if (!isDraggingChatToggle || !phoneContainerRef.current || !chatToggleRef.current) return;
e.preventDefault();
chatToggleHasDragged.current = true; // Mark that we've actually dragged
const containerRect = phoneContainerRef.current.getBoundingClientRect();
const buttonRect = chatToggleRef.current.getBoundingClientRect();
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
// Calculate new position relative to container
let newX = clientX - containerRect.left - chatToggleDragOffset.current.x;
let newY = clientY - containerRect.top - chatToggleDragOffset.current.y;
// Constrain within container
const maxX = containerRect.width - buttonRect.width;
const maxY = containerRect.height - buttonRect.height;
// On mobile (width < 768px), status bar is hidden, so allow dragging to top (y=0)
// On desktop, keep status bar height constraint (48px)
const isMobile = typeof window !== 'undefined' && window.innerWidth < 768;
const minY = isMobile ? 0 : 48; // statusBarHeight = 48px
newX = Math.max(0, Math.min(newX, maxX));
newY = Math.max(minY, Math.min(newY, maxY));
setChatTogglePosition({
x: newX,
y: newY,
});
};
const handleChatToggleDragEnd = () => {
setIsDraggingChatToggle(false);
// Reset the flag after a short delay to allow onClick to check it
setTimeout(() => {
chatToggleHasDragged.current = false;
}, 100);
};
useEffect(() => {
if (isDraggingChatToggle) {
window.addEventListener("mouseup", handleChatToggleDragEnd);
window.addEventListener("mousemove", handleChatToggleDragMove);
window.addEventListener("touchend", handleChatToggleDragEnd);
window.addEventListener("touchmove", handleChatToggleDragMove, { passive: false });
}
return () => {
window.removeEventListener("mouseup", handleChatToggleDragEnd);
window.removeEventListener("mousemove", handleChatToggleDragMove);
window.removeEventListener("touchend", handleChatToggleDragEnd);
window.removeEventListener("touchmove", handleChatToggleDragMove);
};
}, [isDraggingChatToggle]);
// Initialize chat toggle button position - keep it on the right using 'right' CSS property
// Only convert to 'x' (left positioning) when user drags it
useEffect(() => {
if (showCameraMenu) {
Room.getLocalDevices("videoinput").then(setCameras);
@@ -457,14 +611,27 @@ export function PhoneSimulator({
const handleModeSwitch = async () => {
if (!room || !voiceAssistant.agent) return;
// Determine the target mode (toggle from current state)
const targetMode = isPushToTalkMode ? "realtime" : "push_to_talk";
try {
await room.localParticipant.performRpc({
const response = await room.localParticipant.performRpc({
destinationIdentity: voiceAssistant.agent.identity,
method: "switch_ptt_and_rt",
payload: "",
payload: JSON.stringify({ mode: targetMode }),
});
// Toggle mode on success
setIsPushToTalkMode(prev => !prev);
// Parse the response to confirm the mode was set
try {
const responseData = JSON.parse(response);
const confirmedMode = responseData.mode;
// Update state based on server response
setIsPushToTalkMode(confirmedMode === "push_to_talk");
} catch (parseError) {
// If parsing fails, update state based on what we sent
console.warn("Failed to parse mode switch response, using sent mode:", parseError);
setIsPushToTalkMode(targetMode === "push_to_talk");
}
} catch (error: any) {
console.error("Failed to switch mode:", error);
// Don't show error toast for mode switch failures, just log
@@ -484,16 +651,38 @@ export function PhoneSimulator({
setInterruptRejected(false);
try {
await room.localParticipant.performRpc({
const response = await room.localParticipant.performRpc({
destinationIdentity: voiceAssistant.agent.identity,
method: "start_turn",
payload: "",
});
setIsPushToTalkActive(true);
setInterruptRejected(false);
// Parse the response to check for success/failure
try {
const responseData = JSON.parse(response);
if (responseData.success === false) {
// Interrupt was rejected, show message
if (responseData.message === "不能打断") {
setInterruptRejected(true);
// Clear the rejection message after 3 seconds
setTimeout(() => setInterruptRejected(false), 3000);
if (process.env.NODE_ENV === 'development') {
console.log("Interrupt rejected (cannot interrupt):", responseData.message);
}
return;
}
} else if (responseData.success === true) {
// Successfully started turn
setIsPushToTalkActive(true);
setInterruptRejected(false);
}
} catch (parseError) {
// If response is not JSON, assume success (backward compatibility)
setIsPushToTalkActive(true);
setInterruptRejected(false);
}
} catch (error: any) {
// Prevent error from propagating to React error boundary
// by handling all expected errors here
// Handle RPC errors (method not found, etc.)
setIsPushToTalkActive(false);
const errorMessage = error?.message || "";
@@ -509,45 +698,6 @@ export function PhoneSimulator({
return;
}
// Check for "Application error in method handler" - this indicates interrupt failed
// This error is raised when session.interrupt() fails in the agent
// We handle this gracefully by showing "不允许打断" on the button, so we don't log it as an error
if (errorMessage.includes("Application error in method handler") ||
errorMessage.includes("Application error") ||
errorCode === 13 || // ERROR_INTERNAL (RpcErrorCode.ERROR_INTERNAL)
(isAgentSpeaking && errorMessage.includes("interrupt"))) {
// Suppress error logging for expected interrupt failures
// Only log at debug level to avoid error popups
if (process.env.NODE_ENV === 'development') {
console.log("Interrupt rejected (expected behavior):", errorMessage);
}
setInterruptRejected(true);
// Clear the rejection message after 3 seconds
setTimeout(() => setInterruptRejected(false), 3000);
// Explicitly prevent error from propagating
error.preventDefault?.();
error.stopPropagation?.();
return;
}
// Check if agent is speaking and the error suggests interruption was rejected
if (isAgentSpeaking) {
// Check for common rejection indicators
if (errorMessage.includes("reject") ||
errorMessage.includes("not allowed") ||
errorCode === 403 || // Forbidden
errorCode === 409) { // Conflict
// Suppress error logging for expected rejections
if (process.env.NODE_ENV === 'development') {
console.log("Interrupt rejected:", errorMessage);
}
setInterruptRejected(true);
// Clear the rejection message after 3 seconds
setTimeout(() => setInterruptRejected(false), 3000);
return;
}
}
// Only log and show error for unexpected errors
console.error("Unexpected error in push-to-talk:", error);
const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered.";
@@ -792,7 +942,7 @@ export function PhoneSimulator({
})();
return (
<div className="w-auto max-w-full h-full aspect-[9/19.5] max-h-full bg-black rounded-[40px] border-[12px] border-gray-900 overflow-hidden relative shadow-2xl flex flex-col shrink-0">
<div className="absolute inset-0 w-full h-full bg-black rounded-none border-0 overflow-hidden flex flex-col shrink-0 md:relative md:w-auto md:max-w-full md:h-full md:aspect-[9/19.5] md:max-h-full md:rounded-[40px] md:border-[12px] md:border-gray-900 md:shadow-2xl">
<style jsx global>{`
.mirror-video video {
transform: scaleX(-1);
@@ -824,7 +974,12 @@ export function PhoneSimulator({
}
`}</style>
{/* Status Bar */}
<div className="h-12 w-full bg-black/20 backdrop-blur-sm absolute top-0 left-0 z-50 flex items-center justify-between px-6 text-white text-xs font-medium">
<div className="hidden md:flex w-full bg-black/20 backdrop-blur-sm absolute top-0 left-0 z-50 items-center justify-between px-6 text-white text-xs font-medium"
style={{
paddingTop: 'max(env(safe-area-inset-top, 0px), 0.5rem)',
paddingBottom: '0.75rem',
minHeight: '3rem',
}}>
<span>{currentTime}</span>
<div className="flex items-center gap-2">
<WifiIcon className="w-4 h-4" />
@@ -832,8 +987,42 @@ export function PhoneSimulator({
</div>
</div>
{/* Chat Toggle Button - Top Right, aligned with audio visualizer (Draggable) */}
{roomState === ConnectionState.Connected &&
voiceAssistant.agent &&
phoneMode !== "important_message" &&
phoneMode !== "capture" && (
<button
ref={chatToggleRef}
className={`absolute z-50 p-3 rounded-full backdrop-blur-md transition-colors shadow-lg cursor-move select-none touch-none ${
showChatOverlay
? "bg-blue-500/80 text-white"
: "bg-gray-800/70 text-white hover:bg-gray-800/90"
}`}
onClick={(e) => {
// Only toggle if we didn't just drag
if (!chatToggleHasDragged.current) {
setShowChatOverlay(!showChatOverlay);
}
}}
onMouseDown={handleChatToggleDragStart}
onTouchStart={handleChatToggleDragStart}
title={showChatOverlay ? "Hide chat (drag to move)" : "Show chat (drag to move)"}
style={{
...(chatTogglePosition.x !== undefined ? { left: chatTogglePosition.x } : {}),
...(chatTogglePosition.right !== undefined ? { right: chatTogglePosition.right } : {}),
top: chatTogglePosition.y,
}}
>
<ChatIcon className="w-5 h-5 md:w-6 md:h-6" />
</button>
)}
{/* Main Content */}
<div ref={phoneContainerRef} className="flex-grow relative bg-gray-950 w-full h-full overflow-hidden">
<div ref={phoneContainerRef} className="flex-grow relative bg-gray-950 w-full h-full overflow-hidden"
style={{
paddingBottom: 'env(safe-area-inset-bottom, 0px)',
}}>
<div className={`h-full w-full transition-all duration-500 ease-in-out transform ${
phoneMode === "hand_off" && roomState === ConnectionState.Connected
? "blur-md scale-105"
@@ -960,12 +1149,13 @@ export function PhoneSimulator({
{roomState === ConnectionState.Connected && voiceAssistant.audioTrack && phoneMode !== "hand_off" && (
<div
ref={visualizerRef}
className="absolute z-50 p-2 bg-black/40 backdrop-blur-md rounded-lg border border-white/10 shadow-lg cursor-move select-none"
className="absolute z-50 p-2 bg-black/40 backdrop-blur-md rounded-lg border border-white/10 shadow-lg cursor-move select-none touch-none"
style={{
left: visualizerPosition.x,
top: visualizerPosition.y,
}}
onMouseDown={handleDragStart}
onTouchStart={handleDragStart}
>
<div className="h-8 w-24 flex items-center justify-center [--lk-va-bar-width:3px] [--lk-va-bar-gap:2px] [--lk-fg:white]">
<BarVisualizer
@@ -978,10 +1168,30 @@ export function PhoneSimulator({
</div>
)}
{/* Chat Overlay - Hidden during capture and important_message modes */}
{roomState === ConnectionState.Connected &&
voiceAssistant.agent &&
phoneMode !== "capture" &&
phoneMode !== "important_message" && (
<ChatOverlay
agentAudioTrack={voiceAssistant.audioTrack}
accentColor={config.settings.theme_color}
inputDisabled={phoneMode === "hand_off"}
isVisible={showChatOverlay}
position={chatOverlayPosition}
onPositionChange={setChatOverlayPosition}
containerRef={phoneContainerRef}
onToggle={() => setShowChatOverlay(!showChatOverlay)}
/>
)}
{/* Call Controls Overlay */}
{roomState === ConnectionState.Connected && (
phoneMode === "capture" ? (
<div className="absolute top-0 left-0 w-full h-full flex flex-col justify-end pb-[5%] px-[8%] z-40">
<div className="absolute top-0 left-0 w-full h-full flex flex-col justify-end px-[8%] z-40"
style={{
paddingBottom: 'calc(5% + env(safe-area-inset-bottom, 0px))',
}}>
{/* Camera Controls Row */}
<div className="w-full flex items-center justify-evenly mb-8">
{/* Left: Upload */}
@@ -1058,7 +1268,11 @@ export function PhoneSimulator({
</div>
</div>
) : (
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-40">
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-40"
style={{
paddingBottom: 'max(env(safe-area-inset-bottom, 0px), 0px)',
bottom: 'calc(5% + env(safe-area-inset-bottom, 0px))',
}}>
<div className="w-full flex flex-col items-center justify-center gap-4">
{/* Mode Toggle Switch */}
{phoneMode !== "important_message" && phoneMode !== "hand_off" && voiceAssistant.agent && (
@@ -1089,102 +1303,143 @@ export function PhoneSimulator({
{/* Push-to-Talk Mode Layout */}
{isPushToTalkMode && phoneMode !== "hand_off" && voiceAssistant.agent && (
<div className="w-full flex items-center justify-center gap-8">
{/* Camera Switch Button - Left (hidden in important_message mode) */}
{phoneMode !== "important_message" && (
<div className="relative">
<>
{/* Important Message Mode - Centered End Call Button */}
{phoneMode === "important_message" ? (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleSwitchCamera}
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<SwitchCameraIcon className="w-6 h-6" />
<PhoneOffIcon className="w-6 h-6" />
</button>
{showCameraMenu && (
<div className="absolute bottom-full mb-2 left-0 bg-gray-900 border border-gray-800 rounded-lg shadow-xl py-2 w-48 z-50">
{cameras.length === 0 ? (
<div className="px-4 py-2 text-gray-500 text-sm">
No cameras found
</div>
</div>
) : (
<div className="w-full flex items-center justify-between gap-8">
{/* Left side: Mic Toggle and Camera Switch Buttons */}
<div className="flex flex-col items-center gap-2">
{/* Mic Toggle Button */}
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-800/50 text-white hover:bg-gray-800/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
cameras.map((device) => (
<button
key={device.deviceId}
onClick={() => handleSelectCamera(device.deviceId)}
className="w-full text-left px-4 py-2 text-sm text-white hover:bg-gray-800 transition-colors truncate"
>
{device.label ||
`Camera ${cameras.indexOf(device) + 1}`}
</button>
))
<MicOffIcon className="w-6 h-6" />
)}
</button>
{/* Camera Switch Button */}
<div className="relative">
<button
className="p-4 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleSwitchCamera}
>
<SwitchCameraIcon className="w-6 h-6" />
</button>
{showCameraMenu && (
<div className="absolute bottom-full mb-2 left-0 bg-gray-900 border border-gray-800 rounded-lg shadow-xl py-2 w-48 z-50">
{cameras.length === 0 ? (
<div className="px-4 py-2 text-gray-500 text-sm">
No cameras found
</div>
) : (
cameras.map((device) => (
<button
key={device.deviceId}
onClick={() => handleSelectCamera(device.deviceId)}
className="w-full text-left px-4 py-2 text-sm text-white hover:bg-gray-800 transition-colors truncate"
>
{device.label ||
`Camera ${cameras.indexOf(device) + 1}`}
</button>
))
)}
</div>
)}
</div>
)}
</div>
{/* Center: Large Push-to-Talk Button */}
<button
ref={pushToTalkButtonRef}
className={`w-24 h-24 rounded-full backdrop-blur-md transition-all flex flex-col items-center justify-center gap-2 aspect-square select-none ${
interruptRejected
? "bg-red-500/70 text-white"
: isPushToTalkActive
? "bg-green-500 text-white scale-110 shadow-lg shadow-green-500/50"
: "bg-blue-500/70 text-white hover:bg-blue-500/90"
}`}
style={{ borderRadius: '50%' }}
onMouseDown={handlePushToTalkMouseDown}
onMouseUp={handlePushToTalkMouseUp}
onTouchStart={handlePushToTalkTouchStart}
onTouchEnd={handlePushToTalkTouchEnd}
title={supportsPushToTalk ? "Push to Talk" : "Push to Talk (may not be supported by this agent)"}
>
<MicIcon className="w-8 h-8" />
<span className="text-xs font-medium">
{interruptRejected ? "不允许打断" : "按住说话"}
</span>
</button>
{/* Right side: End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
{/* Large Push-to-Talk Button - Center (hidden in important_message mode) */}
{phoneMode !== "important_message" && (
<button
ref={pushToTalkButtonRef}
className={`w-24 h-24 rounded-full backdrop-blur-md transition-all flex flex-col items-center justify-center gap-2 aspect-square ${
interruptRejected
? "bg-red-500/70 text-white"
: isPushToTalkActive
? "bg-green-500 text-white scale-110 shadow-lg shadow-green-500/50"
: "bg-blue-500/70 text-white hover:bg-blue-500/90"
}`}
style={{ borderRadius: '50%' }}
onMouseDown={handlePushToTalkMouseDown}
onMouseUp={handlePushToTalkMouseUp}
onTouchStart={handlePushToTalkTouchStart}
onTouchEnd={handlePushToTalkTouchEnd}
title={supportsPushToTalk ? "Push to Talk" : "Push to Talk (may not be supported by this agent)"}
>
<MicIcon className="w-8 h-8" />
<span className="text-xs font-medium">
{interruptRejected ? "不允许打断" : "按住说话"}
</span>
</button>
)}
{/* End Call Button - Right (always shown in PTT mode) */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
</>
)}
{/* Realtime Mode Layout */}
{!isPushToTalkMode && (
<div className="w-full flex items-center justify-center gap-8">
{phoneMode !== "important_message" && phoneMode !== "hand_off" && (
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
)}
{!isPushToTalkMode && phoneMode !== "hand_off" && (
<>
{/* Important Message Mode - Centered End Call Button */}
{phoneMode === "important_message" ? (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
) : (
<div className="w-full flex items-center justify-center gap-4">
{/* Mic Toggle */}
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
{/* End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
{/* End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</>
)}
{/* Hand Off Mode - Show only End Call Button */}

View File

@@ -75,6 +75,7 @@ export default function Playground({
const [rpcMethod, setRpcMethod] = useState("");
const [rpcPayload, setRpcPayload] = useState("");
const [showRpc, setShowRpc] = useState(false);
const [qrCodeUrl, setQrCodeUrl] = useState<string>("");
// Clean up RPC resolvers before disconnecting to prevent errors
const cleanupRpcResolvers = useCallback(() => {
@@ -406,6 +407,7 @@ export default function Playground({
}, [agentVideoTrack, config, roomState]);
useEffect(() => {
if (typeof document !== "undefined") {
document.body.style.setProperty(
"--lk-theme-color",
// @ts-ignore
@@ -415,8 +417,15 @@ export default function Playground({
"--lk-drop-shadow",
`var(--lk-theme-color) 0px 0px 18px`,
);
}
}, [config.settings.theme_color]);
// Capture the page URL once after mount and keep it in state; bail out when
// no `window` object is available.
useEffect(() => {
  if (typeof window === "undefined") {
    return;
  }
  setQrCodeUrl(window.location.href);
}, []);
const audioTileContent = useMemo(() => {
const disconnectedContent = (
<div className="flex flex-col items-center justify-center gap-2 text-gray-700 text-center w-full">
@@ -480,21 +489,21 @@ export default function Playground({
const instructionsContent = (
<>
<ConfigurationPanelItem title="Instructions">
<textarea
className="w-full bg-gray-950 text-white text-sm p-3 rounded-md border border-gray-800 focus:border-gray-600 focus:outline-none transition-colors resize-none disabled:opacity-50 disabled:cursor-not-allowed"
style={{ minHeight: "80px" }}
rows={3}
placeholder="Enter system instructions for the agent..."
value={config.settings.instructions}
onChange={(e) => {
const newSettings = { ...config.settings };
newSettings.instructions = e.target.value;
setUserSettings(newSettings);
}}
disabled={roomState !== ConnectionState.Disconnected}
/>
</ConfigurationPanelItem>
<ConfigurationPanelItem title="Instructions">
<textarea
className="w-full bg-gray-950 text-white text-sm p-3 rounded-md border border-gray-800 focus:border-gray-600 focus:outline-none transition-colors resize-none disabled:opacity-50 disabled:cursor-not-allowed"
style={{ minHeight: "80px" }}
rows={3}
placeholder="Enter system instructions for the agent..."
value={config.settings.instructions}
onChange={(e) => {
const newSettings = { ...config.settings };
newSettings.instructions = e.target.value;
setUserSettings(newSettings);
}}
disabled={roomState !== ConnectionState.Disconnected}
/>
</ConfigurationPanelItem>
<ConfigurationPanelItem title="Color">
<ColorPicker
colors={themeColors}
@@ -738,10 +747,10 @@ export default function Playground({
<AudioInputTile trackRef={localMicTrack} />
</ConfigurationPanelItem>
)}
{config.show_qr && (
{config.show_qr && qrCodeUrl && (
<div className="w-full">
<ConfigurationPanelItem title="QR Code">
<QRCodeSVG value={window.location.href} width="128" />
<QRCodeSVG value={qrCodeUrl} width="128" />
</ConfigurationPanelItem>
</div>
)}
@@ -864,7 +873,7 @@ export default function Playground({
<PlaygroundTabbedTile
className="h-full"
tabs={mobileTabs}
initialTab={mobileTabs.length - 1}
initialTab={0}
/>
</div>
<div

View File

@@ -45,7 +45,7 @@ export const PlaygroundTile: React.FC<PlaygroundTileProps> = ({
</div>
)}
<div
className={`flex flex-col items-center grow w-full ${childrenClassName}`}
className={`flex flex-col items-center grow w-full relative ${childrenClassName}`}
style={{
height: `calc(100% - ${title ? titleHeight + "px" : "0px"})`,
padding: `${contentPadding * 4}px`,
@@ -74,7 +74,7 @@ export const PlaygroundTabbedTile: React.FC<PlaygroundTabbedTileProps> = ({
className={`flex flex-col h-full border rounded-sm border-gray-800 text-gray-500 bg-${backgroundColor} ${className}`}
>
<div
className="flex items-center justify-start text-xs uppercase border-b border-b-gray-800 tracking-wider"
className="flex items-center justify-start text-xs uppercase border-b border-b-gray-800 tracking-wider relative z-[100] bg-gray-950"
style={{
height: `${titleHeight}px`,
}}

View File

@@ -207,3 +207,20 @@ export const VoiceIcon = ({ className }: { className?: string }) => (
<line x1="12" y1="19" x2="12" y2="22" />
</svg>
);
/**
 * Outline icon drawn from a single stroked path on a 24x24 viewBox.
 * The stroke uses `currentColor`, so the icon takes the surrounding text color.
 *
 * @param className - Optional class names forwarded to the root `<svg>`.
 */
export const ChatIcon = ({ className }: { className?: string }) => {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
      className={className}
    >
      <path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z" />
    </svg>
  );
};

View File

@@ -51,3 +51,12 @@ body {
opacity: 1;
}
}
/*
 * Hide the Next.js floating dev indicator and related dev-only overlays.
 *
 * Only Next.js-specific hooks are targeted. A generic selector such as
 * div[style*="position: fixed"][style*="bottom"][style*="right"] is
 * deliberately not used: it substring-matches the inline style of ANY div
 * (e.g. "padding-bottom", "border-right") and would force-hide the app's own
 * fixed-position elements with !important.
 *
 * NOTE(review): [data-nextjs-dialog] / [data-nextjs-toast] also suppress the
 * dev error overlay; prefer the `devIndicators` option in next.config.js if
 * only the icon should be hidden.
 */
nextjs-portal,
#__next-build-watcher,
[data-nextjs-dialog],
[data-nextjs-toast] {
  display: none !important;
}