Compare commits

..

10 Commits

SHA1 Message Date
950d1ab3d4 Update agents/README.md 2026-02-02 15:28:28 +00:00
28b9a16c4e make chat message overlay right 2025-12-19 10:55:13 +08:00
f1b331d923 try to fix fast reload 2025-12-18 09:41:42 +08:00
739c019404 set chat message overlay draggable 2025-12-18 09:22:01 +08:00
da11561f47 Bug fixed 2025-12-17 23:13:42 +08:00
853e1558b1 fix realtime mode need push to talk once 2025-12-17 22:40:11 +08:00
6652a5cd43 update logic of switch_ptt_and_rt 2025-12-17 22:17:44 +08:00
d942222f11 update endcall button postion 2025-12-17 21:41:36 +08:00
5be6ab12f3 add chat message overlay 2025-12-17 18:59:17 +08:00
eeeed36494 add mic on push-to-talk mode, iphone open in phone tab default 2025-12-17 18:03:37 +08:00
7 changed files with 655 additions and 150 deletions

View File

@@ -0,0 +1 @@
use livekit-plugins-volcengine==1.2.9

View File

@@ -898,8 +898,12 @@ class MyAgent(Agent):
         # Interrupt speech if user makes a selection while agent is speaking
         if speech_handle and hasattr(speech_handle, "interrupt"):
-            speech_handle.interrupt()
-            logger.info("Interrupted speech due to user selection")
+            try:
+                speech_handle.interrupt()
+            except Exception as e:
+                logger.error(f"Failed to interrupt speech: {e}")
+            else:
+                logger.info("Interrupted speech due to user selection")
         logger.info(f"User made selection: {response}")
@@ -1014,6 +1018,16 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
     initial_instructions = participant.attributes.get("instructions")
     logger.info(f"User selected instructions: {initial_instructions}")
+    # Read talking_mode from frontend state
+    initial_talking_mode = DEFAULT_TALKING_MODE
+    if participant.attributes.get("talking_mode"):
+        frontend_talking_mode = participant.attributes.get("talking_mode")
+        if frontend_talking_mode in ["push_to_talk", "realtime"]:
+            initial_talking_mode = frontend_talking_mode
+            logger.info(f"Initializing talking_mode from frontend: {initial_talking_mode}")
+        else:
+            logger.warning(f"Invalid talking_mode from frontend: {frontend_talking_mode}, using default: {initial_talking_mode}")
     # Replace the datetime and weekday placeholders to avoid KeyError from other braces in the prompt
     initial_instructions = initial_instructions.replace("{datetime}", current_time)
     initial_instructions = initial_instructions.replace("{weekday}", current_weekday)
@@ -1124,7 +1138,7 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
     )
     # disable input audio at the start
-    _talking_mode = DEFAULT_TALKING_MODE
+    _talking_mode = initial_talking_mode
     if _talking_mode == "push_to_talk":
         session.input.set_audio_enabled(False)
     else:
@@ -1135,13 +1149,9 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
         try:
             session.interrupt()
         except RuntimeError as e:
-            logger.error(f"Failed to interrupt session: {e}")
-            # Raise RPC error so client can detect interrupt failure
-            # Use ERROR_INTERNAL (code 13) to indicate application error
-            raise rtc.RpcError(
-                code=13,  # ERROR_INTERNAL
-                message="Application error in method handler"
-            )
+            logger.info(f"Cannot interrupt session (agent is speaking): {e}")
+            # Return a message instead of raising an error
+            return json.dumps({"success": False, "message": "不能打断"})
         session.clear_user_turn()
@@ -1149,6 +1159,8 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
         room_io.set_participant(data.caller_identity)
         session.input.set_audio_enabled(True)
         return json.dumps({"success": True})
     @ctx.room.local_participant.register_rpc_method("end_turn")
     async def end_turn(data: rtc.RpcInvocationData):
         session.input.set_audio_enabled(False)
@@ -1169,12 +1181,45 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
     @ctx.room.local_participant.register_rpc_method("switch_ptt_and_rt")
     async def switch_ptt_and_rt(data: rtc.RpcInvocationData):
         nonlocal _talking_mode
-        _talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
-        if _talking_mode == "push_to_talk":
-            session.input.set_audio_enabled(False)
-        else:
-            session.input.set_audio_enabled(True)
-        return json.dumps({"success": True, "mode": _talking_mode})
+        try:
+            # Parse the payload to get the target mode
+            payload = json.loads(data.payload) if data.payload else {}
+            target_mode = payload.get("mode")
+            # Validate and set the mode
+            if target_mode in ["push_to_talk", "realtime"]:
+                _talking_mode = target_mode
+                logger.info(f"Switching talking mode to: {_talking_mode}")
+            else:
+                # If invalid mode, toggle from current state
+                logger.warning(f"Invalid mode '{target_mode}', toggling from current state")
+                _talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
+                logger.info(f"Toggled talking mode to: {_talking_mode}")
+            # Apply the mode settings
+            room_io.set_participant(data.caller_identity)
+            if _talking_mode == "push_to_talk":
+                session.input.set_audio_enabled(False)
+                logger.info("Setting audio enabled to False (PTT mode)")
+            else:
+                # When switching to realtime mode, clear user turn state to ensure proper initialization
+                session.clear_user_turn()
+                session.input.set_audio_enabled(True)
+                logger.info("Setting audio enabled to True (realtime mode)")
+            return json.dumps({"success": True, "mode": _talking_mode})
+        except json.JSONDecodeError:
+            logger.error(f"Failed to parse switch_ptt_and_rt payload: {data.payload}")
+            # Fallback to toggle behavior
+            _talking_mode = "push_to_talk" if _talking_mode == "realtime" else "realtime"
+            room_io.set_participant(data.caller_identity)
+            if _talking_mode == "push_to_talk":
+                session.input.set_audio_enabled(False)
+            else:
+                # When switching to realtime mode, clear user turn state
+                session.clear_user_turn()
+                session.input.set_audio_enabled(True)
+            return json.dumps({"success": True, "mode": _talking_mode})
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
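
Taken together, these hunks change the client-visible contract of the agent's RPC methods: switch_ptt_and_rt now accepts a JSON payload carrying the target mode and echoes the applied mode back, and start_turn reports a rejected interrupt as a JSON body ({"success": false, "message": "不能打断"}, i.e. "cannot interrupt") instead of raising an RPC error. A minimal TypeScript sketch of the client side of that contract, assuming a connected livekit-client Room and the agent participant's identity (the function and variable names here are illustrative, not part of the diff):

import { Room } from "livekit-client";

// switch_ptt_and_rt: send the desired mode, then trust the mode echoed back by the agent.
async function setTalkingMode(room: Room, agentIdentity: string, mode: "push_to_talk" | "realtime") {
  const raw = await room.localParticipant.performRpc({
    destinationIdentity: agentIdentity,
    method: "switch_ptt_and_rt",
    payload: JSON.stringify({ mode }),
  });
  return JSON.parse(raw) as { success: boolean; mode: string };
}

// start_turn: a rejected interrupt now arrives as success: false rather than a thrown RpcError.
async function startTurn(room: Room, agentIdentity: string) {
  const raw = await room.localParticipant.performRpc({
    destinationIdentity: agentIdentity,
    method: "start_turn",
    payload: "",
  });
  return JSON.parse(raw) as { success: boolean; message?: string };
}

The PhoneSimulator changes further down implement this same pattern, including a fallback for responses that are not valid JSON.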

View File

@@ -4,6 +4,10 @@ const withNextPluginPreval = createNextPluginPreval();
 /** @type {import('next').NextConfig} */
 const nextConfig = {
   reactStrictMode: false,
+  // Explicitly allow znjj.wangxin93.eu.org for Dev Origin, per future Next.js requirement.
+  allowedDevOrigins: [
+    "znjj.wangxin93.eu.org",
+  ],
 };
 module.exports = withNextPluginPreval(nextConfig);

View File

@@ -0,0 +1,214 @@
"use client";
import { TranscriptionTile } from "@/transcriptions/TranscriptionTile";
import { TrackReferenceOrPlaceholder } from "@livekit/components-react";
import { useCallback, useEffect, useRef, useState } from "react";
export interface ChatOverlayProps {
agentAudioTrack?: TrackReferenceOrPlaceholder;
accentColor: string;
inputDisabled?: boolean;
isVisible: boolean;
position: { x: number; y: number };
onPositionChange: (position: { x: number; y: number }) => void;
containerRef: React.RefObject<HTMLDivElement | null>;
onToggle: () => void;
}
export function ChatOverlay({
agentAudioTrack,
accentColor,
inputDisabled,
isVisible,
position,
onPositionChange,
containerRef,
onToggle,
}: ChatOverlayProps) {
const overlayRef = useRef<HTMLDivElement>(null);
const headerRef = useRef<HTMLDivElement>(null);
const [isDragging, setIsDragging] = useState(false);
const dragOffset = useRef({ x: 0, y: 0 });
// Responsive sizing based on container size
const [containerSize, setContainerSize] = useState({ width: 360, height: 500 });
useEffect(() => {
const updateSize = () => {
if (containerRef.current) {
const rect = containerRef.current.getBoundingClientRect();
setContainerSize({ width: rect.width, height: rect.height });
}
};
updateSize();
const resizeObserver = new ResizeObserver(updateSize);
if (containerRef.current) {
resizeObserver.observe(containerRef.current);
}
return () => {
resizeObserver.disconnect();
};
}, [containerRef]);
// Calculate overlay size as percentage of container, with min/max constraints
// Width: larger (up to 95% of container)
const overlayWidth = Math.min(
Math.max(containerSize.width * 0.9, 280),
containerSize.width * 0.95
);
// Height: smaller (reduced from 60-85% to 40-60%)
const overlayHeight = Math.min(
Math.max(containerSize.height * 0.4, 250),
containerSize.height * 0.6
);
// Position overlay at center (slightly moved up) when first shown
const hasPositionedRef = useRef(false);
useEffect(() => {
if (isVisible && containerRef.current && containerSize.width > 0 && overlayWidth > 0 && overlayHeight > 0) {
// Calculate center position, moved up by 15% of container height
const centerX = (containerSize.width - overlayWidth) / 2;
const centerY = (containerSize.height - overlayHeight) / 2 - (containerSize.height * 0.15);
// Only auto-position on first show (when position is at origin)
if (!hasPositionedRef.current && position.x === 0 && position.y === 0) {
onPositionChange({ x: Math.max(0, centerX), y: Math.max(0, centerY) });
hasPositionedRef.current = true;
}
}
}, [isVisible, containerSize.width, containerSize.height, overlayWidth, overlayHeight, containerRef, position, onPositionChange]);
const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
if (!overlayRef.current || !headerRef.current) return;
// Only allow dragging from the header, but not from buttons
const target = e.target as HTMLElement;
if (!headerRef.current.contains(target)) return;
// Don't drag if clicking on the close button
if (target.closest('button') || target.closest('svg')) return;
e.preventDefault();
e.stopPropagation();
setIsDragging(true);
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
dragOffset.current = {
x: clientX - position.x,
y: clientY - position.y,
};
};
const handleDragMove = useCallback((e: MouseEvent | TouchEvent) => {
if (!isDragging || !containerRef.current || !overlayRef.current) return;
e.preventDefault();
e.stopPropagation();
const containerRect = containerRef.current.getBoundingClientRect();
const overlayRect = overlayRef.current.getBoundingClientRect();
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
let newX = clientX - dragOffset.current.x;
let newY = clientY - dragOffset.current.y;
// Constrain within container bounds
const maxX = containerRect.width - overlayRect.width;
const maxY = containerRect.height - overlayRect.height;
const minY = 0; // Allow dragging to top
newX = Math.max(0, Math.min(newX, maxX));
newY = Math.max(minY, Math.min(newY, maxY));
onPositionChange({ x: newX, y: newY });
}, [isDragging, containerRef, overlayRef, onPositionChange]);
const handleDragEnd = useCallback(() => {
setIsDragging(false);
}, []);
useEffect(() => {
if (isDragging) {
window.addEventListener("mousemove", handleDragMove);
window.addEventListener("mouseup", handleDragEnd);
window.addEventListener("touchmove", handleDragMove, { passive: false });
window.addEventListener("touchend", handleDragEnd);
}
return () => {
window.removeEventListener("mousemove", handleDragMove);
window.removeEventListener("mouseup", handleDragEnd);
window.removeEventListener("touchmove", handleDragMove);
window.removeEventListener("touchend", handleDragEnd);
};
}, [isDragging, handleDragMove, handleDragEnd]);
return (
<div
ref={overlayRef}
className="absolute z-40 rounded-lg border border-white/20 shadow-2xl backdrop-blur-md transition-all duration-300 flex flex-col"
style={{
left: `${position.x}px`,
top: `${position.y}px`,
width: `${overlayWidth}px`,
height: `${overlayHeight}px`,
backgroundColor: 'rgba(0, 0, 0, 0.85)',
cursor: isDragging ? 'grabbing' : 'default',
display: isVisible ? 'flex' : 'none',
}}
onMouseDown={handleDragStart}
onTouchStart={handleDragStart}
>
{/* Header with drag handle and close button */}
<div
ref={headerRef}
className="flex items-center justify-between px-4 py-2 border-b border-white/10 cursor-move select-none"
style={{ backgroundColor: 'rgba(0, 0, 0, 0.3)' }}
>
<div className="flex items-center gap-2">
<div className="w-2 h-2 rounded-full bg-white/40"></div>
<span className="text-white text-xs font-medium">Chat</span>
</div>
<button
onClick={(e) => {
e.stopPropagation();
e.preventDefault();
onToggle();
}}
className="text-white hover:text-white transition-colors p-2 rounded hover:bg-white/10 flex items-center justify-center"
aria-label="Close chat overlay"
style={{ minWidth: '32px', minHeight: '32px' }}
>
<svg
className="w-5 h-5"
fill="none"
stroke="currentColor"
viewBox="0 0 24 24"
strokeWidth={2.5}
>
<path
strokeLinecap="round"
strokeLinejoin="round"
d="M6 18L18 6M6 6l12 12"
/>
</svg>
</button>
</div>
{/* Chat content with padding */}
<div className="overflow-hidden flex flex-col px-2 py-2" style={{ height: `calc(100% - 40px)` }}>
<TranscriptionTile
agentAudioTrack={agentAudioTrack}
accentColor={accentColor}
inputDisabled={inputDisabled}
/>
</div>
</div>
);
}
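
The drag logic above clamps the proposed overlay position to the container bounds on every move. As a standalone illustration of that bounds math (a sketch only; this helper does not exist in the component):

// Clamp a dragged element's top-left corner so it stays inside its container.
// Mirrors the math in handleDragMove; the function and parameter names are hypothetical.
function clampToContainer(
  proposed: { x: number; y: number },
  container: { width: number; height: number },
  overlay: { width: number; height: number },
  minY = 0, // handleDragMove allows dragging all the way to the top
): { x: number; y: number } {
  const maxX = container.width - overlay.width;
  const maxY = container.height - overlay.height;
  return {
    x: Math.max(0, Math.min(proposed.x, maxX)),
    y: Math.max(minY, Math.min(proposed.y, maxY)),
  };
}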

View File

@@ -13,8 +13,9 @@ import {
 } from "@livekit/components-react";
 import { ConnectionState, Track, LocalParticipant, Room } from "livekit-client";
 import { useEffect, useMemo, useState, useRef, useCallback } from "react";
-import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon } from "./icons";
+import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon, ChatIcon } from "./icons";
 import { useToast } from "@/components/toast/ToasterProvider";
+import { ChatOverlay } from "@/components/chat/ChatOverlay";
 export interface PhoneSimulatorProps {
   onConnect: () => void;
@@ -67,6 +68,13 @@ export function PhoneSimulator({
const [interruptRejected, setInterruptRejected] = useState(false);
const [isPushToTalkMode, setIsPushToTalkMode] = useState(true); // false = realtime mode, true = PTT mode (default)
const pushToTalkButtonRef = useRef<HTMLButtonElement>(null);
const [showChatOverlay, setShowChatOverlay] = useState(false);
const [chatOverlayPosition, setChatOverlayPosition] = useState({ x: 0, y: 0 }); // Will be positioned at top-right by ChatOverlay component
const [chatTogglePosition, setChatTogglePosition] = useState<{ x?: number; right?: number; y: number }>({ right: 16, y: 56 }); // Initial position on the right
const [isDraggingChatToggle, setIsDraggingChatToggle] = useState(false);
const chatToggleRef = useRef<HTMLButtonElement>(null);
const chatToggleDragOffset = useRef({ x: 0, y: 0 });
const chatToggleHasDragged = useRef(false);
useEffect(() => {
const voiceAttr = config.settings.attributes?.find(a => a.key === "voice");
@@ -75,6 +83,47 @@ export function PhoneSimulator({
}
}, [config.settings.attributes]);
// Set talking_mode attribute when connected or when mode changes
const lastTalkingModeRef = useRef<string | null>(null);
const configAttributesRef = useRef(config.settings.attributes);
// Update config attributes ref when it changes
useEffect(() => {
configAttributesRef.current = config.settings.attributes;
}, [config.settings.attributes]);
useEffect(() => {
if (roomState === ConnectionState.Connected && localParticipant) {
const talkingMode = isPushToTalkMode ? "push_to_talk" : "realtime";
// Only update if the mode actually changed
if (lastTalkingModeRef.current === talkingMode) {
return;
}
lastTalkingModeRef.current = talkingMode;
try {
// Get current attributes from config to preserve them
const attributesToSet: Record<string, string> = {};
const configAttributes = configAttributesRef.current || [];
configAttributes.forEach(attr => {
if (attr.key && attr.value) {
attributesToSet[attr.key] = attr.value;
}
});
// Add talking_mode
attributesToSet.talking_mode = talkingMode;
localParticipant.setAttributes(attributesToSet);
} catch (error) {
console.error("Failed to set talking_mode attribute:", error);
}
} else if (roomState === ConnectionState.Disconnected) {
// Reset ref when disconnected
lastTalkingModeRef.current = null;
}
}, [roomState, localParticipant, isPushToTalkMode]);
const [currentTime, setCurrentTime] = useState("");
const [visualizerPosition, setVisualizerPosition] = useState({
@@ -145,6 +194,96 @@ export function PhoneSimulator({
};
}, [isDragging]);
// Chat toggle button drag handlers
const handleChatToggleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault();
e.stopPropagation(); // Prevent triggering the button click
setIsDraggingChatToggle(true);
chatToggleHasDragged.current = false;
if (!phoneContainerRef.current || !chatToggleRef.current) return;
const containerRect = phoneContainerRef.current.getBoundingClientRect();
const buttonRect = chatToggleRef.current.getBoundingClientRect();
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
// If using right positioning, convert to x for dragging
if (chatTogglePosition.right !== undefined && chatTogglePosition.x === undefined) {
const currentX = containerRect.width - chatTogglePosition.right - buttonRect.width;
setChatTogglePosition({ x: currentX, y: chatTogglePosition.y });
chatToggleDragOffset.current = {
x: clientX - containerRect.left - currentX,
y: clientY - containerRect.top - chatTogglePosition.y,
};
} else {
// Already using x positioning
const currentX = chatTogglePosition.x ?? 0;
chatToggleDragOffset.current = {
x: clientX - containerRect.left - currentX,
y: clientY - containerRect.top - chatTogglePosition.y,
};
}
};
const handleChatToggleDragMove = (e: MouseEvent | TouchEvent) => {
if (!isDraggingChatToggle || !phoneContainerRef.current || !chatToggleRef.current) return;
e.preventDefault();
chatToggleHasDragged.current = true; // Mark that we've actually dragged
const containerRect = phoneContainerRef.current.getBoundingClientRect();
const buttonRect = chatToggleRef.current.getBoundingClientRect();
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
// Calculate new position relative to container
let newX = clientX - containerRect.left - chatToggleDragOffset.current.x;
let newY = clientY - containerRect.top - chatToggleDragOffset.current.y;
// Constrain within container
const maxX = containerRect.width - buttonRect.width;
const maxY = containerRect.height - buttonRect.height;
// On mobile (width < 768px), status bar is hidden, so allow dragging to top (y=0)
// On desktop, keep status bar height constraint (48px)
const isMobile = typeof window !== 'undefined' && window.innerWidth < 768;
const minY = isMobile ? 0 : 48; // statusBarHeight = 48px
newX = Math.max(0, Math.min(newX, maxX));
newY = Math.max(minY, Math.min(newY, maxY));
setChatTogglePosition({
x: newX,
y: newY,
});
};
const handleChatToggleDragEnd = () => {
setIsDraggingChatToggle(false);
// Reset the flag after a short delay to allow onClick to check it
setTimeout(() => {
chatToggleHasDragged.current = false;
}, 100);
};
useEffect(() => {
if (isDraggingChatToggle) {
window.addEventListener("mouseup", handleChatToggleDragEnd);
window.addEventListener("mousemove", handleChatToggleDragMove);
window.addEventListener("touchend", handleChatToggleDragEnd);
window.addEventListener("touchmove", handleChatToggleDragMove, { passive: false });
}
return () => {
window.removeEventListener("mouseup", handleChatToggleDragEnd);
window.removeEventListener("mousemove", handleChatToggleDragMove);
window.removeEventListener("touchend", handleChatToggleDragEnd);
window.removeEventListener("touchmove", handleChatToggleDragMove);
};
}, [isDraggingChatToggle]);
// Initialize chat toggle button position - keep it on the right using 'right' CSS property
// Only convert to 'x' (left positioning) when user drags it
useEffect(() => {
if (showCameraMenu) {
Room.getLocalDevices("videoinput").then(setCameras);
@@ -472,14 +611,27 @@ export function PhoneSimulator({
   const handleModeSwitch = async () => {
     if (!room || !voiceAssistant.agent) return;
+    // Determine the target mode (toggle from current state)
+    const targetMode = isPushToTalkMode ? "realtime" : "push_to_talk";
     try {
-      await room.localParticipant.performRpc({
+      const response = await room.localParticipant.performRpc({
         destinationIdentity: voiceAssistant.agent.identity,
         method: "switch_ptt_and_rt",
-        payload: "",
+        payload: JSON.stringify({ mode: targetMode }),
       });
-      // Toggle mode on success
-      setIsPushToTalkMode(prev => !prev);
+      // Parse the response to confirm the mode was set
+      try {
+        const responseData = JSON.parse(response);
+        const confirmedMode = responseData.mode;
+        // Update state based on server response
+        setIsPushToTalkMode(confirmedMode === "push_to_talk");
+      } catch (parseError) {
+        // If parsing fails, update state based on what we sent
+        console.warn("Failed to parse mode switch response, using sent mode:", parseError);
+        setIsPushToTalkMode(targetMode === "push_to_talk");
+      }
     } catch (error: any) {
       console.error("Failed to switch mode:", error);
       // Don't show error toast for mode switch failures, just log
@@ -499,16 +651,38 @@ export function PhoneSimulator({
     setInterruptRejected(false);
     try {
-      await room.localParticipant.performRpc({
+      const response = await room.localParticipant.performRpc({
         destinationIdentity: voiceAssistant.agent.identity,
         method: "start_turn",
         payload: "",
       });
-      setIsPushToTalkActive(true);
-      setInterruptRejected(false);
+      // Parse the response to check for success/failure
+      try {
+        const responseData = JSON.parse(response);
+        if (responseData.success === false) {
+          // Interrupt was rejected, show message
+          if (responseData.message === "不能打断") {
+            setInterruptRejected(true);
+            // Clear the rejection message after 3 seconds
+            setTimeout(() => setInterruptRejected(false), 3000);
+            if (process.env.NODE_ENV === 'development') {
+              console.log("Interrupt rejected (cannot interrupt):", responseData.message);
+            }
+            return;
+          }
+        } else if (responseData.success === true) {
+          // Successfully started turn
+          setIsPushToTalkActive(true);
+          setInterruptRejected(false);
+        }
+      } catch (parseError) {
+        // If response is not JSON, assume success (backward compatibility)
+        setIsPushToTalkActive(true);
+        setInterruptRejected(false);
+      }
     } catch (error: any) {
       // Prevent error from propagating to React error boundary
       // by handling all expected errors here
       // Handle RPC errors (method not found, etc.)
       setIsPushToTalkActive(false);
       const errorMessage = error?.message || "";
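
Both this call site and handleModeSwitch above parse the RPC response inside a nested try/catch so that a non-JSON reply degrades gracefully. A hypothetical helper that factors out that pattern (not part of the diff) might look like:

// Parse an RPC response string, returning null instead of throwing on non-JSON replies.
function tryParseRpcResponse<T>(raw: string): T | null {
  try {
    return JSON.parse(raw) as T;
  } catch {
    return null;
  }
}

// Example usage for start_turn:
// const parsed = tryParseRpcResponse<{ success: boolean; message?: string }>(response);
// if (parsed?.success === false) { /* interrupt rejected: show 不允许打断 */ }
// else { setIsPushToTalkActive(true); }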
@@ -524,45 +698,6 @@ export function PhoneSimulator({
         return;
       }
-      // Check for "Application error in method handler" - this indicates interrupt failed
-      // This error is raised when session.interrupt() fails in the agent
-      // We handle this gracefully by showing "不允许打断" on the button, so we don't log it as an error
-      if (errorMessage.includes("Application error in method handler") ||
-          errorMessage.includes("Application error") ||
-          errorCode === 13 || // ERROR_INTERNAL (RpcErrorCode.ERROR_INTERNAL)
-          (isAgentSpeaking && errorMessage.includes("interrupt"))) {
-        // Suppress error logging for expected interrupt failures
-        // Only log at debug level to avoid error popups
-        if (process.env.NODE_ENV === 'development') {
-          console.log("Interrupt rejected (expected behavior):", errorMessage);
-        }
-        setInterruptRejected(true);
-        // Clear the rejection message after 3 seconds
-        setTimeout(() => setInterruptRejected(false), 3000);
-        // Explicitly prevent error from propagating
-        error.preventDefault?.();
-        error.stopPropagation?.();
-        return;
-      }
-      // Check if agent is speaking and the error suggests interruption was rejected
-      if (isAgentSpeaking) {
-        // Check for common rejection indicators
-        if (errorMessage.includes("reject") ||
-            errorMessage.includes("not allowed") ||
-            errorCode === 403 || // Forbidden
-            errorCode === 409) { // Conflict
-          // Suppress error logging for expected rejections
-          if (process.env.NODE_ENV === 'development') {
-            console.log("Interrupt rejected:", errorMessage);
-          }
-          setInterruptRejected(true);
-          // Clear the rejection message after 3 seconds
-          setTimeout(() => setInterruptRejected(false), 3000);
-          return;
-        }
-      }
       // Only log and show error for unexpected errors
       console.error("Unexpected error in push-to-talk:", error);
       const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered.";
@@ -852,6 +987,37 @@ export function PhoneSimulator({
</div>
</div>
{/* Chat Toggle Button - Top Right, aligned with audio visualizer (Draggable) */}
{roomState === ConnectionState.Connected &&
voiceAssistant.agent &&
phoneMode !== "important_message" &&
phoneMode !== "capture" && (
<button
ref={chatToggleRef}
className={`absolute z-50 p-3 rounded-full backdrop-blur-md transition-colors shadow-lg cursor-move select-none touch-none ${
showChatOverlay
? "bg-blue-500/80 text-white"
: "bg-gray-800/70 text-white hover:bg-gray-800/90"
}`}
onClick={(e) => {
// Only toggle if we didn't just drag
if (!chatToggleHasDragged.current) {
setShowChatOverlay(!showChatOverlay);
}
}}
onMouseDown={handleChatToggleDragStart}
onTouchStart={handleChatToggleDragStart}
title={showChatOverlay ? "Hide chat (drag to move)" : "Show chat (drag to move)"}
style={{
...(chatTogglePosition.x !== undefined ? { left: chatTogglePosition.x } : {}),
...(chatTogglePosition.right !== undefined ? { right: chatTogglePosition.right } : {}),
top: chatTogglePosition.y,
}}
>
<ChatIcon className="w-5 h-5 md:w-6 md:h-6" />
</button>
)}
{/* Main Content */}
<div ref={phoneContainerRef} className="flex-grow relative bg-gray-950 w-full h-full overflow-hidden"
style={{
@@ -1002,6 +1168,23 @@ export function PhoneSimulator({
</div>
)}
{/* Chat Overlay - Hidden during capture and important_message modes */}
{roomState === ConnectionState.Connected &&
voiceAssistant.agent &&
phoneMode !== "capture" &&
phoneMode !== "important_message" && (
<ChatOverlay
agentAudioTrack={voiceAssistant.audioTrack}
accentColor={config.settings.theme_color}
inputDisabled={phoneMode === "hand_off"}
isVisible={showChatOverlay}
position={chatOverlayPosition}
onPositionChange={setChatOverlayPosition}
containerRef={phoneContainerRef}
onToggle={() => setShowChatOverlay(!showChatOverlay)}
/>
)}
{/* Call Controls Overlay */}
{roomState === ConnectionState.Connected && (
phoneMode === "capture" ? (
@@ -1120,102 +1303,143 @@ export function PhoneSimulator({
{/* Push-to-Talk Mode Layout */}
{isPushToTalkMode && phoneMode !== "hand_off" && voiceAssistant.agent && (
<div className="w-full flex items-center justify-center gap-8">
{/* Camera Switch Button - Left (hidden in important_message mode) */}
{phoneMode !== "important_message" && (
<div className="relative">
<>
{/* Important Message Mode - Centered End Call Button */}
{phoneMode === "important_message" ? (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleSwitchCamera}
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<SwitchCameraIcon className="w-6 h-6" />
<PhoneOffIcon className="w-6 h-6" />
</button>
{showCameraMenu && (
<div className="absolute bottom-full mb-2 left-0 bg-gray-900 border border-gray-800 rounded-lg shadow-xl py-2 w-48 z-50">
{cameras.length === 0 ? (
<div className="px-4 py-2 text-gray-500 text-sm">
No cameras found
</div>
</div>
) : (
<div className="w-full flex items-center justify-between gap-8">
{/* Left side: Mic Toggle and Camera Switch Buttons */}
<div className="flex flex-col items-center gap-2">
{/* Mic Toggle Button */}
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-800/50 text-white hover:bg-gray-800/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
cameras.map((device) => (
<button
key={device.deviceId}
onClick={() => handleSelectCamera(device.deviceId)}
className="w-full text-left px-4 py-2 text-sm text-white hover:bg-gray-800 transition-colors truncate"
>
{device.label ||
`Camera ${cameras.indexOf(device) + 1}`}
</button>
))
<MicOffIcon className="w-6 h-6" />
)}
</button>
{/* Camera Switch Button */}
<div className="relative">
<button
className="p-4 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleSwitchCamera}
>
<SwitchCameraIcon className="w-6 h-6" />
</button>
{showCameraMenu && (
<div className="absolute bottom-full mb-2 left-0 bg-gray-900 border border-gray-800 rounded-lg shadow-xl py-2 w-48 z-50">
{cameras.length === 0 ? (
<div className="px-4 py-2 text-gray-500 text-sm">
No cameras found
</div>
) : (
cameras.map((device) => (
<button
key={device.deviceId}
onClick={() => handleSelectCamera(device.deviceId)}
className="w-full text-left px-4 py-2 text-sm text-white hover:bg-gray-800 transition-colors truncate"
>
{device.label ||
`Camera ${cameras.indexOf(device) + 1}`}
</button>
))
)}
</div>
)}
</div>
)}
</div>
{/* Center: Large Push-to-Talk Button */}
<button
ref={pushToTalkButtonRef}
className={`w-24 h-24 rounded-full backdrop-blur-md transition-all flex flex-col items-center justify-center gap-2 aspect-square select-none ${
interruptRejected
? "bg-red-500/70 text-white"
: isPushToTalkActive
? "bg-green-500 text-white scale-110 shadow-lg shadow-green-500/50"
: "bg-blue-500/70 text-white hover:bg-blue-500/90"
}`}
style={{ borderRadius: '50%' }}
onMouseDown={handlePushToTalkMouseDown}
onMouseUp={handlePushToTalkMouseUp}
onTouchStart={handlePushToTalkTouchStart}
onTouchEnd={handlePushToTalkTouchEnd}
title={supportsPushToTalk ? "Push to Talk" : "Push to Talk (may not be supported by this agent)"}
>
<MicIcon className="w-8 h-8" />
<span className="text-xs font-medium">
{interruptRejected ? "不允许打断" : "按住说话"}
</span>
</button>
{/* Right side: End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
{/* Large Push-to-Talk Button - Center (hidden in important_message mode) */}
{phoneMode !== "important_message" && (
<button
ref={pushToTalkButtonRef}
className={`w-24 h-24 rounded-full backdrop-blur-md transition-all flex flex-col items-center justify-center gap-2 aspect-square select-none ${
interruptRejected
? "bg-red-500/70 text-white"
: isPushToTalkActive
? "bg-green-500 text-white scale-110 shadow-lg shadow-green-500/50"
: "bg-blue-500/70 text-white hover:bg-blue-500/90"
}`}
style={{ borderRadius: '50%' }}
onMouseDown={handlePushToTalkMouseDown}
onMouseUp={handlePushToTalkMouseUp}
onTouchStart={handlePushToTalkTouchStart}
onTouchEnd={handlePushToTalkTouchEnd}
title={supportsPushToTalk ? "Push to Talk" : "Push to Talk (may not be supported by this agent)"}
>
<MicIcon className="w-8 h-8" />
<span className="text-xs font-medium">
{interruptRejected ? "不允许打断" : "按住说话"}
</span>
</button>
)}
{/* End Call Button - Right (always shown in PTT mode) */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
</>
)}
{/* Realtime Mode Layout */}
{!isPushToTalkMode && (
<div className="w-full flex items-center justify-center gap-8">
{phoneMode !== "important_message" && phoneMode !== "hand_off" && (
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
)}
{!isPushToTalkMode && phoneMode !== "hand_off" && (
<>
{/* Important Message Mode - Centered End Call Button */}
{phoneMode === "important_message" ? (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
) : (
<div className="w-full flex items-center justify-center gap-4">
{/* Mic Toggle */}
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
{/* End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
{/* End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</>
)}
{/* Hand Off Mode - Show only End Call Button */}

View File

@@ -873,7 +873,7 @@ export default function Playground({
           <PlaygroundTabbedTile
             className="h-full"
             tabs={mobileTabs}
-            initialTab={mobileTabs.length - 1}
+            initialTab={0}
           />
         </div>
         <div

View File

@@ -207,3 +207,20 @@ export const VoiceIcon = ({ className }: { className?: string }) => (
     <line x1="12" y1="19" x2="12" y2="22" />
   </svg>
 );
+
+export const ChatIcon = ({ className }: { className?: string }) => (
+  <svg
+    xmlns="http://www.w3.org/2000/svg"
+    width="24"
+    height="24"
+    viewBox="0 0 24 24"
+    fill="none"
+    stroke="currentColor"
+    strokeWidth="2"
+    strokeLinecap="round"
+    strokeLinejoin="round"
+    className={className}
+  >
+    <path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path>
+  </svg>
+);