"use client"; import { useConfig } from "@/hooks/useConfig"; import { BarVisualizer, useConnectionState, useLocalParticipant, useParticipantAttributes, useRoomContext, useTracks, useVoiceAssistant, VideoTrack, } from "@livekit/components-react"; import { ConnectionState, Track, LocalParticipant, Room } from "livekit-client"; import { useEffect, useMemo, useState, useRef, useCallback } from "react"; import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon, ChatIcon } from "./icons"; import { useToast } from "@/components/toast/ToasterProvider"; import { ChatOverlay } from "@/components/chat/ChatOverlay"; export interface PhoneSimulatorProps { onConnect: () => void; onDisconnect: () => void; phoneMode?: "normal" | "capture" | "important_message" | "hand_off"; onCapture?: (image: File) => void; capturePrompt?: string; importantMessage?: string; importantMessageOptions?: string[]; onImportantMessageAction?: (text: string) => void; } export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal", onCapture, capturePrompt, importantMessage, importantMessageOptions, onImportantMessageAction }: PhoneSimulatorProps) { const { config, setUserSettings } = useConfig(); const { setToastMessage } = useToast(); const room = useRoomContext(); const roomState = useConnectionState(); const { localParticipant, isMicrophoneEnabled: isMicEnabled } = useLocalParticipant(); const tracks = useTracks(); const voiceAssistant = useVoiceAssistant(); const agentAttributes = useParticipantAttributes({ participant: voiceAssistant.agent, }); const fileInputRef = useRef(null); const phoneContainerRef = useRef(null); const visualizerRef = useRef(null); const [showCameraMenu, setShowCameraMenu] = useState(false); const [showVoiceMenu, setShowVoiceMenu] = useState(false); const [cameras, setCameras] = useState([]); const [processingImage, setProcessingImage] = useState(null); const [currentVoiceId, setCurrentVoiceId] = useState("BV001_streaming"); // Default voice ID const [isCapturing, setIsCapturing] = useState(false); const [processingSource, setProcessingSource] = useState< "camera" | "upload" | null >(null); const MAX_UPLOAD_MB = 10; const isAgentSpeaking = voiceAssistant.state === "speaking"; const wasMicEnabledRef = useRef(false); const lastPhoneMode = useRef(phoneMode); const [isPushToTalkActive, setIsPushToTalkActive] = useState(false); const [interruptRejected, setInterruptRejected] = useState(false); const [isPushToTalkMode, setIsPushToTalkMode] = useState(true); // false = realtime mode, true = PTT mode (default) const pushToTalkButtonRef = useRef(null); const [showChatOverlay, setShowChatOverlay] = useState(false); const [chatOverlayPosition, setChatOverlayPosition] = useState({ x: 0, y: 0 }); // Will be positioned at top-right by ChatOverlay component useEffect(() => { const voiceAttr = config.settings.attributes?.find(a => a.key === "voice"); if (voiceAttr) { setCurrentVoiceId(voiceAttr.value); } }, [config.settings.attributes]); const [currentTime, setCurrentTime] = useState(""); const [visualizerPosition, setVisualizerPosition] = useState({ x: 16, y: 56, }); const [isDragging, setIsDragging] = useState(false); const dragOffset = useRef({ x: 0, y: 0 }); const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => { e.preventDefault(); setIsDragging(true); const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX; const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY; dragOffset.current = { x: clientX - visualizerPosition.x, y: clientY - visualizerPosition.y, }; }; const handleDragMove = (e: MouseEvent | TouchEvent) => { if (!isDragging || !phoneContainerRef.current || !visualizerRef.current) return; e.preventDefault(); const containerRect = phoneContainerRef.current.getBoundingClientRect(); const visualizerRect = visualizerRef.current.getBoundingClientRect(); const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX; const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY; let newX = clientX - dragOffset.current.x; let newY = clientY - dragOffset.current.y; // Constrain within container const maxX = containerRect.width - visualizerRect.width; const maxY = containerRect.height - visualizerRect.height; // On mobile (width < 768px), status bar is hidden, so allow dragging to top (y=0) // On desktop, keep status bar height constraint (48px) const isMobile = typeof window !== 'undefined' && window.innerWidth < 768; const minY = isMobile ? 0 : 48; // statusBarHeight = 48px newX = Math.max(0, Math.min(newX, maxX)); newY = Math.max(minY, Math.min(newY, maxY)); setVisualizerPosition({ x: newX, y: newY, }); }; const handleDragEnd = () => { setIsDragging(false); }; useEffect(() => { if (isDragging) { window.addEventListener("mouseup", handleDragEnd); window.addEventListener("mousemove", handleDragMove); window.addEventListener("touchend", handleDragEnd); window.addEventListener("touchmove", handleDragMove, { passive: false }); } return () => { window.removeEventListener("mouseup", handleDragEnd); window.removeEventListener("mousemove", handleDragMove); window.removeEventListener("touchend", handleDragEnd); window.removeEventListener("touchmove", handleDragMove); }; }, [isDragging]); useEffect(() => { if (showCameraMenu) { Room.getLocalDevices("videoinput").then(setCameras); } }, [showCameraMenu]); // Close menu when clicking outside useEffect(() => { const handleClickOutside = (event: MouseEvent) => { if (showCameraMenu) { setShowCameraMenu(false); } if (showVoiceMenu) { setShowVoiceMenu(false); } }; if (showCameraMenu || showVoiceMenu) { document.addEventListener("click", handleClickOutside); } return () => { document.removeEventListener("click", handleClickOutside); }; }, [showCameraMenu, showVoiceMenu]); useEffect(() => { if (voiceAssistant.state === "speaking") { setProcessingImage(null); setProcessingSource(null); } }, [voiceAssistant.state]); useEffect(() => { const enteringMode = (mode: typeof phoneMode) => phoneMode === mode && lastPhoneMode.current !== mode; // Only proceed if connected and localParticipant is available if (roomState !== ConnectionState.Connected || !localParticipant) return; const updateMicState = async () => { // Entering important message / capture / hand_off: remember mic state and mute if needed if (enteringMode("important_message") || enteringMode("capture") || enteringMode("hand_off")) { wasMicEnabledRef.current = isMicEnabled; if (isMicEnabled) { try { await localParticipant.setMicrophoneEnabled(false); } catch (error) { console.error("Failed to disable microphone:", error); } } } // Exiting important message mode or hand off mode or capture mode else if ( (phoneMode !== "important_message" && lastPhoneMode.current === "important_message") || (phoneMode !== "hand_off" && lastPhoneMode.current === "hand_off") || (phoneMode !== "capture" && lastPhoneMode.current === "capture") ) { // Restore mic to previous state try { await localParticipant.setMicrophoneEnabled(wasMicEnabledRef.current); } catch (error) { console.error("Failed to restore microphone:", error); } // If exiting capture mode, clear processing image if (lastPhoneMode.current === "capture") { setProcessingImage(null); setProcessingSource(null); } } // Enforce mic off in important message mode, hand off mode, or capture mode else if ((phoneMode === "important_message" || phoneMode === "hand_off" || phoneMode === "capture") && isMicEnabled) { try { await localParticipant.setMicrophoneEnabled(false); } catch (error) { console.error("Failed to disable microphone:", error); } } }; updateMicState(); lastPhoneMode.current = phoneMode; }, [phoneMode, isMicEnabled, localParticipant, roomState]); useEffect(() => { const updateTime = () => { const now = new Date(); setCurrentTime( now.toLocaleTimeString("en-US", { hour: "numeric", minute: "2-digit", hour12: true, }) ); }; updateTime(); const interval = setInterval(updateTime, 60000); return () => clearInterval(interval); }, []); const localTracks = tracks.filter( ({ participant }) => participant instanceof LocalParticipant ); const localCameraTrack = localTracks.find( ({ source }) => source === Track.Source.Camera ); const handleMicToggle = async () => { if (roomState !== ConnectionState.Connected || !localParticipant) return; try { if (isMicEnabled) { await localParticipant.setMicrophoneEnabled(false); } else { await localParticipant.setMicrophoneEnabled(true); } } catch (error) { console.error("Failed to toggle microphone:", error); // Silently handle the error to avoid disrupting user experience } }; const handleDisconnect = () => { try { // Only disconnect if we're actually connected if (roomState === ConnectionState.Connected || roomState === ConnectionState.Connecting) { onDisconnect(); } } catch (error) { // Silently handle any errors during disconnect console.warn("Error during disconnect:", error); // Still try to call onDisconnect to ensure cleanup try { onDisconnect(); } catch (e) { // Ignore secondary errors } } }; const validateImageFile = (file: File) => { const isImage = file.type.startsWith("image/"); const isSizeOk = file.size > 0 && file.size <= MAX_UPLOAD_MB * 1024 * 1024; return { isValid: isImage && isSizeOk, isImage, isSizeOk, }; }; const showErrorToast = (message: string) => { setToastMessage({ message, type: "error" }); }; const handleCapture = async () => { if (!localCameraTrack || !onCapture || isCapturing) return; setIsCapturing(true); const trackReference = localCameraTrack as any; // Handle both TrackReference (from useTracks) and potential direct Track objects const track = trackReference.publication?.track?.mediaStreamTrack || trackReference.mediaStreamTrack; if (!track) { console.error("No media stream track found"); setIsCapturing(false); return; } const video = document.createElement("video"); video.srcObject = new MediaStream([track]); video.muted = true; video.playsInline = true; video.autoplay = true; // Element needs to be in the DOM for some browsers to play it properly video.style.position = "absolute"; video.style.top = "-9999px"; video.style.left = "-9999px"; document.body.appendChild(video); try { await video.play(); // Wait for video dimensions to be available if (video.videoWidth === 0 || video.videoHeight === 0) { await new Promise((resolve) => { video.onloadedmetadata = () => resolve(); // Timeout to prevent hanging setTimeout(resolve, 1000); }); } const canvas = document.createElement("canvas"); // Default to video dimensions let renderWidth = video.videoWidth; let renderHeight = video.videoHeight; let sourceX = 0; let sourceY = 0; let sourceWidth = video.videoWidth; let sourceHeight = video.videoHeight; // If the video is landscape but we want a portrait crop (like a phone) // We want an aspect ratio of roughly 9:19.5 (from the container styles) const targetAspect = 9 / 19.5; const videoAspect = video.videoWidth / video.videoHeight; if (videoAspect > targetAspect) { // Video is wider than target - crop width const newWidth = video.videoHeight * targetAspect; sourceX = (video.videoWidth - newWidth) / 2; sourceWidth = newWidth; renderWidth = newWidth; } else { // Video is taller than target - crop height (less common for landscape webcam) const newHeight = video.videoWidth / targetAspect; sourceY = (video.videoHeight - newHeight) / 2; sourceHeight = newHeight; renderHeight = newHeight; } canvas.width = renderWidth; canvas.height = renderHeight; const ctx = canvas.getContext("2d"); if (ctx) { // Mirror the image to match the preview ctx.translate(canvas.width, 0); ctx.scale(-1, 1); // Draw only the cropped portion of the video ctx.drawImage( video, sourceX, sourceY, sourceWidth, sourceHeight, 0, 0, renderWidth, renderHeight ); // Reset transform ctx.setTransform(1, 0, 0, 1, 0, 0); // Use toDataURL for immediate preview feedback const dataUrl = canvas.toDataURL("image/jpeg"); setProcessingImage(dataUrl); setProcessingSource("camera"); // Create a new canvas for the final output (unmirrored if needed, but user requested mirrored) // The user requested to mirror the shuttled photo, which we did above for the canvas. // So the blob created from this canvas will also be mirrored. canvas.toBlob((blob) => { if (blob && onCapture) { const file = new File([blob], "camera-capture.jpg", { type: "image/jpeg", }); const { isValid } = validateImageFile(file); if (!isValid) { setProcessingImage(null); setProcessingSource(null); showErrorToast(`请拍摄${MAX_UPLOAD_MB}MB以内的图片文件。`); setIsCapturing(false); return; } onCapture(file); } setIsCapturing(false); }, "image/jpeg"); } else { setIsCapturing(false); } } catch (e) { console.error("Failed to capture image", e); setIsCapturing(false); } finally { // Cleanup video.pause(); video.srcObject = null; if (document.body.contains(video)) { document.body.removeChild(video); } video.remove(); } }; const handleUpload = () => { if (fileInputRef.current) { fileInputRef.current.click(); } }; const handleSwitchCamera = async (e: React.MouseEvent) => { e.stopPropagation(); // Prevent immediate close setShowCameraMenu(!showCameraMenu); }; const handleSelectCamera = async (deviceId: string) => { await room.switchActiveDevice("videoinput", deviceId); setShowCameraMenu(false); }; const handleChangeVoice = (voiceId: string) => { const newSettings = { ...config.settings }; const attributes = newSettings.attributes ? [...newSettings.attributes] : []; const voiceAttrIndex = attributes.findIndex(a => a.key === "voice"); if (voiceAttrIndex >= 0) { attributes[voiceAttrIndex] = { ...attributes[voiceAttrIndex], value: voiceId }; } else { attributes.push({ id: "voice", key: "voice", value: voiceId }); } newSettings.attributes = attributes; setUserSettings(newSettings); setCurrentVoiceId(voiceId); setTimeout(() => setShowVoiceMenu(false), 200); }; const handleVoiceMenuToggle = (e: React.MouseEvent) => { e.stopPropagation(); setShowVoiceMenu(!showVoiceMenu); }; const handleModeSwitch = async () => { if (!room || !voiceAssistant.agent) return; // Determine the target mode (toggle from current state) const targetMode = isPushToTalkMode ? "realtime" : "push_to_talk"; try { const response = await room.localParticipant.performRpc({ destinationIdentity: voiceAssistant.agent.identity, method: "switch_ptt_and_rt", payload: JSON.stringify({ mode: targetMode }), }); // Parse the response to confirm the mode was set try { const responseData = JSON.parse(response); const confirmedMode = responseData.mode; // Update state based on server response setIsPushToTalkMode(confirmedMode === "push_to_talk"); } catch (parseError) { // If parsing fails, update state based on what we sent console.warn("Failed to parse mode switch response, using sent mode:", parseError); setIsPushToTalkMode(targetMode === "push_to_talk"); } } catch (error: any) { console.error("Failed to switch mode:", error); // Don't show error toast for mode switch failures, just log } }; // Check if agent supports push-to-talk (optional check, button will show regardless) const supportsPushToTalk = useMemo(() => { if (!voiceAssistant.agent || !agentAttributes.attributes) return false; return agentAttributes.attributes["push-to-talk"] === "1"; }, [voiceAssistant.agent, agentAttributes.attributes]); const handlePushToTalkStart = async () => { if (!room || !voiceAssistant.agent || isPushToTalkActive) return; // Reset interrupt rejection state setInterruptRejected(false); try { const response = await room.localParticipant.performRpc({ destinationIdentity: voiceAssistant.agent.identity, method: "start_turn", payload: "", }); // Parse the response to check for success/failure try { const responseData = JSON.parse(response); if (responseData.success === false) { // Interrupt was rejected, show message if (responseData.message === "不能打断") { setInterruptRejected(true); // Clear the rejection message after 3 seconds setTimeout(() => setInterruptRejected(false), 3000); if (process.env.NODE_ENV === 'development') { console.log("Interrupt rejected (cannot interrupt):", responseData.message); } return; } } else if (responseData.success === true) { // Successfully started turn setIsPushToTalkActive(true); setInterruptRejected(false); } } catch (parseError) { // If response is not JSON, assume success (backward compatibility) setIsPushToTalkActive(true); setInterruptRejected(false); } } catch (error: any) { // Handle RPC errors (method not found, etc.) setIsPushToTalkActive(false); const errorMessage = error?.message || ""; const errorCode = error?.code; // Check for "Method not supported at destination" - this happens when RPC methods aren't registered yet // This can occur on first call before agent is fully ready, so we silently ignore it if (errorMessage.includes("Method not supported at destination") || errorMessage.includes("method not found") || errorCode === 12) { // METHOD_NOT_FOUND // Silently ignore - the method will be available after first turn console.log("RPC method not ready yet, will be available after first turn"); return; } // Only log and show error for unexpected errors console.error("Unexpected error in push-to-talk:", error); const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered."; setToastMessage({ message: defaultErrorMessage, type: "error" }); } }; const handlePushToTalkEnd = useCallback(async () => { // Always clear interrupt rejection state when button is released setInterruptRejected(false); if (!room || !voiceAssistant.agent || !isPushToTalkActive) return; try { await room.localParticipant.performRpc({ destinationIdentity: voiceAssistant.agent.identity, method: "end_turn", payload: "", }); setIsPushToTalkActive(false); } catch (error: any) { console.error("Failed to end turn:", error); // Don't show error toast on end_turn failure as it might be called during cleanup setIsPushToTalkActive(false); } }, [room, voiceAssistant.agent, isPushToTalkActive]); const handlePushToTalkCancel = useCallback(async () => { // Always clear interrupt rejection state when button is cancelled setInterruptRejected(false); if (!room || !voiceAssistant.agent || !isPushToTalkActive) return; try { await room.localParticipant.performRpc({ destinationIdentity: voiceAssistant.agent.identity, method: "cancel_turn", payload: "", }); setIsPushToTalkActive(false); } catch (error) { console.error("Failed to cancel turn:", error); setIsPushToTalkActive(false); } }, [room, voiceAssistant.agent, isPushToTalkActive]); // Handle mouse events for push-to-talk const handlePushToTalkMouseDown = (e: React.MouseEvent) => { e.preventDefault(); handlePushToTalkStart(); }; const handlePushToTalkMouseUp = (e: React.MouseEvent) => { e.preventDefault(); handlePushToTalkEnd(); }; // Handle touch events for push-to-talk const handlePushToTalkTouchStart = (e: React.TouchEvent) => { e.preventDefault(); handlePushToTalkStart(); }; const handlePushToTalkTouchEnd = (e: React.TouchEvent) => { e.preventDefault(); handlePushToTalkEnd(); }; // Handle window blur, escape key, and global mouse/touch events to cancel/end push-to-talk useEffect(() => { if (!isPushToTalkActive) return; const handleBlur = () => { handlePushToTalkCancel(); }; const handleKeyDown = (e: KeyboardEvent) => { if (e.key === "Escape") { handlePushToTalkCancel(); } }; // Handle global mouseup/touchend to end push-to-talk even if released outside button const handleGlobalMouseUp = () => { // Clear interrupt rejection state immediately when button is released setInterruptRejected(false); handlePushToTalkEnd(); }; const handleGlobalTouchEnd = () => { // Clear interrupt rejection state immediately when button is released setInterruptRejected(false); handlePushToTalkEnd(); }; window.addEventListener("blur", handleBlur); window.addEventListener("keydown", handleKeyDown); window.addEventListener("mouseup", handleGlobalMouseUp); window.addEventListener("touchend", handleGlobalTouchEnd); return () => { window.removeEventListener("blur", handleBlur); window.removeEventListener("keydown", handleKeyDown); window.removeEventListener("mouseup", handleGlobalMouseUp); window.removeEventListener("touchend", handleGlobalTouchEnd); }; }, [isPushToTalkActive, handlePushToTalkCancel, handlePushToTalkEnd]); // Clean up push-to-talk state on disconnect useEffect(() => { if (roomState === ConnectionState.Disconnected && isPushToTalkActive) { setIsPushToTalkActive(false); setInterruptRejected(false); } }, [roomState, isPushToTalkActive]); // Reset interrupt rejection when agent stops speaking useEffect(() => { if (!isAgentSpeaking && interruptRejected) { // Clear rejection state when agent finishes speaking const timer = setTimeout(() => setInterruptRejected(false), 1000); return () => clearTimeout(timer); } }, [isAgentSpeaking, interruptRejected]); const handleFileChange = (event: React.ChangeEvent) => { const file = event.target.files?.[0]; if (file && onCapture) { const { isValid, isImage, isSizeOk } = validateImageFile(file); if (!isValid) { const msg = !isImage ? "请选择图片文件。" : `请上传${MAX_UPLOAD_MB}MB以内的图片文件。`; showErrorToast(msg); } else { onCapture(file); setProcessingImage(URL.createObjectURL(file)); setProcessingSource("upload"); } } // Reset input so the same file can be selected again if (event.target) { event.target.value = ""; } }; const videoContent = (() => { if (roomState === ConnectionState.Disconnected) { return (
{showVoiceMenu && (
e.stopPropagation()} >
)}
); } if (!localCameraTrack) { return (
Camera off
); } return ( ); })(); return (
{/* Status Bar */}
{currentTime}
{/* Chat Toggle Button - Top Right, aligned with audio visualizer */} {roomState === ConnectionState.Connected && voiceAssistant.agent && phoneMode !== "important_message" && phoneMode !== "capture" && ( )} {/* Main Content */}
{videoContent}
{processingImage && (
{/* eslint-disable-next-line @next/next/no-img-element */} Processing
正在分析照片,请稍候...
)} {/* Capture Guide Lines */} {roomState === ConnectionState.Connected && phoneMode === "capture" && !processingImage && (
{/* Thirds Grid */}
{/* Center Focus Indicator */}
{/* Prompt Display */} {capturePrompt && (
{capturePrompt}
)}
)}
{/* Important Message Overlay */} {roomState === ConnectionState.Connected && phoneMode === "important_message" && (

{importantMessage}

{importantMessageOptions && importantMessageOptions.length > 0 ? ( importantMessageOptions.map((option, index) => ( )) ) : ( )}
)} {/* Hand Off Mode Overlay */} {roomState === ConnectionState.Connected && phoneMode === "hand_off" && (
转人工...
)} {/* Agent Audio Visualizer (Draggable) */} {roomState === ConnectionState.Connected && voiceAssistant.audioTrack && phoneMode !== "hand_off" && (
)} {/* Chat Overlay - Hidden during capture and important_message modes */} {roomState === ConnectionState.Connected && voiceAssistant.agent && phoneMode !== "capture" && phoneMode !== "important_message" && ( setShowChatOverlay(!showChatOverlay)} /> )} {/* Call Controls Overlay */} {roomState === ConnectionState.Connected && ( phoneMode === "capture" ? (
{/* Camera Controls Row */}
{/* Left: Upload */} {/* Center: Capture */} {/* Right: Switch Camera */}
{showCameraMenu && (
{cameras.length === 0 ? (
No cameras found
) : ( cameras.map((device) => ( )) )}
)}
{/* Call Controls Row (mic hidden in capture mode) */}
{/* End Call */}
) : (
{/* Mode Toggle Switch */} {phoneMode !== "important_message" && phoneMode !== "hand_off" && voiceAssistant.agent && (
按下说话模式 实时对话模式
)} {/* Push-to-Talk Mode Layout */} {isPushToTalkMode && phoneMode !== "hand_off" && voiceAssistant.agent && ( <> {/* Important Message Mode - Centered End Call Button */} {phoneMode === "important_message" ? (
) : (
{/* Left side: Mic Toggle and Camera Switch Buttons */}
{/* Mic Toggle Button */} {/* Camera Switch Button */}
{showCameraMenu && (
{cameras.length === 0 ? (
No cameras found
) : ( cameras.map((device) => ( )) )}
)}
{/* Center: Large Push-to-Talk Button */} {/* Right side: End Call Button */}
)} )} {/* Realtime Mode Layout */} {!isPushToTalkMode && ( <> {/* Important Message Mode - Centered End Call Button */} {phoneMode === "important_message" ? (
) : (
{/* Mic Toggle */} {phoneMode !== "hand_off" && ( )} {/* End Call Button */}
)} )} {/* Hand Off Mode - Show only End Call Button */} {phoneMode === "hand_off" && (
)} {/* Fallback: Show End Call Button when in push-to-talk mode but no agent/audio */} {phoneMode === "normal" && isPushToTalkMode && !voiceAssistant.agent && (
)}
) )}
); }