1335 lines
52 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use client";
import { useConfig } from "@/hooks/useConfig";
import {
BarVisualizer,
useConnectionState,
useLocalParticipant,
useParticipantAttributes,
useRoomContext,
useTracks,
useVoiceAssistant,
VideoTrack,
} from "@livekit/components-react";
import { ConnectionState, Track, LocalParticipant, Room } from "livekit-client";
import { useEffect, useMemo, useState, useRef, useCallback } from "react";
import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon, ChatIcon } from "./icons";
import { useToast } from "@/components/toast/ToasterProvider";
import { ChatOverlay } from "@/components/chat/ChatOverlay";
/**
 * Props for the PhoneSimulator component.
 */
export interface PhoneSimulatorProps {
  /** Invoked when the user taps the call button to start a session. */
  onConnect: () => void;
  /** Invoked when the user ends the call. */
  onDisconnect: () => void;
  /** UI mode of the simulated phone; defaults to "normal". */
  phoneMode?: "normal" | "capture" | "important_message" | "hand_off";
  /** Receives the captured or uploaded image file (capture mode). */
  onCapture?: (image: File) => void;
  /** Prompt text shown over the camera view in capture mode. */
  capturePrompt?: string;
  /** Message body shown in the important-message overlay. */
  importantMessage?: string;
  /** Button labels rendered in the important-message overlay. */
  importantMessageOptions?: string[];
  /** Called with the chosen option text (or the default confirm label) from the overlay. */
  onImportantMessageAction?: (text: string) => void;
}
/**
 * PhoneSimulator renders a simulated phone UI for a LiveKit voice-assistant
 * session: local camera preview, call controls, push-to-talk, photo
 * capture/upload, voice selection, and mode-specific overlays.
 */
export function PhoneSimulator({
  onConnect,
  onDisconnect,
  phoneMode = "normal",
  onCapture,
  capturePrompt,
  importantMessage,
  importantMessageOptions,
  onImportantMessageAction
}: PhoneSimulatorProps) {
  const { config, setUserSettings } = useConfig();
  const { setToastMessage } = useToast();
  const room = useRoomContext();
  const roomState = useConnectionState();
  const { localParticipant, isMicrophoneEnabled: isMicEnabled } = useLocalParticipant();
  const tracks = useTracks();
  const voiceAssistant = useVoiceAssistant();
  // Attributes published by the remote agent participant (e.g. "push-to-talk").
  const agentAttributes = useParticipantAttributes({
    participant: voiceAssistant.agent,
  });
  const fileInputRef = useRef<HTMLInputElement>(null);
  const phoneContainerRef = useRef<HTMLDivElement>(null);
  const visualizerRef = useRef<HTMLDivElement>(null);
  const [showCameraMenu, setShowCameraMenu] = useState(false);
  const [showVoiceMenu, setShowVoiceMenu] = useState(false);
  const [cameras, setCameras] = useState<MediaDeviceInfo[]>([]);
  // Data/object URL of the image currently being analyzed (drives the scan overlay).
  const [processingImage, setProcessingImage] = useState<string | null>(null);
  const [currentVoiceId, setCurrentVoiceId] = useState<string>("BV001_streaming"); // Default voice ID
  const [isCapturing, setIsCapturing] = useState(false);
  // Where the in-flight image came from; controls preview styling (mirroring vs contain).
  const [processingSource, setProcessingSource] = useState<
    "camera" | "upload" | null
  >(null);
  // Upload size cap in megabytes, enforced by validateImageFile.
  const MAX_UPLOAD_MB = 10;
  const isAgentSpeaking = voiceAssistant.state === "speaking";
  // Mic state remembered when entering capture/important_message/hand_off, restored on exit.
  const wasMicEnabledRef = useRef(false);
  const lastPhoneMode = useRef(phoneMode);
  const [isPushToTalkActive, setIsPushToTalkActive] = useState(false);
  // True briefly after the agent refuses a push-to-talk interruption.
  const [interruptRejected, setInterruptRejected] = useState(false);
  const [isPushToTalkMode, setIsPushToTalkMode] = useState(true); // false = realtime mode, true = PTT mode (default)
  const pushToTalkButtonRef = useRef<HTMLButtonElement>(null);
  const [showChatOverlay, setShowChatOverlay] = useState(false);
  const [chatOverlayPosition, setChatOverlayPosition] = useState({ x: 0, y: 0 }); // Will be positioned at top-right by ChatOverlay component
  // Keep the displayed voice in sync with the "voice" attribute in user settings.
  useEffect(() => {
    const voiceAttr = config.settings.attributes?.find(a => a.key === "voice");
    if (voiceAttr) {
      setCurrentVoiceId(voiceAttr.value);
    }
  }, [config.settings.attributes]);
  const [currentTime, setCurrentTime] = useState("");
  // Top-left offset (px) of the draggable audio visualizer inside the phone container.
  const [visualizerPosition, setVisualizerPosition] = useState({
    x: 16,
    y: 56,
  });
  const [isDragging, setIsDragging] = useState(false);
  // Pointer offset within the visualizer recorded at drag start, so the
  // element doesn't jump to place its corner under the cursor.
  const dragOffset = useRef({ x: 0, y: 0 });
const handleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
e.preventDefault();
setIsDragging(true);
const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
dragOffset.current = {
x: clientX - visualizerPosition.x,
y: clientY - visualizerPosition.y,
};
};
  // Move the visualizer with the pointer, clamped so it stays fully inside
  // the phone container (and below the desktop status bar).
  const handleDragMove = (e: MouseEvent | TouchEvent) => {
    if (!isDragging || !phoneContainerRef.current || !visualizerRef.current) return;
    e.preventDefault();
    const containerRect = phoneContainerRef.current.getBoundingClientRect();
    const visualizerRect = visualizerRef.current.getBoundingClientRect();
    const clientX = 'touches' in e ? e.touches[0].clientX : e.clientX;
    const clientY = 'touches' in e ? e.touches[0].clientY : e.clientY;
    let newX = clientX - dragOffset.current.x;
    let newY = clientY - dragOffset.current.y;
    // Constrain within container
    const maxX = containerRect.width - visualizerRect.width;
    const maxY = containerRect.height - visualizerRect.height;
    // On mobile (width < 768px), status bar is hidden, so allow dragging to top (y=0)
    // On desktop, keep status bar height constraint (48px)
    const isMobile = typeof window !== 'undefined' && window.innerWidth < 768;
    const minY = isMobile ? 0 : 48; // statusBarHeight = 48px
    newX = Math.max(0, Math.min(newX, maxX));
    newY = Math.max(minY, Math.min(newY, maxY));
    setVisualizerPosition({
      x: newX,
      y: newY,
    });
  };
  // Stop dragging; the listener effect below detaches handlers when isDragging flips.
  const handleDragEnd = () => {
    setIsDragging(false);
  };
  // While a drag is in progress, track pointer movement/release on the window
  // so the drag keeps working even when the pointer leaves the visualizer.
  useEffect(() => {
    if (isDragging) {
      window.addEventListener("mouseup", handleDragEnd);
      window.addEventListener("mousemove", handleDragMove);
      window.addEventListener("touchend", handleDragEnd);
      // passive: false so handleDragMove may call preventDefault (blocks page scroll).
      window.addEventListener("touchmove", handleDragMove, { passive: false });
    }
    return () => {
      window.removeEventListener("mouseup", handleDragEnd);
      window.removeEventListener("mousemove", handleDragMove);
      window.removeEventListener("touchend", handleDragEnd);
      window.removeEventListener("touchmove", handleDragMove);
    };
  }, [isDragging]);
useEffect(() => {
if (showCameraMenu) {
Room.getLocalDevices("videoinput").then(setCameras);
}
}, [showCameraMenu]);
  // Close the camera/voice menus when the user clicks anywhere else.
  // The menu toggles call stopPropagation, so their own clicks never reach here.
  useEffect(() => {
    const handleClickOutside = (event: MouseEvent) => {
      if (showCameraMenu) {
        setShowCameraMenu(false);
      }
      if (showVoiceMenu) {
        setShowVoiceMenu(false);
      }
    };
    // Only attach the listener while at least one menu is open.
    if (showCameraMenu || showVoiceMenu) {
      document.addEventListener("click", handleClickOutside);
    }
    return () => {
      document.removeEventListener("click", handleClickOutside);
    };
  }, [showCameraMenu, showVoiceMenu]);
  // Clear the image-processing overlay once the agent starts speaking —
  // its reply is taken as the signal that the image has been handled.
  useEffect(() => {
    if (voiceAssistant.state === "speaking") {
      setProcessingImage(null);
      setProcessingSource(null);
    }
  }, [voiceAssistant.state]);
  // Coordinate microphone state with phoneMode transitions:
  //  - entering capture / important_message / hand_off: remember the current
  //    mic state, then mute;
  //  - leaving any of those modes: restore the remembered mic state;
  //  - while inside one of those modes: keep the mic forced off.
  useEffect(() => {
    // True when this render just switched INTO `mode`.
    const enteringMode = (mode: typeof phoneMode) =>
      phoneMode === mode && lastPhoneMode.current !== mode;
    // Only proceed if connected and localParticipant is available
    if (roomState !== ConnectionState.Connected || !localParticipant) return;
    const updateMicState = async () => {
      // Entering important message / capture / hand_off: remember mic state and mute if needed
      if (enteringMode("important_message") || enteringMode("capture") || enteringMode("hand_off")) {
        wasMicEnabledRef.current = isMicEnabled;
        if (isMicEnabled) {
          try {
            await localParticipant.setMicrophoneEnabled(false);
          } catch (error) {
            console.error("Failed to disable microphone:", error);
          }
        }
      }
      // Exiting important message mode or hand off mode or capture mode
      else if (
        (phoneMode !== "important_message" && lastPhoneMode.current === "important_message") ||
        (phoneMode !== "hand_off" && lastPhoneMode.current === "hand_off") ||
        (phoneMode !== "capture" && lastPhoneMode.current === "capture")
      ) {
        // Restore mic to previous state
        try {
          await localParticipant.setMicrophoneEnabled(wasMicEnabledRef.current);
        } catch (error) {
          console.error("Failed to restore microphone:", error);
        }
        // If exiting capture mode, clear processing image
        if (lastPhoneMode.current === "capture") {
          setProcessingImage(null);
          setProcessingSource(null);
        }
      }
      // Enforce mic off in important message mode, hand off mode, or capture mode
      else if ((phoneMode === "important_message" || phoneMode === "hand_off" || phoneMode === "capture") && isMicEnabled) {
        try {
          await localParticipant.setMicrophoneEnabled(false);
        } catch (error) {
          console.error("Failed to disable microphone:", error);
        }
      }
    };
    updateMicState();
    // Record the mode we just handled so the next run can detect transitions.
    lastPhoneMode.current = phoneMode;
  }, [phoneMode, isMicEnabled, localParticipant, roomState]);
useEffect(() => {
const updateTime = () => {
const now = new Date();
setCurrentTime(
now.toLocaleTimeString("en-US", {
hour: "numeric",
minute: "2-digit",
hour12: true,
})
);
};
updateTime();
const interval = setInterval(updateTime, 60000);
return () => clearInterval(interval);
}, []);
  // Tracks published by the local participant (as opposed to the agent's).
  const localTracks = tracks.filter(
    ({ participant }) => participant instanceof LocalParticipant
  );
  // The local camera feed shown in the phone preview, if the camera is on.
  const localCameraTrack = localTracks.find(
    ({ source }) => source === Track.Source.Camera
  );
const handleMicToggle = async () => {
if (roomState !== ConnectionState.Connected || !localParticipant) return;
try {
if (isMicEnabled) {
await localParticipant.setMicrophoneEnabled(false);
} else {
await localParticipant.setMicrophoneEnabled(true);
}
} catch (error) {
console.error("Failed to toggle microphone:", error);
// Silently handle the error to avoid disrupting user experience
}
};
const handleDisconnect = () => {
try {
// Only disconnect if we're actually connected
if (roomState === ConnectionState.Connected || roomState === ConnectionState.Connecting) {
onDisconnect();
}
} catch (error) {
// Silently handle any errors during disconnect
console.warn("Error during disconnect:", error);
// Still try to call onDisconnect to ensure cleanup
try {
onDisconnect();
} catch (e) {
// Ignore secondary errors
}
}
};
const validateImageFile = (file: File) => {
const isImage = file.type.startsWith("image/");
const isSizeOk = file.size > 0 && file.size <= MAX_UPLOAD_MB * 1024 * 1024;
return {
isValid: isImage && isSizeOk,
isImage,
isSizeOk,
};
};
const showErrorToast = (message: string) => {
setToastMessage({ message, type: "error" });
};
  // Grab a single frame from the local camera track: play it into an
  // off-screen <video>, crop to the phone's 9:19.5 portrait aspect, mirror it
  // to match the on-screen preview, show the frame as the processing overlay,
  // then hand a validated JPEG File to onCapture. No-op when there is no
  // camera track, no onCapture callback, or a capture is already in flight.
  const handleCapture = async () => {
    if (!localCameraTrack || !onCapture || isCapturing) return;
    setIsCapturing(true);
    const trackReference = localCameraTrack as any;
    // Handle both TrackReference (from useTracks) and potential direct Track objects
    const track =
      trackReference.publication?.track?.mediaStreamTrack ||
      trackReference.mediaStreamTrack;
    if (!track) {
      console.error("No media stream track found");
      setIsCapturing(false);
      return;
    }
    const video = document.createElement("video");
    video.srcObject = new MediaStream([track]);
    video.muted = true;
    video.playsInline = true;
    video.autoplay = true;
    // Element needs to be in the DOM for some browsers to play it properly
    video.style.position = "absolute";
    video.style.top = "-9999px";
    video.style.left = "-9999px";
    document.body.appendChild(video);
    try {
      await video.play();
      // Wait for video dimensions to be available
      if (video.videoWidth === 0 || video.videoHeight === 0) {
        await new Promise<void>((resolve) => {
          video.onloadedmetadata = () => resolve();
          // Timeout to prevent hanging
          setTimeout(resolve, 1000);
        });
      }
      const canvas = document.createElement("canvas");
      // Default to video dimensions
      let renderWidth = video.videoWidth;
      let renderHeight = video.videoHeight;
      let sourceX = 0;
      let sourceY = 0;
      let sourceWidth = video.videoWidth;
      let sourceHeight = video.videoHeight;
      // If the video is landscape but we want a portrait crop (like a phone)
      // We want an aspect ratio of roughly 9:19.5 (from the container styles)
      const targetAspect = 9 / 19.5;
      const videoAspect = video.videoWidth / video.videoHeight;
      if (videoAspect > targetAspect) {
        // Video is wider than target - crop width
        const newWidth = video.videoHeight * targetAspect;
        sourceX = (video.videoWidth - newWidth) / 2;
        sourceWidth = newWidth;
        renderWidth = newWidth;
      } else {
        // Video is taller than target - crop height (less common for landscape webcam)
        const newHeight = video.videoWidth / targetAspect;
        sourceY = (video.videoHeight - newHeight) / 2;
        sourceHeight = newHeight;
        renderHeight = newHeight;
      }
      canvas.width = renderWidth;
      canvas.height = renderHeight;
      const ctx = canvas.getContext("2d");
      if (ctx) {
        // Mirror the image to match the preview
        ctx.translate(canvas.width, 0);
        ctx.scale(-1, 1);
        // Draw only the cropped portion of the video
        ctx.drawImage(
          video,
          sourceX,
          sourceY,
          sourceWidth,
          sourceHeight,
          0,
          0,
          renderWidth,
          renderHeight
        );
        // Reset transform
        ctx.setTransform(1, 0, 0, 1, 0, 0);
        // Use toDataURL for immediate preview feedback
        const dataUrl = canvas.toDataURL("image/jpeg");
        setProcessingImage(dataUrl);
        setProcessingSource("camera");
        // Create a new canvas for the final output (unmirrored if needed, but user requested mirrored)
        // The user requested to mirror the shuttled photo, which we did above for the canvas.
        // So the blob created from this canvas will also be mirrored.
        canvas.toBlob((blob) => {
          if (blob && onCapture) {
            const file = new File([blob], "camera-capture.jpg", {
              type: "image/jpeg",
            });
            const { isValid } = validateImageFile(file);
            if (!isValid) {
              // Oversized capture: drop the overlay and tell the user.
              setProcessingImage(null);
              setProcessingSource(null);
              showErrorToast(`请拍摄${MAX_UPLOAD_MB}MB以内的图片文件。`);
              setIsCapturing(false);
              return;
            }
            onCapture(file);
          }
          setIsCapturing(false);
        }, "image/jpeg");
      } else {
        setIsCapturing(false);
      }
    } catch (e) {
      console.error("Failed to capture image", e);
      setIsCapturing(false);
    } finally {
      // Cleanup — note toBlob's callback may still run after this; it only
      // touches state/canvas, not the video element removed here.
      video.pause();
      video.srcObject = null;
      if (document.body.contains(video)) {
        document.body.removeChild(video);
      }
      video.remove();
    }
  };
const handleUpload = () => {
if (fileInputRef.current) {
fileInputRef.current.click();
}
};
const handleSwitchCamera = async (e: React.MouseEvent) => {
e.stopPropagation(); // Prevent immediate close
setShowCameraMenu(!showCameraMenu);
};
const handleSelectCamera = async (deviceId: string) => {
await room.switchActiveDevice("videoinput", deviceId);
setShowCameraMenu(false);
};
const handleChangeVoice = (voiceId: string) => {
const newSettings = { ...config.settings };
const attributes = newSettings.attributes ? [...newSettings.attributes] : [];
const voiceAttrIndex = attributes.findIndex(a => a.key === "voice");
if (voiceAttrIndex >= 0) {
attributes[voiceAttrIndex] = { ...attributes[voiceAttrIndex], value: voiceId };
} else {
attributes.push({ id: "voice", key: "voice", value: voiceId });
}
newSettings.attributes = attributes;
setUserSettings(newSettings);
setCurrentVoiceId(voiceId);
setTimeout(() => setShowVoiceMenu(false), 200);
};
const handleVoiceMenuToggle = (e: React.MouseEvent) => {
e.stopPropagation();
setShowVoiceMenu(!showVoiceMenu);
};
  // Toggle between push-to-talk and realtime conversation by asking the agent
  // via the "switch_ptt_and_rt" RPC. Prefers the mode echoed back by the
  // agent; falls back to the requested mode if the response isn't JSON.
  const handleModeSwitch = async () => {
    if (!room || !voiceAssistant.agent) return;
    // Determine the target mode (toggle from current state)
    const targetMode = isPushToTalkMode ? "realtime" : "push_to_talk";
    try {
      const response = await room.localParticipant.performRpc({
        destinationIdentity: voiceAssistant.agent.identity,
        method: "switch_ptt_and_rt",
        payload: JSON.stringify({ mode: targetMode }),
      });
      // Parse the response to confirm the mode was set
      try {
        const responseData = JSON.parse(response);
        const confirmedMode = responseData.mode;
        // Update state based on server response
        setIsPushToTalkMode(confirmedMode === "push_to_talk");
      } catch (parseError) {
        // If parsing fails, update state based on what we sent
        console.warn("Failed to parse mode switch response, using sent mode:", parseError);
        setIsPushToTalkMode(targetMode === "push_to_talk");
      }
    } catch (error: any) {
      console.error("Failed to switch mode:", error);
      // Don't show error toast for mode switch failures, just log
    }
  };
  // Check if agent supports push-to-talk (optional check, button will show regardless)
  // True when the agent advertises the "push-to-talk" attribute with value "1".
  const supportsPushToTalk = useMemo(() => {
    if (!voiceAssistant.agent || !agentAttributes.attributes) return false;
    return agentAttributes.attributes["push-to-talk"] === "1";
  }, [voiceAssistant.agent, agentAttributes.attributes]);
  // Begin a push-to-talk turn via the "start_turn" RPC. The agent may refuse
  // the interruption ({success:false, message:"不能打断"}); in that case a
  // transient rejection indicator is shown instead of activating PTT.
  // "Method not supported" errors are silently ignored (agent not ready yet).
  const handlePushToTalkStart = async () => {
    if (!room || !voiceAssistant.agent || isPushToTalkActive) return;
    // Reset interrupt rejection state
    setInterruptRejected(false);
    try {
      const response = await room.localParticipant.performRpc({
        destinationIdentity: voiceAssistant.agent.identity,
        method: "start_turn",
        payload: "",
      });
      // Parse the response to check for success/failure
      try {
        const responseData = JSON.parse(response);
        if (responseData.success === false) {
          // Interrupt was rejected, show message
          if (responseData.message === "不能打断") {
            setInterruptRejected(true);
            // Clear the rejection message after 3 seconds
            setTimeout(() => setInterruptRejected(false), 3000);
            if (process.env.NODE_ENV === 'development') {
              console.log("Interrupt rejected (cannot interrupt):", responseData.message);
            }
            return;
          }
        } else if (responseData.success === true) {
          // Successfully started turn
          setIsPushToTalkActive(true);
          setInterruptRejected(false);
        }
      } catch (parseError) {
        // If response is not JSON, assume success (backward compatibility)
        setIsPushToTalkActive(true);
        setInterruptRejected(false);
      }
    } catch (error: any) {
      // Handle RPC errors (method not found, etc.)
      setIsPushToTalkActive(false);
      const errorMessage = error?.message || "";
      const errorCode = error?.code;
      // Check for "Method not supported at destination" - this happens when RPC methods aren't registered yet
      // This can occur on first call before agent is fully ready, so we silently ignore it
      if (errorMessage.includes("Method not supported at destination") ||
        errorMessage.includes("method not found") ||
        errorCode === 12) { // METHOD_NOT_FOUND
        // Silently ignore - the method will be available after first turn
        console.log("RPC method not ready yet, will be available after first turn");
        return;
      }
      // Only log and show error for unexpected errors
      console.error("Unexpected error in push-to-talk:", error);
      const defaultErrorMessage = "Agent does not support push-to-talk. Make sure your agent has the push-to-talk RPC methods (start_turn, end_turn, cancel_turn) registered.";
      setToastMessage({ message: defaultErrorMessage, type: "error" });
    }
  };
  // End the current push-to-talk turn ("end_turn" RPC), submitting the speech.
  // Always clears the rejection indicator; errors are logged only because
  // this may run during cleanup.
  const handlePushToTalkEnd = useCallback(async () => {
    // Always clear interrupt rejection state when button is released
    setInterruptRejected(false);
    if (!room || !voiceAssistant.agent || !isPushToTalkActive) return;
    try {
      await room.localParticipant.performRpc({
        destinationIdentity: voiceAssistant.agent.identity,
        method: "end_turn",
        payload: "",
      });
      setIsPushToTalkActive(false);
    } catch (error: any) {
      console.error("Failed to end turn:", error);
      // Don't show error toast on end_turn failure as it might be called during cleanup
      setIsPushToTalkActive(false);
    }
  }, [room, voiceAssistant.agent, isPushToTalkActive]);
  // Abort the current push-to-talk turn ("cancel_turn" RPC) without
  // submitting it — used on window blur and Escape (see effect below).
  const handlePushToTalkCancel = useCallback(async () => {
    // Always clear interrupt rejection state when button is cancelled
    setInterruptRejected(false);
    if (!room || !voiceAssistant.agent || !isPushToTalkActive) return;
    try {
      await room.localParticipant.performRpc({
        destinationIdentity: voiceAssistant.agent.identity,
        method: "cancel_turn",
        payload: "",
      });
      setIsPushToTalkActive(false);
    } catch (error) {
      console.error("Failed to cancel turn:", error);
      setIsPushToTalkActive(false);
    }
  }, [room, voiceAssistant.agent, isPushToTalkActive]);
// Handle mouse events for push-to-talk
const handlePushToTalkMouseDown = (e: React.MouseEvent) => {
e.preventDefault();
handlePushToTalkStart();
};
const handlePushToTalkMouseUp = (e: React.MouseEvent) => {
e.preventDefault();
handlePushToTalkEnd();
};
// Handle touch events for push-to-talk
const handlePushToTalkTouchStart = (e: React.TouchEvent) => {
e.preventDefault();
handlePushToTalkStart();
};
const handlePushToTalkTouchEnd = (e: React.TouchEvent) => {
e.preventDefault();
handlePushToTalkEnd();
};
  // Handle window blur, escape key, and global mouse/touch events to cancel/end push-to-talk
  // Safety net while a PTT turn is active: window blur or Escape cancels the
  // turn; releasing the pointer anywhere — even outside the button — ends it.
  useEffect(() => {
    if (!isPushToTalkActive) return;
    const handleBlur = () => {
      handlePushToTalkCancel();
    };
    const handleKeyDown = (e: KeyboardEvent) => {
      if (e.key === "Escape") {
        handlePushToTalkCancel();
      }
    };
    // Handle global mouseup/touchend to end push-to-talk even if released outside button
    const handleGlobalMouseUp = () => {
      // Clear interrupt rejection state immediately when button is released
      setInterruptRejected(false);
      handlePushToTalkEnd();
    };
    const handleGlobalTouchEnd = () => {
      // Clear interrupt rejection state immediately when button is released
      setInterruptRejected(false);
      handlePushToTalkEnd();
    };
    window.addEventListener("blur", handleBlur);
    window.addEventListener("keydown", handleKeyDown);
    window.addEventListener("mouseup", handleGlobalMouseUp);
    window.addEventListener("touchend", handleGlobalTouchEnd);
    return () => {
      window.removeEventListener("blur", handleBlur);
      window.removeEventListener("keydown", handleKeyDown);
      window.removeEventListener("mouseup", handleGlobalMouseUp);
      window.removeEventListener("touchend", handleGlobalTouchEnd);
    };
  }, [isPushToTalkActive, handlePushToTalkCancel, handlePushToTalkEnd]);
  // Clean up push-to-talk state on disconnect
  useEffect(() => {
    if (roomState === ConnectionState.Disconnected && isPushToTalkActive) {
      setIsPushToTalkActive(false);
      setInterruptRejected(false);
    }
  }, [roomState, isPushToTalkActive]);
  // Reset interrupt rejection when agent stops speaking
  useEffect(() => {
    if (!isAgentSpeaking && interruptRejected) {
      // Clear rejection state when agent finishes speaking (1s grace period).
      const timer = setTimeout(() => setInterruptRejected(false), 1000);
      return () => clearTimeout(timer);
    }
  }, [isAgentSpeaking, interruptRejected]);
  // Validate the chosen file and hand it to onCapture, showing an object-URL
  // preview under the scan overlay while it is processed.
  const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0];
    if (file && onCapture) {
      const { isValid, isImage, isSizeOk } = validateImageFile(file);
      if (!isValid) {
        // Distinguish "not an image" from "too large" in the toast message.
        const msg = !isImage
          ? "请选择图片文件。"
          : `请上传${MAX_UPLOAD_MB}MB以内的图片文件。`;
        showErrorToast(msg);
      } else {
        onCapture(file);
        // NOTE(review): this object URL is never revoked (no
        // URL.revokeObjectURL call anywhere in view), so repeated uploads
        // leak blob URLs for the session — consider revoking when
        // processingImage is cleared.
        setProcessingImage(URL.createObjectURL(file));
        setProcessingSource("upload");
      }
    }
    // Reset input so the same file can be selected again
    if (event.target) {
      event.target.value = "";
    }
  };
  // What fills the phone "screen": a connect button + voice picker while
  // disconnected, a "Camera off" placeholder when no local camera track is
  // published, otherwise the mirrored local camera preview.
  const videoContent = (() => {
    if (roomState === ConnectionState.Disconnected) {
      return (
        <div className="flex items-center justify-center h-full w-full bg-gray-900 text-gray-500 text-sm p-4 text-center">
          <div className="flex flex-col items-center gap-6">
            {/* Call button — disabled while the voice menu is open. */}
            <button
              onClick={(e) => {
                e.stopPropagation();
                // Guard against accidental call when just changing voice
                if (showVoiceMenu) return;
                onConnect();
              }}
              disabled={showVoiceMenu}
              className={`flex flex-col items-center gap-4 transition-opacity ${showVoiceMenu ? 'opacity-50 cursor-not-allowed' : 'hover:opacity-80 cursor-pointer'}`}
            >
              <div
                className="w-16 h-16 rounded-full flex items-center justify-center text-white"
                style={{ backgroundColor: `var(--lk-theme-color)` }}
              >
                <PhoneIcon className="w-8 h-8" />
              </div>
              {/* NOTE(review): empty label span — text may have been stripped; confirm intended. */}
              <span className="font-medium text-white"></span>
            </button>
            {/* Voice picker: BV001_streaming is labeled female, BV002_streaming male. */}
            <div className="relative">
              <button
                onClick={handleVoiceMenuToggle}
                className="flex items-center gap-2 px-4 py-2 rounded-full bg-gray-800 text-white hover:bg-gray-700 transition-colors text-xs"
              >
                <VoiceIcon className="w-3 h-3" />
                <span>
                  {currentVoiceId === "BV001_streaming" ? "女性声音" : "男性声音"}
                </span>
              </button>
              {showVoiceMenu && (
                <div
                  className="absolute top-full mt-2 left-1/2 -translate-x-1/2 bg-gray-800 border border-gray-700 rounded-lg shadow-xl py-1 w-40 z-50"
                  onClick={(e) => e.stopPropagation()}
                >
                  <button
                    onClick={(e) => {
                      e.preventDefault();
                      e.stopPropagation();
                      handleChangeVoice("BV001_streaming");
                    }}
                    className={`w-full text-left px-4 py-2 text-xs hover:bg-gray-700 transition-colors flex items-center justify-between ${
                      currentVoiceId === "BV001_streaming"
                        ? "text-blue-400 font-bold"
                        : "text-white"
                    }`}
                  >
                    {/* NOTE(review): empty option label — text may have been stripped; confirm. */}
                    <span></span>
                    {currentVoiceId === "BV001_streaming" && <CheckIcon />}
                  </button>
                  <button
                    onClick={(e) => {
                      e.preventDefault();
                      e.stopPropagation();
                      handleChangeVoice("BV002_streaming");
                    }}
                    className={`w-full text-left px-4 py-2 text-xs hover:bg-gray-700 transition-colors flex items-center justify-between ${
                      currentVoiceId === "BV002_streaming"
                        ? "text-blue-400 font-bold"
                        : "text-white"
                    }`}
                  >
                    <span></span>
                    {currentVoiceId === "BV002_streaming" && (
                      <CheckIcon />
                    )}
                  </button>
                </div>
              )}
            </div>
          </div>
        </div>
      );
    }
    if (!localCameraTrack) {
      return (
        <div className="flex items-center justify-center h-full w-full bg-gray-900 text-gray-500 text-sm p-4 text-center">
          <div className="flex flex-col items-center gap-2">
            <span>Camera off</span>
          </div>
        </div>
      );
    }
    {/* Mirrored like a selfie camera via the .mirror-video global style. */}
    return (
      <VideoTrack
        trackRef={localCameraTrack}
        className="w-full h-full object-cover mirror-video"
      />
    );
  })();
return (
<div className="absolute inset-0 w-full h-full bg-black rounded-none border-0 overflow-hidden flex flex-col shrink-0 md:relative md:w-auto md:max-w-full md:h-full md:aspect-[9/19.5] md:max-h-full md:rounded-[40px] md:border-[12px] md:border-gray-900 md:shadow-2xl">
<style jsx global>{`
.mirror-video video {
transform: scaleX(-1);
}
@keyframes scan {
0% { top: 0%; }
50% { top: 100%; }
100% { top: 0%; }
}
.scan-animation {
animation: scan 3s linear infinite;
}
@keyframes handoffPulse {
0%, 100% {
opacity: 1;
transform: scale(1);
}
50% {
opacity: 0.8;
transform: scale(1.05);
}
}
@keyframes handoffFade {
0%, 100% { opacity: 1; }
50% { opacity: 0.6; }
}
.handoff-animation {
animation: handoffPulse 2s ease-in-out infinite, handoffFade 2s ease-in-out infinite;
}
`}</style>
{/* Status Bar */}
<div className="hidden md:flex w-full bg-black/20 backdrop-blur-sm absolute top-0 left-0 z-50 items-center justify-between px-6 text-white text-xs font-medium"
style={{
paddingTop: 'max(env(safe-area-inset-top, 0px), 0.5rem)',
paddingBottom: '0.75rem',
minHeight: '3rem',
}}>
<span>{currentTime}</span>
<div className="flex items-center gap-2">
<WifiIcon className="w-4 h-4" />
<BatteryIcon className="w-4 h-4" />
</div>
</div>
{/* Chat Toggle Button - Top Right, aligned with audio visualizer */}
{roomState === ConnectionState.Connected &&
voiceAssistant.agent &&
phoneMode !== "important_message" &&
phoneMode !== "capture" && (
<button
className={`absolute right-2 z-50 p-3 rounded-full backdrop-blur-md transition-colors shadow-lg ${
showChatOverlay
? "bg-blue-500/80 text-white"
: "bg-gray-800/70 text-white hover:bg-gray-800/90"
}`}
onClick={() => setShowChatOverlay(!showChatOverlay)}
title={showChatOverlay ? "Hide chat" : "Show chat"}
style={{
top: '56px', // Align with audio visualizer initial position
right: '8px',
}}
>
<ChatIcon className="w-5 h-5 md:w-6 md:h-6" />
</button>
)}
{/* Main Content */}
<div ref={phoneContainerRef} className="flex-grow relative bg-gray-950 w-full h-full overflow-hidden"
style={{
paddingBottom: 'env(safe-area-inset-bottom, 0px)',
}}>
<div className={`h-full w-full transition-all duration-500 ease-in-out transform ${
phoneMode === "hand_off" && roomState === ConnectionState.Connected
? "blur-md scale-105"
: phoneMode === "capture"
? "scale-105 brightness-110"
: "scale-100"
}`}>
{videoContent}
</div>
{processingImage && (
<div className="absolute inset-0 z-10 bg-black flex items-center justify-center">
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={processingImage}
alt="Processing"
className={`w-full h-full opacity-50 ${
processingSource === "camera"
? "object-cover scale-x-[-1]"
: "object-contain"
}`}
/>
<div className="absolute inset-0 overflow-hidden">
<div className="w-full h-[2px] bg-blue-500 shadow-[0_0_15px_rgba(59,130,246,1)] scan-animation absolute"></div>
</div>
<div
className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-full text-center font-mono text-2xl font-semibold animate-pulse z-20"
style={{ color: "var(--lk-theme-color)" }}
>
...
</div>
</div>
)}
<input
type="file"
ref={fileInputRef}
onChange={handleFileChange}
accept="image/*"
className="hidden"
/>
{/* Capture Guide Lines */}
{roomState === ConnectionState.Connected && phoneMode === "capture" && !processingImage && (
<div className="absolute inset-0 pointer-events-none z-10">
{/* Thirds Grid */}
<div className="absolute top-1/3 left-0 w-full h-[1px] bg-white/20"></div>
<div className="absolute top-2/3 left-0 w-full h-[1px] bg-white/20"></div>
<div className="absolute left-1/3 top-0 w-[1px] h-full bg-white/20"></div>
<div className="absolute left-2/3 top-0 w-[1px] h-full bg-white/20"></div>
{/* Center Focus Indicator */}
<div className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-16 h-16 border border-white/50 rounded-sm"></div>
<div className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-1 h-1 bg-white/50 rounded-full"></div>
{/* Prompt Display */}
{capturePrompt && (
<div className="absolute top-32 left-0 w-full px-6 text-center z-20">
<div className="inline-block bg-black/60 backdrop-blur-md text-white px-4 py-3 rounded-2xl text-sm font-medium shadow-lg border border-white/10 max-w-full break-words">
{capturePrompt}
</div>
</div>
)}
</div>
)}
</div>
{/* Important Message Overlay */}
{roomState === ConnectionState.Connected && phoneMode === "important_message" && (
<div className="absolute inset-0 z-30 bg-black/60 backdrop-blur-md flex items-center justify-center p-6">
<div className="bg-gray-800/80 backdrop-blur-xl border border-white/10 p-6 rounded-[2rem] shadow-2xl w-full max-w-xs text-center flex flex-col gap-6 transform transition-all">
<div className="flex flex-col items-center gap-4">
<p className="text-white text-lg font-medium leading-relaxed">
{importantMessage}
</p>
</div>
<div className="flex flex-col gap-3 w-full">
{importantMessageOptions && importantMessageOptions.length > 0 ? (
importantMessageOptions.map((option, index) => (
<button
key={index}
onClick={() => {
if (!isAgentSpeaking) {
onImportantMessageAction?.(option);
}
}}
disabled={isAgentSpeaking}
className={`w-full bg-white text-black font-semibold py-3.5 rounded-xl hover:bg-gray-200 transition-colors active:scale-95 transform ${isAgentSpeaking ? "opacity-50 cursor-not-allowed" : ""}`}
>
{option}
</button>
))
) : (
<button
onClick={() => {
if (!isAgentSpeaking) {
onImportantMessageAction?.("确认");
}
}}
disabled={isAgentSpeaking}
className={`w-full bg-white text-black font-semibold py-3.5 rounded-xl hover:bg-gray-200 transition-colors active:scale-95 transform ${isAgentSpeaking ? "opacity-50 cursor-not-allowed" : ""}`}
>
</button>
)}
</div>
</div>
</div>
)}
{/* Hand Off Mode Overlay */}
{roomState === ConnectionState.Connected && phoneMode === "hand_off" && (
<div className="absolute inset-0 z-30 flex items-center justify-center p-6">
<div
className="text-3xl font-semibold font-mono tracking-widest handoff-animation"
style={{ color: "var(--lk-theme-color)" }}
>
...
</div>
</div>
)}
{/* Agent Audio Visualizer (Draggable) */}
{roomState === ConnectionState.Connected && voiceAssistant.audioTrack && phoneMode !== "hand_off" && (
<div
ref={visualizerRef}
className="absolute z-50 p-2 bg-black/40 backdrop-blur-md rounded-lg border border-white/10 shadow-lg cursor-move select-none touch-none"
style={{
left: visualizerPosition.x,
top: visualizerPosition.y,
}}
onMouseDown={handleDragStart}
onTouchStart={handleDragStart}
>
<div className="h-8 w-24 flex items-center justify-center [--lk-va-bar-width:3px] [--lk-va-bar-gap:2px] [--lk-fg:white]">
<BarVisualizer
state={voiceAssistant.state}
trackRef={voiceAssistant.audioTrack}
barCount={7}
options={{ minHeight: 5 }}
/>
</div>
</div>
)}
{/* Chat Overlay - Hidden during capture and important_message modes */}
{roomState === ConnectionState.Connected &&
voiceAssistant.agent &&
phoneMode !== "capture" &&
phoneMode !== "important_message" && (
<ChatOverlay
agentAudioTrack={voiceAssistant.audioTrack}
accentColor={config.settings.theme_color}
inputDisabled={phoneMode === "important_message" || phoneMode === "hand_off"}
isVisible={showChatOverlay}
position={chatOverlayPosition}
onPositionChange={setChatOverlayPosition}
containerRef={phoneContainerRef}
onToggle={() => setShowChatOverlay(!showChatOverlay)}
/>
)}
{/* Call Controls Overlay */}
{roomState === ConnectionState.Connected && (
phoneMode === "capture" ? (
<div className="absolute top-0 left-0 w-full h-full flex flex-col justify-end px-[8%] z-40"
style={{
paddingBottom: 'calc(5% + env(safe-area-inset-bottom, 0px))',
}}>
{/* Camera Controls Row */}
<div className="w-full flex items-center justify-evenly mb-8">
{/* Left: Upload */}
<button
className={`p-3 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors ${isAgentSpeaking ? "opacity-50 cursor-not-allowed" : ""}`}
onClick={() => {
if (!isAgentSpeaking) {
handleUpload();
}
}}
disabled={isAgentSpeaking}
>
<ImageIcon className="w-6 h-6" />
</button>
{/* Center: Capture */}
<button
className={`w-16 h-16 rounded-full border-4 border-white p-1 hover:scale-105 transition-transform shrink-0 ${isAgentSpeaking ? "opacity-50 cursor-not-allowed" : ""}`}
onClick={() => {
if (!isAgentSpeaking) {
handleCapture();
}
}}
disabled={isAgentSpeaking}
>
<div className="w-full h-full bg-white rounded-full"></div>
</button>
{/* Right: Switch Camera */}
<div className="relative">
<button
className={`p-3 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors ${isAgentSpeaking ? "opacity-50 cursor-not-allowed" : ""}`}
onClick={(e) => {
if (!isAgentSpeaking) {
handleSwitchCamera(e);
}
}}
disabled={isAgentSpeaking}
>
<SwitchCameraIcon className="w-6 h-6" />
</button>
{showCameraMenu && (
<div className="absolute bottom-full mb-2 right-0 bg-gray-900 border border-gray-800 rounded-lg shadow-xl py-2 w-48 z-50">
{cameras.length === 0 ? (
<div className="px-4 py-2 text-gray-500 text-sm">
No cameras found
</div>
) : (
cameras.map((device) => (
<button
key={device.deviceId}
onClick={() => handleSelectCamera(device.deviceId)}
className="w-full text-left px-4 py-2 text-sm text-white hover:bg-gray-800 transition-colors truncate"
>
{device.label ||
`Camera ${cameras.indexOf(device) + 1}`}
</button>
))
)}
</div>
)}
</div>
</div>
{/* Call Controls Row (mic hidden in capture mode) */}
<div className="w-full flex items-center justify-center gap-8">
{/* End Call */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
</div>
) : (
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-40"
style={{
paddingBottom: 'max(env(safe-area-inset-bottom, 0px), 0px)',
bottom: 'calc(5% + env(safe-area-inset-bottom, 0px))',
}}>
<div className="w-full flex flex-col items-center justify-center gap-4">
{/* Mode Toggle Switch */}
{phoneMode !== "important_message" && phoneMode !== "hand_off" && voiceAssistant.agent && (
<div className="flex items-center gap-3 mb-2">
<span className={`text-xs font-medium transition-colors ${isPushToTalkMode ? "text-white" : "text-gray-400"}`}>
</span>
<button
onClick={handleModeSwitch}
className={`relative inline-flex h-6 w-11 items-center rounded-full transition-colors focus:outline-none focus:ring-2 focus:ring-blue-500 focus:ring-offset-2 ${
!isPushToTalkMode ? "bg-blue-500" : "bg-gray-600"
}`}
role="switch"
aria-checked={!isPushToTalkMode}
title={isPushToTalkMode ? "切换到实时对话模式" : "切换到按下说话模式"}
>
<span
className={`inline-block h-4 w-4 transform rounded-full bg-white transition-transform ${
!isPushToTalkMode ? "translate-x-6" : "translate-x-1"
}`}
/>
</button>
<span className={`text-xs font-medium transition-colors ${!isPushToTalkMode ? "text-white" : "text-gray-400"}`}>
</span>
</div>
)}
{/* Push-to-Talk Mode Layout */}
{isPushToTalkMode && phoneMode !== "hand_off" && voiceAssistant.agent && (
<>
{/* Important Message Mode - Centered End Call Button */}
{phoneMode === "important_message" ? (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
) : (
<div className="w-full flex items-center justify-between gap-8">
{/* Left side: Mic Toggle and Camera Switch Buttons */}
<div className="flex flex-col items-center gap-2">
{/* Mic Toggle Button */}
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-800/50 text-white hover:bg-gray-800/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
{/* Camera Switch Button */}
<div className="relative">
<button
className="p-4 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleSwitchCamera}
>
<SwitchCameraIcon className="w-6 h-6" />
</button>
{showCameraMenu && (
<div className="absolute bottom-full mb-2 left-0 bg-gray-900 border border-gray-800 rounded-lg shadow-xl py-2 w-48 z-50">
{cameras.length === 0 ? (
<div className="px-4 py-2 text-gray-500 text-sm">
No cameras found
</div>
) : (
cameras.map((device) => (
<button
key={device.deviceId}
onClick={() => handleSelectCamera(device.deviceId)}
className="w-full text-left px-4 py-2 text-sm text-white hover:bg-gray-800 transition-colors truncate"
>
{device.label ||
`Camera ${cameras.indexOf(device) + 1}`}
</button>
))
)}
</div>
)}
</div>
</div>
{/* Center: Large Push-to-Talk Button */}
<button
ref={pushToTalkButtonRef}
className={`w-24 h-24 rounded-full backdrop-blur-md transition-all flex flex-col items-center justify-center gap-2 aspect-square select-none ${
interruptRejected
? "bg-red-500/70 text-white"
: isPushToTalkActive
? "bg-green-500 text-white scale-110 shadow-lg shadow-green-500/50"
: "bg-blue-500/70 text-white hover:bg-blue-500/90"
}`}
style={{ borderRadius: '50%' }}
onMouseDown={handlePushToTalkMouseDown}
onMouseUp={handlePushToTalkMouseUp}
onTouchStart={handlePushToTalkTouchStart}
onTouchEnd={handlePushToTalkTouchEnd}
title={supportsPushToTalk ? "Push to Talk" : "Push to Talk (may not be supported by this agent)"}
>
<MicIcon className="w-8 h-8" />
<span className="text-xs font-medium">
{interruptRejected ? "不允许打断" : "按住说话"}
</span>
</button>
{/* Right side: End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</>
)}
{/* Realtime Mode Layout */}
{!isPushToTalkMode && (
<>
{/* Important Message Mode - Centered End Call Button */}
{phoneMode === "important_message" ? (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
) : (
<div className="w-full flex items-center justify-center gap-4">
{/* Mic Toggle */}
{phoneMode !== "hand_off" && (
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
)}
{/* End Call Button */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</>
)}
{/* Hand Off Mode - Show only End Call Button */}
{phoneMode === "hand_off" && (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
{/* Fallback: Show End Call Button when in push-to-talk mode but no agent/audio */}
{phoneMode === "normal" &&
isPushToTalkMode &&
!voiceAssistant.agent && (
<div className="w-full flex items-center justify-center">
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
)}
</div>
</div>
)
)}
</div>
);
}