Frontend can now change the voice type

This commit is contained in:
2025-12-09 17:10:06 +08:00
parent 3c9a7cf3af
commit 026cde6d47
3 changed files with 220 additions and 64 deletions

View File

@@ -12,7 +12,8 @@ import {
} from "@livekit/components-react";
import { ConnectionState, Track, LocalParticipant, Room } from "livekit-client";
import { useEffect, useMemo, useState, useRef } from "react";
import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon } from "./icons";
import { BatteryIcon, ImageIcon, MicIcon, MicOffIcon, PhoneIcon, PhoneOffIcon, WifiIcon, SwitchCameraIcon, VoiceIcon, CheckIcon } from "./icons";
import { useToast } from "@/components/toast/ToasterProvider";
export interface PhoneSimulatorProps {
onConnect: () => void;
@@ -21,7 +22,8 @@ export interface PhoneSimulatorProps {
}
export function PhoneSimulator({ onConnect, phoneMode = "normal", onCapture }: PhoneSimulatorProps) {
const { config } = useConfig();
const { config, setUserSettings } = useConfig();
const { setToastMessage } = useToast();
const room = useRoomContext();
const roomState = useConnectionState();
const { localParticipant } = useLocalParticipant();
@@ -31,12 +33,22 @@ export function PhoneSimulator({ onConnect, phoneMode = "normal", onCapture }: P
const phoneContainerRef = useRef<HTMLDivElement>(null);
const visualizerRef = useRef<HTMLDivElement>(null);
const [showCameraMenu, setShowCameraMenu] = useState(false);
const [showVoiceMenu, setShowVoiceMenu] = useState(false);
const [cameras, setCameras] = useState<MediaDeviceInfo[]>([]);
const [processingImage, setProcessingImage] = useState<string | null>(null);
const [currentVoiceId, setCurrentVoiceId] = useState<string>("BV001_streaming"); // Default voice ID
const [isCapturing, setIsCapturing] = useState(false);
const [processingSource, setProcessingSource] = useState<
"camera" | "upload" | null
>(null);
const [lastVoiceChangeAt, setLastVoiceChangeAt] = useState<number | null>(null);
// Mirror the "voice" attribute from the shared settings into local state so
// the voice selector shows the stored choice. When no such attribute exists,
// the current local value is left untouched.
useEffect(() => {
  const storedVoice = config.settings.attributes?.find((attr) => attr.key === "voice");
  if (!storedVoice) {
    return;
  }
  setCurrentVoiceId(storedVoice.value);
}, [config.settings.attributes]);
const [currentTime, setCurrentTime] = useState("");
@@ -104,14 +116,17 @@ export function PhoneSimulator({ onConnect, phoneMode = "normal", onCapture }: P
if (showCameraMenu) {
setShowCameraMenu(false);
}
if (showVoiceMenu) {
setShowVoiceMenu(false);
}
};
if (showCameraMenu) {
if (showCameraMenu || showVoiceMenu) {
document.addEventListener("click", handleClickOutside);
}
return () => {
document.removeEventListener("click", handleClickOutside);
};
}, [showCameraMenu]);
}, [showCameraMenu, showVoiceMenu]);
useEffect(() => {
if (voiceAssistant.state === "speaking") {
@@ -299,6 +314,29 @@ export function PhoneSimulator({ onConnect, phoneMode = "normal", onCapture }: P
setShowCameraMenu(false);
};
/**
 * Stores the chosen voice in the user settings and updates the local UI state.
 *
 * The "voice" attribute is overwritten if it already exists, otherwise it is
 * appended. Settings and the attribute list are copied, never mutated in place.
 *
 * @param voiceId Identifier of the voice to select (e.g. "BV001_streaming").
 */
const handleChangeVoice = (voiceId: string) => {
  // Work on a copy of the attribute list (or a new list if none is set yet).
  const nextAttributes = config.settings.attributes
    ? [...config.settings.attributes]
    : [];
  const voiceSlot = nextAttributes.findIndex((attr) => attr.key === "voice");

  if (voiceSlot < 0) {
    nextAttributes.push({ id: "voice", key: "voice", value: voiceId });
  } else {
    nextAttributes[voiceSlot] = { ...nextAttributes[voiceSlot], value: voiceId };
  }

  setUserSettings({ ...config.settings, attributes: nextAttributes });
  setCurrentVoiceId(voiceId);
  // Timestamp read by the "Call Agent" button to ignore clicks that land
  // immediately after a voice change.
  setLastVoiceChangeAt(Date.now());
  // Close the menu after a short delay rather than synchronously.
  // NOTE(review): presumably so the menu is still open while the current
  // click finishes dispatching — confirm the 100 ms value is intentional.
  setTimeout(() => setShowVoiceMenu(false), 100);
};
/**
 * Opens the voice menu when it is closed and closes it when it is open.
 *
 * @param e Click event from the voice selector button.
 */
const handleVoiceMenuToggle = (e: React.MouseEvent) => {
  // Keep the click away from the document-level "click outside" listener,
  // which closes any open menu.
  e.stopPropagation();
  // Derive the next value from the latest state, not from the
  // `showVoiceMenu` captured by this render, so batched toggles stay correct.
  setShowVoiceMenu((isOpen: boolean) => !isOpen);
};
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0];
if (file && onCapture) {
@@ -312,13 +350,23 @@ export function PhoneSimulator({ onConnect, phoneMode = "normal", onCapture }: P
}
};
const videoContent = useMemo(() => {
const videoContent = (() => {
if (roomState === ConnectionState.Disconnected) {
return (
<div className="flex items-center justify-center h-full w-full bg-gray-900 text-gray-500 text-sm p-4 text-center">
<div className="flex flex-col items-center gap-6">
<button
onClick={onConnect}
className="flex flex-col items-center gap-4 hover:opacity-80 transition-opacity"
onClick={(e) => {
e.stopPropagation();
// Guard against accidental call when just changing voice
if (showVoiceMenu) return;
if (lastVoiceChangeAt && Date.now() - lastVoiceChangeAt < 400) {
return;
}
onConnect();
}}
disabled={showVoiceMenu}
className={`flex flex-col items-center gap-4 transition-opacity ${showVoiceMenu ? 'opacity-50 cursor-not-allowed' : 'hover:opacity-80 cursor-pointer'}`}
>
<div
className="w-16 h-16 rounded-full flex items-center justify-center text-white"
@@ -328,6 +376,58 @@ export function PhoneSimulator({ onConnect, phoneMode = "normal", onCapture }: P
</div>
<span className="font-medium text-white">Call Agent</span>
</button>
<div className="relative">
<button
onClick={handleVoiceMenuToggle}
className="flex items-center gap-2 px-4 py-2 rounded-full bg-gray-800 text-white hover:bg-gray-700 transition-colors text-xs"
>
<VoiceIcon className="w-3 h-3" />
<span>
{currentVoiceId === "BV001_streaming" ? "Female Voice" : "Male Voice"}
</span>
</button>
{showVoiceMenu && (
<div
className="absolute top-full mt-2 left-1/2 -translate-x-1/2 bg-gray-800 border border-gray-700 rounded-lg shadow-xl py-1 w-40 z-50"
onClick={(e) => e.stopPropagation()}
>
<button
onClick={(e) => {
e.preventDefault();
e.stopPropagation();
handleChangeVoice("BV001_streaming");
}}
className={`w-full text-left px-4 py-2 text-xs hover:bg-gray-700 transition-colors flex items-center justify-between ${
currentVoiceId === "BV001_streaming"
? "text-blue-400 font-bold"
: "text-white"
}`}
>
<span>Female Voice</span>
{currentVoiceId === "BV001_streaming" && <CheckIcon />}
</button>
<button
onClick={(e) => {
e.preventDefault();
e.stopPropagation();
handleChangeVoice("BV002_streaming");
}}
className={`w-full text-left px-4 py-2 text-xs hover:bg-gray-700 transition-colors flex items-center justify-between ${
currentVoiceId === "BV002_streaming"
? "text-blue-400 font-bold"
: "text-white"
}`}
>
<span>Male Voice</span>
{currentVoiceId === "BV002_streaming" && (
<CheckIcon />
)}
</button>
</div>
)}
</div>
</div>
</div>
);
}
@@ -348,7 +448,7 @@ export function PhoneSimulator({ onConnect, phoneMode = "normal", onCapture }: P
className="w-full h-full object-cover mirror-video"
/>
);
}, [roomState, localCameraTrack, onConnect]);
})();
return (
<div className="w-auto max-w-full h-full aspect-[9/19.5] max-h-full bg-black rounded-[40px] border-[12px] border-gray-900 overflow-hidden relative shadow-2xl flex flex-col shrink-0">
@@ -449,34 +549,37 @@ export function PhoneSimulator({ onConnect, phoneMode = "normal", onCapture }: P
{/* Call Controls Overlay */}
{roomState === ConnectionState.Connected && (
<div className="absolute bottom-8 left-0 w-full px-8 z-20">
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-20">
{phoneMode === "capture" ? (
<div className="w-full grid grid-cols-3 items-center">
<div className="flex justify-start">
<div className="absolute top-0 left-0 w-full h-full flex flex-col justify-end pb-[5%] px-[8%] z-20">
{/* Camera Controls Row */}
<div className="w-full flex items-center justify-evenly mb-8">
{/* Left: Upload */}
<button
className="p-4 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors shrink-0"
className="p-3 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleUpload}
>
<ImageIcon className="w-6 h-6" />
</button>
</div>
<div className="flex justify-center">
{/* Center: Capture */}
<button
className="w-20 h-20 rounded-full border-4 border-white p-1 hover:scale-105 transition-transform shrink-0 aspect-square"
className="w-16 h-16 rounded-full border-4 border-white p-1 hover:scale-105 transition-transform shrink-0"
onClick={handleCapture}
>
<div className="w-full h-full bg-white rounded-full"></div>
</button>
</div>
<div className="flex justify-end relative">
{/* Right: Switch Camera */}
<div className="relative">
<button
className="p-4 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors shrink-0"
className="p-3 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleSwitchCamera}
>
<SwitchCameraIcon className="w-6 h-6" />
</button>
{showCameraMenu && (
<div className="absolute bottom-16 right-0 bg-gray-900 border border-gray-800 rounded-lg shadow-xl py-2 w-48 z-50">
<div className="absolute bottom-full mb-2 right-0 bg-gray-900 border border-gray-800 rounded-lg shadow-xl py-2 w-48 z-50">
{cameras.length === 0 ? (
<div className="px-4 py-2 text-gray-500 text-sm">
No cameras found
@@ -497,7 +600,36 @@ export function PhoneSimulator({ onConnect, phoneMode = "normal", onCapture }: P
)}
</div>
</div>
{/* Call Controls Row */}
<div className="w-full flex items-center justify-center gap-8">
{/* Mic Toggle */}
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
{/* End Call */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
onClick={handleDisconnect}
>
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
</div>
) : (
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-20">
<div className="w-full flex items-center justify-center gap-8">
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
@@ -521,6 +653,7 @@ export function PhoneSimulator({ onConnect, phoneMode = "normal", onCapture }: P
<PhoneOffIcon className="w-6 h-6" />
</button>
</div>
</div>
)}
</div>
)}

View File

@@ -26,6 +26,7 @@ import {
useVoiceAssistant,
useRoomContext,
useParticipantAttributes,
useChat,
} from "@livekit/components-react";
import { ConnectionState, LocalParticipant, Track, RpcError, RpcInvocationData } from "livekit-client";
import { QRCodeSVG } from "qrcode.react";
@@ -57,6 +58,7 @@ export default function Playground({
const { name } = useRoomInfo();
const [transcripts, setTranscripts] = useState<ChatMessageType[]>([]);
const { localParticipant } = useLocalParticipant();
const { send: sendChat } = useChat();
const voiceAssistant = useVoiceAssistant();
@@ -74,7 +76,7 @@ export default function Playground({
localParticipant.setCameraEnabled(config.settings.inputs.camera);
localParticipant.setMicrophoneEnabled(config.settings.inputs.mic);
}
}, [config, localParticipant, roomState]);
}, [config.settings.inputs.camera, config.settings.inputs.mic, localParticipant, roomState]);
useEffect(() => {
if (!localParticipant || roomState !== ConnectionState.Connected) {
@@ -596,9 +598,10 @@ export default function Playground({
<PhoneSimulator
onConnect={() => onConnect(true)}
phoneMode={phoneMode}
onCapture={(content: File) => {
onCapture={async (content: File) => {
if (localParticipant) {
localParticipant.sendFile(content, { topic: "image" });
await localParticipant.sendFile(content, { topic: "image" });
await sendChat("用户上传了照片" );
}
}}
/>
@@ -667,9 +670,10 @@ export default function Playground({
<PhoneSimulator
onConnect={() => onConnect(true)}
phoneMode={phoneMode}
onCapture={(content: File) => {
onCapture={async (content: File) => {
if (localParticipant) {
localParticipant.sendFile(content, { topic: "image" });
await localParticipant.sendFile(content, { topic: "image" });
await sendChat("用户上传了一张照片");
}
}}
/>

View File

@@ -188,3 +188,22 @@ export const SwitchCameraIcon = ({ className }: { className?: string }) => (
<path d="M20 12v3a3 3 0 0 1-3 3H4m3 3-3-3 3-3" />
</svg>
);
/**
 * Outline microphone icon used for the voice selector.
 *
 * Drawn on a 24x24 viewBox with `currentColor` strokes, so it takes the text
 * color of its parent.
 *
 * @param className Optional CSS classes forwarded to the root `<svg>`.
 */
export const VoiceIcon = ({ className }: { className?: string }) => {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      width="24"
      height="24"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      strokeWidth="2"
      strokeLinecap="round"
      strokeLinejoin="round"
      className={className}
    >
      {/* Capsule body */}
      <path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z" />
      {/* Cradle arc beneath the capsule */}
      <path d="M19 10v2a7 7 0 0 1-14 0v-2" />
      {/* Vertical stem */}
      <line x1="12" y1="19" x2="12" y2="22" />
    </svg>
  );
};