A version that uses the askImportantQuestion and askImageCapture tools

This commit is contained in:
2025-12-12 17:05:16 +08:00
parent 944deec482
commit 2aeaf00603
8 changed files with 1409 additions and 90 deletions

22
agents/.env.example Normal file
View File

@@ -0,0 +1,22 @@
LIVEKIT_API_SECRET="secret"
LIVEKIT_API_KEY="devkey"
LIVEKIT_URL="ws://127.0.0.1:7880"
MINIMAX_API_KEY="aa"
DEEPSEEK_API_KEY="bb"
AZURE_SPEECH_KEY="aa"
AZURE_SPEECH_REGION="eastasia"
CARTESIA_API_KEY="bb"
CARTESIA_LANGUAGE="zh"
SILICONFLOW_API_KEY="aa"
DASHSCOPE_API_KEY="aa"
VOLCENGINE_TTS_ACCESS_TOKEN="aa"
VOLCENGINE_STT_ACCESS_TOKEN="aa"
VOLCENGINE_LLM_API_KEY="aa"
VOLCENGINE_REALTIME_ACCESS_TOKEN="aa"

0
agents/README.md Normal file
View File

File diff suppressed because it is too large Load Diff

View File

@@ -6,6 +6,7 @@ type ChatMessageInput = {
accentColor: string;
height: number;
onSend?: (message: string) => void;
disabled?: boolean;
};
export const ChatMessageInput = ({
@@ -13,6 +14,7 @@ export const ChatMessageInput = ({
accentColor,
height,
onSend,
disabled = false,
}: ChatMessageInput) => {
const [message, setMessage] = useState("");
const [inputTextWidth, setInputTextWidth] = useState(0);
@@ -79,7 +81,8 @@ export const ChatMessageInput = ({
></div>
<input
ref={inputRef}
className={`w-full text-xs caret-transparent bg-transparent opacity-25 text-gray-300 p-2 pr-6 rounded-sm focus:opacity-100 focus:outline-none focus:border-${accentColor}-700 focus:ring-1 focus:ring-${accentColor}-700`}
disabled={disabled}
className={`w-full text-xs caret-transparent bg-transparent opacity-25 text-gray-300 p-2 pr-6 rounded-sm focus:opacity-100 focus:outline-none focus:border-${accentColor}-700 focus:ring-1 focus:ring-${accentColor}-700 disabled:opacity-10 disabled:cursor-not-allowed`}
style={{
paddingLeft: message.length > 0 ? "12px" : "24px",
caretShape: "block",
@@ -108,11 +111,11 @@ export const ChatMessageInput = ({
{message.replaceAll(" ", "\u00a0")}
</span>
<button
disabled={message.length === 0 || !onSend}
disabled={disabled || message.length === 0 || !onSend}
onClick={handleSend}
className={`text-xs uppercase text-${accentColor}-500 hover:bg-${accentColor}-950 p-2 rounded-md opacity-${
message.length > 0 ? 100 : 25
} pointer-events-${message.length > 0 ? "auto" : "none"}`}
!disabled && message.length > 0 ? 100 : 25
} pointer-events-${!disabled && message.length > 0 ? "auto" : "none"}`}
>
Send
</button>

View File

@@ -16,9 +16,10 @@ type ChatTileProps = {
messages: ChatMessageType[];
accentColor: string;
onSend?: (message: string) => Promise<ComponentsChatMessage>;
inputDisabled?: boolean;
};
export const ChatTile = ({ messages, accentColor, onSend }: ChatTileProps) => {
export const ChatTile = ({ messages, accentColor, onSend, inputDisabled }: ChatTileProps) => {
const containerRef = useRef<HTMLDivElement>(null);
useEffect(() => {
if (containerRef.current) {
@@ -30,7 +31,7 @@ export const ChatTile = ({ messages, accentColor, onSend }: ChatTileProps) => {
<div className="flex flex-col gap-4 w-full h-full">
<div
ref={containerRef}
className="overflow-y-auto"
className="overflow-y-auto select-text"
style={{
height: `calc(100% - ${inputHeight}px)`,
}}
@@ -58,6 +59,7 @@ export const ChatTile = ({ messages, accentColor, onSend }: ChatTileProps) => {
placeholder="Type a message"
accentColor={accentColor}
onSend={onSend}
disabled={inputDisabled}
/>
</div>
);

View File

@@ -18,17 +18,29 @@ import { useToast } from "@/components/toast/ToasterProvider";
export interface PhoneSimulatorProps {
onConnect: () => void;
onDisconnect: () => void;
phoneMode?: "normal" | "capture";
phoneMode?: "normal" | "capture" | "important_message" | "hand_off";
onCapture?: (image: File) => void;
capturePrompt?: string;
importantMessage?: string;
importantMessageOptions?: string[];
onImportantMessageAction?: (text: string) => void;
}
export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal", onCapture, capturePrompt }: PhoneSimulatorProps) {
export function PhoneSimulator({
onConnect,
onDisconnect,
phoneMode = "normal",
onCapture,
capturePrompt,
importantMessage,
importantMessageOptions,
onImportantMessageAction
}: PhoneSimulatorProps) {
const { config, setUserSettings } = useConfig();
const { setToastMessage } = useToast();
const room = useRoomContext();
const roomState = useConnectionState();
const { localParticipant } = useLocalParticipant();
const { localParticipant, isMicrophoneEnabled: isMicEnabled } = useLocalParticipant();
const tracks = useTracks();
const voiceAssistant = useVoiceAssistant();
const fileInputRef = useRef<HTMLInputElement>(null);
@@ -43,6 +55,10 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
const [processingSource, setProcessingSource] = useState<
"camera" | "upload" | null
>(null);
const MAX_UPLOAD_MB = 10;
const isAgentSpeaking = voiceAssistant.state === "speaking";
const wasMicEnabledRef = useRef(false);
const lastPhoneMode = useRef(phoneMode);
useEffect(() => {
const voiceAttr = config.settings.attributes?.find(a => a.key === "voice");
@@ -136,6 +152,40 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
}
}, [voiceAssistant.state]);
// Manage the microphone as the phone UI mode changes. On *entering* an
// overlay mode (important_message / capture / hand_off) the previous mic
// state is remembered and the mic is muted; on *exiting* back out of such
// a mode the mic is restored to that remembered state. While staying in an
// overlay mode the mic is kept forcibly off even if something re-enables it.
// NOTE(review): setMicrophoneEnabled returns a promise that is intentionally
// not awaited here — presumably fire-and-forget is acceptable; confirm.
useEffect(() => {
// True only on the render where phoneMode first switches into `mode`.
const enteringMode = (mode: typeof phoneMode) =>
phoneMode === mode && lastPhoneMode.current !== mode;
// Entering important message / capture / hand_off: remember mic state and mute if needed
if (enteringMode("important_message") || enteringMode("capture") || enteringMode("hand_off")) {
wasMicEnabledRef.current = isMicEnabled;
if (isMicEnabled) {
localParticipant.setMicrophoneEnabled(false);
}
}
// Exiting important message mode or hand off mode or capture mode
else if (
(phoneMode !== "important_message" && lastPhoneMode.current === "important_message") ||
(phoneMode !== "hand_off" && lastPhoneMode.current === "hand_off") ||
(phoneMode !== "capture" && lastPhoneMode.current === "capture")
) {
// Restore mic to previous state
localParticipant.setMicrophoneEnabled(wasMicEnabledRef.current);
// If exiting capture mode, clear processing image
if (lastPhoneMode.current === "capture") {
setProcessingImage(null);
setProcessingSource(null);
}
}
// Enforce mic off in important message mode, hand off mode, or capture mode
else if ((phoneMode === "important_message" || phoneMode === "hand_off" || phoneMode === "capture") && isMicEnabled) {
localParticipant.setMicrophoneEnabled(false);
}
// Record the mode we just handled so the next run can detect transitions.
lastPhoneMode.current = phoneMode;
}, [phoneMode, isMicEnabled, localParticipant]);
useEffect(() => {
const updateTime = () => {
const now = new Date();
@@ -159,8 +209,6 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
({ source }) => source === Track.Source.Camera
);
const isMicEnabled = localParticipant.isMicrophoneEnabled;
const handleMicToggle = async () => {
if (isMicEnabled) {
await localParticipant.setMicrophoneEnabled(false);
@@ -173,6 +221,20 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
onDisconnect();
};
// Validate a candidate upload/capture file: it must report an image MIME
// type and be non-empty but no larger than MAX_UPLOAD_MB megabytes.
// Returns the individual flags alongside the combined `isValid` so callers
// can show a failure-specific error message.
const validateImageFile = (file: File) => {
const isImage = file.type.startsWith("image/");
const isSizeOk = file.size > 0 && file.size <= MAX_UPLOAD_MB * 1024 * 1024;
return {
isValid: isImage && isSizeOk,
isImage,
isSizeOk,
};
};
// Convenience wrapper: surface `message` to the user as an error-level toast.
const showErrorToast = (message: string) => {
setToastMessage({ message, type: "error" });
};
const handleCapture = async () => {
if (!localCameraTrack || !onCapture || isCapturing) return;
setIsCapturing(true);
@@ -278,6 +340,14 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
const file = new File([blob], "camera-capture.jpg", {
type: "image/jpeg",
});
const { isValid } = validateImageFile(file);
if (!isValid) {
setProcessingImage(null);
setProcessingSource(null);
showErrorToast(`请拍摄${MAX_UPLOAD_MB}MB以内的图片文件。`);
setIsCapturing(false);
return;
}
onCapture(file);
}
setIsCapturing(false);
@@ -340,10 +410,18 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
const file = event.target.files?.[0];
if (file && onCapture) {
const { isValid, isImage, isSizeOk } = validateImageFile(file);
if (!isValid) {
const msg = !isImage
? "请选择图片文件。"
: `请上传${MAX_UPLOAD_MB}MB以内的图片文件。`;
showErrorToast(msg);
} else {
onCapture(file);
setProcessingImage(URL.createObjectURL(file));
setProcessingSource("upload");
}
}
// Reset input so the same file can be selected again
if (event.target) {
event.target.value = "";
@@ -461,9 +539,26 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
.scan-animation {
animation: scan 3s linear infinite;
}
@keyframes handoffPulse {
0%, 100% {
opacity: 1;
transform: scale(1);
}
50% {
opacity: 0.8;
transform: scale(1.05);
}
}
@keyframes handoffFade {
0%, 100% { opacity: 1; }
50% { opacity: 0.6; }
}
.handoff-animation {
animation: handoffPulse 2s ease-in-out infinite, handoffFade 2s ease-in-out infinite;
}
`}</style>
{/* Status Bar */}
<div className="h-12 w-full bg-black/20 absolute top-0 left-0 z-20 flex items-center justify-between px-6 text-white text-xs font-medium backdrop-blur-sm">
<div className="h-12 w-full bg-black/20 backdrop-blur-sm absolute top-0 left-0 z-50 flex items-center justify-between px-6 text-white text-xs font-medium">
<span>{currentTime}</span>
<div className="flex items-center gap-2">
<WifiIcon className="w-4 h-4" />
@@ -471,12 +566,17 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
</div>
</div>
{/* Dynamic Island / Notch Placeholder */}
<div className="absolute top-0 left-1/2 -translate-x-1/2 w-32 h-7 bg-black rounded-b-2xl z-30"></div>
{/* Main Content */}
<div ref={phoneContainerRef} className="flex-grow relative bg-gray-950 w-full h-full">
<div ref={phoneContainerRef} className="flex-grow relative bg-gray-950 w-full h-full overflow-hidden">
<div className={`h-full w-full transition-all duration-500 ease-in-out transform ${
phoneMode === "hand_off" && roomState === ConnectionState.Connected
? "blur-md scale-105"
: phoneMode === "capture"
? "scale-105 brightness-110"
: "scale-100"
}`}>
{videoContent}
</div>
{processingImage && (
<div className="absolute inset-0 z-10 bg-black flex items-center justify-center">
{/* eslint-disable-next-line @next/next/no-img-element */}
@@ -492,8 +592,11 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
<div className="absolute inset-0 overflow-hidden">
<div className="w-full h-[2px] bg-blue-500 shadow-[0_0_15px_rgba(59,130,246,1)] scan-animation absolute"></div>
</div>
<div className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-full text-center text-blue-400 font-mono text-sm animate-pulse z-20">
...
<div
className="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 w-full text-center font-mono text-2xl font-semibold animate-pulse z-20"
style={{ color: "var(--lk-theme-color)" }}
>
...
</div>
</div>
)}
@@ -530,11 +633,68 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
</div>
)}
</div>
{/* Important Message Overlay */}
{roomState === ConnectionState.Connected && phoneMode === "important_message" && (
<div className="absolute inset-0 z-30 bg-black/60 backdrop-blur-md flex items-center justify-center p-6">
<div className="bg-gray-800/80 backdrop-blur-xl border border-white/10 p-6 rounded-[2rem] shadow-2xl w-full max-w-xs text-center flex flex-col gap-6 transform transition-all">
<div className="flex flex-col items-center gap-4">
<p className="text-white text-lg font-medium leading-relaxed">
{importantMessage}
</p>
</div>
<div className="flex flex-col gap-3 w-full">
{importantMessageOptions && importantMessageOptions.length > 0 ? (
importantMessageOptions.map((option, index) => (
<button
key={index}
onClick={() => {
if (!isAgentSpeaking) {
onImportantMessageAction?.(option);
}
}}
disabled={isAgentSpeaking}
className={`w-full bg-white text-black font-semibold py-3.5 rounded-xl hover:bg-gray-200 transition-colors active:scale-95 transform ${isAgentSpeaking ? "opacity-50 cursor-not-allowed" : ""}`}
>
{option}
</button>
))
) : (
<button
onClick={() => {
if (!isAgentSpeaking) {
onImportantMessageAction?.("确认");
}
}}
disabled={isAgentSpeaking}
className={`w-full bg-white text-black font-semibold py-3.5 rounded-xl hover:bg-gray-200 transition-colors active:scale-95 transform ${isAgentSpeaking ? "opacity-50 cursor-not-allowed" : ""}`}
>
</button>
)}
</div>
</div>
</div>
)}
{/* Hand Off Mode Overlay */}
{roomState === ConnectionState.Connected && phoneMode === "hand_off" && (
<div className="absolute inset-0 z-30 flex items-center justify-center p-6">
<div
className="text-3xl font-semibold font-mono tracking-widest handoff-animation"
style={{ color: "var(--lk-theme-color)" }}
>
...
</div>
</div>
)}
{/* Agent Audio Visualizer (Draggable) */}
{roomState === ConnectionState.Connected && voiceAssistant.audioTrack && (
{roomState === ConnectionState.Connected && voiceAssistant.audioTrack && phoneMode !== "hand_off" && (
<div
ref={visualizerRef}
className="absolute z-20 p-2 bg-black/40 backdrop-blur-md rounded-lg border border-white/10 shadow-lg cursor-move select-none"
className="absolute z-50 p-2 bg-black/40 backdrop-blur-md rounded-lg border border-white/10 shadow-lg cursor-move select-none"
style={{
left: visualizerPosition.x,
top: visualizerPosition.y,
@@ -551,27 +711,35 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
</div>
</div>
)}
</div>
{/* Call Controls Overlay */}
{roomState === ConnectionState.Connected && (
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-20">
{phoneMode === "capture" ? (
<div className="absolute top-0 left-0 w-full h-full flex flex-col justify-end pb-[5%] px-[8%] z-20">
phoneMode === "capture" ? (
<div className="absolute top-0 left-0 w-full h-full flex flex-col justify-end pb-[5%] px-[8%] z-40">
{/* Camera Controls Row */}
<div className="w-full flex items-center justify-evenly mb-8">
{/* Left: Upload */}
<button
className="p-3 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleUpload}
className={`p-3 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors ${isAgentSpeaking ? "opacity-50 cursor-not-allowed" : ""}`}
onClick={() => {
if (!isAgentSpeaking) {
handleUpload();
}
}}
disabled={isAgentSpeaking}
>
<ImageIcon className="w-6 h-6" />
</button>
{/* Center: Capture */}
<button
className="w-16 h-16 rounded-full border-4 border-white p-1 hover:scale-105 transition-transform shrink-0"
onClick={handleCapture}
className={`w-16 h-16 rounded-full border-4 border-white p-1 hover:scale-105 transition-transform shrink-0 ${isAgentSpeaking ? "opacity-50 cursor-not-allowed" : ""}`}
onClick={() => {
if (!isAgentSpeaking) {
handleCapture();
}
}}
disabled={isAgentSpeaking}
>
<div className="w-full h-full bg-white rounded-full"></div>
</button>
@@ -579,8 +747,13 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
{/* Right: Switch Camera */}
<div className="relative">
<button
className="p-3 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors"
onClick={handleSwitchCamera}
className={`p-3 rounded-full bg-gray-800/50 text-white hover:bg-gray-800/70 transition-colors ${isAgentSpeaking ? "opacity-50 cursor-not-allowed" : ""}`}
onClick={(e) => {
if (!isAgentSpeaking) {
handleSwitchCamera(e);
}
}}
disabled={isAgentSpeaking}
>
<SwitchCameraIcon className="w-6 h-6" />
</button>
@@ -607,24 +780,8 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
</div>
</div>
{/* Call Controls Row */}
{/* Call Controls Row (mic hidden in capture mode) */}
<div className="w-full flex items-center justify-center gap-8">
{/* Mic Toggle */}
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
? "bg-white text-black"
: "bg-gray-600/50 text-white hover:bg-gray-600/70"
}`}
onClick={handleMicToggle}
>
{isMicEnabled ? (
<MicIcon className="w-6 h-6" />
) : (
<MicOffIcon className="w-6 h-6" />
)}
</button>
{/* End Call */}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
@@ -635,8 +792,9 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
</div>
</div>
) : (
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-20">
<div className="absolute bottom-[5%] left-0 w-full px-[8%] z-40">
<div className="w-full flex items-center justify-center gap-8">
{phoneMode !== "important_message" && phoneMode !== "hand_off" && (
<button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled
@@ -651,6 +809,7 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
<MicOffIcon className="w-6 h-6" />
)}
</button>
)}
<button
className="p-4 rounded-full bg-red-500 text-white hover:bg-red-600 transition-colors"
@@ -660,8 +819,7 @@ export function PhoneSimulator({ onConnect, onDisconnect, phoneMode = "normal",
</button>
</div>
</div>
)}
</div>
)
)}
</div>
);

View File

@@ -30,7 +30,7 @@ import {
} from "@livekit/components-react";
import { ConnectionState, LocalParticipant, Track, RpcError, RpcInvocationData } from "livekit-client";
import { QRCodeSVG } from "qrcode.react";
import { ReactNode, useCallback, useEffect, useMemo, useState } from "react";
import { ReactNode, useCallback, useEffect, useMemo, useRef, useState } from "react";
import tailwindTheme from "../../lib/tailwindTheme.preval";
import { EditableNameValueRow } from "@/components/config/NameValueRow";
import { AttributesInspector } from "@/components/config/AttributesInspector";
@@ -58,15 +58,19 @@ export default function Playground({
const { name } = useRoomInfo();
const [transcripts, setTranscripts] = useState<ChatMessageType[]>([]);
const { localParticipant } = useLocalParticipant();
const { send: sendChat } = useChat();
const { send: sendChat, chatMessages } = useChat();
const voiceAssistant = useVoiceAssistant();
const roomState = useConnectionState();
const tracks = useTracks();
const room = useRoomContext();
const [phoneMode, setPhoneMode] = useState<"normal" | "capture">("normal");
const [phoneMode, setPhoneMode] = useState<"normal" | "capture" | "important_message" | "hand_off">("normal");
const [capturePrompt, setCapturePrompt] = useState<string>("");
const [importantMessage, setImportantMessage] = useState<string>("");
const [importantMessageOptions, setImportantMessageOptions] = useState<string[]>([]);
const importantMessageResolverRef = useRef<((value: string) => void) | null>(null);
const imageCaptureResolverRef = useRef<((value: string) => void) | null>(null);
const [rpcMethod, setRpcMethod] = useState("");
const [rpcPayload, setRpcPayload] = useState("");
@@ -106,8 +110,10 @@ export default function Playground({
);
localParticipant.registerRpcMethod(
'enterImageCaptureMode',
'askImageCapture',
async (data: RpcInvocationData) => {
// Return a promise that will be resolved when user captures/uploads an image
return new Promise<string>((resolve) => {
if (data.payload) {
try {
const payload = JSON.parse(data.payload);
@@ -115,30 +121,104 @@ export default function Playground({
setCapturePrompt(payload.prompt);
}
} catch (e) {
console.error("Failed to parse enterImageCaptureMode payload", e);
console.error("Failed to parse askImageCapture payload", e);
}
}
setPhoneMode("capture");
// Store the resolver to be called when user captures/uploads an image
imageCaptureResolverRef.current = (imageData: string) => {
resolve(imageData);
};
});
}
);
localParticipant.registerRpcMethod(
'enterHandOffToHumanMode',
async () => {
setPhoneMode("hand_off");
return JSON.stringify({ success: true });
}
);
localParticipant.registerRpcMethod(
'exitImageCaptureMode',
'hangUpCall',
async () => {
setPhoneMode("normal");
setCapturePrompt("");
// Disconnect the call
onConnect(false);
return JSON.stringify({ success: true });
}
);
}, [localParticipant, roomState]);
localParticipant.registerRpcMethod(
'askImportantQuestion',
async (data: RpcInvocationData) => {
// Return a promise that will be resolved when user makes a selection
return new Promise<string>((resolve) => {
if (data.payload) {
try {
const payload = JSON.parse(data.payload);
if (payload.message) {
setImportantMessage(payload.message);
}
if (payload.options && Array.isArray(payload.options)) {
setImportantMessageOptions(payload.options);
} else {
// Default fallback if no options provided
setImportantMessageOptions(["确认"]);
}
} catch (e) {
console.error("Failed to parse askImportantQuestion payload", e);
}
}
setPhoneMode("important_message");
// Store the resolver to be called when user makes a selection
importantMessageResolverRef.current = (selection: string) => {
resolve(JSON.stringify({ selection }));
};
});
}
);
}, [localParticipant, roomState, onConnect]);
// Reset the phone UI on connection-state changes and, on disconnect,
// settle any RPC promises still waiting on user input so the agent side
// is not left hanging forever.
useEffect(() => {
if (roomState === ConnectionState.Connected) {
setPhoneMode("normal");
} else if (roomState === ConnectionState.Disconnected) {
setPhoneMode("normal");
// Clean up any pending important message RPC
if (importantMessageResolverRef.current) {
importantMessageResolverRef.current("disconnected");
importantMessageResolverRef.current = null;
}
// Clean up any pending image capture RPC
if (imageCaptureResolverRef.current) {
imageCaptureResolverRef.current(JSON.stringify({ error: "disconnected" }));
imageCaptureResolverRef.current = null;
}
}
}, [roomState]);
// Leave capture mode once the agent starts speaking, but only after the
// capture RPC has already been resolved (resolver cleared) — at that point
// the speech is presumed to be the agent's analysis of the uploaded image.
useEffect(() => {
// When agent starts speaking, exit capture mode (and PhoneSimulator will clear processing image)
// Only exit if we are NOT waiting for capture (resolver is null), meaning this speech is likely the analysis result
if (voiceAssistant.state === "speaking" && phoneMode === "capture" && !imageCaptureResolverRef.current) {
setPhoneMode("normal");
setCapturePrompt("");
}
}, [voiceAssistant.state, phoneMode]);
// Fallback exit path for capture mode: watch the chat stream for the
// tool-completion marker in case the agent posts a result without
// speaking immediately. Only the most recent message is inspected.
useEffect(() => {
// Also exit capture mode if we receive a completion message in chat (in case agent doesn't speak immediately)
if (chatMessages.length > 0) {
const lastMsg = chatMessages[chatMessages.length - 1];
if (lastMsg.message && lastMsg.message.includes("✅ Result: ask_image_capture") && phoneMode === "capture") {
setPhoneMode("normal");
setCapturePrompt("");
}
}
}, [chatMessages, phoneMode]);
useEffect(() => {
if (!localParticipant || roomState !== ConnectionState.Connected) {
return;
@@ -191,6 +271,18 @@ export default function Playground({
({ source }) => source === Track.Source.Microphone,
);
// Handle the user's choice on the important-message overlay: clear the
// overlay state and resolve the pending askImportantQuestion RPC with the
// selected text (falling back to "确认" when the text is empty).
const handleImportantMessageAction = useCallback(async (text: string) => {
setPhoneMode("normal");
setImportantMessage("");
setImportantMessageOptions([]);
// Resolve the RPC with the user's selection
if (importantMessageResolverRef.current) {
importantMessageResolverRef.current(text || "确认");
importantMessageResolverRef.current = null;
}
}, []);
const onDataReceived = useCallback(
(msg: any) => {
if (msg.topic === "transcription") {
@@ -317,6 +409,7 @@ export default function Playground({
<TranscriptionTile
agentAudioTrack={voiceAssistant.audioTrack}
accentColor={config.settings.theme_color}
inputDisabled={phoneMode === "important_message" || phoneMode === "hand_off"}
/>
);
}
@@ -325,6 +418,7 @@ export default function Playground({
config.settings.theme_color,
voiceAssistant.audioTrack,
voiceAssistant.agent,
phoneMode,
]);
const instructionsContent = (
@@ -630,10 +724,23 @@ export default function Playground({
onDisconnect={() => onConnect(false)}
phoneMode={phoneMode}
capturePrompt={capturePrompt}
importantMessage={importantMessage}
importantMessageOptions={importantMessageOptions}
onImportantMessageAction={handleImportantMessageAction}
onCapture={async (content: File) => {
if (localParticipant) {
// Send file via LiveKit byte stream
await localParticipant.sendFile(content, { topic: "image" });
await sendChat("用户上传了照片" );
// Resolve RPC to signal completion (without image data)
if (imageCaptureResolverRef.current) {
// Do NOT exit capture mode immediately - wait for agent response (voiceAssistant.state === "speaking")
// setPhoneMode("normal");
// setCapturePrompt("");
imageCaptureResolverRef.current(JSON.stringify({ success: true }));
imageCaptureResolverRef.current = null;
}
}
}}
/>
@@ -717,10 +824,23 @@ export default function Playground({
onDisconnect={() => onConnect(false)}
phoneMode={phoneMode}
capturePrompt={capturePrompt}
importantMessage={importantMessage}
importantMessageOptions={importantMessageOptions}
onImportantMessageAction={handleImportantMessageAction}
onCapture={async (content: File) => {
if (localParticipant) {
// Send file via LiveKit byte stream
await localParticipant.sendFile(content, { topic: "image" });
await sendChat("用户上传了一张照片");
// Resolve RPC to signal completion (without image data)
if (imageCaptureResolverRef.current) {
// Do NOT exit capture mode immediately - wait for agent response (voiceAssistant.state === "speaking")
// setPhoneMode("normal");
// setCapturePrompt("");
imageCaptureResolverRef.current(JSON.stringify({ success: true }));
imageCaptureResolverRef.current = null;
}
}
}}
/>

View File

@@ -16,9 +16,11 @@ import { useEffect, useState } from "react";
export function TranscriptionTile({
agentAudioTrack,
accentColor,
inputDisabled,
}: {
agentAudioTrack?: TrackReferenceOrPlaceholder;
accentColor: string;
inputDisabled?: boolean;
}) {
const agentMessages = useTrackTranscription(agentAudioTrack || undefined);
const localParticipant = useLocalParticipant();
@@ -97,7 +99,7 @@ export function TranscriptionTile({
]);
return (
<ChatTile messages={messages} accentColor={accentColor} onSend={sendChat} />
<ChatTile messages={messages} accentColor={accentColor} onSend={sendChat} inputDisabled={inputDisabled} />
);
}