Compare commits

..

6 Commits

Author SHA1 Message Date
950d1ab3d4 Update agents/README.md 2026-02-02 15:28:28 +00:00
28b9a16c4e make chat message overlay right 2025-12-19 10:55:13 +08:00
f1b331d923 try to fix fast reload 2025-12-18 09:41:42 +08:00
739c019404 set chat message overlay draggable 2025-12-18 09:22:01 +08:00
da11561f47 Bug fixed 2025-12-17 23:13:42 +08:00
853e1558b1 fix realtime mode need push to talk once 2025-12-17 22:40:11 +08:00
5 changed files with 174 additions and 12 deletions

View File

@@ -0,0 +1 @@
use livekit-plugins-volcengine==1.2.9

View File

@@ -1018,6 +1018,16 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
initial_instructions = participant.attributes.get("instructions") initial_instructions = participant.attributes.get("instructions")
logger.info(f"User selected instructions: {initial_instructions}") logger.info(f"User selected instructions: {initial_instructions}")
# Read talking_mode from frontend state
initial_talking_mode = DEFAULT_TALKING_MODE
if participant.attributes.get("talking_mode"):
frontend_talking_mode = participant.attributes.get("talking_mode")
if frontend_talking_mode in ["push_to_talk", "realtime"]:
initial_talking_mode = frontend_talking_mode
logger.info(f"Initializing talking_mode from frontend: {initial_talking_mode}")
else:
logger.warning(f"Invalid talking_mode from frontend: {frontend_talking_mode}, using default: {initial_talking_mode}")
# Replace the datetime and weekday placeholders to avoid KeyError from other braces in the prompt # Replace the datetime and weekday placeholders to avoid KeyError from other braces in the prompt
initial_instructions = initial_instructions.replace("{datetime}", current_time) initial_instructions = initial_instructions.replace("{datetime}", current_time)
initial_instructions = initial_instructions.replace("{weekday}", current_weekday) initial_instructions = initial_instructions.replace("{weekday}", current_weekday)
@@ -1128,7 +1138,7 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
) )
# disable input audio at the start # disable input audio at the start
_talking_mode = DEFAULT_TALKING_MODE _talking_mode = initial_talking_mode
if _talking_mode == "push_to_talk": if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False) session.input.set_audio_enabled(False)
else: else:
@@ -1192,6 +1202,8 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
session.input.set_audio_enabled(False) session.input.set_audio_enabled(False)
logger.info("Setting audio enabled to False (PTT mode)") logger.info("Setting audio enabled to False (PTT mode)")
else: else:
# When switching to realtime mode, clear user turn state to ensure proper initialization
session.clear_user_turn()
session.input.set_audio_enabled(True) session.input.set_audio_enabled(True)
logger.info("Setting audio enabled to True (realtime mode)") logger.info("Setting audio enabled to True (realtime mode)")
@@ -1204,6 +1216,8 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
if _talking_mode == "push_to_talk": if _talking_mode == "push_to_talk":
session.input.set_audio_enabled(False) session.input.set_audio_enabled(False)
else: else:
# When switching to realtime mode, clear user turn state
session.clear_user_turn()
session.input.set_audio_enabled(True) session.input.set_audio_enabled(True)
return json.dumps({"success": True, "mode": _talking_mode}) return json.dumps({"success": True, "mode": _talking_mode})

View File

@@ -4,6 +4,10 @@ const withNextPluginPreval = createNextPluginPreval();
/** @type {import('next').NextConfig} */ /** @type {import('next').NextConfig} */
const nextConfig = { const nextConfig = {
reactStrictMode: false, reactStrictMode: false,
// Explicitly allow znjj.wangxin93.eu.org for Dev Origin, per future Next.js requirement.
allowedDevOrigins: [
"znjj.wangxin93.eu.org",
],
}; };
module.exports = withNextPluginPreval(nextConfig); module.exports = withNextPluginPreval(nextConfig);

View File

@@ -11,7 +11,7 @@ export interface ChatOverlayProps {
isVisible: boolean; isVisible: boolean;
position: { x: number; y: number }; position: { x: number; y: number };
onPositionChange: (position: { x: number; y: number }) => void; onPositionChange: (position: { x: number; y: number }) => void;
containerRef: React.RefObject<HTMLDivElement>; containerRef: React.RefObject<HTMLDivElement | null>;
onToggle: () => void; onToggle: () => void;
} }

View File

@@ -70,6 +70,11 @@ export function PhoneSimulator({
const pushToTalkButtonRef = useRef<HTMLButtonElement>(null); const pushToTalkButtonRef = useRef<HTMLButtonElement>(null);
const [showChatOverlay, setShowChatOverlay] = useState(false); const [showChatOverlay, setShowChatOverlay] = useState(false);
const [chatOverlayPosition, setChatOverlayPosition] = useState({ x: 0, y: 0 }); // Will be positioned at top-right by ChatOverlay component const [chatOverlayPosition, setChatOverlayPosition] = useState({ x: 0, y: 0 }); // Will be positioned at top-right by ChatOverlay component
const [chatTogglePosition, setChatTogglePosition] = useState<{ x?: number; right?: number; y: number }>({ right: 16, y: 56 }); // Initial position on the right
const [isDraggingChatToggle, setIsDraggingChatToggle] = useState(false);
const chatToggleRef = useRef<HTMLButtonElement>(null);
const chatToggleDragOffset = useRef({ x: 0, y: 0 });
const chatToggleHasDragged = useRef(false);
useEffect(() => { useEffect(() => {
const voiceAttr = config.settings.attributes?.find(a => a.key === "voice"); const voiceAttr = config.settings.attributes?.find(a => a.key === "voice");
@@ -78,6 +83,47 @@ export function PhoneSimulator({
} }
}, [config.settings.attributes]); }, [config.settings.attributes]);
// Publish the current talking mode to the room as the `talking_mode` participant
// attribute once connected, and again whenever the mode is toggled.
const lastTalkingModeRef = useRef<string | null>(null);
const configAttributesRef = useRef(config.settings.attributes);

// Keep the ref pointing at the latest config attributes so the effect below can
// read them without listing them as a dependency.
useEffect(() => {
  configAttributesRef.current = config.settings.attributes;
}, [config.settings.attributes]);

useEffect(() => {
  if (roomState === ConnectionState.Connected && localParticipant) {
    const talkingMode = isPushToTalkMode ? "push_to_talk" : "realtime";

    // Only update if the mode actually changed since the last publish.
    if (lastTalkingModeRef.current === talkingMode) {
      return;
    }
    lastTalkingModeRef.current = talkingMode;

    const participant = localParticipant;

    // setAttributes returns a promise in livekit-client, so a plain try/catch
    // around the call would miss a rejected update. Await it inside an async
    // wrapper so both synchronous throws and rejections reach the log below
    // instead of becoming an unhandled promise rejection.
    void (async () => {
      try {
        // Send the config attributes along with talking_mode so they are
        // preserved; entries with an empty key or value are skipped.
        const attributesToSet: Record<string, string> = {};
        const configAttributes = configAttributesRef.current || [];
        configAttributes.forEach(attr => {
          if (attr.key && attr.value) {
            attributesToSet[attr.key] = attr.value;
          }
        });

        // Add talking_mode
        attributesToSet.talking_mode = talkingMode;

        await participant.setAttributes(attributesToSet);
      } catch (error) {
        console.error("Failed to set talking_mode attribute:", error);
      }
    })();
  } else if (roomState === ConnectionState.Disconnected) {
    // Forget the last published mode so the next connection publishes again.
    lastTalkingModeRef.current = null;
  }
}, [roomState, localParticipant, isPushToTalkMode]);
const [currentTime, setCurrentTime] = useState(""); const [currentTime, setCurrentTime] = useState("");
const [visualizerPosition, setVisualizerPosition] = useState({ const [visualizerPosition, setVisualizerPosition] = useState({
@@ -148,6 +194,96 @@ export function PhoneSimulator({
}; };
}, [isDragging]); }, [isDragging]);
// Chat toggle button drag handlers
/**
 * Begins dragging the chat toggle button (mouse down / touch start).
 *
 * Marks the drag as active, clears the "has dragged" flag, and records the
 * pointer's offset from the button's current position inside the phone
 * container. If the button is still positioned via `right`, its position is
 * first converted to an equivalent `x` so it can be moved freely.
 */
const handleChatToggleDragStart = (e: React.MouseEvent | React.TouchEvent) => {
  e.preventDefault();
  e.stopPropagation(); // Keep the press from bubbling to parent handlers
  setIsDraggingChatToggle(true);
  chatToggleHasDragged.current = false;

  const container = phoneContainerRef.current;
  const toggle = chatToggleRef.current;
  if (!container || !toggle) return;

  const containerRect = container.getBoundingClientRect();
  const buttonRect = toggle.getBoundingClientRect();

  // Touch events carry coordinates on the first touch point; mouse events on the event itself.
  const pointer = 'touches' in e ? e.touches[0] : e;

  let startX: number;
  if (chatTogglePosition.right !== undefined && chatTogglePosition.x === undefined) {
    // Right-anchored: derive the equivalent left offset and switch to x positioning.
    startX = containerRect.width - chatTogglePosition.right - buttonRect.width;
    setChatTogglePosition({ x: startX, y: chatTogglePosition.y });
  } else {
    // Already using x positioning
    startX = chatTogglePosition.x ?? 0;
  }

  chatToggleDragOffset.current = {
    x: pointer.clientX - containerRect.left - startX,
    y: pointer.clientY - containerRect.top - chatTogglePosition.y,
  };
};
/**
 * Moves the chat toggle button while a drag is active (mouse move / touch move).
 *
 * Computes the new position relative to the phone container from the pointer
 * location and the offset recorded at drag start, clamps it so the button
 * stays inside the container, and stores it as an `x`/`y` position.
 */
const handleChatToggleDragMove = (e: MouseEvent | TouchEvent) => {
  const container = phoneContainerRef.current;
  const toggle = chatToggleRef.current;
  if (!isDraggingChatToggle || !container || !toggle) return;

  e.preventDefault();
  chatToggleHasDragged.current = true; // Lets the click handler tell a drag from a tap

  const containerRect = container.getBoundingClientRect();
  const buttonRect = toggle.getBoundingClientRect();
  const pointer = 'touches' in e ? e.touches[0] : e;

  const clamp = (value: number, lower: number, upper: number) =>
    Math.max(lower, Math.min(value, upper));

  // Below 768px viewport width the button may reach the very top (y = 0);
  // otherwise it is kept below a 48px status bar area.
  const isMobile = typeof window !== 'undefined' && window.innerWidth < 768;
  const topLimit = isMobile ? 0 : 48;

  const rawX = pointer.clientX - containerRect.left - chatToggleDragOffset.current.x;
  const rawY = pointer.clientY - containerRect.top - chatToggleDragOffset.current.y;

  setChatTogglePosition({
    x: clamp(rawX, 0, containerRect.width - buttonRect.width),
    y: clamp(rawY, topLimit, containerRect.height - buttonRect.height),
  });
};
/**
 * Ends the chat toggle drag (mouse up / touch end).
 *
 * The "has dragged" flag is cleared 100ms later rather than immediately, so
 * the button's onClick handler can still read it and skip the toggle.
 */
const handleChatToggleDragEnd = () => {
  setIsDraggingChatToggle(false);

  const clearDraggedFlag = () => {
    chatToggleHasDragged.current = false;
  };
  setTimeout(clearDraggedFlag, 100);
};
// While a chat toggle drag is active, follow the pointer via window-level
// listeners so the drag continues even when the pointer leaves the button.
useEffect(() => {
  if (isDraggingChatToggle) {
    window.addEventListener("mouseup", handleChatToggleDragEnd);
    window.addEventListener("mousemove", handleChatToggleDragMove);
    window.addEventListener("touchend", handleChatToggleDragEnd);
    // A touch can be cancelled by the browser without a touchend; treat that
    // as the end of the drag so the dragging state is not left stuck on.
    window.addEventListener("touchcancel", handleChatToggleDragEnd);
    // Non-passive so the move handler's preventDefault() takes effect.
    window.addEventListener("touchmove", handleChatToggleDragMove, { passive: false });
  }

  return () => {
    window.removeEventListener("mouseup", handleChatToggleDragEnd);
    window.removeEventListener("mousemove", handleChatToggleDragMove);
    window.removeEventListener("touchend", handleChatToggleDragEnd);
    window.removeEventListener("touchcancel", handleChatToggleDragEnd);
    window.removeEventListener("touchmove", handleChatToggleDragMove);
  };
}, [isDraggingChatToggle]);
// Initialize chat toggle button position - keep it on the right using 'right' CSS property
// Only convert to 'x' (left positioning) when user drags it
useEffect(() => { useEffect(() => {
if (showCameraMenu) { if (showCameraMenu) {
Room.getLocalDevices("videoinput").then(setCameras); Room.getLocalDevices("videoinput").then(setCameras);
@@ -851,22 +987,31 @@ export function PhoneSimulator({
</div> </div>
</div> </div>
{/* Chat Toggle Button - Top Right, aligned with audio visualizer */} {/* Chat Toggle Button - Top Right, aligned with audio visualizer (Draggable) */}
{roomState === ConnectionState.Connected && {roomState === ConnectionState.Connected &&
voiceAssistant.agent && voiceAssistant.agent &&
phoneMode !== "important_message" && phoneMode !== "important_message" &&
phoneMode !== "capture" && ( phoneMode !== "capture" && (
<button <button
className={`absolute right-2 z-50 p-3 rounded-full backdrop-blur-md transition-colors shadow-lg ${ ref={chatToggleRef}
className={`absolute z-50 p-3 rounded-full backdrop-blur-md transition-colors shadow-lg cursor-move select-none touch-none ${
showChatOverlay showChatOverlay
? "bg-blue-500/80 text-white" ? "bg-blue-500/80 text-white"
: "bg-gray-800/70 text-white hover:bg-gray-800/90" : "bg-gray-800/70 text-white hover:bg-gray-800/90"
}`} }`}
onClick={() => setShowChatOverlay(!showChatOverlay)} onClick={(e) => {
title={showChatOverlay ? "Hide chat" : "Show chat"} // Only toggle if we didn't just drag
if (!chatToggleHasDragged.current) {
setShowChatOverlay(!showChatOverlay);
}
}}
onMouseDown={handleChatToggleDragStart}
onTouchStart={handleChatToggleDragStart}
title={showChatOverlay ? "Hide chat (drag to move)" : "Show chat (drag to move)"}
style={{ style={{
top: '56px', // Align with audio visualizer initial position ...(chatTogglePosition.x !== undefined ? { left: chatTogglePosition.x } : {}),
right: '8px', ...(chatTogglePosition.right !== undefined ? { right: chatTogglePosition.right } : {}),
top: chatTogglePosition.y,
}} }}
> >
<ChatIcon className="w-5 h-5 md:w-6 md:h-6" /> <ChatIcon className="w-5 h-5 md:w-6 md:h-6" />
@@ -1031,7 +1176,7 @@ export function PhoneSimulator({
<ChatOverlay <ChatOverlay
agentAudioTrack={voiceAssistant.audioTrack} agentAudioTrack={voiceAssistant.audioTrack}
accentColor={config.settings.theme_color} accentColor={config.settings.theme_color}
inputDisabled={phoneMode === "important_message" || phoneMode === "hand_off"} inputDisabled={phoneMode === "hand_off"}
isVisible={showChatOverlay} isVisible={showChatOverlay}
position={chatOverlayPosition} position={chatOverlayPosition}
onPositionChange={setChatOverlayPosition} onPositionChange={setChatOverlayPosition}
@@ -1255,7 +1400,7 @@ export function PhoneSimulator({
)} )}
{/* Realtime Mode Layout */} {/* Realtime Mode Layout */}
{!isPushToTalkMode && ( {!isPushToTalkMode && phoneMode !== "hand_off" && (
<> <>
{/* Important Message Mode - Centered End Call Button */} {/* Important Message Mode - Centered End Call Button */}
{phoneMode === "important_message" ? ( {phoneMode === "important_message" ? (
@@ -1270,7 +1415,6 @@ export function PhoneSimulator({
) : ( ) : (
<div className="w-full flex items-center justify-center gap-4"> <div className="w-full flex items-center justify-center gap-4">
{/* Mic Toggle */} {/* Mic Toggle */}
{phoneMode !== "hand_off" && (
<button <button
className={`p-4 rounded-full backdrop-blur-md transition-colors ${ className={`p-4 rounded-full backdrop-blur-md transition-colors ${
!isMicEnabled !isMicEnabled
@@ -1285,7 +1429,6 @@ export function PhoneSimulator({
<MicOffIcon className="w-6 h-6" /> <MicOffIcon className="w-6 h-6" />
)} )}
</button> </button>
)}
{/* End Call Button */} {/* End Call Button */}
<button <button