diff --git a/web/pages/Assistants.tsx b/web/pages/Assistants.tsx index 7722af3..dadae74 100644 --- a/web/pages/Assistants.tsx +++ b/web/pages/Assistants.tsx @@ -1,5 +1,6 @@ import React, { useState, useEffect, useMemo, useRef } from 'react'; +import { createPortal } from 'react-dom'; import { Plus, Search, Play, Square, Copy, Trash2, Mic, MessageSquare, Save, Video, PhoneOff, Camera, ArrowLeftRight, Send, Phone, Rocket, AlertTriangle, PhoneCall, CameraOff, Image, Images, CloudSun, Calendar, TrendingUp, Coins, Wrench, Globe, Terminal, X, ClipboardCheck, Sparkles, Volume2, Timer, ChevronDown, Database, Server, Zap, ExternalLink, Key, BrainCircuit, Ear, Book, Filter } from 'lucide-react'; import { Button, Input, Badge, Drawer, Dialog } from '../components/UI'; import { ASRModel, Assistant, KnowledgeBase, LLMModel, TabValue, Tool, Voice } from '../types'; @@ -791,30 +792,34 @@ export const AssistantsPage: React.FC = () => { }} placeholder="设定小助手的人设、语气、行为规范以及业务逻辑..." /> - {templateSuggestion?.field === 'prompt' && filteredSystemTemplateVariables.length > 0 && ( -
- {filteredSystemTemplateVariables.map((item) => ( - - ))} -
- )} + {templateSuggestion?.field === 'prompt' && + filteredSystemTemplateVariables.length > 0 && + typeof document !== 'undefined' && + createPortal( +
+ {filteredSystemTemplateVariables.map((item) => ( + + ))} +
, + document.body + )} @@ -914,30 +919,35 @@ export const AssistantsPage: React.FC = () => { disabled={selectedAssistant.generatedOpenerEnabled === true} className="bg-white/5 border-white/10 focus:border-primary/50 disabled:opacity-50 disabled:cursor-not-allowed" /> - {templateSuggestion?.field === 'opener' && filteredSystemTemplateVariables.length > 0 && selectedAssistant.generatedOpenerEnabled !== true && ( -
- {filteredSystemTemplateVariables.map((item) => ( - - ))} -
- )} + {templateSuggestion?.field === 'opener' && + filteredSystemTemplateVariables.length > 0 && + selectedAssistant.generatedOpenerEnabled !== true && + typeof document !== 'undefined' && + createPortal( +
+ {filteredSystemTemplateVariables.map((item) => ( + + ))} +
, + document.body + )}

{selectedAssistant.generatedOpenerEnabled === true @@ -1754,12 +1764,15 @@ const getTemplateSuggestionAnchor = ( } const computedStyle = window.getComputedStyle(control); + const isInput = control instanceof HTMLInputElement; const mirror = document.createElement('div'); const marker = document.createElement('span'); const propertiesToCopy = [ + 'direction', 'boxSizing', 'width', - 'height', + 'overflowX', + 'overflowY', 'paddingTop', 'paddingRight', 'paddingBottom', @@ -1768,28 +1781,41 @@ const getTemplateSuggestionAnchor = ( 'borderRightWidth', 'borderBottomWidth', 'borderLeftWidth', + 'borderTopStyle', + 'borderRightStyle', + 'borderBottomStyle', + 'borderLeftStyle', 'fontFamily', 'fontSize', 'fontStyle', 'fontVariant', 'fontWeight', + 'fontStretch', 'letterSpacing', 'lineHeight', 'textAlign', 'textIndent', 'textTransform', + 'textDecoration', 'wordSpacing', 'tabSize', + 'overflowWrap', ] as const; - mirror.style.position = 'fixed'; - mirror.style.left = `${rect.left}px`; - mirror.style.top = `${rect.top}px`; + mirror.style.position = 'absolute'; + mirror.style.left = '0px'; + mirror.style.top = '0px'; mirror.style.visibility = 'hidden'; mirror.style.pointerEvents = 'none'; - mirror.style.overflow = 'hidden'; - mirror.style.whiteSpace = control instanceof HTMLInputElement ? 'pre' : 'pre-wrap'; + mirror.style.whiteSpace = isInput ? 'pre' : 'pre-wrap'; mirror.style.wordBreak = 'break-word'; + mirror.style.wordWrap = 'break-word'; + mirror.style.overflow = 'hidden'; + + if (isInput) { + mirror.style.height = computedStyle.height; + mirror.style.lineHeight = computedStyle.height; + } for (const property of propertiesToCopy) { (mirror.style as any)[property] = (computedStyle as any)[property]; @@ -1798,16 +1824,16 @@ const getTemplateSuggestionAnchor = ( const value = control.value || ''; const safeCaret = Math.min(caret, value.length); mirror.textContent = value.slice(0, safeCaret); - marker.textContent = value.slice(safeCaret, safeCaret + 1) || '\u200b'; + marker.textContent = value.slice(safeCaret) || '.'; + marker.style.display = 'inline-block'; + marker.style.width = '1px'; mirror.appendChild(marker); document.body.appendChild(mirror); - mirror.scrollTop = control.scrollTop; - mirror.scrollLeft = control.scrollLeft; - - const markerRect = marker.getBoundingClientRect(); + const markerLeft = marker.offsetLeft; + const markerTop = marker.offsetTop; document.body.removeChild(mirror); - if (!Number.isFinite(markerRect.left) || !Number.isFinite(markerRect.top)) { + if (!Number.isFinite(markerLeft) || !Number.isFinite(markerTop)) { return fallback; } @@ -1818,7 +1844,9 @@ const getTemplateSuggestionAnchor = ( : Number.isFinite(fontSizeRaw) ? fontSizeRaw * 1.2 : 16; - return clampTemplateSuggestionPosition(markerRect.left, markerRect.top + lineHeight + 6); + const caretLeft = rect.left + markerLeft - control.scrollLeft; + const caretTop = rect.top + markerTop - control.scrollTop; + return clampTemplateSuggestionPosition(caretLeft, caretTop + lineHeight + 6); }; const extractDynamicTemplateKeys = (text: string): string[] => { @@ -1833,10 +1861,17 @@ const extractDynamicTemplateKeys = (text: string): string[] => { return Array.from(keys); }; +type DebugTranscriptMessage = { + role: 'user' | 'model' | 'tool'; + text: string; + responseId?: string; + ttfbMs?: number; +}; + // Stable transcription log so the scroll container is not recreated on every render (avoids scroll jumping) const TranscriptionLog: React.FC<{ scrollRef: React.RefObject; - messages: { role: 'user' | 'model' | 'tool'; text: string }[]; + messages: DebugTranscriptMessage[]; isLoading: boolean; className?: string; }> = ({ scrollRef, messages, isLoading, className = '' }) => ( @@ -1845,7 +1880,14 @@ const TranscriptionLog: React.FC<{ {messages.map((m, i) => (

- {m.role === 'user' ? 'Me' : m.role === 'tool' ? 'Tool' : 'AI'} +
+ {m.role === 'user' ? 'Me' : m.role === 'tool' ? 'Tool' : 'AI'} + {m.role === 'model' && typeof m.ttfbMs === 'number' && Number.isFinite(m.ttfbMs) && ( + + TTFB {Math.round(m.ttfbMs)}ms + + )} +
{m.text}
@@ -1911,7 +1953,7 @@ export const DebugDrawer: React.FC<{ }; const [mode, setMode] = useState<'text' | 'voice' | 'video'>('text'); - const [messages, setMessages] = useState<{role: 'user' | 'model' | 'tool', text: string}[]>([]); + const [messages, setMessages] = useState([]); const [inputText, setInputText] = useState(''); const [isLoading, setIsLoading] = useState(false); const [callStatus, setCallStatus] = useState<'idle' | 'calling' | 'active'>('idle'); @@ -1973,6 +2015,8 @@ export const DebugDrawer: React.FC<{ const pendingRejectRef = useRef<((e: Error) => void) | null>(null); const submittedMetadataRef = useRef | null>(null); const assistantDraftIndexRef = useRef(null); + const assistantResponseIndexByIdRef = useRef>(new Map()); + const pendingTtfbByResponseIdRef = useRef>(new Map()); const audioCtxRef = useRef(null); const playbackTimeRef = useRef(0); const activeAudioSourcesRef = useRef>(new Set()); @@ -2018,13 +2062,21 @@ export const DebugDrawer: React.FC<{ }); }, [assistant.tools, tools]); + const clearResponseTracking = () => { + assistantDraftIndexRef.current = null; + assistantResponseIndexByIdRef.current.clear(); + pendingTtfbByResponseIdRef.current.clear(); + }; + // Initialize useEffect(() => { if (isOpen) { if (mode === 'text') { + clearResponseTracking(); setMessages([]); setTextSessionStarted(false); } else { + clearResponseTracking(); setMessages([]); setCallStatus('idle'); } @@ -2353,6 +2405,7 @@ export const DebugDrawer: React.FC<{ const launchVoice = async () => { try { setCallStatus('calling'); + clearResponseTracking(); setMessages([]); lastUserFinalRef.current = ''; setWsError(''); @@ -2384,6 +2437,7 @@ export const DebugDrawer: React.FC<{ stopMedia(); closeWs(); setCallStatus('idle'); + clearResponseTracking(); setMessages([]); lastUserFinalRef.current = ''; setIsLoading(false); @@ -2438,9 +2492,9 @@ export const DebugDrawer: React.FC<{ setWsError(''); setDynamicVariablesError(''); // Start every text debug run as a fresh session transcript. + clearResponseTracking(); setMessages([]); lastUserFinalRef.current = ''; - assistantDraftIndexRef.current = null; // Force a fresh WS session so updated assistant runtime config // (voice/model/provider/speed) is applied on session.start. closeWs(); @@ -2733,7 +2787,7 @@ export const DebugDrawer: React.FC<{ wsReadyRef.current = false; pendingResolveRef.current = null; pendingRejectRef.current = null; - assistantDraftIndexRef.current = null; + clearResponseTracking(); userDraftIndexRef.current = null; lastUserFinalRef.current = ''; micFrameBufferRef.current = new Uint8Array(0); @@ -2814,6 +2868,39 @@ export const DebugDrawer: React.FC<{ return; } + if (type === 'metrics.ttfb') { + const maybeTtfb = Number(payload?.latencyMs ?? payload?.data?.latencyMs); + if (!Number.isFinite(maybeTtfb) || maybeTtfb < 0) return; + const ttfbMs = Math.round(maybeTtfb); + const responseIdRaw = payload?.data?.response_id ?? payload?.response_id ?? payload?.responseId; + const responseId = String(responseIdRaw || '').trim(); + if (responseId) { + const indexed = assistantResponseIndexByIdRef.current.get(responseId); + if (typeof indexed === 'number') { + setMessages((prev) => { + if (!prev[indexed] || prev[indexed].role !== 'model') return prev; + const next = [...prev]; + next[indexed] = { ...next[indexed], ttfbMs }; + return next; + }); + } else { + pendingTtfbByResponseIdRef.current.set(responseId, ttfbMs); + } + return; + } + setMessages((prev) => { + for (let i = prev.length - 1; i >= 0; i -= 1) { + if (prev[i]?.role === 'model') { + const next = [...prev]; + next[i] = { ...next[i], ttfbMs }; + return next; + } + } + return prev; + }); + return; + } + if (type === 'assistant.tool_call') { const toolCall = payload?.tool_call || {}; const toolCallId = String(toolCall?.id || '').trim(); @@ -2964,6 +3051,8 @@ export const DebugDrawer: React.FC<{ if (type === 'assistant.response.delta') { const delta = String(payload.text || ''); if (!delta) return; + const responseIdRaw = payload?.data?.response_id ?? payload?.response_id ?? payload?.responseId; + const responseId = String(responseIdRaw || '').trim() || undefined; setMessages((prev) => { let idx = assistantDraftIndexRef.current; if (idx === null || !prev[idx] || prev[idx].role !== 'model') { @@ -2971,6 +3060,13 @@ export const DebugDrawer: React.FC<{ // latest model row instead of creating a duplicate assistant row. for (let i = prev.length - 1; i >= 0; i -= 1) { if (prev[i]?.role === 'model') { + if ( + responseId + && prev[i].responseId + && prev[i].responseId !== responseId + ) { + break; + } idx = i; assistantDraftIndexRef.current = i; break; @@ -2983,12 +3079,38 @@ export const DebugDrawer: React.FC<{ if (last?.role === 'model' && last.text === delta) { return prev; } - const next = [...prev, { role: 'model' as const, text: delta }]; + const nextMessage: DebugTranscriptMessage = { role: 'model' as const, text: delta }; + if (responseId) { + nextMessage.responseId = responseId; + if (pendingTtfbByResponseIdRef.current.has(responseId)) { + nextMessage.ttfbMs = pendingTtfbByResponseIdRef.current.get(responseId); + pendingTtfbByResponseIdRef.current.delete(responseId); + } + } + const next = [...prev, nextMessage]; assistantDraftIndexRef.current = next.length - 1; + if (responseId) { + assistantResponseIndexByIdRef.current.set(responseId, next.length - 1); + } return next; } const next = [...prev]; - next[idx] = { ...next[idx], text: next[idx].text + delta }; + const nextMessage = { ...next[idx], text: next[idx].text + delta }; + if (responseId && !nextMessage.responseId) { + nextMessage.responseId = responseId; + } + if ( + responseId + && typeof nextMessage.ttfbMs !== 'number' + && pendingTtfbByResponseIdRef.current.has(responseId) + ) { + nextMessage.ttfbMs = pendingTtfbByResponseIdRef.current.get(responseId); + pendingTtfbByResponseIdRef.current.delete(responseId); + } + next[idx] = nextMessage; + if (responseId) { + assistantResponseIndexByIdRef.current.set(responseId, idx); + } return next; }); return; @@ -2996,12 +3118,21 @@ export const DebugDrawer: React.FC<{ if (type === 'assistant.response.final') { const finalText = String(payload.text || ''); + const responseIdRaw = payload?.data?.response_id ?? payload?.response_id ?? payload?.responseId; + const responseId = String(responseIdRaw || '').trim() || undefined; setMessages((prev) => { let idx = assistantDraftIndexRef.current; assistantDraftIndexRef.current = null; if (idx === null || !prev[idx] || prev[idx].role !== 'model') { for (let i = prev.length - 1; i >= 0; i -= 1) { if (prev[i]?.role === 'model') { + if ( + responseId + && prev[i].responseId + && prev[i].responseId !== responseId + ) { + break; + } idx = i; break; } @@ -3010,7 +3141,22 @@ export const DebugDrawer: React.FC<{ } if (idx !== null && prev[idx] && prev[idx].role === 'model') { const next = [...prev]; - next[idx] = { ...next[idx], text: finalText || next[idx].text }; + const nextMessage = { ...next[idx], text: finalText || next[idx].text }; + if (responseId && !nextMessage.responseId) { + nextMessage.responseId = responseId; + } + if ( + responseId + && typeof nextMessage.ttfbMs !== 'number' + && pendingTtfbByResponseIdRef.current.has(responseId) + ) { + nextMessage.ttfbMs = pendingTtfbByResponseIdRef.current.get(responseId); + pendingTtfbByResponseIdRef.current.delete(responseId); + } + next[idx] = nextMessage; + if (responseId) { + assistantResponseIndexByIdRef.current.set(responseId, idx); + } return next; } if (!finalText) return prev; @@ -3023,7 +3169,19 @@ export const DebugDrawer: React.FC<{ return next; } } - return [...prev, { role: 'model', text: finalText }]; + const nextMessage: DebugTranscriptMessage = { role: 'model', text: finalText }; + if (responseId) { + nextMessage.responseId = responseId; + if (pendingTtfbByResponseIdRef.current.has(responseId)) { + nextMessage.ttfbMs = pendingTtfbByResponseIdRef.current.get(responseId); + pendingTtfbByResponseIdRef.current.delete(responseId); + } + } + const next = [...prev, nextMessage]; + if (responseId) { + assistantResponseIndexByIdRef.current.set(responseId, next.length - 1); + } + return next; }); setIsLoading(false); return;