diff --git a/.claude/launch.json b/.claude/launch.json index dd66d24..d74c463 100644 --- a/.claude/launch.json +++ b/.claude/launch.json @@ -8,6 +8,13 @@ "cwd": "frontend", "port": 3001, "autoPort": false + }, + { + "name": "ui-docker", + "runtimeExecutable": "docker", + "runtimeArgs": ["compose", "up", "ui"], + "port": 3030, + "autoPort": false } ] } diff --git a/backend/services/pipecat/pipeline.py b/backend/services/pipecat/pipeline.py index d3db21b..d63a892 100644 --- a/backend/services/pipecat/pipeline.py +++ b/backend/services/pipecat/pipeline.py @@ -10,11 +10,19 @@ from loguru import logger from models import AssistantConfig from services.pipecat.service_factory import create_services -from pipecat.frames.frames import EndFrame, TTSSpeakFrame +from pipecat.frames.frames import ( + EndFrame, + InterruptionTaskFrame, + TranscriptionFrame, + TransportMessageUrgentFrame, + TTSSpeakFrame, +) from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext +from pipecat.processors.transcript_processor import TranscriptProcessor +from pipecat.utils.time import time_now_iso8601 async def run_pipeline(transport, cfg: AssistantConfig) -> None: @@ -32,14 +40,20 @@ async def run_pipeline(transport, cfg: AssistantConfig) -> None: context = OpenAILLMContext(messages=[{"role": "system", "content": cfg.prompt}]) context_aggregator = llm.create_context_aggregator(context) + # 转写收集:user 侧收 ASR 最终转写,assistant 侧聚合 TTS 实际播报的文本, + # 统一通过 data channel 推给前端聊天记录面板。 + transcript = TranscriptProcessor() + pipeline = Pipeline( [ transport.input(), stt, + transcript.user(), context_aggregator.user(), llm, tts, transport.output(), + transcript.assistant(), context_aggregator.assistant(), ] ) @@ -52,6 +66,39 @@ async def run_pipeline(transport, cfg: AssistantConfig) -> None: ), ) + 
@transcript.event_handler("on_transcript_update") + async def on_transcript_update(_processor, frame): + # Push each final transcript message (user/assistant) to the frontend, which uses it to render the chat history + for msg in frame.messages: + await task.queue_frame( + TransportMessageUrgentFrame( + message={ + "type": "transcript", + "role": msg.role, + "content": msg.content, + "timestamp": msg.timestamp, + } + ) + ) + + @transport.event_handler("on_app_message") + async def on_app_message(_transport, message, _sender): + # Text input from the frontend: interrupt the current playback first, then inject the text as a final user transcription, + # so it follows exactly the same transcription -> context -> LLM -> TTS path as speech + if not isinstance(message, dict) or message.get("type") != "user-text": + return + text = str(message.get("text") or "").strip() + if not text: + return + await task.queue_frames( + [ + InterruptionTaskFrame(), + TranscriptionFrame( + text=text, user_id="debug", timestamp=time_now_iso8601() + ), + ] + ) + + @transport.event_handler("on_client_connected") async def on_client_connected(_transport, _client): if cfg.greeting: diff --git a/frontend/src/components/pages/AssistantPage.tsx b/frontend/src/components/pages/AssistantPage.tsx index 769cbfb..c5ab86f 100644 --- a/frontend/src/components/pages/AssistantPage.tsx +++ b/frontend/src/components/pages/AssistantPage.tsx @@ -78,7 +78,7 @@ import { type Credential, type KnowledgeBase, } from "@/lib/api"; -import { useVoicePreview } from "@/hooks/use-voice-preview"; +import { useVoicePreview, type ChatMessage } from "@/hooks/use-voice-preview"; type RuntimeMode = "pipeline" | "realtime"; @@ -1856,19 +1856,28 @@ function DebugVoicePanel({ error, micWarning, localStream, + messages, + sendText, connect, disconnect, audioRef, } = useVoicePreview(assistantId); // 连接中或已连通都视作"会话进行中" const recording = status === "connecting" || status === "connected"; + const [textDraft, setTextDraft] = useState(""); + + function handleSendText() { + if (sendText(textDraft)) { + setTextDraft(""); + } + } return (
{/* 后端 TTS 音频经 WebRTC 媒体流过来,挂这里播放 */}