Enhance voice interaction features and introduce voice preview functionality
- Update README to reflect the integration of the DebugVoicePanel with WebSocket support for voice interactions.
- Refactor voice_webrtc.py to improve error handling during WebRTC signaling and include assistant_id in the offer payload.
- Add useVoicePreview hook to manage microphone access and WebRTC connections for real-time voice previews.
- Modify AssistantPage to incorporate new visualizer options and pass assistantId to DebugVoicePanel, enhancing user experience during audio interactions.
- Update API model to include new fields for voice, speed, and language, supporting TTS and ASR configurations.
This commit is contained in:
@@ -100,5 +100,5 @@ docker compose --profile remote up -d
|
||||
- [ ] `pip install` 后跑通,核对 pipecat 版本的服务/transport 构造参数(代码内有注释)
|
||||
- [ ] 起本地 SenseVoice / CosyVoice 的 OpenAI 兼容服务
|
||||
- [ ] `realtime` 模式(目前只 `pipeline` 级联)
|
||||
- [ ] 前端 `DebugVoicePanel` 接 `/ws/voice`(抄 dograh `useWebSocketRTC.tsx`)
|
||||
- [x] 前端 `DebugVoicePanel` 接 `/ws/voice`(参考 dograh `useWebSocketRTC.tsx`)
|
||||
- [ ] 加 DB 后:助手配置入库(目前随请求内联)
|
||||
|
||||
@@ -2,9 +2,10 @@
|
||||
|
||||
参考 dograh 的 webrtc_signaling.py,砍掉鉴权/配额/DB/org/ICE 过滤策略/TURN。
|
||||
握手消息:
|
||||
client → {type:"offer", payload:{pc_id, sdp, type, config}}
|
||||
client → {type:"offer", payload:{pc_id, sdp, type, assistant_id}}
|
||||
server → {type:"answer", payload:{pc_id, sdp, type}}
|
||||
both → {type:"ice-candidate", payload:{pc_id, candidate:{...}}}
|
||||
server → {type:"error", payload:{message}}
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
@@ -36,10 +37,22 @@ async def voice_signaling(websocket: WebSocket):
|
||||
try:
|
||||
while True:
|
||||
message = await websocket.receive_json()
|
||||
if message.get("type") == "offer":
|
||||
await _handle_offer(websocket, message.get("payload", {}), peers)
|
||||
elif message.get("type") == "ice-candidate":
|
||||
await _handle_ice(message.get("payload", {}), peers)
|
||||
try:
|
||||
if message.get("type") == "offer":
|
||||
await _handle_offer(websocket, message.get("payload", {}), peers)
|
||||
elif message.get("type") == "ice-candidate":
|
||||
await _handle_ice(message.get("payload", {}), peers)
|
||||
except Exception as e:
|
||||
logger.exception(f"处理 WebRTC 信令消息失败: {e}")
|
||||
if websocket.application_state == WebSocketState.CONNECTED:
|
||||
await websocket.send_json(
|
||||
{
|
||||
"type": "error",
|
||||
"payload": {
|
||||
"message": f"语音会话启动失败: {type(e).__name__}"
|
||||
},
|
||||
}
|
||||
)
|
||||
except WebSocketDisconnect:
|
||||
logger.info("WebRTC 信令断开")
|
||||
except Exception as e:
|
||||
|
||||
@@ -57,6 +57,7 @@ import {
|
||||
PopoverTrigger,
|
||||
} from "@/components/ui/popover";
|
||||
import { AuraVisualizer } from "@/components/ui/aura-visualizer";
|
||||
import { NebulaVisualizer } from "@/components/ui/nebula-visualizer";
|
||||
import { SpectrumVisualizer } from "@/components/ui/spectrum-visualizer";
|
||||
import { WaveVisualizer } from "@/components/ui/wave-visualizer";
|
||||
import {
|
||||
@@ -76,6 +77,7 @@ import {
|
||||
type Credential,
|
||||
type KnowledgeBase,
|
||||
} from "@/lib/api";
|
||||
import { useVoicePreview } from "@/hooks/use-voice-preview";
|
||||
|
||||
type RuntimeMode = "pipeline" | "realtime";
|
||||
|
||||
@@ -425,7 +427,6 @@ export function AssistantPage() {
|
||||
appId: "",
|
||||
apiUrl: "",
|
||||
apiKey: "",
|
||||
model: "",
|
||||
asr: "",
|
||||
voice: "",
|
||||
enableInterrupt: true,
|
||||
@@ -455,6 +456,7 @@ export function AssistantPage() {
|
||||
prompt: "",
|
||||
apiUrl: "",
|
||||
apiKey: "",
|
||||
model: "",
|
||||
asr: "",
|
||||
voice: "",
|
||||
enableInterrupt: true,
|
||||
@@ -549,7 +551,6 @@ export function AssistantPage() {
|
||||
apiUrl: a.apiUrl,
|
||||
// 编辑时不把打码占位符放入输入框;空值写回后端表示保留旧 key
|
||||
apiKey: "",
|
||||
model: a.llmCredentialId ?? "",
|
||||
asr: a.asrCredentialId ?? "",
|
||||
voice: a.ttsCredentialId ?? "",
|
||||
enableInterrupt: a.enableInterrupt,
|
||||
@@ -607,6 +608,7 @@ export function AssistantPage() {
|
||||
apiUrl: a.apiUrl,
|
||||
// 编辑时不把打码占位符放入输入框;空值写回后端表示保留旧 key
|
||||
apiKey: "",
|
||||
model: a.llmCredentialId ?? "",
|
||||
asr: a.asrCredentialId ?? "",
|
||||
voice: a.ttsCredentialId ?? "",
|
||||
enableInterrupt: a.enableInterrupt,
|
||||
@@ -1229,7 +1231,7 @@ export function AssistantPage() {
|
||||
</SectionCard>
|
||||
</div>
|
||||
|
||||
<DebugDrawer />
|
||||
<DebugDrawer assistantId={editingId} />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@@ -1334,7 +1336,7 @@ export function AssistantPage() {
|
||||
</SectionCard>
|
||||
</div>
|
||||
|
||||
<DebugDrawer />
|
||||
<DebugDrawer assistantId={editingId} />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@@ -1453,7 +1455,7 @@ export function AssistantPage() {
|
||||
</SectionCard>
|
||||
</div>
|
||||
|
||||
<DebugDrawer />
|
||||
<DebugDrawer assistantId={editingId} />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@@ -1664,71 +1666,117 @@ export function AssistantPage() {
|
||||
</SectionCard>
|
||||
</div>
|
||||
|
||||
<DebugDrawer />
|
||||
<DebugDrawer assistantId={editingId} />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
type VizStyle = "aura" | "bars" | "wave";
|
||||
type VizStyle = "aura" | "nebula" | "bars" | "wave";
|
||||
|
||||
const VIZ_ORDER: VizStyle[] = ["aura", "bars", "wave"];
|
||||
const VIZ_LABEL: Record<VizStyle, string> = {
|
||||
aura: "光环",
|
||||
bars: "频谱",
|
||||
wave: "波形",
|
||||
};
|
||||
const VIZ_OPTIONS: { style: VizStyle; label: string; icon: React.ReactNode }[] =
|
||||
[
|
||||
{ style: "aura", label: "光环", icon: <Orbit size={14} /> },
|
||||
{ style: "nebula", label: "星云", icon: <Sparkles size={14} /> },
|
||||
{ style: "bars", label: "频谱", icon: <AudioLines size={14} /> },
|
||||
{ style: "wave", label: "波形", icon: <Waves size={14} /> },
|
||||
];
|
||||
|
||||
function DebugDrawer() {
|
||||
/**
 * Pill-shaped wrapper that lays out a row of segmented icon buttons.
 *
 * Renders a `role="group"` container labelled with `label` for assistive
 * technology and places `children` inside it unchanged.
 */
function SegmentedIconGroup(props: {
  children: React.ReactNode;
  label: string;
}) {
  const { children, label } = props;
  const containerClasses =
    "flex items-center gap-0.5 rounded-full border border-hairline bg-canvas-soft p-0.5";

  return (
    <div role="group" aria-label={label} className={containerClasses}>
      {children}
    </div>
  );
}
|
||||
|
||||
/**
 * Single round icon button intended to sit inside a segmented group.
 *
 * `label` is used for both `aria-label` and the hover `title`; `selected`
 * is exposed through `aria-pressed` and switches the visual treatment.
 * `onClick` fires on press and `children` is rendered as the button content.
 */
function SegmentedIconButton(props: {
  selected: boolean;
  label: string;
  onClick: () => void;
  children: React.ReactNode;
}) {
  const { selected, label, onClick, children } = props;

  // Selected buttons get the raised surface; idle ones only brighten on hover.
  const stateClasses = selected
    ? "bg-surface-strong text-foreground shadow-sm"
    : "text-muted-soft hover:text-foreground";
  const buttonClasses = `flex h-7 w-7 items-center justify-center rounded-full transition-colors ${stateClasses}`;

  return (
    <button
      type="button"
      onClick={onClick}
      aria-label={label}
      aria-pressed={selected}
      title={label}
      className={buttonClasses}
    >
      {children}
    </button>
  );
}
|
||||
|
||||
function DebugDrawer({ assistantId }: { assistantId: string | null }) {
|
||||
const [showTranscript, setShowTranscript] = useState(false);
|
||||
const [vizStyle, setVizStyle] = useState<VizStyle>("wave");
|
||||
const [vizStyle, setVizStyle] = useState<VizStyle>("aura");
|
||||
|
||||
return (
|
||||
<aside className="hidden min-w-0 flex-1 flex-col overflow-hidden rounded-2xl border border-hairline bg-card shadow-sm lg:flex">
|
||||
<div className="flex shrink-0 items-center justify-between gap-3 border-b border-hairline px-5 py-4">
|
||||
<div className="flex shrink-0 items-center justify-between gap-3 border-b border-hairline px-5 py-3">
|
||||
<div className="text-sm font-medium text-foreground">调试与预览</div>
|
||||
<div className="flex items-center gap-2">
|
||||
{!showTranscript && (
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
size="icon"
|
||||
className="h-8 w-8 rounded-full"
|
||||
onClick={() =>
|
||||
setVizStyle(
|
||||
(value) =>
|
||||
VIZ_ORDER[
|
||||
(VIZ_ORDER.indexOf(value) + 1) % VIZ_ORDER.length
|
||||
],
|
||||
)
|
||||
}
|
||||
aria-label={`切换可视化样式(当前:${VIZ_LABEL[vizStyle]})`}
|
||||
title={`可视化:${VIZ_LABEL[vizStyle]}`}
|
||||
>
|
||||
{vizStyle === "aura" ? (
|
||||
<Orbit size={16} />
|
||||
) : vizStyle === "bars" ? (
|
||||
<AudioLines size={16} />
|
||||
) : (
|
||||
<Waves size={16} />
|
||||
)}
|
||||
</Button>
|
||||
<SegmentedIconGroup label="可视化样式">
|
||||
{VIZ_OPTIONS.map((option) => (
|
||||
<SegmentedIconButton
|
||||
key={option.style}
|
||||
selected={vizStyle === option.style}
|
||||
label={`可视化样式:${option.label}`}
|
||||
onClick={() => setVizStyle(option.style)}
|
||||
>
|
||||
{option.icon}
|
||||
</SegmentedIconButton>
|
||||
))}
|
||||
</SegmentedIconGroup>
|
||||
)}
|
||||
<Button
|
||||
type="button"
|
||||
variant={showTranscript ? "default" : "outline"}
|
||||
size="icon"
|
||||
className="h-8 w-8 rounded-full text-xs font-medium"
|
||||
onClick={() => setShowTranscript((value) => !value)}
|
||||
aria-label={showTranscript ? "显示音频可视化" : "显示文字聊天记录"}
|
||||
aria-pressed={showTranscript}
|
||||
>
|
||||
文
|
||||
</Button>
|
||||
<SegmentedIconGroup label="预览视图">
|
||||
<SegmentedIconButton
|
||||
selected={!showTranscript}
|
||||
label="语音可视化视图"
|
||||
onClick={() => setShowTranscript(false)}
|
||||
>
|
||||
<Mic size={14} />
|
||||
</SegmentedIconButton>
|
||||
<SegmentedIconButton
|
||||
selected={showTranscript}
|
||||
label="文字聊天记录视图"
|
||||
onClick={() => setShowTranscript(true)}
|
||||
>
|
||||
<MessageSquareText size={14} />
|
||||
</SegmentedIconButton>
|
||||
</SegmentedIconGroup>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<DebugVoicePanel showTranscript={showTranscript} vizStyle={vizStyle} />
|
||||
<DebugVoicePanel
|
||||
showTranscript={showTranscript}
|
||||
vizStyle={vizStyle}
|
||||
assistantId={assistantId}
|
||||
/>
|
||||
</aside>
|
||||
);
|
||||
}
|
||||
@@ -1736,15 +1784,22 @@ function DebugDrawer() {
|
||||
function DebugVoicePanel({
|
||||
showTranscript,
|
||||
vizStyle,
|
||||
assistantId,
|
||||
}: {
|
||||
showTranscript: boolean;
|
||||
vizStyle: VizStyle;
|
||||
assistantId: string | null;
|
||||
}) {
|
||||
const [recording, setRecording] = useState(false);
|
||||
const [micError, setMicError] = useState(false);
|
||||
const { status, error, localStream, connect, disconnect, audioRef } =
|
||||
useVoicePreview(assistantId, { onMicError: () => setMicError(true) });
|
||||
// 连接中或已连通都视作"会话进行中"
|
||||
const recording = status === "connecting" || status === "connected";
|
||||
|
||||
return (
|
||||
<div className="flex min-h-0 flex-1 flex-col">
|
||||
{/* 后端 TTS 音频经 WebRTC 媒体流过来,挂这里播放 */}
|
||||
<audio ref={audioRef} autoPlay playsInline className="hidden" />
|
||||
{showTranscript ? (
|
||||
<DebugTranscriptPanel />
|
||||
) : (
|
||||
@@ -1774,40 +1829,55 @@ function DebugVoicePanel({
|
||||
{(() => {
|
||||
const onVizError = () => {
|
||||
setMicError(true);
|
||||
setRecording(false);
|
||||
disconnect();
|
||||
};
|
||||
const shared = {
|
||||
active: recording,
|
||||
active: Boolean(localStream),
|
||||
stream: localStream,
|
||||
className: "relative shrink-0",
|
||||
onError: onVizError,
|
||||
} as const;
|
||||
if (vizStyle === "aura")
|
||||
return <AuraVisualizer {...shared} size={200} />;
|
||||
if (vizStyle === "nebula")
|
||||
return <NebulaVisualizer {...shared} size={200} />;
|
||||
if (vizStyle === "bars")
|
||||
return (
|
||||
<SpectrumVisualizer {...shared} size={200} barCount={64} />
|
||||
);
|
||||
return <SpectrumVisualizer {...shared} size={200} />;
|
||||
return <WaveVisualizer {...shared} size={200} />;
|
||||
})()}
|
||||
</div>
|
||||
|
||||
<div className="relative max-w-xs space-y-1.5">
|
||||
<div className="font-display display-sm text-foreground">
|
||||
{recording ? "我在聆听" : "开始一次语音对话"}
|
||||
{status === "connecting"
|
||||
? "连接中…"
|
||||
: status === "connected"
|
||||
? "我在聆听"
|
||||
: "开始一次语音对话"}
|
||||
</div>
|
||||
<p className="mx-auto text-xs leading-5 text-muted-foreground">
|
||||
{micError
|
||||
? "无法访问麦克风,请检查浏览器权限后重试。"
|
||||
: recording
|
||||
? "直接说话即可。助手会在您停顿后自然回应。"
|
||||
: "测试语音识别、响应速度与助手的播报效果。"}
|
||||
: status === "failed"
|
||||
? error ||
|
||||
"连接失败,请确认后端已启动且助手已保存后重试。"
|
||||
: !assistantId
|
||||
? "请先保存助手,再开始语音预览。"
|
||||
: recording
|
||||
? "直接说话即可。助手会在您停顿后自然回应。"
|
||||
: "测试语音识别、响应速度与助手的播报效果。"}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<Button
|
||||
disabled={!assistantId || status === "connecting"}
|
||||
onClick={() => {
|
||||
setMicError(false);
|
||||
setRecording((value) => !value);
|
||||
if (recording) {
|
||||
disconnect();
|
||||
} else {
|
||||
void connect();
|
||||
}
|
||||
}}
|
||||
className={[
|
||||
"relative h-11 gap-2 rounded-full px-6 text-sm font-medium shadow-sm transition-transform hover:scale-[1.03]",
|
||||
@@ -1817,7 +1887,13 @@ function DebugVoicePanel({
|
||||
].join(" ")}
|
||||
aria-label={recording ? "结束语音测试" : "开始语音测试"}
|
||||
>
|
||||
{recording ? <PhoneOff size={18} /> : <Mic size={18} />}
|
||||
{status === "connecting" ? (
|
||||
<Loader2 size={18} className="animate-spin" />
|
||||
) : recording ? (
|
||||
<PhoneOff size={18} />
|
||||
) : (
|
||||
<Mic size={18} />
|
||||
)}
|
||||
{recording ? "结束对话" : "开始对话"}
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
256
frontend/src/hooks/use-voice-preview.ts
Normal file
256
frontend/src/hooks/use-voice-preview.ts
Normal file
@@ -0,0 +1,256 @@
|
||||
"use client";
|
||||
|
||||
/**
|
||||
* 语音预览:把麦克风接到后端 /ws/voice(WebRTC 信令),听到助手实时回应。
|
||||
*
|
||||
* 走原生 RTCPeerConnection + 一条 ws 信令通道,与后端 voice_webrtc.py 的约定对齐:
|
||||
* client → {type:"offer", payload:{pc_id, sdp, type, assistant_id}}
|
||||
* server → {type:"answer", payload:{pc_id, sdp, type}}
|
||||
* client → {type:"ice-candidate", payload:{pc_id, candidate:{...}}}
|
||||
* 音频本身走 WebRTC 媒体流(Opus),不经 ws;后端 TTS 帧从 ontrack 拿到直接播放。
|
||||
*
|
||||
* 纯本机(localhost)即可跑:localhost 是 secure context,麦克风可用,ws 用明文。
|
||||
* 局域网/别的设备要 https+wss,见 deploy/README.md。
|
||||
*/
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
|
||||
import { API_BASE } from "@/lib/api";
|
||||
|
||||
/**
 * Lifecycle state reported by `useVoicePreview`:
 * "idle" (no session), "connecting" (session being set up),
 * "connected" (WebRTC connection established), "failed" (setup or session error).
 */
export type VoicePreviewStatus = "idle" | "connecting" | "connected" | "failed";
|
||||
|
||||
/**
 * Derives the WebSocket base URL from `API_BASE`.
 *
 * `API_BASE` is resolved against the page origin (so a relative base also
 * works), `https:` is mapped to `wss:` and every other protocol to `ws:`,
 * and a single trailing slash is removed from the result.
 */
function wsBaseUrl(): string {
  const base = new URL(API_BASE, window.location.origin);
  const isSecure = base.protocol === "https:";
  base.protocol = isSecure ? "wss:" : "ws:";

  const serialized = base.toString();
  return serialized.endsWith("/") ? serialized.slice(0, -1) : serialized;
}
|
||||
|
||||
/**
 * Creates a random peer-connection identifier.
 *
 * @returns `"PC-"` followed by 32 lowercase hex characters (16 random bytes
 *   from `crypto.getRandomValues`).
 */
function generatePcId(): string {
  const randomBytes = crypto.getRandomValues(new Uint8Array(16));

  let hex = "";
  for (const byte of randomBytes) {
    // Two hex digits per byte, zero-padded so the length is always 32.
    hex += byte.toString(16).padStart(2, "0");
  }
  return `PC-${hex}`;
}
|
||||
|
||||
/** Optional callbacks accepted by `useVoicePreview`. */
type UseVoicePreviewOptions = {
  /**
   * Called when acquiring the microphone fails (e.g. permission denied or
   * no input device) so the UI can show a hint.
   */
  onMicError?: () => void;
};
|
||||
|
||||
/**
 * Picks a displayable message for a caught value.
 *
 * @param error - Whatever was thrown or rejected.
 * @param fallback - Text to use when `error` is not an `Error` or its
 *   message is empty.
 * @returns `error.message` for an `Error` with a non-empty message,
 *   otherwise `fallback`.
 */
function errorMessage(error: unknown, fallback: string): string {
  if (!(error instanceof Error)) {
    return fallback;
  }
  return error.message || fallback;
}
|
||||
|
||||
/**
 * React hook that runs a voice-preview session: it opens a signaling
 * WebSocket to `/ws/voice`, creates an `RTCPeerConnection`, attaches the
 * microphone, sends the SDP offer (with `assistant_id`) and plays the remote
 * audio track through `audioRef`.
 *
 * @param assistantId - Id sent as `assistant_id` in the offer payload. When
 *   `null`, `connect()` does not start a session and reports a failure.
 * @param options - Optional callbacks; `onMicError` is invoked when
 *   `getUserMedia` rejects.
 * @returns
 *   - `status`: current {@link VoicePreviewStatus}.
 *   - `error`: message of the last failure, or `null`.
 *   - `localStream`: the microphone stream while a session holds one.
 *   - `connect`: starts a session; a no-op while one is starting or active.
 *   - `disconnect`: tears the session down and returns to `"idle"`.
 *   - `audioRef`: ref to attach to the `<audio>` element that plays the
 *     remote track.
 */
export function useVoicePreview(
  assistantId: string | null,
  { onMicError }: UseVoicePreviewOptions = {},
) {
  const [status, setStatus] = useState<VoicePreviewStatus>("idle");
  const [error, setError] = useState<string | null>(null);
  const [localStream, setLocalStream] = useState<MediaStream | null>(null);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  const pcRef = useRef<RTCPeerConnection | null>(null);
  const wsRef = useRef<WebSocket | null>(null);
  const localStreamRef = useRef<MediaStream | null>(null);
  // True while connect() is executing, so overlapping calls are ignored.
  const startingRef = useRef(false);

  // Closes the socket and peer connection, stops the microphone and detaches
  // the remote audio. Handlers are cleared first so that closing does not
  // trigger the failure callbacks installed by connect().
  const releaseResources = useCallback(() => {
    const ws = wsRef.current;
    wsRef.current = null;
    if (ws) {
      ws.onclose = null;
      ws.onerror = null;
      ws.onmessage = null;
      ws.close();
    }

    const pc = pcRef.current;
    pcRef.current = null;
    if (pc) {
      pc.onconnectionstatechange = null;
      pc.onicecandidate = null;
      pc.oniceconnectionstatechange = null;
      pc.ontrack = null;
      pc.close();
    }

    localStreamRef.current?.getTracks().forEach((track) => track.stop());
    localStreamRef.current = null;
    if (audioRef.current) audioRef.current.srcObject = null;
    startingRef.current = false;
  }, []);

  // User-initiated stop: release everything and go back to "idle".
  const disconnect = useCallback(() => {
    releaseResources();
    setLocalStream(null);
    setError(null);
    setStatus("idle");
  }, [releaseResources]);

  // Error stop: release everything and keep the message in "failed" state.
  const fail = useCallback(
    (message: string) => {
      releaseResources();
      setLocalStream(null);
      setError(message);
      setStatus("failed");
    },
    [releaseResources],
  );

  const connect = useCallback(async () => {
    if (startingRef.current || pcRef.current || wsRef.current) return;
    if (!assistantId) {
      setError("请先保存助手,再开始语音预览。");
      setStatus("failed");
      return;
    }

    startingRef.current = true;
    setError(null);
    setStatus("connecting");

    // Socket created by THIS attempt; lets the catch block tell whether the
    // session it is reporting on is still the current one.
    let ownedSocket: WebSocket | null = null;

    try {
      // Created inside the try so that a throwing URL/WebSocket constructor
      // ends in "failed" instead of leaving the hook stuck in "connecting".
      const pcId = generatePcId();
      const ws = new WebSocket(`${wsBaseUrl()}/ws/voice`);
      ownedSocket = ws;
      wsRef.current = ws;

      ws.onmessage = async (event) => {
        let parsed: unknown;
        try {
          parsed = JSON.parse(event.data);
        } catch {
          return; // Not JSON: ignore.
        }
        if (typeof parsed !== "object" || parsed === null) return;
        const { type, payload } = parsed as {
          type?: unknown;
          payload?: Record<string, unknown> | null;
        };

        if (type === "answer") {
          const pc = pcRef.current;
          // Ignore answers that arrive without a pending local offer.
          if (!pc || pc.signalingState !== "have-local-offer") return;
          const sdp = payload?.sdp;
          if (typeof sdp !== "string") {
            fail("后端返回的 WebRTC answer 无效。");
            return;
          }
          try {
            await pc.setRemoteDescription({ type: "answer", sdp });
          } catch (answerError) {
            // Surface the failure; otherwise the UI would wait forever.
            if (pcRef.current === pc) {
              fail(
                errorMessage(answerError, "无法应用后端返回的 WebRTC answer。"),
              );
            }
          }
        } else if (type === "ice-candidate" && payload?.candidate) {
          // Kept for compatibility with a backend that trickles candidates.
          try {
            await pcRef.current?.addIceCandidate(
              payload.candidate as RTCIceCandidateInit,
            );
          } catch {
            /* Late or duplicate candidates are safe to ignore. */
          }
        } else if (type === "error") {
          const message = payload?.message;
          fail(
            typeof message === "string" && message
              ? message
              : "后端无法启动语音会话。",
          );
        }
        // Unknown message types are ignored.
      };

      // 1) Wait for the signaling socket to open.
      await new Promise<void>((resolve, reject) => {
        ws.onopen = () => resolve();
        ws.onerror = () => reject(new Error("无法连接语音服务。"));
        ws.onclose = () => reject(new Error("语音信令连接已关闭。"));
      });
      // Once open, a signaling error or close ends the current session and
      // leaves it in the failed state.
      ws.onerror = () => {
        if (wsRef.current === ws) fail("语音信令连接失败。");
      };
      ws.onclose = () => {
        if (wsRef.current === ws) fail("语音信令连接已断开。");
      };

      // 2) Create the peer connection (STUN only).
      const pc = new RTCPeerConnection({
        iceServers: [{ urls: "stun:stun.l.google.com:19302" }],
      });
      pcRef.current = pc;

      pc.onicecandidate = (e) => {
        if (ws.readyState !== WebSocket.OPEN) return;
        // A null candidate (end of gathering) is forwarded as `candidate: null`.
        ws.send(
          JSON.stringify({
            type: "ice-candidate",
            payload: {
              pc_id: pcId,
              candidate: e.candidate
                ? {
                    candidate: e.candidate.candidate,
                    sdpMid: e.candidate.sdpMid,
                    sdpMLineIndex: e.candidate.sdpMLineIndex,
                  }
                : null,
            },
          }),
        );
      };

      pc.ontrack = (e) => {
        if (e.track.kind === "audio" && audioRef.current) {
          audioRef.current.srcObject =
            e.streams[0] ?? new MediaStream([e.track]);
          // play() may be rejected by autoplay policy; nothing to do then.
          void audioRef.current.play().catch(() => {});
        }
      };

      pc.onconnectionstatechange = () => {
        if (pcRef.current !== pc) return;
        if (pc.connectionState === "connected") setStatus("connected");
        else if (pc.connectionState === "failed")
          fail("WebRTC 音频连接失败。");
      };

      pc.oniceconnectionstatechange = () => {
        if (pcRef.current !== pc) return;
        const st = pc.iceConnectionState;
        if (st === "connected" || st === "completed") setStatus("connected");
        else if (st === "failed") fail("WebRTC 音频连接失败。");
        // NOTE(review): "disconnected" can be transient per the WebRTC spec;
        // it is treated as fatal here, as in the original implementation.
        else if (st === "disconnected") fail("WebRTC 音频连接已断开。");
      };

      // 3) Acquire the microphone and add it to the connection.
      let stream: MediaStream;
      try {
        stream = await navigator.mediaDevices.getUserMedia({
          audio: {
            echoCancellation: true,
            noiseSuppression: true,
            autoGainControl: true,
          },
        });
      } catch (mediaError) {
        onMicError?.();
        fail(errorMessage(mediaError, "无法访问麦克风。"));
        return;
      }
      if (pcRef.current !== pc) {
        // The session was torn down while the permission prompt was open;
        // release the microphone and leave the already-reported state alone.
        stream.getTracks().forEach((track) => track.stop());
        return;
      }
      localStreamRef.current = stream;
      setLocalStream(stream);
      stream.getTracks().forEach((track) => pc.addTrack(track, stream));

      // 4) Create the offer and send it (assistant_id sits at payload top level).
      const offer = await pc.createOffer();
      await pc.setLocalDescription(offer);
      const localDescription = pc.localDescription;
      if (!localDescription?.sdp) {
        throw new Error("浏览器无法创建 WebRTC offer。");
      }
      ws.send(
        JSON.stringify({
          type: "offer",
          payload: {
            pc_id: pcId,
            sdp: localDescription.sdp,
            type: localDescription.type,
            assistant_id: assistantId,
          },
        }),
      );
    } catch (connectionError) {
      // Report only if this attempt still owns the session; otherwise a
      // teardown has already set the final state and message.
      if (ownedSocket === null || wsRef.current === ownedSocket) {
        fail(errorMessage(connectionError, "无法连接语音服务。"));
      }
    } finally {
      startingRef.current = false;
    }
  }, [assistantId, fail, onMicError]);

  // Release everything when the component using the hook unmounts.
  useEffect(() => releaseResources, [releaseResources]);

  return { status, error, localStream, connect, disconnect, audioRef };
}
|
||||
@@ -5,7 +5,7 @@
|
||||
* 注意:api_key 读取时后端永远打码,写回打码占位符表示"不改 key"(写时哨兵)。
|
||||
*/
|
||||
|
||||
const API_BASE =
|
||||
export const API_BASE =
|
||||
process.env.NEXT_PUBLIC_API_BASE_URL ?? "http://localhost:8000";
|
||||
|
||||
export type ModelType = "LLM" | "ASR" | "TTS" | "Realtime" | "Embedding";
|
||||
@@ -34,6 +34,9 @@ export type CredentialUpsert = {
|
||||
interfaceType: InterfaceType;
|
||||
apiUrl: string;
|
||||
apiKey: string;
|
||||
voice: string;
|
||||
speed: number;
|
||||
language: string;
|
||||
isDefault: boolean;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user