Add TTS/text output schema

2026-02-11 09:50:46 +08:00
parent 2d7fc2b700
commit 15523d9ec2
13 changed files with 219 additions and 50 deletions
--- a/web/pages/Assistants.tsx
+++ b/web/pages/Assistants.tsx
@@ -118,6 +118,7 @@ export const AssistantsPage: React.FC = () => {
      prompt: '',
      knowledgeBaseId: '',
      language: 'zh',
+      voiceOutputEnabled: true,
      voice: voices[0]?.id || '',
      speed: 1,
      hotwords: [],
@@ -531,6 +532,7 @@ export const AssistantsPage: React.FC = () => {
                                        placeholder="设定小助手的人设、语气、行为规范以及业务逻辑..."
                                    />
                                </div>
+
                            </div>
                        )}

@@ -624,15 +626,32 @@ export const AssistantsPage: React.FC = () => {
                                    </p>
                                </div>

+                                <div className="space-y-2">
+                                    <label className="text-sm font-medium text-white flex items-center">
+                                        <Volume2 className="w-4 h-4 mr-2 text-primary" /> 启用语音输出
+                                    </label>
+                                    <label className="flex h-12 items-center justify-between rounded-xl border border-white/10 bg-white/5 px-4 text-sm">
+                                        <span className="text-foreground">TTS 输出</span>
+                                        <input
+                                          type="checkbox"
+                                          checked={selectedAssistant.voiceOutputEnabled !== false}
+                                          onChange={(e) => updateAssistant('voiceOutputEnabled', e.target.checked)}
+                                          className="accent-primary"
+                                        />
+                                    </label>
+                                    <p className="text-xs text-muted-foreground">关闭后将进入纯文本输出模式，不会产生语音音频。</p>
+                                </div>
+
                                <div className="space-y-2">
                                    <label className="text-sm font-medium text-white flex items-center">
                                        <Volume2 className="w-4 h-4 mr-2 text-primary"/> 选择音色 (From Voice Library)
                                    </label>
                                    <div className="relative group">
                                        <select 
-                                            className="flex h-12 w-full rounded-xl border border-white/10 bg-white/5 px-4 py-1 text-sm shadow-sm transition-all focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground appearance-none cursor-pointer"
+                                            className="flex h-12 w-full rounded-xl border border-white/10 bg-white/5 px-4 py-1 text-sm shadow-sm transition-all focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground appearance-none cursor-pointer disabled:opacity-50 disabled:cursor-not-allowed"
                                            value={selectedAssistant.voice}
                                            onChange={(e) => updateAssistant('voice', e.target.value)}
+                                            disabled={selectedAssistant.voiceOutputEnabled === false}
                                        >
                                            <option value="" disabled>请选择声音库中的声音...</option>
                                            {voices.map(voice => (
@@ -645,7 +664,9 @@ export const AssistantsPage: React.FC = () => {
                                    </div>
                                    <p className="text-xs text-muted-foreground flex items-center mt-1">
                                        <Sparkles className="w-3 h-3 mr-1 text-primary opacity-70" />
-                                        音色配置同步自声音库。如需添加更多音色，请前往“声音库”模块。
+                                        {selectedAssistant.voiceOutputEnabled === false
+                                          ? '启用语音输出后才可选择音色。'
+                                          : '音色配置同步自声音库。如需添加更多音色，请前往“声音库”模块。'}
                                    </p>
                                </div>

@@ -916,6 +937,7 @@ export const AssistantsPage: React.FC = () => {
           llmModels={llmModels}
           asrModels={asrModels}
           tools={tools}
+           textTtsEnabled={selectedAssistant.voiceOutputEnabled !== false}
         />
      )}

@@ -1030,6 +1052,7 @@ export const DebugDrawer: React.FC<{
  llmModels?: LLMModel[];
  asrModels?: ASRModel[];
  tools?: Tool[];
+  textTtsEnabled: boolean;
  sessionMetadataExtras?: Record<string, any>;
  onProtocolEvent?: (event: Record<string, any>) => void;
 }> = ({
@@ -1040,6 +1063,7 @@ export const DebugDrawer: React.FC<{
  llmModels = [],
  asrModels = [],
  tools = [],
+  textTtsEnabled,
  sessionMetadataExtras,
  onProtocolEvent,
 }) => {
@@ -1117,7 +1141,6 @@ export const DebugDrawer: React.FC<{
  const [selectedCamera, setSelectedCamera] = useState<string>('');
  const [selectedMic, setSelectedMic] = useState<string>('');
  const [isSwapped, setIsSwapped] = useState(false); 
-  const [textTtsEnabled, setTextTtsEnabled] = useState(true);
  const [aecEnabled, setAecEnabled] = useState<boolean>(() => localStorage.getItem('debug_audio_aec') !== '0');
  const [nsEnabled, setNsEnabled] = useState<boolean>(() => localStorage.getItem('debug_audio_ns') !== '0');
  const [agcEnabled, setAgcEnabled] = useState<boolean>(() => localStorage.getItem('debug_audio_agc') !== '0');
@@ -1546,6 +1569,7 @@ export const DebugDrawer: React.FC<{
  const buildLocalResolvedRuntime = () => {
    const warnings: string[] = [];
    const services: Record<string, any> = {};
+    const ttsEnabled = Boolean(textTtsEnabled);
    const isExternalLlm = assistant.configMode === 'dify' || assistant.configMode === 'fastgpt';
    const knowledgeBaseId = String(assistant.knowledgeBaseId || '').trim();
    const knowledge = knowledgeBaseId
@@ -1597,6 +1621,7 @@ export const DebugDrawer: React.FC<{
      if (voice) {
        const ttsProvider = isSiliconflowVendor(voice.vendor) ? 'siliconflow' : 'edge';
        services.tts = {
+          enabled: ttsEnabled,
          provider: ttsProvider,
          model: voice.model,
          apiKey: ttsProvider === 'siliconflow' ? voice.apiKey : null,
@@ -1605,17 +1630,25 @@ export const DebugDrawer: React.FC<{
        };
      } else {
        services.tts = {
+          enabled: ttsEnabled,
          voice: assistant.voice,
          speed: assistant.speed || 1.0,
        };
        warnings.push(`Voice resource not found in loaded list: ${assistant.voice}`);
      }
+    } else if (!ttsEnabled) {
+      services.tts = {
+        enabled: false,
+      };
    }

    const localResolved = {
      assistantId: assistant.id,
      warnings,
      sessionStartMetadata: {
+        output: {
+          mode: ttsEnabled ? 'audio' : 'text',
+        },
        systemPrompt: assistant.prompt || '',
        greeting: assistant.opener || '',
        knowledgeBaseId,
@@ -2007,15 +2040,9 @@ export const DebugDrawer: React.FC<{
        </div>
        <div className="flex items-center justify-between gap-2">
          <Badge variant="outline" className="text-xs">WS: {wsStatus}</Badge>
-          <label className="inline-flex items-center gap-1 text-xs text-muted-foreground px-2 py-1 rounded border border-white/10">
-            <input
-              type="checkbox"
-              checked={textTtsEnabled}
-              onChange={(e) => setTextTtsEnabled(e.target.checked)}
-              className="accent-primary"
-            />
-            TTS
-          </label>
+          <Badge variant={textTtsEnabled ? 'outline' : 'secondary'} className="text-xs">
+            TTS: {textTtsEnabled ? 'ON' : 'OFF'}
+          </Badge>
        </div>
        <div className="rounded-md border border-white/10 bg-black/20 p-2 space-y-2">
          <p className="text-[10px] uppercase tracking-widest text-muted-foreground">Audio 3A</p>