Merge branch 'master' of https://gitea.xiaowang.eu.org/wx44wx/AI-VideoAssistant

2026-02-08 22:42:11 +08:00
parent cd5d0a668d 68f69f9b09
commit 8ec91a7fa8
11 changed files with 697 additions and 69 deletions
--- a/api/app/models.py
+++ b/api/app/models.py
@@ -38,6 +38,17 @@ class Voice(Base):
    user = relationship("User", foreign_keys=[user_id])
 class VendorCredential(Base):
     """Stored API credential for one TTS vendor (table ``vendor_credentials``).

     One row per vendor, keyed by ``vendor_key``. The API key is kept in a
     plain string column; this model applies no encryption or masking.
     """

     __tablename__ = "vendor_credentials"

     # Primary key: the vendor identifier used for lookups.
     vendor_key: Mapped[str] = mapped_column(String(64), primary_key=True)
     # Human-readable vendor label.
     vendor_name: Mapped[str] = mapped_column(String(128), nullable=False)
     # Secret used to authenticate against the vendor API.
     api_key: Mapped[str] = mapped_column(String(512), nullable=False)
     # Optional API base URL override; NULL means "no override stored".
     base_url: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
     created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
     # onupdate keeps the timestamp fresh on every ORM UPDATE, so writers that
     # forget to set it explicitly no longer leave a stale value behind.
     updated_at: Mapped[datetime] = mapped_column(
         DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
     )
 # ============ LLM Model ============
 class LLMModel(Base):
    __tablename__ = "llm_models"
--- a/api/app/routers/knowledge.py
+++ b/api/app/routers/knowledge.py
@@ -146,12 +146,27 @@ def delete_knowledge_base(kb_id: str, db: Session = Depends(get_db)):
@router.post("/bases/{kb_id}/documents")
 def upload_document(
    kb_id: str,
-    data: KnowledgeDocumentCreate,
+    data: Optional[KnowledgeDocumentCreate] = None,
    name: Optional[str] = Query(default=None),
    size: Optional[str] = Query(default=None),
    file_type: Optional[str] = Query(default=None),
    storage_url: Optional[str] = Query(default=None),
    db: Session = Depends(get_db)
 ):
    kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
    if not kb:
        raise HTTPException(status_code=404, detail="Knowledge base not found")
    if data is None:
        if not name or not size:
            raise HTTPException(status_code=422, detail="name and size are required")
        data = KnowledgeDocumentCreate(
            name=name,
            size=size,
            fileType=file_type or "txt",
            storageUrl=storage_url,
        )
    doc = KnowledgeDocument(
        id=str(uuid.uuid4())[:8],
        kb_id=kb_id,
--- a/api/app/routers/voices.py
+++ b/api/app/routers/voices.py
@@ -1,14 +1,65 @@
 import base64
 import os
 import uuid
 from datetime import datetime
 from typing import Optional
 import httpx
 from fastapi import APIRouter, Depends, HTTPException
 from sqlalchemy.orm import Session
 from typing import Optional
 import uuid
 from ..db import get_db
-from ..models import Voice
+from ..models import VendorCredential, Voice
-from ..schemas import VoiceCreate, VoiceUpdate, VoiceOut
+from ..schemas import (
    VendorCredentialOut,
    VendorCredentialUpsert,
    VoiceCreate,
    VoiceOut,
    VoicePreviewRequest,
    VoicePreviewResponse,
    VoiceUpdate,
 )
 # Router for this module; every route below is registered under the /voices prefix.
 router = APIRouter(prefix="/voices", tags=["Voices"])
 # Fallback TTS model name used when a voice has no model of its own.
 SILICONFLOW_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
 def _is_siliconflow_vendor(vendor: str) -> bool:
    return vendor.strip().lower() in {"siliconflow", "硅基流动"}
 def _canonical_vendor_key(vendor: str) -> str:
    normalized = vendor.strip().lower()
    alias_map = {
        "硅基流动": "siliconflow",
        "siliconflow": "siliconflow",
        "ali": "ali",
        "volcano": "volcano",
        "minimax": "minimax",
    }
    return alias_map.get(normalized, normalized)
 def _default_tts_base_url(vendor_key: str) -> Optional[str]:
    defaults = {
        "siliconflow": "https://api.siliconflow.cn/v1",
    }
    return defaults.get(vendor_key)
 def _resolve_vendor_credential(db: Session, vendor: str) -> Optional[VendorCredential]:
     """Fetch the stored credential row for *vendor*, or None when no row matches.

     The vendor name is canonicalized before the lookup, so aliases resolve to
     the same row.
     """
     canonical_key = _canonical_vendor_key(vendor)
     matching_rows = db.query(VendorCredential).filter(
         VendorCredential.vendor_key == canonical_key
     )
     return matching_rows.first()
 def _build_siliconflow_voice_key(voice: Voice, model: str) -> str:
    if voice.voice_key:
        return voice.voice_key
    if ":" in voice.id:
        return voice.id
    return f"{model}:{voice.id}"
@router.get("")
 def list_voices(
@@ -37,16 +88,26 @@ def list_voices(
@router.post("", response_model=VoiceOut)
 def create_voice(data: VoiceCreate, db: Session = Depends(get_db)):
    """创建声音"""
    vendor = data.vendor.strip()
    model = data.model
    voice_key = data.voice_key
    if _is_siliconflow_vendor(vendor):
        model = model or SILICONFLOW_DEFAULT_MODEL
        if not voice_key:
            raw_id = (data.id or data.name).strip()
            voice_key = raw_id if ":" in raw_id else f"{model}:{raw_id}"
    voice = Voice(
        id=data.id or str(uuid.uuid4())[:8],
        user_id=1,
        name=data.name,
-        vendor=data.vendor,
+        vendor=vendor,
        gender=data.gender,
        language=data.language,
        description=data.description,
-        model=data.model,
+        model=model,
-        voice_key=data.voice_key,
+        voice_key=voice_key,
        speed=data.speed,
        gain=data.gain,
        pitch=data.pitch,
@@ -75,6 +136,16 @@ def update_voice(id: str, data: VoiceUpdate, db: Session = Depends(get_db)):
        raise HTTPException(status_code=404, detail="Voice not found")
    update_data = data.model_dump(exclude_unset=True)
    if "vendor" in update_data and update_data["vendor"] is not None:
        update_data["vendor"] = update_data["vendor"].strip()
    vendor_for_defaults = update_data.get("vendor", voice.vendor)
    if _is_siliconflow_vendor(vendor_for_defaults):
        model = update_data.get("model") or voice.model or SILICONFLOW_DEFAULT_MODEL
        voice_key = update_data.get("voice_key") or voice.voice_key
        update_data["model"] = model
        update_data["voice_key"] = voice_key or _build_siliconflow_voice_key(voice, model)
    for field, value in update_data.items():
        setattr(voice, field, value)
@@ -92,3 +163,111 @@ def delete_voice(id: str, db: Session = Depends(get_db)):
    db.delete(voice)
    db.commit()
    return {"message": "Deleted successfully"}
@router.get("/vendors/credentials")
def list_vendor_credentials(db: Session = Depends(get_db)):
    """List every stored vendor credential, most recently updated first.

    Returns a dict with the rows under ``list`` and their count under ``total``.
    """
    newest_first = VendorCredential.updated_at.desc()
    credentials = db.query(VendorCredential).order_by(newest_first).all()
    return {"list": credentials, "total": len(credentials)}
@router.get("/vendors/credentials/{vendor_key}", response_model=VendorCredentialOut)
def get_vendor_credential(vendor_key: str, db: Session = Depends(get_db)):
    """Return the stored credential for one vendor.

    The path value is canonicalized before the lookup, so aliases are accepted.

    Raises:
        HTTPException: 404 when no credential is stored for the vendor.
    """
    credential = _resolve_vendor_credential(db, vendor_key)
    if not credential:
        raise HTTPException(status_code=404, detail="Vendor credential not found")
    return credential
@router.put("/vendors/credentials/{vendor_key}", response_model=VendorCredentialOut)
def upsert_vendor_credential(vendor_key: str, data: VendorCredentialUpsert, db: Session = Depends(get_db)):
    """Create the credential for a vendor, or overwrite the existing one.

    The path value is canonicalized to find the row. On update the vendor name
    is kept when the payload omits it; the API key and base URL are always
    replaced by the payload values.

    Raises:
        HTTPException: 422 when the API key is blank after trimming.
    """
    # Trim before persisting: a key saved with stray whitespace would later be
    # sent verbatim in the Authorization header of preview requests.
    api_key = data.api_key.strip()
    if not api_key:
        raise HTTPException(status_code=422, detail="api_key cannot be empty")
    # A blank base URL means "not provided"; store NULL rather than "".
    base_url = (data.base_url or "").strip() or None

    key = _canonical_vendor_key(vendor_key)
    item = db.query(VendorCredential).filter(VendorCredential.vendor_key == key).first()
    if item:
        item.vendor_name = data.vendor_name or item.vendor_name
        item.api_key = api_key
        item.base_url = base_url
        item.updated_at = datetime.utcnow()
    else:
        item = VendorCredential(
            vendor_key=key,
            vendor_name=data.vendor_name or vendor_key,
            api_key=api_key,
            base_url=base_url,
        )
        db.add(item)
    db.commit()
    # Reload so database-generated values (timestamps) are present in the response.
    db.refresh(item)
    return item
@router.delete("/vendors/credentials/{vendor_key}")
def delete_vendor_credential(vendor_key: str, db: Session = Depends(get_db)):
    """Remove the stored credential for one vendor.

    The path value is canonicalized before the lookup, so aliases are accepted.

    Raises:
        HTTPException: 404 when no credential is stored for the vendor.
    """
    credential = _resolve_vendor_credential(db, vendor_key)
    if not credential:
        raise HTTPException(status_code=404, detail="Vendor credential not found")
    db.delete(credential)
    db.commit()
    return {"message": "Deleted successfully"}
def _extract_tts_error_detail(response) -> str:
    """Pull a readable message out of a failed TTS vendor response.

    Understands ``{"error": {"message": ...}}``, ``{"error": "..."}``,
    ``{"message": ...}`` and ``{"detail": ...}`` bodies; anything else
    (including non-JSON bodies) falls back to the raw response text.
    """
    fallback = response.text
    try:
        body = response.json()
    except ValueError:
        # Not JSON (gateway HTML page, plain text, ...): report it verbatim.
        return fallback
    if not isinstance(body, dict):
        return fallback

    error = body.get("error")
    if isinstance(error, dict):
        message = error.get("message")
    elif isinstance(error, str):
        message = error
    else:
        message = None

    detail = message or body.get("message") or body.get("detail") or fallback
    return detail if isinstance(detail, str) else str(detail)


@router.post("/{id}/preview", response_model=VoicePreviewResponse)
def preview_voice(id: str, data: VoicePreviewRequest, db: Session = Depends(get_db)):
    """Synthesize a short preview of a voice via an OpenAI-compatible ``/audio/speech`` endpoint.

    The API key is resolved in this order: the request body, the stored vendor
    credential, then the ``SILICONFLOW_API_KEY`` environment variable (only for
    SiliconFlow voices). The base URL comes from the stored credential, falling
    back to the built-in default for the vendor.

    Returns:
        VoicePreviewResponse whose ``audio_url`` is a ``data:audio/mpeg;base64,...`` URL.

    Raises:
        HTTPException: 404 if the voice does not exist; 400 if the text is
            empty or no API key / base URL can be resolved; 502 if the vendor
            request fails or returns a non-200 status.
    """
    voice = db.query(Voice).filter(Voice.id == id).first()
    if not voice:
        raise HTTPException(status_code=404, detail="Voice not found")

    text = data.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Preview text cannot be empty")

    credential = _resolve_vendor_credential(db, voice.vendor)

    # Key precedence: request override > stored credential > environment variable.
    api_key = (data.api_key or "").strip()
    if not api_key and credential:
        api_key = (credential.api_key or "").strip()
    if not api_key and _is_siliconflow_vendor(voice.vendor):
        api_key = (os.getenv("SILICONFLOW_API_KEY") or "").strip()
    if not api_key:
        raise HTTPException(status_code=400, detail=f"Vendor API key is required for {voice.vendor}")

    # NOTE(review): the SiliconFlow default model and voice-key format are
    # applied to every vendor here, not only SiliconFlow — confirm intended.
    model = voice.model or SILICONFLOW_DEFAULT_MODEL
    vendor_key = _canonical_vendor_key(voice.vendor)
    stored_base_url = credential.base_url.strip() if credential and credential.base_url else ""
    base_url = stored_base_url or _default_tts_base_url(vendor_key)
    if not base_url:
        raise HTTPException(status_code=400, detail=f"Vendor base_url is required for {voice.vendor}")

    tts_api_url = f"{base_url.rstrip('/')}/audio/speech"
    payload = {
        "model": model,
        "input": text,
        "voice": voice.voice_key or _build_siliconflow_voice_key(voice, model),
        "response_format": "mp3",
        "speed": data.speed if data.speed is not None else voice.speed,
    }

    try:
        with httpx.Client(timeout=45.0) as client:
            response = client.post(
                tts_api_url,
                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
                json=payload,
            )
    except Exception as exc:
        # Boundary handler: any failure to reach the vendor is reported as a bad gateway.
        raise HTTPException(status_code=502, detail=f"TTS request failed: {exc}") from exc

    if response.status_code != 200:
        detail = _extract_tts_error_detail(response)
        raise HTTPException(status_code=502, detail=f"TTS vendor error: {detail}")

    audio_base64 = base64.b64encode(response.content).decode("utf-8")
    return VoicePreviewResponse(success=True, audio_url=f"data:audio/mpeg;base64,{audio_base64}")
--- a/api/app/routers/workflows.py
+++ b/api/app/routers/workflows.py
@@ -1,6 +1,5 @@
 from fastapi import APIRouter, Depends, HTTPException
 from sqlalchemy.orm import Session
 from typing import List
 import uuid
 from datetime import datetime
@@ -11,7 +10,7 @@ from ..schemas import WorkflowCreate, WorkflowUpdate, WorkflowOut
 router = APIRouter(prefix="/workflows", tags=["Workflows"])
-@router.get("", response_model=List[WorkflowOut])
+@router.get("")
 def list_workflows(
    page: int = 1,
    limit: int = 50,
@@ -62,8 +61,12 @@ def update_workflow(id: str, data: WorkflowUpdate, db: Session = Depends(get_db)
        raise HTTPException(status_code=404, detail="Workflow not found")
    update_data = data.model_dump(exclude_unset=True)
    field_map = {
        "nodeCount": "node_count",
        "globalPrompt": "global_prompt",
    }
    for field, value in update_data.items():
-        setattr(workflow, field, value)
+        setattr(workflow, field_map.get(field, field), value)
    workflow.updated_at = datetime.utcnow().isoformat()
    db.commit()
--- a/api/app/schemas.py
+++ b/api/app/schemas.py
@@ -61,6 +61,9 @@ class VoiceCreate(VoiceBase):
 class VoiceUpdate(BaseModel):
    name: Optional[str] = None
    vendor: Optional[str] = None
    gender: Optional[str] = None
    language: Optional[str] = None
    description: Optional[str] = None
    model: Optional[str] = None
    voice_key: Optional[str] = None
@@ -88,6 +91,7 @@ class VoiceOut(VoiceBase):
 class VoicePreviewRequest(BaseModel):
    text: str
    api_key: Optional[str] = None
    speed: Optional[float] = None
    gain: Optional[int] = None
    pitch: Optional[int] = None
@@ -100,6 +104,24 @@ class VoicePreviewResponse(BaseModel):
    error: Optional[str] = None
 class VendorCredentialUpsert(BaseModel):
    """Request body for creating or replacing a vendor credential."""
    # Optional display name for the vendor.
    vendor_name: Optional[str] = None
    # Required secret used to call the vendor API.
    api_key: str
    # Optional API base URL for the vendor.
    base_url: Optional[str] = None
 class VendorCredentialOut(BaseModel):
    """Response schema for a stored vendor credential.

    NOTE(review): ``api_key`` is part of the response, so the stored secret is
    returned to the client — confirm this exposure is intended.
    """
    vendor_key: str
    vendor_name: str
    api_key: str
    base_url: Optional[str] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    class Config:
        # Allow the schema to be populated from object attributes (ORM rows).
        from_attributes = True
 # ============ LLM Model ============
 class LLMModelBase(BaseModel):
    name: str
--- a/api/tests/test_voices.py
+++ b/api/tests/test_voices.py
@@ -1,4 +1,5 @@
 """Tests for Voice API endpoints"""
 import base64
 import pytest
@@ -130,3 +131,110 @@ class TestVoiceAPI:
        data = response.json()
        for voice in data["list"]:
            assert voice["gender"] == "Female"
    def test_preview_voice_success(self, client, monkeypatch):
        """Test preview voice endpoint returns audio data URL"""
        from app.routers import voices as voice_router
        # Canned successful vendor response carrying fake audio bytes.
        class DummyResponse:
            status_code = 200
            content = b"fake-mp3-bytes"
            text = "ok"
            def json(self):
                return {}
        # Context-manager stand-in for httpx.Client; post() never touches the network.
        class DummyClient:
            def __init__(self, *args, **kwargs):
                pass
            def __enter__(self):
                return self
            def __exit__(self, exc_type, exc, tb):
                return False
            def post(self, *args, **kwargs):
                return DummyResponse()
        # Supply the API key through the environment and swap in the fake client
        # before any request is made.
        monkeypatch.setenv("SILICONFLOW_API_KEY", "test-key")
        monkeypatch.setattr(voice_router.httpx, "Client", DummyClient)
        # Create the voice that will be previewed.
        create_resp = client.post("/api/voices", json={
            "id": "anna",
            "name": "Anna",
            "vendor": "SiliconFlow",
            "gender": "Female",
            "language": "zh",
            "description": "system voice",
            "model": "FunAudioLLM/CosyVoice2-0.5B",
            "voice_key": "FunAudioLLM/CosyVoice2-0.5B:anna"
        })
        assert create_resp.status_code == 200
        preview_resp = client.post("/api/voices/anna/preview", json={"text": "你好"})
        assert preview_resp.status_code == 200
        payload = preview_resp.json()
        assert payload["success"] is True
        assert payload["audio_url"].startswith("data:audio/mpeg;base64,")
        # The data URL must round-trip to exactly the bytes the fake vendor returned.
        encoded = payload["audio_url"].split(",", 1)[1]
        assert base64.b64decode(encoded) == b"fake-mp3-bytes"
    def test_vendor_credential_persist_and_preview_use_db_key(self, client, monkeypatch):
        """Test vendor credential persisted in DB and used by preview endpoint"""
        from app.routers import voices as voice_router
        # Mutable holder so the fake client can report the Authorization header it received.
        captured_auth = {"value": ""}
        # Canned successful vendor response.
        class DummyResponse:
            status_code = 200
            content = b"fake-mp3"
            text = "ok"
            def json(self):
                return {}
        # Context-manager stand-in for httpx.Client that records the auth header.
        class DummyClient:
            def __init__(self, *args, **kwargs):
                pass
            def __enter__(self):
                return self
            def __exit__(self, exc_type, exc, tb):
                return False
            def post(self, *args, **kwargs):
                headers = kwargs.get("headers", {})
                captured_auth["value"] = headers.get("Authorization", "")
                return DummyResponse()
        # Remove the env fallback so the key can only come from the stored credential.
        monkeypatch.delenv("SILICONFLOW_API_KEY", raising=False)
        monkeypatch.setattr(voice_router.httpx, "Client", DummyClient)
        # Persist a credential for the vendor through the API.
        save_cred = client.put(
            "/api/voices/vendors/credentials/siliconflow",
            json={
                "vendor_name": "SiliconFlow",
                "api_key": "db-key-123",
                "base_url": "https://api.siliconflow.cn/v1"
            },
        )
        assert save_cred.status_code == 200
        assert save_cred.json()["vendor_key"] == "siliconflow"
        # Create a voice for the same vendor and preview it.
        create_resp = client.post("/api/voices", json={
            "id": "anna2",
            "name": "Anna 2",
            "vendor": "SiliconFlow",
            "gender": "Female",
            "language": "zh",
            "description": "voice",
            "model": "FunAudioLLM/CosyVoice2-0.5B",
            "voice_key": "FunAudioLLM/CosyVoice2-0.5B:anna"
        })
        assert create_resp.status_code == 200
        preview_resp = client.post("/api/voices/anna2/preview", json={"text": "hello"})
        assert preview_resp.status_code == 200
        # The outgoing request must have used the key saved via the credential endpoint.
        assert captured_auth["value"] == "Bearer db-key-123"
--- a/web/README.md
+++ b/web/README.md
@@ -16,6 +16,6 @@ View your app in AI Studio: https://ai.studio/apps/drive/1Cg9WH_2bOQEHVVj-lSN5l2
 1. Install dependencies:
   `npm install`
 2. Set the `GEMINI_API_KEY` in [.env.local](.env.local) to your Gemini API key
-3. Optional: set `VITE_API_BASE_URL` (for backend API, default `http://localhost:8000/api`)
+3. Optional: set `VITE_API_BASE_URL` (for backend API, default `http://127.0.0.1:8100/api`)
 4. Run the app:
   `npm run dev`
--- a/web/pages/VoiceLibrary.tsx
+++ b/web/pages/VoiceLibrary.tsx
@@ -1,40 +1,65 @@
 import React, { useEffect, useState, useRef } from 'react';
-import { Search, Mic2, Play, Pause, Upload, X, Filter, Plus, Volume2, Sparkles, Wand2, ChevronDown } from 'lucide-react';
+import { Search, Mic2, Play, Pause, Upload, X, Filter, Plus, Volume2, Sparkles, Wand2, ChevronDown, Pencil, Trash2 } from 'lucide-react';
 import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI';
-import { mockVoices } from '../services/mockData';
+import { VendorCredential, Voice } from '../types';
-import { Voice } from '../types';
+import { createVoice, deleteVoice, fetchVendorCredentials, fetchVoices, previewVoice, saveVendorCredential, updateVoice } from '../services/backendApi';
-import { fetchVoices } from '../services/backendApi';
+
 // Vendors offered in the credential form: `key` is the value passed to the
 // credential API calls, `label` is the text shown in the dropdown.
 const VENDOR_OPTIONS = [
  { key: 'siliconflow', label: '硅基流动 (SiliconFlow)' },
  { key: 'ali', label: 'Ali' },
  { key: 'volcano', label: 'Volcano' },
  { key: 'minimax', label: 'Minimax' },
 ];
 export const VoiceLibraryPage: React.FC = () => {
  const [voices, setVoices] = useState<Voice[]>([]);
  const [searchTerm, setSearchTerm] = useState('');
-  const [vendorFilter, setVendorFilter] = useState<'all' | 'Ali' | 'Volcano' | 'Minimax' | '硅基流动'>('all');
+  const [vendorFilter, setVendorFilter] = useState<'all' | 'Ali' | 'Volcano' | 'Minimax' | '硅基流动' | 'SiliconFlow'>('all');
  const [genderFilter, setGenderFilter] = useState<'all' | 'Male' | 'Female'>('all');
  const [langFilter, setLangFilter] = useState<'all' | 'zh' | 'en'>('all');
  const [playingVoiceId, setPlayingVoiceId] = useState<string | null>(null);
  const [isCloneModalOpen, setIsCloneModalOpen] = useState(false);
  const [isAddModalOpen, setIsAddModalOpen] = useState(false);
  const [editingVoice, setEditingVoice] = useState<Voice | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [playLoadingId, setPlayLoadingId] = useState<string | null>(null);
  const [vendorCredentials, setVendorCredentials] = useState<Record<string, VendorCredential>>({});
  const [credentialVendorKey, setCredentialVendorKey] = useState('siliconflow');
  const [credentialApiKey, setCredentialApiKey] = useState('');
  const [credentialBaseUrl, setCredentialBaseUrl] = useState('');
  const [isSavingCredential, setIsSavingCredential] = useState(false);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  useEffect(() => {
-    const loadVoices = async () => {
+    const loadVoicesAndCredentials = async () => {
      setIsLoading(true);
      try {
-        const list = await fetchVoices();
+        const [list, credentials] = await Promise.all([fetchVoices(), fetchVendorCredentials()]);
-        setVoices(list.length > 0 ? list : mockVoices);
+        setVoices(list);
        const mapped = credentials.reduce((acc, item) => {
          acc[item.vendorKey] = item;
          return acc;
        }, {} as Record<string, VendorCredential>);
        setVendorCredentials(mapped);
      } catch (error) {
        console.error(error);
-        setVoices(mockVoices);
+        setVoices([]);
      } finally {
        setIsLoading(false);
      }
    };
-    loadVoices();
+    loadVoicesAndCredentials();
  }, []);
  // Keep the credential form in sync: whenever the selected vendor or the
  // loaded credential map changes, show that vendor's stored key and base URL
  // (or empty fields when nothing is stored).
  useEffect(() => {
    const selected = vendorCredentials[credentialVendorKey];
    setCredentialApiKey(selected?.apiKey || '');
    setCredentialBaseUrl(selected?.baseUrl || '');
  }, [credentialVendorKey, vendorCredentials]);
  const filteredVoices = voices.filter(voice => {
    const matchesSearch = voice.name.toLowerCase().includes(searchTerm.toLowerCase());
    const matchesVendor = vendorFilter === 'all' || voice.vendor === vendorFilter;
@@ -43,23 +68,80 @@ export const VoiceLibraryPage: React.FC = () => {
    return matchesSearch && matchesVendor && matchesGender && matchesLang;
  });
-  const handlePlayToggle = (id: string) => {
+  const handlePlayToggle = async (voice: Voice) => {
-    if (playingVoiceId === id) {
+    if (playingVoiceId === voice.id && audioRef.current) {
      audioRef.current.pause();
      audioRef.current.currentTime = 0;
      setPlayingVoiceId(null);
-    } else {
+      return;
-      setPlayingVoiceId(id);
+    }
-      setTimeout(() => {
+    try {
-          setPlayingVoiceId((current) => current === id ? null : current);
+      setPlayLoadingId(voice.id);
-      }, 3000);
+      const audioUrl = await previewVoice(
        voice.id,
        voice.language === 'en' ? 'Hello, this is a voice preview.' : '你好，这是一段语音试听。',
        voice.speed
      );
      if (audioRef.current) {
        audioRef.current.pause();
      }
      const audio = new Audio(audioUrl);
      audio.onended = () => setPlayingVoiceId(null);
      audio.onerror = () => {
        setPlayingVoiceId(null);
        alert('试听失败，请检查 SiliconFlow 配置。');
      };
      audioRef.current = audio;
      setPlayingVoiceId(voice.id);
      await audio.play();
    } catch (error: any) {
      alert(error?.message || '试听失败');
      setPlayingVoiceId(null);
    } finally {
      setPlayLoadingId(null);
    }
  };
-  const handleAddSuccess = (newVoice: Voice) => {
+  const handleAddSuccess = async (newVoice: Voice) => {
-      setVoices([newVoice, ...voices]);
+      const created = await createVoice(newVoice);
      setVoices((prev) => [created, ...prev]);
      setIsAddModalOpen(false);
      setIsCloneModalOpen(false);
  };
  const handleUpdateSuccess = async (id: string, data: Voice) => {
      // Persist the edit, swap the saved voice into the list, then close the edit dialog.
      const saved = await updateVoice(id, data);
      setVoices((current) => current.map((item) => (item.id !== id ? item : saved)));
      setEditingVoice(null);
  };
  // Delete a voice after user confirmation and drop it from the list.
  // A failed request is reported to the user instead of surfacing as an
  // unhandled promise rejection; the list is only changed on success.
  const handleDelete = async (id: string) => {
      if (!confirm('确认删除这个声音吗？')) return;
      try {
        await deleteVoice(id);
        setVoices((prev) => prev.filter((voice) => voice.id !== id));
      } catch (error: any) {
        alert(error?.message || '删除失败');
      }
  };
  // Save the credential currently entered in the form and cache the stored
  // result under its vendor key. Requires a non-blank API key.
  const handleSaveVendorCredential = async () => {
      const apiKey = credentialApiKey.trim();
      if (apiKey === '') {
        alert('请填写 API Key');
        return;
      }
      try {
        setIsSavingCredential(true);
        const vendorLabel = VENDOR_OPTIONS.find(({ key }) => key === credentialVendorKey)?.label;
        const persisted = await saveVendorCredential(credentialVendorKey, {
          vendorName: vendorLabel || credentialVendorKey,
          apiKey,
          baseUrl: credentialBaseUrl.trim(),
        });
        setVendorCredentials((current) => ({ ...current, [persisted.vendorKey]: persisted }));
      } catch (error: any) {
        alert(error?.message || '保存厂商配置失败');
      } finally {
        setIsSavingCredential(false);
      }
  };
  return (
    <div className="space-y-6 animate-in fade-in py-4 pb-10">
      <div className="flex items-center justify-between">
@@ -94,6 +176,7 @@ export const VoiceLibraryPage: React.FC = () => {
            >
                <option value="all">所有厂商</option>
                <option value="硅基流动">硅基流动 (SiliconFlow)</option>
                <option value="SiliconFlow">SiliconFlow</option>
                <option value="Ali">阿里 (Ali)</option>
                <option value="Volcano">火山 (Volcano)</option>
                <option value="Minimax">Minimax</option>
@@ -123,6 +206,34 @@ export const VoiceLibraryPage: React.FC = () => {
         </div>
      </div>
      <div className="grid grid-cols-1 md:grid-cols-4 gap-3 bg-card/50 p-4 rounded-lg border border-white/5 shadow-sm">
         <select
            className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
            value={credentialVendorKey}
            onChange={(e) => setCredentialVendorKey(e.target.value)}
         >
            {VENDOR_OPTIONS.map((item) => (
              <option key={item.key} value={item.key}>{item.label}</option>
            ))}
         </select>
         <Input
            type="password"
            placeholder="Vendor API Key (持久化到后端)"
            className="border-0 bg-white/5"
            value={credentialApiKey}
            onChange={e => setCredentialApiKey(e.target.value)}
         />
         <Input
            placeholder="Base URL (OpenAI compatible, 选填)"
            className="border-0 bg-white/5"
            value={credentialBaseUrl}
            onChange={e => setCredentialBaseUrl(e.target.value)}
         />
         <Button onClick={handleSaveVendorCredential} disabled={isSavingCredential}>
            {isSavingCredential ? '保存中...' : '保存厂商配置'}
         </Button>
      </div>
      <div className="rounded-md border border-white/5 bg-card/40 backdrop-blur-md overflow-hidden">
        <table className="w-full text-sm">
          <TableHeader>
@@ -132,6 +243,7 @@ export const VoiceLibraryPage: React.FC = () => {
              <TableHead>性别</TableHead>
              <TableHead>语言</TableHead>
              <TableHead className="text-right">试听</TableHead>
              <TableHead className="text-right">操作</TableHead>
            </TableRow>
          </TableHeader>
          <tbody>
@@ -155,22 +267,31 @@ export const VoiceLibraryPage: React.FC = () => {
                    <Button 
                        variant="ghost" 
                        size="icon" 
-                        onClick={() => handlePlayToggle(voice.id)}
+                        onClick={() => handlePlayToggle(voice)}
                        disabled={playLoadingId === voice.id}
                        className={playingVoiceId === voice.id ? "text-primary animate-pulse" : ""}
                    >
                        {playingVoiceId === voice.id ? <Pause className="h-4 w-4" /> : <Play className="h-4 w-4" />}
                    </Button>
                </TableCell>
                <TableCell className="text-right">
                    <Button variant="ghost" size="icon" onClick={() => setEditingVoice(voice)}>
                        <Pencil className="h-4 w-4" />
                    </Button>
                    <Button variant="ghost" size="icon" onClick={() => handleDelete(voice.id)} className="text-red-400">
                        <Trash2 className="h-4 w-4" />
                    </Button>
                </TableCell>
              </TableRow>
            ))}
             {!isLoading && filteredVoices.length === 0 && (
                 <TableRow>
-                     <TableCell colSpan={5} className="text-center py-6 text-muted-foreground">暂无声音数据</TableCell>
+                     <TableCell colSpan={6} className="text-center py-6 text-muted-foreground">暂无声音数据</TableCell>
                 </TableRow>
             )}
             {isLoading && (
                 <TableRow>
-                     <TableCell colSpan={5} className="text-center py-6 text-muted-foreground">加载中...</TableCell>
+                     <TableCell colSpan={6} className="text-center py-6 text-muted-foreground">加载中...</TableCell>
                 </TableRow>
             )}
          </tbody>
@@ -183,6 +304,13 @@ export const VoiceLibraryPage: React.FC = () => {
        onSuccess={handleAddSuccess}
      />
      <AddVoiceModal
        isOpen={!!editingVoice}
        onClose={() => setEditingVoice(null)}
        onSuccess={(voice) => handleUpdateSuccess(editingVoice!.id, voice)}
        initialVoice={editingVoice || undefined}
      />
      <CloneVoiceModal 
        isOpen={isCloneModalOpen} 
        onClose={() => setIsCloneModalOpen(false)} 
@@ -196,15 +324,17 @@ export const VoiceLibraryPage: React.FC = () => {
 const AddVoiceModal: React.FC<{
    isOpen: boolean;
    onClose: () => void;
-    onSuccess: (voice: Voice) => void;
+    onSuccess: (voice: Voice) => Promise<void>;
-}> = ({ isOpen, onClose, onSuccess }) => {
+    initialVoice?: Voice;
 }> = ({ isOpen, onClose, onSuccess, initialVoice }) => {
    const [vendor, setVendor] = useState<'硅基流动' | 'Ali' | 'Volcano' | 'Minimax'>('硅基流动');
    const [name, setName] = useState('');
-    const [sfModel, setSfModel] = useState('fishaudio/fish-speech-1.5');
+    const [sfModel, setSfModel] = useState('FunAudioLLM/CosyVoice2-0.5B');
-    const [sfVoiceId, setSfVoiceId] = useState('fishaudio:amy');
+    const [sfVoiceId, setSfVoiceId] = useState('FunAudioLLM/CosyVoice2-0.5B:anna');
    const [sfSpeed, setSfSpeed] = useState(1);
    const [sfGain, setSfGain] = useState(0);
    const [sfPitch, setSfPitch] = useState(0);
    const [model, setModel] = useState('');
    const [voiceKey, setVoiceKey] = useState('');
@@ -214,40 +344,90 @@ const AddVoiceModal: React.FC<{
    const [testInput, setTestInput] = useState('你好，正在测试语音合成效果。');
    const [isAuditioning, setIsAuditioning] = useState(false);
    const [isSaving, setIsSaving] = useState(false);
    const testAudioRef = useRef<HTMLAudioElement | null>(null);
-    const handleAudition = () => {
+    useEffect(() => {
        if (!initialVoice) return;
        const nextVendor = initialVoice.vendor === 'SiliconFlow' ? '硅基流动' : initialVoice.vendor;
        setVendor((nextVendor as any) || '硅基流动');
        setName(initialVoice.name || '');
        setGender(initialVoice.gender || 'Female');
        setLanguage(initialVoice.language || 'zh');
        setDescription(initialVoice.description || '');
        setModel(initialVoice.model || '');
        setVoiceKey(initialVoice.voiceKey || '');
        setSfModel(initialVoice.model || 'FunAudioLLM/CosyVoice2-0.5B');
        setSfVoiceId(initialVoice.voiceKey || 'FunAudioLLM/CosyVoice2-0.5B:anna');
        setSfSpeed(initialVoice.speed ?? 1);
        setSfGain(initialVoice.gain ?? 0);
        setSfPitch(initialVoice.pitch ?? 0);
    }, [initialVoice, isOpen]);
    const handleAudition = async () => {
        if (!testInput.trim()) return;
-        setIsAuditioning(true);
+        if (!initialVoice?.id) {
-        setTimeout(() => setIsAuditioning(false), 2000);
+            alert('请先创建声音，再进行试听。');
            return;
        }
        try {
            setIsAuditioning(true);
            const audioUrl = await previewVoice(initialVoice.id, testInput, sfSpeed);
            if (testAudioRef.current) {
                testAudioRef.current.pause();
            }
            const audio = new Audio(audioUrl);
            testAudioRef.current = audio;
            await audio.play();
        } catch (error: any) {
            alert(error?.message || '试听失败');
        } finally {
            setIsAuditioning(false);
        }
    };
-    const handleSubmit = () => {
+    const handleSubmit = async () => {
        if (!name) { alert("请填写声音显示名称"); return; }
-        
+
-        let newVoice: Voice = {
+        const newVoice: Voice = {
-            id: `${vendor === '硅基流动' ? 'sf' : 'gen'}-${Date.now()}`,
+            id: initialVoice?.id || `${vendor === '硅基流动' ? 'sf' : 'gen'}-${Date.now()}`,
            name: name,
            vendor: vendor,
            gender: gender,
            language: language,
-            description: description || (vendor === '硅基流动' ? `Model: ${sfModel}` : `Model: ${model}`)
+            description: description || (vendor === '硅基流动' ? `Model: ${sfModel}` : `Model: ${model}`),
            model: vendor === '硅基流动' ? sfModel : model,
            voiceKey: vendor === '硅基流动' ? sfVoiceId : voiceKey,
            speed: sfSpeed,
            gain: sfGain,
            pitch: sfPitch,
        };
-
+        try {
-        onSuccess(newVoice);
+            setIsSaving(true);
-        setName('');
+            await onSuccess(newVoice);
-        setVendor('硅基流动');
+            setName('');
-        setDescription('');
+            setVendor('硅基流动');
            setDescription('');
            setModel('');
            setVoiceKey('');
        } catch (error: any) {
            alert(error?.message || '保存失败');
        } finally {
            setIsSaving(false);
        }
    };
    return (
        <Dialog 
            isOpen={isOpen} 
            onClose={onClose} 
-            title="添加声音"
+            title={initialVoice ? "编辑声音" : "添加声音"}
            footer={
                <>
                    <Button variant="ghost" onClick={onClose}>取消</Button>
-                    <Button onClick={handleSubmit} className="bg-primary hover:bg-primary/90">确认添加</Button>
+                    <Button onClick={handleSubmit} className="bg-primary hover:bg-primary/90" disabled={isSaving}>
                        {isSaving ? '保存中...' : (initialVoice ? '保存修改' : '确认添加')}
                    </Button>
                </>
            }
        >
@@ -286,9 +466,9 @@ const AddVoiceModal: React.FC<{
                                    value={sfModel}
                                    onChange={e => setSfModel(e.target.value)}
                                >
                                    <option value="FunAudioLLM/CosyVoice2-0.5B">FunAudioLLM/CosyVoice2-0.5B</option>
                                    <option value="fishaudio/fish-speech-1.5">fishaudio/fish-speech-1.5</option>
                                    <option value="fishaudio/fish-speech-1.4">fishaudio/fish-speech-1.4</option>
                                    <option value="ByteDance/SA-Speech">ByteDance/SA-Speech</option>
                                </select>
                            </div>
                            <div className="space-y-1.5">
@@ -312,6 +492,13 @@ const AddVoiceModal: React.FC<{
                                    <span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfGain}dB</span>
                                </div>
                            </div>
                            <div className="space-y-1.5">
                                <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block">音调 (Pitch)</label>
                                <div className="flex items-center space-x-2">
                                    <input type="range" min="-12" max="12" step="1" value={sfPitch} onChange={e => setSfPitch(parseInt(e.target.value))} className="flex-1 accent-primary" />
                                    <span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfPitch}</span>
                                </div>
                            </div>
                        </div>
                    </div>
                ) : (
@@ -382,7 +569,7 @@ const AddVoiceModal: React.FC<{
                            variant="primary" 
                            size="sm" 
                            onClick={handleAudition} 
-                            disabled={isAuditioning}
+                            disabled={isAuditioning || !initialVoice}
                            className="shrink-0 h-9"
                        >
                            {isAuditioning ? <Pause className="h-3.5 w-3.5 animate-pulse" /> : <Play className="h-3.5 w-3.5" />}
@@ -397,7 +584,7 @@ const AddVoiceModal: React.FC<{
 const CloneVoiceModal: React.FC<{ 
    isOpen: boolean; 
    onClose: () => void; 
-    onSuccess: (voice: Voice) => void 
+    onSuccess: (voice: Voice) => Promise<void> 
 }> = ({ isOpen, onClose, onSuccess }) => {
    const [name, setName] = useState('');
    const [description, setDescription] = useState('');
@@ -410,7 +597,7 @@ const CloneVoiceModal: React.FC<{
        }
    };
-    const handleSubmit = () => {
+    const handleSubmit = async () => {
        if (!name || !file) {
            alert("请填写名称并上传音频文件");
            return;
@@ -425,7 +612,7 @@ const CloneVoiceModal: React.FC<{
            description: description || 'User cloned voice'
        };
-        onSuccess(newVoice);
+        await onSuccess(newVoice);
        setName('');
        setDescription('');
        setFile(null);
--- a/web/services/apiClient.ts
+++ b/web/services/apiClient.ts
@@ -1,4 +1,4 @@
-const DEFAULT_API_BASE_URL = 'http://localhost:8100/api';
+const DEFAULT_API_BASE_URL = 'http://127.0.0.1:8100/api';
 const trimTrailingSlash = (value: string): string => value.replace(/\/+$/, '');
--- a/web/services/backendApi.ts
+++ b/web/services/backendApi.ts
@@ -1,4 +1,4 @@
-import { Assistant, CallLog, InteractionDetail, KnowledgeBase, KnowledgeDocument, Voice, Workflow, WorkflowEdge, WorkflowNode } from '../types';
+import { Assistant, CallLog, InteractionDetail, KnowledgeBase, KnowledgeDocument, VendorCredential, Voice, Workflow, WorkflowEdge, WorkflowNode } from '../types';
 import { apiRequest } from './apiClient';
 type AnyRecord = Record<string, any>;
@@ -46,10 +46,27 @@ const mapAssistant = (raw: AnyRecord): Assistant => ({
 // Converts a raw voice record from the API into the frontend `Voice` shape.
 // Each field is read via `readField` with a list of candidate keys (camelCase and
 // snake_case where both exist) and a fallback value; numeric and boolean fields are
 // coerced with `Number(...)` / `Boolean(...)`.
 const mapVoice = (raw: AnyRecord): Voice => ({
  id: String(readField(raw, ['id'], '')),
  name: readField(raw, ['name'], ''),
-  vendor: readField(raw, ['vendor'], ''),
+  vendor: ((): string => {
    // A vendor value equal to 'siliconflow' (case-insensitive) is replaced by the
    // label '硅基流动'; any other vendor string is passed through unchanged.
    const vendor = String(readField(raw, ['vendor'], ''));
    return vendor.toLowerCase() === 'siliconflow' ? '硅基流动' : vendor;
  })(),
  gender: readField(raw, ['gender'], ''),
  language: readField(raw, ['language'], ''),
  description: readField(raw, ['description'], ''),
  model: readField(raw, ['model'], ''),
  voiceKey: readField(raw, ['voiceKey', 'voice_key'], ''),
  speed: Number(readField(raw, ['speed'], 1)),
  gain: Number(readField(raw, ['gain'], 0)),
  pitch: Number(readField(raw, ['pitch'], 0)),
  enabled: Boolean(readField(raw, ['enabled'], true)),
  isSystem: Boolean(readField(raw, ['isSystem', 'is_system'], false)),
 });
 // Builds a `VendorCredential` from a raw API record, accepting either camelCase or
 // snake_case keys for every field and falling back to an empty string.
 const mapVendorCredential = (raw: AnyRecord): VendorCredential => {
  const vendorKey = String(readField(raw, ['vendorKey', 'vendor_key'], ''));
  const vendorName = readField(raw, ['vendorName', 'vendor_name'], '');
  const apiKey = readField(raw, ['apiKey', 'api_key'], '');
  const baseUrl = readField(raw, ['baseUrl', 'base_url'], '');
  return { vendorKey, vendorName, apiKey, baseUrl };
 };
 const mapWorkflowNode = (raw: AnyRecord): WorkflowNode => ({
@@ -173,13 +190,85 @@ export const deleteAssistant = async (id: string): Promise<void> => {
 };
 // Loads all voices from `/voices` and maps each record with `mapVoice`.
 // The response is accepted either as a bare array or as an object carrying the
 // records under `list`; a missing `list` yields an empty result.
 export const fetchVoices = async (): Promise<Voice[]> => {
-  const response = await apiRequest<AnyRecord[]>('/voices');
+  const response = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>('/voices');
-  return response.map((item) => mapVoice(item));
+  const list = Array.isArray(response) ? response : (response.list || []);
  return list.map((item) => mapVoice(item));
 };
 /**
  * Creates a voice via `POST /voices` and returns the response mapped through `mapVoice`.
  *
  * Missing or empty fields are replaced by defaults before sending: name 'New Voice',
  * vendor 'SiliconFlow', gender 'Female', language 'zh', empty description, speed 1,
  * gain 0, pitch 0, enabled true. The vendor label '硅基流动' is sent as 'SiliconFlow'
  * and `voiceKey` is sent under the key `voice_key`.
  */
 export const createVoice = async (data: Partial<Voice>): Promise<Voice> => {
  const { id, name, vendor, gender, language, description, model, voiceKey, speed, gain, pitch, enabled } = data;
  // An absent vendor and the '硅基流动' label both resolve to 'SiliconFlow'.
  const vendorForApi = !vendor || vendor === '硅基流动' ? 'SiliconFlow' : vendor;
  const body = {
    id: id || undefined,
    name: name || 'New Voice',
    vendor: vendorForApi,
    gender: gender || 'Female',
    language: language || 'zh',
    description: description || '',
    model: model || undefined,
    voice_key: voiceKey || undefined,
    speed: speed ?? 1,
    gain: gain ?? 0,
    pitch: pitch ?? 0,
    enabled: enabled ?? true,
  };
  const created = await apiRequest<AnyRecord>('/voices', { method: 'POST', body });
  return mapVoice(created);
 };
 /**
  * Updates an existing voice via `PUT /voices/{id}`.
  *
  * @param id   Identifier of the voice to update; URL-encoded before being placed in the path.
  * @param data Fields to send. The vendor label '硅基流动' is sent as 'SiliconFlow' and
  *             `voiceKey` is sent under the key `voice_key`; all other fields are passed as given.
  * @returns The response record mapped through `mapVoice`.
  */
 export const updateVoice = async (id: string, data: Partial<Voice>): Promise<Voice> => {
  const payload = {
    name: data.name,
    vendor: data.vendor === '硅基流动' ? 'SiliconFlow' : data.vendor,
    gender: data.gender,
    language: data.language,
    description: data.description,
    model: data.model,
    voice_key: data.voiceKey,
    speed: data.speed,
    gain: data.gain,
    pitch: data.pitch,
    enabled: data.enabled,
  };
  // Encode the id so characters such as '/', '?' or '#' cannot change the request path.
  const response = await apiRequest<AnyRecord>(`/voices/${encodeURIComponent(id)}`, { method: 'PUT', body: payload });
  return mapVoice(response);
 };
 /**
  * Deletes a voice via `DELETE /voices/{id}`.
  *
  * @param id Identifier of the voice to delete; URL-encoded so characters such as
  *           '/', '?' or '#' cannot change the request path.
  */
 export const deleteVoice = async (id: string): Promise<void> => {
  await apiRequest(`/voices/${encodeURIComponent(id)}`, { method: 'DELETE' });
 };
 /**
  * Requests a preview for a voice via `POST /voices/{id}/preview`.
  *
  * @param id     Identifier of the voice; URL-encoded before being placed in the path.
  * @param text   Text sent as `text` in the request body.
  * @param speed  Optional value sent as `speed`.
  * @param apiKey Optional value sent as `api_key`.
  * @returns The `audio_url` reported by the response.
  * @throws Error carrying the response's `error` message (or 'Preview failed') when the
  *         response is not marked successful or has no `audio_url`.
  */
 export const previewVoice = async (id: string, text: string, speed?: number, apiKey?: string): Promise<string> => {
  // Encode the id so characters such as '/', '?' or '#' cannot change the request path.
  const response = await apiRequest<{ success: boolean; audio_url?: string; error?: string }>(`/voices/${encodeURIComponent(id)}/preview`, {
    method: 'POST',
    body: { text, speed, api_key: apiKey },
  });
  if (!response.success || !response.audio_url) {
    throw new Error(response.error || 'Preview failed');
  }
  return response.audio_url;
 };
 /**
  * Loads the stored vendor credentials from `/voices/vendors/credentials`.
  *
  * The response is accepted either as a bare array or as an object carrying the records
  * under `list` (the same handling `fetchVoices` and `fetchWorkflows` apply), so an
  * array-shaped response is no longer silently reduced to an empty list.
  *
  * @returns The records mapped through `mapVendorCredential`; empty when `list` is missing.
  */
 export const fetchVendorCredentials = async (): Promise<VendorCredential[]> => {
  const response = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>('/voices/vendors/credentials');
  const list = Array.isArray(response) ? response : (response.list || []);
  return list.map((item) => mapVendorCredential(item));
 };
 /**
  * Saves the credential for one vendor via `PUT /voices/vendors/credentials/{vendorKey}`.
  *
  * @param vendorKey Vendor identifier used as the last path segment; URL-encoded so
  *                  characters such as '/', '?' or '#' cannot change the request path.
  * @param data      Values sent as `vendor_name`, `api_key` and `base_url`; an empty
  *                  `baseUrl` is sent as `undefined`.
  * @returns The response record mapped through `mapVendorCredential`.
  */
 export const saveVendorCredential = async (vendorKey: string, data: { vendorName: string; apiKey: string; baseUrl?: string }): Promise<VendorCredential> => {
  const response = await apiRequest<AnyRecord>(`/voices/vendors/credentials/${encodeURIComponent(vendorKey)}`, {
    method: 'PUT',
    body: {
      vendor_name: data.vendorName,
      api_key: data.apiKey,
      base_url: data.baseUrl || undefined,
    },
  });
  return mapVendorCredential(response);
 };
 // Loads all workflows from `/workflows` and maps each record with `mapWorkflow`.
 // The response is accepted either as a bare array or as an object carrying the
 // records under `list`; a missing `list` yields an empty result.
 export const fetchWorkflows = async (): Promise<Workflow[]> => {
-  const response = await apiRequest<AnyRecord[]>('/workflows');
+  const response = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>('/workflows');
-  return response.map((item) => mapWorkflow(item));
+  const list = Array.isArray(response) ? response : (response.list || []);
  return list.map((item) => mapWorkflow(item));
 };
 export const fetchWorkflowById = async (id: string): Promise<Workflow> => {
@@ -238,12 +327,12 @@ export const deleteKnowledgeBase = async (kbId: string): Promise<void> => {
 };
 // Registers a document on knowledge base `kbId` by POSTing metadata derived from `file`:
 // its name, its size formatted in KB with one decimal, and its `type` (falling back to 'txt').
 // The metadata travels in the request body rather than the query string.
 // NOTE(review): only metadata is built here; the file contents are not part of the payload.
 export const uploadKnowledgeDocument = async (kbId: string, file: File): Promise<void> => {
-  const params = new URLSearchParams({
+  const payload = {
    name: file.name,
    size: `${(file.size / 1024).toFixed(1)} KB`,
-    file_type: file.type || 'txt',
+    fileType: file.type || 'txt',
-  });
+  };
-  await apiRequest(`/knowledge/bases/${kbId}/documents?${params.toString()}`, { method: 'POST' });
+  await apiRequest(`/knowledge/bases/${kbId}/documents`, { method: 'POST', body: payload });
 };
 export const deleteKnowledgeDocument = async (kbId: string, docId: string): Promise<void> => {
--- a/web/types.ts
+++ b/web/types.ts
@@ -28,6 +28,20 @@ export interface Voice {
  gender: string;
  language: string;
  description: string;
  model?: string;
  voiceKey?: string;
  speed?: number;
  gain?: number;
  pitch?: number;
  enabled?: boolean;
  isSystem?: boolean;
 }
 /** Credential record for one vendor, in the camelCase form used by the frontend. */
 export interface VendorCredential {
  /** Identifier of the vendor (backend field `vendor_key`). */
  vendorKey: string;
  /** Name of the vendor (backend field `vendor_name`). */
  vendorName: string;
  /** API key stored for the vendor (backend field `api_key`). */
  apiKey: string;
  /** Optional base URL (backend field `base_url`); presumably the vendor's API endpoint — confirm. */
  baseUrl?: string;
 }
 export interface KnowledgeBase {
`@@ -1,4 +1,4 @@`
	`const DEFAULT_API_BASE_URL = 'http://localhost:8100/api';`	`const DEFAULT_API_BASE_URL = 'http://127.0.0.1:8100/api';`

	`const trimTrailingSlash = (value: string): string => value.replace(/\/+$/, '');`	`const trimTrailingSlash = (value: string): string => value.replace(/\/+$/, '');`