Voice library data persistence after codex

This commit is contained in:
Xin Wang
2026-02-08 22:39:55 +08:00
parent 8069a16227
commit 68f69f9b09
7 changed files with 663 additions and 55 deletions

View File

@@ -38,6 +38,17 @@ class Voice(Base):
user = relationship("User", foreign_keys=[user_id])
class VendorCredential(Base):
    """One stored API credential per vendor, keyed by ``vendor_key``.

    Rows live in the ``vendor_credentials`` table. ``vendor_key`` is the
    primary key, so each vendor has at most one credential row.
    """

    __tablename__ = "vendor_credentials"

    vendor_key: Mapped[str] = mapped_column(String(64), primary_key=True)
    vendor_name: Mapped[str] = mapped_column(String(128), nullable=False)
    # NOTE(review): the key is stored as a plain string column; no encryption is
    # visible in this model - confirm whether that is acceptable.
    api_key: Mapped[str] = mapped_column(String(512), nullable=False)
    base_url: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
    # onupdate refreshes the timestamp on every ORM UPDATE, so callers that
    # modify a row without touching updated_at still leave a correct value.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
    )
# ============ LLM Model ============
class LLMModel(Base):
__tablename__ = "llm_models"

View File

@@ -1,14 +1,65 @@
import base64
import os
import uuid
from datetime import datetime
from typing import Optional
import httpx
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from typing import Optional
import uuid
from ..db import get_db
from ..models import Voice
from ..schemas import VoiceCreate, VoiceUpdate, VoiceOut
from ..models import VendorCredential, Voice
from ..schemas import (
VendorCredentialOut,
VendorCredentialUpsert,
VoiceCreate,
VoiceOut,
VoicePreviewRequest,
VoicePreviewResponse,
VoiceUpdate,
)
router = APIRouter(prefix="/voices", tags=["Voices"])
SILICONFLOW_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
def _is_siliconflow_vendor(vendor: str) -> bool:
return vendor.strip().lower() in {"siliconflow", "硅基流动"}
def _canonical_vendor_key(vendor: str) -> str:
normalized = vendor.strip().lower()
alias_map = {
"硅基流动": "siliconflow",
"siliconflow": "siliconflow",
"ali": "ali",
"volcano": "volcano",
"minimax": "minimax",
}
return alias_map.get(normalized, normalized)
def _default_tts_base_url(vendor_key: str) -> Optional[str]:
defaults = {
"siliconflow": "https://api.siliconflow.cn/v1",
}
return defaults.get(vendor_key)
def _resolve_vendor_credential(db: Session, vendor: str) -> Optional[VendorCredential]:
    """Look up the stored credential for ``vendor``.

    The vendor name is first normalized with ``_canonical_vendor_key``; the
    first row whose ``vendor_key`` equals that key is returned, or ``None``
    when no such row exists.
    """
    canonical = _canonical_vendor_key(vendor)
    matching = db.query(VendorCredential).filter(VendorCredential.vendor_key == canonical)
    return matching.first()
def _build_siliconflow_voice_key(voice: Voice, model: str) -> str:
    """Derive the ``voice`` identifier sent to the TTS API for a stored voice.

    Precedence:
      1. ``voice.voice_key`` when it is set (truthy).
      2. ``voice.id`` as-is when it already contains a ``":"``.
      3. ``"<model>:<voice.id>"`` otherwise.
    """
    explicit_key = voice.voice_key
    if explicit_key:
        return explicit_key
    voice_id = voice.id
    return voice_id if ":" in voice_id else f"{model}:{voice_id}"
@router.get("")
def list_voices(
@@ -37,16 +88,26 @@ def list_voices(
@router.post("", response_model=VoiceOut)
def create_voice(data: VoiceCreate, db: Session = Depends(get_db)):
"""创建声音"""
vendor = data.vendor.strip()
model = data.model
voice_key = data.voice_key
if _is_siliconflow_vendor(vendor):
model = model or SILICONFLOW_DEFAULT_MODEL
if not voice_key:
raw_id = (data.id or data.name).strip()
voice_key = raw_id if ":" in raw_id else f"{model}:{raw_id}"
voice = Voice(
id=data.id or str(uuid.uuid4())[:8],
user_id=1,
name=data.name,
vendor=data.vendor,
vendor=vendor,
gender=data.gender,
language=data.language,
description=data.description,
model=data.model,
voice_key=data.voice_key,
model=model,
voice_key=voice_key,
speed=data.speed,
gain=data.gain,
pitch=data.pitch,
@@ -75,6 +136,16 @@ def update_voice(id: str, data: VoiceUpdate, db: Session = Depends(get_db)):
raise HTTPException(status_code=404, detail="Voice not found")
update_data = data.model_dump(exclude_unset=True)
if "vendor" in update_data and update_data["vendor"] is not None:
update_data["vendor"] = update_data["vendor"].strip()
vendor_for_defaults = update_data.get("vendor", voice.vendor)
if _is_siliconflow_vendor(vendor_for_defaults):
model = update_data.get("model") or voice.model or SILICONFLOW_DEFAULT_MODEL
voice_key = update_data.get("voice_key") or voice.voice_key
update_data["model"] = model
update_data["voice_key"] = voice_key or _build_siliconflow_voice_key(voice, model)
for field, value in update_data.items():
setattr(voice, field, value)
@@ -92,3 +163,111 @@ def delete_voice(id: str, db: Session = Depends(get_db)):
db.delete(voice)
db.commit()
return {"message": "Deleted successfully"}
@router.get("/vendors/credentials")
def list_vendor_credentials(db: Session = Depends(get_db)):
    """Return all stored vendor credentials, most recently updated first.

    The response is a dict with the rows under ``"list"`` and their count
    under ``"total"``.
    """
    newest_first = VendorCredential.updated_at.desc()
    credentials = db.query(VendorCredential).order_by(newest_first).all()
    return {"list": credentials, "total": len(credentials)}
@router.get("/vendors/credentials/{vendor_key}", response_model=VendorCredentialOut)
def get_vendor_credential(vendor_key: str, db: Session = Depends(get_db)):
    """Fetch the credential stored for ``vendor_key``.

    The path value is canonicalized before the lookup, so aliases resolve to
    the same row. Raises a 404 ``HTTPException`` when no credential exists.
    """
    credential = _resolve_vendor_credential(db, vendor_key)
    if credential is None:
        raise HTTPException(status_code=404, detail="Vendor credential not found")
    return credential
@router.put("/vendors/credentials/{vendor_key}", response_model=VendorCredentialOut)
def upsert_vendor_credential(vendor_key: str, data: VendorCredentialUpsert, db: Session = Depends(get_db)):
    """Create the credential for ``vendor_key`` or overwrite the existing one.

    The row is addressed by the canonical form of ``vendor_key``. On update
    the API key and base URL are replaced with the submitted values, the
    vendor name is replaced only when a non-empty one is submitted, and
    ``updated_at`` is set to the current UTC time. On create the vendor name
    falls back to the raw path value when none is given.
    """
    canonical = _canonical_vendor_key(vendor_key)
    record = _resolve_vendor_credential(db, vendor_key)
    if record is None:
        record = VendorCredential(
            vendor_key=canonical,
            vendor_name=data.vendor_name or vendor_key,
            api_key=data.api_key,
            base_url=data.base_url,
        )
        db.add(record)
    else:
        record.vendor_name = data.vendor_name or record.vendor_name
        record.api_key = data.api_key
        record.base_url = data.base_url
        record.updated_at = datetime.utcnow()
    db.commit()
    # Reload so the returned object reflects what was actually persisted.
    db.refresh(record)
    return record
@router.delete("/vendors/credentials/{vendor_key}")
def delete_vendor_credential(vendor_key: str, db: Session = Depends(get_db)):
    """Delete the credential stored for ``vendor_key``.

    The path value is canonicalized before the lookup. Raises a 404
    ``HTTPException`` when no credential exists; otherwise the row is removed
    and a confirmation message is returned.
    """
    credential = _resolve_vendor_credential(db, vendor_key)
    if credential is None:
        raise HTTPException(status_code=404, detail="Vendor credential not found")
    db.delete(credential)
    db.commit()
    return {"message": "Deleted successfully"}
def _extract_vendor_error(response: httpx.Response) -> str:
    """Pull a readable error message out of a failed vendor response.

    Prefers ``error.message``, then ``detail``, from a JSON object body, and
    falls back to the raw response text when the body is not JSON, is not an
    object, or carries neither field.
    """
    fallback = response.text
    try:
        body = response.json()
    except ValueError:
        # Body is not valid JSON; the raw text is the best description we have.
        return fallback
    if not isinstance(body, dict):
        return fallback
    error = body.get("error")
    message = error.get("message") if isinstance(error, dict) else None
    return message or body.get("detail") or fallback


@router.post("/{id}/preview", response_model=VoicePreviewResponse)
def preview_voice(id: str, data: VoicePreviewRequest, db: Session = Depends(get_db)):
    """Synthesize a preview for a stored voice via an OpenAI-compatible /audio/speech endpoint.

    API key resolution order: the key in the request body, then the stored
    vendor credential, then (SiliconFlow voices only) the
    ``SILICONFLOW_API_KEY`` environment variable. The base URL comes from the
    stored credential, falling back to the vendor's built-in default.

    Returns:
        ``VoicePreviewResponse`` whose ``audio_url`` is a
        ``data:audio/mpeg;base64,...`` URL containing the vendor's response bytes.

    Raises:
        HTTPException 404: the voice does not exist.
        HTTPException 400: the text is blank, or no API key / base URL can be resolved.
        HTTPException 502: the request to the vendor fails or returns a non-200 status.
    """
    voice = db.query(Voice).filter(Voice.id == id).first()
    if not voice:
        raise HTTPException(status_code=404, detail="Voice not found")

    text = data.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Preview text cannot be empty")

    credential = _resolve_vendor_credential(db, voice.vendor)

    # Every candidate key is stripped so stray whitespace never ends up in the
    # Authorization header.
    api_key = (data.api_key or "").strip()
    if not api_key and credential:
        api_key = (credential.api_key or "").strip()
    if not api_key and _is_siliconflow_vendor(voice.vendor):
        api_key = (os.getenv("SILICONFLOW_API_KEY") or "").strip()
    if not api_key:
        raise HTTPException(status_code=400, detail=f"Vendor API key is required for {voice.vendor}")

    # NOTE(review): the SiliconFlow default model and voice-key builder are
    # applied to every vendor here - confirm that is intended for non-SiliconFlow voices.
    model = voice.model or SILICONFLOW_DEFAULT_MODEL
    vendor_key = _canonical_vendor_key(voice.vendor)
    stored_base_url = credential.base_url.strip() if credential and credential.base_url else ""
    base_url = stored_base_url or _default_tts_base_url(vendor_key)
    if not base_url:
        raise HTTPException(status_code=400, detail=f"Vendor base_url is required for {voice.vendor}")

    tts_api_url = f"{base_url.rstrip('/')}/audio/speech"
    payload = {
        "model": model,
        "input": text,
        "voice": voice.voice_key or _build_siliconflow_voice_key(voice, model),
        "response_format": "mp3",
    }
    # Only send a speed when one is actually known; a JSON null would be
    # forwarded to the vendor otherwise.
    speed = data.speed if data.speed is not None else voice.speed
    if speed is not None:
        payload["speed"] = speed

    try:
        with httpx.Client(timeout=45.0) as client:
            response = client.post(
                tts_api_url,
                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
                json=payload,
            )
    except Exception as exc:
        # Any transport-level failure is reported to the caller as a bad gateway.
        raise HTTPException(status_code=502, detail=f"TTS request failed: {exc}") from exc

    if response.status_code != 200:
        raise HTTPException(status_code=502, detail=f"TTS vendor error: {_extract_vendor_error(response)}")

    audio_base64 = base64.b64encode(response.content).decode("utf-8")
    return VoicePreviewResponse(success=True, audio_url=f"data:audio/mpeg;base64,{audio_base64}")

View File

@@ -61,6 +61,9 @@ class VoiceCreate(VoiceBase):
class VoiceUpdate(BaseModel):
name: Optional[str] = None
vendor: Optional[str] = None
gender: Optional[str] = None
language: Optional[str] = None
description: Optional[str] = None
model: Optional[str] = None
voice_key: Optional[str] = None
@@ -88,6 +91,7 @@ class VoiceOut(VoiceBase):
class VoicePreviewRequest(BaseModel):
text: str
api_key: Optional[str] = None
speed: Optional[float] = None
gain: Optional[int] = None
pitch: Optional[int] = None
@@ -100,6 +104,24 @@ class VoicePreviewResponse(BaseModel):
error: Optional[str] = None
# Request body for creating or replacing a vendor credential.
class VendorCredentialUpsert(BaseModel):
# Optional display name for the vendor.
vendor_name: Optional[str] = None
# The only required field: the vendor API key.
api_key: str
# Optional API base URL for the vendor.
base_url: Optional[str] = None
# Response shape for a stored vendor credential.
# NOTE(review): api_key is part of this shape, so endpoints that use this model
# return the stored key to the client - confirm that is intended.
class VendorCredentialOut(BaseModel):
vendor_key: str
vendor_name: str
api_key: str
base_url: Optional[str] = None
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None
# from_attributes lets the model be populated from object attributes
# (presumably ORM rows returned by the router - verify against callers).
class Config:
from_attributes = True
# ============ LLM Model ============
class LLMModelBase(BaseModel):
name: str

View File

@@ -1,4 +1,5 @@
"""Tests for Voice API endpoints"""
import base64
import pytest
@@ -130,3 +131,110 @@ class TestVoiceAPI:
data = response.json()
for voice in data["list"]:
assert voice["gender"] == "Female"
def test_preview_voice_success(self, client, monkeypatch):
    """Preview endpoint wraps the vendor's audio bytes in a base64 data URL."""
    from app.routers import voices as voice_router

    class DummyResponse:
        # Minimal stand-in for the HTTP response the router reads.
        status_code = 200
        content = b"fake-mp3-bytes"
        text = "ok"

        def json(self):
            return {}

    class DummyClient:
        # Context-manager stub replacing httpx.Client; no network traffic.
        def __init__(self, *args, **kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def post(self, *args, **kwargs):
            return DummyResponse()

    monkeypatch.setenv("SILICONFLOW_API_KEY", "test-key")
    monkeypatch.setattr(voice_router.httpx, "Client", DummyClient)

    voice_payload = {
        "id": "anna",
        "name": "Anna",
        "vendor": "SiliconFlow",
        "gender": "Female",
        "language": "zh",
        "description": "system voice",
        "model": "FunAudioLLM/CosyVoice2-0.5B",
        "voice_key": "FunAudioLLM/CosyVoice2-0.5B:anna"
    }
    created = client.post("/api/voices", json=voice_payload)
    assert created.status_code == 200

    previewed = client.post("/api/voices/anna/preview", json={"text": "你好"})
    assert previewed.status_code == 200

    body = previewed.json()
    assert body["success"] is True
    audio_url = body["audio_url"]
    assert audio_url.startswith("data:audio/mpeg;base64,")
    # Everything after the first comma is the base64 payload.
    _, encoded_audio = audio_url.split(",", 1)
    assert base64.b64decode(encoded_audio) == b"fake-mp3-bytes"
def test_vendor_credential_persist_and_preview_use_db_key(self, client, monkeypatch):
    """A credential saved through the API is the key the preview call sends."""
    from app.routers import voices as voice_router

    # Mutable holder so the stub client can report the header it received.
    captured_auth = {"value": ""}

    class DummyResponse:
        status_code = 200
        content = b"fake-mp3"
        text = "ok"

        def json(self):
            return {}

    class DummyClient:
        # Context-manager stub replacing httpx.Client; records the auth header.
        def __init__(self, *args, **kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def post(self, *args, **kwargs):
            sent_headers = kwargs.get("headers", {})
            captured_auth["value"] = sent_headers.get("Authorization", "")
            return DummyResponse()

    # Remove the env fallback so only the stored key can satisfy the request.
    monkeypatch.delenv("SILICONFLOW_API_KEY", raising=False)
    monkeypatch.setattr(voice_router.httpx, "Client", DummyClient)

    credential_payload = {
        "vendor_name": "SiliconFlow",
        "api_key": "db-key-123",
        "base_url": "https://api.siliconflow.cn/v1"
    }
    saved = client.put("/api/voices/vendors/credentials/siliconflow", json=credential_payload)
    assert saved.status_code == 200
    assert saved.json()["vendor_key"] == "siliconflow"

    voice_payload = {
        "id": "anna2",
        "name": "Anna 2",
        "vendor": "SiliconFlow",
        "gender": "Female",
        "language": "zh",
        "description": "voice",
        "model": "FunAudioLLM/CosyVoice2-0.5B",
        "voice_key": "FunAudioLLM/CosyVoice2-0.5B:anna"
    }
    created = client.post("/api/voices", json=voice_payload)
    assert created.status_code == 200

    previewed = client.post("/api/voices/anna2/preview", json={"text": "hello"})
    assert previewed.status_code == 200
    assert captured_auth["value"] == "Bearer db-key-123"

View File

@@ -1,40 +1,65 @@
import React, { useEffect, useState, useRef } from 'react';
import { Search, Mic2, Play, Pause, Upload, X, Filter, Plus, Volume2, Sparkles, Wand2, ChevronDown } from 'lucide-react';
import { Search, Mic2, Play, Pause, Upload, X, Filter, Plus, Volume2, Sparkles, Wand2, ChevronDown, Pencil, Trash2 } from 'lucide-react';
import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI';
import { mockVoices } from '../services/mockData';
import { Voice } from '../types';
import { fetchVoices } from '../services/backendApi';
import { VendorCredential, Voice } from '../types';
import { createVoice, deleteVoice, fetchVendorCredentials, fetchVoices, previewVoice, saveVendorCredential, updateVoice } from '../services/backendApi';
// Vendors offered in the credential form. `key` is the value sent as the
// vendor key when saving a credential; `label` is the text shown in the
// dropdown and used as the vendor display name.
const VENDOR_OPTIONS = [
{ key: 'siliconflow', label: '硅基流动 (SiliconFlow)' },
{ key: 'ali', label: 'Ali' },
{ key: 'volcano', label: 'Volcano' },
{ key: 'minimax', label: 'Minimax' },
];
export const VoiceLibraryPage: React.FC = () => {
const [voices, setVoices] = useState<Voice[]>([]);
const [searchTerm, setSearchTerm] = useState('');
const [vendorFilter, setVendorFilter] = useState<'all' | 'Ali' | 'Volcano' | 'Minimax' | '硅基流动'>('all');
const [vendorFilter, setVendorFilter] = useState<'all' | 'Ali' | 'Volcano' | 'Minimax' | '硅基流动' | 'SiliconFlow'>('all');
const [genderFilter, setGenderFilter] = useState<'all' | 'Male' | 'Female'>('all');
const [langFilter, setLangFilter] = useState<'all' | 'zh' | 'en'>('all');
const [playingVoiceId, setPlayingVoiceId] = useState<string | null>(null);
const [isCloneModalOpen, setIsCloneModalOpen] = useState(false);
const [isAddModalOpen, setIsAddModalOpen] = useState(false);
const [editingVoice, setEditingVoice] = useState<Voice | null>(null);
const [isLoading, setIsLoading] = useState(true);
const [playLoadingId, setPlayLoadingId] = useState<string | null>(null);
const [vendorCredentials, setVendorCredentials] = useState<Record<string, VendorCredential>>({});
const [credentialVendorKey, setCredentialVendorKey] = useState('siliconflow');
const [credentialApiKey, setCredentialApiKey] = useState('');
const [credentialBaseUrl, setCredentialBaseUrl] = useState('');
const [isSavingCredential, setIsSavingCredential] = useState(false);
const audioRef = useRef<HTMLAudioElement | null>(null);
useEffect(() => {
const loadVoices = async () => {
const loadVoicesAndCredentials = async () => {
setIsLoading(true);
try {
const list = await fetchVoices();
setVoices(list.length > 0 ? list : mockVoices);
const [list, credentials] = await Promise.all([fetchVoices(), fetchVendorCredentials()]);
setVoices(list);
const mapped = credentials.reduce((acc, item) => {
acc[item.vendorKey] = item;
return acc;
}, {} as Record<string, VendorCredential>);
setVendorCredentials(mapped);
} catch (error) {
console.error(error);
setVoices(mockVoices);
setVoices([]);
} finally {
setIsLoading(false);
}
};
loadVoices();
loadVoicesAndCredentials();
}, []);
useEffect(() => {
const selected = vendorCredentials[credentialVendorKey];
setCredentialApiKey(selected?.apiKey || '');
setCredentialBaseUrl(selected?.baseUrl || '');
}, [credentialVendorKey, vendorCredentials]);
const filteredVoices = voices.filter(voice => {
const matchesSearch = voice.name.toLowerCase().includes(searchTerm.toLowerCase());
const matchesVendor = vendorFilter === 'all' || voice.vendor === vendorFilter;
@@ -43,23 +68,80 @@ export const VoiceLibraryPage: React.FC = () => {
return matchesSearch && matchesVendor && matchesGender && matchesLang;
});
const handlePlayToggle = (id: string) => {
if (playingVoiceId === id) {
const handlePlayToggle = async (voice: Voice) => {
if (playingVoiceId === voice.id && audioRef.current) {
audioRef.current.pause();
audioRef.current.currentTime = 0;
setPlayingVoiceId(null);
} else {
setPlayingVoiceId(id);
setTimeout(() => {
setPlayingVoiceId((current) => current === id ? null : current);
}, 3000);
return;
}
try {
setPlayLoadingId(voice.id);
const audioUrl = await previewVoice(
voice.id,
voice.language === 'en' ? 'Hello, this is a voice preview.' : '你好,这是一段语音试听。',
voice.speed
);
if (audioRef.current) {
audioRef.current.pause();
}
const audio = new Audio(audioUrl);
audio.onended = () => setPlayingVoiceId(null);
audio.onerror = () => {
setPlayingVoiceId(null);
alert('试听失败,请检查 SiliconFlow 配置。');
};
audioRef.current = audio;
setPlayingVoiceId(voice.id);
await audio.play();
} catch (error: any) {
alert(error?.message || '试听失败');
setPlayingVoiceId(null);
} finally {
setPlayLoadingId(null);
}
};
const handleAddSuccess = (newVoice: Voice) => {
setVoices([newVoice, ...voices]);
const handleAddSuccess = async (newVoice: Voice) => {
const created = await createVoice(newVoice);
setVoices((prev) => [created, ...prev]);
setIsAddModalOpen(false);
setIsCloneModalOpen(false);
};
const handleUpdateSuccess = async (id: string, data: Voice) => {
const updated = await updateVoice(id, data);
setVoices((prev) => prev.map((voice) => (voice.id === id ? updated : voice)));
setEditingVoice(null);
};
const handleDelete = async (id: string) => {
if (!confirm('确认删除这个声音吗?')) return;
await deleteVoice(id);
setVoices((prev) => prev.filter((voice) => voice.id !== id));
};
const handleSaveVendorCredential = async () => {
if (!credentialApiKey.trim()) {
alert('请填写 API Key');
return;
}
try {
setIsSavingCredential(true);
const option = VENDOR_OPTIONS.find((item) => item.key === credentialVendorKey);
const saved = await saveVendorCredential(credentialVendorKey, {
vendorName: option?.label || credentialVendorKey,
apiKey: credentialApiKey.trim(),
baseUrl: credentialBaseUrl.trim(),
});
setVendorCredentials((prev) => ({ ...prev, [saved.vendorKey]: saved }));
} catch (error: any) {
alert(error?.message || '保存厂商配置失败');
} finally {
setIsSavingCredential(false);
}
};
return (
<div className="space-y-6 animate-in fade-in py-4 pb-10">
<div className="flex items-center justify-between">
@@ -94,6 +176,7 @@ export const VoiceLibraryPage: React.FC = () => {
>
<option value="all"></option>
<option value="硅基流动"> (SiliconFlow)</option>
<option value="SiliconFlow">SiliconFlow</option>
<option value="Ali"> (Ali)</option>
<option value="Volcano"> (Volcano)</option>
<option value="Minimax">Minimax</option>
@@ -123,6 +206,34 @@ export const VoiceLibraryPage: React.FC = () => {
</div>
</div>
<div className="grid grid-cols-1 md:grid-cols-4 gap-3 bg-card/50 p-4 rounded-lg border border-white/5 shadow-sm">
<select
className="flex h-9 w-full rounded-md border-0 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 [&>option]:bg-card text-foreground"
value={credentialVendorKey}
onChange={(e) => setCredentialVendorKey(e.target.value)}
>
{VENDOR_OPTIONS.map((item) => (
<option key={item.key} value={item.key}>{item.label}</option>
))}
</select>
<Input
type="password"
placeholder="Vendor API Key (持久化到后端)"
className="border-0 bg-white/5"
value={credentialApiKey}
onChange={e => setCredentialApiKey(e.target.value)}
/>
<Input
placeholder="Base URL (OpenAI compatible, 选填)"
className="border-0 bg-white/5"
value={credentialBaseUrl}
onChange={e => setCredentialBaseUrl(e.target.value)}
/>
<Button onClick={handleSaveVendorCredential} disabled={isSavingCredential}>
{isSavingCredential ? '保存中...' : '保存厂商配置'}
</Button>
</div>
<div className="rounded-md border border-white/5 bg-card/40 backdrop-blur-md overflow-hidden">
<table className="w-full text-sm">
<TableHeader>
@@ -132,6 +243,7 @@ export const VoiceLibraryPage: React.FC = () => {
<TableHead></TableHead>
<TableHead></TableHead>
<TableHead className="text-right"></TableHead>
<TableHead className="text-right"></TableHead>
</TableRow>
</TableHeader>
<tbody>
@@ -155,22 +267,31 @@ export const VoiceLibraryPage: React.FC = () => {
<Button
variant="ghost"
size="icon"
onClick={() => handlePlayToggle(voice.id)}
onClick={() => handlePlayToggle(voice)}
disabled={playLoadingId === voice.id}
className={playingVoiceId === voice.id ? "text-primary animate-pulse" : ""}
>
{playingVoiceId === voice.id ? <Pause className="h-4 w-4" /> : <Play className="h-4 w-4" />}
</Button>
</TableCell>
<TableCell className="text-right">
<Button variant="ghost" size="icon" onClick={() => setEditingVoice(voice)}>
<Pencil className="h-4 w-4" />
</Button>
<Button variant="ghost" size="icon" onClick={() => handleDelete(voice.id)} className="text-red-400">
<Trash2 className="h-4 w-4" />
</Button>
</TableCell>
</TableRow>
))}
{!isLoading && filteredVoices.length === 0 && (
<TableRow>
<TableCell colSpan={5} className="text-center py-6 text-muted-foreground"></TableCell>
<TableCell colSpan={6} className="text-center py-6 text-muted-foreground"></TableCell>
</TableRow>
)}
{isLoading && (
<TableRow>
<TableCell colSpan={5} className="text-center py-6 text-muted-foreground">...</TableCell>
<TableCell colSpan={6} className="text-center py-6 text-muted-foreground">...</TableCell>
</TableRow>
)}
</tbody>
@@ -183,6 +304,13 @@ export const VoiceLibraryPage: React.FC = () => {
onSuccess={handleAddSuccess}
/>
<AddVoiceModal
isOpen={!!editingVoice}
onClose={() => setEditingVoice(null)}
onSuccess={(voice) => handleUpdateSuccess(editingVoice!.id, voice)}
initialVoice={editingVoice || undefined}
/>
<CloneVoiceModal
isOpen={isCloneModalOpen}
onClose={() => setIsCloneModalOpen(false)}
@@ -196,15 +324,17 @@ export const VoiceLibraryPage: React.FC = () => {
const AddVoiceModal: React.FC<{
isOpen: boolean;
onClose: () => void;
onSuccess: (voice: Voice) => void;
}> = ({ isOpen, onClose, onSuccess }) => {
onSuccess: (voice: Voice) => Promise<void>;
initialVoice?: Voice;
}> = ({ isOpen, onClose, onSuccess, initialVoice }) => {
const [vendor, setVendor] = useState<'硅基流动' | 'Ali' | 'Volcano' | 'Minimax'>('硅基流动');
const [name, setName] = useState('');
const [sfModel, setSfModel] = useState('fishaudio/fish-speech-1.5');
const [sfVoiceId, setSfVoiceId] = useState('fishaudio:amy');
const [sfModel, setSfModel] = useState('FunAudioLLM/CosyVoice2-0.5B');
const [sfVoiceId, setSfVoiceId] = useState('FunAudioLLM/CosyVoice2-0.5B:anna');
const [sfSpeed, setSfSpeed] = useState(1);
const [sfGain, setSfGain] = useState(0);
const [sfPitch, setSfPitch] = useState(0);
const [model, setModel] = useState('');
const [voiceKey, setVoiceKey] = useState('');
@@ -214,40 +344,90 @@ const AddVoiceModal: React.FC<{
const [testInput, setTestInput] = useState('你好,正在测试语音合成效果。');
const [isAuditioning, setIsAuditioning] = useState(false);
const [isSaving, setIsSaving] = useState(false);
const testAudioRef = useRef<HTMLAudioElement | null>(null);
const handleAudition = () => {
useEffect(() => {
if (!initialVoice) return;
const nextVendor = initialVoice.vendor === 'SiliconFlow' ? '硅基流动' : initialVoice.vendor;
setVendor((nextVendor as any) || '硅基流动');
setName(initialVoice.name || '');
setGender(initialVoice.gender || 'Female');
setLanguage(initialVoice.language || 'zh');
setDescription(initialVoice.description || '');
setModel(initialVoice.model || '');
setVoiceKey(initialVoice.voiceKey || '');
setSfModel(initialVoice.model || 'FunAudioLLM/CosyVoice2-0.5B');
setSfVoiceId(initialVoice.voiceKey || 'FunAudioLLM/CosyVoice2-0.5B:anna');
setSfSpeed(initialVoice.speed ?? 1);
setSfGain(initialVoice.gain ?? 0);
setSfPitch(initialVoice.pitch ?? 0);
}, [initialVoice, isOpen]);
const handleAudition = async () => {
if (!testInput.trim()) return;
setIsAuditioning(true);
setTimeout(() => setIsAuditioning(false), 2000);
if (!initialVoice?.id) {
alert('请先创建声音,再进行试听。');
return;
}
try {
setIsAuditioning(true);
const audioUrl = await previewVoice(initialVoice.id, testInput, sfSpeed);
if (testAudioRef.current) {
testAudioRef.current.pause();
}
const audio = new Audio(audioUrl);
testAudioRef.current = audio;
await audio.play();
} catch (error: any) {
alert(error?.message || '试听失败');
} finally {
setIsAuditioning(false);
}
};
const handleSubmit = () => {
const handleSubmit = async () => {
if (!name) { alert("请填写声音显示名称"); return; }
let newVoice: Voice = {
id: `${vendor === '硅基流动' ? 'sf' : 'gen'}-${Date.now()}`,
const newVoice: Voice = {
id: initialVoice?.id || `${vendor === '硅基流动' ? 'sf' : 'gen'}-${Date.now()}`,
name: name,
vendor: vendor,
gender: gender,
language: language,
description: description || (vendor === '硅基流动' ? `Model: ${sfModel}` : `Model: ${model}`)
description: description || (vendor === '硅基流动' ? `Model: ${sfModel}` : `Model: ${model}`),
model: vendor === '硅基流动' ? sfModel : model,
voiceKey: vendor === '硅基流动' ? sfVoiceId : voiceKey,
speed: sfSpeed,
gain: sfGain,
pitch: sfPitch,
};
onSuccess(newVoice);
setName('');
setVendor('硅基流动');
setDescription('');
try {
setIsSaving(true);
await onSuccess(newVoice);
setName('');
setVendor('硅基流动');
setDescription('');
setModel('');
setVoiceKey('');
} catch (error: any) {
alert(error?.message || '保存失败');
} finally {
setIsSaving(false);
}
};
return (
<Dialog
isOpen={isOpen}
onClose={onClose}
title="添加声音"
title={initialVoice ? "编辑声音" : "添加声音"}
footer={
<>
<Button variant="ghost" onClick={onClose}></Button>
<Button onClick={handleSubmit} className="bg-primary hover:bg-primary/90"></Button>
<Button onClick={handleSubmit} className="bg-primary hover:bg-primary/90" disabled={isSaving}>
{isSaving ? '保存中...' : (initialVoice ? '保存修改' : '确认添加')}
</Button>
</>
}
>
@@ -286,9 +466,9 @@ const AddVoiceModal: React.FC<{
value={sfModel}
onChange={e => setSfModel(e.target.value)}
>
<option value="FunAudioLLM/CosyVoice2-0.5B">FunAudioLLM/CosyVoice2-0.5B</option>
<option value="fishaudio/fish-speech-1.5">fishaudio/fish-speech-1.5</option>
<option value="fishaudio/fish-speech-1.4">fishaudio/fish-speech-1.4</option>
<option value="ByteDance/SA-Speech">ByteDance/SA-Speech</option>
</select>
</div>
<div className="space-y-1.5">
@@ -312,6 +492,13 @@ const AddVoiceModal: React.FC<{
<span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfGain}dB</span>
</div>
</div>
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Pitch)</label>
<div className="flex items-center space-x-2">
<input type="range" min="-12" max="12" step="1" value={sfPitch} onChange={e => setSfPitch(parseInt(e.target.value))} className="flex-1 accent-primary" />
<span className="text-[10px] font-mono text-primary bg-primary/10 px-1.5 py-0.5 rounded">{sfPitch}</span>
</div>
</div>
</div>
</div>
) : (
@@ -382,7 +569,7 @@ const AddVoiceModal: React.FC<{
variant="primary"
size="sm"
onClick={handleAudition}
disabled={isAuditioning}
disabled={isAuditioning || !initialVoice}
className="shrink-0 h-9"
>
{isAuditioning ? <Pause className="h-3.5 w-3.5 animate-pulse" /> : <Play className="h-3.5 w-3.5" />}
@@ -397,7 +584,7 @@ const AddVoiceModal: React.FC<{
const CloneVoiceModal: React.FC<{
isOpen: boolean;
onClose: () => void;
onSuccess: (voice: Voice) => void
onSuccess: (voice: Voice) => Promise<void>
}> = ({ isOpen, onClose, onSuccess }) => {
const [name, setName] = useState('');
const [description, setDescription] = useState('');
@@ -410,7 +597,7 @@ const CloneVoiceModal: React.FC<{
}
};
const handleSubmit = () => {
const handleSubmit = async () => {
if (!name || !file) {
alert("请填写名称并上传音频文件");
return;
@@ -425,7 +612,7 @@ const CloneVoiceModal: React.FC<{
description: description || 'User cloned voice'
};
onSuccess(newVoice);
await onSuccess(newVoice);
setName('');
setDescription('');
setFile(null);

View File

@@ -1,4 +1,4 @@
import { Assistant, CallLog, InteractionDetail, KnowledgeBase, KnowledgeDocument, Voice, Workflow, WorkflowEdge, WorkflowNode } from '../types';
import { Assistant, CallLog, InteractionDetail, KnowledgeBase, KnowledgeDocument, VendorCredential, Voice, Workflow, WorkflowEdge, WorkflowNode } from '../types';
import { apiRequest } from './apiClient';
type AnyRecord = Record<string, any>;
@@ -46,10 +46,27 @@ const mapAssistant = (raw: AnyRecord): Assistant => ({
/**
 * Converts a raw voice record from the backend into the UI `Voice` shape.
 *
 * Accepts both camelCase and snake_case keys where the two differ
 * (`voiceKey`/`voice_key`, `isSystem`/`is_system`). A vendor of
 * "siliconflow" (any letter case) is shown under its Chinese display name;
 * every other vendor string is passed through unchanged.
 */
const mapVoice = (raw: AnyRecord): Voice => {
  // Resolved once up front so the object literal has a single `vendor` key.
  const rawVendor = String(readField(raw, ['vendor'], ''));
  const vendor = rawVendor.toLowerCase() === 'siliconflow' ? '硅基流动' : rawVendor;

  return {
    id: String(readField(raw, ['id'], '')),
    name: readField(raw, ['name'], ''),
    vendor,
    gender: readField(raw, ['gender'], ''),
    language: readField(raw, ['language'], ''),
    description: readField(raw, ['description'], ''),
    model: readField(raw, ['model'], ''),
    voiceKey: readField(raw, ['voiceKey', 'voice_key'], ''),
    speed: Number(readField(raw, ['speed'], 1)),
    gain: Number(readField(raw, ['gain'], 0)),
    pitch: Number(readField(raw, ['pitch'], 0)),
    enabled: Boolean(readField(raw, ['enabled'], true)),
    isSystem: Boolean(readField(raw, ['isSystem', 'is_system'], false)),
  };
};
/**
 * Converts a raw vendor-credential record into the UI `VendorCredential`
 * shape, reading each field from its camelCase key first and its snake_case
 * key second, and defaulting to an empty string.
 */
const mapVendorCredential = (raw: AnyRecord): VendorCredential => {
  const vendorKey = String(readField(raw, ['vendorKey', 'vendor_key'], ''));
  const vendorName = readField(raw, ['vendorName', 'vendor_name'], '');
  const apiKey = readField(raw, ['apiKey', 'api_key'], '');
  const baseUrl = readField(raw, ['baseUrl', 'base_url'], '');
  return { vendorKey, vendorName, apiKey, baseUrl };
};
const mapWorkflowNode = (raw: AnyRecord): WorkflowNode => ({
@@ -178,6 +195,76 @@ export const fetchVoices = async (): Promise<Voice[]> => {
return list.map((item) => mapVoice(item));
};
/**
 * Creates a voice on the backend and returns the stored record.
 *
 * Missing fields get defaults (name "New Voice", vendor "SiliconFlow",
 * gender "Female", language "zh", speed 1, gain 0, pitch 0, enabled true).
 * The Chinese SiliconFlow display name is sent as "SiliconFlow".
 */
export const createVoice = async (data: Partial<Voice>): Promise<Voice> => {
  // An absent vendor and the Chinese display name both become "SiliconFlow".
  const vendor = !data.vendor || data.vendor === '硅基流动' ? 'SiliconFlow' : data.vendor;

  const body = {
    id: data.id || undefined,
    name: data.name || 'New Voice',
    vendor,
    gender: data.gender || 'Female',
    language: data.language || 'zh',
    description: data.description || '',
    model: data.model || undefined,
    voice_key: data.voiceKey || undefined,
    speed: data.speed ?? 1,
    gain: data.gain ?? 0,
    pitch: data.pitch ?? 0,
    enabled: data.enabled ?? true,
  };

  const created = await apiRequest<AnyRecord>('/voices', { method: 'POST', body });
  return mapVoice(created);
};
/**
 * Updates the voice identified by `id` and returns the stored record.
 *
 * Fields are sent as given (undefined fields stay undefined in the request
 * body); the Chinese SiliconFlow display name is sent as "SiliconFlow".
 *
 * @param id - Voice id; percent-encoded before being placed in the URL path.
 * @param data - Fields to change.
 */
export const updateVoice = async (id: string, data: Partial<Voice>): Promise<Voice> => {
  const payload = {
    name: data.name,
    vendor: data.vendor === '硅基流动' ? 'SiliconFlow' : data.vendor,
    gender: data.gender,
    language: data.language,
    description: data.description,
    model: data.model,
    voice_key: data.voiceKey,
    speed: data.speed,
    gain: data.gain,
    pitch: data.pitch,
    enabled: data.enabled,
  };
  // Encode the id so characters such as '?', '#', '%' or spaces cannot alter the request path.
  const path = `/voices/${encodeURIComponent(id)}`;
  const response = await apiRequest<AnyRecord>(path, { method: 'PUT', body: payload });
  return mapVoice(response);
};
/**
 * Deletes the voice identified by `id`.
 *
 * @param id - Voice id; percent-encoded before being placed in the URL path.
 */
export const deleteVoice = async (id: string): Promise<void> => {
  // Encode the id so characters such as '?', '#', '%' or spaces cannot alter the request path.
  await apiRequest(`/voices/${encodeURIComponent(id)}`, { method: 'DELETE' });
};
/**
 * Requests a synthesized preview for a voice and returns its audio URL.
 *
 * @param id - Voice id; percent-encoded before being placed in the URL path.
 * @param text - Text to synthesize.
 * @param speed - Optional playback speed sent with the request.
 * @param apiKey - Optional vendor API key sent with the request.
 * @returns The `audio_url` from the response.
 * @throws Error when the response reports failure or carries no audio URL;
 *   the message is the response's `error` field or "Preview failed".
 */
export const previewVoice = async (id: string, text: string, speed?: number, apiKey?: string): Promise<string> => {
  // Encode the id so characters such as '?', '#', '%' or spaces cannot alter the request path.
  const path = `/voices/${encodeURIComponent(id)}/preview`;
  const response = await apiRequest<{ success: boolean; audio_url?: string; error?: string }>(path, {
    method: 'POST',
    body: { text, speed, api_key: apiKey },
  });
  if (!response.success || !response.audio_url) {
    throw new Error(response.error || 'Preview failed');
  }
  return response.audio_url;
};
/**
 * Loads all stored vendor credentials.
 *
 * @returns The mapped credentials, or an empty array when the response has no `list`.
 */
export const fetchVendorCredentials = async (): Promise<VendorCredential[]> => {
  const { list } = await apiRequest<{ list?: AnyRecord[] }>('/voices/vendors/credentials');
  return (list || []).map((entry) => mapVendorCredential(entry));
};
/**
 * Creates or replaces the stored credential for a vendor.
 *
 * @param vendorKey - Vendor key; percent-encoded before being placed in the URL path.
 * @param data - Display name, API key and optional base URL. An empty
 *   `baseUrl` is omitted from the request body.
 * @returns The credential as stored by the backend.
 */
export const saveVendorCredential = async (vendorKey: string, data: { vendorName: string; apiKey: string; baseUrl?: string }): Promise<VendorCredential> => {
  // Encode the key so characters such as '/', '?' or '#' cannot alter the request path.
  const path = `/voices/vendors/credentials/${encodeURIComponent(vendorKey)}`;
  const response = await apiRequest<AnyRecord>(path, {
    method: 'PUT',
    body: {
      vendor_name: data.vendorName,
      api_key: data.apiKey,
      base_url: data.baseUrl || undefined,
    },
  });
  return mapVendorCredential(response);
};
export const fetchWorkflows = async (): Promise<Workflow[]> => {
const response = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>('/workflows');
const list = Array.isArray(response) ? response : (response.list || []);

View File

@@ -28,6 +28,20 @@ export interface Voice {
gender: string;
language: string;
description: string;
model?: string;
voiceKey?: string;
speed?: number;
gain?: number;
pitch?: number;
enabled?: boolean;
isSystem?: boolean;
}
/** A vendor API credential as used by the UI. */
export interface VendorCredential {
/** Identifier of the vendor this credential belongs to. */
vendorKey: string;
/** Display name of the vendor. */
vendorName: string;
/** The vendor API key. */
apiKey: string;
/** Optional API base URL for the vendor. */
baseUrl?: string;
}
export interface KnowledgeBase {