Fix talking voice error
This commit is contained in:
@@ -12,6 +12,18 @@ from ..schemas import (
|
|||||||
|
|
||||||
router = APIRouter(prefix="/assistants", tags=["Assistants"])
|
router = APIRouter(prefix="/assistants", tags=["Assistants"])
|
||||||
|
|
||||||
|
OPENAI_COMPATIBLE_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
|
||||||
|
OPENAI_COMPATIBLE_KNOWN_VOICES = {
|
||||||
|
"alex",
|
||||||
|
"anna",
|
||||||
|
"bella",
|
||||||
|
"benjamin",
|
||||||
|
"charles",
|
||||||
|
"claire",
|
||||||
|
"david",
|
||||||
|
"diana",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def _is_openai_compatible_vendor(vendor: Optional[str]) -> bool:
|
def _is_openai_compatible_vendor(vendor: Optional[str]) -> bool:
|
||||||
return (vendor or "").strip().lower() in {
|
return (vendor or "").strip().lower() in {
|
||||||
@@ -22,6 +34,24 @@ def _is_openai_compatible_vendor(vendor: Optional[str]) -> bool:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_openai_compatible_voice_key(voice_value: str, model: str) -> str:
|
||||||
|
raw = (voice_value or "").strip()
|
||||||
|
model_name = (model or "").strip() or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||||
|
if not raw:
|
||||||
|
return f"{model_name}:anna"
|
||||||
|
|
||||||
|
if ":" in raw:
|
||||||
|
voice_model, voice_id = raw.split(":", 1)
|
||||||
|
voice_model = voice_model.strip() or model_name
|
||||||
|
voice_id = voice_id.strip()
|
||||||
|
if voice_id.lower() in OPENAI_COMPATIBLE_KNOWN_VOICES:
|
||||||
|
voice_id = voice_id.lower()
|
||||||
|
return f"{voice_model}:{voice_id}"
|
||||||
|
|
||||||
|
voice_id = raw.lower() if raw.lower() in OPENAI_COMPATIBLE_KNOWN_VOICES else raw
|
||||||
|
return f"{model_name}:{voice_id}"
|
||||||
|
|
||||||
|
|
||||||
def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> dict:
|
def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> dict:
|
||||||
metadata = {
|
metadata = {
|
||||||
"systemPrompt": assistant.prompt or "",
|
"systemPrompt": assistant.prompt or "",
|
||||||
@@ -67,12 +97,17 @@ def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> dict:
|
|||||||
voice = db.query(Voice).filter(Voice.id == assistant.voice).first()
|
voice = db.query(Voice).filter(Voice.id == assistant.voice).first()
|
||||||
if voice:
|
if voice:
|
||||||
tts_provider = "openai_compatible" if _is_openai_compatible_vendor(voice.vendor) else "edge"
|
tts_provider = "openai_compatible" if _is_openai_compatible_vendor(voice.vendor) else "edge"
|
||||||
|
model = voice.model
|
||||||
|
runtime_voice = voice.voice_key or voice.id
|
||||||
|
if tts_provider == "openai_compatible":
|
||||||
|
model = model or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||||
|
runtime_voice = _normalize_openai_compatible_voice_key(runtime_voice, model)
|
||||||
metadata["services"]["tts"] = {
|
metadata["services"]["tts"] = {
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"provider": tts_provider,
|
"provider": tts_provider,
|
||||||
"model": voice.model,
|
"model": model,
|
||||||
"apiKey": voice.api_key if tts_provider == "openai_compatible" else None,
|
"apiKey": voice.api_key if tts_provider == "openai_compatible" else None,
|
||||||
"voice": voice.voice_key or voice.id,
|
"voice": runtime_voice,
|
||||||
"speed": assistant.speed or voice.speed,
|
"speed": assistant.speed or voice.speed,
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -53,11 +53,20 @@ class OpenAICompatibleTTSService(BaseTTSService):
|
|||||||
sample_rate: Output sample rate (8000, 16000, 24000, 32000, 44100)
|
sample_rate: Output sample rate (8000, 16000, 24000, 32000, 44100)
|
||||||
speed: Speech speed (0.25 to 4.0)
|
speed: Speech speed (0.25 to 4.0)
|
||||||
"""
|
"""
|
||||||
# Resolve voice name
|
# Resolve voice name (case-insensitive), and normalize "model:VoiceId" suffix.
|
||||||
if voice in self.VOICES:
|
resolved_voice = (voice or "").strip()
|
||||||
full_voice = self.VOICES[voice]
|
voice_lookup = resolved_voice.lower()
|
||||||
|
if voice_lookup in self.VOICES:
|
||||||
|
full_voice = self.VOICES[voice_lookup]
|
||||||
|
elif ":" in resolved_voice:
|
||||||
|
model_part, voice_part = resolved_voice.split(":", 1)
|
||||||
|
normalized_voice_part = voice_part.strip().lower()
|
||||||
|
if normalized_voice_part in self.VOICES:
|
||||||
|
full_voice = f"{(model_part or model).strip()}:{normalized_voice_part}"
|
||||||
else:
|
else:
|
||||||
full_voice = voice
|
full_voice = resolved_voice
|
||||||
|
else:
|
||||||
|
full_voice = resolved_voice
|
||||||
|
|
||||||
super().__init__(voice=full_voice, sample_rate=sample_rate, speed=speed)
|
super().__init__(voice=full_voice, sample_rate=sample_rate, speed=speed)
|
||||||
|
|
||||||
|
|||||||
@@ -16,12 +16,37 @@ const isOpenAICompatibleVendor = (vendor?: string) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const OPENAI_COMPATIBLE_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B';
|
const OPENAI_COMPATIBLE_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B';
|
||||||
|
const OPENAI_COMPATIBLE_KNOWN_VOICES = new Set([
|
||||||
|
'alex',
|
||||||
|
'anna',
|
||||||
|
'bella',
|
||||||
|
'benjamin',
|
||||||
|
'charles',
|
||||||
|
'claire',
|
||||||
|
'david',
|
||||||
|
'diana',
|
||||||
|
]);
|
||||||
|
|
||||||
|
const normalizeOpenAICompatibleVoiceKey = (voiceValue: string, model?: string) => {
|
||||||
|
const raw = String(voiceValue || '').trim();
|
||||||
|
const modelName = String(model || '').trim() || OPENAI_COMPATIBLE_DEFAULT_MODEL;
|
||||||
|
if (!raw) return `${modelName}:anna`;
|
||||||
|
|
||||||
|
if (raw.includes(':')) {
|
||||||
|
const [prefix, ...rest] = raw.split(':');
|
||||||
|
const voiceIdRaw = rest.join(':').trim();
|
||||||
|
const voiceIdLower = voiceIdRaw.toLowerCase();
|
||||||
|
const normalizedVoiceId = OPENAI_COMPATIBLE_KNOWN_VOICES.has(voiceIdLower) ? voiceIdLower : voiceIdRaw;
|
||||||
|
return `${(prefix || modelName).trim()}:${normalizedVoiceId}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
const rawLower = raw.toLowerCase();
|
||||||
|
const normalizedVoiceId = OPENAI_COMPATIBLE_KNOWN_VOICES.has(rawLower) ? rawLower : raw;
|
||||||
|
return `${modelName}:${normalizedVoiceId}`;
|
||||||
|
};
|
||||||
|
|
||||||
const buildOpenAICompatibleVoiceKey = (voiceId: string, model?: string) => {
|
const buildOpenAICompatibleVoiceKey = (voiceId: string, model?: string) => {
|
||||||
const id = String(voiceId || '').trim();
|
return normalizeOpenAICompatibleVoiceKey(voiceId, model);
|
||||||
if (!id) return '';
|
|
||||||
if (id.includes(':')) return id;
|
|
||||||
return `${model || OPENAI_COMPATIBLE_DEFAULT_MODEL}:${id}`;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const resolveRuntimeTtsVoice = (selectedVoiceId: string, voice: Voice) => {
|
const resolveRuntimeTtsVoice = (selectedVoiceId: string, voice: Voice) => {
|
||||||
@@ -29,13 +54,14 @@ const resolveRuntimeTtsVoice = (selectedVoiceId: string, voice: Voice) => {
|
|||||||
if (!isOpenAICompatibleVendor(voice.vendor)) {
|
if (!isOpenAICompatibleVendor(voice.vendor)) {
|
||||||
return explicitKey || selectedVoiceId;
|
return explicitKey || selectedVoiceId;
|
||||||
}
|
}
|
||||||
|
const resolved = normalizeOpenAICompatibleVoiceKey(explicitKey || selectedVoiceId, voice.model);
|
||||||
if (voice.isSystem) {
|
if (voice.isSystem) {
|
||||||
const canonical = buildOpenAICompatibleVoiceKey(selectedVoiceId, voice.model);
|
const canonical = normalizeOpenAICompatibleVoiceKey(selectedVoiceId, voice.model);
|
||||||
if (!explicitKey) return canonical;
|
if (!explicitKey) return canonical;
|
||||||
const explicitSuffix = explicitKey.includes(':') ? explicitKey.split(':').pop() : explicitKey;
|
const explicitSuffix = explicitKey.includes(':') ? explicitKey.split(':').pop() : explicitKey;
|
||||||
if (explicitSuffix && explicitSuffix !== selectedVoiceId) return canonical;
|
if (explicitSuffix && explicitSuffix !== selectedVoiceId) return canonical;
|
||||||
}
|
}
|
||||||
return explicitKey || buildOpenAICompatibleVoiceKey(selectedVoiceId, voice.model);
|
return resolved;
|
||||||
};
|
};
|
||||||
|
|
||||||
const renderToolIcon = (icon: string) => {
|
const renderToolIcon = (icon: string) => {
|
||||||
|
|||||||
Reference in New Issue
Block a user