Add opener audio functionality to Assistant model and related schemas, enabling audio generation and playback features. Update API routes and frontend components to support opener audio management, including status retrieval and generation controls.
This commit is contained in:
@@ -138,6 +138,25 @@ class Assistant(Base):
|
|||||||
|
|
||||||
user = relationship("User")
|
user = relationship("User")
|
||||||
call_records = relationship("CallRecord", back_populates="assistant")
|
call_records = relationship("CallRecord", back_populates="assistant")
|
||||||
|
opener_audio = relationship("AssistantOpenerAudio", back_populates="assistant", uselist=False, cascade="all, delete-orphan")
|
||||||
|
|
||||||
|
|
||||||
|
class AssistantOpenerAudio(Base):
    """Metadata row describing the pre-rendered opener audio of one assistant."""

    __tablename__ = "assistant_opener_audio"

    # Primary key is also the foreign key to assistants.id.
    assistant_id: Mapped[str] = mapped_column(String(64), ForeignKey("assistants.id"), primary_key=True)
    # Whether the pre-rendered clip should be used; off by default.
    enabled: Mapped[bool] = mapped_column(default=False)
    # Path of the stored audio file; NULL when no file path has been recorded.
    file_path: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
    # Audio format descriptors; defaults are pcm_s16le / 16000 Hz / 1 channel.
    encoding: Mapped[str] = mapped_column(String(32), default="pcm_s16le")
    sample_rate_hz: Mapped[int] = mapped_column(Integer, default=16000)
    channels: Mapped[int] = mapped_column(Integer, default=1)
    # Clip length in milliseconds; 0 by default.
    duration_ms: Mapped[int] = mapped_column(Integer, default=0)
    # Hash of the opener text and fingerprint of the TTS settings used for the clip.
    text_hash: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
    tts_fingerprint: Mapped[Optional[str]] = mapped_column(String(256), nullable=True)
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
    # NOTE: only an insert-time default is declared (no onupdate), so code that
    # modifies the row has to set updated_at itself.
    updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)

    # Back-reference to the owning assistant (Assistant.opener_audio).
    assistant = relationship("Assistant", back_populates="opener_audio")
|
||||||
|
|
||||||
|
|
||||||
# ============ Knowledge Base ============
|
# ============ Knowledge Base ============
|
||||||
|
|||||||
@@ -1,18 +1,33 @@
|
|||||||
|
import audioop
|
||||||
|
import hashlib
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import wave
|
||||||
|
from pathlib import Path
|
||||||
|
import httpx
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from ..db import get_db
|
from ..db import get_db
|
||||||
from ..models import Assistant, LLMModel, ASRModel, Voice
|
from ..models import Assistant, AssistantOpenerAudio, LLMModel, ASRModel, Voice
|
||||||
from ..schemas import (
|
from ..schemas import (
|
||||||
AssistantCreate, AssistantUpdate, AssistantOut, AssistantEngineConfigResponse
|
AssistantCreate,
|
||||||
|
AssistantUpdate,
|
||||||
|
AssistantOut,
|
||||||
|
AssistantEngineConfigResponse,
|
||||||
|
AssistantOpenerAudioGenerateRequest,
|
||||||
|
AssistantOpenerAudioOut,
|
||||||
)
|
)
|
||||||
|
|
||||||
router = APIRouter(prefix="/assistants", tags=["Assistants"])
|
router = APIRouter(prefix="/assistants", tags=["Assistants"])
|
||||||
|
|
||||||
OPENAI_COMPATIBLE_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
|
OPENAI_COMPATIBLE_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
|
||||||
|
OPENAI_COMPATIBLE_DEFAULT_BASE_URL = "https://api.siliconflow.cn/v1"
|
||||||
|
OPENER_AUDIO_DIR = Path(__file__).resolve().parents[2] / "data" / "opener_audio"
|
||||||
OPENAI_COMPATIBLE_KNOWN_VOICES = {
|
OPENAI_COMPATIBLE_KNOWN_VOICES = {
|
||||||
"alex",
|
"alex",
|
||||||
"anna",
|
"anna",
|
||||||
@@ -163,6 +178,19 @@ def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> tuple[Dict[s
|
|||||||
"kbId": assistant.knowledge_base_id,
|
"kbId": assistant.knowledge_base_id,
|
||||||
"nResults": 5,
|
"nResults": 5,
|
||||||
}
|
}
|
||||||
|
opener_audio = assistant.opener_audio
|
||||||
|
opener_audio_ready = bool(opener_audio and opener_audio.file_path and Path(opener_audio.file_path).exists())
|
||||||
|
metadata["openerAudio"] = {
|
||||||
|
"enabled": bool(opener_audio.enabled) if opener_audio else False,
|
||||||
|
"ready": opener_audio_ready,
|
||||||
|
"encoding": opener_audio.encoding if opener_audio else "pcm_s16le",
|
||||||
|
"sampleRateHz": int(opener_audio.sample_rate_hz) if opener_audio else 16000,
|
||||||
|
"channels": int(opener_audio.channels) if opener_audio else 1,
|
||||||
|
"durationMs": int(opener_audio.duration_ms) if opener_audio else 0,
|
||||||
|
"textHash": opener_audio.text_hash if opener_audio else None,
|
||||||
|
"ttsFingerprint": opener_audio.tts_fingerprint if opener_audio else None,
|
||||||
|
"pcmUrl": f"/api/assistants/{assistant.id}/opener-audio/pcm" if opener_audio_ready else None,
|
||||||
|
}
|
||||||
return metadata, warnings
|
return metadata, warnings
|
||||||
|
|
||||||
|
|
||||||
@@ -189,6 +217,8 @@ def _build_engine_assistant_config(db: Session, assistant: Assistant) -> Dict[st
|
|||||||
|
|
||||||
|
|
||||||
def assistant_to_dict(assistant: Assistant) -> dict:
|
def assistant_to_dict(assistant: Assistant) -> dict:
|
||||||
|
opener_audio = assistant.opener_audio
|
||||||
|
opener_audio_ready = bool(opener_audio and opener_audio.file_path and Path(opener_audio.file_path).exists())
|
||||||
return {
|
return {
|
||||||
"id": assistant.id,
|
"id": assistant.id,
|
||||||
"name": assistant.name,
|
"name": assistant.name,
|
||||||
@@ -196,6 +226,10 @@ def assistant_to_dict(assistant: Assistant) -> dict:
|
|||||||
"firstTurnMode": assistant.first_turn_mode or "bot_first",
|
"firstTurnMode": assistant.first_turn_mode or "bot_first",
|
||||||
"opener": assistant.opener or "",
|
"opener": assistant.opener or "",
|
||||||
"generatedOpenerEnabled": bool(assistant.generated_opener_enabled),
|
"generatedOpenerEnabled": bool(assistant.generated_opener_enabled),
|
||||||
|
"openerAudioEnabled": bool(opener_audio.enabled) if opener_audio else False,
|
||||||
|
"openerAudioReady": opener_audio_ready,
|
||||||
|
"openerAudioDurationMs": int(opener_audio.duration_ms) if opener_audio else 0,
|
||||||
|
"openerAudioUpdatedAt": opener_audio.updated_at if opener_audio else None,
|
||||||
"prompt": assistant.prompt or "",
|
"prompt": assistant.prompt or "",
|
||||||
"knowledgeBaseId": assistant.knowledge_base_id,
|
"knowledgeBaseId": assistant.knowledge_base_id,
|
||||||
"language": assistant.language,
|
"language": assistant.language,
|
||||||
@@ -238,6 +272,114 @@ def _apply_assistant_update(assistant: Assistant, update_data: dict) -> None:
|
|||||||
setattr(assistant, field_map.get(field, field), value)
|
setattr(assistant, field_map.get(field, field), value)
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_assistant_opener_audio(db: Session, assistant: Assistant) -> AssistantOpenerAudio:
    """Return the assistant's opener-audio row, creating a disabled one if absent.

    A newly created row is added to the session and flushed, not committed;
    committing is left to the caller.
    """
    opener_row = assistant.opener_audio
    if not opener_row:
        opener_row = AssistantOpenerAudio(assistant_id=assistant.id, enabled=False)
        db.add(opener_row)
        db.flush()
    return opener_row
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_tts_runtime_for_assistant(db: Session, assistant: Assistant) -> tuple[Dict[str, Any], Optional[Voice]]:
    """Return the assistant's resolved TTS service config and its Voice row.

    The TTS config is ``metadata["services"]["tts"]`` from the runtime
    metadata; any missing or non-dict level yields an empty dict. The voice is
    looked up by ``assistant.voice`` and is None when unset or not found.
    """
    metadata, _warnings = _resolve_runtime_metadata(db, assistant)

    services = metadata.get("services")
    if not isinstance(services, dict):
        services = {}

    tts_cfg = services.get("tts")
    if not isinstance(tts_cfg, dict):
        tts_cfg = {}

    voice_row = None
    if assistant.voice:
        voice_row = db.query(Voice).filter(Voice.id == assistant.voice).first()
    return tts_cfg, voice_row
|
||||||
|
|
||||||
|
|
||||||
|
def _tts_fingerprint(tts_cfg: Dict[str, Any], opener_text: str) -> str:
    """Return a SHA-256 hex digest identifying the TTS settings plus opener text.

    Only provider, model, voice and speed are taken from ``tts_cfg`` (missing
    keys count as None); other keys such as credentials do not affect the
    result. The digest is computed over the ``str()`` of the identity dict, so
    the key order below must stay fixed to keep fingerprints comparable.
    """
    identity = {key: tts_cfg.get(key) for key in ("provider", "model", "voice", "speed")}
    identity["text"] = opener_text

    hasher = hashlib.sha256()
    hasher.update(str(identity).encode("utf-8"))
    return hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _synthesize_openai_compatible_wav(
    *,
    text: str,
    model: str,
    voice_key: str,
    speed: float,
    api_key: str,
    base_url: str,
) -> bytes:
    """Synthesize ``text`` through an OpenAI-compatible ``/audio/speech`` endpoint.

    Args:
        text: Text to synthesize.
        model: Vendor model id; an empty value falls back to
            OPENAI_COMPATIBLE_DEFAULT_MODEL.
        voice_key: Vendor voice identifier, sent as-is.
        speed: Playback speed, sent as-is.
        api_key: Bearer token for the vendor.
        base_url: API root; a trailing slash is tolerated.

    Returns:
        The raw response body (WAV was requested via ``response_format``).

    Raises:
        HTTPException: 502 when the request cannot be completed (timeout,
            connection failure, invalid URL) or the vendor answers with a
            non-200 status.
    """
    payload = {
        "model": model or OPENAI_COMPATIBLE_DEFAULT_MODEL,
        "input": text,
        "voice": voice_key,
        "response_format": "wav",
        "speed": speed,
    }
    try:
        with httpx.Client(timeout=45.0) as client:
            response = client.post(
                f"{base_url.rstrip('/')}/audio/speech",
                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
                json=payload,
            )
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # Transport-level failures are the vendor's/network's fault, not a server bug:
        # report them as 502 like any other vendor error instead of leaking a 500.
        raise HTTPException(status_code=502, detail=f"TTS vendor error: {exc}") from exc

    if response.status_code != 200:
        detail = response.text
        try:
            detail_json = response.json()
            detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
        except (ValueError, AttributeError, TypeError):
            # Body is not JSON or not shaped like an error object: keep the raw text.
            pass
        raise HTTPException(status_code=502, detail=f"TTS vendor error: {detail}")
    return response.content
|
||||||
|
|
||||||
|
|
||||||
|
def _wav_to_pcm16_mono_16k(wav_bytes: bytes) -> tuple[bytes, int]:
    """Convert a 16-bit WAV payload to raw PCM s16le, mono, 16 kHz.

    Args:
        wav_bytes: Complete WAV file contents.

    Returns:
        ``(pcm_bytes, duration_ms)`` where the duration is derived from the
        converted byte length (2 bytes per sample at 16000 Hz).

    Raises:
        HTTPException: 400 when the payload is not a readable WAV file, is not
            16-bit, or has more than two channels.
    """
    try:
        with wave.open(io.BytesIO(wav_bytes), "rb") as wav_file:
            channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()
            sample_rate = wav_file.getframerate()
            raw = wav_file.readframes(wav_file.getnframes())
    except (wave.Error, EOFError) as exc:
        raise HTTPException(status_code=400, detail=f"Invalid WAV data: {exc}") from exc

    if sample_width != 2:
        raise HTTPException(status_code=400, detail=f"Unsupported WAV sample width: {sample_width * 8}bit")

    # audioop.tomono only understands interleaved stereo; feeding it 3+ channels
    # would silently produce garbage, so reject those layouts explicitly.
    if channels > 2:
        raise HTTPException(status_code=400, detail=f"Unsupported WAV channel count: {channels}")

    if channels == 2:
        raw = audioop.tomono(raw, sample_width, 0.5, 0.5)

    if sample_rate != 16000:
        raw, _ = audioop.ratecv(raw, sample_width, 1, sample_rate, 16000, None)

    # Integer arithmetic avoids float rounding dropping a millisecond.
    duration_ms = (len(raw) * 1000) // (16000 * 2)
    return raw, duration_ms
|
||||||
|
|
||||||
|
|
||||||
|
def _persist_opener_audio_pcm(assistant_id: str, pcm_bytes: bytes) -> str:
    """Store the PCM clip as ``<assistant_id>.pcm`` under OPENER_AUDIO_DIR.

    The bytes are written to a temporary sibling file and then moved into
    place with ``os.replace``, so a reader serving the clip during a
    regeneration never observes a partially written file.

    Returns:
        The final file path as a string.
    """
    OPENER_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
    file_path = OPENER_AUDIO_DIR / f"{assistant_id}.pcm"
    tmp_path = OPENER_AUDIO_DIR / f"{assistant_id}.pcm.tmp"
    tmp_path.write_bytes(pcm_bytes)
    # Same directory, so the rename is atomic and overwrites any previous clip.
    os.replace(tmp_path, file_path)
    return str(file_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _opener_audio_out(record: Optional[AssistantOpenerAudio]) -> AssistantOpenerAudioOut:
    """Build the API representation of an opener-audio row.

    With no row, the schema defaults are returned. ``ready`` is true only when
    the row has a file path and that file currently exists on disk.
    """
    if not record:
        return AssistantOpenerAudioOut()

    stored_path = record.file_path
    file_on_disk = bool(stored_path and Path(stored_path).exists())
    fields = {
        "enabled": bool(record.enabled),
        "ready": file_on_disk,
        "encoding": record.encoding,
        "sample_rate_hz": record.sample_rate_hz,
        "channels": record.channels,
        "duration_ms": record.duration_ms,
        "updated_at": record.updated_at,
        "text_hash": record.text_hash,
        "tts_fingerprint": record.tts_fingerprint,
    }
    return AssistantOpenerAudioOut(**fields)
|
||||||
|
|
||||||
|
|
||||||
# ============ Assistants ============
|
# ============ Assistants ============
|
||||||
@router.get("")
|
@router.get("")
|
||||||
def list_assistants(
|
def list_assistants(
|
||||||
@@ -316,9 +458,132 @@ def create_assistant(data: AssistantCreate, db: Session = Depends(get_db)):
|
|||||||
db.add(assistant)
|
db.add(assistant)
|
||||||
db.commit()
|
db.commit()
|
||||||
db.refresh(assistant)
|
db.refresh(assistant)
|
||||||
|
opener_audio = _ensure_assistant_opener_audio(db, assistant)
|
||||||
|
opener_audio.enabled = bool(data.openerAudioEnabled)
|
||||||
|
opener_audio.updated_at = datetime.utcnow()
|
||||||
|
db.commit()
|
||||||
|
db.refresh(assistant)
|
||||||
return assistant_to_dict(assistant)
|
return assistant_to_dict(assistant)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{id}/opener-audio", response_model=AssistantOpenerAudioOut)
def get_assistant_opener_audio(id: str, db: Session = Depends(get_db)):
    """Return the opener-audio status of an assistant (404 if the assistant is unknown)."""
    found = db.query(Assistant).filter(Assistant.id == id).first()
    if found:
        return _opener_audio_out(found.opener_audio)
    raise HTTPException(status_code=404, detail="Assistant not found")
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/{id}/opener-audio/pcm")
def get_assistant_opener_audio_pcm(id: str, db: Session = Depends(get_db)):
    """Serve the stored opener-audio file of an assistant as a binary download.

    Responds 404 when the assistant does not exist, when no clip path has been
    recorded, or when the recorded file is no longer on disk.
    """
    assistant = db.query(Assistant).filter(Assistant.id == id).first()
    if not assistant:
        raise HTTPException(status_code=404, detail="Assistant not found")

    opener_row = assistant.opener_audio
    if not (opener_row and opener_row.file_path):
        raise HTTPException(status_code=404, detail="Opener audio not generated")

    clip_path = Path(opener_row.file_path)
    if clip_path.exists():
        return FileResponse(
            str(clip_path),
            media_type="application/octet-stream",
            filename=f"{assistant.id}.pcm",
        )
    raise HTTPException(status_code=404, detail="Opener audio file missing")
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/{id}/opener-audio/generate", response_model=AssistantOpenerAudioOut)
def generate_assistant_opener_audio(
    id: str,
    data: AssistantOpenerAudioGenerateRequest,
    db: Session = Depends(get_db),
):
    """Synthesize the assistant's opener, store it as PCM and enable opener audio.

    The text is ``data.text`` when provided, otherwise the assistant's stored
    opener. Synthesis goes through the assistant's resolved TTS provider
    (``openai_compatible`` or ``dashscope``); the resulting WAV is converted to
    PCM s16le mono 16 kHz, written to disk, and the opener-audio row is updated
    and committed.

    Raises:
        HTTPException: 404 for an unknown assistant; 400 when voice output is
            disabled, the text is empty, the provider is unsupported or no API
            key can be resolved; 502 when the vendor call fails or returns
            audio that is not valid WAV.
    """
    assistant = db.query(Assistant).filter(Assistant.id == id).first()
    if not assistant:
        raise HTTPException(status_code=404, detail="Assistant not found")
    if not assistant.voice_output_enabled:
        raise HTTPException(status_code=400, detail="Voice output is disabled")

    opener_text = (data.text if data.text is not None else assistant.opener or "").strip()
    if not opener_text:
        raise HTTPException(status_code=400, detail="Opener text is empty")

    tts_cfg, voice = _resolve_tts_runtime_for_assistant(db, assistant)
    provider = str(tts_cfg.get("provider") or "").strip().lower()
    if provider not in {"openai_compatible", "dashscope"}:
        raise HTTPException(status_code=400, detail=f"Unsupported provider for preloaded opener audio: {provider or 'unknown'}")

    speed = float(tts_cfg.get("speed") or assistant.speed or 1.0)
    voice_key = str(tts_cfg.get("voice") or "").strip()
    # Left empty when unconfigured so each provider branch can apply its own
    # default; defaulting here made the DashScope fallback unreachable.
    model = str(tts_cfg.get("model") or "").strip()
    api_key = str(tts_cfg.get("apiKey") or "").strip()
    base_url = str(tts_cfg.get("baseUrl") or "").strip()

    if provider == "openai_compatible":
        api_key = _fallback_tts_api_key(api_key, voice, "SILICONFLOW_API_KEY")
        if not api_key:
            raise HTTPException(status_code=400, detail="TTS API key is missing")
        wav_bytes = _synthesize_openai_compatible_wav(
            text=opener_text,
            model=model or OPENAI_COMPATIBLE_DEFAULT_MODEL,
            voice_key=voice_key,
            speed=speed,
            api_key=api_key,
            base_url=base_url or OPENAI_COMPATIBLE_DEFAULT_BASE_URL,
        )
    else:
        # Imported lazily, as in the original code (kept local to this branch).
        from .voices import _synthesize_dashscope_preview, DASHSCOPE_DEFAULT_BASE_URL, DASHSCOPE_DEFAULT_MODEL, DASHSCOPE_DEFAULT_VOICE_KEY

        api_key = _fallback_tts_api_key(api_key, voice, "DASHSCOPE_API_KEY")
        if not api_key:
            raise HTTPException(status_code=400, detail="DashScope API key is missing")
        try:
            wav_bytes = _synthesize_dashscope_preview(
                text=opener_text,
                api_key=api_key,
                base_url=base_url or DASHSCOPE_DEFAULT_BASE_URL,
                model=model or DASHSCOPE_DEFAULT_MODEL,
                voice_key=voice_key or DASHSCOPE_DEFAULT_VOICE_KEY,
                speed=speed,
            )
        except HTTPException:
            # Already carries a meaningful status/detail; do not re-wrap it.
            raise
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"DashScope opener audio generation failed: {exc}") from exc

    try:
        pcm_bytes, duration_ms = _wav_to_pcm16_mono_16k(wav_bytes)
    except (wave.Error, EOFError) as exc:
        # The WAV came from the vendor, so unreadable audio is an upstream failure.
        raise HTTPException(status_code=502, detail=f"TTS vendor returned invalid WAV audio: {exc}") from exc

    record = _ensure_assistant_opener_audio(db, assistant)
    record.enabled = True
    record.file_path = _persist_opener_audio_pcm(assistant.id, pcm_bytes)
    record.encoding = "pcm_s16le"
    record.sample_rate_hz = 16000
    record.channels = 1
    record.duration_ms = duration_ms
    record.text_hash = hashlib.sha256(opener_text.encode("utf-8")).hexdigest()
    record.tts_fingerprint = _tts_fingerprint(tts_cfg, opener_text)

    now = datetime.utcnow()
    if not record.created_at:
        record.created_at = now
    record.updated_at = now
    assistant.updated_at = now
    db.commit()
    db.refresh(assistant)
    return _opener_audio_out(assistant.opener_audio)


def _fallback_tts_api_key(configured: str, voice: Optional[Voice], provider_env_var: str) -> str:
    """Resolve a TTS API key: configured value, then the voice row, then the environment.

    The environment lookup tries ``provider_env_var`` first and ``TTS_API_KEY``
    second. Returns an empty string when nothing is found.
    """
    if configured:
        return configured
    if voice and voice.api_key:
        voice_key_value = voice.api_key.strip()
        if voice_key_value:
            return voice_key_value
    return (os.getenv(provider_env_var, "") or os.getenv("TTS_API_KEY", "")).strip()
|
||||||
|
|
||||||
|
|
||||||
@router.put("/{id}")
|
@router.put("/{id}")
|
||||||
def update_assistant(id: str, data: AssistantUpdate, db: Session = Depends(get_db)):
|
def update_assistant(id: str, data: AssistantUpdate, db: Session = Depends(get_db)):
|
||||||
"""更新助手"""
|
"""更新助手"""
|
||||||
@@ -327,7 +592,12 @@ def update_assistant(id: str, data: AssistantUpdate, db: Session = Depends(get_d
|
|||||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||||
|
|
||||||
update_data = data.model_dump(exclude_unset=True)
|
update_data = data.model_dump(exclude_unset=True)
|
||||||
|
opener_audio_enabled = update_data.pop("openerAudioEnabled", None)
|
||||||
_apply_assistant_update(assistant, update_data)
|
_apply_assistant_update(assistant, update_data)
|
||||||
|
if opener_audio_enabled is not None:
|
||||||
|
record = _ensure_assistant_opener_audio(db, assistant)
|
||||||
|
record.enabled = bool(opener_audio_enabled)
|
||||||
|
record.updated_at = datetime.utcnow()
|
||||||
|
|
||||||
assistant.updated_at = datetime.utcnow()
|
assistant.updated_at = datetime.utcnow()
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|||||||
@@ -275,6 +275,7 @@ class AssistantBase(BaseModel):
|
|||||||
firstTurnMode: str = "bot_first"
|
firstTurnMode: str = "bot_first"
|
||||||
opener: str = ""
|
opener: str = ""
|
||||||
generatedOpenerEnabled: bool = False
|
generatedOpenerEnabled: bool = False
|
||||||
|
openerAudioEnabled: bool = False
|
||||||
prompt: str = ""
|
prompt: str = ""
|
||||||
knowledgeBaseId: Optional[str] = None
|
knowledgeBaseId: Optional[str] = None
|
||||||
language: str = "zh"
|
language: str = "zh"
|
||||||
@@ -304,6 +305,7 @@ class AssistantUpdate(BaseModel):
|
|||||||
firstTurnMode: Optional[str] = None
|
firstTurnMode: Optional[str] = None
|
||||||
opener: Optional[str] = None
|
opener: Optional[str] = None
|
||||||
generatedOpenerEnabled: Optional[bool] = None
|
generatedOpenerEnabled: Optional[bool] = None
|
||||||
|
openerAudioEnabled: Optional[bool] = None
|
||||||
prompt: Optional[str] = None
|
prompt: Optional[str] = None
|
||||||
knowledgeBaseId: Optional[str] = None
|
knowledgeBaseId: Optional[str] = None
|
||||||
language: Optional[str] = None
|
language: Optional[str] = None
|
||||||
@@ -349,6 +351,7 @@ class AssistantRuntimeMetadata(BaseModel):
|
|||||||
knowledgeBaseId: Optional[str] = None
|
knowledgeBaseId: Optional[str] = None
|
||||||
knowledge: Dict[str, Any] = Field(default_factory=dict)
|
knowledge: Dict[str, Any] = Field(default_factory=dict)
|
||||||
history: Dict[str, Any] = Field(default_factory=dict)
|
history: Dict[str, Any] = Field(default_factory=dict)
|
||||||
|
openerAudio: Dict[str, Any] = Field(default_factory=dict)
|
||||||
assistantId: Optional[str] = None
|
assistantId: Optional[str] = None
|
||||||
configVersionId: Optional[str] = None
|
configVersionId: Optional[str] = None
|
||||||
|
|
||||||
@@ -362,6 +365,22 @@ class AssistantEngineConfigResponse(BaseModel):
|
|||||||
warnings: List[str] = Field(default_factory=list)
|
warnings: List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class AssistantOpenerAudioGenerateRequest(BaseModel):
    """Request body for generating an assistant's opener audio."""

    # Optional text to synthesize; defaults to None (field may be omitted).
    text: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class AssistantOpenerAudioOut(BaseModel):
    """Opener-audio status payload; every field has a default so it can be built empty."""

    # Whether opener audio is switched on.
    enabled: bool = False
    # Whether a clip is available for playback.
    ready: bool = False
    # Audio format descriptors (defaults: pcm_s16le, 16000 Hz, 1 channel).
    encoding: str = "pcm_s16le"
    sample_rate_hz: int = 16000
    channels: int = 1
    # Clip length in milliseconds.
    duration_ms: int = 0
    # Last modification time of the record, if any.
    updated_at: Optional[datetime] = None
    # Hash of the opener text and fingerprint of the TTS settings, if recorded.
    text_hash: Optional[str] = None
    tts_fingerprint: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class AssistantStats(BaseModel):
|
class AssistantStats(BaseModel):
|
||||||
assistant_id: str
|
assistant_id: str
|
||||||
total_calls: int = 0
|
total_calls: int = 0
|
||||||
|
|||||||
@@ -60,6 +60,7 @@ _AGENT_SECTION_KEY_MAP: Dict[str, Dict[str, str]] = {
|
|||||||
"enabled": "duplex_enabled",
|
"enabled": "duplex_enabled",
|
||||||
"greeting": "duplex_greeting",
|
"greeting": "duplex_greeting",
|
||||||
"system_prompt": "duplex_system_prompt",
|
"system_prompt": "duplex_system_prompt",
|
||||||
|
"opener_audio_file": "duplex_opener_audio_file",
|
||||||
},
|
},
|
||||||
"barge_in": {
|
"barge_in": {
|
||||||
"min_duration_ms": "barge_in_min_duration_ms",
|
"min_duration_ms": "barge_in_min_duration_ms",
|
||||||
@@ -96,6 +97,7 @@ _AGENT_SETTING_KEYS = {
|
|||||||
"duplex_enabled",
|
"duplex_enabled",
|
||||||
"duplex_greeting",
|
"duplex_greeting",
|
||||||
"duplex_system_prompt",
|
"duplex_system_prompt",
|
||||||
|
"duplex_opener_audio_file",
|
||||||
"barge_in_min_duration_ms",
|
"barge_in_min_duration_ms",
|
||||||
"barge_in_silence_tolerance_ms",
|
"barge_in_silence_tolerance_ms",
|
||||||
"tools",
|
"tools",
|
||||||
@@ -452,6 +454,10 @@ class Settings(BaseSettings):
|
|||||||
default="You are a helpful, friendly voice assistant. Keep your responses concise and conversational.",
|
default="You are a helpful, friendly voice assistant. Keep your responses concise and conversational.",
|
||||||
description="System prompt for LLM"
|
description="System prompt for LLM"
|
||||||
)
|
)
|
||||||
|
duplex_opener_audio_file: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
description="Optional opener audio file path for standalone engine mode (.pcm or .wav)"
|
||||||
|
)
|
||||||
|
|
||||||
# Barge-in (interruption) Configuration
|
# Barge-in (interruption) Configuration
|
||||||
barge_in_min_duration_ms: int = Field(
|
barge_in_min_duration_ms: int = Field(
|
||||||
|
|||||||
@@ -12,12 +12,17 @@ event-driven design.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import audioop
|
||||||
|
import io
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
import wave
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
|
from typing import Any, Awaitable, Callable, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import aiohttp
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
@@ -203,6 +208,7 @@ class DuplexPipeline:
|
|||||||
self._runtime_first_turn_mode: str = "bot_first"
|
self._runtime_first_turn_mode: str = "bot_first"
|
||||||
self._runtime_greeting: Optional[str] = None
|
self._runtime_greeting: Optional[str] = None
|
||||||
self._runtime_generated_opener_enabled: Optional[bool] = None
|
self._runtime_generated_opener_enabled: Optional[bool] = None
|
||||||
|
self._runtime_opener_audio: Dict[str, Any] = {}
|
||||||
self._runtime_barge_in_enabled: Optional[bool] = None
|
self._runtime_barge_in_enabled: Optional[bool] = None
|
||||||
self._runtime_barge_in_min_duration_ms: Optional[int] = None
|
self._runtime_barge_in_min_duration_ms: Optional[int] = None
|
||||||
self._runtime_knowledge: Dict[str, Any] = {}
|
self._runtime_knowledge: Dict[str, Any] = {}
|
||||||
@@ -320,6 +326,9 @@ class DuplexPipeline:
|
|||||||
knowledge = metadata.get("knowledge")
|
knowledge = metadata.get("knowledge")
|
||||||
if isinstance(knowledge, dict):
|
if isinstance(knowledge, dict):
|
||||||
self._runtime_knowledge = knowledge
|
self._runtime_knowledge = knowledge
|
||||||
|
opener_audio = metadata.get("openerAudio")
|
||||||
|
if isinstance(opener_audio, dict):
|
||||||
|
self._runtime_opener_audio = dict(opener_audio)
|
||||||
kb_id = str(knowledge.get("kbId") or knowledge.get("knowledgeBaseId") or "").strip()
|
kb_id = str(knowledge.get("kbId") or knowledge.get("knowledgeBaseId") or "").strip()
|
||||||
if kb_id:
|
if kb_id:
|
||||||
self._runtime_knowledge_base_id = kb_id
|
self._runtime_knowledge_base_id = kb_id
|
||||||
@@ -770,10 +779,117 @@ class DuplexPipeline:
|
|||||||
)
|
)
|
||||||
await self.conversation.add_assistant_turn(greeting_to_speak)
|
await self.conversation.add_assistant_turn(greeting_to_speak)
|
||||||
|
|
||||||
if self._tts_output_enabled():
|
used_preloaded_audio = await self._play_preloaded_opener_audio()
|
||||||
|
if self._tts_output_enabled() and not used_preloaded_audio:
|
||||||
# Keep opener text ahead of opener voice start.
|
# Keep opener text ahead of opener voice start.
|
||||||
await self._speak(greeting_to_speak, audio_event_priority=30)
|
await self._speak(greeting_to_speak, audio_event_priority=30)
|
||||||
|
|
||||||
|
async def _play_preloaded_opener_audio(self) -> bool:
    """
    Stream a pre-rendered opener clip instead of synthesizing the greeting.

    The clip is obtained from ``_load_preloaded_opener_pcm``. Returns True only
    when the whole clip, including the closing ``output.audio.end`` event, was
    sent. Returns False when TTS output is disabled, no clip is available, or
    sending fails; the failure is logged as a warning.
    """
    if not self._tts_output_enabled():
        return False

    clip = await self._load_preloaded_opener_pcm()
    if not clip:
        return False

    succeeded = False
    try:
        self._drop_outbound_audio = False
        self._start_tts()
        start_event = {**ev("output.audio.start", trackId=self.track_audio_out)}
        await self._send_event(start_event, priority=30)

        self._is_bot_speaking = True
        await self._send_audio(clip, priority=50)
        await self._flush_audio_out_frames(priority=50)

        # Built only after the audio has been flushed, matching the send order.
        end_event = {**ev("output.audio.end", trackId=self.track_audio_out)}
        await self._send_event(end_event, priority=30)
        succeeded = True
    except Exception as e:
        logger.warning(f"Failed to play preloaded opener audio, fallback to TTS: {e}")
    finally:
        # Always clear the speaking flag, whether playback finished or failed.
        self._is_bot_speaking = False
    return succeeded
|
||||||
|
|
||||||
|
async def _load_preloaded_opener_pcm(self) -> Optional[bytes]:
    """
    Load opener PCM bytes, preferring the backend clip over a local file.

    First tries the ``pcmUrl`` from the runtime ``openerAudio`` metadata (only
    when it is both enabled and ready); a URL starting with "/" is prefixed
    with ``settings.backend_url`` when that is set. If that yields nothing,
    falls back to ``settings.duplex_opener_audio_file``: ``.wav`` files are
    converted, any other file is returned as-is. Returns None when no source
    produces audio; failures are logged as warnings.
    """
    # 1) Clip advertised by the backend through runtime metadata
    runtime_cfg = self._runtime_opener_audio if isinstance(self._runtime_opener_audio, dict) else {}
    pcm_url = ""
    if bool(runtime_cfg.get("enabled")) and bool(runtime_cfg.get("ready")):
        pcm_url = str(runtime_cfg.get("pcmUrl") or "").strip()

    if pcm_url:
        resolved_url = pcm_url
        if pcm_url.startswith("/"):
            backend_url = str(settings.backend_url or "").strip().rstrip("/")
            if backend_url:
                resolved_url = f"{backend_url}{pcm_url}"
        try:
            timeout = aiohttp.ClientTimeout(total=10)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(resolved_url) as resp:
                    resp.raise_for_status()
                    payload = await resp.read()
                    if payload:
                        return payload
        except Exception as e:
            logger.warning(f"Failed to fetch opener audio from backend ({resolved_url}): {e}")

    # 2) Local file configured for standalone mode
    configured_file = str(settings.duplex_opener_audio_file or "").strip()
    if not configured_file:
        return None

    path = Path(configured_file)
    if not path.is_absolute():
        path = (Path.cwd() / path).resolve()
    if not (path.exists() and path.is_file()):
        logger.warning(f"Configured opener audio file does not exist: {path}")
        return None

    try:
        raw = path.read_bytes()
        if path.suffix.lower() != ".wav":
            # Anything that is not .wav is returned untouched (expected: raw pcm_s16le 16k mono).
            return raw
        pcm, _ = self._wav_to_pcm16_mono_16k(raw)
        return pcm
    except Exception as e:
        logger.warning(f"Failed to read opener audio file {path}: {e}")
        return None
|
||||||
|
|
||||||
|
def _wav_to_pcm16_mono_16k(self, wav_bytes: bytes) -> Tuple[bytes, int]:
    """
    Convert a 16-bit WAV payload to raw PCM s16le, mono, 16 kHz.

    Returns ``(pcm_bytes, duration_ms)``; the duration is derived from the
    converted byte length (2 bytes per sample at 16000 Hz).

    Raises ValueError for non-16-bit audio or more than two channels; errors
    from the ``wave`` module propagate for unreadable input.
    """
    with wave.open(io.BytesIO(wav_bytes), "rb") as wav_file:
        channels = wav_file.getnchannels()
        sample_width = wav_file.getsampwidth()
        sample_rate = wav_file.getframerate()
        raw = wav_file.readframes(wav_file.getnframes())

    if sample_width != 2:
        raise ValueError(f"Unsupported WAV sample width: {sample_width * 8}bit")
    # audioop.tomono only understands interleaved stereo; 3+ channels would be
    # mixed incorrectly, so refuse them instead of playing corrupted audio.
    if channels > 2:
        raise ValueError(f"Unsupported WAV channel count: {channels}")
    if channels == 2:
        raw = audioop.tomono(raw, sample_width, 0.5, 0.5)
    if sample_rate != 16000:
        raw, _ = audioop.ratecv(raw, sample_width, 1, sample_rate, 16000, None)
    # Integer arithmetic avoids float rounding dropping a millisecond.
    duration_ms = (len(raw) * 1000) // (16000 * 2)
    return raw, duration_ms
|
||||||
|
|
||||||
async def _enqueue_outbound(self, kind: str, payload: Any, priority: int) -> None:
|
async def _enqueue_outbound(self, kind: str, payload: Any, priority: int) -> None:
|
||||||
"""Queue outbound message with priority ordering."""
|
"""Queue outbound message with priority ordering."""
|
||||||
self._outbound_seq += 1
|
self._outbound_seq += 1
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ class Session:
|
|||||||
"bargeIn",
|
"bargeIn",
|
||||||
"knowledge",
|
"knowledge",
|
||||||
"knowledgeBaseId",
|
"knowledgeBaseId",
|
||||||
|
"openerAudio",
|
||||||
"history",
|
"history",
|
||||||
"userId",
|
"userId",
|
||||||
"assistantId",
|
"assistantId",
|
||||||
@@ -840,6 +841,7 @@ class Session:
|
|||||||
"bargeIn",
|
"bargeIn",
|
||||||
"knowledgeBaseId",
|
"knowledgeBaseId",
|
||||||
"knowledge",
|
"knowledge",
|
||||||
|
"openerAudio",
|
||||||
"history",
|
"history",
|
||||||
"userId",
|
"userId",
|
||||||
"source",
|
"source",
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import React, { useState, useEffect, useMemo, useRef } from 'react';
|
|||||||
import { Plus, Search, Play, Copy, Trash2, Mic, MessageSquare, Save, Video, PhoneOff, Camera, ArrowLeftRight, Send, Phone, Rocket, AlertTriangle, PhoneCall, CameraOff, Image, Images, CloudSun, Calendar, TrendingUp, Coins, Wrench, Globe, Terminal, X, ClipboardCheck, Sparkles, Volume2, Timer, ChevronDown, Database, Server, Zap, ExternalLink, Key, BrainCircuit, Ear, Book, Filter } from 'lucide-react';
|
import { Plus, Search, Play, Copy, Trash2, Mic, MessageSquare, Save, Video, PhoneOff, Camera, ArrowLeftRight, Send, Phone, Rocket, AlertTriangle, PhoneCall, CameraOff, Image, Images, CloudSun, Calendar, TrendingUp, Coins, Wrench, Globe, Terminal, X, ClipboardCheck, Sparkles, Volume2, Timer, ChevronDown, Database, Server, Zap, ExternalLink, Key, BrainCircuit, Ear, Book, Filter } from 'lucide-react';
|
||||||
import { Button, Input, Badge, Drawer, Dialog } from '../components/UI';
|
import { Button, Input, Badge, Drawer, Dialog } from '../components/UI';
|
||||||
import { ASRModel, Assistant, KnowledgeBase, LLMModel, TabValue, Tool, Voice } from '../types';
|
import { ASRModel, Assistant, KnowledgeBase, LLMModel, TabValue, Tool, Voice } from '../types';
|
||||||
import { createAssistant, deleteAssistant, fetchASRModels, fetchAssistants, fetchKnowledgeBases, fetchLLMModels, fetchTools, fetchVoices, updateAssistant as updateAssistantApi } from '../services/backendApi';
|
import { createAssistant, deleteAssistant, fetchASRModels, fetchAssistants, fetchKnowledgeBases, fetchLLMModels, fetchTools, fetchVoices, generateAssistantOpenerAudio, updateAssistant as updateAssistantApi } from '../services/backendApi';
|
||||||
|
|
||||||
const isOpenAICompatibleVendor = (vendor?: string) => {
|
const isOpenAICompatibleVendor = (vendor?: string) => {
|
||||||
const normalized = String(vendor || '').trim().toLowerCase();
|
const normalized = String(vendor || '').trim().toLowerCase();
|
||||||
@@ -108,6 +108,7 @@ export const AssistantsPage: React.FC = () => {
|
|||||||
const [isLoading, setIsLoading] = useState(true);
|
const [isLoading, setIsLoading] = useState(true);
|
||||||
const [persistedAssistantSnapshotById, setPersistedAssistantSnapshotById] = useState<Record<string, string>>({});
|
const [persistedAssistantSnapshotById, setPersistedAssistantSnapshotById] = useState<Record<string, string>>({});
|
||||||
const [unsavedDebugConfirmOpen, setUnsavedDebugConfirmOpen] = useState(false);
|
const [unsavedDebugConfirmOpen, setUnsavedDebugConfirmOpen] = useState(false);
|
||||||
|
const [openerAudioGenerating, setOpenerAudioGenerating] = useState(false);
|
||||||
|
|
||||||
const selectedAssistant = assistants.find(a => a.id === selectedId) || null;
|
const selectedAssistant = assistants.find(a => a.id === selectedId) || null;
|
||||||
const serializeAssistant = (assistant: Assistant) => JSON.stringify(assistant);
|
const serializeAssistant = (assistant: Assistant) => JSON.stringify(assistant);
|
||||||
@@ -164,6 +165,7 @@ export const AssistantsPage: React.FC = () => {
|
|||||||
firstTurnMode: 'bot_first',
|
firstTurnMode: 'bot_first',
|
||||||
opener: '',
|
opener: '',
|
||||||
generatedOpenerEnabled: false,
|
generatedOpenerEnabled: false,
|
||||||
|
openerAudioEnabled: false,
|
||||||
prompt: '',
|
prompt: '',
|
||||||
knowledgeBaseId: '',
|
knowledgeBaseId: '',
|
||||||
language: 'zh',
|
language: 'zh',
|
||||||
@@ -269,6 +271,31 @@ export const AssistantsPage: React.FC = () => {
|
|||||||
setDebugOpen(true);
|
setDebugOpen(true);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Request opener-audio generation for the currently selected assistant and
// merge the returned status into the local assistants list.
//
// - Does nothing when no assistant is selected.
// - Sends the assistant's current `opener` text (empty string when unset).
// - `openerAudioGenerating` is true for the duration of the request and is
//   always reset afterwards, whether the request succeeds or fails.
// - Failures are logged to the console and surfaced through `alert`.
const handleGenerateOpenerAudio = async () => {
  if (!selectedAssistant) return;

  setOpenerAudioGenerating(true);
  try {
    const openerText = selectedAssistant.opener || '';
    const result = await generateAssistantOpenerAudio(selectedAssistant.id, { text: openerText });

    // Only the entry matching the selected assistant is replaced; every
    // other entry keeps its identity.
    setAssistants((current) =>
      current.map((entry) =>
        entry.id === selectedAssistant.id
          ? {
              ...entry,
              openerAudioEnabled: result.enabled,
              openerAudioReady: result.ready,
              openerAudioDurationMs: result.duration_ms,
              // The status may carry null/undefined here; the list stores a string.
              openerAudioUpdatedAt: result.updated_at || '',
            }
          : entry
      )
    );
  } catch (err) {
    console.error(err);
    alert((err as Error)?.message || '生成开场白预加载音频失败');
  } finally {
    setOpenerAudioGenerating(false);
  }
};
|
||||||
|
|
||||||
const handleConfirmOpenDebug = () => {
|
const handleConfirmOpenDebug = () => {
|
||||||
setUnsavedDebugConfirmOpen(false);
|
setUnsavedDebugConfirmOpen(false);
|
||||||
setDebugOpen(true);
|
setDebugOpen(true);
|
||||||
@@ -676,6 +703,58 @@ export const AssistantsPage: React.FC = () => {
|
|||||||
? '通话接通后将根据提示词自动生成开场白。'
|
? '通话接通后将根据提示词自动生成开场白。'
|
||||||
: '接通通话后的第一句话。'}
|
: '接通通话后的第一句话。'}
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<div className="mt-3 p-3 rounded-lg border border-white/10 bg-white/[0.03] space-y-2">
|
||||||
|
<div className="flex items-center justify-between gap-3">
|
||||||
|
<label className="text-xs font-semibold text-white flex items-center">
|
||||||
|
<Volume2 className="w-4 h-4 mr-2 text-primary" />
|
||||||
|
使用预加载开场音频
|
||||||
|
</label>
|
||||||
|
<div className="inline-flex rounded-lg border border-white/10 bg-white/5 p-1">
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
onClick={() => updateAssistant('openerAudioEnabled', false)}
|
||||||
|
className={`px-3 py-1 text-xs rounded-md transition-colors ${
|
||||||
|
selectedAssistant.openerAudioEnabled === true
|
||||||
|
? 'text-muted-foreground hover:text-foreground'
|
||||||
|
: 'bg-primary text-primary-foreground shadow-sm'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
关闭
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
onClick={() => updateAssistant('openerAudioEnabled', true)}
|
||||||
|
className={`px-3 py-1 text-xs rounded-md transition-colors ${
|
||||||
|
selectedAssistant.openerAudioEnabled === true
|
||||||
|
? 'bg-primary text-primary-foreground shadow-sm'
|
||||||
|
: 'text-muted-foreground hover:text-foreground'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
开启
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center justify-between gap-3">
|
||||||
|
<p className="text-[11px] text-muted-foreground">
|
||||||
|
状态:
|
||||||
|
{selectedAssistant.openerAudioReady
|
||||||
|
? `已生成 (${Math.round((selectedAssistant.openerAudioDurationMs || 0) / 1000)}s)`
|
||||||
|
: '未生成'}
|
||||||
|
</p>
|
||||||
|
<Button
|
||||||
|
variant="secondary"
|
||||||
|
size="sm"
|
||||||
|
onClick={handleGenerateOpenerAudio}
|
||||||
|
disabled={openerAudioGenerating || selectedAssistant.generatedOpenerEnabled === true}
|
||||||
|
>
|
||||||
|
{openerAudioGenerating ? '生成中...' : '生成开场预加载音频'}
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
<p className="text-[11px] text-muted-foreground">
|
||||||
|
使用当前 TTS 配置生成并保存到后端;引擎可直接播放以降低首包延迟。
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
|||||||
@@ -36,6 +36,10 @@ const mapAssistant = (raw: AnyRecord): Assistant => ({
|
|||||||
firstTurnMode: readField(raw, ['firstTurnMode', 'first_turn_mode'], 'bot_first') as 'bot_first' | 'user_first',
|
firstTurnMode: readField(raw, ['firstTurnMode', 'first_turn_mode'], 'bot_first') as 'bot_first' | 'user_first',
|
||||||
opener: readField(raw, ['opener'], ''),
|
opener: readField(raw, ['opener'], ''),
|
||||||
generatedOpenerEnabled: Boolean(readField(raw, ['generatedOpenerEnabled', 'generated_opener_enabled'], false)),
|
generatedOpenerEnabled: Boolean(readField(raw, ['generatedOpenerEnabled', 'generated_opener_enabled'], false)),
|
||||||
|
openerAudioEnabled: Boolean(readField(raw, ['openerAudioEnabled', 'opener_audio_enabled'], false)),
|
||||||
|
openerAudioReady: Boolean(readField(raw, ['openerAudioReady', 'opener_audio_ready'], false)),
|
||||||
|
openerAudioDurationMs: Number(readField(raw, ['openerAudioDurationMs', 'opener_audio_duration_ms'], 0)),
|
||||||
|
openerAudioUpdatedAt: readField(raw, ['openerAudioUpdatedAt', 'opener_audio_updated_at'], ''),
|
||||||
prompt: readField(raw, ['prompt'], ''),
|
prompt: readField(raw, ['prompt'], ''),
|
||||||
knowledgeBaseId: readField(raw, ['knowledgeBaseId', 'knowledge_base_id'], ''),
|
knowledgeBaseId: readField(raw, ['knowledgeBaseId', 'knowledge_base_id'], ''),
|
||||||
language: readField(raw, ['language'], 'zh') as 'zh' | 'en',
|
language: readField(raw, ['language'], 'zh') as 'zh' | 'en',
|
||||||
@@ -228,6 +232,7 @@ export const createAssistant = async (data: Partial<Assistant>): Promise<Assista
|
|||||||
firstTurnMode: data.firstTurnMode || 'bot_first',
|
firstTurnMode: data.firstTurnMode || 'bot_first',
|
||||||
opener: data.opener || '',
|
opener: data.opener || '',
|
||||||
generatedOpenerEnabled: data.generatedOpenerEnabled ?? false,
|
generatedOpenerEnabled: data.generatedOpenerEnabled ?? false,
|
||||||
|
openerAudioEnabled: data.openerAudioEnabled ?? false,
|
||||||
prompt: data.prompt || '',
|
prompt: data.prompt || '',
|
||||||
knowledgeBaseId: data.knowledgeBaseId || '',
|
knowledgeBaseId: data.knowledgeBaseId || '',
|
||||||
language: data.language || 'zh',
|
language: data.language || 'zh',
|
||||||
@@ -256,6 +261,7 @@ export const updateAssistant = async (id: string, data: Partial<Assistant>): Pro
|
|||||||
firstTurnMode: data.firstTurnMode,
|
firstTurnMode: data.firstTurnMode,
|
||||||
opener: data.opener,
|
opener: data.opener,
|
||||||
generatedOpenerEnabled: data.generatedOpenerEnabled,
|
generatedOpenerEnabled: data.generatedOpenerEnabled,
|
||||||
|
openerAudioEnabled: data.openerAudioEnabled,
|
||||||
prompt: data.prompt,
|
prompt: data.prompt,
|
||||||
knowledgeBaseId: data.knowledgeBaseId,
|
knowledgeBaseId: data.knowledgeBaseId,
|
||||||
language: data.language,
|
language: data.language,
|
||||||
@@ -295,10 +301,36 @@ export interface AssistantRuntimeConfigResponse {
|
|||||||
warnings?: string[];
|
warnings?: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Status object describing an assistant's opener audio.
 *
 * Field names are snake_case, unlike the camelCase `Assistant` type.
 * NOTE(review): the per-field meanings below are inferred from the names;
 * confirm against the backend schema.
 */
export interface AssistantOpenerAudioStatus {
  /** Presumably whether opener-audio playback is switched on. */
  enabled: boolean;
  /** Presumably whether generated audio is available. */
  ready: boolean;

  // Audio format descriptors.
  encoding: string;
  sample_rate_hz: number;
  channels: number;
  /** Length of the audio in milliseconds. */
  duration_ms: number;

  // Optional metadata; each may be absent or explicitly null.
  updated_at?: string | null;
  text_hash?: string | null;
  tts_fingerprint?: string | null;
}
|
||||||
|
|
||||||
/**
 * Fetch the runtime configuration of one assistant.
 *
 * Issues a request to `/assistants/{assistantId}/config` through `apiRequest`
 * with no extra options and resolves with the response typed as
 * `AssistantRuntimeConfigResponse`.
 */
export const fetchAssistantRuntimeConfig = async (assistantId: string): Promise<AssistantRuntimeConfigResponse> => {
  const endpoint = `/assistants/${assistantId}/config`;
  return apiRequest<AssistantRuntimeConfigResponse>(endpoint);
};
|
||||||
|
|
||||||
|
/**
 * Fetch the opener-audio status of one assistant.
 *
 * Issues a request to `/assistants/{assistantId}/opener-audio` through
 * `apiRequest` with no extra options and resolves with the response typed as
 * `AssistantOpenerAudioStatus`.
 */
export const fetchAssistantOpenerAudioStatus = async (assistantId: string): Promise<AssistantOpenerAudioStatus> => {
  const endpoint = `/assistants/${assistantId}/opener-audio`;
  return apiRequest<AssistantOpenerAudioStatus>(endpoint);
};
|
||||||
|
|
||||||
|
/**
 * Ask the backend to generate opener audio for one assistant.
 *
 * Sends a POST to `/assistants/{assistantId}/opener-audio/generate` through
 * `apiRequest` and resolves with the response typed as
 * `AssistantOpenerAudioStatus`.
 *
 * @param assistantId Identifier interpolated into the request path.
 * @param payload Optional request body; an empty object is sent when it is
 *   omitted (or otherwise falsy).
 */
export const generateAssistantOpenerAudio = async (
  assistantId: string,
  payload?: { text?: string }
): Promise<AssistantOpenerAudioStatus> => {
  const endpoint = `/assistants/${assistantId}/opener-audio/generate`;
  const requestBody = payload || {};
  return apiRequest<AssistantOpenerAudioStatus>(endpoint, {
    method: 'POST',
    body: requestBody,
  });
};
|
||||||
|
|
||||||
export const fetchVoices = async (): Promise<Voice[]> => {
|
export const fetchVoices = async (): Promise<Voice[]> => {
|
||||||
const response = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>(withLimit('/voices'));
|
const response = await apiRequest<{ list?: AnyRecord[] } | AnyRecord[]>(withLimit('/voices'));
|
||||||
const list = Array.isArray(response) ? response : (response.list || []);
|
const list = Array.isArray(response) ? response : (response.list || []);
|
||||||
|
|||||||
@@ -6,6 +6,10 @@ export interface Assistant {
|
|||||||
firstTurnMode?: 'bot_first' | 'user_first';
|
firstTurnMode?: 'bot_first' | 'user_first';
|
||||||
opener: string;
|
opener: string;
|
||||||
generatedOpenerEnabled?: boolean;
|
generatedOpenerEnabled?: boolean;
|
||||||
|
openerAudioEnabled?: boolean;
|
||||||
|
openerAudioReady?: boolean;
|
||||||
|
openerAudioDurationMs?: number;
|
||||||
|
openerAudioUpdatedAt?: string;
|
||||||
prompt: string;
|
prompt: string;
|
||||||
knowledgeBaseId: string;
|
knowledgeBaseId: string;
|
||||||
language: 'zh' | 'en';
|
language: 'zh' | 'en';
|
||||||
|
|||||||
Reference in New Issue
Block a user