"""FastAPI routes for assistant management: CRUD, engine runtime config, and opener audio."""
import audioop  # NOTE(review): stdlib audioop is removed in Python 3.13 — needs a replacement before upgrading
import hashlib
import io
import math
import os
import uuid
import wave
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

import httpx
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy import inspect, text
from sqlalchemy.orm import Session

from ..db import get_db
from ..models import Assistant, AssistantOpenerAudio, LLMModel, ASRModel, Voice, ToolResource
from ..schemas import (
    AssistantCreate,
    AssistantUpdate,
    AssistantOut,
    AssistantEngineConfigResponse,
    AssistantOpenerAudioGenerateRequest,
    AssistantOpenerAudioOut,
)
from .tools import (
    TOOL_REGISTRY,
    TOOL_CATEGORY_MAP,
    TOOL_PARAMETER_DEFAULTS,
    TOOL_WAIT_FOR_RESPONSE_DEFAULTS,
    _ensure_tool_resource_schema,
)
|
|
|
|
router = APIRouter(prefix="/assistants", tags=["Assistants"])

# Defaults for OpenAI-compatible TTS vendors (e.g. SiliconFlow).
OPENAI_COMPATIBLE_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
OPENAI_COMPATIBLE_DEFAULT_BASE_URL = "https://api.siliconflow.cn/v1"

# On-disk directory for pre-generated opener PCM files: <repo>/data/opener_audio.
OPENER_AUDIO_DIR = Path(__file__).resolve().parents[2] / "data" / "opener_audio"

# Bounds and defaults for the presence-probe ("are you still there?") feature.
PRESENCE_PROBE_MIN_IDLE_SECONDS = 5.0
PRESENCE_PROBE_MAX_IDLE_SECONDS = 3600.0
PRESENCE_PROBE_DEFAULT_IDLE_SECONDS = 20.0
PRESENCE_PROBE_MIN_COOLDOWN_SECONDS = 5.0
PRESENCE_PROBE_MAX_COOLDOWN_SECONDS = 7200.0
PRESENCE_PROBE_DEFAULT_COOLDOWN_SECONDS = 45.0
PRESENCE_PROBE_MAX_PROMPTS_CAP = 10
PRESENCE_PROBE_DEFAULT_MAX_PROMPTS = 2

# Voice IDs recognized by OpenAI-compatible TTS vendors; compared case-insensitively
# in _normalize_openai_compatible_voice_key.
OPENAI_COMPATIBLE_KNOWN_VOICES = {
    "alex",
    "anna",
    "bella",
    "benjamin",
    "charles",
    "claire",
    "david",
    "diana",
}
|
|
|
|
|
|
def _is_openai_compatible_vendor(vendor: Optional[str]) -> bool:
|
|
return (vendor or "").strip().lower() in {
|
|
"siliconflow",
|
|
"硅基流动",
|
|
"openai compatible",
|
|
"openai-compatible",
|
|
}
|
|
|
|
|
|
def _is_dashscope_vendor(vendor: Optional[str]) -> bool:
|
|
return (vendor or "").strip().lower() in {
|
|
"dashscope",
|
|
}
|
|
|
|
|
|
def _normalize_openai_compatible_voice_key(voice_value: str, model: str) -> str:
    """Normalize a voice identifier into the "model:voice" form expected by
    OpenAI-compatible TTS vendors.

    Known built-in voice names are lowercased; custom voice IDs pass through
    unchanged. An empty voice falls back to "anna" on the resolved model.
    """
    candidate = (voice_value or "").strip()
    resolved_model = (model or "").strip() or OPENAI_COMPATIBLE_DEFAULT_MODEL
    if not candidate:
        return f"{resolved_model}:anna"

    if ":" not in candidate:
        # Bare voice id: canonicalize case only for the known built-in voices.
        voice_id = candidate.lower() if candidate.lower() in OPENAI_COMPATIBLE_KNOWN_VOICES else candidate
        return f"{resolved_model}:{voice_id}"

    # Already in "model:voice" form; normalize both halves.
    prefix, suffix = candidate.split(":", 1)
    prefix = prefix.strip() or resolved_model
    suffix = suffix.strip()
    if suffix.lower() in OPENAI_COMPATIBLE_KNOWN_VOICES:
        suffix = suffix.lower()
    return f"{prefix}:{suffix}"
|
|
|
|
|
|
def _config_version_id(assistant: Assistant) -> str:
    """Derive a version tag from the assistant's last modification timestamp.

    The tag changes whenever the assistant row is updated, letting clients
    detect stale engine configs.
    """
    stamp_source = assistant.updated_at or assistant.created_at or datetime.utcnow()
    stamp = stamp_source.strftime("%Y%m%d%H%M%S")
    return f"asst_{assistant.id}_{stamp}"
|
|
|
|
|
|
def _ensure_assistant_schema(db: Session) -> None:
    """Apply lightweight SQLite migrations for newly added assistant columns.

    Each ALTER is additive-only and guarded by a column-existence check, so
    re-running against an already-migrated database is a no-op. Commits only
    when at least one column was added.
    """
    bind = db.get_bind()
    inspector = inspect(bind)
    try:
        columns = {col["name"] for col in inspector.get_columns("assistants")}
    except Exception:
        # Presumably the table does not exist yet (fresh DB) — nothing to migrate.
        return

    altered = False
    if "presence_probe_enabled" not in columns:
        db.execute(text("ALTER TABLE assistants ADD COLUMN presence_probe_enabled BOOLEAN DEFAULT 0"))
        altered = True
    if "presence_probe_idle_seconds" not in columns:
        db.execute(
            text(
                "ALTER TABLE assistants ADD COLUMN presence_probe_idle_seconds FLOAT DEFAULT 20.0"
            )
        )
        altered = True
    if "presence_probe_cooldown_seconds" not in columns:
        db.execute(
            text(
                "ALTER TABLE assistants ADD COLUMN presence_probe_cooldown_seconds FLOAT DEFAULT 45.0"
            )
        )
        altered = True
    if "presence_probe_max_prompts" not in columns:
        db.execute(text("ALTER TABLE assistants ADD COLUMN presence_probe_max_prompts INTEGER DEFAULT 2"))
        altered = True
    if "presence_probe_include_context" not in columns:
        db.execute(
            text("ALTER TABLE assistants ADD COLUMN presence_probe_include_context BOOLEAN DEFAULT 1")
        )
        altered = True
    if "presence_probe_question" not in columns:
        db.execute(text("ALTER TABLE assistants ADD COLUMN presence_probe_question TEXT DEFAULT ''"))
        altered = True
    if altered:
        db.commit()
|
|
|
|
|
|
def _coerce_bounded_float(
|
|
raw_value: Any,
|
|
*,
|
|
default_value: float,
|
|
min_value: float,
|
|
max_value: float,
|
|
) -> float:
|
|
if isinstance(raw_value, (int, float)):
|
|
parsed = float(raw_value)
|
|
elif isinstance(raw_value, str):
|
|
try:
|
|
parsed = float(raw_value.strip())
|
|
except ValueError:
|
|
parsed = default_value
|
|
else:
|
|
parsed = default_value
|
|
if parsed < min_value:
|
|
return min_value
|
|
if parsed > max_value:
|
|
return max_value
|
|
return parsed
|
|
|
|
|
|
def _coerce_bounded_int(
|
|
raw_value: Any,
|
|
*,
|
|
default_value: int,
|
|
min_value: int,
|
|
max_value: int,
|
|
) -> int:
|
|
if isinstance(raw_value, (int, float)):
|
|
parsed = int(raw_value)
|
|
elif isinstance(raw_value, str):
|
|
try:
|
|
parsed = int(raw_value.strip())
|
|
except ValueError:
|
|
parsed = default_value
|
|
else:
|
|
parsed = default_value
|
|
if parsed < min_value:
|
|
return min_value
|
|
if parsed > max_value:
|
|
return max_value
|
|
return parsed
|
|
|
|
|
|
def _resolve_presence_probe_config_from_assistant(assistant: Assistant) -> Dict[str, Any]:
    """Build the normalized presence-probe settings dict from the ORM columns.

    All numeric fields are clamped to the module-level bounds; the custom
    question is trimmed and capped at 160 characters.
    """
    probe_question = str(assistant.presence_probe_question or "").strip()[:160]

    # Column may be NULL on rows created before the migration; treat missing
    # or NULL as "include context".
    raw_include = getattr(assistant, "presence_probe_include_context", True)
    include_context = True if raw_include is None else bool(raw_include)

    idle_seconds = _coerce_bounded_float(
        assistant.presence_probe_idle_seconds,
        default_value=PRESENCE_PROBE_DEFAULT_IDLE_SECONDS,
        min_value=PRESENCE_PROBE_MIN_IDLE_SECONDS,
        max_value=PRESENCE_PROBE_MAX_IDLE_SECONDS,
    )
    cooldown_seconds = _coerce_bounded_float(
        assistant.presence_probe_cooldown_seconds,
        default_value=PRESENCE_PROBE_DEFAULT_COOLDOWN_SECONDS,
        min_value=PRESENCE_PROBE_MIN_COOLDOWN_SECONDS,
        max_value=PRESENCE_PROBE_MAX_COOLDOWN_SECONDS,
    )
    max_prompts = _coerce_bounded_int(
        assistant.presence_probe_max_prompts,
        default_value=PRESENCE_PROBE_DEFAULT_MAX_PROMPTS,
        min_value=1,
        max_value=PRESENCE_PROBE_MAX_PROMPTS_CAP,
    )

    return {
        "enabled": bool(assistant.presence_probe_enabled),
        "idleSeconds": idle_seconds,
        "cooldownSeconds": cooldown_seconds,
        "maxPrompts": max_prompts,
        "includeContext": include_context,
        "question": probe_question,
    }
|
|
|
|
|
|
def _normalize_runtime_tool_schema(tool_id: str, raw_schema: Any) -> Dict[str, Any]:
|
|
schema = dict(raw_schema) if isinstance(raw_schema, dict) else {}
|
|
if not schema:
|
|
fallback = TOOL_REGISTRY.get(tool_id, {}).get("parameters")
|
|
if isinstance(fallback, dict):
|
|
schema = dict(fallback)
|
|
schema.setdefault("type", "object")
|
|
if not isinstance(schema.get("properties"), dict):
|
|
schema["properties"] = {}
|
|
required = schema.get("required")
|
|
if required is None or not isinstance(required, list):
|
|
schema["required"] = []
|
|
return schema
|
|
|
|
|
|
def _compose_runtime_system_prompt(base_prompt: Optional[str]) -> str:
|
|
raw = str(base_prompt or "").strip()
|
|
tool_policy = (
|
|
"Tool usage policy:\n"
|
|
"- Tool function names/IDs are internal and must never be shown to users.\n"
|
|
"- When users ask which tools are available, describe capabilities in natural language.\n"
|
|
"- Do not expose raw tool call payloads, IDs, or executor details."
|
|
)
|
|
return f"{raw}\n\n{tool_policy}" if raw else tool_policy
|
|
|
|
|
|
def _resolve_runtime_tools(db: Session, selected_tool_ids: List[str], warnings: List[str]) -> List[Dict[str, Any]]:
    """Resolve the assistant's selected tool IDs into runtime tool descriptors.

    DB-backed ToolResource rows take precedence; IDs without a row fall back to
    the static TOOL_REGISTRY. Disabled resources are skipped. Non-fatal problems
    are appended to *warnings* (mutated in place).
    """
    _ensure_tool_resource_schema(db)
    ids = [str(tool_id).strip() for tool_id in selected_tool_ids if str(tool_id).strip()]
    if not ids:
        return []

    resources = (
        db.query(ToolResource)
        .filter(ToolResource.id.in_(ids))
        .all()
    )
    by_id = {str(item.id): item for item in resources}

    runtime_tools: List[Dict[str, Any]] = []
    for tool_id in ids:
        resource = by_id.get(tool_id)
        # `enabled is False` skips only explicitly disabled rows; NULL/None
        # (e.g. legacy rows) is treated as enabled.
        if resource and resource.enabled is False:
            warnings.append(f"Tool is disabled and skipped in runtime config: {tool_id}")
            continue

        category = str(resource.category if resource else TOOL_CATEGORY_MAP.get(tool_id, "query"))
        display_name = (
            str(resource.name or tool_id).strip()
            if resource
            else str(TOOL_REGISTRY.get(tool_id, {}).get("name") or tool_id).strip()
        )
        description = (
            str(resource.description or resource.name or "").strip()
            if resource
            else str(TOOL_REGISTRY.get(tool_id, {}).get("description") or "").strip()
        )
        schema = _normalize_runtime_tool_schema(
            tool_id,
            resource.parameter_schema if resource else TOOL_REGISTRY.get(tool_id, {}).get("parameters"),
        )
        defaults_raw = resource.parameter_defaults if resource else TOOL_PARAMETER_DEFAULTS.get(tool_id)
        defaults = dict(defaults_raw) if isinstance(defaults_raw, dict) else {}
        wait_for_response = (
            bool(resource.wait_for_response)
            if resource
            else bool(TOOL_WAIT_FOR_RESPONSE_DEFAULTS.get(tool_id, False))
        )

        # Unknown IDs still yield a (minimal) runtime tool but are flagged.
        if not resource and tool_id not in TOOL_REGISTRY:
            warnings.append(f"Tool resource not found: {tool_id}")

        runtime_tool: Dict[str, Any] = {
            "type": "function",
            # "system"-category tools execute on the client; all others server-side.
            "executor": "client" if category == "system" else "server",
            "function": {
                "name": tool_id,
                "description": (
                    f"Display name: {display_name}. {description}".strip()
                    if display_name
                    else (description or tool_id)
                ),
                "parameters": schema,
            },
            "displayName": display_name or tool_id,
            "toolId": tool_id,
            "waitForResponse": wait_for_response,
        }
        if defaults:
            runtime_tool["defaultArgs"] = defaults
        runtime_tools.append(runtime_tool)

    return runtime_tools
|
|
|
|
|
|
def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> tuple[Dict[str, Any], List[str]]:
    """Build the session-start metadata dict consumed by the voice engine.

    Resolves the assistant's referenced LLM/ASR/voice/knowledge resources from
    the database and collects non-fatal resolution problems into the returned
    warnings list.

    Returns:
        (metadata, warnings) — metadata is the engine-facing config; warnings
        is a list of human-readable resolution issues.
    """
    warnings: List[str] = []
    presence_probe_cfg = _resolve_presence_probe_config_from_assistant(assistant)
    metadata: Dict[str, Any] = {
        "systemPrompt": _compose_runtime_system_prompt(assistant.prompt),
        "firstTurnMode": assistant.first_turn_mode or "bot_first",
        "greeting": assistant.opener or "",
        "generatedOpenerEnabled": bool(assistant.generated_opener_enabled),
        # Audio output only when voice output is switched on; text otherwise.
        "output": {"mode": "audio" if assistant.voice_output_enabled else "text"},
        "bargeIn": {
            # Barge-in (interrupting the bot) is the inverse of the ORM flag.
            "enabled": not bool(assistant.bot_cannot_be_interrupted),
            "minDurationMs": int(assistant.interruption_sensitivity or 500),
        },
        "services": {},
        "tools": _resolve_runtime_tools(db, assistant.tools or [], warnings),
        "history": {
            "assistantId": assistant.id,
            "userId": int(assistant.user_id or 1),
            "source": "debug",
        },
        "presenceProbe": {
            "enabled": bool(presence_probe_cfg.get("enabled")),
            "idleSeconds": float(presence_probe_cfg.get("idleSeconds") or PRESENCE_PROBE_DEFAULT_IDLE_SECONDS),
            "cooldownSeconds": float(
                presence_probe_cfg.get("cooldownSeconds") or PRESENCE_PROBE_DEFAULT_COOLDOWN_SECONDS
            ),
            "maxPrompts": int(presence_probe_cfg.get("maxPrompts") or PRESENCE_PROBE_DEFAULT_MAX_PROMPTS),
            "includeContext": bool(presence_probe_cfg.get("includeContext", True)),
            # The "question" key is emitted only when a custom question is set.
            **(
                {"question": str(presence_probe_cfg.get("question") or "")}
                if str(presence_probe_cfg.get("question") or "").strip()
                else {}
            ),
        },
    }

    config_mode = str(assistant.config_mode or "platform").strip().lower()

    # External orchestrators (Dify/FastGPT) bring their own endpoint + key;
    # the platform-managed LLM model record is used otherwise.
    if config_mode in {"dify", "fastgpt"}:
        metadata["services"]["llm"] = {
            "provider": "openai",
            "model": "",
            "apiKey": assistant.api_key,
            "baseUrl": assistant.api_url,
        }
        if not (assistant.api_url or "").strip():
            warnings.append(f"External LLM API URL is empty for mode: {assistant.config_mode}")
        if not (assistant.api_key or "").strip():
            warnings.append(f"External LLM API key is empty for mode: {assistant.config_mode}")
    elif assistant.llm_model_id:
        llm = db.query(LLMModel).filter(LLMModel.id == assistant.llm_model_id).first()
        if llm:
            metadata["services"]["llm"] = {
                "provider": "openai",
                "model": llm.model_name or llm.name,
                "apiKey": llm.api_key,
                "baseUrl": llm.base_url,
            }
        else:
            warnings.append(f"LLM model not found: {assistant.llm_model_id}")

    if assistant.asr_model_id:
        asr = db.query(ASRModel).filter(ASRModel.id == assistant.asr_model_id).first()
        if asr:
            # Credentials are forwarded only for OpenAI-compatible ASR vendors.
            asr_provider = "openai_compatible" if _is_openai_compatible_vendor(asr.vendor) else "buffered"
            metadata["services"]["asr"] = {
                "provider": asr_provider,
                "model": asr.model_name or asr.name,
                "apiKey": asr.api_key if asr_provider == "openai_compatible" else None,
                "baseUrl": asr.base_url if asr_provider == "openai_compatible" else None,
            }
        else:
            warnings.append(f"ASR model not found: {assistant.asr_model_id}")

    if not assistant.voice_output_enabled:
        metadata["services"]["tts"] = {"enabled": False}
    elif assistant.voice:
        voice = db.query(Voice).filter(Voice.id == assistant.voice).first()
        if voice:
            # Vendor → provider mapping; "edge" is the credential-free default.
            if _is_dashscope_vendor(voice.vendor):
                tts_provider = "dashscope"
            elif _is_openai_compatible_vendor(voice.vendor):
                tts_provider = "openai_compatible"
            else:
                tts_provider = "edge"
            model = voice.model
            runtime_voice = voice.voice_key or voice.id
            if tts_provider == "openai_compatible":
                # These vendors expect the normalized "model:voice" key form.
                model = model or OPENAI_COMPATIBLE_DEFAULT_MODEL
                runtime_voice = _normalize_openai_compatible_voice_key(runtime_voice, model)
            metadata["services"]["tts"] = {
                "enabled": True,
                "provider": tts_provider,
                "model": model,
                "apiKey": voice.api_key if tts_provider in {"openai_compatible", "dashscope"} else None,
                "baseUrl": voice.base_url if tts_provider in {"openai_compatible", "dashscope"} else None,
                "voice": runtime_voice,
                # Assistant-level speed overrides the voice's own default speed.
                "speed": assistant.speed or voice.speed,
            }
        else:
            # Keep assistant.voice as direct voice identifier fallback
            metadata["services"]["tts"] = {
                "enabled": True,
                "voice": assistant.voice,
                "speed": assistant.speed or 1.0,
            }
            warnings.append(f"Voice resource not found: {assistant.voice}")

    if assistant.knowledge_base_id:
        metadata["knowledgeBaseId"] = assistant.knowledge_base_id
        metadata["knowledge"] = {
            "enabled": True,
            "kbId": assistant.knowledge_base_id,
            "nResults": 5,
        }
    # Opener audio is "ready" only when the row exists AND the PCM file is on disk.
    opener_audio = assistant.opener_audio
    opener_audio_ready = bool(opener_audio and opener_audio.file_path and Path(opener_audio.file_path).exists())
    metadata["openerAudio"] = {
        "enabled": bool(opener_audio.enabled) if opener_audio else False,
        "ready": opener_audio_ready,
        "encoding": opener_audio.encoding if opener_audio else "pcm_s16le",
        "sampleRateHz": int(opener_audio.sample_rate_hz) if opener_audio else 16000,
        "channels": int(opener_audio.channels) if opener_audio else 1,
        "durationMs": int(opener_audio.duration_ms) if opener_audio else 0,
        "textHash": opener_audio.text_hash if opener_audio else None,
        "ttsFingerprint": opener_audio.tts_fingerprint if opener_audio else None,
        "pcmUrl": f"/api/assistants/{assistant.id}/opener-audio/pcm" if opener_audio_ready else None,
    }
    return metadata, warnings
|
|
|
|
|
|
def _build_engine_assistant_config(db: Session, assistant: Assistant) -> Dict[str, Any]:
    """Assemble the full engine config payload served by the config endpoints."""
    session_metadata, warnings = _resolve_runtime_metadata(db, assistant)
    version_id = _config_version_id(assistant)

    # The "assistant" section mirrors the session metadata plus identity fields.
    assistant_section = {
        **session_metadata,
        "assistantId": assistant.id,
        "configVersionId": version_id,
    }
    source_ids = {
        "llmModelId": assistant.llm_model_id,
        "asrModelId": assistant.asr_model_id,
        "voiceId": assistant.voice,
        "knowledgeBaseId": assistant.knowledge_base_id,
    }
    return {
        "assistantId": assistant.id,
        "configVersionId": version_id,
        "assistant": assistant_section,
        "sessionStartMetadata": session_metadata,
        "sources": source_ids,
        "warnings": warnings,
    }
|
|
|
|
|
|
def assistant_to_dict(assistant: Assistant) -> dict:
    """Serialize an Assistant ORM row into the camelCase API response shape.

    Presence-probe values are normalized/clamped via
    _resolve_presence_probe_config_from_assistant; opener-audio readiness also
    checks that the PCM file exists on disk.
    """
    opener_audio = assistant.opener_audio
    opener_audio_ready = bool(opener_audio and opener_audio.file_path and Path(opener_audio.file_path).exists())
    presence_probe_cfg = _resolve_presence_probe_config_from_assistant(assistant)
    return {
        "id": assistant.id,
        "name": assistant.name,
        "callCount": assistant.call_count,
        "firstTurnMode": assistant.first_turn_mode or "bot_first",
        "opener": assistant.opener or "",
        "generatedOpenerEnabled": bool(assistant.generated_opener_enabled),
        "openerAudioEnabled": bool(opener_audio.enabled) if opener_audio else False,
        "openerAudioReady": opener_audio_ready,
        "openerAudioDurationMs": int(opener_audio.duration_ms) if opener_audio else 0,
        "openerAudioUpdatedAt": opener_audio.updated_at if opener_audio else None,
        "prompt": assistant.prompt or "",
        "knowledgeBaseId": assistant.knowledge_base_id,
        "language": assistant.language,
        "voiceOutputEnabled": assistant.voice_output_enabled,
        "voice": assistant.voice,
        "speed": assistant.speed,
        "hotwords": assistant.hotwords or [],
        "tools": assistant.tools or [],
        "botCannotBeInterrupted": bool(assistant.bot_cannot_be_interrupted),
        "interruptionSensitivity": assistant.interruption_sensitivity,
        "presenceProbeEnabled": bool(presence_probe_cfg.get("enabled")),
        "presenceProbeIdleSeconds": float(
            presence_probe_cfg.get("idleSeconds") or PRESENCE_PROBE_DEFAULT_IDLE_SECONDS
        ),
        "presenceProbeCooldownSeconds": float(
            presence_probe_cfg.get("cooldownSeconds") or PRESENCE_PROBE_DEFAULT_COOLDOWN_SECONDS
        ),
        "presenceProbeMaxPrompts": int(
            presence_probe_cfg.get("maxPrompts") or PRESENCE_PROBE_DEFAULT_MAX_PROMPTS
        ),
        "presenceProbeIncludeContext": bool(presence_probe_cfg.get("includeContext", True)),
        "presenceProbeQuestion": str(presence_probe_cfg.get("question") or ""),
        "configMode": assistant.config_mode,
        "apiUrl": assistant.api_url,
        "apiKey": assistant.api_key,
        "llmModelId": assistant.llm_model_id,
        "asrModelId": assistant.asr_model_id,
        "embeddingModelId": assistant.embedding_model_id,
        "rerankModelId": assistant.rerank_model_id,
        "created_at": assistant.created_at,
        "updated_at": assistant.updated_at,
    }
|
|
|
|
|
|
def _apply_assistant_update(assistant: Assistant, update_data: dict) -> None:
    """Copy camelCase API fields from *update_data* onto the ORM row.

    Field names are translated through a camelCase→snake_case map; unmapped
    names are applied verbatim. Presence-probe values are normalized/clamped
    before assignment.
    """
    column_by_field = {
        "knowledgeBaseId": "knowledge_base_id",
        "firstTurnMode": "first_turn_mode",
        "interruptionSensitivity": "interruption_sensitivity",
        "botCannotBeInterrupted": "bot_cannot_be_interrupted",
        "presenceProbeEnabled": "presence_probe_enabled",
        "presenceProbeIdleSeconds": "presence_probe_idle_seconds",
        "presenceProbeCooldownSeconds": "presence_probe_cooldown_seconds",
        "presenceProbeMaxPrompts": "presence_probe_max_prompts",
        "presenceProbeIncludeContext": "presence_probe_include_context",
        "presenceProbeQuestion": "presence_probe_question",
        "configMode": "config_mode",
        "voiceOutputEnabled": "voice_output_enabled",
        "generatedOpenerEnabled": "generated_opener_enabled",
        "apiUrl": "api_url",
        "apiKey": "api_key",
        "llmModelId": "llm_model_id",
        "asrModelId": "asr_model_id",
        "embeddingModelId": "embedding_model_id",
        "rerankModelId": "rerank_model_id",
    }
    # Per-column normalizers; columns without one take the raw value.
    normalizers = {
        "presence_probe_idle_seconds": lambda v: _coerce_bounded_float(
            v,
            default_value=PRESENCE_PROBE_DEFAULT_IDLE_SECONDS,
            min_value=PRESENCE_PROBE_MIN_IDLE_SECONDS,
            max_value=PRESENCE_PROBE_MAX_IDLE_SECONDS,
        ),
        "presence_probe_cooldown_seconds": lambda v: _coerce_bounded_float(
            v,
            default_value=PRESENCE_PROBE_DEFAULT_COOLDOWN_SECONDS,
            min_value=PRESENCE_PROBE_MIN_COOLDOWN_SECONDS,
            max_value=PRESENCE_PROBE_MAX_COOLDOWN_SECONDS,
        ),
        "presence_probe_max_prompts": lambda v: _coerce_bounded_int(
            v,
            default_value=PRESENCE_PROBE_DEFAULT_MAX_PROMPTS,
            min_value=1,
            max_value=PRESENCE_PROBE_MAX_PROMPTS_CAP,
        ),
        # Question is trimmed and capped at 160 characters.
        "presence_probe_question": lambda v: str(v or "").strip()[:160],
        "presence_probe_enabled": bool,
        "presence_probe_include_context": bool,
    }
    for field, value in update_data.items():
        target = column_by_field.get(field, field)
        normalize = normalizers.get(target)
        setattr(assistant, target, normalize(value) if normalize else value)
|
|
|
|
|
|
def _ensure_assistant_opener_audio(db: Session, assistant: Assistant) -> AssistantOpenerAudio:
    """Return the assistant's opener-audio row, creating a disabled one if absent."""
    existing = assistant.opener_audio
    if existing:
        return existing
    created = AssistantOpenerAudio(assistant_id=assistant.id, enabled=False)
    db.add(created)
    # Flush (no commit) so the row materializes inside the caller's transaction.
    db.flush()
    return created
|
|
|
|
|
|
def _resolve_tts_runtime_for_assistant(db: Session, assistant: Assistant) -> tuple[Dict[str, Any], Optional[Voice]]:
    """Resolve the effective TTS service config plus the Voice row backing it.

    Returns an empty dict (and/or None voice) when no TTS config is resolvable.
    """
    metadata, _warnings = _resolve_runtime_metadata(db, assistant)
    services_raw = metadata.get("services")
    services = services_raw if isinstance(services_raw, dict) else {}
    tts_raw = services.get("tts") if isinstance(services, dict) else None
    tts_cfg = tts_raw if isinstance(tts_raw, dict) else {}
    voice_row = db.query(Voice).filter(Voice.id == assistant.voice).first() if assistant.voice else None
    return tts_cfg, voice_row
|
|
|
|
|
|
def _tts_fingerprint(tts_cfg: Dict[str, Any], opener_text: str) -> str:
|
|
identity = {
|
|
"provider": tts_cfg.get("provider"),
|
|
"model": tts_cfg.get("model"),
|
|
"voice": tts_cfg.get("voice"),
|
|
"speed": tts_cfg.get("speed"),
|
|
"text": opener_text,
|
|
}
|
|
return hashlib.sha256(str(identity).encode("utf-8")).hexdigest()
|
|
|
|
|
|
def _synthesize_openai_compatible_wav(
    *,
    text: str,
    model: str,
    voice_key: str,
    speed: float,
    api_key: str,
    base_url: str,
) -> bytes:
    """Call an OpenAI-compatible /audio/speech endpoint and return WAV bytes.

    Raises HTTPException(502) with the vendor's error message when the request
    does not return HTTP 200.
    """
    payload = {
        "model": model or OPENAI_COMPATIBLE_DEFAULT_MODEL,
        "input": text,
        "voice": voice_key,
        "response_format": "wav",
        "speed": speed,
    }
    with httpx.Client(timeout=45.0) as client:
        response = client.post(
            f"{base_url.rstrip('/')}/audio/speech",
            headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
            json=payload,
        )
    if response.status_code != 200:
        detail = response.text
        # Prefer a structured error message when the body is JSON; fall back
        # to the raw text body otherwise.
        try:
            detail_json = response.json()
            detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
        except Exception:
            pass
        raise HTTPException(status_code=502, detail=f"TTS vendor error: {detail}")
    return response.content
|
|
|
|
|
|
def _wav_to_pcm16_mono_16k(wav_bytes: bytes) -> tuple[bytes, int]:
|
|
with wave.open(io.BytesIO(wav_bytes), "rb") as wav_file:
|
|
channels = wav_file.getnchannels()
|
|
sample_width = wav_file.getsampwidth()
|
|
sample_rate = wav_file.getframerate()
|
|
frames = wav_file.getnframes()
|
|
raw = wav_file.readframes(frames)
|
|
|
|
if sample_width != 2:
|
|
raise HTTPException(status_code=400, detail=f"Unsupported WAV sample width: {sample_width * 8}bit")
|
|
|
|
if channels > 1:
|
|
raw = audioop.tomono(raw, sample_width, 0.5, 0.5)
|
|
|
|
if sample_rate != 16000:
|
|
raw, _ = audioop.ratecv(raw, sample_width, 1, sample_rate, 16000, None)
|
|
|
|
duration_ms = int((len(raw) / (16000 * 2)) * 1000)
|
|
return raw, duration_ms
|
|
|
|
|
|
def _persist_opener_audio_pcm(assistant_id: str, pcm_bytes: bytes) -> str:
    """Write the opener PCM payload to disk and return the file path as a string.

    The file lands at OPENER_AUDIO_DIR/<assistant_id>.pcm; the directory is
    created on demand.
    """
    OPENER_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
    file_path = OPENER_AUDIO_DIR / f"{assistant_id}.pcm"
    # Path.write_bytes replaces the open()/write() pair and guarantees the
    # handle is closed even on error.
    file_path.write_bytes(pcm_bytes)
    return str(file_path)
|
|
|
|
|
|
def _opener_audio_out(record: Optional[AssistantOpenerAudio]) -> AssistantOpenerAudioOut:
    """Map an opener-audio ORM row onto its API schema.

    Returns an all-defaults schema when no row exists; "ready" additionally
    requires the PCM file to be present on disk.
    """
    if not record:
        return AssistantOpenerAudioOut()
    file_on_disk = bool(record.file_path and Path(record.file_path).exists())
    return AssistantOpenerAudioOut(
        enabled=bool(record.enabled),
        ready=file_on_disk,
        encoding=record.encoding,
        sample_rate_hz=record.sample_rate_hz,
        channels=record.channels,
        duration_ms=record.duration_ms,
        updated_at=record.updated_at,
        text_hash=record.text_hash,
        tts_fingerprint=record.tts_fingerprint,
    )
|
|
|
|
|
|
# ============ Assistants ============
|
|
@router.get("")
def list_assistants(
    page: int = 1,
    limit: int = 50,
    db: Session = Depends(get_db)
):
    """List assistants, newest first, with simple offset pagination."""
    _ensure_assistant_schema(db)
    base_query = db.query(Assistant)
    total = base_query.count()
    offset = (page - 1) * limit
    rows = (
        base_query.order_by(Assistant.created_at.desc())
        .offset(offset)
        .limit(limit)
        .all()
    )
    return {
        "total": total,
        "page": page,
        "limit": limit,
        "list": [assistant_to_dict(row) for row in rows],
    }
|
|
|
|
|
|
@router.get("/{id}", response_model=AssistantOut)
def get_assistant(id: str, db: Session = Depends(get_db)):
    """Fetch a single assistant by ID; 404 when it does not exist."""
    _ensure_assistant_schema(db)
    assistant = db.query(Assistant).filter(Assistant.id == id).first()
    if assistant is None:
        raise HTTPException(status_code=404, detail="Assistant not found")
    return assistant_to_dict(assistant)
|
|
|
|
|
|
@router.get("/{id}/config", response_model=AssistantEngineConfigResponse)
def get_assistant_config(id: str, db: Session = Depends(get_db)):
    """Canonical engine config endpoint consumed by engine backend adapter."""
    _ensure_assistant_schema(db)
    assistant = db.query(Assistant).filter(Assistant.id == id).first()
    if assistant is None:
        raise HTTPException(status_code=404, detail="Assistant not found")
    return _build_engine_assistant_config(db, assistant)
|
|
|
|
|
|
@router.get("/{id}/runtime-config", response_model=AssistantEngineConfigResponse)
def get_assistant_runtime_config(id: str, db: Session = Depends(get_db)):
    """Legacy alias for resolved engine runtime config."""
    _ensure_assistant_schema(db)
    assistant = db.query(Assistant).filter(Assistant.id == id).first()
    if assistant is None:
        raise HTTPException(status_code=404, detail="Assistant not found")
    return _build_engine_assistant_config(db, assistant)
|
|
|
|
|
|
@router.post("", response_model=AssistantOut)
def create_assistant(data: AssistantCreate, db: Session = Depends(get_db)):
    """Create a new assistant.

    Presence-probe numeric fields are clamped to the module-level bounds; the
    opener-audio enabled flag lives on a separate child row created after the
    assistant itself is committed.
    """
    _ensure_assistant_schema(db)
    assistant = Assistant(
        # Short random ID (first 8 hex chars of a UUID4).
        id=str(uuid.uuid4())[:8],
        user_id=1,  # Default user; authentication to be added later.
        name=data.name,
        first_turn_mode=data.firstTurnMode,
        opener=data.opener,
        generated_opener_enabled=data.generatedOpenerEnabled,
        prompt=data.prompt,
        knowledge_base_id=data.knowledgeBaseId,
        language=data.language,
        voice_output_enabled=data.voiceOutputEnabled,
        voice=data.voice,
        speed=data.speed,
        hotwords=data.hotwords,
        tools=data.tools,
        bot_cannot_be_interrupted=data.botCannotBeInterrupted,
        interruption_sensitivity=data.interruptionSensitivity,
        presence_probe_enabled=bool(data.presenceProbeEnabled),
        presence_probe_idle_seconds=_coerce_bounded_float(
            data.presenceProbeIdleSeconds,
            default_value=PRESENCE_PROBE_DEFAULT_IDLE_SECONDS,
            min_value=PRESENCE_PROBE_MIN_IDLE_SECONDS,
            max_value=PRESENCE_PROBE_MAX_IDLE_SECONDS,
        ),
        presence_probe_cooldown_seconds=_coerce_bounded_float(
            data.presenceProbeCooldownSeconds,
            default_value=PRESENCE_PROBE_DEFAULT_COOLDOWN_SECONDS,
            min_value=PRESENCE_PROBE_MIN_COOLDOWN_SECONDS,
            max_value=PRESENCE_PROBE_MAX_COOLDOWN_SECONDS,
        ),
        presence_probe_max_prompts=_coerce_bounded_int(
            data.presenceProbeMaxPrompts,
            default_value=PRESENCE_PROBE_DEFAULT_MAX_PROMPTS,
            min_value=1,
            max_value=PRESENCE_PROBE_MAX_PROMPTS_CAP,
        ),
        presence_probe_include_context=bool(data.presenceProbeIncludeContext),
        # Question is trimmed and capped at 160 characters.
        presence_probe_question=str(data.presenceProbeQuestion or "").strip()[:160],
        config_mode=data.configMode,
        api_url=data.apiUrl,
        api_key=data.apiKey,
        llm_model_id=data.llmModelId,
        asr_model_id=data.asrModelId,
        embedding_model_id=data.embeddingModelId,
        rerank_model_id=data.rerankModelId,
    )
    db.add(assistant)
    db.commit()
    db.refresh(assistant)
    # The opener-audio child row can only be created once the assistant ID is
    # persisted; hence the second commit.
    opener_audio = _ensure_assistant_opener_audio(db, assistant)
    opener_audio.enabled = bool(data.openerAudioEnabled)
    opener_audio.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(assistant)
    return assistant_to_dict(assistant)
|
|
|
|
|
|
@router.get("/{id}/opener-audio", response_model=AssistantOpenerAudioOut)
def get_assistant_opener_audio(id: str, db: Session = Depends(get_db)):
    """Return the opener-audio status/metadata for an assistant."""
    _ensure_assistant_schema(db)
    assistant = db.query(Assistant).filter(Assistant.id == id).first()
    if assistant is None:
        raise HTTPException(status_code=404, detail="Assistant not found")
    return _opener_audio_out(assistant.opener_audio)
|
|
|
|
|
|
@router.get("/{id}/opener-audio/pcm")
def get_assistant_opener_audio_pcm(id: str, db: Session = Depends(get_db)):
    """Serve the pre-generated opener PCM file for an assistant."""
    _ensure_assistant_schema(db)
    assistant = db.query(Assistant).filter(Assistant.id == id).first()
    if assistant is None:
        raise HTTPException(status_code=404, detail="Assistant not found")

    record = assistant.opener_audio
    if not record or not record.file_path:
        raise HTTPException(status_code=404, detail="Opener audio not generated")

    pcm_path = Path(record.file_path)
    if not pcm_path.exists():
        raise HTTPException(status_code=404, detail="Opener audio file missing")

    return FileResponse(
        str(pcm_path),
        media_type="application/octet-stream",
        filename=f"{assistant.id}.pcm",
    )
|
|
|
|
|
|
@router.post("/{id}/opener-audio/generate", response_model=AssistantOpenerAudioOut)
def generate_assistant_opener_audio(
    id: str,
    data: AssistantOpenerAudioGenerateRequest,
    db: Session = Depends(get_db),
):
    """Synthesize and persist the assistant's opener audio as 16 kHz mono PCM.

    Supports OpenAI-compatible and DashScope TTS providers only. Credentials
    fall back from the resolved runtime config, to the Voice row, to
    environment variables. The resulting file plus its metadata (duration,
    text hash, TTS fingerprint) are stored on the AssistantOpenerAudio row.
    """
    _ensure_assistant_schema(db)
    assistant = db.query(Assistant).filter(Assistant.id == id).first()
    if not assistant:
        raise HTTPException(status_code=404, detail="Assistant not found")
    if not assistant.voice_output_enabled:
        raise HTTPException(status_code=400, detail="Voice output is disabled")

    # Explicit request text overrides the stored opener; empty text is rejected.
    opener_text = (data.text if data.text is not None else assistant.opener or "").strip()
    if not opener_text:
        raise HTTPException(status_code=400, detail="Opener text is empty")

    tts_cfg, voice = _resolve_tts_runtime_for_assistant(db, assistant)
    provider = str(tts_cfg.get("provider") or "").strip().lower()
    if provider not in {"openai_compatible", "dashscope"}:
        raise HTTPException(status_code=400, detail=f"Unsupported provider for preloaded opener audio: {provider or 'unknown'}")

    speed = float(tts_cfg.get("speed") or assistant.speed or 1.0)
    voice_key = str(tts_cfg.get("voice") or "").strip()
    model = str(tts_cfg.get("model") or "").strip() or OPENAI_COMPATIBLE_DEFAULT_MODEL
    api_key = str(tts_cfg.get("apiKey") or "").strip()
    base_url = str(tts_cfg.get("baseUrl") or "").strip()

    if provider == "openai_compatible":
        # Credential fallback chain: runtime config → Voice row → environment.
        if not api_key:
            if voice and voice.api_key:
                api_key = voice.api_key.strip()
        if not api_key:
            api_key = (os.getenv("SILICONFLOW_API_KEY", "") or os.getenv("TTS_API_KEY", "")).strip()
        if not api_key:
            raise HTTPException(status_code=400, detail="TTS API key is missing")
        if not base_url:
            base_url = OPENAI_COMPATIBLE_DEFAULT_BASE_URL
        wav_bytes = _synthesize_openai_compatible_wav(
            text=opener_text,
            model=model,
            voice_key=voice_key,
            speed=speed,
            api_key=api_key,
            base_url=base_url,
        )
    else:
        # Local import avoids a circular dependency with the voices router.
        from .voices import _synthesize_dashscope_preview, DASHSCOPE_DEFAULT_BASE_URL, DASHSCOPE_DEFAULT_MODEL, DASHSCOPE_DEFAULT_VOICE_KEY
        if not api_key:
            if voice and voice.api_key:
                api_key = voice.api_key.strip()
        if not api_key:
            api_key = (os.getenv("DASHSCOPE_API_KEY", "") or os.getenv("TTS_API_KEY", "")).strip()
        if not api_key:
            raise HTTPException(status_code=400, detail="DashScope API key is missing")
        if not base_url:
            base_url = DASHSCOPE_DEFAULT_BASE_URL
        if not model:
            model = DASHSCOPE_DEFAULT_MODEL
        if not voice_key:
            voice_key = DASHSCOPE_DEFAULT_VOICE_KEY
        try:
            wav_bytes = _synthesize_dashscope_preview(
                text=opener_text,
                api_key=api_key,
                base_url=base_url,
                model=model,
                voice_key=voice_key,
                speed=speed,
            )
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"DashScope opener audio generation failed: {exc}") from exc

    # Normalize vendor WAV output into the engine's PCM format and persist it
    # together with metadata that lets callers detect stale audio.
    pcm_bytes, duration_ms = _wav_to_pcm16_mono_16k(wav_bytes)
    record = _ensure_assistant_opener_audio(db, assistant)
    record.enabled = True
    record.file_path = _persist_opener_audio_pcm(assistant.id, pcm_bytes)
    record.encoding = "pcm_s16le"
    record.sample_rate_hz = 16000
    record.channels = 1
    record.duration_ms = duration_ms
    record.text_hash = hashlib.sha256(opener_text.encode("utf-8")).hexdigest()
    record.tts_fingerprint = _tts_fingerprint(tts_cfg, opener_text)
    now = datetime.utcnow()
    if not record.created_at:
        record.created_at = now
    record.updated_at = now
    # Touch the assistant too so its config version ID changes.
    assistant.updated_at = now
    db.commit()
    db.refresh(assistant)
    return _opener_audio_out(assistant.opener_audio)
|
|
|
|
|
|
@router.put("/{id}")
def update_assistant(id: str, data: AssistantUpdate, db: Session = Depends(get_db)):
    """Update an assistant's fields; only fields present in the payload change."""
    _ensure_assistant_schema(db)
    assistant = db.query(Assistant).filter(Assistant.id == id).first()
    if assistant is None:
        raise HTTPException(status_code=404, detail="Assistant not found")

    payload = data.model_dump(exclude_unset=True)
    # openerAudioEnabled lives on the AssistantOpenerAudio child row, not on
    # the assistant itself — pull it out before the generic field copy.
    opener_flag = payload.pop("openerAudioEnabled", None)
    _apply_assistant_update(assistant, payload)
    if opener_flag is not None:
        audio_record = _ensure_assistant_opener_audio(db, assistant)
        audio_record.enabled = bool(opener_flag)
        audio_record.updated_at = datetime.utcnow()

    assistant.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(assistant)
    return assistant_to_dict(assistant)
|
|
|
|
|
|
@router.delete("/{id}")
def delete_assistant(id: str, db: Session = Depends(get_db)):
    """Delete an assistant by ID; 404 when it does not exist."""
    _ensure_assistant_schema(db)
    assistant = db.query(Assistant).filter(Assistant.id == id).first()
    if assistant is None:
        raise HTTPException(status_code=404, detail="Assistant not found")
    db.delete(assistant)
    db.commit()
    return {"message": "Deleted successfully"}
|