Compare commits
257 Commits
86744f0842
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9195957753 | ||
|
|
9b9fbf432f | ||
|
|
f3612a710d | ||
|
|
13684d498b | ||
|
|
47293ac46d | ||
|
|
373be4eb97 | ||
|
|
e4ccec6cc1 | ||
|
|
312fe0cf31 | ||
|
|
57264ad831 | ||
|
|
bfe165daae | ||
|
|
e07e5128fc | ||
|
|
a2fba260fd | ||
|
|
b300b469dc | ||
|
|
e41d34fe23 | ||
|
|
aeeeee20d1 | ||
|
|
3604db21eb | ||
|
|
65ae2287d5 | ||
|
|
da38157638 | ||
|
|
e11c3abb9e | ||
|
|
7e0b777923 | ||
|
|
4e2450e800 | ||
|
|
6b589a1b7c | ||
|
|
1cecbaa172 | ||
|
|
935f2fbd1f | ||
|
|
d0a6419990 | ||
|
|
b8760c24be | ||
|
|
14abbe6f10 | ||
|
|
efdcbe5550 | ||
|
|
3b6a2f75ee | ||
|
|
ac9b0047ee | ||
|
|
4748f3b5f1 | ||
|
|
947af3a525 | ||
|
|
d572e1a7f0 | ||
|
|
d03b3b0e0c | ||
|
|
526024d603 | ||
|
|
b4c6277d2a | ||
|
|
a8fa66e9cc | ||
|
|
aaef370d70 | ||
|
|
7d4af18815 | ||
|
|
530d95eea4 | ||
|
|
4c05131536 | ||
|
|
80fff09b76 | ||
|
|
eecde9f0fb | ||
|
|
7fbf52078f | ||
|
|
a003134477 | ||
|
|
85315ba6ca | ||
|
|
9734b38808 | ||
|
|
0a7a3253a6 | ||
|
|
a82100fc79 | ||
|
|
d0897aca92 | ||
|
|
70b4043f9b | ||
|
|
3aa9e0f432 | ||
|
|
fb017f9952 | ||
|
|
00b88c5afa | ||
|
|
b5cdb76e52 | ||
|
|
4d553de34d | ||
|
|
31b3969b96 | ||
|
|
3f22e2b875 | ||
|
|
531688aa6b | ||
|
|
3626297211 | ||
|
|
1561056a3d | ||
|
|
3a5d27d6c3 | ||
|
|
3643431565 | ||
|
|
2418df80e5 | ||
|
|
6a46ec69f4 | ||
|
|
b4fa664d73 | ||
|
|
0821d73e7c | ||
|
|
a7da109983 | ||
|
|
aae41d4512 | ||
|
|
c4c473105e | ||
|
|
8b59569b99 | ||
|
|
e40899613f | ||
|
|
915d2f4bd8 | ||
|
|
4d9f083e20 | ||
|
|
531cf6080a | ||
|
|
229243e832 | ||
|
|
95c6e93a9c | ||
|
|
cdd8275e35 | ||
|
|
b035e023c4 | ||
|
|
0f1165af64 | ||
|
|
487634c494 | ||
|
|
5f768edf68 | ||
|
|
d942c85eff | ||
|
|
8d453e10da | ||
|
|
e108ee66cb | ||
|
|
6178cc05bb | ||
|
|
71cbfa2b48 | ||
|
|
6a9b5fcff4 | ||
|
|
3272a7a68a | ||
|
|
f1b60bef22 | ||
|
|
403b4b93c7 | ||
|
|
0b308f9bce | ||
|
|
e14eac347f | ||
|
|
0f02de5fc3 | ||
|
|
0de6fe529e | ||
|
|
fb95e2abe2 | ||
|
|
833cb0d4c4 | ||
|
|
fbbb2e0fee | ||
|
|
da83c8ec8a | ||
|
|
cfc8db3fe7 | ||
|
|
37b646186d | ||
|
|
14b4b3d966 | ||
|
|
1bcf625f86 | ||
|
|
8bc21c7874 | ||
|
|
f77f7c7531 | ||
|
|
b193f91432 | ||
|
|
562341a72c | ||
|
|
6744646390 | ||
|
|
72ed7d0512 | ||
|
|
56f8aa2191 | ||
|
|
81ed89b84f | ||
|
|
3c7efce80b | ||
|
|
20afc63a28 | ||
|
|
da1293e39a | ||
|
|
28ca003662 | ||
|
|
14991af1bf | ||
|
|
ff3a03b1ad | ||
|
|
260ff621bf | ||
|
|
0f9543d8a4 | ||
|
|
024beeaea3 | ||
|
|
98207936ae | ||
|
|
35bd83767e | ||
|
|
838c19bf9c | ||
|
|
aabf2ce8b9 | ||
|
|
543528239e | ||
|
|
a92a56b845 | ||
|
|
bbfb5570cc | ||
|
|
399c9c97b1 | ||
|
|
6744704c7e | ||
|
|
39bcd67eac | ||
|
|
82521e7b90 | ||
|
|
edcbc2cec7 | ||
|
|
56ca95c200 | ||
|
|
cbebfe1c7a | ||
|
|
a7ef8858de | ||
|
|
ef13ddb6b2 | ||
|
|
a17ef6f182 | ||
|
|
d41db6418c | ||
|
|
6179053388 | ||
|
|
6e63b49a4c | ||
|
|
44ad52669f | ||
|
|
2ab4075fcd | ||
|
|
24ec548924 | ||
|
|
ede12e2df0 | ||
|
|
e7605f661b | ||
|
|
02d12ea996 | ||
|
|
4c46793169 | ||
|
|
80e1d24443 | ||
|
|
9304927fe9 | ||
|
|
180a69ca67 | ||
|
|
6798806acd | ||
|
|
15523d9ec2 | ||
|
|
2d7fc2b700 | ||
|
|
6cac24918d | ||
|
|
54eb48fb74 | ||
|
|
4b8da32787 | ||
|
|
539cf2fda2 | ||
|
|
d2aaba999b | ||
|
|
bad0206478 | ||
|
|
c38782e608 | ||
|
|
5f50c137e4 | ||
|
|
30f757529f | ||
|
|
375181a524 | ||
|
|
aa2a358b38 | ||
|
|
323ef61573 | ||
|
|
94a562a1d5 | ||
|
|
1462488969 | ||
|
|
436fb3c1e5 | ||
|
|
bbeffa89ed | ||
|
|
6b4391c423 | ||
|
|
ed1f7fc8b0 | ||
|
|
6a42c47700 | ||
|
|
3537a865f7 | ||
|
|
b3e1c3d380 | ||
|
|
b34d500479 | ||
|
|
0135f718f3 | ||
|
|
29d0b931eb | ||
|
|
a140e3a599 | ||
|
|
2b7e3dd499 | ||
|
|
b57d9f3a65 | ||
|
|
220f394f85 | ||
|
|
088943166c | ||
|
|
b52cd65848 | ||
|
|
9cac01e31d | ||
|
|
1d7c282066 | ||
|
|
ab90b7c7df | ||
|
|
38e20052f7 | ||
|
|
a26e3f4026 | ||
|
|
cbae28263b | ||
|
|
29d8361ca9 | ||
|
|
c961d63e75 | ||
|
|
6648f1d478 | ||
|
|
11016c04da | ||
|
|
3b5d7d0d4d | ||
|
|
45d7a41cff | ||
|
|
ed044bd8ad | ||
|
|
cb5c08d84d | ||
|
|
8fd6daaed1 | ||
|
|
fe05cf5d74 | ||
|
|
65aab79fb6 | ||
|
|
1b83b58d48 | ||
|
|
cd68ebe306 | ||
|
|
a42dd4c712 | ||
|
|
c7044c4c77 | ||
|
|
97984014e8 | ||
|
|
d4b645568e | ||
|
|
b3af104300 | ||
|
|
210301dc6b | ||
|
|
5349ed88e7 | ||
|
|
7df11dd846 | ||
|
|
0ae85513fb | ||
|
|
2bbe659897 | ||
|
|
d657a21024 | ||
|
|
fdfb3df714 | ||
|
|
b390ad2171 | ||
|
|
91ebe26f49 | ||
|
|
a8ef883050 | ||
|
|
f2b6cefd30 | ||
|
|
5c1d236f0d | ||
|
|
17bd4a78f4 | ||
|
|
f4aa432f0e | ||
|
|
c5ff3ea261 | ||
|
|
ab8e3a82d9 | ||
|
|
d841c59f45 | ||
|
|
7c5b71a101 | ||
|
|
8edbe14382 | ||
|
|
0643780c63 | ||
|
|
59f326f8e5 | ||
|
|
c7260677a1 | ||
|
|
8c88d7c57a | ||
|
|
c15c5283e2 | ||
|
|
fb6d1eb1da | ||
|
|
479cfb797b | ||
|
|
0d13d6acdb | ||
|
|
a12ba0c4a4 | ||
|
|
bdd5a7a274 | ||
|
|
7206c313d2 | ||
|
|
e643c7db17 | ||
|
|
59cda0987f | ||
|
|
d0b96a3f72 | ||
|
|
77b186dceb | ||
|
|
0fc56e2685 | ||
|
|
be68e335f1 | ||
|
|
6462c4f432 | ||
|
|
4bf2f788ad | ||
|
|
97e3236e76 | ||
|
|
8ec91a7fa8 | ||
|
|
68f69f9b09 | ||
|
|
cd5d0a668d | ||
|
|
8069a16227 | ||
|
|
68e47320cd | ||
|
|
9410265a2b | ||
|
|
c563b13fbf | ||
|
|
eed3ee824f | ||
|
|
3d8635670f | ||
|
|
7012f8edaf | ||
|
|
727fe8a997 |
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# OS artifacts
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
8
api/.gitignore
vendored
8
api/.gitignore
vendored
@@ -36,8 +36,12 @@ env/
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
|
||||
# Vector store data
|
||||
data/vector_store/
|
||||
# Runtime data (SQLite, vector store, uploads, generated artifacts)
|
||||
data/**
|
||||
!data/
|
||||
!data/.gitkeep
|
||||
!data/vector_store/
|
||||
data/vector_store/**
|
||||
!data/vector_store/.gitkeep
|
||||
|
||||
# IDE
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
FROM python:3.11-slim
|
||||
FROM python:3.12-slim
|
||||
|
||||
# Install build tools for C++11 (needed for native extensions, e.g. chromadb)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# 安装依赖
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# 复制代码
|
||||
COPY . .
|
||||
|
||||
# 创建数据目录
|
||||
RUN mkdir -p /app/data
|
||||
|
||||
EXPOSE 8000
|
||||
EXPOSE 8100
|
||||
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8100", "--reload"]
|
||||
|
||||
288
api/README.md
288
api/README.md
@@ -1,13 +1,13 @@
|
||||
# AI VideoAssistant Backend
|
||||
|
||||
Python 后端 API,配合前端 `ai-videoassistant-frontend` 使用。
|
||||
Python 后端 API,配合前端 `web/` 模块使用。
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 1. 安装依赖
|
||||
|
||||
```bash
|
||||
cd ~/Code/ai-videoassistant-backend
|
||||
cd api
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
@@ -21,48 +21,182 @@ python init_db.py
|
||||
- 创建 `data/app.db` SQLite 数据库
|
||||
- 初始化默认声音数据
|
||||
|
||||
可选参数(按需重建):
|
||||
|
||||
```bash
|
||||
# 仅重建数据库(drop + create)并初始化默认数据
|
||||
python init_db.py --rebuild-db
|
||||
|
||||
# 仅重建向量库集合(不动 DB 表结构);会重置文档索引状态为 pending
|
||||
python init_db.py --rebuild-vector-store
|
||||
|
||||
# 同时重建 DB 和向量库
|
||||
python init_db.py --rebuild-db --rebuild-vector-store
|
||||
|
||||
# 仅执行重建,不写入默认数据
|
||||
python init_db.py --rebuild-db --skip-seed
|
||||
```
|
||||
|
||||
### 3. 启动服务
|
||||
|
||||
```bash
|
||||
# 开发模式 (热重载)
|
||||
python -m uvicorn main:app --reload --host 0.0.0.0 --port 8000
|
||||
python -m uvicorn app.main:app --reload --host 0.0.0.0 --port 8100
|
||||
```
|
||||
|
||||
服务运行在: http://localhost:8100
|
||||
|
||||
### 4. 测试 API
|
||||
|
||||
```bash
|
||||
# 健康检查
|
||||
curl http://localhost:8000/health
|
||||
curl http://localhost:8100/health
|
||||
|
||||
# 获取助手列表
|
||||
curl http://localhost:8000/api/assistants
|
||||
curl http://localhost:8100/api/assistants
|
||||
|
||||
# 获取声音列表
|
||||
curl http://localhost:8000/api/voices
|
||||
curl http://localhost:8100/api/voices
|
||||
|
||||
# 获取通话历史
|
||||
curl http://localhost:8000/api/history
|
||||
curl http://localhost:8100/api/history
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API 文档
|
||||
|
||||
| 端点 | 方法 | 说明 |
|
||||
完整 API 文档位于 [docs/](docs/) 目录:
|
||||
|
||||
| 模块 | 端点 | 方法 | 说明 |
|
||||
|------|------|------|------|
|
||||
| **Assistant** | `/api/assistants` | GET | 助手列表 |
|
||||
| | | POST | 创建助手 |
|
||||
| | `/api/assistants/{id}` | GET | 助手详情 |
|
||||
| | | PUT | 更新助手 |
|
||||
| | | DELETE | 删除助手 |
|
||||
| **Voice** | `/api/voices` | GET | 声音库列表 |
|
||||
| | | POST | 添加声音 |
|
||||
| | `/api/voices/{id}` | GET | 声音详情 |
|
||||
| | | PUT | 更新声音 |
|
||||
| | | DELETE | 删除声音 |
|
||||
| | `/api/voices/{id}/preview` | POST | 预览声音 |
|
||||
| **LLM Models** | `/api/llm` | GET | LLM 模型列表 |
|
||||
| | | POST | 添加模型 |
|
||||
| | `/api/llm/{id}` | GET | 模型详情 |
|
||||
| | | PUT | 更新模型 |
|
||||
| | | DELETE | 删除模型 |
|
||||
| | `/api/llm/{id}/test` | POST | 测试模型连接 |
|
||||
| **ASR Models** | `/api/asr` | GET | ASR 模型列表 |
|
||||
| | | POST | 添加模型 |
|
||||
| | `/api/asr/{id}` | GET | 模型详情 |
|
||||
| | | PUT | 更新模型 |
|
||||
| | | DELETE | 删除模型 |
|
||||
| | `/api/asr/{id}/test` | POST | 测试识别 |
|
||||
| **History** | `/api/history` | GET | 通话历史列表 |
|
||||
| | `/api/history/{id}` | GET | 通话详情 |
|
||||
| | | PUT | 更新通话记录 |
|
||||
| | | DELETE | 删除记录 |
|
||||
| | `/api/history/{id}/transcripts` | POST | 添加转写 |
|
||||
| | `/api/history/search` | GET | 搜索历史 |
|
||||
| | `/api/history/stats` | GET | 统计数据 |
|
||||
| **Knowledge** | `/api/knowledge/bases` | GET | 知识库列表 |
|
||||
| | | POST | 创建知识库 |
|
||||
| | `/api/knowledge/bases/{id}` | GET | 知识库详情 |
|
||||
| | | PUT | 更新知识库 |
|
||||
| | | DELETE | 删除知识库 |
|
||||
| | `/api/knowledge/bases/{kb_id}/documents` | POST | 上传文档 |
|
||||
| | `/api/knowledge/bases/{kb_id}/documents/{doc_id}` | DELETE | 删除文档 |
|
||||
| | `/api/knowledge/bases/{kb_id}/documents/{doc_id}/index` | POST | 索引文档 |
|
||||
| | `/api/knowledge/search` | POST | 知识搜索 |
|
||||
| **Workflow** | `/api/workflows` | GET | 工作流列表 |
|
||||
| | | POST | 创建工作流 |
|
||||
| | `/api/workflows/{id}` | GET | 工作流详情 |
|
||||
| | | PUT | 更新工作流 |
|
||||
| | | DELETE | 删除工作流 |
|
||||
|
||||
---
|
||||
|
||||
## 数据模型
|
||||
|
||||
### Assistant (小助手)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `/api/assistants` | GET | 助手列表 |
|
||||
| `/api/assistants` | POST | 创建助手 |
|
||||
| `/api/assistants/{id}` | GET | 助手详情 |
|
||||
| `/api/assistants/{id}` | PUT | 更新助手 |
|
||||
| `/api/assistants/{id}` | DELETE | 删除助手 |
|
||||
| `/api/voices` | GET | 声音库列表 |
|
||||
| `/api/history` | GET | 通话历史列表 |
|
||||
| `/api/history/{id}` | GET | 通话详情 |
|
||||
| `/api/history/{id}/transcripts` | POST | 添加转写 |
|
||||
| `/api/history/{id}/audio/{turn}` | GET | 获取音频 |
|
||||
| id | string | 助手 ID |
|
||||
| name | string | 助手名称 |
|
||||
| opener | string | 开场白 |
|
||||
| prompt | string | 系统提示词 |
|
||||
| knowledgeBaseId | string | 关联知识库 ID |
|
||||
| language | string | 语言: zh/en |
|
||||
| voice | string | 声音 ID |
|
||||
| speed | float | 语速 (0.5-2.0) |
|
||||
| hotwords | array | 热词列表 |
|
||||
| tools | array | 启用的工具列表 |
|
||||
| llmModelId | string | LLM 模型 ID |
|
||||
| asrModelId | string | ASR 模型 ID |
|
||||
| embeddingModelId | string | Embedding 模型 ID |
|
||||
| rerankModelId | string | Rerank 模型 ID |
|
||||
|
||||
### Voice (声音资源)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 声音 ID |
|
||||
| name | string | 声音名称 |
|
||||
| vendor | string | 厂商: Ali/Volcano/Minimax |
|
||||
| gender | string | 性别: Male/Female |
|
||||
| language | string | 语言: zh/en |
|
||||
| model | string | 厂商模型标识 |
|
||||
| voice_key | string | 厂商 voice_key |
|
||||
| speed | float | 语速 |
|
||||
| gain | int | 增益 (dB) |
|
||||
| pitch | int | 音调 |
|
||||
|
||||
### LLMModel (模型接入)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 模型 ID |
|
||||
| name | string | 模型名称 |
|
||||
| vendor | string | 厂商 |
|
||||
| type | string | 类型: text/embedding/rerank |
|
||||
| base_url | string | API 地址 |
|
||||
| api_key | string | API 密钥 |
|
||||
| model_name | string | 模型名称 |
|
||||
| temperature | float | 温度参数 |
|
||||
|
||||
### ASRModel (语音识别)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 模型 ID |
|
||||
| name | string | 模型名称 |
|
||||
| vendor | string | 厂商 |
|
||||
| language | string | 语言: zh/en/Multi-lingual |
|
||||
| base_url | string | API 地址 |
|
||||
| api_key | string | API 密钥 |
|
||||
| hotwords | array | 热词列表 |
|
||||
|
||||
### CallRecord (通话记录)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 记录 ID |
|
||||
| assistant_id | string | 助手 ID |
|
||||
| source | string | 来源: debug/external |
|
||||
| status | string | 状态: connected/missed/failed |
|
||||
| started_at | string | 开始时间 |
|
||||
| duration_seconds | int | 通话时长 |
|
||||
| summary | string | 通话摘要 |
|
||||
| transcripts | array | 对话转写 |
|
||||
|
||||
---
|
||||
|
||||
## 使用 Docker 启动
|
||||
|
||||
```bash
|
||||
cd ~/Code/ai-videoassistant-backend
|
||||
cd api
|
||||
|
||||
# 启动所有服务
|
||||
docker-compose up -d
|
||||
@@ -71,33 +205,143 @@ docker-compose up -d
|
||||
docker-compose logs -f backend
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 目录结构
|
||||
|
||||
```
|
||||
backend/
|
||||
api/
|
||||
├── app/
|
||||
│ ├── __init__.py
|
||||
│ ├── main.py # FastAPI 入口
|
||||
│ ├── db.py # SQLite 连接
|
||||
│ ├── models.py # 数据模型
|
||||
│ ├── models.py # SQLAlchemy 数据模型
|
||||
│ ├── schemas.py # Pydantic 模型
|
||||
│ ├── storage.py # MinIO 存储
|
||||
│ ├── vector_store.py # 向量存储
|
||||
│ └── routers/
|
||||
│ ├── __init__.py
|
||||
│ ├── assistants.py # 助手 API
|
||||
│ └── history.py # 通话记录 API
|
||||
│ ├── history.py # 通话记录 API
|
||||
│ └── knowledge.py # 知识库 API
|
||||
├── data/ # 数据库文件
|
||||
├── docs/ # API 文档
|
||||
├── requirements.txt
|
||||
├── .env
|
||||
├── init_db.py
|
||||
└── docker-compose.yml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 环境变量
|
||||
|
||||
| 变量 | 默认值 | 说明 |
|
||||
|------|--------|------|
|
||||
| `PORT` | `8100` | 服务端口 |
|
||||
| `DATABASE_URL` | `sqlite:///./data/app.db` | 数据库连接 |
|
||||
| `MINIO_ENDPOINT` | `localhost:9000` | MinIO 地址 |
|
||||
| `MINIO_ACCESS_KEY` | `admin` | MinIO 密钥 |
|
||||
| `MINIO_SECRET_KEY` | `password123` | MinIO 密码 |
|
||||
| `MINIO_BUCKET` | `ai-audio` | 存储桶名称 |
|
||||
|
||||
---
|
||||
|
||||
## 数据库迁移
|
||||
|
||||
开发环境重新创建数据库:
|
||||
|
||||
```bash
|
||||
rm -f api/data/app.db
|
||||
python api/init_db.py
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 测试
|
||||
|
||||
### 安装测试依赖
|
||||
|
||||
```bash
|
||||
cd api
|
||||
pip install pytest pytest-cov -q
|
||||
```
|
||||
|
||||
### 运行所有测试
|
||||
|
||||
```bash
|
||||
# Windows
|
||||
run_tests.bat
|
||||
|
||||
# 或使用 pytest
|
||||
pytest tests/ -v
|
||||
```
|
||||
|
||||
### 运行特定测试
|
||||
|
||||
```bash
|
||||
# 只测试声音 API
|
||||
pytest tests/test_voices.py -v
|
||||
|
||||
# 只测试助手 API
|
||||
pytest tests/test_assistants.py -v
|
||||
|
||||
# 只测试历史记录 API
|
||||
pytest tests/test_history.py -v
|
||||
|
||||
# 只测试知识库 API
|
||||
pytest tests/test_knowledge.py -v
|
||||
```
|
||||
|
||||
### 测试覆盖率
|
||||
|
||||
```bash
|
||||
pytest tests/ --cov=app --cov-report=html
|
||||
# 查看报告: open htmlcov/index.html
|
||||
```
|
||||
|
||||
### 测试目录结构
|
||||
|
||||
```
|
||||
tests/
|
||||
├── __init__.py
|
||||
├── conftest.py # pytest fixtures
|
||||
├── test_voices.py # 声音 API 测试
|
||||
├── test_assistants.py # 助手 API 测试
|
||||
├── test_history.py # 历史记录 API 测试
|
||||
└── test_knowledge.py # 知识库 API 测试
|
||||
```
|
||||
|
||||
### 测试用例统计
|
||||
|
||||
| 模块 | 测试用例数 |
|
||||
|------|-----------|
|
||||
| Voice | 13 |
|
||||
| Assistant | 14 |
|
||||
| History | 18 |
|
||||
| Knowledge | 19 |
|
||||
| **总计** | **64** |
|
||||
|
||||
### CI/CD 示例 (.github/workflows/test.yml)
|
||||
|
||||
```yaml
|
||||
name: Tests
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r api/requirements.txt
|
||||
pip install pytest pytest-cov
|
||||
- name: Run tests
|
||||
run: pytest api/tests/ -v --cov=app
|
||||
```
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker, DeclarativeBase
|
||||
import os
|
||||
|
||||
DATABASE_URL = "sqlite:///./data/app.db"
|
||||
# 使用绝对路径
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
DATABASE_URL = f"sqlite:///{os.path.join(BASE_DIR, 'data', 'app.db')}"
|
||||
|
||||
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
|
||||
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
17
api/app/id_generator.py
Normal file
17
api/app/id_generator.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import uuid
|
||||
from typing import Any, Type
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
def short_id(prefix: str, size: int = 8) -> str:
|
||||
return f"{prefix}_{uuid.uuid4().hex[:size]}"
|
||||
|
||||
|
||||
def unique_short_id(prefix: str, db: Session, model_cls: Type[Any], size: int = 8) -> str:
|
||||
for _ in range(10):
|
||||
candidate = short_id(prefix, size=size)
|
||||
exists = db.query(model_cls.id).filter(model_cls.id == candidate).first()
|
||||
if not exists:
|
||||
return candidate
|
||||
raise RuntimeError(f"failed to generate unique id for {model_cls.__name__}")
|
||||
@@ -4,7 +4,7 @@ from contextlib import asynccontextmanager
|
||||
import os
|
||||
|
||||
from .db import Base, engine
|
||||
from .routers import assistants, history
|
||||
from .routers import assistants, voices, workflows, history, knowledge, llm, asr, tools
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
@@ -32,7 +32,13 @@ app.add_middleware(
|
||||
|
||||
# 路由
|
||||
app.include_router(assistants.router, prefix="/api")
|
||||
app.include_router(voices.router, prefix="/api")
|
||||
app.include_router(workflows.router, prefix="/api")
|
||||
app.include_router(history.router, prefix="/api")
|
||||
app.include_router(knowledge.router, prefix="/api")
|
||||
app.include_router(llm.router, prefix="/api")
|
||||
app.include_router(asr.router, prefix="/api")
|
||||
app.include_router(tools.router, prefix="/api")
|
||||
|
||||
|
||||
@app.get("/")
|
||||
@@ -43,30 +49,3 @@ def root():
|
||||
@app.get("/health")
|
||||
def health():
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
# 初始化默认数据
|
||||
@app.on_event("startup")
|
||||
def init_default_data():
|
||||
from sqlalchemy.orm import Session
|
||||
from .db import SessionLocal
|
||||
from .models import Voice
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
# 检查是否已有数据
|
||||
if db.query(Voice).count() == 0:
|
||||
# 插入默认声音
|
||||
voices = [
|
||||
Voice(id="v1", name="Xiaoyun", vendor="Ali", gender="Female", language="zh", description="Gentle and professional."),
|
||||
Voice(id="v2", name="Kevin", vendor="Volcano", gender="Male", language="en", description="Deep and authoritative."),
|
||||
Voice(id="v3", name="Abby", vendor="Minimax", gender="Female", language="en", description="Cheerful and lively."),
|
||||
Voice(id="v4", name="Guang", vendor="Ali", gender="Male", language="zh", description="Standard newscast style."),
|
||||
Voice(id="v5", name="Doubao", vendor="Volcano", gender="Female", language="zh", description="Cute and young."),
|
||||
]
|
||||
for v in voices:
|
||||
db.add(v)
|
||||
db.commit()
|
||||
print("✅ 默认声音数据已初始化")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
from sqlalchemy import String, Integer, DateTime, Text, Float, ForeignKey, JSON
|
||||
from sqlalchemy import String, Integer, DateTime, Text, Float, ForeignKey, JSON, Enum
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from .db import Base
|
||||
@@ -15,18 +15,99 @@ class User(Base):
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
|
||||
# ============ Voice ============
|
||||
class Voice(Base):
|
||||
__tablename__ = "voices"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||
user_id: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("users.id"), index=True, nullable=True)
|
||||
name: Mapped[str] = mapped_column(String(128), nullable=False)
|
||||
vendor: Mapped[str] = mapped_column(String(64), nullable=False)
|
||||
gender: Mapped[str] = mapped_column(String(32), nullable=False)
|
||||
language: Mapped[str] = mapped_column(String(16), nullable=False)
|
||||
description: Mapped[str] = mapped_column(String(255), nullable=False)
|
||||
voice_params: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
model: Mapped[Optional[str]] = mapped_column(String(128), nullable=True) # 厂商语音模型标识
|
||||
voice_key: Mapped[Optional[str]] = mapped_column(String(128), nullable=True) # 厂商voice_key
|
||||
api_key: Mapped[Optional[str]] = mapped_column(String(512), nullable=True) # 每个声音独立 API key
|
||||
base_url: Mapped[Optional[str]] = mapped_column(String(512), nullable=True) # 每个声音独立 OpenAI-compatible base_url
|
||||
speed: Mapped[float] = mapped_column(Float, default=1.0)
|
||||
gain: Mapped[int] = mapped_column(Integer, default=0)
|
||||
pitch: Mapped[int] = mapped_column(Integer, default=0)
|
||||
enabled: Mapped[bool] = mapped_column(default=True)
|
||||
is_system: Mapped[bool] = mapped_column(default=False)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
user = relationship("User", foreign_keys=[user_id])
|
||||
|
||||
|
||||
# ============ LLM Model ============
|
||||
class LLMModel(Base):
|
||||
__tablename__ = "llm_models"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||
user_id: Mapped[int] = mapped_column(Integer, ForeignKey("users.id"), index=True)
|
||||
name: Mapped[str] = mapped_column(String(128), nullable=False)
|
||||
vendor: Mapped[str] = mapped_column(String(64), nullable=False)
|
||||
type: Mapped[str] = mapped_column(String(32), nullable=False) # text/embedding/rerank
|
||||
base_url: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||
api_key: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||
model_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
|
||||
temperature: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
|
||||
context_length: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
|
||||
enabled: Mapped[bool] = mapped_column(default=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
user = relationship("User")
|
||||
|
||||
|
||||
# ============ ASR Model ============
|
||||
class ASRModel(Base):
|
||||
__tablename__ = "asr_models"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||
user_id: Mapped[int] = mapped_column(Integer, ForeignKey("users.id"), index=True)
|
||||
name: Mapped[str] = mapped_column(String(128), nullable=False)
|
||||
vendor: Mapped[str] = mapped_column(String(64), nullable=False)
|
||||
language: Mapped[str] = mapped_column(String(32), nullable=False) # zh/en/Multi-lingual
|
||||
base_url: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||
api_key: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||
model_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
|
||||
hotwords: Mapped[dict] = mapped_column(JSON, default=list)
|
||||
enable_punctuation: Mapped[bool] = mapped_column(default=True)
|
||||
enable_normalization: Mapped[bool] = mapped_column(default=True)
|
||||
enabled: Mapped[bool] = mapped_column(default=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
user = relationship("User")
|
||||
|
||||
|
||||
# ============ Tool Resource ============
|
||||
class ToolResource(Base):
|
||||
__tablename__ = "tool_resources"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||
user_id: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("users.id"), index=True, nullable=True)
|
||||
name: Mapped[str] = mapped_column(String(128), nullable=False)
|
||||
description: Mapped[str] = mapped_column(String(512), nullable=False, default="")
|
||||
category: Mapped[str] = mapped_column(String(32), nullable=False, default="system") # system/query
|
||||
icon: Mapped[str] = mapped_column(String(64), nullable=False, default="Wrench")
|
||||
http_method: Mapped[str] = mapped_column(String(16), nullable=False, default="GET")
|
||||
http_url: Mapped[Optional[str]] = mapped_column(String(1024), nullable=True)
|
||||
http_headers: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
http_timeout_ms: Mapped[int] = mapped_column(Integer, default=10000)
|
||||
parameter_schema: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
parameter_defaults: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
wait_for_response: Mapped[bool] = mapped_column(default=False)
|
||||
enabled: Mapped[bool] = mapped_column(default=True)
|
||||
is_system: Mapped[bool] = mapped_column(default=False)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
user = relationship("User")
|
||||
|
||||
|
||||
# ============ Assistant ============
|
||||
class Assistant(Base):
|
||||
__tablename__ = "assistants"
|
||||
|
||||
@@ -34,25 +115,57 @@ class Assistant(Base):
|
||||
user_id: Mapped[int] = mapped_column(Integer, ForeignKey("users.id"), index=True)
|
||||
name: Mapped[str] = mapped_column(String(255), nullable=False)
|
||||
call_count: Mapped[int] = mapped_column(Integer, default=0)
|
||||
first_turn_mode: Mapped[str] = mapped_column(String(32), default="bot_first")
|
||||
opener: Mapped[str] = mapped_column(Text, default="")
|
||||
manual_opener_tool_calls: Mapped[list] = mapped_column(JSON, default=list)
|
||||
generated_opener_enabled: Mapped[bool] = mapped_column(default=False)
|
||||
prompt: Mapped[str] = mapped_column(Text, default="")
|
||||
knowledge_base_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
language: Mapped[str] = mapped_column(String(16), default="zh")
|
||||
voice_output_enabled: Mapped[bool] = mapped_column(default=True)
|
||||
voice: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
speed: Mapped[float] = mapped_column(Float, default=1.0)
|
||||
hotwords: Mapped[dict] = mapped_column(JSON, default=list)
|
||||
tools: Mapped[dict] = mapped_column(JSON, default=list)
|
||||
asr_interim_enabled: Mapped[bool] = mapped_column(default=False)
|
||||
bot_cannot_be_interrupted: Mapped[bool] = mapped_column(default=False)
|
||||
interruption_sensitivity: Mapped[int] = mapped_column(Integer, default=500)
|
||||
config_mode: Mapped[str] = mapped_column(String(32), default="platform")
|
||||
api_url: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
|
||||
api_key: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
|
||||
app_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
|
||||
# 模型关联
|
||||
llm_model_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
asr_model_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
embedding_model_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
rerank_model_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
user = relationship("User")
|
||||
call_records = relationship("CallRecord", back_populates="assistant")
|
||||
opener_audio = relationship("AssistantOpenerAudio", back_populates="assistant", uselist=False, cascade="all, delete-orphan")
|
||||
|
||||
|
||||
class AssistantOpenerAudio(Base):
|
||||
__tablename__ = "assistant_opener_audio"
|
||||
|
||||
assistant_id: Mapped[str] = mapped_column(String(64), ForeignKey("assistants.id"), primary_key=True)
|
||||
enabled: Mapped[bool] = mapped_column(default=False)
|
||||
file_path: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
|
||||
encoding: Mapped[str] = mapped_column(String(32), default="pcm_s16le")
|
||||
sample_rate_hz: Mapped[int] = mapped_column(Integer, default=16000)
|
||||
channels: Mapped[int] = mapped_column(Integer, default=1)
|
||||
duration_ms: Mapped[int] = mapped_column(Integer, default=0)
|
||||
text_hash: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
|
||||
tts_fingerprint: Mapped[Optional[str]] = mapped_column(String(256), nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
assistant = relationship("Assistant", back_populates="opener_audio")
|
||||
|
||||
|
||||
# ============ Knowledge Base ============
|
||||
class KnowledgeBase(Base):
|
||||
__tablename__ = "knowledge_bases"
|
||||
|
||||
@@ -92,6 +205,7 @@ class KnowledgeDocument(Base):
|
||||
kb = relationship("KnowledgeBase", back_populates="documents")
|
||||
|
||||
|
||||
# ============ Workflow ============
|
||||
class Workflow(Base):
|
||||
__tablename__ = "workflows"
|
||||
|
||||
@@ -108,6 +222,7 @@ class Workflow(Base):
|
||||
user = relationship("User")
|
||||
|
||||
|
||||
# ============ Call Record ============
|
||||
class CallRecord(Base):
|
||||
__tablename__ = "call_records"
|
||||
|
||||
|
||||
@@ -1,11 +1,21 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
from . import assistants
|
||||
from . import voices
|
||||
from . import workflows
|
||||
from . import history
|
||||
from . import knowledge
|
||||
from . import llm
|
||||
from . import asr
|
||||
from . import tools
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
router.include_router(assistants.router)
|
||||
router.include_router(voices.router)
|
||||
router.include_router(workflows.router)
|
||||
router.include_router(history.router)
|
||||
router.include_router(knowledge.router)
|
||||
router.include_router(llm.router)
|
||||
router.include_router(asr.router)
|
||||
router.include_router(tools.router)
|
||||
|
||||
785
api/app/routers/asr.py
Normal file
785
api/app/routers/asr.py
Normal file
@@ -0,0 +1,785 @@
|
||||
import asyncio
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import wave
|
||||
from array import array
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..db import get_db
|
||||
from ..id_generator import unique_short_id
|
||||
from ..models import ASRModel
|
||||
from ..schemas import (
|
||||
ASRModelCreate, ASRModelUpdate, ASRModelOut,
|
||||
ASRTestRequest, ASRTestResponse
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/asr", tags=["ASR Models"])
|
||||
|
||||
OPENAI_COMPATIBLE_DEFAULT_ASR_MODEL = "FunAudioLLM/SenseVoiceSmall"
|
||||
DASHSCOPE_DEFAULT_ASR_MODEL = "qwen3-asr-flash-realtime"
|
||||
DASHSCOPE_DEFAULT_BASE_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
|
||||
|
||||
try:
|
||||
import dashscope
|
||||
from dashscope.audio.qwen_omni import MultiModality, OmniRealtimeCallback, OmniRealtimeConversation
|
||||
|
||||
try:
|
||||
from dashscope.audio.qwen_omni import TranscriptionParams
|
||||
except ImportError:
|
||||
from dashscope.audio.qwen_omni.omni_realtime import TranscriptionParams
|
||||
|
||||
DASHSCOPE_SDK_AVAILABLE = True
|
||||
DASHSCOPE_IMPORT_ERROR = ""
|
||||
except Exception as exc:
|
||||
dashscope = None # type: ignore[assignment]
|
||||
MultiModality = None # type: ignore[assignment]
|
||||
OmniRealtimeConversation = None # type: ignore[assignment]
|
||||
TranscriptionParams = None # type: ignore[assignment]
|
||||
DASHSCOPE_SDK_AVAILABLE = False
|
||||
DASHSCOPE_IMPORT_ERROR = f"{type(exc).__name__}: {exc}"
|
||||
|
||||
class OmniRealtimeCallback: # type: ignore[no-redef]
|
||||
"""Fallback callback base when DashScope SDK is unavailable."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
def _is_openai_compatible_vendor(vendor: str) -> bool:
|
||||
normalized = (vendor or "").strip().lower()
|
||||
return normalized in {
|
||||
"openai compatible",
|
||||
"openai-compatible",
|
||||
"siliconflow", # backward compatibility
|
||||
"硅基流动", # backward compatibility
|
||||
}
|
||||
|
||||
|
||||
def _is_dashscope_vendor(vendor: str) -> bool:
|
||||
return (vendor or "").strip().lower() == "dashscope"
|
||||
|
||||
|
||||
def _default_asr_model(vendor: str) -> str:
|
||||
if _is_openai_compatible_vendor(vendor):
|
||||
return OPENAI_COMPATIBLE_DEFAULT_ASR_MODEL
|
||||
if _is_dashscope_vendor(vendor):
|
||||
return DASHSCOPE_DEFAULT_ASR_MODEL
|
||||
return "whisper-1"
|
||||
|
||||
|
||||
def _dashscope_language(language: Optional[str]) -> Optional[str]:
|
||||
normalized = (language or "").strip().lower()
|
||||
if not normalized or normalized in {"multi-lingual", "multilingual", "multi_lingual", "auto"}:
|
||||
return None
|
||||
if normalized.startswith("zh"):
|
||||
return "zh"
|
||||
if normalized.startswith("en"):
|
||||
return "en"
|
||||
return normalized
|
||||
|
||||
|
||||
class _DashScopePreviewCallback(OmniRealtimeCallback):
|
||||
"""Collect DashScope ASR websocket events for preview/test flows."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._open_event = threading.Event()
|
||||
self._session_ready_event = threading.Event()
|
||||
self._done_event = threading.Event()
|
||||
self._lock = threading.Lock()
|
||||
self._final_text = ""
|
||||
self._last_interim_text = ""
|
||||
self._error_message: Optional[str] = None
|
||||
|
||||
def on_open(self) -> None:
|
||||
self._open_event.set()
|
||||
|
||||
def on_close(self, code: int, reason: str) -> None:
|
||||
if self._done_event.is_set():
|
||||
return
|
||||
self._error_message = f"DashScope websocket closed unexpectedly: {code} {reason}"
|
||||
self._done_event.set()
|
||||
self._session_ready_event.set()
|
||||
|
||||
def on_error(self, message: Any) -> None:
|
||||
self._error_message = str(message)
|
||||
self._done_event.set()
|
||||
self._session_ready_event.set()
|
||||
|
||||
def on_event(self, response: Any) -> None:
|
||||
payload = _coerce_dashscope_event(response)
|
||||
event_type = str(payload.get("type") or "").strip()
|
||||
if not event_type:
|
||||
return
|
||||
|
||||
if event_type in {"session.created", "session.updated"}:
|
||||
self._session_ready_event.set()
|
||||
return
|
||||
|
||||
if event_type == "error" or event_type.endswith(".failed"):
|
||||
self._error_message = _format_dashscope_error_event(payload)
|
||||
self._done_event.set()
|
||||
self._session_ready_event.set()
|
||||
return
|
||||
|
||||
if event_type == "conversation.item.input_audio_transcription.text":
|
||||
interim_text = _extract_dashscope_text(payload, keys=("stash", "text", "transcript"))
|
||||
if interim_text:
|
||||
with self._lock:
|
||||
self._last_interim_text = interim_text
|
||||
return
|
||||
|
||||
if event_type == "conversation.item.input_audio_transcription.completed":
|
||||
final_text = _extract_dashscope_text(payload, keys=("transcript", "text", "stash"))
|
||||
with self._lock:
|
||||
if final_text:
|
||||
self._final_text = final_text
|
||||
self._done_event.set()
|
||||
return
|
||||
|
||||
if event_type in {"response.done", "session.finished"}:
|
||||
self._done_event.set()
|
||||
|
||||
def wait_for_open(self, timeout: float = 10.0) -> None:
|
||||
if not self._open_event.wait(timeout):
|
||||
raise TimeoutError("DashScope websocket open timeout")
|
||||
|
||||
def wait_for_session_ready(self, timeout: float = 6.0) -> bool:
|
||||
return self._session_ready_event.wait(timeout)
|
||||
|
||||
def wait_for_done(self, timeout: float = 20.0) -> None:
|
||||
if not self._done_event.wait(timeout):
|
||||
raise TimeoutError("DashScope transcription timeout")
|
||||
|
||||
def raise_if_error(self) -> None:
|
||||
if self._error_message:
|
||||
raise RuntimeError(self._error_message)
|
||||
|
||||
def read_text(self) -> str:
|
||||
with self._lock:
|
||||
return self._final_text or self._last_interim_text
|
||||
|
||||
|
||||
def _coerce_dashscope_event(response: Any) -> Dict[str, Any]:
|
||||
if isinstance(response, dict):
|
||||
return response
|
||||
if isinstance(response, str):
|
||||
try:
|
||||
parsed = json.loads(response)
|
||||
if isinstance(parsed, dict):
|
||||
return parsed
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
return {"type": "raw", "message": str(response)}
|
||||
|
||||
|
||||
def _format_dashscope_error_event(payload: Dict[str, Any]) -> str:
|
||||
error = payload.get("error")
|
||||
if isinstance(error, dict):
|
||||
code = str(error.get("code") or "").strip()
|
||||
message = str(error.get("message") or "").strip()
|
||||
if code and message:
|
||||
return f"{code}: {message}"
|
||||
return message or str(error)
|
||||
return str(error or "DashScope realtime ASR error")
|
||||
|
||||
|
||||
def _extract_dashscope_text(payload: Dict[str, Any], *, keys: Tuple[str, ...]) -> str:
|
||||
for key in keys:
|
||||
value = payload.get(key)
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value.strip()
|
||||
if isinstance(value, dict):
|
||||
nested = _extract_dashscope_text(value, keys=keys)
|
||||
if nested:
|
||||
return nested
|
||||
|
||||
for value in payload.values():
|
||||
if isinstance(value, dict):
|
||||
nested = _extract_dashscope_text(value, keys=keys)
|
||||
if nested:
|
||||
return nested
|
||||
return ""
|
||||
|
||||
|
||||
def _create_dashscope_realtime_client(
|
||||
*,
|
||||
model: str,
|
||||
callback: _DashScopePreviewCallback,
|
||||
url: str,
|
||||
api_key: str,
|
||||
) -> Any:
|
||||
if OmniRealtimeConversation is None:
|
||||
raise RuntimeError("DashScope SDK unavailable")
|
||||
|
||||
init_kwargs = {
|
||||
"model": model,
|
||||
"callback": callback,
|
||||
"url": url,
|
||||
}
|
||||
try:
|
||||
return OmniRealtimeConversation(api_key=api_key, **init_kwargs) # type: ignore[misc]
|
||||
except TypeError as exc:
|
||||
if "api_key" not in str(exc):
|
||||
raise
|
||||
return OmniRealtimeConversation(**init_kwargs) # type: ignore[misc]
|
||||
|
||||
|
||||
def _close_dashscope_client(client: Any) -> None:
|
||||
finish_fn = getattr(client, "finish", None)
|
||||
if callable(finish_fn):
|
||||
try:
|
||||
finish_fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
close_fn = getattr(client, "close", None)
|
||||
if callable(close_fn):
|
||||
try:
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _configure_dashscope_session(
|
||||
*,
|
||||
client: Any,
|
||||
callback: _DashScopePreviewCallback,
|
||||
sample_rate: int,
|
||||
language: Optional[str],
|
||||
) -> None:
|
||||
update_fn = getattr(client, "update_session", None)
|
||||
if not callable(update_fn):
|
||||
raise RuntimeError("DashScope ASR SDK missing update_session method")
|
||||
|
||||
text_modality: Any = "text"
|
||||
if MultiModality is not None and hasattr(MultiModality, "TEXT"):
|
||||
text_modality = MultiModality.TEXT
|
||||
|
||||
transcription_params: Optional[Any] = None
|
||||
language_hint = _dashscope_language(language)
|
||||
if TranscriptionParams is not None:
|
||||
try:
|
||||
params_kwargs: Dict[str, Any] = {
|
||||
"sample_rate": sample_rate,
|
||||
"input_audio_format": "pcm",
|
||||
}
|
||||
if language_hint:
|
||||
params_kwargs["language"] = language_hint
|
||||
transcription_params = TranscriptionParams(**params_kwargs)
|
||||
except Exception:
|
||||
transcription_params = None
|
||||
|
||||
update_attempts = [
|
||||
{
|
||||
"output_modalities": [text_modality],
|
||||
"enable_turn_detection": False,
|
||||
"enable_input_audio_transcription": True,
|
||||
"transcription_params": transcription_params,
|
||||
},
|
||||
{
|
||||
"output_modalities": [text_modality],
|
||||
"enable_turn_detection": False,
|
||||
"enable_input_audio_transcription": True,
|
||||
},
|
||||
{
|
||||
"output_modalities": [text_modality],
|
||||
},
|
||||
]
|
||||
|
||||
last_error: Optional[Exception] = None
|
||||
for params in update_attempts:
|
||||
if params.get("transcription_params") is None:
|
||||
params = {key: value for key, value in params.items() if key != "transcription_params"}
|
||||
try:
|
||||
update_fn(**params)
|
||||
callback.wait_for_session_ready()
|
||||
callback.raise_if_error()
|
||||
return
|
||||
except TypeError as exc:
|
||||
last_error = exc
|
||||
continue
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
continue
|
||||
|
||||
raise RuntimeError(f"DashScope ASR session.update failed: {last_error}")
|
||||
|
||||
|
||||
def _load_wav_pcm16_mono(audio_bytes: bytes) -> Tuple[bytes, int]:
|
||||
try:
|
||||
with wave.open(io.BytesIO(audio_bytes), "rb") as wav_file:
|
||||
channel_count = wav_file.getnchannels()
|
||||
sample_width = wav_file.getsampwidth()
|
||||
sample_rate = wav_file.getframerate()
|
||||
compression = wav_file.getcomptype()
|
||||
pcm_frames = wav_file.readframes(wav_file.getnframes())
|
||||
except wave.Error as exc:
|
||||
raise RuntimeError("DashScope preview currently supports WAV audio. Record in browser or upload a .wav file.") from exc
|
||||
|
||||
if compression != "NONE":
|
||||
raise RuntimeError("DashScope preview requires uncompressed PCM WAV audio.")
|
||||
if sample_width != 2:
|
||||
raise RuntimeError("DashScope preview requires 16-bit PCM WAV audio.")
|
||||
if not pcm_frames:
|
||||
raise RuntimeError("Uploaded WAV file is empty")
|
||||
if channel_count <= 1:
|
||||
return pcm_frames, sample_rate
|
||||
|
||||
samples = array("h")
|
||||
samples.frombytes(pcm_frames)
|
||||
if sys.byteorder == "big":
|
||||
samples.byteswap()
|
||||
|
||||
mono_samples = array(
|
||||
"h",
|
||||
(
|
||||
int(sum(samples[index:index + channel_count]) / channel_count)
|
||||
for index in range(0, len(samples), channel_count)
|
||||
),
|
||||
)
|
||||
if sys.byteorder == "big":
|
||||
mono_samples.byteswap()
|
||||
return mono_samples.tobytes(), sample_rate
|
||||
|
||||
|
||||
def _probe_dashscope_asr_connection(*, api_key: str, base_url: str, model: str, language: Optional[str]) -> None:
|
||||
if not DASHSCOPE_SDK_AVAILABLE:
|
||||
hint = f"`{sys.executable} -m pip install dashscope>=1.25.11`"
|
||||
detail = f"; import error: {DASHSCOPE_IMPORT_ERROR}" if DASHSCOPE_IMPORT_ERROR else ""
|
||||
raise RuntimeError(f"dashscope package not installed; install with {hint}{detail}")
|
||||
|
||||
callback = _DashScopePreviewCallback()
|
||||
if dashscope is not None:
|
||||
dashscope.api_key = api_key
|
||||
client = _create_dashscope_realtime_client(
|
||||
model=model,
|
||||
callback=callback,
|
||||
url=base_url,
|
||||
api_key=api_key,
|
||||
)
|
||||
|
||||
try:
|
||||
client.connect()
|
||||
callback.wait_for_open()
|
||||
_configure_dashscope_session(
|
||||
client=client,
|
||||
callback=callback,
|
||||
sample_rate=16000,
|
||||
language=language,
|
||||
)
|
||||
finally:
|
||||
_close_dashscope_client(client)
|
||||
|
||||
|
||||
def _transcribe_dashscope_preview(
|
||||
*,
|
||||
audio_bytes: bytes,
|
||||
api_key: str,
|
||||
base_url: str,
|
||||
model: str,
|
||||
language: Optional[str],
|
||||
) -> Dict[str, Any]:
|
||||
if not DASHSCOPE_SDK_AVAILABLE:
|
||||
hint = f"`{sys.executable} -m pip install dashscope>=1.25.11`"
|
||||
detail = f"; import error: {DASHSCOPE_IMPORT_ERROR}" if DASHSCOPE_IMPORT_ERROR else ""
|
||||
raise RuntimeError(f"dashscope package not installed; install with {hint}{detail}")
|
||||
|
||||
pcm_audio, sample_rate = _load_wav_pcm16_mono(audio_bytes)
|
||||
callback = _DashScopePreviewCallback()
|
||||
if dashscope is not None:
|
||||
dashscope.api_key = api_key
|
||||
client = _create_dashscope_realtime_client(
|
||||
model=model,
|
||||
callback=callback,
|
||||
url=base_url,
|
||||
api_key=api_key,
|
||||
)
|
||||
|
||||
try:
|
||||
client.connect()
|
||||
callback.wait_for_open()
|
||||
_configure_dashscope_session(
|
||||
client=client,
|
||||
callback=callback,
|
||||
sample_rate=sample_rate,
|
||||
language=language,
|
||||
)
|
||||
|
||||
append_fn = getattr(client, "append_audio", None)
|
||||
if not callable(append_fn):
|
||||
raise RuntimeError("DashScope ASR SDK missing append_audio method")
|
||||
commit_fn = getattr(client, "commit", None)
|
||||
if not callable(commit_fn):
|
||||
raise RuntimeError("DashScope ASR SDK missing commit method")
|
||||
|
||||
append_fn(base64.b64encode(pcm_audio).decode("ascii"))
|
||||
commit_fn()
|
||||
callback.wait_for_done()
|
||||
callback.raise_if_error()
|
||||
return {
|
||||
"transcript": callback.read_text(),
|
||||
"language": _dashscope_language(language) or "Multi-lingual",
|
||||
"confidence": None,
|
||||
}
|
||||
finally:
|
||||
_close_dashscope_client(client)
|
||||
|
||||
|
||||
# ============ ASR Models CRUD ============
|
||||
@router.get("")
|
||||
def list_asr_models(
|
||||
language: Optional[str] = None,
|
||||
enabled: Optional[bool] = None,
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取ASR模型列表"""
|
||||
query = db.query(ASRModel)
|
||||
|
||||
if language:
|
||||
query = query.filter(ASRModel.language == language)
|
||||
if enabled is not None:
|
||||
query = query.filter(ASRModel.enabled == enabled)
|
||||
|
||||
total = query.count()
|
||||
models = query.order_by(ASRModel.created_at.desc()) \
|
||||
.offset((page-1)*limit).limit(limit).all()
|
||||
|
||||
return {"total": total, "page": page, "limit": limit, "list": models}
|
||||
|
||||
|
||||
@router.get("/{id}", response_model=ASRModelOut)
|
||||
def get_asr_model(id: str, db: Session = Depends(get_db)):
|
||||
"""获取单个ASR模型详情"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
return model
|
||||
|
||||
|
||||
@router.post("", response_model=ASRModelOut)
|
||||
def create_asr_model(data: ASRModelCreate, db: Session = Depends(get_db)):
|
||||
"""创建ASR模型"""
|
||||
asr_model = ASRModel(
|
||||
id=unique_short_id("asr", db, ASRModel),
|
||||
user_id=1, # 默认用户
|
||||
name=data.name,
|
||||
vendor=data.vendor,
|
||||
language=data.language,
|
||||
base_url=data.base_url,
|
||||
api_key=data.api_key,
|
||||
model_name=data.model_name,
|
||||
hotwords=data.hotwords,
|
||||
enable_punctuation=data.enable_punctuation,
|
||||
enable_normalization=data.enable_normalization,
|
||||
enabled=data.enabled,
|
||||
)
|
||||
db.add(asr_model)
|
||||
db.commit()
|
||||
db.refresh(asr_model)
|
||||
return asr_model
|
||||
|
||||
|
||||
@router.put("/{id}", response_model=ASRModelOut)
|
||||
def update_asr_model(id: str, data: ASRModelUpdate, db: Session = Depends(get_db)):
|
||||
"""更新ASR模型"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
for field, value in update_data.items():
|
||||
setattr(model, field, value)
|
||||
|
||||
db.commit()
|
||||
db.refresh(model)
|
||||
return model
|
||||
|
||||
|
||||
@router.delete("/{id}")
|
||||
def delete_asr_model(id: str, db: Session = Depends(get_db)):
|
||||
"""删除ASR模型"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
db.delete(model)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
@router.post("/{id}/test", response_model=ASRTestResponse)
|
||||
def test_asr_model(
|
||||
id: str,
|
||||
request: Optional[ASRTestRequest] = None,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""测试ASR模型"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
if _is_dashscope_vendor(model.vendor):
|
||||
effective_api_key = (model.api_key or "").strip() or os.getenv("DASHSCOPE_API_KEY", "").strip() or os.getenv("ASR_API_KEY", "").strip()
|
||||
if not effective_api_key:
|
||||
return ASRTestResponse(success=False, error=f"API key is required for ASR model: {model.name}")
|
||||
|
||||
base_url = (model.base_url or "").strip() or DASHSCOPE_DEFAULT_BASE_URL
|
||||
selected_model = (model.model_name or "").strip() or _default_asr_model(model.vendor)
|
||||
_probe_dashscope_asr_connection(
|
||||
api_key=effective_api_key,
|
||||
base_url=base_url,
|
||||
model=selected_model,
|
||||
language=model.language,
|
||||
)
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
return ASRTestResponse(
|
||||
success=True,
|
||||
language=model.language,
|
||||
latency_ms=latency_ms,
|
||||
message="DashScope realtime ASR connected",
|
||||
)
|
||||
|
||||
# 连接性测试优先,避免依赖真实音频输入
|
||||
headers = {"Authorization": f"Bearer {model.api_key}"}
|
||||
with httpx.Client(timeout=60.0) as client:
|
||||
if _is_openai_compatible_vendor(model.vendor) or model.vendor.lower() == "paraformer":
|
||||
response = client.get(f"{model.base_url}/asr", headers=headers)
|
||||
elif model.vendor.lower() == "openai":
|
||||
response = client.get(f"{model.base_url}/audio/models", headers=headers)
|
||||
else:
|
||||
response = client.get(f"{model.base_url}/health", headers=headers)
|
||||
response.raise_for_status()
|
||||
raw_result = response.json()
|
||||
|
||||
# 兼容不同供应商格式
|
||||
if isinstance(raw_result, dict) and "results" in raw_result:
|
||||
result = raw_result
|
||||
elif isinstance(raw_result, dict) and "text" in raw_result:
|
||||
result = {"results": [{"transcript": raw_result.get("text", "")}]}
|
||||
else:
|
||||
result = {"results": [{"transcript": ""}]}
|
||||
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# 解析结果
|
||||
if result_data := result.get("results", [{}])[0]:
|
||||
transcript = result_data.get("transcript", "")
|
||||
return ASRTestResponse(
|
||||
success=True,
|
||||
transcript=transcript,
|
||||
language=result_data.get("language", model.language),
|
||||
confidence=result_data.get("confidence"),
|
||||
latency_ms=latency_ms,
|
||||
)
|
||||
|
||||
return ASRTestResponse(
|
||||
success=False,
|
||||
message="No transcript in response",
|
||||
latency_ms=latency_ms
|
||||
)
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
return ASRTestResponse(
|
||||
success=False,
|
||||
error=f"HTTP Error: {e.response.status_code} - {e.response.text[:200]}"
|
||||
)
|
||||
except Exception as e:
|
||||
return ASRTestResponse(
|
||||
success=False,
|
||||
error=str(e)[:200]
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{id}/transcribe")
|
||||
def transcribe_audio(
|
||||
id: str,
|
||||
audio_url: Optional[str] = None,
|
||||
audio_data: Optional[str] = None,
|
||||
hotwords: Optional[List[str]] = None,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""转写音频"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
|
||||
try:
|
||||
payload = {
|
||||
"model": model.model_name or "paraformer-v2",
|
||||
"input": {},
|
||||
"parameters": {
|
||||
"hotwords": " ".join(hotwords or model.hotwords or []),
|
||||
"enable_punctuation": model.enable_punctuation,
|
||||
"enable_normalization": model.enable_normalization,
|
||||
}
|
||||
}
|
||||
|
||||
headers = {"Authorization": f"Bearer {model.api_key}"}
|
||||
|
||||
if audio_url:
|
||||
payload["input"]["url"] = audio_url
|
||||
elif audio_data:
|
||||
payload["input"]["file_urls"] = []
|
||||
|
||||
with httpx.Client(timeout=120.0) as client:
|
||||
response = client.post(
|
||||
f"{model.base_url}/asr",
|
||||
json=payload,
|
||||
headers=headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
result = response.json()
|
||||
|
||||
if result_data := result.get("results", [{}])[0]:
|
||||
return {
|
||||
"success": True,
|
||||
"transcript": result_data.get("transcript", ""),
|
||||
"language": result_data.get("language", model.language),
|
||||
"confidence": result_data.get("confidence"),
|
||||
}
|
||||
|
||||
return {"success": False, "error": "No transcript in response"}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/{id}/preview", response_model=ASRTestResponse)
|
||||
async def preview_asr_model(
|
||||
id: str,
|
||||
file: UploadFile = File(...),
|
||||
language: Optional[str] = Form(None),
|
||||
api_key: Optional[str] = Form(None),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""预览 ASR:根据供应商调用 OpenAI-compatible 或 DashScope 实时识别。"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
|
||||
if not file:
|
||||
raise HTTPException(status_code=400, detail="Audio file is required")
|
||||
|
||||
filename = file.filename or "preview.wav"
|
||||
content_type = file.content_type or "application/octet-stream"
|
||||
if not content_type.startswith("audio/"):
|
||||
raise HTTPException(status_code=400, detail="Only audio files are supported")
|
||||
|
||||
audio_bytes = await file.read()
|
||||
if not audio_bytes:
|
||||
raise HTTPException(status_code=400, detail="Uploaded audio file is empty")
|
||||
|
||||
effective_api_key = (api_key or "").strip() or (model.api_key or "").strip()
|
||||
if not effective_api_key:
|
||||
if _is_openai_compatible_vendor(model.vendor):
|
||||
effective_api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
|
||||
elif _is_dashscope_vendor(model.vendor):
|
||||
effective_api_key = os.getenv("DASHSCOPE_API_KEY", "").strip() or os.getenv("ASR_API_KEY", "").strip()
|
||||
if not effective_api_key:
|
||||
raise HTTPException(status_code=400, detail=f"API key is required for ASR model: {model.name}")
|
||||
|
||||
base_url = (model.base_url or "").strip().rstrip("/")
|
||||
if _is_dashscope_vendor(model.vendor) and not base_url:
|
||||
base_url = DASHSCOPE_DEFAULT_BASE_URL
|
||||
if not base_url:
|
||||
raise HTTPException(status_code=400, detail=f"Base URL is required for ASR model: {model.name}")
|
||||
|
||||
selected_model = (model.model_name or "").strip() or _default_asr_model(model.vendor)
|
||||
effective_language = (language or "").strip() or None
|
||||
|
||||
start_time = time.time()
|
||||
if _is_dashscope_vendor(model.vendor):
|
||||
try:
|
||||
payload = await asyncio.to_thread(
|
||||
_transcribe_dashscope_preview,
|
||||
audio_bytes=audio_bytes,
|
||||
api_key=effective_api_key,
|
||||
base_url=base_url,
|
||||
model=selected_model,
|
||||
language=effective_language or model.language,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"DashScope ASR request failed: {exc}") from exc
|
||||
|
||||
transcript = str(payload.get("transcript") or "")
|
||||
response_language = str(payload.get("language") or effective_language or model.language)
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
return ASRTestResponse(
|
||||
success=bool(transcript),
|
||||
transcript=transcript,
|
||||
language=response_language,
|
||||
confidence=None,
|
||||
latency_ms=latency_ms,
|
||||
message=None if transcript else "No transcript in response",
|
||||
)
|
||||
|
||||
data = {"model": selected_model}
|
||||
if effective_language:
|
||||
data["language"] = effective_language
|
||||
if model.hotwords:
|
||||
data["prompt"] = " ".join(model.hotwords)
|
||||
|
||||
headers = {"Authorization": f"Bearer {effective_api_key}"}
|
||||
files = {"file": (filename, audio_bytes, content_type)}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=90.0) as client:
|
||||
response = client.post(
|
||||
f"{base_url}/audio/transcriptions",
|
||||
headers=headers,
|
||||
data=data,
|
||||
files=files,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"ASR request failed: {exc}") from exc
|
||||
|
||||
if response.status_code != 200:
|
||||
detail = response.text
|
||||
try:
|
||||
detail_json = response.json()
|
||||
detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
|
||||
except Exception:
|
||||
pass
|
||||
raise HTTPException(status_code=502, detail=f"ASR vendor error: {detail}")
|
||||
|
||||
try:
|
||||
payload = response.json()
|
||||
except Exception:
|
||||
payload = {"text": response.text}
|
||||
|
||||
transcript = ""
|
||||
response_language = model.language
|
||||
confidence = None
|
||||
if isinstance(payload, dict):
|
||||
transcript = str(payload.get("text") or payload.get("transcript") or "")
|
||||
response_language = str(payload.get("language") or effective_language or model.language)
|
||||
raw_confidence = payload.get("confidence")
|
||||
if raw_confidence is not None:
|
||||
try:
|
||||
confidence = float(raw_confidence)
|
||||
except (TypeError, ValueError):
|
||||
confidence = None
|
||||
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
return ASRTestResponse(
|
||||
success=bool(transcript),
|
||||
transcript=transcript,
|
||||
language=response_language,
|
||||
confidence=confidence,
|
||||
latency_ms=latency_ms,
|
||||
message=None if transcript else "No transcript in response",
|
||||
)
|
||||
@@ -1,157 +1,860 @@
|
||||
import audioop
|
||||
import hashlib
|
||||
import io
|
||||
import os
|
||||
import wave
|
||||
from pathlib import Path
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy import inspect, text
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import List
|
||||
from typing import Any, Dict, List, Optional
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from ..db import get_db
|
||||
from ..models import Assistant, Voice, Workflow
|
||||
from ..models import Assistant, AssistantOpenerAudio, LLMModel, ASRModel, Voice, ToolResource
|
||||
from ..schemas import (
|
||||
AssistantCreate, AssistantUpdate, AssistantOut,
|
||||
VoiceOut,
|
||||
WorkflowCreate, WorkflowUpdate, WorkflowOut
|
||||
AssistantCreate,
|
||||
AssistantUpdate,
|
||||
AssistantOut,
|
||||
AssistantEngineConfigResponse,
|
||||
AssistantOpenerAudioGenerateRequest,
|
||||
AssistantOpenerAudioOut,
|
||||
)
|
||||
from .tools import (
|
||||
TOOL_REGISTRY,
|
||||
TOOL_CATEGORY_MAP,
|
||||
TOOL_PARAMETER_DEFAULTS,
|
||||
TOOL_WAIT_FOR_RESPONSE_DEFAULTS,
|
||||
normalize_tool_id,
|
||||
_ensure_tool_resource_schema,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
router = APIRouter(prefix="/assistants", tags=["Assistants"])
|
||||
|
||||
OPENAI_COMPATIBLE_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
|
||||
OPENAI_COMPATIBLE_DEFAULT_BASE_URL = "https://api.siliconflow.cn/v1"
|
||||
OPENER_AUDIO_DIR = Path(__file__).resolve().parents[2] / "data" / "opener_audio"
|
||||
OPENAI_COMPATIBLE_KNOWN_VOICES = {
|
||||
"alex",
|
||||
"anna",
|
||||
"bella",
|
||||
"benjamin",
|
||||
"charles",
|
||||
"claire",
|
||||
"david",
|
||||
"diana",
|
||||
}
|
||||
|
||||
|
||||
# ============ Voices ============
|
||||
@router.get("/voices", response_model=List[VoiceOut])
|
||||
def list_voices(db: Session = Depends(get_db)):
|
||||
"""获取声音库列表"""
|
||||
voices = db.query(Voice).all()
|
||||
return voices
|
||||
def _is_openai_compatible_vendor(vendor: Optional[str]) -> bool:
|
||||
return (vendor or "").strip().lower() in {
|
||||
"siliconflow",
|
||||
"硅基流动",
|
||||
"openai compatible",
|
||||
"openai-compatible",
|
||||
}
|
||||
|
||||
|
||||
def _is_dashscope_vendor(vendor: Optional[str]) -> bool:
|
||||
return (vendor or "").strip().lower() in {
|
||||
"dashscope",
|
||||
}
|
||||
|
||||
|
||||
def _normalize_openai_compatible_voice_key(voice_value: str, model: str) -> str:
|
||||
raw = (voice_value or "").strip()
|
||||
model_name = (model or "").strip() or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
if not raw:
|
||||
return f"{model_name}:anna"
|
||||
|
||||
if ":" in raw:
|
||||
voice_model, voice_id = raw.split(":", 1)
|
||||
voice_model = voice_model.strip() or model_name
|
||||
voice_id = voice_id.strip()
|
||||
if voice_id.lower() in OPENAI_COMPATIBLE_KNOWN_VOICES:
|
||||
voice_id = voice_id.lower()
|
||||
return f"{voice_model}:{voice_id}"
|
||||
|
||||
voice_id = raw.lower() if raw.lower() in OPENAI_COMPATIBLE_KNOWN_VOICES else raw
|
||||
return f"{model_name}:{voice_id}"
|
||||
|
||||
|
||||
def _config_version_id(assistant: Assistant) -> str:
|
||||
updated = assistant.updated_at or assistant.created_at or datetime.utcnow()
|
||||
return f"asst_{assistant.id}_{updated.strftime('%Y%m%d%H%M%S')}"
|
||||
|
||||
|
||||
def _normalize_runtime_tool_schema(tool_id: str, raw_schema: Any) -> Dict[str, Any]:
|
||||
schema = dict(raw_schema) if isinstance(raw_schema, dict) else {}
|
||||
if not schema:
|
||||
fallback = TOOL_REGISTRY.get(tool_id, {}).get("parameters")
|
||||
if isinstance(fallback, dict):
|
||||
schema = dict(fallback)
|
||||
schema.setdefault("type", "object")
|
||||
if not isinstance(schema.get("properties"), dict):
|
||||
schema["properties"] = {}
|
||||
required = schema.get("required")
|
||||
if required is None or not isinstance(required, list):
|
||||
schema["required"] = []
|
||||
return schema
|
||||
|
||||
|
||||
def _compose_runtime_system_prompt(base_prompt: Optional[str]) -> str:
|
||||
raw = str(base_prompt or "").strip()
|
||||
tool_policy = (
|
||||
"Tool usage policy:\n"
|
||||
"- Tool function names/IDs are internal and must never be shown to users.\n"
|
||||
"- When users ask which tools are available, describe capabilities in natural language.\n"
|
||||
"- Do not expose raw tool call payloads, IDs, or executor details."
|
||||
)
|
||||
return f"{raw}\n\n{tool_policy}" if raw else tool_policy
|
||||
|
||||
|
||||
def _ensure_assistant_schema(db: Session) -> None:
|
||||
"""Apply lightweight SQLite migrations for newly added assistants columns."""
|
||||
bind = db.get_bind()
|
||||
inspector = inspect(bind)
|
||||
try:
|
||||
columns = {col["name"] for col in inspector.get_columns("assistants")}
|
||||
except Exception:
|
||||
return
|
||||
|
||||
altered = False
|
||||
if "manual_opener_tool_calls" not in columns:
|
||||
db.execute(text("ALTER TABLE assistants ADD COLUMN manual_opener_tool_calls JSON"))
|
||||
altered = True
|
||||
if "asr_interim_enabled" not in columns:
|
||||
db.execute(text("ALTER TABLE assistants ADD COLUMN asr_interim_enabled BOOLEAN DEFAULT 0"))
|
||||
altered = True
|
||||
if "app_id" not in columns:
|
||||
db.execute(text("ALTER TABLE assistants ADD COLUMN app_id VARCHAR(255)"))
|
||||
altered = True
|
||||
|
||||
if altered:
|
||||
db.commit()
|
||||
|
||||
|
||||
def _normalize_manual_opener_tool_calls(raw: Any, warnings: Optional[List[str]] = None) -> List[Dict[str, Any]]:
|
||||
normalized: List[Dict[str, Any]] = []
|
||||
if not isinstance(raw, list):
|
||||
return normalized
|
||||
|
||||
for idx, item in enumerate(raw):
|
||||
if not isinstance(item, dict):
|
||||
if warnings is not None:
|
||||
warnings.append(f"Ignored invalid manual opener tool call at index {idx}: not an object")
|
||||
continue
|
||||
|
||||
tool_name = normalize_tool_id(str(
|
||||
item.get("toolName")
|
||||
or item.get("tool_name")
|
||||
or item.get("name")
|
||||
or ""
|
||||
).strip())
|
||||
if not tool_name:
|
||||
if warnings is not None:
|
||||
warnings.append(f"Ignored invalid manual opener tool call at index {idx}: missing toolName")
|
||||
continue
|
||||
|
||||
args_raw = item.get("arguments")
|
||||
args: Dict[str, Any] = {}
|
||||
if isinstance(args_raw, dict):
|
||||
args = dict(args_raw)
|
||||
elif isinstance(args_raw, str):
|
||||
text_value = args_raw.strip()
|
||||
if text_value:
|
||||
try:
|
||||
parsed = json.loads(text_value)
|
||||
if isinstance(parsed, dict):
|
||||
args = parsed
|
||||
else:
|
||||
if warnings is not None:
|
||||
warnings.append(
|
||||
f"Ignored non-object arguments for manual opener tool call '{tool_name}' at index {idx}"
|
||||
)
|
||||
except Exception:
|
||||
if warnings is not None:
|
||||
warnings.append(f"Ignored invalid JSON arguments for manual opener tool call '{tool_name}' at index {idx}")
|
||||
elif args_raw is not None and warnings is not None:
|
||||
warnings.append(f"Ignored unsupported arguments type for manual opener tool call '{tool_name}' at index {idx}")
|
||||
|
||||
normalized.append({"toolName": tool_name, "arguments": args})
|
||||
|
||||
# Keep opener sequence intentionally short to avoid long pre-dialog delays.
|
||||
return normalized[:8]
|
||||
|
||||
|
||||
def _normalize_assistant_tool_ids(raw: Any) -> List[str]:
|
||||
if not isinstance(raw, list):
|
||||
return []
|
||||
normalized: List[str] = []
|
||||
seen: set[str] = set()
|
||||
for item in raw:
|
||||
tool_id = normalize_tool_id(item)
|
||||
if not tool_id or tool_id in seen:
|
||||
continue
|
||||
seen.add(tool_id)
|
||||
normalized.append(tool_id)
|
||||
return normalized
|
||||
|
||||
|
||||
def _resolve_runtime_tools(db: Session, selected_tool_ids: List[str], warnings: List[str]) -> List[Dict[str, Any]]:
|
||||
_ensure_tool_resource_schema(db)
|
||||
ids = _normalize_assistant_tool_ids(selected_tool_ids)
|
||||
if not ids:
|
||||
return []
|
||||
|
||||
resources = (
|
||||
db.query(ToolResource)
|
||||
.filter(ToolResource.id.in_(ids))
|
||||
.all()
|
||||
)
|
||||
by_id = {str(item.id): item for item in resources}
|
||||
|
||||
runtime_tools: List[Dict[str, Any]] = []
|
||||
for tool_id in ids:
|
||||
resource = by_id.get(tool_id)
|
||||
if resource and resource.enabled is False:
|
||||
warnings.append(f"Tool is disabled and skipped in runtime config: {tool_id}")
|
||||
continue
|
||||
|
||||
category = str(resource.category if resource else TOOL_CATEGORY_MAP.get(tool_id, "query"))
|
||||
display_name = (
|
||||
str(resource.name or tool_id).strip()
|
||||
if resource
|
||||
else str(TOOL_REGISTRY.get(tool_id, {}).get("name") or tool_id).strip()
|
||||
)
|
||||
description = (
|
||||
str(resource.description or resource.name or "").strip()
|
||||
if resource
|
||||
else str(TOOL_REGISTRY.get(tool_id, {}).get("description") or "").strip()
|
||||
)
|
||||
schema = _normalize_runtime_tool_schema(
|
||||
tool_id,
|
||||
resource.parameter_schema if resource else TOOL_REGISTRY.get(tool_id, {}).get("parameters"),
|
||||
)
|
||||
defaults_raw = resource.parameter_defaults if resource else TOOL_PARAMETER_DEFAULTS.get(tool_id)
|
||||
defaults = dict(defaults_raw) if isinstance(defaults_raw, dict) else {}
|
||||
wait_for_response = (
|
||||
bool(resource.wait_for_response)
|
||||
if resource
|
||||
else bool(TOOL_WAIT_FOR_RESPONSE_DEFAULTS.get(tool_id, False))
|
||||
)
|
||||
|
||||
if not resource and tool_id not in TOOL_REGISTRY:
|
||||
warnings.append(f"Tool resource not found: {tool_id}")
|
||||
|
||||
runtime_tool: Dict[str, Any] = {
|
||||
"type": "function",
|
||||
"executor": "client" if category == "system" else "server",
|
||||
"function": {
|
||||
"name": tool_id,
|
||||
"description": (
|
||||
f"Display name: {display_name}. {description}".strip()
|
||||
if display_name
|
||||
else (description or tool_id)
|
||||
),
|
||||
"parameters": schema,
|
||||
},
|
||||
"displayName": display_name or tool_id,
|
||||
"toolId": tool_id,
|
||||
"waitForResponse": wait_for_response,
|
||||
}
|
||||
if defaults:
|
||||
runtime_tool["defaultArgs"] = defaults
|
||||
runtime_tools.append(runtime_tool)
|
||||
|
||||
return runtime_tools
|
||||
|
||||
|
||||
def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> tuple[Dict[str, Any], List[str]]:
|
||||
warnings: List[str] = []
|
||||
generated_opener_enabled = bool(assistant.generated_opener_enabled)
|
||||
manual_opener_tool_calls = _normalize_manual_opener_tool_calls(
|
||||
assistant.manual_opener_tool_calls,
|
||||
warnings=warnings,
|
||||
)
|
||||
metadata: Dict[str, Any] = {
|
||||
"systemPrompt": _compose_runtime_system_prompt(assistant.prompt),
|
||||
"firstTurnMode": assistant.first_turn_mode or "bot_first",
|
||||
# Generated opener should rely on systemPrompt instead of fixed opener text.
|
||||
"greeting": "" if generated_opener_enabled else (assistant.opener or ""),
|
||||
"generatedOpenerEnabled": generated_opener_enabled,
|
||||
"manualOpenerToolCalls": manual_opener_tool_calls,
|
||||
"output": {"mode": "audio" if assistant.voice_output_enabled else "text"},
|
||||
"bargeIn": {
|
||||
"enabled": not bool(assistant.bot_cannot_be_interrupted),
|
||||
"minDurationMs": int(assistant.interruption_sensitivity or 500),
|
||||
},
|
||||
"services": {},
|
||||
"tools": _resolve_runtime_tools(db, assistant.tools or [], warnings),
|
||||
"history": {
|
||||
"assistantId": assistant.id,
|
||||
"userId": int(assistant.user_id or 1),
|
||||
"source": "debug",
|
||||
},
|
||||
}
|
||||
|
||||
config_mode = str(assistant.config_mode or "platform").strip().lower()
|
||||
|
||||
if config_mode == "dify":
|
||||
metadata["services"]["llm"] = {
|
||||
"provider": "openai",
|
||||
"model": "",
|
||||
"apiKey": assistant.api_key,
|
||||
"baseUrl": assistant.api_url,
|
||||
}
|
||||
if not (assistant.api_url or "").strip():
|
||||
warnings.append(f"External LLM API URL is empty for mode: {assistant.config_mode}")
|
||||
if not (assistant.api_key or "").strip():
|
||||
warnings.append(f"External LLM API key is empty for mode: {assistant.config_mode}")
|
||||
elif config_mode == "fastgpt":
|
||||
metadata["services"]["llm"] = {
|
||||
"provider": "fastgpt",
|
||||
"model": "fastgpt",
|
||||
"apiKey": assistant.api_key,
|
||||
"baseUrl": assistant.api_url,
|
||||
}
|
||||
if (assistant.app_id or "").strip():
|
||||
metadata["services"]["llm"]["appId"] = assistant.app_id
|
||||
if not (assistant.api_url or "").strip():
|
||||
warnings.append(f"FastGPT API URL is empty for mode: {assistant.config_mode}")
|
||||
if not (assistant.api_key or "").strip():
|
||||
warnings.append(f"FastGPT API key is empty for mode: {assistant.config_mode}")
|
||||
elif assistant.llm_model_id:
|
||||
llm = db.query(LLMModel).filter(LLMModel.id == assistant.llm_model_id).first()
|
||||
if llm:
|
||||
metadata["services"]["llm"] = {
|
||||
"provider": "openai",
|
||||
"model": llm.model_name or llm.name,
|
||||
"apiKey": llm.api_key,
|
||||
"baseUrl": llm.base_url,
|
||||
}
|
||||
else:
|
||||
warnings.append(f"LLM model not found: {assistant.llm_model_id}")
|
||||
|
||||
asr_runtime: Dict[str, Any] = {
|
||||
"enableInterim": bool(assistant.asr_interim_enabled),
|
||||
}
|
||||
if assistant.asr_model_id:
|
||||
asr = db.query(ASRModel).filter(ASRModel.id == assistant.asr_model_id).first()
|
||||
if asr:
|
||||
if _is_dashscope_vendor(asr.vendor):
|
||||
asr_provider = "dashscope"
|
||||
elif _is_openai_compatible_vendor(asr.vendor):
|
||||
asr_provider = "openai_compatible"
|
||||
else:
|
||||
asr_provider = "buffered"
|
||||
asr_runtime.update({
|
||||
"provider": asr_provider,
|
||||
"model": asr.model_name or asr.name,
|
||||
"apiKey": asr.api_key if asr_provider in {"openai_compatible", "dashscope"} else None,
|
||||
"baseUrl": asr.base_url if asr_provider in {"openai_compatible", "dashscope"} else None,
|
||||
})
|
||||
else:
|
||||
warnings.append(f"ASR model not found: {assistant.asr_model_id}")
|
||||
metadata["services"]["asr"] = asr_runtime
|
||||
|
||||
if not assistant.voice_output_enabled:
|
||||
metadata["services"]["tts"] = {"enabled": False}
|
||||
elif assistant.voice:
|
||||
voice = db.query(Voice).filter(Voice.id == assistant.voice).first()
|
||||
if voice:
|
||||
if _is_dashscope_vendor(voice.vendor):
|
||||
tts_provider = "dashscope"
|
||||
elif _is_openai_compatible_vendor(voice.vendor):
|
||||
tts_provider = "openai_compatible"
|
||||
else:
|
||||
tts_provider = "edge"
|
||||
model = voice.model
|
||||
runtime_voice = voice.voice_key or voice.id
|
||||
if tts_provider == "openai_compatible":
|
||||
model = model or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
runtime_voice = _normalize_openai_compatible_voice_key(runtime_voice, model)
|
||||
metadata["services"]["tts"] = {
|
||||
"enabled": True,
|
||||
"provider": tts_provider,
|
||||
"model": model,
|
||||
"apiKey": voice.api_key if tts_provider in {"openai_compatible", "dashscope"} else None,
|
||||
"baseUrl": voice.base_url if tts_provider in {"openai_compatible", "dashscope"} else None,
|
||||
"voice": runtime_voice,
|
||||
"speed": assistant.speed or voice.speed,
|
||||
}
|
||||
else:
|
||||
# Keep assistant.voice as direct voice identifier fallback
|
||||
metadata["services"]["tts"] = {
|
||||
"enabled": True,
|
||||
"voice": assistant.voice,
|
||||
"speed": assistant.speed or 1.0,
|
||||
}
|
||||
warnings.append(f"Voice resource not found: {assistant.voice}")
|
||||
|
||||
if assistant.knowledge_base_id:
|
||||
metadata["knowledgeBaseId"] = assistant.knowledge_base_id
|
||||
metadata["knowledge"] = {
|
||||
"enabled": True,
|
||||
"kbId": assistant.knowledge_base_id,
|
||||
"nResults": 5,
|
||||
}
|
||||
opener_audio = assistant.opener_audio
|
||||
opener_audio_ready = bool(opener_audio and opener_audio.file_path and Path(opener_audio.file_path).exists())
|
||||
metadata["openerAudio"] = {
|
||||
"enabled": bool(opener_audio.enabled) if opener_audio else False,
|
||||
"ready": opener_audio_ready,
|
||||
"encoding": opener_audio.encoding if opener_audio else "pcm_s16le",
|
||||
"sampleRateHz": int(opener_audio.sample_rate_hz) if opener_audio else 16000,
|
||||
"channels": int(opener_audio.channels) if opener_audio else 1,
|
||||
"durationMs": int(opener_audio.duration_ms) if opener_audio else 0,
|
||||
"textHash": opener_audio.text_hash if opener_audio else None,
|
||||
"ttsFingerprint": opener_audio.tts_fingerprint if opener_audio else None,
|
||||
"pcmUrl": f"/api/assistants/{assistant.id}/opener-audio/pcm" if opener_audio_ready else None,
|
||||
}
|
||||
return metadata, warnings
|
||||
|
||||
|
||||
def _build_engine_assistant_config(db: Session, assistant: Assistant) -> Dict[str, Any]:
|
||||
session_metadata, warnings = _resolve_runtime_metadata(db, assistant)
|
||||
config_version_id = _config_version_id(assistant)
|
||||
assistant_cfg = dict(session_metadata)
|
||||
assistant_cfg["assistantId"] = assistant.id
|
||||
assistant_cfg["configVersionId"] = config_version_id
|
||||
|
||||
return {
|
||||
"assistantId": assistant.id,
|
||||
"configVersionId": config_version_id,
|
||||
"assistant": assistant_cfg,
|
||||
"sessionStartMetadata": session_metadata,
|
||||
"sources": {
|
||||
"llmModelId": assistant.llm_model_id,
|
||||
"asrModelId": assistant.asr_model_id,
|
||||
"voiceId": assistant.voice,
|
||||
"knowledgeBaseId": assistant.knowledge_base_id,
|
||||
},
|
||||
"warnings": warnings,
|
||||
}
|
||||
|
||||
|
||||
def assistant_to_dict(assistant: Assistant) -> dict:
|
||||
opener_audio = assistant.opener_audio
|
||||
opener_audio_ready = bool(opener_audio and opener_audio.file_path and Path(opener_audio.file_path).exists())
|
||||
return {
|
||||
"id": assistant.id,
|
||||
"name": assistant.name,
|
||||
"callCount": assistant.call_count,
|
||||
"firstTurnMode": assistant.first_turn_mode or "bot_first",
|
||||
"opener": assistant.opener or "",
|
||||
"manualOpenerToolCalls": _normalize_manual_opener_tool_calls(assistant.manual_opener_tool_calls),
|
||||
"generatedOpenerEnabled": bool(assistant.generated_opener_enabled),
|
||||
"openerAudioEnabled": bool(opener_audio.enabled) if opener_audio else False,
|
||||
"openerAudioReady": opener_audio_ready,
|
||||
"openerAudioDurationMs": int(opener_audio.duration_ms) if opener_audio else 0,
|
||||
"openerAudioUpdatedAt": opener_audio.updated_at if opener_audio else None,
|
||||
"prompt": assistant.prompt or "",
|
||||
"knowledgeBaseId": assistant.knowledge_base_id,
|
||||
"language": assistant.language,
|
||||
"voiceOutputEnabled": assistant.voice_output_enabled,
|
||||
"voice": assistant.voice,
|
||||
"speed": assistant.speed,
|
||||
"hotwords": assistant.hotwords or [],
|
||||
"tools": _normalize_assistant_tool_ids(assistant.tools),
|
||||
"asrInterimEnabled": bool(assistant.asr_interim_enabled),
|
||||
"botCannotBeInterrupted": bool(assistant.bot_cannot_be_interrupted),
|
||||
"interruptionSensitivity": assistant.interruption_sensitivity,
|
||||
"configMode": assistant.config_mode,
|
||||
"apiUrl": assistant.api_url,
|
||||
"apiKey": assistant.api_key,
|
||||
"appId": assistant.app_id,
|
||||
"llmModelId": assistant.llm_model_id,
|
||||
"asrModelId": assistant.asr_model_id,
|
||||
"embeddingModelId": assistant.embedding_model_id,
|
||||
"rerankModelId": assistant.rerank_model_id,
|
||||
"created_at": assistant.created_at,
|
||||
"updated_at": assistant.updated_at,
|
||||
}
|
||||
|
||||
|
||||
def _apply_assistant_update(assistant: Assistant, update_data: dict) -> None:
|
||||
field_map = {
|
||||
"knowledgeBaseId": "knowledge_base_id",
|
||||
"firstTurnMode": "first_turn_mode",
|
||||
"manualOpenerToolCalls": "manual_opener_tool_calls",
|
||||
"interruptionSensitivity": "interruption_sensitivity",
|
||||
"asrInterimEnabled": "asr_interim_enabled",
|
||||
"botCannotBeInterrupted": "bot_cannot_be_interrupted",
|
||||
"configMode": "config_mode",
|
||||
"voiceOutputEnabled": "voice_output_enabled",
|
||||
"generatedOpenerEnabled": "generated_opener_enabled",
|
||||
"apiUrl": "api_url",
|
||||
"apiKey": "api_key",
|
||||
"appId": "app_id",
|
||||
"llmModelId": "llm_model_id",
|
||||
"asrModelId": "asr_model_id",
|
||||
"embeddingModelId": "embedding_model_id",
|
||||
"rerankModelId": "rerank_model_id",
|
||||
}
|
||||
for field, value in update_data.items():
|
||||
setattr(assistant, field_map.get(field, field), value)
|
||||
|
||||
|
||||
def _ensure_assistant_opener_audio(db: Session, assistant: Assistant) -> AssistantOpenerAudio:
|
||||
record = assistant.opener_audio
|
||||
if record:
|
||||
return record
|
||||
record = AssistantOpenerAudio(assistant_id=assistant.id, enabled=False)
|
||||
db.add(record)
|
||||
db.flush()
|
||||
return record
|
||||
|
||||
|
||||
def _resolve_tts_runtime_for_assistant(db: Session, assistant: Assistant) -> tuple[Dict[str, Any], Optional[Voice]]:
|
||||
metadata, _ = _resolve_runtime_metadata(db, assistant)
|
||||
services = metadata.get("services") if isinstance(metadata.get("services"), dict) else {}
|
||||
tts = services.get("tts") if isinstance(services, dict) and isinstance(services.get("tts"), dict) else {}
|
||||
voice = db.query(Voice).filter(Voice.id == assistant.voice).first() if assistant.voice else None
|
||||
return tts, voice
|
||||
|
||||
|
||||
def _tts_fingerprint(tts_cfg: Dict[str, Any], opener_text: str) -> str:
|
||||
identity = {
|
||||
"provider": tts_cfg.get("provider"),
|
||||
"model": tts_cfg.get("model"),
|
||||
"voice": tts_cfg.get("voice"),
|
||||
"speed": tts_cfg.get("speed"),
|
||||
"text": opener_text,
|
||||
}
|
||||
return hashlib.sha256(str(identity).encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _synthesize_openai_compatible_wav(
|
||||
*,
|
||||
text: str,
|
||||
model: str,
|
||||
voice_key: str,
|
||||
speed: float,
|
||||
api_key: str,
|
||||
base_url: str,
|
||||
) -> bytes:
|
||||
payload = {
|
||||
"model": model or OPENAI_COMPATIBLE_DEFAULT_MODEL,
|
||||
"input": text,
|
||||
"voice": voice_key,
|
||||
"response_format": "wav",
|
||||
"speed": speed,
|
||||
}
|
||||
with httpx.Client(timeout=45.0) as client:
|
||||
response = client.post(
|
||||
f"{base_url.rstrip('/')}/audio/speech",
|
||||
headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
|
||||
json=payload,
|
||||
)
|
||||
if response.status_code != 200:
|
||||
detail = response.text
|
||||
try:
|
||||
detail_json = response.json()
|
||||
detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
|
||||
except Exception:
|
||||
pass
|
||||
raise HTTPException(status_code=502, detail=f"TTS vendor error: {detail}")
|
||||
return response.content
|
||||
|
||||
|
||||
def _wav_to_pcm16_mono_16k(wav_bytes: bytes) -> tuple[bytes, int]:
|
||||
with wave.open(io.BytesIO(wav_bytes), "rb") as wav_file:
|
||||
channels = wav_file.getnchannels()
|
||||
sample_width = wav_file.getsampwidth()
|
||||
sample_rate = wav_file.getframerate()
|
||||
frames = wav_file.getnframes()
|
||||
raw = wav_file.readframes(frames)
|
||||
|
||||
if sample_width != 2:
|
||||
raise HTTPException(status_code=400, detail=f"Unsupported WAV sample width: {sample_width * 8}bit")
|
||||
|
||||
if channels > 1:
|
||||
raw = audioop.tomono(raw, sample_width, 0.5, 0.5)
|
||||
|
||||
if sample_rate != 16000:
|
||||
raw, _ = audioop.ratecv(raw, sample_width, 1, sample_rate, 16000, None)
|
||||
|
||||
duration_ms = int((len(raw) / (16000 * 2)) * 1000)
|
||||
return raw, duration_ms
|
||||
|
||||
|
||||
def _persist_opener_audio_pcm(assistant_id: str, pcm_bytes: bytes) -> str:
|
||||
OPENER_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
|
||||
file_path = OPENER_AUDIO_DIR / f"{assistant_id}.pcm"
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(pcm_bytes)
|
||||
return str(file_path)
|
||||
|
||||
|
||||
def _opener_audio_out(record: Optional[AssistantOpenerAudio]) -> AssistantOpenerAudioOut:
|
||||
if not record:
|
||||
return AssistantOpenerAudioOut()
|
||||
ready = bool(record.file_path and Path(record.file_path).exists())
|
||||
return AssistantOpenerAudioOut(
|
||||
enabled=bool(record.enabled),
|
||||
ready=ready,
|
||||
encoding=record.encoding,
|
||||
sample_rate_hz=record.sample_rate_hz,
|
||||
channels=record.channels,
|
||||
duration_ms=record.duration_ms,
|
||||
updated_at=record.updated_at,
|
||||
text_hash=record.text_hash,
|
||||
tts_fingerprint=record.tts_fingerprint,
|
||||
)
|
||||
|
||||
|
||||
# ============ Assistants ============
|
||||
@router.get("/assistants")
|
||||
@router.get("")
|
||||
def list_assistants(
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取助手列表"""
|
||||
_ensure_assistant_schema(db)
|
||||
query = db.query(Assistant)
|
||||
total = query.count()
|
||||
assistants = query.order_by(Assistant.created_at.desc()) \
|
||||
.offset((page-1)*limit).limit(limit).all()
|
||||
return {"total": total, "page": page, "limit": limit, "list": assistants}
|
||||
return {
|
||||
"total": total,
|
||||
"page": page,
|
||||
"limit": limit,
|
||||
"list": [assistant_to_dict(a) for a in assistants]
|
||||
}
|
||||
|
||||
|
||||
@router.get("/assistants/{id}", response_model=AssistantOut)
|
||||
@router.get("/{id}", response_model=AssistantOut)
|
||||
def get_assistant(id: str, db: Session = Depends(get_db)):
|
||||
"""获取单个助手详情"""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
return assistant
|
||||
return assistant_to_dict(assistant)
|
||||
|
||||
|
||||
@router.post("/assistants", response_model=AssistantOut)
|
||||
@router.get("/{id}/config", response_model=AssistantEngineConfigResponse)
|
||||
def get_assistant_config(id: str, db: Session = Depends(get_db)):
|
||||
"""Canonical engine config endpoint consumed by engine backend adapter."""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
return _build_engine_assistant_config(db, assistant)
|
||||
|
||||
|
||||
@router.get("/{id}/runtime-config", response_model=AssistantEngineConfigResponse)
|
||||
def get_assistant_runtime_config(id: str, db: Session = Depends(get_db)):
|
||||
"""Legacy alias for resolved engine runtime config."""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
return _build_engine_assistant_config(db, assistant)
|
||||
|
||||
|
||||
@router.post("", response_model=AssistantOut)
|
||||
def create_assistant(data: AssistantCreate, db: Session = Depends(get_db)):
|
||||
"""创建新助手"""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = Assistant(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
user_id=1, # 默认用户,后续添加认证
|
||||
name=data.name,
|
||||
first_turn_mode=data.firstTurnMode,
|
||||
opener=data.opener,
|
||||
manual_opener_tool_calls=_normalize_manual_opener_tool_calls(data.manualOpenerToolCalls),
|
||||
generated_opener_enabled=data.generatedOpenerEnabled,
|
||||
prompt=data.prompt,
|
||||
knowledge_base_id=data.knowledgeBaseId,
|
||||
language=data.language,
|
||||
voice_output_enabled=data.voiceOutputEnabled,
|
||||
voice=data.voice,
|
||||
speed=data.speed,
|
||||
hotwords=data.hotwords,
|
||||
tools=data.tools,
|
||||
tools=_normalize_assistant_tool_ids(data.tools),
|
||||
asr_interim_enabled=data.asrInterimEnabled,
|
||||
bot_cannot_be_interrupted=data.botCannotBeInterrupted,
|
||||
interruption_sensitivity=data.interruptionSensitivity,
|
||||
config_mode=data.configMode,
|
||||
api_url=data.apiUrl,
|
||||
api_key=data.apiKey,
|
||||
app_id=data.appId,
|
||||
llm_model_id=data.llmModelId,
|
||||
asr_model_id=data.asrModelId,
|
||||
embedding_model_id=data.embeddingModelId,
|
||||
rerank_model_id=data.rerankModelId,
|
||||
)
|
||||
db.add(assistant)
|
||||
db.commit()
|
||||
db.refresh(assistant)
|
||||
return assistant
|
||||
opener_audio = _ensure_assistant_opener_audio(db, assistant)
|
||||
opener_audio.enabled = bool(data.openerAudioEnabled)
|
||||
opener_audio.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
db.refresh(assistant)
|
||||
return assistant_to_dict(assistant)
|
||||
|
||||
|
||||
@router.put("/assistants/{id}")
|
||||
def update_assistant(id: str, data: AssistantUpdate, db: Session = Depends(get_db)):
|
||||
"""更新助手"""
|
||||
@router.get("/{id}/opener-audio", response_model=AssistantOpenerAudioOut)
|
||||
def get_assistant_opener_audio(id: str, db: Session = Depends(get_db)):
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
|
||||
return _opener_audio_out(assistant.opener_audio)
|
||||
|
||||
|
||||
@router.get("/{id}/opener-audio/pcm")
|
||||
def get_assistant_opener_audio_pcm(id: str, db: Session = Depends(get_db)):
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
record = assistant.opener_audio
|
||||
if not record or not record.file_path:
|
||||
raise HTTPException(status_code=404, detail="Opener audio not generated")
|
||||
file_path = Path(record.file_path)
|
||||
if not file_path.exists():
|
||||
raise HTTPException(status_code=404, detail="Opener audio file missing")
|
||||
return FileResponse(
|
||||
str(file_path),
|
||||
media_type="application/octet-stream",
|
||||
filename=f"{assistant.id}.pcm",
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{id}/opener-audio/generate", response_model=AssistantOpenerAudioOut)
|
||||
def generate_assistant_opener_audio(
|
||||
id: str,
|
||||
data: AssistantOpenerAudioGenerateRequest,
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
if not assistant.voice_output_enabled:
|
||||
raise HTTPException(status_code=400, detail="Voice output is disabled")
|
||||
|
||||
opener_text = (data.text if data.text is not None else assistant.opener or "").strip()
|
||||
if not opener_text:
|
||||
raise HTTPException(status_code=400, detail="Opener text is empty")
|
||||
|
||||
tts_cfg, voice = _resolve_tts_runtime_for_assistant(db, assistant)
|
||||
provider = str(tts_cfg.get("provider") or "").strip().lower()
|
||||
if provider not in {"openai_compatible", "dashscope"}:
|
||||
raise HTTPException(status_code=400, detail=f"Unsupported provider for preloaded opener audio: {provider or 'unknown'}")
|
||||
|
||||
speed = float(tts_cfg.get("speed") or assistant.speed or 1.0)
|
||||
voice_key = str(tts_cfg.get("voice") or "").strip()
|
||||
model = str(tts_cfg.get("model") or "").strip() or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
api_key = str(tts_cfg.get("apiKey") or "").strip()
|
||||
base_url = str(tts_cfg.get("baseUrl") or "").strip()
|
||||
|
||||
if provider == "openai_compatible":
|
||||
if not api_key:
|
||||
if voice and voice.api_key:
|
||||
api_key = voice.api_key.strip()
|
||||
if not api_key:
|
||||
api_key = (os.getenv("SILICONFLOW_API_KEY", "") or os.getenv("TTS_API_KEY", "")).strip()
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=400, detail="TTS API key is missing")
|
||||
if not base_url:
|
||||
base_url = OPENAI_COMPATIBLE_DEFAULT_BASE_URL
|
||||
wav_bytes = _synthesize_openai_compatible_wav(
|
||||
text=opener_text,
|
||||
model=model,
|
||||
voice_key=voice_key,
|
||||
speed=speed,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
)
|
||||
else:
|
||||
from .voices import _synthesize_dashscope_preview, DASHSCOPE_DEFAULT_BASE_URL, DASHSCOPE_DEFAULT_MODEL, DASHSCOPE_DEFAULT_VOICE_KEY
|
||||
if not api_key:
|
||||
if voice and voice.api_key:
|
||||
api_key = voice.api_key.strip()
|
||||
if not api_key:
|
||||
api_key = (os.getenv("DASHSCOPE_API_KEY", "") or os.getenv("TTS_API_KEY", "")).strip()
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=400, detail="DashScope API key is missing")
|
||||
if not base_url:
|
||||
base_url = DASHSCOPE_DEFAULT_BASE_URL
|
||||
if not model:
|
||||
model = DASHSCOPE_DEFAULT_MODEL
|
||||
if not voice_key:
|
||||
voice_key = DASHSCOPE_DEFAULT_VOICE_KEY
|
||||
try:
|
||||
wav_bytes = _synthesize_dashscope_preview(
|
||||
text=opener_text,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
model=model,
|
||||
voice_key=voice_key,
|
||||
speed=speed,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"DashScope opener audio generation failed: {exc}") from exc
|
||||
|
||||
pcm_bytes, duration_ms = _wav_to_pcm16_mono_16k(wav_bytes)
|
||||
record = _ensure_assistant_opener_audio(db, assistant)
|
||||
record.enabled = True
|
||||
record.file_path = _persist_opener_audio_pcm(assistant.id, pcm_bytes)
|
||||
record.encoding = "pcm_s16le"
|
||||
record.sample_rate_hz = 16000
|
||||
record.channels = 1
|
||||
record.duration_ms = duration_ms
|
||||
record.text_hash = hashlib.sha256(opener_text.encode("utf-8")).hexdigest()
|
||||
record.tts_fingerprint = _tts_fingerprint(tts_cfg, opener_text)
|
||||
now = datetime.utcnow()
|
||||
if not record.created_at:
|
||||
record.created_at = now
|
||||
record.updated_at = now
|
||||
assistant.updated_at = now
|
||||
db.commit()
|
||||
db.refresh(assistant)
|
||||
return _opener_audio_out(assistant.opener_audio)
|
||||
|
||||
|
||||
@router.put("/{id}")
|
||||
def update_assistant(id: str, data: AssistantUpdate, db: Session = Depends(get_db)):
|
||||
"""更新助手"""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
for field, value in update_data.items():
|
||||
setattr(assistant, field, value)
|
||||
|
||||
opener_audio_enabled = update_data.pop("openerAudioEnabled", None)
|
||||
if "manualOpenerToolCalls" in update_data:
|
||||
update_data["manualOpenerToolCalls"] = _normalize_manual_opener_tool_calls(update_data.get("manualOpenerToolCalls"))
|
||||
if "tools" in update_data:
|
||||
update_data["tools"] = _normalize_assistant_tool_ids(update_data.get("tools"))
|
||||
_apply_assistant_update(assistant, update_data)
|
||||
if opener_audio_enabled is not None:
|
||||
record = _ensure_assistant_opener_audio(db, assistant)
|
||||
record.enabled = bool(opener_audio_enabled)
|
||||
record.updated_at = datetime.utcnow()
|
||||
|
||||
assistant.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
db.refresh(assistant)
|
||||
return assistant
|
||||
return assistant_to_dict(assistant)
|
||||
|
||||
|
||||
@router.delete("/assistants/{id}")
|
||||
@router.delete("/{id}")
|
||||
def delete_assistant(id: str, db: Session = Depends(get_db)):
|
||||
"""删除助手"""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
db.delete(assistant)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
# ============ Workflows ============
|
||||
@router.get("/workflows", response_model=List[WorkflowOut])
|
||||
def list_workflows(db: Session = Depends(get_db)):
|
||||
"""获取工作流列表"""
|
||||
workflows = db.query(Workflow).all()
|
||||
return workflows
|
||||
|
||||
|
||||
@router.post("/workflows", response_model=WorkflowOut)
|
||||
def create_workflow(data: WorkflowCreate, db: Session = Depends(get_db)):
|
||||
"""创建工作流"""
|
||||
workflow = Workflow(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
user_id=1,
|
||||
name=data.name,
|
||||
node_count=data.nodeCount,
|
||||
created_at=data.createdAt or datetime.utcnow().isoformat(),
|
||||
updated_at=data.updatedAt or "",
|
||||
global_prompt=data.globalPrompt,
|
||||
nodes=data.nodes,
|
||||
edges=data.edges,
|
||||
)
|
||||
db.add(workflow)
|
||||
db.commit()
|
||||
db.refresh(workflow)
|
||||
return workflow
|
||||
|
||||
|
||||
@router.put("/workflows/{id}", response_model=WorkflowOut)
|
||||
def update_workflow(id: str, data: WorkflowUpdate, db: Session = Depends(get_db)):
|
||||
"""更新工作流"""
|
||||
workflow = db.query(Workflow).filter(Workflow.id == id).first()
|
||||
if not workflow:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
for field, value in update_data.items():
|
||||
setattr(workflow, field, value)
|
||||
|
||||
workflow.updated_at = datetime.utcnow().isoformat()
|
||||
db.commit()
|
||||
db.refresh(workflow)
|
||||
return workflow
|
||||
|
||||
|
||||
@router.delete("/workflows/{id}")
|
||||
def delete_workflow(id: str, db: Session = Depends(get_db)):
|
||||
"""删除工作流"""
|
||||
workflow = db.query(Workflow).filter(Workflow.id == id).first()
|
||||
if not workflow:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
db.delete(workflow)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
@@ -7,14 +7,32 @@ from datetime import datetime
|
||||
from ..db import get_db
|
||||
from ..models import CallRecord, CallTranscript, CallAudioSegment
|
||||
from ..storage import get_audio_url
|
||||
from ..schemas import CallRecordCreate, CallRecordUpdate, TranscriptCreate
|
||||
|
||||
router = APIRouter(prefix="/history", tags=["history"])
|
||||
|
||||
|
||||
def record_to_dict(record: CallRecord) -> dict:
|
||||
return {
|
||||
"id": record.id,
|
||||
"user_id": record.user_id,
|
||||
"assistant_id": record.assistant_id,
|
||||
"source": record.source,
|
||||
"status": record.status,
|
||||
"started_at": record.started_at,
|
||||
"ended_at": record.ended_at,
|
||||
"duration_seconds": record.duration_seconds,
|
||||
"summary": record.summary,
|
||||
"cost": record.cost,
|
||||
"created_at": record.created_at,
|
||||
}
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_history(
|
||||
assistant_id: Optional[str] = None,
|
||||
status: Optional[str] = None,
|
||||
source: Optional[str] = None,
|
||||
page: int = 1,
|
||||
limit: int = 20,
|
||||
db: Session = Depends(get_db)
|
||||
@@ -26,12 +44,19 @@ def list_history(
|
||||
query = query.filter(CallRecord.assistant_id == assistant_id)
|
||||
if status:
|
||||
query = query.filter(CallRecord.status == status)
|
||||
if source:
|
||||
query = query.filter(CallRecord.source == source)
|
||||
|
||||
total = query.count()
|
||||
records = query.order_by(CallRecord.started_at.desc()) \
|
||||
.offset((page-1)*limit).limit(limit).all()
|
||||
|
||||
return {"total": total, "page": page, "limit": limit, "list": records}
|
||||
return {
|
||||
"total": total,
|
||||
"page": page,
|
||||
"limit": limit,
|
||||
"list": [record_to_dict(r) for r in records]
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{call_id}")
|
||||
@@ -46,10 +71,12 @@ def get_history_detail(call_id: str, db: Session = Depends(get_db)):
|
||||
.filter(CallTranscript.call_id == call_id) \
|
||||
.order_by(CallTranscript.turn_index).all()
|
||||
|
||||
# 补充音频 URL
|
||||
audio_segments = db.query(CallAudioSegment).filter(CallAudioSegment.call_id == call_id).all()
|
||||
audio_by_turn = {seg.turn_index: seg.audio_url for seg in audio_segments if seg.turn_index is not None}
|
||||
|
||||
transcript_list = []
|
||||
for t in transcripts:
|
||||
audio_url = t.audio_url or get_audio_url(call_id, t.turn_index)
|
||||
audio_url = audio_by_turn.get(t.turn_index) or get_audio_url(call_id, t.turn_index)
|
||||
transcript_list.append({
|
||||
"turnIndex": t.turn_index,
|
||||
"speaker": t.speaker,
|
||||
@@ -77,32 +104,29 @@ def get_history_detail(call_id: str, db: Session = Depends(get_db)):
|
||||
|
||||
@router.post("")
|
||||
def create_call_record(
|
||||
user_id: int,
|
||||
assistant_id: Optional[str] = None,
|
||||
source: str = "debug",
|
||||
data: CallRecordCreate,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""创建通话记录(引擎回调使用)"""
|
||||
record = CallRecord(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
user_id=user_id,
|
||||
assistant_id=assistant_id,
|
||||
source=source,
|
||||
status="connected",
|
||||
user_id=data.user_id,
|
||||
assistant_id=data.assistant_id,
|
||||
source=data.source,
|
||||
status=data.status or "connected",
|
||||
started_at=datetime.utcnow().isoformat(),
|
||||
cost=data.cost or 0.0,
|
||||
)
|
||||
db.add(record)
|
||||
db.commit()
|
||||
db.refresh(record)
|
||||
return record
|
||||
return record_to_dict(record)
|
||||
|
||||
|
||||
@router.put("/{call_id}")
|
||||
def update_call_record(
|
||||
call_id: str,
|
||||
status: Optional[str] = None,
|
||||
summary: Optional[str] = None,
|
||||
duration_seconds: Optional[int] = None,
|
||||
data: CallRecordUpdate,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""更新通话记录"""
|
||||
@@ -110,59 +134,64 @@ def update_call_record(
|
||||
if not record:
|
||||
raise HTTPException(status_code=404, detail="Call record not found")
|
||||
|
||||
if status:
|
||||
record.status = status
|
||||
if summary:
|
||||
record.summary = summary
|
||||
if duration_seconds:
|
||||
record.duration_seconds = duration_seconds
|
||||
if data.status is not None:
|
||||
record.status = data.status
|
||||
if data.summary is not None:
|
||||
record.summary = data.summary
|
||||
if data.duration_seconds is not None:
|
||||
record.duration_seconds = data.duration_seconds
|
||||
record.ended_at = datetime.utcnow().isoformat()
|
||||
if data.ended_at is not None:
|
||||
record.ended_at = data.ended_at
|
||||
if data.cost is not None:
|
||||
record.cost = data.cost
|
||||
if data.metadata is not None:
|
||||
record.call_metadata = data.metadata
|
||||
|
||||
db.commit()
|
||||
return {"message": "Updated successfully"}
|
||||
db.refresh(record)
|
||||
return record_to_dict(record)
|
||||
|
||||
|
||||
@router.post("/{call_id}/transcripts")
|
||||
def add_transcript(
|
||||
call_id: str,
|
||||
turn_index: int,
|
||||
speaker: str,
|
||||
content: str,
|
||||
start_ms: int,
|
||||
end_ms: int,
|
||||
confidence: Optional[float] = None,
|
||||
duration_ms: Optional[int] = None,
|
||||
emotion: Optional[str] = None,
|
||||
data: TranscriptCreate,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""添加转写片段"""
|
||||
record = db.query(CallRecord).filter(CallRecord.id == call_id).first()
|
||||
if not record:
|
||||
raise HTTPException(status_code=404, detail="Call record not found")
|
||||
|
||||
transcript = CallTranscript(
|
||||
call_id=call_id,
|
||||
turn_index=turn_index,
|
||||
speaker=speaker,
|
||||
content=content,
|
||||
confidence=confidence,
|
||||
start_ms=start_ms,
|
||||
end_ms=end_ms,
|
||||
duration_ms=duration_ms,
|
||||
emotion=emotion,
|
||||
turn_index=data.turn_index,
|
||||
speaker=data.speaker,
|
||||
content=data.content,
|
||||
confidence=data.confidence,
|
||||
start_ms=data.start_ms,
|
||||
end_ms=data.end_ms,
|
||||
duration_ms=data.duration_ms,
|
||||
emotion=data.emotion,
|
||||
)
|
||||
db.add(transcript)
|
||||
db.commit()
|
||||
db.refresh(transcript)
|
||||
|
||||
# 补充音频 URL
|
||||
audio_url = get_audio_url(call_id, turn_index)
|
||||
audio_url = get_audio_url(call_id, data.turn_index)
|
||||
|
||||
return {
|
||||
"id": transcript.id,
|
||||
"turn_index": turn_index,
|
||||
"speaker": speaker,
|
||||
"content": content,
|
||||
"confidence": confidence,
|
||||
"start_ms": start_ms,
|
||||
"end_ms": end_ms,
|
||||
"duration_ms": duration_ms,
|
||||
"turn_index": data.turn_index,
|
||||
"speaker": data.speaker,
|
||||
"content": data.content,
|
||||
"confidence": data.confidence,
|
||||
"start_ms": data.start_ms,
|
||||
"end_ms": data.end_ms,
|
||||
"duration_ms": data.duration_ms,
|
||||
"emotion": data.emotion,
|
||||
"audio_url": audio_url,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Request
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Optional
|
||||
import uuid
|
||||
import os
|
||||
import json
|
||||
from io import BytesIO
|
||||
from datetime import datetime
|
||||
|
||||
from ..db import get_db
|
||||
@@ -11,6 +13,7 @@ from ..schemas import (
|
||||
KnowledgeBaseCreate, KnowledgeBaseUpdate, KnowledgeBaseOut,
|
||||
KnowledgeSearchQuery, KnowledgeSearchResult, KnowledgeStats,
|
||||
DocumentIndexRequest,
|
||||
KnowledgeDocumentCreate,
|
||||
)
|
||||
from ..vector_store import (
|
||||
vector_store, search_knowledge, index_document, delete_document_from_vector
|
||||
@@ -19,20 +22,71 @@ from ..vector_store import (
|
||||
router = APIRouter(prefix="/knowledge", tags=["knowledge"])
|
||||
|
||||
|
||||
def _refresh_kb_stats(db: Session, kb_id: str) -> None:
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
if not kb:
|
||||
return
|
||||
docs = db.query(KnowledgeDocument).filter(KnowledgeDocument.kb_id == kb_id).all()
|
||||
completed_docs = [d for d in docs if d.status == "completed"]
|
||||
kb.doc_count = len(completed_docs)
|
||||
kb.chunk_count = sum(max(0, d.chunk_count or 0) for d in completed_docs)
|
||||
|
||||
|
||||
def _decode_text_bytes(raw: bytes) -> str:
|
||||
for encoding in ("utf-8", "utf-8-sig", "gb18030", "gbk", "latin-1"):
|
||||
try:
|
||||
return raw.decode(encoding)
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
return raw.decode("utf-8", errors="ignore")
|
||||
|
||||
|
||||
def _extract_text_from_upload(filename: str, content_type: Optional[str], raw: bytes) -> str:
|
||||
ext = os.path.splitext((filename or "").lower())[1]
|
||||
if ext in {".txt", ".md", ".csv"}:
|
||||
return _decode_text_bytes(raw)
|
||||
if ext == ".json":
|
||||
try:
|
||||
parsed = json.loads(_decode_text_bytes(raw))
|
||||
return json.dumps(parsed, ensure_ascii=False, indent=2)
|
||||
except Exception:
|
||||
return _decode_text_bytes(raw)
|
||||
if ext == ".pdf":
|
||||
try:
|
||||
from pypdf import PdfReader # type: ignore
|
||||
except Exception as exc:
|
||||
raise ValueError("PDF parsing requires installing pypdf") from exc
|
||||
reader = PdfReader(BytesIO(raw))
|
||||
return "\n".join((page.extract_text() or "") for page in reader.pages).strip()
|
||||
if ext == ".docx":
|
||||
try:
|
||||
from docx import Document # type: ignore
|
||||
except Exception as exc:
|
||||
raise ValueError("DOCX parsing requires installing python-docx") from exc
|
||||
doc = Document(BytesIO(raw))
|
||||
return "\n".join(p.text for p in doc.paragraphs).strip()
|
||||
if ext == ".doc":
|
||||
raise ValueError("DOC format is not supported for auto indexing. Please convert to DOCX/TXT.")
|
||||
# fallback: attempt plain text decode
|
||||
if (content_type or "").startswith("text/"):
|
||||
return _decode_text_bytes(raw)
|
||||
raise ValueError(f"Unsupported file type for auto indexing: {ext or content_type or 'unknown'}")
|
||||
|
||||
|
||||
def kb_to_dict(kb: KnowledgeBase) -> dict:
|
||||
return {
|
||||
"id": kb.id,
|
||||
"user_id": kb.user_id,
|
||||
"name": kb.name,
|
||||
"description": kb.description,
|
||||
"embedding_model": kb.embedding_model,
|
||||
"chunk_size": kb.chunk_size,
|
||||
"chunk_overlap": kb.chunk_overlap,
|
||||
"doc_count": kb.doc_count,
|
||||
"chunk_count": kb.chunk_count,
|
||||
"embeddingModel": kb.embedding_model,
|
||||
"chunkSize": kb.chunk_size,
|
||||
"chunkOverlap": kb.chunk_overlap,
|
||||
"docCount": kb.doc_count,
|
||||
"chunkCount": kb.chunk_count,
|
||||
"status": kb.status,
|
||||
"created_at": kb.created_at.isoformat() if kb.created_at else None,
|
||||
"updated_at": kb.updated_at.isoformat() if kb.updated_at else None,
|
||||
"createdAt": kb.created_at.isoformat() if kb.created_at else None,
|
||||
"updatedAt": kb.updated_at.isoformat() if kb.updated_at else None,
|
||||
}
|
||||
|
||||
|
||||
@@ -42,28 +96,35 @@ def doc_to_dict(d: KnowledgeDocument) -> dict:
|
||||
"kb_id": d.kb_id,
|
||||
"name": d.name,
|
||||
"size": d.size,
|
||||
"file_type": d.file_type,
|
||||
"storage_url": d.storage_url,
|
||||
"fileType": d.file_type,
|
||||
"storageUrl": d.storage_url,
|
||||
"status": d.status,
|
||||
"chunk_count": d.chunk_count,
|
||||
"error_message": d.error_message,
|
||||
"upload_date": d.upload_date,
|
||||
"created_at": d.created_at.isoformat() if d.created_at else None,
|
||||
"processed_at": d.processed_at.isoformat() if d.processed_at else None,
|
||||
"chunkCount": d.chunk_count,
|
||||
"errorMessage": d.error_message,
|
||||
"uploadDate": d.upload_date,
|
||||
"createdAt": d.created_at.isoformat() if d.created_at else None,
|
||||
"processedAt": d.processed_at.isoformat() if d.processed_at else None,
|
||||
}
|
||||
|
||||
|
||||
# ============ Knowledge Bases ============
|
||||
@router.get("/bases")
|
||||
def list_knowledge_bases(user_id: int = 1, db: Session = Depends(get_db)):
|
||||
kbs = db.query(KnowledgeBase).filter(KnowledgeBase.user_id == user_id).all()
|
||||
def list_knowledge_bases(
|
||||
user_id: int = 1,
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
query = db.query(KnowledgeBase).filter(KnowledgeBase.user_id == user_id)
|
||||
total = query.count()
|
||||
kbs = query.order_by(KnowledgeBase.created_at.desc()).offset((page - 1) * limit).limit(limit).all()
|
||||
result = []
|
||||
for kb in kbs:
|
||||
docs = db.query(KnowledgeDocument).filter(KnowledgeDocument.kb_id == kb.id).all()
|
||||
kb_data = kb_to_dict(kb)
|
||||
kb_data["documents"] = [doc_to_dict(d) for d in docs]
|
||||
result.append(kb_data)
|
||||
return {"total": len(result), "list": result}
|
||||
return {"total": total, "page": page, "limit": limit, "list": result}
|
||||
|
||||
|
||||
@router.get("/bases/{kb_id}")
|
||||
@@ -79,10 +140,21 @@ def get_knowledge_base(kb_id: str, db: Session = Depends(get_db)):
|
||||
|
||||
@router.post("/bases")
|
||||
def create_knowledge_base(data: KnowledgeBaseCreate, user_id: int = 1, db: Session = Depends(get_db)):
|
||||
name = (data.name or "").strip()
|
||||
if not name:
|
||||
raise HTTPException(status_code=400, detail="Knowledge base name cannot be empty")
|
||||
|
||||
exists = db.query(KnowledgeBase).filter(
|
||||
KnowledgeBase.user_id == user_id,
|
||||
KnowledgeBase.name == name
|
||||
).first()
|
||||
if exists:
|
||||
raise HTTPException(status_code=400, detail=f"Knowledge base name already exists: {name}")
|
||||
|
||||
kb = KnowledgeBase(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
user_id=user_id,
|
||||
name=data.name,
|
||||
name=name,
|
||||
description=data.description,
|
||||
embedding_model=data.embeddingModel,
|
||||
chunk_size=data.chunkSize,
|
||||
@@ -91,7 +163,13 @@ def create_knowledge_base(data: KnowledgeBaseCreate, user_id: int = 1, db: Sessi
|
||||
db.add(kb)
|
||||
db.commit()
|
||||
db.refresh(kb)
|
||||
vector_store.create_collection(kb.id, data.embeddingModel)
|
||||
try:
|
||||
vector_store.create_collection(kb.id, data.embeddingModel)
|
||||
except Exception as exc:
|
||||
# Keep DB and vector store consistent on create failure
|
||||
db.delete(kb)
|
||||
db.commit()
|
||||
raise HTTPException(status_code=502, detail=f"Failed to create ChromaDB collection: {exc}") from exc
|
||||
return kb_to_dict(kb)
|
||||
|
||||
|
||||
@@ -101,8 +179,43 @@ def update_knowledge_base(kb_id: str, data: KnowledgeBaseUpdate, db: Session = D
|
||||
if not kb:
|
||||
raise HTTPException(status_code=404, detail="Knowledge base not found")
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
field_map = {
|
||||
"embeddingModel": "embedding_model",
|
||||
"chunkSize": "chunk_size",
|
||||
"chunkOverlap": "chunk_overlap",
|
||||
}
|
||||
if "name" in update_data:
|
||||
update_data["name"] = (update_data["name"] or "").strip()
|
||||
if not update_data["name"]:
|
||||
raise HTTPException(status_code=400, detail="Knowledge base name cannot be empty")
|
||||
name_exists = db.query(KnowledgeBase).filter(
|
||||
KnowledgeBase.user_id == kb.user_id,
|
||||
KnowledgeBase.name == update_data["name"],
|
||||
KnowledgeBase.id != kb.id
|
||||
).first()
|
||||
if name_exists:
|
||||
raise HTTPException(status_code=400, detail=f"Knowledge base name already exists: {update_data['name']}")
|
||||
|
||||
embedding_changed = "embeddingModel" in update_data and update_data["embeddingModel"] != kb.embedding_model
|
||||
if embedding_changed and kb.chunk_count > 0:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Cannot change embedding model when knowledge base has indexed chunks. Remove documents first."
|
||||
)
|
||||
|
||||
for field, value in update_data.items():
|
||||
setattr(kb, field, value)
|
||||
setattr(kb, field_map.get(field, field), value)
|
||||
|
||||
if embedding_changed:
|
||||
try:
|
||||
vector_store.delete_collection(kb_id)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
vector_store.create_collection(kb_id, kb.embedding_model)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"Failed to update ChromaDB collection: {exc}") from exc
|
||||
|
||||
kb.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
db.refresh(kb)
|
||||
@@ -114,42 +227,141 @@ def delete_knowledge_base(kb_id: str, db: Session = Depends(get_db)):
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
if not kb:
|
||||
raise HTTPException(status_code=404, detail="Knowledge base not found")
|
||||
vector_store.delete_collection(kb_id)
|
||||
vector_deleted = True
|
||||
try:
|
||||
vector_store.delete_collection(kb_id)
|
||||
except Exception:
|
||||
vector_deleted = False
|
||||
docs = db.query(KnowledgeDocument).filter(KnowledgeDocument.kb_id == kb_id).all()
|
||||
for doc in docs:
|
||||
db.delete(doc)
|
||||
db.delete(kb)
|
||||
db.commit()
|
||||
if not vector_deleted:
|
||||
return {"message": "Deleted successfully", "warning": "Knowledge base deleted but failed to remove ChromaDB collection"}
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
# ============ Documents ============
|
||||
@router.post("/bases/{kb_id}/documents")
|
||||
def upload_document(
|
||||
async def upload_document(
|
||||
kb_id: str,
|
||||
name: str = Query(...),
|
||||
size: str = Query(...),
|
||||
file_type: str = Query("txt"),
|
||||
storage_url: Optional[str] = Query(None),
|
||||
file: Optional[UploadFile] = File(default=None),
|
||||
name: Optional[str] = Form(default=None),
|
||||
size: Optional[str] = Form(default=None),
|
||||
file_type: Optional[str] = Form(default=None),
|
||||
storage_url: Optional[str] = Form(default=None),
|
||||
data: Optional[KnowledgeDocumentCreate] = None,
|
||||
request: Request = None,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
if not kb:
|
||||
raise HTTPException(status_code=404, detail="Knowledge base not found")
|
||||
|
||||
# New mode: multipart file upload with automatic indexing
|
||||
if file is not None:
|
||||
filename = file.filename or "uploaded.txt"
|
||||
file_type_value = file.content_type or file_type or "application/octet-stream"
|
||||
raw = file.file.read()
|
||||
if not raw:
|
||||
raise HTTPException(status_code=400, detail="Uploaded file is empty")
|
||||
|
||||
doc = KnowledgeDocument(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
kb_id=kb_id,
|
||||
name=filename,
|
||||
size=f"{len(raw)} bytes",
|
||||
file_type=file_type_value,
|
||||
storage_url=storage_url,
|
||||
status="processing",
|
||||
upload_date=datetime.utcnow().isoformat()
|
||||
)
|
||||
db.add(doc)
|
||||
db.commit()
|
||||
db.refresh(doc)
|
||||
|
||||
try:
|
||||
if vector_store.get_collection(kb_id) is None:
|
||||
vector_store.create_collection(kb_id, kb.embedding_model)
|
||||
|
||||
text = _extract_text_from_upload(filename, file.content_type, raw)
|
||||
if not text.strip():
|
||||
raise ValueError("No textual content extracted from file")
|
||||
|
||||
chunk_count = index_document(kb_id, doc.id, text)
|
||||
doc.status = "completed"
|
||||
doc.chunk_count = chunk_count
|
||||
doc.processed_at = datetime.utcnow()
|
||||
doc.error_message = None
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
return {
|
||||
"id": doc.id,
|
||||
"name": doc.name,
|
||||
"size": doc.size,
|
||||
"fileType": doc.file_type,
|
||||
"storageUrl": doc.storage_url,
|
||||
"status": doc.status,
|
||||
"chunkCount": doc.chunk_count,
|
||||
"message": "Document uploaded and indexed",
|
||||
}
|
||||
except ValueError as exc:
|
||||
doc.status = "failed"
|
||||
doc.error_message = str(exc)
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||
except Exception as exc:
|
||||
doc.status = "failed"
|
||||
doc.error_message = str(exc)
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
raise HTTPException(status_code=500, detail=f"Failed to index uploaded file: {exc}") from exc
|
||||
|
||||
# Backward-compatible mode: metadata-only document creation
|
||||
if data is None:
|
||||
if not name and not size and request is not None:
|
||||
try:
|
||||
raw_payload = await request.json()
|
||||
if isinstance(raw_payload, dict):
|
||||
name = raw_payload.get("name")
|
||||
size = raw_payload.get("size")
|
||||
file_type = raw_payload.get("fileType") or raw_payload.get("file_type") or file_type
|
||||
storage_url = raw_payload.get("storageUrl") or raw_payload.get("storage_url") or storage_url
|
||||
except Exception:
|
||||
pass
|
||||
if not name or not size:
|
||||
raise HTTPException(status_code=422, detail="name and size are required")
|
||||
data = KnowledgeDocumentCreate(
|
||||
name=name,
|
||||
size=size,
|
||||
fileType=file_type or "txt",
|
||||
storageUrl=storage_url,
|
||||
)
|
||||
|
||||
doc = KnowledgeDocument(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
kb_id=kb_id,
|
||||
name=name,
|
||||
size=size,
|
||||
file_type=file_type,
|
||||
storage_url=storage_url,
|
||||
name=data.name,
|
||||
size=data.size,
|
||||
file_type=data.fileType,
|
||||
storage_url=data.storageUrl,
|
||||
status="pending",
|
||||
upload_date=datetime.utcnow().isoformat()
|
||||
)
|
||||
db.add(doc)
|
||||
db.commit()
|
||||
db.refresh(doc)
|
||||
return {"id": doc.id, "name": doc.name, "status": doc.status, "message": "Document created"}
|
||||
return {
|
||||
"id": doc.id,
|
||||
"name": doc.name,
|
||||
"size": doc.size,
|
||||
"fileType": doc.file_type,
|
||||
"storageUrl": doc.storage_url,
|
||||
"status": doc.status,
|
||||
"message": "Document created",
|
||||
}
|
||||
|
||||
|
||||
@router.post("/bases/{kb_id}/documents/{doc_id}/index")
|
||||
@@ -180,21 +392,21 @@ def index_document_content(kb_id: str, doc_id: str, request: DocumentIndexReques
|
||||
db.commit()
|
||||
|
||||
try:
|
||||
if vector_store.get_collection(kb_id) is None:
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
vector_store.create_collection(kb_id, kb.embedding_model if kb else "text-embedding-3-small")
|
||||
chunk_count = index_document(kb_id, doc_id, request.content)
|
||||
doc.status = "completed"
|
||||
doc.chunk_count = chunk_count
|
||||
doc.processed_at = datetime.utcnow()
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
kb.doc_count = db.query(KnowledgeDocument).filter(
|
||||
KnowledgeDocument.kb_id == kb_id,
|
||||
KnowledgeDocument.status == "completed"
|
||||
).count()
|
||||
kb.chunk_count += chunk_count
|
||||
doc.error_message = None
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
return {"message": "Document indexed", "chunkCount": chunk_count}
|
||||
except Exception as e:
|
||||
doc.status = "failed"
|
||||
doc.error_message = str(e)
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@@ -211,10 +423,8 @@ def delete_document(kb_id: str, doc_id: str, db: Session = Depends(get_db)):
|
||||
delete_document_from_vector(kb_id, doc_id)
|
||||
except Exception:
|
||||
pass
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
kb.chunk_count -= doc.chunk_count
|
||||
kb.doc_count -= 1
|
||||
db.delete(doc)
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
@@ -222,7 +432,10 @@ def delete_document(kb_id: str, doc_id: str, db: Session = Depends(get_db)):
|
||||
# ============ Search ============
|
||||
@router.post("/search")
|
||||
def search_knowledge_base(query: KnowledgeSearchQuery):
|
||||
return search_knowledge(kb_id=query.kb_id, query=query.query, n_results=query.nResults)
|
||||
try:
|
||||
return search_knowledge(kb_id=query.kb_id, query=query.query, n_results=query.nResults)
|
||||
except ValueError as exc:
|
||||
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||
|
||||
|
||||
# ============ Stats ============
|
||||
|
||||
296
api/app/routers/llm.py
Normal file
296
api/app/routers/llm.py
Normal file
@@ -0,0 +1,296 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import List, Optional
|
||||
import httpx
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
from ..db import get_db
|
||||
from ..id_generator import unique_short_id
|
||||
from ..models import LLMModel
|
||||
from ..schemas import (
|
||||
LLMModelCreate, LLMModelUpdate, LLMModelOut,
|
||||
LLMModelTestResponse, LLMPreviewRequest, LLMPreviewResponse
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/llm", tags=["LLM Models"])
|
||||
|
||||
|
||||
# ============ LLM Models CRUD ============
|
||||
@router.get("")
|
||||
def list_llm_models(
|
||||
model_type: Optional[str] = None,
|
||||
enabled: Optional[bool] = None,
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取LLM模型列表"""
|
||||
query = db.query(LLMModel)
|
||||
|
||||
if model_type:
|
||||
query = query.filter(LLMModel.type == model_type)
|
||||
if enabled is not None:
|
||||
query = query.filter(LLMModel.enabled == enabled)
|
||||
|
||||
total = query.count()
|
||||
models = query.order_by(LLMModel.created_at.desc()) \
|
||||
.offset((page-1)*limit).limit(limit).all()
|
||||
|
||||
return {"total": total, "page": page, "limit": limit, "list": models}
|
||||
|
||||
|
||||
@router.get("/{id}", response_model=LLMModelOut)
|
||||
def get_llm_model(id: str, db: Session = Depends(get_db)):
|
||||
"""获取单个LLM模型详情"""
|
||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||
return model
|
||||
|
||||
|
||||
@router.post("", response_model=LLMModelOut)
|
||||
def create_llm_model(data: LLMModelCreate, db: Session = Depends(get_db)):
|
||||
"""创建LLM模型"""
|
||||
llm_model = LLMModel(
|
||||
id=unique_short_id("llm", db, LLMModel),
|
||||
user_id=1, # 默认用户
|
||||
name=data.name,
|
||||
vendor=data.vendor,
|
||||
type=data.type.value if hasattr(data.type, 'value') else data.type,
|
||||
base_url=data.base_url,
|
||||
api_key=data.api_key,
|
||||
model_name=data.model_name,
|
||||
temperature=data.temperature,
|
||||
context_length=data.context_length,
|
||||
enabled=data.enabled,
|
||||
)
|
||||
db.add(llm_model)
|
||||
db.commit()
|
||||
db.refresh(llm_model)
|
||||
return llm_model
|
||||
|
||||
|
||||
@router.put("/{id}", response_model=LLMModelOut)
|
||||
def update_llm_model(id: str, data: LLMModelUpdate, db: Session = Depends(get_db)):
|
||||
"""更新LLM模型"""
|
||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
if "type" in update_data and update_data["type"] is not None and hasattr(update_data["type"], "value"):
|
||||
update_data["type"] = update_data["type"].value
|
||||
for field, value in update_data.items():
|
||||
setattr(model, field, value)
|
||||
|
||||
model.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
db.refresh(model)
|
||||
return model
|
||||
|
||||
|
||||
@router.delete("/{id}")
|
||||
def delete_llm_model(id: str, db: Session = Depends(get_db)):
|
||||
"""删除LLM模型"""
|
||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||
db.delete(model)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
@router.post("/{id}/test", response_model=LLMModelTestResponse)
|
||||
def test_llm_model(id: str, db: Session = Depends(get_db)):
|
||||
"""测试LLM模型连接"""
|
||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||
|
||||
start_time = time.time()
|
||||
try:
|
||||
# 构造测试请求
|
||||
test_messages = [{"role": "user", "content": "Hello, please reply with 'OK'."}]
|
||||
|
||||
payload = {
|
||||
"model": model.model_name or "gpt-3.5-turbo",
|
||||
"messages": test_messages,
|
||||
"max_tokens": 10,
|
||||
"temperature": 0.1,
|
||||
}
|
||||
|
||||
headers = {"Authorization": f"Bearer {model.api_key}"}
|
||||
|
||||
with httpx.Client(timeout=30.0) as client:
|
||||
response = client.post(
|
||||
f"{model.base_url}/chat/completions",
|
||||
json=payload,
|
||||
headers=headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
result = response.json()
|
||||
|
||||
if result.get("choices"):
|
||||
return LLMModelTestResponse(
|
||||
success=True,
|
||||
latency_ms=latency_ms,
|
||||
message="Connection successful"
|
||||
)
|
||||
else:
|
||||
return LLMModelTestResponse(
|
||||
success=False,
|
||||
latency_ms=latency_ms,
|
||||
message="Unexpected response format"
|
||||
)
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
return LLMModelTestResponse(
|
||||
success=False,
|
||||
message=f"HTTP Error: {e.response.status_code} - {e.response.text[:200]}"
|
||||
)
|
||||
except Exception as e:
|
||||
return LLMModelTestResponse(
|
||||
success=False,
|
||||
message=str(e)[:200]
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{id}/chat")
|
||||
def chat_with_llm(
|
||||
id: str,
|
||||
message: str,
|
||||
system_prompt: Optional[str] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
temperature: Optional[float] = None,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""与LLM模型对话"""
|
||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||
|
||||
messages = []
|
||||
if system_prompt:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
messages.append({"role": "user", "content": message})
|
||||
|
||||
payload = {
|
||||
"model": model.model_name or "gpt-3.5-turbo",
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens or 1000,
|
||||
"temperature": temperature if temperature is not None else model.temperature or 0.7,
|
||||
}
|
||||
|
||||
headers = {"Authorization": f"Bearer {model.api_key}"}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=60.0) as client:
|
||||
response = client.post(
|
||||
f"{model.base_url}/chat/completions",
|
||||
json=payload,
|
||||
headers=headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
result = response.json()
|
||||
if choice := result.get("choices", [{}])[0]:
|
||||
return {
|
||||
"success": True,
|
||||
"reply": choice.get("message", {}).get("content", ""),
|
||||
"usage": result.get("usage", {})
|
||||
}
|
||||
return {"success": False, "reply": "", "error": "No response"}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/{id}/preview", response_model=LLMPreviewResponse)
def preview_llm_model(
    id: str,
    request: LLMPreviewRequest,
    db: Session = Depends(get_db)
):
    """Preview a model's output; supports both text (chat) and embedding models.

    For embedding models, returns a human-readable summary (dimension count
    plus the first few vector components) instead of raw audio/vector data.

    Raises:
        HTTPException: 404 for an unknown model, 400 for an empty message,
            502 on transport failure or a non-200 vendor response.
    """
    model = db.query(LLMModel).filter(LLMModel.id == id).first()
    if not model:
        raise HTTPException(status_code=404, detail="LLM Model not found")

    user_message = (request.message or "").strip()
    if not user_message:
        raise HTTPException(status_code=400, detail="Preview message cannot be empty")

    model_id = model.model_name or "gpt-3.5-turbo"
    # A request-supplied API key (if any) overrides the stored one.
    # NOTE(review): raises AttributeError if both keys are None — presumably
    # api_key is non-null by schema; confirm.
    headers = {"Authorization": f"Bearer {(request.api_key or model.api_key).strip()}"}

    start_time = time.time()
    endpoint = "/chat/completions"
    payload = {}

    # Choose endpoint/payload shape by model type (embedding vs. chat).
    if model.type == "embedding":
        endpoint = "/embeddings"
        payload = {
            "model": model_id,
            "input": user_message,
        }
    else:
        messages = []
        if request.system_prompt and request.system_prompt.strip():
            messages.append({"role": "system", "content": request.system_prompt.strip()})
        messages.append({"role": "user", "content": user_message})
        payload = {
            "model": model_id,
            "messages": messages,
            "max_tokens": request.max_tokens or 512,
            "temperature": request.temperature if request.temperature is not None else (model.temperature or 0.7),
        }

    try:
        with httpx.Client(timeout=60.0) as client:
            response = client.post(
                f"{model.base_url.rstrip('/')}{endpoint}",
                json=payload,
                headers=headers
            )
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"LLM request failed: {exc}") from exc

    if response.status_code != 200:
        # Prefer the structured vendor error message; fall back to raw text.
        detail = response.text
        try:
            detail_json = response.json()
            detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
        except Exception:
            pass
        raise HTTPException(status_code=502, detail=f"LLM vendor error: {detail}")

    result = response.json()
    reply = ""
    if model.type == "embedding":
        data_list = result.get("data", [])
        embedding = []
        if data_list and isinstance(data_list, list):
            embedding = data_list[0].get("embedding", []) or []
        dims = len(embedding) if isinstance(embedding, list) else 0
        # Show only the first 8 components as a readable preview.
        preview_values = []
        if isinstance(embedding, list):
            preview_values = embedding[:8]
        values_text = ", ".join(
            [f"{float(v):.6f}" if isinstance(v, (float, int)) else str(v) for v in preview_values]
        )
        reply = f"Embedding generated successfully. dims={dims}. head=[{values_text}]"
    else:
        choices = result.get("choices", [])
        if choices:
            reply = choices[0].get("message", {}).get("content", "") or ""

    return LLMPreviewResponse(
        success=bool(reply),
        reply=reply,
        usage=result.get("usage"),
        latency_ms=int((time.time() - start_time) * 1000),
        error=None if reply else "No response content",
    )
|
||||
808
api/app/routers/tools.py
Normal file
808
api/app/routers/tools.py
Normal file
@@ -0,0 +1,808 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import inspect, text
|
||||
from typing import Optional, Dict, Any, List
|
||||
import time
|
||||
import uuid
|
||||
import httpx
|
||||
from datetime import datetime
|
||||
|
||||
from ..db import get_db
|
||||
from ..models import LLMModel, ASRModel, ToolResource
|
||||
from ..schemas import ToolResourceCreate, ToolResourceOut, ToolResourceUpdate
|
||||
|
||||
router = APIRouter(prefix="/tools", tags=["Tools & Autotest"])
|
||||
|
||||
|
||||
TOOL_ID_ALIASES: Dict[str, str] = {
|
||||
# legacy -> canonical
|
||||
"voice_message_prompt": "voice_msg_prompt",
|
||||
}
|
||||
|
||||
|
||||
def normalize_tool_id(tool_id: Optional[str]) -> str:
    """Map a (possibly legacy) tool id onto its canonical form.

    Blank/None input yields an empty string; ids without a registered alias
    pass through unchanged.
    """
    candidate = str(tool_id or "").strip()
    return TOOL_ID_ALIASES.get(candidate, candidate) if candidate else ""
|
||||
|
||||
|
||||
# ============ Available Tools ============
|
||||
TOOL_REGISTRY = {
|
||||
"calculator": {
|
||||
"name": "计算器",
|
||||
"description": "执行数学计算",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"expression": {"type": "string", "description": "数学表达式,如: 2 + 3 * 4"}
|
||||
},
|
||||
"required": ["expression"]
|
||||
}
|
||||
},
|
||||
"code_interpreter": {
|
||||
"name": "代码执行",
|
||||
"description": "安全地执行Python代码",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {"type": "string", "description": "要执行的Python代码"}
|
||||
},
|
||||
"required": ["code"]
|
||||
}
|
||||
},
|
||||
"current_time": {
|
||||
"name": "当前时间",
|
||||
"description": "获取当前本地时间",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
"turn_on_camera": {
|
||||
"name": "打开摄像头",
|
||||
"description": "执行打开摄像头命令",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
"turn_off_camera": {
|
||||
"name": "关闭摄像头",
|
||||
"description": "执行关闭摄像头命令",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
"increase_volume": {
|
||||
"name": "调高音量",
|
||||
"description": "提升设备音量",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"step": {"type": "integer", "description": "调整步进,默认1"}
|
||||
},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
"decrease_volume": {
|
||||
"name": "调低音量",
|
||||
"description": "降低设备音量",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"step": {"type": "integer", "description": "调整步进,默认1"}
|
||||
},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
"voice_msg_prompt": {
|
||||
"name": "语音消息提示",
|
||||
"description": "播报一条语音提示消息",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"msg": {"type": "string", "description": "要播报的消息文本"}
|
||||
},
|
||||
"required": ["msg"]
|
||||
}
|
||||
},
|
||||
"text_msg_prompt": {
|
||||
"name": "文本消息提示",
|
||||
"description": "显示一条文本弹窗提示",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"msg": {"type": "string", "description": "提示文本内容"}
|
||||
},
|
||||
"required": ["msg"]
|
||||
}
|
||||
},
|
||||
"voice_choice_prompt": {
|
||||
"name": "语音选项提示",
|
||||
"description": "播报问题并展示可选项,等待用户选择后回传结果",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {"type": "string", "description": "向用户展示的问题文本"},
|
||||
"options": {
|
||||
"type": "array",
|
||||
"description": "可选项(字符串或含 id/label/value 的对象)",
|
||||
"minItems": 2,
|
||||
"items": {
|
||||
"anyOf": [
|
||||
{"type": "string"},
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {"type": "string"},
|
||||
"label": {"type": "string"},
|
||||
"value": {"type": "string"}
|
||||
},
|
||||
"required": ["label"]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"voice_text": {"type": "string", "description": "可选,单独指定播报文本;为空则播报 question"}
|
||||
},
|
||||
"required": ["question", "options"]
|
||||
}
|
||||
},
|
||||
"text_choice_prompt": {
|
||||
"name": "文本选项提示",
|
||||
"description": "显示文本选项弹窗并等待用户选择后回传结果",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {"type": "string", "description": "向用户展示的问题文本"},
|
||||
"options": {
|
||||
"type": "array",
|
||||
"description": "可选项(字符串或含 id/label/value 的对象)",
|
||||
"minItems": 2,
|
||||
"items": {
|
||||
"anyOf": [
|
||||
{"type": "string"},
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {"type": "string"},
|
||||
"label": {"type": "string"},
|
||||
"value": {"type": "string"}
|
||||
},
|
||||
"required": ["label"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["question", "options"]
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
TOOL_CATEGORY_MAP = {
|
||||
"calculator": "query",
|
||||
"current_time": "query",
|
||||
"code_interpreter": "query",
|
||||
"turn_on_camera": "system",
|
||||
"turn_off_camera": "system",
|
||||
"increase_volume": "system",
|
||||
"decrease_volume": "system",
|
||||
"voice_msg_prompt": "system",
|
||||
"voice_message_prompt": "system", # backward compatibility
|
||||
"text_msg_prompt": "system",
|
||||
"voice_choice_prompt": "system",
|
||||
"text_choice_prompt": "system",
|
||||
}
|
||||
|
||||
TOOL_ICON_MAP = {
|
||||
"calculator": "Terminal",
|
||||
"current_time": "Calendar",
|
||||
"code_interpreter": "Terminal",
|
||||
"turn_on_camera": "Camera",
|
||||
"turn_off_camera": "CameraOff",
|
||||
"increase_volume": "Volume2",
|
||||
"decrease_volume": "Volume2",
|
||||
"voice_msg_prompt": "Volume2",
|
||||
"voice_message_prompt": "Volume2", # backward compatibility
|
||||
"text_msg_prompt": "Terminal",
|
||||
"voice_choice_prompt": "Volume2",
|
||||
"text_choice_prompt": "Terminal",
|
||||
}
|
||||
|
||||
TOOL_HTTP_DEFAULTS = {
|
||||
}
|
||||
|
||||
TOOL_PARAMETER_DEFAULTS = {
|
||||
"increase_volume": {"step": 1},
|
||||
"decrease_volume": {"step": 1},
|
||||
}
|
||||
|
||||
TOOL_WAIT_FOR_RESPONSE_DEFAULTS = {
|
||||
"text_msg_prompt": True,
|
||||
"voice_choice_prompt": True,
|
||||
"text_choice_prompt": True,
|
||||
}
|
||||
|
||||
|
||||
def _normalize_parameter_schema(value: Any, *, tool_id: Optional[str] = None) -> Dict[str, Any]:
|
||||
if not isinstance(value, dict):
|
||||
value = {}
|
||||
normalized = dict(value)
|
||||
if not normalized:
|
||||
fallback = TOOL_REGISTRY.get(str(tool_id or "").strip(), {}).get("parameters")
|
||||
if isinstance(fallback, dict):
|
||||
normalized = dict(fallback)
|
||||
normalized.setdefault("type", "object")
|
||||
if normalized.get("type") != "object":
|
||||
raise HTTPException(status_code=400, detail="parameter_schema.type must be 'object'")
|
||||
properties = normalized.get("properties")
|
||||
if not isinstance(properties, dict):
|
||||
normalized["properties"] = {}
|
||||
required = normalized.get("required")
|
||||
if required is None:
|
||||
normalized["required"] = []
|
||||
elif not isinstance(required, list):
|
||||
raise HTTPException(status_code=400, detail="parameter_schema.required must be an array")
|
||||
return normalized
|
||||
|
||||
|
||||
def _normalize_parameter_defaults(value: Any) -> Dict[str, Any]:
|
||||
if value is None:
|
||||
return {}
|
||||
if not isinstance(value, dict):
|
||||
raise HTTPException(status_code=400, detail="parameter_defaults must be an object")
|
||||
return dict(value)
|
||||
|
||||
|
||||
def _ensure_tool_resource_schema(db: Session) -> None:
    """Apply lightweight SQLite migrations for newly added tool_resources columns.

    Inspects the live table and issues ALTER TABLE for any of the newer
    columns that are missing; commits only if something was added.
    """
    bind = db.get_bind()
    inspector = inspect(bind)
    try:
        columns = {col["name"] for col in inspector.get_columns("tool_resources")}
    except Exception:
        # Table may not exist yet (fresh database) — nothing to migrate.
        return

    altered = False
    # Each branch adds a column introduced after the original schema shipped.
    if "parameter_schema" not in columns:
        db.execute(text("ALTER TABLE tool_resources ADD COLUMN parameter_schema JSON"))
        altered = True
    if "parameter_defaults" not in columns:
        db.execute(text("ALTER TABLE tool_resources ADD COLUMN parameter_defaults JSON"))
        altered = True
    if "wait_for_response" not in columns:
        db.execute(text("ALTER TABLE tool_resources ADD COLUMN wait_for_response BOOLEAN DEFAULT 0"))
        altered = True
    if altered:
        db.commit()
|
||||
|
||||
|
||||
def _normalize_http_method(method: Optional[str]) -> str:
|
||||
normalized = str(method or "GET").strip().upper()
|
||||
return normalized if normalized in {"GET", "POST", "PUT", "PATCH", "DELETE"} else "GET"
|
||||
|
||||
|
||||
def _requires_http_request(category: str, tool_id: Optional[str]) -> bool:
|
||||
if category != "query":
|
||||
return False
|
||||
return str(tool_id or "").strip() not in {"calculator", "code_interpreter", "current_time"}
|
||||
|
||||
|
||||
def _validate_query_http_config(*, category: str, tool_id: Optional[str], http_url: Optional[str]) -> None:
    """Reject query tools that require an HTTP endpoint but provide no URL."""
    needs_url = _requires_http_request(category, tool_id)
    has_url = bool(str(http_url or "").strip())
    if needs_url and not has_url:
        raise HTTPException(status_code=400, detail="http_url is required for query tools (except calculator/code_interpreter)")
|
||||
|
||||
|
||||
def _migrate_legacy_system_tool_ids(db: Session) -> None:
    """Rename legacy built-in system tool IDs to their canonical IDs.

    Walks TOOL_ID_ALIASES; for each legacy row that is a seeded (system)
    tool, either deletes it when a canonical row already exists, or renames
    it in place. Commits only when something changed.
    """
    changed = False
    for legacy_id, canonical_id in TOOL_ID_ALIASES.items():
        if legacy_id == canonical_id:
            continue
        legacy_item = (
            db.query(ToolResource)
            .filter(ToolResource.id == legacy_id)
            .first()
        )
        # Only migrate seeded rows; user-created rows keep their chosen id.
        if not legacy_item or not bool(legacy_item.is_system):
            continue

        canonical_item = (
            db.query(ToolResource)
            .filter(ToolResource.id == canonical_id)
            .first()
        )
        # A row already exists under the canonical id: drop the legacy
        # duplicate rather than renaming onto a conflicting primary key.
        if canonical_item:
            db.delete(legacy_item)
            changed = True
            continue

        legacy_item.id = canonical_id
        legacy_item.updated_at = datetime.utcnow()
        changed = True

    if changed:
        db.commit()
|
||||
|
||||
|
||||
def _seed_default_tools_if_empty(db: Session) -> None:
    """Ensure built-in tools exist in tool_resources without overriding custom edits.

    Runs schema migration and legacy-id migration first. Seeds the built-in
    registry only when NO system tool exists at all, so user edits to seeded
    rows are never overwritten.
    """
    _ensure_tool_resource_schema(db)
    _migrate_legacy_system_tool_ids(db)
    # Any existing system tool means seeding already happened — bail out.
    existing_system_count = (
        db.query(ToolResource.id)
        .filter(ToolResource.is_system.is_(True))
        .count()
    )
    if existing_system_count > 0:
        return
    existing_ids = {
        str(item[0])
        for item in db.query(ToolResource.id).all()
    }
    changed = False
    for tool_id, payload in TOOL_REGISTRY.items():
        # Skip ids already taken (e.g. by user-created tools).
        if tool_id in existing_ids:
            continue
        http_defaults = TOOL_HTTP_DEFAULTS.get(tool_id, {})
        db.add(ToolResource(
            id=tool_id,
            user_id=1,  # NOTE(review): seeded rows appear owned by user 1 — confirm this is the intended default owner
            name=payload.get("name", tool_id),
            description=payload.get("description", ""),
            category=TOOL_CATEGORY_MAP.get(tool_id, "system"),
            icon=TOOL_ICON_MAP.get(tool_id, "Wrench"),
            http_method=_normalize_http_method(http_defaults.get("http_method")),
            http_url=http_defaults.get("http_url"),
            http_headers=http_defaults.get("http_headers") or {},
            http_timeout_ms=int(http_defaults.get("http_timeout_ms") or 10000),
            parameter_schema=_normalize_parameter_schema(payload.get("parameters"), tool_id=tool_id),
            parameter_defaults=_normalize_parameter_defaults(TOOL_PARAMETER_DEFAULTS.get(tool_id)),
            wait_for_response=bool(TOOL_WAIT_FOR_RESPONSE_DEFAULTS.get(tool_id, False)),
            enabled=True,
            is_system=True,
        ))
        changed = True
    if changed:
        db.commit()
|
||||
|
||||
|
||||
def recreate_tool_resources(db: Session) -> None:
    """Recreate tool resources table content with current built-in defaults.

    WARNING: drops the entire tool_resources table — user-created tools are
    lost — then recreates it and reseeds the built-ins.
    """
    bind = db.get_bind()
    ToolResource.__table__.drop(bind=bind, checkfirst=True)
    ToolResource.__table__.create(bind=bind, checkfirst=True)
    _seed_default_tools_if_empty(db)
|
||||
|
||||
|
||||
@router.get("/list")
def list_available_tools():
    """Return the built-in tool registry (id -> definition)."""
    registry = TOOL_REGISTRY
    return {"tools": registry}
|
||||
|
||||
|
||||
@router.get("/list/{tool_id}")
def get_tool_detail(tool_id: str):
    """Return the registry definition for a single tool (legacy ids accepted)."""
    canonical = normalize_tool_id(tool_id)
    try:
        return TOOL_REGISTRY[canonical]
    except KeyError:
        raise HTTPException(status_code=404, detail="Tool not found")
|
||||
|
||||
|
||||
# ============ Tool Resource CRUD ============
|
||||
@router.get("/resources")
def list_tool_resources(
    category: Optional[str] = None,
    enabled: Optional[bool] = None,
    include_system: bool = True,
    page: int = 1,
    limit: int = 100,
    db: Session = Depends(get_db),
):
    """List tool resources. 'system'/'query' describe execution type only, not permissions.

    Args:
        category: Optional category filter.
        enabled: Optional enabled-state filter.
        include_system: When False, built-in (seeded) tools are hidden.
        page: 1-based page number.
        limit: Page size.
        db: Injected database session.

    Returns:
        dict with total count, pagination echo, and the page of rows
        (newest first).
    """
    _seed_default_tools_if_empty(db)
    query = db.query(ToolResource)
    if not include_system:
        # Consistency fix: use SQLAlchemy's `is_()` operator like the
        # seeding helper does, instead of the `== False` comparison.
        query = query.filter(ToolResource.is_system.is_(False))
    if category:
        query = query.filter(ToolResource.category == category)
    if enabled is not None:
        query = query.filter(ToolResource.enabled == enabled)
    total = query.count()
    rows = (
        query.order_by(ToolResource.created_at.desc())
        .offset(max(page - 1, 0) * limit)
        .limit(limit)
        .all()
    )
    return {"total": total, "page": page, "limit": limit, "list": rows}
|
||||
|
||||
|
||||
@router.get("/resources/{id}", response_model=ToolResourceOut)
def get_tool_resource(id: str, db: Session = Depends(get_db)):
    """Fetch one tool resource, falling back to the canonical legacy alias."""
    _seed_default_tools_if_empty(db)
    item = db.query(ToolResource).filter(ToolResource.id == id).first()
    if item is None:
        alias = normalize_tool_id(id)
        if alias and alias != id:
            item = db.query(ToolResource).filter(ToolResource.id == alias).first()
    if item is None:
        raise HTTPException(status_code=404, detail="Tool resource not found")
    return item
|
||||
|
||||
|
||||
@router.post("/resources", response_model=ToolResourceOut)
def create_tool_resource(data: ToolResourceCreate, db: Session = Depends(get_db)):
    """Create a custom (non-system) tool resource.

    The client-supplied id (if any) is normalized through the legacy alias
    map and must be unique; otherwise a short random id is generated.
    """
    _seed_default_tools_if_empty(db)

    requested_id = normalize_tool_id((data.id or "").strip())
    if requested_id and db.query(ToolResource).filter(ToolResource.id == requested_id).first():
        raise HTTPException(status_code=400, detail="Tool ID already exists")

    _validate_query_http_config(category=data.category, tool_id=requested_id, http_url=data.http_url)
    schema = _normalize_parameter_schema(data.parameter_schema, tool_id=requested_id)
    defaults = _normalize_parameter_defaults(data.parameter_defaults)

    # Only system-category tools may block waiting for a device response.
    wait_flag = bool(data.wait_for_response) if data.category == "system" else False

    record = ToolResource(
        id=requested_id or f"tool_{str(uuid.uuid4())[:8]}",
        user_id=1,
        name=data.name,
        description=data.description,
        category=data.category,
        icon=data.icon,
        http_method=_normalize_http_method(data.http_method),
        http_url=(data.http_url or "").strip() or None,
        http_headers=data.http_headers or {},
        http_timeout_ms=max(1000, int(data.http_timeout_ms or 10000)),
        parameter_schema=schema,
        parameter_defaults=defaults,
        wait_for_response=wait_flag,
        enabled=data.enabled,
        is_system=False,
    )
    db.add(record)
    db.commit()
    db.refresh(record)
    return record
|
||||
|
||||
|
||||
@router.put("/resources/{id}", response_model=ToolResourceOut)
def update_tool_resource(id: str, data: ToolResourceUpdate, db: Session = Depends(get_db)):
    """Partially update a tool resource (legacy ids resolve to canonical ones).

    Only fields the client actually sent are applied; normalization and
    validation run on the merged (incoming + current) state.
    """
    _seed_default_tools_if_empty(db)
    canonical_id = normalize_tool_id(id)
    item = db.query(ToolResource).filter(ToolResource.id == id).first()
    if not item and canonical_id and canonical_id != id:
        item = db.query(ToolResource).filter(ToolResource.id == canonical_id).first()
    if not item:
        raise HTTPException(status_code=404, detail="Tool resource not found")

    # exclude_unset: distinguish "field omitted" from "field set to None".
    update_data = data.model_dump(exclude_unset=True)

    # Validate the category/http_url combination that WOULD result from the
    # update, mixing incoming values with the row's current values.
    new_category = update_data.get("category", item.category)
    new_http_url = update_data.get("http_url", item.http_url)
    _validate_query_http_config(category=new_category, tool_id=item.id, http_url=new_http_url)

    if "http_method" in update_data:
        update_data["http_method"] = _normalize_http_method(update_data.get("http_method"))
    if "http_timeout_ms" in update_data and update_data.get("http_timeout_ms") is not None:
        # Clamp to a 1-second minimum timeout.
        update_data["http_timeout_ms"] = max(1000, int(update_data["http_timeout_ms"]))
    if "parameter_schema" in update_data:
        update_data["parameter_schema"] = _normalize_parameter_schema(update_data.get("parameter_schema"), tool_id=item.id)
    if "parameter_defaults" in update_data:
        update_data["parameter_defaults"] = _normalize_parameter_defaults(update_data.get("parameter_defaults"))
    # wait_for_response is only meaningful for system-category tools.
    if new_category != "system":
        update_data["wait_for_response"] = False

    for field, value in update_data.items():
        setattr(item, field, value)
    item.updated_at = datetime.utcnow()

    db.commit()
    db.refresh(item)
    return item
|
||||
|
||||
|
||||
@router.delete("/resources/{id}")
def delete_tool_resource(id: str, db: Session = Depends(get_db)):
    """Delete a tool resource (legacy ids resolve to canonical ones)."""
    _seed_default_tools_if_empty(db)
    target = db.query(ToolResource).filter(ToolResource.id == id).first()
    if target is None:
        alias = normalize_tool_id(id)
        if alias and alias != id:
            target = db.query(ToolResource).filter(ToolResource.id == alias).first()
    if target is None:
        raise HTTPException(status_code=404, detail="Tool resource not found")
    db.delete(target)
    db.commit()
    return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
# ============ Autotest ============
|
||||
class AutotestResult:
    """Accumulates named pass/fail outcomes for one automated test run."""

    def __init__(self):
        # Short random run id plus a wall-clock start for duration reporting.
        self.id = str(uuid.uuid4())[:8]
        self.started_at = time.time()
        self.tests = []
        self.summary = {"passed": 0, "failed": 0, "total": 0}

    def add_test(self, name: str, passed: bool, message: str = "", duration_ms: int = 0):
        """Record one test outcome and bump the pass/fail counters."""
        entry = {
            "name": name,
            "passed": passed,
            "message": message,
            "duration_ms": duration_ms,
        }
        self.tests.append(entry)
        self.summary["passed" if passed else "failed"] += 1
        self.summary["total"] += 1

    def to_dict(self):
        """Serialize the run; elapsed time is computed at call time."""
        elapsed_ms = int((time.time() - self.started_at) * 1000)
        return {
            "id": self.id,
            "started_at": self.started_at,
            "duration_ms": elapsed_ms,
            "tests": self.tests,
            "summary": self.summary,
        }
|
||||
|
||||
|
||||
@router.post("/autotest")
def run_autotest(
    llm_model_id: Optional[str] = None,
    asr_model_id: Optional[str] = None,
    test_llm: bool = True,
    test_asr: bool = True,
    db: Session = Depends(get_db)
):
    """Run the automated test suite for the selected LLM/ASR models."""
    result = AutotestResult()

    # Run model test batteries first (when an id was supplied)...
    if test_llm and llm_model_id:
        _test_llm_model(db, llm_model_id, result)
    if test_asr and asr_model_id:
        _test_asr_model(db, asr_model_id, result)

    # ...then record a failure for each requested-but-missing model id.
    if test_llm and not llm_model_id:
        result.add_test("LLM Model Check", False, "No LLM model ID provided")
    if test_asr and not asr_model_id:
        result.add_test("ASR Model Check", False, "No ASR model ID provided")

    return result.to_dict()
|
||||
|
||||
|
||||
@router.post("/autotest/llm/{model_id}")
def autotest_llm_model(model_id: str, db: Session = Depends(get_db)):
    """Run the autotest battery against a single LLM model."""
    outcome = AutotestResult()
    _test_llm_model(db, model_id, outcome)
    return outcome.to_dict()
|
||||
|
||||
|
||||
@router.post("/autotest/asr/{model_id}")
def autotest_asr_model(model_id: str, db: Session = Depends(get_db)):
    """Run the autotest battery against a single ASR model."""
    outcome = AutotestResult()
    _test_asr_model(db, model_id, outcome)
    return outcome.to_dict()
|
||||
|
||||
|
||||
def _test_llm_model(db: Session, model_id: str, result: AutotestResult):
    """Internal: run a battery of checks against one LLM model.

    Appends entries to ``result``: existence, API connection, temperature
    configuration, and (for text-type models) streaming support.
    """
    start_time = time.time()

    # 1. Model existence
    model = db.query(LLMModel).filter(LLMModel.id == model_id).first()
    duration_ms = int((time.time() - start_time) * 1000)

    if not model:
        result.add_test("Model Existence", False, f"Model {model_id} not found", duration_ms)
        return

    result.add_test("Model Existence", True, f"Found model: {model.name}", duration_ms)

    # BUGFIX: build the auth headers once, up front. Previously `headers`
    # was assigned inside the phase-2 try block and reused by the streaming
    # test below; if phase 2 raised before the assignment, the streaming
    # test crashed with NameError instead of reporting a failure.
    headers = {"Authorization": f"Bearer {model.api_key}"}

    # 2. Connection test: minimal chat completion
    test_start = time.time()
    try:
        payload = {
            "model": model.model_name or "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "Reply with 'OK'."}],
            "max_tokens": 10,
            "temperature": 0.1,
        }

        with httpx.Client(timeout=30.0) as client:
            response = client.post(
                f"{model.base_url}/chat/completions",
                json=payload,
                headers=headers
            )
            response.raise_for_status()

        result_text = response.json()
        latency_ms = int((time.time() - test_start) * 1000)

        if result_text.get("choices"):
            result.add_test("API Connection", True, f"Latency: {latency_ms}ms", latency_ms)
        else:
            result.add_test("API Connection", False, "Empty response", latency_ms)

    except Exception as e:
        latency_ms = int((time.time() - test_start) * 1000)
        result.add_test("API Connection", False, str(e)[:200], latency_ms)

    # 3. Configuration check (informational; always passes)
    if model.temperature is not None:
        result.add_test("Temperature Setting", True, f"temperature={model.temperature}")
    else:
        result.add_test("Temperature Setting", True, "Using default")

    # 4. Streaming support (text models only)
    if model.type == "text":
        test_start = time.time()
        try:
            with httpx.Client(timeout=30.0) as client:
                with client.stream(
                    "POST",
                    f"{model.base_url}/chat/completions",
                    json={
                        "model": model.model_name or "gpt-3.5-turbo",
                        "messages": [{"role": "user", "content": "Count from 1 to 3."}],
                        "stream": True,
                    },
                    headers=headers
                ) as response:
                    response.raise_for_status()
                    # Drain the stream; the chunk count proves data arrived.
                    chunk_count = sum(1 for _ in response.iter_bytes())

            latency_ms = int((time.time() - test_start) * 1000)
            result.add_test("Streaming Support", True, f"Received {chunk_count} chunks", latency_ms)
        except Exception as e:
            latency_ms = int((time.time() - test_start) * 1000)
            result.add_test("Streaming Support", False, str(e)[:200], latency_ms)
|
||||
|
||||
|
||||
def _test_asr_model(db: Session, model_id: str, result: AutotestResult):
    """Internal: run a battery of checks against one ASR model.

    Appends entries to ``result``: existence, hotword configuration,
    vendor endpoint availability, and language configuration.
    """
    start_time = time.time()

    # 1. Model existence
    model = db.query(ASRModel).filter(ASRModel.id == model_id).first()
    duration_ms = int((time.time() - start_time) * 1000)

    if not model:
        result.add_test("Model Existence", False, f"Model {model_id} not found", duration_ms)
        return

    result.add_test("Model Existence", True, f"Found model: {model.name}", duration_ms)

    # 2. Hotword configuration (informational; always passes)
    if model.hotwords:
        result.add_test("Hotwords Config", True, f"Hotwords: {len(model.hotwords)} words")
    else:
        result.add_test("Hotwords Config", True, "No hotwords configured")

    # 3. API availability probe, vendor-specific endpoint
    test_start = time.time()
    try:
        headers = {"Authorization": f"Bearer {model.api_key}"}

        with httpx.Client(timeout=30.0) as client:
            normalized_vendor = (model.vendor or "").strip().lower()
            if normalized_vendor in [
                "openai compatible",
                "openai-compatible",
                "siliconflow",  # backward compatibility
                "paraformer",
            ]:
                probe_url = f"{model.base_url}/asr"
            elif normalized_vendor == "openai":
                # BUGFIX: use the already-normalized vendor string. The old
                # `model.vendor.lower()` raised AttributeError for a NULL
                # vendor and missed values with stray whitespace.
                probe_url = f"{model.base_url}/audio/models"
            else:
                # Generic health check for unknown vendors.
                probe_url = f"{model.base_url}/health"
            response = client.get(probe_url, headers=headers)

        latency_ms = int((time.time() - test_start) * 1000)

        # 405 = method not allowed, but the endpoint exists.
        if response.status_code in [200, 405]:
            result.add_test("API Availability", True, f"Status: {response.status_code}", latency_ms)
        else:
            result.add_test("API Availability", False, f"Status: {response.status_code}", latency_ms)

    except httpx.TimeoutException:
        latency_ms = int((time.time() - test_start) * 1000)
        result.add_test("API Availability", False, "Connection timeout", latency_ms)
    except Exception as e:
        latency_ms = int((time.time() - test_start) * 1000)
        result.add_test("API Availability", False, str(e)[:200], latency_ms)

    # 4. Language configuration
    if model.language in ["zh", "en", "Multi-lingual"]:
        result.add_test("Language Config", True, f"Language: {model.language}")
    else:
        result.add_test("Language Config", False, f"Unknown language: {model.language}")
|
||||
|
||||
|
||||
# ============ Quick Health Check ============
|
||||
@router.get("/health")
def health_check():
    """Quick liveness probe: reports status, current time, and known tool ids."""
    tool_ids = list(TOOL_REGISTRY.keys())
    return {
        "status": "healthy",
        "timestamp": time.time(),
        "tools": tool_ids,
    }
|
||||
|
||||
|
||||
@router.post("/test-message")
def send_test_message(
    llm_model_id: str,
    message: str = "Hello, this is a test message.",
    db: Session = Depends(get_db)
):
    """Send a one-off test message to an LLM model and return its reply.

    Args:
        llm_model_id: LLMModel primary key.
        message: Text to send (defaults to a canned greeting).
        db: Injected database session.

    Raises:
        HTTPException: 404 for an unknown model, 500 on upstream failure.
    """
    model = db.query(LLMModel).filter(LLMModel.id == llm_model_id).first()
    if not model:
        raise HTTPException(status_code=404, detail="LLM Model not found")

    try:
        payload = {
            "model": model.model_name or "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": message}],
            "max_tokens": 500,
            "temperature": 0.7,
        }
        headers = {"Authorization": f"Bearer {model.api_key}"}

        with httpx.Client(timeout=60.0) as client:
            response = client.post(
                f"{model.base_url}/chat/completions",
                json=payload,
                headers=headers
            )
            response.raise_for_status()

            result = response.json()
            # BUGFIX: guard against an empty "choices" list, which made the
            # previous `result.get("choices", [{}])[0]` raise IndexError
            # (the default only applied when the key was missing entirely).
            choices = result.get("choices") or [{}]
            reply = choices[0].get("message", {}).get("content", "")

            return {
                "success": True,
                "reply": reply,
                "usage": result.get("usage", {})
            }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
442
api/app/routers/voices.py
Normal file
442
api/app/routers/voices.py
Normal file
@@ -0,0 +1,442 @@
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import wave
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..db import get_db
|
||||
from ..id_generator import unique_short_id
|
||||
from ..models import Voice
|
||||
from ..schemas import VoiceCreate, VoiceOut, VoicePreviewRequest, VoicePreviewResponse, VoiceUpdate
|
||||
|
||||
router = APIRouter(prefix="/voices", tags=["Voices"])
|
||||
|
||||
# Default model/voice identifiers per TTS vendor family.
OPENAI_COMPATIBLE_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
DASHSCOPE_DEFAULT_MODEL = "qwen3-tts-flash-realtime"
DASHSCOPE_DEFAULT_VOICE_KEY = "Cherry"
# DashScope realtime TTS speaks over a websocket (wss), not plain HTTP.
DASHSCOPE_DEFAULT_BASE_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
|
||||
|
||||
# The DashScope realtime-TTS SDK is an optional dependency: when it is not
# installed, fall back to None placeholders plus a stub callback base class,
# and record availability so endpoints can report a clear error instead of
# crashing at import time.
try:
    import dashscope
    from dashscope.audio.qwen_tts_realtime import AudioFormat, QwenTtsRealtime, QwenTtsRealtimeCallback

    DASHSCOPE_SDK_AVAILABLE = True
except ImportError:
    dashscope = None  # type: ignore[assignment]
    AudioFormat = None  # type: ignore[assignment]
    QwenTtsRealtime = None  # type: ignore[assignment]
    DASHSCOPE_SDK_AVAILABLE = False

    class QwenTtsRealtimeCallback:  # type: ignore[no-redef]
        """Fallback callback base when DashScope SDK is unavailable."""

        pass
||||
|
||||
|
||||
class _DashScopePreviewCallback(QwenTtsRealtimeCallback):
    """Collect DashScope realtime callback events and PCM chunks."""

    def __init__(self) -> None:
        super().__init__()
        # Set when the websocket reports open.
        self._open_event = threading.Event()
        # Set when synthesis ends (completion, error, or unexpected close).
        self._done_event = threading.Event()
        # Guards _audio_chunks; callbacks may arrive on an SDK thread.
        self._lock = threading.Lock()
        self._audio_chunks: list[bytes] = []
        self._error_message: Optional[str] = None

    def on_open(self) -> None:
        # Unblocks wait_for_open().
        self._open_event.set()

    def on_close(self, code: int, reason: str) -> None:
        # A close that arrives before completion is treated as a failure.
        if not self._done_event.is_set():
            self._error_message = f"DashScope websocket closed unexpectedly: {code} {reason}"
            self._done_event.set()

    def on_error(self, message: str) -> None:
        # Record the error and unblock wait_for_done().
        self._error_message = str(message)
        self._done_event.set()

    def on_event(self, response: Any) -> None:
        # Normalize then dispatch one structured event from the session.
        payload = _coerce_dashscope_event(response)
        event_type = str(payload.get("type") or "").strip()
        if event_type == "response.audio.delta":
            delta = payload.get("delta")
            if isinstance(delta, str):
                try:
                    self._append_audio(base64.b64decode(delta))
                except Exception:
                    # Malformed base64 chunks are dropped rather than aborting.
                    return
        elif event_type in {"response.done", "session.finished"}:
            self._done_event.set()
        elif event_type == "error":
            self._error_message = _format_dashscope_error_event(payload)
            self._done_event.set()

    def on_data(self, data: bytes) -> None:
        # Some SDK versions emit raw PCM frames via on_data.
        if isinstance(data, (bytes, bytearray)):
            self._append_audio(bytes(data))

    def wait_for_open(self, timeout: float = 10.0) -> None:
        """Block until the websocket opens; raise TimeoutError on timeout."""
        if not self._open_event.wait(timeout):
            raise TimeoutError("DashScope websocket open timeout")

    def wait_for_done(self, timeout: float = 45.0) -> None:
        """Block until synthesis finishes; raise TimeoutError on timeout."""
        if not self._done_event.wait(timeout):
            raise TimeoutError("DashScope synthesis timeout")

    def raise_if_error(self) -> None:
        """Re-raise any recorded callback error as RuntimeError."""
        if self._error_message:
            raise RuntimeError(self._error_message)

    def read_audio(self) -> bytes:
        """Return all collected PCM bytes joined in arrival order."""
        with self._lock:
            return b"".join(self._audio_chunks)

    def _append_audio(self, chunk: bytes) -> None:
        # Skip empty chunks; append under the lock.
        if not chunk:
            return
        with self._lock:
            self._audio_chunks.append(chunk)
|
||||
|
||||
|
||||
def _coerce_dashscope_event(response: Any) -> Dict[str, Any]:
|
||||
if isinstance(response, dict):
|
||||
return response
|
||||
if isinstance(response, str):
|
||||
try:
|
||||
parsed = json.loads(response)
|
||||
if isinstance(parsed, dict):
|
||||
return parsed
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
return {"type": "raw", "message": str(response)}
|
||||
|
||||
|
||||
def _format_dashscope_error_event(payload: Dict[str, Any]) -> str:
|
||||
error = payload.get("error")
|
||||
if isinstance(error, dict):
|
||||
code = str(error.get("code") or "").strip()
|
||||
message = str(error.get("message") or "").strip()
|
||||
if code and message:
|
||||
return f"{code}: {message}"
|
||||
return message or str(error)
|
||||
return str(error or "DashScope realtime TTS error")
|
||||
|
||||
|
||||
def _create_dashscope_realtime_client(*, model: str, callback: _DashScopePreviewCallback, url: str, api_key: str) -> Any:
    """Build a QwenTtsRealtime client, tolerating SDKs without an api_key kwarg."""
    if QwenTtsRealtime is None:
        raise RuntimeError("DashScope SDK unavailable")

    common_kwargs = {"model": model, "callback": callback, "url": url}
    try:
        return QwenTtsRealtime(api_key=api_key, **common_kwargs)  # type: ignore[misc]
    except TypeError as exc:
        # Older SDK constructors reject api_key; retry without it in that case only.
        if "api_key" in str(exc):
            return QwenTtsRealtime(**common_kwargs)  # type: ignore[misc]
        raise
|
||||
|
||||
|
||||
def _pcm16_to_wav_bytes(pcm_bytes: bytes, sample_rate: int = 24000) -> bytes:
|
||||
with io.BytesIO() as buffer:
|
||||
with wave.open(buffer, "wb") as wav_file:
|
||||
wav_file.setnchannels(1)
|
||||
wav_file.setsampwidth(2)
|
||||
wav_file.setframerate(sample_rate)
|
||||
wav_file.writeframes(pcm_bytes)
|
||||
return buffer.getvalue()
|
||||
|
||||
|
||||
def _synthesize_dashscope_preview(
    *,
    text: str,
    api_key: str,
    base_url: str,
    model: str,
    voice_key: str,
    speed: Optional[float],
) -> bytes:
    """Synthesize `text` via DashScope realtime TTS and return WAV bytes.

    Opens a realtime websocket session, streams the text in commit mode,
    collects 24 kHz mono 16-bit PCM from the callback, and wraps it in a
    WAV container. Raises RuntimeError on SDK absence, vendor errors, or
    empty audio; TimeoutError on open/synthesis timeouts.
    """
    if not DASHSCOPE_SDK_AVAILABLE:
        raise RuntimeError("dashscope package not installed; install with `pip install dashscope>=1.25.11`")
    if not AudioFormat:
        raise RuntimeError("DashScope SDK AudioFormat unavailable")

    callback = _DashScopePreviewCallback()
    # The SDK reads credentials from the module-level attribute as well.
    if dashscope is not None:
        dashscope.api_key = api_key
    client = _create_dashscope_realtime_client(
        model=model,
        callback=callback,
        url=base_url,
        api_key=api_key,
    )

    try:
        client.connect()
        callback.wait_for_open()
        session_kwargs: Dict[str, Any] = {
            "voice": voice_key,
            "response_format": AudioFormat.PCM_24000HZ_MONO_16BIT,
            "mode": "commit",
        }
        # speech_rate is supported by qwen3-* realtime models.
        normalized_model = str(model or "").strip().lower()
        if speed is not None and normalized_model.startswith("qwen3-"):
            # Clamp to the vendor-accepted range.
            session_kwargs["speech_rate"] = max(0.5, min(2.0, float(speed)))
        client.update_session(**session_kwargs)
        client.append_text(text)
        client.commit()
        callback.wait_for_done()
        callback.raise_if_error()
        pcm_audio = callback.read_audio()
        if not pcm_audio:
            raise RuntimeError("No audio chunk returned from DashScope realtime synthesis")
        return _pcm16_to_wav_bytes(pcm_audio, sample_rate=24000)
    finally:
        # Best-effort teardown: different SDK versions expose finish/close.
        finish_fn = getattr(client, "finish", None)
        if callable(finish_fn):
            try:
                finish_fn()
            except Exception:
                pass
        close_fn = getattr(client, "close", None)
        if callable(close_fn):
            try:
                close_fn()
            except Exception:
                pass
|
||||
|
||||
|
||||
def _is_openai_compatible_vendor(vendor: str) -> bool:
|
||||
normalized = (vendor or "").strip().lower()
|
||||
return normalized in {
|
||||
"openai compatible",
|
||||
"openai-compatible",
|
||||
"siliconflow", # backward compatibility
|
||||
"硅基流动", # backward compatibility
|
||||
}
|
||||
|
||||
|
||||
def _is_dashscope_vendor(vendor: str) -> bool:
|
||||
normalized = (vendor or "").strip().lower()
|
||||
return normalized in {
|
||||
"dashscope",
|
||||
}
|
||||
|
||||
|
||||
def _default_base_url(vendor: str) -> Optional[str]:
    """Vendor-specific fallback endpoint, or None for unknown vendors."""
    if _is_dashscope_vendor(vendor):
        return DASHSCOPE_DEFAULT_BASE_URL
    if _is_openai_compatible_vendor(vendor):
        return "https://api.siliconflow.cn/v1"
    return None
|
||||
|
||||
|
||||
def _build_openai_compatible_voice_key(voice: Voice, model: str) -> str:
    """Resolve the vendor voice key, deriving ``model:id`` when unset."""
    explicit_key = voice.voice_key
    if explicit_key:
        return explicit_key
    raw_id = voice.id
    return raw_id if ":" in raw_id else f"{model}:{raw_id}"
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_voices(
|
||||
vendor: Optional[str] = None,
|
||||
language: Optional[str] = None,
|
||||
gender: Optional[str] = None,
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取声音库列表"""
|
||||
query = db.query(Voice)
|
||||
if vendor:
|
||||
query = query.filter(Voice.vendor == vendor)
|
||||
if language:
|
||||
query = query.filter(Voice.language == language)
|
||||
if gender:
|
||||
query = query.filter(Voice.gender == gender)
|
||||
|
||||
total = query.count()
|
||||
voices = query.order_by(Voice.created_at.desc()) \
|
||||
.offset((page - 1) * limit).limit(limit).all()
|
||||
return {"total": total, "page": page, "limit": limit, "list": voices}
|
||||
|
||||
|
||||
@router.post("", response_model=VoiceOut)
|
||||
def create_voice(data: VoiceCreate, db: Session = Depends(get_db)):
|
||||
"""创建声音"""
|
||||
vendor = data.vendor.strip()
|
||||
model = data.model
|
||||
voice_key = data.voice_key
|
||||
|
||||
if _is_openai_compatible_vendor(vendor):
|
||||
model = model or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
if not voice_key:
|
||||
raw_id = (data.id or data.name).strip()
|
||||
voice_key = raw_id if ":" in raw_id else f"{model}:{raw_id}"
|
||||
elif _is_dashscope_vendor(vendor):
|
||||
model = (model or "").strip() or DASHSCOPE_DEFAULT_MODEL
|
||||
voice_key = (voice_key or "").strip() or DASHSCOPE_DEFAULT_VOICE_KEY
|
||||
|
||||
voice = Voice(
|
||||
id=unique_short_id("tts", db, Voice),
|
||||
user_id=1,
|
||||
name=data.name,
|
||||
vendor=vendor,
|
||||
gender=data.gender,
|
||||
language=data.language,
|
||||
description=data.description,
|
||||
model=model,
|
||||
voice_key=voice_key,
|
||||
api_key=data.api_key,
|
||||
base_url=data.base_url,
|
||||
speed=data.speed,
|
||||
gain=data.gain,
|
||||
pitch=data.pitch,
|
||||
enabled=data.enabled,
|
||||
)
|
||||
db.add(voice)
|
||||
db.commit()
|
||||
db.refresh(voice)
|
||||
return voice
|
||||
|
||||
|
||||
@router.get("/{id}", response_model=VoiceOut)
|
||||
def get_voice(id: str, db: Session = Depends(get_db)):
|
||||
"""获取单个声音详情"""
|
||||
voice = db.query(Voice).filter(Voice.id == id).first()
|
||||
if not voice:
|
||||
raise HTTPException(status_code=404, detail="Voice not found")
|
||||
return voice
|
||||
|
||||
|
||||
@router.put("/{id}", response_model=VoiceOut)
|
||||
def update_voice(id: str, data: VoiceUpdate, db: Session = Depends(get_db)):
|
||||
"""更新声音"""
|
||||
voice = db.query(Voice).filter(Voice.id == id).first()
|
||||
if not voice:
|
||||
raise HTTPException(status_code=404, detail="Voice not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
if "vendor" in update_data and update_data["vendor"] is not None:
|
||||
update_data["vendor"] = update_data["vendor"].strip()
|
||||
|
||||
vendor_for_defaults = update_data.get("vendor", voice.vendor)
|
||||
if _is_openai_compatible_vendor(vendor_for_defaults):
|
||||
model = update_data.get("model") or voice.model or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
voice_key = update_data.get("voice_key") or voice.voice_key
|
||||
update_data["model"] = model
|
||||
update_data["voice_key"] = voice_key or _build_openai_compatible_voice_key(voice, model)
|
||||
elif _is_dashscope_vendor(vendor_for_defaults):
|
||||
model = update_data.get("model") or voice.model or DASHSCOPE_DEFAULT_MODEL
|
||||
voice_key = update_data.get("voice_key") or voice.voice_key or DASHSCOPE_DEFAULT_VOICE_KEY
|
||||
update_data["model"] = model
|
||||
update_data["voice_key"] = voice_key
|
||||
|
||||
for field, value in update_data.items():
|
||||
setattr(voice, field, value)
|
||||
|
||||
db.commit()
|
||||
db.refresh(voice)
|
||||
return voice
|
||||
|
||||
|
||||
@router.delete("/{id}")
|
||||
def delete_voice(id: str, db: Session = Depends(get_db)):
|
||||
"""删除声音"""
|
||||
voice = db.query(Voice).filter(Voice.id == id).first()
|
||||
if not voice:
|
||||
raise HTTPException(status_code=404, detail="Voice not found")
|
||||
db.delete(voice)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
@router.post("/{id}/preview", response_model=VoicePreviewResponse)
|
||||
def preview_voice(id: str, data: VoicePreviewRequest, db: Session = Depends(get_db)):
|
||||
"""试听指定声音,支持 OpenAI-compatible 与 DashScope Realtime。"""
|
||||
voice = db.query(Voice).filter(Voice.id == id).first()
|
||||
if not voice:
|
||||
raise HTTPException(status_code=404, detail="Voice not found")
|
||||
|
||||
text = data.text.strip()
|
||||
if not text:
|
||||
raise HTTPException(status_code=400, detail="Preview text cannot be empty")
|
||||
|
||||
if _is_dashscope_vendor(voice.vendor):
|
||||
api_key = (data.api_key or "").strip() or (voice.api_key or "").strip()
|
||||
if not api_key:
|
||||
api_key = os.getenv("DASHSCOPE_API_KEY", "").strip() or os.getenv("TTS_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=400, detail=f"API key is required for voice: {voice.name}")
|
||||
|
||||
base_url = (voice.base_url or "").strip() or DASHSCOPE_DEFAULT_BASE_URL
|
||||
model = (voice.model or "").strip() or DASHSCOPE_DEFAULT_MODEL
|
||||
voice_key = (voice.voice_key or "").strip() or DASHSCOPE_DEFAULT_VOICE_KEY
|
||||
effective_speed = data.speed if data.speed is not None else voice.speed
|
||||
try:
|
||||
wav_bytes = _synthesize_dashscope_preview(
|
||||
text=text,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
model=model,
|
||||
voice_key=voice_key,
|
||||
speed=effective_speed,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"DashScope preview failed: {exc}") from exc
|
||||
audio_base64 = base64.b64encode(wav_bytes).decode("utf-8")
|
||||
return VoicePreviewResponse(success=True, audio_url=f"data:audio/wav;base64,{audio_base64}")
|
||||
|
||||
api_key = (data.api_key or "").strip() or (voice.api_key or "").strip()
|
||||
if not api_key and _is_openai_compatible_vendor(voice.vendor):
|
||||
api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=400, detail=f"API key is required for voice: {voice.name}")
|
||||
|
||||
base_url = (voice.base_url or "").strip() or (_default_base_url(voice.vendor) or "")
|
||||
if not base_url:
|
||||
raise HTTPException(status_code=400, detail=f"Base URL is required for voice: {voice.name}")
|
||||
|
||||
model = voice.model or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
payload = {
|
||||
"model": model,
|
||||
"input": text,
|
||||
"voice": voice.voice_key or _build_openai_compatible_voice_key(voice, model),
|
||||
"response_format": "mp3",
|
||||
"speed": data.speed if data.speed is not None else voice.speed,
|
||||
}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=45.0) as client:
|
||||
response = client.post(
|
||||
f"{base_url.rstrip('/')}/audio/speech",
|
||||
headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
|
||||
json=payload,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"TTS request failed: {exc}") from exc
|
||||
|
||||
if response.status_code != 200:
|
||||
detail = response.text
|
||||
try:
|
||||
detail_json = response.json()
|
||||
detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
|
||||
except Exception:
|
||||
pass
|
||||
raise HTTPException(status_code=502, detail=f"TTS vendor error: {detail}")
|
||||
|
||||
audio_base64 = base64.b64encode(response.content).decode("utf-8")
|
||||
return VoicePreviewResponse(success=True, audio_url=f"data:audio/mpeg;base64,{audio_base64}")
|
||||
112
api/app/routers/workflows.py
Normal file
112
api/app/routers/workflows.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
from ..db import get_db
|
||||
from ..models import Workflow
|
||||
from ..schemas import WorkflowCreate, WorkflowUpdate, WorkflowOut, WorkflowNode, WorkflowEdge
|
||||
|
||||
router = APIRouter(prefix="/workflows", tags=["Workflows"])
|
||||
|
||||
|
||||
def _normalize_graph_payload(nodes: List[Any], edges: List[Any]) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Validate raw node/edge payloads and return them as plain dicts."""
    validated_nodes = [
        item if isinstance(item, WorkflowNode) else WorkflowNode.model_validate(item)
        for item in nodes
    ]
    validated_edges = [
        item if isinstance(item, WorkflowEdge) else WorkflowEdge.model_validate(item)
        for item in edges
    ]
    return (
        [item.model_dump() for item in validated_nodes],
        [item.model_dump() for item in validated_edges],
    )
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_workflows(
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取工作流列表"""
|
||||
query = db.query(Workflow)
|
||||
total = query.count()
|
||||
workflows = query.order_by(Workflow.created_at.desc()) \
|
||||
.offset((page - 1) * limit).limit(limit).all()
|
||||
return {"total": total, "page": page, "limit": limit, "list": workflows}
|
||||
|
||||
|
||||
@router.post("", response_model=WorkflowOut)
|
||||
def create_workflow(data: WorkflowCreate, db: Session = Depends(get_db)):
|
||||
"""创建工作流"""
|
||||
nodes, edges = _normalize_graph_payload(data.nodes, data.edges)
|
||||
workflow = Workflow(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
user_id=1,
|
||||
name=data.name,
|
||||
node_count=data.nodeCount or len(nodes),
|
||||
created_at=data.createdAt or datetime.utcnow().isoformat(),
|
||||
updated_at=data.updatedAt or "",
|
||||
global_prompt=data.globalPrompt,
|
||||
nodes=nodes,
|
||||
edges=edges,
|
||||
)
|
||||
db.add(workflow)
|
||||
db.commit()
|
||||
db.refresh(workflow)
|
||||
return workflow
|
||||
|
||||
|
||||
@router.get("/{id}", response_model=WorkflowOut)
|
||||
def get_workflow(id: str, db: Session = Depends(get_db)):
|
||||
"""获取单个工作流"""
|
||||
workflow = db.query(Workflow).filter(Workflow.id == id).first()
|
||||
if not workflow:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
return workflow
|
||||
|
||||
|
||||
@router.put("/{id}", response_model=WorkflowOut)
|
||||
def update_workflow(id: str, data: WorkflowUpdate, db: Session = Depends(get_db)):
|
||||
"""更新工作流"""
|
||||
workflow = db.query(Workflow).filter(Workflow.id == id).first()
|
||||
if not workflow:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True, exclude={"nodes", "edges"})
|
||||
field_map = {
|
||||
"nodeCount": "node_count",
|
||||
"globalPrompt": "global_prompt",
|
||||
}
|
||||
for field, value in update_data.items():
|
||||
setattr(workflow, field_map.get(field, field), value)
|
||||
|
||||
if data.nodes is not None or data.edges is not None:
|
||||
existing_nodes = workflow.nodes if isinstance(workflow.nodes, list) else []
|
||||
existing_edges = workflow.edges if isinstance(workflow.edges, list) else []
|
||||
input_nodes = data.nodes if data.nodes is not None else existing_nodes
|
||||
input_edges = data.edges if data.edges is not None else existing_edges
|
||||
nodes, edges = _normalize_graph_payload(input_nodes, input_edges)
|
||||
workflow.nodes = nodes
|
||||
workflow.edges = edges
|
||||
workflow.node_count = len(nodes)
|
||||
|
||||
workflow.updated_at = datetime.utcnow().isoformat()
|
||||
db.commit()
|
||||
db.refresh(workflow)
|
||||
return workflow
|
||||
|
||||
|
||||
@router.delete("/{id}")
|
||||
def delete_workflow(id: str, db: Session = Depends(get_db)):
|
||||
"""删除工作流"""
|
||||
workflow = db.query(Workflow).filter(Workflow.id == id).first()
|
||||
if not workflow:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
db.delete(workflow)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
@@ -1,19 +1,276 @@
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
||||
|
||||
|
||||
# ============ Enums ============
|
||||
class AssistantConfigMode(str, Enum):
    """Backend configuration modes available for an assistant."""

    PLATFORM = "platform"
    DIFY = "dify"
    FASTGPT = "fastgpt"
    NONE = "none"
|
||||
|
||||
|
||||
class LLMModelType(str, Enum):
    """Kinds of LLM-family models managed by the platform."""

    TEXT = "text"
    EMBEDDING = "embedding"
    RERANK = "rerank"
|
||||
|
||||
|
||||
class ASRLanguage(str, Enum):
    """Languages supported by ASR models."""

    ZH = "zh"
    EN = "en"
    MULTILINGUAL = "Multi-lingual"
|
||||
|
||||
|
||||
class VoiceGender(str, Enum):
    """Gender labels used for voices."""

    MALE = "Male"
    FEMALE = "Female"
|
||||
|
||||
|
||||
class CallRecordSource(str, Enum):
    """Origin of a call record."""

    DEBUG = "debug"
    EXTERNAL = "external"
|
||||
|
||||
|
||||
class CallRecordStatus(str, Enum):
    """Terminal status of a call record."""

    CONNECTED = "connected"
    MISSED = "missed"
    FAILED = "failed"
|
||||
|
||||
|
||||
# ============ Voice ============
|
||||
class VoiceBase(BaseModel):
    """Shared voice fields for create/read schemas.

    Fix: the extracted diff left both the old and new declarations of
    ``gender``/``language``/``description`` in the class body; keep only the
    post-change versions (commented value domains, defaulted description).
    """

    name: str
    vendor: str
    gender: str  # "Male" | "Female"
    language: str  # "zh" | "en"
    description: str = ""
|
||||
|
||||
|
||||
class VoiceCreate(VoiceBase):
    """Payload for creating a voice; vendor defaults are filled server-side."""

    id: Optional[str] = None
    model: Optional[str] = None  # vendor TTS model identifier
    voice_key: Optional[str] = None  # vendor-side voice key
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    speed: float = 1.0
    gain: int = 0
    pitch: int = 0
    enabled: bool = True
|
||||
|
||||
|
||||
class VoiceUpdate(BaseModel):
    """Partial update payload for a voice; unset fields are left untouched."""

    name: Optional[str] = None
    vendor: Optional[str] = None
    gender: Optional[str] = None
    language: Optional[str] = None
    description: Optional[str] = None
    model: Optional[str] = None
    voice_key: Optional[str] = None
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    speed: Optional[float] = None
    gain: Optional[int] = None
    pitch: Optional[int] = None
    enabled: Optional[bool] = None
|
||||
|
||||
|
||||
class VoiceOut(VoiceBase):
    """Voice representation returned by the API."""

    id: str
    user_id: Optional[int] = None
    model: Optional[str] = None
    voice_key: Optional[str] = None
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    speed: float = 1.0
    gain: int = 0
    pitch: int = 0
    enabled: bool = True
    is_system: bool = False
    created_at: Optional[datetime] = None

    class Config:
        # Allow constructing this schema directly from ORM rows.
        from_attributes = True
|
||||
|
||||
|
||||
class VoicePreviewRequest(BaseModel):
    """Request body for the voice preview endpoint; overrides stored settings."""

    text: str
    api_key: Optional[str] = None
    speed: Optional[float] = None
    gain: Optional[int] = None
    pitch: Optional[int] = None
|
||||
|
||||
|
||||
class VoicePreviewResponse(BaseModel):
    """Preview result; audio_url carries a base64 data URL when successful."""

    success: bool
    audio_url: Optional[str] = None
    duration_ms: Optional[int] = None
    error: Optional[str] = None
|
||||
|
||||
|
||||
# ============ LLM Model ============
|
||||
class LLMModelBase(BaseModel):
    """Shared LLM model configuration fields."""

    name: str
    vendor: str
    type: LLMModelType
    base_url: str
    api_key: str
    model_name: Optional[str] = None
    temperature: Optional[float] = None
    context_length: Optional[int] = None
    enabled: bool = True
|
||||
|
||||
|
||||
class LLMModelCreate(LLMModelBase):
    """Payload for creating an LLM model; id is generated when omitted."""

    id: Optional[str] = None
|
||||
|
||||
|
||||
class LLMModelUpdate(BaseModel):
    """Partial update payload for an LLM model."""

    name: Optional[str] = None
    vendor: Optional[str] = None
    type: Optional[LLMModelType] = None
    base_url: Optional[str] = None
    api_key: Optional[str] = None
    model_name: Optional[str] = None
    temperature: Optional[float] = None
    context_length: Optional[int] = None
    enabled: Optional[bool] = None
|
||||
|
||||
|
||||
class LLMModelOut(LLMModelBase):
    """LLM model representation returned by the API."""

    id: str
    user_id: int
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    class Config:
        # Allow constructing this schema directly from ORM rows.
        from_attributes = True
|
||||
|
||||
|
||||
class LLMModelTestResponse(BaseModel):
    """Result of an LLM connectivity test."""

    success: bool
    latency_ms: Optional[int] = None
    message: Optional[str] = None
|
||||
|
||||
|
||||
class LLMPreviewRequest(BaseModel):
    """Request body for a one-shot LLM chat preview."""

    message: str
    system_prompt: Optional[str] = None
    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    api_key: Optional[str] = None
|
||||
|
||||
|
||||
class LLMPreviewResponse(BaseModel):
    """Result of an LLM chat preview call."""

    success: bool
    reply: Optional[str] = None
    usage: Optional[dict] = None
    latency_ms: Optional[int] = None
    error: Optional[str] = None
|
||||
|
||||
|
||||
# ============ ASR Model ============
|
||||
class ASRModelBase(BaseModel):
    """Shared ASR model configuration fields."""

    name: str
    vendor: str
    language: str  # "zh" | "en" | "Multi-lingual"
    base_url: str
    api_key: str
    model_name: Optional[str] = None
    enabled: bool = True
|
||||
|
||||
|
||||
class ASRModelCreate(ASRModelBase):
    """Payload for creating an ASR model with recognition options."""

    id: Optional[str] = None
    hotwords: List[str] = []
    enable_punctuation: bool = True
    enable_normalization: bool = True
|
||||
|
||||
|
||||
class ASRModelUpdate(BaseModel):
    """Partial update payload for an ASR model."""

    name: Optional[str] = None
    vendor: Optional[str] = None
    language: Optional[str] = None
    base_url: Optional[str] = None
    api_key: Optional[str] = None
    model_name: Optional[str] = None
    hotwords: Optional[List[str]] = None
    enable_punctuation: Optional[bool] = None
    enable_normalization: Optional[bool] = None
    enabled: Optional[bool] = None
|
||||
|
||||
|
||||
class ASRModelOut(ASRModelBase):
    """ASR model representation returned by the API."""

    id: str
    user_id: int
    hotwords: List[str] = []
    enable_punctuation: bool = True
    enable_normalization: bool = True
    created_at: Optional[datetime] = None

    class Config:
        # Allow constructing this schema directly from ORM rows.
        from_attributes = True
|
||||
|
||||
|
||||
class ASRTestRequest(BaseModel):
    """Audio input for an ASR test; exactly one of url or data is expected."""

    audio_url: Optional[str] = None
    audio_data: Optional[str] = None  # base64 encoded
|
||||
|
||||
|
||||
class ASRTestResponse(BaseModel):
    """Result of an ASR recognition test."""

    success: bool
    transcript: Optional[str] = None
    language: Optional[str] = None
    confidence: Optional[float] = None
    duration_ms: Optional[int] = None
    latency_ms: Optional[int] = None
    message: Optional[str] = None
    error: Optional[str] = None
|
||||
|
||||
|
||||
# ============ Tool Resource ============
|
||||
class ToolResourceBase(BaseModel):
    """Shared fields describing an HTTP-backed tool resource."""

    name: str
    description: str = ""
    category: str = "system"  # system/query
    icon: str = "Wrench"
    http_method: str = "GET"
    http_url: Optional[str] = None
    http_headers: Dict[str, str] = Field(default_factory=dict)
    http_timeout_ms: int = 10000
    parameter_schema: Dict[str, Any] = Field(default_factory=dict)
    parameter_defaults: Dict[str, Any] = Field(default_factory=dict)
    wait_for_response: bool = False
    enabled: bool = True
|
||||
|
||||
|
||||
class ToolResourceCreate(ToolResourceBase):
    """Payload for creating a tool resource; id is generated when omitted."""

    id: Optional[str] = None
|
||||
|
||||
|
||||
class ToolResourceUpdate(BaseModel):
    """Partial update payload for a tool resource."""

    name: Optional[str] = None
    description: Optional[str] = None
    category: Optional[str] = None
    icon: Optional[str] = None
    http_method: Optional[str] = None
    http_url: Optional[str] = None
    http_headers: Optional[Dict[str, str]] = None
    http_timeout_ms: Optional[int] = None
    parameter_schema: Optional[Dict[str, Any]] = None
    parameter_defaults: Optional[Dict[str, Any]] = None
    wait_for_response: Optional[bool] = None
    enabled: Optional[bool] = None
|
||||
|
||||
|
||||
class ToolResourceOut(ToolResourceBase):
    """Tool resource representation returned by the API."""

    id: str
    user_id: Optional[int] = None
    is_system: bool = False
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    class Config:
        # Allow constructing this schema directly from ORM rows.
        from_attributes = True
|
||||
@@ -22,37 +279,131 @@ class VoiceOut(VoiceBase):
|
||||
# ============ Assistant ============
|
||||
class AssistantBase(BaseModel):
    """Shared assistant configuration fields (API uses camelCase names)."""

    name: str
    firstTurnMode: str = "bot_first"
    opener: str = ""
    manualOpenerToolCalls: List[Dict[str, Any]] = []
    generatedOpenerEnabled: bool = False
    openerAudioEnabled: bool = False
    prompt: str = ""
    knowledgeBaseId: Optional[str] = None
    language: str = "zh"
    voiceOutputEnabled: bool = True
    voice: Optional[str] = None
    speed: float = 1.0
    hotwords: List[str] = []
    tools: List[str] = []
    asrInterimEnabled: bool = False
    botCannotBeInterrupted: bool = False
    interruptionSensitivity: int = 500
    configMode: str = "platform"
    apiUrl: Optional[str] = None
    apiKey: Optional[str] = None
    appId: Optional[str] = None
    # Model associations
    llmModelId: Optional[str] = None
    asrModelId: Optional[str] = None
    embeddingModelId: Optional[str] = None
    rerankModelId: Optional[str] = None
|
||||
|
||||
|
||||
class AssistantCreate(AssistantBase):
    """Payload for creating an assistant (same fields as AssistantBase)."""

    pass
|
||||
|
||||
|
||||
class AssistantUpdate(BaseModel):
    """Partial update payload for an assistant; unset fields are untouched.

    Fix: the extracted diff left a stale, bodiless ``class
    AssistantUpdate(AssistantBase):`` header immediately before this
    definition, which is a syntax error; only the all-Optional BaseModel
    version is kept.
    """

    name: Optional[str] = None
    firstTurnMode: Optional[str] = None
    opener: Optional[str] = None
    manualOpenerToolCalls: Optional[List[Dict[str, Any]]] = None
    generatedOpenerEnabled: Optional[bool] = None
    openerAudioEnabled: Optional[bool] = None
    prompt: Optional[str] = None
    knowledgeBaseId: Optional[str] = None
    language: Optional[str] = None
    voiceOutputEnabled: Optional[bool] = None
    voice: Optional[str] = None
    speed: Optional[float] = None
    hotwords: Optional[List[str]] = None
    tools: Optional[List[str]] = None
    asrInterimEnabled: Optional[bool] = None
    botCannotBeInterrupted: Optional[bool] = None
    interruptionSensitivity: Optional[int] = None
    configMode: Optional[str] = None
    apiUrl: Optional[str] = None
    apiKey: Optional[str] = None
    appId: Optional[str] = None
    llmModelId: Optional[str] = None
    asrModelId: Optional[str] = None
    embeddingModelId: Optional[str] = None
    rerankModelId: Optional[str] = None
|
||||
|
||||
|
||||
class AssistantOut(AssistantBase):
    """Assistant representation returned by the API."""

    id: str
    callCount: int = 0
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    class Config:
        # Allow constructing this schema directly from ORM rows.
        from_attributes = True
|
||||
|
||||
|
||||
class AssistantRuntimeMetadata(BaseModel):
    """Canonical runtime metadata payload consumed by engine session.start."""

    # Extra keys are preserved so unknown fields pass through to the engine.
    model_config = ConfigDict(extra="allow")

    systemPrompt: str = ""
    firstTurnMode: str = "bot_first"
    greeting: str = ""
    generatedOpenerEnabled: bool = False
    manualOpenerToolCalls: List[Dict[str, Any]] = Field(default_factory=list)
    output: Dict[str, Any] = Field(default_factory=dict)
    bargeIn: Dict[str, Any] = Field(default_factory=dict)
    services: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    tools: List[Any] = Field(default_factory=list)
    knowledgeBaseId: Optional[str] = None
    knowledge: Dict[str, Any] = Field(default_factory=dict)
    history: Dict[str, Any] = Field(default_factory=dict)
    openerAudio: Dict[str, Any] = Field(default_factory=dict)
    assistantId: Optional[str] = None
    configVersionId: Optional[str] = None
|
||||
|
||||
|
||||
class AssistantEngineConfigResponse(BaseModel):
    """Resolved engine configuration for an assistant, with provenance."""

    assistantId: str
    configVersionId: Optional[str] = None
    assistant: AssistantRuntimeMetadata
    sessionStartMetadata: AssistantRuntimeMetadata
    sources: Dict[str, Optional[str]] = Field(default_factory=dict)
    warnings: List[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class AssistantOpenerAudioGenerateRequest(BaseModel):
    """Request to (re)generate opener audio; text overrides the stored opener."""

    text: Optional[str] = None
|
||||
|
||||
|
||||
class AssistantOpenerAudioOut(BaseModel):
    """State of an assistant's pre-generated opener audio clip."""

    enabled: bool = False
    ready: bool = False
    encoding: str = "pcm_s16le"
    sample_rate_hz: int = 16000
    channels: int = 1
    duration_ms: int = 0
    updated_at: Optional[datetime] = None
    text_hash: Optional[str] = None
    tts_fingerprint: Optional[str] = None
|
||||
|
||||
|
||||
class AssistantStats(BaseModel):
|
||||
assistant_id: str
|
||||
total_calls: int = 0
|
||||
connected_calls: int = 0
|
||||
missed_calls: int = 0
|
||||
avg_duration_seconds: float = 0.0
|
||||
today_calls: int = 0
|
||||
|
||||
|
||||
# ============ Knowledge Base ============
|
||||
class KnowledgeDocument(BaseModel):
|
||||
id: str
|
||||
@@ -137,24 +488,82 @@ class KnowledgeStats(BaseModel):
|
||||
|
||||
# ============ Workflow ============
|
||||
class WorkflowNode(BaseModel):
|
||||
name: str
|
||||
type: str
|
||||
model_config = ConfigDict(extra="allow")
|
||||
|
||||
id: Optional[str] = None
|
||||
name: str = ""
|
||||
type: str = "assistant"
|
||||
isStart: Optional[bool] = None
|
||||
metadata: dict
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
prompt: Optional[str] = None
|
||||
messagePlan: Optional[dict] = None
|
||||
variableExtractionPlan: Optional[dict] = None
|
||||
tool: Optional[dict] = None
|
||||
globalNodePlan: Optional[dict] = None
|
||||
messagePlan: Optional[Dict[str, Any]] = None
|
||||
variableExtractionPlan: Optional[Dict[str, Any]] = None
|
||||
tool: Optional[Dict[str, Any]] = None
|
||||
globalNodePlan: Optional[Dict[str, Any]] = None
|
||||
assistantId: Optional[str] = None
|
||||
assistant: Optional[Dict[str, Any]] = None
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def _normalize_legacy_node(cls, data: Any) -> Any:
|
||||
if not isinstance(data, dict):
|
||||
return data
|
||||
raw = dict(data)
|
||||
node_id = raw.get("id") or raw.get("name")
|
||||
if not node_id:
|
||||
node_id = f"node_{abs(hash(str(raw))) % 100000}"
|
||||
raw["id"] = str(node_id)
|
||||
raw["name"] = str(raw.get("name") or raw["id"])
|
||||
|
||||
node_type = str(raw.get("type") or "assistant").lower()
|
||||
if node_type == "conversation":
|
||||
node_type = "assistant"
|
||||
elif node_type == "human":
|
||||
node_type = "human_transfer"
|
||||
elif node_type not in {"start", "assistant", "tool", "human_transfer", "end"}:
|
||||
node_type = "assistant"
|
||||
raw["type"] = node_type
|
||||
|
||||
metadata = raw.get("metadata")
|
||||
if not isinstance(metadata, dict):
|
||||
metadata = {}
|
||||
if "position" not in metadata and isinstance(raw.get("position"), dict):
|
||||
metadata["position"] = raw.get("position")
|
||||
raw["metadata"] = metadata
|
||||
|
||||
if raw.get("isStart") is None and node_type == "start":
|
||||
raw["isStart"] = True
|
||||
return raw
|
||||
|
||||
|
||||
class WorkflowEdge(BaseModel):
|
||||
from_: str
|
||||
to: str
|
||||
label: Optional[str] = None
|
||||
model_config = ConfigDict(extra="allow")
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
id: Optional[str] = None
|
||||
fromNodeId: str
|
||||
toNodeId: str
|
||||
label: Optional[str] = None
|
||||
condition: Optional[Dict[str, Any]] = None
|
||||
priority: int = 100
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def _normalize_legacy_edge(cls, data: Any) -> Any:
|
||||
if not isinstance(data, dict):
|
||||
return data
|
||||
raw = dict(data)
|
||||
from_node = raw.get("fromNodeId") or raw.get("from") or raw.get("from_") or raw.get("source")
|
||||
to_node = raw.get("toNodeId") or raw.get("to") or raw.get("target")
|
||||
raw["fromNodeId"] = str(from_node or "")
|
||||
raw["toNodeId"] = str(to_node or "")
|
||||
if raw.get("id") is None:
|
||||
raw["id"] = f"e_{raw['fromNodeId']}_{raw['toNodeId']}"
|
||||
if raw.get("condition") is None:
|
||||
if raw.get("label"):
|
||||
raw["condition"] = {"type": "contains", "source": "user", "value": str(raw["label"])}
|
||||
else:
|
||||
raw["condition"] = {"type": "always"}
|
||||
return raw
|
||||
|
||||
|
||||
class WorkflowBase(BaseModel):
|
||||
@@ -163,29 +572,85 @@ class WorkflowBase(BaseModel):
|
||||
createdAt: str = ""
|
||||
updatedAt: str = ""
|
||||
globalPrompt: Optional[str] = None
|
||||
nodes: List[dict] = []
|
||||
edges: List[dict] = []
|
||||
nodes: List[WorkflowNode] = Field(default_factory=list)
|
||||
edges: List[WorkflowEdge] = Field(default_factory=list)
|
||||
|
||||
|
||||
class WorkflowCreate(WorkflowBase):
|
||||
pass
|
||||
@model_validator(mode="after")
|
||||
def _validate_graph(self) -> "WorkflowCreate":
|
||||
_validate_workflow_graph(self.nodes, self.edges)
|
||||
return self
|
||||
|
||||
|
||||
class WorkflowUpdate(BaseModel):
|
||||
name: Optional[str] = None
|
||||
nodeCount: Optional[int] = None
|
||||
nodes: Optional[List[dict]] = None
|
||||
edges: Optional[List[dict]] = None
|
||||
nodes: Optional[List[WorkflowNode]] = None
|
||||
edges: Optional[List[WorkflowEdge]] = None
|
||||
globalPrompt: Optional[str] = None
|
||||
|
||||
@model_validator(mode="after")
|
||||
def _validate_partial_graph(self) -> "WorkflowUpdate":
|
||||
if self.nodes is not None and self.edges is not None:
|
||||
_validate_workflow_graph(self.nodes, self.edges)
|
||||
return self
|
||||
|
||||
|
||||
class WorkflowOut(WorkflowBase):
|
||||
id: str
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def _normalize_db_fields(cls, data: Any) -> Any:
|
||||
if isinstance(data, dict):
|
||||
raw = dict(data)
|
||||
else:
|
||||
raw = {
|
||||
"id": getattr(data, "id", None),
|
||||
"name": getattr(data, "name", None),
|
||||
"node_count": getattr(data, "node_count", None),
|
||||
"created_at": getattr(data, "created_at", None),
|
||||
"updated_at": getattr(data, "updated_at", None),
|
||||
"global_prompt": getattr(data, "global_prompt", None),
|
||||
"nodes": getattr(data, "nodes", None),
|
||||
"edges": getattr(data, "edges", None),
|
||||
}
|
||||
|
||||
if "nodeCount" not in raw and raw.get("node_count") is not None:
|
||||
raw["nodeCount"] = raw["node_count"]
|
||||
if "createdAt" not in raw and raw.get("created_at") is not None:
|
||||
raw["createdAt"] = raw["created_at"]
|
||||
if "updatedAt" not in raw and raw.get("updated_at") is not None:
|
||||
raw["updatedAt"] = raw["updated_at"]
|
||||
if "globalPrompt" not in raw and raw.get("global_prompt") is not None:
|
||||
raw["globalPrompt"] = raw["global_prompt"]
|
||||
return raw
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
def _validate_workflow_graph(nodes: List[WorkflowNode], edges: List[WorkflowEdge]) -> None:
|
||||
if not nodes:
|
||||
raise ValueError("Workflow must include at least one node")
|
||||
|
||||
node_ids = [node.id for node in nodes if node.id]
|
||||
if len(node_ids) != len(set(node_ids)):
|
||||
raise ValueError("Workflow node ids must be unique")
|
||||
|
||||
starts = [node for node in nodes if node.isStart or node.type == "start"]
|
||||
if not starts:
|
||||
raise ValueError("Workflow must define a start node (isStart=true or type=start)")
|
||||
|
||||
known = set(node_ids)
|
||||
for edge in edges:
|
||||
if edge.fromNodeId not in known:
|
||||
raise ValueError(f"Workflow edge fromNodeId not found: {edge.fromNodeId}")
|
||||
if edge.toNodeId not in known:
|
||||
raise ValueError(f"Workflow edge toNodeId not found: {edge.toNodeId}")
|
||||
|
||||
|
||||
# ============ Call Record ============
|
||||
class TranscriptSegment(BaseModel):
|
||||
turnIndex: int
|
||||
@@ -196,18 +661,24 @@ class TranscriptSegment(BaseModel):
|
||||
endMs: int
|
||||
durationMs: Optional[int] = None
|
||||
audioUrl: Optional[str] = None
|
||||
emotion: Optional[str] = None
|
||||
|
||||
|
||||
class CallRecordCreate(BaseModel):
|
||||
user_id: int
|
||||
assistant_id: Optional[str] = None
|
||||
source: str = "debug"
|
||||
status: Optional[str] = None
|
||||
cost: Optional[float] = None
|
||||
|
||||
|
||||
class CallRecordUpdate(BaseModel):
|
||||
status: Optional[str] = None
|
||||
summary: Optional[str] = None
|
||||
duration_seconds: Optional[int] = None
|
||||
ended_at: Optional[str] = None
|
||||
cost: Optional[float] = None
|
||||
metadata: Optional[dict] = None
|
||||
|
||||
|
||||
class CallRecordOut(BaseModel):
|
||||
@@ -220,6 +691,9 @@ class CallRecordOut(BaseModel):
|
||||
ended_at: Optional[str] = None
|
||||
duration_seconds: Optional[int] = None
|
||||
summary: Optional[str] = None
|
||||
cost: float = 0.0
|
||||
metadata: dict = {}
|
||||
created_at: Optional[datetime] = None
|
||||
transcripts: List[TranscriptSegment] = []
|
||||
|
||||
class Config:
|
||||
@@ -246,6 +720,19 @@ class TranscriptOut(TranscriptCreate):
|
||||
from_attributes = True
|
||||
|
||||
|
||||
# ============ History Stats ============
|
||||
class HistoryStats(BaseModel):
|
||||
total_calls: int = 0
|
||||
connected_calls: int = 0
|
||||
missed_calls: int = 0
|
||||
failed_calls: int = 0
|
||||
avg_duration_seconds: float = 0.0
|
||||
total_cost: float = 0.0
|
||||
by_status: dict = {}
|
||||
by_source: dict = {}
|
||||
daily_trend: List[dict] = []
|
||||
|
||||
|
||||
# ============ Dashboard ============
|
||||
class DashboardStats(BaseModel):
|
||||
totalCalls: int
|
||||
@@ -269,3 +756,9 @@ class ListResponse(BaseModel):
|
||||
page: int
|
||||
limit: int
|
||||
list: List
|
||||
|
||||
|
||||
class SearchResult(BaseModel):
|
||||
id: str
|
||||
started_at: str
|
||||
matched_content: Optional[str] = None
|
||||
|
||||
@@ -64,6 +64,8 @@ class VectorStore:
|
||||
):
|
||||
"""添加文档片段到向量库"""
|
||||
collection = self.get_collection(kb_id)
|
||||
if collection is None:
|
||||
raise ValueError(f"Knowledge collection not found for kb_id={kb_id}")
|
||||
|
||||
if ids is None:
|
||||
ids = [f"chunk-{i}" for i in range(len(documents))]
|
||||
@@ -93,6 +95,11 @@ class VectorStore:
|
||||
) -> Dict:
|
||||
"""检索相似文档"""
|
||||
collection = self.get_collection(kb_id)
|
||||
if collection is None:
|
||||
raise ValueError(
|
||||
f"Knowledge collection not found for kb_id={kb_id}. "
|
||||
"Please ensure the knowledge base exists and documents are indexed."
|
||||
)
|
||||
|
||||
# 生成查询向量
|
||||
query_embedding = embedding_service.embed_query(query)
|
||||
@@ -108,6 +115,8 @@ class VectorStore:
|
||||
def get_stats(self, kb_id: str) -> Dict:
|
||||
"""获取向量库统计"""
|
||||
collection = self.get_collection(kb_id)
|
||||
if collection is None:
|
||||
raise ValueError(f"Knowledge collection not found for kb_id={kb_id}")
|
||||
return {
|
||||
"count": collection.count(),
|
||||
"kb_id": kb_id
|
||||
@@ -116,11 +125,15 @@ class VectorStore:
|
||||
def delete_documents(self, kb_id: str, ids: List[str]):
|
||||
"""删除指定文档片段"""
|
||||
collection = self.get_collection(kb_id)
|
||||
if collection is None:
|
||||
return
|
||||
collection.delete(ids=ids)
|
||||
|
||||
def delete_by_metadata(self, kb_id: str, document_id: str):
|
||||
"""根据文档 ID 删除所有片段"""
|
||||
collection = self.get_collection(kb_id)
|
||||
if collection is None:
|
||||
return
|
||||
results = collection.get(where={"document_id": document_id})
|
||||
if results["ids"]:
|
||||
collection.delete(ids=results["ids"])
|
||||
@@ -244,9 +257,6 @@ embedding_service = EmbeddingService()
|
||||
|
||||
def search_knowledge(kb_id: str, query: str, n_results: int = 5) -> Dict:
|
||||
"""知识库检索"""
|
||||
# 生成查询向量
|
||||
query_vector = embedding_service.embed_query(query)
|
||||
|
||||
# 检索
|
||||
results = vector_store.search(
|
||||
kb_id=kb_id,
|
||||
|
||||
439
api/docs/asr.md
Normal file
439
api/docs/asr.md
Normal file
@@ -0,0 +1,439 @@
|
||||
# 语音识别 (ASR Model) API
|
||||
|
||||
语音识别 API 用于管理语音识别模型的配置和调用。
|
||||
|
||||
## 基础信息
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| Base URL | `/api/v1/asr` |
|
||||
| 认证方式 | Bearer Token (预留) |
|
||||
|
||||
---
|
||||
|
||||
## 数据模型
|
||||
|
||||
### ASRModel
|
||||
|
||||
```typescript
|
||||
interface ASRModel {
|
||||
id: string; // 模型唯一标识 (8位UUID)
|
||||
user_id: number; // 所属用户ID
|
||||
name: string; // 模型显示名称
|
||||
vendor: string; // 供应商: "OpenAI Compatible" | "Paraformer" | 等
|
||||
language: string; // 识别语言: "zh" | "en" | "Multi-lingual"
|
||||
base_url: string; // API Base URL
|
||||
api_key: string; // API Key
|
||||
model_name?: string; // 模型名称,如 "whisper-1" | "paraformer-v2"
|
||||
hotwords?: string[]; // 热词列表
|
||||
enable_punctuation: boolean; // 是否启用标点
|
||||
enable_normalization: boolean; // 是否启用文本规范化
|
||||
enabled: boolean; // 是否启用
|
||||
created_at: string;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API 端点
|
||||
|
||||
### 1. 获取 ASR 模型列表
|
||||
|
||||
```http
|
||||
GET /api/v1/asr
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| language | string | 否 | - | 过滤语言: "zh" \| "en" \| "Multi-lingual" |
|
||||
| enabled | boolean | 否 | - | 过滤启用状态 |
|
||||
| page | int | 否 | 1 | 页码 |
|
||||
| limit | int | 否 | 50 | 每页数量 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total": 3,
|
||||
"page": 1,
|
||||
"limit": 50,
|
||||
"list": [
|
||||
{
|
||||
"id": "abc12345",
|
||||
"user_id": 1,
|
||||
"name": "Whisper 多语种识别",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"language": "Multi-lingual",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-***",
|
||||
"model_name": "whisper-1",
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true,
|
||||
"enabled": true,
|
||||
"created_at": "2024-01-15T10:30:00Z"
|
||||
},
|
||||
{
|
||||
"id": "def67890",
|
||||
"user_id": 1,
|
||||
"name": "SenseVoice 中文识别",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"language": "zh",
|
||||
"base_url": "https://api.siliconflow.cn/v1",
|
||||
"api_key": "sf-***",
|
||||
"model_name": "paraformer-v2",
|
||||
"hotwords": ["小助手", "帮我"],
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true,
|
||||
"enabled": true,
|
||||
"created_at": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. 获取单个 ASR 模型详情
|
||||
|
||||
```http
|
||||
GET /api/v1/asr/{id}
|
||||
```
|
||||
|
||||
**Path Parameters:**
|
||||
|
||||
| 参数 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 模型ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "abc12345",
|
||||
"user_id": 1,
|
||||
"name": "Whisper 多语种识别",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"language": "Multi-lingual",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-***",
|
||||
"model_name": "whisper-1",
|
||||
"hotwords": [],
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true,
|
||||
"enabled": true,
|
||||
"created_at": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. 创建 ASR 模型
|
||||
|
||||
```http
|
||||
POST /api/v1/asr
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "SenseVoice 中文识别",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"language": "zh",
|
||||
"base_url": "https://api.siliconflow.cn/v1",
|
||||
"api_key": "sk-your-api-key",
|
||||
"model_name": "paraformer-v2",
|
||||
"hotwords": ["小助手", "帮我"],
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true,
|
||||
"enabled": true
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| name | string | 是 | 模型显示名称 |
|
||||
| vendor | string | 是 | 供应商: "OpenAI Compatible" / "Paraformer" |
|
||||
| language | string | 是 | 语言: "zh" / "en" / "Multi-lingual" |
|
||||
| base_url | string | 是 | API Base URL |
|
||||
| api_key | string | 是 | API Key |
|
||||
| model_name | string | 否 | 模型名称 |
|
||||
| hotwords | string[] | 否 | 热词列表,提升识别准确率 |
|
||||
| enable_punctuation | boolean | 否 | 是否输出标点,默认 true |
|
||||
| enable_normalization | boolean | 否 | 是否文本规范化,默认 true |
|
||||
| enabled | boolean | 否 | 是否启用,默认 true |
|
||||
| id | string | 否 | 指定模型ID,默认自动生成 |
|
||||
|
||||
---
|
||||
|
||||
### 4. 更新 ASR 模型
|
||||
|
||||
```http
|
||||
PUT /api/v1/asr/{id}
|
||||
```
|
||||
|
||||
**Request Body:** (部分更新)
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "Whisper-1 优化版",
|
||||
"language": "zh",
|
||||
"enable_punctuation": true,
|
||||
"hotwords": ["新词1", "新词2"]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5. 删除 ASR 模型
|
||||
|
||||
```http
|
||||
DELETE /api/v1/asr/{id}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "Deleted successfully"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 6. 测试 ASR 模型
|
||||
|
||||
```http
|
||||
POST /api/v1/asr/{id}/test
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"audio_url": "https://example.com/test-audio.wav"
|
||||
}
|
||||
```
|
||||
|
||||
或使用 Base64 编码的音频数据:
|
||||
|
||||
```json
|
||||
{
|
||||
"audio_data": "UklGRi..."
|
||||
}
|
||||
```
|
||||
|
||||
**Response (成功):**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"transcript": "您好,请问有什么可以帮助您?",
|
||||
"language": "zh",
|
||||
"confidence": 0.95,
|
||||
"latency_ms": 500
|
||||
}
|
||||
```
|
||||
|
||||
**Response (失败):**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"error": "HTTP Error: 401 - Unauthorized"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 7. 转写音频
|
||||
|
||||
```http
|
||||
POST /api/v1/asr/{id}/transcribe
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| audio_url | string | 否* | 音频文件URL |
|
||||
| audio_data | string | 否* | Base64编码的音频数据 |
|
||||
| hotwords | string[] | 否 | 热词列表 |
|
||||
|
||||
*二选一,至少提供一个
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"transcript": "您好,请问有什么可以帮助您?",
|
||||
"language": "zh",
|
||||
"confidence": 0.95
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 8. 预览 ASR (上传音频文件)
|
||||
|
||||
```http
|
||||
POST /api/v1/asr/{id}/preview
|
||||
```
|
||||
|
||||
上传音频文件进行识别预览。
|
||||
|
||||
**Request (multipart/form-data):**
|
||||
|
||||
| 参数 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| file | file | 是 | 音频文件 (audio/* | string | 否 | 指定语言,覆盖) |
|
||||
| language模型配置 |
|
||||
| api_key | string | 否 | 覆盖模型配置的 API Key |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"transcript": "您好,请问有什么可以帮助您?",
|
||||
"language": "zh",
|
||||
"confidence": 0.95,
|
||||
"latency_ms": 1500
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Schema 定义
|
||||
|
||||
```python
|
||||
from enum import Enum
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
|
||||
class ASRLanguage(str, Enum):
|
||||
ZH = "zh"
|
||||
EN = "en"
|
||||
MULTILINGUAL = "Multi-lingual"
|
||||
|
||||
class ASRModelBase(BaseModel):
|
||||
name: str
|
||||
vendor: str
|
||||
language: str # "zh" | "en" | "Multi-lingual"
|
||||
base_url: str
|
||||
api_key: str
|
||||
model_name: Optional[str] = None
|
||||
hotwords: List[str] = []
|
||||
enable_punctuation: bool = True
|
||||
enable_normalization: bool = True
|
||||
enabled: bool = True
|
||||
|
||||
class ASRModelCreate(ASRModelBase):
|
||||
id: Optional[str] = None
|
||||
|
||||
class ASRModelUpdate(BaseModel):
|
||||
name: Optional[str] = None
|
||||
language: Optional[str] = None
|
||||
base_url: Optional[str] = None
|
||||
api_key: Optional[str] = None
|
||||
model_name: Optional[str] = None
|
||||
hotwords: Optional[List[str]] = None
|
||||
enable_punctuation: Optional[bool] = None
|
||||
enable_normalization: Optional[bool] = None
|
||||
enabled: Optional[bool] = None
|
||||
|
||||
class ASRModelOut(ASRModelBase):
|
||||
id: str
|
||||
user_id: int
|
||||
created_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
class ASRTestRequest(BaseModel):
|
||||
audio_url: Optional[str] = None
|
||||
audio_data: Optional[str] = None # base64 encoded
|
||||
|
||||
class ASRTestResponse(BaseModel):
|
||||
success: bool
|
||||
transcript: Optional[str] = None
|
||||
language: Optional[str] = None
|
||||
confidence: Optional[float] = None
|
||||
latency_ms: Optional[int] = None
|
||||
error: Optional[str] = None
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 供应商配置示例
|
||||
|
||||
### OpenAI Whisper
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-xxx",
|
||||
"model_name": "whisper-1",
|
||||
"language": "Multi-lingual",
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true
|
||||
}
|
||||
```
|
||||
|
||||
### OpenAI Compatible Paraformer
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.siliconflow.cn/v1",
|
||||
"api_key": "sf-xxx",
|
||||
"model_name": "paraformer-v2",
|
||||
"language": "zh",
|
||||
"hotwords": ["产品名称", "公司名"],
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 单元测试
|
||||
|
||||
项目包含完整的单元测试,位于 `api/tests/test_asr.py`。
|
||||
|
||||
### 测试用例概览
|
||||
|
||||
| 测试方法 | 说明 |
|
||||
|----------|------|
|
||||
| test_get_asr_models_empty | 空数据库获取测试 |
|
||||
| test_create_asr_model | 创建模型测试 |
|
||||
| test_create_asr_model_minimal | 最小数据创建测试 |
|
||||
| test_get_asr_model_by_id | 获取单个模型测试 |
|
||||
| test_get_asr_model_not_found | 获取不存在模型测试 |
|
||||
| test_update_asr_model | 更新模型测试 |
|
||||
| test_delete_asr_model | 删除模型测试 |
|
||||
| test_list_asr_models_with_pagination | 分页测试 |
|
||||
| test_filter_asr_models_by_language | 按语言过滤测试 |
|
||||
| test_filter_asr_models_by_enabled | 按启用状态过滤测试 |
|
||||
| test_create_asr_model_with_hotwords | 热词配置测试 |
|
||||
| test_test_asr_model_siliconflow | OpenAI Compatible 供应商测试 |
|
||||
| test_test_asr_model_openai | OpenAI 供应商测试 |
|
||||
| test_different_asr_languages | 多语言测试 |
|
||||
| test_different_asr_vendors | 多供应商测试 |
|
||||
|
||||
### 运行测试
|
||||
|
||||
```bash
|
||||
# 运行 ASR 相关测试
|
||||
pytest api/tests/test_asr.py -v
|
||||
|
||||
# 运行所有测试
|
||||
pytest api/tests/ -v
|
||||
```
|
||||
@@ -20,24 +20,31 @@ interface Assistant {
|
||||
id: string; // 助手唯一标识 (8位UUID)
|
||||
user_id: number; // 所属用户ID
|
||||
name: string; // 助手名称
|
||||
call_count: number; // 调用次数
|
||||
opener: string; // 开场白
|
||||
callCount: number; // 调用次数
|
||||
firstTurnMode: string; // 首轮模式: "bot_first" | "user_first"
|
||||
opener: string; // 开场白
|
||||
generatedOpenerEnabled: boolean; // 是否启用生成式开场白
|
||||
openerAudioEnabled: boolean; // 是否启用预生成开场音频
|
||||
openerAudioReady: boolean; // 开场音频是否已生成
|
||||
openerAudioDurationMs: number; // 开场音频时长(ms)
|
||||
prompt: string; // 系统提示词/人格设定
|
||||
knowledge_base_id?: string; // 关联知识库ID
|
||||
knowledgeBaseId?: string; // 关联知识库ID
|
||||
language: string; // 语言: "zh" | "en"
|
||||
voice?: string; // 声音ID
|
||||
voiceOutputEnabled: boolean; // 是否启用语音输出
|
||||
voice?: string; // 声音ID
|
||||
speed: number; // 语速 (0.5-2.0)
|
||||
hotwords: string[]; // 热词列表
|
||||
tools: string[]; // 启用的工具ID列表
|
||||
interruption_sensitivity: number; // 打断灵敏度 (ms)
|
||||
config_mode: string; // 配置模式: "platform" | "dify" | "fastgpt" | "none"
|
||||
api_url?: string; // 外部API URL
|
||||
api_key?: string; // 外部API Key
|
||||
// 模型关联 (新增)
|
||||
llm_model_id?: string; // LLM模型ID
|
||||
asr_model_id?: string; // ASR模型ID
|
||||
embedding_model_id?: string; // Embedding模型ID
|
||||
rerank_model_id?: string; // Rerank模型ID
|
||||
hotwords: string[]; // 热词列表
|
||||
tools: string[]; // 启用的工具ID列表
|
||||
botCannotBeInterrupted: boolean; // 是否禁止打断
|
||||
interruptionSensitivity: number; // 打断灵敏度 (ms)
|
||||
configMode: string; // 配置模式: "platform" | "dify" | "fastgpt" | "none"
|
||||
apiUrl?: string; // 外部API URL
|
||||
apiKey?: string; // 外部API Key
|
||||
// 模型关联
|
||||
llmModelId?: string; // LLM模型ID
|
||||
asrModelId?: string; // ASR模型ID
|
||||
embeddingModelId?: string; // Embedding模型ID
|
||||
rerankModelId?: string; // Rerank模型ID
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
}
|
||||
@@ -219,22 +226,109 @@ DELETE. 删除助手
|
||||
|
||||
---
|
||||
|
||||
### 6. 获取助手调用统计
|
||||
### 6. 获取助手引擎配置
|
||||
|
||||
```http
|
||||
GET /api/v1/assistants/{id}/stats
|
||||
GET /api/v1/assistants/{id}/config
|
||||
```
|
||||
|
||||
获取助手的运行时引擎配置,包含 LLM、ASR、TTS、知识库等服务的完整配置信息。
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"assistantId": "abc12345",
|
||||
"configVersionId": "asst_abc12345_20240115103000",
|
||||
"assistant": {
|
||||
"systemPrompt": "你是一个专业的客服人员...",
|
||||
"firstTurnMode": "bot_first",
|
||||
"greeting": "您好,请问有什么可以帮助您?",
|
||||
"generatedOpenerEnabled": false,
|
||||
"output": {"mode": "audio"},
|
||||
"bargeIn": {"enabled": true, "minDurationMs": 500},
|
||||
"services": {
|
||||
"llm": {"provider": "openai", "model": "gpt-4o", "apiKey": "...", "baseUrl": "..."},
|
||||
"asr": {"provider": "openai_compatible", "model": "paraformer-realtime-v2", "apiKey": "..."},
|
||||
"tts": {"enabled": true, "provider": "dashscope", "model": "qwen3-tts-flash-realtime", "voice": "Cherry", "speed": 1.0}
|
||||
},
|
||||
"tools": [...],
|
||||
"knowledgeBaseId": "kb_001",
|
||||
"openerAudio": {"enabled": true, "ready": true, "pcmUrl": "/api/assistants/abc12345/opener-audio/pcm"}
|
||||
},
|
||||
"sessionStartMetadata": {...},
|
||||
"sources": {
|
||||
"llmModelId": "llm_001",
|
||||
"asrModelId": "asr_001",
|
||||
"voiceId": "voice_001",
|
||||
"knowledgeBaseId": "kb_001"
|
||||
},
|
||||
"warnings": []
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 7. 获取助手开场音频状态
|
||||
|
||||
```http
|
||||
GET /api/v1/assistants/{id}/opener-audio
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"assistant_id": "abc12345",
|
||||
"total_calls": 128,
|
||||
"connected_calls": 120,
|
||||
"missed_calls": 8,
|
||||
"avg_duration_seconds": 180,
|
||||
"today_calls": 15
|
||||
"enabled": true,
|
||||
"ready": true,
|
||||
"encoding": "pcm_s16le",
|
||||
"sampleRateHz": 16000,
|
||||
"channels": 1,
|
||||
"durationMs": 2500,
|
||||
"textHash": "abc123...",
|
||||
"ttsFingerprint": "def456...",
|
||||
"updatedAt": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 8. 下载开场音频 PCM 文件
|
||||
|
||||
```http
|
||||
GET /api/v1/assistants/{id}/opener-audio/pcm
|
||||
```
|
||||
|
||||
返回 PCM 音频文件 (application/octet-stream)。
|
||||
|
||||
---
|
||||
|
||||
### 9. 生成开场音频
|
||||
|
||||
```http
|
||||
POST /api/v1/assistants/{id}/opener-audio/generate
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "您好,请问有什么可以帮助您?"
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"enabled": true,
|
||||
"ready": true,
|
||||
"encoding": "pcm_s16le",
|
||||
"sampleRateHz": 16000,
|
||||
"channels": 1,
|
||||
"durationMs": 2500,
|
||||
"textHash": "abc123...",
|
||||
"ttsFingerprint": "def456..."
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -289,86 +289,7 @@ GET /api/v1/history/{call_id}/audio/{turn_index}
|
||||
|
||||
---
|
||||
|
||||
### 8. 搜索通话记录
|
||||
|
||||
```http
|
||||
GET /api/v1/history/search
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| q | string | 是 | 搜索关键词 |
|
||||
| page | int | 否 | 页码 |
|
||||
| limit | int | 否 | 每页数量 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total": 5,
|
||||
"page": 1,
|
||||
"limit": 20,
|
||||
"list": [
|
||||
{
|
||||
"id": "call_001",
|
||||
"started_at": "2024-01-15T14:30:00Z",
|
||||
"matched_content": "用户咨询产品A的售后服务"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 9. 获取统计信息
|
||||
|
||||
```http
|
||||
GET /api/v1/history/stats
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| start_date | string | 否 | 开始日期 |
|
||||
| end_date | string | 否 | 结束日期 |
|
||||
| assistant_id | string | 否 | 助手ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total_calls": 150,
|
||||
"connected_calls": 135,
|
||||
"missed_calls": 15,
|
||||
"failed_calls": 0,
|
||||
"avg_duration_seconds": 180,
|
||||
"total_cost": 7.50,
|
||||
"by_status": {
|
||||
"connected": 135,
|
||||
"missed": 15,
|
||||
"failed": 0
|
||||
},
|
||||
"by_source": {
|
||||
"debug": 100,
|
||||
"external": 50
|
||||
},
|
||||
"daily_trend": [
|
||||
{
|
||||
"date": "2024-01-15",
|
||||
"calls": 20,
|
||||
"connected": 18,
|
||||
"avg_duration": 175
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 推荐的 Schema 定义
|
||||
## Schema 定义
|
||||
|
||||
```python
|
||||
# ============ Call Record ============
|
||||
@@ -440,17 +361,6 @@ class TranscriptOut(TranscriptCreate):
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
class HistoryStats(BaseModel):
|
||||
total_calls: int
|
||||
connected_calls: int
|
||||
missed_calls: int
|
||||
failed_calls: int
|
||||
avg_duration_seconds: float
|
||||
total_cost: float
|
||||
by_status: dict
|
||||
by_source: dict
|
||||
daily_trend: List[dict]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -7,9 +7,11 @@
|
||||
| 模块 | 文件 | 说明 |
|
||||
|------|------|------|
|
||||
| 小助手 | [assistant.md](./assistant.md) | AI 助手管理 |
|
||||
| 模型接入 | [model-access.md](./model-access.md) | LLM/ASR/TTS 模型配置 |
|
||||
| 语音识别 | [speech-recognition.md](./speech-recognition.md) | ASR 模型配置 |
|
||||
| 声音资源 | [voice-resources.md](./voice-resources.md) | TTS 声音库管理 |
|
||||
| LLM 模型 | [llm.md](./llm.md) | LLM 模型配置与管理 |
|
||||
| ASR 模型 | [asr.md](./asr.md) | 语音识别模型配置 |
|
||||
| 声音资源 | [voice-resources.md](./voice-resources.md) | TTS 语音配置 |
|
||||
| 工具与测试 | [tools.md](./tools.md) | 工具列表与自动测试 |
|
||||
| 知识库 | [knowledge.md](./knowledge.md) | 知识库与文档管理 |
|
||||
| 历史记录 | [history-records.md](./history-records.md) | 通话记录和转写 |
|
||||
|
||||
---
|
||||
|
||||
420
api/docs/knowledge.md
Normal file
420
api/docs/knowledge.md
Normal file
@@ -0,0 +1,420 @@
|
||||
# 知识库 (Knowledge Base) API
|
||||
|
||||
知识库 API 用于管理知识库和文档的创建、索引和搜索。
|
||||
|
||||
## 基础信息
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| Base URL | `/api/v1/knowledge` |
|
||||
| 认证方式 | Bearer Token (预留) |
|
||||
|
||||
---
|
||||
|
||||
## 数据模型
|
||||
|
||||
### KnowledgeBase
|
||||
|
||||
```typescript
|
||||
interface KnowledgeBase {
|
||||
id: string; // 知识库唯一标识 (8位UUID)
|
||||
user_id: number; // 所属用户ID
|
||||
name: string; // 知识库名称
|
||||
description: string; // 知识库描述
|
||||
embeddingModel: string; // Embedding 模型名称
|
||||
chunkSize: number; // 文档分块大小
|
||||
chunkOverlap: number; // 分块重叠大小
|
||||
docCount: number; // 文档数量
|
||||
chunkCount: number; // 切分后的文本块数量
|
||||
status: string; // 状态: "active" | "inactive"
|
||||
createdAt: string; // 创建时间
|
||||
updatedAt: string; // 更新时间
|
||||
documents: KnowledgeDocument[]; // 关联的文档列表
|
||||
}
|
||||
```
|
||||
|
||||
### KnowledgeDocument
|
||||
|
||||
```typescript
|
||||
interface KnowledgeDocument {
|
||||
id: string; // 文档唯一标识
|
||||
kb_id: string; // 所属知识库ID
|
||||
name: string; // 文档名称
|
||||
size: string; // 文件大小
|
||||
fileType: string; // 文件类型
|
||||
storageUrl: string; // 存储地址
|
||||
status: string; // 状态: "pending" | "processing" | "completed" | "failed"
|
||||
chunkCount: number; // 切分后的文本块数量
|
||||
errorMessage: string; // 错误信息
|
||||
uploadDate: string; // 上传时间
|
||||
createdAt: string; // 创建时间
|
||||
processedAt: string; // 处理完成时间
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API 端点
|
||||
|
||||
### 1. 获取知识库列表
|
||||
|
||||
```http
|
||||
GET /api/v1/knowledge/bases
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| user_id | int | 否 | 1 | 用户ID |
|
||||
| page | int | 否 | 1 | 页码 |
|
||||
| limit | int | 否 | 50 | 每页数量 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total": 2,
|
||||
"page": 1,
|
||||
"limit": 50,
|
||||
"list": [
|
||||
{
|
||||
"id": "kb_001",
|
||||
"user_id": 1,
|
||||
"name": "产品知识库",
|
||||
"description": "产品文档和FAQ",
|
||||
"embeddingModel": "text-embedding-3-small",
|
||||
"chunkSize": 500,
|
||||
"chunkOverlap": 50,
|
||||
"docCount": 10,
|
||||
"chunkCount": 150,
|
||||
"status": "active",
|
||||
"createdAt": "2024-01-15T10:30:00",
|
||||
"updatedAt": "2024-01-15T10:30:00",
|
||||
"documents": [...]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. 获取单个知识库详情
|
||||
|
||||
```http
|
||||
GET /api/v1/knowledge/bases/{kb_id}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "kb_001",
|
||||
"user_id": 1,
|
||||
"name": "产品知识库",
|
||||
"description": "产品文档和FAQ",
|
||||
"embeddingModel": "text-embedding-3-small",
|
||||
"chunkSize": 500,
|
||||
"chunkOverlap": 50,
|
||||
"docCount": 10,
|
||||
"chunkCount": 150,
|
||||
"status": "active",
|
||||
"createdAt": "2024-01-15T10:30:00",
|
||||
"updatedAt": "2024-01-15T10:30:00",
|
||||
"documents": [
|
||||
{
|
||||
"id": "doc_001",
|
||||
"kb_id": "kb_001",
|
||||
"name": "产品手册.pdf",
|
||||
"size": "1.2 MB",
|
||||
"fileType": "application/pdf",
|
||||
"storageUrl": "",
|
||||
"status": "completed",
|
||||
"chunkCount": 45,
|
||||
"errorMessage": null,
|
||||
"uploadDate": "2024-01-15T10:30:00",
|
||||
"createdAt": "2024-01-15T10:30:00",
|
||||
"processedAt": "2024-01-15T10:30:05"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. 创建知识库
|
||||
|
||||
```http
|
||||
POST /api/v1/knowledge/bases
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "产品知识库",
|
||||
"description": "产品文档和FAQ",
|
||||
"embeddingModel": "text-embedding-3-small",
|
||||
"chunkSize": 500,
|
||||
"chunkOverlap": 50
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| name | string | 是 | 知识库名称 |
|
||||
| description | string | 否 | 知识库描述 |
|
||||
| embeddingModel | string | 否 | Embedding 模型名称,默认 "text-embedding-3-small" |
|
||||
| chunkSize | int | 否 | 文档分块大小,默认 500 |
|
||||
| chunkOverlap | int | 否 | 分块重叠大小,默认 50 |
|
||||
|
||||
---
|
||||
|
||||
### 4. 更新知识库
|
||||
|
||||
```http
|
||||
PUT /api/v1/knowledge/bases/{kb_id}
|
||||
```
|
||||
|
||||
**Request Body:** (部分更新)
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "更新后的知识库名称",
|
||||
"description": "新的描述",
|
||||
"chunkSize": 800
|
||||
}
|
||||
```
|
||||
|
||||
**注意:** 如果知识库中已有索引的文档,则不能修改 embeddingModel。如需修改,请先删除所有文档。
|
||||
|
||||
---
|
||||
|
||||
### 5. 删除知识库
|
||||
|
||||
```http
|
||||
DELETE /api/v1/knowledge/bases/{kb_id}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "Deleted successfully"
|
||||
}
|
||||
```
|
||||
|
||||
**注意:** 删除知识库会同时删除向量数据库中的相关数据。
|
||||
|
||||
---
|
||||
|
||||
### 6. 上传文档
|
||||
|
||||
```http
|
||||
POST /api/v1/knowledge/bases/{kb_id}/documents
|
||||
```
|
||||
|
||||
支持两种上传方式:
|
||||
|
||||
**方式一:文件上传 (multipart/form-data)**
|
||||
|
||||
| 参数 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| file | file | 是 | 要上传的文档文件 |
|
||||
|
||||
支持的文件类型:`.txt`, `.md`, `.csv`, `.json`, `.pdf`, `.docx`
|
||||
|
||||
**方式二:仅创建文档记录 (application/json)**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "document.pdf",
|
||||
"size": "1.2 MB",
|
||||
"fileType": "application/pdf",
|
||||
"storageUrl": "https://storage.example.com/doc.pdf"
|
||||
}
|
||||
```
|
||||
|
||||
**Response (文件上传):**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "doc_001",
|
||||
"name": "产品手册.pdf",
|
||||
"size": "1.2 MB",
|
||||
"fileType": "application/pdf",
|
||||
"storageUrl": "",
|
||||
"status": "completed",
|
||||
"chunkCount": 45,
|
||||
"message": "Document uploaded and indexed"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 7. 索引文档内容
|
||||
|
||||
```http
|
||||
POST /api/v1/knowledge/bases/{kb_id}/documents/{doc_id}/index
|
||||
```
|
||||
|
||||
直接向向量数据库索引文本内容,无需上传文件。
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"content": "要索引的文本内容..."
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "Document indexed",
|
||||
"chunkCount": 10
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 8. 删除文档
|
||||
|
||||
```http
|
||||
DELETE /api/v1/knowledge/bases/{kb_id}/documents/{doc_id}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "Deleted successfully"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 9. 搜索知识库
|
||||
|
||||
```http
|
||||
POST /api/v1/knowledge/search
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"kb_id": "kb_001",
|
||||
"query": "产品退货政策",
|
||||
"nResults": 5
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| kb_id | string | 是 | 知识库ID |
|
||||
| query | string | 是 | 搜索查询文本 |
|
||||
| nResults | int | 否 | 返回结果数量,默认 5 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": "doc_001",
|
||||
"text": "我们的退货政策是...",
|
||||
"score": 0.85,
|
||||
"metadata": {
|
||||
"document_name": "退货政策.pdf",
|
||||
"chunk_index": 3
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 10. 获取知识库统计
|
||||
|
||||
```http
|
||||
GET /api/v1/knowledge/bases/{kb_id}/stats
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"kb_id": "kb_001",
|
||||
"docCount": 10,
|
||||
"chunkCount": 150
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 支持的文件类型
|
||||
|
||||
| 文件类型 | 扩展名 | 说明 |
|
||||
|----------|--------|------|
|
||||
| 纯文本 | .txt | 纯文本文件 |
|
||||
| Markdown | .md | Markdown 格式文档 |
|
||||
| CSV | .csv | CSV 表格数据 |
|
||||
| JSON | .json | JSON 格式数据 |
|
||||
| PDF | .pdf | PDF 文档 (需要 pypdf) |
|
||||
| Word | .docx | Word 文档 (需要 python-docx) |
|
||||
|
||||
**注意:** 不支持旧的 .doc 格式,请转换为 .docx 或其他格式。
|
||||
|
||||
---
|
||||
|
||||
## Schema 定义
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List
|
||||
|
||||
class KnowledgeBaseCreate(BaseModel):
|
||||
name: str
|
||||
description: Optional[str] = None
|
||||
embeddingModel: Optional[str] = "text-embedding-3-small"
|
||||
chunkSize: Optional[int] = 500
|
||||
chunkOverlap: Optional[int] = 50
|
||||
|
||||
class KnowledgeBaseUpdate(BaseModel):
|
||||
name: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
embeddingModel: Optional[str] = None
|
||||
chunkSize: Optional[int] = None
|
||||
chunkOverlap: Optional[int] = None
|
||||
|
||||
class KnowledgeSearchQuery(BaseModel):
|
||||
kb_id: str
|
||||
query: str
|
||||
nResults: Optional[int] = 5
|
||||
|
||||
class DocumentIndexRequest(BaseModel):
|
||||
content: str
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 单元测试
|
||||
|
||||
项目包含完整的单元测试,位于 `api/tests/test_knowledge.py`。
|
||||
|
||||
### 运行测试
|
||||
|
||||
```bash
|
||||
# 运行知识库相关测试
|
||||
pytest api/tests/test_knowledge.py -v
|
||||
|
||||
# 运行所有测试
|
||||
pytest api/tests/ -v
|
||||
```
|
||||
463
api/docs/llm.md
Normal file
463
api/docs/llm.md
Normal file
@@ -0,0 +1,463 @@
|
||||
# LLM 模型 (LLM Model) API
|
||||
|
||||
LLM 模型 API 用于管理大语言模型的配置和调用。
|
||||
|
||||
## 基础信息
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| Base URL | `/api/v1/llm` |
|
||||
| 认证方式 | Bearer Token (预留) |
|
||||
|
||||
---
|
||||
|
||||
## 数据模型
|
||||
|
||||
### LLMModel
|
||||
|
||||
```typescript
|
||||
interface LLMModel {
|
||||
id: string; // 模型唯一标识 (8位UUID)
|
||||
user_id: number; // 所属用户ID
|
||||
name: string; // 模型显示名称
|
||||
vendor: string; // 供应商: "OpenAI Compatible" | "Dify" | "FastGPT" | 等
|
||||
type: string; // 类型: "text" | "embedding" | "rerank"
|
||||
base_url: string; // API Base URL
|
||||
api_key: string; // API Key
|
||||
model_name?: string; // 实际模型名称,如 "gpt-4o"
|
||||
temperature?: number; // 温度参数 (0-2)
|
||||
context_length?: int; // 上下文长度
|
||||
enabled: boolean; // 是否启用
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API 端点
|
||||
|
||||
### 1. 获取 LLM 模型列表
|
||||
|
||||
```http
|
||||
GET /api/v1/llm
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| model_type | string | 否 | - | 过滤类型: "text" \| "embedding" \| "rerank" |
|
||||
| enabled | boolean | 否 | - | 过滤启用状态 |
|
||||
| page | int | 否 | 1 | 页码 |
|
||||
| limit | int | 否 | 50 | 每页数量 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total": 5,
|
||||
"page": 1,
|
||||
"limit": 50,
|
||||
"list": [
|
||||
{
|
||||
"id": "abc12345",
|
||||
"user_id": 1,
|
||||
"name": "GPT-4o",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"type": "text",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-***",
|
||||
"model_name": "gpt-4o",
|
||||
"temperature": 0.7,
|
||||
"context_length": 128000,
|
||||
"enabled": true,
|
||||
"created_at": "2024-01-15T10:30:00Z",
|
||||
"updated_at": "2024-01-15T10:30:00Z"
|
||||
},
|
||||
{
|
||||
"id": "def67890",
|
||||
"user_id": 1,
|
||||
"name": "Embedding-3-Small",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"type": "embedding",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-***",
|
||||
"model_name": "text-embedding-3-small",
|
||||
"enabled": true
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. 获取单个 LLM 模型详情
|
||||
|
||||
```http
|
||||
GET /api/v1/llm/{id}
|
||||
```
|
||||
|
||||
**Path Parameters:**
|
||||
|
||||
| 参数 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 模型ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "abc12345",
|
||||
"user_id": 1,
|
||||
"name": "GPT-4o",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"type": "text",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-***",
|
||||
"model_name": "gpt-4o",
|
||||
"temperature": 0.7,
|
||||
"context_length": 128000,
|
||||
"enabled": true,
|
||||
"created_at": "2024-01-15T10:30:00Z",
|
||||
"updated_at": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. 创建 LLM 模型
|
||||
|
||||
```http
|
||||
POST /api/v1/llm
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "GPT-4o",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"type": "text",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-your-api-key",
|
||||
"model_name": "gpt-4o",
|
||||
"temperature": 0.7,
|
||||
"context_length": 128000,
|
||||
"enabled": true
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| name | string | 是 | 模型显示名称 |
|
||||
| vendor | string | 是 | 供应商名称 |
|
||||
| type | string | 是 | 模型类型: "text" / "embedding" / "rerank" |
|
||||
| base_url | string | 是 | API Base URL |
|
||||
| api_key | string | 是 | API Key |
|
||||
| model_name | string | 否 | 实际模型名称 |
|
||||
| temperature | number | 否 | 温度参数,默认 0.7 |
|
||||
| context_length | int | 否 | 上下文长度 |
|
||||
| enabled | boolean | 否 | 是否启用,默认 true |
|
||||
| id | string | 否 | 指定模型ID,默认自动生成 |
|
||||
|
||||
---
|
||||
|
||||
### 4. 更新 LLM 模型
|
||||
|
||||
```http
|
||||
PUT /api/v1/llm/{id}
|
||||
```
|
||||
|
||||
**Request Body:** (部分更新)
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "GPT-4o-Updated",
|
||||
"temperature": 0.8,
|
||||
"enabled": false
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5. 删除 LLM 模型
|
||||
|
||||
```http
|
||||
DELETE /api/v1/llm/{id}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "Deleted successfully"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 6. 测试 LLM 模型连接
|
||||
|
||||
```http
|
||||
POST /api/v1/llm/{id}/test
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"latency_ms": 150,
|
||||
"message": "Connection successful"
|
||||
}
|
||||
```
|
||||
|
||||
**错误响应:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"latency_ms": 200,
|
||||
"message": "HTTP Error: 401 - Unauthorized"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 7. 与 LLM 模型对话
|
||||
|
||||
```http
|
||||
POST /api/v1/llm/{id}/chat
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| message | string | 是 | - | 用户消息 |
|
||||
| system_prompt | string | 否 | - | 系统提示词 |
|
||||
| max_tokens | int | 否 | 1000 | 最大生成token数 |
|
||||
| temperature | number | 否 | 模型配置 | 温度参数 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"reply": "您好!有什么可以帮助您的?",
|
||||
"usage": {
|
||||
"prompt_tokens": 20,
|
||||
"completion_tokens": 15,
|
||||
"total_tokens": 35
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 8. 预览模型输出
|
||||
|
||||
```http
|
||||
POST /api/v1/llm/{id}/preview
|
||||
```
|
||||
|
||||
预览模型输出,支持 text(chat) 与 embedding 两类模型。
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "请介绍一下你自己",
|
||||
"system_prompt": "你是一个专业的AI助手",
|
||||
"max_tokens": 512,
|
||||
"temperature": 0.7
|
||||
}
|
||||
```
|
||||
|
||||
**Response (text model):**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"reply": "您好!我是一个...",
|
||||
"usage": {
|
||||
"prompt_tokens": 20,
|
||||
"completion_tokens": 50,
|
||||
"total_tokens": 70
|
||||
},
|
||||
"latency_ms": 1500,
|
||||
"error": null
|
||||
}
|
||||
```
|
||||
|
||||
**Response (embedding model):**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"reply": "Embedding generated successfully. dims=1536. head=[0.012345, -0.023456, ...]",
|
||||
"usage": {
|
||||
"prompt_tokens": 10,
|
||||
"total_tokens": 10
|
||||
},
|
||||
"latency_ms": 800,
|
||||
"error": null
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| message | string | 是 | 用户消息/嵌入文本 |
|
||||
| system_prompt | string | 否 | 系统提示词 (仅 text 模型) |
|
||||
| max_tokens | int | 否 | 最大生成 token 数 (默认 512) |
|
||||
| temperature | float | 否 | 温度参数 |
|
||||
| api_key | string | 否 | 覆盖模型配置的 API Key |
|
||||
|
||||
---
|
||||
|
||||
## Schema 定义
|
||||
|
||||
```python
|
||||
from enum import Enum
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
class LLMModelType(str, Enum):
|
||||
TEXT = "text"
|
||||
EMBEDDING = "embedding"
|
||||
RERANK = "rerank"
|
||||
|
||||
class LLMModelBase(BaseModel):
|
||||
name: str
|
||||
vendor: str
|
||||
type: LLMModelType
|
||||
base_url: str
|
||||
api_key: str
|
||||
model_name: Optional[str] = None
|
||||
temperature: Optional[float] = None
|
||||
context_length: Optional[int] = None
|
||||
enabled: bool = True
|
||||
|
||||
class LLMModelCreate(LLMModelBase):
|
||||
id: Optional[str] = None
|
||||
|
||||
class LLMModelUpdate(BaseModel):
|
||||
name: Optional[str] = None
|
||||
vendor: Optional[str] = None
|
||||
base_url: Optional[str] = None
|
||||
api_key: Optional[str] = None
|
||||
model_name: Optional[str] = None
|
||||
temperature: Optional[float] = None
|
||||
context_length: Optional[int] = None
|
||||
enabled: Optional[bool] = None
|
||||
|
||||
class LLMModelOut(LLMModelBase):
|
||||
id: str
|
||||
user_id: int
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
class LLMModelTestResponse(BaseModel):
|
||||
success: bool
|
||||
latency_ms: int
|
||||
message: str
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 供应商配置示例
|
||||
|
||||
### OpenAI Compatible (OpenAI Endpoint)
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-xxx",
|
||||
"model_name": "gpt-4o",
|
||||
"type": "text",
|
||||
"temperature": 0.7
|
||||
}
|
||||
```
|
||||
|
||||
### OpenAI Compatible
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.siliconflow.com/v1",
|
||||
"api_key": "sf-xxx",
|
||||
"model_name": "deepseek-v3",
|
||||
"type": "text",
|
||||
"temperature": 0.7
|
||||
}
|
||||
```
|
||||
|
||||
### Dify
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "Dify",
|
||||
"base_url": "https://your-dify.domain.com/v1",
|
||||
"api_key": "app-xxx",
|
||||
"model_name": "gpt-4",
|
||||
"type": "text"
|
||||
}
|
||||
```
|
||||
|
||||
### Embedding 模型
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-xxx",
|
||||
"model_name": "text-embedding-3-small",
|
||||
"type": "embedding"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 单元测试
|
||||
|
||||
项目包含完整的单元测试,位于 `api/tests/test_llm.py`。
|
||||
|
||||
### 测试用例概览
|
||||
|
||||
| 测试方法 | 说明 |
|
||||
|----------|------|
|
||||
| test_get_llm_models_empty | 空数据库获取测试 |
|
||||
| test_create_llm_model | 创建模型测试 |
|
||||
| test_create_llm_model_minimal | 最小数据创建测试 |
|
||||
| test_get_llm_model_by_id | 获取单个模型测试 |
|
||||
| test_get_llm_model_not_found | 获取不存在模型测试 |
|
||||
| test_update_llm_model | 更新模型测试 |
|
||||
| test_delete_llm_model | 删除模型测试 |
|
||||
| test_list_llm_models_with_pagination | 分页测试 |
|
||||
| test_filter_llm_models_by_type | 按类型过滤测试 |
|
||||
| test_filter_llm_models_by_enabled | 按启用状态过滤测试 |
|
||||
| test_create_llm_model_with_all_fields | 全字段创建测试 |
|
||||
| test_test_llm_model_success | 测试连接成功测试 |
|
||||
| test_test_llm_model_failure | 测试连接失败测试 |
|
||||
| test_different_llm_vendors | 多供应商测试 |
|
||||
| test_embedding_llm_model | Embedding 模型测试 |
|
||||
|
||||
### 运行测试
|
||||
|
||||
```bash
|
||||
# 运行 LLM 相关测试
|
||||
pytest api/tests/test_llm.py -v
|
||||
|
||||
# 运行所有测试
|
||||
pytest api/tests/ -v
|
||||
```
|
||||
@@ -20,7 +20,7 @@ interface LLMModel {
|
||||
id: string; // 模型唯一标识
|
||||
user_id: number; // 所属用户ID
|
||||
name: string; // 模型显示名称
|
||||
vendor: string; // 供应商: "OpenAI Compatible" | "SiliconFlow" | "Dify" | "FastGPT"
|
||||
vendor: string; // 供应商: "OpenAI Compatible" | "Dify" | "FastGPT"
|
||||
type: string; // 类型: "text" | "embedding" | "rerank"
|
||||
base_url: string; // API Base URL
|
||||
api_key: string; // API Key
|
||||
@@ -57,7 +57,7 @@ interface TTSModel {
|
||||
id: string;
|
||||
user_id: number;
|
||||
name: string;
|
||||
vendor: string; // "Ali" | "Volcano" | "Minimax" | "硅基流动"
|
||||
vendor: string; // "OpenAI Compatible" | "Ali" | "Volcano" | "Minimax"
|
||||
language: string; // "zh" | "en"
|
||||
voice_list?: string[]; // 支持的声音列表
|
||||
enabled: boolean;
|
||||
@@ -316,7 +316,6 @@ class LLMModelType(str, Enum):
|
||||
|
||||
class LLMModelVendor(str, Enum):
|
||||
OPENAI_COMPATIBLE = "OpenAI Compatible"
|
||||
SILICONFLOW = "SiliconFlow"
|
||||
DIFY = "Dify"
|
||||
FASTGPT = "FastGPT"
|
||||
|
||||
@@ -389,11 +388,11 @@ class ASRModelOut(ASRModelBase):
|
||||
}
|
||||
```
|
||||
|
||||
### SiliconFlow
|
||||
### OpenAI Compatible
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "SiliconFlow",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.siliconflow.com/v1",
|
||||
"api_key": "sf-xxx",
|
||||
"model_name": "deepseek-v3"
|
||||
|
||||
580
api/docs/tools.md
Normal file
580
api/docs/tools.md
Normal file
@@ -0,0 +1,580 @@
|
||||
# 工具与自动测试 (Tools & Autotest) API
|
||||
|
||||
工具与自动测试 API 用于管理可用工具列表和自动测试功能。
|
||||
|
||||
## 基础信息
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| Base URL | `/api/v1/tools` |
|
||||
| 认证方式 | Bearer Token (预留) |
|
||||
|
||||
---
|
||||
|
||||
## 可用工具 (Tool Registry)
|
||||
|
||||
系统内置以下工具:
|
||||
|
||||
| 工具ID | 名称 | 类别 | 说明 |
|
||||
|--------|------|------|------|
|
||||
| calculator | 计算器 | query | 执行数学计算 |
|
||||
| code_interpreter | 代码执行 | query | 安全地执行Python代码 |
|
||||
| current_time | 当前时间 | query | 获取当前本地时间 |
|
||||
| turn_on_camera | 打开摄像头 | system | 执行打开摄像头命令 |
|
||||
| turn_off_camera | 关闭摄像头 | system | 执行关闭摄像头命令 |
|
||||
| increase_volume | 调高音量 | system | 提升设备音量 |
|
||||
| decrease_volume | 调低音量 | system | 降低设备音量 |
|
||||
| voice_msg_prompt | 语音消息提示 | system | 播报一条语音提示消息 |
|
||||
| text_msg_prompt | 文本消息提示 | system | 显示一条文本弹窗提示 |
|
||||
| voice_choice_prompt | 语音选项提示 | system | 播报问题并展示可选项,等待用户选择 |
|
||||
| text_choice_prompt | 文本选项提示 | system | 显示文本选项弹窗并等待用户选择 |
|
||||
|
||||
**类别说明:**
|
||||
- `query`: 查询类工具,需要配置 HTTP URL
|
||||
- `system`: 系统类工具,直接在客户端执行
|
||||
|
||||
---
|
||||
|
||||
## API 端点
|
||||
|
||||
### 1. 获取可用工具列表
|
||||
|
||||
```http
|
||||
GET /api/v1/tools/list
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"search": {
|
||||
"name": "网络搜索",
|
||||
"description": "搜索互联网获取最新信息",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"type": "string", "description": "搜索关键词"}
|
||||
},
|
||||
"required": ["query"]
|
||||
}
|
||||
},
|
||||
"calculator": {
|
||||
"name": "计算器",
|
||||
"description": "执行数学计算",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"expression": {"type": "string", "description": "数学表达式,如: 2 + 3 * 4"}
|
||||
},
|
||||
"required": ["expression"]
|
||||
}
|
||||
},
|
||||
"weather": {
|
||||
"name": "天气查询",
|
||||
"description": "查询指定城市的天气",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"city": {"type": "string", "description": "城市名称"}
|
||||
},
|
||||
"required": ["city"]
|
||||
}
|
||||
},
|
||||
"translate": {
|
||||
"name": "翻译",
|
||||
"description": "翻译文本到指定语言",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"text": {"type": "string", "description": "要翻译的文本"},
|
||||
"target_lang": {"type": "string", "description": "目标语言,如: en, ja, ko"}
|
||||
},
|
||||
"required": ["text", "target_lang"]
|
||||
}
|
||||
},
|
||||
"knowledge": {
|
||||
"name": "知识库查询",
|
||||
"description": "从知识库中检索相关信息",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"type": "string", "description": "查询内容"},
|
||||
"kb_id": {"type": "string", "description": "知识库ID"}
|
||||
},
|
||||
"required": ["query"]
|
||||
}
|
||||
},
|
||||
"code_interpreter": {
|
||||
"name": "代码执行",
|
||||
"description": "安全地执行Python代码",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {"type": "string", "description": "要执行的Python代码"}
|
||||
},
|
||||
"required": ["code"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. 获取工具详情
|
||||
|
||||
```http
|
||||
GET /api/v1/tools/list/{tool_id}
|
||||
```
|
||||
|
||||
**Path Parameters:**
|
||||
|
||||
| 参数 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| tool_id | string | 工具ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "计算器",
|
||||
"description": "执行数学计算",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"expression": {"type": "string", "description": "数学表达式,如: 2 + 3 * 4"}
|
||||
},
|
||||
"required": ["expression"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**错误响应 (工具不存在):**
|
||||
|
||||
```json
|
||||
{
|
||||
"detail": "Tool not found"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. 健康检查
|
||||
|
||||
```http
|
||||
GET /api/v1/tools/health
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "healthy",
|
||||
"timestamp": 1705315200.123,
|
||||
"tools": ["search", "calculator", "weather", "translate", "knowledge", "code_interpreter"]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4. 获取工具资源列表
|
||||
|
||||
```http
|
||||
GET /api/v1/tools/resources
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| category | string | 否 | - | 过滤类别: "query" \| "system" |
|
||||
| enabled | boolean | 否 | - | 过滤启用状态 |
|
||||
| include_system | boolean | 否 | true | 是否包含系统工具 |
|
||||
| page | int | 否 | 1 | 页码 |
|
||||
| limit | int | 否 | 100 | 每页数量 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total": 15,
|
||||
"page": 1,
|
||||
"limit": 100,
|
||||
"list": [
|
||||
{
|
||||
"id": "calculator",
|
||||
"user_id": 1,
|
||||
"name": "计算器",
|
||||
"description": "执行数学计算",
|
||||
"category": "query",
|
||||
"icon": "Terminal",
|
||||
"http_method": "GET",
|
||||
"http_url": null,
|
||||
"http_timeout_ms": 10000,
|
||||
"parameter_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"expression": {"type": "string", "description": "数学表达式"}
|
||||
},
|
||||
"required": ["expression"]
|
||||
},
|
||||
"parameter_defaults": {},
|
||||
"wait_for_response": false,
|
||||
"enabled": true,
|
||||
"is_system": true,
|
||||
"created_at": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5. 获取工具资源详情
|
||||
|
||||
```http
|
||||
GET /api/v1/tools/resources/{id}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 6. 创建工具资源
|
||||
|
||||
```http
|
||||
POST /api/v1/tools/resources
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "订单查询",
|
||||
"description": "查询用户订单信息",
|
||||
"category": "query",
|
||||
"icon": "Search",
|
||||
"http_method": "POST",
|
||||
"http_url": "https://api.example.com/orders",
|
||||
"http_headers": {"Authorization": "Bearer {api_key}"},
|
||||
"http_timeout_ms": 10000,
|
||||
"parameter_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"order_id": {"type": "string", "description": "订单ID"}
|
||||
},
|
||||
"required": ["order_id"]
|
||||
},
|
||||
"enabled": true
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | string | 否 | 工具ID,默认自动生成 |
|
||||
| name | string | 是 | 工具名称 |
|
||||
| description | string | 否 | 工具描述 |
|
||||
| category | string | 是 | 类别: "query" \| "system" |
|
||||
| icon | string | 否 | 图标名称 |
|
||||
| http_method | string | 否 | HTTP 方法,默认 GET |
|
||||
| http_url | string | 否* | HTTP 请求地址 (query 类必填) |
|
||||
| http_headers | object | 否 | HTTP 请求头 |
|
||||
| http_timeout_ms | int | 否 | 超时时间(毫秒),默认 10000 |
|
||||
| parameter_schema | object | 否 | 参数 JSON Schema |
|
||||
| parameter_defaults | object | 否 | 默认参数值 |
|
||||
| wait_for_response | boolean | 否 | 是否等待响应 (仅 system 类) |
|
||||
| enabled | boolean | 否 | 是否启用,默认 true |
|
||||
|
||||
---
|
||||
|
||||
### 7. 更新工具资源
|
||||
|
||||
```http
|
||||
PUT /api/v1/tools/resources/{id}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 8. 删除工具资源
|
||||
|
||||
```http
|
||||
DELETE /api/v1/tools/resources/{id}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 自动测试 (Autotest)
|
||||
|
||||
### 9. 运行完整自动测试
|
||||
|
||||
```http
|
||||
POST /api/v1/tools/autotest
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| llm_model_id | string | 否 | - | LLM 模型ID |
|
||||
| asr_model_id | string | 否 | - | ASR 模型ID |
|
||||
| test_llm | boolean | 否 | true | 是否测试LLM |
|
||||
| test_asr | boolean | 否 | true | 是否测试ASR |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "abc12345",
|
||||
"started_at": 1705315200.0,
|
||||
"duration_ms": 2500,
|
||||
"tests": [
|
||||
{
|
||||
"name": "Model Existence",
|
||||
"passed": true,
|
||||
"message": "Found model: GPT-4o",
|
||||
"duration_ms": 15
|
||||
},
|
||||
{
|
||||
"name": "API Connection",
|
||||
"passed": true,
|
||||
"message": "Latency: 150ms",
|
||||
"duration_ms": 150
|
||||
},
|
||||
{
|
||||
"name": "Temperature Setting",
|
||||
"passed": true,
|
||||
"message": "temperature=0.7"
|
||||
},
|
||||
{
|
||||
"name": "Streaming Support",
|
||||
"passed": true,
|
||||
"message": "Received 15 chunks",
|
||||
"duration_ms": 800
|
||||
}
|
||||
],
|
||||
"summary": {
|
||||
"passed": 4,
|
||||
"failed": 0,
|
||||
"total": 4
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 10. 测试单个 LLM 模型
|
||||
|
||||
```http
|
||||
POST /api/v1/tools/autotest/llm/{model_id}
|
||||
```
|
||||
|
||||
**Path Parameters:**
|
||||
|
||||
| 参数 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| model_id | string | LLM 模型ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "llm_test_001",
|
||||
"started_at": 1705315200.0,
|
||||
"duration_ms": 1200,
|
||||
"tests": [
|
||||
{
|
||||
"name": "Model Existence",
|
||||
"passed": true,
|
||||
"message": "Found model: GPT-4o",
|
||||
"duration_ms": 10
|
||||
},
|
||||
{
|
||||
"name": "API Connection",
|
||||
"passed": true,
|
||||
"message": "Latency: 180ms",
|
||||
"duration_ms": 180
|
||||
},
|
||||
{
|
||||
"name": "Temperature Setting",
|
||||
"passed": true,
|
||||
"message": "temperature=0.7"
|
||||
},
|
||||
{
|
||||
"name": "Streaming Support",
|
||||
"passed": true,
|
||||
"message": "Received 12 chunks",
|
||||
"duration_ms": 650
|
||||
}
|
||||
],
|
||||
"summary": {
|
||||
"passed": 4,
|
||||
"failed": 0,
|
||||
"total": 4
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 11. 测试单个 ASR 模型
|
||||
|
||||
```http
|
||||
POST /api/v1/tools/autotest/asr/{model_id}
|
||||
```
|
||||
|
||||
**Path Parameters:**
|
||||
|
||||
| 参数 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| model_id | string | ASR 模型ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "asr_test_001",
|
||||
"started_at": 1705315200.0,
|
||||
"duration_ms": 800,
|
||||
"tests": [
|
||||
{
|
||||
"name": "Model Existence",
|
||||
"passed": true,
|
||||
"message": "Found model: Whisper-1",
|
||||
"duration_ms": 8
|
||||
},
|
||||
{
|
||||
"name": "Hotwords Config",
|
||||
"passed": true,
|
||||
"message": "Hotwords: 3 words"
|
||||
},
|
||||
{
|
||||
"name": "API Availability",
|
||||
"passed": true,
|
||||
"message": "Status: 200",
|
||||
"duration_ms": 250
|
||||
},
|
||||
{
|
||||
"name": "Language Config",
|
||||
"passed": true,
|
||||
"message": "Language: zh"
|
||||
}
|
||||
],
|
||||
"summary": {
|
||||
"passed": 4,
|
||||
"failed": 0,
|
||||
"total": 4
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 12. 发送测试消息
|
||||
|
||||
```http
|
||||
POST /api/v1/tools/test-message
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| llm_model_id | string | 是 | - | LLM 模型ID |
|
||||
| message | string | 否 | "Hello, this is a test message." | 测试消息 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"reply": "Hello! This is a test reply from GPT-4o.",
|
||||
"usage": {
|
||||
"prompt_tokens": 15,
|
||||
"completion_tokens": 12,
|
||||
"total_tokens": 27
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**错误响应 (模型不存在):**
|
||||
|
||||
```json
|
||||
{
|
||||
"detail": "LLM Model not found"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 测试结果结构
|
||||
|
||||
### AutotestResult
|
||||
|
||||
```typescript
|
||||
interface AutotestResult {
|
||||
id: string; // 测试ID
|
||||
started_at: number; // 开始时间戳
|
||||
duration_ms: number; // 总耗时(毫秒)
|
||||
tests: TestCase[]; // 测试用例列表
|
||||
summary: TestSummary; // 测试摘要
|
||||
}
|
||||
|
||||
interface TestCase {
|
||||
name: string; // 测试名称
|
||||
passed: boolean; // 是否通过
|
||||
message: string; // 测试消息
|
||||
duration_ms: number; // 耗时(毫秒)
|
||||
}
|
||||
|
||||
interface TestSummary {
|
||||
passed: number; // 通过数量
|
||||
failed: number; // 失败数量
|
||||
total: number; // 总数量
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 测试项目说明
|
||||
|
||||
### LLM 模型测试项目
|
||||
|
||||
| 测试名称 | 说明 |
|
||||
|----------|------|
|
||||
| Model Existence | 检查模型是否存在于数据库 |
|
||||
| API Connection | 测试 API 连接并测量延迟 |
|
||||
| Temperature Setting | 检查温度配置 |
|
||||
| Streaming Support | 测试流式响应支持 |
|
||||
|
||||
### ASR 模型测试项目
|
||||
|
||||
| 测试名称 | 说明 |
|
||||
|----------|------|
|
||||
| Model Existence | 检查模型是否存在于数据库 |
|
||||
| Hotwords Config | 检查热词配置 |
|
||||
| API Availability | 测试 API 可用性 |
|
||||
| Language Config | 检查语言配置 |
|
||||
|
||||
---
|
||||
|
||||
## 单元测试
|
||||
|
||||
项目包含完整的单元测试,位于 `api/tests/test_tools.py`。
|
||||
|
||||
### 测试用例概览
|
||||
|
||||
| 测试类 | 说明 |
|
||||
|--------|------|
|
||||
| TestToolsAPI | 工具列表、健康检查等基础功能测试 |
|
||||
| TestAutotestAPI | 自动测试功能完整测试 |
|
||||
|
||||
### 运行测试
|
||||
|
||||
```bash
|
||||
# 运行工具相关测试
|
||||
pytest api/tests/test_tools.py -v
|
||||
|
||||
# 运行所有测试
|
||||
pytest api/tests/ -v
|
||||
```
|
||||
@@ -182,12 +182,14 @@ POST /api/v1/voices
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| name | string | 是 | 声音名称 |
|
||||
| vendor | string | 是 | 供应商 |
|
||||
| vendor | string | 是 | 供应商: "Ali" \| "Volcano" \| "Minimax" \| "OpenAI Compatible" \| "DashScope" |
|
||||
| gender | string | 是 | 性别: "Male" \| "Female" |
|
||||
| language | string | 是 | 语言: "zh" \| "en" |
|
||||
| description | string | 否 | 描述信息 |
|
||||
| model | string | 是 | 厂商语音模型标识 |
|
||||
| voice_key | string | 是 | 厂商voice_key |
|
||||
| model | string | 否 | 厂商语音模型标识 (可选,部分供应商有默认值) |
|
||||
| voice_key | string | 否 | 厂商 voice_key (可选,部分供应商有默认值) |
|
||||
| api_key | string | 否 | 供应商 API Key (可选,也可通过环境变量配置) |
|
||||
| base_url | string | 否 | API Base URL (可选,部分供应商有默认值) |
|
||||
| speed | number | 否 | 默认语速 (0.5-2.0),默认 1.0 |
|
||||
| gain | number | 否 | 音量增益 (-10~10 dB),默认 0 |
|
||||
| pitch | number | 否 | 音调调整,默认 0 |
|
||||
@@ -244,11 +246,14 @@ POST /api/v1/voices/{id}/preview
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"audio_url": "https://storage.example.com/preview/voice_001_preview.mp3",
|
||||
"duration_ms": 2500
|
||||
"audio_url": "data:audio/wav;base64,UklGRi...",
|
||||
"duration_ms": 2500,
|
||||
"error": null
|
||||
}
|
||||
```
|
||||
|
||||
**注意:** `audio_url` 返回 Base64 编码的音频数据 (data URI 格式),可直接在浏览器中播放或解码保存为音频文件。
|
||||
|
||||
---
|
||||
|
||||
### 7. 获取供应商声音列表
|
||||
|
||||
498
api/init_db.py
498
api/init_db.py
@@ -1,52 +1,490 @@
|
||||
#!/usr/bin/env python3
|
||||
"""初始化数据库"""
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
|
||||
# 添加路径
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from app.db import Base, engine
|
||||
from app.models import Voice
|
||||
from app.db import Base, engine, DATABASE_URL
|
||||
from app.id_generator import short_id
|
||||
from app.models import Voice, Assistant, KnowledgeBase, Workflow, LLMModel, ASRModel, KnowledgeDocument
|
||||
|
||||
VOICE_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
|
||||
DASHSCOPE_VOICE_MODEL = "qwen3-tts-flash-realtime"
|
||||
DASHSCOPE_DEFAULT_VOICE_KEY = "Cherry"
|
||||
DASHSCOPE_REALTIME_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
|
||||
|
||||
SEED_VOICE_IDS = {
|
||||
"alex": short_id("tts"),
|
||||
"david": short_id("tts"),
|
||||
"bella": short_id("tts"),
|
||||
"claire": short_id("tts"),
|
||||
"dashscope_cherry": short_id("tts"),
|
||||
}
|
||||
|
||||
SEED_LLM_IDS = {
|
||||
"deepseek_chat": short_id("llm"),
|
||||
"glm_4": short_id("llm"),
|
||||
"embedding_3_small": short_id("llm"),
|
||||
}
|
||||
|
||||
SEED_ASR_IDS = {
|
||||
"sensevoice_small": short_id("asr"),
|
||||
"telespeech_asr": short_id("asr"),
|
||||
"dashscope_realtime": short_id("asr"),
|
||||
}
|
||||
|
||||
SEED_ASSISTANT_IDS = {
|
||||
"default": short_id("ast"),
|
||||
"customer_service": short_id("ast"),
|
||||
"english_tutor": short_id("ast"),
|
||||
}
|
||||
|
||||
|
||||
def ensure_db_dir():
|
||||
"""确保 SQLite 数据目录存在。"""
|
||||
if not DATABASE_URL.startswith("sqlite:///"):
|
||||
return
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
data_dir = os.path.dirname(db_path)
|
||||
if data_dir:
|
||||
os.makedirs(data_dir, exist_ok=True)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def db_session():
|
||||
"""统一管理 DB session 生命周期。"""
|
||||
from app.db import SessionLocal
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
yield db
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
def seed_if_empty(db, model_cls, records, success_msg: str):
|
||||
"""当目标表为空时写入默认记录。"""
|
||||
if db.query(model_cls).count() != 0:
|
||||
return
|
||||
if isinstance(records, list):
|
||||
db.add_all(records)
|
||||
else:
|
||||
db.add(records)
|
||||
db.commit()
|
||||
print(success_msg)
|
||||
|
||||
|
||||
def init_db():
|
||||
"""创建所有表"""
|
||||
ensure_db_dir()
|
||||
|
||||
print("📦 创建数据库表...")
|
||||
Base.metadata.drop_all(bind=engine) # 删除旧表
|
||||
Base.metadata.create_all(bind=engine)
|
||||
print("✅ 数据库表创建完成")
|
||||
|
||||
|
||||
def init_default_voices():
|
||||
"""初始化默认声音"""
|
||||
from app.db import SessionLocal
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
if db.query(Voice).count() == 0:
|
||||
voices = [
|
||||
Voice(id="v1", name="Xiaoyun", vendor="Ali", gender="Female", language="zh", description="Gentle and professional."),
|
||||
Voice(id="v2", name="Kevin", vendor="Volcano", gender="Male", language="en", description="Deep and authoritative."),
|
||||
Voice(id="v3", name="Abby", vendor="Minimax", gender="Female", language="en", description="Cheerful and lively."),
|
||||
Voice(id="v4", name="Guang", vendor="Ali", gender="Male", language="zh", description="Standard newscast style."),
|
||||
Voice(id="v5", name="Doubao", vendor="Volcano", gender="Female", language="zh", description="Cute and young."),
|
||||
]
|
||||
for v in voices:
|
||||
db.add(v)
|
||||
db.commit()
|
||||
print("✅ 默认声音数据已初始化")
|
||||
def rebuild_vector_store(reset_doc_status: bool = True):
|
||||
"""重建知识库向量集合(按 DB 中的 KB 列表重建 collection 壳)。"""
|
||||
from app.vector_store import vector_store
|
||||
|
||||
with db_session() as db:
|
||||
print("🧹 重建向量库集合...")
|
||||
kb_list = db.query(KnowledgeBase).all()
|
||||
|
||||
# 删除现有 KB 集合
|
||||
try:
|
||||
collections = vector_store.client.list_collections()
|
||||
except Exception as exc:
|
||||
raise RuntimeError(f"无法读取向量集合列表: {exc}") from exc
|
||||
|
||||
for col in collections:
|
||||
name = getattr(col, "name", None) or str(col)
|
||||
if name.startswith("kb_"):
|
||||
try:
|
||||
vector_store.client.delete_collection(name=name)
|
||||
print(f" - removed {name}")
|
||||
except Exception as exc:
|
||||
print(f" - skip remove {name}: {exc}")
|
||||
|
||||
# 按 DB 重建 KB 集合
|
||||
for kb in kb_list:
|
||||
vector_store.create_collection(kb.id, kb.embedding_model)
|
||||
print(f" + created kb_{kb.id} ({kb.embedding_model})")
|
||||
|
||||
if reset_doc_status:
|
||||
kb.chunk_count = 0
|
||||
docs = db.query(KnowledgeDocument).filter(KnowledgeDocument.kb_id == kb.id).all()
|
||||
kb.doc_count = 0
|
||||
for doc in docs:
|
||||
doc.chunk_count = 0
|
||||
doc.status = "pending"
|
||||
doc.error_message = None
|
||||
doc.processed_at = None
|
||||
|
||||
db.commit()
|
||||
print("✅ 向量库重建完成(仅重建集合壳,文档需重新索引)")
|
||||
|
||||
|
||||
def init_default_data():
|
||||
with db_session() as db:
|
||||
# 检查是否已有数据
|
||||
# OpenAI Compatible (SiliconFlow API) CosyVoice 2.0 预设声音 (8个)
|
||||
# 参考: https://docs.siliconflow.cn/cn/api-reference/audio/create-speech
|
||||
voices = [
|
||||
# 男声 (Male Voices)
|
||||
Voice(
|
||||
id=SEED_VOICE_IDS["alex"],
|
||||
name="Alex",
|
||||
vendor="OpenAI Compatible",
|
||||
gender="Male",
|
||||
language="en",
|
||||
description="Steady male voice.",
|
||||
model=VOICE_MODEL,
|
||||
voice_key=f"{VOICE_MODEL}:alex",
|
||||
is_system=True,
|
||||
),
|
||||
Voice(
|
||||
id=SEED_VOICE_IDS["david"],
|
||||
name="David",
|
||||
vendor="OpenAI Compatible",
|
||||
gender="Male",
|
||||
language="en",
|
||||
description="Cheerful male voice.",
|
||||
model=VOICE_MODEL,
|
||||
voice_key=f"{VOICE_MODEL}:david",
|
||||
is_system=True,
|
||||
),
|
||||
# 女声 (Female Voices)
|
||||
Voice(
|
||||
id=SEED_VOICE_IDS["bella"],
|
||||
name="Bella",
|
||||
vendor="OpenAI Compatible",
|
||||
gender="Female",
|
||||
language="en",
|
||||
description="Passionate female voice.",
|
||||
model=VOICE_MODEL,
|
||||
voice_key=f"{VOICE_MODEL}:bella",
|
||||
is_system=True,
|
||||
),
|
||||
Voice(
|
||||
id=SEED_VOICE_IDS["claire"],
|
||||
name="Claire",
|
||||
vendor="OpenAI Compatible",
|
||||
gender="Female",
|
||||
language="en",
|
||||
description="Gentle female voice.",
|
||||
model=VOICE_MODEL,
|
||||
voice_key=f"{VOICE_MODEL}:claire",
|
||||
is_system=True,
|
||||
),
|
||||
Voice(
|
||||
id=SEED_VOICE_IDS["dashscope_cherry"],
|
||||
name="DashScope Cherry",
|
||||
vendor="DashScope",
|
||||
gender="Female",
|
||||
language="zh",
|
||||
description="DashScope realtime sample voice.",
|
||||
model=DASHSCOPE_VOICE_MODEL,
|
||||
voice_key=DASHSCOPE_DEFAULT_VOICE_KEY,
|
||||
base_url=DASHSCOPE_REALTIME_URL,
|
||||
is_system=True,
|
||||
),
|
||||
]
|
||||
seed_if_empty(db, Voice, voices, "✅ 默认声音数据已初始化 (OpenAI Compatible + DashScope)")
|
||||
|
||||
|
||||
def init_default_tools(recreate: bool = False):
|
||||
"""初始化默认工具,或按需重建工具表数据。"""
|
||||
from app.routers.tools import _seed_default_tools_if_empty, recreate_tool_resources
|
||||
|
||||
with db_session() as db:
|
||||
if recreate:
|
||||
recreate_tool_resources(db)
|
||||
print("✅ 工具库已重建")
|
||||
else:
|
||||
print("ℹ️ 声音数据已存在,跳过初始化")
|
||||
finally:
|
||||
db.close()
|
||||
_seed_default_tools_if_empty(db)
|
||||
print("✅ 默认工具已初始化")
|
||||
|
||||
|
||||
def init_default_assistants():
|
||||
"""初始化默认助手"""
|
||||
with db_session() as db:
|
||||
assistants = [
|
||||
Assistant(
|
||||
id=SEED_ASSISTANT_IDS["default"],
|
||||
user_id=1,
|
||||
name="AI 助手",
|
||||
call_count=0,
|
||||
opener="你好!我是AI助手,有什么可以帮你的吗?",
|
||||
prompt="你是一个友好的AI助手,请用简洁清晰的语言回答用户的问题。",
|
||||
language="zh",
|
||||
voice_output_enabled=True,
|
||||
voice=SEED_VOICE_IDS["bella"],
|
||||
speed=1.0,
|
||||
hotwords=[],
|
||||
tools=["current_time"],
|
||||
interruption_sensitivity=500,
|
||||
config_mode="platform",
|
||||
llm_model_id=SEED_LLM_IDS["deepseek_chat"],
|
||||
asr_model_id=SEED_ASR_IDS["sensevoice_small"],
|
||||
),
|
||||
Assistant(
|
||||
id=SEED_ASSISTANT_IDS["customer_service"],
|
||||
user_id=1,
|
||||
name="客服助手",
|
||||
call_count=0,
|
||||
opener="您好,欢迎致电客服中心,请问有什么可以帮您?",
|
||||
prompt="你是一个专业的客服人员,耐心解答客户问题,提供优质的服务体验。",
|
||||
language="zh",
|
||||
voice_output_enabled=True,
|
||||
voice=SEED_VOICE_IDS["claire"],
|
||||
speed=1.0,
|
||||
hotwords=["客服", "投诉", "咨询"],
|
||||
tools=["current_time"],
|
||||
interruption_sensitivity=600,
|
||||
config_mode="platform",
|
||||
),
|
||||
Assistant(
|
||||
id=SEED_ASSISTANT_IDS["english_tutor"],
|
||||
user_id=1,
|
||||
name="英语导师",
|
||||
call_count=0,
|
||||
opener="Hello! I'm your English learning companion. How can I help you today?",
|
||||
prompt="You are a friendly English tutor. Help users practice English conversation and explain grammar points clearly.",
|
||||
language="en",
|
||||
voice_output_enabled=True,
|
||||
voice=SEED_VOICE_IDS["alex"],
|
||||
speed=1.0,
|
||||
hotwords=["grammar", "vocabulary", "practice"],
|
||||
tools=["current_time"],
|
||||
interruption_sensitivity=400,
|
||||
config_mode="platform",
|
||||
),
|
||||
]
|
||||
seed_if_empty(db, Assistant, assistants, "✅ 默认助手数据已初始化")
|
||||
|
||||
|
||||
def init_default_workflows():
|
||||
"""初始化默认工作流"""
|
||||
from datetime import datetime
|
||||
|
||||
with db_session() as db:
|
||||
now = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
|
||||
workflows = [
|
||||
Workflow(
|
||||
id="simple_conversation",
|
||||
user_id=1,
|
||||
name="简单对话",
|
||||
node_count=2,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
global_prompt="处理简单的对话流程,用户问什么答什么。",
|
||||
nodes=[
|
||||
{"id": "1", "type": "start", "position": {"x": 100, "y": 100}, "data": {"label": "开始"}},
|
||||
{"id": "2", "type": "ai_reply", "position": {"x": 300, "y": 100}, "data": {"label": "AI回复"}},
|
||||
],
|
||||
edges=[{"source": "1", "target": "2", "id": "e1-2"}],
|
||||
),
|
||||
Workflow(
|
||||
id="voice_input_flow",
|
||||
user_id=1,
|
||||
name="语音输入流程",
|
||||
node_count=4,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
global_prompt="处理语音输入的完整流程。",
|
||||
nodes=[
|
||||
{"id": "1", "type": "start", "position": {"x": 100, "y": 100}, "data": {"label": "开始"}},
|
||||
{"id": "2", "type": "asr", "position": {"x": 250, "y": 100}, "data": {"label": "语音识别"}},
|
||||
{"id": "3", "type": "llm", "position": {"x": 400, "y": 100}, "data": {"label": "LLM处理"}},
|
||||
{"id": "4", "type": "tts", "position": {"x": 550, "y": 100}, "data": {"label": "语音合成"}},
|
||||
],
|
||||
edges=[
|
||||
{"source": "1", "target": "2", "id": "e1-2"},
|
||||
{"source": "2", "target": "3", "id": "e2-3"},
|
||||
{"source": "3", "target": "4", "id": "e3-4"},
|
||||
],
|
||||
),
|
||||
]
|
||||
seed_if_empty(db, Workflow, workflows, "✅ 默认工作流数据已初始化")
|
||||
|
||||
|
||||
def init_default_knowledge_bases():
|
||||
"""初始化默认知识库"""
|
||||
with db_session() as db:
|
||||
kb = KnowledgeBase(
|
||||
id="default_kb",
|
||||
user_id=1,
|
||||
name="默认知识库",
|
||||
description="系统默认知识库,用于存储常见问题解答。",
|
||||
embedding_model="text-embedding-3-small",
|
||||
chunk_size=500,
|
||||
chunk_overlap=50,
|
||||
doc_count=0,
|
||||
chunk_count=0,
|
||||
status="active",
|
||||
)
|
||||
seed_if_empty(db, KnowledgeBase, kb, "✅ 默认知识库已初始化")
|
||||
|
||||
|
||||
def init_default_llm_models():
|
||||
"""初始化默认LLM模型"""
|
||||
with db_session() as db:
|
||||
llm_models = [
|
||||
LLMModel(
|
||||
id=SEED_LLM_IDS["deepseek_chat"],
|
||||
user_id=1,
|
||||
name="DeepSeek Chat",
|
||||
vendor="OpenAI Compatible",
|
||||
type="text",
|
||||
base_url="https://api.deepseek.com",
|
||||
api_key="YOUR_API_KEY", # 用户需替换
|
||||
model_name="deepseek-chat",
|
||||
temperature=0.7,
|
||||
context_length=4096,
|
||||
enabled=True,
|
||||
),
|
||||
LLMModel(
|
||||
id=SEED_LLM_IDS["glm_4"],
|
||||
user_id=1,
|
||||
name="GLM-4",
|
||||
vendor="ZhipuAI",
|
||||
type="text",
|
||||
base_url="https://open.bigmodel.cn/api/paas/v4",
|
||||
api_key="YOUR_API_KEY",
|
||||
model_name="glm-4",
|
||||
temperature=0.7,
|
||||
context_length=8192,
|
||||
enabled=True,
|
||||
),
|
||||
LLMModel(
|
||||
id=SEED_LLM_IDS["embedding_3_small"],
|
||||
user_id=1,
|
||||
name="Embedding 3 Small",
|
||||
vendor="OpenAI Compatible",
|
||||
type="embedding",
|
||||
base_url="https://api.openai.com/v1",
|
||||
api_key="YOUR_API_KEY",
|
||||
model_name="text-embedding-3-small",
|
||||
enabled=True,
|
||||
),
|
||||
]
|
||||
seed_if_empty(db, LLMModel, llm_models, "✅ 默认LLM模型已初始化")
|
||||
|
||||
|
||||
def init_default_asr_models():
|
||||
"""初始化默认ASR模型"""
|
||||
with db_session() as db:
|
||||
asr_models = [
|
||||
ASRModel(
|
||||
id=SEED_ASR_IDS["sensevoice_small"],
|
||||
user_id=1,
|
||||
name="FunAudioLLM/SenseVoiceSmall",
|
||||
vendor="OpenAI Compatible",
|
||||
language="Multi-lingual",
|
||||
base_url="https://api.siliconflow.cn/v1",
|
||||
api_key="YOUR_API_KEY",
|
||||
model_name="FunAudioLLM/SenseVoiceSmall",
|
||||
hotwords=[],
|
||||
enable_punctuation=True,
|
||||
enable_normalization=True,
|
||||
enabled=True,
|
||||
),
|
||||
ASRModel(
|
||||
id=SEED_ASR_IDS["telespeech_asr"],
|
||||
user_id=1,
|
||||
name="TeleAI/TeleSpeechASR",
|
||||
vendor="OpenAI Compatible",
|
||||
language="Multi-lingual",
|
||||
base_url="https://api.siliconflow.cn/v1",
|
||||
api_key="YOUR_API_KEY",
|
||||
model_name="TeleAI/TeleSpeechASR",
|
||||
hotwords=[],
|
||||
enable_punctuation=True,
|
||||
enable_normalization=True,
|
||||
enabled=True,
|
||||
),
|
||||
ASRModel(
|
||||
id=SEED_ASR_IDS["dashscope_realtime"],
|
||||
user_id=1,
|
||||
name="DashScope Realtime ASR",
|
||||
vendor="DashScope",
|
||||
language="Multi-lingual",
|
||||
base_url=DASHSCOPE_REALTIME_URL,
|
||||
api_key="YOUR_API_KEY",
|
||||
model_name="qwen3-asr-flash-realtime",
|
||||
hotwords=[],
|
||||
enable_punctuation=True,
|
||||
enable_normalization=True,
|
||||
enabled=True,
|
||||
),
|
||||
]
|
||||
seed_if_empty(db, ASRModel, asr_models, "✅ 默认ASR模型已初始化")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 确保 data 目录存在
|
||||
data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data")
|
||||
os.makedirs(data_dir, exist_ok=True)
|
||||
|
||||
init_db()
|
||||
init_default_voices()
|
||||
print("🎉 数据库初始化完成!")
|
||||
parser = argparse.ArgumentParser(description="初始化/重建 AI VideoAssistant 数据与向量库")
|
||||
parser.add_argument(
|
||||
"--rebuild-db",
|
||||
action="store_true",
|
||||
help="重建数据库(drop + create tables)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rebuild-vector-store",
|
||||
action="store_true",
|
||||
help="重建向量库 KB 集合(清空后按 DB 的 knowledge_bases 重建 collection)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--skip-seed",
|
||||
action="store_true",
|
||||
help="跳过默认数据初始化",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--recreate-tool-db",
|
||||
action="store_true",
|
||||
help="重建工具库数据(清空 tool_resources 后按内置默认工具重建)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
# 无参数时保持旧行为:重建 DB + 初始化默认数据
|
||||
# 仅当完全未指定任何选项时才自动触发 rebuild-db。
|
||||
if (
|
||||
not args.rebuild_db
|
||||
and not args.rebuild_vector_store
|
||||
and not args.skip_seed
|
||||
and not args.recreate_tool_db
|
||||
):
|
||||
args.rebuild_db = True
|
||||
|
||||
ensure_db_dir()
|
||||
|
||||
if args.rebuild_db:
|
||||
init_db()
|
||||
else:
|
||||
print("ℹ️ 跳过数据库结构变更(未指定 --rebuild-db)")
|
||||
if not args.skip_seed or args.recreate_tool_db:
|
||||
print("ℹ️ 当前将执行非破坏性流程(仅工具/默认数据初始化)")
|
||||
|
||||
if args.recreate_tool_db:
|
||||
init_default_tools(recreate=True)
|
||||
|
||||
if not args.skip_seed:
|
||||
init_default_data()
|
||||
if not args.recreate_tool_db:
|
||||
init_default_tools(recreate=False)
|
||||
init_default_assistants()
|
||||
init_default_workflows()
|
||||
init_default_knowledge_bases()
|
||||
init_default_llm_models()
|
||||
init_default_asr_models()
|
||||
print("✅ 默认数据初始化完成")
|
||||
|
||||
if args.rebuild_vector_store:
|
||||
rebuild_vector_store(reset_doc_status=True)
|
||||
|
||||
print("🎉 初始化脚本执行完成!")
|
||||
|
||||
73
api/main.py
73
api/main.py
@@ -1,73 +0,0 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from contextlib import asynccontextmanager
|
||||
import os
|
||||
|
||||
from app.db import Base, engine
|
||||
from app.routers import assistants, history, knowledge
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
# 启动时创建表
|
||||
Base.metadata.create_all(bind=engine)
|
||||
yield
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="AI VideoAssistant API",
|
||||
description="Backend API for AI VideoAssistant",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
# CORS
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# 路由
|
||||
app.include_router(assistants.router, prefix="/api")
|
||||
app.include_router(history.router, prefix="/api")
|
||||
app.include_router(knowledge.router, prefix="/api")
|
||||
|
||||
|
||||
@app.get("/")
|
||||
def root():
|
||||
return {"message": "AI VideoAssistant API", "version": "1.0.0"}
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
def health():
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
# 初始化默认数据
|
||||
@app.on_event("startup")
|
||||
def init_default_data():
|
||||
from sqlalchemy.orm import Session
|
||||
from app.db import SessionLocal
|
||||
from app.models import Voice
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
# 检查是否已有数据
|
||||
if db.query(Voice).count() == 0:
|
||||
# 插入默认声音
|
||||
voices = [
|
||||
Voice(id="v1", name="Xiaoyun", vendor="Ali", gender="Female", language="zh", description="Gentle and professional."),
|
||||
Voice(id="v2", name="Kevin", vendor="Volcano", gender="Male", language="en", description="Deep and authoritative."),
|
||||
Voice(id="v3", name="Abby", vendor="Minimax", gender="Female", language="en", description="Cheerful and lively."),
|
||||
Voice(id="v4", name="Guang", vendor="Ali", gender="Male", language="zh", description="Standard newscast style."),
|
||||
Voice(id="v5", name="Doubao", vendor="Volcano", gender="Female", language="zh", description="Cute and young."),
|
||||
]
|
||||
for v in voices:
|
||||
db.add(v)
|
||||
db.commit()
|
||||
print("✅ 默认声音数据已初始化")
|
||||
finally:
|
||||
db.close()
|
||||
8
api/pytest.ini
Normal file
8
api/pytest.ini
Normal file
@@ -0,0 +1,8 @@
|
||||
[pytest]
|
||||
testpaths = tests
|
||||
python_files = test_*.py
|
||||
python_classes = Test*
|
||||
python_functions = test_*
|
||||
addopts = -v --tb=short
|
||||
filterwarnings =
|
||||
ignore::DeprecationWarning
|
||||
@@ -1,11 +1,12 @@
|
||||
aiosqlite==0.19.0
|
||||
fastapi==0.109.0
|
||||
uvicorn==0.27.0
|
||||
python-multipart==0.0.6
|
||||
python-dotenv==1.0.0
|
||||
pydantic==2.5.3
|
||||
sqlalchemy==2.0.25
|
||||
minio==7.2.0
|
||||
httpx==0.26.0
|
||||
chromadb==0.4.22
|
||||
openai==1.12.0
|
||||
aiosqlite==0.22.1
|
||||
fastapi==0.135.1
|
||||
uvicorn==0.41.0
|
||||
python-multipart==0.0.22
|
||||
python-dotenv==1.2.2
|
||||
pydantic==2.11.7
|
||||
sqlalchemy==2.0.48
|
||||
minio==7.2.20
|
||||
httpx==0.28.1
|
||||
chromadb==1.5.2
|
||||
openai==2.24.0
|
||||
dashscope==1.25.13
|
||||
|
||||
14
api/run_tests.bat
Normal file
14
api/run_tests.bat
Normal file
@@ -0,0 +1,14 @@
|
||||
@echo off
|
||||
REM Run API tests
|
||||
|
||||
cd /d "%~dp0"
|
||||
|
||||
REM Install test dependencies
|
||||
echo Installing test dependencies...
|
||||
pip install pytest pytest-cov -q
|
||||
|
||||
REM Run tests
|
||||
echo Running tests...
|
||||
pytest tests/ -v --tb=short
|
||||
|
||||
pause
|
||||
1
api/tests/__init__.py
Normal file
1
api/tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Tests package
|
||||
138
api/tests/conftest.py
Normal file
138
api/tests/conftest.py
Normal file
@@ -0,0 +1,138 @@
|
||||
"""Pytest fixtures for API tests"""
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
# Add api directory to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.db import Base, get_db
|
||||
from app.main import app
|
||||
|
||||
|
||||
# Use in-memory SQLite for testing
|
||||
DATABASE_URL = "sqlite:///:memory:"
|
||||
|
||||
engine = create_engine(
|
||||
DATABASE_URL,
|
||||
connect_args={"check_same_thread": False},
|
||||
poolclass=StaticPool,
|
||||
)
|
||||
TestingSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def db_session():
|
||||
"""Create a fresh database session for each test"""
|
||||
# Create all tables
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
session = TestingSessionLocal()
|
||||
try:
|
||||
yield session
|
||||
finally:
|
||||
session.close()
|
||||
# Drop all tables after test
|
||||
Base.metadata.drop_all(bind=engine)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def client(db_session):
|
||||
"""Create a test client with database dependency override"""
|
||||
|
||||
def override_get_db():
|
||||
try:
|
||||
yield db_session
|
||||
finally:
|
||||
pass
|
||||
|
||||
app.dependency_overrides[get_db] = override_get_db
|
||||
|
||||
with TestClient(app) as test_client:
|
||||
yield test_client
|
||||
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_voice_data():
|
||||
"""Sample voice data for testing"""
|
||||
return {
|
||||
"name": "Test Voice",
|
||||
"vendor": "TestVendor",
|
||||
"gender": "Female",
|
||||
"language": "zh",
|
||||
"description": "A test voice for unit testing",
|
||||
"model": "test-model",
|
||||
"voice_key": "test-key",
|
||||
"speed": 1.0,
|
||||
"gain": 0,
|
||||
"pitch": 0,
|
||||
"enabled": True
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_assistant_data():
|
||||
"""Sample assistant data for testing"""
|
||||
return {
|
||||
"name": "Test Assistant",
|
||||
"opener": "Hello, welcome!",
|
||||
"prompt": "You are a helpful assistant.",
|
||||
"language": "zh",
|
||||
"voiceOutputEnabled": True,
|
||||
"speed": 1.0,
|
||||
"hotwords": ["test", "hello"],
|
||||
"tools": [],
|
||||
"configMode": "platform"
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_call_record_data():
|
||||
"""Sample call record data for testing"""
|
||||
return {
|
||||
"user_id": 1,
|
||||
"assistant_id": None,
|
||||
"source": "debug"
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_llm_model_data():
|
||||
"""Sample LLM model data for testing"""
|
||||
return {
|
||||
"id": "test-llm-001",
|
||||
"name": "Test LLM Model",
|
||||
"vendor": "TestVendor",
|
||||
"type": "text",
|
||||
"base_url": "https://api.test.com/v1",
|
||||
"api_key": "test-api-key",
|
||||
"model_name": "test-model",
|
||||
"temperature": 0.7,
|
||||
"context_length": 4096,
|
||||
"enabled": True
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_asr_model_data():
|
||||
"""Sample ASR model data for testing"""
|
||||
return {
|
||||
"id": "test-asr-001",
|
||||
"name": "Test ASR Model",
|
||||
"vendor": "TestVendor",
|
||||
"language": "zh",
|
||||
"base_url": "https://api.test.com/v1",
|
||||
"api_key": "test-api-key",
|
||||
"model_name": "paraformer-v2",
|
||||
"hotwords": ["测试", "语音"],
|
||||
"enable_punctuation": True,
|
||||
"enable_normalization": True,
|
||||
"enabled": True
|
||||
}
|
||||
430
api/tests/test_asr.py
Normal file
430
api/tests/test_asr.py
Normal file
@@ -0,0 +1,430 @@
|
||||
"""Tests for ASR Model API endpoints"""
|
||||
import io
|
||||
import wave
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
|
||||
def _make_wav_bytes(sample_rate: int = 16000) -> bytes:
|
||||
with io.BytesIO() as buffer:
|
||||
with wave.open(buffer, "wb") as wav_file:
|
||||
wav_file.setnchannels(1)
|
||||
wav_file.setsampwidth(2)
|
||||
wav_file.setframerate(sample_rate)
|
||||
wav_file.writeframes(b"\x00\x00" * sample_rate)
|
||||
return buffer.getvalue()
|
||||
|
||||
|
||||
class TestASRModelAPI:
|
||||
"""Test cases for ASR Model endpoints"""
|
||||
|
||||
def test_get_asr_models_empty(self, client):
|
||||
"""Test getting ASR models when database is empty"""
|
||||
response = client.get("/api/asr")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "total" in data
|
||||
assert "list" in data
|
||||
assert data["total"] == 0
|
||||
|
||||
def test_create_asr_model(self, client, sample_asr_model_data):
|
||||
"""Test creating a new ASR model"""
|
||||
response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == sample_asr_model_data["name"]
|
||||
assert data["vendor"] == sample_asr_model_data["vendor"]
|
||||
assert data["language"] == sample_asr_model_data["language"]
|
||||
assert "id" in data
|
||||
|
||||
def test_create_asr_model_minimal(self, client):
|
||||
"""Test creating an ASR model with minimal required data"""
|
||||
data = {
|
||||
"name": "Minimal ASR",
|
||||
"vendor": "Test",
|
||||
"language": "zh",
|
||||
"base_url": "https://api.test.com",
|
||||
"api_key": "test-key"
|
||||
}
|
||||
response = client.post("/api/asr", json=data)
|
||||
assert response.status_code == 200
|
||||
assert response.json()["name"] == "Minimal ASR"
|
||||
|
||||
def test_get_asr_model_by_id(self, client, sample_asr_model_data):
|
||||
"""Test getting a specific ASR model by ID"""
|
||||
# Create first
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Get by ID
|
||||
response = client.get(f"/api/asr/{model_id}")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["id"] == model_id
|
||||
assert data["name"] == sample_asr_model_data["name"]
|
||||
|
||||
def test_get_asr_model_not_found(self, client):
|
||||
"""Test getting a non-existent ASR model"""
|
||||
response = client.get("/api/asr/non-existent-id")
|
||||
assert response.status_code == 404
|
||||
|
||||
def test_update_asr_model(self, client, sample_asr_model_data):
|
||||
"""Test updating an ASR model"""
|
||||
# Create first
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Update
|
||||
update_data = {
|
||||
"name": "Updated ASR Model",
|
||||
"language": "en",
|
||||
"enable_punctuation": False
|
||||
}
|
||||
response = client.put(f"/api/asr/{model_id}", json=update_data)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == "Updated ASR Model"
|
||||
assert data["language"] == "en"
|
||||
assert data["enable_punctuation"] == False
|
||||
|
||||
def test_update_asr_model_vendor(self, client, sample_asr_model_data):
|
||||
"""Test updating ASR vendor metadata."""
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
response = client.put(
|
||||
f"/api/asr/{model_id}",
|
||||
json={
|
||||
"vendor": "DashScope",
|
||||
"model_name": "qwen3-asr-flash-realtime",
|
||||
"base_url": "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
|
||||
},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["vendor"] == "DashScope"
|
||||
assert data["model_name"] == "qwen3-asr-flash-realtime"
|
||||
|
||||
def test_delete_asr_model(self, client, sample_asr_model_data):
|
||||
"""Test deleting an ASR model"""
|
||||
# Create first
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Delete
|
||||
response = client.delete(f"/api/asr/{model_id}")
|
||||
assert response.status_code == 200
|
||||
|
||||
# Verify deleted
|
||||
get_response = client.get(f"/api/asr/{model_id}")
|
||||
assert get_response.status_code == 404
|
||||
|
||||
def test_list_asr_models_with_pagination(self, client, sample_asr_model_data):
    """Test listing ASR models with pagination"""
    # Create multiple models with distinct ids/names.
    for i in range(3):
        data = sample_asr_model_data.copy()
        data["id"] = f"test-asr-{i}"
        data["name"] = f"ASR Model {i}"
        client.post("/api/asr", json=data)

    # Page 1 with limit 2 must report the full total but return only 2 rows.
    response = client.get("/api/asr?page=1&limit=2")
    assert response.status_code == 200
    data = response.json()
    assert data["total"] == 3
    assert len(data["list"]) == 2

def test_filter_asr_models_by_language(self, client, sample_asr_model_data):
    """Test filtering ASR models by language"""
    # Create models with different languages. The original enumerate()
    # index was never used, so iterate over the languages directly.
    for lang in ["zh", "en", "Multi-lingual"]:
        data = sample_asr_model_data.copy()
        data["id"] = f"test-asr-{lang}"
        data["name"] = f"ASR {lang}"
        data["language"] = lang
        client.post("/api/asr", json=data)

    # Filter by language and check every returned row matches the filter.
    response = client.get("/api/asr?language=zh")
    assert response.status_code == 200
    data = response.json()
    assert data["total"] >= 1
    for model in data["list"]:
        assert model["language"] == "zh"
|
||||
|
||||
def test_filter_asr_models_by_enabled(self, client, sample_asr_model_data):
    """Test filtering ASR models by enabled status"""
    # Create one enabled and one disabled model (reusing the same dict,
    # mutated between posts, as the payload is serialized per request).
    data = sample_asr_model_data.copy()
    data["id"] = "test-asr-enabled"
    data["name"] = "Enabled ASR"
    data["enabled"] = True
    client.post("/api/asr", json=data)

    data["id"] = "test-asr-disabled"
    data["name"] = "Disabled ASR"
    data["enabled"] = False
    client.post("/api/asr", json=data)

    # Filter by enabled; every returned model must be enabled.
    response = client.get("/api/asr?enabled=true")
    assert response.status_code == 200
    data = response.json()
    for model in data["list"]:
        # `is True` instead of `== True` (flake8 E712); the field is a bool.
        assert model["enabled"] is True
|
||||
|
||||
def test_create_asr_model_with_hotwords(self, client):
    """Test creating an ASR model with hotwords"""
    payload = {
        "id": "asr-hotwords",
        "name": "ASR with Hotwords",
        "vendor": "SiliconFlow",
        "language": "zh",
        "base_url": "https://api.siliconflow.cn/v1",
        "api_key": "test-key",
        "model_name": "paraformer-v2",
        "hotwords": ["你好", "查询", "帮助"],
        "enable_punctuation": True,
        "enable_normalization": True
    }
    response = client.post("/api/asr", json=payload)
    assert response.status_code == 200
    result = response.json()
    # Hotwords must round-trip unchanged.
    assert result["hotwords"] == ["你好", "查询", "帮助"]

def test_create_asr_model_with_all_fields(self, client):
    """Test creating an ASR model with all fields"""
    payload = {
        "id": "full-asr",
        "name": "Full ASR Model",
        "vendor": "SiliconFlow",
        "language": "zh",
        "base_url": "https://api.siliconflow.cn/v1",
        "api_key": "sk-test",
        "model_name": "paraformer-v2",
        "hotwords": ["测试"],
        "enable_punctuation": True,
        "enable_normalization": True,
        "enabled": True
    }
    response = client.post("/api/asr", json=payload)
    assert response.status_code == 200
    result = response.json()
    assert result["name"] == "Full ASR Model"
    # `is True` instead of `== True` (flake8 E712); the fields are bools.
    assert result["enable_punctuation"] is True
    assert result["enable_normalization"] is True
|
||||
|
||||
@patch('httpx.Client')
def test_test_asr_model_siliconflow(self, mock_client_class, client, sample_asr_model_data):
    """Test testing an ASR model with SiliconFlow vendor"""
    # NOTE(review): the method-level @patch above is shadowed by the
    # explicit `with patch('app.routers.asr.httpx.Client', ...)` below;
    # it is kept only so the method signature stays unchanged.
    sample_asr_model_data["vendor"] = "SiliconFlow"
    create_response = client.post("/api/asr", json=sample_asr_model_data)
    model_id = create_response.json()["id"]

    # Build a context-manager mock that yields a canned 200 response.
    mock_client = MagicMock()
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.json.return_value = {
        "results": [{"transcript": "测试文本", "language": "zh"}]
    }
    mock_response.raise_for_status = MagicMock()
    mock_client.get.return_value = mock_response
    mock_client.__enter__ = MagicMock(return_value=mock_client)
    mock_client.__exit__ = MagicMock(return_value=False)

    with patch('app.routers.asr.httpx.Client', return_value=mock_client):
        response = client.post(f"/api/asr/{model_id}/test")
        assert response.status_code == 200
        data = response.json()
        # `is True` instead of `== True` (flake8 E712).
        assert data["success"] is True

@patch('httpx.Client')
def test_test_asr_model_openai(self, mock_client_class, client, sample_asr_model_data):
    """Test testing an ASR model with OpenAI vendor"""
    # NOTE(review): same shadowed @patch as above; kept for signature
    # compatibility only.
    sample_asr_model_data["vendor"] = "OpenAI"
    sample_asr_model_data["id"] = "test-asr-openai"
    create_response = client.post("/api/asr", json=sample_asr_model_data)
    model_id = create_response.json()["id"]

    # Canned OpenAI-style transcription response.
    mock_client = MagicMock()
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.json.return_value = {"text": "Test transcript"}
    mock_response.raise_for_status = MagicMock()
    mock_client.get.return_value = mock_response
    mock_client.__enter__ = MagicMock(return_value=mock_client)
    mock_client.__exit__ = MagicMock(return_value=False)

    with patch('app.routers.asr.httpx.Client', return_value=mock_client):
        response = client.post(f"/api/asr/{model_id}/test")
        assert response.status_code == 200
|
||||
|
||||
def test_test_asr_model_dashscope(self, client, sample_asr_model_data, monkeypatch):
    """Test DashScope ASR connectivity probe."""
    from app.routers import asr as asr_router

    # Register a DashScope realtime model.
    sample_asr_model_data["vendor"] = "DashScope"
    sample_asr_model_data["base_url"] = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
    sample_asr_model_data["model_name"] = "qwen3-asr-flash-realtime"
    model_id = client.post("/api/asr", json=sample_asr_model_data).json()["id"]

    # Stand-in for the websocket probe: just verify the forwarded kwargs.
    def fake_probe(**kwargs):
        assert kwargs["api_key"] == sample_asr_model_data["api_key"]
        assert kwargs["model"] == "qwen3-asr-flash-realtime"

    monkeypatch.setattr(asr_router, "_probe_dashscope_asr_connection", fake_probe)

    response = client.post(f"/api/asr/{model_id}/test")
    assert response.status_code == 200
    result = response.json()
    assert result["success"] is True
    assert result["message"] == "DashScope realtime ASR connected"
|
||||
|
||||
@patch('httpx.Client')
def test_test_asr_model_failure(self, mock_client_class, client, sample_asr_model_data):
    """Test testing an ASR model with failed connection"""
    # NOTE(review): the method-level @patch is shadowed by the inner
    # `with patch(...)`; kept so the signature stays unchanged.
    create_response = client.post("/api/asr", json=sample_asr_model_data)
    model_id = create_response.json()["id"]

    # Mock a 401 whose raise_for_status() raises, simulating a bad key.
    mock_client = MagicMock()
    mock_response = MagicMock()
    mock_response.status_code = 401
    mock_response.text = "Unauthorized"
    mock_response.raise_for_status = MagicMock(side_effect=Exception("401 Unauthorized"))
    mock_client.get.return_value = mock_response
    mock_client.__enter__ = MagicMock(return_value=mock_client)
    mock_client.__exit__ = MagicMock(return_value=False)

    with patch('app.routers.asr.httpx.Client', return_value=mock_client):
        response = client.post(f"/api/asr/{model_id}/test")
        # Endpoint reports failure in the body, not via HTTP status.
        assert response.status_code == 200
        data = response.json()
        # `is False` instead of `== False` (flake8 E712).
        assert data["success"] is False
|
||||
|
||||
def test_different_asr_languages(self, client):
    """Test creating ASR models with different languages"""
    # Each supported language value should round-trip through create.
    for lang in ["zh", "en", "Multi-lingual"]:
        payload = {
            "id": f"asr-lang-{lang}",
            "name": f"ASR {lang}",
            "vendor": "SiliconFlow",
            "language": lang,
            "base_url": "https://api.siliconflow.cn/v1",
            "api_key": "test-key"
        }
        response = client.post("/api/asr", json=payload)
        assert response.status_code == 200
        assert response.json()["language"] == lang

def test_different_asr_vendors(self, client):
    """Test creating ASR models with different vendors"""
    # Each vendor value should round-trip through create.
    for vendor in ["SiliconFlow", "OpenAI", "Azure", "DashScope"]:
        payload = {
            "id": f"asr-vendor-{vendor.lower()}",
            "name": f"ASR {vendor}",
            "vendor": vendor,
            "language": "zh",
            "base_url": f"https://api.{vendor.lower()}.com/v1",
            "api_key": "test-key"
        }
        response = client.post("/api/asr", json=payload)
        assert response.status_code == 200
        assert response.json()["vendor"] == vendor
|
||||
|
||||
def test_preview_asr_model_success(self, client, sample_asr_model_data, monkeypatch):
    """Test ASR preview endpoint with OpenAI-compatible transcriptions API."""
    from app.routers import asr as asr_router

    model_id = client.post("/api/asr", json=sample_asr_model_data).json()["id"]

    class FakeResponse:
        # Minimal stand-in for an httpx response object.
        status_code = 200

        def json(self):
            return {"text": "你好,这是测试转写", "language": "zh", "confidence": 0.98}

        @property
        def text(self):
            return '{"text":"ok"}'

    class FakeClient:
        # Context-manager shim replacing httpx.Client inside the router.
        def __init__(self, *args, **kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def post(self, url, headers=None, data=None, files=None):
            # Verify the outbound transcription request is well formed.
            assert url.endswith("/audio/transcriptions")
            assert headers["Authorization"] == f"Bearer {sample_asr_model_data['api_key']}"
            assert data["model"] == sample_asr_model_data["model_name"]
            assert files["file"][0] == "sample.wav"
            return FakeResponse()

    monkeypatch.setattr(asr_router.httpx, "Client", FakeClient)

    response = client.post(
        f"/api/asr/{model_id}/preview",
        files={"file": ("sample.wav", b"fake-wav-bytes", "audio/wav")},
    )
    assert response.status_code == 200
    payload = response.json()
    assert payload["success"] is True
    assert payload["transcript"] == "你好,这是测试转写"
    assert payload["language"] == "zh"
|
||||
|
||||
def test_preview_asr_model_reject_non_audio(self, client, sample_asr_model_data):
    """Test ASR preview endpoint rejects non-audio file."""
    model_id = client.post("/api/asr", json=sample_asr_model_data).json()["id"]

    # A text/plain upload must be rejected with a 400.
    response = client.post(
        f"/api/asr/{model_id}/preview",
        files={"file": ("sample.txt", b"text-data", "text/plain")},
    )
    assert response.status_code == 400
    assert "Only audio files are supported" in response.text

def test_preview_asr_model_dashscope(self, client, sample_asr_model_data, monkeypatch):
    """Test ASR preview endpoint with DashScope realtime helper."""
    from app.routers import asr as asr_router

    # Register a DashScope realtime model.
    sample_asr_model_data["vendor"] = "DashScope"
    sample_asr_model_data["base_url"] = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
    sample_asr_model_data["model_name"] = "qwen3-asr-flash-realtime"
    model_id = client.post("/api/asr", json=sample_asr_model_data).json()["id"]

    # Replace the realtime transcription helper with a deterministic stub
    # that also verifies the forwarded kwargs.
    def fake_preview(**kwargs):
        assert kwargs["base_url"] == sample_asr_model_data["base_url"]
        assert kwargs["model"] == sample_asr_model_data["model_name"]
        return {
            "transcript": "你好,这是实时识别",
            "language": "zh",
            "confidence": None,
        }

    monkeypatch.setattr(asr_router, "_transcribe_dashscope_preview", fake_preview)

    response = client.post(
        f"/api/asr/{model_id}/preview",
        files={"file": ("sample.wav", _make_wav_bytes(), "audio/wav")},
    )
    assert response.status_code == 200
    payload = response.json()
    assert payload["success"] is True
    assert payload["transcript"] == "你好,这是实时识别"
|
||||
440
api/tests/test_assistants.py
Normal file
440
api/tests/test_assistants.py
Normal file
@@ -0,0 +1,440 @@
|
||||
"""Tests for Assistant API endpoints"""
|
||||
import pytest
|
||||
import uuid
|
||||
|
||||
|
||||
class TestAssistantAPI:
|
||||
"""Test cases for Assistant endpoints"""
|
||||
|
||||
def test_get_assistants_empty(self, client):
    """Test getting assistants when database is empty"""
    response = client.get("/api/assistants")
    assert response.status_code == 200
    body = response.json()
    # The list envelope is present even with no rows.
    assert "total" in body
    assert "list" in body

def test_create_assistant(self, client, sample_assistant_data):
    """Test creating a new assistant"""
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    result = response.json()
    # Echoed fields match the request payload.
    assert result["name"] == sample_assistant_data["name"]
    assert result["opener"] == sample_assistant_data["opener"]
    assert result["manualOpenerToolCalls"] == []
    assert result["prompt"] == sample_assistant_data["prompt"]
    assert result["language"] == sample_assistant_data["language"]
    # Defaults applied by the server.
    assert result["voiceOutputEnabled"] is True
    assert result["firstTurnMode"] == "bot_first"
    assert result["generatedOpenerEnabled"] is False
    assert result["asrInterimEnabled"] is False
    assert result["botCannotBeInterrupted"] is False
    assert result["appId"] is None
    assert "id" in result
    assert result["callCount"] == 0

def test_create_assistant_minimal(self, client):
    """Test creating an assistant with minimal required data"""
    response = client.post("/api/assistants", json={"name": "Minimal Assistant"})
    assert response.status_code == 200
    result = response.json()
    assert result["name"] == "Minimal Assistant"
    assert result["asrInterimEnabled"] is False
|
||||
|
||||
def test_get_assistant_by_id(self, client, sample_assistant_data):
    """Test getting a specific assistant by ID"""
    # Create an assistant, then fetch it back by its id.
    assistant_id = client.post("/api/assistants", json=sample_assistant_data).json()["id"]

    response = client.get(f"/api/assistants/{assistant_id}")
    assert response.status_code == 200
    body = response.json()
    assert body["id"] == assistant_id
    assert body["name"] == sample_assistant_data["name"]

def test_get_assistant_not_found(self, client):
    """Test getting a non-existent assistant"""
    assert client.get("/api/assistants/non-existent-id").status_code == 404
|
||||
|
||||
def test_update_assistant(self, client, sample_assistant_data):
    """Test updating an assistant"""
    # Seed an assistant to update.
    assistant_id = client.post("/api/assistants", json=sample_assistant_data).json()["id"]

    manual_calls = [
        {"toolName": "text_msg_prompt", "arguments": {"msg": "请选择服务类型"}}
    ]
    update_payload = {
        "name": "Updated Assistant",
        "prompt": "You are an updated assistant.",
        "speed": 1.5,
        "voiceOutputEnabled": False,
        "asrInterimEnabled": True,
        "manualOpenerToolCalls": manual_calls,
    }
    response = client.put(f"/api/assistants/{assistant_id}", json=update_payload)
    assert response.status_code == 200
    body = response.json()
    assert body["name"] == "Updated Assistant"
    assert body["prompt"] == "You are an updated assistant."
    assert body["speed"] == 1.5
    assert body["voiceOutputEnabled"] is False
    assert body["asrInterimEnabled"] is True
    assert body["manualOpenerToolCalls"] == manual_calls

def test_delete_assistant(self, client, sample_assistant_data):
    """Test deleting an assistant"""
    # Create, delete, then confirm the lookup 404s.
    assistant_id = client.post("/api/assistants", json=sample_assistant_data).json()["id"]

    assert client.delete(f"/api/assistants/{assistant_id}").status_code == 200
    assert client.get(f"/api/assistants/{assistant_id}").status_code == 404
|
||||
|
||||
def test_list_assistants_with_pagination(self, client, sample_assistant_data):
    """Test listing assistants with pagination"""
    # Create three assistants with distinct names.
    for i in range(3):
        payload = sample_assistant_data.copy()
        payload["name"] = f"Assistant {i}"
        client.post("/api/assistants", json=payload)

    # Page 1 / limit 2 reports the full total but returns two rows.
    response = client.get("/api/assistants?page=1&limit=2")
    assert response.status_code == 200
    body = response.json()
    assert body["total"] == 3
    assert len(body["list"]) == 2

def test_create_assistant_with_voice(self, client, sample_assistant_data, sample_voice_data):
    """Test creating an assistant with a voice reference"""
    # Create the referenced voice first.
    voice_id = client.post("/api/voices", json=sample_voice_data).json()["id"]

    sample_assistant_data["voice"] = voice_id
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    assert response.json()["voice"] == voice_id

def test_create_assistant_with_knowledge_base(self, client, sample_assistant_data):
    """Test creating an assistant with knowledge base reference"""
    # Note: This test assumes knowledge base doesn't exist
    sample_assistant_data["knowledgeBaseId"] = "non-existent-kb"
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    assert response.json()["knowledgeBaseId"] == "non-existent-kb"

    # The kb id should also flow into the runtime-config metadata.
    assistant_id = response.json()["id"]
    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    assert metadata["knowledgeBaseId"] == "non-existent-kb"
    assert metadata["knowledge"]["enabled"] is True
    assert metadata["knowledge"]["kbId"] == "non-existent-kb"
|
||||
|
||||
def test_assistant_with_model_references(self, client, sample_assistant_data):
    """Test creating assistant with model references"""
    refs = {
        "llmModelId": "llm-001",
        "asrModelId": "asr-001",
        "embeddingModelId": "emb-001",
        "rerankModelId": "rerank-001",
    }
    sample_assistant_data.update(refs)
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    body = response.json()
    # Every model reference must round-trip unchanged.
    for key, value in refs.items():
        assert body[key] == value

def test_assistant_with_tools(self, client, sample_assistant_data):
    """Test creating assistant with tools"""
    sample_assistant_data["tools"] = ["weather", "calculator", "search"]
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    assert response.json()["tools"] == ["weather", "calculator", "search"]

def test_assistant_with_hotwords(self, client, sample_assistant_data):
    """Test creating assistant with hotwords"""
    sample_assistant_data["hotwords"] = ["hello", "help", "stop"]
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    assert response.json()["hotwords"] == ["hello", "help", "stop"]
|
||||
|
||||
def test_different_config_modes(self, client, sample_assistant_data):
    """Test creating assistants with different config modes"""
    # Every supported config mode should round-trip through create.
    for mode in ["platform", "dify", "fastgpt", "none"]:
        sample_assistant_data["name"] = f"Assistant {mode}"
        sample_assistant_data["configMode"] = mode
        response = client.post("/api/assistants", json=sample_assistant_data)
        assert response.status_code == 200
        assert response.json()["configMode"] == mode

def test_different_languages(self, client, sample_assistant_data):
    """Test creating assistants with different languages"""
    # Every supported language should round-trip through create.
    for lang in ["zh", "en", "ja", "ko"]:
        sample_assistant_data["name"] = f"Assistant {lang}"
        sample_assistant_data["language"] = lang
        response = client.post("/api/assistants", json=sample_assistant_data)
        assert response.status_code == 200
        assert response.json()["language"] == lang
|
||||
|
||||
def test_get_runtime_config(self, client, sample_assistant_data, sample_llm_model_data, sample_asr_model_data, sample_voice_data):
    """Test resolved runtime config endpoint for WS session.start metadata."""
    # Register the three backing services the assistant references.
    sample_asr_model_data["vendor"] = "OpenAI Compatible"
    llm_resp = client.post("/api/llm", json=sample_llm_model_data)
    assert llm_resp.status_code == 200
    llm_id = llm_resp.json()["id"]

    asr_resp = client.post("/api/asr", json=sample_asr_model_data)
    assert asr_resp.status_code == 200
    asr_id = asr_resp.json()["id"]

    sample_voice_data["vendor"] = "OpenAI Compatible"
    sample_voice_data["base_url"] = "https://tts.example.com/v1/audio/speech"
    sample_voice_data["api_key"] = "test-voice-key"
    voice_resp = client.post("/api/voices", json=sample_voice_data)
    assert voice_resp.status_code == 200
    voice_id = voice_resp.json()["id"]

    # Wire the assistant to the registered services.
    sample_assistant_data.update({
        "llmModelId": llm_id,
        "asrModelId": asr_id,
        "voice": voice_id,
        "prompt": "runtime prompt",
        "opener": "runtime opener",
        "manualOpenerToolCalls": [{"toolName": "text_msg_prompt", "arguments": {"msg": "欢迎"}}],
        "asrInterimEnabled": True,
        "speed": 1.1,
    })
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    payload = runtime_resp.json()

    assert payload["assistantId"] == assistant_id
    metadata = payload["sessionStartMetadata"]
    # Prompt is prefixed verbatim and extended with the tool policy.
    assert metadata["systemPrompt"].startswith("runtime prompt")
    assert "Tool usage policy:" in metadata["systemPrompt"]
    assert metadata["greeting"] == "runtime opener"
    assert metadata["manualOpenerToolCalls"] == [{"toolName": "text_msg_prompt", "arguments": {"msg": "欢迎"}}]
    # Resolved service settings come from the registered models/voice.
    assert metadata["services"]["llm"]["model"] == sample_llm_model_data["model_name"]
    assert metadata["services"]["asr"]["model"] == sample_asr_model_data["model_name"]
    assert metadata["services"]["asr"]["baseUrl"] == sample_asr_model_data["base_url"]
    assert metadata["services"]["asr"]["enableInterim"] is True
    expected_tts_voice = f"{sample_voice_data['model']}:{sample_voice_data['voice_key']}"
    assert metadata["services"]["tts"]["voice"] == expected_tts_voice
    assert metadata["services"]["tts"]["baseUrl"] == sample_voice_data["base_url"]
|
||||
|
||||
def test_get_engine_config_endpoint(self, client, sample_assistant_data):
    """Test canonical assistant config endpoint consumed by engine backend adapter."""
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    config_resp = client.get(f"/api/assistants/{assistant_id}/config")
    assert config_resp.status_code == 200
    payload = config_resp.json()

    assert payload["assistantId"] == assistant_id
    assert payload["assistant"]["assistantId"] == assistant_id
    # Version ids are derived from the assistant id.
    assert payload["assistant"]["configVersionId"].startswith(f"asst_{assistant_id}_")
    # Both the assistant block and the session metadata carry the prompt
    # plus the appended tool policy.
    for prompt in (payload["assistant"]["systemPrompt"], payload["sessionStartMetadata"]["systemPrompt"]):
        assert prompt.startswith(sample_assistant_data["prompt"])
        assert "Tool usage policy:" in prompt
    assert payload["sessionStartMetadata"]["history"]["assistantId"] == assistant_id
|
||||
|
||||
def test_runtime_config_resolves_selected_tools_into_runtime_definitions(self, client, sample_assistant_data):
    # Selecting tool ids on the assistant should expand into full runtime
    # tool definitions in the session metadata.
    sample_assistant_data["tools"] = ["increase_volume", "calculator"]
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    tools = metadata["tools"]
    assert isinstance(tools, list)
    assert len(tools) == 2

    by_name = {entry["function"]["name"]: entry for entry in tools}
    # increase_volume runs on the client with a default step of 1.
    assert by_name["increase_volume"]["executor"] == "client"
    assert by_name["increase_volume"]["defaultArgs"]["step"] == 1
    # calculator runs server-side and exposes an object schema with an
    # `expression` property.
    assert by_name["calculator"]["executor"] == "server"
    assert by_name["calculator"]["function"]["parameters"]["type"] == "object"
    assert "expression" in by_name["calculator"]["function"]["parameters"]["properties"]

def test_runtime_config_normalizes_legacy_voice_message_prompt_tool_id(self, client, sample_assistant_data):
    # The legacy tool id "voice_message_prompt" is normalized to
    # "voice_msg_prompt" both on the stored assistant and in runtime metadata.
    sample_assistant_data["tools"] = ["voice_message_prompt"]
    sample_assistant_data["manualOpenerToolCalls"] = [
        {"toolName": "voice_message_prompt", "arguments": {"msg": "您好"}}
    ]
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_payload = assistant_resp.json()
    assistant_id = assistant_payload["id"]
    assert assistant_payload["tools"] == ["voice_msg_prompt"]
    assert assistant_payload["manualOpenerToolCalls"] == [
        {"toolName": "voice_msg_prompt", "arguments": {"msg": "您好"}}
    ]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    by_name = {entry["function"]["name"]: entry for entry in metadata["tools"]}
    assert "voice_msg_prompt" in by_name
    assert metadata["manualOpenerToolCalls"] == [
        {"toolName": "voice_msg_prompt", "arguments": {"msg": "您好"}}
    ]
|
||||
|
||||
def test_runtime_config_text_mode_when_voice_output_disabled(self, client, sample_assistant_data):
    # Disabling voice output should switch runtime output to text mode
    # and turn off both interim ASR and TTS.
    sample_assistant_data["voiceOutputEnabled"] = False
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    assert metadata["output"]["mode"] == "text"
    assert metadata["services"]["asr"]["enableInterim"] is False
    assert metadata["services"]["tts"]["enabled"] is False
|
||||
|
||||
def test_runtime_config_dashscope_voice_provider(self, client, sample_assistant_data):
    """DashScope voices should map to dashscope tts provider in runtime metadata."""
    voice_resp = client.post("/api/voices", json={
        "name": "DashScope Cherry",
        "vendor": "DashScope",
        "gender": "Female",
        "language": "zh",
        "description": "dashscope voice",
        "api_key": "dashscope-key",
        "base_url": "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
    })
    assert voice_resp.status_code == 200
    voice_payload = voice_resp.json()

    sample_assistant_data.update({
        "voice": voice_payload["id"],
        "voiceOutputEnabled": True,
    })
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    tts = runtime_resp.json()["sessionStartMetadata"]["services"]["tts"]
    # DashScope voice resolves to the dashscope realtime TTS stack.
    assert tts["provider"] == "dashscope"
    assert tts["voice"] == "Cherry"
    assert tts["model"] == "qwen3-tts-flash-realtime"
    assert tts["apiKey"] == "dashscope-key"
    assert tts["baseUrl"] == "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"

def test_runtime_config_dashscope_asr_provider(self, client, sample_assistant_data):
    """DashScope ASR models should map to dashscope asr provider in runtime metadata."""
    asr_resp = client.post("/api/asr", json={
        "name": "DashScope Realtime ASR",
        "vendor": "DashScope",
        "language": "zh",
        "base_url": "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
        "api_key": "dashscope-asr-key",
        "model_name": "qwen3-asr-flash-realtime",
        "hotwords": [],
        "enable_punctuation": True,
        "enable_normalization": True,
        "enabled": True,
    })
    assert asr_resp.status_code == 200
    asr_payload = asr_resp.json()

    sample_assistant_data.update({
        "asrModelId": asr_payload["id"],
    })
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    asr = runtime_resp.json()["sessionStartMetadata"]["services"]["asr"]
    assert asr["provider"] == "dashscope"
    assert asr["baseUrl"] == "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
    assert asr["enableInterim"] is False
|
||||
|
||||
def test_runtime_config_defaults_asr_interim_disabled_without_asr_model(self, client, sample_assistant_data):
    # Without an ASR model attached, interim transcripts default to off.
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    assert metadata["services"]["asr"]["enableInterim"] is False

def test_assistant_interrupt_and_generated_opener_flags(self, client, sample_assistant_data):
    # Flags should persist on the assistant and flow into runtime metadata.
    sample_assistant_data.update({
        "firstTurnMode": "user_first",
        "generatedOpenerEnabled": True,
        "botCannotBeInterrupted": True,
        "interruptionSensitivity": 900,
    })
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    get_resp = client.get(f"/api/assistants/{assistant_id}")
    assert get_resp.status_code == 200
    payload = get_resp.json()
    assert payload["firstTurnMode"] == "user_first"
    assert payload["generatedOpenerEnabled"] is True
    assert payload["botCannotBeInterrupted"] is True
    assert payload["interruptionSensitivity"] == 900

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    assert metadata["firstTurnMode"] == "user_first"
    assert metadata["generatedOpenerEnabled"] is True
    # user_first mode means no canned greeting.
    assert metadata["greeting"] == ""
    # botCannotBeInterrupted disables barge-in; sensitivity maps to
    # the minimum barge-in duration in milliseconds.
    assert metadata["bargeIn"]["enabled"] is False
    assert metadata["bargeIn"]["minDurationMs"] == 900
|
||||
|
||||
def test_fastgpt_app_id_persists_and_flows_to_runtime(self, client, sample_assistant_data):
    """FastGPT appId is stored on the assistant and exposed in the runtime LLM config."""
    sample_assistant_data.update({
        "configMode": "fastgpt",
        "apiUrl": "https://cloud.fastgpt.cn/api",
        "apiKey": "fastgpt-key",
        "appId": "app-fastgpt-123",
    })
    created = client.post("/api/assistants", json=sample_assistant_data)
    assert created.status_code == 200
    assistant_id = created.json()["id"]
    assert created.json()["appId"] == "app-fastgpt-123"

    runtime = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime.status_code == 200
    llm_meta = runtime.json()["sessionStartMetadata"]["services"]["llm"]
    assert llm_meta["provider"] == "fastgpt"
    assert llm_meta["appId"] == "app-fastgpt-123"
|
||||
236
api/tests/test_history.py
Normal file
236
api/tests/test_history.py
Normal file
@@ -0,0 +1,236 @@
|
||||
"""Tests for History/Call Record API endpoints"""
|
||||
import pytest
|
||||
import time
|
||||
|
||||
|
||||
class TestHistoryAPI:
    """Test cases for History/Call Record endpoints"""

    def test_get_history_empty(self, client):
        """Test getting history when database is empty"""
        resp = client.get("/api/history")
        assert resp.status_code == 200
        body = resp.json()
        assert "total" in body
        assert "list" in body

    def test_create_call_record(self, client, sample_call_record_data):
        """Test creating a new call record"""
        resp = client.post("/api/history", json=sample_call_record_data)
        assert resp.status_code == 200
        record = resp.json()
        assert record["user_id"] == sample_call_record_data["user_id"]
        assert record["source"] == sample_call_record_data["source"]
        assert record["status"] == "connected"
        assert "id" in record
        assert "started_at" in record

    def test_create_call_record_with_assistant(self, client, sample_assistant_data, sample_call_record_data):
        """Test creating a call record associated with an assistant"""
        # An assistant must exist before a record can reference it.
        assistant_id = client.post("/api/assistants", json=sample_assistant_data).json()["id"]

        sample_call_record_data["assistant_id"] = assistant_id
        resp = client.post("/api/history", json=sample_call_record_data)
        assert resp.status_code == 200
        assert resp.json()["assistant_id"] == assistant_id

    def test_get_call_record_by_id(self, client, sample_call_record_data):
        """Test getting a specific call record by ID"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        resp = client.get(f"/api/history/{record_id}")
        assert resp.status_code == 200
        assert resp.json()["id"] == record_id

    def test_get_call_record_not_found(self, client):
        """Test getting a non-existent call record"""
        assert client.get("/api/history/non-existent-id").status_code == 404

    def test_update_call_record(self, client, sample_call_record_data):
        """Test updating a call record"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        changes = {
            "status": "completed",
            "summary": "Test summary",
            "duration_seconds": 120,
        }
        resp = client.put(f"/api/history/{record_id}", json=changes)
        assert resp.status_code == 200
        updated = resp.json()
        for field, expected in changes.items():
            assert updated[field] == expected

    def test_delete_call_record(self, client, sample_call_record_data):
        """Test deleting a call record"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        assert client.delete(f"/api/history/{record_id}").status_code == 200
        # The record must no longer be retrievable.
        assert client.get(f"/api/history/{record_id}").status_code == 404

    def test_add_transcript(self, client, sample_call_record_data):
        """Test adding a transcript to a call record"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        turn = {
            "turn_index": 0,
            "speaker": "human",
            "content": "Hello, I need help",
            "start_ms": 0,
            "end_ms": 3000,
            "confidence": 0.95,
        }
        resp = client.post(f"/api/history/{record_id}/transcripts", json=turn)
        assert resp.status_code == 200
        saved = resp.json()
        assert saved["turn_index"] == 0
        assert saved["speaker"] == "human"
        assert saved["content"] == "Hello, I need help"

    def test_add_multiple_transcripts(self, client, sample_call_record_data):
        """Test adding multiple transcripts"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        # One human turn followed by one AI turn.
        turns = [
            {"turn_index": 0, "speaker": "human", "content": "Hello", "start_ms": 0, "end_ms": 1000},
            {"turn_index": 1, "speaker": "ai", "content": "Hello! How can I help you?", "start_ms": 1500, "end_ms": 4000},
        ]
        for turn in turns:
            client.post(f"/api/history/{record_id}/transcripts", json=turn)

        resp = client.get(f"/api/history/{record_id}")
        assert resp.status_code == 200
        assert len(resp.json()["transcripts"]) == 2

    def test_filter_history_by_status(self, client, sample_call_record_data):
        """Test filtering history by status"""
        # One "connected" and one "missed" record.
        for status in ("connected", "missed"):
            record = sample_call_record_data.copy()
            record["status"] = status
            client.post("/api/history", json=record)

        resp = client.get("/api/history?status=connected")
        assert resp.status_code == 200
        for record in resp.json()["list"]:
            assert record["status"] == "connected"

    def test_filter_history_by_source(self, client, sample_call_record_data):
        """Test filtering history by source"""
        sample_call_record_data["source"] = "external"
        client.post("/api/history", json=sample_call_record_data)

        resp = client.get("/api/history?source=external")
        assert resp.status_code == 200
        for record in resp.json()["list"]:
            assert record["source"] == "external"

    def test_history_pagination(self, client, sample_call_record_data):
        """Test history pagination"""
        for i in range(5):
            record = sample_call_record_data.copy()
            record["source"] = f"source-{i}"
            client.post("/api/history", json=record)

        resp = client.get("/api/history?page=1&limit=3")
        assert resp.status_code == 200
        body = resp.json()
        assert body["total"] == 5
        assert len(body["list"]) == 3

    def test_transcript_with_emotion(self, client, sample_call_record_data):
        """Test adding transcript with emotion"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        turn = {
            "turn_index": 0,
            "speaker": "ai",
            "content": "Great news!",
            "start_ms": 0,
            "end_ms": 2000,
            "emotion": "happy",
        }
        resp = client.post(f"/api/history/{record_id}/transcripts", json=turn)
        assert resp.status_code == 200
        assert resp.json()["emotion"] == "happy"

    def test_history_with_cost(self, client, sample_call_record_data):
        """Test creating history with cost"""
        sample_call_record_data["cost"] = 0.05
        resp = client.post("/api/history", json=sample_call_record_data)
        assert resp.status_code == 200
        assert resp.json()["cost"] == 0.05

    def test_history_search(self, client, sample_call_record_data):
        """Test searching history"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        turn = {
            "turn_index": 0,
            "speaker": "human",
            "content": "I want to buy a product",
            "start_ms": 0,
            "end_ms": 3000,
        }
        client.post(f"/api/history/{record_id}/transcripts", json=turn)

        # The search endpoint may not be implemented yet, so 404 is tolerated.
        resp = client.get("/api/history/search?q=product")
        assert resp.status_code in [200, 404]
|
||||
291
api/tests/test_knowledge.py
Normal file
291
api/tests/test_knowledge.py
Normal file
@@ -0,0 +1,291 @@
|
||||
"""Tests for Knowledge Base API endpoints"""
|
||||
import pytest
|
||||
import uuid
|
||||
from app.models import KnowledgeBase
|
||||
|
||||
|
||||
class TestKnowledgeAPI:
    """Test cases for Knowledge Base endpoints"""

    def _create_kb(self, client, name):
        """Helper: create a knowledge base with the given name, return its id."""
        return client.post("/api/knowledge/bases", json={"name": name}).json()["id"]

    def test_get_knowledge_bases_empty(self, client):
        """Test getting knowledge bases when database is empty"""
        resp = client.get("/api/knowledge/bases")
        assert resp.status_code == 200
        body = resp.json()
        assert "total" in body
        assert "list" in body

    def test_create_knowledge_base(self, client):
        """Test creating a new knowledge base"""
        payload = {
            "name": "Test Knowledge Base",
            "description": "A test knowledge base",
            "embeddingModel": "text-embedding-3-small",
            "chunkSize": 500,
            "chunkOverlap": 50,
        }
        resp = client.post("/api/knowledge/bases", json=payload)
        assert resp.status_code == 200
        created = resp.json()
        assert created["name"] == "Test Knowledge Base"
        assert created["description"] == "A test knowledge base"
        assert created["embeddingModel"] == "text-embedding-3-small"
        assert "id" in created
        # A brand-new KB starts empty and active.
        assert created["docCount"] == 0
        assert created["chunkCount"] == 0
        assert created["status"] == "active"

    def test_create_knowledge_base_minimal(self, client):
        """Test creating a knowledge base with minimal data"""
        resp = client.post("/api/knowledge/bases", json={"name": "Minimal KB"})
        assert resp.status_code == 200
        assert resp.json()["name"] == "Minimal KB"

    def test_get_knowledge_base_by_id(self, client):
        """Test getting a specific knowledge base by ID"""
        kb_id = self._create_kb(client, "Test KB")

        resp = client.get(f"/api/knowledge/bases/{kb_id}")
        assert resp.status_code == 200
        fetched = resp.json()
        assert fetched["id"] == kb_id
        assert fetched["name"] == "Test KB"

    def test_get_knowledge_base_not_found(self, client):
        """Test getting a non-existent knowledge base"""
        assert client.get("/api/knowledge/bases/non-existent-id").status_code == 404

    def test_update_knowledge_base(self, client):
        """Test updating a knowledge base"""
        kb_id = self._create_kb(client, "Original Name")

        changes = {
            "name": "Updated Name",
            "description": "Updated description",
            "chunkSize": 800,
        }
        resp = client.put(f"/api/knowledge/bases/{kb_id}", json=changes)
        assert resp.status_code == 200
        updated = resp.json()
        for field, expected in changes.items():
            assert updated[field] == expected

    def test_delete_knowledge_base(self, client):
        """Test deleting a knowledge base"""
        kb_id = self._create_kb(client, "To Delete")

        assert client.delete(f"/api/knowledge/bases/{kb_id}").status_code == 200
        # The knowledge base must be gone afterwards.
        assert client.get(f"/api/knowledge/bases/{kb_id}").status_code == 404

    def test_upload_document(self, client):
        """Test uploading a document to knowledge base"""
        kb_id = self._create_kb(client, "Test KB for Docs")

        doc = {
            "name": "test-document.txt",
            "size": "1024",
            "fileType": "txt",
            "storageUrl": "https://storage.example.com/test-document.txt",
        }
        resp = client.post(f"/api/knowledge/bases/{kb_id}/documents", json=doc)
        assert resp.status_code == 200
        saved = resp.json()
        assert saved["name"] == "test-document.txt"
        assert "id" in saved
        assert saved["status"] == "pending"

    def test_upload_file_auto_index(self, client):
        """Test uploading a real file triggers auto indexing."""
        kb_id = self._create_kb(client, "Auto Index KB")

        content = "Line one about product.\nLine two about warranty."
        files = {"file": ("auto-index.txt", content.encode("utf-8"), "text/plain")}
        resp = client.post(f"/api/knowledge/bases/{kb_id}/documents", files=files)
        assert resp.status_code == 200
        uploaded = resp.json()
        # A multipart file upload is indexed immediately, unlike JSON metadata.
        assert uploaded["status"] == "completed"
        assert uploaded["chunkCount"] >= 1

    def test_delete_document(self, client):
        """Test deleting a document from knowledge base"""
        kb_id = self._create_kb(client, "Test KB for Delete")

        doc = {"name": "to-delete.txt", "size": "100", "fileType": "txt"}
        doc_id = client.post(f"/api/knowledge/bases/{kb_id}/documents", json=doc).json()["id"]

        resp = client.delete(f"/api/knowledge/bases/{kb_id}/documents/{doc_id}")
        assert resp.status_code == 200

    def test_index_document(self, client):
        """Test indexing a document"""
        kb_id = self._create_kb(client, "Test KB for Index")

        index_payload = {
            "document_id": "doc-001",
            "content": "This is the content to index. It contains important information about the product."
        }
        resp = client.post(
            f"/api/knowledge/bases/{kb_id}/documents/doc-001/index",
            json=index_payload,
        )
        # Outcome depends on the vector store implementation, so both are accepted.
        assert resp.status_code in [200, 500]

    def test_search_knowledge(self, client):
        """Test searching knowledge base"""
        kb_id = self._create_kb(client, "Test KB for Search")

        search_payload = {
            "query": "test query",
            "kb_id": kb_id,
            "nResults": 5,
        }
        resp = client.post("/api/knowledge/search", json=search_payload)
        # May fail without indexed content; both outcomes are accepted.
        assert resp.status_code in [200, 500]

    def test_get_knowledge_stats(self, client):
        """Test getting knowledge base statistics"""
        kb_id = self._create_kb(client, "Test KB for Stats")

        resp = client.get(f"/api/knowledge/bases/{kb_id}/stats")
        assert resp.status_code == 200
        stats = resp.json()
        assert stats["kb_id"] == kb_id
        assert "docCount" in stats
        assert "chunkCount" in stats

    def test_knowledge_bases_pagination(self, client):
        """Test knowledge bases pagination"""
        for i in range(5):
            client.post("/api/knowledge/bases", json={"name": f"Knowledge Base {i}"})

        resp = client.get("/api/knowledge/bases?page=1&limit=3")
        assert resp.status_code == 200
        body = resp.json()
        assert body["total"] == 5
        assert len(body["list"]) == 3

    def test_different_embedding_models(self, client):
        """Test creating KB with different embedding models"""
        for model in ("text-embedding-3-small", "text-embedding-3-large", "bge-small-zh"):
            resp = client.post(
                "/api/knowledge/bases",
                json={"name": f"KB with {model}", "embeddingModel": model},
            )
            assert resp.status_code == 200
            assert resp.json()["embeddingModel"] == model

    def test_different_chunk_sizes(self, client):
        """Test creating KB with different chunk configurations"""
        configs = [
            {"chunkSize": 500, "chunkOverlap": 50},
            {"chunkSize": 1000, "chunkOverlap": 100},
            {"chunkSize": 256, "chunkOverlap": 25},
        ]
        for idx, config in enumerate(configs):
            resp = client.post(
                "/api/knowledge/bases",
                json={"name": f"Chunk Test KB {idx}", **config},
            )
            assert resp.status_code == 200

    def test_knowledge_base_with_documents(self, client):
        """Test creating KB and adding multiple documents"""
        kb_id = self._create_kb(client, "KB with Multiple Docs")

        for i in range(3):
            doc = {
                "name": f"document-{i}.txt",
                "size": f"{1000 + i * 100}",
                "fileType": "txt",
            }
            client.post(f"/api/knowledge/bases/{kb_id}/documents", json=doc)

        resp = client.get(f"/api/knowledge/bases/{kb_id}")
        assert resp.status_code == 200
        assert len(resp.json()["documents"]) == 3

    def test_create_knowledge_base_duplicate_name(self, client):
        """Test duplicate KB names are rejected for same user."""
        payload = {"name": "Duplicate KB"}
        assert client.post("/api/knowledge/bases", json=payload).status_code == 200
        # A second creation with the same name must be rejected.
        assert client.post("/api/knowledge/bases", json=payload).status_code == 400

    def test_update_embedding_model_blocked_when_chunks_exist(self, client, db_session):
        """Test embedding model change is blocked after indexing chunks."""
        create_resp = client.post("/api/knowledge/bases", json={"name": "KB Embedding Lock"})
        assert create_resp.status_code == 200
        kb_id = create_resp.json()["id"]

        # Simulate indexed content directly in the database.
        kb = db_session.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
        kb.chunk_count = 5
        db_session.commit()

        update_resp = client.put(f"/api/knowledge/bases/{kb_id}", json={"embeddingModel": "text-embedding-3-large"})
        assert update_resp.status_code == 400
|
||||
352
api/tests/test_llm.py
Normal file
352
api/tests/test_llm.py
Normal file
@@ -0,0 +1,352 @@
|
||||
"""Tests for LLM Model API endpoints"""
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
|
||||
class TestLLMModelAPI:
|
||||
"""Test cases for LLM Model endpoints"""
|
||||
|
||||
def test_get_llm_models_empty(self, client):
    """Test getting LLM models when database is empty"""
    resp = client.get("/api/llm")
    assert resp.status_code == 200
    body = resp.json()
    assert "total" in body
    assert "list" in body
    assert body["total"] == 0
|
||||
|
||||
def test_create_llm_model(self, client, sample_llm_model_data):
    """Test creating a new LLM model"""
    resp = client.post("/api/llm", json=sample_llm_model_data)
    assert resp.status_code == 200
    created = resp.json()
    # The response must echo back the submitted identity fields.
    for field in ("name", "vendor", "type", "base_url"):
        assert created[field] == sample_llm_model_data[field]
    assert "id" in created
|
||||
|
||||
def test_create_llm_model_minimal(self, client):
    """Test creating an LLM model with minimal required data"""
    payload = {
        "name": "Minimal LLM",
        "vendor": "Test",
        "type": "text",
        "base_url": "https://api.test.com",
        "api_key": "test-key",
    }
    resp = client.post("/api/llm", json=payload)
    assert resp.status_code == 200
    assert resp.json()["name"] == "Minimal LLM"
|
||||
|
||||
def test_get_llm_model_by_id(self, client, sample_llm_model_data):
    """Test getting a specific LLM model by ID"""
    # Create a model, then fetch it back by its generated id.
    model_id = client.post("/api/llm", json=sample_llm_model_data).json()["id"]

    resp = client.get(f"/api/llm/{model_id}")
    assert resp.status_code == 200
    fetched = resp.json()
    assert fetched["id"] == model_id
    assert fetched["name"] == sample_llm_model_data["name"]
|
||||
|
||||
def test_get_llm_model_not_found(self, client):
    """Test getting a non-existent LLM model"""
    assert client.get("/api/llm/non-existent-id").status_code == 404
|
||||
|
||||
def test_update_llm_model(self, client, sample_llm_model_data):
    """Test updating an LLM model"""
    model_id = client.post("/api/llm", json=sample_llm_model_data).json()["id"]

    changes = {
        "name": "Updated LLM Model",
        "vendor": "SiliconFlow",
        "type": "embedding",
        "temperature": 0.5,
        "context_length": 8192,
    }
    resp = client.put(f"/api/llm/{model_id}", json=changes)
    assert resp.status_code == 200
    updated = resp.json()
    # Every submitted change must be reflected in the response.
    for field, expected in changes.items():
        assert updated[field] == expected
|
||||
|
||||
def test_delete_llm_model(self, client, sample_llm_model_data):
    """Test deleting an LLM model"""
    model_id = client.post("/api/llm", json=sample_llm_model_data).json()["id"]

    assert client.delete(f"/api/llm/{model_id}").status_code == 200
    # The model must be gone afterwards.
    assert client.get(f"/api/llm/{model_id}").status_code == 404
|
||||
|
||||
def test_list_llm_models_with_pagination(self, client, sample_llm_model_data):
    """Test listing LLM models with pagination"""
    # Seed three distinct models.
    for i in range(3):
        payload = dict(sample_llm_model_data, id=f"test-llm-{i}", name=f"LLM Model {i}")
        client.post("/api/llm", json=payload)

    resp = client.get("/api/llm?page=1&limit=2")
    assert resp.status_code == 200
    body = resp.json()
    assert body["total"] == 3
    assert len(body["list"]) == 2
|
||||
|
||||
def test_filter_llm_models_by_type(self, client, sample_llm_model_data):
    """Test filtering LLM models by type.

    Creates one model per type, then verifies that ?model_type=text
    returns only text models.
    """
    # Plain iteration: the original used enumerate() but never used the index.
    for model_type in ("text", "embedding", "rerank"):
        data = sample_llm_model_data.copy()
        data["id"] = f"test-llm-{model_type}"
        data["name"] = f"LLM {model_type}"
        data["type"] = model_type
        client.post("/api/llm", json=data)

    # Filter by type
    response = client.get("/api/llm?model_type=text")
    assert response.status_code == 200
    data = response.json()
    assert data["total"] >= 1
    for model in data["list"]:
        assert model["type"] == "text"
|
||||
|
||||
def test_filter_llm_models_by_enabled(self, client, sample_llm_model_data):
    """Test filtering LLM models by enabled status"""
    # Create one enabled and one disabled model.
    data = sample_llm_model_data.copy()
    data["id"] = "test-llm-enabled"
    data["name"] = "Enabled LLM"
    data["enabled"] = True
    client.post("/api/llm", json=data)

    data["id"] = "test-llm-disabled"
    data["name"] = "Disabled LLM"
    data["enabled"] = False
    client.post("/api/llm", json=data)

    # Filter by enabled
    response = client.get("/api/llm?enabled=true")
    assert response.status_code == 200
    data = response.json()
    for model in data["list"]:
        # PEP 8 / E712: compare booleans with `is`, not `== True`.
        assert model["enabled"] is True
|
||||
|
||||
def test_create_llm_model_with_all_fields(self, client):
    """Test creating an LLM model with all fields"""
    payload = {
        "id": "full-llm",
        "name": "Full LLM Model",
        "vendor": "OpenAI",
        "type": "text",
        "base_url": "https://api.openai.com/v1",
        "api_key": "sk-test",
        "model_name": "gpt-4",
        "temperature": 0.8,
        "context_length": 16384,
        "enabled": True,
    }
    resp = client.post("/api/llm", json=payload)
    assert resp.status_code == 200
    created = resp.json()
    assert created["name"] == "Full LLM Model"
    assert created["temperature"] == 0.8
    assert created["context_length"] == 16384
|
||||
|
||||
@patch('httpx.Client')
def test_test_llm_model_success(self, mock_client_class, client, sample_llm_model_data):
    """Test testing an LLM model with successful connection"""
    # Create model first
    create_response = client.post("/api/llm", json=sample_llm_model_data)
    model_id = create_response.json()["id"]

    # Build a mock httpx client whose POST returns a successful chat completion.
    mock_client = MagicMock()
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.json.return_value = {
        "choices": [{"message": {"content": "OK"}}]
    }
    mock_response.raise_for_status = MagicMock()
    mock_client.post.return_value = mock_response
    # Support use as a context manager (`with httpx.Client() as c:`).
    mock_client.__enter__ = MagicMock(return_value=mock_client)
    mock_client.__exit__ = MagicMock(return_value=False)

    with patch('app.routers.llm.httpx.Client', return_value=mock_client):
        response = client.post(f"/api/llm/{model_id}/test")
        assert response.status_code == 200
        data = response.json()
        # PEP 8 / E712: compare booleans with `is`, not `== True`.
        assert data["success"] is True
|
||||
|
||||
@patch('httpx.Client')
def test_test_llm_model_failure(self, mock_client_class, client, sample_llm_model_data):
    """Test testing an LLM model with failed connection"""
    # Create model first
    create_response = client.post("/api/llm", json=sample_llm_model_data)
    model_id = create_response.json()["id"]

    # Build a mock httpx client whose POST simulates a 401 from the provider.
    mock_client = MagicMock()
    mock_response = MagicMock()
    mock_response.status_code = 401
    mock_response.text = "Unauthorized"
    mock_response.raise_for_status = MagicMock(side_effect=Exception("401 Unauthorized"))
    mock_client.post.return_value = mock_response
    # Support use as a context manager (`with httpx.Client() as c:`).
    mock_client.__enter__ = MagicMock(return_value=mock_client)
    mock_client.__exit__ = MagicMock(return_value=False)

    with patch('app.routers.llm.httpx.Client', return_value=mock_client):
        response = client.post(f"/api/llm/{model_id}/test")
        assert response.status_code == 200
        data = response.json()
        # PEP 8 / E712: compare booleans with `is`, not `== False`.
        assert data["success"] is False
|
||||
|
||||
def test_different_llm_vendors(self, client):
    """Test creating LLM models with different vendors"""
    for vendor in ("OpenAI", "SiliconFlow", "ZhipuAI", "Anthropic"):
        payload = {
            "id": f"test-{vendor.lower()}",
            "name": f"Test {vendor}",
            "vendor": vendor,
            "type": "text",
            "base_url": f"https://api.{vendor.lower()}.com/v1",
            "api_key": "test-key",
        }
        resp = client.post("/api/llm", json=payload)
        assert resp.status_code == 200
        assert resp.json()["vendor"] == vendor
|
||||
|
||||
def test_embedding_llm_model(self, client):
    """Test creating an embedding LLM model"""
    payload = {
        "id": "embedding-test",
        "name": "Embedding Model",
        "vendor": "OpenAI",
        "type": "embedding",
        "base_url": "https://api.openai.com/v1",
        "api_key": "test-key",
        "model_name": "text-embedding-3-small",
    }
    resp = client.post("/api/llm", json=payload)
    assert resp.status_code == 200
    assert resp.json()["type"] == "embedding"
|
||||
|
||||
def test_preview_llm_model_success(self, client, sample_llm_model_data, monkeypatch):
    """Test LLM preview endpoint returns model reply."""
    from app.routers import llm as llm_router

    model_id = client.post("/api/llm", json=sample_llm_model_data).json()["id"]

    class StubResponse:
        # Minimal stand-in for an httpx.Response carrying a chat completion.
        status_code = 200

        def json(self):
            return {
                "choices": [{"message": {"content": "Preview OK"}}],
                "usage": {"prompt_tokens": 10, "completion_tokens": 2, "total_tokens": 12}
            }

        @property
        def text(self):
            return '{"ok":true}'

    class StubClient:
        # Minimal stand-in for httpx.Client usable as a context manager.
        def __init__(self, *args, **kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def post(self, url, json=None, headers=None):
            # The router must call chat-completions with the stored API key
            # and a user-role message.
            assert url.endswith("/chat/completions")
            assert headers["Authorization"] == f"Bearer {sample_llm_model_data['api_key']}"
            assert json["messages"][0]["role"] == "user"
            return StubResponse()

    monkeypatch.setattr(llm_router.httpx, "Client", StubClient)

    resp = client.post(f"/api/llm/{model_id}/preview", json={"message": "hello"})
    assert resp.status_code == 200
    body = resp.json()
    assert body["success"] is True
    assert body["reply"] == "Preview OK"
|
||||
|
||||
def test_preview_llm_model_reject_empty_message(self, client, sample_llm_model_data):
|
||||
"""Test LLM preview endpoint validates message."""
|
||||
create_response = client.post("/api/llm", json=sample_llm_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
response = client.post(f"/api/llm/{model_id}/preview", json={"message": " "})
|
||||
assert response.status_code == 400
|
||||
|
||||
def test_preview_embedding_model_success(self, client, monkeypatch):
|
||||
"""Test embedding model preview endpoint returns embedding summary."""
|
||||
from app.routers import llm as llm_router
|
||||
|
||||
embedding_model_data = {
|
||||
"id": "preview-emb",
|
||||
"name": "Preview Embedding",
|
||||
"vendor": "OpenAI",
|
||||
"type": "embedding",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "test-key",
|
||||
"model_name": "text-embedding-3-small"
|
||||
}
|
||||
create_response = client.post("/api/llm", json=embedding_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
class DummyResponse:
|
||||
status_code = 200
|
||||
|
||||
def json(self):
|
||||
return {"data": [{"embedding": [0.1, 0.2, 0.3, 0.4]}], "usage": {"total_tokens": 7}}
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
return '{"ok":true}'
|
||||
|
||||
class DummyClient:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def post(self, url, json=None, headers=None):
|
||||
assert url.endswith("/embeddings")
|
||||
assert json["input"] == "hello embedding"
|
||||
assert headers["Authorization"] == "Bearer test-key"
|
||||
return DummyResponse()
|
||||
|
||||
monkeypatch.setattr(llm_router.httpx, "Client", DummyClient)
|
||||
|
||||
response = client.post(f"/api/llm/{model_id}/preview", json={"message": "hello embedding"})
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["success"] is True
|
||||
assert "dims=4" in data["reply"]
|
||||
368
api/tests/test_tools.py
Normal file
368
api/tests/test_tools.py
Normal file
@@ -0,0 +1,368 @@
|
||||
"""Tests for Tools & Autotest API endpoints"""
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
|
||||
class TestToolsAPI:
|
||||
"""Test cases for Tools endpoints"""
|
||||
|
||||
def test_list_available_tools(self, client):
|
||||
"""Test listing all available tools"""
|
||||
response = client.get("/api/tools/list")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tools" in data
|
||||
# Check for expected tools
|
||||
tools = data["tools"]
|
||||
assert "calculator" in tools
|
||||
assert "code_interpreter" in tools
|
||||
assert "current_time" in tools
|
||||
assert "turn_on_camera" in tools
|
||||
assert "turn_off_camera" in tools
|
||||
assert "increase_volume" in tools
|
||||
assert "decrease_volume" in tools
|
||||
assert "voice_msg_prompt" in tools
|
||||
assert "calculator" in tools
|
||||
|
||||
def test_get_tool_detail(self, client):
|
||||
"""Test getting a specific tool's details"""
|
||||
response = client.get("/api/tools/list/calculator")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == "计算器"
|
||||
assert "parameters" in data
|
||||
|
||||
def test_get_tool_detail_not_found(self, client):
|
||||
"""Test getting a non-existent tool"""
|
||||
response = client.get("/api/tools/list/non-existent-tool")
|
||||
assert response.status_code == 404
|
||||
|
||||
def test_get_tool_detail_legacy_alias(self, client):
|
||||
"""Legacy tool id should resolve to canonical tool detail."""
|
||||
response = client.get("/api/tools/list/voice_message_prompt")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == "语音消息提示"
|
||||
assert "msg" in data["parameters"]["properties"]
|
||||
|
||||
def test_health_check(self, client):
|
||||
"""Test health check endpoint"""
|
||||
response = client.get("/api/tools/health")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["status"] == "healthy"
|
||||
assert "timestamp" in data
|
||||
assert "tools" in data
|
||||
|
||||
|
||||
class TestAutotestAPI:
|
||||
"""Test cases for Autotest endpoints"""
|
||||
|
||||
def test_autotest_no_models(self, client):
|
||||
"""Test autotest without specifying model IDs"""
|
||||
response = client.post("/api/tools/autotest")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "id" in data
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
# Should have test failures since no models provided
|
||||
assert data["summary"]["total"] > 0
|
||||
|
||||
def test_autotest_with_llm_model(self, client, sample_llm_model_data):
|
||||
"""Test autotest with an LLM model"""
|
||||
# Create an LLM model first
|
||||
create_response = client.post("/api/llm", json=sample_llm_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Run autotest
|
||||
response = client.post(f"/api/tools/autotest?llm_model_id={model_id}&test_asr=false")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
def test_autotest_with_asr_model(self, client, sample_asr_model_data):
|
||||
"""Test autotest with an ASR model"""
|
||||
# Create an ASR model first
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Run autotest
|
||||
response = client.post(f"/api/tools/autotest?asr_model_id={model_id}&test_llm=false")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
def test_autotest_with_both_models(self, client, sample_llm_model_data, sample_asr_model_data):
|
||||
"""Test autotest with both LLM and ASR models"""
|
||||
# Create models
|
||||
llm_response = client.post("/api/llm", json=sample_llm_model_data)
|
||||
llm_id = llm_response.json()["id"]
|
||||
|
||||
asr_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
asr_id = asr_response.json()["id"]
|
||||
|
||||
# Run autotest
|
||||
response = client.post(
|
||||
f"/api/tools/autotest?llm_model_id={llm_id}&asr_model_id={asr_id}"
|
||||
)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
@patch('httpx.Client')
|
||||
def test_autotest_llm_model_success(self, mock_client_class, client, sample_llm_model_data):
|
||||
"""Test autotest for a specific LLM model with successful connection"""
|
||||
# Create an LLM model first
|
||||
create_response = client.post("/api/llm", json=sample_llm_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Mock the HTTP response for successful connection
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = {
|
||||
"choices": [{"message": {"content": "OK"}}]
|
||||
}
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_response.iter_bytes = MagicMock(return_value=[b'chunk1', b'chunk2'])
|
||||
mock_client.post.return_value = mock_response
|
||||
mock_client.__enter__ = MagicMock(return_value=mock_client)
|
||||
mock_client.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch('app.routers.tools.httpx.Client', return_value=mock_client):
|
||||
response = client.post(f"/api/tools/autotest/llm/{model_id}")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
@patch('httpx.Client')
|
||||
def test_autotest_asr_model_success(self, mock_client_class, client, sample_asr_model_data):
|
||||
"""Test autotest for a specific ASR model with successful connection"""
|
||||
# Create an ASR model first
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Mock the HTTP response for successful connection
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_client.get.return_value = mock_response
|
||||
mock_client.__enter__ = MagicMock(return_value=mock_client)
|
||||
mock_client.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch('app.routers.tools.httpx.Client', return_value=mock_client):
|
||||
response = client.post(f"/api/tools/autotest/asr/{model_id}")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
def test_autotest_llm_model_not_found(self, client):
|
||||
"""Test autotest for a non-existent LLM model"""
|
||||
response = client.post("/api/tools/autotest/llm/non-existent-id")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
# Should have a failure test
|
||||
assert any(not t["passed"] for t in data["tests"])
|
||||
|
||||
def test_autotest_asr_model_not_found(self, client):
|
||||
"""Test autotest for a non-existent ASR model"""
|
||||
response = client.post("/api/tools/autotest/asr/non-existent-id")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
# Should have a failure test
|
||||
assert any(not t["passed"] for t in data["tests"])
|
||||
|
||||
@patch('httpx.Client')
|
||||
def test_test_message_success(self, mock_client_class, client, sample_llm_model_data):
|
||||
"""Test sending a test message to an LLM model"""
|
||||
# Create an LLM model first
|
||||
create_response = client.post("/api/llm", json=sample_llm_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Mock the HTTP response
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = {
|
||||
"choices": [{"message": {"content": "Hello! This is a test reply."}}],
|
||||
"usage": {"total_tokens": 10}
|
||||
}
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_client.post.return_value = mock_response
|
||||
mock_client.__enter__ = MagicMock(return_value=mock_client)
|
||||
mock_client.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch('app.routers.tools.httpx.Client', return_value=mock_client):
|
||||
response = client.post(
|
||||
f"/api/tools/test-message?llm_model_id={model_id}",
|
||||
json={"message": "Hello!"}
|
||||
)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["success"] == True
|
||||
assert "reply" in data
|
||||
|
||||
def test_test_message_model_not_found(self, client):
|
||||
"""Test sending a test message to a non-existent model"""
|
||||
response = client.post(
|
||||
"/api/tools/test-message?llm_model_id=non-existent",
|
||||
json={"message": "Hello!"}
|
||||
)
|
||||
assert response.status_code == 404
|
||||
|
||||
def test_autotest_result_structure(self, client):
|
||||
"""Test that autotest results have the correct structure"""
|
||||
response = client.post("/api/tools/autotest")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
|
||||
# Check required fields
|
||||
assert "id" in data
|
||||
assert "started_at" in data
|
||||
assert "duration_ms" in data
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
# Check summary structure
|
||||
assert "passed" in data["summary"]
|
||||
assert "failed" in data["summary"]
|
||||
assert "total" in data["summary"]
|
||||
|
||||
# Check test structure
|
||||
if data["tests"]:
|
||||
test = data["tests"][0]
|
||||
assert "name" in test
|
||||
assert "passed" in test
|
||||
assert "message" in test
|
||||
assert "duration_ms" in test
|
||||
|
||||
def test_tools_have_required_fields(self, client):
|
||||
"""Test that all tools have required fields"""
|
||||
response = client.get("/api/tools/list")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
|
||||
for tool_id, tool in data["tools"].items():
|
||||
assert "name" in tool
|
||||
assert "description" in tool
|
||||
assert "parameters" in tool
|
||||
|
||||
# Check parameters structure
|
||||
params = tool["parameters"]
|
||||
assert "type" in params
|
||||
assert "properties" in params
|
||||
|
||||
def test_calculator_tool_parameters(self, client):
|
||||
"""Test calculator tool has correct parameters"""
|
||||
response = client.get("/api/tools/list/calculator")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
|
||||
assert data["name"] == "计算器"
|
||||
assert "expression" in data["parameters"]["properties"]
|
||||
assert "required" in data["parameters"]
|
||||
assert "expression" in data["parameters"]["required"]
|
||||
|
||||
def test_code_interpreter_tool_parameters(self, client):
|
||||
"""Test code_interpreter tool has correct parameters"""
|
||||
response = client.get("/api/tools/list/code_interpreter")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
|
||||
assert data["name"] == "代码执行"
|
||||
assert "code" in data["parameters"]["properties"]
|
||||
|
||||
|
||||
class TestToolResourceCRUD:
|
||||
"""Test cases for persistent tool resource CRUD endpoints."""
|
||||
|
||||
def test_list_tool_resources_contains_system_tools(self, client):
|
||||
response = client.get("/api/tools/resources")
|
||||
assert response.status_code == 200
|
||||
payload = response.json()
|
||||
assert payload["total"] >= 1
|
||||
ids = [item["id"] for item in payload["list"]]
|
||||
assert "calculator" in ids
|
||||
assert "voice_msg_prompt" in ids
|
||||
calculator = next((item for item in payload["list"] if item["id"] == "calculator"), None)
|
||||
assert calculator is not None
|
||||
assert calculator["parameter_schema"]["type"] == "object"
|
||||
|
||||
def test_create_update_delete_tool_resource(self, client):
|
||||
create_resp = client.post("/api/tools/resources", json={
|
||||
"name": "自定义网页抓取",
|
||||
"description": "抓取页面并提取正文",
|
||||
"category": "query",
|
||||
"icon": "Globe",
|
||||
"http_method": "GET",
|
||||
"http_url": "https://example.com/search",
|
||||
"http_headers": {},
|
||||
"http_timeout_ms": 10000,
|
||||
"parameter_schema": {
|
||||
"type": "object",
|
||||
"properties": {"keyword": {"type": "string"}},
|
||||
"required": ["keyword"]
|
||||
},
|
||||
"parameter_defaults": {"limit": 10},
|
||||
"enabled": True,
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
created = create_resp.json()
|
||||
tool_id = created["id"]
|
||||
assert created["name"] == "自定义网页抓取"
|
||||
assert created["is_system"] is False
|
||||
assert created["parameter_schema"]["required"] == ["keyword"]
|
||||
assert created["parameter_defaults"]["limit"] == 10
|
||||
|
||||
update_resp = client.put(f"/api/tools/resources/{tool_id}", json={
|
||||
"name": "自定义网页检索",
|
||||
"category": "system",
|
||||
"parameter_defaults": {"limit": 20},
|
||||
})
|
||||
assert update_resp.status_code == 200
|
||||
updated = update_resp.json()
|
||||
assert updated["name"] == "自定义网页检索"
|
||||
assert updated["category"] == "system"
|
||||
assert updated["parameter_defaults"]["limit"] == 20
|
||||
|
||||
get_resp = client.get(f"/api/tools/resources/{tool_id}")
|
||||
assert get_resp.status_code == 200
|
||||
assert get_resp.json()["id"] == tool_id
|
||||
|
||||
delete_resp = client.delete(f"/api/tools/resources/{tool_id}")
|
||||
assert delete_resp.status_code == 200
|
||||
|
||||
missing_resp = client.get(f"/api/tools/resources/{tool_id}")
|
||||
assert missing_resp.status_code == 404
|
||||
|
||||
def test_create_query_tool_requires_http_url(self, client):
|
||||
resp = client.post("/api/tools/resources", json={
|
||||
"name": "缺失URL的查询工具",
|
||||
"description": "应当失败",
|
||||
"category": "query",
|
||||
"icon": "Globe",
|
||||
"enabled": True,
|
||||
})
|
||||
assert resp.status_code == 400
|
||||
|
||||
def test_system_tool_can_be_updated_and_deleted(self, client):
|
||||
list_resp = client.get("/api/tools/resources")
|
||||
assert list_resp.status_code == 200
|
||||
assert any(item["id"] == "turn_on_camera" for item in list_resp.json()["list"])
|
||||
|
||||
update_resp = client.put("/api/tools/resources/turn_on_camera", json={"name": "更新后的打开摄像头", "category": "system"})
|
||||
assert update_resp.status_code == 200
|
||||
assert update_resp.json()["name"] == "更新后的打开摄像头"
|
||||
|
||||
delete_resp = client.delete("/api/tools/resources/turn_on_camera")
|
||||
assert delete_resp.status_code == 200
|
||||
|
||||
get_resp = client.get("/api/tools/resources/turn_on_camera")
|
||||
assert get_resp.status_code == 404
|
||||
331
api/tests/test_voices.py
Normal file
331
api/tests/test_voices.py
Normal file
@@ -0,0 +1,331 @@
|
||||
"""Tests for Voice API endpoints"""
|
||||
import base64
|
||||
import pytest
|
||||
|
||||
|
||||
class TestVoiceAPI:
|
||||
"""Test cases for Voice endpoints"""
|
||||
|
||||
def test_get_voices_empty(self, client):
|
||||
"""Test getting voices when database is empty"""
|
||||
response = client.get("/api/voices")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "total" in data
|
||||
assert "list" in data
|
||||
|
||||
def test_create_voice(self, client, sample_voice_data):
|
||||
"""Test creating a new voice"""
|
||||
response = client.post("/api/voices", json=sample_voice_data)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == sample_voice_data["name"]
|
||||
assert data["vendor"] == sample_voice_data["vendor"]
|
||||
assert data["gender"] == sample_voice_data["gender"]
|
||||
assert data["language"] == sample_voice_data["language"]
|
||||
assert "id" in data
|
||||
|
||||
def test_create_voice_minimal(self, client):
|
||||
"""Test creating a voice with minimal data"""
|
||||
data = {
|
||||
"name": "Minimal Voice",
|
||||
"vendor": "Test",
|
||||
"gender": "Male",
|
||||
"language": "en",
|
||||
"description": ""
|
||||
}
|
||||
response = client.post("/api/voices", json=data)
|
||||
assert response.status_code == 200
|
||||
|
||||
def test_get_voice_by_id(self, client, sample_voice_data):
|
||||
"""Test getting a specific voice by ID"""
|
||||
# Create first
|
||||
create_response = client.post("/api/voices", json=sample_voice_data)
|
||||
voice_id = create_response.json()["id"]
|
||||
|
||||
# Get by ID
|
||||
response = client.get(f"/api/voices/{voice_id}")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["id"] == voice_id
|
||||
assert data["name"] == sample_voice_data["name"]
|
||||
|
||||
def test_get_voice_not_found(self, client):
|
||||
"""Test getting a non-existent voice"""
|
||||
response = client.get("/api/voices/non-existent-id")
|
||||
assert response.status_code == 404
|
||||
|
||||
def test_update_voice(self, client, sample_voice_data):
|
||||
"""Test updating a voice"""
|
||||
# Create first
|
||||
create_response = client.post("/api/voices", json=sample_voice_data)
|
||||
voice_id = create_response.json()["id"]
|
||||
|
||||
# Update
|
||||
update_data = {"name": "Updated Voice", "speed": 1.5}
|
||||
response = client.put(f"/api/voices/{voice_id}", json=update_data)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == "Updated Voice"
|
||||
assert data["speed"] == 1.5
|
||||
|
||||
def test_delete_voice(self, client, sample_voice_data):
|
||||
"""Test deleting a voice"""
|
||||
# Create first
|
||||
create_response = client.post("/api/voices", json=sample_voice_data)
|
||||
voice_id = create_response.json()["id"]
|
||||
|
||||
# Delete
|
||||
response = client.delete(f"/api/voices/{voice_id}")
|
||||
assert response.status_code == 200
|
||||
|
||||
# Verify deleted
|
||||
get_response = client.get(f"/api/voices/{voice_id}")
|
||||
assert get_response.status_code == 404
|
||||
|
||||
def test_list_voices_with_pagination(self, client, sample_voice_data):
|
||||
"""Test listing voices with pagination"""
|
||||
# Create multiple voices
|
||||
for i in range(3):
|
||||
data = sample_voice_data.copy()
|
||||
data["name"] = f"Voice {i}"
|
||||
client.post("/api/voices", json=data)
|
||||
|
||||
# Test pagination
|
||||
response = client.get("/api/voices?page=1&limit=2")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["total"] == 3
|
||||
assert len(data["list"]) == 2
|
||||
|
||||
def test_filter_voices_by_vendor(self, client, sample_voice_data):
|
||||
"""Test filtering voices by vendor"""
|
||||
# Create voice with specific vendor
|
||||
sample_voice_data["vendor"] = "FilterTestVendor"
|
||||
client.post("/api/voices", json=sample_voice_data)
|
||||
|
||||
response = client.get("/api/voices?vendor=FilterTestVendor")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
for voice in data["list"]:
|
||||
assert voice["vendor"] == "FilterTestVendor"
|
||||
|
||||
def test_filter_voices_by_language(self, client, sample_voice_data):
|
||||
"""Test filtering voices by language"""
|
||||
sample_voice_data["language"] = "en"
|
||||
client.post("/api/voices", json=sample_voice_data)
|
||||
|
||||
response = client.get("/api/voices?language=en")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
for voice in data["list"]:
|
||||
assert voice["language"] == "en"
|
||||
|
||||
def test_filter_voices_by_gender(self, client, sample_voice_data):
|
||||
"""Test filtering voices by gender"""
|
||||
sample_voice_data["gender"] = "Female"
|
||||
client.post("/api/voices", json=sample_voice_data)
|
||||
|
||||
response = client.get("/api/voices?gender=Female")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
for voice in data["list"]:
|
||||
assert voice["gender"] == "Female"
|
||||
|
||||
def test_preview_voice_success(self, client, monkeypatch):
|
||||
"""Test preview voice endpoint returns audio data URL"""
|
||||
from app.routers import voices as voice_router
|
||||
|
||||
class DummyResponse:
|
||||
status_code = 200
|
||||
content = b"fake-mp3-bytes"
|
||||
text = "ok"
|
||||
|
||||
def json(self):
|
||||
return {}
|
||||
|
||||
class DummyClient:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def post(self, *args, **kwargs):
|
||||
return DummyResponse()
|
||||
|
||||
monkeypatch.setenv("SILICONFLOW_API_KEY", "test-key")
|
||||
monkeypatch.setattr(voice_router.httpx, "Client", DummyClient)
|
||||
|
||||
create_resp = client.post("/api/voices", json={
|
||||
"id": "anna",
|
||||
"name": "Anna",
|
||||
"vendor": "SiliconFlow",
|
||||
"gender": "Female",
|
||||
"language": "zh",
|
||||
"description": "system voice",
|
||||
"model": "FunAudioLLM/CosyVoice2-0.5B",
|
||||
"voice_key": "FunAudioLLM/CosyVoice2-0.5B:anna"
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
voice_id = create_resp.json()["id"]
|
||||
|
||||
preview_resp = client.post(f"/api/voices/{voice_id}/preview", json={"text": "你好"})
|
||||
assert preview_resp.status_code == 200
|
||||
payload = preview_resp.json()
|
||||
assert payload["success"] is True
|
||||
assert payload["audio_url"].startswith("data:audio/mpeg;base64,")
|
||||
encoded = payload["audio_url"].split(",", 1)[1]
|
||||
assert base64.b64decode(encoded) == b"fake-mp3-bytes"
|
||||
|
||||
def test_voice_credential_persist_and_preview_use_voice_key(self, client, monkeypatch):
|
||||
"""Test per-voice api_key/base_url persisted and used by preview endpoint"""
|
||||
from app.routers import voices as voice_router
|
||||
|
||||
captured_auth = {"value": ""}
|
||||
captured_url = {"value": ""}
|
||||
|
||||
class DummyResponse:
|
||||
status_code = 200
|
||||
content = b"fake-mp3"
|
||||
text = "ok"
|
||||
|
||||
def json(self):
|
||||
return {}
|
||||
|
||||
class DummyClient:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def post(self, *args, **kwargs):
|
||||
headers = kwargs.get("headers", {})
|
||||
captured_auth["value"] = headers.get("Authorization", "")
|
||||
if args:
|
||||
captured_url["value"] = args[0]
|
||||
return DummyResponse()
|
||||
|
||||
monkeypatch.delenv("SILICONFLOW_API_KEY", raising=False)
|
||||
monkeypatch.setattr(voice_router.httpx, "Client", DummyClient)
|
||||
|
||||
create_resp = client.post("/api/voices", json={
|
||||
"id": "anna2",
|
||||
"name": "Anna 2",
|
||||
"vendor": "SiliconFlow",
|
||||
"gender": "Female",
|
||||
"language": "zh",
|
||||
"description": "voice",
|
||||
"model": "FunAudioLLM/CosyVoice2-0.5B",
|
||||
"voice_key": "FunAudioLLM/CosyVoice2-0.5B:anna",
|
||||
"api_key": "voice-key-123",
|
||||
"base_url": "https://api.siliconflow.cn/v1"
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
voice_id = create_resp.json()["id"]
|
||||
|
||||
preview_resp = client.post(f"/api/voices/{voice_id}/preview", json={"text": "hello"})
|
||||
assert preview_resp.status_code == 200
|
||||
assert captured_auth["value"] == "Bearer voice-key-123"
|
||||
assert captured_url["value"] == "https://api.siliconflow.cn/v1/audio/speech"
|
||||
|
||||
def test_create_voice_dashscope_defaults(self, client):
|
||||
"""Test creating DashScope voice applies model/voice defaults."""
|
||||
create_resp = client.post("/api/voices", json={
|
||||
"name": "DashScope Voice",
|
||||
"vendor": "DashScope",
|
||||
"gender": "Female",
|
||||
"language": "zh",
|
||||
"description": "dashscope",
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
payload = create_resp.json()
|
||||
assert payload["vendor"] == "DashScope"
|
||||
assert payload["model"] == "qwen3-tts-flash-realtime"
|
||||
assert payload["voice_key"] == "Cherry"
|
||||
|
||||
def test_preview_voice_dashscope_success(self, client, monkeypatch):
|
||||
"""DashScope voice preview should return playable wav data url."""
|
||||
from app.routers import voices as voice_router
|
||||
|
||||
captured = {
|
||||
"api_key": "",
|
||||
"model": "",
|
||||
"url": "",
|
||||
"session": {},
|
||||
"text": "",
|
||||
}
|
||||
|
||||
class DummyAudioFormat:
|
||||
PCM_24000HZ_MONO_16BIT = "pcm24k16mono"
|
||||
|
||||
class DummyDashScopeModule:
|
||||
api_key = ""
|
||||
|
||||
class DummyRealtime:
|
||||
def __init__(self, *args, **kwargs):
|
||||
captured["api_key"] = kwargs.get("api_key", "")
|
||||
captured["model"] = kwargs.get("model", "")
|
||||
captured["url"] = kwargs.get("url", "")
|
||||
self.callback = kwargs["callback"]
|
||||
|
||||
def connect(self):
|
||||
self.callback.on_open()
|
||||
|
||||
def update_session(self, **kwargs):
|
||||
captured["session"] = kwargs
|
||||
|
||||
def append_text(self, text):
|
||||
captured["text"] = text
|
||||
|
||||
def commit(self):
|
||||
# 16-bit PCM mono samples
|
||||
raw_pcm = b"\x00\x00\x01\x00\x02\x00\x03\x00"
|
||||
self.callback.on_event({
|
||||
"type": "response.audio.delta",
|
||||
"delta": base64.b64encode(raw_pcm).decode("utf-8"),
|
||||
})
|
||||
self.callback.on_event({"type": "response.done"})
|
||||
|
||||
def finish(self):
|
||||
return None
|
||||
|
||||
def close(self):
|
||||
return None
|
||||
|
||||
monkeypatch.setattr(voice_router, "DASHSCOPE_SDK_AVAILABLE", True)
|
||||
monkeypatch.setattr(voice_router, "AudioFormat", DummyAudioFormat)
|
||||
monkeypatch.setattr(voice_router, "QwenTtsRealtime", DummyRealtime)
|
||||
monkeypatch.setattr(voice_router, "dashscope", DummyDashScopeModule())
|
||||
|
||||
create_resp = client.post("/api/voices", json={
|
||||
"name": "DashScope Voice",
|
||||
"vendor": "DashScope",
|
||||
"gender": "Female",
|
||||
"language": "zh",
|
||||
"description": "dashscope",
|
||||
"api_key": "dashscope-key",
|
||||
"base_url": "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
voice_id = create_resp.json()["id"]
|
||||
|
||||
preview_resp = client.post(f"/api/voices/{voice_id}/preview", json={"text": "你好"})
|
||||
assert preview_resp.status_code == 200
|
||||
payload = preview_resp.json()
|
||||
assert payload["success"] is True
|
||||
assert payload["audio_url"].startswith("data:audio/wav;base64,")
|
||||
encoded = payload["audio_url"].split(",", 1)[1]
|
||||
wav_bytes = base64.b64decode(encoded)
|
||||
assert wav_bytes.startswith(b"RIFF")
|
||||
assert captured["model"] == "qwen3-tts-flash-realtime"
|
||||
assert captured["url"] == "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
|
||||
assert captured["text"] == "你好"
|
||||
assert captured["session"]["voice"] == "Cherry"
|
||||
167
api/tests/test_workflows.py
Normal file
167
api/tests/test_workflows.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""Tests for workflow graph schema and router behavior."""
|
||||
|
||||
|
||||
class TestWorkflowAPI:
|
||||
"""Workflow CRUD and graph validation test cases."""
|
||||
|
||||
def _minimal_nodes(self):
|
||||
return [
|
||||
{
|
||||
"id": "start_1",
|
||||
"name": "start_1",
|
||||
"type": "start",
|
||||
"isStart": True,
|
||||
"metadata": {"position": {"x": 80, "y": 80}},
|
||||
},
|
||||
{
|
||||
"id": "assistant_1",
|
||||
"name": "assistant_1",
|
||||
"type": "assistant",
|
||||
"metadata": {"position": {"x": 280, "y": 80}},
|
||||
"prompt": "You are the first assistant node.",
|
||||
},
|
||||
]
|
||||
|
||||
def test_create_workflow_with_canonical_graph(self, client):
|
||||
payload = {
|
||||
"name": "Canonical Graph",
|
||||
"nodes": self._minimal_nodes(),
|
||||
"edges": [
|
||||
{
|
||||
"id": "edge_start_assistant",
|
||||
"fromNodeId": "start_1",
|
||||
"toNodeId": "assistant_1",
|
||||
"condition": {"type": "always"},
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
resp = client.post("/api/workflows", json=payload)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["name"] == "Canonical Graph"
|
||||
assert data["nodeCount"] == 2
|
||||
assert data["nodes"][0]["id"] == "start_1"
|
||||
assert data["edges"][0]["fromNodeId"] == "start_1"
|
||||
assert data["edges"][0]["toNodeId"] == "assistant_1"
|
||||
|
||||
def test_create_workflow_with_legacy_graph(self, client):
|
||||
payload = {
|
||||
"name": "Legacy Graph",
|
||||
"nodes": [
|
||||
{
|
||||
"name": "legacy_start",
|
||||
"type": "conversation",
|
||||
"isStart": True,
|
||||
"metadata": {"position": {"x": 100, "y": 100}},
|
||||
},
|
||||
{
|
||||
"name": "legacy_human",
|
||||
"type": "human",
|
||||
"metadata": {"position": {"x": 300, "y": 100}},
|
||||
},
|
||||
],
|
||||
"edges": [
|
||||
{
|
||||
"from": "legacy_start",
|
||||
"to": "legacy_human",
|
||||
"label": "人工",
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
resp = client.post("/api/workflows", json=payload)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["nodes"][0]["type"] == "assistant"
|
||||
assert data["nodes"][1]["type"] == "human_transfer"
|
||||
assert data["edges"][0]["fromNodeId"] == "legacy_start"
|
||||
assert data["edges"][0]["toNodeId"] == "legacy_human"
|
||||
assert data["edges"][0]["condition"]["type"] == "contains"
|
||||
|
||||
def test_create_workflow_without_start_node_fails(self, client):
|
||||
payload = {
|
||||
"name": "No Start",
|
||||
"nodes": [
|
||||
{"id": "node_1", "name": "node_1", "type": "assistant", "metadata": {"position": {"x": 0, "y": 0}}},
|
||||
],
|
||||
"edges": [],
|
||||
}
|
||||
resp = client.post("/api/workflows", json=payload)
|
||||
assert resp.status_code == 422
|
||||
|
||||
def test_create_workflow_with_invalid_edge_fails(self, client):
|
||||
payload = {
|
||||
"name": "Bad Edge",
|
||||
"nodes": self._minimal_nodes(),
|
||||
"edges": [
|
||||
{"id": "edge_bad", "fromNodeId": "missing", "toNodeId": "assistant_1", "condition": {"type": "always"}},
|
||||
],
|
||||
}
|
||||
resp = client.post("/api/workflows", json=payload)
|
||||
assert resp.status_code == 422
|
||||
|
||||
def test_update_workflow_nodes_and_edges(self, client):
|
||||
create_payload = {
|
||||
"name": "Before Update",
|
||||
"nodes": self._minimal_nodes(),
|
||||
"edges": [
|
||||
{
|
||||
"id": "edge_start_assistant",
|
||||
"fromNodeId": "start_1",
|
||||
"toNodeId": "assistant_1",
|
||||
"condition": {"type": "always"},
|
||||
}
|
||||
],
|
||||
}
|
||||
create_resp = client.post("/api/workflows", json=create_payload)
|
||||
assert create_resp.status_code == 200
|
||||
workflow_id = create_resp.json()["id"]
|
||||
|
||||
update_payload = {
|
||||
"name": "After Update",
|
||||
"nodes": [
|
||||
{
|
||||
"id": "start_1",
|
||||
"name": "start_1",
|
||||
"type": "start",
|
||||
"isStart": True,
|
||||
"metadata": {"position": {"x": 50, "y": 50}},
|
||||
},
|
||||
{
|
||||
"id": "assistant_2",
|
||||
"name": "assistant_2",
|
||||
"type": "assistant",
|
||||
"metadata": {"position": {"x": 250, "y": 50}},
|
||||
"prompt": "new prompt",
|
||||
},
|
||||
{
|
||||
"id": "end_1",
|
||||
"name": "end_1",
|
||||
"type": "end",
|
||||
"metadata": {"position": {"x": 450, "y": 50}},
|
||||
},
|
||||
],
|
||||
"edges": [
|
||||
{
|
||||
"id": "edge_start_assistant2",
|
||||
"fromNodeId": "start_1",
|
||||
"toNodeId": "assistant_2",
|
||||
"condition": {"type": "always"},
|
||||
},
|
||||
{
|
||||
"id": "edge_assistant2_end",
|
||||
"fromNodeId": "assistant_2",
|
||||
"toNodeId": "end_1",
|
||||
"condition": {"type": "contains", "source": "user", "value": "结束"},
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
update_resp = client.put(f"/api/workflows/{workflow_id}", json=update_payload)
|
||||
assert update_resp.status_code == 200
|
||||
updated = update_resp.json()
|
||||
assert updated["name"] == "After Update"
|
||||
assert updated["nodeCount"] == 3
|
||||
assert len(updated["nodes"]) == 3
|
||||
assert len(updated["edges"]) == 2
|
||||
1
changelog/README.md
Normal file
1
changelog/README.md
Normal file
@@ -0,0 +1 @@
|
||||
# Changelog
|
||||
@@ -1 +1,78 @@
|
||||
# Docker Deployment
|
||||
# Docker Deployment
|
||||
|
||||
This folder contains Docker Compose configuration to run the entire AI VideoAssistant stack.
|
||||
|
||||
## Services
|
||||
|
||||
| Service | Port | Description |
|
||||
|---------|------|-------------|
|
||||
| minio | 9000, 9001 | S3-compatible object storage |
|
||||
| backend | 8100 | FastAPI backend API |
|
||||
| engine | 8001 | Conversation engine (WebSocket) |
|
||||
| frontend | 6000 | React web application |
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. Docker and Docker Compose installed
|
||||
2. The `engine/data/vad/silero_vad.onnx` VAD model file must exist
|
||||
3. Agent configuration in `engine/config/agents/default.yaml`
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
cd docker
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
## Access Points
|
||||
|
||||
- **Frontend**: http://localhost:6000
|
||||
- **Backend API**: http://localhost:8100
|
||||
- **Engine WebSocket**: ws://localhost:8001/ws
|
||||
- **MinIO Console**: http://localhost:9001 (admin / password123)
|
||||
|
||||
## Configuration
|
||||
|
||||
### Engine Environment Variables
|
||||
|
||||
The engine service uses environment variables for configuration. Key variables:
|
||||
|
||||
- `BACKEND_URL`: Backend API URL (default: `http://backend:8100`)
|
||||
- `LOG_LEVEL`: Logging level (default: `INFO`)
|
||||
- `CORS_ORIGINS`: Allowed CORS origins
|
||||
|
||||
Agent-specific settings (LLM, TTS, ASR) are configured via YAML files in `engine/config/agents/`.
|
||||
|
||||
### Volumes
|
||||
|
||||
- `minio_data`: MinIO storage data
|
||||
- `backend_data`: Backend SQLite database
|
||||
- `engine_logs`: Engine log files
|
||||
|
||||
## Development Mode
|
||||
|
||||
To mount source code for hot-reload during development:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
|
||||
```
|
||||
|
||||
## Logs
|
||||
|
||||
```bash
|
||||
# View all logs
|
||||
docker compose logs -f
|
||||
|
||||
# View specific service logs
|
||||
docker compose logs -f engine
|
||||
docker compose logs -f backend
|
||||
```
|
||||
|
||||
## Stopping
|
||||
|
||||
```bash
|
||||
docker compose down
|
||||
|
||||
# Remove volumes as well
|
||||
docker compose down -v
|
||||
```
|
||||
|
||||
@@ -1,13 +1,37 @@
|
||||
version: '3.8'
|
||||
# Project name used as prefix for containers, volumes, and networks
|
||||
name: ras
|
||||
|
||||
# Docker registry mirror for China users (change to empty or "docker.io" if you have direct access)
|
||||
x-registry-mirror: ®istry-mirror docker.1ms.run
|
||||
|
||||
services:
|
||||
# 后端 API
|
||||
# MinIO (S3 compatible storage)
|
||||
minio:
|
||||
image: ${REGISTRY_MIRROR:-docker.1ms.run}/minio/minio
|
||||
ports:
|
||||
- "9000:9000"
|
||||
- "9001:9001"
|
||||
volumes:
|
||||
- minio_data:/data
|
||||
environment:
|
||||
MINIO_ROOT_USER: admin
|
||||
MINIO_ROOT_PASSWORD: password123
|
||||
command: server /data --console-address ":9001"
|
||||
healthcheck:
|
||||
test: ["CMD", "mc", "ready", "local"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# Backend API
|
||||
backend:
|
||||
build:
|
||||
context: ./backend
|
||||
context: ../api
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
REGISTRY_MIRROR: ${REGISTRY_MIRROR:-docker.1ms.run}
|
||||
ports:
|
||||
- "8000:8000"
|
||||
- "8100:8100"
|
||||
environment:
|
||||
- DATABASE_URL=sqlite:///./data/app.db
|
||||
- MINIO_ENDPOINT=minio:9000
|
||||
@@ -15,35 +39,83 @@ services:
|
||||
- MINIO_SECRET_KEY=password123
|
||||
- MINIO_BUCKET=ai-audio
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
- ./backend/data:/app/data
|
||||
- backend_data:/app/data
|
||||
depends_on:
|
||||
- minio
|
||||
minio:
|
||||
condition: service_started
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8100/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 10s
|
||||
|
||||
# 对话引擎 (py-active-call)
|
||||
# Conversation Engine
|
||||
engine:
|
||||
build:
|
||||
context: ../py-active-call
|
||||
context: ../engine
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "8001:8001"
|
||||
environment:
|
||||
- BACKEND_URL=http://backend:8000
|
||||
- HOST=0.0.0.0
|
||||
- PORT=8001
|
||||
- BACKEND_MODE=http
|
||||
- BACKEND_URL=http://backend:8100
|
||||
- LOG_LEVEL=INFO
|
||||
- CORS_ORIGINS=["http://localhost:6000","http://localhost:3000"]
|
||||
volumes:
|
||||
- ../engine/config:/app/config:ro
|
||||
- ../engine/data:/app/data:ro
|
||||
- engine_logs:/app/logs
|
||||
depends_on:
|
||||
backend:
|
||||
condition: service_started
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 15s
|
||||
|
||||
# Frontend (Vite + React) – production: built static files served on 6000
|
||||
frontend:
|
||||
build:
|
||||
context: ../web
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
REGISTRY_MIRROR: ${REGISTRY_MIRROR:-docker.1ms.run}
|
||||
VITE_API_BASE_URL: ${VITE_API_BASE_URL:-http://localhost:8100/api}
|
||||
VITE_ENGINE_WS_URL: ${VITE_ENGINE_WS_URL:-ws://localhost:8001/ws}
|
||||
ports:
|
||||
- "6000:6000"
|
||||
depends_on:
|
||||
- backend
|
||||
- engine
|
||||
|
||||
# MinIO (S3 兼容存储)
|
||||
minio:
|
||||
image: minio/minio
|
||||
# Frontend dev – hot reload on port 3000 (run with: docker compose --profile dev up)
|
||||
frontend-dev:
|
||||
profiles:
|
||||
- dev
|
||||
build:
|
||||
context: ../web
|
||||
dockerfile: Dockerfile.dev
|
||||
args:
|
||||
REGISTRY_MIRROR: ${REGISTRY_MIRROR:-docker.1ms.run}
|
||||
ports:
|
||||
- "9000:9000"
|
||||
- "9001:9001"
|
||||
volumes:
|
||||
- ./storage/minio/data:/data
|
||||
- "3000:3000"
|
||||
environment:
|
||||
MINIO_ROOT_USER: admin
|
||||
MINIO_ROOT_PASSWORD: password123
|
||||
command: server /data --console-address ":9001"
|
||||
- VITE_API_BASE_URL=${VITE_API_BASE_URL:-http://localhost:8100/api}
|
||||
- VITE_ENGINE_WS_URL=${VITE_ENGINE_WS_URL:-ws://localhost:8001/ws}
|
||||
volumes:
|
||||
- ../web:/app
|
||||
- frontend_dev_node_modules:/app/node_modules
|
||||
depends_on:
|
||||
- backend
|
||||
- engine
|
||||
|
||||
volumes:
|
||||
minio-data:
|
||||
minio_data:
|
||||
backend_data:
|
||||
engine_logs:
|
||||
frontend_dev_node_modules:
|
||||
|
||||
@@ -1,7 +1,18 @@
|
||||
# Documentation
|
||||
|
||||
部署 MkDocs:
|
||||
pip install mkdocs
|
||||
mkdocs serve
|
||||
**安装依赖(推荐使用 1.x,避免与 Material 主题不兼容):**
|
||||
|
||||
访问 http://localhost:8000 查看文档网站。
|
||||
```bash
|
||||
cd docs
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
或手动安装:`pip install "mkdocs>=1.6,<2" mkdocs-material`
|
||||
|
||||
**本地预览:**
|
||||
|
||||
```bash
|
||||
mkdocs serve
|
||||
```
|
||||
|
||||
访问终端中显示的地址(如 http://127.0.0.1:8000)查看文档。
|
||||
166
docs/content/analysis/evaluation.md
Normal file
166
docs/content/analysis/evaluation.md
Normal file
@@ -0,0 +1,166 @@
|
||||
# 效果评估
|
||||
|
||||
效果评估帮助你系统地衡量和改进助手的对话质量。
|
||||
|
||||
## 评估维度
|
||||
|
||||
### 核心指标
|
||||
|
||||
| 指标 | 说明 | 计算方式 |
|
||||
|------|------|---------|
|
||||
| **解决率** | 用户问题被成功解决的比例 | 已解决 / 总对话数 |
|
||||
| **准确率** | 回复内容正确的比例 | 正确回复 / 总回复数 |
|
||||
| **满意度** | 用户满意的对话比例 | 满意评价 / 总评价数 |
|
||||
| **转人工率** | 需要人工介入的比例 | 转人工数 / 总对话数 |
|
||||
|
||||
### 性能指标
|
||||
|
||||
| 指标 | 说明 | 建议值 |
|
||||
|------|------|--------|
|
||||
| **首次响应时间** | 用户输入到首次回复的时间 | < 2s |
|
||||
| **平均对话轮次** | 解决问题需要的平均轮数 | < 5 轮 |
|
||||
| **平均对话时长** | 单次对话的平均时长 | 视场景而定 |
|
||||
|
||||
## 配置评估标准
|
||||
|
||||
在助手配置中设置评估标准:
|
||||
|
||||
### 解决标准
|
||||
|
||||
定义什么情况视为"问题已解决":
|
||||
|
||||
```
|
||||
评估标准:solved_inquiry
|
||||
描述:用户的问题得到了满意的解答
|
||||
|
||||
成功条件:
|
||||
- 用户明确表示问题已解决
|
||||
- 用户表示感谢并结束对话
|
||||
- 用户获得了所需信息
|
||||
|
||||
失败条件:
|
||||
- 用户要求转人工
|
||||
- 用户多次重复相同问题
|
||||
- 用户表达不满
|
||||
```
|
||||
|
||||
### 质量标准
|
||||
|
||||
定义回复质量的评估维度:
|
||||
|
||||
```
|
||||
评估维度:
|
||||
1. 准确性 - 信息是否正确
|
||||
2. 完整性 - 是否回答了用户所有问题
|
||||
3. 相关性 - 回复是否切题
|
||||
4. 简洁性 - 是否避免了冗余信息
|
||||
5. 语气 - 是否保持了友好专业的态度
|
||||
```
|
||||
|
||||
## 数据收集
|
||||
|
||||
### 自动收集
|
||||
|
||||
系统自动收集以下数据:
|
||||
|
||||
- 对话内容和时间戳
|
||||
- 工具调用记录
|
||||
- 错误和异常
|
||||
- 转人工事件
|
||||
|
||||
### 用户反馈
|
||||
|
||||
配置用户反馈收集:
|
||||
|
||||
1. 对话结束后显示满意度评价
|
||||
2. 收集用户评分(1-5 分)
|
||||
3. 可选的文字反馈
|
||||
|
||||
### 数据提取
|
||||
|
||||
配置需要从对话中提取的信息:
|
||||
|
||||
```
|
||||
数据提取项:
|
||||
|
||||
1. user_intent
|
||||
描述:用户的主要意图
|
||||
类型:string
|
||||
|
||||
2. issue_category
|
||||
描述:问题分类
|
||||
类型:enum [产品问题, 订单问题, 技术问题, 其他]
|
||||
|
||||
3. resolution_status
|
||||
描述:解决状态
|
||||
类型:enum [已解决, 未解决, 转人工]
|
||||
```
|
||||
|
||||
## 评估报告
|
||||
|
||||
### 查看报告
|
||||
|
||||
在 **数据分析** > **效果评估** 页面查看:
|
||||
|
||||
1. **总体概览** - 核心指标趋势图
|
||||
2. **分类分析** - 按问题类型的评估结果
|
||||
3. **时段分析** - 不同时间段的表现
|
||||
4. **详细记录** - 单条对话的评估结果
|
||||
|
||||
### 报告示例
|
||||
|
||||
```
|
||||
评估报告 - 2025年1月
|
||||
|
||||
总对话数:1,234
|
||||
解决率:78.5%
|
||||
准确率:85.2%
|
||||
平均满意度:4.2/5
|
||||
转人工率:12.3%
|
||||
|
||||
问题分类分布:
|
||||
- 产品问题:45%
|
||||
- 订单问题:30%
|
||||
- 技术问题:15%
|
||||
- 其他:10%
|
||||
|
||||
改进建议:
|
||||
1. 订单问题解决率较低(65%),建议补充订单相关知识库
|
||||
2. 技术问题转人工率高(25%),建议增加技术支持工具
|
||||
```
|
||||
|
||||
## 持续改进
|
||||
|
||||
### 改进流程
|
||||
|
||||
1. **收集数据** - 持续收集对话和评估数据
|
||||
2. **分析问题** - 找出低分对话的共性
|
||||
3. **制定方案** - 针对问题制定改进措施
|
||||
4. **实施改进** - 更新提示词、知识库或工具
|
||||
5. **验证效果** - 观察改进后的指标变化
|
||||
|
||||
### 常见改进措施
|
||||
|
||||
| 问题 | 改进措施 |
|
||||
|------|---------|
|
||||
| 回复不准确 | 优化提示词,补充知识库 |
|
||||
| 无法理解问题 | 增加示例,优化 ASR 热词 |
|
||||
| 回复太长 | 在提示词中限制长度 |
|
||||
| 缺少专业知识 | 上传相关文档到知识库 |
|
||||
| 工具调用失败 | 检查工具配置和 API 状态 |
|
||||
|
||||
### A/B 测试
|
||||
|
||||
对比不同配置的效果:
|
||||
|
||||
1. 创建助手的变体版本
|
||||
2. 按比例分配流量
|
||||
3. 收集两个版本的评估数据
|
||||
4. 比较各项指标
|
||||
5. 选择效果更好的版本
|
||||
|
||||
## 下一步
|
||||
|
||||
- [自动化测试](autotest.md) - 批量测试助手
|
||||
- [历史记录](history.md) - 查看对话详情
|
||||
- [提示词指南](../concepts/assistants/prompts.md) - 优化提示词
|
||||
88
docs/content/api-reference/errors.md
Normal file
88
docs/content/api-reference/errors.md
Normal file
@@ -0,0 +1,88 @@
|
||||
# 错误码
|
||||
|
||||
本文档列出 Realtime Agent Studio (RAS) API 的所有错误码及其说明。
|
||||
|
||||
## 协议错误
|
||||
|
||||
| 错误码 | 说明 | 解决方案 |
|
||||
|---|---|---|
|
||||
| `protocol.invalid_json` | JSON 格式错误 | 检查发送的 JSON 是否合法 |
|
||||
| `protocol.invalid_message` | 消息格式错误 | 检查消息结构是否符合协议 |
|
||||
| `protocol.order` | 消息顺序错误 | 确保先发送 `session.start` |
|
||||
| `protocol.assistant_id_required` | 缺少 `assistant_id` query 参数 | 在连接 URL 中添加 `assistant_id` 参数 |
|
||||
| `protocol.invalid_override` | metadata 覆盖字段不合法 | 检查 overrides 字段是否在白名单内 |
|
||||
|
||||
## 助手错误
|
||||
|
||||
| 错误码 | 说明 | 解决方案 |
|
||||
|---|---|---|
|
||||
| `assistant.not_found` | 助手不存在 | 检查 `assistant_id` 是否正确 |
|
||||
| `assistant.config_unavailable` | 助手配置不可用 | 确认助手已正确配置并发布 |
|
||||
|
||||
## 音频错误
|
||||
|
||||
| 错误码 | 说明 | 解决方案 |
|
||||
|---|---|---|
|
||||
| `audio.invalid_pcm` | PCM 数据无效 | 检查音频格式是否为 `pcm_s16le` |
|
||||
| `audio.frame_size_mismatch` | 音频帧大小不匹配 | 确保帧长度是 640 字节的整数倍 |
|
||||
|
||||
## 服务器错误
|
||||
|
||||
| 错误码 | 说明 | 解决方案 |
|
||||
|---|---|---|
|
||||
| `server.internal` | 服务端内部错误 | 查看服务端日志排查问题 |
|
||||
|
||||
## 错误响应格式
|
||||
|
||||
所有错误都通过 `error` 事件返回:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "error",
|
||||
"timestamp": 1730000000000,
|
||||
"sessionId": "sess_xxx",
|
||||
"data": {
|
||||
"code": "protocol.invalid_json",
|
||||
"message": "Invalid JSON format",
|
||||
"details": {}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## HTTP API 错误
|
||||
|
||||
REST API 使用标准 HTTP 状态码:
|
||||
|
||||
| 状态码 | 说明 |
|
||||
|--------|------|
|
||||
| 200 | 请求成功 |
|
||||
| 201 | 创建成功 |
|
||||
| 400 | 请求参数错误 |
|
||||
| 401 | 未授权(缺少或无效的认证信息) |
|
||||
| 403 | 禁止访问(权限不足) |
|
||||
| 404 | 资源不存在 |
|
||||
| 422 | 请求实体无法处理 |
|
||||
| 500 | 服务器内部错误 |
|
||||
|
||||
### HTTP 错误响应示例
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"error": {
|
||||
"code": "VALIDATION_ERROR",
|
||||
"message": "Invalid request parameters",
|
||||
"details": {
|
||||
"field": "name",
|
||||
"reason": "required"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 错误处理最佳实践
|
||||
|
||||
1. **始终检查错误响应** - 不要假设请求一定成功
|
||||
2. **实现重试机制** - 对于临时性错误(如网络问题)实现指数退避重试
|
||||
3. **记录错误日志** - 保存错误详情用于问题排查
|
||||
4. **友好的用户提示** - 将技术错误转换为用户可理解的提示
|
||||
235
docs/content/api-reference/index.md
Normal file
235
docs/content/api-reference/index.md
Normal file
@@ -0,0 +1,235 @@
|
||||
# API 参考
|
||||
|
||||
本节提供 Realtime Agent Studio (RAS) 的完整 API 文档。
|
||||
|
||||
## API 概览
|
||||
|
||||
Realtime Agent Studio (RAS) 提供两种类型的 API:
|
||||
|
||||
| API 类型 | 用途 | 协议 |
|
||||
|---------|------|------|
|
||||
| **REST API** | 管理助手、模型、知识库等资源 | HTTP |
|
||||
| **WebSocket API** | 实时语音对话 | WebSocket |
|
||||
|
||||
## REST API
|
||||
|
||||
### 基础地址
|
||||
|
||||
```
|
||||
http://localhost:8000/api/v1
|
||||
```
|
||||
|
||||
### 认证
|
||||
|
||||
REST API 使用 Bearer Token 认证:
|
||||
|
||||
```bash
|
||||
curl -H "Authorization: Bearer YOUR_API_KEY" \
|
||||
http://localhost:8000/api/v1/assistants
|
||||
```
|
||||
|
||||
### 通用响应格式
|
||||
|
||||
**成功响应**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"data": { ... }
|
||||
}
|
||||
```
|
||||
|
||||
**列表响应**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"data": {
|
||||
"items": [...],
|
||||
"total": 100,
|
||||
"page": 1,
|
||||
"page_size": 20
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**错误响应**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"error": {
|
||||
"code": "ERROR_CODE",
|
||||
"message": "错误描述"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 主要端点
|
||||
|
||||
#### 助手管理
|
||||
|
||||
| 方法 | 路径 | 说明 |
|
||||
|------|------|------|
|
||||
| GET | /assistants | 获取助手列表 |
|
||||
| POST | /assistants | 创建助手 |
|
||||
| GET | /assistants/{id} | 获取助手详情 |
|
||||
| PUT | /assistants/{id} | 更新助手 |
|
||||
| DELETE | /assistants/{id} | 删除助手 |
|
||||
| GET | /assistants/{id}/config | 获取引擎配置 |
|
||||
| GET | /assistants/{id}/opener-audio | 获取开场音频状态 |
|
||||
| POST | /assistants/{id}/opener-audio/generate | 生成开场音频 |
|
||||
|
||||
#### 模型管理
|
||||
|
||||
| 方法 | 路径 | 说明 |
|
||||
|------|------|------|
|
||||
| GET | /llm | 获取 LLM 模型列表 |
|
||||
| POST | /llm | 添加 LLM 模型 |
|
||||
| PUT | /llm/{id} | 更新 LLM 模型 |
|
||||
| DELETE | /llm/{id} | 删除 LLM 模型 |
|
||||
| POST | /llm/{id}/test | 测试 LLM 连接 |
|
||||
| POST | /llm/{id}/preview | 预览模型输出 |
|
||||
| GET | /asr | 获取 ASR 模型列表 |
|
||||
| POST | /asr | 添加 ASR 模型 |
|
||||
| PUT | /asr/{id} | 更新 ASR 模型 |
|
||||
| DELETE | /asr/{id} | 删除 ASR 模型 |
|
||||
| POST | /asr/{id}/test | 测试 ASR 连接 |
|
||||
| POST | /asr/{id}/preview | 上传音频预览识别 |
|
||||
| GET | /voices | 获取语音列表 |
|
||||
| POST | /voices | 添加语音配置 |
|
||||
| PUT | /voices/{id} | 更新语音配置 |
|
||||
| DELETE | /voices/{id} | 删除语音配置 |
|
||||
| POST | /voices/{id}/preview | 预览声音 |
|
||||
|
||||
#### 知识库管理
|
||||
|
||||
| 方法 | 路径 | 说明 |
|
||||
|------|------|------|
|
||||
| GET | /knowledge/bases | 获取知识库列表 |
|
||||
| POST | /knowledge/bases | 创建知识库 |
|
||||
| PUT | /knowledge/bases/{id} | 更新知识库 |
|
||||
| DELETE | /knowledge/bases/{id} | 删除知识库 |
|
||||
| POST | /knowledge/bases/{id}/documents | 上传文档 |
|
||||
| POST | /knowledge/bases/{id}/documents/{doc_id}/index | 索引文档内容 |
|
||||
| DELETE | /knowledge/bases/{id}/documents/{doc_id} | 删除文档 |
|
||||
| POST | /knowledge/search | 搜索知识库 |
|
||||
| GET | /knowledge/bases/{id}/stats | 获取统计信息 |
|
||||
|
||||
#### 工具管理
|
||||
|
||||
| 方法 | 路径 | 说明 |
|
||||
|------|------|------|
|
||||
| GET | /tools/list | 获取内置工具列表 |
|
||||
| GET | /tools/resources | 获取工具资源列表 |
|
||||
| POST | /tools/resources | 创建工具资源 |
|
||||
| PUT | /tools/resources/{id} | 更新工具资源 |
|
||||
| DELETE | /tools/resources/{id} | 删除工具资源 |
|
||||
| GET | /tools/health | 健康检查 |
|
||||
| POST | /tools/autotest | 运行自动测试 |
|
||||
| POST | /tools/test-message | 发送测试消息 |
|
||||
|
||||
#### 历史记录
|
||||
|
||||
| 方法 | 路径 | 说明 |
|
||||
|------|------|------|
|
||||
| GET | /history | 获取对话历史 |
|
||||
| GET | /history/{id} | 获取对话详情 |
|
||||
| POST | /history | 创建通话记录 |
|
||||
| PUT | /history/{id} | 更新通话记录 |
|
||||
| DELETE | /history/{id} | 删除通话记录 |
|
||||
| POST | /history/{id}/transcripts | 添加转写片段 |
|
||||
| GET | /history/{id}/audio/{turn_index} | 获取音频文件 |
|
||||
|
||||
## WebSocket API
|
||||
|
||||
### 连接地址
|
||||
|
||||
```
|
||||
ws://localhost:8000/ws?assistant_id=<assistant_id>
|
||||
```
|
||||
|
||||
### 协议概述
|
||||
|
||||
WebSocket API 使用双向消息通信:
|
||||
|
||||
- **文本帧**:JSON 格式的控制消息
|
||||
- **二进制帧**:PCM 音频数据
|
||||
|
||||
### 详细文档
|
||||
|
||||
- [WebSocket 协议](websocket.md) - 完整的消息格式和流程
|
||||
- [错误码](errors.md) - 错误码列表和处理方式
|
||||
|
||||
## SDK
|
||||
|
||||
> 下面的 SDK 包名和类名沿用当前包标识;产品名称在文档中统一使用 Realtime Agent Studio(RAS)。
|
||||
|
||||
### JavaScript SDK
|
||||
|
||||
```bash
|
||||
npm install @ai-video-assistant/sdk
|
||||
```
|
||||
|
||||
```javascript
|
||||
import { AIVideoAssistant } from '@ai-video-assistant/sdk';
|
||||
|
||||
const assistant = new AIVideoAssistant({
|
||||
apiUrl: 'http://localhost:8080',
|
||||
wsUrl: 'ws://localhost:8000'
|
||||
});
|
||||
|
||||
// 创建助手
|
||||
const result = await assistant.create({
|
||||
name: '客服助手',
|
||||
prompt: '你是一个友好的客服助手'
|
||||
});
|
||||
|
||||
// 开始对话
|
||||
const conversation = await assistant.connect(result.id);
|
||||
conversation.on('response', (text) => {
|
||||
console.log('助手回复:', text);
|
||||
});
|
||||
```
|
||||
|
||||
### Python SDK
|
||||
|
||||
```bash
|
||||
pip install ai-video-assistant
|
||||
```
|
||||
|
||||
```python
|
||||
from ai_video_assistant import AIVideoAssistant
|
||||
|
||||
client = AIVideoAssistant(
|
||||
api_url="http://localhost:8080",
|
||||
ws_url="ws://localhost:8000"
|
||||
)
|
||||
|
||||
# 创建助手
|
||||
assistant = client.assistants.create(
|
||||
name="客服助手",
|
||||
prompt="你是一个友好的客服助手"
|
||||
)
|
||||
|
||||
# 开始对话
|
||||
async with client.connect(assistant.id) as conv:
|
||||
response = await conv.send_text("你好")
|
||||
print(f"助手回复: {response}")
|
||||
```
|
||||
|
||||
## 速率限制
|
||||
|
||||
| 端点类型 | 限制 |
|
||||
|---------|------|
|
||||
| REST API | 100 请求/分钟 |
|
||||
| WebSocket | 10 并发连接/用户 |
|
||||
|
||||
超出限制会返回 `429 Too Many Requests`。
|
||||
|
||||
## 下一步
|
||||
|
||||
- [WebSocket 协议](websocket.md) - 实时对话协议详解
|
||||
- [错误码](errors.md) - 错误处理参考
|
||||
- [快速开始](../quickstart/index.md) - 快速创建助手
|
||||
|
||||
880
docs/content/api-reference/websocket.md
Normal file
880
docs/content/api-reference/websocket.md
Normal file
@@ -0,0 +1,880 @@
|
||||
# WebSocket 协议
|
||||
|
||||
WebSocket 端点提供双向实时语音对话能力,支持音频流输入输出和文本消息交互。
|
||||
|
||||
## 连接地址
|
||||
|
||||
```
|
||||
ws://<host>/ws?assistant_id=<assistant_id>
|
||||
```
|
||||
|
||||
- `assistant_id` 为必填 query 参数,用于从数据库加载该助手的运行时配置。
|
||||
|
||||
## 传输规则
|
||||
|
||||
- **文本帧**:JSON 格式控制消息
|
||||
- **二进制帧**:PCM 音频数据(`pcm_s16le`, 16kHz, 单声道)
|
||||
- 帧长度必须是 640 字节的整数倍(20ms 音频 = 640 bytes)
|
||||
|
||||
---
|
||||
|
||||
## 消息流程
|
||||
|
||||
```
|
||||
Client -> session.start
|
||||
Server <- session.started
|
||||
Server <- (optional) config.resolved
|
||||
Client -> (binary pcm frames...)
|
||||
Server <- input.speech_started / transcript.delta / transcript.final
|
||||
Server <- assistant.response.delta / assistant.response.final
|
||||
Server <- output.audio.start
|
||||
Server <- (binary pcm frames...)
|
||||
Server <- output.audio.end
|
||||
Client -> output.audio.played (optional)
|
||||
Client -> session.stop
|
||||
Server <- session.stopped
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 客户端 -> 服务端消息
|
||||
|
||||
`session.start`
|
||||
|
||||
客户端连接后发送的第一个消息,用于启动对话会话。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.start",
|
||||
"audio": {
|
||||
"encoding": "pcm_s16le",
|
||||
"sample_rate_hz": 16000,
|
||||
"channels": 1
|
||||
},
|
||||
"metadata": {
|
||||
"channel": "web",
|
||||
"source": "web_debug",
|
||||
"history": {
|
||||
"userId": 1
|
||||
},
|
||||
"overrides": {
|
||||
"systemPrompt": "你是简洁助手",
|
||||
"greeting": "你好,我能帮你什么?",
|
||||
"output": {
|
||||
"mode": "audio"
|
||||
}
|
||||
},
|
||||
"dynamicVariables": {
|
||||
"customer_name": "Alice",
|
||||
"plan_tier": "Pro"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|---|---|---|---|
|
||||
| `type` | string | 是 | 固定为 `"session.start"` |
|
||||
| `audio` | object | 否 | 音频格式描述 |
|
||||
| `audio.encoding` | string | 否 | 固定为 `"pcm_s16le"` |
|
||||
| `audio.sample_rate_hz` | number | 否 | 固定为 `16000` |
|
||||
| `audio.channels` | number | 否 | 固定为 `1` |
|
||||
| `metadata` | object | 否 | 运行时配置 |
|
||||
|
||||
**metadata 支持的字段**:
|
||||
- `channel` - 渠道标识
|
||||
- `source` - 来源标识
|
||||
- `history.userId` - 历史记录用户 ID
|
||||
- `overrides` - 可覆盖字段(仅限安全白名单)
|
||||
- `dynamicVariables` - 动态变量(支持 `{{variable}}` 占位符)
|
||||
|
||||
**`metadata.overrides` 白名单字段**:
|
||||
- `systemPrompt`
|
||||
- `greeting`
|
||||
- `firstTurnMode`
|
||||
- `generatedOpenerEnabled`
|
||||
- `output`
|
||||
- `bargeIn`
|
||||
- `knowledgeBaseId`
|
||||
- `knowledge`
|
||||
- `tools`
|
||||
- `openerAudio`
|
||||
|
||||
**限制**:
|
||||
- `metadata.workflow` 会被忽略(不触发 workflow 事件)
|
||||
- 禁止提交 `metadata.services`
|
||||
- 禁止提交 `assistantId` / `appId` / `app_id` / `configVersionId` / `config_version_id`
|
||||
- 禁止提交包含密钥语义的字段(如 `apiKey` / `token` / `secret` / `password` / `authorization`)
|
||||
|
||||
---
|
||||
|
||||
`input.text`
|
||||
|
||||
发送文本输入,跳过 ASR 识别,直接触发 LLM 回复。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "input.text",
|
||||
"text": "你能做什么?"
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|---|---|---|---|
|
||||
| `type` | string | 是 | 固定为 `"input.text"` |
|
||||
| `text` | string | 是 | 用户文本内容 |
|
||||
|
||||
---
|
||||
|
||||
`response.cancel`
|
||||
|
||||
请求中断当前回答。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "response.cancel",
|
||||
"graceful": false
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|---|---|---|---|---|
|
||||
| `type` | string | 是 | - | 固定为 `"response.cancel"` |
|
||||
| `graceful` | boolean | 否 | `false` | `false` 立即打断 |
|
||||
|
||||
---
|
||||
|
||||
`output.audio.played`
|
||||
|
||||
客户端回执音频已在本地播放完成(含本地 jitter buffer / 播放队列)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "output.audio.played",
|
||||
"tts_id": "tts_001",
|
||||
"response_id": "resp_001",
|
||||
"turn_id": "turn_001",
|
||||
"played_at_ms": 1730000018450,
|
||||
"played_ms": 2520
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|---|---|---|---|
|
||||
| `type` | string | 是 | 固定为 `"output.audio.played"` |
|
||||
| `tts_id` | string | 是 | 已完成播放的 TTS 段 ID |
|
||||
| `response_id` | string | 否 | 所属回复 ID(建议回传) |
|
||||
| `turn_id` | string | 否 | 所属轮次 ID(建议回传) |
|
||||
| `played_at_ms` | number | 否 | 客户端本地播放完成时间戳(毫秒) |
|
||||
| `played_ms` | number | 否 | 本次播放耗时(毫秒) |
|
||||
|
||||
---
|
||||
|
||||
`tool_call.results`
|
||||
|
||||
回传客户端执行的工具结果。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "tool_call.results",
|
||||
"results": [
|
||||
{
|
||||
"tool_call_id": "call_abc123",
|
||||
"name": "weather",
|
||||
"output": { "temp_c": 21, "condition": "sunny" },
|
||||
"status": { "code": 200, "message": "ok" }
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|---|---|---|---|
|
||||
| `type` | string | 是 | 固定为 `"tool_call.results"` |
|
||||
| `results` | array | 否 | 工具结果列表 |
|
||||
| `results[].tool_call_id` | string | 是 | 工具调用 ID |
|
||||
| `results[].name` | string | 是 | 工具名称 |
|
||||
| `results[].output` | any | 否 | 工具输出 |
|
||||
| `results[].status` | object | 是 | 执行状态 |
|
||||
| `results[].status.code` | number | 是 | HTTP 状态码(200-299 表示成功) |
|
||||
| `results[].status.message` | string | 是 | 状态描述 |
|
||||
|
||||
---
|
||||
|
||||
`session.stop`
|
||||
|
||||
结束对话会话。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.stop",
|
||||
"reason": "client_disconnect"
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|---|---|---|---|
|
||||
| `type` | string | 是 | 固定为 `"session.stop"` |
|
||||
| `reason` | string | 否 | 结束原因 |
|
||||
|
||||
---
|
||||
|
||||
`Binary Audio`
|
||||
|
||||
在 `session.started` 之后可持续发送二进制 PCM 音频。
|
||||
|
||||
- **格式**:`pcm_s16le`
|
||||
- **采样率**:16000 Hz
|
||||
- **声道**:1(单声道)
|
||||
- **帧长**:20ms = 640 bytes
|
||||
|
||||
---
|
||||
|
||||
## 服务端 -> 客户端事件
|
||||
|
||||
### 事件包络
|
||||
|
||||
所有 JSON 事件都包含统一包络字段:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "event.name",
|
||||
"timestamp": 1730000000000,
|
||||
"sessionId": "sess_xxx",
|
||||
"seq": 42,
|
||||
"source": "asr",
|
||||
"trackId": "audio_in",
|
||||
"data": {}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `type` | string | 事件类型 |
|
||||
| `timestamp` | number | 事件时间戳(Unix 毫秒) |
|
||||
| `sessionId` | string | 会话 ID |
|
||||
| `seq` | number | 递增序号(用于重放/恢复) |
|
||||
| `source` | string | 事件来源:`asr` / `llm` / `tts` / `tool` / `system` / `client` / `server` |
|
||||
| `trackId` | string | 事件轨道:`audio_in` / `audio_out` / `control` |
|
||||
| `data` | object | 业务数据(可选) |
|
||||
|
||||
**轨道 ID 说明**:
|
||||
|
||||
| trackId | 说明 | 相关事件 |
|
||||
|---------|------|---------|
|
||||
| `audio_in` | ASR/VAD 输入侧事件 | `input.*`, `transcript.*` |
|
||||
| `audio_out` | 助手输出侧事件 | `assistant.*`, `output.audio.*`, `response.interrupted`, `metrics.ttfb` |
|
||||
| `control` | 会话控制事件 | `session.*`, `error`, `heartbeat`, `(optional) config.resolved` |
|
||||
|
||||
---
|
||||
|
||||
### 会话控制类事件
|
||||
|
||||
#### `session.started`
|
||||
|
||||
会话启动成功,客户端收到此事件后可以开始发送音频。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.started",
|
||||
"timestamp": 1730000000000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 1,
|
||||
"trackId": "control",
|
||||
"tracks": {
|
||||
"audio_in": "audio_in",
|
||||
"audio_out": "audio_out",
|
||||
"control": "control"
|
||||
},
|
||||
"audio": {
|
||||
"encoding": "pcm_s16le",
|
||||
"sample_rate_hz": 16000,
|
||||
"channels": 1
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `sessionId` | string | 会话唯一标识符 |
|
||||
| `trackId` | string | 固定为 `"control"` |
|
||||
| `tracks` | object | 可用轨道列表 |
|
||||
| `tracks.audio_in` | string | 输入轨道 ID |
|
||||
| `tracks.audio_out` | string | 输出轨道 ID |
|
||||
| `tracks.control` | string | 控制轨道 ID |
|
||||
| `audio` | object | 音频格式配置 |
|
||||
| `audio.encoding` | string | 编码格式 |
|
||||
| `audio.sample_rate_hz` | number | 采样率 |
|
||||
| `audio.channels` | number | 声道数 |
|
||||
|
||||
---
|
||||
|
||||
#### `config.resolved`
|
||||
|
||||
服务端返回的**公开配置快照**。
|
||||
默认不发送(SaaS 公网模式建议关闭);仅在 `WS_EMIT_CONFIG_RESOLVED=true` 时发送。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "config.resolved",
|
||||
"timestamp": 1730000000001,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 2,
|
||||
"trackId": "control",
|
||||
"config": {
|
||||
"channel": "web_debug",
|
||||
"output": {
|
||||
"mode": "audio"
|
||||
},
|
||||
"tools": {
|
||||
"enabled": true,
|
||||
"count": 2
|
||||
},
|
||||
"tracks": {
|
||||
"audio_in": "audio_in",
|
||||
"audio_out": "audio_out",
|
||||
"control": "control"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"control"` |
|
||||
| `config` | object | SaaS 安全的公开配置快照 |
|
||||
| `config.channel` | string | 回显 `session.start.metadata.channel`(如提供) |
|
||||
| `config.output` | object | 输出配置 |
|
||||
| `config.output.mode` | string | 输出模式:`"audio"` / `"text"` |
|
||||
| `config.tools.enabled` | boolean | 是否启用工具能力 |
|
||||
| `config.tools.count` | number | 可用工具数量(不暴露工具清单) |
|
||||
| `config.tracks` | object | 可用轨道列表 |
|
||||
|
||||
**不会返回以下内部字段**:
|
||||
- `assistantId` / `appId` / `configVersionId`
|
||||
- `services`(provider/model/baseUrl 等)
|
||||
- 系统提示词原文及其它内部编排细节
|
||||
|
||||
---
|
||||
|
||||
#### `heartbeat`
|
||||
|
||||
保活心跳事件,默认每 50 秒发送一次。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "heartbeat",
|
||||
"timestamp": 1730000050000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 10
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `timestamp` | number | 心跳时间戳 |
|
||||
|
||||
---
|
||||
|
||||
#### `session.stopped`
|
||||
|
||||
会话结束确认。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.stopped",
|
||||
"timestamp": 1730000100000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 50,
|
||||
"reason": "client_requested"
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `reason` | string | 结束原因:`"client_requested"` / `"timeout"` / `"error"` |
|
||||
|
||||
---
|
||||
|
||||
### ASR 识别事件
|
||||
|
||||
#### `input.speech_started`
|
||||
|
||||
检测到语音开始(VAD)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "input.speech_started",
|
||||
"timestamp": 1730000010000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 5,
|
||||
"source": "asr",
|
||||
"trackId": "audio_in",
|
||||
"probability": 0.95
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_in"` |
|
||||
| `probability` | number | 语音检测置信度(0-1) |
|
||||
|
||||
---
|
||||
|
||||
#### `input.speech_stopped`
|
||||
|
||||
检测到语音结束(VAD)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "input.speech_stopped",
|
||||
"timestamp": 1730000012000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 8,
|
||||
"source": "asr",
|
||||
"trackId": "audio_in",
|
||||
"probability": 0.92
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_in"` |
|
||||
| `probability` | number | 静音检测置信度(0-1) |
|
||||
|
||||
---
|
||||
|
||||
#### `transcript.delta`
|
||||
|
||||
ASR 增量识别文本(实时转写)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "transcript.delta",
|
||||
"timestamp": 1730000011000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 6,
|
||||
"source": "asr",
|
||||
"trackId": "audio_in",
|
||||
"text": "你好",
|
||||
"data": {
|
||||
"text": "你好",
|
||||
"turn_id": "turn_001",
|
||||
"utterance_id": "utt_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_in"` |
|
||||
| `text` | string | 增量识别文本 |
|
||||
| `data.text` | string | 增量识别文本(同 `text`) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
| `data.utterance_id` | string | 当前语句 ID |
|
||||
|
||||
**节流说明**:服务端默认每 300ms 合并一次 delta 事件。
|
||||
|
||||
---
|
||||
|
||||
#### `transcript.final`
|
||||
|
||||
ASR 最终识别文本(语句结束)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "transcript.final",
|
||||
"timestamp": 1730000012500,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 9,
|
||||
"source": "asr",
|
||||
"trackId": "audio_in",
|
||||
"text": "你好,请问今天天气怎么样",
|
||||
"data": {
|
||||
"text": "你好,请问今天天气怎么样",
|
||||
"turn_id": "turn_001",
|
||||
"utterance_id": "utt_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_in"` |
|
||||
| `text` | string | 最终识别文本 |
|
||||
| `data.text` | string | 最终识别文本(同 `text`) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
| `data.utterance_id` | string | 当前语句 ID |
|
||||
|
||||
---
|
||||
|
||||
### LLM/TTS 输出事件
|
||||
|
||||
#### `assistant.response.delta`
|
||||
|
||||
助手增量文本输出(流式生成)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "assistant.response.delta",
|
||||
"timestamp": 1730000013000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 12,
|
||||
"source": "llm",
|
||||
"trackId": "audio_out",
|
||||
"text": "今天天气",
|
||||
"data": {
|
||||
"text": "今天天气",
|
||||
"turn_id": "turn_001",
|
||||
"response_id": "resp_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 固定为 `"llm"` |
|
||||
| `text` | string | 增量文本内容 |
|
||||
| `data.text` | string | 增量文本内容(同 `text`) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
| `data.response_id` | string | 当前回复 ID |
|
||||
|
||||
**节流说明**:服务端默认每 80ms 合并一次 delta 事件。
|
||||
|
||||
---
|
||||
|
||||
#### `assistant.response.final`
|
||||
|
||||
助手完整文本输出(回复结束)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "assistant.response.final",
|
||||
"timestamp": 1730000015000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 18,
|
||||
"source": "llm",
|
||||
"trackId": "audio_out",
|
||||
"text": "今天天气晴朗,气温25度,适合外出。",
|
||||
"data": {
|
||||
"text": "今天天气晴朗,气温25度,适合外出。",
|
||||
"turn_id": "turn_001",
|
||||
"response_id": "resp_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 固定为 `"llm"` |
|
||||
| `text` | string | 完整回复文本 |
|
||||
| `data.text` | string | 完整回复文本(同 `text`) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
| `data.response_id` | string | 当前回复 ID |
|
||||
|
||||
---
|
||||
|
||||
#### `assistant.tool_call`
|
||||
|
||||
工具调用通知,通知客户端 LLM 请求调用工具。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "assistant.tool_call",
|
||||
"timestamp": 1730000014000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 14,
|
||||
"source": "llm",
|
||||
"trackId": "audio_out",
|
||||
"tool_call_id": "call_abc123",
|
||||
"tool_name": "weather",
|
||||
"arguments": {
|
||||
"city": "北京"
|
||||
},
|
||||
"executor": "server",
|
||||
"timeout_ms": 30000,
|
||||
"data": {
|
||||
"tool_call": {
|
||||
"id": "call_abc123",
|
||||
"name": "weather",
|
||||
"arguments": "{\"city\":\"北京\"}"
|
||||
},
|
||||
"turn_id": "turn_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 固定为 `"llm"` |
|
||||
| `tool_call_id` | string | 工具调用唯一 ID |
|
||||
| `tool_name` | string | 工具名称 |
|
||||
| `arguments` | object | 工具参数(已解析的 JSON) |
|
||||
| `executor` | string | 执行方:`"server"` 服务端执行 / `"client"` 客户端执行 |
|
||||
| `timeout_ms` | number | 超时时间(毫秒) |
|
||||
| `data.tool_call` | object | 原始工具调用信息 |
|
||||
| `data.tool_call.id` | string | 工具调用 ID |
|
||||
| `data.tool_call.name` | string | 工具名称 |
|
||||
| `data.tool_call.arguments` | string | 工具参数(JSON 字符串) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
|
||||
**注意**:当 `executor = "client"` 时,客户端需要执行工具并返回 `tool_call.results`。
|
||||
|
||||
---
|
||||
|
||||
#### `assistant.tool_result`
|
||||
|
||||
工具执行结果通知。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "assistant.tool_result",
|
||||
"timestamp": 1730000014500,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 15,
|
||||
"source": "server",
|
||||
"trackId": "audio_out",
|
||||
"tool_call_id": "call_abc123",
|
||||
"tool_name": "weather",
|
||||
"tool_display_name": "天气查询",
|
||||
"ok": true,
|
||||
"error": null,
|
||||
"result": {
|
||||
"tool_call_id": "call_abc123",
|
||||
"name": "weather",
|
||||
"output": {
|
||||
"temperature": 25,
|
||||
"condition": "晴",
|
||||
"humidity": 40
|
||||
},
|
||||
"status": {
|
||||
"code": 200,
|
||||
"message": "ok"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 执行方:`"server"` / `"client"` |
|
||||
| `tool_call_id` | string | 工具调用 ID |
|
||||
| `tool_name` | string | 工具名称 |
|
||||
| `tool_display_name` | string | 工具显示名称 |
|
||||
| `ok` | boolean | 执行是否成功(状态码 200-299 为 true) |
|
||||
| `error` | object \| null | 错误信息(`ok=false` 时存在) |
|
||||
| `error.code` | number | 错误状态码 |
|
||||
| `error.message` | string | 错误描述 |
|
||||
| `error.retryable` | boolean | 是否可重试 |
|
||||
| `result` | object | 原始执行结果 |
|
||||
| `result.output` | any | 工具返回数据 |
|
||||
| `result.status` | object | 执行状态 |
|
||||
| `result.status.code` | number | HTTP 状态码 |
|
||||
| `result.status.message` | string | 状态描述 |
|
||||
|
||||
---
|
||||
|
||||
#### `output.audio.start`
|
||||
|
||||
TTS 音频播放开始标记。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "output.audio.start",
|
||||
"timestamp": 1730000015500,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 19,
|
||||
"source": "tts",
|
||||
"trackId": "audio_out",
|
||||
"data": {
|
||||
"tts_id": "tts_001",
|
||||
"turn_id": "turn_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 固定为 `"tts"` |
|
||||
| `data.tts_id` | string | TTS 播放段 ID |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
|
||||
**说明**:此事件后服务端将发送二进制 PCM 音频帧。
|
||||
|
||||
---
|
||||
|
||||
#### `output.audio.end`
|
||||
|
||||
TTS 音频播放结束标记。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "output.audio.end",
|
||||
"timestamp": 1730000018000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 25,
|
||||
"source": "tts",
|
||||
"trackId": "audio_out",
|
||||
"data": {
|
||||
"tts_id": "tts_001",
|
||||
"turn_id": "turn_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 固定为 `"tts"` |
|
||||
| `data.tts_id` | string | TTS 播放段 ID |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
|
||||
**说明**:`output.audio.end` 表示服务端已发送完成,不代表客户端扬声器已播完。若需要“真实播完”信号,客户端应发送 `output.audio.played`。
|
||||
|
||||
---
|
||||
|
||||
#### `response.interrupted`
|
||||
|
||||
回答被打断(用户插话)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "response.interrupted",
|
||||
"timestamp": 1730000016000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 20,
|
||||
"source": "system",
|
||||
"trackId": "audio_out",
|
||||
"data": {
|
||||
"turn_id": "turn_001",
|
||||
"response_id": "resp_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `data.turn_id` | string | 被打断的对话轮次 ID |
|
||||
| `data.response_id` | string | 被打断的回复 ID |
|
||||
|
||||
---
|
||||
|
||||
#### `metrics.ttfb`
|
||||
|
||||
首包音频时延指标(Time To First Byte)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "metrics.ttfb",
|
||||
"timestamp": 1730000015600,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 21,
|
||||
"source": "system",
|
||||
"trackId": "audio_out",
|
||||
"latencyMs": 1520,
|
||||
"data": {
|
||||
"latencyMs": 1520,
|
||||
"turn_id": "turn_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `latencyMs` | number | 首包音频时延(毫秒) |
|
||||
| `data.latencyMs` | number | 首包音频时延(同 `latencyMs`) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
|
||||
**说明**:从用户输入结束到第一个音频包发送的时间。
|
||||
|
||||
---
|
||||
|
||||
### 错误事件
|
||||
|
||||
#### `error`
|
||||
|
||||
统一错误事件。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "error",
|
||||
"timestamp": 1730000020000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 30,
|
||||
"sender": "server",
|
||||
"code": "llm.timeout",
|
||||
"message": "LLM request timeout",
|
||||
"stage": "llm",
|
||||
"retryable": true,
|
||||
"trackId": "audio_out",
|
||||
"data": {
|
||||
"error": {
|
||||
"stage": "llm",
|
||||
"code": "llm.timeout",
|
||||
"message": "LLM request timeout",
|
||||
"retryable": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `sender` | string | 错误来源:`"server"` / `"client"` |
|
||||
| `code` | string | 错误码 |
|
||||
| `message` | string | 错误描述 |
|
||||
| `stage` | string | 错误阶段:`"protocol"` / `"asr"` / `"llm"` / `"tts"` / `"tool"` / `"audio"` |
|
||||
| `retryable` | boolean | 是否可重试 |
|
||||
| `trackId` | string | 错误关联的轨道 |
|
||||
| `data.error` | object | 结构化错误信息 |
|
||||
| `data.error.stage` | string | 错误阶段 |
|
||||
| `data.error.code` | string | 错误码 |
|
||||
| `data.error.message` | string | 错误描述 |
|
||||
| `data.error.retryable` | boolean | 是否可重试 |
|
||||
|
||||
**trackId 约定**:
|
||||
- `audio_in`:ASR/音频输入相关错误
|
||||
- `audio_out`:LLM/TTS/工具相关错误
|
||||
- `control`:协议/会话控制相关错误
|
||||
|
||||
---
|
||||
|
||||
## 关联 ID 说明
|
||||
|
||||
事件中的关联 ID 用于追踪对话流程:
|
||||
|
||||
| ID 类型 | 说明 | 生命周期 |
|
||||
|---------|------|---------|
|
||||
| `turn_id` | 对话轮次 ID | 一次用户-助手交互 |
|
||||
| `utterance_id` | 语句 ID | 一次 ASR 最终识别结果 |
|
||||
| `response_id` | 回复 ID | 一次助手回复生成 |
|
||||
| `tool_call_id` | 工具调用 ID | 一次工具调用 |
|
||||
| `tts_id` | TTS 播放段 ID | 一段语音合成播放 |
|
||||
|
||||
---
|
||||
|
||||
## 心跳与超时
|
||||
|
||||
- **心跳间隔**:默认 50 秒(`heartbeat_interval_sec`)
|
||||
- **空闲超时**:默认 60 秒(`inactivity_timeout_sec`)
|
||||
- 客户端应持续发送音频或轻量消息避免被判定闲置
|
||||
|
||||
## 事件节流
|
||||
|
||||
为保持客户端渲染和服务端负载稳定,v1 协议对部分事件进行节流:
|
||||
|
||||
| 事件 | 默认节流间隔 | 说明 |
|
||||
|------|-------------|------|
|
||||
| `transcript.delta` | 300ms | ASR 增量文本 |
|
||||
| `assistant.response.delta` | 80ms | LLM 增量文本 |
|
||||
|
||||
## 错误处理
|
||||
|
||||
详细错误码请参考 [错误码](errors.md)。
|
||||
8
docs/content/assistants/configuration.md
Normal file
8
docs/content/assistants/configuration.md
Normal file
@@ -0,0 +1,8 @@
|
||||
# 配置选项(旧入口)
|
||||
|
||||
本页保留旧链接,用于承接历史导航或外部引用。助手配置的正式文档已经迁移到:
|
||||
|
||||
- [配置选项](../concepts/assistants/configuration.md) - 助手配置界面与运行时配置层说明
|
||||
- [助手概念](../concepts/assistants.md) - 先理解助手对象、会话与动态变量
|
||||
|
||||
如果你是从创建路径进入,也可以直接回到 [快速开始](../quickstart/index.md)。
|
||||
10
docs/content/assistants/index.md
Normal file
10
docs/content/assistants/index.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# 助手管理(旧入口)
|
||||
|
||||
本页保留旧链接,用于承接历史导航或外部引用。助手相关内容已经拆分到更明确的文档中:
|
||||
|
||||
- [助手概念](../concepts/assistants.md) - 了解助手是什么、由哪些部分组成,以及会话如何运行
|
||||
- [配置选项](../concepts/assistants/configuration.md) - 查看控制台和运行时配置项的分工
|
||||
- [提示词指南](../concepts/assistants/prompts.md) - 编写高质量系统提示词
|
||||
- [测试调试](../concepts/assistants/testing.md) - 验证助手行为并排查问题
|
||||
|
||||
如果你是第一次上手,建议直接从 [快速开始](../quickstart/index.md) 进入。
|
||||
8
docs/content/assistants/prompts.md
Normal file
8
docs/content/assistants/prompts.md
Normal file
@@ -0,0 +1,8 @@
|
||||
# 提示词指南(旧入口)
|
||||
|
||||
本页保留旧链接,用于承接历史导航或外部引用。提示词的正式文档已经迁移到:
|
||||
|
||||
- [提示词指南](../concepts/assistants/prompts.md) - 设计角色、任务、限制与风格
|
||||
- [助手概念](../concepts/assistants.md) - 理解提示词在助手体系中的位置
|
||||
|
||||
如果你想先完成最小可用配置,请从 [快速开始](../quickstart/index.md) 继续。
|
||||
8
docs/content/assistants/testing.md
Normal file
8
docs/content/assistants/testing.md
Normal file
@@ -0,0 +1,8 @@
|
||||
# 测试调试(旧入口)
|
||||
|
||||
本页保留旧链接,用于承接历史导航或外部引用。测试与调试的正式文档已经迁移到:
|
||||
|
||||
- [测试调试](../concepts/assistants/testing.md) - 验证助手行为、事件流和常见问题定位
|
||||
- [故障排查](../resources/troubleshooting.md) - 进入更细的链路排查步骤
|
||||
|
||||
如果你还没创建助手,请先完成 [快速开始](../quickstart/index.md)。
|
||||
7
docs/content/assistants/workflow-configuration.md
Normal file
7
docs/content/assistants/workflow-configuration.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# 工作流配置(旧入口)
|
||||
|
||||
本页保留旧链接,用于承接早期草稿和历史引用。工作流的正式文档已收敛到:
|
||||
|
||||
- [工作流](../customization/workflows.md) - 了解工作流的定位、节点结构、设计建议和当前边界
|
||||
|
||||
如果你正在配置助手中的流程能力,请优先阅读上述页面,再结合 [工具](../customization/tools.md) 与 [助手概念](../concepts/assistants.md) 一起使用。
|
||||
81
docs/content/changelog.md
Normal file
81
docs/content/changelog.md
Normal file
@@ -0,0 +1,81 @@
|
||||
# 更新日志
|
||||
|
||||
本文档记录 Realtime Agent Studio 的所有重要变更。
|
||||
|
||||
格式基于 [Keep a Changelog](https://keepachangelog.com/zh-CN/1.0.0/),
|
||||
版本号遵循 [语义化版本](https://semver.org/lang/zh-CN/)。
|
||||
|
||||
---
|
||||
|
||||
## [未发布]
|
||||
|
||||
### 开发中
|
||||
|
||||
- 工作流可视化编辑器
|
||||
- 知识库 RAG 集成
|
||||
- JavaScript/Python SDK
|
||||
- Step Audio 多模态模型支持
|
||||
|
||||
---
|
||||
|
||||
## [0.1.0] - 2025-01-15
|
||||
|
||||
### 新增
|
||||
|
||||
#### 实时交互引擎
|
||||
|
||||
- **管线式全双工引擎** - ASR → LLM → TTS 流水线架构
|
||||
- **智能打断** - 支持 VAD 和 EOU 检测
|
||||
- **OpenAI 兼容接口** - 支持 OpenAI Compatible 的 ASR/TTS 服务
|
||||
- **DashScope TTS** - 阿里云语音合成服务适配
|
||||
|
||||
#### 助手配置
|
||||
|
||||
- **系统提示词** - 支持角色定义和动态变量 `{{variable}}`
|
||||
- **模型管理** - LLM/ASR/TTS 模型统一管理界面
|
||||
- **工具调用** - Webhook 工具和客户端工具配置
|
||||
|
||||
#### 交互测试
|
||||
|
||||
- **实时调试控制台** - 内置 WebSocket 调试工具
|
||||
|
||||
#### 开放接口
|
||||
|
||||
- **WebSocket 协议** - `/ws` 端点,支持二进制音频流
|
||||
- **RESTful API** - 完整的助手/模型/会话 CRUD 接口
|
||||
|
||||
#### 历史监控
|
||||
|
||||
- **会话回放** - 音频 + 转写 + LLM 响应完整记录
|
||||
- **会话筛选** - 按时间、助手、状态多维度检索
|
||||
|
||||
#### 部署
|
||||
|
||||
- **Docker 支持** - 提供 docker-compose 一键部署
|
||||
|
||||
### 技术栈
|
||||
|
||||
- 前端:React 18, TypeScript, Tailwind CSS, Zustand
|
||||
- 后端:FastAPI (Python 3.10+)
|
||||
- 数据库:SQLite(开发)/ PostgreSQL(生产)
|
||||
|
||||
---
|
||||
|
||||
## 版本规划
|
||||
|
||||
| 版本 | 计划发布 | 主要特性 |
|
||||
|------|---------|---------|
|
||||
| 0.2.0 | 2025 Q1 | 工作流编辑器、知识库集成 |
|
||||
| 0.3.0 | 2025 Q2 | SDK 发布、多模态模型 |
|
||||
| 1.0.0 | 2025 H2 | 生产就绪、企业特性 |
|
||||
|
||||
---
|
||||
|
||||
## 贡献者
|
||||
|
||||
感谢所有为 RAS 做出贡献的开发者!
|
||||
|
||||
---
|
||||
|
||||
[未发布]: https://github.com/your-org/AI-VideoAssistant/compare/v0.1.0...HEAD
|
||||
[0.1.0]: https://github.com/your-org/AI-VideoAssistant/releases/tag/v0.1.0
|
||||
147
docs/content/concepts/assistants.md
Normal file
147
docs/content/concepts/assistants.md
Normal file
@@ -0,0 +1,147 @@
|
||||
# 助手概念详解
|
||||
|
||||
助手(Assistant)是 Realtime Agent Studio(RAS)中最核心的配置单元,也是控制台和 API 对外暴露能力的基本对象。
|
||||
|
||||
---
|
||||
|
||||
## 什么是助手
|
||||
|
||||
一个助手代表一个可接入、可测试、可发布的实时 AI 入口。它回答三个问题:
|
||||
|
||||
- **它是谁**:角色、语气、目标、限制、开场方式、静默时的行为(例如静默时主动询问用户的 Ask-on-Idle)
|
||||
- **它能做什么**:语言模型能力、语音模型能力(ASR、TTS、用户打断灵敏度(Barge-in)、语句端点设置(End-of-Utterance))、知识库、记忆、工具(Webhook、客户端工具、系统工具、MCP)、输出模式
|
||||
- **它在一次会话中如何运行**:通过 `assistant_id` 载入配置,并在运行时接收动态变量以及对话过程中的上下文更新
|
||||
|
||||
如果把引擎理解为“运行时”,那么助手就是“运行时要执行的那份定义”。
|
||||
|
||||
## 助手由哪些部分组成
|
||||
|
||||
| 层次 | 负责什么 | 典型内容 |
|
||||
|------|----------|----------|
|
||||
| **身份层** | 定义助手角色和交互风格 | 系统提示词、限制、开场白、静默处理 |
|
||||
| **模型层** | 决定理解与生成能力 | LLM、ASR、TTS、引擎类型、用户打断、语句端点 |
|
||||
| **能力层** | 扩展知识和执行能力 | 知识库、工具、记忆 |
|
||||
| **会话层** | 决定运行时上下文如何注入 | `assistant_id`、动态变量 |
|
||||
|
||||
## 身份层
|
||||
|
||||
助手首先是一个“被约束的角色”,而不是一段孤立的模型调用。
|
||||
|
||||
### 系统提示词
|
||||
|
||||
系统提示词定义助手的角色、任务、边界和风格,是所有能力组合的基础。
|
||||
|
||||
| 要素 | 作用 | 示例 |
|
||||
|------|------|------|
|
||||
| **角色** | 告诉模型“自己是谁” | 客服助手、销售顾问、培训教练 |
|
||||
| **任务** | 指定要完成的结果 | 解答咨询、收集信息、调用工具处理业务 |
|
||||
| **限制** | 明确哪些事不能做 | 不承诺超权限优惠、不输出未经验证的结论 |
|
||||
| **风格** | 约束回答节奏和措辞 | 简洁、口语化、每次 2-3 句 |
|
||||
|
||||
### 开场白
|
||||
|
||||
一个助手还需要定义会话应该如何开始,以及用户静默时如何处理,包括:
|
||||
|
||||
- **首轮模式**:由助手先说,或由用户先说
|
||||
- **开场白**:使用固定开场白,或由 AI 生成开场白
|
||||
|
||||
### 静默处理
|
||||
|
||||
定义用户持续静默时,助手是否主动询问用户是否仍然在线(Ask-on-Idle)。
|
||||
|
||||
## 模型层
|
||||
|
||||
模型决定助手的基础理解、推理和表达能力,但不是助手定义的全部。
|
||||
|
||||
- **LLM** 决定对话推理与文本生成能力
|
||||
- **ASR** 决定语音输入如何被实时转写
|
||||
- **TTS** 决定文本回复如何转成可播放语音
|
||||
- **引擎类型** 决定运行链路是分段可控还是端到端低延迟
|
||||
- **VAD** 语音活动检测(Voice Activity Detection)模型,判断用户是否正在说话
|
||||
- **EOU** 语句端点模型,判断用户是否完成一段语句等待回复
|
||||
- **Barge-In** 用户打断:当检测到用户语音活动或收到手动打断请求时,是否中止助手当前的回复
|
||||
|
||||
## 能力层
|
||||
|
||||
### 知识库
|
||||
|
||||
知识库用于补充私有领域知识,让助手回答超出基础模型常识之外的问题。
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Question[用户问题] --> Retrieval[检索]
|
||||
Retrieval --> KB[(知识库)]
|
||||
KB --> Context[相关片段]
|
||||
Context --> LLM[LLM]
|
||||
LLM --> Answer[回答]
|
||||
```
|
||||
|
||||
知识库适合承载政策、产品资料、流程说明、FAQ 和内部文档,而不是把所有业务知识堆进系统提示词。
|
||||
|
||||
### 工具
|
||||
|
||||
工具让助手从“会说”变成“能做事”。
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
User[用户] --> Assistant[助手]
|
||||
Assistant --> Tool[工具 / 外部系统]
|
||||
Tool --> Assistant
|
||||
Assistant --> User
|
||||
```
|
||||
|
||||
适合用工具处理的任务包括:订单查询、预约、外部搜索、写入业务系统、调用客户端能力等。
|
||||
|
||||
## 会话层
|
||||
|
||||
### `assistant_id` 的作用
|
||||
|
||||
在接入层面,客户端通过 `assistant_id` 指定要加载哪一个助手。引擎据此读取默认配置,并把同一份助手定义应用到当前会话。
|
||||
|
||||
### 会话生命周期
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> Connecting: WebSocket 连接
|
||||
Connecting --> Started: session.started
|
||||
Started --> Active: config.resolved / 开始对话
|
||||
Active --> Active: 多轮交互
|
||||
Active --> Stopped: session.stop 或连接关闭
|
||||
Stopped --> [*]
|
||||
```
|
||||
|
||||
一次会话通常会沉淀以下信息:
|
||||
|
||||
- 用户与助手消息时间线
|
||||
- 音频流、转写结果和模型输出
|
||||
- 工具调用记录与中间事件
|
||||
- 自定义 metadata、渠道和业务上下文
|
||||
|
||||
|
||||
### 动态变量与会话级覆盖
|
||||
|
||||
助手的默认配置不需要为每个用户都重新复制一份。RAS 提供两种常见的运行时注入方式:
|
||||
|
||||
- **动态变量**:在提示词中使用 `{{variable}}` 占位,并在会话开始时传入具体值
|
||||
- **会话级覆盖**:仅对当前会话覆盖部分运行时参数,不回写助手基线配置
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.start",
|
||||
"metadata": {
|
||||
"dynamicVariables": {
|
||||
"company_name": "ABC 公司",
|
||||
"customer_name": "张三",
|
||||
"tier": "VIP"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
这种设计让你既能复用标准助手,又能在每次接入时注入渠道、用户、订单或上下文信息。
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [配置选项](assistants/configuration.md) - 查看助手在控制台和运行时有哪些配置层
|
||||
- [提示词指南](assistants/prompts.md) - 设计角色、任务、限制和语气
|
||||
- [测试调试](assistants/testing.md) - 验证助手质量并定位问题
|
||||
218
docs/content/concepts/assistants/configuration.md
Normal file
218
docs/content/concepts/assistants/configuration.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# 配置选项
|
||||
|
||||
助手配置界面包含多个标签页,每个标签页负责不同方面的配置。
|
||||
|
||||
## 全局设置
|
||||
|
||||
全局设置定义助手的核心对话能力。
|
||||
|
||||
| 配置项 | 说明 | 建议值 |
|
||||
|-------|------|--------|
|
||||
| 助手名称 | 用于标识和管理 | 简洁明确 |
|
||||
| 系统提示词 | 定义角色、任务和约束 | 详见[提示词指南](prompts.md) |
|
||||
| 开场白 | 对话开始时的问候语 | 简短友好 |
|
||||
| 温度参数 | 控制回复随机性 | 0.7(通用)/ 0.3(严谨) |
|
||||
| 上下文长度 | 保留的历史消息数 | 10-20 |
|
||||
|
||||
### 高级选项
|
||||
|
||||
- **首轮模式** - 设置首次对话的触发方式
|
||||
- **打断检测** - 用户打断时的处理策略
|
||||
- **超时设置** - 无响应时的处理
|
||||
|
||||
## 语音配置
|
||||
|
||||
配置语音识别和语音合成参数。
|
||||
|
||||
### TTS 语音合成
|
||||
|
||||
| 配置 | 说明 |
|
||||
|------|------|
|
||||
| TTS 引擎 | 选择语音合成服务(阿里/火山/Minimax) |
|
||||
| 音色 | 选择语音风格和性别 |
|
||||
| 语速 | 语音播放速度(0.5-2.0) |
|
||||
| 音量 | 语音输出音量(0-100) |
|
||||
| 音调 | 语音音调高低(0.5-2.0) |
|
||||
|
||||
### ASR 语音识别
|
||||
|
||||
| 配置 | 说明 |
|
||||
|------|------|
|
||||
| ASR 引擎 | 选择语音识别服务 |
|
||||
| 语言 | 识别语言(中文/英文/多语言) |
|
||||
| 热词 | 提高特定词汇识别准确率 |
|
||||
|
||||
## 工具绑定
|
||||
|
||||
配置助手可调用的外部工具。
|
||||
|
||||
### 可用工具类型
|
||||
|
||||
| 工具 | 说明 |
|
||||
|------|------|
|
||||
| 搜索工具 | 网络搜索获取信息 |
|
||||
| 天气查询 | 查询天气预报 |
|
||||
| 计算器 | 数学计算 |
|
||||
| 知识库检索 | RAG 知识检索 |
|
||||
| 自定义工具 | HTTP 回调外部 API |
|
||||
|
||||
### 配置步骤
|
||||
|
||||
1. 在工具列表中勾选需要的工具
|
||||
2. 配置工具参数(如有)
|
||||
3. 测试工具调用是否正常
|
||||
|
||||
## 知识关联
|
||||
|
||||
关联 RAG 知识库,让助手能够回答专业领域问题。
|
||||
|
||||
### 配置参数
|
||||
|
||||
| 参数 | 说明 | 建议值 |
|
||||
|------|------|--------|
|
||||
| 知识库 | 选择要关联的知识库 | - |
|
||||
| 相似度阈值 | 低于此分数不返回 | 0.7 |
|
||||
| 返回数量 | 单次检索返回条数 | 3 |
|
||||
| 检索策略 | 混合/向量/关键词 | 混合 |
|
||||
|
||||
### 多知识库
|
||||
|
||||
支持关联多个知识库,系统会自动合并检索结果。
|
||||
|
||||
## 外部链接
|
||||
|
||||
配置第三方服务集成和 Webhook 回调。
|
||||
|
||||
### Webhook 配置
|
||||
|
||||
| 字段 | 说明 |
|
||||
|------|------|
|
||||
| 回调 URL | 接收事件的 HTTP 端点 |
|
||||
| 事件类型 | 订阅的事件(对话开始/结束/工具调用等) |
|
||||
| 认证方式 | API Key / Bearer Token / 无 |
|
||||
|
||||
### 支持的事件
|
||||
|
||||
- `conversation.started` - 对话开始
|
||||
- `conversation.ended` - 对话结束
|
||||
- `tool.called` - 工具被调用
|
||||
- `human.transfer` - 转人工
|
||||
|
||||
## 配置持久化与运行时覆盖
|
||||
|
||||
助手配置分为两层:
|
||||
|
||||
1. **数据库持久化配置(基线配置)**:通过助手管理 API 保存,后续会话默认读取这一层。
|
||||
2. **会话级覆盖配置(runtime overrides)**:仅对当前 WebSocket 会话生效,不会写回数据库。
|
||||
|
||||
### 哪些配置会存到数据库
|
||||
|
||||
以下字段会持久化在 `assistants` / `assistant_opener_audio` 等表中(通过创建/更新助手写入):
|
||||
|
||||
| 类别 | 典型字段 |
|
||||
|------|---------|
|
||||
| 对话行为 | `name`、`prompt`、`opener`、`firstTurnMode`、`generatedOpenerEnabled` |
|
||||
| 输出与打断 | `voiceOutputEnabled`、`voice`、`speed`、`botCannotBeInterrupted`、`interruptionSensitivity` |
|
||||
| 工具与知识库 | `tools`、`knowledgeBaseId` |
|
||||
| 模型与外部模式 | `configMode`、`apiUrl`、`apiKey`、`llmModelId`、`asrModelId`、`embeddingModelId`、`rerankModelId` |
|
||||
| 开场音频 | `openerAudioEnabled` 及音频文件状态(`ready`、`durationMs` 等) |
|
||||
|
||||
> 引擎在连接时通过 `assistant_id` 从后端读取该助手的 `sessionStartMetadata` 作为默认运行配置。
|
||||
|
||||
### 哪些配置可以在会话中覆盖
|
||||
|
||||
客户端可在 `session.start.metadata.overrides` 中覆盖以下白名单字段(仅当前会话有效):
|
||||
|
||||
- `systemPrompt`
|
||||
- `greeting`
|
||||
- `firstTurnMode`
|
||||
- `generatedOpenerEnabled`
|
||||
- `output`
|
||||
- `bargeIn`
|
||||
- `knowledgeBaseId`
|
||||
- `knowledge`
|
||||
- `tools`
|
||||
- `openerAudio`
|
||||
|
||||
以下字段不能由客户端覆盖:
|
||||
|
||||
- `services`(模型 provider / apiKey / baseUrl 等)
|
||||
- `assistantId` / `appId` / `configVersionId`(及下划线变体)
|
||||
- 包含密钥语义的字段(如 `apiKey`、`token`、`secret`、`password`、`authorization`)
|
||||
|
||||
### 覆盖示例(代码)
|
||||
|
||||
下面示例展示「数据库基线配置 + 会话 overrides」的最终效果。
|
||||
|
||||
```json
|
||||
// 1) 数据库存储的基线配置(示意)
|
||||
// GET /api/v1/assistants/asst_demo/config -> sessionStartMetadata
|
||||
{
|
||||
"systemPrompt": "你是电商客服助手,回答要简洁。",
|
||||
"greeting": "你好,我是你的客服助手。",
|
||||
"firstTurnMode": "bot_first",
|
||||
"output": { "mode": "audio" },
|
||||
"knowledgeBaseId": "kb_orders",
|
||||
"tools": [
|
||||
{ "type": "function", "function": { "name": "query_order" } }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
// 2) 客户端发起会话时的覆盖
|
||||
{
|
||||
"type": "session.start",
|
||||
"metadata": {
|
||||
"channel": "web",
|
||||
"history": { "userId": 1001 },
|
||||
"overrides": {
|
||||
"greeting": "你好,我来帮你查订单进度。",
|
||||
"output": { "mode": "text" },
|
||||
"knowledgeBaseId": "kb_vip_orders",
|
||||
"tools": [
|
||||
{ "type": "function", "function": { "name": "query_vip_order" } }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
// 3) 引擎合并后的有效配置(示意)
|
||||
{
|
||||
"assistantId": "asst_demo",
|
||||
"systemPrompt": "你是电商客服助手,回答要简洁。",
|
||||
"greeting": "你好,我来帮你查订单进度。",
|
||||
"firstTurnMode": "bot_first",
|
||||
"output": { "mode": "text" },
|
||||
"knowledgeBaseId": "kb_vip_orders",
|
||||
"tools": [
|
||||
{ "type": "function", "function": { "name": "query_vip_order" } }
|
||||
],
|
||||
"channel": "web",
|
||||
"history": { "userId": 1001 }
|
||||
}
|
||||
```
|
||||
|
||||
合并规则可简化为:
|
||||
|
||||
```python
|
||||
effective = {**db_session_start_metadata, **metadata.overrides}
|
||||
```
|
||||
|
||||
当 `WS_EMIT_CONFIG_RESOLVED=true` 时,服务端会返回 `config.resolved`(公开、安全裁剪后的快照)用于前端调试当前生效配置。
|
||||
|
||||
## 配置导入导出
|
||||
|
||||
### 导出配置
|
||||
|
||||
1. 在助手详情页点击 **更多**
|
||||
2. 选择 **导出配置**
|
||||
3. 下载 JSON 格式的配置文件
|
||||
|
||||
### 导入配置
|
||||
|
||||
1. 点击 **新建助手**
|
||||
2. 选择 **从配置导入**
|
||||
3. 上传配置文件
|
||||
184
docs/content/concepts/assistants/prompts.md
Normal file
184
docs/content/concepts/assistants/prompts.md
Normal file
@@ -0,0 +1,184 @@
|
||||
# 提示词指南
|
||||
|
||||
系统提示词(System Prompt)是定义助手行为的核心配置。本指南介绍如何编写高质量的提示词。
|
||||
|
||||
## 提示词结构
|
||||
|
||||
一个完整的系统提示词通常包含以下部分:
|
||||
|
||||
```
|
||||
[角色定义]
|
||||
[任务描述]
|
||||
[行为约束]
|
||||
[输出格式]
|
||||
[示例(可选)]
|
||||
```
|
||||
|
||||
## 编写原则
|
||||
|
||||
### 1. 明确角色
|
||||
|
||||
告诉助手它是谁:
|
||||
|
||||
```
|
||||
你是一个专业的技术支持工程师,专门负责解答产品使用问题。
|
||||
```
|
||||
|
||||
### 2. 定义任务
|
||||
|
||||
明确助手需要完成什么:
|
||||
|
||||
```
|
||||
你的主要任务是:
|
||||
1. 解答用户关于产品功能的问题
|
||||
2. 提供使用指导和最佳实践
|
||||
3. 帮助用户排查常见故障
|
||||
```
|
||||
|
||||
### 3. 设置约束
|
||||
|
||||
限制不希望出现的行为:
|
||||
|
||||
```
|
||||
请注意:
|
||||
- 不要讨论与产品无关的话题
|
||||
- 不要编造不存在的功能
|
||||
- 如果不确定答案,请建议用户联系人工客服
|
||||
```
|
||||
|
||||
### 4. 指定风格
|
||||
|
||||
定义回复的语气和风格:
|
||||
|
||||
```
|
||||
回复风格要求:
|
||||
- 使用友好、专业的语气
|
||||
- 回答简洁明了,避免冗长
|
||||
- 适当使用列表和步骤说明
|
||||
```
|
||||
|
||||
## 提示词模板
|
||||
|
||||
### 客服助手
|
||||
|
||||
```
|
||||
你是 [公司名称] 的智能客服助手。
|
||||
|
||||
## 你的职责
|
||||
- 解答用户关于产品和服务的问题
|
||||
- 处理常见的投诉和建议
|
||||
- 引导用户完成操作流程
|
||||
|
||||
## 回复要求
|
||||
- 保持友好和耐心
|
||||
- 回答简洁,一般不超过 3 句话
|
||||
- 如果问题复杂,建议转接人工客服
|
||||
|
||||
## 禁止行为
|
||||
- 不要讨论竞争对手
|
||||
- 不要承诺无法兑现的事项
|
||||
- 不要透露内部信息
|
||||
```
|
||||
|
||||
### 技术支持
|
||||
|
||||
```
|
||||
你是一个技术支持工程师,专门帮助用户解决技术问题。
|
||||
|
||||
## 工作流程
|
||||
1. 首先了解用户遇到的具体问题
|
||||
2. 询问必要的环境信息(系统版本、错误信息等)
|
||||
3. 提供分步骤的解决方案
|
||||
4. 确认问题是否解决
|
||||
|
||||
## 回复格式
|
||||
- 使用编号列表说明操作步骤
|
||||
- 提供代码示例时使用代码块
|
||||
- 复杂问题可以分多次回复
|
||||
```
|
||||
|
||||
### 销售顾问
|
||||
|
||||
```
|
||||
你是一个产品销售顾问,帮助用户了解产品并做出购买决策。
|
||||
|
||||
## 沟通策略
|
||||
- 先了解用户需求,再推荐合适的产品
|
||||
- 突出产品优势,但不贬低竞品
|
||||
- 提供真实的价格和优惠信息
|
||||
|
||||
## 目标
|
||||
- 帮助用户找到最适合的方案
|
||||
- 解答购买相关的疑问
|
||||
- 促进成交但不过度推销
|
||||
```
|
||||
|
||||
## 动态变量
|
||||
|
||||
提示词支持动态变量,使用 `{{变量名}}` 语法:
|
||||
|
||||
```
|
||||
你好 {{customer_name}},欢迎来到 {{company_name}}。
|
||||
你当前的会员等级是 {{membership_tier}}。
|
||||
```
|
||||
|
||||
在 `session.start` 时通过 `dynamicVariables` 传入:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.start",
|
||||
"metadata": {
|
||||
"dynamicVariables": {
|
||||
"customer_name": "张三",
|
||||
"company_name": "AI 公司",
|
||||
"membership_tier": "黄金会员"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 常见问题
|
||||
|
||||
### 回复太长
|
||||
|
||||
在提示词中明确限制:
|
||||
|
||||
```
|
||||
回复长度要求:
|
||||
- 一般问题:1-2 句话
|
||||
- 复杂问题:不超过 5 句话
|
||||
- 避免重复和冗余内容
|
||||
```
|
||||
|
||||
### 答非所问
|
||||
|
||||
增加任务边界说明:
|
||||
|
||||
```
|
||||
重要提示:
|
||||
- 只回答与 [产品/服务] 相关的问题
|
||||
- 对于无关问题,礼貌地拒绝并引导回正题
|
||||
```
|
||||
|
||||
### 编造信息
|
||||
|
||||
强调诚实原则:
|
||||
|
||||
```
|
||||
信息准确性要求:
|
||||
- 只提供你确定的信息
|
||||
- 不确定时说"我不太确定,建议您..."
|
||||
- 绝对不要编造数据或功能
|
||||
```
|
||||
|
||||
## 最佳实践
|
||||
|
||||
1. **迭代优化** - 根据实际对话效果持续调整
|
||||
2. **测试覆盖** - 用各种场景测试提示词效果
|
||||
3. **版本管理** - 保存历史版本,便于回退
|
||||
4. **定期复盘** - 分析对话记录,发现改进点
|
||||
|
||||
## 下一步
|
||||
|
||||
- [测试调试](testing.md) - 验证提示词效果
|
||||
- [知识库配置](../../customization/knowledge-base.md) - 补充专业知识
|
||||
162
docs/content/concepts/assistants/testing.md
Normal file
162
docs/content/concepts/assistants/testing.md
Normal file
@@ -0,0 +1,162 @@
|
||||
# 测试调试
|
||||
|
||||
本指南介绍如何测试和调试 AI 助手,确保其行为符合预期。
|
||||
|
||||
## 测试面板
|
||||
|
||||
在助手详情页,点击 **测试** 按钮打开测试面板。
|
||||
|
||||
### 功能介绍
|
||||
|
||||
| 功能 | 说明 |
|
||||
|------|------|
|
||||
| 文本对话 | 直接输入文字进行测试 |
|
||||
| 语音测试 | 使用麦克风进行语音对话 |
|
||||
| 查看日志 | 实时查看系统日志 |
|
||||
| 事件追踪 | 查看 WebSocket 事件流 |
|
||||
|
||||
## 测试用例设计
|
||||
|
||||
### 基础功能测试
|
||||
|
||||
| 测试项 | 输入 | 预期结果 |
|
||||
|--------|------|---------|
|
||||
| 问候响应 | "你好" | 友好的问候回复 |
|
||||
| 功能介绍 | "你能做什么?" | 准确描述能力范围 |
|
||||
| 开场白 | 连接后自动 | 播放配置的开场白 |
|
||||
|
||||
### 业务场景测试
|
||||
|
||||
根据助手定位设计测试用例:
|
||||
|
||||
```
|
||||
场景:产品咨询助手
|
||||
|
||||
测试用例 1:常见问题
|
||||
- 输入:"产品有哪些功能?"
|
||||
- 预期:准确列出主要功能
|
||||
|
||||
测试用例 2:价格询问
|
||||
- 输入:"多少钱?"
|
||||
- 预期:提供价格信息或引导方式
|
||||
|
||||
测试用例 3:超出范围
|
||||
- 输入:"帮我写一首诗"
|
||||
- 预期:礼貌拒绝并引导回业务话题
|
||||
```
|
||||
|
||||
### 边界测试
|
||||
|
||||
| 测试项 | 输入 | 预期结果 |
|
||||
|--------|------|---------|
|
||||
| 空输入 | "" | 提示用户输入内容 |
|
||||
| 超长输入 | 1000+ 字符 | 正常处理或提示过长 |
|
||||
| 特殊字符 | "<script>alert(1)</script>" | 安全处理,不执行 |
|
||||
| 敏感内容 | 不当言论 | 拒绝回复并提示 |
|
||||
|
||||
## 日志分析
|
||||
|
||||
### 查看日志
|
||||
|
||||
在测试面板的 **日志** 标签页,可以看到:
|
||||
|
||||
- ASR 识别结果
|
||||
- LLM 推理过程
|
||||
- TTS 合成状态
|
||||
- 工具调用记录
|
||||
|
||||
### 常见日志
|
||||
|
||||
```
|
||||
[ASR] transcript.final: "你好,请问有什么可以帮你"
|
||||
[LLM] request: messages=[...]
|
||||
[LLM] response: "您好!我是..."
|
||||
[TTS] synthesizing: "您好!我是..."
|
||||
[TTS] audio.start
|
||||
[TTS] audio.end
|
||||
```
|
||||
|
||||
## 事件追踪
|
||||
|
||||
在 **事件** 标签页查看完整的 WebSocket 事件流:
|
||||
|
||||
```json
|
||||
{"type": "session.started", "timestamp": 1704067200000}
|
||||
{"type": "input.speech_started", "timestamp": 1704067201000}
|
||||
{"type": "transcript.delta", "data": {"text": "你"}}
|
||||
{"type": "transcript.delta", "data": {"text": "好"}}
|
||||
{"type": "transcript.final", "data": {"text": "你好"}}
|
||||
{"type": "assistant.response.delta", "data": {"text": "您"}}
|
||||
{"type": "assistant.response.final", "data": {"text": "您好!..."}}
|
||||
{"type": "output.audio.start"}
|
||||
{"type": "output.audio.end"}
|
||||
```
|
||||
|
||||
## 性能指标
|
||||
|
||||
关注以下性能指标:
|
||||
|
||||
| 指标 | 说明 | 建议值 |
|
||||
|------|------|--------|
|
||||
| TTFB | 首字节时间 | < 500ms |
|
||||
| 识别延迟 | ASR 处理时间 | < 1s |
|
||||
| 回复延迟 | LLM 推理时间 | < 2s |
|
||||
| 合成延迟 | TTS 处理时间 | < 500ms |
|
||||
|
||||
## 常见问题排查
|
||||
|
||||
### 助手不响应
|
||||
|
||||
1. **检查连接状态**
|
||||
- 确认 WebSocket 连接成功
|
||||
- 查看是否收到 `session.started` 事件
|
||||
|
||||
2. **检查模型配置**
|
||||
- 确认 LLM 模型 API Key 有效
|
||||
- 测试模型连接是否正常
|
||||
|
||||
3. **查看错误日志**
|
||||
- 打开浏览器开发者工具
|
||||
- 检查 Console 和 Network 标签
|
||||
|
||||
### 回复质量差
|
||||
|
||||
1. **优化提示词**
|
||||
- 增加更明确的指令
|
||||
- 添加示例和约束
|
||||
|
||||
2. **调整温度参数**
|
||||
- 降低 temperature 提高一致性
|
||||
- 适当值通常在 0.3-0.7
|
||||
|
||||
3. **补充知识库**
|
||||
- 上传相关文档
|
||||
- 提高检索相关性
|
||||
|
||||
### 语音问题
|
||||
|
||||
1. **ASR 识别不准**
|
||||
- 检查麦克风权限
|
||||
- 尝试更换 ASR 引擎
|
||||
- 添加热词提高识别率
|
||||
|
||||
2. **TTS 不播放**
|
||||
- 检查浏览器自动播放限制
|
||||
- 确认 TTS 配置正确
|
||||
|
||||
## 自动化测试
|
||||
|
||||
使用自动化测试功能进行批量测试:
|
||||
|
||||
1. 进入 **自动化测试** 页面
|
||||
2. 创建测试任务
|
||||
3. 配置测试用例
|
||||
4. 运行测试并查看报告
|
||||
|
||||
详见 [自动化测试](../../analysis/autotest.md)。
|
||||
|
||||
## 下一步
|
||||
|
||||
- [自动化测试](../../analysis/autotest.md) - 批量测试
|
||||
- [历史记录](../../analysis/history.md) - 查看对话记录
|
||||
- [效果评估](../../analysis/evaluation.md) - 评估对话质量
|
||||
107
docs/content/concepts/engines.md
Normal file
107
docs/content/concepts/engines.md
Normal file
@@ -0,0 +1,107 @@
|
||||
# 引擎架构
|
||||
|
||||
RAS 提供两类实时运行时:**Pipeline 引擎** 和 **Realtime 引擎**。本页只回答一个问题:你的助手应该跑在哪种引擎上。
|
||||
|
||||
---
|
||||
|
||||
## 先记住这条判断标准
|
||||
|
||||
- 如果你优先考虑 **可控性、可替换性、成本管理、工具 / 知识 / 流程编排**,优先选 **Pipeline 引擎**
|
||||
- 如果你优先考虑 **超低延迟、更自然的端到端语音体验**,优先选 **Realtime 引擎**
|
||||
|
||||
## 两类引擎的区别
|
||||
|
||||
| 维度 | Pipeline 引擎 | Realtime 引擎 |
|
||||
|------|---------------|---------------|
|
||||
| **交互路径** | VAD → ASR → TD(回合检测)→ LLM → TTS | 端到端实时模型 |
|
||||
| **可控性** | 高,每个环节可替换 | 中,更多依赖模型供应商 |
|
||||
| **延迟** | 中等,通常由多环节累加 | 低,链路更短 |
|
||||
| **能力编排** | 更适合接入工具、知识库、工作流 | 也可接工具,但流程可控性较弱 |
|
||||
| **成本结构** | 可按环节优化 | 往往更依赖单一供应商定价 |
|
||||
| **适合场景** | 企业客服、流程型助手、电话场景、知识问答 | 高拟真语音助手、多模态入口、高自然度体验 |
|
||||
|
||||
## Pipeline 引擎是什么
|
||||
|
||||
Pipeline 引擎把实时语音拆成多个明确环节:
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
VAD[VAD] --> ASR[ASR]
|
||||
ASR --> TD[回合检测]
|
||||
TD --> LLM[LLM]
|
||||
LLM --> TTS[TTS]
|
||||
```
|
||||
|
||||
这样做的好处是:
|
||||
|
||||
- 你可以分别选择 ASR、LLM、TTS 的供应商
|
||||
- 你可以单独优化某一个环节,而不是整体替换
|
||||
- 工具、知识库和工作流更容易插入到链路中
|
||||
|
||||
代价是:
|
||||
|
||||
- 延迟会累加
|
||||
- 系统集成更复杂
|
||||
- 你需要同时管理多类外部依赖
|
||||
|
||||
## Realtime 引擎是什么
|
||||
|
||||
Realtime 引擎直接连接端到端实时模型,让模型同时处理输入、理解、生成与打断。
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Input[音频 / 视频 / 文本输入] --> RT[Realtime Model]
|
||||
RT --> Output[音频 / 文本输出]
|
||||
RT --> Tools[工具]
|
||||
```
|
||||
|
||||
这样做的好处是:
|
||||
|
||||
- 链路更短,延迟更低
|
||||
- 全双工与打断通常更自然
|
||||
- 接入路径更简单,适合强调体验的入口
|
||||
|
||||
代价是:
|
||||
|
||||
- 更依赖特定模型供应商
|
||||
- 对 ASR / TTS / 回合检测的独立控制更弱
|
||||
- 成本和能力边界受实时模型限制更大
|
||||
|
||||
## 怎么选
|
||||
|
||||
### 适合选择 Pipeline 的情况
|
||||
|
||||
- 你要接入特定 ASR 或 TTS 供应商
|
||||
- 你需要知识库、工具、工作流形成稳定业务流程
|
||||
- 你更在意可解释性、观测和分段优化
|
||||
- 你需要把成本按环节精细控制
|
||||
|
||||
### 适合选择 Realtime 的情况
|
||||
|
||||
- 你把“自然对话感”放在首位
|
||||
- 你需要更低的首响和更顺滑的打断体验
|
||||
- 你可以接受对某个模型供应商的依赖
|
||||
- 你的场景更接近语音助手、陪练、虚拟角色或多模态入口
|
||||
|
||||
## 简化决策表
|
||||
|
||||
| 场景 | 推荐引擎 | 原因 |
|
||||
|------|----------|------|
|
||||
| 企业客服 / 电话机器人 | Pipeline | 可控、可审计、易接工具与业务系统 |
|
||||
| 知识问答 / 业务流程助手 | Pipeline | 更适合接知识库与工作流 |
|
||||
| 高拟真语音助手 | Realtime | 更自然、更低延迟 |
|
||||
| 多模态入口 | Realtime | 端到端处理音频 / 视频 / 文本 |
|
||||
| 预算敏感场景 | Pipeline | 更容易逐环节优化成本 |
|
||||
|
||||
## 智能打断的差异
|
||||
|
||||
两类引擎都支持打断,但边界不同:
|
||||
|
||||
- **Pipeline**:由 VAD / 回合检测与 TTS 停止逻辑协同实现,行为更可控
|
||||
- **Realtime**:更多由实时模型内部完成,体验更自然,但可解释性更低
|
||||
|
||||
## 继续阅读
|
||||
|
||||
- [Pipeline 引擎](pipeline-engine.md) - 查看分段链路、延迟构成与配置示例
|
||||
- [Realtime 引擎](realtime-engine.md) - 查看端到端实时模型的交互路径
|
||||
- [系统架构](../overview/architecture.md) - 从服务边界理解引擎在整体系统中的位置
|
||||
49
docs/content/concepts/index.md
Normal file
49
docs/content/concepts/index.md
Normal file
@@ -0,0 +1,49 @@
|
||||
# 核心概念
|
||||
|
||||
本章节只解释 Realtime Agent Studio 的关键心智模型,不重复环境部署或助手构建的操作细节。
|
||||
|
||||
---
|
||||
|
||||
## 先建立这三个概念
|
||||
|
||||
### 1. 助手是“对外提供能力的配置单元”
|
||||
|
||||
助手决定了一个实时 AI 入口对外表现成什么角色:它使用什么提示词、哪些模型、能访问哪些知识和工具、会话如何开始以及运行时如何被覆盖。
|
||||
|
||||
- [助手概念](assistants.md) — 统一理解助手、会话、动态变量与能力边界
|
||||
- [配置选项](assistants/configuration.md) — 了解界面层和运行时配置项如何分工
|
||||
- [提示词指南](assistants/prompts.md) — 学会定义助手的角色、任务、风格与约束
|
||||
- [测试调试](assistants/testing.md) — 理解如何验证助手行为和定位问题
|
||||
|
||||
### 2. 引擎是“承载实时交互的运行时”
|
||||
|
||||
RAS 同时提供 Pipeline 引擎与 Realtime 引擎。它们都能驱动实时助手,但在延迟、可控性、成本和可替换性上各有取舍。
|
||||
|
||||
- [引擎概览](engines.md) — 两类引擎的能力边界与选择建议
|
||||
- [Pipeline 引擎](pipeline-engine.md) — VAD/ASR/TD/LLM/TTS 串联的可组合链路
|
||||
- [Realtime 引擎](realtime-engine.md) — 面向端到端实时模型的低延迟交互路径
|
||||
|
||||
### 3. 工作流是“把复杂业务拆成步骤和分支的方法”
|
||||
|
||||
当单一提示词不足以稳定处理多步骤、多条件、多工具的业务流程时,应使用工作流来显式编排节点、路由和回退策略。
|
||||
|
||||
- [工作流](../customization/workflows.md) — 了解何时需要工作流、它由哪些部分组成、如何设计可维护的流程
|
||||
|
||||
---
|
||||
|
||||
## 本章节不负责什么
|
||||
|
||||
以下内容属于“如何搭建和使用”,不在本章节展开说明:
|
||||
|
||||
- 助手搭建、模型/知识库/工具/工作流配置:从 [助手概览](assistants.md) 进入构建链路
|
||||
- 部署与环境变量:见 [环境与部署](../getting-started/index.md)
|
||||
- 第一个助手的最短操作路径:见 [快速开始](../quickstart/index.md)
|
||||
- 事件格式与接入协议:见 [API 参考](../api-reference/index.md)
|
||||
|
||||
## 建议阅读顺序
|
||||
|
||||
1. 先读 [助手概念](assistants.md),明确你要配置的对象到底是什么
|
||||
2. 再读 [引擎概览](engines.md),决定应该选择 Pipeline 还是 Realtime
|
||||
3. 如果场景涉及多步骤流程,再读 [工作流](../customization/workflows.md)
|
||||
4. 最后回到 [快速开始](../quickstart/index.md) 或 [助手概览](assistants.md) 开始具体配置
|
||||
|
||||
137
docs/content/concepts/pipeline-engine.md
Normal file
137
docs/content/concepts/pipeline-engine.md
Normal file
@@ -0,0 +1,137 @@
|
||||
# Pipeline 引擎
|
||||
|
||||
Pipeline 引擎把实时对话拆成多个清晰环节,适合需要高可控性、可替换外部能力和复杂业务编排的场景。
|
||||
|
||||
---
|
||||
|
||||
## 运行链路
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Input["输入处理"]
|
||||
Audio[用户音频] --> VAD[声音活动检测 VAD]
|
||||
VAD --> ASR[语音识别 ASR]
|
||||
ASR --> TD[回合检测 TD]
|
||||
end
|
||||
|
||||
subgraph Reasoning["语义处理"]
|
||||
TD --> LLM[大语言模型 LLM]
|
||||
LLM --> Tools[工具]
|
||||
LLM --> Text[回复文本]
|
||||
end
|
||||
|
||||
subgraph Output["输出生成"]
|
||||
Text --> TTS[语音合成 TTS]
|
||||
TTS --> AudioOut[助手音频]
|
||||
end
|
||||
```
|
||||
|
||||
Pipeline 的关键价值不在于“环节多”,而在于每个环节都可以被单独选择、单独优化、单独观测。
|
||||
|
||||
## 它适合什么场景
|
||||
|
||||
- 需要接特定 ASR / TTS 供应商
|
||||
- 需要稳定接入知识库、工具和工作流
|
||||
- 需要把问题定位到具体环节,而不是只看到整体失败
|
||||
- 需要按延迟、成本、质量对不同环节分别优化
|
||||
|
||||
## 数据流
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as 用户
|
||||
participant E as 引擎
|
||||
participant ASR as ASR 服务
|
||||
participant LLM as LLM 服务
|
||||
participant TTS as TTS 服务
|
||||
|
||||
U->>E: 音频帧 (PCM)
|
||||
E->>E: VAD / 回合检测
|
||||
E->>ASR: 发送可识别音频
|
||||
ASR-->>E: transcript.delta / transcript.final
|
||||
E->>LLM: 发送对话历史与当前输入
|
||||
LLM-->>E: assistant.response.delta
|
||||
E->>TTS: 文本片段
|
||||
TTS-->>E: 音频片段
|
||||
E-->>U: 音频流与事件
|
||||
```
|
||||
|
||||
## 延迟来自哪里
|
||||
|
||||
| 环节 | 典型影响 | 常见优化点 |
|
||||
|------|----------|------------|
|
||||
| **VAD / EoU** | 用户说完后多久触发回复 | 调整静音阈值和最短语音门限 |
|
||||
| **ASR** | 语音转写速度和准确率 | 选择合适模型、热词和语言设置 |
|
||||
| **LLM** | 首个 token 返回速度 | 选择低延迟模型、优化上下文 |
|
||||
| **TTS** | 文字到音频的生成速度 | 选择流式 TTS,缩短单次回复 |
|
||||
|
||||
Pipeline 的总延迟通常不是单点问题,而是链路总和。因此更适合做“逐环节调优”。
|
||||
|
||||
## EoU(用户说完)为什么重要
|
||||
|
||||
Pipeline 必须决定“什么时候把当前轮输入正式交给 LLM”。这个判断通常由 **EoU**(End of Utterance,即“用户说完”检测)完成。
|
||||
|
||||
- 阈值小:响应更快,但更容易把用户停顿误判为说完
|
||||
- 阈值大:更稳,但首次响应会更慢
|
||||
|
||||
你可以把它理解为 Pipeline 中最直接影响“对话节奏感”的参数之一。
|
||||
|
||||
## 工具、知识库和工作流如何插入
|
||||
|
||||
Pipeline 特别适合把业务能力插入到对话中:
|
||||
|
||||
- **知识库**:在 LLM 生成前补充领域事实
|
||||
- **工具**:在需要外部信息或动作时调用系统能力
|
||||
- **工作流**:在多步骤、多分支流程中决定接下来走哪个节点
|
||||
|
||||
这也是它在企业客服、流程助手和知识问答场景中更常见的原因。
|
||||
|
||||
## 智能打断
|
||||
|
||||
在 Pipeline 中,打断通常由 VAD 检测和 TTS 停止逻辑协同完成:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as 用户
|
||||
participant E as 引擎
|
||||
participant TTS as TTS
|
||||
|
||||
Note over E,TTS: 正在播放回复
|
||||
E->>U: 音频流...
|
||||
U->>E: 用户开始说话
|
||||
E->>E: 判定是否触发打断
|
||||
E->>TTS: 停止合成 / 播放
|
||||
E-->>U: output.audio.interrupted
|
||||
```
|
||||
|
||||
相比端到端实时模型,这种方式更容易解释“为什么打断”以及“在哪个环节发生了问题”。
|
||||
|
||||
## 配置示例
|
||||
|
||||
```json
|
||||
{
|
||||
"engine": "pipeline",
|
||||
"asr": {
|
||||
"provider": "openai-compatible",
|
||||
"model": "FunAudioLLM/SenseVoiceSmall",
|
||||
"language": "zh"
|
||||
},
|
||||
"llm": {
|
||||
"provider": "openai",
|
||||
"model": "gpt-4o-mini",
|
||||
"temperature": 0.7
|
||||
},
|
||||
"tts": {
|
||||
"provider": "openai-compatible",
|
||||
"model": "FunAudioLLM/CosyVoice2-0.5B",
|
||||
"voice": "anna"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [引擎架构](engines.md) - 回到选择指南
|
||||
- [Realtime 引擎](realtime-engine.md) - 对比端到端实时模型路径
|
||||
- [工具](../customization/tools.md) - 设计可被 LLM 安全调用的工具
|
||||
- [知识库](../customization/knowledge-base.md) - 在对话中补充领域知识
|
||||
97
docs/content/concepts/realtime-engine.md
Normal file
97
docs/content/concepts/realtime-engine.md
Normal file
@@ -0,0 +1,97 @@
|
||||
# Realtime 引擎
|
||||
|
||||
Realtime 引擎直接连接端到端实时模型,适合把低延迟和自然语音体验放在第一位的场景。
|
||||
|
||||
---
|
||||
|
||||
## 运行链路
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Input[音频 / 视频 / 文本输入] --> RT[Realtime Model]
|
||||
RT --> Output[音频 / 文本输出]
|
||||
RT --> Tools[工具]
|
||||
```
|
||||
|
||||
与 Pipeline 不同,Realtime 引擎不会把 ASR、回合检测、LLM、TTS 作为独立阶段暴露出来,而是更多依赖实时模型整体处理。
|
||||
|
||||
## 常见后端
|
||||
|
||||
| 后端 | 特点 |
|
||||
|------|------|
|
||||
| **OpenAI Realtime** | 语音交互自然,延迟低 |
|
||||
| **Gemini Live** | 多模态能力强 |
|
||||
| **Doubao 实时交互** | 更适合国内环境与中文场景 |
|
||||
|
||||
## 它适合什么场景
|
||||
|
||||
- 语音助手、陪练、虚拟角色等高自然度体验场景
|
||||
- 对首响和连续打断体验要求高的入口
|
||||
- 希望减少链路拼装复杂度,直接接入端到端模型的团队
|
||||
|
||||
## 数据流
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as 用户
|
||||
participant E as 引擎
|
||||
participant RT as Realtime Model
|
||||
|
||||
U->>E: 音频 / 视频 / 文本输入
|
||||
E->>RT: 转发实时流
|
||||
RT-->>E: 流式文本 / 音频输出
|
||||
E-->>U: 播放或渲染结果
|
||||
```
|
||||
|
||||
## Realtime 的优势
|
||||
|
||||
- **延迟更低**:链路更短,用户感知更自然
|
||||
- **全双工更顺滑**:用户插话时,模型更容易在内部处理打断
|
||||
- **多模态更直接**:适合音频、视频、文本混合输入输出场景
|
||||
|
||||
## Realtime 的取舍
|
||||
|
||||
- 更依赖实时模型供应商的能力边界
|
||||
- 不容易对 ASR / TTS / 回合检测做独立替换
|
||||
- 成本和可观测性往往不如 Pipeline 那样可逐环节拆分
|
||||
|
||||
## 智能打断
|
||||
|
||||
Realtime 模型通常原生支持全双工和打断:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as 用户
|
||||
participant E as 引擎
|
||||
participant RT as Realtime Model
|
||||
|
||||
Note over RT: 模型正在输出
|
||||
RT-->>E: 音频流...
|
||||
E-->>U: 播放
|
||||
U->>E: 用户开始说话
|
||||
E->>RT: 转发新输入
|
||||
Note over RT: 模型内部处理中断并切换回复
|
||||
RT-->>E: 新的响应
|
||||
E-->>U: 播放新响应
|
||||
```
|
||||
|
||||
这种方式更自然,但你通常只能看到模型的整体行为,而不是每个中间阶段的细节。
|
||||
|
||||
## 配置示例
|
||||
|
||||
```json
|
||||
{
|
||||
"engine": "multimodal",
|
||||
"model": {
|
||||
"provider": "openai",
|
||||
"model": "gpt-4o-realtime-preview",
|
||||
"voice": "alloy"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [引擎架构](engines.md) - 回到两类引擎的选择指南
|
||||
- [Pipeline 引擎](pipeline-engine.md) - 查看分段可控的运行路径
|
||||
- [WebSocket 协议](../api-reference/websocket.md) - 了解客户端如何与引擎建立会话
|
||||
53
docs/content/customization/asr.md
Normal file
53
docs/content/customization/asr.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# 语音识别
|
||||
|
||||
语音识别(ASR)负责把用户音频实时转写成文本,供引擎继续理解和处理。
|
||||
|
||||
## 关键配置项
|
||||
|
||||
| 配置项 | 说明 |
|
||||
|--------|------|
|
||||
| **ASR 引擎** | 选择语音识别服务提供商或自建服务 |
|
||||
| **模型** | 实际使用的识别模型名称 |
|
||||
| **语言** | 中文、英文或多语言 |
|
||||
| **热词** | 提高业务词汇、品牌词、专有名词识别率 |
|
||||
| **标点与规范化** | 自动补全标点、规范数字和日期等 |
|
||||
|
||||
## 模式
|
||||
|
||||
- `offline`:引擎本地缓冲音频后触发识别(适用于 OpenAI-compatible / SiliconFlow)。
|
||||
- `streaming`:音频分片实时发送到服务端,服务端持续返回转写事件(适用于 DashScope Realtime ASR、Volcengine BigASR)。
|
||||
|
||||
## 详细配置项
|
||||
|
||||
| 配置项 | 说明 |
|
||||
|---|---|
|
||||
| ASR 引擎 | 选择语音识别服务提供商 |
|
||||
| 模型 | 识别模型名称 |
|
||||
| `enable_interim` | 是否开启离线 ASR 中间结果(默认 `false`,仅离线模式生效) |
|
||||
| `app_id` / `resource_id` | Volcengine 等厂商的应用标识与资源标识 |
|
||||
| `request_params` | 厂商原生请求参数透传,例如 `end_window_size`、`force_to_speech_time`、`context` |
|
||||
| 语言 | 中文/英文/多语言 |
|
||||
| 热词 | 提升特定词汇识别准确率 |
|
||||
| 标点与规范化 | 是否自动补全标点、文本规范化 |
|
||||
|
||||
## 选择建议
|
||||
|
||||
- 客服、外呼等业务场景建议维护热词表,并按业务线持续更新
|
||||
- 多语言入口建议显式指定语言,避免模型自动判断带来的波动
|
||||
- 对延迟敏感的场景优先选择流式识别模型
|
||||
- 对准确率敏感的场景,先评估专有名词、数字、地址等样本的识别表现
|
||||
|
||||
## 运行建议
|
||||
|
||||
- 使用与接入端一致的采样率和编码方式,减少额外转换
|
||||
- 在测试阶段准备固定样本,便于对比不同模型或参数的变化
|
||||
- 把“识别准确率”和“识别延迟”一起看,不要只看其中一项
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [声音资源](voices.md) - 完整语音输入输出链路中的 TTS 侧配置
|
||||
- [快速开始](../quickstart/index.md) - 以任务路径接入第一个 ASR 资源
|
||||
- 客服场景建议开启热词并维护业务词表
|
||||
- 多语言场景建议按会话入口显式指定语言
|
||||
- 对延迟敏感场景优先选择流式识别模型
|
||||
- 当前支持提供商:`openai_compatible`、`siliconflow`、`dashscope`、`volcengine`、`buffered`(回退)
|
||||
86
docs/content/customization/knowledge-base.md
Normal file
86
docs/content/customization/knowledge-base.md
Normal file
@@ -0,0 +1,86 @@
|
||||
# 知识库
|
||||
|
||||
知识库负责承载助手需要引用的私有事实、业务资料和长文档内容,是 RAG(检索增强生成)能力的正式说明页。
|
||||
|
||||
## 什么时候应该用知识库
|
||||
|
||||
当问题答案主要来自“稳定文档”而不是实时外部动作时,优先使用知识库:
|
||||
|
||||
- 产品说明、政策条款、操作流程、培训材料
|
||||
- 内部手册、FAQ、规范文档
|
||||
- 需要被多位助手复用的领域知识
|
||||
|
||||
如果任务本质上是“查状态、写数据、执行动作”,那通常更适合 [工具](tools.md),而不是知识库。
|
||||
|
||||
## 工作原理
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Indexing["索引阶段"]
|
||||
Doc[文档] --> Chunk[分块]
|
||||
Chunk --> Embed[向量化]
|
||||
Embed --> Store[(向量数据库)]
|
||||
end
|
||||
|
||||
subgraph Query["查询阶段"]
|
||||
Q[用户问题] --> Search[相似度检索]
|
||||
Store --> Search
|
||||
Search --> Context[相关片段]
|
||||
Context --> LLM[LLM 生成回答]
|
||||
end
|
||||
```
|
||||
|
||||
核心原则很简单:把长文档转成可检索的片段,在用户提问时只把最相关的内容送给模型。
|
||||
|
||||
## 适合放进知识库的内容
|
||||
|
||||
| 适合 | 不适合 |
|
||||
|------|--------|
|
||||
| 稳定规则、标准答案、产品文档 | 高频变化的实时状态 |
|
||||
| 领域术语、说明手册、培训材料 | 需要外部系统写入或变更的动作 |
|
||||
| 需要跨助手复用的内容 | 只在单次会话里临时生成的数据 |
|
||||
|
||||
## 内容准备建议
|
||||
|
||||
- 优先上传结构清晰、主题明确的文档
|
||||
- 对超长文档按主题拆分,减少一次索引的噪声
|
||||
- 标题、章节名和表格说明对召回质量很重要,不要全部删掉格式信息
|
||||
- 与其堆很多相近文档,不如先清理重复、过期和相互冲突的内容
|
||||
|
||||
## 常见配置项
|
||||
|
||||
| 配置项 | 作用 | 常见做法 |
|
||||
|--------|------|----------|
|
||||
| **相似度阈值** | 过滤弱相关结果 | 从保守值起步,再按误召回调 |
|
||||
| **返回数量** | 控制一次送给模型的候选片段数 | 先少后多,避免上下文污染 |
|
||||
| **分块大小** | 决定每个文档片段的长度 | 按文档类型和问题粒度调整 |
|
||||
|
||||
## 创建与维护
|
||||
|
||||
### 最小流程
|
||||
|
||||
1. 新建知识库
|
||||
2. 上传文档
|
||||
3. 完成索引
|
||||
4. 用典型问题测试召回结果
|
||||
5. 绑定到目标助手
|
||||
|
||||
### 日常维护
|
||||
|
||||
- 删除过期或互相矛盾的文档
|
||||
- 当业务口径变化时,优先更新知识库而不是只改提示词
|
||||
- 为关键问题准备固定测试问句,观察召回是否稳定
|
||||
|
||||
## 与助手的关系
|
||||
|
||||
知识库不是独立产品入口,而是助手的能力层:
|
||||
|
||||
- 助手决定是否、何时、以什么风格使用知识
|
||||
- 知识库决定能够提供哪些事实片段
|
||||
- 工作流和工具可以与知识库并用,但承担不同职责
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [助手概念](../concepts/assistants.md) - 知识库在助手能力层中的位置
|
||||
- [LLM 模型](models.md) - 为知识库准备嵌入或重排模型
|
||||
- [工具](tools.md) - 当任务需要执行动作时,优先考虑工具而不是知识库
|
||||
53
docs/content/customization/models.md
Normal file
53
docs/content/customization/models.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# LLM 模型
|
||||
|
||||
本页是资源库中 LLM 模型的正式说明页,聚焦文本生成、嵌入和重排模型的接入与选择。
|
||||
|
||||
## 这页负责什么
|
||||
|
||||
当你需要为助手配置“理解与生成能力”时,请从这里开始决定:
|
||||
|
||||
- 使用哪个供应商或模型家族
|
||||
- 该模型负责文本生成、嵌入还是重排
|
||||
- 接口地址、认证信息和默认参数如何设置
|
||||
|
||||
语音识别和语音合成分别由 [语音识别](asr.md) 与 [声音资源](voices.md) 说明,不在本页重复。
|
||||
|
||||
## 模型类型
|
||||
|
||||
| 类型 | 用途 | 常见场景 |
|
||||
|------|------|----------|
|
||||
| **文本模型** | 生成回复、总结、分类、规划 | 助手主对话、工具调用决策 |
|
||||
| **嵌入模型** | 向量化文档或查询 | 知识库检索 |
|
||||
| **重排模型** | 对检索结果再次排序 | 提升知识召回质量 |
|
||||
|
||||
## 配置清单
|
||||
|
||||
| 配置项 | 说明 | 建议 |
|
||||
|--------|------|------|
|
||||
| **供应商** | OpenAI 兼容、托管平台或自建服务 | 用统一命名规范区分环境 |
|
||||
| **模型名称** | 控制台中的显示名称 | 体现厂商、用途和环境 |
|
||||
| **模型标识** | 请求中实际使用的 model 名称 | 保持与供应商文档一致 |
|
||||
| **Base URL** | 接口地址 | 为不同环境分别配置 |
|
||||
| **API Key / Token** | 鉴权凭证 | 与显示名称配套管理 |
|
||||
| **默认参数** | Temperature、Max Tokens、上下文长度等 | 按业务场景收敛默认值 |
|
||||
|
||||
## 选择建议
|
||||
|
||||
- **先按用途选模型,再按成本和延迟筛选供应商**
|
||||
- **文本模型不要承担知识库检索职责**:检索应交给嵌入与重排模型
|
||||
- **为不同环境建立清晰命名**:如 `prod-gpt4o-mini`、`staging-qwen-text`
|
||||
- **默认参数要保守**:让助手默认稳定,再在单个场景内按需调优
|
||||
|
||||
## 常见组合
|
||||
|
||||
| 目标 | 推荐组合 |
|
||||
|------|----------|
|
||||
| **通用对话助手** | 1 个文本模型 |
|
||||
| **知识问答助手** | 文本模型 + 嵌入模型 |
|
||||
| **高质量知识召回** | 文本模型 + 嵌入模型 + 重排模型 |
|
||||
|
||||
## 下一步
|
||||
|
||||
- [语音识别](asr.md) - 为语音输入选择 ASR
|
||||
- [声音资源](voices.md) - 为语音输出准备 TTS 资源
|
||||
- [知识库](knowledge-base.md) - 把嵌入 / 重排模型接入 RAG 链路
|
||||
108
docs/content/customization/tools.md
Normal file
108
docs/content/customization/tools.md
Normal file
@@ -0,0 +1,108 @@
|
||||
# 工具
|
||||
|
||||
工具让助手从“会回答”扩展成“能执行动作”。本页是工具能力的正式说明页。
|
||||
|
||||
## 什么时候应该用工具
|
||||
|
||||
当用户请求需要依赖外部系统、实时数据或执行某个动作时,应该使用工具,而不是只靠提示词或知识库。
|
||||
|
||||
典型场景包括:
|
||||
|
||||
- 查询订单、库存、物流、天气等实时信息
|
||||
- 创建预约、提交表单、写入业务系统
|
||||
- 获取客户端环境能力,如定位、相机、权限确认
|
||||
|
||||
如果问题本质上是“查阅稳定资料”,优先用 [知识库](knowledge-base.md);如果问题是“执行动作或读写实时状态”,优先用工具。
|
||||
|
||||
## 工具类型
|
||||
|
||||
| 类型 | 说明 | 常见场景 |
|
||||
|------|------|----------|
|
||||
| **Webhook 工具** | 调用外部 HTTP API | 订单查询、CRM 写入、预约服务 |
|
||||
| **客户端工具** | 由接入端在本地执行 | 获取定位、打开相机、请求用户授权 |
|
||||
| **内建工具** | 平台或运行时直接提供 | 搜索、计算、知识检索等 |
|
||||
|
||||
## 工具调用的基本过程
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant User as 用户
|
||||
participant Assistant as 助手 / 模型
|
||||
participant Tool as 工具
|
||||
|
||||
User->>Assistant: 发起请求
|
||||
Assistant->>Assistant: 判断是否需要工具
|
||||
Assistant->>Tool: 发起工具调用
|
||||
Tool-->>Assistant: 返回结构化结果
|
||||
Assistant->>User: 组织最终回复
|
||||
```
|
||||
|
||||
关键点不是“模型会不会调用工具”,而是“工具的定义是否足够清晰,能让模型在正确时机调用”。
|
||||
|
||||
## 如何定义一个好工具
|
||||
|
||||
| 要素 | 为什么重要 |
|
||||
|------|------------|
|
||||
| **清晰名称** | 让模型知道它是做什么的,而不是猜用途 |
|
||||
| **明确描述** | 告诉模型何时调用、何时不要调用 |
|
||||
| **完整参数定义** | 降低缺参、错参和歧义调用 |
|
||||
| **稳定返回结构** | 让模型更容易根据结果组织回复 |
|
||||
| **明确错误语义** | 让失败时也能安全退回用户对话 |
|
||||
|
||||
## Webhook 工具示例
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "query_order",
|
||||
"description": "根据订单号查询当前订单状态,仅用于用户已提供订单号的场景。",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"order_id": {
|
||||
"type": "string",
|
||||
"description": "订单编号"
|
||||
}
|
||||
},
|
||||
"required": ["order_id"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 客户端工具的作用
|
||||
|
||||
某些动作必须在接入端执行,例如:
|
||||
|
||||
- 获取当前位置
|
||||
- 请求麦克风或相机权限
|
||||
- 打开特定页面或原生能力
|
||||
|
||||
这类工具通常通过事件流和客户端配合完成,而不是由后端直接执行。
|
||||
|
||||
## 工具设计建议
|
||||
|
||||
- **一工具一职责**:不要把多个业务动作塞进同一个工具
|
||||
- **名称与描述写给模型看**:必须明确何时用、何时不用
|
||||
- **先设计错误返回**:失败时模型应该知道如何解释给用户
|
||||
- **减少高权限工具暴露面**:不是每个助手、每个工作流节点都需要全部工具
|
||||
- **把业务规则放回系统**:工具负责执行,提示词负责决策边界
|
||||
|
||||
## 与知识库、工作流的分工
|
||||
|
||||
- **知识库**:提供稳定事实
|
||||
- **工具**:执行动作或读取实时状态
|
||||
- **工作流**:决定何时进入某个步骤、调用哪个工具、失败如何回退
|
||||
|
||||
当一个助手开始涉及多步骤、多系统调用时,工具通常应与 [工作流](workflows.md) 一起设计,而不是孤立配置。
|
||||
|
||||
## 安全与治理
|
||||
|
||||
- 校验输入,不直接信任模型生成的参数
|
||||
- 为工具设置最小权限和清晰的可见范围
|
||||
- 记录调用日志,便于审计和回放
|
||||
- 对外部接口增加超时、重试和速率限制策略
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [知识库](knowledge-base.md) - 当问题更适合“查资料”时使用知识库
|
||||
- [工作流](workflows.md) - 当工具调用需要流程控制和分支逻辑时接入工作流
|
||||
- [助手概念](../concepts/assistants.md) - 理解工具在助手能力层中的位置
|
||||
25
docs/content/customization/tts.md
Normal file
25
docs/content/customization/tts.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# TTS 参数
|
||||
|
||||
TTS 参数决定助手语音输出的节奏、音量和听感。本页只讨论参数层面的调优建议。
|
||||
|
||||
## 常用参数
|
||||
|
||||
| 参数 | 说明 | 常见范围 |
|
||||
|------|------|----------|
|
||||
| **语速** | 说话速度 | `0.5 - 2.0` |
|
||||
| **音量 / 增益** | 输出音量强弱 | 供应商自定义 |
|
||||
| **音调** | 声线高低 | 供应商自定义 |
|
||||
| **模型** | 合成模型名称 | 依供应商而定 |
|
||||
| **声音 ID** | 发音人或音色标识 | 依供应商而定 |
|
||||
|
||||
## 调优建议
|
||||
|
||||
- 对话助手通常建议把语速控制在 `0.9 - 1.2`
|
||||
- 需要打断能力的场景,优先选择低延迟流式 TTS,并避免过长的单次回复
|
||||
- 如果业务强调可信度或专业感,先保证清晰度和稳定性,再追求个性化音色
|
||||
- 不要只试听一句问候语,至少用三类文案对比:短答复、长答复、数字或专有名词较多的答复
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [声音资源](voices.md) - 先选择适合的供应商、模型和音色
|
||||
- [语音识别](asr.md) - 结合输入侧延迟一起评估整条语音链路
|
||||
43
docs/content/customization/voices.md
Normal file
43
docs/content/customization/voices.md
Normal file
@@ -0,0 +1,43 @@
|
||||
# 声音资源
|
||||
|
||||
本页是资源库中 TTS 声音与发音人资源的正式说明页,聚焦“选择哪种声音给助手输出”。
|
||||
|
||||
## 这页负责什么
|
||||
|
||||
当你已经决定启用语音输出后,需要在这里完成:
|
||||
|
||||
- 选择供应商、模型和声音资源
|
||||
- 为不同业务或语言准备不同音色
|
||||
- 通过预览和测试确定默认发音人
|
||||
|
||||
更细的速度、音量、音调等参数建议见 [TTS 参数](tts.md)。
|
||||
|
||||
## 选择声音时要考虑什么
|
||||
|
||||
| 维度 | 说明 |
|
||||
|------|------|
|
||||
| **语言与口音** | 是否覆盖目标用户语言与地区口音 |
|
||||
| **风格** | 专业、亲切、活泼、沉稳等输出气质 |
|
||||
| **延迟** | 是否适合实时对话,而不仅是离线合成 |
|
||||
| **稳定性** | 长文本、多轮会话中的音色一致性 |
|
||||
| **成本** | 单次调用成本和高并发可用性 |
|
||||
|
||||
## 推荐做法
|
||||
|
||||
1. 先为每类业务角色确定一条主音色
|
||||
2. 再按语言或渠道补充少量备选音色
|
||||
3. 通过固定测试文案试听,统一比较自然度、节奏和可懂度
|
||||
4. 上线后尽量保持默认音色稳定,避免频繁切换影响用户体验
|
||||
|
||||
## 常见资源组织方式
|
||||
|
||||
| 组织方式 | 适用场景 |
|
||||
|----------|----------|
|
||||
| **按语言区分** | 中英文或多语种助手 |
|
||||
| **按业务角色区分** | 客服、销售、培训、提醒类助手 |
|
||||
| **按环境区分** | 开发、预发、生产使用不同供应商或凭证 |
|
||||
|
||||
## 下一步
|
||||
|
||||
- [TTS 参数](tts.md) - 调整语速、增益、音调等输出参数
|
||||
- [快速开始](../quickstart/index.md) - 把声音资源绑定到第一个助手
|
||||
106
docs/content/customization/workflows.md
Normal file
106
docs/content/customization/workflows.md
Normal file
@@ -0,0 +1,106 @@
|
||||
# 工作流
|
||||
|
||||
工作流用于把复杂业务拆成明确的步骤、分支和回退策略,是 RAS 中承载流程逻辑的正式能力页。
|
||||
|
||||
## 什么时候需要工作流
|
||||
|
||||
当一个助手同时满足以下任一情况时,通常应考虑工作流,而不是继续堆叠单一提示词:
|
||||
|
||||
- 需要多轮收集信息,例如订单号、手机号、预约时间等
|
||||
- 需要按意图或条件走不同分支
|
||||
- 需要串联多个工具或业务系统
|
||||
- 需要在异常或信息不足时统一回退到澄清、兜底或人工节点
|
||||
|
||||
## 工作流与助手的关系
|
||||
|
||||
助手负责对外表现、全局策略和渠道接入;工作流负责把某个业务流程拆成可维护的节点。
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Assistant[助手] --> Workflow[工作流]
|
||||
Workflow --> Nodes[节点与分支]
|
||||
Nodes --> Tools[工具 / 知识库 / 人工]
|
||||
```
|
||||
|
||||
这意味着:
|
||||
|
||||
- 助手定义角色、提示词基线、模型和输出方式
|
||||
- 工作流定义“这类问题该按什么顺序被处理”
|
||||
- 工具和知识库作为节点可调用的能力,被有选择地暴露给流程
|
||||
|
||||
## 关键组成
|
||||
|
||||
| 组成 | 作用 | 设计建议 |
|
||||
|------|------|----------|
|
||||
| **工作流名称** | 区分业务流程 | 用业务语义命名,避免过于技术化 |
|
||||
| **入口节点** | 用户进入后的第一步 | 保持单入口,便于理解和测试 |
|
||||
| **全局提示词** | 对所有节点生效的共性约束 | 保持简短,避免与节点提示词冲突 |
|
||||
| **节点提示词** | 当前节点的任务说明 | 单一职责,明确输入 / 输出 |
|
||||
| **节点工具白名单** | 控制当前节点可调用的工具集合 | 遵循最小权限原则 |
|
||||
| **超时与回退** | 异常、超时、缺信息时的处理方式 | 优先回到澄清、兜底或人工节点 |
|
||||
| **上下文透传** | 在节点之间共享状态 | 只传递后续节点真正需要的信息 |
|
||||
|
||||
## 常见节点类型
|
||||
|
||||
| 节点类型 | 适合做什么 |
|
||||
|----------|------------|
|
||||
| **路由节点** | 判断用户意图并进入不同分支 |
|
||||
| **信息收集节点** | 收集订单号、联系方式、时间等关键信息 |
|
||||
| **处理节点** | 调用工具、执行查询、计算或写入系统 |
|
||||
| **回复节点** | 组织最终答复并控制输出风格 |
|
||||
| **人工节点** | 转接人工、排队或发起通知 |
|
||||
| **结束节点** | 输出结束语并关闭流程 |
|
||||
|
||||
## 推荐编排步骤
|
||||
|
||||
1. 先写清楚流程目标:这条工作流要解决哪一类业务问题
|
||||
2. 画出最小节点图:入口、关键分支、结束和兜底
|
||||
3. 为每个节点定义唯一职责和输入 / 输出
|
||||
4. 再绑定知识库、工具和回退策略
|
||||
5. 在测试面板或流程调试工具中验证每条主路径和异常路径
|
||||
|
||||
## 配置示例
|
||||
|
||||
```yaml
|
||||
workflow:
|
||||
name: "订单咨询流程"
|
||||
entry: "intent_router"
|
||||
global_prompt: "优先给出可执行步骤,必要时先澄清信息。"
|
||||
nodes:
|
||||
- id: "intent_router"
|
||||
type: "router"
|
||||
prompt: "识别用户意图:查订单、退款、投诉"
|
||||
next:
|
||||
- when: "intent == query_order"
|
||||
to: "collect_order_id"
|
||||
- when: "intent == refund"
|
||||
to: "refund_policy"
|
||||
- id: "collect_order_id"
|
||||
type: "collect"
|
||||
prompt: "请用户提供订单号"
|
||||
tools: ["query_order"]
|
||||
fallback: "human_handoff"
|
||||
- id: "human_handoff"
|
||||
type: "end"
|
||||
prompt: "转人工处理"
|
||||
```
|
||||
|
||||
## 设计建议
|
||||
|
||||
- **让每个节点只做一件事**:避免单节点同时负责路由、收集信息和最终回复
|
||||
- **工具按节点授权**:不要把所有工具暴露给整条流程中的每个节点
|
||||
- **把失败路径设计出来**:超时、无结果、参数缺失都应该有明确回退
|
||||
- **优先传状态,不传长文本**:节点之间共享必要结构化信息,比传递大段自然语言更稳
|
||||
- **为流程保留可观测性**:每条主路径都应能在调试时解释“为什么走到这里”
|
||||
|
||||
## 当前边界
|
||||
|
||||
- 文档不会完整覆盖所有表达式或节点字段的最终 Schema
|
||||
- 不同执行引擎下,可用节点字段和运行行为可能存在差异
|
||||
- 可视化编排与底层字段映射可能不会一一对应
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [助手概念](../concepts/assistants.md) - 工作流在助手体系中的位置
|
||||
- [工具](tools.md) - 设计可被流程安全调用的工具
|
||||
- [知识库](knowledge-base.md) - 让流程中的节点使用 RAG 能力
|
||||
@@ -1,95 +0,0 @@
|
||||
# 部署指南
|
||||
|
||||
## 方式一:Docker 部署(推荐)
|
||||
|
||||
### 1. 构建镜像
|
||||
|
||||
```bash
|
||||
docker build -t ai-video-assistant-web ./web
|
||||
```
|
||||
|
||||
### 2. 运行容器
|
||||
|
||||
```bash
|
||||
docker run -d \
|
||||
--name ai-assistant-web \
|
||||
-p 3000:80 \
|
||||
ai-video-assistant-web
|
||||
```
|
||||
|
||||
### 3. 使用 Docker Compose
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
web:
|
||||
build: ./web
|
||||
ports:
|
||||
- "3000:80"
|
||||
environment:
|
||||
- VITE_API_URL=http://api:8080
|
||||
```
|
||||
|
||||
运行:
|
||||
```bash
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
## 方式二:Nginx 部署
|
||||
|
||||
### 1. 构建前端
|
||||
|
||||
```bash
|
||||
cd web
|
||||
npm run build
|
||||
```
|
||||
|
||||
### 2. 配置 Nginx
|
||||
|
||||
```nginx
|
||||
server {
|
||||
listen 80;
|
||||
server_name your-domain.com;
|
||||
root /var/www/ai-assistant/dist;
|
||||
index index.html;
|
||||
|
||||
location / {
|
||||
try_files $uri $uri/ /index.html;
|
||||
}
|
||||
|
||||
location /api {
|
||||
proxy_pass http://localhost:8080;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. 启动 Nginx
|
||||
|
||||
```bash
|
||||
sudo nginx -t
|
||||
sudo systemctl reload nginx
|
||||
```
|
||||
|
||||
## 环境变量配置
|
||||
|
||||
| 变量 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| VITE_API_URL | 后端 API 地址 | http://localhost:8080 |
|
||||
| VITE_GEMINI_API_KEY | Gemini API Key | - |
|
||||
|
||||
## 验证部署
|
||||
|
||||
1. 访问 http://your-domain.com
|
||||
2. 检查页面是否正常加载
|
||||
3. 验证各功能模块是否可用
|
||||
|
||||
## 故障排查
|
||||
|
||||
| 问题 | 解决方案 |
|
||||
|------|---------|
|
||||
| 页面空白 | 检查浏览器控制台错误 |
|
||||
| API 请求失败 | 确认 VITE_API_URL 配置正确 |
|
||||
| 静态资源 404 | 检查 nginx try_files 配置 |
|
||||
161
docs/content/deployment/docker.md
Normal file
161
docs/content/deployment/docker.md
Normal file
@@ -0,0 +1,161 @@
|
||||
# Docker 部署
|
||||
|
||||
Docker 是推荐的部署方式,可以快速启动服务并确保环境一致性。
|
||||
|
||||
## 前提条件
|
||||
|
||||
- Docker 20.10+
|
||||
- Docker Compose 2.0+(可选)
|
||||
|
||||
## 构建镜像
|
||||
|
||||
### Web 前端
|
||||
|
||||
```bash
|
||||
docker build -t ai-video-assistant-web ./web
|
||||
```
|
||||
|
||||
### API 服务
|
||||
|
||||
```bash
|
||||
docker build -t ai-video-assistant-api ./api
|
||||
```
|
||||
|
||||
### Engine 服务
|
||||
|
||||
```bash
|
||||
docker build -t ai-video-assistant-engine ./engine
|
||||
```
|
||||
|
||||
## 运行容器
|
||||
|
||||
### 单独运行
|
||||
|
||||
```bash
|
||||
# Web 前端
|
||||
docker run -d \
|
||||
--name ai-assistant-web \
|
||||
-p 3000:80 \
|
||||
ai-video-assistant-web
|
||||
|
||||
# API 服务
|
||||
docker run -d \
|
||||
--name ai-assistant-api \
|
||||
-p 8080:8080 \
|
||||
ai-video-assistant-api
|
||||
|
||||
# Engine 服务
|
||||
docker run -d \
|
||||
--name ai-assistant-engine \
|
||||
-p 8000:8000 \
|
||||
ai-video-assistant-engine
|
||||
```
|
||||
|
||||
## Docker Compose
|
||||
|
||||
推荐使用 Docker Compose 管理多个服务:
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
web:
|
||||
build: ./web
|
||||
ports:
|
||||
- "3000:80"
|
||||
environment:
|
||||
- VITE_API_URL=http://api:8080
|
||||
depends_on:
|
||||
- api
|
||||
|
||||
api:
|
||||
build: ./api
|
||||
ports:
|
||||
- "8080:8080"
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://postgres:password@db:5432/ai_assistant
|
||||
depends_on:
|
||||
- db
|
||||
|
||||
engine:
|
||||
build: ./engine
|
||||
ports:
|
||||
- "8000:8000"
|
||||
environment:
|
||||
- BACKEND_URL=http://api:8080
|
||||
|
||||
db:
|
||||
image: postgres:15
|
||||
environment:
|
||||
- POSTGRES_DB=ai_assistant
|
||||
- POSTGRES_PASSWORD=password
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
```
|
||||
|
||||
### 启动服务
|
||||
|
||||
```bash
|
||||
# 启动所有服务
|
||||
docker-compose up -d
|
||||
|
||||
# 查看日志
|
||||
docker-compose logs -f
|
||||
|
||||
# 停止服务
|
||||
docker-compose down
|
||||
```
|
||||
|
||||
## 镜像优化
|
||||
|
||||
### 多阶段构建
|
||||
|
||||
Web 前端 Dockerfile 示例:
|
||||
|
||||
```dockerfile
|
||||
# 构建阶段
|
||||
FROM node:18-alpine AS builder
|
||||
WORKDIR /app
|
||||
COPY package*.json ./
|
||||
RUN npm ci
|
||||
COPY . .
|
||||
RUN npm run build
|
||||
|
||||
# 运行阶段
|
||||
FROM nginx:alpine
|
||||
COPY --from=builder /app/dist /usr/share/nginx/html
|
||||
COPY nginx.conf /etc/nginx/nginx.conf
|
||||
EXPOSE 80
|
||||
CMD ["nginx", "-g", "daemon off;"]
|
||||
```
|
||||
|
||||
## 健康检查
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
```
|
||||
|
||||
## 常见问题
|
||||
|
||||
### 容器启动失败
|
||||
|
||||
```bash
|
||||
# 查看容器日志
|
||||
docker logs ai-assistant-web
|
||||
|
||||
# 进入容器调试
|
||||
docker exec -it ai-assistant-web sh
|
||||
```
|
||||
|
||||
### 端口冲突
|
||||
|
||||
修改 `docker-compose.yml` 中的端口映射,例如 `3001:80`。
|
||||
41
docs/content/deployment/index.md
Normal file
41
docs/content/deployment/index.md
Normal file
@@ -0,0 +1,41 @@
|
||||
# 部署概览
|
||||
|
||||
本章节介绍如何使用 Docker 部署 Realtime Agent Studio (RAS)。
|
||||
|
||||
## 部署方式
|
||||
|
||||
| 方式 | 适用场景 | 复杂度 |
|
||||
|------|---------|--------|
|
||||
| [Docker 部署](docker.md) | 快速启动、容器化运行 | 简单 |
|
||||
|
||||
## 快速开始
|
||||
|
||||
### Docker 一键部署
|
||||
|
||||
```bash
|
||||
docker build -t ai-video-assistant-web ./web
|
||||
docker run -d -p 3000:80 --name ai-assistant-web ai-video-assistant-web
|
||||
```
|
||||
|
||||
### 验证部署
|
||||
|
||||
1. 访问 http://localhost:3000
|
||||
2. 检查页面是否正常加载
|
||||
3. 验证各功能模块是否可用
|
||||
|
||||
## 环境变量配置
|
||||
|
||||
| 变量 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| VITE_API_URL | 后端 API 地址 | http://localhost:8080 |
|
||||
| VITE_GEMINI_API_KEY | Gemini API Key | - |
|
||||
|
||||
## 故障排查
|
||||
|
||||
| 问题 | 解决方案 |
|
||||
|------|---------|
|
||||
| 页面空白 | 检查浏览器控制台错误 |
|
||||
| API 请求失败 | 确认 VITE_API_URL 配置正确 |
|
||||
| 静态资源 404 | 检查 nginx try_files 配置 |
|
||||
|
||||
更多问题请参考 [故障排查](../resources/troubleshooting.md)。
|
||||
@@ -1,65 +0,0 @@
|
||||
# 助手管理
|
||||
|
||||
助手是 AI Video Assistant 的核心模块,用于创建和配置智能对话机器人。
|
||||
|
||||
## 创建助手
|
||||
|
||||

|
||||
|
||||
### 基本配置
|
||||
|
||||
1. 进入 **助手管理** 页面
|
||||
2. 点击 **新建助手** 按钮
|
||||
3. 填写基本信息:
|
||||
|
||||
| 配置项 | 说明 |
|
||||
|-------|------|
|
||||
| 助手名称 | 唯一标识,用于区分不同助手 |
|
||||
| 提示词 | 定义助手的角色和行为 |
|
||||
| 温度参数 | 控制回复的随机性(0-1) |
|
||||
|
||||
### 配置标签页
|
||||
|
||||
#### 全局设置
|
||||
|
||||
- 设置助手的核心对话能力
|
||||
- 配置上下文长度
|
||||
- 设置对话开场白
|
||||
|
||||
#### 语音配置
|
||||
|
||||
| 配置 | 说明 |
|
||||
|------|------|
|
||||
| TTS 引擎 | 选择语音合成服务(阿里/火山/Minimax) |
|
||||
| 音色 | 选择语音风格和性别 |
|
||||
| 语速 | 语音播放速度 |
|
||||
| 音量 | 语音输出音量 |
|
||||
|
||||
#### 工具绑定
|
||||
|
||||
- 配置助手可调用的外部工具
|
||||
- 启用/禁用特定功能模块
|
||||
|
||||
#### 知识关联
|
||||
|
||||
- 关联 RAG 知识库
|
||||
- 配置检索参数(相似度阈值、返回数量)
|
||||
|
||||
#### 外部链接
|
||||
|
||||
- 配置第三方服务集成
|
||||
- 设置 Webhook 回调
|
||||
|
||||
## 调试助手
|
||||
|
||||
在助手详情页可进行实时调试:
|
||||
- 文本对话测试
|
||||
- 语音输入测试
|
||||
- 工具调用验证
|
||||
|
||||
## 发布助手
|
||||
|
||||
配置完成后:
|
||||
1. 点击 **保存**
|
||||
2. 点击 **发布**
|
||||
3. 获取 API 调用地址
|
||||
@@ -1,53 +0,0 @@
|
||||
# 知识库
|
||||
|
||||
知识库基于 RAG(检索增强生成)技术,让 AI 能够回答私有领域问题。
|
||||
|
||||
## 概述
|
||||
|
||||

|
||||
|
||||
## 创建知识库
|
||||
|
||||
### 步骤
|
||||
|
||||
1. 进入 **知识库** 页面
|
||||
2. 点击 **新建知识库**
|
||||
3. 填写知识库名称
|
||||
4. 上传文档
|
||||
|
||||
### 支持格式
|
||||
|
||||
| 格式 | 说明 |
|
||||
|------|------|
|
||||
| Markdown | 最佳选择,格式清晰 |
|
||||
| PDF | 自动提取文本 |
|
||||
| TXT | 纯文本支持 |
|
||||
| Word | 需转换为其他格式 |
|
||||
|
||||
### 文档上传
|
||||
|
||||
- 拖拽上传或点击选择
|
||||
- 单文件大小限制 10MB
|
||||
- 建议单文档不超过 50000 字
|
||||
|
||||
## 配置检索参数
|
||||
|
||||
| 参数 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| 相似度阈值 | 低于此分数的结果不返回 | 0.7 |
|
||||
| 返回数量 | 单次检索返回的结果数 | 3 |
|
||||
| 分块大小 | 文档分块的最大长度 | 500 |
|
||||
|
||||
## 管理知识库
|
||||
|
||||
- **查看文档** - 浏览已上传的文件
|
||||
- **删除文档** - 移除不需要的内容
|
||||
- **更新文档** - 重新上传覆盖
|
||||
- **测试检索** - 验证知识库效果
|
||||
|
||||
## 关联助手
|
||||
|
||||
在助手配置的 **知识** 标签页中:
|
||||
1. 选择要关联的知识库
|
||||
2. 设置检索策略
|
||||
3. 保存配置
|
||||
@@ -1,44 +0,0 @@
|
||||
# 模型配置
|
||||
|
||||
## LLM 模型库
|
||||
|
||||

|
||||
|
||||
### 支持的模型
|
||||
|
||||
| 供应商 | 模型 | 特点 |
|
||||
|--------|------|------|
|
||||
| **OpenAI** | GPT-4 / GPT-3.5 | 通用能力强 |
|
||||
| **DeepSeek** | DeepSeek Chat | 高性价比 |
|
||||
| **SiliconFlow** | 多种开源模型 | 本地部署友好 |
|
||||
| **Google** | Gemini Pro | 多模态支持 |
|
||||
|
||||
### 配置步骤
|
||||
|
||||
1. 进入 **LLM 库** 页面
|
||||
2. 点击 **添加模型**
|
||||
3. 选择供应商
|
||||
4. 填写 API Key 和 Endpoint
|
||||
5. 设置默认参数
|
||||
|
||||
### 参数说明
|
||||
|
||||
| 参数 | 说明 | 建议值 |
|
||||
|------|------|--------|
|
||||
| Temperature | 随机性 | 0.7 |
|
||||
| Max Tokens | 最大输出长度 | 2048 |
|
||||
| Top P | 核采样 | 0.9 |
|
||||
|
||||
## ASR 语音识别
|
||||
|
||||
### 支持引擎
|
||||
|
||||
- **Whisper** - OpenAI 通用语音识别
|
||||
- **SenseVoice** - 高精度中文语音识别
|
||||
|
||||
### 配置方法
|
||||
|
||||
1. 进入 **ASR 库** 页面
|
||||
2. 选择识别引擎
|
||||
3. 配置音频参数(采样率、编码)
|
||||
4. 测试识别效果
|
||||
@@ -1,58 +0,0 @@
|
||||
# 语音合成
|
||||
|
||||
语音合成(TTS)模块提供自然流畅的语音输出能力。
|
||||
|
||||
## 概述
|
||||
|
||||

|
||||
|
||||
## 支持的引擎
|
||||
|
||||
| 供应商 | 特点 | 适用场景 |
|
||||
|--------|------|---------|
|
||||
| **阿里云** | 多音色、高自然度 | 通用场景 |
|
||||
| **火山引擎** | 低延迟、实时性好 | 实时对话 |
|
||||
| **Minimax** | 高性价比 | 批量合成 |
|
||||
|
||||
## 配置方法
|
||||
|
||||
### 添加语音配置
|
||||
|
||||
1. 进入 **语音库** 页面
|
||||
2. 点击 **添加语音**
|
||||
3. 选择供应商
|
||||
4. 填写 API 凭证
|
||||
5. 保存配置
|
||||
|
||||
### 测试语音
|
||||
|
||||
- 在线预览发音效果
|
||||
- 调整语速和音量
|
||||
- 切换不同音色
|
||||
|
||||
## 音色选择
|
||||
|
||||
### 中文音色
|
||||
|
||||
| 音色 | 风格 |
|
||||
|------|------|
|
||||
| 晓晓 | 标准女声 |
|
||||
| 晓北 | 知性女声 |
|
||||
| 逍遥 | 青年男声 |
|
||||
| 丫丫 | 活泼童声 |
|
||||
|
||||
### 英文音色
|
||||
|
||||
| 音色 | 风格 |
|
||||
|------|------|
|
||||
| Joanna | 专业女声 |
|
||||
| Matthew | 沉稳男声 |
|
||||
| Amy | 亲切女声 |
|
||||
|
||||
## 参数调优
|
||||
|
||||
| 参数 | 范围 | 说明 |
|
||||
|------|------|------|
|
||||
| 语速 | 0.5-2.0 | 1.0 为正常速度 |
|
||||
| 音量 | 0-100 | 输出音量百分比 |
|
||||
| 音调 | 0.5-2.0 | 语音音调高低 |
|
||||
@@ -1,53 +0,0 @@
|
||||
# 工作流管理
|
||||
|
||||
工作流提供可视化的对话流程编排能力,支持复杂的业务场景。
|
||||
|
||||
## 概述
|
||||
|
||||

|
||||
|
||||
## 节点类型
|
||||
|
||||
| 节点 | 图标 | 功能说明 |
|
||||
|------|------|---------|
|
||||
| **对话节点** | 💬 | AI 自动回复,可设置回复策略 |
|
||||
| **工具节点** | 🔧 | 调用外部 API 或自定义工具 |
|
||||
| **人工节点** | 👤 | 转接人工客服 |
|
||||
| **结束节点** | 🏁 | 结束对话流程 |
|
||||
|
||||
## 创建工作流
|
||||
|
||||
### 步骤
|
||||
|
||||
1. 进入 **工作流** 页面
|
||||
2. 点击 **新建工作流**
|
||||
3. 从左侧拖拽节点到画布
|
||||
4. 连接节点建立流程
|
||||
5. 配置各节点参数
|
||||
6. 保存并发布
|
||||
|
||||
### 节点配置
|
||||
|
||||
#### 对话节点配置
|
||||
|
||||
- 回复模板
|
||||
- 条件分支
|
||||
- 知识库检索
|
||||
|
||||
#### 工具节点配置
|
||||
|
||||
- 选择工具类型
|
||||
- 配置输入参数
|
||||
- 设置输出处理
|
||||
|
||||
#### 人工节点配置
|
||||
|
||||
- 转接规则
|
||||
- 排队策略
|
||||
- 通知设置
|
||||
|
||||
## 流程测试
|
||||
|
||||
- 支持单步调试
|
||||
- 可查看执行日志
|
||||
- 实时验证流程逻辑
|
||||
@@ -1,59 +0,0 @@
|
||||
# 快速开始
|
||||
|
||||
## 环境准备
|
||||
|
||||
### 前置条件
|
||||
|
||||
| 软件 | 版本要求 |
|
||||
|------|---------|
|
||||
| Node.js | 18.0 或更高 |
|
||||
| npm/yarn/pnpm | 最新版本 |
|
||||
| 现代浏览器 | Chrome 90+ / Firefox 90+ / Edge 90+ |
|
||||
|
||||
### 检查环境
|
||||
|
||||
```bash
|
||||
node --version
|
||||
npm --version
|
||||
```
|
||||
|
||||
## 安装步骤
|
||||
|
||||
### 1. 克隆项目
|
||||
|
||||
```bash
|
||||
git clone https://github.com/your-repo/AI-VideoAssistant.git
|
||||
cd AI-VideoAssistant
|
||||
```
|
||||
|
||||
### 2. 安装依赖
|
||||
|
||||
```bash
|
||||
cd web
|
||||
npm install
|
||||
```
|
||||
|
||||
### 3. 配置环境变量
|
||||
|
||||
创建 `.env` 文件:
|
||||
|
||||
```env
|
||||
VITE_API_URL=http://localhost:8080
|
||||
VITE_GEMINI_API_KEY=your_api_key_here
|
||||
```
|
||||
|
||||
### 4. 启动开发服务器
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
访问 http://localhost:3000
|
||||
|
||||
## 构建生产版本
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
```
|
||||
|
||||
构建产物在 `dist` 目录。
|
||||
279
docs/content/getting-started/configuration.md
Normal file
279
docs/content/getting-started/configuration.md
Normal file
@@ -0,0 +1,279 @@
|
||||
# 配置说明
|
||||
|
||||
本页面介绍 Realtime Agent Studio 各组件的配置方法。
|
||||
|
||||
---
|
||||
|
||||
## 配置概览
|
||||
|
||||
RAS 采用分层配置,各组件独立配置:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Config["配置层级"]
|
||||
ENV[环境变量]
|
||||
File[配置文件]
|
||||
DB[数据库配置]
|
||||
end
|
||||
|
||||
subgraph Services["服务组件"]
|
||||
Web[Web 前端]
|
||||
API[API 服务]
|
||||
Engine[Engine 服务]
|
||||
end
|
||||
|
||||
ENV --> Web
|
||||
ENV --> API
|
||||
ENV --> Engine
|
||||
File --> API
|
||||
File --> Engine
|
||||
DB --> API
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Web 前端配置
|
||||
|
||||
### 环境变量
|
||||
|
||||
在 `web/` 目录创建 `.env` 文件:
|
||||
|
||||
```env
|
||||
# API 服务地址(必填)
|
||||
VITE_API_URL=http://localhost:8080
|
||||
|
||||
# Engine WebSocket 地址(可选,默认同 API 服务器)
|
||||
VITE_WS_URL=ws://localhost:8000
|
||||
|
||||
# Google Gemini API Key(可选,用于前端直连)
|
||||
VITE_GEMINI_API_KEY=your_api_key
|
||||
```
|
||||
|
||||
### 变量说明
|
||||
|
||||
| 变量 | 必填 | 说明 | 默认值 |
|
||||
|------|:----:|------|--------|
|
||||
| `VITE_API_URL` | ✅ | 后端 API 服务地址 | - |
|
||||
| `VITE_WS_URL` | ❌ | WebSocket 服务地址 | 从 API URL 推断 |
|
||||
| `VITE_GEMINI_API_KEY` | ❌ | Gemini API 密钥 | - |
|
||||
|
||||
### 开发环境配置
|
||||
|
||||
```env
|
||||
# .env.development
|
||||
VITE_API_URL=http://localhost:8080
|
||||
VITE_WS_URL=ws://localhost:8000
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API 服务配置
|
||||
|
||||
### 环境变量
|
||||
|
||||
```env
|
||||
# 数据库配置
|
||||
DATABASE_URL=sqlite:///./data/app.db
|
||||
# 或 PostgreSQL
|
||||
# DATABASE_URL=postgresql://user:pass@localhost:5432/ras
|
||||
|
||||
# Redis 配置(可选)
|
||||
REDIS_URL=redis://localhost:6379/0
|
||||
|
||||
# 安全配置
|
||||
SECRET_KEY=your-secret-key-at-least-32-chars
|
||||
CORS_ORIGINS=http://localhost:3000,https://your-domain.com
|
||||
|
||||
# 日志级别
|
||||
LOG_LEVEL=INFO
|
||||
|
||||
# 文件存储路径
|
||||
UPLOAD_DIR=./uploads
|
||||
```
|
||||
|
||||
### 配置文件
|
||||
|
||||
API 服务支持 YAML 配置文件 `api/config/settings.yaml`:
|
||||
|
||||
```yaml
|
||||
# 服务配置
|
||||
server:
|
||||
host: "0.0.0.0"
|
||||
port: 8080
|
||||
workers: 4
|
||||
|
||||
# 数据库配置
|
||||
database:
|
||||
url: "sqlite:///./data/app.db"
|
||||
pool_size: 5
|
||||
max_overflow: 10
|
||||
|
||||
# Redis 配置
|
||||
redis:
|
||||
url: "redis://localhost:6379/0"
|
||||
|
||||
# 安全配置
|
||||
security:
|
||||
secret_key: "your-secret-key"
|
||||
token_expire_minutes: 1440
|
||||
|
||||
# 日志配置
|
||||
logging:
|
||||
level: "INFO"
|
||||
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Engine 服务配置
|
||||
|
||||
### 环境变量
|
||||
|
||||
```env
|
||||
# 后端 API 地址
|
||||
BACKEND_URL=http://localhost:8080
|
||||
|
||||
# WebSocket 服务配置
|
||||
WS_HOST=0.0.0.0
|
||||
WS_PORT=8000
|
||||
|
||||
# 音频配置
|
||||
AUDIO_SAMPLE_RATE=16000
|
||||
AUDIO_CHANNELS=1
|
||||
|
||||
# 日志级别
|
||||
LOG_LEVEL=INFO
|
||||
```
|
||||
|
||||
### 引擎配置
|
||||
|
||||
Engine 配置文件 `engine/config/engine.yaml`:
|
||||
|
||||
```yaml
|
||||
# WebSocket 服务
|
||||
websocket:
|
||||
host: "0.0.0.0"
|
||||
port: 8000
|
||||
ping_interval: 30
|
||||
ping_timeout: 10
|
||||
|
||||
# 音频处理
|
||||
audio:
|
||||
sample_rate: 16000
|
||||
channels: 1
|
||||
chunk_size: 640 # ≈20ms @ 16kHz、16-bit 单声道(320 采样点 = 640 字节);若单位为采样点则相当于 40ms——请以引擎实现为准
|
||||
|
||||
# VAD 配置
|
||||
vad:
|
||||
enabled: true
|
||||
threshold: 0.5
|
||||
min_speech_duration: 0.25
|
||||
min_silence_duration: 0.5
|
||||
|
||||
# 引擎默认配置
|
||||
defaults:
|
||||
engine_type: "pipeline" # pipeline(管线式引擎)或 multimodal(即文档中所称的 Realtime 实时引擎)
|
||||
max_response_tokens: 512
|
||||
temperature: 0.7
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Docker 配置
|
||||
|
||||
### docker-compose.yml 环境变量
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
web:
|
||||
environment:
|
||||
- VITE_API_URL=http://api:8080
|
||||
|
||||
api:
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://postgres:password@db:5432/ras
|
||||
- REDIS_URL=redis://redis:6379/0
|
||||
- SECRET_KEY=${SECRET_KEY}
|
||||
|
||||
engine:
|
||||
environment:
|
||||
- BACKEND_URL=http://api:8080
|
||||
- LOG_LEVEL=INFO
|
||||
```
|
||||
|
||||
### 使用 .env 文件
|
||||
|
||||
在项目根目录创建 `.env`:
|
||||
|
||||
```env
|
||||
# Docker Compose 会自动加载
|
||||
SECRET_KEY=your-secret-key-at-least-32-chars
|
||||
POSTGRES_PASSWORD=secure-db-password
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 配置优先级
|
||||
|
||||
配置按以下优先级加载(高优先级覆盖低优先级):
|
||||
|
||||
```
|
||||
1. 命令行参数(最高)
|
||||
2. 环境变量
|
||||
3. .env 文件
|
||||
4. 配置文件 (yaml)
|
||||
5. 代码默认值(最低)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 敏感配置管理
|
||||
|
||||
!!! danger "安全提醒"
|
||||
不要将敏感信息提交到代码仓库!
|
||||
|
||||
### 推荐做法
|
||||
|
||||
1. **使用 .env 文件**,并将其加入 `.gitignore`
|
||||
2. **使用环境变量**,通过 CI/CD 注入
|
||||
3. **使用密钥管理服务**,如 AWS Secrets Manager、HashiCorp Vault
|
||||
|
||||
### .gitignore 配置
|
||||
|
||||
```gitignore
|
||||
# 环境配置文件
|
||||
.env
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# 敏感数据目录
|
||||
/secrets/
|
||||
*.pem
|
||||
*.key
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 配置验证
|
||||
|
||||
启动服务前验证配置是否正确:
|
||||
|
||||
```bash
|
||||
# 验证 API 服务配置
|
||||
cd api
|
||||
python -c "from app.config import settings; print(settings)"
|
||||
|
||||
# 验证 Engine 配置
|
||||
cd engine
|
||||
python -c "from config import settings; print(settings)"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 下一步
|
||||
|
||||
- [环境与部署](index.md) - 开始安装服务
|
||||
- [Docker 部署](../deployment/docker.md) - 容器化部署
|
||||
|
||||
115
docs/content/getting-started/index.md
Normal file
115
docs/content/getting-started/index.md
Normal file
@@ -0,0 +1,115 @@
|
||||
# 环境与部署
|
||||
|
||||
本页属于“快速开始”中的环境与部署路径,只负责把服务跑起来、说明配置入口和部署方式。首次创建助手请转到 [创建第一个助手](../quickstart/index.md)。
|
||||
|
||||
---
|
||||
|
||||
## 先理解部署对象
|
||||
|
||||
Realtime Agent Studio(RAS)通常由三个核心服务组成:
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Services["服务组件"]
|
||||
Web[Web 前端<br/>React + TypeScript]
|
||||
API[API 服务<br/>FastAPI]
|
||||
Engine[Engine 服务<br/>WebSocket]
|
||||
end
|
||||
|
||||
subgraph Storage["数据存储"]
|
||||
DB[(SQLite/PostgreSQL)]
|
||||
end
|
||||
|
||||
Web -->|REST| API
|
||||
Web -->|WebSocket| Engine
|
||||
API <--> DB
|
||||
Engine <--> API
|
||||
```
|
||||
|
||||
| 组件 | 默认端口 | 负责什么 |
|
||||
|------|----------|----------|
|
||||
| **Web 前端** | 3000 | 管理控制台与调试界面 |
|
||||
| **API 服务** | 8080 | 资源管理、配置持久化、历史数据 |
|
||||
| **Engine 服务** | 8000 | 实时会话、事件流和音频流 |
|
||||
|
||||
## 选择你的安装方式
|
||||
|
||||
### 方式一:Docker Compose
|
||||
|
||||
适合希望尽快跑通一套完整环境的团队。
|
||||
|
||||
```bash
|
||||
# 仓库目录示例沿用当前代码仓库 slug
|
||||
# 你本地实际目录名可以不同
|
||||
git clone https://github.com/your-org/AI-VideoAssistant.git
|
||||
cd AI-VideoAssistant
|
||||
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
### 方式二:本地开发
|
||||
|
||||
适合需要分别调试前端、API 和 Engine 的开发者。
|
||||
|
||||
#### 启动 API 服务
|
||||
|
||||
```bash
|
||||
cd api
|
||||
python -m venv venv
|
||||
source venv/bin/activate # Windows: venv\Scripts\activate
|
||||
pip install -r requirements.txt
|
||||
uvicorn main:app --host 0.0.0.0 --port 8080 --reload
|
||||
```
|
||||
|
||||
#### 启动 Engine 服务
|
||||
|
||||
```bash
|
||||
cd engine
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
python main.py
|
||||
```
|
||||
|
||||
#### 启动 Web 前端
|
||||
|
||||
```bash
|
||||
cd web
|
||||
npm install
|
||||
npm run dev
|
||||
```
|
||||
|
||||
## 基础验证
|
||||
|
||||
完成安装后,至少确认以下入口可访问:
|
||||
|
||||
| 服务 | 地址 | 用途 |
|
||||
|------|------|------|
|
||||
| Web | `http://localhost:3000` | 打开控制台 |
|
||||
| API | `http://localhost:8080/docs` | 查看管理接口 |
|
||||
| Engine | `http://localhost:8000/health` | 检查实时引擎健康状态 |
|
||||
|
||||
如果你需要更完整的环境变量、配置文件和部署说明,请继续阅读本章节其他页面:
|
||||
|
||||
- [环境要求](requirements.md)
|
||||
- [配置说明](configuration.md)
|
||||
- [部署概览](../deployment/index.md)
|
||||
- [Docker 部署](../deployment/docker.md)
|
||||
|
||||
## 目录结构(阅读导向)
|
||||
|
||||
```text
|
||||
repo/
|
||||
├── web/ # 管理控制台
|
||||
├── api/ # 控制面与管理接口
|
||||
├── engine/ # 实时交互引擎
|
||||
├── docker/ # 部署编排与镜像配置
|
||||
└── docs/ # 当前文档站点
|
||||
```
|
||||
|
||||
## 遇到问题时去哪里
|
||||
|
||||
- 需要“快速判断往哪看”:先看 [常见问题](../resources/faq.md)
|
||||
- 需要“按步骤排查”:直接看 [故障排查](../resources/troubleshooting.md)
|
||||
- 已经跑通环境,准备创建助手:回到 [快速开始](../quickstart/index.md)
|
||||
|
||||
150
docs/content/getting-started/requirements.md
Normal file
150
docs/content/getting-started/requirements.md
Normal file
@@ -0,0 +1,150 @@
|
||||
# 环境要求
|
||||
|
||||
本页面列出运行 Realtime Agent Studio 所需的软件和硬件要求。
|
||||
|
||||
---
|
||||
|
||||
## 软件依赖
|
||||
|
||||
### 必需软件
|
||||
|
||||
| 软件 | 版本要求 | 说明 | 安装命令 |
|
||||
|------|---------|------|---------|
|
||||
| **Node.js** | 18.0+ | 前端构建运行 | `nvm install 18` |
|
||||
| **Python** | 3.10+ | 后端服务 | `pyenv install 3.10` |
|
||||
| **Docker** | 20.10+ | 容器化部署(可选) | [安装指南](https://docs.docker.com/get-docker/) |
|
||||
|
||||
### 可选软件
|
||||
|
||||
| 软件 | 版本要求 | 用途 |
|
||||
|------|---------|------|
|
||||
| **Docker Compose** | 2.0+ | 多服务编排 |
|
||||
| **PostgreSQL** | 14+ | 生产数据库 |
|
||||
| **Redis** | 6.0+ | 缓存与会话 |
|
||||
| **Nginx** | 1.20+ | 反向代理 |
|
||||
|
||||
---
|
||||
|
||||
## 版本检查
|
||||
|
||||
运行以下命令验证环境:
|
||||
|
||||
=== "Node.js"
|
||||
|
||||
```bash
|
||||
node --version
|
||||
# v18.0.0 或更高
|
||||
|
||||
npm --version
|
||||
# 8.0.0 或更高
|
||||
```
|
||||
|
||||
=== "Python"
|
||||
|
||||
```bash
|
||||
python --version
|
||||
# Python 3.10.0 或更高
|
||||
|
||||
pip --version
|
||||
# pip 22.0 或更高
|
||||
```
|
||||
|
||||
=== "Docker"
|
||||
|
||||
```bash
|
||||
docker --version
|
||||
# Docker version 20.10.0 或更高
|
||||
|
||||
docker compose version
|
||||
# Docker Compose version v2.0.0 或更高
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 浏览器支持
|
||||
|
||||
控制台需要现代浏览器支持 WebSocket 和 Web Audio API:
|
||||
|
||||
| 浏览器 | 最低版本 | 推荐版本 |
|
||||
|--------|---------|---------|
|
||||
| Chrome | 90+ | 最新版 |
|
||||
| Firefox | 90+ | 最新版 |
|
||||
| Edge | 90+ | 最新版 |
|
||||
| Safari | 14+ | 最新版 |
|
||||
|
||||
!!! warning "IE 不支持"
|
||||
Internet Explorer 不受支持,请使用现代浏览器。
|
||||
|
||||
---
|
||||
|
||||
## 硬件要求
|
||||
|
||||
### 开发环境
|
||||
|
||||
| 资源 | 最低配置 | 推荐配置 |
|
||||
|------|---------|---------|
|
||||
| **CPU** | 2 核心 | 4 核心+ |
|
||||
| **内存** | 4GB | 8GB+ |
|
||||
| **磁盘** | 10GB | 20GB+ SSD |
|
||||
| **网络** | 10Mbps | 100Mbps |
|
||||
|
||||
---
|
||||
|
||||
## 网络要求
|
||||
|
||||
### 出站访问
|
||||
|
||||
以下外部服务需要网络可达(根据使用的模型供应商):
|
||||
|
||||
| 服务 | 域名 | 端口 | 用途 |
|
||||
|------|------|------|------|
|
||||
| **OpenAI** | api.openai.com | 443 | LLM / TTS |
|
||||
| **Azure OpenAI** | *.openai.azure.com | 443 | LLM / ASR / TTS |
|
||||
| **阿里云** | *.aliyuncs.com | 443 | DashScope TTS |
|
||||
| **SiliconFlow** | api.siliconflow.cn | 443 | ASR / TTS |
|
||||
| **DeepSeek** | api.deepseek.com | 443 | LLM |
|
||||
|
||||
### 端口规划
|
||||
|
||||
| 服务 | 默认端口 | 可配置 |
|
||||
|------|---------|--------|
|
||||
| Web 前端 | 3000 | ✅ |
|
||||
| API 服务 | 8080 | ✅ |
|
||||
| Engine 服务 | 8000 | ✅ |
|
||||
| PostgreSQL | 5432 | ✅ |
|
||||
| Redis | 6379 | ✅ |
|
||||
|
||||
---
|
||||
|
||||
## 操作系统
|
||||
|
||||
### 支持的系统
|
||||
|
||||
| 操作系统 | 版本 | 支持状态 |
|
||||
|---------|------|---------|
|
||||
| **Ubuntu** | 20.04 LTS, 22.04 LTS | ✅ 完全支持 |
|
||||
| **Debian** | 11, 12 | ✅ 完全支持 |
|
||||
| **CentOS** | 8+ | ✅ 完全支持 |
|
||||
| **macOS** | 12+ (Monterey) | ✅ 开发支持 |
|
||||
| **Windows** | 10/11 + WSL2 | ✅ 开发支持 |
|
||||
|
||||
### Windows 注意事项
|
||||
|
||||
推荐使用 WSL2 进行开发:
|
||||
|
||||
```powershell
|
||||
# 安装 WSL2
|
||||
wsl --install
|
||||
|
||||
# 安装 Ubuntu
|
||||
wsl --install -d Ubuntu
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 下一步
|
||||
|
||||
- [配置说明](configuration.md) - 环境变量配置
|
||||
- [环境与部署](index.md) - 开始安装
|
||||
- [Docker 部署](../deployment/docker.md) - 容器化部署
|
||||
|
||||
BIN
docs/content/images/logo.png
Normal file
BIN
docs/content/images/logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.1 MiB |
@@ -1,200 +1,186 @@
|
||||
# AI Video Assistant 使用说明
|
||||
<p align="center">
|
||||
<img src="images/logo.png" alt="Realtime Agent Studio" width="400">
|
||||
</p>
|
||||
|
||||
## 产品概述
|
||||
<p align="center">
|
||||
<strong>通过管理控制台与 API 构建、部署和运营实时多模态助手</strong>
|
||||
</p>
|
||||
|
||||
AI Video Assistant 是一款基于大语言模型的智能对话与工作流管理平台,支持多模型集成、语音合成、自动化测试等功能,帮助企业快速构建智能客服系统。
|
||||
<p align="center">
|
||||
<img src="https://img.shields.io/badge/version-0.1.0-blue" alt="Version">
|
||||
<img src="https://img.shields.io/badge/license-MIT-green" alt="License">
|
||||
<img src="https://img.shields.io/badge/python-3.10+-blue" alt="Python">
|
||||
<img src="https://img.shields.io/badge/node-18+-green" alt="Node">
|
||||
</p>
|
||||
|
||||

|
||||
<p align="center">
|
||||
<a href="overview/index.md">产品概览</a> ·
|
||||
<a href="quickstart/index.md">快速开始</a> ·
|
||||
<a href="concepts/assistants.md">构建助手</a> ·
|
||||
<a href="concepts/index.md">核心概念</a> ·
|
||||
<a href="api-reference/index.md">API 参考</a>
|
||||
</p>
|
||||
|
||||
## 核心功能
|
||||
---
|
||||
|
||||
| 功能模块 | 描述 |
|
||||
|---------|------|
|
||||
| **仪表盘** | 实时数据统计与可视化分析 |
|
||||
| **助手管理** | 创建、配置、测试 AI 助手 |
|
||||
| **工作流** | 可视化流程编排 |
|
||||
| **模型库** | LLM/ASR/语音模型配置 |
|
||||
| **知识库** | RAG 文档知识管理 |
|
||||
| **历史记录** | 对话日志查询与分析 |
|
||||
| **自动化测试** | 批量测试与质量评估 |
|
||||
Realtime Agent Studio (RAS) 是一个通过管理控制台与 API 构建、部署和运营实时多模态助手的开源平台。
|
||||
|
||||
## 快速开始
|
||||
## 适合谁
|
||||
|
||||
### 环境要求
|
||||
- 需要把实时语音或视频助手接入产品、设备或内部系统的开发团队
|
||||
- 需要通过控制台快速配置提示词、模型、知识库、工具和工作流的运营团队
|
||||
- 需要私有化部署、模型可替换、链路可观测的企业场景
|
||||
|
||||
- Node.js 18+
|
||||
- 现代浏览器(Chrome/Firefox/Edge)
|
||||
## 核心能力
|
||||
|
||||
### 启动服务
|
||||
<div class="grid cards" markdown>
|
||||
|
||||
```bash
|
||||
cd web
|
||||
npm install
|
||||
npm run dev
|
||||
- :material-robot-outline: **助手构建**
|
||||
|
||||
---
|
||||
|
||||
用统一的助手对象管理提示词、模型、知识库、工具、开场白和会话策略。
|
||||
|
||||
- :material-pulse: **双引擎运行时**
|
||||
|
||||
---
|
||||
|
||||
同时支持 Pipeline 引擎与 Realtime 引擎,可按延迟、成本和可控性选择运行方式。
|
||||
|
||||
- :material-source-branch: **能力扩展**
|
||||
|
||||
---
|
||||
|
||||
通过资源库、知识库、工具与工作流扩展助手能力,而不是把全部逻辑塞进单一提示词。
|
||||
|
||||
- :material-api: **开放集成**
|
||||
|
||||
---
|
||||
|
||||
使用 REST API 管理资源,使用 WebSocket API 接入实时对话,面向 Web、移动端和第三方系统。
|
||||
|
||||
- :material-shield-lock-outline: **私有化部署**
|
||||
|
||||
---
|
||||
|
||||
支持 Docker 部署、自有模型服务和企业内网运行,便于满足合规与成本要求。
|
||||
|
||||
- :material-chart-line: **可观测与评估**
|
||||
|
||||
---
|
||||
|
||||
提供会话历史、实时指标、自动化测试和效果评估,帮助持续改进助手质量。
|
||||
|
||||
</div>
|
||||
|
||||
## 系统架构
|
||||
|
||||
平台架构层级:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
|
||||
subgraph Access["Access Layer"]
|
||||
API["API"]
|
||||
SDK["SDK"]
|
||||
Browser["Browser UI"]
|
||||
Embed["Web Embed"]
|
||||
end
|
||||
|
||||
subgraph Runtime["Realtime Interaction Engine"]
|
||||
direction LR
|
||||
|
||||
subgraph Duplex["Duplex Interaction Engine"]
|
||||
direction LR
|
||||
|
||||
subgraph Pipeline["Pipeline Engine"]
|
||||
direction LR
|
||||
VAD["VAD"]
|
||||
ASR["ASR"]
|
||||
TD["Turn Detection"]
|
||||
LLM["LLM"]
|
||||
TTS["TTS"]
|
||||
end
|
||||
|
||||
subgraph Multi["Realtime Engine"]
|
||||
MM["Realtime Model"]
|
||||
end
|
||||
end
|
||||
|
||||
subgraph Capability["Agent Capabilities"]
|
||||
subgraph Tools["Tool System"]
|
||||
Webhook["Webhook"]
|
||||
ClientTool["Client Tools"]
|
||||
Builtin["Builtin Tools"]
|
||||
end
|
||||
|
||||
subgraph KB["Knowledge System"]
|
||||
Docs["Documents"]
|
||||
Vector[("Vector Index")]
|
||||
Retrieval["Retrieval"]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
subgraph Platform["Platform Services"]
|
||||
direction TB
|
||||
Backend["Backend Service"]
|
||||
Frontend["Frontend Console"]
|
||||
DB[("Database")]
|
||||
end
|
||||
|
||||
Access --> Runtime
|
||||
Runtime <--> Backend
|
||||
Backend <--> DB
|
||||
Backend <--> Frontend
|
||||
LLM --> Tools
|
||||
MM --> Tools
|
||||
LLM <--> KB
|
||||
MM <--> KB
|
||||
```
|
||||
|
||||
访问 `http://localhost:3000`
|
||||
## 从这里开始
|
||||
|
||||
## 详细使用指南
|
||||
<div class="grid cards" markdown>
|
||||
|
||||
### 1. 仪表盘
|
||||
- :material-compass-outline: **[了解产品](overview/index.md)**
|
||||
|
||||

|
||||
---
|
||||
|
||||
仪表盘展示系统核心指标:
|
||||
- **总对话数** - 累计对话请求数量
|
||||
- **回答率** - 成功回答的对话占比
|
||||
- **平均时长** - 单次对话平均持续时间
|
||||
- **人工转接率** - 需要人工介入的对话比例
|
||||
先看产品定位、核心模块、适用场景,以及 RAS 与其他方案的差异。
|
||||
|
||||
### 2. 助手管理
|
||||
- :material-cog-outline: **[环境与部署](getting-started/index.md)**
|
||||
|
||||

|
||||
---
|
||||
|
||||
#### 创建助手
|
||||
先把服务跑起来,了解环境要求、配置入口和部署方式。
|
||||
|
||||
1. 点击 **创建助手**
|
||||
2. 配置助手基本信息(名称、提示词)
|
||||
3. 选择对话语言与音色
|
||||
4. 绑定知识库和工具
|
||||
- :material-rocket-launch-outline: **[创建第一个助手](quickstart/index.md)**
|
||||
|
||||
#### 配置选项
|
||||
---
|
||||
|
||||
| 标签页 | 配置项 |
|
||||
|-------|--------|
|
||||
| 全局 | 名称、提示词、温度参数 |
|
||||
| 语音 | TTS 引擎、音色、语言 |
|
||||
| 工具 | 可用工具列表 |
|
||||
| 知识 | RAG 知识库关联 |
|
||||
| 链接 | 外部服务配置 |
|
||||
按最短路径准备资源、创建助手、测试效果并拿到接入所需信息。
|
||||
|
||||
### 3. 工作流
|
||||
- :material-tune: **[构建助手](concepts/assistants.md)**
|
||||
|
||||

|
||||
---
|
||||
|
||||
#### 工作流节点类型
|
||||
按完整链路配置助手、提示词、模型、知识库、工具与工作流。
|
||||
|
||||
| 节点 | 功能 |
|
||||
|------|------|
|
||||
| 对话节点 | AI 自动回复 |
|
||||
| 工具节点 | 调用外部工具 |
|
||||
| 人工节点 | 转接人工客服 |
|
||||
| 结束节点 | 结束对话流程 |
|
||||
- :material-connection: **[接入应用](api-reference/index.md)**
|
||||
|
||||
### 4. 模型配置
|
||||
---
|
||||
|
||||

|
||||
查看 REST 与 WebSocket 接口,把助手嵌入到你的 Web、移动端或服务端系统。
|
||||
|
||||
#### 支持的 LLM 模型
|
||||
- :material-lifebuoy: **[排查问题](resources/troubleshooting.md)**
|
||||
|
||||
- **OpenAI** - GPT-4/GPT-3.5
|
||||
- **DeepSeek** - DeepSeek Chat
|
||||
- **SiliconFlow** - 多种开源模型
|
||||
- **Google Gemini** - Gemini Pro
|
||||
---
|
||||
|
||||
#### ASR 语音识别
|
||||
当连接、对话质量或部署链路出现问题时,从这里进入可执行的排查步骤。
|
||||
|
||||
- **Whisper** - OpenAI 语音识别
|
||||
- **SenseVoice** - 高精度中文识别
|
||||
</div>
|
||||
|
||||
### 5. 知识库
|
||||
|
||||

|
||||
|
||||
#### 创建知识库
|
||||
|
||||
1. 进入 **知识库** 页面
|
||||
2. 点击 **新建知识库**
|
||||
3. 上传文档(支持 Markdown/PDF/TXT)
|
||||
4. 配置检索参数
|
||||
|
||||
### 6. 历史记录
|
||||
|
||||

|
||||
|
||||
查询条件:
|
||||
- 按时间范围筛选
|
||||
- 按助手名称搜索
|
||||
- 查看对话详情与统计
|
||||
|
||||
### 7. 自动化测试
|
||||
|
||||

|
||||
|
||||
#### 测试类型
|
||||
|
||||
| 类型 | 说明 |
|
||||
|------|------|
|
||||
| 固定测试 | 预设问答对测试 |
|
||||
| 智能测试 | AI 生成测试用例 |
|
||||
|
||||
#### 评估指标
|
||||
|
||||
- 回复准确率
|
||||
- 回答完整度
|
||||
- 响应时间
|
||||
|
||||
### 8. 语音合成
|
||||
|
||||

|
||||
|
||||
#### 支持的 TTS 引擎
|
||||
|
||||
- **阿里云** - 多音色可选
|
||||
- **火山引擎** - 高自然度
|
||||
- **Minimax** - 低延迟
|
||||
|
||||
### 9. 个人中心
|
||||
|
||||

|
||||
|
||||
管理账户信息与系统设置。
|
||||
|
||||
## 部署指南
|
||||
|
||||
### Docker 部署(推荐)
|
||||
|
||||
```bash
|
||||
# 构建镜像
|
||||
docker build -t ai-video-assistant .
|
||||
|
||||
# 运行容器
|
||||
docker run -d -p 3000:3000 --name ai-assistant ai-video-assistant
|
||||
```
|
||||
|
||||
### Nginx 反向代理
|
||||
|
||||
```nginx
|
||||
server {
|
||||
listen 80;
|
||||
server_name your-domain.com;
|
||||
|
||||
location / {
|
||||
proxy_pass http://localhost:3000;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 常见问题
|
||||
|
||||
### Q: 如何配置 API Key?
|
||||
|
||||
进入 **LLM 库** 或 **语音库** 页面,点击对应模型的配置按钮填写 API Key。
|
||||
|
||||
### Q: 助手无法回复?
|
||||
|
||||
1. 检查模型配置是否正确
|
||||
2. 确认知识库已正确关联
|
||||
3. 查看系统日志排查错误
|
||||
|
||||
### Q: 语音识别不准确?
|
||||
|
||||
- 确认 ASR 模型选择正确
|
||||
- 检查音频采样率(推荐 16kHz)
|
||||
- 确认语言设置匹配
|
||||
|
||||
## 技术支持
|
||||
|
||||
如有问题,请提交 Issue 或联系技术支持团队。
|
||||
|
||||
26
docs/content/javascripts/extra.js
Normal file
26
docs/content/javascripts/extra.js
Normal file
@@ -0,0 +1,26 @@
|
||||
// Realtime Agent Studio - Custom JavaScript
|
||||
|
||||
document.addEventListener("DOMContentLoaded", function () {
|
||||
// Add external link icons
|
||||
document.querySelectorAll('a[href^="http"]').forEach(function (link) {
|
||||
if (!link.hostname.includes(window.location.hostname)) {
|
||||
link.setAttribute("target", "_blank");
|
||||
link.setAttribute("rel", "noopener noreferrer");
|
||||
}
|
||||
});
|
||||
|
||||
// Smooth scroll for anchor links
|
||||
document.querySelectorAll('a[href^="#"]').forEach(function (anchor) {
|
||||
anchor.addEventListener("click", function (e) {
|
||||
const targetId = this.getAttribute("href").slice(1);
|
||||
const targetElement = document.getElementById(targetId);
|
||||
if (targetElement) {
|
||||
e.preventDefault();
|
||||
targetElement.scrollIntoView({
|
||||
behavior: "smooth",
|
||||
block: "start",
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
18
docs/content/javascripts/mermaid.mjs
Normal file
18
docs/content/javascripts/mermaid.mjs
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* Global Mermaid config for consistent diagram sizing across all docs.
|
||||
* Exposed as window.mermaid so Material for MkDocs uses this instance.
|
||||
*/
|
||||
import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs";
|
||||
|
||||
mermaid.initialize({
|
||||
startOnLoad: false,
|
||||
securityLevel: "loose",
|
||||
theme: "base",
|
||||
useMaxWidth: false,
|
||||
themeVariables: {
|
||||
fontSize: "14px",
|
||||
fontFamily: "Inter, sans-serif",
|
||||
},
|
||||
});
|
||||
|
||||
window.mermaid = mermaid;
|
||||
312
docs/content/overview/architecture.md
Normal file
312
docs/content/overview/architecture.md
Normal file
@@ -0,0 +1,312 @@
|
||||
# 系统架构
|
||||
|
||||
本文档只解释 Realtime Agent Studio (RAS) 的服务边界、数据流、部署形态和关键技术选型,不重复产品定位或上手流程。
|
||||
|
||||
---
|
||||
|
||||
## 整体架构
|
||||
|
||||
RAS 采用前后端分离的微服务架构,主要由三个核心服务组成:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Client["客户端"]
|
||||
Browser[Web 浏览器]
|
||||
Mobile[移动应用]
|
||||
ThirdParty[第三方系统]
|
||||
end
|
||||
|
||||
subgraph Frontend["前端服务"]
|
||||
WebApp[React 管理控制台]
|
||||
end
|
||||
|
||||
subgraph Backend["后端服务"]
|
||||
API[API 服务<br/>FastAPI]
|
||||
Engine[实时交互引擎<br/>WebSocket]
|
||||
end
|
||||
|
||||
subgraph Storage["数据存储"]
|
||||
DB[(SQLite/PostgreSQL)]
|
||||
FileStore[文件存储]
|
||||
end
|
||||
|
||||
subgraph External["外部服务"]
|
||||
OpenAI[OpenAI]
|
||||
SiliconFlow[SiliconFlow]
|
||||
DashScope[DashScope]
|
||||
LocalModel[本地模型]
|
||||
end
|
||||
|
||||
subgraph Tools["工具"]
|
||||
Webhook[Webhook]
|
||||
ClientTool[客户端工具]
|
||||
Builtin[内建工具]
|
||||
end
|
||||
|
||||
Browser --> WebApp
|
||||
Mobile -->|WebSocket| Engine
|
||||
ThirdParty -->|REST API| API
|
||||
WebApp -->|REST API| API
|
||||
WebApp -->|WebSocket| Engine
|
||||
API <--> DB
|
||||
API <--> FileStore
|
||||
Engine <--> API
|
||||
Engine --> External
|
||||
Engine --> Tools
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 核心组件
|
||||
|
||||
### 1. Web 前端 (React)
|
||||
|
||||
管理控制台,提供可视化的配置、测试和监控界面。
|
||||
|
||||
| 功能模块 | 说明 |
|
||||
|---------|------|
|
||||
| 助手管理 | 创建、配置、测试智能助手 |
|
||||
| 资源库 | LLM / ASR / TTS 等模型管理 |
|
||||
| 知识库 | RAG 文档上传与管理 |
|
||||
| 历史记录 | 会话日志查询与回放 |
|
||||
| 仪表盘 | 实时数据统计 |
|
||||
| 调试控制台 | WebSocket 实时测试 |
|
||||
|
||||
### 2. API 服务 (FastAPI)
|
||||
|
||||
REST API 后端,处理资源管理、持久化配置和历史数据等控制面能力。
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph API["API 服务"]
|
||||
Router[路由层]
|
||||
Service[业务逻辑层]
|
||||
Model[数据模型层]
|
||||
end
|
||||
|
||||
Client[客户端] --> Router
|
||||
Router --> Service
|
||||
Service --> Model
|
||||
Model --> DB[(数据库)]
|
||||
```
|
||||
|
||||
**主要职责:**
|
||||
|
||||
- 助手 CRUD 操作
|
||||
- 模型资源管理
|
||||
- 知识库管理
|
||||
- 会话记录存储
|
||||
- 认证与授权
|
||||
|
||||
### 3. 实时交互引擎 (Engine)
|
||||
|
||||
处理实时音视频对话、事件流转、模型调用与工具执行。
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Engine["实时交互引擎"]
|
||||
WS[WebSocket Handler]
|
||||
SM[会话管理器]
|
||||
|
||||
subgraph Pipeline["管线式引擎"]
|
||||
VAD[声音活动检测 VAD]
|
||||
ASR[语音识别 ASR]
|
||||
TD[回合检测 TD]
|
||||
LLM[大语言模型 LLM]
|
||||
TTS[语音合成 TTS]
|
||||
end
|
||||
|
||||
subgraph Realtime["实时引擎连接"]
|
||||
RTOpenAI[OpenAI Realtime]
|
||||
RTGemini[Gemini Live]
|
||||
RTDoubao[Doubao 实时交互]
|
||||
end
|
||||
|
||||
subgraph Tools["工具"]
|
||||
Webhook[Webhook]
|
||||
ClientTool[客户端工具]
|
||||
Builtin[内建工具]
|
||||
end
|
||||
end
|
||||
|
||||
Client[客户端] -->|音频流| WS
|
||||
WS --> SM
|
||||
SM --> Pipeline
|
||||
SM --> Realtime
|
||||
Pipeline --> LLM
|
||||
LLM --> Tools
|
||||
Realtime --> Tools
|
||||
Pipeline -->|文本/音频| WS
|
||||
Realtime -->|文本/音频| WS
|
||||
```
|
||||
|
||||
### 外部服务与工具
|
||||
|
||||
| 类别 | 说明 | 可选项 |
|
||||
|------|------|--------|
|
||||
| **外部模型服务** | Pipeline 引擎各环节依赖的云端或本地服务 | OpenAI、SiliconFlow、DashScope、本地模型 |
|
||||
| **实时模型连接** | Realtime 引擎可直接连接的后端 | OpenAI Realtime、Gemini Live、Doubao 实时交互 |
|
||||
| **工具系统** | 由助手或引擎调用的外部执行能力 | Webhook、客户端工具、内建工具 |
|
||||
|
||||
---
|
||||
|
||||
## 引擎架构
|
||||
|
||||
### 管线式全双工引擎
|
||||
|
||||
管线式引擎由 **VAD → ASR → TD → LLM → TTS** 组成。每个环节可替换,适合需要精细控制、工具扩展和较高可解释性的场景。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as 客户端
|
||||
participant E as 引擎
|
||||
participant VAD as VAD
|
||||
participant ASR as 语音识别
|
||||
participant TD as 回合检测
|
||||
participant LLM as 大语言模型
|
||||
participant TTS as 语音合成
|
||||
participant Tools as 工具
|
||||
|
||||
C->>E: 音频流 (PCM)
|
||||
E->>VAD: 检测语音活动
|
||||
VAD-->>E: 有效语音段
|
||||
E->>ASR: 语音转写
|
||||
ASR-->>E: 转写文本
|
||||
E->>TD: 判断回合边界
|
||||
TD-->>E: 可送入 LLM 的输入
|
||||
E->>LLM: 生成回复
|
||||
LLM->>Tools: 可选:调用工具
|
||||
Tools-->>LLM: 工具结果
|
||||
LLM-->>E: 回复文本 (流式)
|
||||
E->>TTS: 文本转语音
|
||||
TTS-->>E: 音频流
|
||||
E->>C: 播放音频
|
||||
```
|
||||
|
||||
**特点:**
|
||||
|
||||
- 各环节可单独替换和优化
|
||||
- 便于接入知识库、工具、工作流等能力
|
||||
- 延迟通常高于端到端实时模型,但可控性更强
|
||||
|
||||
### Realtime 引擎
|
||||
|
||||
Realtime 引擎直接连接端到端实时模型,适合追求更低延迟和更自然多模态交互的场景。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as 客户端
|
||||
participant E as 引擎
|
||||
participant RT as Realtime Model
|
||||
|
||||
C->>E: 音频/视频/文本输入
|
||||
E->>RT: 实时流输入
|
||||
RT-->>E: 流式文本/音频输出
|
||||
E->>C: 播放或渲染结果
|
||||
```
|
||||
|
||||
**特点:**
|
||||
|
||||
- 交互链路更短,延迟更低
|
||||
- 更依赖具体模型供应商的能力边界
|
||||
- 适合强调自然对话和多模态体验的入口
|
||||
|
||||
---
|
||||
|
||||
## 数据流
|
||||
|
||||
### WebSocket 会话流程
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as 客户端
|
||||
participant E as 引擎
|
||||
participant API as API 服务
|
||||
participant DB as 数据库
|
||||
|
||||
C->>E: 连接 ws://.../ws?assistant_id=xxx
|
||||
E->>API: 获取助手配置
|
||||
API->>DB: 查询助手
|
||||
DB-->>API: 助手数据
|
||||
API-->>E: 配置信息
|
||||
|
||||
C->>E: session.start
|
||||
E-->>C: session.started
|
||||
E-->>C: config.resolved
|
||||
|
||||
loop 对话循环
|
||||
C->>E: 音频帧 (binary)
|
||||
E-->>C: input.speech_started
|
||||
E-->>C: transcript.delta
|
||||
E-->>C: transcript.final
|
||||
E-->>C: assistant.response.delta
|
||||
E-->>C: output.audio.start
|
||||
E-->>C: 音频帧 (binary)
|
||||
E-->>C: output.audio.end
|
||||
end
|
||||
|
||||
C->>E: session.stop
|
||||
E->>API: 保存会话记录
|
||||
API->>DB: 存储
|
||||
E-->>C: session.stopped
|
||||
```
|
||||
|
||||
### 智能打断流程
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as 客户端
|
||||
participant E as 引擎
|
||||
participant TTS as TTS 服务
|
||||
|
||||
Note over E: 正在播放 TTS 音频
|
||||
E->>C: 音频帧...
|
||||
|
||||
C->>E: 用户说话 (VAD 检测)
|
||||
E->>E: 触发打断
|
||||
E->>TTS: 停止合成
|
||||
E-->>C: output.audio.interrupted
|
||||
|
||||
Note over E: 处理新的用户输入
|
||||
E-->>C: input.speech_started
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 部署形态
|
||||
|
||||
### 开发环境
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Local["本地开发"]
|
||||
Web[npm run dev<br/>:3000]
|
||||
API[uvicorn<br/>:8080]
|
||||
Engine[python main.py<br/>:8000]
|
||||
DB[(SQLite)]
|
||||
end
|
||||
|
||||
Web --> API
|
||||
Web --> Engine
|
||||
API --> DB
|
||||
Engine --> API
|
||||
```
|
||||
|
||||
## 技术选型
|
||||
|
||||
| 组件 | 技术 | 说明 |
|
||||
|------|------|------|
|
||||
| **前端框架** | React 18 | 管理控制台与调试界面 |
|
||||
| **状态管理** | Zustand | 前端轻量状态管理 |
|
||||
| **UI 样式** | Tailwind CSS | 快速构建控制台界面 |
|
||||
| **后端框架** | FastAPI | 管理接口与配置持久化 |
|
||||
| **WebSocket** | websockets | 实时事件与音频流通信 |
|
||||
| **数据库** | SQLite / PostgreSQL | 配置与历史数据存储 |
|
||||
|
||||
---
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [产品概览](index.md) - 产品定位、核心模块与适用场景
|
||||
- [引擎架构](../concepts/engines.md) - Pipeline 与 Realtime 的选择指南
|
||||
- [WebSocket 协议](../api-reference/websocket.md) - 实时对话事件和消息格式
|
||||
84
docs/content/overview/index.md
Normal file
84
docs/content/overview/index.md
Normal file
@@ -0,0 +1,84 @@
|
||||
# 产品概览
|
||||
|
||||
Realtime Agent Studio (RAS) 是一个通过管理控制台与 API 构建、部署和运营实时多模态助手的开源平台。
|
||||
|
||||
---
|
||||
|
||||
## 产品定位
|
||||
|
||||
RAS 面向需要构建实时语音或视频助手的团队,目标不是替代你的业务系统,而是提供一套可组合的助手基础设施:
|
||||
|
||||
- **控制台**:让团队快速配置助手、资源库、知识库、工具、工作流与评估策略
|
||||
- **API 与实时运行时**:让应用、设备和第三方系统稳定接入实时对话能力
|
||||
- **运维与分析能力**:让团队能观察会话效果、排查问题并持续迭代助手质量
|
||||
|
||||
如果你把实时助手看作一条完整的产品链路,RAS 负责其中的“构建、接入、运行、观测”四个阶段。
|
||||
|
||||
## 核心模块
|
||||
|
||||
| 模块 | 负责什么 | 适合谁使用 |
|
||||
|------|----------|------------|
|
||||
| **助手** | 定义角色、行为、模型、知识、工具和会话策略 | 产品、运营、算法、开发 |
|
||||
| **引擎** | 承载实时语音/多模态对话,输出事件流和音频流 | 开发、基础设施 |
|
||||
| **资源库** | 管理 LLM、ASR、TTS 等外部能力接入 | 平台管理员、开发 |
|
||||
| **知识库 / 工具 / 工作流** | 让助手获得领域知识、外部执行能力和复杂流程控制 | 业务设计者、开发 |
|
||||
| **分析与评估** | 记录会话、监控指标、做自动化回归和效果评估 | 运营、QA、开发 |
|
||||
|
||||
## 为什么是“控制台 + API”
|
||||
|
||||
RAS 采用“控制台配置 + API 接入”的组合方式,而不是把所有内容都固化在代码里:
|
||||
|
||||
- **控制台负责提效**:让非后端角色也能参与提示词、工具、知识、流程的配置与调优
|
||||
- **API 负责集成**:让产品团队继续用自己的前端、服务端或设备侧应用承载最终体验
|
||||
- **同一套助手配置可复用**:控制台保存的助手定义可以被不同渠道重复接入和评估
|
||||
|
||||
## 典型使用方式
|
||||
|
||||
<div class="grid cards" markdown>
|
||||
|
||||
- :material-headset: **客户服务与运营自动化**
|
||||
|
||||
---
|
||||
|
||||
在客服、外呼、预约、售后等场景中接入实时语音助手,并保留人工接管与工具调用能力。
|
||||
|
||||
- :material-school-outline: **培训、陪练与问答**
|
||||
|
||||
---
|
||||
|
||||
用知识库、提示词和流程编排构建可持续优化的教学、培训或辅导助手。
|
||||
|
||||
- :material-domain: **企业内部助手**
|
||||
|
||||
---
|
||||
|
||||
通过私有部署、内部知识库和业务系统工具,把助手接入内部流程或设备终端。
|
||||
|
||||
- :material-devices: **多端集成**
|
||||
|
||||
---
|
||||
|
||||
通过 WebSocket API 将同一个助手接入 Web、移动端、坐席工作台或自有硬件设备。
|
||||
|
||||
</div>
|
||||
|
||||
## 与其他方案的差异
|
||||
|
||||
本页是站内唯一保留“产品对比”视角的地方,用于帮助你快速判断 RAS 的定位边界。
|
||||
|
||||
| 特性 | RAS | Vapi | Retell | ElevenLabs Agents |
|
||||
|------|-----|------|--------|-------------------|
|
||||
| **开源** | :white_check_mark: | :x: | :x: | :x: |
|
||||
| **私有部署** | :white_check_mark: | :x: | :x: | :x: |
|
||||
| **Pipeline 引擎** | :white_check_mark: | :white_check_mark: | :white_check_mark: | :x: |
|
||||
| **Realtime / 多模态引擎** | :white_check_mark: | :white_check_mark: | :x: | :white_check_mark: |
|
||||
| **自定义 ASR / TTS** | :white_check_mark: | 有限 | 有限 | :x: |
|
||||
| **知识库与工具扩展** | :white_check_mark: | :white_check_mark: | :white_check_mark: | 有限 |
|
||||
| **工作流编排** | 开发中 | :white_check_mark: | :x: | :x: |
|
||||
| **数据与链路可观测** | :white_check_mark: | 有限 | 有限 | 有限 |
|
||||
|
||||
## 继续阅读
|
||||
|
||||
- [系统架构](architecture.md) - 从服务边界、数据流和部署形态理解系统如何组成
|
||||
- [核心概念](../concepts/index.md) - 先建立助手、引擎与工作流的心智模型
|
||||
- [快速开始](../quickstart/index.md) - 以最短路径创建第一个助手
|
||||
44
docs/content/quickstart/dashboard.md
Normal file
44
docs/content/quickstart/dashboard.md
Normal file
@@ -0,0 +1,44 @@
|
||||
# 资源准备清单
|
||||
|
||||
本页保留原“资源库配置详解”链接,但在本轮文档收敛后,它只承担快速开始阶段的资源核对职责。
|
||||
|
||||
## 你至少要准备什么
|
||||
|
||||
在创建第一个助手前,至少确认以下三类资源都已经可用:
|
||||
|
||||
| 资源 | 为什么需要 | 正式说明页 |
|
||||
|------|------------|------------|
|
||||
| **LLM 模型** | 负责理解与生成回复 | [LLM 模型](../customization/models.md) |
|
||||
| **ASR 资源** | 负责把语音输入转写为文本 | [语音识别](../customization/asr.md) |
|
||||
| **TTS 声音资源** | 负责把文本回复合成为语音 | [声音资源](../customization/voices.md) |
|
||||
|
||||
## 上手前自检
|
||||
|
||||
### LLM
|
||||
|
||||
- 已配置供应商、模型名称、Base URL 和凭证
|
||||
- 已明确该模型用于文本生成、嵌入还是重排
|
||||
- 已准备保守的默认参数,而不是先追求极端效果
|
||||
|
||||
### ASR
|
||||
|
||||
- 已确认目标语言与模型匹配
|
||||
- 已准备必要热词或专有名词词表
|
||||
- 已能用固定样本测试识别准确率和延迟
|
||||
|
||||
### TTS
|
||||
|
||||
- 已选择主音色,并完成至少一次试听
|
||||
- 已确认该声音适合实时对话,而不是仅适合离线播报
|
||||
- 已为默认语速、音量等参数设定初始值
|
||||
|
||||
## 不在本页展开的内容
|
||||
|
||||
字段说明、供应商差异、参数建议和最佳实践已经分别收敛到正式能力页:
|
||||
|
||||
- [LLM 模型](../customization/models.md)
|
||||
- [语音识别](../customization/asr.md)
|
||||
- [声音资源](../customization/voices.md)
|
||||
- [TTS 参数](../customization/tts.md)
|
||||
|
||||
准备完成后,请回到 [快速开始](index.md) 继续创建助手。
|
||||
98
docs/content/quickstart/index.md
Normal file
98
docs/content/quickstart/index.md
Normal file
@@ -0,0 +1,98 @@
|
||||
# 快速开始
|
||||
|
||||
本页负责“创建第一个助手”的最短路径。环境要求、配置文件和部署方式统一放在 [环境与部署](../getting-started/index.md)。
|
||||
|
||||
## 目标
|
||||
|
||||
完成本页后,你应该已经:
|
||||
|
||||
1. 准备好 1 个 LLM、1 个 ASR、1 个 TTS 资源
|
||||
2. 创建并保存 1 个助手
|
||||
3. 完成至少 1 轮测试对话
|
||||
4. 拿到接入应用所需的 `assistant_id` 和 WebSocket 地址
|
||||
|
||||
## 前提条件
|
||||
|
||||
- 已部署 Realtime Agent Studio(RAS)服务
|
||||
- 已准备可用的 LLM / ASR / TTS 凭证
|
||||
- 已能访问控制台与 WebSocket 服务
|
||||
|
||||
## 第一步:准备资源
|
||||
|
||||
创建助手之前,先准备三类资源:
|
||||
|
||||
- **LLM 模型**:决定助手如何理解和生成回复。详见 [LLM 模型](../customization/models.md)
|
||||
- **ASR 资源**:决定语音输入如何转写。详见 [语音识别](../customization/asr.md)
|
||||
- **TTS 声音资源**:决定回复如何被合成为语音。详见 [声音资源](../customization/voices.md)
|
||||
|
||||
如果你想先检查“资源是否准备齐”,可以看 [资源准备清单](dashboard.md)。
|
||||
|
||||
## 第二步:创建助手
|
||||
|
||||
1. 进入控制台中的 **助手** 页面
|
||||
2. 新建一个助手,并填写最小必要信息:
|
||||
- **助手名称**:让团队知道它服务于什么场景
|
||||
- **系统提示词**:先定义角色、任务和限制
|
||||
- **首轮模式**:决定由助手先说还是等待用户开口
|
||||
3. 绑定默认模型:
|
||||
- 文本生成使用一个 LLM
|
||||
- 语音输入使用一个 ASR
|
||||
- 语音输出使用一个 TTS 声音资源
|
||||
|
||||
如果你想把助手设计得更稳,继续阅读:
|
||||
|
||||
- [助手概念](../concepts/assistants.md)
|
||||
- [配置选项](../concepts/assistants/configuration.md)
|
||||
- [提示词指南](../concepts/assistants/prompts.md)
|
||||
|
||||
## 第三步:补充能力
|
||||
|
||||
最小助手可以只依赖提示词和模型;更复杂的场景通常还需要以下能力:
|
||||
|
||||
- **知识库**:让助手回答私有领域问题。见 [知识库](../customization/knowledge-base.md)
|
||||
- **工具**:让助手执行查单、预约、查询等外部操作。见 [工具](../customization/tools.md)
|
||||
- **工作流**:让助手处理多步骤、多分支流程。见 [工作流](../customization/workflows.md)
|
||||
|
||||
## 第四步:测试并发布
|
||||
|
||||
1. 打开助手测试面板,先验证文本对话,再验证语音输入输出
|
||||
2. 观察事件流、转写、工具调用和最终回复是否符合预期
|
||||
3. 保存当前配置,并确认该助手已可用于外部接入
|
||||
|
||||
更系统的验证方式见 [测试调试](../concepts/assistants/testing.md)。
|
||||
|
||||
## 第五步:接入应用
|
||||
|
||||
最小接入方式是使用 WebSocket API 建立实时会话:
|
||||
|
||||
```javascript
|
||||
const ws = new WebSocket('ws://your-server/ws?assistant_id=YOUR_ASSISTANT_ID');
|
||||
|
||||
ws.onopen = () => {
|
||||
ws.send(JSON.stringify({
|
||||
type: 'session.start',
|
||||
audio: { encoding: 'pcm_s16le', sample_rate_hz: 16000, channels: 1 }
|
||||
}));
|
||||
};
|
||||
```
|
||||
|
||||
你通常只需要两项信息:
|
||||
|
||||
- `assistant_id`:指定接入哪个助手
|
||||
- WebSocket 地址:由引擎服务提供实时对话入口
|
||||
|
||||
完整协议见 [WebSocket 协议](../api-reference/websocket.md)。
|
||||
|
||||
## 常见卡点
|
||||
|
||||
- 资源配置不生效:回到 [资源准备清单](dashboard.md) 检查三类资源是否都已准备好
|
||||
- 助手不回复:先看 [测试调试](../concepts/assistants/testing.md),再进入 [故障排查](../resources/troubleshooting.md)
|
||||
- 回复质量不稳定:优先检查 [提示词指南](../concepts/assistants/prompts.md) 与 [知识库](../customization/knowledge-base.md)
|
||||
|
||||
## 下一步
|
||||
|
||||
- [环境与部署](../getting-started/index.md) - 补全环境、配置和部署细节
|
||||
- [构建助手](../concepts/assistants.md) - 深入配置助手、模型、知识库、工具与工作流
|
||||
- [API 参考](../api-reference/index.md) - 查看管理接口与实时协议
|
||||
|
||||
|
||||
59
docs/content/resources/faq.md
Normal file
59
docs/content/resources/faq.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# 常见问题
|
||||
|
||||
本页只提供简短回答和跳转建议;如果你需要逐步排查,请直接进入 [故障排查](troubleshooting.md)。
|
||||
|
||||
## Q: 我应该先看哪一部分文档?
|
||||
|
||||
- 想了解产品是什么:看 [产品概览](../overview/index.md)
|
||||
- 想先把服务跑起来:看 [环境与部署](../getting-started/index.md)
|
||||
- 想最快创建第一个助手:看 [快速开始](../quickstart/index.md)
|
||||
- 想系统完成助手配置:从 [助手概览](../concepts/assistants.md) 开始
|
||||
|
||||
## Q: 如何配置模型或 API Key?
|
||||
|
||||
进入对应资源页完成配置:
|
||||
|
||||
- LLM:见 [LLM 模型](../customization/models.md)
|
||||
- ASR:见 [语音识别](../customization/asr.md)
|
||||
- TTS:见 [声音资源](../customization/voices.md)
|
||||
|
||||
## Q: 助手为什么不回复?
|
||||
|
||||
通常先检查三件事:
|
||||
|
||||
- 助手是否已绑定可用的模型资源
|
||||
- 提示词、知识库或工具是否配置完整
|
||||
- WebSocket 会话是否已经正常建立
|
||||
|
||||
下一步:
|
||||
|
||||
- 助手行为验证:看 [测试调试](../concepts/assistants/testing.md)
|
||||
- 逐步排查:看 [故障排查](troubleshooting.md)
|
||||
|
||||
## Q: 回复为什么不准确或不稳定?
|
||||
|
||||
优先检查:
|
||||
|
||||
- 提示词是否明确了角色、任务和限制
|
||||
- 是否应该补充知识库,而不是继续堆叠提示词
|
||||
- 是否需要把复杂业务改成工作流,而不是单轮问答
|
||||
|
||||
相关文档:
|
||||
|
||||
- [提示词指南](../concepts/assistants/prompts.md)
|
||||
- [知识库](../customization/knowledge-base.md)
|
||||
- [工作流](../customization/workflows.md)
|
||||
|
||||
## Q: 语音识别或语音播放效果不好怎么办?
|
||||
|
||||
- 输入侧问题先看 [语音识别](../customization/asr.md)
|
||||
- 输出侧问题先看 [声音资源](../customization/voices.md) 和 [TTS 参数](../customization/tts.md)
|
||||
- 需要逐步定位链路问题时,再看 [故障排查](troubleshooting.md)
|
||||
|
||||
## Q: 页面空白、接口报错或连接不上怎么办?
|
||||
|
||||
这是典型的环境或链路问题:
|
||||
|
||||
- 先确认 [环境与部署](../getting-started/index.md) 中的三个服务都已启动
|
||||
- 再进入 [故障排查](troubleshooting.md) 按连接、API、页面加载或性能问题分类处理
|
||||
|
||||
292
docs/content/resources/troubleshooting.md
Normal file
292
docs/content/resources/troubleshooting.md
Normal file
@@ -0,0 +1,292 @@
|
||||
# 故障排查
|
||||
|
||||
本文档汇总常见问题的排查步骤和解决方案。
|
||||
|
||||
## 连接问题
|
||||
|
||||
### WebSocket 连接失败
|
||||
|
||||
**症状**:无法建立 WebSocket 连接,控制台显示连接错误。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查服务状态**
|
||||
```bash
|
||||
# 检查 Engine 服务是否运行
|
||||
curl http://localhost:8000/health
|
||||
```
|
||||
|
||||
2. **验证连接地址**
|
||||
- 确认 host 和 port 正确
|
||||
- 确认 assistant_id 参数存在
|
||||
|
||||
3. **检查网络**
|
||||
- 确认防火墙未阻止 WebSocket
|
||||
- 检查 Nginx 代理配置(如有)
|
||||
|
||||
4. **查看服务日志**
|
||||
```bash
|
||||
docker logs ai-assistant-engine
|
||||
```
|
||||
|
||||
**常见原因**:
|
||||
- Engine 服务未启动
|
||||
- assistant_id 无效
|
||||
- 防火墙阻止 WebSocket 端口
|
||||
|
||||
---
|
||||
|
||||
### API 请求失败
|
||||
|
||||
**症状**:REST API 返回错误或超时。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查 API 服务**
|
||||
```bash
|
||||
curl http://localhost:8080/health
|
||||
```
|
||||
|
||||
2. **验证请求格式**
|
||||
- Content-Type 是否为 application/json
|
||||
- 请求体是否为有效 JSON
|
||||
|
||||
3. **检查认证**
|
||||
- Authorization header 是否正确
|
||||
- API Key 是否有效
|
||||
|
||||
4. **查看响应详情**
|
||||
```bash
|
||||
curl -v http://localhost:8080/api/v1/assistants
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 助手问题
|
||||
|
||||
### 助手不回复
|
||||
|
||||
**症状**:发送消息后没有收到助手回复。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查会话状态**
|
||||
- 确认收到 `session.started` 事件
|
||||
- 确认没有 `error` 事件
|
||||
|
||||
2. **检查 LLM 配置**
|
||||
- API Key 是否有效
|
||||
- 模型配置是否正确
|
||||
- 测试模型连接
|
||||
|
||||
3. **查看日志**
|
||||
- 检查 LLM 调用是否成功
|
||||
- 查看是否有超时错误
|
||||
|
||||
**常见原因**:
|
||||
- LLM API Key 无效或过期
|
||||
- 模型服务不可用
|
||||
- 请求超时
|
||||
|
||||
---
|
||||
|
||||
### 回复质量差
|
||||
|
||||
**症状**:助手回复不准确、不相关或格式混乱。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查提示词**
|
||||
- 是否有明确的角色定义
|
||||
- 是否有清晰的任务描述
|
||||
- 是否有必要的约束
|
||||
|
||||
2. **调整参数**
|
||||
- 降低 temperature 提高一致性
|
||||
- 调整 max_tokens 控制长度
|
||||
|
||||
3. **检查知识库**
|
||||
- 确认知识库已关联
|
||||
- 测试检索结果是否相关
|
||||
|
||||
4. **查看对话历史**
|
||||
- 分析问题出现的模式
|
||||
- 收集典型的失败案例
|
||||
|
||||
---
|
||||
|
||||
## 语音问题
|
||||
|
||||
### 语音识别不准确
|
||||
|
||||
**症状**:ASR 识别结果与实际说话内容不符。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查音频质量**
|
||||
- 麦克风是否正常工作
|
||||
- 环境是否嘈杂
|
||||
- 采样率是否正确(16kHz)
|
||||
|
||||
2. **验证 ASR 配置**
|
||||
- 语言设置是否正确
|
||||
- 是否配置了热词
|
||||
|
||||
3. **测试不同引擎**
|
||||
- 尝试切换 ASR 服务提供商
|
||||
- 对比识别效果
|
||||
|
||||
**改进建议**:
|
||||
- 添加业务相关的热词
|
||||
- 使用降噪麦克风
|
||||
- 选择针对中文优化的 ASR 引擎
|
||||
|
||||
---
|
||||
|
||||
### 语音无法播放
|
||||
|
||||
**症状**:TTS 合成成功但没有声音输出。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查浏览器设置**
|
||||
- 是否允许自动播放音频
|
||||
- 音量是否静音
|
||||
|
||||
2. **验证音频数据**
|
||||
- 确认收到 `output.audio.start` 事件
|
||||
- 确认收到二进制音频帧
|
||||
- 确认收到 `output.audio.end` 事件
|
||||
|
||||
3. **检查音频解码**
|
||||
- PCM 格式是否正确解析
|
||||
- AudioContext 是否正确初始化
|
||||
|
||||
4. **测试 TTS 服务**
|
||||
- 单独测试 TTS 配置
|
||||
- 检查 TTS API 状态
|
||||
|
||||
---
|
||||
|
||||
## 部署问题
|
||||
|
||||
### Docker 容器启动失败
|
||||
|
||||
**症状**:容器无法启动或立即退出。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **查看容器日志**
|
||||
```bash
|
||||
docker logs <container_name>
|
||||
```
|
||||
|
||||
2. **检查资源限制**
|
||||
```bash
|
||||
docker stats
|
||||
```
|
||||
|
||||
3. **验证配置文件**
|
||||
- 环境变量是否正确
|
||||
- 配置文件路径是否存在
|
||||
|
||||
4. **检查端口冲突**
|
||||
```bash
|
||||
netstat -an | grep <port>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 页面加载空白
|
||||
|
||||
**症状**:浏览器打开页面但内容为空。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查浏览器控制台**
|
||||
- 打开 F12 开发者工具
|
||||
- 查看 Console 错误信息
|
||||
|
||||
2. **验证静态资源**
|
||||
- 检查 Network 标签页
|
||||
- 确认 JS/CSS 文件加载成功
|
||||
|
||||
3. **检查 API 连接**
|
||||
- 确认 VITE_API_URL 配置正确
|
||||
- 测试 API 是否可访问
|
||||
|
||||
4. **清除缓存**
|
||||
```bash
|
||||
# 强制刷新
|
||||
Ctrl + Shift + R
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 性能问题
|
||||
|
||||
### 响应延迟高
|
||||
|
||||
**症状**:从发送消息到收到回复时间过长。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **定位延迟环节**
|
||||
- ASR 处理时间
|
||||
- LLM 推理时间
|
||||
- TTS 合成时间
|
||||
|
||||
2. **查看性能指标**
|
||||
- 检查 `metrics.ttfb` 事件
|
||||
- 分析各环节耗时
|
||||
|
||||
3. **优化配置**
|
||||
- 使用更快的模型
|
||||
- 减少 max_tokens
|
||||
- 启用流式输出
|
||||
|
||||
4. **检查网络**
|
||||
- 测试到各 API 的延迟
|
||||
- 考虑使用更近的服务区域
|
||||
|
||||
---
|
||||
|
||||
## 日志查看
|
||||
|
||||
### 服务端日志
|
||||
|
||||
```bash
|
||||
# Docker 容器日志
|
||||
docker logs -f ai-assistant-engine
|
||||
|
||||
# 查看最近 100 行
|
||||
docker logs --tail 100 ai-assistant-engine
|
||||
```
|
||||
|
||||
### 客户端日志
|
||||
|
||||
在浏览器开发者工具中:
|
||||
|
||||
1. **Console** - 查看 JavaScript 错误和日志
|
||||
2. **Network** - 查看网络请求和响应
|
||||
3. **WebSocket** - 查看 WS 消息(在 Network 标签页)
|
||||
|
||||
### 启用详细日志
|
||||
|
||||
设置环境变量启用调试日志:
|
||||
|
||||
```bash
|
||||
# Engine 服务
|
||||
LOG_LEVEL=debug
|
||||
|
||||
# API 服务
|
||||
DEBUG=true
|
||||
```
|
||||
|
||||
## 获取帮助
|
||||
|
||||
如果以上方法无法解决问题:
|
||||
|
||||
1. 收集相关日志和错误信息
|
||||
2. 描述复现步骤
|
||||
3. 提交 Issue 或联系技术支持
|
||||
110
docs/content/roadmap.md
Normal file
110
docs/content/roadmap.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# 开发路线图
|
||||
|
||||
本页面展示 Realtime Agent Studio 的开发计划和进度。
|
||||
|
||||
---
|
||||
|
||||
## 已完成 :white_check_mark:
|
||||
|
||||
### 实时交互引擎
|
||||
|
||||
- [x] **管线式全双工引擎** - ASR / LLM / TTS 流水线架构
|
||||
- [x] **智能打断处理** - VAD + EOU 检测
|
||||
- [x] **OpenAI 兼容接口** - ASR / TTS 标准接口适配
|
||||
- [x] **DashScope TTS** - 阿里云语音合成适配
|
||||
|
||||
### 助手配置管理
|
||||
|
||||
- [x] **系统提示词编辑** - Prompt 配置,动态变量注入
|
||||
- [x] **模型选择** - LLM / ASR / TTS 模型管理界面
|
||||
- [x] **工具调用配置** - Webhook 工具 + 客户端工具
|
||||
|
||||
### 调试与观察
|
||||
|
||||
- [x] **实时调试控制台** - WebSocket 调试连接示例
|
||||
- [x] **完整会话回放** - 音频 + 转写 + LLM 响应
|
||||
- [x] **会话检索筛选** - 按时间 / 助手 / 状态筛选
|
||||
|
||||
### 开放接口
|
||||
|
||||
- [x] **WebSocket 协议** - `/ws` 端点完整实现
|
||||
- [x] **RESTful 接口** - 完整的 CRUD API
|
||||
|
||||
---
|
||||
|
||||
## 开发中 :construction:
|
||||
|
||||
### 助手与能力编排
|
||||
|
||||
- [ ] **私有化 ASR / TTS 适配** - 本地模型接入
|
||||
- [ ] **工作流编辑** - 可视化流程编排
|
||||
- [ ] **知识库关联** - RAG 文档管理
|
||||
|
||||
### 实时交互引擎
|
||||
|
||||
- [ ] **原生多模态模型** - Step Audio 接入(GPT-4o Realtime / Gemini Live 国内环境受限)
|
||||
- [ ] **WebRTC 协议** - `/webrtc` 端点
|
||||
|
||||
### 开放接口
|
||||
|
||||
- [ ] **SDK 支持** - JavaScript / Python SDK
|
||||
- [ ] **电话接入** - 电话呼入自动接听 / 自动呼出接口和批量呼出
|
||||
|
||||
### 效果评估
|
||||
|
||||
- [ ] **自动化测试工具** - 固定测试 + 智能测试
|
||||
|
||||
---
|
||||
|
||||
## 计划中 :spiral_notepad:
|
||||
|
||||
### 开放接口
|
||||
|
||||
- [ ] **Webhook 回调** - 会话事件通知机制
|
||||
|
||||
### 数据与评估
|
||||
|
||||
- [ ] **实时仪表盘增强** - 完善统计看板功能
|
||||
- [ ] **评估闭环** - 测试、评分、回归与变更追踪
|
||||
|
||||
### 企业能力
|
||||
|
||||
- [ ] **多租户支持** - 团队 / 组织管理
|
||||
- [ ] **权限管理** - RBAC 角色权限控制
|
||||
- [ ] **审计日志** - 操作记录追踪
|
||||
|
||||
### 生态集成
|
||||
|
||||
- [ ] **更多模型供应商** - 讯飞、百度、腾讯等
|
||||
- [ ] **CRM 集成** - Salesforce、HubSpot 等
|
||||
- [ ] **呼叫中心集成** - SIP / PSTN 网关
|
||||
|
||||
---
|
||||
|
||||
## 版本规划
|
||||
|
||||
| 版本 | 目标 | 状态 |
|
||||
|------|------|------|
|
||||
| **v0.1.0** | 核心功能 MVP,管线式引擎 | :white_check_mark: 已发布 |
|
||||
| **v0.2.0** | 工作流编辑器,知识库集成 | :construction: 开发中 |
|
||||
| **v0.3.0** | SDK 发布,多模态模型支持 | :spiral_notepad: 计划中 |
|
||||
| **v1.0.0** | 生产就绪,企业特性 | :spiral_notepad: 计划中 |
|
||||
|
||||
---
|
||||
|
||||
## 生态参考
|
||||
|
||||
### 开源项目
|
||||
|
||||
- [Livekit Agent](https://github.com/livekit/agents)
|
||||
- [Pipecat](https://github.com/pipecat-ai/pipecat)
|
||||
- [Vision Agents](https://github.com/GetStream/Vision-Agents)
|
||||
- [active-call](https://github.com/miuda-ai/active-call)
|
||||
- [TEN](https://github.com/TEN-framework/ten-framework)
|
||||
- [airi](https://github.com/moeru-ai/airi)
|
||||
- [Vocode Core](https://github.com/vocodedev/vocode-core)
|
||||
- [awesome-voice-agents](https://github.com/yzfly/awesome-voice-agents)
|
||||
|
||||
### 文档与研究参考
|
||||
|
||||
- [Voice AI & Voice Agents](https://voiceaiandvoiceagents.com/)
|
||||
160
docs/content/stylesheets/extra.css
Normal file
160
docs/content/stylesheets/extra.css
Normal file
@@ -0,0 +1,160 @@
|
||||
/* Realtime Agent Studio - Custom Styles */
|
||||
|
||||
:root {
|
||||
--md-primary-fg-color: #4f46e5;
|
||||
--md-primary-fg-color--light: #6366f1;
|
||||
--md-primary-fg-color--dark: #4338ca;
|
||||
--md-accent-fg-color: #6366f1;
|
||||
}
|
||||
|
||||
/* Hero Section - Center aligned content */
|
||||
.md-typeset p[align="center"] {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.md-typeset p[align="center"] img {
|
||||
display: inline-block;
|
||||
margin: 0 4px;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.md-typeset p[align="center"] a {
|
||||
margin: 0 8px;
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] {
|
||||
--md-primary-fg-color: #818cf8;
|
||||
--md-primary-fg-color--light: #a5b4fc;
|
||||
--md-primary-fg-color--dark: #6366f1;
|
||||
--md-accent-fg-color: #818cf8;
|
||||
}
|
||||
|
||||
/* Hero Section Styling */
|
||||
.md-content h1 {
|
||||
font-weight: 700;
|
||||
letter-spacing: -0.02em;
|
||||
}
|
||||
|
||||
/* Badge Styling */
|
||||
.md-content img[src*="badge"] {
|
||||
margin: 0 4px;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
/* Grid Cards Enhancement */
|
||||
.md-typeset .grid.cards > ul > li {
|
||||
border: 1px solid var(--md-default-fg-color--lightest);
|
||||
border-radius: 8px;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.md-typeset .grid.cards > ul > li:hover {
|
||||
border-color: var(--md-primary-fg-color);
|
||||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
|
||||
/* Code Block Enhancement */
|
||||
.md-typeset pre > code {
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
.md-typeset .highlight {
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
/* Table Enhancement */
|
||||
.md-typeset table:not([class]) {
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
border: 1px solid var(--md-default-fg-color--lightest);
|
||||
}
|
||||
|
||||
.md-typeset table:not([class]) th {
|
||||
background-color: var(--md-default-fg-color--lightest);
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
/* Admonition Enhancement */
|
||||
.md-typeset .admonition,
|
||||
.md-typeset details {
|
||||
border-radius: 8px;
|
||||
border: none;
|
||||
}
|
||||
|
||||
/* Mermaid Diagram Styling - consistent element size across diagrams */
|
||||
.mermaid {
|
||||
margin: 1.5rem 0;
|
||||
overflow-x: auto;
|
||||
}
|
||||
.mermaid svg {
|
||||
min-width: min-content;
|
||||
}
|
||||
|
||||
/* Navigation Enhancement */
|
||||
.md-nav__link {
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.md-nav__item--active > .md-nav__link {
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
/* Footer Styling */
|
||||
.md-footer {
|
||||
margin-top: 3rem;
|
||||
}
|
||||
|
||||
/* Center align for hero badges */
|
||||
.md-content > .md-typeset > div[align="center"] img {
|
||||
margin: 0.25rem;
|
||||
}
|
||||
|
||||
/* Task list styling */
|
||||
.md-typeset .task-list-item input[type="checkbox"] {
|
||||
margin-right: 0.5rem;
|
||||
}
|
||||
|
||||
/* Improve readability */
|
||||
.md-typeset {
|
||||
font-size: 0.85rem;
|
||||
line-height: 1.75;
|
||||
}
|
||||
|
||||
.md-typeset h2 {
|
||||
margin-top: 2.5rem;
|
||||
padding-bottom: 0.5rem;
|
||||
border-bottom: 1px solid var(--md-default-fg-color--lightest);
|
||||
}
|
||||
|
||||
.md-typeset h3 {
|
||||
margin-top: 1.5rem;
|
||||
}
|
||||
|
||||
/* Responsive improvements */
|
||||
@media screen and (max-width: 76.1875em) {
|
||||
.md-typeset .grid.cards > ul > li {
|
||||
padding: 1rem;
|
||||
}
|
||||
}
|
||||
|
||||
/* Animation for interactive elements */
|
||||
.md-typeset a:not(.md-button) {
|
||||
transition: color 0.15s ease;
|
||||
}
|
||||
|
||||
.md-typeset a:not(.md-button):hover {
|
||||
color: var(--md-accent-fg-color);
|
||||
}
|
||||
|
||||
/* Version selector styling */
|
||||
.md-version {
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
|
||||
/* Search highlight */
|
||||
.md-search-result mark {
|
||||
background-color: var(--md-accent-fg-color--transparent);
|
||||
color: inherit;
|
||||
}
|
||||
168
docs/mkdocs.yml
168
docs/mkdocs.yml
@@ -1,21 +1,157 @@
|
||||
site_name: "AI Video Assistant"
|
||||
site_description: "AI 视频助手 - 智能对话与工作流管理平台"
|
||||
copyright: "2025"
|
||||
site_author: "AI Video Assistant Team"
|
||||
site_name: "Realtime Agent Studio"
|
||||
site_description: "Realtime Agent Studio(RAS)是一个通过管理控制台与 API 构建、部署和运营实时多模态助手的开源平台。"
|
||||
site_url: "https://your-org.github.io/AI-VideoAssistant"
|
||||
copyright: "Copyright © 2025 RAS Team"
|
||||
site_author: "RAS Team"
|
||||
|
||||
docs_dir: "content"
|
||||
site_dir: "site"
|
||||
|
||||
nav:
|
||||
- 首页: "index.md"
|
||||
- 快速开始: "getting-started.md"
|
||||
- 功能介绍:
|
||||
- 仪表盘: "features/dashboard.md"
|
||||
- 助手管理: "features/assistants.md"
|
||||
- 工作流: "features/workflows.md"
|
||||
- 模型配置: "features/models.md"
|
||||
- 知识库: "features/knowledge.md"
|
||||
- 历史记录: "features/history.md"
|
||||
- 自动化测试: "features/autotest.md"
|
||||
- 语音合成: "features/voices.md"
|
||||
- 部署指南: "deployment.md"
|
||||
- 首页: index.md
|
||||
- 快速开始:
|
||||
- 环境与部署: getting-started/index.md
|
||||
- 创建第一个助手: quickstart/index.md
|
||||
- 构建助手:
|
||||
- 助手概览: concepts/assistants.md
|
||||
- 基础配置: concepts/assistants/configuration.md
|
||||
- 提示词: concepts/assistants/prompts.md
|
||||
- LLM 模型: customization/models.md
|
||||
- 语音识别: customization/asr.md
|
||||
- 声音资源: customization/voices.md
|
||||
- TTS 参数: customization/tts.md
|
||||
- 知识库: customization/knowledge-base.md
|
||||
- 工具: customization/tools.md
|
||||
- 工作流: customization/workflows.md
|
||||
- 测试与调试: concepts/assistants/testing.md
|
||||
- 核心概念:
|
||||
- 产品概览: overview/index.md
|
||||
- 概念总览: concepts/index.md
|
||||
- 引擎架构: concepts/engines.md
|
||||
- Pipeline 引擎: concepts/pipeline-engine.md
|
||||
- Realtime 引擎: concepts/realtime-engine.md
|
||||
- 系统架构: overview/architecture.md
|
||||
- 集成:
|
||||
- API 参考: api-reference/index.md
|
||||
- WebSocket 协议: api-reference/websocket.md
|
||||
- 错误码: api-reference/errors.md
|
||||
- 运维:
|
||||
- 仪表盘: analysis/dashboard.md
|
||||
- 历史记录: analysis/history.md
|
||||
- 效果评估: analysis/evaluation.md
|
||||
- 自动化测试: analysis/autotest.md
|
||||
- 常见问题: resources/faq.md
|
||||
- 故障排查: resources/troubleshooting.md
|
||||
- 更新日志: changelog.md
|
||||
- 路线图: roadmap.md
|
||||
theme:
|
||||
name: material
|
||||
language: zh
|
||||
custom_dir: overrides
|
||||
icon:
|
||||
logo: material/robot-outline
|
||||
font:
|
||||
text: Inter
|
||||
code: JetBrains Mono
|
||||
palette:
|
||||
- media: "(prefers-color-scheme: light)"
|
||||
scheme: default
|
||||
primary: indigo
|
||||
accent: indigo
|
||||
toggle:
|
||||
icon: material/brightness-7
|
||||
name: 切换到深色模式
|
||||
- media: "(prefers-color-scheme: dark)"
|
||||
scheme: slate
|
||||
primary: indigo
|
||||
accent: indigo
|
||||
toggle:
|
||||
icon: material/brightness-4
|
||||
name: 切换到浅色模式
|
||||
features:
|
||||
- navigation.instant
|
||||
- navigation.instant.prefetch
|
||||
- navigation.tracking
|
||||
- navigation.tabs
|
||||
- navigation.tabs.sticky
|
||||
- navigation.sections
|
||||
- navigation.expand
|
||||
- navigation.path
|
||||
- navigation.top
|
||||
- navigation.footer
|
||||
- toc.follow
|
||||
- search.suggest
|
||||
- search.highlight
|
||||
- search.share
|
||||
- content.code.copy
|
||||
- content.code.annotate
|
||||
- content.tabs.link
|
||||
markdown_extensions:
|
||||
- abbr
|
||||
- admonition
|
||||
- attr_list
|
||||
- def_list
|
||||
- footnotes
|
||||
- md_in_html
|
||||
- tables
|
||||
- toc:
|
||||
permalink: true
|
||||
toc_depth: 3
|
||||
- pymdownx.arithmatex:
|
||||
generic: true
|
||||
- pymdownx.betterem:
|
||||
smart_enable: all
|
||||
- pymdownx.caret
|
||||
- pymdownx.details
|
||||
- pymdownx.emoji:
|
||||
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
||||
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
||||
- pymdownx.highlight:
|
||||
anchor_linenums: true
|
||||
line_spans: __span
|
||||
pygments_lang_class: true
|
||||
- pymdownx.inlinehilite
|
||||
- pymdownx.keys
|
||||
- pymdownx.magiclink:
|
||||
repo_url_shorthand: true
|
||||
user: your-org
|
||||
repo: AI-VideoAssistant
|
||||
- pymdownx.mark
|
||||
- pymdownx.smartsymbols
|
||||
- pymdownx.snippets
|
||||
- pymdownx.superfences:
|
||||
custom_fences:
|
||||
- name: mermaid
|
||||
class: mermaid
|
||||
format: !!python/name:pymdownx.superfences.fence_code_format
|
||||
- pymdownx.tabbed:
|
||||
alternate_style: true
|
||||
- pymdownx.tasklist:
|
||||
custom_checkbox: true
|
||||
- pymdownx.tilde
|
||||
|
||||
plugins:
|
||||
- search:
|
||||
lang: zh
|
||||
separator: '[\s\-\.]+'
|
||||
- minify:
|
||||
minify_html: true
|
||||
|
||||
extra:
|
||||
social:
|
||||
- icon: fontawesome/brands/github
|
||||
link: https://github.com/your-org/AI-VideoAssistant
|
||||
name: GitHub
|
||||
generator: false
|
||||
analytics:
|
||||
provider: google
|
||||
property: G-XXXXXXXXXX
|
||||
|
||||
extra_css:
|
||||
- stylesheets/extra.css
|
||||
|
||||
extra_javascript:
|
||||
- javascripts/mermaid.mjs
|
||||
- javascripts/extra.js
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user