Compare commits
257 Commits
86744f0842
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9195957753 | ||
|
|
9b9fbf432f | ||
|
|
f3612a710d | ||
|
|
13684d498b | ||
|
|
47293ac46d | ||
|
|
373be4eb97 | ||
|
|
e4ccec6cc1 | ||
|
|
312fe0cf31 | ||
|
|
57264ad831 | ||
|
|
bfe165daae | ||
|
|
e07e5128fc | ||
|
|
a2fba260fd | ||
|
|
b300b469dc | ||
|
|
e41d34fe23 | ||
|
|
aeeeee20d1 | ||
|
|
3604db21eb | ||
|
|
65ae2287d5 | ||
|
|
da38157638 | ||
|
|
e11c3abb9e | ||
|
|
7e0b777923 | ||
|
|
4e2450e800 | ||
|
|
6b589a1b7c | ||
|
|
1cecbaa172 | ||
|
|
935f2fbd1f | ||
|
|
d0a6419990 | ||
|
|
b8760c24be | ||
|
|
14abbe6f10 | ||
|
|
efdcbe5550 | ||
|
|
3b6a2f75ee | ||
|
|
ac9b0047ee | ||
|
|
4748f3b5f1 | ||
|
|
947af3a525 | ||
|
|
d572e1a7f0 | ||
|
|
d03b3b0e0c | ||
|
|
526024d603 | ||
|
|
b4c6277d2a | ||
|
|
a8fa66e9cc | ||
|
|
aaef370d70 | ||
|
|
7d4af18815 | ||
|
|
530d95eea4 | ||
|
|
4c05131536 | ||
|
|
80fff09b76 | ||
|
|
eecde9f0fb | ||
|
|
7fbf52078f | ||
|
|
a003134477 | ||
|
|
85315ba6ca | ||
|
|
9734b38808 | ||
|
|
0a7a3253a6 | ||
|
|
a82100fc79 | ||
|
|
d0897aca92 | ||
|
|
70b4043f9b | ||
|
|
3aa9e0f432 | ||
|
|
fb017f9952 | ||
|
|
00b88c5afa | ||
|
|
b5cdb76e52 | ||
|
|
4d553de34d | ||
|
|
31b3969b96 | ||
|
|
3f22e2b875 | ||
|
|
531688aa6b | ||
|
|
3626297211 | ||
|
|
1561056a3d | ||
|
|
3a5d27d6c3 | ||
|
|
3643431565 | ||
|
|
2418df80e5 | ||
|
|
6a46ec69f4 | ||
|
|
b4fa664d73 | ||
|
|
0821d73e7c | ||
|
|
a7da109983 | ||
|
|
aae41d4512 | ||
|
|
c4c473105e | ||
|
|
8b59569b99 | ||
|
|
e40899613f | ||
|
|
915d2f4bd8 | ||
|
|
4d9f083e20 | ||
|
|
531cf6080a | ||
|
|
229243e832 | ||
|
|
95c6e93a9c | ||
|
|
cdd8275e35 | ||
|
|
b035e023c4 | ||
|
|
0f1165af64 | ||
|
|
487634c494 | ||
|
|
5f768edf68 | ||
|
|
d942c85eff | ||
|
|
8d453e10da | ||
|
|
e108ee66cb | ||
|
|
6178cc05bb | ||
|
|
71cbfa2b48 | ||
|
|
6a9b5fcff4 | ||
|
|
3272a7a68a | ||
|
|
f1b60bef22 | ||
|
|
403b4b93c7 | ||
|
|
0b308f9bce | ||
|
|
e14eac347f | ||
|
|
0f02de5fc3 | ||
|
|
0de6fe529e | ||
|
|
fb95e2abe2 | ||
|
|
833cb0d4c4 | ||
|
|
fbbb2e0fee | ||
|
|
da83c8ec8a | ||
|
|
cfc8db3fe7 | ||
|
|
37b646186d | ||
|
|
14b4b3d966 | ||
|
|
1bcf625f86 | ||
|
|
8bc21c7874 | ||
|
|
f77f7c7531 | ||
|
|
b193f91432 | ||
|
|
562341a72c | ||
|
|
6744646390 | ||
|
|
72ed7d0512 | ||
|
|
56f8aa2191 | ||
|
|
81ed89b84f | ||
|
|
3c7efce80b | ||
|
|
20afc63a28 | ||
|
|
da1293e39a | ||
|
|
28ca003662 | ||
|
|
14991af1bf | ||
|
|
ff3a03b1ad | ||
|
|
260ff621bf | ||
|
|
0f9543d8a4 | ||
|
|
024beeaea3 | ||
|
|
98207936ae | ||
|
|
35bd83767e | ||
|
|
838c19bf9c | ||
|
|
aabf2ce8b9 | ||
|
|
543528239e | ||
|
|
a92a56b845 | ||
|
|
bbfb5570cc | ||
|
|
399c9c97b1 | ||
|
|
6744704c7e | ||
|
|
39bcd67eac | ||
|
|
82521e7b90 | ||
|
|
edcbc2cec7 | ||
|
|
56ca95c200 | ||
|
|
cbebfe1c7a | ||
|
|
a7ef8858de | ||
|
|
ef13ddb6b2 | ||
|
|
a17ef6f182 | ||
|
|
d41db6418c | ||
|
|
6179053388 | ||
|
|
6e63b49a4c | ||
|
|
44ad52669f | ||
|
|
2ab4075fcd | ||
|
|
24ec548924 | ||
|
|
ede12e2df0 | ||
|
|
e7605f661b | ||
|
|
02d12ea996 | ||
|
|
4c46793169 | ||
|
|
80e1d24443 | ||
|
|
9304927fe9 | ||
|
|
180a69ca67 | ||
|
|
6798806acd | ||
|
|
15523d9ec2 | ||
|
|
2d7fc2b700 | ||
|
|
6cac24918d | ||
|
|
54eb48fb74 | ||
|
|
4b8da32787 | ||
|
|
539cf2fda2 | ||
|
|
d2aaba999b | ||
|
|
bad0206478 | ||
|
|
c38782e608 | ||
|
|
5f50c137e4 | ||
|
|
30f757529f | ||
|
|
375181a524 | ||
|
|
aa2a358b38 | ||
|
|
323ef61573 | ||
|
|
94a562a1d5 | ||
|
|
1462488969 | ||
|
|
436fb3c1e5 | ||
|
|
bbeffa89ed | ||
|
|
6b4391c423 | ||
|
|
ed1f7fc8b0 | ||
|
|
6a42c47700 | ||
|
|
3537a865f7 | ||
|
|
b3e1c3d380 | ||
|
|
b34d500479 | ||
|
|
0135f718f3 | ||
|
|
29d0b931eb | ||
|
|
a140e3a599 | ||
|
|
2b7e3dd499 | ||
|
|
b57d9f3a65 | ||
|
|
220f394f85 | ||
|
|
088943166c | ||
|
|
b52cd65848 | ||
|
|
9cac01e31d | ||
|
|
1d7c282066 | ||
|
|
ab90b7c7df | ||
|
|
38e20052f7 | ||
|
|
a26e3f4026 | ||
|
|
cbae28263b | ||
|
|
29d8361ca9 | ||
|
|
c961d63e75 | ||
|
|
6648f1d478 | ||
|
|
11016c04da | ||
|
|
3b5d7d0d4d | ||
|
|
45d7a41cff | ||
|
|
ed044bd8ad | ||
|
|
cb5c08d84d | ||
|
|
8fd6daaed1 | ||
|
|
fe05cf5d74 | ||
|
|
65aab79fb6 | ||
|
|
1b83b58d48 | ||
|
|
cd68ebe306 | ||
|
|
a42dd4c712 | ||
|
|
c7044c4c77 | ||
|
|
97984014e8 | ||
|
|
d4b645568e | ||
|
|
b3af104300 | ||
|
|
210301dc6b | ||
|
|
5349ed88e7 | ||
|
|
7df11dd846 | ||
|
|
0ae85513fb | ||
|
|
2bbe659897 | ||
|
|
d657a21024 | ||
|
|
fdfb3df714 | ||
|
|
b390ad2171 | ||
|
|
91ebe26f49 | ||
|
|
a8ef883050 | ||
|
|
f2b6cefd30 | ||
|
|
5c1d236f0d | ||
|
|
17bd4a78f4 | ||
|
|
f4aa432f0e | ||
|
|
c5ff3ea261 | ||
|
|
ab8e3a82d9 | ||
|
|
d841c59f45 | ||
|
|
7c5b71a101 | ||
|
|
8edbe14382 | ||
|
|
0643780c63 | ||
|
|
59f326f8e5 | ||
|
|
c7260677a1 | ||
|
|
8c88d7c57a | ||
|
|
c15c5283e2 | ||
|
|
fb6d1eb1da | ||
|
|
479cfb797b | ||
|
|
0d13d6acdb | ||
|
|
a12ba0c4a4 | ||
|
|
bdd5a7a274 | ||
|
|
7206c313d2 | ||
|
|
e643c7db17 | ||
|
|
59cda0987f | ||
|
|
d0b96a3f72 | ||
|
|
77b186dceb | ||
|
|
0fc56e2685 | ||
|
|
be68e335f1 | ||
|
|
6462c4f432 | ||
|
|
4bf2f788ad | ||
|
|
97e3236e76 | ||
|
|
8ec91a7fa8 | ||
|
|
68f69f9b09 | ||
|
|
cd5d0a668d | ||
|
|
8069a16227 | ||
|
|
68e47320cd | ||
|
|
9410265a2b | ||
|
|
c563b13fbf | ||
|
|
eed3ee824f | ||
|
|
3d8635670f | ||
|
|
7012f8edaf | ||
|
|
727fe8a997 |
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# OS artifacts
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
8
api/.gitignore
vendored
8
api/.gitignore
vendored
@@ -36,8 +36,12 @@ env/
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
|
||||
# Vector store data
|
||||
data/vector_store/
|
||||
# Runtime data (SQLite, vector store, uploads, generated artifacts)
|
||||
data/**
|
||||
!data/
|
||||
!data/.gitkeep
|
||||
!data/vector_store/
|
||||
data/vector_store/**
|
||||
!data/vector_store/.gitkeep
|
||||
|
||||
# IDE
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
FROM python:3.11-slim
|
||||
FROM python:3.12-slim
|
||||
|
||||
# Install build tools for C++11 (needed for native extensions, e.g. chromadb)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# 安装依赖
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# 复制代码
|
||||
COPY . .
|
||||
|
||||
# 创建数据目录
|
||||
RUN mkdir -p /app/data
|
||||
|
||||
EXPOSE 8000
|
||||
EXPOSE 8100
|
||||
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8100", "--reload"]
|
||||
|
||||
288
api/README.md
288
api/README.md
@@ -1,13 +1,13 @@
|
||||
# AI VideoAssistant Backend
|
||||
|
||||
Python 后端 API,配合前端 `ai-videoassistant-frontend` 使用。
|
||||
Python 后端 API,配合前端 `web/` 模块使用。
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 1. 安装依赖
|
||||
|
||||
```bash
|
||||
cd ~/Code/ai-videoassistant-backend
|
||||
cd api
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
@@ -21,48 +21,182 @@ python init_db.py
|
||||
- 创建 `data/app.db` SQLite 数据库
|
||||
- 初始化默认声音数据
|
||||
|
||||
可选参数(按需重建):
|
||||
|
||||
```bash
|
||||
# 仅重建数据库(drop + create)并初始化默认数据
|
||||
python init_db.py --rebuild-db
|
||||
|
||||
# 仅重建向量库集合(不动 DB 表结构);会重置文档索引状态为 pending
|
||||
python init_db.py --rebuild-vector-store
|
||||
|
||||
# 同时重建 DB 和向量库
|
||||
python init_db.py --rebuild-db --rebuild-vector-store
|
||||
|
||||
# 仅执行重建,不写入默认数据
|
||||
python init_db.py --rebuild-db --skip-seed
|
||||
```
|
||||
|
||||
### 3. 启动服务
|
||||
|
||||
```bash
|
||||
# 开发模式 (热重载)
|
||||
python -m uvicorn main:app --reload --host 0.0.0.0 --port 8000
|
||||
python -m uvicorn app.main:app --reload --host 0.0.0.0 --port 8100
|
||||
```
|
||||
|
||||
服务运行在: http://localhost:8100
|
||||
|
||||
### 4. 测试 API
|
||||
|
||||
```bash
|
||||
# 健康检查
|
||||
curl http://localhost:8000/health
|
||||
curl http://localhost:8100/health
|
||||
|
||||
# 获取助手列表
|
||||
curl http://localhost:8000/api/assistants
|
||||
curl http://localhost:8100/api/assistants
|
||||
|
||||
# 获取声音列表
|
||||
curl http://localhost:8000/api/voices
|
||||
curl http://localhost:8100/api/voices
|
||||
|
||||
# 获取通话历史
|
||||
curl http://localhost:8000/api/history
|
||||
curl http://localhost:8100/api/history
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API 文档
|
||||
|
||||
| 端点 | 方法 | 说明 |
|
||||
完整 API 文档位于 [docs/](docs/) 目录:
|
||||
|
||||
| 模块 | 端点 | 方法 | 说明 |
|
||||
|------|------|------|------|
|
||||
| **Assistant** | `/api/assistants` | GET | 助手列表 |
|
||||
| | | POST | 创建助手 |
|
||||
| | `/api/assistants/{id}` | GET | 助手详情 |
|
||||
| | | PUT | 更新助手 |
|
||||
| | | DELETE | 删除助手 |
|
||||
| **Voice** | `/api/voices` | GET | 声音库列表 |
|
||||
| | | POST | 添加声音 |
|
||||
| | `/api/voices/{id}` | GET | 声音详情 |
|
||||
| | | PUT | 更新声音 |
|
||||
| | | DELETE | 删除声音 |
|
||||
| | `/api/voices/{id}/preview` | POST | 预览声音 |
|
||||
| **LLM Models** | `/api/llm` | GET | LLM 模型列表 |
|
||||
| | | POST | 添加模型 |
|
||||
| | `/api/llm/{id}` | GET | 模型详情 |
|
||||
| | | PUT | 更新模型 |
|
||||
| | | DELETE | 删除模型 |
|
||||
| | `/api/llm/{id}/test` | POST | 测试模型连接 |
|
||||
| **ASR Models** | `/api/asr` | GET | ASR 模型列表 |
|
||||
| | | POST | 添加模型 |
|
||||
| | `/api/asr/{id}` | GET | 模型详情 |
|
||||
| | | PUT | 更新模型 |
|
||||
| | | DELETE | 删除模型 |
|
||||
| | `/api/asr/{id}/test` | POST | 测试识别 |
|
||||
| **History** | `/api/history` | GET | 通话历史列表 |
|
||||
| | `/api/history/{id}` | GET | 通话详情 |
|
||||
| | | PUT | 更新通话记录 |
|
||||
| | | DELETE | 删除记录 |
|
||||
| | `/api/history/{id}/transcripts` | POST | 添加转写 |
|
||||
| | `/api/history/search` | GET | 搜索历史 |
|
||||
| | `/api/history/stats` | GET | 统计数据 |
|
||||
| **Knowledge** | `/api/knowledge/bases` | GET | 知识库列表 |
|
||||
| | | POST | 创建知识库 |
|
||||
| | `/api/knowledge/bases/{id}` | GET | 知识库详情 |
|
||||
| | | PUT | 更新知识库 |
|
||||
| | | DELETE | 删除知识库 |
|
||||
| | `/api/knowledge/bases/{kb_id}/documents` | POST | 上传文档 |
|
||||
| | `/api/knowledge/bases/{kb_id}/documents/{doc_id}` | DELETE | 删除文档 |
|
||||
| | `/api/knowledge/bases/{kb_id}/documents/{doc_id}/index` | POST | 索引文档 |
|
||||
| | `/api/knowledge/search` | POST | 知识搜索 |
|
||||
| **Workflow** | `/api/workflows` | GET | 工作流列表 |
|
||||
| | | POST | 创建工作流 |
|
||||
| | `/api/workflows/{id}` | GET | 工作流详情 |
|
||||
| | | PUT | 更新工作流 |
|
||||
| | | DELETE | 删除工作流 |
|
||||
|
||||
---
|
||||
|
||||
## 数据模型
|
||||
|
||||
### Assistant (小助手)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `/api/assistants` | GET | 助手列表 |
|
||||
| `/api/assistants` | POST | 创建助手 |
|
||||
| `/api/assistants/{id}` | GET | 助手详情 |
|
||||
| `/api/assistants/{id}` | PUT | 更新助手 |
|
||||
| `/api/assistants/{id}` | DELETE | 删除助手 |
|
||||
| `/api/voices` | GET | 声音库列表 |
|
||||
| `/api/history` | GET | 通话历史列表 |
|
||||
| `/api/history/{id}` | GET | 通话详情 |
|
||||
| `/api/history/{id}/transcripts` | POST | 添加转写 |
|
||||
| `/api/history/{id}/audio/{turn}` | GET | 获取音频 |
|
||||
| id | string | 助手 ID |
|
||||
| name | string | 助手名称 |
|
||||
| opener | string | 开场白 |
|
||||
| prompt | string | 系统提示词 |
|
||||
| knowledgeBaseId | string | 关联知识库 ID |
|
||||
| language | string | 语言: zh/en |
|
||||
| voice | string | 声音 ID |
|
||||
| speed | float | 语速 (0.5-2.0) |
|
||||
| hotwords | array | 热词列表 |
|
||||
| tools | array | 启用的工具列表 |
|
||||
| llmModelId | string | LLM 模型 ID |
|
||||
| asrModelId | string | ASR 模型 ID |
|
||||
| embeddingModelId | string | Embedding 模型 ID |
|
||||
| rerankModelId | string | Rerank 模型 ID |
|
||||
|
||||
### Voice (声音资源)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 声音 ID |
|
||||
| name | string | 声音名称 |
|
||||
| vendor | string | 厂商: Ali/Volcano/Minimax |
|
||||
| gender | string | 性别: Male/Female |
|
||||
| language | string | 语言: zh/en |
|
||||
| model | string | 厂商模型标识 |
|
||||
| voice_key | string | 厂商 voice_key |
|
||||
| speed | float | 语速 |
|
||||
| gain | int | 增益 (dB) |
|
||||
| pitch | int | 音调 |
|
||||
|
||||
### LLMModel (模型接入)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 模型 ID |
|
||||
| name | string | 模型名称 |
|
||||
| vendor | string | 厂商 |
|
||||
| type | string | 类型: text/embedding/rerank |
|
||||
| base_url | string | API 地址 |
|
||||
| api_key | string | API 密钥 |
|
||||
| model_name | string | 模型名称 |
|
||||
| temperature | float | 温度参数 |
|
||||
|
||||
### ASRModel (语音识别)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 模型 ID |
|
||||
| name | string | 模型名称 |
|
||||
| vendor | string | 厂商 |
|
||||
| language | string | 语言: zh/en/Multi-lingual |
|
||||
| base_url | string | API 地址 |
|
||||
| api_key | string | API 密钥 |
|
||||
| hotwords | array | 热词列表 |
|
||||
|
||||
### CallRecord (通话记录)
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 记录 ID |
|
||||
| assistant_id | string | 助手 ID |
|
||||
| source | string | 来源: debug/external |
|
||||
| status | string | 状态: connected/missed/failed |
|
||||
| started_at | string | 开始时间 |
|
||||
| duration_seconds | int | 通话时长 |
|
||||
| summary | string | 通话摘要 |
|
||||
| transcripts | array | 对话转写 |
|
||||
|
||||
---
|
||||
|
||||
## 使用 Docker 启动
|
||||
|
||||
```bash
|
||||
cd ~/Code/ai-videoassistant-backend
|
||||
cd api
|
||||
|
||||
# 启动所有服务
|
||||
docker-compose up -d
|
||||
@@ -71,33 +205,143 @@ docker-compose up -d
|
||||
docker-compose logs -f backend
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 目录结构
|
||||
|
||||
```
|
||||
backend/
|
||||
api/
|
||||
├── app/
|
||||
│ ├── __init__.py
|
||||
│ ├── main.py # FastAPI 入口
|
||||
│ ├── db.py # SQLite 连接
|
||||
│ ├── models.py # 数据模型
|
||||
│ ├── models.py # SQLAlchemy 数据模型
|
||||
│ ├── schemas.py # Pydantic 模型
|
||||
│ ├── storage.py # MinIO 存储
|
||||
│ ├── vector_store.py # 向量存储
|
||||
│ └── routers/
|
||||
│ ├── __init__.py
|
||||
│ ├── assistants.py # 助手 API
|
||||
│ └── history.py # 通话记录 API
|
||||
│ ├── history.py # 通话记录 API
|
||||
│ └── knowledge.py # 知识库 API
|
||||
├── data/ # 数据库文件
|
||||
├── docs/ # API 文档
|
||||
├── requirements.txt
|
||||
├── .env
|
||||
├── init_db.py
|
||||
└── docker-compose.yml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 环境变量
|
||||
|
||||
| 变量 | 默认值 | 说明 |
|
||||
|------|--------|------|
|
||||
| `PORT` | `8100` | 服务端口 |
|
||||
| `DATABASE_URL` | `sqlite:///./data/app.db` | 数据库连接 |
|
||||
| `MINIO_ENDPOINT` | `localhost:9000` | MinIO 地址 |
|
||||
| `MINIO_ACCESS_KEY` | `admin` | MinIO 密钥 |
|
||||
| `MINIO_SECRET_KEY` | `password123` | MinIO 密码 |
|
||||
| `MINIO_BUCKET` | `ai-audio` | 存储桶名称 |
|
||||
|
||||
---
|
||||
|
||||
## 数据库迁移
|
||||
|
||||
开发环境重新创建数据库:
|
||||
|
||||
```bash
|
||||
rm -f api/data/app.db
|
||||
python api/init_db.py
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 测试
|
||||
|
||||
### 安装测试依赖
|
||||
|
||||
```bash
|
||||
cd api
|
||||
pip install pytest pytest-cov -q
|
||||
```
|
||||
|
||||
### 运行所有测试
|
||||
|
||||
```bash
|
||||
# Windows
|
||||
run_tests.bat
|
||||
|
||||
# 或使用 pytest
|
||||
pytest tests/ -v
|
||||
```
|
||||
|
||||
### 运行特定测试
|
||||
|
||||
```bash
|
||||
# 只测试声音 API
|
||||
pytest tests/test_voices.py -v
|
||||
|
||||
# 只测试助手 API
|
||||
pytest tests/test_assistants.py -v
|
||||
|
||||
# 只测试历史记录 API
|
||||
pytest tests/test_history.py -v
|
||||
|
||||
# 只测试知识库 API
|
||||
pytest tests/test_knowledge.py -v
|
||||
```
|
||||
|
||||
### 测试覆盖率
|
||||
|
||||
```bash
|
||||
pytest tests/ --cov=app --cov-report=html
|
||||
# 查看报告: open htmlcov/index.html
|
||||
```
|
||||
|
||||
### 测试目录结构
|
||||
|
||||
```
|
||||
tests/
|
||||
├── __init__.py
|
||||
├── conftest.py # pytest fixtures
|
||||
├── test_voices.py # 声音 API 测试
|
||||
├── test_assistants.py # 助手 API 测试
|
||||
├── test_history.py # 历史记录 API 测试
|
||||
└── test_knowledge.py # 知识库 API 测试
|
||||
```
|
||||
|
||||
### 测试用例统计
|
||||
|
||||
| 模块 | 测试用例数 |
|
||||
|------|-----------|
|
||||
| Voice | 13 |
|
||||
| Assistant | 14 |
|
||||
| History | 18 |
|
||||
| Knowledge | 19 |
|
||||
| **总计** | **64** |
|
||||
|
||||
### CI/CD 示例 (.github/workflows/test.yml)
|
||||
|
||||
```yaml
|
||||
name: Tests
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r api/requirements.txt
|
||||
pip install pytest pytest-cov
|
||||
- name: Run tests
|
||||
run: pytest api/tests/ -v --cov=app
|
||||
```
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker, DeclarativeBase
|
||||
import os
|
||||
|
||||
DATABASE_URL = "sqlite:///./data/app.db"
|
||||
# 使用绝对路径
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
DATABASE_URL = f"sqlite:///{os.path.join(BASE_DIR, 'data', 'app.db')}"
|
||||
|
||||
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
|
||||
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
17
api/app/id_generator.py
Normal file
17
api/app/id_generator.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import uuid
|
||||
from typing import Any, Type
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
def short_id(prefix: str, size: int = 8) -> str:
|
||||
return f"{prefix}_{uuid.uuid4().hex[:size]}"
|
||||
|
||||
|
||||
def unique_short_id(prefix: str, db: Session, model_cls: Type[Any], size: int = 8) -> str:
|
||||
for _ in range(10):
|
||||
candidate = short_id(prefix, size=size)
|
||||
exists = db.query(model_cls.id).filter(model_cls.id == candidate).first()
|
||||
if not exists:
|
||||
return candidate
|
||||
raise RuntimeError(f"failed to generate unique id for {model_cls.__name__}")
|
||||
@@ -4,7 +4,7 @@ from contextlib import asynccontextmanager
|
||||
import os
|
||||
|
||||
from .db import Base, engine
|
||||
from .routers import assistants, history
|
||||
from .routers import assistants, voices, workflows, history, knowledge, llm, asr, tools
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
@@ -32,7 +32,13 @@ app.add_middleware(
|
||||
|
||||
# 路由
|
||||
app.include_router(assistants.router, prefix="/api")
|
||||
app.include_router(voices.router, prefix="/api")
|
||||
app.include_router(workflows.router, prefix="/api")
|
||||
app.include_router(history.router, prefix="/api")
|
||||
app.include_router(knowledge.router, prefix="/api")
|
||||
app.include_router(llm.router, prefix="/api")
|
||||
app.include_router(asr.router, prefix="/api")
|
||||
app.include_router(tools.router, prefix="/api")
|
||||
|
||||
|
||||
@app.get("/")
|
||||
@@ -43,30 +49,3 @@ def root():
|
||||
@app.get("/health")
|
||||
def health():
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
# 初始化默认数据
|
||||
@app.on_event("startup")
|
||||
def init_default_data():
|
||||
from sqlalchemy.orm import Session
|
||||
from .db import SessionLocal
|
||||
from .models import Voice
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
# 检查是否已有数据
|
||||
if db.query(Voice).count() == 0:
|
||||
# 插入默认声音
|
||||
voices = [
|
||||
Voice(id="v1", name="Xiaoyun", vendor="Ali", gender="Female", language="zh", description="Gentle and professional."),
|
||||
Voice(id="v2", name="Kevin", vendor="Volcano", gender="Male", language="en", description="Deep and authoritative."),
|
||||
Voice(id="v3", name="Abby", vendor="Minimax", gender="Female", language="en", description="Cheerful and lively."),
|
||||
Voice(id="v4", name="Guang", vendor="Ali", gender="Male", language="zh", description="Standard newscast style."),
|
||||
Voice(id="v5", name="Doubao", vendor="Volcano", gender="Female", language="zh", description="Cute and young."),
|
||||
]
|
||||
for v in voices:
|
||||
db.add(v)
|
||||
db.commit()
|
||||
print("✅ 默认声音数据已初始化")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
from sqlalchemy import String, Integer, DateTime, Text, Float, ForeignKey, JSON
|
||||
from sqlalchemy import String, Integer, DateTime, Text, Float, ForeignKey, JSON, Enum
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from .db import Base
|
||||
@@ -15,18 +15,99 @@ class User(Base):
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
|
||||
# ============ Voice ============
|
||||
class Voice(Base):
|
||||
__tablename__ = "voices"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||
user_id: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("users.id"), index=True, nullable=True)
|
||||
name: Mapped[str] = mapped_column(String(128), nullable=False)
|
||||
vendor: Mapped[str] = mapped_column(String(64), nullable=False)
|
||||
gender: Mapped[str] = mapped_column(String(32), nullable=False)
|
||||
language: Mapped[str] = mapped_column(String(16), nullable=False)
|
||||
description: Mapped[str] = mapped_column(String(255), nullable=False)
|
||||
voice_params: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
model: Mapped[Optional[str]] = mapped_column(String(128), nullable=True) # 厂商语音模型标识
|
||||
voice_key: Mapped[Optional[str]] = mapped_column(String(128), nullable=True) # 厂商voice_key
|
||||
api_key: Mapped[Optional[str]] = mapped_column(String(512), nullable=True) # 每个声音独立 API key
|
||||
base_url: Mapped[Optional[str]] = mapped_column(String(512), nullable=True) # 每个声音独立 OpenAI-compatible base_url
|
||||
speed: Mapped[float] = mapped_column(Float, default=1.0)
|
||||
gain: Mapped[int] = mapped_column(Integer, default=0)
|
||||
pitch: Mapped[int] = mapped_column(Integer, default=0)
|
||||
enabled: Mapped[bool] = mapped_column(default=True)
|
||||
is_system: Mapped[bool] = mapped_column(default=False)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
user = relationship("User", foreign_keys=[user_id])
|
||||
|
||||
|
||||
# ============ LLM Model ============
|
||||
class LLMModel(Base):
|
||||
__tablename__ = "llm_models"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||
user_id: Mapped[int] = mapped_column(Integer, ForeignKey("users.id"), index=True)
|
||||
name: Mapped[str] = mapped_column(String(128), nullable=False)
|
||||
vendor: Mapped[str] = mapped_column(String(64), nullable=False)
|
||||
type: Mapped[str] = mapped_column(String(32), nullable=False) # text/embedding/rerank
|
||||
base_url: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||
api_key: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||
model_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
|
||||
temperature: Mapped[Optional[float]] = mapped_column(Float, nullable=True)
|
||||
context_length: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
|
||||
enabled: Mapped[bool] = mapped_column(default=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
user = relationship("User")
|
||||
|
||||
|
||||
# ============ ASR Model ============
|
||||
class ASRModel(Base):
|
||||
__tablename__ = "asr_models"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||
user_id: Mapped[int] = mapped_column(Integer, ForeignKey("users.id"), index=True)
|
||||
name: Mapped[str] = mapped_column(String(128), nullable=False)
|
||||
vendor: Mapped[str] = mapped_column(String(64), nullable=False)
|
||||
language: Mapped[str] = mapped_column(String(32), nullable=False) # zh/en/Multi-lingual
|
||||
base_url: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||
api_key: Mapped[str] = mapped_column(String(512), nullable=False)
|
||||
model_name: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
|
||||
hotwords: Mapped[dict] = mapped_column(JSON, default=list)
|
||||
enable_punctuation: Mapped[bool] = mapped_column(default=True)
|
||||
enable_normalization: Mapped[bool] = mapped_column(default=True)
|
||||
enabled: Mapped[bool] = mapped_column(default=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
user = relationship("User")
|
||||
|
||||
|
||||
# ============ Tool Resource ============
|
||||
class ToolResource(Base):
|
||||
__tablename__ = "tool_resources"
|
||||
|
||||
id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||
user_id: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("users.id"), index=True, nullable=True)
|
||||
name: Mapped[str] = mapped_column(String(128), nullable=False)
|
||||
description: Mapped[str] = mapped_column(String(512), nullable=False, default="")
|
||||
category: Mapped[str] = mapped_column(String(32), nullable=False, default="system") # system/query
|
||||
icon: Mapped[str] = mapped_column(String(64), nullable=False, default="Wrench")
|
||||
http_method: Mapped[str] = mapped_column(String(16), nullable=False, default="GET")
|
||||
http_url: Mapped[Optional[str]] = mapped_column(String(1024), nullable=True)
|
||||
http_headers: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
http_timeout_ms: Mapped[int] = mapped_column(Integer, default=10000)
|
||||
parameter_schema: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
parameter_defaults: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
wait_for_response: Mapped[bool] = mapped_column(default=False)
|
||||
enabled: Mapped[bool] = mapped_column(default=True)
|
||||
is_system: Mapped[bool] = mapped_column(default=False)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
user = relationship("User")
|
||||
|
||||
|
||||
# ============ Assistant ============
|
||||
class Assistant(Base):
|
||||
__tablename__ = "assistants"
|
||||
|
||||
@@ -34,25 +115,57 @@ class Assistant(Base):
|
||||
user_id: Mapped[int] = mapped_column(Integer, ForeignKey("users.id"), index=True)
|
||||
name: Mapped[str] = mapped_column(String(255), nullable=False)
|
||||
call_count: Mapped[int] = mapped_column(Integer, default=0)
|
||||
first_turn_mode: Mapped[str] = mapped_column(String(32), default="bot_first")
|
||||
opener: Mapped[str] = mapped_column(Text, default="")
|
||||
manual_opener_tool_calls: Mapped[list] = mapped_column(JSON, default=list)
|
||||
generated_opener_enabled: Mapped[bool] = mapped_column(default=False)
|
||||
prompt: Mapped[str] = mapped_column(Text, default="")
|
||||
knowledge_base_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
language: Mapped[str] = mapped_column(String(16), default="zh")
|
||||
voice_output_enabled: Mapped[bool] = mapped_column(default=True)
|
||||
voice: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
speed: Mapped[float] = mapped_column(Float, default=1.0)
|
||||
hotwords: Mapped[dict] = mapped_column(JSON, default=list)
|
||||
tools: Mapped[dict] = mapped_column(JSON, default=list)
|
||||
asr_interim_enabled: Mapped[bool] = mapped_column(default=False)
|
||||
bot_cannot_be_interrupted: Mapped[bool] = mapped_column(default=False)
|
||||
interruption_sensitivity: Mapped[int] = mapped_column(Integer, default=500)
|
||||
config_mode: Mapped[str] = mapped_column(String(32), default="platform")
|
||||
api_url: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
|
||||
api_key: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
|
||||
app_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
|
||||
# 模型关联
|
||||
llm_model_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
asr_model_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
embedding_model_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
rerank_model_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
user = relationship("User")
|
||||
call_records = relationship("CallRecord", back_populates="assistant")
|
||||
opener_audio = relationship("AssistantOpenerAudio", back_populates="assistant", uselist=False, cascade="all, delete-orphan")
|
||||
|
||||
|
||||
class AssistantOpenerAudio(Base):
|
||||
__tablename__ = "assistant_opener_audio"
|
||||
|
||||
assistant_id: Mapped[str] = mapped_column(String(64), ForeignKey("assistants.id"), primary_key=True)
|
||||
enabled: Mapped[bool] = mapped_column(default=False)
|
||||
file_path: Mapped[Optional[str]] = mapped_column(String(512), nullable=True)
|
||||
encoding: Mapped[str] = mapped_column(String(32), default="pcm_s16le")
|
||||
sample_rate_hz: Mapped[int] = mapped_column(Integer, default=16000)
|
||||
channels: Mapped[int] = mapped_column(Integer, default=1)
|
||||
duration_ms: Mapped[int] = mapped_column(Integer, default=0)
|
||||
text_hash: Mapped[Optional[str]] = mapped_column(String(128), nullable=True)
|
||||
tts_fingerprint: Mapped[Optional[str]] = mapped_column(String(256), nullable=True)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
|
||||
|
||||
assistant = relationship("Assistant", back_populates="opener_audio")
|
||||
|
||||
|
||||
# ============ Knowledge Base ============
|
||||
class KnowledgeBase(Base):
|
||||
__tablename__ = "knowledge_bases"
|
||||
|
||||
@@ -92,6 +205,7 @@ class KnowledgeDocument(Base):
|
||||
kb = relationship("KnowledgeBase", back_populates="documents")
|
||||
|
||||
|
||||
# ============ Workflow ============
|
||||
class Workflow(Base):
|
||||
__tablename__ = "workflows"
|
||||
|
||||
@@ -108,6 +222,7 @@ class Workflow(Base):
|
||||
user = relationship("User")
|
||||
|
||||
|
||||
# ============ Call Record ============
|
||||
class CallRecord(Base):
|
||||
__tablename__ = "call_records"
|
||||
|
||||
|
||||
@@ -1,11 +1,21 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
from . import assistants
|
||||
from . import voices
|
||||
from . import workflows
|
||||
from . import history
|
||||
from . import knowledge
|
||||
from . import llm
|
||||
from . import asr
|
||||
from . import tools
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
router.include_router(assistants.router)
|
||||
router.include_router(voices.router)
|
||||
router.include_router(workflows.router)
|
||||
router.include_router(history.router)
|
||||
router.include_router(knowledge.router)
|
||||
router.include_router(llm.router)
|
||||
router.include_router(asr.router)
|
||||
router.include_router(tools.router)
|
||||
|
||||
785
api/app/routers/asr.py
Normal file
785
api/app/routers/asr.py
Normal file
@@ -0,0 +1,785 @@
|
||||
import asyncio
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import wave
|
||||
from array import array
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..db import get_db
|
||||
from ..id_generator import unique_short_id
|
||||
from ..models import ASRModel
|
||||
from ..schemas import (
|
||||
ASRModelCreate, ASRModelUpdate, ASRModelOut,
|
||||
ASRTestRequest, ASRTestResponse
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/asr", tags=["ASR Models"])
|
||||
|
||||
OPENAI_COMPATIBLE_DEFAULT_ASR_MODEL = "FunAudioLLM/SenseVoiceSmall"
|
||||
DASHSCOPE_DEFAULT_ASR_MODEL = "qwen3-asr-flash-realtime"
|
||||
DASHSCOPE_DEFAULT_BASE_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
|
||||
|
||||
try:
|
||||
import dashscope
|
||||
from dashscope.audio.qwen_omni import MultiModality, OmniRealtimeCallback, OmniRealtimeConversation
|
||||
|
||||
try:
|
||||
from dashscope.audio.qwen_omni import TranscriptionParams
|
||||
except ImportError:
|
||||
from dashscope.audio.qwen_omni.omni_realtime import TranscriptionParams
|
||||
|
||||
DASHSCOPE_SDK_AVAILABLE = True
|
||||
DASHSCOPE_IMPORT_ERROR = ""
|
||||
except Exception as exc:
|
||||
dashscope = None # type: ignore[assignment]
|
||||
MultiModality = None # type: ignore[assignment]
|
||||
OmniRealtimeConversation = None # type: ignore[assignment]
|
||||
TranscriptionParams = None # type: ignore[assignment]
|
||||
DASHSCOPE_SDK_AVAILABLE = False
|
||||
DASHSCOPE_IMPORT_ERROR = f"{type(exc).__name__}: {exc}"
|
||||
|
||||
class OmniRealtimeCallback: # type: ignore[no-redef]
|
||||
"""Fallback callback base when DashScope SDK is unavailable."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
def _is_openai_compatible_vendor(vendor: str) -> bool:
|
||||
normalized = (vendor or "").strip().lower()
|
||||
return normalized in {
|
||||
"openai compatible",
|
||||
"openai-compatible",
|
||||
"siliconflow", # backward compatibility
|
||||
"硅基流动", # backward compatibility
|
||||
}
|
||||
|
||||
|
||||
def _is_dashscope_vendor(vendor: str) -> bool:
|
||||
return (vendor or "").strip().lower() == "dashscope"
|
||||
|
||||
|
||||
def _default_asr_model(vendor: str) -> str:
|
||||
if _is_openai_compatible_vendor(vendor):
|
||||
return OPENAI_COMPATIBLE_DEFAULT_ASR_MODEL
|
||||
if _is_dashscope_vendor(vendor):
|
||||
return DASHSCOPE_DEFAULT_ASR_MODEL
|
||||
return "whisper-1"
|
||||
|
||||
|
||||
def _dashscope_language(language: Optional[str]) -> Optional[str]:
|
||||
normalized = (language or "").strip().lower()
|
||||
if not normalized or normalized in {"multi-lingual", "multilingual", "multi_lingual", "auto"}:
|
||||
return None
|
||||
if normalized.startswith("zh"):
|
||||
return "zh"
|
||||
if normalized.startswith("en"):
|
||||
return "en"
|
||||
return normalized
|
||||
|
||||
|
||||
class _DashScopePreviewCallback(OmniRealtimeCallback):
|
||||
"""Collect DashScope ASR websocket events for preview/test flows."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._open_event = threading.Event()
|
||||
self._session_ready_event = threading.Event()
|
||||
self._done_event = threading.Event()
|
||||
self._lock = threading.Lock()
|
||||
self._final_text = ""
|
||||
self._last_interim_text = ""
|
||||
self._error_message: Optional[str] = None
|
||||
|
||||
def on_open(self) -> None:
|
||||
self._open_event.set()
|
||||
|
||||
def on_close(self, code: int, reason: str) -> None:
|
||||
if self._done_event.is_set():
|
||||
return
|
||||
self._error_message = f"DashScope websocket closed unexpectedly: {code} {reason}"
|
||||
self._done_event.set()
|
||||
self._session_ready_event.set()
|
||||
|
||||
def on_error(self, message: Any) -> None:
|
||||
self._error_message = str(message)
|
||||
self._done_event.set()
|
||||
self._session_ready_event.set()
|
||||
|
||||
def on_event(self, response: Any) -> None:
|
||||
payload = _coerce_dashscope_event(response)
|
||||
event_type = str(payload.get("type") or "").strip()
|
||||
if not event_type:
|
||||
return
|
||||
|
||||
if event_type in {"session.created", "session.updated"}:
|
||||
self._session_ready_event.set()
|
||||
return
|
||||
|
||||
if event_type == "error" or event_type.endswith(".failed"):
|
||||
self._error_message = _format_dashscope_error_event(payload)
|
||||
self._done_event.set()
|
||||
self._session_ready_event.set()
|
||||
return
|
||||
|
||||
if event_type == "conversation.item.input_audio_transcription.text":
|
||||
interim_text = _extract_dashscope_text(payload, keys=("stash", "text", "transcript"))
|
||||
if interim_text:
|
||||
with self._lock:
|
||||
self._last_interim_text = interim_text
|
||||
return
|
||||
|
||||
if event_type == "conversation.item.input_audio_transcription.completed":
|
||||
final_text = _extract_dashscope_text(payload, keys=("transcript", "text", "stash"))
|
||||
with self._lock:
|
||||
if final_text:
|
||||
self._final_text = final_text
|
||||
self._done_event.set()
|
||||
return
|
||||
|
||||
if event_type in {"response.done", "session.finished"}:
|
||||
self._done_event.set()
|
||||
|
||||
def wait_for_open(self, timeout: float = 10.0) -> None:
|
||||
if not self._open_event.wait(timeout):
|
||||
raise TimeoutError("DashScope websocket open timeout")
|
||||
|
||||
def wait_for_session_ready(self, timeout: float = 6.0) -> bool:
|
||||
return self._session_ready_event.wait(timeout)
|
||||
|
||||
def wait_for_done(self, timeout: float = 20.0) -> None:
|
||||
if not self._done_event.wait(timeout):
|
||||
raise TimeoutError("DashScope transcription timeout")
|
||||
|
||||
def raise_if_error(self) -> None:
|
||||
if self._error_message:
|
||||
raise RuntimeError(self._error_message)
|
||||
|
||||
def read_text(self) -> str:
|
||||
with self._lock:
|
||||
return self._final_text or self._last_interim_text
|
||||
|
||||
|
||||
def _coerce_dashscope_event(response: Any) -> Dict[str, Any]:
|
||||
if isinstance(response, dict):
|
||||
return response
|
||||
if isinstance(response, str):
|
||||
try:
|
||||
parsed = json.loads(response)
|
||||
if isinstance(parsed, dict):
|
||||
return parsed
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
return {"type": "raw", "message": str(response)}
|
||||
|
||||
|
||||
def _format_dashscope_error_event(payload: Dict[str, Any]) -> str:
|
||||
error = payload.get("error")
|
||||
if isinstance(error, dict):
|
||||
code = str(error.get("code") or "").strip()
|
||||
message = str(error.get("message") or "").strip()
|
||||
if code and message:
|
||||
return f"{code}: {message}"
|
||||
return message or str(error)
|
||||
return str(error or "DashScope realtime ASR error")
|
||||
|
||||
|
||||
def _extract_dashscope_text(payload: Dict[str, Any], *, keys: Tuple[str, ...]) -> str:
|
||||
for key in keys:
|
||||
value = payload.get(key)
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value.strip()
|
||||
if isinstance(value, dict):
|
||||
nested = _extract_dashscope_text(value, keys=keys)
|
||||
if nested:
|
||||
return nested
|
||||
|
||||
for value in payload.values():
|
||||
if isinstance(value, dict):
|
||||
nested = _extract_dashscope_text(value, keys=keys)
|
||||
if nested:
|
||||
return nested
|
||||
return ""
|
||||
|
||||
|
||||
def _create_dashscope_realtime_client(
|
||||
*,
|
||||
model: str,
|
||||
callback: _DashScopePreviewCallback,
|
||||
url: str,
|
||||
api_key: str,
|
||||
) -> Any:
|
||||
if OmniRealtimeConversation is None:
|
||||
raise RuntimeError("DashScope SDK unavailable")
|
||||
|
||||
init_kwargs = {
|
||||
"model": model,
|
||||
"callback": callback,
|
||||
"url": url,
|
||||
}
|
||||
try:
|
||||
return OmniRealtimeConversation(api_key=api_key, **init_kwargs) # type: ignore[misc]
|
||||
except TypeError as exc:
|
||||
if "api_key" not in str(exc):
|
||||
raise
|
||||
return OmniRealtimeConversation(**init_kwargs) # type: ignore[misc]
|
||||
|
||||
|
||||
def _close_dashscope_client(client: Any) -> None:
|
||||
finish_fn = getattr(client, "finish", None)
|
||||
if callable(finish_fn):
|
||||
try:
|
||||
finish_fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
close_fn = getattr(client, "close", None)
|
||||
if callable(close_fn):
|
||||
try:
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _configure_dashscope_session(
|
||||
*,
|
||||
client: Any,
|
||||
callback: _DashScopePreviewCallback,
|
||||
sample_rate: int,
|
||||
language: Optional[str],
|
||||
) -> None:
|
||||
update_fn = getattr(client, "update_session", None)
|
||||
if not callable(update_fn):
|
||||
raise RuntimeError("DashScope ASR SDK missing update_session method")
|
||||
|
||||
text_modality: Any = "text"
|
||||
if MultiModality is not None and hasattr(MultiModality, "TEXT"):
|
||||
text_modality = MultiModality.TEXT
|
||||
|
||||
transcription_params: Optional[Any] = None
|
||||
language_hint = _dashscope_language(language)
|
||||
if TranscriptionParams is not None:
|
||||
try:
|
||||
params_kwargs: Dict[str, Any] = {
|
||||
"sample_rate": sample_rate,
|
||||
"input_audio_format": "pcm",
|
||||
}
|
||||
if language_hint:
|
||||
params_kwargs["language"] = language_hint
|
||||
transcription_params = TranscriptionParams(**params_kwargs)
|
||||
except Exception:
|
||||
transcription_params = None
|
||||
|
||||
update_attempts = [
|
||||
{
|
||||
"output_modalities": [text_modality],
|
||||
"enable_turn_detection": False,
|
||||
"enable_input_audio_transcription": True,
|
||||
"transcription_params": transcription_params,
|
||||
},
|
||||
{
|
||||
"output_modalities": [text_modality],
|
||||
"enable_turn_detection": False,
|
||||
"enable_input_audio_transcription": True,
|
||||
},
|
||||
{
|
||||
"output_modalities": [text_modality],
|
||||
},
|
||||
]
|
||||
|
||||
last_error: Optional[Exception] = None
|
||||
for params in update_attempts:
|
||||
if params.get("transcription_params") is None:
|
||||
params = {key: value for key, value in params.items() if key != "transcription_params"}
|
||||
try:
|
||||
update_fn(**params)
|
||||
callback.wait_for_session_ready()
|
||||
callback.raise_if_error()
|
||||
return
|
||||
except TypeError as exc:
|
||||
last_error = exc
|
||||
continue
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
continue
|
||||
|
||||
raise RuntimeError(f"DashScope ASR session.update failed: {last_error}")
|
||||
|
||||
|
||||
def _load_wav_pcm16_mono(audio_bytes: bytes) -> Tuple[bytes, int]:
|
||||
try:
|
||||
with wave.open(io.BytesIO(audio_bytes), "rb") as wav_file:
|
||||
channel_count = wav_file.getnchannels()
|
||||
sample_width = wav_file.getsampwidth()
|
||||
sample_rate = wav_file.getframerate()
|
||||
compression = wav_file.getcomptype()
|
||||
pcm_frames = wav_file.readframes(wav_file.getnframes())
|
||||
except wave.Error as exc:
|
||||
raise RuntimeError("DashScope preview currently supports WAV audio. Record in browser or upload a .wav file.") from exc
|
||||
|
||||
if compression != "NONE":
|
||||
raise RuntimeError("DashScope preview requires uncompressed PCM WAV audio.")
|
||||
if sample_width != 2:
|
||||
raise RuntimeError("DashScope preview requires 16-bit PCM WAV audio.")
|
||||
if not pcm_frames:
|
||||
raise RuntimeError("Uploaded WAV file is empty")
|
||||
if channel_count <= 1:
|
||||
return pcm_frames, sample_rate
|
||||
|
||||
samples = array("h")
|
||||
samples.frombytes(pcm_frames)
|
||||
if sys.byteorder == "big":
|
||||
samples.byteswap()
|
||||
|
||||
mono_samples = array(
|
||||
"h",
|
||||
(
|
||||
int(sum(samples[index:index + channel_count]) / channel_count)
|
||||
for index in range(0, len(samples), channel_count)
|
||||
),
|
||||
)
|
||||
if sys.byteorder == "big":
|
||||
mono_samples.byteswap()
|
||||
return mono_samples.tobytes(), sample_rate
|
||||
|
||||
|
||||
def _probe_dashscope_asr_connection(*, api_key: str, base_url: str, model: str, language: Optional[str]) -> None:
|
||||
if not DASHSCOPE_SDK_AVAILABLE:
|
||||
hint = f"`{sys.executable} -m pip install dashscope>=1.25.11`"
|
||||
detail = f"; import error: {DASHSCOPE_IMPORT_ERROR}" if DASHSCOPE_IMPORT_ERROR else ""
|
||||
raise RuntimeError(f"dashscope package not installed; install with {hint}{detail}")
|
||||
|
||||
callback = _DashScopePreviewCallback()
|
||||
if dashscope is not None:
|
||||
dashscope.api_key = api_key
|
||||
client = _create_dashscope_realtime_client(
|
||||
model=model,
|
||||
callback=callback,
|
||||
url=base_url,
|
||||
api_key=api_key,
|
||||
)
|
||||
|
||||
try:
|
||||
client.connect()
|
||||
callback.wait_for_open()
|
||||
_configure_dashscope_session(
|
||||
client=client,
|
||||
callback=callback,
|
||||
sample_rate=16000,
|
||||
language=language,
|
||||
)
|
||||
finally:
|
||||
_close_dashscope_client(client)
|
||||
|
||||
|
||||
def _transcribe_dashscope_preview(
|
||||
*,
|
||||
audio_bytes: bytes,
|
||||
api_key: str,
|
||||
base_url: str,
|
||||
model: str,
|
||||
language: Optional[str],
|
||||
) -> Dict[str, Any]:
|
||||
if not DASHSCOPE_SDK_AVAILABLE:
|
||||
hint = f"`{sys.executable} -m pip install dashscope>=1.25.11`"
|
||||
detail = f"; import error: {DASHSCOPE_IMPORT_ERROR}" if DASHSCOPE_IMPORT_ERROR else ""
|
||||
raise RuntimeError(f"dashscope package not installed; install with {hint}{detail}")
|
||||
|
||||
pcm_audio, sample_rate = _load_wav_pcm16_mono(audio_bytes)
|
||||
callback = _DashScopePreviewCallback()
|
||||
if dashscope is not None:
|
||||
dashscope.api_key = api_key
|
||||
client = _create_dashscope_realtime_client(
|
||||
model=model,
|
||||
callback=callback,
|
||||
url=base_url,
|
||||
api_key=api_key,
|
||||
)
|
||||
|
||||
try:
|
||||
client.connect()
|
||||
callback.wait_for_open()
|
||||
_configure_dashscope_session(
|
||||
client=client,
|
||||
callback=callback,
|
||||
sample_rate=sample_rate,
|
||||
language=language,
|
||||
)
|
||||
|
||||
append_fn = getattr(client, "append_audio", None)
|
||||
if not callable(append_fn):
|
||||
raise RuntimeError("DashScope ASR SDK missing append_audio method")
|
||||
commit_fn = getattr(client, "commit", None)
|
||||
if not callable(commit_fn):
|
||||
raise RuntimeError("DashScope ASR SDK missing commit method")
|
||||
|
||||
append_fn(base64.b64encode(pcm_audio).decode("ascii"))
|
||||
commit_fn()
|
||||
callback.wait_for_done()
|
||||
callback.raise_if_error()
|
||||
return {
|
||||
"transcript": callback.read_text(),
|
||||
"language": _dashscope_language(language) or "Multi-lingual",
|
||||
"confidence": None,
|
||||
}
|
||||
finally:
|
||||
_close_dashscope_client(client)
|
||||
|
||||
|
||||
# ============ ASR Models CRUD ============
|
||||
@router.get("")
|
||||
def list_asr_models(
|
||||
language: Optional[str] = None,
|
||||
enabled: Optional[bool] = None,
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取ASR模型列表"""
|
||||
query = db.query(ASRModel)
|
||||
|
||||
if language:
|
||||
query = query.filter(ASRModel.language == language)
|
||||
if enabled is not None:
|
||||
query = query.filter(ASRModel.enabled == enabled)
|
||||
|
||||
total = query.count()
|
||||
models = query.order_by(ASRModel.created_at.desc()) \
|
||||
.offset((page-1)*limit).limit(limit).all()
|
||||
|
||||
return {"total": total, "page": page, "limit": limit, "list": models}
|
||||
|
||||
|
||||
@router.get("/{id}", response_model=ASRModelOut)
|
||||
def get_asr_model(id: str, db: Session = Depends(get_db)):
|
||||
"""获取单个ASR模型详情"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
return model
|
||||
|
||||
|
||||
@router.post("", response_model=ASRModelOut)
|
||||
def create_asr_model(data: ASRModelCreate, db: Session = Depends(get_db)):
|
||||
"""创建ASR模型"""
|
||||
asr_model = ASRModel(
|
||||
id=unique_short_id("asr", db, ASRModel),
|
||||
user_id=1, # 默认用户
|
||||
name=data.name,
|
||||
vendor=data.vendor,
|
||||
language=data.language,
|
||||
base_url=data.base_url,
|
||||
api_key=data.api_key,
|
||||
model_name=data.model_name,
|
||||
hotwords=data.hotwords,
|
||||
enable_punctuation=data.enable_punctuation,
|
||||
enable_normalization=data.enable_normalization,
|
||||
enabled=data.enabled,
|
||||
)
|
||||
db.add(asr_model)
|
||||
db.commit()
|
||||
db.refresh(asr_model)
|
||||
return asr_model
|
||||
|
||||
|
||||
@router.put("/{id}", response_model=ASRModelOut)
|
||||
def update_asr_model(id: str, data: ASRModelUpdate, db: Session = Depends(get_db)):
|
||||
"""更新ASR模型"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
for field, value in update_data.items():
|
||||
setattr(model, field, value)
|
||||
|
||||
db.commit()
|
||||
db.refresh(model)
|
||||
return model
|
||||
|
||||
|
||||
@router.delete("/{id}")
|
||||
def delete_asr_model(id: str, db: Session = Depends(get_db)):
|
||||
"""删除ASR模型"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
db.delete(model)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
@router.post("/{id}/test", response_model=ASRTestResponse)
|
||||
def test_asr_model(
|
||||
id: str,
|
||||
request: Optional[ASRTestRequest] = None,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""测试ASR模型"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
if _is_dashscope_vendor(model.vendor):
|
||||
effective_api_key = (model.api_key or "").strip() or os.getenv("DASHSCOPE_API_KEY", "").strip() or os.getenv("ASR_API_KEY", "").strip()
|
||||
if not effective_api_key:
|
||||
return ASRTestResponse(success=False, error=f"API key is required for ASR model: {model.name}")
|
||||
|
||||
base_url = (model.base_url or "").strip() or DASHSCOPE_DEFAULT_BASE_URL
|
||||
selected_model = (model.model_name or "").strip() or _default_asr_model(model.vendor)
|
||||
_probe_dashscope_asr_connection(
|
||||
api_key=effective_api_key,
|
||||
base_url=base_url,
|
||||
model=selected_model,
|
||||
language=model.language,
|
||||
)
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
return ASRTestResponse(
|
||||
success=True,
|
||||
language=model.language,
|
||||
latency_ms=latency_ms,
|
||||
message="DashScope realtime ASR connected",
|
||||
)
|
||||
|
||||
# 连接性测试优先,避免依赖真实音频输入
|
||||
headers = {"Authorization": f"Bearer {model.api_key}"}
|
||||
with httpx.Client(timeout=60.0) as client:
|
||||
if _is_openai_compatible_vendor(model.vendor) or model.vendor.lower() == "paraformer":
|
||||
response = client.get(f"{model.base_url}/asr", headers=headers)
|
||||
elif model.vendor.lower() == "openai":
|
||||
response = client.get(f"{model.base_url}/audio/models", headers=headers)
|
||||
else:
|
||||
response = client.get(f"{model.base_url}/health", headers=headers)
|
||||
response.raise_for_status()
|
||||
raw_result = response.json()
|
||||
|
||||
# 兼容不同供应商格式
|
||||
if isinstance(raw_result, dict) and "results" in raw_result:
|
||||
result = raw_result
|
||||
elif isinstance(raw_result, dict) and "text" in raw_result:
|
||||
result = {"results": [{"transcript": raw_result.get("text", "")}]}
|
||||
else:
|
||||
result = {"results": [{"transcript": ""}]}
|
||||
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# 解析结果
|
||||
if result_data := result.get("results", [{}])[0]:
|
||||
transcript = result_data.get("transcript", "")
|
||||
return ASRTestResponse(
|
||||
success=True,
|
||||
transcript=transcript,
|
||||
language=result_data.get("language", model.language),
|
||||
confidence=result_data.get("confidence"),
|
||||
latency_ms=latency_ms,
|
||||
)
|
||||
|
||||
return ASRTestResponse(
|
||||
success=False,
|
||||
message="No transcript in response",
|
||||
latency_ms=latency_ms
|
||||
)
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
return ASRTestResponse(
|
||||
success=False,
|
||||
error=f"HTTP Error: {e.response.status_code} - {e.response.text[:200]}"
|
||||
)
|
||||
except Exception as e:
|
||||
return ASRTestResponse(
|
||||
success=False,
|
||||
error=str(e)[:200]
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{id}/transcribe")
|
||||
def transcribe_audio(
|
||||
id: str,
|
||||
audio_url: Optional[str] = None,
|
||||
audio_data: Optional[str] = None,
|
||||
hotwords: Optional[List[str]] = None,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""转写音频"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
|
||||
try:
|
||||
payload = {
|
||||
"model": model.model_name or "paraformer-v2",
|
||||
"input": {},
|
||||
"parameters": {
|
||||
"hotwords": " ".join(hotwords or model.hotwords or []),
|
||||
"enable_punctuation": model.enable_punctuation,
|
||||
"enable_normalization": model.enable_normalization,
|
||||
}
|
||||
}
|
||||
|
||||
headers = {"Authorization": f"Bearer {model.api_key}"}
|
||||
|
||||
if audio_url:
|
||||
payload["input"]["url"] = audio_url
|
||||
elif audio_data:
|
||||
payload["input"]["file_urls"] = []
|
||||
|
||||
with httpx.Client(timeout=120.0) as client:
|
||||
response = client.post(
|
||||
f"{model.base_url}/asr",
|
||||
json=payload,
|
||||
headers=headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
result = response.json()
|
||||
|
||||
if result_data := result.get("results", [{}])[0]:
|
||||
return {
|
||||
"success": True,
|
||||
"transcript": result_data.get("transcript", ""),
|
||||
"language": result_data.get("language", model.language),
|
||||
"confidence": result_data.get("confidence"),
|
||||
}
|
||||
|
||||
return {"success": False, "error": "No transcript in response"}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/{id}/preview", response_model=ASRTestResponse)
|
||||
async def preview_asr_model(
|
||||
id: str,
|
||||
file: UploadFile = File(...),
|
||||
language: Optional[str] = Form(None),
|
||||
api_key: Optional[str] = Form(None),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""预览 ASR:根据供应商调用 OpenAI-compatible 或 DashScope 实时识别。"""
|
||||
model = db.query(ASRModel).filter(ASRModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="ASR Model not found")
|
||||
|
||||
if not file:
|
||||
raise HTTPException(status_code=400, detail="Audio file is required")
|
||||
|
||||
filename = file.filename or "preview.wav"
|
||||
content_type = file.content_type or "application/octet-stream"
|
||||
if not content_type.startswith("audio/"):
|
||||
raise HTTPException(status_code=400, detail="Only audio files are supported")
|
||||
|
||||
audio_bytes = await file.read()
|
||||
if not audio_bytes:
|
||||
raise HTTPException(status_code=400, detail="Uploaded audio file is empty")
|
||||
|
||||
effective_api_key = (api_key or "").strip() or (model.api_key or "").strip()
|
||||
if not effective_api_key:
|
||||
if _is_openai_compatible_vendor(model.vendor):
|
||||
effective_api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
|
||||
elif _is_dashscope_vendor(model.vendor):
|
||||
effective_api_key = os.getenv("DASHSCOPE_API_KEY", "").strip() or os.getenv("ASR_API_KEY", "").strip()
|
||||
if not effective_api_key:
|
||||
raise HTTPException(status_code=400, detail=f"API key is required for ASR model: {model.name}")
|
||||
|
||||
base_url = (model.base_url or "").strip().rstrip("/")
|
||||
if _is_dashscope_vendor(model.vendor) and not base_url:
|
||||
base_url = DASHSCOPE_DEFAULT_BASE_URL
|
||||
if not base_url:
|
||||
raise HTTPException(status_code=400, detail=f"Base URL is required for ASR model: {model.name}")
|
||||
|
||||
selected_model = (model.model_name or "").strip() or _default_asr_model(model.vendor)
|
||||
effective_language = (language or "").strip() or None
|
||||
|
||||
start_time = time.time()
|
||||
if _is_dashscope_vendor(model.vendor):
|
||||
try:
|
||||
payload = await asyncio.to_thread(
|
||||
_transcribe_dashscope_preview,
|
||||
audio_bytes=audio_bytes,
|
||||
api_key=effective_api_key,
|
||||
base_url=base_url,
|
||||
model=selected_model,
|
||||
language=effective_language or model.language,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"DashScope ASR request failed: {exc}") from exc
|
||||
|
||||
transcript = str(payload.get("transcript") or "")
|
||||
response_language = str(payload.get("language") or effective_language or model.language)
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
return ASRTestResponse(
|
||||
success=bool(transcript),
|
||||
transcript=transcript,
|
||||
language=response_language,
|
||||
confidence=None,
|
||||
latency_ms=latency_ms,
|
||||
message=None if transcript else "No transcript in response",
|
||||
)
|
||||
|
||||
data = {"model": selected_model}
|
||||
if effective_language:
|
||||
data["language"] = effective_language
|
||||
if model.hotwords:
|
||||
data["prompt"] = " ".join(model.hotwords)
|
||||
|
||||
headers = {"Authorization": f"Bearer {effective_api_key}"}
|
||||
files = {"file": (filename, audio_bytes, content_type)}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=90.0) as client:
|
||||
response = client.post(
|
||||
f"{base_url}/audio/transcriptions",
|
||||
headers=headers,
|
||||
data=data,
|
||||
files=files,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"ASR request failed: {exc}") from exc
|
||||
|
||||
if response.status_code != 200:
|
||||
detail = response.text
|
||||
try:
|
||||
detail_json = response.json()
|
||||
detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
|
||||
except Exception:
|
||||
pass
|
||||
raise HTTPException(status_code=502, detail=f"ASR vendor error: {detail}")
|
||||
|
||||
try:
|
||||
payload = response.json()
|
||||
except Exception:
|
||||
payload = {"text": response.text}
|
||||
|
||||
transcript = ""
|
||||
response_language = model.language
|
||||
confidence = None
|
||||
if isinstance(payload, dict):
|
||||
transcript = str(payload.get("text") or payload.get("transcript") or "")
|
||||
response_language = str(payload.get("language") or effective_language or model.language)
|
||||
raw_confidence = payload.get("confidence")
|
||||
if raw_confidence is not None:
|
||||
try:
|
||||
confidence = float(raw_confidence)
|
||||
except (TypeError, ValueError):
|
||||
confidence = None
|
||||
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
return ASRTestResponse(
|
||||
success=bool(transcript),
|
||||
transcript=transcript,
|
||||
language=response_language,
|
||||
confidence=confidence,
|
||||
latency_ms=latency_ms,
|
||||
message=None if transcript else "No transcript in response",
|
||||
)
|
||||
@@ -1,157 +1,860 @@
|
||||
import audioop
|
||||
import hashlib
|
||||
import io
|
||||
import os
|
||||
import wave
|
||||
from pathlib import Path
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy import inspect, text
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import List
|
||||
from typing import Any, Dict, List, Optional
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from ..db import get_db
|
||||
from ..models import Assistant, Voice, Workflow
|
||||
from ..models import Assistant, AssistantOpenerAudio, LLMModel, ASRModel, Voice, ToolResource
|
||||
from ..schemas import (
|
||||
AssistantCreate, AssistantUpdate, AssistantOut,
|
||||
VoiceOut,
|
||||
WorkflowCreate, WorkflowUpdate, WorkflowOut
|
||||
AssistantCreate,
|
||||
AssistantUpdate,
|
||||
AssistantOut,
|
||||
AssistantEngineConfigResponse,
|
||||
AssistantOpenerAudioGenerateRequest,
|
||||
AssistantOpenerAudioOut,
|
||||
)
|
||||
from .tools import (
|
||||
TOOL_REGISTRY,
|
||||
TOOL_CATEGORY_MAP,
|
||||
TOOL_PARAMETER_DEFAULTS,
|
||||
TOOL_WAIT_FOR_RESPONSE_DEFAULTS,
|
||||
normalize_tool_id,
|
||||
_ensure_tool_resource_schema,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
router = APIRouter(prefix="/assistants", tags=["Assistants"])
|
||||
|
||||
OPENAI_COMPATIBLE_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
|
||||
OPENAI_COMPATIBLE_DEFAULT_BASE_URL = "https://api.siliconflow.cn/v1"
|
||||
OPENER_AUDIO_DIR = Path(__file__).resolve().parents[2] / "data" / "opener_audio"
|
||||
OPENAI_COMPATIBLE_KNOWN_VOICES = {
|
||||
"alex",
|
||||
"anna",
|
||||
"bella",
|
||||
"benjamin",
|
||||
"charles",
|
||||
"claire",
|
||||
"david",
|
||||
"diana",
|
||||
}
|
||||
|
||||
|
||||
# ============ Voices ============
|
||||
@router.get("/voices", response_model=List[VoiceOut])
|
||||
def list_voices(db: Session = Depends(get_db)):
|
||||
"""获取声音库列表"""
|
||||
voices = db.query(Voice).all()
|
||||
return voices
|
||||
def _is_openai_compatible_vendor(vendor: Optional[str]) -> bool:
|
||||
return (vendor or "").strip().lower() in {
|
||||
"siliconflow",
|
||||
"硅基流动",
|
||||
"openai compatible",
|
||||
"openai-compatible",
|
||||
}
|
||||
|
||||
|
||||
def _is_dashscope_vendor(vendor: Optional[str]) -> bool:
|
||||
return (vendor or "").strip().lower() in {
|
||||
"dashscope",
|
||||
}
|
||||
|
||||
|
||||
def _normalize_openai_compatible_voice_key(voice_value: str, model: str) -> str:
|
||||
raw = (voice_value or "").strip()
|
||||
model_name = (model or "").strip() or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
if not raw:
|
||||
return f"{model_name}:anna"
|
||||
|
||||
if ":" in raw:
|
||||
voice_model, voice_id = raw.split(":", 1)
|
||||
voice_model = voice_model.strip() or model_name
|
||||
voice_id = voice_id.strip()
|
||||
if voice_id.lower() in OPENAI_COMPATIBLE_KNOWN_VOICES:
|
||||
voice_id = voice_id.lower()
|
||||
return f"{voice_model}:{voice_id}"
|
||||
|
||||
voice_id = raw.lower() if raw.lower() in OPENAI_COMPATIBLE_KNOWN_VOICES else raw
|
||||
return f"{model_name}:{voice_id}"
|
||||
|
||||
|
||||
def _config_version_id(assistant: Assistant) -> str:
|
||||
updated = assistant.updated_at or assistant.created_at or datetime.utcnow()
|
||||
return f"asst_{assistant.id}_{updated.strftime('%Y%m%d%H%M%S')}"
|
||||
|
||||
|
||||
def _normalize_runtime_tool_schema(tool_id: str, raw_schema: Any) -> Dict[str, Any]:
|
||||
schema = dict(raw_schema) if isinstance(raw_schema, dict) else {}
|
||||
if not schema:
|
||||
fallback = TOOL_REGISTRY.get(tool_id, {}).get("parameters")
|
||||
if isinstance(fallback, dict):
|
||||
schema = dict(fallback)
|
||||
schema.setdefault("type", "object")
|
||||
if not isinstance(schema.get("properties"), dict):
|
||||
schema["properties"] = {}
|
||||
required = schema.get("required")
|
||||
if required is None or not isinstance(required, list):
|
||||
schema["required"] = []
|
||||
return schema
|
||||
|
||||
|
||||
def _compose_runtime_system_prompt(base_prompt: Optional[str]) -> str:
|
||||
raw = str(base_prompt or "").strip()
|
||||
tool_policy = (
|
||||
"Tool usage policy:\n"
|
||||
"- Tool function names/IDs are internal and must never be shown to users.\n"
|
||||
"- When users ask which tools are available, describe capabilities in natural language.\n"
|
||||
"- Do not expose raw tool call payloads, IDs, or executor details."
|
||||
)
|
||||
return f"{raw}\n\n{tool_policy}" if raw else tool_policy
|
||||
|
||||
|
||||
def _ensure_assistant_schema(db: Session) -> None:
|
||||
"""Apply lightweight SQLite migrations for newly added assistants columns."""
|
||||
bind = db.get_bind()
|
||||
inspector = inspect(bind)
|
||||
try:
|
||||
columns = {col["name"] for col in inspector.get_columns("assistants")}
|
||||
except Exception:
|
||||
return
|
||||
|
||||
altered = False
|
||||
if "manual_opener_tool_calls" not in columns:
|
||||
db.execute(text("ALTER TABLE assistants ADD COLUMN manual_opener_tool_calls JSON"))
|
||||
altered = True
|
||||
if "asr_interim_enabled" not in columns:
|
||||
db.execute(text("ALTER TABLE assistants ADD COLUMN asr_interim_enabled BOOLEAN DEFAULT 0"))
|
||||
altered = True
|
||||
if "app_id" not in columns:
|
||||
db.execute(text("ALTER TABLE assistants ADD COLUMN app_id VARCHAR(255)"))
|
||||
altered = True
|
||||
|
||||
if altered:
|
||||
db.commit()
|
||||
|
||||
|
||||
def _normalize_manual_opener_tool_calls(raw: Any, warnings: Optional[List[str]] = None) -> List[Dict[str, Any]]:
|
||||
normalized: List[Dict[str, Any]] = []
|
||||
if not isinstance(raw, list):
|
||||
return normalized
|
||||
|
||||
for idx, item in enumerate(raw):
|
||||
if not isinstance(item, dict):
|
||||
if warnings is not None:
|
||||
warnings.append(f"Ignored invalid manual opener tool call at index {idx}: not an object")
|
||||
continue
|
||||
|
||||
tool_name = normalize_tool_id(str(
|
||||
item.get("toolName")
|
||||
or item.get("tool_name")
|
||||
or item.get("name")
|
||||
or ""
|
||||
).strip())
|
||||
if not tool_name:
|
||||
if warnings is not None:
|
||||
warnings.append(f"Ignored invalid manual opener tool call at index {idx}: missing toolName")
|
||||
continue
|
||||
|
||||
args_raw = item.get("arguments")
|
||||
args: Dict[str, Any] = {}
|
||||
if isinstance(args_raw, dict):
|
||||
args = dict(args_raw)
|
||||
elif isinstance(args_raw, str):
|
||||
text_value = args_raw.strip()
|
||||
if text_value:
|
||||
try:
|
||||
parsed = json.loads(text_value)
|
||||
if isinstance(parsed, dict):
|
||||
args = parsed
|
||||
else:
|
||||
if warnings is not None:
|
||||
warnings.append(
|
||||
f"Ignored non-object arguments for manual opener tool call '{tool_name}' at index {idx}"
|
||||
)
|
||||
except Exception:
|
||||
if warnings is not None:
|
||||
warnings.append(f"Ignored invalid JSON arguments for manual opener tool call '{tool_name}' at index {idx}")
|
||||
elif args_raw is not None and warnings is not None:
|
||||
warnings.append(f"Ignored unsupported arguments type for manual opener tool call '{tool_name}' at index {idx}")
|
||||
|
||||
normalized.append({"toolName": tool_name, "arguments": args})
|
||||
|
||||
# Keep opener sequence intentionally short to avoid long pre-dialog delays.
|
||||
return normalized[:8]
|
||||
|
||||
|
||||
def _normalize_assistant_tool_ids(raw: Any) -> List[str]:
|
||||
if not isinstance(raw, list):
|
||||
return []
|
||||
normalized: List[str] = []
|
||||
seen: set[str] = set()
|
||||
for item in raw:
|
||||
tool_id = normalize_tool_id(item)
|
||||
if not tool_id or tool_id in seen:
|
||||
continue
|
||||
seen.add(tool_id)
|
||||
normalized.append(tool_id)
|
||||
return normalized
|
||||
|
||||
|
||||
def _resolve_runtime_tools(db: Session, selected_tool_ids: List[str], warnings: List[str]) -> List[Dict[str, Any]]:
|
||||
_ensure_tool_resource_schema(db)
|
||||
ids = _normalize_assistant_tool_ids(selected_tool_ids)
|
||||
if not ids:
|
||||
return []
|
||||
|
||||
resources = (
|
||||
db.query(ToolResource)
|
||||
.filter(ToolResource.id.in_(ids))
|
||||
.all()
|
||||
)
|
||||
by_id = {str(item.id): item for item in resources}
|
||||
|
||||
runtime_tools: List[Dict[str, Any]] = []
|
||||
for tool_id in ids:
|
||||
resource = by_id.get(tool_id)
|
||||
if resource and resource.enabled is False:
|
||||
warnings.append(f"Tool is disabled and skipped in runtime config: {tool_id}")
|
||||
continue
|
||||
|
||||
category = str(resource.category if resource else TOOL_CATEGORY_MAP.get(tool_id, "query"))
|
||||
display_name = (
|
||||
str(resource.name or tool_id).strip()
|
||||
if resource
|
||||
else str(TOOL_REGISTRY.get(tool_id, {}).get("name") or tool_id).strip()
|
||||
)
|
||||
description = (
|
||||
str(resource.description or resource.name or "").strip()
|
||||
if resource
|
||||
else str(TOOL_REGISTRY.get(tool_id, {}).get("description") or "").strip()
|
||||
)
|
||||
schema = _normalize_runtime_tool_schema(
|
||||
tool_id,
|
||||
resource.parameter_schema if resource else TOOL_REGISTRY.get(tool_id, {}).get("parameters"),
|
||||
)
|
||||
defaults_raw = resource.parameter_defaults if resource else TOOL_PARAMETER_DEFAULTS.get(tool_id)
|
||||
defaults = dict(defaults_raw) if isinstance(defaults_raw, dict) else {}
|
||||
wait_for_response = (
|
||||
bool(resource.wait_for_response)
|
||||
if resource
|
||||
else bool(TOOL_WAIT_FOR_RESPONSE_DEFAULTS.get(tool_id, False))
|
||||
)
|
||||
|
||||
if not resource and tool_id not in TOOL_REGISTRY:
|
||||
warnings.append(f"Tool resource not found: {tool_id}")
|
||||
|
||||
runtime_tool: Dict[str, Any] = {
|
||||
"type": "function",
|
||||
"executor": "client" if category == "system" else "server",
|
||||
"function": {
|
||||
"name": tool_id,
|
||||
"description": (
|
||||
f"Display name: {display_name}. {description}".strip()
|
||||
if display_name
|
||||
else (description or tool_id)
|
||||
),
|
||||
"parameters": schema,
|
||||
},
|
||||
"displayName": display_name or tool_id,
|
||||
"toolId": tool_id,
|
||||
"waitForResponse": wait_for_response,
|
||||
}
|
||||
if defaults:
|
||||
runtime_tool["defaultArgs"] = defaults
|
||||
runtime_tools.append(runtime_tool)
|
||||
|
||||
return runtime_tools
|
||||
|
||||
|
||||
def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> tuple[Dict[str, Any], List[str]]:
|
||||
warnings: List[str] = []
|
||||
generated_opener_enabled = bool(assistant.generated_opener_enabled)
|
||||
manual_opener_tool_calls = _normalize_manual_opener_tool_calls(
|
||||
assistant.manual_opener_tool_calls,
|
||||
warnings=warnings,
|
||||
)
|
||||
metadata: Dict[str, Any] = {
|
||||
"systemPrompt": _compose_runtime_system_prompt(assistant.prompt),
|
||||
"firstTurnMode": assistant.first_turn_mode or "bot_first",
|
||||
# Generated opener should rely on systemPrompt instead of fixed opener text.
|
||||
"greeting": "" if generated_opener_enabled else (assistant.opener or ""),
|
||||
"generatedOpenerEnabled": generated_opener_enabled,
|
||||
"manualOpenerToolCalls": manual_opener_tool_calls,
|
||||
"output": {"mode": "audio" if assistant.voice_output_enabled else "text"},
|
||||
"bargeIn": {
|
||||
"enabled": not bool(assistant.bot_cannot_be_interrupted),
|
||||
"minDurationMs": int(assistant.interruption_sensitivity or 500),
|
||||
},
|
||||
"services": {},
|
||||
"tools": _resolve_runtime_tools(db, assistant.tools or [], warnings),
|
||||
"history": {
|
||||
"assistantId": assistant.id,
|
||||
"userId": int(assistant.user_id or 1),
|
||||
"source": "debug",
|
||||
},
|
||||
}
|
||||
|
||||
config_mode = str(assistant.config_mode or "platform").strip().lower()
|
||||
|
||||
if config_mode == "dify":
|
||||
metadata["services"]["llm"] = {
|
||||
"provider": "openai",
|
||||
"model": "",
|
||||
"apiKey": assistant.api_key,
|
||||
"baseUrl": assistant.api_url,
|
||||
}
|
||||
if not (assistant.api_url or "").strip():
|
||||
warnings.append(f"External LLM API URL is empty for mode: {assistant.config_mode}")
|
||||
if not (assistant.api_key or "").strip():
|
||||
warnings.append(f"External LLM API key is empty for mode: {assistant.config_mode}")
|
||||
elif config_mode == "fastgpt":
|
||||
metadata["services"]["llm"] = {
|
||||
"provider": "fastgpt",
|
||||
"model": "fastgpt",
|
||||
"apiKey": assistant.api_key,
|
||||
"baseUrl": assistant.api_url,
|
||||
}
|
||||
if (assistant.app_id or "").strip():
|
||||
metadata["services"]["llm"]["appId"] = assistant.app_id
|
||||
if not (assistant.api_url or "").strip():
|
||||
warnings.append(f"FastGPT API URL is empty for mode: {assistant.config_mode}")
|
||||
if not (assistant.api_key or "").strip():
|
||||
warnings.append(f"FastGPT API key is empty for mode: {assistant.config_mode}")
|
||||
elif assistant.llm_model_id:
|
||||
llm = db.query(LLMModel).filter(LLMModel.id == assistant.llm_model_id).first()
|
||||
if llm:
|
||||
metadata["services"]["llm"] = {
|
||||
"provider": "openai",
|
||||
"model": llm.model_name or llm.name,
|
||||
"apiKey": llm.api_key,
|
||||
"baseUrl": llm.base_url,
|
||||
}
|
||||
else:
|
||||
warnings.append(f"LLM model not found: {assistant.llm_model_id}")
|
||||
|
||||
asr_runtime: Dict[str, Any] = {
|
||||
"enableInterim": bool(assistant.asr_interim_enabled),
|
||||
}
|
||||
if assistant.asr_model_id:
|
||||
asr = db.query(ASRModel).filter(ASRModel.id == assistant.asr_model_id).first()
|
||||
if asr:
|
||||
if _is_dashscope_vendor(asr.vendor):
|
||||
asr_provider = "dashscope"
|
||||
elif _is_openai_compatible_vendor(asr.vendor):
|
||||
asr_provider = "openai_compatible"
|
||||
else:
|
||||
asr_provider = "buffered"
|
||||
asr_runtime.update({
|
||||
"provider": asr_provider,
|
||||
"model": asr.model_name or asr.name,
|
||||
"apiKey": asr.api_key if asr_provider in {"openai_compatible", "dashscope"} else None,
|
||||
"baseUrl": asr.base_url if asr_provider in {"openai_compatible", "dashscope"} else None,
|
||||
})
|
||||
else:
|
||||
warnings.append(f"ASR model not found: {assistant.asr_model_id}")
|
||||
metadata["services"]["asr"] = asr_runtime
|
||||
|
||||
if not assistant.voice_output_enabled:
|
||||
metadata["services"]["tts"] = {"enabled": False}
|
||||
elif assistant.voice:
|
||||
voice = db.query(Voice).filter(Voice.id == assistant.voice).first()
|
||||
if voice:
|
||||
if _is_dashscope_vendor(voice.vendor):
|
||||
tts_provider = "dashscope"
|
||||
elif _is_openai_compatible_vendor(voice.vendor):
|
||||
tts_provider = "openai_compatible"
|
||||
else:
|
||||
tts_provider = "edge"
|
||||
model = voice.model
|
||||
runtime_voice = voice.voice_key or voice.id
|
||||
if tts_provider == "openai_compatible":
|
||||
model = model or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
runtime_voice = _normalize_openai_compatible_voice_key(runtime_voice, model)
|
||||
metadata["services"]["tts"] = {
|
||||
"enabled": True,
|
||||
"provider": tts_provider,
|
||||
"model": model,
|
||||
"apiKey": voice.api_key if tts_provider in {"openai_compatible", "dashscope"} else None,
|
||||
"baseUrl": voice.base_url if tts_provider in {"openai_compatible", "dashscope"} else None,
|
||||
"voice": runtime_voice,
|
||||
"speed": assistant.speed or voice.speed,
|
||||
}
|
||||
else:
|
||||
# Keep assistant.voice as direct voice identifier fallback
|
||||
metadata["services"]["tts"] = {
|
||||
"enabled": True,
|
||||
"voice": assistant.voice,
|
||||
"speed": assistant.speed or 1.0,
|
||||
}
|
||||
warnings.append(f"Voice resource not found: {assistant.voice}")
|
||||
|
||||
if assistant.knowledge_base_id:
|
||||
metadata["knowledgeBaseId"] = assistant.knowledge_base_id
|
||||
metadata["knowledge"] = {
|
||||
"enabled": True,
|
||||
"kbId": assistant.knowledge_base_id,
|
||||
"nResults": 5,
|
||||
}
|
||||
opener_audio = assistant.opener_audio
|
||||
opener_audio_ready = bool(opener_audio and opener_audio.file_path and Path(opener_audio.file_path).exists())
|
||||
metadata["openerAudio"] = {
|
||||
"enabled": bool(opener_audio.enabled) if opener_audio else False,
|
||||
"ready": opener_audio_ready,
|
||||
"encoding": opener_audio.encoding if opener_audio else "pcm_s16le",
|
||||
"sampleRateHz": int(opener_audio.sample_rate_hz) if opener_audio else 16000,
|
||||
"channels": int(opener_audio.channels) if opener_audio else 1,
|
||||
"durationMs": int(opener_audio.duration_ms) if opener_audio else 0,
|
||||
"textHash": opener_audio.text_hash if opener_audio else None,
|
||||
"ttsFingerprint": opener_audio.tts_fingerprint if opener_audio else None,
|
||||
"pcmUrl": f"/api/assistants/{assistant.id}/opener-audio/pcm" if opener_audio_ready else None,
|
||||
}
|
||||
return metadata, warnings
|
||||
|
||||
|
||||
def _build_engine_assistant_config(db: Session, assistant: Assistant) -> Dict[str, Any]:
|
||||
session_metadata, warnings = _resolve_runtime_metadata(db, assistant)
|
||||
config_version_id = _config_version_id(assistant)
|
||||
assistant_cfg = dict(session_metadata)
|
||||
assistant_cfg["assistantId"] = assistant.id
|
||||
assistant_cfg["configVersionId"] = config_version_id
|
||||
|
||||
return {
|
||||
"assistantId": assistant.id,
|
||||
"configVersionId": config_version_id,
|
||||
"assistant": assistant_cfg,
|
||||
"sessionStartMetadata": session_metadata,
|
||||
"sources": {
|
||||
"llmModelId": assistant.llm_model_id,
|
||||
"asrModelId": assistant.asr_model_id,
|
||||
"voiceId": assistant.voice,
|
||||
"knowledgeBaseId": assistant.knowledge_base_id,
|
||||
},
|
||||
"warnings": warnings,
|
||||
}
|
||||
|
||||
|
||||
def assistant_to_dict(assistant: Assistant) -> dict:
|
||||
opener_audio = assistant.opener_audio
|
||||
opener_audio_ready = bool(opener_audio and opener_audio.file_path and Path(opener_audio.file_path).exists())
|
||||
return {
|
||||
"id": assistant.id,
|
||||
"name": assistant.name,
|
||||
"callCount": assistant.call_count,
|
||||
"firstTurnMode": assistant.first_turn_mode or "bot_first",
|
||||
"opener": assistant.opener or "",
|
||||
"manualOpenerToolCalls": _normalize_manual_opener_tool_calls(assistant.manual_opener_tool_calls),
|
||||
"generatedOpenerEnabled": bool(assistant.generated_opener_enabled),
|
||||
"openerAudioEnabled": bool(opener_audio.enabled) if opener_audio else False,
|
||||
"openerAudioReady": opener_audio_ready,
|
||||
"openerAudioDurationMs": int(opener_audio.duration_ms) if opener_audio else 0,
|
||||
"openerAudioUpdatedAt": opener_audio.updated_at if opener_audio else None,
|
||||
"prompt": assistant.prompt or "",
|
||||
"knowledgeBaseId": assistant.knowledge_base_id,
|
||||
"language": assistant.language,
|
||||
"voiceOutputEnabled": assistant.voice_output_enabled,
|
||||
"voice": assistant.voice,
|
||||
"speed": assistant.speed,
|
||||
"hotwords": assistant.hotwords or [],
|
||||
"tools": _normalize_assistant_tool_ids(assistant.tools),
|
||||
"asrInterimEnabled": bool(assistant.asr_interim_enabled),
|
||||
"botCannotBeInterrupted": bool(assistant.bot_cannot_be_interrupted),
|
||||
"interruptionSensitivity": assistant.interruption_sensitivity,
|
||||
"configMode": assistant.config_mode,
|
||||
"apiUrl": assistant.api_url,
|
||||
"apiKey": assistant.api_key,
|
||||
"appId": assistant.app_id,
|
||||
"llmModelId": assistant.llm_model_id,
|
||||
"asrModelId": assistant.asr_model_id,
|
||||
"embeddingModelId": assistant.embedding_model_id,
|
||||
"rerankModelId": assistant.rerank_model_id,
|
||||
"created_at": assistant.created_at,
|
||||
"updated_at": assistant.updated_at,
|
||||
}
|
||||
|
||||
|
||||
def _apply_assistant_update(assistant: Assistant, update_data: dict) -> None:
|
||||
field_map = {
|
||||
"knowledgeBaseId": "knowledge_base_id",
|
||||
"firstTurnMode": "first_turn_mode",
|
||||
"manualOpenerToolCalls": "manual_opener_tool_calls",
|
||||
"interruptionSensitivity": "interruption_sensitivity",
|
||||
"asrInterimEnabled": "asr_interim_enabled",
|
||||
"botCannotBeInterrupted": "bot_cannot_be_interrupted",
|
||||
"configMode": "config_mode",
|
||||
"voiceOutputEnabled": "voice_output_enabled",
|
||||
"generatedOpenerEnabled": "generated_opener_enabled",
|
||||
"apiUrl": "api_url",
|
||||
"apiKey": "api_key",
|
||||
"appId": "app_id",
|
||||
"llmModelId": "llm_model_id",
|
||||
"asrModelId": "asr_model_id",
|
||||
"embeddingModelId": "embedding_model_id",
|
||||
"rerankModelId": "rerank_model_id",
|
||||
}
|
||||
for field, value in update_data.items():
|
||||
setattr(assistant, field_map.get(field, field), value)
|
||||
|
||||
|
||||
def _ensure_assistant_opener_audio(db: Session, assistant: Assistant) -> AssistantOpenerAudio:
|
||||
record = assistant.opener_audio
|
||||
if record:
|
||||
return record
|
||||
record = AssistantOpenerAudio(assistant_id=assistant.id, enabled=False)
|
||||
db.add(record)
|
||||
db.flush()
|
||||
return record
|
||||
|
||||
|
||||
def _resolve_tts_runtime_for_assistant(db: Session, assistant: Assistant) -> tuple[Dict[str, Any], Optional[Voice]]:
|
||||
metadata, _ = _resolve_runtime_metadata(db, assistant)
|
||||
services = metadata.get("services") if isinstance(metadata.get("services"), dict) else {}
|
||||
tts = services.get("tts") if isinstance(services, dict) and isinstance(services.get("tts"), dict) else {}
|
||||
voice = db.query(Voice).filter(Voice.id == assistant.voice).first() if assistant.voice else None
|
||||
return tts, voice
|
||||
|
||||
|
||||
def _tts_fingerprint(tts_cfg: Dict[str, Any], opener_text: str) -> str:
|
||||
identity = {
|
||||
"provider": tts_cfg.get("provider"),
|
||||
"model": tts_cfg.get("model"),
|
||||
"voice": tts_cfg.get("voice"),
|
||||
"speed": tts_cfg.get("speed"),
|
||||
"text": opener_text,
|
||||
}
|
||||
return hashlib.sha256(str(identity).encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _synthesize_openai_compatible_wav(
|
||||
*,
|
||||
text: str,
|
||||
model: str,
|
||||
voice_key: str,
|
||||
speed: float,
|
||||
api_key: str,
|
||||
base_url: str,
|
||||
) -> bytes:
|
||||
payload = {
|
||||
"model": model or OPENAI_COMPATIBLE_DEFAULT_MODEL,
|
||||
"input": text,
|
||||
"voice": voice_key,
|
||||
"response_format": "wav",
|
||||
"speed": speed,
|
||||
}
|
||||
with httpx.Client(timeout=45.0) as client:
|
||||
response = client.post(
|
||||
f"{base_url.rstrip('/')}/audio/speech",
|
||||
headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
|
||||
json=payload,
|
||||
)
|
||||
if response.status_code != 200:
|
||||
detail = response.text
|
||||
try:
|
||||
detail_json = response.json()
|
||||
detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
|
||||
except Exception:
|
||||
pass
|
||||
raise HTTPException(status_code=502, detail=f"TTS vendor error: {detail}")
|
||||
return response.content
|
||||
|
||||
|
||||
def _wav_to_pcm16_mono_16k(wav_bytes: bytes) -> tuple[bytes, int]:
|
||||
with wave.open(io.BytesIO(wav_bytes), "rb") as wav_file:
|
||||
channels = wav_file.getnchannels()
|
||||
sample_width = wav_file.getsampwidth()
|
||||
sample_rate = wav_file.getframerate()
|
||||
frames = wav_file.getnframes()
|
||||
raw = wav_file.readframes(frames)
|
||||
|
||||
if sample_width != 2:
|
||||
raise HTTPException(status_code=400, detail=f"Unsupported WAV sample width: {sample_width * 8}bit")
|
||||
|
||||
if channels > 1:
|
||||
raw = audioop.tomono(raw, sample_width, 0.5, 0.5)
|
||||
|
||||
if sample_rate != 16000:
|
||||
raw, _ = audioop.ratecv(raw, sample_width, 1, sample_rate, 16000, None)
|
||||
|
||||
duration_ms = int((len(raw) / (16000 * 2)) * 1000)
|
||||
return raw, duration_ms
|
||||
|
||||
|
||||
def _persist_opener_audio_pcm(assistant_id: str, pcm_bytes: bytes) -> str:
|
||||
OPENER_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
|
||||
file_path = OPENER_AUDIO_DIR / f"{assistant_id}.pcm"
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(pcm_bytes)
|
||||
return str(file_path)
|
||||
|
||||
|
||||
def _opener_audio_out(record: Optional[AssistantOpenerAudio]) -> AssistantOpenerAudioOut:
|
||||
if not record:
|
||||
return AssistantOpenerAudioOut()
|
||||
ready = bool(record.file_path and Path(record.file_path).exists())
|
||||
return AssistantOpenerAudioOut(
|
||||
enabled=bool(record.enabled),
|
||||
ready=ready,
|
||||
encoding=record.encoding,
|
||||
sample_rate_hz=record.sample_rate_hz,
|
||||
channels=record.channels,
|
||||
duration_ms=record.duration_ms,
|
||||
updated_at=record.updated_at,
|
||||
text_hash=record.text_hash,
|
||||
tts_fingerprint=record.tts_fingerprint,
|
||||
)
|
||||
|
||||
|
||||
# ============ Assistants ============
|
||||
@router.get("/assistants")
|
||||
@router.get("")
|
||||
def list_assistants(
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取助手列表"""
|
||||
_ensure_assistant_schema(db)
|
||||
query = db.query(Assistant)
|
||||
total = query.count()
|
||||
assistants = query.order_by(Assistant.created_at.desc()) \
|
||||
.offset((page-1)*limit).limit(limit).all()
|
||||
return {"total": total, "page": page, "limit": limit, "list": assistants}
|
||||
return {
|
||||
"total": total,
|
||||
"page": page,
|
||||
"limit": limit,
|
||||
"list": [assistant_to_dict(a) for a in assistants]
|
||||
}
|
||||
|
||||
|
||||
@router.get("/assistants/{id}", response_model=AssistantOut)
|
||||
@router.get("/{id}", response_model=AssistantOut)
|
||||
def get_assistant(id: str, db: Session = Depends(get_db)):
|
||||
"""获取单个助手详情"""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
return assistant
|
||||
return assistant_to_dict(assistant)
|
||||
|
||||
|
||||
@router.post("/assistants", response_model=AssistantOut)
|
||||
@router.get("/{id}/config", response_model=AssistantEngineConfigResponse)
|
||||
def get_assistant_config(id: str, db: Session = Depends(get_db)):
|
||||
"""Canonical engine config endpoint consumed by engine backend adapter."""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
return _build_engine_assistant_config(db, assistant)
|
||||
|
||||
|
||||
@router.get("/{id}/runtime-config", response_model=AssistantEngineConfigResponse)
|
||||
def get_assistant_runtime_config(id: str, db: Session = Depends(get_db)):
|
||||
"""Legacy alias for resolved engine runtime config."""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
return _build_engine_assistant_config(db, assistant)
|
||||
|
||||
|
||||
@router.post("", response_model=AssistantOut)
|
||||
def create_assistant(data: AssistantCreate, db: Session = Depends(get_db)):
|
||||
"""创建新助手"""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = Assistant(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
user_id=1, # 默认用户,后续添加认证
|
||||
name=data.name,
|
||||
first_turn_mode=data.firstTurnMode,
|
||||
opener=data.opener,
|
||||
manual_opener_tool_calls=_normalize_manual_opener_tool_calls(data.manualOpenerToolCalls),
|
||||
generated_opener_enabled=data.generatedOpenerEnabled,
|
||||
prompt=data.prompt,
|
||||
knowledge_base_id=data.knowledgeBaseId,
|
||||
language=data.language,
|
||||
voice_output_enabled=data.voiceOutputEnabled,
|
||||
voice=data.voice,
|
||||
speed=data.speed,
|
||||
hotwords=data.hotwords,
|
||||
tools=data.tools,
|
||||
tools=_normalize_assistant_tool_ids(data.tools),
|
||||
asr_interim_enabled=data.asrInterimEnabled,
|
||||
bot_cannot_be_interrupted=data.botCannotBeInterrupted,
|
||||
interruption_sensitivity=data.interruptionSensitivity,
|
||||
config_mode=data.configMode,
|
||||
api_url=data.apiUrl,
|
||||
api_key=data.apiKey,
|
||||
app_id=data.appId,
|
||||
llm_model_id=data.llmModelId,
|
||||
asr_model_id=data.asrModelId,
|
||||
embedding_model_id=data.embeddingModelId,
|
||||
rerank_model_id=data.rerankModelId,
|
||||
)
|
||||
db.add(assistant)
|
||||
db.commit()
|
||||
db.refresh(assistant)
|
||||
return assistant
|
||||
opener_audio = _ensure_assistant_opener_audio(db, assistant)
|
||||
opener_audio.enabled = bool(data.openerAudioEnabled)
|
||||
opener_audio.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
db.refresh(assistant)
|
||||
return assistant_to_dict(assistant)
|
||||
|
||||
|
||||
@router.put("/assistants/{id}")
|
||||
def update_assistant(id: str, data: AssistantUpdate, db: Session = Depends(get_db)):
|
||||
"""更新助手"""
|
||||
@router.get("/{id}/opener-audio", response_model=AssistantOpenerAudioOut)
|
||||
def get_assistant_opener_audio(id: str, db: Session = Depends(get_db)):
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
|
||||
return _opener_audio_out(assistant.opener_audio)
|
||||
|
||||
|
||||
@router.get("/{id}/opener-audio/pcm")
|
||||
def get_assistant_opener_audio_pcm(id: str, db: Session = Depends(get_db)):
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
record = assistant.opener_audio
|
||||
if not record or not record.file_path:
|
||||
raise HTTPException(status_code=404, detail="Opener audio not generated")
|
||||
file_path = Path(record.file_path)
|
||||
if not file_path.exists():
|
||||
raise HTTPException(status_code=404, detail="Opener audio file missing")
|
||||
return FileResponse(
|
||||
str(file_path),
|
||||
media_type="application/octet-stream",
|
||||
filename=f"{assistant.id}.pcm",
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{id}/opener-audio/generate", response_model=AssistantOpenerAudioOut)
|
||||
def generate_assistant_opener_audio(
|
||||
id: str,
|
||||
data: AssistantOpenerAudioGenerateRequest,
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
if not assistant.voice_output_enabled:
|
||||
raise HTTPException(status_code=400, detail="Voice output is disabled")
|
||||
|
||||
opener_text = (data.text if data.text is not None else assistant.opener or "").strip()
|
||||
if not opener_text:
|
||||
raise HTTPException(status_code=400, detail="Opener text is empty")
|
||||
|
||||
tts_cfg, voice = _resolve_tts_runtime_for_assistant(db, assistant)
|
||||
provider = str(tts_cfg.get("provider") or "").strip().lower()
|
||||
if provider not in {"openai_compatible", "dashscope"}:
|
||||
raise HTTPException(status_code=400, detail=f"Unsupported provider for preloaded opener audio: {provider or 'unknown'}")
|
||||
|
||||
speed = float(tts_cfg.get("speed") or assistant.speed or 1.0)
|
||||
voice_key = str(tts_cfg.get("voice") or "").strip()
|
||||
model = str(tts_cfg.get("model") or "").strip() or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
api_key = str(tts_cfg.get("apiKey") or "").strip()
|
||||
base_url = str(tts_cfg.get("baseUrl") or "").strip()
|
||||
|
||||
if provider == "openai_compatible":
|
||||
if not api_key:
|
||||
if voice and voice.api_key:
|
||||
api_key = voice.api_key.strip()
|
||||
if not api_key:
|
||||
api_key = (os.getenv("SILICONFLOW_API_KEY", "") or os.getenv("TTS_API_KEY", "")).strip()
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=400, detail="TTS API key is missing")
|
||||
if not base_url:
|
||||
base_url = OPENAI_COMPATIBLE_DEFAULT_BASE_URL
|
||||
wav_bytes = _synthesize_openai_compatible_wav(
|
||||
text=opener_text,
|
||||
model=model,
|
||||
voice_key=voice_key,
|
||||
speed=speed,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
)
|
||||
else:
|
||||
from .voices import _synthesize_dashscope_preview, DASHSCOPE_DEFAULT_BASE_URL, DASHSCOPE_DEFAULT_MODEL, DASHSCOPE_DEFAULT_VOICE_KEY
|
||||
if not api_key:
|
||||
if voice and voice.api_key:
|
||||
api_key = voice.api_key.strip()
|
||||
if not api_key:
|
||||
api_key = (os.getenv("DASHSCOPE_API_KEY", "") or os.getenv("TTS_API_KEY", "")).strip()
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=400, detail="DashScope API key is missing")
|
||||
if not base_url:
|
||||
base_url = DASHSCOPE_DEFAULT_BASE_URL
|
||||
if not model:
|
||||
model = DASHSCOPE_DEFAULT_MODEL
|
||||
if not voice_key:
|
||||
voice_key = DASHSCOPE_DEFAULT_VOICE_KEY
|
||||
try:
|
||||
wav_bytes = _synthesize_dashscope_preview(
|
||||
text=opener_text,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
model=model,
|
||||
voice_key=voice_key,
|
||||
speed=speed,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"DashScope opener audio generation failed: {exc}") from exc
|
||||
|
||||
pcm_bytes, duration_ms = _wav_to_pcm16_mono_16k(wav_bytes)
|
||||
record = _ensure_assistant_opener_audio(db, assistant)
|
||||
record.enabled = True
|
||||
record.file_path = _persist_opener_audio_pcm(assistant.id, pcm_bytes)
|
||||
record.encoding = "pcm_s16le"
|
||||
record.sample_rate_hz = 16000
|
||||
record.channels = 1
|
||||
record.duration_ms = duration_ms
|
||||
record.text_hash = hashlib.sha256(opener_text.encode("utf-8")).hexdigest()
|
||||
record.tts_fingerprint = _tts_fingerprint(tts_cfg, opener_text)
|
||||
now = datetime.utcnow()
|
||||
if not record.created_at:
|
||||
record.created_at = now
|
||||
record.updated_at = now
|
||||
assistant.updated_at = now
|
||||
db.commit()
|
||||
db.refresh(assistant)
|
||||
return _opener_audio_out(assistant.opener_audio)
|
||||
|
||||
|
||||
@router.put("/{id}")
|
||||
def update_assistant(id: str, data: AssistantUpdate, db: Session = Depends(get_db)):
|
||||
"""更新助手"""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
for field, value in update_data.items():
|
||||
setattr(assistant, field, value)
|
||||
|
||||
opener_audio_enabled = update_data.pop("openerAudioEnabled", None)
|
||||
if "manualOpenerToolCalls" in update_data:
|
||||
update_data["manualOpenerToolCalls"] = _normalize_manual_opener_tool_calls(update_data.get("manualOpenerToolCalls"))
|
||||
if "tools" in update_data:
|
||||
update_data["tools"] = _normalize_assistant_tool_ids(update_data.get("tools"))
|
||||
_apply_assistant_update(assistant, update_data)
|
||||
if opener_audio_enabled is not None:
|
||||
record = _ensure_assistant_opener_audio(db, assistant)
|
||||
record.enabled = bool(opener_audio_enabled)
|
||||
record.updated_at = datetime.utcnow()
|
||||
|
||||
assistant.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
db.refresh(assistant)
|
||||
return assistant
|
||||
return assistant_to_dict(assistant)
|
||||
|
||||
|
||||
@router.delete("/assistants/{id}")
|
||||
@router.delete("/{id}")
|
||||
def delete_assistant(id: str, db: Session = Depends(get_db)):
|
||||
"""删除助手"""
|
||||
_ensure_assistant_schema(db)
|
||||
assistant = db.query(Assistant).filter(Assistant.id == id).first()
|
||||
if not assistant:
|
||||
raise HTTPException(status_code=404, detail="Assistant not found")
|
||||
db.delete(assistant)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
# ============ Workflows ============
|
||||
@router.get("/workflows", response_model=List[WorkflowOut])
|
||||
def list_workflows(db: Session = Depends(get_db)):
|
||||
"""获取工作流列表"""
|
||||
workflows = db.query(Workflow).all()
|
||||
return workflows
|
||||
|
||||
|
||||
@router.post("/workflows", response_model=WorkflowOut)
|
||||
def create_workflow(data: WorkflowCreate, db: Session = Depends(get_db)):
|
||||
"""创建工作流"""
|
||||
workflow = Workflow(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
user_id=1,
|
||||
name=data.name,
|
||||
node_count=data.nodeCount,
|
||||
created_at=data.createdAt or datetime.utcnow().isoformat(),
|
||||
updated_at=data.updatedAt or "",
|
||||
global_prompt=data.globalPrompt,
|
||||
nodes=data.nodes,
|
||||
edges=data.edges,
|
||||
)
|
||||
db.add(workflow)
|
||||
db.commit()
|
||||
db.refresh(workflow)
|
||||
return workflow
|
||||
|
||||
|
||||
@router.put("/workflows/{id}", response_model=WorkflowOut)
|
||||
def update_workflow(id: str, data: WorkflowUpdate, db: Session = Depends(get_db)):
|
||||
"""更新工作流"""
|
||||
workflow = db.query(Workflow).filter(Workflow.id == id).first()
|
||||
if not workflow:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
for field, value in update_data.items():
|
||||
setattr(workflow, field, value)
|
||||
|
||||
workflow.updated_at = datetime.utcnow().isoformat()
|
||||
db.commit()
|
||||
db.refresh(workflow)
|
||||
return workflow
|
||||
|
||||
|
||||
@router.delete("/workflows/{id}")
|
||||
def delete_workflow(id: str, db: Session = Depends(get_db)):
|
||||
"""删除工作流"""
|
||||
workflow = db.query(Workflow).filter(Workflow.id == id).first()
|
||||
if not workflow:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
db.delete(workflow)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
@@ -7,14 +7,32 @@ from datetime import datetime
|
||||
from ..db import get_db
|
||||
from ..models import CallRecord, CallTranscript, CallAudioSegment
|
||||
from ..storage import get_audio_url
|
||||
from ..schemas import CallRecordCreate, CallRecordUpdate, TranscriptCreate
|
||||
|
||||
router = APIRouter(prefix="/history", tags=["history"])
|
||||
|
||||
|
||||
def record_to_dict(record: CallRecord) -> dict:
|
||||
return {
|
||||
"id": record.id,
|
||||
"user_id": record.user_id,
|
||||
"assistant_id": record.assistant_id,
|
||||
"source": record.source,
|
||||
"status": record.status,
|
||||
"started_at": record.started_at,
|
||||
"ended_at": record.ended_at,
|
||||
"duration_seconds": record.duration_seconds,
|
||||
"summary": record.summary,
|
||||
"cost": record.cost,
|
||||
"created_at": record.created_at,
|
||||
}
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_history(
|
||||
assistant_id: Optional[str] = None,
|
||||
status: Optional[str] = None,
|
||||
source: Optional[str] = None,
|
||||
page: int = 1,
|
||||
limit: int = 20,
|
||||
db: Session = Depends(get_db)
|
||||
@@ -26,12 +44,19 @@ def list_history(
|
||||
query = query.filter(CallRecord.assistant_id == assistant_id)
|
||||
if status:
|
||||
query = query.filter(CallRecord.status == status)
|
||||
if source:
|
||||
query = query.filter(CallRecord.source == source)
|
||||
|
||||
total = query.count()
|
||||
records = query.order_by(CallRecord.started_at.desc()) \
|
||||
.offset((page-1)*limit).limit(limit).all()
|
||||
|
||||
return {"total": total, "page": page, "limit": limit, "list": records}
|
||||
return {
|
||||
"total": total,
|
||||
"page": page,
|
||||
"limit": limit,
|
||||
"list": [record_to_dict(r) for r in records]
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{call_id}")
|
||||
@@ -46,10 +71,12 @@ def get_history_detail(call_id: str, db: Session = Depends(get_db)):
|
||||
.filter(CallTranscript.call_id == call_id) \
|
||||
.order_by(CallTranscript.turn_index).all()
|
||||
|
||||
# 补充音频 URL
|
||||
audio_segments = db.query(CallAudioSegment).filter(CallAudioSegment.call_id == call_id).all()
|
||||
audio_by_turn = {seg.turn_index: seg.audio_url for seg in audio_segments if seg.turn_index is not None}
|
||||
|
||||
transcript_list = []
|
||||
for t in transcripts:
|
||||
audio_url = t.audio_url or get_audio_url(call_id, t.turn_index)
|
||||
audio_url = audio_by_turn.get(t.turn_index) or get_audio_url(call_id, t.turn_index)
|
||||
transcript_list.append({
|
||||
"turnIndex": t.turn_index,
|
||||
"speaker": t.speaker,
|
||||
@@ -77,32 +104,29 @@ def get_history_detail(call_id: str, db: Session = Depends(get_db)):
|
||||
|
||||
@router.post("")
|
||||
def create_call_record(
|
||||
user_id: int,
|
||||
assistant_id: Optional[str] = None,
|
||||
source: str = "debug",
|
||||
data: CallRecordCreate,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""创建通话记录(引擎回调使用)"""
|
||||
record = CallRecord(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
user_id=user_id,
|
||||
assistant_id=assistant_id,
|
||||
source=source,
|
||||
status="connected",
|
||||
user_id=data.user_id,
|
||||
assistant_id=data.assistant_id,
|
||||
source=data.source,
|
||||
status=data.status or "connected",
|
||||
started_at=datetime.utcnow().isoformat(),
|
||||
cost=data.cost or 0.0,
|
||||
)
|
||||
db.add(record)
|
||||
db.commit()
|
||||
db.refresh(record)
|
||||
return record
|
||||
return record_to_dict(record)
|
||||
|
||||
|
||||
@router.put("/{call_id}")
|
||||
def update_call_record(
|
||||
call_id: str,
|
||||
status: Optional[str] = None,
|
||||
summary: Optional[str] = None,
|
||||
duration_seconds: Optional[int] = None,
|
||||
data: CallRecordUpdate,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""更新通话记录"""
|
||||
@@ -110,59 +134,64 @@ def update_call_record(
|
||||
if not record:
|
||||
raise HTTPException(status_code=404, detail="Call record not found")
|
||||
|
||||
if status:
|
||||
record.status = status
|
||||
if summary:
|
||||
record.summary = summary
|
||||
if duration_seconds:
|
||||
record.duration_seconds = duration_seconds
|
||||
if data.status is not None:
|
||||
record.status = data.status
|
||||
if data.summary is not None:
|
||||
record.summary = data.summary
|
||||
if data.duration_seconds is not None:
|
||||
record.duration_seconds = data.duration_seconds
|
||||
record.ended_at = datetime.utcnow().isoformat()
|
||||
if data.ended_at is not None:
|
||||
record.ended_at = data.ended_at
|
||||
if data.cost is not None:
|
||||
record.cost = data.cost
|
||||
if data.metadata is not None:
|
||||
record.call_metadata = data.metadata
|
||||
|
||||
db.commit()
|
||||
return {"message": "Updated successfully"}
|
||||
db.refresh(record)
|
||||
return record_to_dict(record)
|
||||
|
||||
|
||||
@router.post("/{call_id}/transcripts")
|
||||
def add_transcript(
|
||||
call_id: str,
|
||||
turn_index: int,
|
||||
speaker: str,
|
||||
content: str,
|
||||
start_ms: int,
|
||||
end_ms: int,
|
||||
confidence: Optional[float] = None,
|
||||
duration_ms: Optional[int] = None,
|
||||
emotion: Optional[str] = None,
|
||||
data: TranscriptCreate,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""添加转写片段"""
|
||||
record = db.query(CallRecord).filter(CallRecord.id == call_id).first()
|
||||
if not record:
|
||||
raise HTTPException(status_code=404, detail="Call record not found")
|
||||
|
||||
transcript = CallTranscript(
|
||||
call_id=call_id,
|
||||
turn_index=turn_index,
|
||||
speaker=speaker,
|
||||
content=content,
|
||||
confidence=confidence,
|
||||
start_ms=start_ms,
|
||||
end_ms=end_ms,
|
||||
duration_ms=duration_ms,
|
||||
emotion=emotion,
|
||||
turn_index=data.turn_index,
|
||||
speaker=data.speaker,
|
||||
content=data.content,
|
||||
confidence=data.confidence,
|
||||
start_ms=data.start_ms,
|
||||
end_ms=data.end_ms,
|
||||
duration_ms=data.duration_ms,
|
||||
emotion=data.emotion,
|
||||
)
|
||||
db.add(transcript)
|
||||
db.commit()
|
||||
db.refresh(transcript)
|
||||
|
||||
# 补充音频 URL
|
||||
audio_url = get_audio_url(call_id, turn_index)
|
||||
audio_url = get_audio_url(call_id, data.turn_index)
|
||||
|
||||
return {
|
||||
"id": transcript.id,
|
||||
"turn_index": turn_index,
|
||||
"speaker": speaker,
|
||||
"content": content,
|
||||
"confidence": confidence,
|
||||
"start_ms": start_ms,
|
||||
"end_ms": end_ms,
|
||||
"duration_ms": duration_ms,
|
||||
"turn_index": data.turn_index,
|
||||
"speaker": data.speaker,
|
||||
"content": data.content,
|
||||
"confidence": data.confidence,
|
||||
"start_ms": data.start_ms,
|
||||
"end_ms": data.end_ms,
|
||||
"duration_ms": data.duration_ms,
|
||||
"emotion": data.emotion,
|
||||
"audio_url": audio_url,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Request
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Optional
|
||||
import uuid
|
||||
import os
|
||||
import json
|
||||
from io import BytesIO
|
||||
from datetime import datetime
|
||||
|
||||
from ..db import get_db
|
||||
@@ -11,6 +13,7 @@ from ..schemas import (
|
||||
KnowledgeBaseCreate, KnowledgeBaseUpdate, KnowledgeBaseOut,
|
||||
KnowledgeSearchQuery, KnowledgeSearchResult, KnowledgeStats,
|
||||
DocumentIndexRequest,
|
||||
KnowledgeDocumentCreate,
|
||||
)
|
||||
from ..vector_store import (
|
||||
vector_store, search_knowledge, index_document, delete_document_from_vector
|
||||
@@ -19,20 +22,71 @@ from ..vector_store import (
|
||||
router = APIRouter(prefix="/knowledge", tags=["knowledge"])
|
||||
|
||||
|
||||
def _refresh_kb_stats(db: Session, kb_id: str) -> None:
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
if not kb:
|
||||
return
|
||||
docs = db.query(KnowledgeDocument).filter(KnowledgeDocument.kb_id == kb_id).all()
|
||||
completed_docs = [d for d in docs if d.status == "completed"]
|
||||
kb.doc_count = len(completed_docs)
|
||||
kb.chunk_count = sum(max(0, d.chunk_count or 0) for d in completed_docs)
|
||||
|
||||
|
||||
def _decode_text_bytes(raw: bytes) -> str:
|
||||
for encoding in ("utf-8", "utf-8-sig", "gb18030", "gbk", "latin-1"):
|
||||
try:
|
||||
return raw.decode(encoding)
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
return raw.decode("utf-8", errors="ignore")
|
||||
|
||||
|
||||
def _extract_text_from_upload(filename: str, content_type: Optional[str], raw: bytes) -> str:
|
||||
ext = os.path.splitext((filename or "").lower())[1]
|
||||
if ext in {".txt", ".md", ".csv"}:
|
||||
return _decode_text_bytes(raw)
|
||||
if ext == ".json":
|
||||
try:
|
||||
parsed = json.loads(_decode_text_bytes(raw))
|
||||
return json.dumps(parsed, ensure_ascii=False, indent=2)
|
||||
except Exception:
|
||||
return _decode_text_bytes(raw)
|
||||
if ext == ".pdf":
|
||||
try:
|
||||
from pypdf import PdfReader # type: ignore
|
||||
except Exception as exc:
|
||||
raise ValueError("PDF parsing requires installing pypdf") from exc
|
||||
reader = PdfReader(BytesIO(raw))
|
||||
return "\n".join((page.extract_text() or "") for page in reader.pages).strip()
|
||||
if ext == ".docx":
|
||||
try:
|
||||
from docx import Document # type: ignore
|
||||
except Exception as exc:
|
||||
raise ValueError("DOCX parsing requires installing python-docx") from exc
|
||||
doc = Document(BytesIO(raw))
|
||||
return "\n".join(p.text for p in doc.paragraphs).strip()
|
||||
if ext == ".doc":
|
||||
raise ValueError("DOC format is not supported for auto indexing. Please convert to DOCX/TXT.")
|
||||
# fallback: attempt plain text decode
|
||||
if (content_type or "").startswith("text/"):
|
||||
return _decode_text_bytes(raw)
|
||||
raise ValueError(f"Unsupported file type for auto indexing: {ext or content_type or 'unknown'}")
|
||||
|
||||
|
||||
def kb_to_dict(kb: KnowledgeBase) -> dict:
|
||||
return {
|
||||
"id": kb.id,
|
||||
"user_id": kb.user_id,
|
||||
"name": kb.name,
|
||||
"description": kb.description,
|
||||
"embedding_model": kb.embedding_model,
|
||||
"chunk_size": kb.chunk_size,
|
||||
"chunk_overlap": kb.chunk_overlap,
|
||||
"doc_count": kb.doc_count,
|
||||
"chunk_count": kb.chunk_count,
|
||||
"embeddingModel": kb.embedding_model,
|
||||
"chunkSize": kb.chunk_size,
|
||||
"chunkOverlap": kb.chunk_overlap,
|
||||
"docCount": kb.doc_count,
|
||||
"chunkCount": kb.chunk_count,
|
||||
"status": kb.status,
|
||||
"created_at": kb.created_at.isoformat() if kb.created_at else None,
|
||||
"updated_at": kb.updated_at.isoformat() if kb.updated_at else None,
|
||||
"createdAt": kb.created_at.isoformat() if kb.created_at else None,
|
||||
"updatedAt": kb.updated_at.isoformat() if kb.updated_at else None,
|
||||
}
|
||||
|
||||
|
||||
@@ -42,28 +96,35 @@ def doc_to_dict(d: KnowledgeDocument) -> dict:
|
||||
"kb_id": d.kb_id,
|
||||
"name": d.name,
|
||||
"size": d.size,
|
||||
"file_type": d.file_type,
|
||||
"storage_url": d.storage_url,
|
||||
"fileType": d.file_type,
|
||||
"storageUrl": d.storage_url,
|
||||
"status": d.status,
|
||||
"chunk_count": d.chunk_count,
|
||||
"error_message": d.error_message,
|
||||
"upload_date": d.upload_date,
|
||||
"created_at": d.created_at.isoformat() if d.created_at else None,
|
||||
"processed_at": d.processed_at.isoformat() if d.processed_at else None,
|
||||
"chunkCount": d.chunk_count,
|
||||
"errorMessage": d.error_message,
|
||||
"uploadDate": d.upload_date,
|
||||
"createdAt": d.created_at.isoformat() if d.created_at else None,
|
||||
"processedAt": d.processed_at.isoformat() if d.processed_at else None,
|
||||
}
|
||||
|
||||
|
||||
# ============ Knowledge Bases ============
|
||||
@router.get("/bases")
|
||||
def list_knowledge_bases(user_id: int = 1, db: Session = Depends(get_db)):
|
||||
kbs = db.query(KnowledgeBase).filter(KnowledgeBase.user_id == user_id).all()
|
||||
def list_knowledge_bases(
|
||||
user_id: int = 1,
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
query = db.query(KnowledgeBase).filter(KnowledgeBase.user_id == user_id)
|
||||
total = query.count()
|
||||
kbs = query.order_by(KnowledgeBase.created_at.desc()).offset((page - 1) * limit).limit(limit).all()
|
||||
result = []
|
||||
for kb in kbs:
|
||||
docs = db.query(KnowledgeDocument).filter(KnowledgeDocument.kb_id == kb.id).all()
|
||||
kb_data = kb_to_dict(kb)
|
||||
kb_data["documents"] = [doc_to_dict(d) for d in docs]
|
||||
result.append(kb_data)
|
||||
return {"total": len(result), "list": result}
|
||||
return {"total": total, "page": page, "limit": limit, "list": result}
|
||||
|
||||
|
||||
@router.get("/bases/{kb_id}")
|
||||
@@ -79,10 +140,21 @@ def get_knowledge_base(kb_id: str, db: Session = Depends(get_db)):
|
||||
|
||||
@router.post("/bases")
|
||||
def create_knowledge_base(data: KnowledgeBaseCreate, user_id: int = 1, db: Session = Depends(get_db)):
|
||||
name = (data.name or "").strip()
|
||||
if not name:
|
||||
raise HTTPException(status_code=400, detail="Knowledge base name cannot be empty")
|
||||
|
||||
exists = db.query(KnowledgeBase).filter(
|
||||
KnowledgeBase.user_id == user_id,
|
||||
KnowledgeBase.name == name
|
||||
).first()
|
||||
if exists:
|
||||
raise HTTPException(status_code=400, detail=f"Knowledge base name already exists: {name}")
|
||||
|
||||
kb = KnowledgeBase(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
user_id=user_id,
|
||||
name=data.name,
|
||||
name=name,
|
||||
description=data.description,
|
||||
embedding_model=data.embeddingModel,
|
||||
chunk_size=data.chunkSize,
|
||||
@@ -91,7 +163,13 @@ def create_knowledge_base(data: KnowledgeBaseCreate, user_id: int = 1, db: Sessi
|
||||
db.add(kb)
|
||||
db.commit()
|
||||
db.refresh(kb)
|
||||
vector_store.create_collection(kb.id, data.embeddingModel)
|
||||
try:
|
||||
vector_store.create_collection(kb.id, data.embeddingModel)
|
||||
except Exception as exc:
|
||||
# Keep DB and vector store consistent on create failure
|
||||
db.delete(kb)
|
||||
db.commit()
|
||||
raise HTTPException(status_code=502, detail=f"Failed to create ChromaDB collection: {exc}") from exc
|
||||
return kb_to_dict(kb)
|
||||
|
||||
|
||||
@@ -101,8 +179,43 @@ def update_knowledge_base(kb_id: str, data: KnowledgeBaseUpdate, db: Session = D
|
||||
if not kb:
|
||||
raise HTTPException(status_code=404, detail="Knowledge base not found")
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
field_map = {
|
||||
"embeddingModel": "embedding_model",
|
||||
"chunkSize": "chunk_size",
|
||||
"chunkOverlap": "chunk_overlap",
|
||||
}
|
||||
if "name" in update_data:
|
||||
update_data["name"] = (update_data["name"] or "").strip()
|
||||
if not update_data["name"]:
|
||||
raise HTTPException(status_code=400, detail="Knowledge base name cannot be empty")
|
||||
name_exists = db.query(KnowledgeBase).filter(
|
||||
KnowledgeBase.user_id == kb.user_id,
|
||||
KnowledgeBase.name == update_data["name"],
|
||||
KnowledgeBase.id != kb.id
|
||||
).first()
|
||||
if name_exists:
|
||||
raise HTTPException(status_code=400, detail=f"Knowledge base name already exists: {update_data['name']}")
|
||||
|
||||
embedding_changed = "embeddingModel" in update_data and update_data["embeddingModel"] != kb.embedding_model
|
||||
if embedding_changed and kb.chunk_count > 0:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Cannot change embedding model when knowledge base has indexed chunks. Remove documents first."
|
||||
)
|
||||
|
||||
for field, value in update_data.items():
|
||||
setattr(kb, field, value)
|
||||
setattr(kb, field_map.get(field, field), value)
|
||||
|
||||
if embedding_changed:
|
||||
try:
|
||||
vector_store.delete_collection(kb_id)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
vector_store.create_collection(kb_id, kb.embedding_model)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"Failed to update ChromaDB collection: {exc}") from exc
|
||||
|
||||
kb.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
db.refresh(kb)
|
||||
@@ -114,42 +227,141 @@ def delete_knowledge_base(kb_id: str, db: Session = Depends(get_db)):
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
if not kb:
|
||||
raise HTTPException(status_code=404, detail="Knowledge base not found")
|
||||
vector_store.delete_collection(kb_id)
|
||||
vector_deleted = True
|
||||
try:
|
||||
vector_store.delete_collection(kb_id)
|
||||
except Exception:
|
||||
vector_deleted = False
|
||||
docs = db.query(KnowledgeDocument).filter(KnowledgeDocument.kb_id == kb_id).all()
|
||||
for doc in docs:
|
||||
db.delete(doc)
|
||||
db.delete(kb)
|
||||
db.commit()
|
||||
if not vector_deleted:
|
||||
return {"message": "Deleted successfully", "warning": "Knowledge base deleted but failed to remove ChromaDB collection"}
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
# ============ Documents ============
|
||||
@router.post("/bases/{kb_id}/documents")
|
||||
def upload_document(
|
||||
async def upload_document(
|
||||
kb_id: str,
|
||||
name: str = Query(...),
|
||||
size: str = Query(...),
|
||||
file_type: str = Query("txt"),
|
||||
storage_url: Optional[str] = Query(None),
|
||||
file: Optional[UploadFile] = File(default=None),
|
||||
name: Optional[str] = Form(default=None),
|
||||
size: Optional[str] = Form(default=None),
|
||||
file_type: Optional[str] = Form(default=None),
|
||||
storage_url: Optional[str] = Form(default=None),
|
||||
data: Optional[KnowledgeDocumentCreate] = None,
|
||||
request: Request = None,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
if not kb:
|
||||
raise HTTPException(status_code=404, detail="Knowledge base not found")
|
||||
|
||||
# New mode: multipart file upload with automatic indexing
|
||||
if file is not None:
|
||||
filename = file.filename or "uploaded.txt"
|
||||
file_type_value = file.content_type or file_type or "application/octet-stream"
|
||||
raw = file.file.read()
|
||||
if not raw:
|
||||
raise HTTPException(status_code=400, detail="Uploaded file is empty")
|
||||
|
||||
doc = KnowledgeDocument(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
kb_id=kb_id,
|
||||
name=filename,
|
||||
size=f"{len(raw)} bytes",
|
||||
file_type=file_type_value,
|
||||
storage_url=storage_url,
|
||||
status="processing",
|
||||
upload_date=datetime.utcnow().isoformat()
|
||||
)
|
||||
db.add(doc)
|
||||
db.commit()
|
||||
db.refresh(doc)
|
||||
|
||||
try:
|
||||
if vector_store.get_collection(kb_id) is None:
|
||||
vector_store.create_collection(kb_id, kb.embedding_model)
|
||||
|
||||
text = _extract_text_from_upload(filename, file.content_type, raw)
|
||||
if not text.strip():
|
||||
raise ValueError("No textual content extracted from file")
|
||||
|
||||
chunk_count = index_document(kb_id, doc.id, text)
|
||||
doc.status = "completed"
|
||||
doc.chunk_count = chunk_count
|
||||
doc.processed_at = datetime.utcnow()
|
||||
doc.error_message = None
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
return {
|
||||
"id": doc.id,
|
||||
"name": doc.name,
|
||||
"size": doc.size,
|
||||
"fileType": doc.file_type,
|
||||
"storageUrl": doc.storage_url,
|
||||
"status": doc.status,
|
||||
"chunkCount": doc.chunk_count,
|
||||
"message": "Document uploaded and indexed",
|
||||
}
|
||||
except ValueError as exc:
|
||||
doc.status = "failed"
|
||||
doc.error_message = str(exc)
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||
except Exception as exc:
|
||||
doc.status = "failed"
|
||||
doc.error_message = str(exc)
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
raise HTTPException(status_code=500, detail=f"Failed to index uploaded file: {exc}") from exc
|
||||
|
||||
# Backward-compatible mode: metadata-only document creation
|
||||
if data is None:
|
||||
if not name and not size and request is not None:
|
||||
try:
|
||||
raw_payload = await request.json()
|
||||
if isinstance(raw_payload, dict):
|
||||
name = raw_payload.get("name")
|
||||
size = raw_payload.get("size")
|
||||
file_type = raw_payload.get("fileType") or raw_payload.get("file_type") or file_type
|
||||
storage_url = raw_payload.get("storageUrl") or raw_payload.get("storage_url") or storage_url
|
||||
except Exception:
|
||||
pass
|
||||
if not name or not size:
|
||||
raise HTTPException(status_code=422, detail="name and size are required")
|
||||
data = KnowledgeDocumentCreate(
|
||||
name=name,
|
||||
size=size,
|
||||
fileType=file_type or "txt",
|
||||
storageUrl=storage_url,
|
||||
)
|
||||
|
||||
doc = KnowledgeDocument(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
kb_id=kb_id,
|
||||
name=name,
|
||||
size=size,
|
||||
file_type=file_type,
|
||||
storage_url=storage_url,
|
||||
name=data.name,
|
||||
size=data.size,
|
||||
file_type=data.fileType,
|
||||
storage_url=data.storageUrl,
|
||||
status="pending",
|
||||
upload_date=datetime.utcnow().isoformat()
|
||||
)
|
||||
db.add(doc)
|
||||
db.commit()
|
||||
db.refresh(doc)
|
||||
return {"id": doc.id, "name": doc.name, "status": doc.status, "message": "Document created"}
|
||||
return {
|
||||
"id": doc.id,
|
||||
"name": doc.name,
|
||||
"size": doc.size,
|
||||
"fileType": doc.file_type,
|
||||
"storageUrl": doc.storage_url,
|
||||
"status": doc.status,
|
||||
"message": "Document created",
|
||||
}
|
||||
|
||||
|
||||
@router.post("/bases/{kb_id}/documents/{doc_id}/index")
|
||||
@@ -180,21 +392,21 @@ def index_document_content(kb_id: str, doc_id: str, request: DocumentIndexReques
|
||||
db.commit()
|
||||
|
||||
try:
|
||||
if vector_store.get_collection(kb_id) is None:
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
vector_store.create_collection(kb_id, kb.embedding_model if kb else "text-embedding-3-small")
|
||||
chunk_count = index_document(kb_id, doc_id, request.content)
|
||||
doc.status = "completed"
|
||||
doc.chunk_count = chunk_count
|
||||
doc.processed_at = datetime.utcnow()
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
kb.doc_count = db.query(KnowledgeDocument).filter(
|
||||
KnowledgeDocument.kb_id == kb_id,
|
||||
KnowledgeDocument.status == "completed"
|
||||
).count()
|
||||
kb.chunk_count += chunk_count
|
||||
doc.error_message = None
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
return {"message": "Document indexed", "chunkCount": chunk_count}
|
||||
except Exception as e:
|
||||
doc.status = "failed"
|
||||
doc.error_message = str(e)
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@@ -211,10 +423,8 @@ def delete_document(kb_id: str, doc_id: str, db: Session = Depends(get_db)):
|
||||
delete_document_from_vector(kb_id, doc_id)
|
||||
except Exception:
|
||||
pass
|
||||
kb = db.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
|
||||
kb.chunk_count -= doc.chunk_count
|
||||
kb.doc_count -= 1
|
||||
db.delete(doc)
|
||||
_refresh_kb_stats(db, kb_id)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
@@ -222,7 +432,10 @@ def delete_document(kb_id: str, doc_id: str, db: Session = Depends(get_db)):
|
||||
# ============ Search ============
|
||||
@router.post("/search")
|
||||
def search_knowledge_base(query: KnowledgeSearchQuery):
|
||||
return search_knowledge(kb_id=query.kb_id, query=query.query, n_results=query.nResults)
|
||||
try:
|
||||
return search_knowledge(kb_id=query.kb_id, query=query.query, n_results=query.nResults)
|
||||
except ValueError as exc:
|
||||
raise HTTPException(status_code=400, detail=str(exc)) from exc
|
||||
|
||||
|
||||
# ============ Stats ============
|
||||
|
||||
296
api/app/routers/llm.py
Normal file
296
api/app/routers/llm.py
Normal file
@@ -0,0 +1,296 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import List, Optional
|
||||
import httpx
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
from ..db import get_db
|
||||
from ..id_generator import unique_short_id
|
||||
from ..models import LLMModel
|
||||
from ..schemas import (
|
||||
LLMModelCreate, LLMModelUpdate, LLMModelOut,
|
||||
LLMModelTestResponse, LLMPreviewRequest, LLMPreviewResponse
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/llm", tags=["LLM Models"])
|
||||
|
||||
|
||||
# ============ LLM Models CRUD ============
|
||||
@router.get("")
|
||||
def list_llm_models(
|
||||
model_type: Optional[str] = None,
|
||||
enabled: Optional[bool] = None,
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取LLM模型列表"""
|
||||
query = db.query(LLMModel)
|
||||
|
||||
if model_type:
|
||||
query = query.filter(LLMModel.type == model_type)
|
||||
if enabled is not None:
|
||||
query = query.filter(LLMModel.enabled == enabled)
|
||||
|
||||
total = query.count()
|
||||
models = query.order_by(LLMModel.created_at.desc()) \
|
||||
.offset((page-1)*limit).limit(limit).all()
|
||||
|
||||
return {"total": total, "page": page, "limit": limit, "list": models}
|
||||
|
||||
|
||||
@router.get("/{id}", response_model=LLMModelOut)
|
||||
def get_llm_model(id: str, db: Session = Depends(get_db)):
|
||||
"""获取单个LLM模型详情"""
|
||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||
return model
|
||||
|
||||
|
||||
@router.post("", response_model=LLMModelOut)
|
||||
def create_llm_model(data: LLMModelCreate, db: Session = Depends(get_db)):
|
||||
"""创建LLM模型"""
|
||||
llm_model = LLMModel(
|
||||
id=unique_short_id("llm", db, LLMModel),
|
||||
user_id=1, # 默认用户
|
||||
name=data.name,
|
||||
vendor=data.vendor,
|
||||
type=data.type.value if hasattr(data.type, 'value') else data.type,
|
||||
base_url=data.base_url,
|
||||
api_key=data.api_key,
|
||||
model_name=data.model_name,
|
||||
temperature=data.temperature,
|
||||
context_length=data.context_length,
|
||||
enabled=data.enabled,
|
||||
)
|
||||
db.add(llm_model)
|
||||
db.commit()
|
||||
db.refresh(llm_model)
|
||||
return llm_model
|
||||
|
||||
|
||||
@router.put("/{id}", response_model=LLMModelOut)
|
||||
def update_llm_model(id: str, data: LLMModelUpdate, db: Session = Depends(get_db)):
|
||||
"""更新LLM模型"""
|
||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
if "type" in update_data and update_data["type"] is not None and hasattr(update_data["type"], "value"):
|
||||
update_data["type"] = update_data["type"].value
|
||||
for field, value in update_data.items():
|
||||
setattr(model, field, value)
|
||||
|
||||
model.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
db.refresh(model)
|
||||
return model
|
||||
|
||||
|
||||
@router.delete("/{id}")
|
||||
def delete_llm_model(id: str, db: Session = Depends(get_db)):
|
||||
"""删除LLM模型"""
|
||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||
db.delete(model)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
@router.post("/{id}/test", response_model=LLMModelTestResponse)
|
||||
def test_llm_model(id: str, db: Session = Depends(get_db)):
|
||||
"""测试LLM模型连接"""
|
||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||
|
||||
start_time = time.time()
|
||||
try:
|
||||
# 构造测试请求
|
||||
test_messages = [{"role": "user", "content": "Hello, please reply with 'OK'."}]
|
||||
|
||||
payload = {
|
||||
"model": model.model_name or "gpt-3.5-turbo",
|
||||
"messages": test_messages,
|
||||
"max_tokens": 10,
|
||||
"temperature": 0.1,
|
||||
}
|
||||
|
||||
headers = {"Authorization": f"Bearer {model.api_key}"}
|
||||
|
||||
with httpx.Client(timeout=30.0) as client:
|
||||
response = client.post(
|
||||
f"{model.base_url}/chat/completions",
|
||||
json=payload,
|
||||
headers=headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
latency_ms = int((time.time() - start_time) * 1000)
|
||||
result = response.json()
|
||||
|
||||
if result.get("choices"):
|
||||
return LLMModelTestResponse(
|
||||
success=True,
|
||||
latency_ms=latency_ms,
|
||||
message="Connection successful"
|
||||
)
|
||||
else:
|
||||
return LLMModelTestResponse(
|
||||
success=False,
|
||||
latency_ms=latency_ms,
|
||||
message="Unexpected response format"
|
||||
)
|
||||
|
||||
except httpx.HTTPStatusError as e:
|
||||
return LLMModelTestResponse(
|
||||
success=False,
|
||||
message=f"HTTP Error: {e.response.status_code} - {e.response.text[:200]}"
|
||||
)
|
||||
except Exception as e:
|
||||
return LLMModelTestResponse(
|
||||
success=False,
|
||||
message=str(e)[:200]
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{id}/chat")
|
||||
def chat_with_llm(
|
||||
id: str,
|
||||
message: str,
|
||||
system_prompt: Optional[str] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
temperature: Optional[float] = None,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""与LLM模型对话"""
|
||||
model = db.query(LLMModel).filter(LLMModel.id == id).first()
|
||||
if not model:
|
||||
raise HTTPException(status_code=404, detail="LLM Model not found")
|
||||
|
||||
messages = []
|
||||
if system_prompt:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
messages.append({"role": "user", "content": message})
|
||||
|
||||
payload = {
|
||||
"model": model.model_name or "gpt-3.5-turbo",
|
||||
"messages": messages,
|
||||
"max_tokens": max_tokens or 1000,
|
||||
"temperature": temperature if temperature is not None else model.temperature or 0.7,
|
||||
}
|
||||
|
||||
headers = {"Authorization": f"Bearer {model.api_key}"}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=60.0) as client:
|
||||
response = client.post(
|
||||
f"{model.base_url}/chat/completions",
|
||||
json=payload,
|
||||
headers=headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
result = response.json()
|
||||
if choice := result.get("choices", [{}])[0]:
|
||||
return {
|
||||
"success": True,
|
||||
"reply": choice.get("message", {}).get("content", ""),
|
||||
"usage": result.get("usage", {})
|
||||
}
|
||||
return {"success": False, "reply": "", "error": "No response"}
|
||||
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/{id}/preview", response_model=LLMPreviewResponse)
def preview_llm_model(
    id: str,
    request: LLMPreviewRequest,
    db: Session = Depends(get_db)
):
    """Preview a model's output; supports both text (chat) and embedding models.

    For embedding models, returns a human-readable summary (dimension count
    plus the first few vector components) instead of raw audio/vector data.

    Raises:
        HTTPException: 404 for an unknown model, 400 for an empty message,
            502 on transport failure or a non-200 vendor response.
    """
    model = db.query(LLMModel).filter(LLMModel.id == id).first()
    if not model:
        raise HTTPException(status_code=404, detail="LLM Model not found")

    user_message = (request.message or "").strip()
    if not user_message:
        raise HTTPException(status_code=400, detail="Preview message cannot be empty")

    model_id = model.model_name or "gpt-3.5-turbo"
    # A request-supplied API key (if any) overrides the stored one.
    # NOTE(review): raises AttributeError if both keys are None — presumably
    # api_key is non-null by schema; confirm.
    headers = {"Authorization": f"Bearer {(request.api_key or model.api_key).strip()}"}

    start_time = time.time()
    endpoint = "/chat/completions"
    payload = {}

    # Choose endpoint/payload shape by model type (embedding vs. chat).
    if model.type == "embedding":
        endpoint = "/embeddings"
        payload = {
            "model": model_id,
            "input": user_message,
        }
    else:
        messages = []
        if request.system_prompt and request.system_prompt.strip():
            messages.append({"role": "system", "content": request.system_prompt.strip()})
        messages.append({"role": "user", "content": user_message})
        payload = {
            "model": model_id,
            "messages": messages,
            "max_tokens": request.max_tokens or 512,
            "temperature": request.temperature if request.temperature is not None else (model.temperature or 0.7),
        }

    try:
        with httpx.Client(timeout=60.0) as client:
            response = client.post(
                f"{model.base_url.rstrip('/')}{endpoint}",
                json=payload,
                headers=headers
            )
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"LLM request failed: {exc}") from exc

    if response.status_code != 200:
        # Prefer the structured vendor error message; fall back to raw text.
        detail = response.text
        try:
            detail_json = response.json()
            detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
        except Exception:
            pass
        raise HTTPException(status_code=502, detail=f"LLM vendor error: {detail}")

    result = response.json()
    reply = ""
    if model.type == "embedding":
        data_list = result.get("data", [])
        embedding = []
        if data_list and isinstance(data_list, list):
            embedding = data_list[0].get("embedding", []) or []
        dims = len(embedding) if isinstance(embedding, list) else 0
        # Show only the first 8 components as a readable preview.
        preview_values = []
        if isinstance(embedding, list):
            preview_values = embedding[:8]
        values_text = ", ".join(
            [f"{float(v):.6f}" if isinstance(v, (float, int)) else str(v) for v in preview_values]
        )
        reply = f"Embedding generated successfully. dims={dims}. head=[{values_text}]"
    else:
        choices = result.get("choices", [])
        if choices:
            reply = choices[0].get("message", {}).get("content", "") or ""

    return LLMPreviewResponse(
        success=bool(reply),
        reply=reply,
        usage=result.get("usage"),
        latency_ms=int((time.time() - start_time) * 1000),
        error=None if reply else "No response content",
    )
|
||||
808
api/app/routers/tools.py
Normal file
808
api/app/routers/tools.py
Normal file
@@ -0,0 +1,808 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import inspect, text
|
||||
from typing import Optional, Dict, Any, List
|
||||
import time
|
||||
import uuid
|
||||
import httpx
|
||||
from datetime import datetime
|
||||
|
||||
from ..db import get_db
|
||||
from ..models import LLMModel, ASRModel, ToolResource
|
||||
from ..schemas import ToolResourceCreate, ToolResourceOut, ToolResourceUpdate
|
||||
|
||||
router = APIRouter(prefix="/tools", tags=["Tools & Autotest"])
|
||||
|
||||
|
||||
TOOL_ID_ALIASES: Dict[str, str] = {
|
||||
# legacy -> canonical
|
||||
"voice_message_prompt": "voice_msg_prompt",
|
||||
}
|
||||
|
||||
|
||||
def normalize_tool_id(tool_id: Optional[str]) -> str:
    """Map a (possibly legacy) tool id onto its canonical form.

    Blank/None input yields an empty string; ids without a registered alias
    pass through unchanged.
    """
    candidate = str(tool_id or "").strip()
    return TOOL_ID_ALIASES.get(candidate, candidate) if candidate else ""
|
||||
|
||||
|
||||
# ============ Available Tools ============
|
||||
TOOL_REGISTRY = {
|
||||
"calculator": {
|
||||
"name": "计算器",
|
||||
"description": "执行数学计算",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"expression": {"type": "string", "description": "数学表达式,如: 2 + 3 * 4"}
|
||||
},
|
||||
"required": ["expression"]
|
||||
}
|
||||
},
|
||||
"code_interpreter": {
|
||||
"name": "代码执行",
|
||||
"description": "安全地执行Python代码",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {"type": "string", "description": "要执行的Python代码"}
|
||||
},
|
||||
"required": ["code"]
|
||||
}
|
||||
},
|
||||
"current_time": {
|
||||
"name": "当前时间",
|
||||
"description": "获取当前本地时间",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
"turn_on_camera": {
|
||||
"name": "打开摄像头",
|
||||
"description": "执行打开摄像头命令",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
"turn_off_camera": {
|
||||
"name": "关闭摄像头",
|
||||
"description": "执行关闭摄像头命令",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
"increase_volume": {
|
||||
"name": "调高音量",
|
||||
"description": "提升设备音量",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"step": {"type": "integer", "description": "调整步进,默认1"}
|
||||
},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
"decrease_volume": {
|
||||
"name": "调低音量",
|
||||
"description": "降低设备音量",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"step": {"type": "integer", "description": "调整步进,默认1"}
|
||||
},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
"voice_msg_prompt": {
|
||||
"name": "语音消息提示",
|
||||
"description": "播报一条语音提示消息",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"msg": {"type": "string", "description": "要播报的消息文本"}
|
||||
},
|
||||
"required": ["msg"]
|
||||
}
|
||||
},
|
||||
"text_msg_prompt": {
|
||||
"name": "文本消息提示",
|
||||
"description": "显示一条文本弹窗提示",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"msg": {"type": "string", "description": "提示文本内容"}
|
||||
},
|
||||
"required": ["msg"]
|
||||
}
|
||||
},
|
||||
"voice_choice_prompt": {
|
||||
"name": "语音选项提示",
|
||||
"description": "播报问题并展示可选项,等待用户选择后回传结果",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {"type": "string", "description": "向用户展示的问题文本"},
|
||||
"options": {
|
||||
"type": "array",
|
||||
"description": "可选项(字符串或含 id/label/value 的对象)",
|
||||
"minItems": 2,
|
||||
"items": {
|
||||
"anyOf": [
|
||||
{"type": "string"},
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {"type": "string"},
|
||||
"label": {"type": "string"},
|
||||
"value": {"type": "string"}
|
||||
},
|
||||
"required": ["label"]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"voice_text": {"type": "string", "description": "可选,单独指定播报文本;为空则播报 question"}
|
||||
},
|
||||
"required": ["question", "options"]
|
||||
}
|
||||
},
|
||||
"text_choice_prompt": {
|
||||
"name": "文本选项提示",
|
||||
"description": "显示文本选项弹窗并等待用户选择后回传结果",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {"type": "string", "description": "向用户展示的问题文本"},
|
||||
"options": {
|
||||
"type": "array",
|
||||
"description": "可选项(字符串或含 id/label/value 的对象)",
|
||||
"minItems": 2,
|
||||
"items": {
|
||||
"anyOf": [
|
||||
{"type": "string"},
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {"type": "string"},
|
||||
"label": {"type": "string"},
|
||||
"value": {"type": "string"}
|
||||
},
|
||||
"required": ["label"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["question", "options"]
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
TOOL_CATEGORY_MAP = {
|
||||
"calculator": "query",
|
||||
"current_time": "query",
|
||||
"code_interpreter": "query",
|
||||
"turn_on_camera": "system",
|
||||
"turn_off_camera": "system",
|
||||
"increase_volume": "system",
|
||||
"decrease_volume": "system",
|
||||
"voice_msg_prompt": "system",
|
||||
"voice_message_prompt": "system", # backward compatibility
|
||||
"text_msg_prompt": "system",
|
||||
"voice_choice_prompt": "system",
|
||||
"text_choice_prompt": "system",
|
||||
}
|
||||
|
||||
TOOL_ICON_MAP = {
|
||||
"calculator": "Terminal",
|
||||
"current_time": "Calendar",
|
||||
"code_interpreter": "Terminal",
|
||||
"turn_on_camera": "Camera",
|
||||
"turn_off_camera": "CameraOff",
|
||||
"increase_volume": "Volume2",
|
||||
"decrease_volume": "Volume2",
|
||||
"voice_msg_prompt": "Volume2",
|
||||
"voice_message_prompt": "Volume2", # backward compatibility
|
||||
"text_msg_prompt": "Terminal",
|
||||
"voice_choice_prompt": "Volume2",
|
||||
"text_choice_prompt": "Terminal",
|
||||
}
|
||||
|
||||
TOOL_HTTP_DEFAULTS = {
|
||||
}
|
||||
|
||||
TOOL_PARAMETER_DEFAULTS = {
|
||||
"increase_volume": {"step": 1},
|
||||
"decrease_volume": {"step": 1},
|
||||
}
|
||||
|
||||
TOOL_WAIT_FOR_RESPONSE_DEFAULTS = {
|
||||
"text_msg_prompt": True,
|
||||
"voice_choice_prompt": True,
|
||||
"text_choice_prompt": True,
|
||||
}
|
||||
|
||||
|
||||
def _normalize_parameter_schema(value: Any, *, tool_id: Optional[str] = None) -> Dict[str, Any]:
|
||||
if not isinstance(value, dict):
|
||||
value = {}
|
||||
normalized = dict(value)
|
||||
if not normalized:
|
||||
fallback = TOOL_REGISTRY.get(str(tool_id or "").strip(), {}).get("parameters")
|
||||
if isinstance(fallback, dict):
|
||||
normalized = dict(fallback)
|
||||
normalized.setdefault("type", "object")
|
||||
if normalized.get("type") != "object":
|
||||
raise HTTPException(status_code=400, detail="parameter_schema.type must be 'object'")
|
||||
properties = normalized.get("properties")
|
||||
if not isinstance(properties, dict):
|
||||
normalized["properties"] = {}
|
||||
required = normalized.get("required")
|
||||
if required is None:
|
||||
normalized["required"] = []
|
||||
elif not isinstance(required, list):
|
||||
raise HTTPException(status_code=400, detail="parameter_schema.required must be an array")
|
||||
return normalized
|
||||
|
||||
|
||||
def _normalize_parameter_defaults(value: Any) -> Dict[str, Any]:
|
||||
if value is None:
|
||||
return {}
|
||||
if not isinstance(value, dict):
|
||||
raise HTTPException(status_code=400, detail="parameter_defaults must be an object")
|
||||
return dict(value)
|
||||
|
||||
|
||||
def _ensure_tool_resource_schema(db: Session) -> None:
    """Apply lightweight SQLite migrations for newly added tool_resources columns.

    Inspects the live table and issues ALTER TABLE for any of the newer
    columns that are missing; commits only if something was added.
    """
    bind = db.get_bind()
    inspector = inspect(bind)
    try:
        columns = {col["name"] for col in inspector.get_columns("tool_resources")}
    except Exception:
        # Table may not exist yet (fresh database) — nothing to migrate.
        return

    altered = False
    # Each branch adds a column introduced after the original schema shipped.
    if "parameter_schema" not in columns:
        db.execute(text("ALTER TABLE tool_resources ADD COLUMN parameter_schema JSON"))
        altered = True
    if "parameter_defaults" not in columns:
        db.execute(text("ALTER TABLE tool_resources ADD COLUMN parameter_defaults JSON"))
        altered = True
    if "wait_for_response" not in columns:
        db.execute(text("ALTER TABLE tool_resources ADD COLUMN wait_for_response BOOLEAN DEFAULT 0"))
        altered = True
    if altered:
        db.commit()
|
||||
|
||||
|
||||
def _normalize_http_method(method: Optional[str]) -> str:
|
||||
normalized = str(method or "GET").strip().upper()
|
||||
return normalized if normalized in {"GET", "POST", "PUT", "PATCH", "DELETE"} else "GET"
|
||||
|
||||
|
||||
def _requires_http_request(category: str, tool_id: Optional[str]) -> bool:
|
||||
if category != "query":
|
||||
return False
|
||||
return str(tool_id or "").strip() not in {"calculator", "code_interpreter", "current_time"}
|
||||
|
||||
|
||||
def _validate_query_http_config(*, category: str, tool_id: Optional[str], http_url: Optional[str]) -> None:
    """Reject query tools that require an HTTP endpoint but provide no URL."""
    needs_url = _requires_http_request(category, tool_id)
    has_url = bool(str(http_url or "").strip())
    if needs_url and not has_url:
        raise HTTPException(status_code=400, detail="http_url is required for query tools (except calculator/code_interpreter)")
|
||||
|
||||
|
||||
def _migrate_legacy_system_tool_ids(db: Session) -> None:
    """Rename legacy built-in system tool IDs to their canonical IDs.

    Walks TOOL_ID_ALIASES; for each legacy row that is a seeded (system)
    tool, either deletes it when a canonical row already exists, or renames
    it in place. Commits only when something changed.
    """
    changed = False
    for legacy_id, canonical_id in TOOL_ID_ALIASES.items():
        if legacy_id == canonical_id:
            continue
        legacy_item = (
            db.query(ToolResource)
            .filter(ToolResource.id == legacy_id)
            .first()
        )
        # Only migrate seeded rows; user-created rows keep their chosen id.
        if not legacy_item or not bool(legacy_item.is_system):
            continue

        canonical_item = (
            db.query(ToolResource)
            .filter(ToolResource.id == canonical_id)
            .first()
        )
        # A row already exists under the canonical id: drop the legacy
        # duplicate rather than renaming onto a conflicting primary key.
        if canonical_item:
            db.delete(legacy_item)
            changed = True
            continue

        legacy_item.id = canonical_id
        legacy_item.updated_at = datetime.utcnow()
        changed = True

    if changed:
        db.commit()
|
||||
|
||||
|
||||
def _seed_default_tools_if_empty(db: Session) -> None:
    """Ensure built-in tools exist in tool_resources without overriding custom edits.

    Runs schema migration and legacy-id migration first. Seeds the built-in
    registry only when NO system tool exists at all, so user edits to seeded
    rows are never overwritten.
    """
    _ensure_tool_resource_schema(db)
    _migrate_legacy_system_tool_ids(db)
    # Any existing system tool means seeding already happened — bail out.
    existing_system_count = (
        db.query(ToolResource.id)
        .filter(ToolResource.is_system.is_(True))
        .count()
    )
    if existing_system_count > 0:
        return
    existing_ids = {
        str(item[0])
        for item in db.query(ToolResource.id).all()
    }
    changed = False
    for tool_id, payload in TOOL_REGISTRY.items():
        # Skip ids already taken (e.g. by user-created tools).
        if tool_id in existing_ids:
            continue
        http_defaults = TOOL_HTTP_DEFAULTS.get(tool_id, {})
        db.add(ToolResource(
            id=tool_id,
            user_id=1,  # NOTE(review): seeded rows appear owned by user 1 — confirm this is the intended default owner
            name=payload.get("name", tool_id),
            description=payload.get("description", ""),
            category=TOOL_CATEGORY_MAP.get(tool_id, "system"),
            icon=TOOL_ICON_MAP.get(tool_id, "Wrench"),
            http_method=_normalize_http_method(http_defaults.get("http_method")),
            http_url=http_defaults.get("http_url"),
            http_headers=http_defaults.get("http_headers") or {},
            http_timeout_ms=int(http_defaults.get("http_timeout_ms") or 10000),
            parameter_schema=_normalize_parameter_schema(payload.get("parameters"), tool_id=tool_id),
            parameter_defaults=_normalize_parameter_defaults(TOOL_PARAMETER_DEFAULTS.get(tool_id)),
            wait_for_response=bool(TOOL_WAIT_FOR_RESPONSE_DEFAULTS.get(tool_id, False)),
            enabled=True,
            is_system=True,
        ))
        changed = True
    if changed:
        db.commit()
|
||||
|
||||
|
||||
def recreate_tool_resources(db: Session) -> None:
    """Recreate tool resources table content with current built-in defaults.

    WARNING: drops the entire tool_resources table — user-created tools are
    lost — then recreates it and reseeds the built-ins.
    """
    bind = db.get_bind()
    ToolResource.__table__.drop(bind=bind, checkfirst=True)
    ToolResource.__table__.create(bind=bind, checkfirst=True)
    _seed_default_tools_if_empty(db)
|
||||
|
||||
|
||||
@router.get("/list")
def list_available_tools():
    """Return the built-in tool registry (id -> definition)."""
    registry = TOOL_REGISTRY
    return {"tools": registry}
|
||||
|
||||
|
||||
@router.get("/list/{tool_id}")
def get_tool_detail(tool_id: str):
    """Return the registry definition for a single tool (legacy ids accepted)."""
    canonical = normalize_tool_id(tool_id)
    try:
        return TOOL_REGISTRY[canonical]
    except KeyError:
        raise HTTPException(status_code=404, detail="Tool not found")
|
||||
|
||||
|
||||
# ============ Tool Resource CRUD ============
|
||||
@router.get("/resources")
def list_tool_resources(
    category: Optional[str] = None,
    enabled: Optional[bool] = None,
    include_system: bool = True,
    page: int = 1,
    limit: int = 100,
    db: Session = Depends(get_db),
):
    """List tool resources. 'system'/'query' describe execution type only, not permissions.

    Args:
        category: Optional category filter.
        enabled: Optional enabled-state filter.
        include_system: When False, built-in (seeded) tools are hidden.
        page: 1-based page number.
        limit: Page size.
        db: Injected database session.

    Returns:
        dict with total count, pagination echo, and the page of rows
        (newest first).
    """
    _seed_default_tools_if_empty(db)
    query = db.query(ToolResource)
    if not include_system:
        # Consistency fix: use SQLAlchemy's `is_()` operator like the
        # seeding helper does, instead of the `== False` comparison.
        query = query.filter(ToolResource.is_system.is_(False))
    if category:
        query = query.filter(ToolResource.category == category)
    if enabled is not None:
        query = query.filter(ToolResource.enabled == enabled)
    total = query.count()
    rows = (
        query.order_by(ToolResource.created_at.desc())
        .offset(max(page - 1, 0) * limit)
        .limit(limit)
        .all()
    )
    return {"total": total, "page": page, "limit": limit, "list": rows}
|
||||
|
||||
|
||||
@router.get("/resources/{id}", response_model=ToolResourceOut)
def get_tool_resource(id: str, db: Session = Depends(get_db)):
    """Fetch one tool resource, falling back to the canonical legacy alias."""
    _seed_default_tools_if_empty(db)
    item = db.query(ToolResource).filter(ToolResource.id == id).first()
    if item is None:
        alias = normalize_tool_id(id)
        if alias and alias != id:
            item = db.query(ToolResource).filter(ToolResource.id == alias).first()
    if item is None:
        raise HTTPException(status_code=404, detail="Tool resource not found")
    return item
|
||||
|
||||
|
||||
@router.post("/resources", response_model=ToolResourceOut)
def create_tool_resource(data: ToolResourceCreate, db: Session = Depends(get_db)):
    """Create a custom (non-system) tool resource.

    The client-supplied id (if any) is normalized through the legacy alias
    map and must be unique; otherwise a short random id is generated.
    """
    _seed_default_tools_if_empty(db)

    requested_id = normalize_tool_id((data.id or "").strip())
    if requested_id and db.query(ToolResource).filter(ToolResource.id == requested_id).first():
        raise HTTPException(status_code=400, detail="Tool ID already exists")

    _validate_query_http_config(category=data.category, tool_id=requested_id, http_url=data.http_url)
    schema = _normalize_parameter_schema(data.parameter_schema, tool_id=requested_id)
    defaults = _normalize_parameter_defaults(data.parameter_defaults)

    # Only system-category tools may block waiting for a device response.
    wait_flag = bool(data.wait_for_response) if data.category == "system" else False

    record = ToolResource(
        id=requested_id or f"tool_{str(uuid.uuid4())[:8]}",
        user_id=1,
        name=data.name,
        description=data.description,
        category=data.category,
        icon=data.icon,
        http_method=_normalize_http_method(data.http_method),
        http_url=(data.http_url or "").strip() or None,
        http_headers=data.http_headers or {},
        http_timeout_ms=max(1000, int(data.http_timeout_ms or 10000)),
        parameter_schema=schema,
        parameter_defaults=defaults,
        wait_for_response=wait_flag,
        enabled=data.enabled,
        is_system=False,
    )
    db.add(record)
    db.commit()
    db.refresh(record)
    return record
|
||||
|
||||
|
||||
@router.put("/resources/{id}", response_model=ToolResourceOut)
def update_tool_resource(id: str, data: ToolResourceUpdate, db: Session = Depends(get_db)):
    """Partially update a tool resource (legacy ids resolve to canonical ones).

    Only fields the client actually sent are applied; normalization and
    validation run on the merged (incoming + current) state.
    """
    _seed_default_tools_if_empty(db)
    canonical_id = normalize_tool_id(id)
    item = db.query(ToolResource).filter(ToolResource.id == id).first()
    if not item and canonical_id and canonical_id != id:
        item = db.query(ToolResource).filter(ToolResource.id == canonical_id).first()
    if not item:
        raise HTTPException(status_code=404, detail="Tool resource not found")

    # exclude_unset: distinguish "field omitted" from "field set to None".
    update_data = data.model_dump(exclude_unset=True)

    # Validate the category/http_url combination that WOULD result from the
    # update, mixing incoming values with the row's current values.
    new_category = update_data.get("category", item.category)
    new_http_url = update_data.get("http_url", item.http_url)
    _validate_query_http_config(category=new_category, tool_id=item.id, http_url=new_http_url)

    if "http_method" in update_data:
        update_data["http_method"] = _normalize_http_method(update_data.get("http_method"))
    if "http_timeout_ms" in update_data and update_data.get("http_timeout_ms") is not None:
        # Clamp to a 1-second minimum timeout.
        update_data["http_timeout_ms"] = max(1000, int(update_data["http_timeout_ms"]))
    if "parameter_schema" in update_data:
        update_data["parameter_schema"] = _normalize_parameter_schema(update_data.get("parameter_schema"), tool_id=item.id)
    if "parameter_defaults" in update_data:
        update_data["parameter_defaults"] = _normalize_parameter_defaults(update_data.get("parameter_defaults"))
    # wait_for_response is only meaningful for system-category tools.
    if new_category != "system":
        update_data["wait_for_response"] = False

    for field, value in update_data.items():
        setattr(item, field, value)
    item.updated_at = datetime.utcnow()

    db.commit()
    db.refresh(item)
    return item
|
||||
|
||||
|
||||
@router.delete("/resources/{id}")
def delete_tool_resource(id: str, db: Session = Depends(get_db)):
    """Delete a tool resource (legacy ids resolve to canonical ones)."""
    _seed_default_tools_if_empty(db)
    target = db.query(ToolResource).filter(ToolResource.id == id).first()
    if target is None:
        alias = normalize_tool_id(id)
        if alias and alias != id:
            target = db.query(ToolResource).filter(ToolResource.id == alias).first()
    if target is None:
        raise HTTPException(status_code=404, detail="Tool resource not found")
    db.delete(target)
    db.commit()
    return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
# ============ Autotest ============
|
||||
class AutotestResult:
    """Accumulates named pass/fail outcomes for one automated test run."""

    def __init__(self):
        # Short random run id plus a wall-clock start for duration reporting.
        self.id = str(uuid.uuid4())[:8]
        self.started_at = time.time()
        self.tests = []
        self.summary = {"passed": 0, "failed": 0, "total": 0}

    def add_test(self, name: str, passed: bool, message: str = "", duration_ms: int = 0):
        """Record one test outcome and bump the pass/fail counters."""
        entry = {
            "name": name,
            "passed": passed,
            "message": message,
            "duration_ms": duration_ms,
        }
        self.tests.append(entry)
        self.summary["passed" if passed else "failed"] += 1
        self.summary["total"] += 1

    def to_dict(self):
        """Serialize the run; elapsed time is computed at call time."""
        elapsed_ms = int((time.time() - self.started_at) * 1000)
        return {
            "id": self.id,
            "started_at": self.started_at,
            "duration_ms": elapsed_ms,
            "tests": self.tests,
            "summary": self.summary,
        }
|
||||
|
||||
|
||||
@router.post("/autotest")
def run_autotest(
    llm_model_id: Optional[str] = None,
    asr_model_id: Optional[str] = None,
    test_llm: bool = True,
    test_asr: bool = True,
    db: Session = Depends(get_db)
):
    """Run the automated test suite for the selected LLM/ASR models."""
    result = AutotestResult()

    # Run model test batteries first (when an id was supplied)...
    if test_llm and llm_model_id:
        _test_llm_model(db, llm_model_id, result)
    if test_asr and asr_model_id:
        _test_asr_model(db, asr_model_id, result)

    # ...then record a failure for each requested-but-missing model id.
    if test_llm and not llm_model_id:
        result.add_test("LLM Model Check", False, "No LLM model ID provided")
    if test_asr and not asr_model_id:
        result.add_test("ASR Model Check", False, "No ASR model ID provided")

    return result.to_dict()
|
||||
|
||||
|
||||
@router.post("/autotest/llm/{model_id}")
def autotest_llm_model(model_id: str, db: Session = Depends(get_db)):
    """Run the autotest battery against a single LLM model."""
    outcome = AutotestResult()
    _test_llm_model(db, model_id, outcome)
    return outcome.to_dict()
|
||||
|
||||
|
||||
@router.post("/autotest/asr/{model_id}")
def autotest_asr_model(model_id: str, db: Session = Depends(get_db)):
    """Run the autotest battery against a single ASR model."""
    outcome = AutotestResult()
    _test_asr_model(db, model_id, outcome)
    return outcome.to_dict()
|
||||
|
||||
|
||||
def _test_llm_model(db: Session, model_id: str, result: AutotestResult):
    """Internal: run a battery of checks against one LLM model.

    Appends entries to ``result``: existence, API connection, temperature
    configuration, and (for text-type models) streaming support.
    """
    start_time = time.time()

    # 1. Model existence
    model = db.query(LLMModel).filter(LLMModel.id == model_id).first()
    duration_ms = int((time.time() - start_time) * 1000)

    if not model:
        result.add_test("Model Existence", False, f"Model {model_id} not found", duration_ms)
        return

    result.add_test("Model Existence", True, f"Found model: {model.name}", duration_ms)

    # BUGFIX: build the auth headers once, up front. Previously `headers`
    # was assigned inside the phase-2 try block and reused by the streaming
    # test below; if phase 2 raised before the assignment, the streaming
    # test crashed with NameError instead of reporting a failure.
    headers = {"Authorization": f"Bearer {model.api_key}"}

    # 2. Connection test: minimal chat completion
    test_start = time.time()
    try:
        payload = {
            "model": model.model_name or "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "Reply with 'OK'."}],
            "max_tokens": 10,
            "temperature": 0.1,
        }

        with httpx.Client(timeout=30.0) as client:
            response = client.post(
                f"{model.base_url}/chat/completions",
                json=payload,
                headers=headers
            )
            response.raise_for_status()

        result_text = response.json()
        latency_ms = int((time.time() - test_start) * 1000)

        if result_text.get("choices"):
            result.add_test("API Connection", True, f"Latency: {latency_ms}ms", latency_ms)
        else:
            result.add_test("API Connection", False, "Empty response", latency_ms)

    except Exception as e:
        latency_ms = int((time.time() - test_start) * 1000)
        result.add_test("API Connection", False, str(e)[:200], latency_ms)

    # 3. Configuration check (informational; always passes)
    if model.temperature is not None:
        result.add_test("Temperature Setting", True, f"temperature={model.temperature}")
    else:
        result.add_test("Temperature Setting", True, "Using default")

    # 4. Streaming support (text models only)
    if model.type == "text":
        test_start = time.time()
        try:
            with httpx.Client(timeout=30.0) as client:
                with client.stream(
                    "POST",
                    f"{model.base_url}/chat/completions",
                    json={
                        "model": model.model_name or "gpt-3.5-turbo",
                        "messages": [{"role": "user", "content": "Count from 1 to 3."}],
                        "stream": True,
                    },
                    headers=headers
                ) as response:
                    response.raise_for_status()
                    # Drain the stream; the chunk count proves data arrived.
                    chunk_count = sum(1 for _ in response.iter_bytes())

            latency_ms = int((time.time() - test_start) * 1000)
            result.add_test("Streaming Support", True, f"Received {chunk_count} chunks", latency_ms)
        except Exception as e:
            latency_ms = int((time.time() - test_start) * 1000)
            result.add_test("Streaming Support", False, str(e)[:200], latency_ms)
|
||||
|
||||
|
||||
def _test_asr_model(db: Session, model_id: str, result: AutotestResult):
    """Internal: run a battery of checks against one ASR model.

    Appends entries to ``result``: existence, hotword configuration,
    vendor endpoint availability, and language configuration.
    """
    start_time = time.time()

    # 1. Model existence
    model = db.query(ASRModel).filter(ASRModel.id == model_id).first()
    duration_ms = int((time.time() - start_time) * 1000)

    if not model:
        result.add_test("Model Existence", False, f"Model {model_id} not found", duration_ms)
        return

    result.add_test("Model Existence", True, f"Found model: {model.name}", duration_ms)

    # 2. Hotword configuration (informational; always passes)
    if model.hotwords:
        result.add_test("Hotwords Config", True, f"Hotwords: {len(model.hotwords)} words")
    else:
        result.add_test("Hotwords Config", True, "No hotwords configured")

    # 3. API availability probe, vendor-specific endpoint
    test_start = time.time()
    try:
        headers = {"Authorization": f"Bearer {model.api_key}"}

        with httpx.Client(timeout=30.0) as client:
            normalized_vendor = (model.vendor or "").strip().lower()
            if normalized_vendor in [
                "openai compatible",
                "openai-compatible",
                "siliconflow",  # backward compatibility
                "paraformer",
            ]:
                probe_url = f"{model.base_url}/asr"
            elif normalized_vendor == "openai":
                # BUGFIX: use the already-normalized vendor string. The old
                # `model.vendor.lower()` raised AttributeError for a NULL
                # vendor and missed values with stray whitespace.
                probe_url = f"{model.base_url}/audio/models"
            else:
                # Generic health check for unknown vendors.
                probe_url = f"{model.base_url}/health"
            response = client.get(probe_url, headers=headers)

        latency_ms = int((time.time() - test_start) * 1000)

        # 405 = method not allowed, but the endpoint exists.
        if response.status_code in [200, 405]:
            result.add_test("API Availability", True, f"Status: {response.status_code}", latency_ms)
        else:
            result.add_test("API Availability", False, f"Status: {response.status_code}", latency_ms)

    except httpx.TimeoutException:
        latency_ms = int((time.time() - test_start) * 1000)
        result.add_test("API Availability", False, "Connection timeout", latency_ms)
    except Exception as e:
        latency_ms = int((time.time() - test_start) * 1000)
        result.add_test("API Availability", False, str(e)[:200], latency_ms)

    # 4. Language configuration
    if model.language in ["zh", "en", "Multi-lingual"]:
        result.add_test("Language Config", True, f"Language: {model.language}")
    else:
        result.add_test("Language Config", False, f"Unknown language: {model.language}")
|
||||
|
||||
|
||||
# ============ Quick Health Check ============
|
||||
@router.get("/health")
def health_check():
    """Quick liveness probe: reports status, current time, and known tool ids."""
    tool_ids = list(TOOL_REGISTRY.keys())
    return {
        "status": "healthy",
        "timestamp": time.time(),
        "tools": tool_ids,
    }
|
||||
|
||||
|
||||
@router.post("/test-message")
def send_test_message(
    llm_model_id: str,
    message: str = "Hello, this is a test message.",
    db: Session = Depends(get_db)
):
    """Send a one-off test message to an LLM model and return its reply.

    Args:
        llm_model_id: LLMModel primary key.
        message: Text to send (defaults to a canned greeting).
        db: Injected database session.

    Raises:
        HTTPException: 404 for an unknown model, 500 on upstream failure.
    """
    model = db.query(LLMModel).filter(LLMModel.id == llm_model_id).first()
    if not model:
        raise HTTPException(status_code=404, detail="LLM Model not found")

    try:
        payload = {
            "model": model.model_name or "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": message}],
            "max_tokens": 500,
            "temperature": 0.7,
        }
        headers = {"Authorization": f"Bearer {model.api_key}"}

        with httpx.Client(timeout=60.0) as client:
            response = client.post(
                f"{model.base_url}/chat/completions",
                json=payload,
                headers=headers
            )
            response.raise_for_status()

            result = response.json()
            # BUGFIX: guard against an empty "choices" list, which made the
            # previous `result.get("choices", [{}])[0]` raise IndexError
            # (the default only applied when the key was missing entirely).
            choices = result.get("choices") or [{}]
            reply = choices[0].get("message", {}).get("content", "")

            return {
                "success": True,
                "reply": reply,
                "usage": result.get("usage", {})
            }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
||||
442
api/app/routers/voices.py
Normal file
442
api/app/routers/voices.py
Normal file
@@ -0,0 +1,442 @@
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import wave
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..db import get_db
|
||||
from ..id_generator import unique_short_id
|
||||
from ..models import Voice
|
||||
from ..schemas import VoiceCreate, VoiceOut, VoicePreviewRequest, VoicePreviewResponse, VoiceUpdate
|
||||
|
||||
router = APIRouter(prefix="/voices", tags=["Voices"])
|
||||
|
||||
# Default model/voice identifiers per TTS vendor family.
OPENAI_COMPATIBLE_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
DASHSCOPE_DEFAULT_MODEL = "qwen3-tts-flash-realtime"
DASHSCOPE_DEFAULT_VOICE_KEY = "Cherry"
# DashScope realtime TTS speaks over a websocket (wss), not plain HTTP.
DASHSCOPE_DEFAULT_BASE_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
|
||||
|
||||
# The DashScope realtime-TTS SDK is an optional dependency: when it is not
# installed, fall back to None placeholders plus a stub callback base class,
# and record availability so endpoints can report a clear error instead of
# crashing at import time.
try:
    import dashscope
    from dashscope.audio.qwen_tts_realtime import AudioFormat, QwenTtsRealtime, QwenTtsRealtimeCallback

    DASHSCOPE_SDK_AVAILABLE = True
except ImportError:
    dashscope = None  # type: ignore[assignment]
    AudioFormat = None  # type: ignore[assignment]
    QwenTtsRealtime = None  # type: ignore[assignment]
    DASHSCOPE_SDK_AVAILABLE = False

    class QwenTtsRealtimeCallback:  # type: ignore[no-redef]
        """Fallback callback base when DashScope SDK is unavailable."""

        pass
||||
|
||||
|
||||
class _DashScopePreviewCallback(QwenTtsRealtimeCallback):
    """Collect DashScope realtime callback events and PCM chunks."""

    def __init__(self) -> None:
        super().__init__()
        # Set when the websocket reports open.
        self._open_event = threading.Event()
        # Set when synthesis ends (completion, error, or unexpected close).
        self._done_event = threading.Event()
        # Guards _audio_chunks; callbacks may arrive on an SDK thread.
        self._lock = threading.Lock()
        self._audio_chunks: list[bytes] = []
        self._error_message: Optional[str] = None

    def on_open(self) -> None:
        # Unblocks wait_for_open().
        self._open_event.set()

    def on_close(self, code: int, reason: str) -> None:
        # A close that arrives before completion is treated as a failure.
        if not self._done_event.is_set():
            self._error_message = f"DashScope websocket closed unexpectedly: {code} {reason}"
            self._done_event.set()

    def on_error(self, message: str) -> None:
        # Record the error and unblock wait_for_done().
        self._error_message = str(message)
        self._done_event.set()

    def on_event(self, response: Any) -> None:
        # Normalize then dispatch one structured event from the session.
        payload = _coerce_dashscope_event(response)
        event_type = str(payload.get("type") or "").strip()
        if event_type == "response.audio.delta":
            delta = payload.get("delta")
            if isinstance(delta, str):
                try:
                    self._append_audio(base64.b64decode(delta))
                except Exception:
                    # Malformed base64 chunks are dropped rather than aborting.
                    return
        elif event_type in {"response.done", "session.finished"}:
            self._done_event.set()
        elif event_type == "error":
            self._error_message = _format_dashscope_error_event(payload)
            self._done_event.set()

    def on_data(self, data: bytes) -> None:
        # Some SDK versions emit raw PCM frames via on_data.
        if isinstance(data, (bytes, bytearray)):
            self._append_audio(bytes(data))

    def wait_for_open(self, timeout: float = 10.0) -> None:
        """Block until the websocket opens; raise TimeoutError on timeout."""
        if not self._open_event.wait(timeout):
            raise TimeoutError("DashScope websocket open timeout")

    def wait_for_done(self, timeout: float = 45.0) -> None:
        """Block until synthesis finishes; raise TimeoutError on timeout."""
        if not self._done_event.wait(timeout):
            raise TimeoutError("DashScope synthesis timeout")

    def raise_if_error(self) -> None:
        """Re-raise any recorded callback error as RuntimeError."""
        if self._error_message:
            raise RuntimeError(self._error_message)

    def read_audio(self) -> bytes:
        """Return all collected PCM bytes joined in arrival order."""
        with self._lock:
            return b"".join(self._audio_chunks)

    def _append_audio(self, chunk: bytes) -> None:
        # Skip empty chunks; append under the lock.
        if not chunk:
            return
        with self._lock:
            self._audio_chunks.append(chunk)
|
||||
|
||||
|
||||
def _coerce_dashscope_event(response: Any) -> Dict[str, Any]:
|
||||
if isinstance(response, dict):
|
||||
return response
|
||||
if isinstance(response, str):
|
||||
try:
|
||||
parsed = json.loads(response)
|
||||
if isinstance(parsed, dict):
|
||||
return parsed
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
return {"type": "raw", "message": str(response)}
|
||||
|
||||
|
||||
def _format_dashscope_error_event(payload: Dict[str, Any]) -> str:
|
||||
error = payload.get("error")
|
||||
if isinstance(error, dict):
|
||||
code = str(error.get("code") or "").strip()
|
||||
message = str(error.get("message") or "").strip()
|
||||
if code and message:
|
||||
return f"{code}: {message}"
|
||||
return message or str(error)
|
||||
return str(error or "DashScope realtime TTS error")
|
||||
|
||||
|
||||
def _create_dashscope_realtime_client(*, model: str, callback: _DashScopePreviewCallback, url: str, api_key: str) -> Any:
    """Build a QwenTtsRealtime client, tolerating SDKs without an api_key kwarg."""
    if QwenTtsRealtime is None:
        raise RuntimeError("DashScope SDK unavailable")

    common_kwargs = {"model": model, "callback": callback, "url": url}
    try:
        return QwenTtsRealtime(api_key=api_key, **common_kwargs)  # type: ignore[misc]
    except TypeError as exc:
        # Older SDK constructors reject api_key; retry without it in that case only.
        if "api_key" in str(exc):
            return QwenTtsRealtime(**common_kwargs)  # type: ignore[misc]
        raise
|
||||
|
||||
|
||||
def _pcm16_to_wav_bytes(pcm_bytes: bytes, sample_rate: int = 24000) -> bytes:
|
||||
with io.BytesIO() as buffer:
|
||||
with wave.open(buffer, "wb") as wav_file:
|
||||
wav_file.setnchannels(1)
|
||||
wav_file.setsampwidth(2)
|
||||
wav_file.setframerate(sample_rate)
|
||||
wav_file.writeframes(pcm_bytes)
|
||||
return buffer.getvalue()
|
||||
|
||||
|
||||
def _synthesize_dashscope_preview(
    *,
    text: str,
    api_key: str,
    base_url: str,
    model: str,
    voice_key: str,
    speed: Optional[float],
) -> bytes:
    """Synthesize `text` via DashScope realtime TTS and return WAV bytes.

    Opens a realtime websocket session, streams the text in commit mode,
    collects 24 kHz mono 16-bit PCM from the callback, and wraps it in a
    WAV container. Raises RuntimeError on SDK absence, vendor errors, or
    empty audio; TimeoutError on open/synthesis timeouts.
    """
    if not DASHSCOPE_SDK_AVAILABLE:
        raise RuntimeError("dashscope package not installed; install with `pip install dashscope>=1.25.11`")
    if not AudioFormat:
        raise RuntimeError("DashScope SDK AudioFormat unavailable")

    callback = _DashScopePreviewCallback()
    # The SDK reads credentials from the module-level attribute as well.
    if dashscope is not None:
        dashscope.api_key = api_key
    client = _create_dashscope_realtime_client(
        model=model,
        callback=callback,
        url=base_url,
        api_key=api_key,
    )

    try:
        client.connect()
        callback.wait_for_open()
        session_kwargs: Dict[str, Any] = {
            "voice": voice_key,
            "response_format": AudioFormat.PCM_24000HZ_MONO_16BIT,
            "mode": "commit",
        }
        # speech_rate is supported by qwen3-* realtime models.
        normalized_model = str(model or "").strip().lower()
        if speed is not None and normalized_model.startswith("qwen3-"):
            # Clamp to the vendor-accepted range.
            session_kwargs["speech_rate"] = max(0.5, min(2.0, float(speed)))
        client.update_session(**session_kwargs)
        client.append_text(text)
        client.commit()
        callback.wait_for_done()
        callback.raise_if_error()
        pcm_audio = callback.read_audio()
        if not pcm_audio:
            raise RuntimeError("No audio chunk returned from DashScope realtime synthesis")
        return _pcm16_to_wav_bytes(pcm_audio, sample_rate=24000)
    finally:
        # Best-effort teardown: different SDK versions expose finish/close.
        finish_fn = getattr(client, "finish", None)
        if callable(finish_fn):
            try:
                finish_fn()
            except Exception:
                pass
        close_fn = getattr(client, "close", None)
        if callable(close_fn):
            try:
                close_fn()
            except Exception:
                pass
|
||||
|
||||
|
||||
def _is_openai_compatible_vendor(vendor: str) -> bool:
|
||||
normalized = (vendor or "").strip().lower()
|
||||
return normalized in {
|
||||
"openai compatible",
|
||||
"openai-compatible",
|
||||
"siliconflow", # backward compatibility
|
||||
"硅基流动", # backward compatibility
|
||||
}
|
||||
|
||||
|
||||
def _is_dashscope_vendor(vendor: str) -> bool:
|
||||
normalized = (vendor or "").strip().lower()
|
||||
return normalized in {
|
||||
"dashscope",
|
||||
}
|
||||
|
||||
|
||||
def _default_base_url(vendor: str) -> Optional[str]:
    """Vendor-specific fallback endpoint, or None for unknown vendors."""
    if _is_dashscope_vendor(vendor):
        return DASHSCOPE_DEFAULT_BASE_URL
    if _is_openai_compatible_vendor(vendor):
        return "https://api.siliconflow.cn/v1"
    return None
|
||||
|
||||
|
||||
def _build_openai_compatible_voice_key(voice: Voice, model: str) -> str:
    """Resolve the vendor voice key, deriving ``model:id`` when unset."""
    explicit_key = voice.voice_key
    if explicit_key:
        return explicit_key
    raw_id = voice.id
    return raw_id if ":" in raw_id else f"{model}:{raw_id}"
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_voices(
|
||||
vendor: Optional[str] = None,
|
||||
language: Optional[str] = None,
|
||||
gender: Optional[str] = None,
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取声音库列表"""
|
||||
query = db.query(Voice)
|
||||
if vendor:
|
||||
query = query.filter(Voice.vendor == vendor)
|
||||
if language:
|
||||
query = query.filter(Voice.language == language)
|
||||
if gender:
|
||||
query = query.filter(Voice.gender == gender)
|
||||
|
||||
total = query.count()
|
||||
voices = query.order_by(Voice.created_at.desc()) \
|
||||
.offset((page - 1) * limit).limit(limit).all()
|
||||
return {"total": total, "page": page, "limit": limit, "list": voices}
|
||||
|
||||
|
||||
@router.post("", response_model=VoiceOut)
|
||||
def create_voice(data: VoiceCreate, db: Session = Depends(get_db)):
|
||||
"""创建声音"""
|
||||
vendor = data.vendor.strip()
|
||||
model = data.model
|
||||
voice_key = data.voice_key
|
||||
|
||||
if _is_openai_compatible_vendor(vendor):
|
||||
model = model or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
if not voice_key:
|
||||
raw_id = (data.id or data.name).strip()
|
||||
voice_key = raw_id if ":" in raw_id else f"{model}:{raw_id}"
|
||||
elif _is_dashscope_vendor(vendor):
|
||||
model = (model or "").strip() or DASHSCOPE_DEFAULT_MODEL
|
||||
voice_key = (voice_key or "").strip() or DASHSCOPE_DEFAULT_VOICE_KEY
|
||||
|
||||
voice = Voice(
|
||||
id=unique_short_id("tts", db, Voice),
|
||||
user_id=1,
|
||||
name=data.name,
|
||||
vendor=vendor,
|
||||
gender=data.gender,
|
||||
language=data.language,
|
||||
description=data.description,
|
||||
model=model,
|
||||
voice_key=voice_key,
|
||||
api_key=data.api_key,
|
||||
base_url=data.base_url,
|
||||
speed=data.speed,
|
||||
gain=data.gain,
|
||||
pitch=data.pitch,
|
||||
enabled=data.enabled,
|
||||
)
|
||||
db.add(voice)
|
||||
db.commit()
|
||||
db.refresh(voice)
|
||||
return voice
|
||||
|
||||
|
||||
@router.get("/{id}", response_model=VoiceOut)
|
||||
def get_voice(id: str, db: Session = Depends(get_db)):
|
||||
"""获取单个声音详情"""
|
||||
voice = db.query(Voice).filter(Voice.id == id).first()
|
||||
if not voice:
|
||||
raise HTTPException(status_code=404, detail="Voice not found")
|
||||
return voice
|
||||
|
||||
|
||||
@router.put("/{id}", response_model=VoiceOut)
|
||||
def update_voice(id: str, data: VoiceUpdate, db: Session = Depends(get_db)):
|
||||
"""更新声音"""
|
||||
voice = db.query(Voice).filter(Voice.id == id).first()
|
||||
if not voice:
|
||||
raise HTTPException(status_code=404, detail="Voice not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True)
|
||||
if "vendor" in update_data and update_data["vendor"] is not None:
|
||||
update_data["vendor"] = update_data["vendor"].strip()
|
||||
|
||||
vendor_for_defaults = update_data.get("vendor", voice.vendor)
|
||||
if _is_openai_compatible_vendor(vendor_for_defaults):
|
||||
model = update_data.get("model") or voice.model or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
voice_key = update_data.get("voice_key") or voice.voice_key
|
||||
update_data["model"] = model
|
||||
update_data["voice_key"] = voice_key or _build_openai_compatible_voice_key(voice, model)
|
||||
elif _is_dashscope_vendor(vendor_for_defaults):
|
||||
model = update_data.get("model") or voice.model or DASHSCOPE_DEFAULT_MODEL
|
||||
voice_key = update_data.get("voice_key") or voice.voice_key or DASHSCOPE_DEFAULT_VOICE_KEY
|
||||
update_data["model"] = model
|
||||
update_data["voice_key"] = voice_key
|
||||
|
||||
for field, value in update_data.items():
|
||||
setattr(voice, field, value)
|
||||
|
||||
db.commit()
|
||||
db.refresh(voice)
|
||||
return voice
|
||||
|
||||
|
||||
@router.delete("/{id}")
|
||||
def delete_voice(id: str, db: Session = Depends(get_db)):
|
||||
"""删除声音"""
|
||||
voice = db.query(Voice).filter(Voice.id == id).first()
|
||||
if not voice:
|
||||
raise HTTPException(status_code=404, detail="Voice not found")
|
||||
db.delete(voice)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
|
||||
|
||||
@router.post("/{id}/preview", response_model=VoicePreviewResponse)
|
||||
def preview_voice(id: str, data: VoicePreviewRequest, db: Session = Depends(get_db)):
|
||||
"""试听指定声音,支持 OpenAI-compatible 与 DashScope Realtime。"""
|
||||
voice = db.query(Voice).filter(Voice.id == id).first()
|
||||
if not voice:
|
||||
raise HTTPException(status_code=404, detail="Voice not found")
|
||||
|
||||
text = data.text.strip()
|
||||
if not text:
|
||||
raise HTTPException(status_code=400, detail="Preview text cannot be empty")
|
||||
|
||||
if _is_dashscope_vendor(voice.vendor):
|
||||
api_key = (data.api_key or "").strip() or (voice.api_key or "").strip()
|
||||
if not api_key:
|
||||
api_key = os.getenv("DASHSCOPE_API_KEY", "").strip() or os.getenv("TTS_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=400, detail=f"API key is required for voice: {voice.name}")
|
||||
|
||||
base_url = (voice.base_url or "").strip() or DASHSCOPE_DEFAULT_BASE_URL
|
||||
model = (voice.model or "").strip() or DASHSCOPE_DEFAULT_MODEL
|
||||
voice_key = (voice.voice_key or "").strip() or DASHSCOPE_DEFAULT_VOICE_KEY
|
||||
effective_speed = data.speed if data.speed is not None else voice.speed
|
||||
try:
|
||||
wav_bytes = _synthesize_dashscope_preview(
|
||||
text=text,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
model=model,
|
||||
voice_key=voice_key,
|
||||
speed=effective_speed,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"DashScope preview failed: {exc}") from exc
|
||||
audio_base64 = base64.b64encode(wav_bytes).decode("utf-8")
|
||||
return VoicePreviewResponse(success=True, audio_url=f"data:audio/wav;base64,{audio_base64}")
|
||||
|
||||
api_key = (data.api_key or "").strip() or (voice.api_key or "").strip()
|
||||
if not api_key and _is_openai_compatible_vendor(voice.vendor):
|
||||
api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
raise HTTPException(status_code=400, detail=f"API key is required for voice: {voice.name}")
|
||||
|
||||
base_url = (voice.base_url or "").strip() or (_default_base_url(voice.vendor) or "")
|
||||
if not base_url:
|
||||
raise HTTPException(status_code=400, detail=f"Base URL is required for voice: {voice.name}")
|
||||
|
||||
model = voice.model or OPENAI_COMPATIBLE_DEFAULT_MODEL
|
||||
payload = {
|
||||
"model": model,
|
||||
"input": text,
|
||||
"voice": voice.voice_key or _build_openai_compatible_voice_key(voice, model),
|
||||
"response_format": "mp3",
|
||||
"speed": data.speed if data.speed is not None else voice.speed,
|
||||
}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=45.0) as client:
|
||||
response = client.post(
|
||||
f"{base_url.rstrip('/')}/audio/speech",
|
||||
headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
|
||||
json=payload,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=502, detail=f"TTS request failed: {exc}") from exc
|
||||
|
||||
if response.status_code != 200:
|
||||
detail = response.text
|
||||
try:
|
||||
detail_json = response.json()
|
||||
detail = detail_json.get("error", {}).get("message") or detail_json.get("detail") or detail
|
||||
except Exception:
|
||||
pass
|
||||
raise HTTPException(status_code=502, detail=f"TTS vendor error: {detail}")
|
||||
|
||||
audio_base64 = base64.b64encode(response.content).decode("utf-8")
|
||||
return VoicePreviewResponse(success=True, audio_url=f"data:audio/mpeg;base64,{audio_base64}")
|
||||
112
api/app/routers/workflows.py
Normal file
112
api/app/routers/workflows.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
from ..db import get_db
|
||||
from ..models import Workflow
|
||||
from ..schemas import WorkflowCreate, WorkflowUpdate, WorkflowOut, WorkflowNode, WorkflowEdge
|
||||
|
||||
router = APIRouter(prefix="/workflows", tags=["Workflows"])
|
||||
|
||||
|
||||
def _normalize_graph_payload(nodes: List[Any], edges: List[Any]) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Validate raw node/edge payloads and return them as plain dicts."""
    validated_nodes = [
        item if isinstance(item, WorkflowNode) else WorkflowNode.model_validate(item)
        for item in nodes
    ]
    validated_edges = [
        item if isinstance(item, WorkflowEdge) else WorkflowEdge.model_validate(item)
        for item in edges
    ]
    return (
        [item.model_dump() for item in validated_nodes],
        [item.model_dump() for item in validated_edges],
    )
|
||||
|
||||
|
||||
@router.get("")
|
||||
def list_workflows(
|
||||
page: int = 1,
|
||||
limit: int = 50,
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""获取工作流列表"""
|
||||
query = db.query(Workflow)
|
||||
total = query.count()
|
||||
workflows = query.order_by(Workflow.created_at.desc()) \
|
||||
.offset((page - 1) * limit).limit(limit).all()
|
||||
return {"total": total, "page": page, "limit": limit, "list": workflows}
|
||||
|
||||
|
||||
@router.post("", response_model=WorkflowOut)
|
||||
def create_workflow(data: WorkflowCreate, db: Session = Depends(get_db)):
|
||||
"""创建工作流"""
|
||||
nodes, edges = _normalize_graph_payload(data.nodes, data.edges)
|
||||
workflow = Workflow(
|
||||
id=str(uuid.uuid4())[:8],
|
||||
user_id=1,
|
||||
name=data.name,
|
||||
node_count=data.nodeCount or len(nodes),
|
||||
created_at=data.createdAt or datetime.utcnow().isoformat(),
|
||||
updated_at=data.updatedAt or "",
|
||||
global_prompt=data.globalPrompt,
|
||||
nodes=nodes,
|
||||
edges=edges,
|
||||
)
|
||||
db.add(workflow)
|
||||
db.commit()
|
||||
db.refresh(workflow)
|
||||
return workflow
|
||||
|
||||
|
||||
@router.get("/{id}", response_model=WorkflowOut)
|
||||
def get_workflow(id: str, db: Session = Depends(get_db)):
|
||||
"""获取单个工作流"""
|
||||
workflow = db.query(Workflow).filter(Workflow.id == id).first()
|
||||
if not workflow:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
return workflow
|
||||
|
||||
|
||||
@router.put("/{id}", response_model=WorkflowOut)
|
||||
def update_workflow(id: str, data: WorkflowUpdate, db: Session = Depends(get_db)):
|
||||
"""更新工作流"""
|
||||
workflow = db.query(Workflow).filter(Workflow.id == id).first()
|
||||
if not workflow:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
|
||||
update_data = data.model_dump(exclude_unset=True, exclude={"nodes", "edges"})
|
||||
field_map = {
|
||||
"nodeCount": "node_count",
|
||||
"globalPrompt": "global_prompt",
|
||||
}
|
||||
for field, value in update_data.items():
|
||||
setattr(workflow, field_map.get(field, field), value)
|
||||
|
||||
if data.nodes is not None or data.edges is not None:
|
||||
existing_nodes = workflow.nodes if isinstance(workflow.nodes, list) else []
|
||||
existing_edges = workflow.edges if isinstance(workflow.edges, list) else []
|
||||
input_nodes = data.nodes if data.nodes is not None else existing_nodes
|
||||
input_edges = data.edges if data.edges is not None else existing_edges
|
||||
nodes, edges = _normalize_graph_payload(input_nodes, input_edges)
|
||||
workflow.nodes = nodes
|
||||
workflow.edges = edges
|
||||
workflow.node_count = len(nodes)
|
||||
|
||||
workflow.updated_at = datetime.utcnow().isoformat()
|
||||
db.commit()
|
||||
db.refresh(workflow)
|
||||
return workflow
|
||||
|
||||
|
||||
@router.delete("/{id}")
|
||||
def delete_workflow(id: str, db: Session = Depends(get_db)):
|
||||
"""删除工作流"""
|
||||
workflow = db.query(Workflow).filter(Workflow.id == id).first()
|
||||
if not workflow:
|
||||
raise HTTPException(status_code=404, detail="Workflow not found")
|
||||
db.delete(workflow)
|
||||
db.commit()
|
||||
return {"message": "Deleted successfully"}
|
||||
@@ -1,19 +1,276 @@
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
||||
|
||||
|
||||
# ============ Enums ============
|
||||
class AssistantConfigMode(str, Enum):
    """Backend configuration modes available for an assistant."""

    PLATFORM = "platform"
    DIFY = "dify"
    FASTGPT = "fastgpt"
    NONE = "none"
|
||||
|
||||
|
||||
class LLMModelType(str, Enum):
    """Kinds of LLM-family models managed by the platform."""

    TEXT = "text"
    EMBEDDING = "embedding"
    RERANK = "rerank"
|
||||
|
||||
|
||||
class ASRLanguage(str, Enum):
    """Languages supported by ASR models."""

    ZH = "zh"
    EN = "en"
    MULTILINGUAL = "Multi-lingual"
|
||||
|
||||
|
||||
class VoiceGender(str, Enum):
    """Gender labels used for voices."""

    MALE = "Male"
    FEMALE = "Female"
|
||||
|
||||
|
||||
class CallRecordSource(str, Enum):
    """Origin of a call record."""

    DEBUG = "debug"
    EXTERNAL = "external"
|
||||
|
||||
|
||||
class CallRecordStatus(str, Enum):
    """Terminal status of a call record."""

    CONNECTED = "connected"
    MISSED = "missed"
    FAILED = "failed"
|
||||
|
||||
|
||||
# ============ Voice ============
|
||||
class VoiceBase(BaseModel):
    """Shared voice fields for create/read schemas.

    Fix: the extracted diff left both the old and new declarations of
    ``gender``/``language``/``description`` in the class body; keep only the
    post-change versions (commented value domains, defaulted description).
    """

    name: str
    vendor: str
    gender: str  # "Male" | "Female"
    language: str  # "zh" | "en"
    description: str = ""
|
||||
|
||||
|
||||
class VoiceCreate(VoiceBase):
    """Payload for creating a voice; vendor defaults are filled server-side."""

    id: Optional[str] = None
    model: Optional[str] = None  # vendor TTS model identifier
    voice_key: Optional[str] = None  # vendor-side voice key
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    speed: float = 1.0
    gain: int = 0
    pitch: int = 0
    enabled: bool = True
|
||||
|
||||
|
||||
class VoiceUpdate(BaseModel):
    """Partial update payload for a voice; unset fields are left untouched."""

    name: Optional[str] = None
    vendor: Optional[str] = None
    gender: Optional[str] = None
    language: Optional[str] = None
    description: Optional[str] = None
    model: Optional[str] = None
    voice_key: Optional[str] = None
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    speed: Optional[float] = None
    gain: Optional[int] = None
    pitch: Optional[int] = None
    enabled: Optional[bool] = None
|
||||
|
||||
|
||||
class VoiceOut(VoiceBase):
    """Voice representation returned by the API."""

    id: str
    user_id: Optional[int] = None
    model: Optional[str] = None
    voice_key: Optional[str] = None
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    speed: float = 1.0
    gain: int = 0
    pitch: int = 0
    enabled: bool = True
    is_system: bool = False
    created_at: Optional[datetime] = None

    class Config:
        # Allow constructing this schema directly from ORM rows.
        from_attributes = True
|
||||
|
||||
|
||||
class VoicePreviewRequest(BaseModel):
    """Request body for the voice preview endpoint; overrides stored settings."""

    text: str
    api_key: Optional[str] = None
    speed: Optional[float] = None
    gain: Optional[int] = None
    pitch: Optional[int] = None
|
||||
|
||||
|
||||
class VoicePreviewResponse(BaseModel):
    """Preview result; audio_url carries a base64 data URL when successful."""

    success: bool
    audio_url: Optional[str] = None
    duration_ms: Optional[int] = None
    error: Optional[str] = None
|
||||
|
||||
|
||||
# ============ LLM Model ============
|
||||
class LLMModelBase(BaseModel):
    """Shared LLM model configuration fields."""

    name: str
    vendor: str
    type: LLMModelType
    base_url: str
    api_key: str
    model_name: Optional[str] = None
    temperature: Optional[float] = None
    context_length: Optional[int] = None
    enabled: bool = True
|
||||
|
||||
|
||||
class LLMModelCreate(LLMModelBase):
    """Payload for creating an LLM model; id is generated when omitted."""

    id: Optional[str] = None
|
||||
|
||||
|
||||
class LLMModelUpdate(BaseModel):
    """Partial update payload for an LLM model."""

    name: Optional[str] = None
    vendor: Optional[str] = None
    type: Optional[LLMModelType] = None
    base_url: Optional[str] = None
    api_key: Optional[str] = None
    model_name: Optional[str] = None
    temperature: Optional[float] = None
    context_length: Optional[int] = None
    enabled: Optional[bool] = None
|
||||
|
||||
|
||||
class LLMModelOut(LLMModelBase):
    """LLM model representation returned by the API."""

    id: str
    user_id: int
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    class Config:
        # Allow constructing this schema directly from ORM rows.
        from_attributes = True
|
||||
|
||||
|
||||
class LLMModelTestResponse(BaseModel):
    """Result of an LLM connectivity test."""

    success: bool
    latency_ms: Optional[int] = None
    message: Optional[str] = None
|
||||
|
||||
|
||||
class LLMPreviewRequest(BaseModel):
    """Request body for a one-shot LLM chat preview."""

    message: str
    system_prompt: Optional[str] = None
    max_tokens: Optional[int] = None
    temperature: Optional[float] = None
    api_key: Optional[str] = None
|
||||
|
||||
|
||||
class LLMPreviewResponse(BaseModel):
    """Result of an LLM chat preview call."""

    success: bool
    reply: Optional[str] = None
    usage: Optional[dict] = None
    latency_ms: Optional[int] = None
    error: Optional[str] = None
|
||||
|
||||
|
||||
# ============ ASR Model ============
|
||||
class ASRModelBase(BaseModel):
    """Shared ASR model configuration fields."""

    name: str
    vendor: str
    language: str  # "zh" | "en" | "Multi-lingual"
    base_url: str
    api_key: str
    model_name: Optional[str] = None
    enabled: bool = True
|
||||
|
||||
|
||||
class ASRModelCreate(ASRModelBase):
    """Payload for creating an ASR model with recognition options."""

    id: Optional[str] = None
    hotwords: List[str] = []
    enable_punctuation: bool = True
    enable_normalization: bool = True
|
||||
|
||||
|
||||
class ASRModelUpdate(BaseModel):
    """Partial update payload for an ASR model."""

    name: Optional[str] = None
    vendor: Optional[str] = None
    language: Optional[str] = None
    base_url: Optional[str] = None
    api_key: Optional[str] = None
    model_name: Optional[str] = None
    hotwords: Optional[List[str]] = None
    enable_punctuation: Optional[bool] = None
    enable_normalization: Optional[bool] = None
    enabled: Optional[bool] = None
|
||||
|
||||
|
||||
class ASRModelOut(ASRModelBase):
    """ASR model representation returned by the API."""

    id: str
    user_id: int
    hotwords: List[str] = []
    enable_punctuation: bool = True
    enable_normalization: bool = True
    created_at: Optional[datetime] = None

    class Config:
        # Allow constructing this schema directly from ORM rows.
        from_attributes = True
|
||||
|
||||
|
||||
class ASRTestRequest(BaseModel):
    """Audio input for an ASR test; exactly one of url or data is expected."""

    audio_url: Optional[str] = None
    audio_data: Optional[str] = None  # base64 encoded
|
||||
|
||||
|
||||
class ASRTestResponse(BaseModel):
    """Result of an ASR recognition test."""

    success: bool
    transcript: Optional[str] = None
    language: Optional[str] = None
    confidence: Optional[float] = None
    duration_ms: Optional[int] = None
    latency_ms: Optional[int] = None
    message: Optional[str] = None
    error: Optional[str] = None
|
||||
|
||||
|
||||
# ============ Tool Resource ============
|
||||
class ToolResourceBase(BaseModel):
    """Shared fields describing an HTTP-backed tool resource."""

    name: str
    description: str = ""
    category: str = "system"  # system/query
    icon: str = "Wrench"
    http_method: str = "GET"
    http_url: Optional[str] = None
    http_headers: Dict[str, str] = Field(default_factory=dict)
    http_timeout_ms: int = 10000
    parameter_schema: Dict[str, Any] = Field(default_factory=dict)
    parameter_defaults: Dict[str, Any] = Field(default_factory=dict)
    wait_for_response: bool = False
    enabled: bool = True
|
||||
|
||||
|
||||
class ToolResourceCreate(ToolResourceBase):
    """Payload for creating a tool resource; id is generated when omitted."""

    id: Optional[str] = None
|
||||
|
||||
|
||||
class ToolResourceUpdate(BaseModel):
    """Partial update payload for a tool resource."""

    name: Optional[str] = None
    description: Optional[str] = None
    category: Optional[str] = None
    icon: Optional[str] = None
    http_method: Optional[str] = None
    http_url: Optional[str] = None
    http_headers: Optional[Dict[str, str]] = None
    http_timeout_ms: Optional[int] = None
    parameter_schema: Optional[Dict[str, Any]] = None
    parameter_defaults: Optional[Dict[str, Any]] = None
    wait_for_response: Optional[bool] = None
    enabled: Optional[bool] = None
|
||||
|
||||
|
||||
class ToolResourceOut(ToolResourceBase):
    """Tool resource representation returned by the API."""

    id: str
    user_id: Optional[int] = None
    is_system: bool = False
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    class Config:
        # Allow constructing this schema directly from ORM rows.
        from_attributes = True
|
||||
@@ -22,37 +279,131 @@ class VoiceOut(VoiceBase):
|
||||
# ============ Assistant ============
|
||||
class AssistantBase(BaseModel):
    """Shared assistant configuration fields (API uses camelCase names)."""

    name: str
    firstTurnMode: str = "bot_first"
    opener: str = ""
    manualOpenerToolCalls: List[Dict[str, Any]] = []
    generatedOpenerEnabled: bool = False
    openerAudioEnabled: bool = False
    prompt: str = ""
    knowledgeBaseId: Optional[str] = None
    language: str = "zh"
    voiceOutputEnabled: bool = True
    voice: Optional[str] = None
    speed: float = 1.0
    hotwords: List[str] = []
    tools: List[str] = []
    asrInterimEnabled: bool = False
    botCannotBeInterrupted: bool = False
    interruptionSensitivity: int = 500
    configMode: str = "platform"
    apiUrl: Optional[str] = None
    apiKey: Optional[str] = None
    appId: Optional[str] = None
    # Model associations
    llmModelId: Optional[str] = None
    asrModelId: Optional[str] = None
    embeddingModelId: Optional[str] = None
    rerankModelId: Optional[str] = None
|
||||
|
||||
|
||||
class AssistantCreate(AssistantBase):
    """Payload for creating an assistant (same fields as AssistantBase)."""

    pass
|
||||
|
||||
|
||||
class AssistantUpdate(BaseModel):
    """Partial update payload for an assistant; unset fields are untouched.

    Fix: the extracted diff left a stale, bodiless ``class
    AssistantUpdate(AssistantBase):`` header immediately before this
    definition, which is a syntax error; only the all-Optional BaseModel
    version is kept.
    """

    name: Optional[str] = None
    firstTurnMode: Optional[str] = None
    opener: Optional[str] = None
    manualOpenerToolCalls: Optional[List[Dict[str, Any]]] = None
    generatedOpenerEnabled: Optional[bool] = None
    openerAudioEnabled: Optional[bool] = None
    prompt: Optional[str] = None
    knowledgeBaseId: Optional[str] = None
    language: Optional[str] = None
    voiceOutputEnabled: Optional[bool] = None
    voice: Optional[str] = None
    speed: Optional[float] = None
    hotwords: Optional[List[str]] = None
    tools: Optional[List[str]] = None
    asrInterimEnabled: Optional[bool] = None
    botCannotBeInterrupted: Optional[bool] = None
    interruptionSensitivity: Optional[int] = None
    configMode: Optional[str] = None
    apiUrl: Optional[str] = None
    apiKey: Optional[str] = None
    appId: Optional[str] = None
    llmModelId: Optional[str] = None
    asrModelId: Optional[str] = None
    embeddingModelId: Optional[str] = None
    rerankModelId: Optional[str] = None
|
||||
|
||||
|
||||
class AssistantOut(AssistantBase):
    """Assistant representation returned by the API."""

    id: str
    callCount: int = 0
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    class Config:
        # Allow constructing this schema directly from ORM rows.
        from_attributes = True
|
||||
|
||||
|
||||
class AssistantRuntimeMetadata(BaseModel):
    """Canonical runtime metadata payload consumed by engine session.start."""

    # Extra keys are preserved so unknown fields pass through to the engine.
    model_config = ConfigDict(extra="allow")

    systemPrompt: str = ""
    firstTurnMode: str = "bot_first"
    greeting: str = ""
    generatedOpenerEnabled: bool = False
    manualOpenerToolCalls: List[Dict[str, Any]] = Field(default_factory=list)
    output: Dict[str, Any] = Field(default_factory=dict)
    bargeIn: Dict[str, Any] = Field(default_factory=dict)
    services: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    tools: List[Any] = Field(default_factory=list)
    knowledgeBaseId: Optional[str] = None
    knowledge: Dict[str, Any] = Field(default_factory=dict)
    history: Dict[str, Any] = Field(default_factory=dict)
    openerAudio: Dict[str, Any] = Field(default_factory=dict)
    assistantId: Optional[str] = None
    configVersionId: Optional[str] = None
|
||||
|
||||
|
||||
class AssistantEngineConfigResponse(BaseModel):
    """Resolved engine configuration for an assistant, with provenance."""

    assistantId: str
    configVersionId: Optional[str] = None
    assistant: AssistantRuntimeMetadata
    sessionStartMetadata: AssistantRuntimeMetadata
    sources: Dict[str, Optional[str]] = Field(default_factory=dict)
    warnings: List[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class AssistantOpenerAudioGenerateRequest(BaseModel):
    """Request to (re)generate opener audio; text overrides the stored opener."""

    text: Optional[str] = None
|
||||
|
||||
|
||||
class AssistantOpenerAudioOut(BaseModel):
    """State of an assistant's pre-generated opener audio clip."""

    enabled: bool = False
    ready: bool = False
    encoding: str = "pcm_s16le"
    sample_rate_hz: int = 16000
    channels: int = 1
    duration_ms: int = 0
    updated_at: Optional[datetime] = None
    text_hash: Optional[str] = None
    tts_fingerprint: Optional[str] = None
|
||||
|
||||
|
||||
class AssistantStats(BaseModel):
|
||||
assistant_id: str
|
||||
total_calls: int = 0
|
||||
connected_calls: int = 0
|
||||
missed_calls: int = 0
|
||||
avg_duration_seconds: float = 0.0
|
||||
today_calls: int = 0
|
||||
|
||||
|
||||
# ============ Knowledge Base ============
|
||||
class KnowledgeDocument(BaseModel):
|
||||
id: str
|
||||
@@ -137,24 +488,82 @@ class KnowledgeStats(BaseModel):
|
||||
|
||||
# ============ Workflow ============
|
||||
class WorkflowNode(BaseModel):
|
||||
name: str
|
||||
type: str
|
||||
model_config = ConfigDict(extra="allow")
|
||||
|
||||
id: Optional[str] = None
|
||||
name: str = ""
|
||||
type: str = "assistant"
|
||||
isStart: Optional[bool] = None
|
||||
metadata: dict
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
prompt: Optional[str] = None
|
||||
messagePlan: Optional[dict] = None
|
||||
variableExtractionPlan: Optional[dict] = None
|
||||
tool: Optional[dict] = None
|
||||
globalNodePlan: Optional[dict] = None
|
||||
messagePlan: Optional[Dict[str, Any]] = None
|
||||
variableExtractionPlan: Optional[Dict[str, Any]] = None
|
||||
tool: Optional[Dict[str, Any]] = None
|
||||
globalNodePlan: Optional[Dict[str, Any]] = None
|
||||
assistantId: Optional[str] = None
|
||||
assistant: Optional[Dict[str, Any]] = None
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def _normalize_legacy_node(cls, data: Any) -> Any:
|
||||
if not isinstance(data, dict):
|
||||
return data
|
||||
raw = dict(data)
|
||||
node_id = raw.get("id") or raw.get("name")
|
||||
if not node_id:
|
||||
node_id = f"node_{abs(hash(str(raw))) % 100000}"
|
||||
raw["id"] = str(node_id)
|
||||
raw["name"] = str(raw.get("name") or raw["id"])
|
||||
|
||||
node_type = str(raw.get("type") or "assistant").lower()
|
||||
if node_type == "conversation":
|
||||
node_type = "assistant"
|
||||
elif node_type == "human":
|
||||
node_type = "human_transfer"
|
||||
elif node_type not in {"start", "assistant", "tool", "human_transfer", "end"}:
|
||||
node_type = "assistant"
|
||||
raw["type"] = node_type
|
||||
|
||||
metadata = raw.get("metadata")
|
||||
if not isinstance(metadata, dict):
|
||||
metadata = {}
|
||||
if "position" not in metadata and isinstance(raw.get("position"), dict):
|
||||
metadata["position"] = raw.get("position")
|
||||
raw["metadata"] = metadata
|
||||
|
||||
if raw.get("isStart") is None and node_type == "start":
|
||||
raw["isStart"] = True
|
||||
return raw
|
||||
|
||||
|
||||
class WorkflowEdge(BaseModel):
|
||||
from_: str
|
||||
to: str
|
||||
label: Optional[str] = None
|
||||
model_config = ConfigDict(extra="allow")
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
id: Optional[str] = None
|
||||
fromNodeId: str
|
||||
toNodeId: str
|
||||
label: Optional[str] = None
|
||||
condition: Optional[Dict[str, Any]] = None
|
||||
priority: int = 100
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def _normalize_legacy_edge(cls, data: Any) -> Any:
|
||||
if not isinstance(data, dict):
|
||||
return data
|
||||
raw = dict(data)
|
||||
from_node = raw.get("fromNodeId") or raw.get("from") or raw.get("from_") or raw.get("source")
|
||||
to_node = raw.get("toNodeId") or raw.get("to") or raw.get("target")
|
||||
raw["fromNodeId"] = str(from_node or "")
|
||||
raw["toNodeId"] = str(to_node or "")
|
||||
if raw.get("id") is None:
|
||||
raw["id"] = f"e_{raw['fromNodeId']}_{raw['toNodeId']}"
|
||||
if raw.get("condition") is None:
|
||||
if raw.get("label"):
|
||||
raw["condition"] = {"type": "contains", "source": "user", "value": str(raw["label"])}
|
||||
else:
|
||||
raw["condition"] = {"type": "always"}
|
||||
return raw
|
||||
|
||||
|
||||
class WorkflowBase(BaseModel):
|
||||
@@ -163,29 +572,85 @@ class WorkflowBase(BaseModel):
|
||||
createdAt: str = ""
|
||||
updatedAt: str = ""
|
||||
globalPrompt: Optional[str] = None
|
||||
nodes: List[dict] = []
|
||||
edges: List[dict] = []
|
||||
nodes: List[WorkflowNode] = Field(default_factory=list)
|
||||
edges: List[WorkflowEdge] = Field(default_factory=list)
|
||||
|
||||
|
||||
class WorkflowCreate(WorkflowBase):
|
||||
pass
|
||||
@model_validator(mode="after")
|
||||
def _validate_graph(self) -> "WorkflowCreate":
|
||||
_validate_workflow_graph(self.nodes, self.edges)
|
||||
return self
|
||||
|
||||
|
||||
class WorkflowUpdate(BaseModel):
|
||||
name: Optional[str] = None
|
||||
nodeCount: Optional[int] = None
|
||||
nodes: Optional[List[dict]] = None
|
||||
edges: Optional[List[dict]] = None
|
||||
nodes: Optional[List[WorkflowNode]] = None
|
||||
edges: Optional[List[WorkflowEdge]] = None
|
||||
globalPrompt: Optional[str] = None
|
||||
|
||||
@model_validator(mode="after")
|
||||
def _validate_partial_graph(self) -> "WorkflowUpdate":
|
||||
if self.nodes is not None and self.edges is not None:
|
||||
_validate_workflow_graph(self.nodes, self.edges)
|
||||
return self
|
||||
|
||||
|
||||
class WorkflowOut(WorkflowBase):
|
||||
id: str
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def _normalize_db_fields(cls, data: Any) -> Any:
|
||||
if isinstance(data, dict):
|
||||
raw = dict(data)
|
||||
else:
|
||||
raw = {
|
||||
"id": getattr(data, "id", None),
|
||||
"name": getattr(data, "name", None),
|
||||
"node_count": getattr(data, "node_count", None),
|
||||
"created_at": getattr(data, "created_at", None),
|
||||
"updated_at": getattr(data, "updated_at", None),
|
||||
"global_prompt": getattr(data, "global_prompt", None),
|
||||
"nodes": getattr(data, "nodes", None),
|
||||
"edges": getattr(data, "edges", None),
|
||||
}
|
||||
|
||||
if "nodeCount" not in raw and raw.get("node_count") is not None:
|
||||
raw["nodeCount"] = raw["node_count"]
|
||||
if "createdAt" not in raw and raw.get("created_at") is not None:
|
||||
raw["createdAt"] = raw["created_at"]
|
||||
if "updatedAt" not in raw and raw.get("updated_at") is not None:
|
||||
raw["updatedAt"] = raw["updated_at"]
|
||||
if "globalPrompt" not in raw and raw.get("global_prompt") is not None:
|
||||
raw["globalPrompt"] = raw["global_prompt"]
|
||||
return raw
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
def _validate_workflow_graph(nodes: List[WorkflowNode], edges: List[WorkflowEdge]) -> None:
|
||||
if not nodes:
|
||||
raise ValueError("Workflow must include at least one node")
|
||||
|
||||
node_ids = [node.id for node in nodes if node.id]
|
||||
if len(node_ids) != len(set(node_ids)):
|
||||
raise ValueError("Workflow node ids must be unique")
|
||||
|
||||
starts = [node for node in nodes if node.isStart or node.type == "start"]
|
||||
if not starts:
|
||||
raise ValueError("Workflow must define a start node (isStart=true or type=start)")
|
||||
|
||||
known = set(node_ids)
|
||||
for edge in edges:
|
||||
if edge.fromNodeId not in known:
|
||||
raise ValueError(f"Workflow edge fromNodeId not found: {edge.fromNodeId}")
|
||||
if edge.toNodeId not in known:
|
||||
raise ValueError(f"Workflow edge toNodeId not found: {edge.toNodeId}")
|
||||
|
||||
|
||||
# ============ Call Record ============
|
||||
class TranscriptSegment(BaseModel):
|
||||
turnIndex: int
|
||||
@@ -196,18 +661,24 @@ class TranscriptSegment(BaseModel):
|
||||
endMs: int
|
||||
durationMs: Optional[int] = None
|
||||
audioUrl: Optional[str] = None
|
||||
emotion: Optional[str] = None
|
||||
|
||||
|
||||
class CallRecordCreate(BaseModel):
|
||||
user_id: int
|
||||
assistant_id: Optional[str] = None
|
||||
source: str = "debug"
|
||||
status: Optional[str] = None
|
||||
cost: Optional[float] = None
|
||||
|
||||
|
||||
class CallRecordUpdate(BaseModel):
|
||||
status: Optional[str] = None
|
||||
summary: Optional[str] = None
|
||||
duration_seconds: Optional[int] = None
|
||||
ended_at: Optional[str] = None
|
||||
cost: Optional[float] = None
|
||||
metadata: Optional[dict] = None
|
||||
|
||||
|
||||
class CallRecordOut(BaseModel):
|
||||
@@ -220,6 +691,9 @@ class CallRecordOut(BaseModel):
|
||||
ended_at: Optional[str] = None
|
||||
duration_seconds: Optional[int] = None
|
||||
summary: Optional[str] = None
|
||||
cost: float = 0.0
|
||||
metadata: dict = {}
|
||||
created_at: Optional[datetime] = None
|
||||
transcripts: List[TranscriptSegment] = []
|
||||
|
||||
class Config:
|
||||
@@ -246,6 +720,19 @@ class TranscriptOut(TranscriptCreate):
|
||||
from_attributes = True
|
||||
|
||||
|
||||
# ============ History Stats ============
|
||||
class HistoryStats(BaseModel):
|
||||
total_calls: int = 0
|
||||
connected_calls: int = 0
|
||||
missed_calls: int = 0
|
||||
failed_calls: int = 0
|
||||
avg_duration_seconds: float = 0.0
|
||||
total_cost: float = 0.0
|
||||
by_status: dict = {}
|
||||
by_source: dict = {}
|
||||
daily_trend: List[dict] = []
|
||||
|
||||
|
||||
# ============ Dashboard ============
|
||||
class DashboardStats(BaseModel):
|
||||
totalCalls: int
|
||||
@@ -269,3 +756,9 @@ class ListResponse(BaseModel):
|
||||
page: int
|
||||
limit: int
|
||||
list: List
|
||||
|
||||
|
||||
class SearchResult(BaseModel):
|
||||
id: str
|
||||
started_at: str
|
||||
matched_content: Optional[str] = None
|
||||
|
||||
@@ -64,6 +64,8 @@ class VectorStore:
|
||||
):
|
||||
"""添加文档片段到向量库"""
|
||||
collection = self.get_collection(kb_id)
|
||||
if collection is None:
|
||||
raise ValueError(f"Knowledge collection not found for kb_id={kb_id}")
|
||||
|
||||
if ids is None:
|
||||
ids = [f"chunk-{i}" for i in range(len(documents))]
|
||||
@@ -93,6 +95,11 @@ class VectorStore:
|
||||
) -> Dict:
|
||||
"""检索相似文档"""
|
||||
collection = self.get_collection(kb_id)
|
||||
if collection is None:
|
||||
raise ValueError(
|
||||
f"Knowledge collection not found for kb_id={kb_id}. "
|
||||
"Please ensure the knowledge base exists and documents are indexed."
|
||||
)
|
||||
|
||||
# 生成查询向量
|
||||
query_embedding = embedding_service.embed_query(query)
|
||||
@@ -108,6 +115,8 @@ class VectorStore:
|
||||
def get_stats(self, kb_id: str) -> Dict:
|
||||
"""获取向量库统计"""
|
||||
collection = self.get_collection(kb_id)
|
||||
if collection is None:
|
||||
raise ValueError(f"Knowledge collection not found for kb_id={kb_id}")
|
||||
return {
|
||||
"count": collection.count(),
|
||||
"kb_id": kb_id
|
||||
@@ -116,11 +125,15 @@ class VectorStore:
|
||||
def delete_documents(self, kb_id: str, ids: List[str]):
|
||||
"""删除指定文档片段"""
|
||||
collection = self.get_collection(kb_id)
|
||||
if collection is None:
|
||||
return
|
||||
collection.delete(ids=ids)
|
||||
|
||||
def delete_by_metadata(self, kb_id: str, document_id: str):
|
||||
"""根据文档 ID 删除所有片段"""
|
||||
collection = self.get_collection(kb_id)
|
||||
if collection is None:
|
||||
return
|
||||
results = collection.get(where={"document_id": document_id})
|
||||
if results["ids"]:
|
||||
collection.delete(ids=results["ids"])
|
||||
@@ -244,9 +257,6 @@ embedding_service = EmbeddingService()
|
||||
|
||||
def search_knowledge(kb_id: str, query: str, n_results: int = 5) -> Dict:
|
||||
"""知识库检索"""
|
||||
# 生成查询向量
|
||||
query_vector = embedding_service.embed_query(query)
|
||||
|
||||
# 检索
|
||||
results = vector_store.search(
|
||||
kb_id=kb_id,
|
||||
|
||||
439
api/docs/asr.md
Normal file
439
api/docs/asr.md
Normal file
@@ -0,0 +1,439 @@
|
||||
# 语音识别 (ASR Model) API
|
||||
|
||||
语音识别 API 用于管理语音识别模型的配置和调用。
|
||||
|
||||
## 基础信息
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| Base URL | `/api/v1/asr` |
|
||||
| 认证方式 | Bearer Token (预留) |
|
||||
|
||||
---
|
||||
|
||||
## 数据模型
|
||||
|
||||
### ASRModel
|
||||
|
||||
```typescript
|
||||
interface ASRModel {
|
||||
id: string; // 模型唯一标识 (8位UUID)
|
||||
user_id: number; // 所属用户ID
|
||||
name: string; // 模型显示名称
|
||||
vendor: string; // 供应商: "OpenAI Compatible" | "Paraformer" | 等
|
||||
language: string; // 识别语言: "zh" | "en" | "Multi-lingual"
|
||||
base_url: string; // API Base URL
|
||||
api_key: string; // API Key
|
||||
model_name?: string; // 模型名称,如 "whisper-1" | "paraformer-v2"
|
||||
hotwords?: string[]; // 热词列表
|
||||
enable_punctuation: boolean; // 是否启用标点
|
||||
enable_normalization: boolean; // 是否启用文本规范化
|
||||
enabled: boolean; // 是否启用
|
||||
created_at: string;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API 端点
|
||||
|
||||
### 1. 获取 ASR 模型列表
|
||||
|
||||
```http
|
||||
GET /api/v1/asr
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| language | string | 否 | - | 过滤语言: "zh" \| "en" \| "Multi-lingual" |
|
||||
| enabled | boolean | 否 | - | 过滤启用状态 |
|
||||
| page | int | 否 | 1 | 页码 |
|
||||
| limit | int | 否 | 50 | 每页数量 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total": 3,
|
||||
"page": 1,
|
||||
"limit": 50,
|
||||
"list": [
|
||||
{
|
||||
"id": "abc12345",
|
||||
"user_id": 1,
|
||||
"name": "Whisper 多语种识别",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"language": "Multi-lingual",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-***",
|
||||
"model_name": "whisper-1",
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true,
|
||||
"enabled": true,
|
||||
"created_at": "2024-01-15T10:30:00Z"
|
||||
},
|
||||
{
|
||||
"id": "def67890",
|
||||
"user_id": 1,
|
||||
"name": "SenseVoice 中文识别",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"language": "zh",
|
||||
"base_url": "https://api.siliconflow.cn/v1",
|
||||
"api_key": "sf-***",
|
||||
"model_name": "paraformer-v2",
|
||||
"hotwords": ["小助手", "帮我"],
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true,
|
||||
"enabled": true,
|
||||
"created_at": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. 获取单个 ASR 模型详情
|
||||
|
||||
```http
|
||||
GET /api/v1/asr/{id}
|
||||
```
|
||||
|
||||
**Path Parameters:**
|
||||
|
||||
| 参数 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 模型ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "abc12345",
|
||||
"user_id": 1,
|
||||
"name": "Whisper 多语种识别",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"language": "Multi-lingual",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-***",
|
||||
"model_name": "whisper-1",
|
||||
"hotwords": [],
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true,
|
||||
"enabled": true,
|
||||
"created_at": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. 创建 ASR 模型
|
||||
|
||||
```http
|
||||
POST /api/v1/asr
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "SenseVoice 中文识别",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"language": "zh",
|
||||
"base_url": "https://api.siliconflow.cn/v1",
|
||||
"api_key": "sk-your-api-key",
|
||||
"model_name": "paraformer-v2",
|
||||
"hotwords": ["小助手", "帮我"],
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true,
|
||||
"enabled": true
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| name | string | 是 | 模型显示名称 |
|
||||
| vendor | string | 是 | 供应商: "OpenAI Compatible" / "Paraformer" |
|
||||
| language | string | 是 | 语言: "zh" / "en" / "Multi-lingual" |
|
||||
| base_url | string | 是 | API Base URL |
|
||||
| api_key | string | 是 | API Key |
|
||||
| model_name | string | 否 | 模型名称 |
|
||||
| hotwords | string[] | 否 | 热词列表,提升识别准确率 |
|
||||
| enable_punctuation | boolean | 否 | 是否输出标点,默认 true |
|
||||
| enable_normalization | boolean | 否 | 是否文本规范化,默认 true |
|
||||
| enabled | boolean | 否 | 是否启用,默认 true |
|
||||
| id | string | 否 | 指定模型ID,默认自动生成 |
|
||||
|
||||
---
|
||||
|
||||
### 4. 更新 ASR 模型
|
||||
|
||||
```http
|
||||
PUT /api/v1/asr/{id}
|
||||
```
|
||||
|
||||
**Request Body:** (部分更新)
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "Whisper-1 优化版",
|
||||
"language": "zh",
|
||||
"enable_punctuation": true,
|
||||
"hotwords": ["新词1", "新词2"]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5. 删除 ASR 模型
|
||||
|
||||
```http
|
||||
DELETE /api/v1/asr/{id}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "Deleted successfully"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 6. 测试 ASR 模型
|
||||
|
||||
```http
|
||||
POST /api/v1/asr/{id}/test
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"audio_url": "https://example.com/test-audio.wav"
|
||||
}
|
||||
```
|
||||
|
||||
或使用 Base64 编码的音频数据:
|
||||
|
||||
```json
|
||||
{
|
||||
"audio_data": "UklGRi..."
|
||||
}
|
||||
```
|
||||
|
||||
**Response (成功):**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"transcript": "您好,请问有什么可以帮助您?",
|
||||
"language": "zh",
|
||||
"confidence": 0.95,
|
||||
"latency_ms": 500
|
||||
}
|
||||
```
|
||||
|
||||
**Response (失败):**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"error": "HTTP Error: 401 - Unauthorized"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 7. 转写音频
|
||||
|
||||
```http
|
||||
POST /api/v1/asr/{id}/transcribe
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| audio_url | string | 否* | 音频文件URL |
|
||||
| audio_data | string | 否* | Base64编码的音频数据 |
|
||||
| hotwords | string[] | 否 | 热词列表 |
|
||||
|
||||
*二选一,至少提供一个
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"transcript": "您好,请问有什么可以帮助您?",
|
||||
"language": "zh",
|
||||
"confidence": 0.95
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 8. 预览 ASR (上传音频文件)
|
||||
|
||||
```http
|
||||
POST /api/v1/asr/{id}/preview
|
||||
```
|
||||
|
||||
上传音频文件进行识别预览。
|
||||
|
||||
**Request (multipart/form-data):**
|
||||
|
||||
| 参数 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| file | file | 是 | 音频文件 (audio/* | string | 否 | 指定语言,覆盖) |
|
||||
| language模型配置 |
|
||||
| api_key | string | 否 | 覆盖模型配置的 API Key |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"transcript": "您好,请问有什么可以帮助您?",
|
||||
"language": "zh",
|
||||
"confidence": 0.95,
|
||||
"latency_ms": 1500
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Schema 定义
|
||||
|
||||
```python
|
||||
from enum import Enum
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List
|
||||
from datetime import datetime
|
||||
|
||||
class ASRLanguage(str, Enum):
|
||||
ZH = "zh"
|
||||
EN = "en"
|
||||
MULTILINGUAL = "Multi-lingual"
|
||||
|
||||
class ASRModelBase(BaseModel):
|
||||
name: str
|
||||
vendor: str
|
||||
language: str # "zh" | "en" | "Multi-lingual"
|
||||
base_url: str
|
||||
api_key: str
|
||||
model_name: Optional[str] = None
|
||||
hotwords: List[str] = []
|
||||
enable_punctuation: bool = True
|
||||
enable_normalization: bool = True
|
||||
enabled: bool = True
|
||||
|
||||
class ASRModelCreate(ASRModelBase):
|
||||
id: Optional[str] = None
|
||||
|
||||
class ASRModelUpdate(BaseModel):
|
||||
name: Optional[str] = None
|
||||
language: Optional[str] = None
|
||||
base_url: Optional[str] = None
|
||||
api_key: Optional[str] = None
|
||||
model_name: Optional[str] = None
|
||||
hotwords: Optional[List[str]] = None
|
||||
enable_punctuation: Optional[bool] = None
|
||||
enable_normalization: Optional[bool] = None
|
||||
enabled: Optional[bool] = None
|
||||
|
||||
class ASRModelOut(ASRModelBase):
|
||||
id: str
|
||||
user_id: int
|
||||
created_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
class ASRTestRequest(BaseModel):
|
||||
audio_url: Optional[str] = None
|
||||
audio_data: Optional[str] = None # base64 encoded
|
||||
|
||||
class ASRTestResponse(BaseModel):
|
||||
success: bool
|
||||
transcript: Optional[str] = None
|
||||
language: Optional[str] = None
|
||||
confidence: Optional[float] = None
|
||||
latency_ms: Optional[int] = None
|
||||
error: Optional[str] = None
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 供应商配置示例
|
||||
|
||||
### OpenAI Whisper
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-xxx",
|
||||
"model_name": "whisper-1",
|
||||
"language": "Multi-lingual",
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true
|
||||
}
|
||||
```
|
||||
|
||||
### OpenAI Compatible Paraformer
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.siliconflow.cn/v1",
|
||||
"api_key": "sf-xxx",
|
||||
"model_name": "paraformer-v2",
|
||||
"language": "zh",
|
||||
"hotwords": ["产品名称", "公司名"],
|
||||
"enable_punctuation": true,
|
||||
"enable_normalization": true
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 单元测试
|
||||
|
||||
项目包含完整的单元测试,位于 `api/tests/test_asr.py`。
|
||||
|
||||
### 测试用例概览
|
||||
|
||||
| 测试方法 | 说明 |
|
||||
|----------|------|
|
||||
| test_get_asr_models_empty | 空数据库获取测试 |
|
||||
| test_create_asr_model | 创建模型测试 |
|
||||
| test_create_asr_model_minimal | 最小数据创建测试 |
|
||||
| test_get_asr_model_by_id | 获取单个模型测试 |
|
||||
| test_get_asr_model_not_found | 获取不存在模型测试 |
|
||||
| test_update_asr_model | 更新模型测试 |
|
||||
| test_delete_asr_model | 删除模型测试 |
|
||||
| test_list_asr_models_with_pagination | 分页测试 |
|
||||
| test_filter_asr_models_by_language | 按语言过滤测试 |
|
||||
| test_filter_asr_models_by_enabled | 按启用状态过滤测试 |
|
||||
| test_create_asr_model_with_hotwords | 热词配置测试 |
|
||||
| test_test_asr_model_siliconflow | OpenAI Compatible 供应商测试 |
|
||||
| test_test_asr_model_openai | OpenAI 供应商测试 |
|
||||
| test_different_asr_languages | 多语言测试 |
|
||||
| test_different_asr_vendors | 多供应商测试 |
|
||||
|
||||
### 运行测试
|
||||
|
||||
```bash
|
||||
# 运行 ASR 相关测试
|
||||
pytest api/tests/test_asr.py -v
|
||||
|
||||
# 运行所有测试
|
||||
pytest api/tests/ -v
|
||||
```
|
||||
@@ -20,24 +20,31 @@ interface Assistant {
|
||||
id: string; // 助手唯一标识 (8位UUID)
|
||||
user_id: number; // 所属用户ID
|
||||
name: string; // 助手名称
|
||||
call_count: number; // 调用次数
|
||||
opener: string; // 开场白
|
||||
callCount: number; // 调用次数
|
||||
firstTurnMode: string; // 首轮模式: "bot_first" | "user_first"
|
||||
opener: string; // 开场白
|
||||
generatedOpenerEnabled: boolean; // 是否启用生成式开场白
|
||||
openerAudioEnabled: boolean; // 是否启用预生成开场音频
|
||||
openerAudioReady: boolean; // 开场音频是否已生成
|
||||
openerAudioDurationMs: number; // 开场音频时长(ms)
|
||||
prompt: string; // 系统提示词/人格设定
|
||||
knowledge_base_id?: string; // 关联知识库ID
|
||||
knowledgeBaseId?: string; // 关联知识库ID
|
||||
language: string; // 语言: "zh" | "en"
|
||||
voice?: string; // 声音ID
|
||||
voiceOutputEnabled: boolean; // 是否启用语音输出
|
||||
voice?: string; // 声音ID
|
||||
speed: number; // 语速 (0.5-2.0)
|
||||
hotwords: string[]; // 热词列表
|
||||
tools: string[]; // 启用的工具ID列表
|
||||
interruption_sensitivity: number; // 打断灵敏度 (ms)
|
||||
config_mode: string; // 配置模式: "platform" | "dify" | "fastgpt" | "none"
|
||||
api_url?: string; // 外部API URL
|
||||
api_key?: string; // 外部API Key
|
||||
// 模型关联 (新增)
|
||||
llm_model_id?: string; // LLM模型ID
|
||||
asr_model_id?: string; // ASR模型ID
|
||||
embedding_model_id?: string; // Embedding模型ID
|
||||
rerank_model_id?: string; // Rerank模型ID
|
||||
hotwords: string[]; // 热词列表
|
||||
tools: string[]; // 启用的工具ID列表
|
||||
botCannotBeInterrupted: boolean; // 是否禁止打断
|
||||
interruptionSensitivity: number; // 打断灵敏度 (ms)
|
||||
configMode: string; // 配置模式: "platform" | "dify" | "fastgpt" | "none"
|
||||
apiUrl?: string; // 外部API URL
|
||||
apiKey?: string; // 外部API Key
|
||||
// 模型关联
|
||||
llmModelId?: string; // LLM模型ID
|
||||
asrModelId?: string; // ASR模型ID
|
||||
embeddingModelId?: string; // Embedding模型ID
|
||||
rerankModelId?: string; // Rerank模型ID
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
}
|
||||
@@ -219,22 +226,109 @@ DELETE. 删除助手
|
||||
|
||||
---
|
||||
|
||||
### 6. 获取助手调用统计
|
||||
### 6. 获取助手引擎配置
|
||||
|
||||
```http
|
||||
GET /api/v1/assistants/{id}/stats
|
||||
GET /api/v1/assistants/{id}/config
|
||||
```
|
||||
|
||||
获取助手的运行时引擎配置,包含 LLM、ASR、TTS、知识库等服务的完整配置信息。
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"assistantId": "abc12345",
|
||||
"configVersionId": "asst_abc12345_20240115103000",
|
||||
"assistant": {
|
||||
"systemPrompt": "你是一个专业的客服人员...",
|
||||
"firstTurnMode": "bot_first",
|
||||
"greeting": "您好,请问有什么可以帮助您?",
|
||||
"generatedOpenerEnabled": false,
|
||||
"output": {"mode": "audio"},
|
||||
"bargeIn": {"enabled": true, "minDurationMs": 500},
|
||||
"services": {
|
||||
"llm": {"provider": "openai", "model": "gpt-4o", "apiKey": "...", "baseUrl": "..."},
|
||||
"asr": {"provider": "openai_compatible", "model": "paraformer-realtime-v2", "apiKey": "..."},
|
||||
"tts": {"enabled": true, "provider": "dashscope", "model": "qwen3-tts-flash-realtime", "voice": "Cherry", "speed": 1.0}
|
||||
},
|
||||
"tools": [...],
|
||||
"knowledgeBaseId": "kb_001",
|
||||
"openerAudio": {"enabled": true, "ready": true, "pcmUrl": "/api/assistants/abc12345/opener-audio/pcm"}
|
||||
},
|
||||
"sessionStartMetadata": {...},
|
||||
"sources": {
|
||||
"llmModelId": "llm_001",
|
||||
"asrModelId": "asr_001",
|
||||
"voiceId": "voice_001",
|
||||
"knowledgeBaseId": "kb_001"
|
||||
},
|
||||
"warnings": []
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 7. 获取助手开场音频状态
|
||||
|
||||
```http
|
||||
GET /api/v1/assistants/{id}/opener-audio
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"assistant_id": "abc12345",
|
||||
"total_calls": 128,
|
||||
"connected_calls": 120,
|
||||
"missed_calls": 8,
|
||||
"avg_duration_seconds": 180,
|
||||
"today_calls": 15
|
||||
"enabled": true,
|
||||
"ready": true,
|
||||
"encoding": "pcm_s16le",
|
||||
"sampleRateHz": 16000,
|
||||
"channels": 1,
|
||||
"durationMs": 2500,
|
||||
"textHash": "abc123...",
|
||||
"ttsFingerprint": "def456...",
|
||||
"updatedAt": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 8. 下载开场音频 PCM 文件
|
||||
|
||||
```http
|
||||
GET /api/v1/assistants/{id}/opener-audio/pcm
|
||||
```
|
||||
|
||||
返回 PCM 音频文件 (application/octet-stream)。
|
||||
|
||||
---
|
||||
|
||||
### 9. 生成开场音频
|
||||
|
||||
```http
|
||||
POST /api/v1/assistants/{id}/opener-audio/generate
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "您好,请问有什么可以帮助您?"
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"enabled": true,
|
||||
"ready": true,
|
||||
"encoding": "pcm_s16le",
|
||||
"sampleRateHz": 16000,
|
||||
"channels": 1,
|
||||
"durationMs": 2500,
|
||||
"textHash": "abc123...",
|
||||
"ttsFingerprint": "def456..."
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -289,86 +289,7 @@ GET /api/v1/history/{call_id}/audio/{turn_index}
|
||||
|
||||
---
|
||||
|
||||
### 8. 搜索通话记录
|
||||
|
||||
```http
|
||||
GET /api/v1/history/search
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| q | string | 是 | 搜索关键词 |
|
||||
| page | int | 否 | 页码 |
|
||||
| limit | int | 否 | 每页数量 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total": 5,
|
||||
"page": 1,
|
||||
"limit": 20,
|
||||
"list": [
|
||||
{
|
||||
"id": "call_001",
|
||||
"started_at": "2024-01-15T14:30:00Z",
|
||||
"matched_content": "用户咨询产品A的售后服务"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 9. 获取统计信息
|
||||
|
||||
```http
|
||||
GET /api/v1/history/stats
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| start_date | string | 否 | 开始日期 |
|
||||
| end_date | string | 否 | 结束日期 |
|
||||
| assistant_id | string | 否 | 助手ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total_calls": 150,
|
||||
"connected_calls": 135,
|
||||
"missed_calls": 15,
|
||||
"failed_calls": 0,
|
||||
"avg_duration_seconds": 180,
|
||||
"total_cost": 7.50,
|
||||
"by_status": {
|
||||
"connected": 135,
|
||||
"missed": 15,
|
||||
"failed": 0
|
||||
},
|
||||
"by_source": {
|
||||
"debug": 100,
|
||||
"external": 50
|
||||
},
|
||||
"daily_trend": [
|
||||
{
|
||||
"date": "2024-01-15",
|
||||
"calls": 20,
|
||||
"connected": 18,
|
||||
"avg_duration": 175
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 推荐的 Schema 定义
|
||||
## Schema 定义
|
||||
|
||||
```python
|
||||
# ============ Call Record ============
|
||||
@@ -440,17 +361,6 @@ class TranscriptOut(TranscriptCreate):
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
class HistoryStats(BaseModel):
|
||||
total_calls: int
|
||||
connected_calls: int
|
||||
missed_calls: int
|
||||
failed_calls: int
|
||||
avg_duration_seconds: float
|
||||
total_cost: float
|
||||
by_status: dict
|
||||
by_source: dict
|
||||
daily_trend: List[dict]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -7,9 +7,11 @@
|
||||
| 模块 | 文件 | 说明 |
|
||||
|------|------|------|
|
||||
| 小助手 | [assistant.md](./assistant.md) | AI 助手管理 |
|
||||
| 模型接入 | [model-access.md](./model-access.md) | LLM/ASR/TTS 模型配置 |
|
||||
| 语音识别 | [speech-recognition.md](./speech-recognition.md) | ASR 模型配置 |
|
||||
| 声音资源 | [voice-resources.md](./voice-resources.md) | TTS 声音库管理 |
|
||||
| LLM 模型 | [llm.md](./llm.md) | LLM 模型配置与管理 |
|
||||
| ASR 模型 | [asr.md](./asr.md) | 语音识别模型配置 |
|
||||
| 声音资源 | [voice-resources.md](./voice-resources.md) | TTS 语音配置 |
|
||||
| 工具与测试 | [tools.md](./tools.md) | 工具列表与自动测试 |
|
||||
| 知识库 | [knowledge.md](./knowledge.md) | 知识库与文档管理 |
|
||||
| 历史记录 | [history-records.md](./history-records.md) | 通话记录和转写 |
|
||||
|
||||
---
|
||||
|
||||
420
api/docs/knowledge.md
Normal file
420
api/docs/knowledge.md
Normal file
@@ -0,0 +1,420 @@
|
||||
# 知识库 (Knowledge Base) API
|
||||
|
||||
知识库 API 用于管理知识库和文档的创建、索引和搜索。
|
||||
|
||||
## 基础信息
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| Base URL | `/api/v1/knowledge` |
|
||||
| 认证方式 | Bearer Token (预留) |
|
||||
|
||||
---
|
||||
|
||||
## 数据模型
|
||||
|
||||
### KnowledgeBase
|
||||
|
||||
```typescript
|
||||
interface KnowledgeBase {
|
||||
id: string; // 知识库唯一标识 (8位UUID)
|
||||
user_id: number; // 所属用户ID
|
||||
name: string; // 知识库名称
|
||||
description: string; // 知识库描述
|
||||
embeddingModel: string; // Embedding 模型名称
|
||||
chunkSize: number; // 文档分块大小
|
||||
chunkOverlap: number; // 分块重叠大小
|
||||
docCount: number; // 文档数量
|
||||
chunkCount: number; // 切分后的文本块数量
|
||||
status: string; // 状态: "active" | "inactive"
|
||||
createdAt: string; // 创建时间
|
||||
updatedAt: string; // 更新时间
|
||||
documents: KnowledgeDocument[]; // 关联的文档列表
|
||||
}
|
||||
```
|
||||
|
||||
### KnowledgeDocument
|
||||
|
||||
```typescript
|
||||
interface KnowledgeDocument {
|
||||
id: string; // 文档唯一标识
|
||||
kb_id: string; // 所属知识库ID
|
||||
name: string; // 文档名称
|
||||
size: string; // 文件大小
|
||||
fileType: string; // 文件类型
|
||||
storageUrl: string; // 存储地址
|
||||
status: string; // 状态: "pending" | "processing" | "completed" | "failed"
|
||||
chunkCount: number; // 切分后的文本块数量
|
||||
errorMessage: string; // 错误信息
|
||||
uploadDate: string; // 上传时间
|
||||
createdAt: string; // 创建时间
|
||||
processedAt: string; // 处理完成时间
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API 端点
|
||||
|
||||
### 1. 获取知识库列表
|
||||
|
||||
```http
|
||||
GET /api/v1/knowledge/bases
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| user_id | int | 否 | 1 | 用户ID |
|
||||
| page | int | 否 | 1 | 页码 |
|
||||
| limit | int | 否 | 50 | 每页数量 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total": 2,
|
||||
"page": 1,
|
||||
"limit": 50,
|
||||
"list": [
|
||||
{
|
||||
"id": "kb_001",
|
||||
"user_id": 1,
|
||||
"name": "产品知识库",
|
||||
"description": "产品文档和FAQ",
|
||||
"embeddingModel": "text-embedding-3-small",
|
||||
"chunkSize": 500,
|
||||
"chunkOverlap": 50,
|
||||
"docCount": 10,
|
||||
"chunkCount": 150,
|
||||
"status": "active",
|
||||
"createdAt": "2024-01-15T10:30:00",
|
||||
"updatedAt": "2024-01-15T10:30:00",
|
||||
"documents": [...]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. 获取单个知识库详情
|
||||
|
||||
```http
|
||||
GET /api/v1/knowledge/bases/{kb_id}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "kb_001",
|
||||
"user_id": 1,
|
||||
"name": "产品知识库",
|
||||
"description": "产品文档和FAQ",
|
||||
"embeddingModel": "text-embedding-3-small",
|
||||
"chunkSize": 500,
|
||||
"chunkOverlap": 50,
|
||||
"docCount": 10,
|
||||
"chunkCount": 150,
|
||||
"status": "active",
|
||||
"createdAt": "2024-01-15T10:30:00",
|
||||
"updatedAt": "2024-01-15T10:30:00",
|
||||
"documents": [
|
||||
{
|
||||
"id": "doc_001",
|
||||
"kb_id": "kb_001",
|
||||
"name": "产品手册.pdf",
|
||||
"size": "1.2 MB",
|
||||
"fileType": "application/pdf",
|
||||
"storageUrl": "",
|
||||
"status": "completed",
|
||||
"chunkCount": 45,
|
||||
"errorMessage": null,
|
||||
"uploadDate": "2024-01-15T10:30:00",
|
||||
"createdAt": "2024-01-15T10:30:00",
|
||||
"processedAt": "2024-01-15T10:30:05"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. 创建知识库
|
||||
|
||||
```http
|
||||
POST /api/v1/knowledge/bases
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "产品知识库",
|
||||
"description": "产品文档和FAQ",
|
||||
"embeddingModel": "text-embedding-3-small",
|
||||
"chunkSize": 500,
|
||||
"chunkOverlap": 50
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| name | string | 是 | 知识库名称 |
|
||||
| description | string | 否 | 知识库描述 |
|
||||
| embeddingModel | string | 否 | Embedding 模型名称,默认 "text-embedding-3-small" |
|
||||
| chunkSize | int | 否 | 文档分块大小,默认 500 |
|
||||
| chunkOverlap | int | 否 | 分块重叠大小,默认 50 |
|
||||
|
||||
---
|
||||
|
||||
### 4. 更新知识库
|
||||
|
||||
```http
|
||||
PUT /api/v1/knowledge/bases/{kb_id}
|
||||
```
|
||||
|
||||
**Request Body:** (部分更新)
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "更新后的知识库名称",
|
||||
"description": "新的描述",
|
||||
"chunkSize": 800
|
||||
}
|
||||
```
|
||||
|
||||
**注意:** 如果知识库中已有索引的文档,则不能修改 embeddingModel。如需修改,请先删除所有文档。
|
||||
|
||||
---
|
||||
|
||||
### 5. 删除知识库
|
||||
|
||||
```http
|
||||
DELETE /api/v1/knowledge/bases/{kb_id}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "Deleted successfully"
|
||||
}
|
||||
```
|
||||
|
||||
**注意:** 删除知识库会同时删除向量数据库中的相关数据。
|
||||
|
||||
---
|
||||
|
||||
### 6. 上传文档
|
||||
|
||||
```http
|
||||
POST /api/v1/knowledge/bases/{kb_id}/documents
|
||||
```
|
||||
|
||||
支持两种上传方式:
|
||||
|
||||
**方式一:文件上传 (multipart/form-data)**
|
||||
|
||||
| 参数 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| file | file | 是 | 要上传的文档文件 |
|
||||
|
||||
支持的文件类型:`.txt`, `.md`, `.csv`, `.json`, `.pdf`, `.docx`
|
||||
|
||||
**方式二:仅创建文档记录 (application/json)**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "document.pdf",
|
||||
"size": "1.2 MB",
|
||||
"fileType": "application/pdf",
|
||||
"storageUrl": "https://storage.example.com/doc.pdf"
|
||||
}
|
||||
```
|
||||
|
||||
**Response (文件上传):**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "doc_001",
|
||||
"name": "产品手册.pdf",
|
||||
"size": "1.2 MB",
|
||||
"fileType": "application/pdf",
|
||||
"storageUrl": "",
|
||||
"status": "completed",
|
||||
"chunkCount": 45,
|
||||
"message": "Document uploaded and indexed"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 7. 索引文档内容
|
||||
|
||||
```http
|
||||
POST /api/v1/knowledge/bases/{kb_id}/documents/{doc_id}/index
|
||||
```
|
||||
|
||||
直接向向量数据库索引文本内容,无需上传文件。
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"content": "要索引的文本内容..."
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "Document indexed",
|
||||
"chunkCount": 10
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 8. 删除文档
|
||||
|
||||
```http
|
||||
DELETE /api/v1/knowledge/bases/{kb_id}/documents/{doc_id}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "Deleted successfully"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 9. 搜索知识库
|
||||
|
||||
```http
|
||||
POST /api/v1/knowledge/search
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"kb_id": "kb_001",
|
||||
"query": "产品退货政策",
|
||||
"nResults": 5
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| kb_id | string | 是 | 知识库ID |
|
||||
| query | string | 是 | 搜索查询文本 |
|
||||
| nResults | int | 否 | 返回结果数量,默认 5 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": "doc_001",
|
||||
"text": "我们的退货政策是...",
|
||||
"score": 0.85,
|
||||
"metadata": {
|
||||
"document_name": "退货政策.pdf",
|
||||
"chunk_index": 3
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 10. 获取知识库统计
|
||||
|
||||
```http
|
||||
GET /api/v1/knowledge/bases/{kb_id}/stats
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"kb_id": "kb_001",
|
||||
"docCount": 10,
|
||||
"chunkCount": 150
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 支持的文件类型
|
||||
|
||||
| 文件类型 | 扩展名 | 说明 |
|
||||
|----------|--------|------|
|
||||
| 纯文本 | .txt | 纯文本文件 |
|
||||
| Markdown | .md | Markdown 格式文档 |
|
||||
| CSV | .csv | CSV 表格数据 |
|
||||
| JSON | .json | JSON 格式数据 |
|
||||
| PDF | .pdf | PDF 文档 (需要 pypdf) |
|
||||
| Word | .docx | Word 文档 (需要 python-docx) |
|
||||
|
||||
**注意:** 不支持旧的 .doc 格式,请转换为 .docx 或其他格式。
|
||||
|
||||
---
|
||||
|
||||
## Schema 定义
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List
|
||||
|
||||
class KnowledgeBaseCreate(BaseModel):
|
||||
name: str
|
||||
description: Optional[str] = None
|
||||
embeddingModel: Optional[str] = "text-embedding-3-small"
|
||||
chunkSize: Optional[int] = 500
|
||||
chunkOverlap: Optional[int] = 50
|
||||
|
||||
class KnowledgeBaseUpdate(BaseModel):
|
||||
name: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
embeddingModel: Optional[str] = None
|
||||
chunkSize: Optional[int] = None
|
||||
chunkOverlap: Optional[int] = None
|
||||
|
||||
class KnowledgeSearchQuery(BaseModel):
|
||||
kb_id: str
|
||||
query: str
|
||||
nResults: Optional[int] = 5
|
||||
|
||||
class DocumentIndexRequest(BaseModel):
|
||||
content: str
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 单元测试
|
||||
|
||||
项目包含完整的单元测试,位于 `api/tests/test_knowledge.py`。
|
||||
|
||||
### 运行测试
|
||||
|
||||
```bash
|
||||
# 运行知识库相关测试
|
||||
pytest api/tests/test_knowledge.py -v
|
||||
|
||||
# 运行所有测试
|
||||
pytest api/tests/ -v
|
||||
```
|
||||
463
api/docs/llm.md
Normal file
463
api/docs/llm.md
Normal file
@@ -0,0 +1,463 @@
|
||||
# LLM 模型 (LLM Model) API
|
||||
|
||||
LLM 模型 API 用于管理大语言模型的配置和调用。
|
||||
|
||||
## 基础信息
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| Base URL | `/api/v1/llm` |
|
||||
| 认证方式 | Bearer Token (预留) |
|
||||
|
||||
---
|
||||
|
||||
## 数据模型
|
||||
|
||||
### LLMModel
|
||||
|
||||
```typescript
|
||||
interface LLMModel {
|
||||
id: string; // 模型唯一标识 (8位UUID)
|
||||
user_id: number; // 所属用户ID
|
||||
name: string; // 模型显示名称
|
||||
vendor: string; // 供应商: "OpenAI Compatible" | "Dify" | "FastGPT" | 等
|
||||
type: string; // 类型: "text" | "embedding" | "rerank"
|
||||
base_url: string; // API Base URL
|
||||
api_key: string; // API Key
|
||||
model_name?: string; // 实际模型名称,如 "gpt-4o"
|
||||
temperature?: number; // 温度参数 (0-2)
|
||||
context_length?: int; // 上下文长度
|
||||
enabled: boolean; // 是否启用
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API 端点
|
||||
|
||||
### 1. 获取 LLM 模型列表
|
||||
|
||||
```http
|
||||
GET /api/v1/llm
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| model_type | string | 否 | - | 过滤类型: "text" \| "embedding" \| "rerank" |
|
||||
| enabled | boolean | 否 | - | 过滤启用状态 |
|
||||
| page | int | 否 | 1 | 页码 |
|
||||
| limit | int | 否 | 50 | 每页数量 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total": 5,
|
||||
"page": 1,
|
||||
"limit": 50,
|
||||
"list": [
|
||||
{
|
||||
"id": "abc12345",
|
||||
"user_id": 1,
|
||||
"name": "GPT-4o",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"type": "text",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-***",
|
||||
"model_name": "gpt-4o",
|
||||
"temperature": 0.7,
|
||||
"context_length": 128000,
|
||||
"enabled": true,
|
||||
"created_at": "2024-01-15T10:30:00Z",
|
||||
"updated_at": "2024-01-15T10:30:00Z"
|
||||
},
|
||||
{
|
||||
"id": "def67890",
|
||||
"user_id": 1,
|
||||
"name": "Embedding-3-Small",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"type": "embedding",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-***",
|
||||
"model_name": "text-embedding-3-small",
|
||||
"enabled": true
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. 获取单个 LLM 模型详情
|
||||
|
||||
```http
|
||||
GET /api/v1/llm/{id}
|
||||
```
|
||||
|
||||
**Path Parameters:**
|
||||
|
||||
| 参数 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| id | string | 模型ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "abc12345",
|
||||
"user_id": 1,
|
||||
"name": "GPT-4o",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"type": "text",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-***",
|
||||
"model_name": "gpt-4o",
|
||||
"temperature": 0.7,
|
||||
"context_length": 128000,
|
||||
"enabled": true,
|
||||
"created_at": "2024-01-15T10:30:00Z",
|
||||
"updated_at": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. 创建 LLM 模型
|
||||
|
||||
```http
|
||||
POST /api/v1/llm
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "GPT-4o",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"type": "text",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-your-api-key",
|
||||
"model_name": "gpt-4o",
|
||||
"temperature": 0.7,
|
||||
"context_length": 128000,
|
||||
"enabled": true
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| name | string | 是 | 模型显示名称 |
|
||||
| vendor | string | 是 | 供应商名称 |
|
||||
| type | string | 是 | 模型类型: "text" / "embedding" / "rerank" |
|
||||
| base_url | string | 是 | API Base URL |
|
||||
| api_key | string | 是 | API Key |
|
||||
| model_name | string | 否 | 实际模型名称 |
|
||||
| temperature | number | 否 | 温度参数,默认 0.7 |
|
||||
| context_length | int | 否 | 上下文长度 |
|
||||
| enabled | boolean | 否 | 是否启用,默认 true |
|
||||
| id | string | 否 | 指定模型ID,默认自动生成 |
|
||||
|
||||
---
|
||||
|
||||
### 4. 更新 LLM 模型
|
||||
|
||||
```http
|
||||
PUT /api/v1/llm/{id}
|
||||
```
|
||||
|
||||
**Request Body:** (部分更新)
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "GPT-4o-Updated",
|
||||
"temperature": 0.8,
|
||||
"enabled": false
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5. 删除 LLM 模型
|
||||
|
||||
```http
|
||||
DELETE /api/v1/llm/{id}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "Deleted successfully"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 6. 测试 LLM 模型连接
|
||||
|
||||
```http
|
||||
POST /api/v1/llm/{id}/test
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"latency_ms": 150,
|
||||
"message": "Connection successful"
|
||||
}
|
||||
```
|
||||
|
||||
**错误响应:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"latency_ms": 200,
|
||||
"message": "HTTP Error: 401 - Unauthorized"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 7. 与 LLM 模型对话
|
||||
|
||||
```http
|
||||
POST /api/v1/llm/{id}/chat
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| message | string | 是 | - | 用户消息 |
|
||||
| system_prompt | string | 否 | - | 系统提示词 |
|
||||
| max_tokens | int | 否 | 1000 | 最大生成token数 |
|
||||
| temperature | number | 否 | 模型配置 | 温度参数 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"reply": "您好!有什么可以帮助您的?",
|
||||
"usage": {
|
||||
"prompt_tokens": 20,
|
||||
"completion_tokens": 15,
|
||||
"total_tokens": 35
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 8. 预览模型输出
|
||||
|
||||
```http
|
||||
POST /api/v1/llm/{id}/preview
|
||||
```
|
||||
|
||||
预览模型输出,支持 text(chat) 与 embedding 两类模型。
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"message": "请介绍一下你自己",
|
||||
"system_prompt": "你是一个专业的AI助手",
|
||||
"max_tokens": 512,
|
||||
"temperature": 0.7
|
||||
}
|
||||
```
|
||||
|
||||
**Response (text model):**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"reply": "您好!我是一个...",
|
||||
"usage": {
|
||||
"prompt_tokens": 20,
|
||||
"completion_tokens": 50,
|
||||
"total_tokens": 70
|
||||
},
|
||||
"latency_ms": 1500,
|
||||
"error": null
|
||||
}
|
||||
```
|
||||
|
||||
**Response (embedding model):**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"reply": "Embedding generated successfully. dims=1536. head=[0.012345, -0.023456, ...]",
|
||||
"usage": {
|
||||
"prompt_tokens": 10,
|
||||
"total_tokens": 10
|
||||
},
|
||||
"latency_ms": 800,
|
||||
"error": null
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| message | string | 是 | 用户消息/嵌入文本 |
|
||||
| system_prompt | string | 否 | 系统提示词 (仅 text 模型) |
|
||||
| max_tokens | int | 否 | 最大生成 token 数 (默认 512) |
|
||||
| temperature | float | 否 | 温度参数 |
|
||||
| api_key | string | 否 | 覆盖模型配置的 API Key |
|
||||
|
||||
---
|
||||
|
||||
## Schema 定义
|
||||
|
||||
```python
|
||||
from enum import Enum
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
class LLMModelType(str, Enum):
|
||||
TEXT = "text"
|
||||
EMBEDDING = "embedding"
|
||||
RERANK = "rerank"
|
||||
|
||||
class LLMModelBase(BaseModel):
|
||||
name: str
|
||||
vendor: str
|
||||
type: LLMModelType
|
||||
base_url: str
|
||||
api_key: str
|
||||
model_name: Optional[str] = None
|
||||
temperature: Optional[float] = None
|
||||
context_length: Optional[int] = None
|
||||
enabled: bool = True
|
||||
|
||||
class LLMModelCreate(LLMModelBase):
|
||||
id: Optional[str] = None
|
||||
|
||||
class LLMModelUpdate(BaseModel):
|
||||
name: Optional[str] = None
|
||||
vendor: Optional[str] = None
|
||||
base_url: Optional[str] = None
|
||||
api_key: Optional[str] = None
|
||||
model_name: Optional[str] = None
|
||||
temperature: Optional[float] = None
|
||||
context_length: Optional[int] = None
|
||||
enabled: Optional[bool] = None
|
||||
|
||||
class LLMModelOut(LLMModelBase):
|
||||
id: str
|
||||
user_id: int
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
class LLMModelTestResponse(BaseModel):
|
||||
success: bool
|
||||
latency_ms: int
|
||||
message: str
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 供应商配置示例
|
||||
|
||||
### OpenAI Compatible (OpenAI Endpoint)
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-xxx",
|
||||
"model_name": "gpt-4o",
|
||||
"type": "text",
|
||||
"temperature": 0.7
|
||||
}
|
||||
```
|
||||
|
||||
### OpenAI Compatible
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.siliconflow.com/v1",
|
||||
"api_key": "sf-xxx",
|
||||
"model_name": "deepseek-v3",
|
||||
"type": "text",
|
||||
"temperature": 0.7
|
||||
}
|
||||
```
|
||||
|
||||
### Dify
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "Dify",
|
||||
"base_url": "https://your-dify.domain.com/v1",
|
||||
"api_key": "app-xxx",
|
||||
"model_name": "gpt-4",
|
||||
"type": "text"
|
||||
}
|
||||
```
|
||||
|
||||
### Embedding 模型
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-xxx",
|
||||
"model_name": "text-embedding-3-small",
|
||||
"type": "embedding"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 单元测试
|
||||
|
||||
项目包含完整的单元测试,位于 `api/tests/test_llm.py`。
|
||||
|
||||
### 测试用例概览
|
||||
|
||||
| 测试方法 | 说明 |
|
||||
|----------|------|
|
||||
| test_get_llm_models_empty | 空数据库获取测试 |
|
||||
| test_create_llm_model | 创建模型测试 |
|
||||
| test_create_llm_model_minimal | 最小数据创建测试 |
|
||||
| test_get_llm_model_by_id | 获取单个模型测试 |
|
||||
| test_get_llm_model_not_found | 获取不存在模型测试 |
|
||||
| test_update_llm_model | 更新模型测试 |
|
||||
| test_delete_llm_model | 删除模型测试 |
|
||||
| test_list_llm_models_with_pagination | 分页测试 |
|
||||
| test_filter_llm_models_by_type | 按类型过滤测试 |
|
||||
| test_filter_llm_models_by_enabled | 按启用状态过滤测试 |
|
||||
| test_create_llm_model_with_all_fields | 全字段创建测试 |
|
||||
| test_test_llm_model_success | 测试连接成功测试 |
|
||||
| test_test_llm_model_failure | 测试连接失败测试 |
|
||||
| test_different_llm_vendors | 多供应商测试 |
|
||||
| test_embedding_llm_model | Embedding 模型测试 |
|
||||
|
||||
### 运行测试
|
||||
|
||||
```bash
|
||||
# 运行 LLM 相关测试
|
||||
pytest api/tests/test_llm.py -v
|
||||
|
||||
# 运行所有测试
|
||||
pytest api/tests/ -v
|
||||
```
|
||||
@@ -20,7 +20,7 @@ interface LLMModel {
|
||||
id: string; // 模型唯一标识
|
||||
user_id: number; // 所属用户ID
|
||||
name: string; // 模型显示名称
|
||||
vendor: string; // 供应商: "OpenAI Compatible" | "SiliconFlow" | "Dify" | "FastGPT"
|
||||
vendor: string; // 供应商: "OpenAI Compatible" | "Dify" | "FastGPT"
|
||||
type: string; // 类型: "text" | "embedding" | "rerank"
|
||||
base_url: string; // API Base URL
|
||||
api_key: string; // API Key
|
||||
@@ -57,7 +57,7 @@ interface TTSModel {
|
||||
id: string;
|
||||
user_id: number;
|
||||
name: string;
|
||||
vendor: string; // "Ali" | "Volcano" | "Minimax" | "硅基流动"
|
||||
vendor: string; // "OpenAI Compatible" | "Ali" | "Volcano" | "Minimax"
|
||||
language: string; // "zh" | "en"
|
||||
voice_list?: string[]; // 支持的声音列表
|
||||
enabled: boolean;
|
||||
@@ -316,7 +316,6 @@ class LLMModelType(str, Enum):
|
||||
|
||||
class LLMModelVendor(str, Enum):
|
||||
OPENAI_COMPATIBLE = "OpenAI Compatible"
|
||||
SILICONFLOW = "SiliconFlow"
|
||||
DIFY = "Dify"
|
||||
FASTGPT = "FastGPT"
|
||||
|
||||
@@ -389,11 +388,11 @@ class ASRModelOut(ASRModelBase):
|
||||
}
|
||||
```
|
||||
|
||||
### SiliconFlow
|
||||
### OpenAI Compatible
|
||||
|
||||
```json
|
||||
{
|
||||
"vendor": "SiliconFlow",
|
||||
"vendor": "OpenAI Compatible",
|
||||
"base_url": "https://api.siliconflow.com/v1",
|
||||
"api_key": "sf-xxx",
|
||||
"model_name": "deepseek-v3"
|
||||
|
||||
580
api/docs/tools.md
Normal file
580
api/docs/tools.md
Normal file
@@ -0,0 +1,580 @@
|
||||
# 工具与自动测试 (Tools & Autotest) API
|
||||
|
||||
工具与自动测试 API 用于管理可用工具列表和自动测试功能。
|
||||
|
||||
## 基础信息
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| Base URL | `/api/v1/tools` |
|
||||
| 认证方式 | Bearer Token (预留) |
|
||||
|
||||
---
|
||||
|
||||
## 可用工具 (Tool Registry)
|
||||
|
||||
系统内置以下工具:
|
||||
|
||||
| 工具ID | 名称 | 类别 | 说明 |
|
||||
|--------|------|------|------|
|
||||
| calculator | 计算器 | query | 执行数学计算 |
|
||||
| code_interpreter | 代码执行 | query | 安全地执行Python代码 |
|
||||
| current_time | 当前时间 | query | 获取当前本地时间 |
|
||||
| turn_on_camera | 打开摄像头 | system | 执行打开摄像头命令 |
|
||||
| turn_off_camera | 关闭摄像头 | system | 执行关闭摄像头命令 |
|
||||
| increase_volume | 调高音量 | system | 提升设备音量 |
|
||||
| decrease_volume | 调低音量 | system | 降低设备音量 |
|
||||
| voice_msg_prompt | 语音消息提示 | system | 播报一条语音提示消息 |
|
||||
| text_msg_prompt | 文本消息提示 | system | 显示一条文本弹窗提示 |
|
||||
| voice_choice_prompt | 语音选项提示 | system | 播报问题并展示可选项,等待用户选择 |
|
||||
| text_choice_prompt | 文本选项提示 | system | 显示文本选项弹窗并等待用户选择 |
|
||||
|
||||
**类别说明:**
|
||||
- `query`: 查询类工具,需要配置 HTTP URL
|
||||
- `system`: 系统类工具,直接在客户端执行
|
||||
|
||||
---
|
||||
|
||||
## API 端点
|
||||
|
||||
### 1. 获取可用工具列表
|
||||
|
||||
```http
|
||||
GET /api/v1/tools/list
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"tools": {
|
||||
"search": {
|
||||
"name": "网络搜索",
|
||||
"description": "搜索互联网获取最新信息",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"type": "string", "description": "搜索关键词"}
|
||||
},
|
||||
"required": ["query"]
|
||||
}
|
||||
},
|
||||
"calculator": {
|
||||
"name": "计算器",
|
||||
"description": "执行数学计算",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"expression": {"type": "string", "description": "数学表达式,如: 2 + 3 * 4"}
|
||||
},
|
||||
"required": ["expression"]
|
||||
}
|
||||
},
|
||||
"weather": {
|
||||
"name": "天气查询",
|
||||
"description": "查询指定城市的天气",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"city": {"type": "string", "description": "城市名称"}
|
||||
},
|
||||
"required": ["city"]
|
||||
}
|
||||
},
|
||||
"translate": {
|
||||
"name": "翻译",
|
||||
"description": "翻译文本到指定语言",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"text": {"type": "string", "description": "要翻译的文本"},
|
||||
"target_lang": {"type": "string", "description": "目标语言,如: en, ja, ko"}
|
||||
},
|
||||
"required": ["text", "target_lang"]
|
||||
}
|
||||
},
|
||||
"knowledge": {
|
||||
"name": "知识库查询",
|
||||
"description": "从知识库中检索相关信息",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"type": "string", "description": "查询内容"},
|
||||
"kb_id": {"type": "string", "description": "知识库ID"}
|
||||
},
|
||||
"required": ["query"]
|
||||
}
|
||||
},
|
||||
"code_interpreter": {
|
||||
"name": "代码执行",
|
||||
"description": "安全地执行Python代码",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {"type": "string", "description": "要执行的Python代码"}
|
||||
},
|
||||
"required": ["code"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. 获取工具详情
|
||||
|
||||
```http
|
||||
GET /api/v1/tools/list/{tool_id}
|
||||
```
|
||||
|
||||
**Path Parameters:**
|
||||
|
||||
| 参数 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| tool_id | string | 工具ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "计算器",
|
||||
"description": "执行数学计算",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"expression": {"type": "string", "description": "数学表达式,如: 2 + 3 * 4"}
|
||||
},
|
||||
"required": ["expression"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**错误响应 (工具不存在):**
|
||||
|
||||
```json
|
||||
{
|
||||
"detail": "Tool not found"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. 健康检查
|
||||
|
||||
```http
|
||||
GET /api/v1/tools/health
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "healthy",
|
||||
"timestamp": 1705315200.123,
|
||||
"tools": ["search", "calculator", "weather", "translate", "knowledge", "code_interpreter"]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4. 获取工具资源列表
|
||||
|
||||
```http
|
||||
GET /api/v1/tools/resources
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| category | string | 否 | - | 过滤类别: "query" \| "system" |
|
||||
| enabled | boolean | 否 | - | 过滤启用状态 |
|
||||
| include_system | boolean | 否 | true | 是否包含系统工具 |
|
||||
| page | int | 否 | 1 | 页码 |
|
||||
| limit | int | 否 | 100 | 每页数量 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"total": 15,
|
||||
"page": 1,
|
||||
"limit": 100,
|
||||
"list": [
|
||||
{
|
||||
"id": "calculator",
|
||||
"user_id": 1,
|
||||
"name": "计算器",
|
||||
"description": "执行数学计算",
|
||||
"category": "query",
|
||||
"icon": "Terminal",
|
||||
"http_method": "GET",
|
||||
"http_url": null,
|
||||
"http_timeout_ms": 10000,
|
||||
"parameter_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"expression": {"type": "string", "description": "数学表达式"}
|
||||
},
|
||||
"required": ["expression"]
|
||||
},
|
||||
"parameter_defaults": {},
|
||||
"wait_for_response": false,
|
||||
"enabled": true,
|
||||
"is_system": true,
|
||||
"created_at": "2024-01-15T10:30:00Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5. 获取工具资源详情
|
||||
|
||||
```http
|
||||
GET /api/v1/tools/resources/{id}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 6. 创建工具资源
|
||||
|
||||
```http
|
||||
POST /api/v1/tools/resources
|
||||
```
|
||||
|
||||
**Request Body:**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "订单查询",
|
||||
"description": "查询用户订单信息",
|
||||
"category": "query",
|
||||
"icon": "Search",
|
||||
"http_method": "POST",
|
||||
"http_url": "https://api.example.com/orders",
|
||||
"http_headers": {"Authorization": "Bearer {api_key}"},
|
||||
"http_timeout_ms": 10000,
|
||||
"parameter_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"order_id": {"type": "string", "description": "订单ID"}
|
||||
},
|
||||
"required": ["order_id"]
|
||||
},
|
||||
"enabled": true
|
||||
}
|
||||
```
|
||||
|
||||
**Fields 说明:**
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| id | string | 否 | 工具ID,默认自动生成 |
|
||||
| name | string | 是 | 工具名称 |
|
||||
| description | string | 否 | 工具描述 |
|
||||
| category | string | 是 | 类别: "query" \| "system" |
|
||||
| icon | string | 否 | 图标名称 |
|
||||
| http_method | string | 否 | HTTP 方法,默认 GET |
|
||||
| http_url | string | 否* | HTTP 请求地址 (query 类必填) |
|
||||
| http_headers | object | 否 | HTTP 请求头 |
|
||||
| http_timeout_ms | int | 否 | 超时时间(毫秒),默认 10000 |
|
||||
| parameter_schema | object | 否 | 参数 JSON Schema |
|
||||
| parameter_defaults | object | 否 | 默认参数值 |
|
||||
| wait_for_response | boolean | 否 | 是否等待响应 (仅 system 类) |
|
||||
| enabled | boolean | 否 | 是否启用,默认 true |
|
||||
|
||||
---
|
||||
|
||||
### 7. 更新工具资源
|
||||
|
||||
```http
|
||||
PUT /api/v1/tools/resources/{id}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 8. 删除工具资源
|
||||
|
||||
```http
|
||||
DELETE /api/v1/tools/resources/{id}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 自动测试 (Autotest)
|
||||
|
||||
### 9. 运行完整自动测试
|
||||
|
||||
```http
|
||||
POST /api/v1/tools/autotest
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| llm_model_id | string | 否 | - | LLM 模型ID |
|
||||
| asr_model_id | string | 否 | - | ASR 模型ID |
|
||||
| test_llm | boolean | 否 | true | 是否测试LLM |
|
||||
| test_asr | boolean | 否 | true | 是否测试ASR |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "abc12345",
|
||||
"started_at": 1705315200.0,
|
||||
"duration_ms": 2500,
|
||||
"tests": [
|
||||
{
|
||||
"name": "Model Existence",
|
||||
"passed": true,
|
||||
"message": "Found model: GPT-4o",
|
||||
"duration_ms": 15
|
||||
},
|
||||
{
|
||||
"name": "API Connection",
|
||||
"passed": true,
|
||||
"message": "Latency: 150ms",
|
||||
"duration_ms": 150
|
||||
},
|
||||
{
|
||||
"name": "Temperature Setting",
|
||||
"passed": true,
|
||||
"message": "temperature=0.7"
|
||||
},
|
||||
{
|
||||
"name": "Streaming Support",
|
||||
"passed": true,
|
||||
"message": "Received 15 chunks",
|
||||
"duration_ms": 800
|
||||
}
|
||||
],
|
||||
"summary": {
|
||||
"passed": 4,
|
||||
"failed": 0,
|
||||
"total": 4
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 10. 测试单个 LLM 模型
|
||||
|
||||
```http
|
||||
POST /api/v1/tools/autotest/llm/{model_id}
|
||||
```
|
||||
|
||||
**Path Parameters:**
|
||||
|
||||
| 参数 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| model_id | string | LLM 模型ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "llm_test_001",
|
||||
"started_at": 1705315200.0,
|
||||
"duration_ms": 1200,
|
||||
"tests": [
|
||||
{
|
||||
"name": "Model Existence",
|
||||
"passed": true,
|
||||
"message": "Found model: GPT-4o",
|
||||
"duration_ms": 10
|
||||
},
|
||||
{
|
||||
"name": "API Connection",
|
||||
"passed": true,
|
||||
"message": "Latency: 180ms",
|
||||
"duration_ms": 180
|
||||
},
|
||||
{
|
||||
"name": "Temperature Setting",
|
||||
"passed": true,
|
||||
"message": "temperature=0.7"
|
||||
},
|
||||
{
|
||||
"name": "Streaming Support",
|
||||
"passed": true,
|
||||
"message": "Received 12 chunks",
|
||||
"duration_ms": 650
|
||||
}
|
||||
],
|
||||
"summary": {
|
||||
"passed": 4,
|
||||
"failed": 0,
|
||||
"total": 4
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 11. 测试单个 ASR 模型
|
||||
|
||||
```http
|
||||
POST /api/v1/tools/autotest/asr/{model_id}
|
||||
```
|
||||
|
||||
**Path Parameters:**
|
||||
|
||||
| 参数 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| model_id | string | ASR 模型ID |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "asr_test_001",
|
||||
"started_at": 1705315200.0,
|
||||
"duration_ms": 800,
|
||||
"tests": [
|
||||
{
|
||||
"name": "Model Existence",
|
||||
"passed": true,
|
||||
"message": "Found model: Whisper-1",
|
||||
"duration_ms": 8
|
||||
},
|
||||
{
|
||||
"name": "Hotwords Config",
|
||||
"passed": true,
|
||||
"message": "Hotwords: 3 words"
|
||||
},
|
||||
{
|
||||
"name": "API Availability",
|
||||
"passed": true,
|
||||
"message": "Status: 200",
|
||||
"duration_ms": 250
|
||||
},
|
||||
{
|
||||
"name": "Language Config",
|
||||
"passed": true,
|
||||
"message": "Language: zh"
|
||||
}
|
||||
],
|
||||
"summary": {
|
||||
"passed": 4,
|
||||
"failed": 0,
|
||||
"total": 4
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 12. 发送测试消息
|
||||
|
||||
```http
|
||||
POST /api/v1/tools/test-message
|
||||
```
|
||||
|
||||
**Query Parameters:**
|
||||
|
||||
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|------|------|------|--------|------|
|
||||
| llm_model_id | string | 是 | - | LLM 模型ID |
|
||||
| message | string | 否 | "Hello, this is a test message." | 测试消息 |
|
||||
|
||||
**Response:**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"reply": "Hello! This is a test reply from GPT-4o.",
|
||||
"usage": {
|
||||
"prompt_tokens": 15,
|
||||
"completion_tokens": 12,
|
||||
"total_tokens": 27
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**错误响应 (模型不存在):**
|
||||
|
||||
```json
|
||||
{
|
||||
"detail": "LLM Model not found"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 测试结果结构
|
||||
|
||||
### AutotestResult
|
||||
|
||||
```typescript
|
||||
interface AutotestResult {
|
||||
id: string; // 测试ID
|
||||
started_at: number; // 开始时间戳
|
||||
duration_ms: number; // 总耗时(毫秒)
|
||||
tests: TestCase[]; // 测试用例列表
|
||||
summary: TestSummary; // 测试摘要
|
||||
}
|
||||
|
||||
interface TestCase {
|
||||
name: string; // 测试名称
|
||||
passed: boolean; // 是否通过
|
||||
message: string; // 测试消息
|
||||
duration_ms: number; // 耗时(毫秒)
|
||||
}
|
||||
|
||||
interface TestSummary {
|
||||
passed: number; // 通过数量
|
||||
failed: number; // 失败数量
|
||||
total: number; // 总数量
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 测试项目说明
|
||||
|
||||
### LLM 模型测试项目
|
||||
|
||||
| 测试名称 | 说明 |
|
||||
|----------|------|
|
||||
| Model Existence | 检查模型是否存在于数据库 |
|
||||
| API Connection | 测试 API 连接并测量延迟 |
|
||||
| Temperature Setting | 检查温度配置 |
|
||||
| Streaming Support | 测试流式响应支持 |
|
||||
|
||||
### ASR 模型测试项目
|
||||
|
||||
| 测试名称 | 说明 |
|
||||
|----------|------|
|
||||
| Model Existence | 检查模型是否存在于数据库 |
|
||||
| Hotwords Config | 检查热词配置 |
|
||||
| API Availability | 测试 API 可用性 |
|
||||
| Language Config | 检查语言配置 |
|
||||
|
||||
---
|
||||
|
||||
## 单元测试
|
||||
|
||||
项目包含完整的单元测试,位于 `api/tests/test_tools.py`。
|
||||
|
||||
### 测试用例概览
|
||||
|
||||
| 测试类 | 说明 |
|
||||
|--------|------|
|
||||
| TestToolsAPI | 工具列表、健康检查等基础功能测试 |
|
||||
| TestAutotestAPI | 自动测试功能完整测试 |
|
||||
|
||||
### 运行测试
|
||||
|
||||
```bash
|
||||
# 运行工具相关测试
|
||||
pytest api/tests/test_tools.py -v
|
||||
|
||||
# 运行所有测试
|
||||
pytest api/tests/ -v
|
||||
```
|
||||
@@ -182,12 +182,14 @@ POST /api/v1/voices
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|------|------|------|------|
|
||||
| name | string | 是 | 声音名称 |
|
||||
| vendor | string | 是 | 供应商 |
|
||||
| vendor | string | 是 | 供应商: "Ali" \| "Volcano" \| "Minimax" \| "OpenAI Compatible" \| "DashScope" |
|
||||
| gender | string | 是 | 性别: "Male" \| "Female" |
|
||||
| language | string | 是 | 语言: "zh" \| "en" |
|
||||
| description | string | 否 | 描述信息 |
|
||||
| model | string | 是 | 厂商语音模型标识 |
|
||||
| voice_key | string | 是 | 厂商voice_key |
|
||||
| model | string | 否 | 厂商语音模型标识 (可选,部分供应商有默认值) |
|
||||
| voice_key | string | 否 | 厂商 voice_key (可选,部分供应商有默认值) |
|
||||
| api_key | string | 否 | 供应商 API Key (可选,也可通过环境变量配置) |
|
||||
| base_url | string | 否 | API Base URL (可选,部分供应商有默认值) |
|
||||
| speed | number | 否 | 默认语速 (0.5-2.0),默认 1.0 |
|
||||
| gain | number | 否 | 音量增益 (-10~10 dB),默认 0 |
|
||||
| pitch | number | 否 | 音调调整,默认 0 |
|
||||
@@ -244,11 +246,14 @@ POST /api/v1/voices/{id}/preview
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"audio_url": "https://storage.example.com/preview/voice_001_preview.mp3",
|
||||
"duration_ms": 2500
|
||||
"audio_url": "data:audio/wav;base64,UklGRi...",
|
||||
"duration_ms": 2500,
|
||||
"error": null
|
||||
}
|
||||
```
|
||||
|
||||
**注意:** `audio_url` 返回 Base64 编码的音频数据 (data URI 格式),可直接在浏览器中播放或解码保存为音频文件。
|
||||
|
||||
---
|
||||
|
||||
### 7. 获取供应商声音列表
|
||||
|
||||
498
api/init_db.py
498
api/init_db.py
@@ -1,52 +1,490 @@
|
||||
#!/usr/bin/env python3
|
||||
"""初始化数据库"""
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
|
||||
# 添加路径
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from app.db import Base, engine
|
||||
from app.models import Voice
|
||||
from app.db import Base, engine, DATABASE_URL
|
||||
from app.id_generator import short_id
|
||||
from app.models import Voice, Assistant, KnowledgeBase, Workflow, LLMModel, ASRModel, KnowledgeDocument
|
||||
|
||||
VOICE_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
|
||||
DASHSCOPE_VOICE_MODEL = "qwen3-tts-flash-realtime"
|
||||
DASHSCOPE_DEFAULT_VOICE_KEY = "Cherry"
|
||||
DASHSCOPE_REALTIME_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
|
||||
|
||||
SEED_VOICE_IDS = {
|
||||
"alex": short_id("tts"),
|
||||
"david": short_id("tts"),
|
||||
"bella": short_id("tts"),
|
||||
"claire": short_id("tts"),
|
||||
"dashscope_cherry": short_id("tts"),
|
||||
}
|
||||
|
||||
SEED_LLM_IDS = {
|
||||
"deepseek_chat": short_id("llm"),
|
||||
"glm_4": short_id("llm"),
|
||||
"embedding_3_small": short_id("llm"),
|
||||
}
|
||||
|
||||
SEED_ASR_IDS = {
|
||||
"sensevoice_small": short_id("asr"),
|
||||
"telespeech_asr": short_id("asr"),
|
||||
"dashscope_realtime": short_id("asr"),
|
||||
}
|
||||
|
||||
SEED_ASSISTANT_IDS = {
|
||||
"default": short_id("ast"),
|
||||
"customer_service": short_id("ast"),
|
||||
"english_tutor": short_id("ast"),
|
||||
}
|
||||
|
||||
|
||||
def ensure_db_dir():
|
||||
"""确保 SQLite 数据目录存在。"""
|
||||
if not DATABASE_URL.startswith("sqlite:///"):
|
||||
return
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
data_dir = os.path.dirname(db_path)
|
||||
if data_dir:
|
||||
os.makedirs(data_dir, exist_ok=True)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def db_session():
|
||||
"""统一管理 DB session 生命周期。"""
|
||||
from app.db import SessionLocal
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
yield db
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
def seed_if_empty(db, model_cls, records, success_msg: str):
|
||||
"""当目标表为空时写入默认记录。"""
|
||||
if db.query(model_cls).count() != 0:
|
||||
return
|
||||
if isinstance(records, list):
|
||||
db.add_all(records)
|
||||
else:
|
||||
db.add(records)
|
||||
db.commit()
|
||||
print(success_msg)
|
||||
|
||||
|
||||
def init_db():
|
||||
"""创建所有表"""
|
||||
ensure_db_dir()
|
||||
|
||||
print("📦 创建数据库表...")
|
||||
Base.metadata.drop_all(bind=engine) # 删除旧表
|
||||
Base.metadata.create_all(bind=engine)
|
||||
print("✅ 数据库表创建完成")
|
||||
|
||||
|
||||
def init_default_voices():
|
||||
"""初始化默认声音"""
|
||||
from app.db import SessionLocal
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
if db.query(Voice).count() == 0:
|
||||
voices = [
|
||||
Voice(id="v1", name="Xiaoyun", vendor="Ali", gender="Female", language="zh", description="Gentle and professional."),
|
||||
Voice(id="v2", name="Kevin", vendor="Volcano", gender="Male", language="en", description="Deep and authoritative."),
|
||||
Voice(id="v3", name="Abby", vendor="Minimax", gender="Female", language="en", description="Cheerful and lively."),
|
||||
Voice(id="v4", name="Guang", vendor="Ali", gender="Male", language="zh", description="Standard newscast style."),
|
||||
Voice(id="v5", name="Doubao", vendor="Volcano", gender="Female", language="zh", description="Cute and young."),
|
||||
]
|
||||
for v in voices:
|
||||
db.add(v)
|
||||
db.commit()
|
||||
print("✅ 默认声音数据已初始化")
|
||||
def rebuild_vector_store(reset_doc_status: bool = True):
|
||||
"""重建知识库向量集合(按 DB 中的 KB 列表重建 collection 壳)。"""
|
||||
from app.vector_store import vector_store
|
||||
|
||||
with db_session() as db:
|
||||
print("🧹 重建向量库集合...")
|
||||
kb_list = db.query(KnowledgeBase).all()
|
||||
|
||||
# 删除现有 KB 集合
|
||||
try:
|
||||
collections = vector_store.client.list_collections()
|
||||
except Exception as exc:
|
||||
raise RuntimeError(f"无法读取向量集合列表: {exc}") from exc
|
||||
|
||||
for col in collections:
|
||||
name = getattr(col, "name", None) or str(col)
|
||||
if name.startswith("kb_"):
|
||||
try:
|
||||
vector_store.client.delete_collection(name=name)
|
||||
print(f" - removed {name}")
|
||||
except Exception as exc:
|
||||
print(f" - skip remove {name}: {exc}")
|
||||
|
||||
# 按 DB 重建 KB 集合
|
||||
for kb in kb_list:
|
||||
vector_store.create_collection(kb.id, kb.embedding_model)
|
||||
print(f" + created kb_{kb.id} ({kb.embedding_model})")
|
||||
|
||||
if reset_doc_status:
|
||||
kb.chunk_count = 0
|
||||
docs = db.query(KnowledgeDocument).filter(KnowledgeDocument.kb_id == kb.id).all()
|
||||
kb.doc_count = 0
|
||||
for doc in docs:
|
||||
doc.chunk_count = 0
|
||||
doc.status = "pending"
|
||||
doc.error_message = None
|
||||
doc.processed_at = None
|
||||
|
||||
db.commit()
|
||||
print("✅ 向量库重建完成(仅重建集合壳,文档需重新索引)")
|
||||
|
||||
|
||||
def init_default_data():
|
||||
with db_session() as db:
|
||||
# 检查是否已有数据
|
||||
# OpenAI Compatible (SiliconFlow API) CosyVoice 2.0 预设声音 (8个)
|
||||
# 参考: https://docs.siliconflow.cn/cn/api-reference/audio/create-speech
|
||||
voices = [
|
||||
# 男声 (Male Voices)
|
||||
Voice(
|
||||
id=SEED_VOICE_IDS["alex"],
|
||||
name="Alex",
|
||||
vendor="OpenAI Compatible",
|
||||
gender="Male",
|
||||
language="en",
|
||||
description="Steady male voice.",
|
||||
model=VOICE_MODEL,
|
||||
voice_key=f"{VOICE_MODEL}:alex",
|
||||
is_system=True,
|
||||
),
|
||||
Voice(
|
||||
id=SEED_VOICE_IDS["david"],
|
||||
name="David",
|
||||
vendor="OpenAI Compatible",
|
||||
gender="Male",
|
||||
language="en",
|
||||
description="Cheerful male voice.",
|
||||
model=VOICE_MODEL,
|
||||
voice_key=f"{VOICE_MODEL}:david",
|
||||
is_system=True,
|
||||
),
|
||||
# 女声 (Female Voices)
|
||||
Voice(
|
||||
id=SEED_VOICE_IDS["bella"],
|
||||
name="Bella",
|
||||
vendor="OpenAI Compatible",
|
||||
gender="Female",
|
||||
language="en",
|
||||
description="Passionate female voice.",
|
||||
model=VOICE_MODEL,
|
||||
voice_key=f"{VOICE_MODEL}:bella",
|
||||
is_system=True,
|
||||
),
|
||||
Voice(
|
||||
id=SEED_VOICE_IDS["claire"],
|
||||
name="Claire",
|
||||
vendor="OpenAI Compatible",
|
||||
gender="Female",
|
||||
language="en",
|
||||
description="Gentle female voice.",
|
||||
model=VOICE_MODEL,
|
||||
voice_key=f"{VOICE_MODEL}:claire",
|
||||
is_system=True,
|
||||
),
|
||||
Voice(
|
||||
id=SEED_VOICE_IDS["dashscope_cherry"],
|
||||
name="DashScope Cherry",
|
||||
vendor="DashScope",
|
||||
gender="Female",
|
||||
language="zh",
|
||||
description="DashScope realtime sample voice.",
|
||||
model=DASHSCOPE_VOICE_MODEL,
|
||||
voice_key=DASHSCOPE_DEFAULT_VOICE_KEY,
|
||||
base_url=DASHSCOPE_REALTIME_URL,
|
||||
is_system=True,
|
||||
),
|
||||
]
|
||||
seed_if_empty(db, Voice, voices, "✅ 默认声音数据已初始化 (OpenAI Compatible + DashScope)")
|
||||
|
||||
|
||||
def init_default_tools(recreate: bool = False):
|
||||
"""初始化默认工具,或按需重建工具表数据。"""
|
||||
from app.routers.tools import _seed_default_tools_if_empty, recreate_tool_resources
|
||||
|
||||
with db_session() as db:
|
||||
if recreate:
|
||||
recreate_tool_resources(db)
|
||||
print("✅ 工具库已重建")
|
||||
else:
|
||||
print("ℹ️ 声音数据已存在,跳过初始化")
|
||||
finally:
|
||||
db.close()
|
||||
_seed_default_tools_if_empty(db)
|
||||
print("✅ 默认工具已初始化")
|
||||
|
||||
|
||||
def init_default_assistants():
|
||||
"""初始化默认助手"""
|
||||
with db_session() as db:
|
||||
assistants = [
|
||||
Assistant(
|
||||
id=SEED_ASSISTANT_IDS["default"],
|
||||
user_id=1,
|
||||
name="AI 助手",
|
||||
call_count=0,
|
||||
opener="你好!我是AI助手,有什么可以帮你的吗?",
|
||||
prompt="你是一个友好的AI助手,请用简洁清晰的语言回答用户的问题。",
|
||||
language="zh",
|
||||
voice_output_enabled=True,
|
||||
voice=SEED_VOICE_IDS["bella"],
|
||||
speed=1.0,
|
||||
hotwords=[],
|
||||
tools=["current_time"],
|
||||
interruption_sensitivity=500,
|
||||
config_mode="platform",
|
||||
llm_model_id=SEED_LLM_IDS["deepseek_chat"],
|
||||
asr_model_id=SEED_ASR_IDS["sensevoice_small"],
|
||||
),
|
||||
Assistant(
|
||||
id=SEED_ASSISTANT_IDS["customer_service"],
|
||||
user_id=1,
|
||||
name="客服助手",
|
||||
call_count=0,
|
||||
opener="您好,欢迎致电客服中心,请问有什么可以帮您?",
|
||||
prompt="你是一个专业的客服人员,耐心解答客户问题,提供优质的服务体验。",
|
||||
language="zh",
|
||||
voice_output_enabled=True,
|
||||
voice=SEED_VOICE_IDS["claire"],
|
||||
speed=1.0,
|
||||
hotwords=["客服", "投诉", "咨询"],
|
||||
tools=["current_time"],
|
||||
interruption_sensitivity=600,
|
||||
config_mode="platform",
|
||||
),
|
||||
Assistant(
|
||||
id=SEED_ASSISTANT_IDS["english_tutor"],
|
||||
user_id=1,
|
||||
name="英语导师",
|
||||
call_count=0,
|
||||
opener="Hello! I'm your English learning companion. How can I help you today?",
|
||||
prompt="You are a friendly English tutor. Help users practice English conversation and explain grammar points clearly.",
|
||||
language="en",
|
||||
voice_output_enabled=True,
|
||||
voice=SEED_VOICE_IDS["alex"],
|
||||
speed=1.0,
|
||||
hotwords=["grammar", "vocabulary", "practice"],
|
||||
tools=["current_time"],
|
||||
interruption_sensitivity=400,
|
||||
config_mode="platform",
|
||||
),
|
||||
]
|
||||
seed_if_empty(db, Assistant, assistants, "✅ 默认助手数据已初始化")
|
||||
|
||||
|
||||
def init_default_workflows():
|
||||
"""初始化默认工作流"""
|
||||
from datetime import datetime
|
||||
|
||||
with db_session() as db:
|
||||
now = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
|
||||
workflows = [
|
||||
Workflow(
|
||||
id="simple_conversation",
|
||||
user_id=1,
|
||||
name="简单对话",
|
||||
node_count=2,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
global_prompt="处理简单的对话流程,用户问什么答什么。",
|
||||
nodes=[
|
||||
{"id": "1", "type": "start", "position": {"x": 100, "y": 100}, "data": {"label": "开始"}},
|
||||
{"id": "2", "type": "ai_reply", "position": {"x": 300, "y": 100}, "data": {"label": "AI回复"}},
|
||||
],
|
||||
edges=[{"source": "1", "target": "2", "id": "e1-2"}],
|
||||
),
|
||||
Workflow(
|
||||
id="voice_input_flow",
|
||||
user_id=1,
|
||||
name="语音输入流程",
|
||||
node_count=4,
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
global_prompt="处理语音输入的完整流程。",
|
||||
nodes=[
|
||||
{"id": "1", "type": "start", "position": {"x": 100, "y": 100}, "data": {"label": "开始"}},
|
||||
{"id": "2", "type": "asr", "position": {"x": 250, "y": 100}, "data": {"label": "语音识别"}},
|
||||
{"id": "3", "type": "llm", "position": {"x": 400, "y": 100}, "data": {"label": "LLM处理"}},
|
||||
{"id": "4", "type": "tts", "position": {"x": 550, "y": 100}, "data": {"label": "语音合成"}},
|
||||
],
|
||||
edges=[
|
||||
{"source": "1", "target": "2", "id": "e1-2"},
|
||||
{"source": "2", "target": "3", "id": "e2-3"},
|
||||
{"source": "3", "target": "4", "id": "e3-4"},
|
||||
],
|
||||
),
|
||||
]
|
||||
seed_if_empty(db, Workflow, workflows, "✅ 默认工作流数据已初始化")
|
||||
|
||||
|
||||
def init_default_knowledge_bases():
|
||||
"""初始化默认知识库"""
|
||||
with db_session() as db:
|
||||
kb = KnowledgeBase(
|
||||
id="default_kb",
|
||||
user_id=1,
|
||||
name="默认知识库",
|
||||
description="系统默认知识库,用于存储常见问题解答。",
|
||||
embedding_model="text-embedding-3-small",
|
||||
chunk_size=500,
|
||||
chunk_overlap=50,
|
||||
doc_count=0,
|
||||
chunk_count=0,
|
||||
status="active",
|
||||
)
|
||||
seed_if_empty(db, KnowledgeBase, kb, "✅ 默认知识库已初始化")
|
||||
|
||||
|
||||
def init_default_llm_models():
|
||||
"""初始化默认LLM模型"""
|
||||
with db_session() as db:
|
||||
llm_models = [
|
||||
LLMModel(
|
||||
id=SEED_LLM_IDS["deepseek_chat"],
|
||||
user_id=1,
|
||||
name="DeepSeek Chat",
|
||||
vendor="OpenAI Compatible",
|
||||
type="text",
|
||||
base_url="https://api.deepseek.com",
|
||||
api_key="YOUR_API_KEY", # 用户需替换
|
||||
model_name="deepseek-chat",
|
||||
temperature=0.7,
|
||||
context_length=4096,
|
||||
enabled=True,
|
||||
),
|
||||
LLMModel(
|
||||
id=SEED_LLM_IDS["glm_4"],
|
||||
user_id=1,
|
||||
name="GLM-4",
|
||||
vendor="ZhipuAI",
|
||||
type="text",
|
||||
base_url="https://open.bigmodel.cn/api/paas/v4",
|
||||
api_key="YOUR_API_KEY",
|
||||
model_name="glm-4",
|
||||
temperature=0.7,
|
||||
context_length=8192,
|
||||
enabled=True,
|
||||
),
|
||||
LLMModel(
|
||||
id=SEED_LLM_IDS["embedding_3_small"],
|
||||
user_id=1,
|
||||
name="Embedding 3 Small",
|
||||
vendor="OpenAI Compatible",
|
||||
type="embedding",
|
||||
base_url="https://api.openai.com/v1",
|
||||
api_key="YOUR_API_KEY",
|
||||
model_name="text-embedding-3-small",
|
||||
enabled=True,
|
||||
),
|
||||
]
|
||||
seed_if_empty(db, LLMModel, llm_models, "✅ 默认LLM模型已初始化")
|
||||
|
||||
|
||||
def init_default_asr_models():
|
||||
"""初始化默认ASR模型"""
|
||||
with db_session() as db:
|
||||
asr_models = [
|
||||
ASRModel(
|
||||
id=SEED_ASR_IDS["sensevoice_small"],
|
||||
user_id=1,
|
||||
name="FunAudioLLM/SenseVoiceSmall",
|
||||
vendor="OpenAI Compatible",
|
||||
language="Multi-lingual",
|
||||
base_url="https://api.siliconflow.cn/v1",
|
||||
api_key="YOUR_API_KEY",
|
||||
model_name="FunAudioLLM/SenseVoiceSmall",
|
||||
hotwords=[],
|
||||
enable_punctuation=True,
|
||||
enable_normalization=True,
|
||||
enabled=True,
|
||||
),
|
||||
ASRModel(
|
||||
id=SEED_ASR_IDS["telespeech_asr"],
|
||||
user_id=1,
|
||||
name="TeleAI/TeleSpeechASR",
|
||||
vendor="OpenAI Compatible",
|
||||
language="Multi-lingual",
|
||||
base_url="https://api.siliconflow.cn/v1",
|
||||
api_key="YOUR_API_KEY",
|
||||
model_name="TeleAI/TeleSpeechASR",
|
||||
hotwords=[],
|
||||
enable_punctuation=True,
|
||||
enable_normalization=True,
|
||||
enabled=True,
|
||||
),
|
||||
ASRModel(
|
||||
id=SEED_ASR_IDS["dashscope_realtime"],
|
||||
user_id=1,
|
||||
name="DashScope Realtime ASR",
|
||||
vendor="DashScope",
|
||||
language="Multi-lingual",
|
||||
base_url=DASHSCOPE_REALTIME_URL,
|
||||
api_key="YOUR_API_KEY",
|
||||
model_name="qwen3-asr-flash-realtime",
|
||||
hotwords=[],
|
||||
enable_punctuation=True,
|
||||
enable_normalization=True,
|
||||
enabled=True,
|
||||
),
|
||||
]
|
||||
seed_if_empty(db, ASRModel, asr_models, "✅ 默认ASR模型已初始化")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 确保 data 目录存在
|
||||
data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data")
|
||||
os.makedirs(data_dir, exist_ok=True)
|
||||
|
||||
init_db()
|
||||
init_default_voices()
|
||||
print("🎉 数据库初始化完成!")
|
||||
parser = argparse.ArgumentParser(description="初始化/重建 AI VideoAssistant 数据与向量库")
|
||||
parser.add_argument(
|
||||
"--rebuild-db",
|
||||
action="store_true",
|
||||
help="重建数据库(drop + create tables)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rebuild-vector-store",
|
||||
action="store_true",
|
||||
help="重建向量库 KB 集合(清空后按 DB 的 knowledge_bases 重建 collection)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--skip-seed",
|
||||
action="store_true",
|
||||
help="跳过默认数据初始化",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--recreate-tool-db",
|
||||
action="store_true",
|
||||
help="重建工具库数据(清空 tool_resources 后按内置默认工具重建)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
# 无参数时保持旧行为:重建 DB + 初始化默认数据
|
||||
# 仅当完全未指定任何选项时才自动触发 rebuild-db。
|
||||
if (
|
||||
not args.rebuild_db
|
||||
and not args.rebuild_vector_store
|
||||
and not args.skip_seed
|
||||
and not args.recreate_tool_db
|
||||
):
|
||||
args.rebuild_db = True
|
||||
|
||||
ensure_db_dir()
|
||||
|
||||
if args.rebuild_db:
|
||||
init_db()
|
||||
else:
|
||||
print("ℹ️ 跳过数据库结构变更(未指定 --rebuild-db)")
|
||||
if not args.skip_seed or args.recreate_tool_db:
|
||||
print("ℹ️ 当前将执行非破坏性流程(仅工具/默认数据初始化)")
|
||||
|
||||
if args.recreate_tool_db:
|
||||
init_default_tools(recreate=True)
|
||||
|
||||
if not args.skip_seed:
|
||||
init_default_data()
|
||||
if not args.recreate_tool_db:
|
||||
init_default_tools(recreate=False)
|
||||
init_default_assistants()
|
||||
init_default_workflows()
|
||||
init_default_knowledge_bases()
|
||||
init_default_llm_models()
|
||||
init_default_asr_models()
|
||||
print("✅ 默认数据初始化完成")
|
||||
|
||||
if args.rebuild_vector_store:
|
||||
rebuild_vector_store(reset_doc_status=True)
|
||||
|
||||
print("🎉 初始化脚本执行完成!")
|
||||
|
||||
73
api/main.py
73
api/main.py
@@ -1,73 +0,0 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from contextlib import asynccontextmanager
|
||||
import os
|
||||
|
||||
from app.db import Base, engine
|
||||
from app.routers import assistants, history, knowledge
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
# 启动时创建表
|
||||
Base.metadata.create_all(bind=engine)
|
||||
yield
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="AI VideoAssistant API",
|
||||
description="Backend API for AI VideoAssistant",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
# CORS
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# 路由
|
||||
app.include_router(assistants.router, prefix="/api")
|
||||
app.include_router(history.router, prefix="/api")
|
||||
app.include_router(knowledge.router, prefix="/api")
|
||||
|
||||
|
||||
@app.get("/")
|
||||
def root():
|
||||
return {"message": "AI VideoAssistant API", "version": "1.0.0"}
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
def health():
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
# 初始化默认数据
|
||||
@app.on_event("startup")
|
||||
def init_default_data():
|
||||
from sqlalchemy.orm import Session
|
||||
from app.db import SessionLocal
|
||||
from app.models import Voice
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
# 检查是否已有数据
|
||||
if db.query(Voice).count() == 0:
|
||||
# 插入默认声音
|
||||
voices = [
|
||||
Voice(id="v1", name="Xiaoyun", vendor="Ali", gender="Female", language="zh", description="Gentle and professional."),
|
||||
Voice(id="v2", name="Kevin", vendor="Volcano", gender="Male", language="en", description="Deep and authoritative."),
|
||||
Voice(id="v3", name="Abby", vendor="Minimax", gender="Female", language="en", description="Cheerful and lively."),
|
||||
Voice(id="v4", name="Guang", vendor="Ali", gender="Male", language="zh", description="Standard newscast style."),
|
||||
Voice(id="v5", name="Doubao", vendor="Volcano", gender="Female", language="zh", description="Cute and young."),
|
||||
]
|
||||
for v in voices:
|
||||
db.add(v)
|
||||
db.commit()
|
||||
print("✅ 默认声音数据已初始化")
|
||||
finally:
|
||||
db.close()
|
||||
8
api/pytest.ini
Normal file
8
api/pytest.ini
Normal file
@@ -0,0 +1,8 @@
|
||||
[pytest]
|
||||
testpaths = tests
|
||||
python_files = test_*.py
|
||||
python_classes = Test*
|
||||
python_functions = test_*
|
||||
addopts = -v --tb=short
|
||||
filterwarnings =
|
||||
ignore::DeprecationWarning
|
||||
@@ -1,11 +1,12 @@
|
||||
aiosqlite==0.19.0
|
||||
fastapi==0.109.0
|
||||
uvicorn==0.27.0
|
||||
python-multipart==0.0.6
|
||||
python-dotenv==1.0.0
|
||||
pydantic==2.5.3
|
||||
sqlalchemy==2.0.25
|
||||
minio==7.2.0
|
||||
httpx==0.26.0
|
||||
chromadb==0.4.22
|
||||
openai==1.12.0
|
||||
aiosqlite==0.22.1
|
||||
fastapi==0.135.1
|
||||
uvicorn==0.41.0
|
||||
python-multipart==0.0.22
|
||||
python-dotenv==1.2.2
|
||||
pydantic==2.11.7
|
||||
sqlalchemy==2.0.48
|
||||
minio==7.2.20
|
||||
httpx==0.28.1
|
||||
chromadb==1.5.2
|
||||
openai==2.24.0
|
||||
dashscope==1.25.13
|
||||
|
||||
14
api/run_tests.bat
Normal file
14
api/run_tests.bat
Normal file
@@ -0,0 +1,14 @@
|
||||
@echo off
|
||||
REM Run API tests
|
||||
|
||||
cd /d "%~dp0"
|
||||
|
||||
REM Install test dependencies
|
||||
echo Installing test dependencies...
|
||||
pip install pytest pytest-cov -q
|
||||
|
||||
REM Run tests
|
||||
echo Running tests...
|
||||
pytest tests/ -v --tb=short
|
||||
|
||||
pause
|
||||
1
api/tests/__init__.py
Normal file
1
api/tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Tests package
|
||||
138
api/tests/conftest.py
Normal file
138
api/tests/conftest.py
Normal file
@@ -0,0 +1,138 @@
|
||||
"""Pytest fixtures for API tests"""
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
# Add api directory to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.db import Base, get_db
|
||||
from app.main import app
|
||||
|
||||
|
||||
# Use in-memory SQLite for testing
|
||||
DATABASE_URL = "sqlite:///:memory:"
|
||||
|
||||
engine = create_engine(
|
||||
DATABASE_URL,
|
||||
connect_args={"check_same_thread": False},
|
||||
poolclass=StaticPool,
|
||||
)
|
||||
TestingSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def db_session():
|
||||
"""Create a fresh database session for each test"""
|
||||
# Create all tables
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
session = TestingSessionLocal()
|
||||
try:
|
||||
yield session
|
||||
finally:
|
||||
session.close()
|
||||
# Drop all tables after test
|
||||
Base.metadata.drop_all(bind=engine)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def client(db_session):
|
||||
"""Create a test client with database dependency override"""
|
||||
|
||||
def override_get_db():
|
||||
try:
|
||||
yield db_session
|
||||
finally:
|
||||
pass
|
||||
|
||||
app.dependency_overrides[get_db] = override_get_db
|
||||
|
||||
with TestClient(app) as test_client:
|
||||
yield test_client
|
||||
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_voice_data():
|
||||
"""Sample voice data for testing"""
|
||||
return {
|
||||
"name": "Test Voice",
|
||||
"vendor": "TestVendor",
|
||||
"gender": "Female",
|
||||
"language": "zh",
|
||||
"description": "A test voice for unit testing",
|
||||
"model": "test-model",
|
||||
"voice_key": "test-key",
|
||||
"speed": 1.0,
|
||||
"gain": 0,
|
||||
"pitch": 0,
|
||||
"enabled": True
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_assistant_data():
|
||||
"""Sample assistant data for testing"""
|
||||
return {
|
||||
"name": "Test Assistant",
|
||||
"opener": "Hello, welcome!",
|
||||
"prompt": "You are a helpful assistant.",
|
||||
"language": "zh",
|
||||
"voiceOutputEnabled": True,
|
||||
"speed": 1.0,
|
||||
"hotwords": ["test", "hello"],
|
||||
"tools": [],
|
||||
"configMode": "platform"
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_call_record_data():
|
||||
"""Sample call record data for testing"""
|
||||
return {
|
||||
"user_id": 1,
|
||||
"assistant_id": None,
|
||||
"source": "debug"
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_llm_model_data():
|
||||
"""Sample LLM model data for testing"""
|
||||
return {
|
||||
"id": "test-llm-001",
|
||||
"name": "Test LLM Model",
|
||||
"vendor": "TestVendor",
|
||||
"type": "text",
|
||||
"base_url": "https://api.test.com/v1",
|
||||
"api_key": "test-api-key",
|
||||
"model_name": "test-model",
|
||||
"temperature": 0.7,
|
||||
"context_length": 4096,
|
||||
"enabled": True
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_asr_model_data():
|
||||
"""Sample ASR model data for testing"""
|
||||
return {
|
||||
"id": "test-asr-001",
|
||||
"name": "Test ASR Model",
|
||||
"vendor": "TestVendor",
|
||||
"language": "zh",
|
||||
"base_url": "https://api.test.com/v1",
|
||||
"api_key": "test-api-key",
|
||||
"model_name": "paraformer-v2",
|
||||
"hotwords": ["测试", "语音"],
|
||||
"enable_punctuation": True,
|
||||
"enable_normalization": True,
|
||||
"enabled": True
|
||||
}
|
||||
430
api/tests/test_asr.py
Normal file
430
api/tests/test_asr.py
Normal file
@@ -0,0 +1,430 @@
|
||||
"""Tests for ASR Model API endpoints"""
|
||||
import io
|
||||
import wave
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
|
||||
def _make_wav_bytes(sample_rate: int = 16000) -> bytes:
|
||||
with io.BytesIO() as buffer:
|
||||
with wave.open(buffer, "wb") as wav_file:
|
||||
wav_file.setnchannels(1)
|
||||
wav_file.setsampwidth(2)
|
||||
wav_file.setframerate(sample_rate)
|
||||
wav_file.writeframes(b"\x00\x00" * sample_rate)
|
||||
return buffer.getvalue()
|
||||
|
||||
|
||||
class TestASRModelAPI:
|
||||
"""Test cases for ASR Model endpoints"""
|
||||
|
||||
def test_get_asr_models_empty(self, client):
|
||||
"""Test getting ASR models when database is empty"""
|
||||
response = client.get("/api/asr")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "total" in data
|
||||
assert "list" in data
|
||||
assert data["total"] == 0
|
||||
|
||||
def test_create_asr_model(self, client, sample_asr_model_data):
|
||||
"""Test creating a new ASR model"""
|
||||
response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == sample_asr_model_data["name"]
|
||||
assert data["vendor"] == sample_asr_model_data["vendor"]
|
||||
assert data["language"] == sample_asr_model_data["language"]
|
||||
assert "id" in data
|
||||
|
||||
def test_create_asr_model_minimal(self, client):
|
||||
"""Test creating an ASR model with minimal required data"""
|
||||
data = {
|
||||
"name": "Minimal ASR",
|
||||
"vendor": "Test",
|
||||
"language": "zh",
|
||||
"base_url": "https://api.test.com",
|
||||
"api_key": "test-key"
|
||||
}
|
||||
response = client.post("/api/asr", json=data)
|
||||
assert response.status_code == 200
|
||||
assert response.json()["name"] == "Minimal ASR"
|
||||
|
||||
def test_get_asr_model_by_id(self, client, sample_asr_model_data):
|
||||
"""Test getting a specific ASR model by ID"""
|
||||
# Create first
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Get by ID
|
||||
response = client.get(f"/api/asr/{model_id}")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["id"] == model_id
|
||||
assert data["name"] == sample_asr_model_data["name"]
|
||||
|
||||
def test_get_asr_model_not_found(self, client):
|
||||
"""Test getting a non-existent ASR model"""
|
||||
response = client.get("/api/asr/non-existent-id")
|
||||
assert response.status_code == 404
|
||||
|
||||
def test_update_asr_model(self, client, sample_asr_model_data):
|
||||
"""Test updating an ASR model"""
|
||||
# Create first
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Update
|
||||
update_data = {
|
||||
"name": "Updated ASR Model",
|
||||
"language": "en",
|
||||
"enable_punctuation": False
|
||||
}
|
||||
response = client.put(f"/api/asr/{model_id}", json=update_data)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == "Updated ASR Model"
|
||||
assert data["language"] == "en"
|
||||
assert data["enable_punctuation"] == False
|
||||
|
||||
def test_update_asr_model_vendor(self, client, sample_asr_model_data):
|
||||
"""Test updating ASR vendor metadata."""
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
response = client.put(
|
||||
f"/api/asr/{model_id}",
|
||||
json={
|
||||
"vendor": "DashScope",
|
||||
"model_name": "qwen3-asr-flash-realtime",
|
||||
"base_url": "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
|
||||
},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["vendor"] == "DashScope"
|
||||
assert data["model_name"] == "qwen3-asr-flash-realtime"
|
||||
|
||||
def test_delete_asr_model(self, client, sample_asr_model_data):
|
||||
"""Test deleting an ASR model"""
|
||||
# Create first
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Delete
|
||||
response = client.delete(f"/api/asr/{model_id}")
|
||||
assert response.status_code == 200
|
||||
|
||||
# Verify deleted
|
||||
get_response = client.get(f"/api/asr/{model_id}")
|
||||
assert get_response.status_code == 404
|
||||
|
||||
def test_list_asr_models_with_pagination(self, client, sample_asr_model_data):
    """Test listing ASR models with pagination"""
    # Create multiple models with distinct ids/names.
    for i in range(3):
        data = sample_asr_model_data.copy()
        data["id"] = f"test-asr-{i}"
        data["name"] = f"ASR Model {i}"
        client.post("/api/asr", json=data)

    # Page 1 with limit 2 must report the full total but return only 2 rows.
    response = client.get("/api/asr?page=1&limit=2")
    assert response.status_code == 200
    data = response.json()
    assert data["total"] == 3
    assert len(data["list"]) == 2

def test_filter_asr_models_by_language(self, client, sample_asr_model_data):
    """Test filtering ASR models by language"""
    # Create models with different languages. The original enumerate()
    # index was never used, so iterate over the languages directly.
    for lang in ["zh", "en", "Multi-lingual"]:
        data = sample_asr_model_data.copy()
        data["id"] = f"test-asr-{lang}"
        data["name"] = f"ASR {lang}"
        data["language"] = lang
        client.post("/api/asr", json=data)

    # Filter by language and check every returned row matches the filter.
    response = client.get("/api/asr?language=zh")
    assert response.status_code == 200
    data = response.json()
    assert data["total"] >= 1
    for model in data["list"]:
        assert model["language"] == "zh"
|
||||
|
||||
def test_filter_asr_models_by_enabled(self, client, sample_asr_model_data):
    """Test filtering ASR models by enabled status"""
    # Create one enabled and one disabled model (reusing the same dict,
    # mutated between posts, as the payload is serialized per request).
    data = sample_asr_model_data.copy()
    data["id"] = "test-asr-enabled"
    data["name"] = "Enabled ASR"
    data["enabled"] = True
    client.post("/api/asr", json=data)

    data["id"] = "test-asr-disabled"
    data["name"] = "Disabled ASR"
    data["enabled"] = False
    client.post("/api/asr", json=data)

    # Filter by enabled; every returned model must be enabled.
    response = client.get("/api/asr?enabled=true")
    assert response.status_code == 200
    data = response.json()
    for model in data["list"]:
        # `is True` instead of `== True` (flake8 E712); the field is a bool.
        assert model["enabled"] is True
|
||||
|
||||
def test_create_asr_model_with_hotwords(self, client):
    """Test creating an ASR model with hotwords"""
    payload = {
        "id": "asr-hotwords",
        "name": "ASR with Hotwords",
        "vendor": "SiliconFlow",
        "language": "zh",
        "base_url": "https://api.siliconflow.cn/v1",
        "api_key": "test-key",
        "model_name": "paraformer-v2",
        "hotwords": ["你好", "查询", "帮助"],
        "enable_punctuation": True,
        "enable_normalization": True
    }
    response = client.post("/api/asr", json=payload)
    assert response.status_code == 200
    result = response.json()
    # Hotwords must round-trip unchanged.
    assert result["hotwords"] == ["你好", "查询", "帮助"]

def test_create_asr_model_with_all_fields(self, client):
    """Test creating an ASR model with all fields"""
    payload = {
        "id": "full-asr",
        "name": "Full ASR Model",
        "vendor": "SiliconFlow",
        "language": "zh",
        "base_url": "https://api.siliconflow.cn/v1",
        "api_key": "sk-test",
        "model_name": "paraformer-v2",
        "hotwords": ["测试"],
        "enable_punctuation": True,
        "enable_normalization": True,
        "enabled": True
    }
    response = client.post("/api/asr", json=payload)
    assert response.status_code == 200
    result = response.json()
    assert result["name"] == "Full ASR Model"
    # `is True` instead of `== True` (flake8 E712); the fields are bools.
    assert result["enable_punctuation"] is True
    assert result["enable_normalization"] is True
|
||||
|
||||
@patch('httpx.Client')
def test_test_asr_model_siliconflow(self, mock_client_class, client, sample_asr_model_data):
    """Test testing an ASR model with SiliconFlow vendor"""
    # NOTE(review): the method-level @patch above is shadowed by the
    # explicit `with patch('app.routers.asr.httpx.Client', ...)` below;
    # it is kept only so the method signature stays unchanged.
    sample_asr_model_data["vendor"] = "SiliconFlow"
    create_response = client.post("/api/asr", json=sample_asr_model_data)
    model_id = create_response.json()["id"]

    # Build a context-manager mock that yields a canned 200 response.
    mock_client = MagicMock()
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.json.return_value = {
        "results": [{"transcript": "测试文本", "language": "zh"}]
    }
    mock_response.raise_for_status = MagicMock()
    mock_client.get.return_value = mock_response
    mock_client.__enter__ = MagicMock(return_value=mock_client)
    mock_client.__exit__ = MagicMock(return_value=False)

    with patch('app.routers.asr.httpx.Client', return_value=mock_client):
        response = client.post(f"/api/asr/{model_id}/test")
        assert response.status_code == 200
        data = response.json()
        # `is True` instead of `== True` (flake8 E712).
        assert data["success"] is True

@patch('httpx.Client')
def test_test_asr_model_openai(self, mock_client_class, client, sample_asr_model_data):
    """Test testing an ASR model with OpenAI vendor"""
    # NOTE(review): same shadowed @patch as above; kept for signature
    # compatibility only.
    sample_asr_model_data["vendor"] = "OpenAI"
    sample_asr_model_data["id"] = "test-asr-openai"
    create_response = client.post("/api/asr", json=sample_asr_model_data)
    model_id = create_response.json()["id"]

    # Canned OpenAI-style transcription response.
    mock_client = MagicMock()
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.json.return_value = {"text": "Test transcript"}
    mock_response.raise_for_status = MagicMock()
    mock_client.get.return_value = mock_response
    mock_client.__enter__ = MagicMock(return_value=mock_client)
    mock_client.__exit__ = MagicMock(return_value=False)

    with patch('app.routers.asr.httpx.Client', return_value=mock_client):
        response = client.post(f"/api/asr/{model_id}/test")
        assert response.status_code == 200
|
||||
|
||||
def test_test_asr_model_dashscope(self, client, sample_asr_model_data, monkeypatch):
    """Test DashScope ASR connectivity probe."""
    from app.routers import asr as asr_router

    # Register a DashScope realtime model.
    sample_asr_model_data["vendor"] = "DashScope"
    sample_asr_model_data["base_url"] = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
    sample_asr_model_data["model_name"] = "qwen3-asr-flash-realtime"
    model_id = client.post("/api/asr", json=sample_asr_model_data).json()["id"]

    # Stand-in for the websocket probe: just verify the forwarded kwargs.
    def fake_probe(**kwargs):
        assert kwargs["api_key"] == sample_asr_model_data["api_key"]
        assert kwargs["model"] == "qwen3-asr-flash-realtime"

    monkeypatch.setattr(asr_router, "_probe_dashscope_asr_connection", fake_probe)

    response = client.post(f"/api/asr/{model_id}/test")
    assert response.status_code == 200
    result = response.json()
    assert result["success"] is True
    assert result["message"] == "DashScope realtime ASR connected"
|
||||
|
||||
@patch('httpx.Client')
def test_test_asr_model_failure(self, mock_client_class, client, sample_asr_model_data):
    """Test testing an ASR model with failed connection"""
    # NOTE(review): the method-level @patch is shadowed by the inner
    # `with patch(...)`; kept so the signature stays unchanged.
    create_response = client.post("/api/asr", json=sample_asr_model_data)
    model_id = create_response.json()["id"]

    # Mock a 401 whose raise_for_status() raises, simulating a bad key.
    mock_client = MagicMock()
    mock_response = MagicMock()
    mock_response.status_code = 401
    mock_response.text = "Unauthorized"
    mock_response.raise_for_status = MagicMock(side_effect=Exception("401 Unauthorized"))
    mock_client.get.return_value = mock_response
    mock_client.__enter__ = MagicMock(return_value=mock_client)
    mock_client.__exit__ = MagicMock(return_value=False)

    with patch('app.routers.asr.httpx.Client', return_value=mock_client):
        response = client.post(f"/api/asr/{model_id}/test")
        # Endpoint reports failure in the body, not via HTTP status.
        assert response.status_code == 200
        data = response.json()
        # `is False` instead of `== False` (flake8 E712).
        assert data["success"] is False
|
||||
|
||||
def test_different_asr_languages(self, client):
    """Test creating ASR models with different languages"""
    # Each supported language value should round-trip through create.
    for lang in ["zh", "en", "Multi-lingual"]:
        payload = {
            "id": f"asr-lang-{lang}",
            "name": f"ASR {lang}",
            "vendor": "SiliconFlow",
            "language": lang,
            "base_url": "https://api.siliconflow.cn/v1",
            "api_key": "test-key"
        }
        response = client.post("/api/asr", json=payload)
        assert response.status_code == 200
        assert response.json()["language"] == lang

def test_different_asr_vendors(self, client):
    """Test creating ASR models with different vendors"""
    # Each vendor value should round-trip through create.
    for vendor in ["SiliconFlow", "OpenAI", "Azure", "DashScope"]:
        payload = {
            "id": f"asr-vendor-{vendor.lower()}",
            "name": f"ASR {vendor}",
            "vendor": vendor,
            "language": "zh",
            "base_url": f"https://api.{vendor.lower()}.com/v1",
            "api_key": "test-key"
        }
        response = client.post("/api/asr", json=payload)
        assert response.status_code == 200
        assert response.json()["vendor"] == vendor
|
||||
|
||||
def test_preview_asr_model_success(self, client, sample_asr_model_data, monkeypatch):
    """Test ASR preview endpoint with OpenAI-compatible transcriptions API."""
    from app.routers import asr as asr_router

    model_id = client.post("/api/asr", json=sample_asr_model_data).json()["id"]

    class FakeResponse:
        # Minimal stand-in for an httpx response object.
        status_code = 200

        def json(self):
            return {"text": "你好,这是测试转写", "language": "zh", "confidence": 0.98}

        @property
        def text(self):
            return '{"text":"ok"}'

    class FakeClient:
        # Context-manager shim replacing httpx.Client inside the router.
        def __init__(self, *args, **kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def post(self, url, headers=None, data=None, files=None):
            # Verify the outbound transcription request is well formed.
            assert url.endswith("/audio/transcriptions")
            assert headers["Authorization"] == f"Bearer {sample_asr_model_data['api_key']}"
            assert data["model"] == sample_asr_model_data["model_name"]
            assert files["file"][0] == "sample.wav"
            return FakeResponse()

    monkeypatch.setattr(asr_router.httpx, "Client", FakeClient)

    response = client.post(
        f"/api/asr/{model_id}/preview",
        files={"file": ("sample.wav", b"fake-wav-bytes", "audio/wav")},
    )
    assert response.status_code == 200
    payload = response.json()
    assert payload["success"] is True
    assert payload["transcript"] == "你好,这是测试转写"
    assert payload["language"] == "zh"
|
||||
|
||||
def test_preview_asr_model_reject_non_audio(self, client, sample_asr_model_data):
    """Test ASR preview endpoint rejects non-audio file."""
    model_id = client.post("/api/asr", json=sample_asr_model_data).json()["id"]

    # A text/plain upload must be rejected with a 400.
    response = client.post(
        f"/api/asr/{model_id}/preview",
        files={"file": ("sample.txt", b"text-data", "text/plain")},
    )
    assert response.status_code == 400
    assert "Only audio files are supported" in response.text

def test_preview_asr_model_dashscope(self, client, sample_asr_model_data, monkeypatch):
    """Test ASR preview endpoint with DashScope realtime helper."""
    from app.routers import asr as asr_router

    # Register a DashScope realtime model.
    sample_asr_model_data["vendor"] = "DashScope"
    sample_asr_model_data["base_url"] = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
    sample_asr_model_data["model_name"] = "qwen3-asr-flash-realtime"
    model_id = client.post("/api/asr", json=sample_asr_model_data).json()["id"]

    # Replace the realtime transcription helper with a deterministic stub
    # that also verifies the forwarded kwargs.
    def fake_preview(**kwargs):
        assert kwargs["base_url"] == sample_asr_model_data["base_url"]
        assert kwargs["model"] == sample_asr_model_data["model_name"]
        return {
            "transcript": "你好,这是实时识别",
            "language": "zh",
            "confidence": None,
        }

    monkeypatch.setattr(asr_router, "_transcribe_dashscope_preview", fake_preview)

    response = client.post(
        f"/api/asr/{model_id}/preview",
        files={"file": ("sample.wav", _make_wav_bytes(), "audio/wav")},
    )
    assert response.status_code == 200
    payload = response.json()
    assert payload["success"] is True
    assert payload["transcript"] == "你好,这是实时识别"
|
||||
440
api/tests/test_assistants.py
Normal file
440
api/tests/test_assistants.py
Normal file
@@ -0,0 +1,440 @@
|
||||
"""Tests for Assistant API endpoints"""
|
||||
import pytest
|
||||
import uuid
|
||||
|
||||
|
||||
class TestAssistantAPI:
|
||||
"""Test cases for Assistant endpoints"""
|
||||
|
||||
def test_get_assistants_empty(self, client):
    """Test getting assistants when database is empty"""
    response = client.get("/api/assistants")
    assert response.status_code == 200
    body = response.json()
    # The list envelope is present even with no rows.
    assert "total" in body
    assert "list" in body

def test_create_assistant(self, client, sample_assistant_data):
    """Test creating a new assistant"""
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    result = response.json()
    # Echoed fields match the request payload.
    assert result["name"] == sample_assistant_data["name"]
    assert result["opener"] == sample_assistant_data["opener"]
    assert result["manualOpenerToolCalls"] == []
    assert result["prompt"] == sample_assistant_data["prompt"]
    assert result["language"] == sample_assistant_data["language"]
    # Defaults applied by the server.
    assert result["voiceOutputEnabled"] is True
    assert result["firstTurnMode"] == "bot_first"
    assert result["generatedOpenerEnabled"] is False
    assert result["asrInterimEnabled"] is False
    assert result["botCannotBeInterrupted"] is False
    assert result["appId"] is None
    assert "id" in result
    assert result["callCount"] == 0

def test_create_assistant_minimal(self, client):
    """Test creating an assistant with minimal required data"""
    response = client.post("/api/assistants", json={"name": "Minimal Assistant"})
    assert response.status_code == 200
    result = response.json()
    assert result["name"] == "Minimal Assistant"
    assert result["asrInterimEnabled"] is False
|
||||
|
||||
def test_get_assistant_by_id(self, client, sample_assistant_data):
    """Test getting a specific assistant by ID"""
    # Create an assistant, then fetch it back by its id.
    assistant_id = client.post("/api/assistants", json=sample_assistant_data).json()["id"]

    response = client.get(f"/api/assistants/{assistant_id}")
    assert response.status_code == 200
    body = response.json()
    assert body["id"] == assistant_id
    assert body["name"] == sample_assistant_data["name"]

def test_get_assistant_not_found(self, client):
    """Test getting a non-existent assistant"""
    assert client.get("/api/assistants/non-existent-id").status_code == 404
|
||||
|
||||
def test_update_assistant(self, client, sample_assistant_data):
    """Test updating an assistant"""
    # Seed an assistant to update.
    assistant_id = client.post("/api/assistants", json=sample_assistant_data).json()["id"]

    manual_calls = [
        {"toolName": "text_msg_prompt", "arguments": {"msg": "请选择服务类型"}}
    ]
    update_payload = {
        "name": "Updated Assistant",
        "prompt": "You are an updated assistant.",
        "speed": 1.5,
        "voiceOutputEnabled": False,
        "asrInterimEnabled": True,
        "manualOpenerToolCalls": manual_calls,
    }
    response = client.put(f"/api/assistants/{assistant_id}", json=update_payload)
    assert response.status_code == 200
    body = response.json()
    assert body["name"] == "Updated Assistant"
    assert body["prompt"] == "You are an updated assistant."
    assert body["speed"] == 1.5
    assert body["voiceOutputEnabled"] is False
    assert body["asrInterimEnabled"] is True
    assert body["manualOpenerToolCalls"] == manual_calls

def test_delete_assistant(self, client, sample_assistant_data):
    """Test deleting an assistant"""
    # Create, delete, then confirm the lookup 404s.
    assistant_id = client.post("/api/assistants", json=sample_assistant_data).json()["id"]

    assert client.delete(f"/api/assistants/{assistant_id}").status_code == 200
    assert client.get(f"/api/assistants/{assistant_id}").status_code == 404
|
||||
|
||||
def test_list_assistants_with_pagination(self, client, sample_assistant_data):
    """Test listing assistants with pagination"""
    # Create three assistants with distinct names.
    for i in range(3):
        payload = sample_assistant_data.copy()
        payload["name"] = f"Assistant {i}"
        client.post("/api/assistants", json=payload)

    # Page 1 / limit 2 reports the full total but returns two rows.
    response = client.get("/api/assistants?page=1&limit=2")
    assert response.status_code == 200
    body = response.json()
    assert body["total"] == 3
    assert len(body["list"]) == 2

def test_create_assistant_with_voice(self, client, sample_assistant_data, sample_voice_data):
    """Test creating an assistant with a voice reference"""
    # Create the referenced voice first.
    voice_id = client.post("/api/voices", json=sample_voice_data).json()["id"]

    sample_assistant_data["voice"] = voice_id
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    assert response.json()["voice"] == voice_id

def test_create_assistant_with_knowledge_base(self, client, sample_assistant_data):
    """Test creating an assistant with knowledge base reference"""
    # Note: This test assumes knowledge base doesn't exist
    sample_assistant_data["knowledgeBaseId"] = "non-existent-kb"
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    assert response.json()["knowledgeBaseId"] == "non-existent-kb"

    # The kb id should also flow into the runtime-config metadata.
    assistant_id = response.json()["id"]
    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    assert metadata["knowledgeBaseId"] == "non-existent-kb"
    assert metadata["knowledge"]["enabled"] is True
    assert metadata["knowledge"]["kbId"] == "non-existent-kb"
|
||||
|
||||
def test_assistant_with_model_references(self, client, sample_assistant_data):
    """Test creating assistant with model references"""
    refs = {
        "llmModelId": "llm-001",
        "asrModelId": "asr-001",
        "embeddingModelId": "emb-001",
        "rerankModelId": "rerank-001",
    }
    sample_assistant_data.update(refs)
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    body = response.json()
    # Every model reference must round-trip unchanged.
    for key, value in refs.items():
        assert body[key] == value

def test_assistant_with_tools(self, client, sample_assistant_data):
    """Test creating assistant with tools"""
    sample_assistant_data["tools"] = ["weather", "calculator", "search"]
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    assert response.json()["tools"] == ["weather", "calculator", "search"]

def test_assistant_with_hotwords(self, client, sample_assistant_data):
    """Test creating assistant with hotwords"""
    sample_assistant_data["hotwords"] = ["hello", "help", "stop"]
    response = client.post("/api/assistants", json=sample_assistant_data)
    assert response.status_code == 200
    assert response.json()["hotwords"] == ["hello", "help", "stop"]
|
||||
|
||||
def test_different_config_modes(self, client, sample_assistant_data):
    """Test creating assistants with different config modes"""
    # Every supported config mode should round-trip through create.
    for mode in ["platform", "dify", "fastgpt", "none"]:
        sample_assistant_data["name"] = f"Assistant {mode}"
        sample_assistant_data["configMode"] = mode
        response = client.post("/api/assistants", json=sample_assistant_data)
        assert response.status_code == 200
        assert response.json()["configMode"] == mode

def test_different_languages(self, client, sample_assistant_data):
    """Test creating assistants with different languages"""
    # Every supported language should round-trip through create.
    for lang in ["zh", "en", "ja", "ko"]:
        sample_assistant_data["name"] = f"Assistant {lang}"
        sample_assistant_data["language"] = lang
        response = client.post("/api/assistants", json=sample_assistant_data)
        assert response.status_code == 200
        assert response.json()["language"] == lang
|
||||
|
||||
def test_get_runtime_config(self, client, sample_assistant_data, sample_llm_model_data, sample_asr_model_data, sample_voice_data):
    """Test resolved runtime config endpoint for WS session.start metadata."""
    # Register the three backing services the assistant references.
    sample_asr_model_data["vendor"] = "OpenAI Compatible"
    llm_resp = client.post("/api/llm", json=sample_llm_model_data)
    assert llm_resp.status_code == 200
    llm_id = llm_resp.json()["id"]

    asr_resp = client.post("/api/asr", json=sample_asr_model_data)
    assert asr_resp.status_code == 200
    asr_id = asr_resp.json()["id"]

    sample_voice_data["vendor"] = "OpenAI Compatible"
    sample_voice_data["base_url"] = "https://tts.example.com/v1/audio/speech"
    sample_voice_data["api_key"] = "test-voice-key"
    voice_resp = client.post("/api/voices", json=sample_voice_data)
    assert voice_resp.status_code == 200
    voice_id = voice_resp.json()["id"]

    # Wire the assistant to the registered services.
    sample_assistant_data.update({
        "llmModelId": llm_id,
        "asrModelId": asr_id,
        "voice": voice_id,
        "prompt": "runtime prompt",
        "opener": "runtime opener",
        "manualOpenerToolCalls": [{"toolName": "text_msg_prompt", "arguments": {"msg": "欢迎"}}],
        "asrInterimEnabled": True,
        "speed": 1.1,
    })
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    payload = runtime_resp.json()

    assert payload["assistantId"] == assistant_id
    metadata = payload["sessionStartMetadata"]
    # Prompt is prefixed verbatim and extended with the tool policy.
    assert metadata["systemPrompt"].startswith("runtime prompt")
    assert "Tool usage policy:" in metadata["systemPrompt"]
    assert metadata["greeting"] == "runtime opener"
    assert metadata["manualOpenerToolCalls"] == [{"toolName": "text_msg_prompt", "arguments": {"msg": "欢迎"}}]
    # Resolved service settings come from the registered models/voice.
    assert metadata["services"]["llm"]["model"] == sample_llm_model_data["model_name"]
    assert metadata["services"]["asr"]["model"] == sample_asr_model_data["model_name"]
    assert metadata["services"]["asr"]["baseUrl"] == sample_asr_model_data["base_url"]
    assert metadata["services"]["asr"]["enableInterim"] is True
    expected_tts_voice = f"{sample_voice_data['model']}:{sample_voice_data['voice_key']}"
    assert metadata["services"]["tts"]["voice"] == expected_tts_voice
    assert metadata["services"]["tts"]["baseUrl"] == sample_voice_data["base_url"]
|
||||
|
||||
def test_get_engine_config_endpoint(self, client, sample_assistant_data):
    """Test canonical assistant config endpoint consumed by engine backend adapter."""
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    config_resp = client.get(f"/api/assistants/{assistant_id}/config")
    assert config_resp.status_code == 200
    payload = config_resp.json()

    assert payload["assistantId"] == assistant_id
    assert payload["assistant"]["assistantId"] == assistant_id
    # Version ids are derived from the assistant id.
    assert payload["assistant"]["configVersionId"].startswith(f"asst_{assistant_id}_")
    # Both the assistant block and the session metadata carry the prompt
    # plus the appended tool policy.
    for prompt in (payload["assistant"]["systemPrompt"], payload["sessionStartMetadata"]["systemPrompt"]):
        assert prompt.startswith(sample_assistant_data["prompt"])
        assert "Tool usage policy:" in prompt
    assert payload["sessionStartMetadata"]["history"]["assistantId"] == assistant_id
|
||||
|
||||
def test_runtime_config_resolves_selected_tools_into_runtime_definitions(self, client, sample_assistant_data):
    # Selecting tool ids on the assistant should expand into full runtime
    # tool definitions in the session metadata.
    sample_assistant_data["tools"] = ["increase_volume", "calculator"]
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    tools = metadata["tools"]
    assert isinstance(tools, list)
    assert len(tools) == 2

    by_name = {entry["function"]["name"]: entry for entry in tools}
    # increase_volume runs on the client with a default step of 1.
    assert by_name["increase_volume"]["executor"] == "client"
    assert by_name["increase_volume"]["defaultArgs"]["step"] == 1
    # calculator runs server-side and exposes an object schema with an
    # `expression` property.
    assert by_name["calculator"]["executor"] == "server"
    assert by_name["calculator"]["function"]["parameters"]["type"] == "object"
    assert "expression" in by_name["calculator"]["function"]["parameters"]["properties"]

def test_runtime_config_normalizes_legacy_voice_message_prompt_tool_id(self, client, sample_assistant_data):
    # The legacy tool id "voice_message_prompt" is normalized to
    # "voice_msg_prompt" both on the stored assistant and in runtime metadata.
    sample_assistant_data["tools"] = ["voice_message_prompt"]
    sample_assistant_data["manualOpenerToolCalls"] = [
        {"toolName": "voice_message_prompt", "arguments": {"msg": "您好"}}
    ]
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_payload = assistant_resp.json()
    assistant_id = assistant_payload["id"]
    assert assistant_payload["tools"] == ["voice_msg_prompt"]
    assert assistant_payload["manualOpenerToolCalls"] == [
        {"toolName": "voice_msg_prompt", "arguments": {"msg": "您好"}}
    ]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    by_name = {entry["function"]["name"]: entry for entry in metadata["tools"]}
    assert "voice_msg_prompt" in by_name
    assert metadata["manualOpenerToolCalls"] == [
        {"toolName": "voice_msg_prompt", "arguments": {"msg": "您好"}}
    ]
|
||||
|
||||
def test_runtime_config_text_mode_when_voice_output_disabled(self, client, sample_assistant_data):
    # Disabling voice output should switch runtime output to text mode
    # and turn off both interim ASR and TTS.
    sample_assistant_data["voiceOutputEnabled"] = False
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    assert metadata["output"]["mode"] == "text"
    assert metadata["services"]["asr"]["enableInterim"] is False
    assert metadata["services"]["tts"]["enabled"] is False
|
||||
|
||||
def test_runtime_config_dashscope_voice_provider(self, client, sample_assistant_data):
    """DashScope voices should map to dashscope tts provider in runtime metadata."""
    voice_resp = client.post("/api/voices", json={
        "name": "DashScope Cherry",
        "vendor": "DashScope",
        "gender": "Female",
        "language": "zh",
        "description": "dashscope voice",
        "api_key": "dashscope-key",
        "base_url": "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
    })
    assert voice_resp.status_code == 200
    voice_payload = voice_resp.json()

    sample_assistant_data.update({
        "voice": voice_payload["id"],
        "voiceOutputEnabled": True,
    })
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    tts = runtime_resp.json()["sessionStartMetadata"]["services"]["tts"]
    # DashScope voice resolves to the dashscope realtime TTS stack.
    assert tts["provider"] == "dashscope"
    assert tts["voice"] == "Cherry"
    assert tts["model"] == "qwen3-tts-flash-realtime"
    assert tts["apiKey"] == "dashscope-key"
    assert tts["baseUrl"] == "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"

def test_runtime_config_dashscope_asr_provider(self, client, sample_assistant_data):
    """DashScope ASR models should map to dashscope asr provider in runtime metadata."""
    asr_resp = client.post("/api/asr", json={
        "name": "DashScope Realtime ASR",
        "vendor": "DashScope",
        "language": "zh",
        "base_url": "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
        "api_key": "dashscope-asr-key",
        "model_name": "qwen3-asr-flash-realtime",
        "hotwords": [],
        "enable_punctuation": True,
        "enable_normalization": True,
        "enabled": True,
    })
    assert asr_resp.status_code == 200
    asr_payload = asr_resp.json()

    sample_assistant_data.update({
        "asrModelId": asr_payload["id"],
    })
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    asr = runtime_resp.json()["sessionStartMetadata"]["services"]["asr"]
    assert asr["provider"] == "dashscope"
    assert asr["baseUrl"] == "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
    assert asr["enableInterim"] is False
|
||||
|
||||
def test_runtime_config_defaults_asr_interim_disabled_without_asr_model(self, client, sample_assistant_data):
    # Without an ASR model attached, interim transcripts default to off.
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    assert metadata["services"]["asr"]["enableInterim"] is False

def test_assistant_interrupt_and_generated_opener_flags(self, client, sample_assistant_data):
    # Flags should persist on the assistant and flow into runtime metadata.
    sample_assistant_data.update({
        "firstTurnMode": "user_first",
        "generatedOpenerEnabled": True,
        "botCannotBeInterrupted": True,
        "interruptionSensitivity": 900,
    })
    assistant_resp = client.post("/api/assistants", json=sample_assistant_data)
    assert assistant_resp.status_code == 200
    assistant_id = assistant_resp.json()["id"]

    get_resp = client.get(f"/api/assistants/{assistant_id}")
    assert get_resp.status_code == 200
    payload = get_resp.json()
    assert payload["firstTurnMode"] == "user_first"
    assert payload["generatedOpenerEnabled"] is True
    assert payload["botCannotBeInterrupted"] is True
    assert payload["interruptionSensitivity"] == 900

    runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime_resp.status_code == 200
    metadata = runtime_resp.json()["sessionStartMetadata"]
    assert metadata["firstTurnMode"] == "user_first"
    assert metadata["generatedOpenerEnabled"] is True
    # user_first mode means no canned greeting.
    assert metadata["greeting"] == ""
    # botCannotBeInterrupted disables barge-in; sensitivity maps to
    # the minimum barge-in duration in milliseconds.
    assert metadata["bargeIn"]["enabled"] is False
    assert metadata["bargeIn"]["minDurationMs"] == 900
|
||||
|
||||
def test_fastgpt_app_id_persists_and_flows_to_runtime(self, client, sample_assistant_data):
    """FastGPT appId is stored on the assistant and exposed in the runtime LLM config."""
    sample_assistant_data.update({
        "configMode": "fastgpt",
        "apiUrl": "https://cloud.fastgpt.cn/api",
        "apiKey": "fastgpt-key",
        "appId": "app-fastgpt-123",
    })
    created = client.post("/api/assistants", json=sample_assistant_data)
    assert created.status_code == 200
    assistant_id = created.json()["id"]
    assert created.json()["appId"] == "app-fastgpt-123"

    runtime = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime.status_code == 200
    llm_meta = runtime.json()["sessionStartMetadata"]["services"]["llm"]
    assert llm_meta["provider"] == "fastgpt"
    assert llm_meta["appId"] == "app-fastgpt-123"
|
||||
236
api/tests/test_history.py
Normal file
236
api/tests/test_history.py
Normal file
@@ -0,0 +1,236 @@
|
||||
"""Tests for History/Call Record API endpoints"""
|
||||
import pytest
|
||||
import time
|
||||
|
||||
|
||||
class TestHistoryAPI:
    """Test cases for History/Call Record endpoints"""

    def test_get_history_empty(self, client):
        """Test getting history when database is empty"""
        resp = client.get("/api/history")
        assert resp.status_code == 200
        body = resp.json()
        assert "total" in body
        assert "list" in body

    def test_create_call_record(self, client, sample_call_record_data):
        """Test creating a new call record"""
        resp = client.post("/api/history", json=sample_call_record_data)
        assert resp.status_code == 200
        record = resp.json()
        assert record["user_id"] == sample_call_record_data["user_id"]
        assert record["source"] == sample_call_record_data["source"]
        assert record["status"] == "connected"
        assert "id" in record
        assert "started_at" in record

    def test_create_call_record_with_assistant(self, client, sample_assistant_data, sample_call_record_data):
        """Test creating a call record associated with an assistant"""
        # An assistant must exist before a record can reference it.
        assistant_id = client.post("/api/assistants", json=sample_assistant_data).json()["id"]

        sample_call_record_data["assistant_id"] = assistant_id
        resp = client.post("/api/history", json=sample_call_record_data)
        assert resp.status_code == 200
        assert resp.json()["assistant_id"] == assistant_id

    def test_get_call_record_by_id(self, client, sample_call_record_data):
        """Test getting a specific call record by ID"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        resp = client.get(f"/api/history/{record_id}")
        assert resp.status_code == 200
        assert resp.json()["id"] == record_id

    def test_get_call_record_not_found(self, client):
        """Test getting a non-existent call record"""
        assert client.get("/api/history/non-existent-id").status_code == 404

    def test_update_call_record(self, client, sample_call_record_data):
        """Test updating a call record"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        changes = {
            "status": "completed",
            "summary": "Test summary",
            "duration_seconds": 120,
        }
        resp = client.put(f"/api/history/{record_id}", json=changes)
        assert resp.status_code == 200
        updated = resp.json()
        for field, expected in changes.items():
            assert updated[field] == expected

    def test_delete_call_record(self, client, sample_call_record_data):
        """Test deleting a call record"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        assert client.delete(f"/api/history/{record_id}").status_code == 200
        # The record must no longer be retrievable.
        assert client.get(f"/api/history/{record_id}").status_code == 404

    def test_add_transcript(self, client, sample_call_record_data):
        """Test adding a transcript to a call record"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        turn = {
            "turn_index": 0,
            "speaker": "human",
            "content": "Hello, I need help",
            "start_ms": 0,
            "end_ms": 3000,
            "confidence": 0.95,
        }
        resp = client.post(f"/api/history/{record_id}/transcripts", json=turn)
        assert resp.status_code == 200
        saved = resp.json()
        assert saved["turn_index"] == 0
        assert saved["speaker"] == "human"
        assert saved["content"] == "Hello, I need help"

    def test_add_multiple_transcripts(self, client, sample_call_record_data):
        """Test adding multiple transcripts"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        # One human turn followed by one AI turn.
        turns = [
            {"turn_index": 0, "speaker": "human", "content": "Hello", "start_ms": 0, "end_ms": 1000},
            {"turn_index": 1, "speaker": "ai", "content": "Hello! How can I help you?", "start_ms": 1500, "end_ms": 4000},
        ]
        for turn in turns:
            client.post(f"/api/history/{record_id}/transcripts", json=turn)

        resp = client.get(f"/api/history/{record_id}")
        assert resp.status_code == 200
        assert len(resp.json()["transcripts"]) == 2

    def test_filter_history_by_status(self, client, sample_call_record_data):
        """Test filtering history by status"""
        # One "connected" and one "missed" record.
        for status in ("connected", "missed"):
            record = sample_call_record_data.copy()
            record["status"] = status
            client.post("/api/history", json=record)

        resp = client.get("/api/history?status=connected")
        assert resp.status_code == 200
        for record in resp.json()["list"]:
            assert record["status"] == "connected"

    def test_filter_history_by_source(self, client, sample_call_record_data):
        """Test filtering history by source"""
        sample_call_record_data["source"] = "external"
        client.post("/api/history", json=sample_call_record_data)

        resp = client.get("/api/history?source=external")
        assert resp.status_code == 200
        for record in resp.json()["list"]:
            assert record["source"] == "external"

    def test_history_pagination(self, client, sample_call_record_data):
        """Test history pagination"""
        for i in range(5):
            record = sample_call_record_data.copy()
            record["source"] = f"source-{i}"
            client.post("/api/history", json=record)

        resp = client.get("/api/history?page=1&limit=3")
        assert resp.status_code == 200
        body = resp.json()
        assert body["total"] == 5
        assert len(body["list"]) == 3

    def test_transcript_with_emotion(self, client, sample_call_record_data):
        """Test adding transcript with emotion"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        turn = {
            "turn_index": 0,
            "speaker": "ai",
            "content": "Great news!",
            "start_ms": 0,
            "end_ms": 2000,
            "emotion": "happy",
        }
        resp = client.post(f"/api/history/{record_id}/transcripts", json=turn)
        assert resp.status_code == 200
        assert resp.json()["emotion"] == "happy"

    def test_history_with_cost(self, client, sample_call_record_data):
        """Test creating history with cost"""
        sample_call_record_data["cost"] = 0.05
        resp = client.post("/api/history", json=sample_call_record_data)
        assert resp.status_code == 200
        assert resp.json()["cost"] == 0.05

    def test_history_search(self, client, sample_call_record_data):
        """Test searching history"""
        record_id = client.post("/api/history", json=sample_call_record_data).json()["id"]

        turn = {
            "turn_index": 0,
            "speaker": "human",
            "content": "I want to buy a product",
            "start_ms": 0,
            "end_ms": 3000,
        }
        client.post(f"/api/history/{record_id}/transcripts", json=turn)

        # The search endpoint may not be implemented yet, so 404 is tolerated.
        resp = client.get("/api/history/search?q=product")
        assert resp.status_code in [200, 404]
|
||||
291
api/tests/test_knowledge.py
Normal file
291
api/tests/test_knowledge.py
Normal file
@@ -0,0 +1,291 @@
|
||||
"""Tests for Knowledge Base API endpoints"""
|
||||
import pytest
|
||||
import uuid
|
||||
from app.models import KnowledgeBase
|
||||
|
||||
|
||||
class TestKnowledgeAPI:
    """Test cases for Knowledge Base endpoints"""

    def _create_kb(self, client, name):
        """Helper: create a knowledge base with the given name, return its id."""
        return client.post("/api/knowledge/bases", json={"name": name}).json()["id"]

    def test_get_knowledge_bases_empty(self, client):
        """Test getting knowledge bases when database is empty"""
        resp = client.get("/api/knowledge/bases")
        assert resp.status_code == 200
        body = resp.json()
        assert "total" in body
        assert "list" in body

    def test_create_knowledge_base(self, client):
        """Test creating a new knowledge base"""
        payload = {
            "name": "Test Knowledge Base",
            "description": "A test knowledge base",
            "embeddingModel": "text-embedding-3-small",
            "chunkSize": 500,
            "chunkOverlap": 50,
        }
        resp = client.post("/api/knowledge/bases", json=payload)
        assert resp.status_code == 200
        created = resp.json()
        assert created["name"] == "Test Knowledge Base"
        assert created["description"] == "A test knowledge base"
        assert created["embeddingModel"] == "text-embedding-3-small"
        assert "id" in created
        # A brand-new KB starts empty and active.
        assert created["docCount"] == 0
        assert created["chunkCount"] == 0
        assert created["status"] == "active"

    def test_create_knowledge_base_minimal(self, client):
        """Test creating a knowledge base with minimal data"""
        resp = client.post("/api/knowledge/bases", json={"name": "Minimal KB"})
        assert resp.status_code == 200
        assert resp.json()["name"] == "Minimal KB"

    def test_get_knowledge_base_by_id(self, client):
        """Test getting a specific knowledge base by ID"""
        kb_id = self._create_kb(client, "Test KB")

        resp = client.get(f"/api/knowledge/bases/{kb_id}")
        assert resp.status_code == 200
        fetched = resp.json()
        assert fetched["id"] == kb_id
        assert fetched["name"] == "Test KB"

    def test_get_knowledge_base_not_found(self, client):
        """Test getting a non-existent knowledge base"""
        assert client.get("/api/knowledge/bases/non-existent-id").status_code == 404

    def test_update_knowledge_base(self, client):
        """Test updating a knowledge base"""
        kb_id = self._create_kb(client, "Original Name")

        changes = {
            "name": "Updated Name",
            "description": "Updated description",
            "chunkSize": 800,
        }
        resp = client.put(f"/api/knowledge/bases/{kb_id}", json=changes)
        assert resp.status_code == 200
        updated = resp.json()
        for field, expected in changes.items():
            assert updated[field] == expected

    def test_delete_knowledge_base(self, client):
        """Test deleting a knowledge base"""
        kb_id = self._create_kb(client, "To Delete")

        assert client.delete(f"/api/knowledge/bases/{kb_id}").status_code == 200
        # The knowledge base must be gone afterwards.
        assert client.get(f"/api/knowledge/bases/{kb_id}").status_code == 404

    def test_upload_document(self, client):
        """Test uploading a document to knowledge base"""
        kb_id = self._create_kb(client, "Test KB for Docs")

        doc = {
            "name": "test-document.txt",
            "size": "1024",
            "fileType": "txt",
            "storageUrl": "https://storage.example.com/test-document.txt",
        }
        resp = client.post(f"/api/knowledge/bases/{kb_id}/documents", json=doc)
        assert resp.status_code == 200
        saved = resp.json()
        assert saved["name"] == "test-document.txt"
        assert "id" in saved
        assert saved["status"] == "pending"

    def test_upload_file_auto_index(self, client):
        """Test uploading a real file triggers auto indexing."""
        kb_id = self._create_kb(client, "Auto Index KB")

        content = "Line one about product.\nLine two about warranty."
        files = {"file": ("auto-index.txt", content.encode("utf-8"), "text/plain")}
        resp = client.post(f"/api/knowledge/bases/{kb_id}/documents", files=files)
        assert resp.status_code == 200
        uploaded = resp.json()
        # A multipart file upload is indexed immediately, unlike JSON metadata.
        assert uploaded["status"] == "completed"
        assert uploaded["chunkCount"] >= 1

    def test_delete_document(self, client):
        """Test deleting a document from knowledge base"""
        kb_id = self._create_kb(client, "Test KB for Delete")

        doc = {"name": "to-delete.txt", "size": "100", "fileType": "txt"}
        doc_id = client.post(f"/api/knowledge/bases/{kb_id}/documents", json=doc).json()["id"]

        resp = client.delete(f"/api/knowledge/bases/{kb_id}/documents/{doc_id}")
        assert resp.status_code == 200

    def test_index_document(self, client):
        """Test indexing a document"""
        kb_id = self._create_kb(client, "Test KB for Index")

        index_payload = {
            "document_id": "doc-001",
            "content": "This is the content to index. It contains important information about the product."
        }
        resp = client.post(
            f"/api/knowledge/bases/{kb_id}/documents/doc-001/index",
            json=index_payload,
        )
        # Outcome depends on the vector store implementation, so both are accepted.
        assert resp.status_code in [200, 500]

    def test_search_knowledge(self, client):
        """Test searching knowledge base"""
        kb_id = self._create_kb(client, "Test KB for Search")

        search_payload = {
            "query": "test query",
            "kb_id": kb_id,
            "nResults": 5,
        }
        resp = client.post("/api/knowledge/search", json=search_payload)
        # May fail without indexed content; both outcomes are accepted.
        assert resp.status_code in [200, 500]

    def test_get_knowledge_stats(self, client):
        """Test getting knowledge base statistics"""
        kb_id = self._create_kb(client, "Test KB for Stats")

        resp = client.get(f"/api/knowledge/bases/{kb_id}/stats")
        assert resp.status_code == 200
        stats = resp.json()
        assert stats["kb_id"] == kb_id
        assert "docCount" in stats
        assert "chunkCount" in stats

    def test_knowledge_bases_pagination(self, client):
        """Test knowledge bases pagination"""
        for i in range(5):
            client.post("/api/knowledge/bases", json={"name": f"Knowledge Base {i}"})

        resp = client.get("/api/knowledge/bases?page=1&limit=3")
        assert resp.status_code == 200
        body = resp.json()
        assert body["total"] == 5
        assert len(body["list"]) == 3

    def test_different_embedding_models(self, client):
        """Test creating KB with different embedding models"""
        for model in ("text-embedding-3-small", "text-embedding-3-large", "bge-small-zh"):
            resp = client.post(
                "/api/knowledge/bases",
                json={"name": f"KB with {model}", "embeddingModel": model},
            )
            assert resp.status_code == 200
            assert resp.json()["embeddingModel"] == model

    def test_different_chunk_sizes(self, client):
        """Test creating KB with different chunk configurations"""
        configs = [
            {"chunkSize": 500, "chunkOverlap": 50},
            {"chunkSize": 1000, "chunkOverlap": 100},
            {"chunkSize": 256, "chunkOverlap": 25},
        ]
        for idx, config in enumerate(configs):
            resp = client.post(
                "/api/knowledge/bases",
                json={"name": f"Chunk Test KB {idx}", **config},
            )
            assert resp.status_code == 200

    def test_knowledge_base_with_documents(self, client):
        """Test creating KB and adding multiple documents"""
        kb_id = self._create_kb(client, "KB with Multiple Docs")

        for i in range(3):
            doc = {
                "name": f"document-{i}.txt",
                "size": f"{1000 + i * 100}",
                "fileType": "txt",
            }
            client.post(f"/api/knowledge/bases/{kb_id}/documents", json=doc)

        resp = client.get(f"/api/knowledge/bases/{kb_id}")
        assert resp.status_code == 200
        assert len(resp.json()["documents"]) == 3

    def test_create_knowledge_base_duplicate_name(self, client):
        """Test duplicate KB names are rejected for same user."""
        payload = {"name": "Duplicate KB"}
        assert client.post("/api/knowledge/bases", json=payload).status_code == 200
        # A second creation with the same name must be rejected.
        assert client.post("/api/knowledge/bases", json=payload).status_code == 400

    def test_update_embedding_model_blocked_when_chunks_exist(self, client, db_session):
        """Test embedding model change is blocked after indexing chunks."""
        create_resp = client.post("/api/knowledge/bases", json={"name": "KB Embedding Lock"})
        assert create_resp.status_code == 200
        kb_id = create_resp.json()["id"]

        # Simulate indexed content directly in the database.
        kb = db_session.query(KnowledgeBase).filter(KnowledgeBase.id == kb_id).first()
        kb.chunk_count = 5
        db_session.commit()

        update_resp = client.put(f"/api/knowledge/bases/{kb_id}", json={"embeddingModel": "text-embedding-3-large"})
        assert update_resp.status_code == 400
|
||||
352
api/tests/test_llm.py
Normal file
352
api/tests/test_llm.py
Normal file
@@ -0,0 +1,352 @@
|
||||
"""Tests for LLM Model API endpoints"""
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
|
||||
class TestLLMModelAPI:
|
||||
"""Test cases for LLM Model endpoints"""
|
||||
|
||||
def test_get_llm_models_empty(self, client):
    """Test getting LLM models when database is empty"""
    resp = client.get("/api/llm")
    assert resp.status_code == 200
    body = resp.json()
    assert "total" in body
    assert "list" in body
    assert body["total"] == 0
|
||||
|
||||
def test_create_llm_model(self, client, sample_llm_model_data):
    """Test creating a new LLM model"""
    resp = client.post("/api/llm", json=sample_llm_model_data)
    assert resp.status_code == 200
    created = resp.json()
    # The response must echo back the submitted identity fields.
    for field in ("name", "vendor", "type", "base_url"):
        assert created[field] == sample_llm_model_data[field]
    assert "id" in created
|
||||
|
||||
def test_create_llm_model_minimal(self, client):
    """Test creating an LLM model with minimal required data"""
    payload = {
        "name": "Minimal LLM",
        "vendor": "Test",
        "type": "text",
        "base_url": "https://api.test.com",
        "api_key": "test-key",
    }
    resp = client.post("/api/llm", json=payload)
    assert resp.status_code == 200
    assert resp.json()["name"] == "Minimal LLM"
|
||||
|
||||
def test_get_llm_model_by_id(self, client, sample_llm_model_data):
    """Test getting a specific LLM model by ID"""
    # Create a model, then fetch it back by its generated id.
    model_id = client.post("/api/llm", json=sample_llm_model_data).json()["id"]

    resp = client.get(f"/api/llm/{model_id}")
    assert resp.status_code == 200
    fetched = resp.json()
    assert fetched["id"] == model_id
    assert fetched["name"] == sample_llm_model_data["name"]
|
||||
|
||||
def test_get_llm_model_not_found(self, client):
    """Test getting a non-existent LLM model"""
    assert client.get("/api/llm/non-existent-id").status_code == 404
|
||||
|
||||
def test_update_llm_model(self, client, sample_llm_model_data):
    """Test updating an LLM model"""
    model_id = client.post("/api/llm", json=sample_llm_model_data).json()["id"]

    changes = {
        "name": "Updated LLM Model",
        "vendor": "SiliconFlow",
        "type": "embedding",
        "temperature": 0.5,
        "context_length": 8192,
    }
    resp = client.put(f"/api/llm/{model_id}", json=changes)
    assert resp.status_code == 200
    updated = resp.json()
    # Every submitted change must be reflected in the response.
    for field, expected in changes.items():
        assert updated[field] == expected
|
||||
|
||||
def test_delete_llm_model(self, client, sample_llm_model_data):
    """Test deleting an LLM model"""
    model_id = client.post("/api/llm", json=sample_llm_model_data).json()["id"]

    assert client.delete(f"/api/llm/{model_id}").status_code == 200
    # The model must be gone afterwards.
    assert client.get(f"/api/llm/{model_id}").status_code == 404
|
||||
|
||||
def test_list_llm_models_with_pagination(self, client, sample_llm_model_data):
    """Test listing LLM models with pagination"""
    # Seed three distinct models.
    for i in range(3):
        payload = dict(sample_llm_model_data, id=f"test-llm-{i}", name=f"LLM Model {i}")
        client.post("/api/llm", json=payload)

    resp = client.get("/api/llm?page=1&limit=2")
    assert resp.status_code == 200
    body = resp.json()
    assert body["total"] == 3
    assert len(body["list"]) == 2
|
||||
|
||||
def test_filter_llm_models_by_type(self, client, sample_llm_model_data):
    """Test filtering LLM models by type.

    Creates one model per type, then verifies that ?model_type=text
    returns only text models.
    """
    # Plain iteration: the original used enumerate() but never used the index.
    for model_type in ("text", "embedding", "rerank"):
        data = sample_llm_model_data.copy()
        data["id"] = f"test-llm-{model_type}"
        data["name"] = f"LLM {model_type}"
        data["type"] = model_type
        client.post("/api/llm", json=data)

    # Filter by type
    response = client.get("/api/llm?model_type=text")
    assert response.status_code == 200
    data = response.json()
    assert data["total"] >= 1
    for model in data["list"]:
        assert model["type"] == "text"
|
||||
|
||||
def test_filter_llm_models_by_enabled(self, client, sample_llm_model_data):
    """Test filtering LLM models by enabled status"""
    # Create one enabled and one disabled model.
    data = sample_llm_model_data.copy()
    data["id"] = "test-llm-enabled"
    data["name"] = "Enabled LLM"
    data["enabled"] = True
    client.post("/api/llm", json=data)

    data["id"] = "test-llm-disabled"
    data["name"] = "Disabled LLM"
    data["enabled"] = False
    client.post("/api/llm", json=data)

    # Filter by enabled
    response = client.get("/api/llm?enabled=true")
    assert response.status_code == 200
    data = response.json()
    for model in data["list"]:
        # PEP 8 / E712: compare booleans with `is`, not `== True`.
        assert model["enabled"] is True
|
||||
|
||||
def test_create_llm_model_with_all_fields(self, client):
    """Test creating an LLM model with all fields"""
    payload = {
        "id": "full-llm",
        "name": "Full LLM Model",
        "vendor": "OpenAI",
        "type": "text",
        "base_url": "https://api.openai.com/v1",
        "api_key": "sk-test",
        "model_name": "gpt-4",
        "temperature": 0.8,
        "context_length": 16384,
        "enabled": True,
    }
    resp = client.post("/api/llm", json=payload)
    assert resp.status_code == 200
    created = resp.json()
    assert created["name"] == "Full LLM Model"
    assert created["temperature"] == 0.8
    assert created["context_length"] == 16384
|
||||
|
||||
@patch('httpx.Client')
def test_test_llm_model_success(self, mock_client_class, client, sample_llm_model_data):
    """Test testing an LLM model with successful connection"""
    # Create model first
    create_response = client.post("/api/llm", json=sample_llm_model_data)
    model_id = create_response.json()["id"]

    # Build a mock httpx client whose POST returns a successful chat completion.
    mock_client = MagicMock()
    mock_response = MagicMock()
    mock_response.status_code = 200
    mock_response.json.return_value = {
        "choices": [{"message": {"content": "OK"}}]
    }
    mock_response.raise_for_status = MagicMock()
    mock_client.post.return_value = mock_response
    # Support use as a context manager (`with httpx.Client() as c:`).
    mock_client.__enter__ = MagicMock(return_value=mock_client)
    mock_client.__exit__ = MagicMock(return_value=False)

    with patch('app.routers.llm.httpx.Client', return_value=mock_client):
        response = client.post(f"/api/llm/{model_id}/test")
        assert response.status_code == 200
        data = response.json()
        # PEP 8 / E712: compare booleans with `is`, not `== True`.
        assert data["success"] is True
|
||||
|
||||
@patch('httpx.Client')
def test_test_llm_model_failure(self, mock_client_class, client, sample_llm_model_data):
    """Test testing an LLM model with failed connection"""
    # Create model first
    create_response = client.post("/api/llm", json=sample_llm_model_data)
    model_id = create_response.json()["id"]

    # Build a mock httpx client whose POST simulates a 401 from the provider.
    mock_client = MagicMock()
    mock_response = MagicMock()
    mock_response.status_code = 401
    mock_response.text = "Unauthorized"
    mock_response.raise_for_status = MagicMock(side_effect=Exception("401 Unauthorized"))
    mock_client.post.return_value = mock_response
    # Support use as a context manager (`with httpx.Client() as c:`).
    mock_client.__enter__ = MagicMock(return_value=mock_client)
    mock_client.__exit__ = MagicMock(return_value=False)

    with patch('app.routers.llm.httpx.Client', return_value=mock_client):
        response = client.post(f"/api/llm/{model_id}/test")
        assert response.status_code == 200
        data = response.json()
        # PEP 8 / E712: compare booleans with `is`, not `== False`.
        assert data["success"] is False
|
||||
|
||||
def test_different_llm_vendors(self, client):
    """Test creating LLM models with different vendors"""
    for vendor in ("OpenAI", "SiliconFlow", "ZhipuAI", "Anthropic"):
        payload = {
            "id": f"test-{vendor.lower()}",
            "name": f"Test {vendor}",
            "vendor": vendor,
            "type": "text",
            "base_url": f"https://api.{vendor.lower()}.com/v1",
            "api_key": "test-key",
        }
        resp = client.post("/api/llm", json=payload)
        assert resp.status_code == 200
        assert resp.json()["vendor"] == vendor
|
||||
|
||||
def test_embedding_llm_model(self, client):
    """Test creating an embedding LLM model"""
    payload = {
        "id": "embedding-test",
        "name": "Embedding Model",
        "vendor": "OpenAI",
        "type": "embedding",
        "base_url": "https://api.openai.com/v1",
        "api_key": "test-key",
        "model_name": "text-embedding-3-small",
    }
    resp = client.post("/api/llm", json=payload)
    assert resp.status_code == 200
    assert resp.json()["type"] == "embedding"
|
||||
|
||||
def test_preview_llm_model_success(self, client, sample_llm_model_data, monkeypatch):
    """Test LLM preview endpoint returns model reply."""
    from app.routers import llm as llm_router

    model_id = client.post("/api/llm", json=sample_llm_model_data).json()["id"]

    class StubResponse:
        # Minimal stand-in for an httpx.Response carrying a chat completion.
        status_code = 200

        def json(self):
            return {
                "choices": [{"message": {"content": "Preview OK"}}],
                "usage": {"prompt_tokens": 10, "completion_tokens": 2, "total_tokens": 12}
            }

        @property
        def text(self):
            return '{"ok":true}'

    class StubClient:
        # Minimal stand-in for httpx.Client usable as a context manager.
        def __init__(self, *args, **kwargs):
            pass

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def post(self, url, json=None, headers=None):
            # The router must call chat-completions with the stored API key
            # and a user-role message.
            assert url.endswith("/chat/completions")
            assert headers["Authorization"] == f"Bearer {sample_llm_model_data['api_key']}"
            assert json["messages"][0]["role"] == "user"
            return StubResponse()

    monkeypatch.setattr(llm_router.httpx, "Client", StubClient)

    resp = client.post(f"/api/llm/{model_id}/preview", json={"message": "hello"})
    assert resp.status_code == 200
    body = resp.json()
    assert body["success"] is True
    assert body["reply"] == "Preview OK"
|
||||
|
||||
def test_preview_llm_model_reject_empty_message(self, client, sample_llm_model_data):
|
||||
"""Test LLM preview endpoint validates message."""
|
||||
create_response = client.post("/api/llm", json=sample_llm_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
response = client.post(f"/api/llm/{model_id}/preview", json={"message": " "})
|
||||
assert response.status_code == 400
|
||||
|
||||
def test_preview_embedding_model_success(self, client, monkeypatch):
|
||||
"""Test embedding model preview endpoint returns embedding summary."""
|
||||
from app.routers import llm as llm_router
|
||||
|
||||
embedding_model_data = {
|
||||
"id": "preview-emb",
|
||||
"name": "Preview Embedding",
|
||||
"vendor": "OpenAI",
|
||||
"type": "embedding",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "test-key",
|
||||
"model_name": "text-embedding-3-small"
|
||||
}
|
||||
create_response = client.post("/api/llm", json=embedding_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
class DummyResponse:
|
||||
status_code = 200
|
||||
|
||||
def json(self):
|
||||
return {"data": [{"embedding": [0.1, 0.2, 0.3, 0.4]}], "usage": {"total_tokens": 7}}
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
return '{"ok":true}'
|
||||
|
||||
class DummyClient:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def post(self, url, json=None, headers=None):
|
||||
assert url.endswith("/embeddings")
|
||||
assert json["input"] == "hello embedding"
|
||||
assert headers["Authorization"] == "Bearer test-key"
|
||||
return DummyResponse()
|
||||
|
||||
monkeypatch.setattr(llm_router.httpx, "Client", DummyClient)
|
||||
|
||||
response = client.post(f"/api/llm/{model_id}/preview", json={"message": "hello embedding"})
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["success"] is True
|
||||
assert "dims=4" in data["reply"]
|
||||
368
api/tests/test_tools.py
Normal file
368
api/tests/test_tools.py
Normal file
@@ -0,0 +1,368 @@
|
||||
"""Tests for Tools & Autotest API endpoints"""
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
|
||||
class TestToolsAPI:
|
||||
"""Test cases for Tools endpoints"""
|
||||
|
||||
def test_list_available_tools(self, client):
|
||||
"""Test listing all available tools"""
|
||||
response = client.get("/api/tools/list")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tools" in data
|
||||
# Check for expected tools
|
||||
tools = data["tools"]
|
||||
assert "calculator" in tools
|
||||
assert "code_interpreter" in tools
|
||||
assert "current_time" in tools
|
||||
assert "turn_on_camera" in tools
|
||||
assert "turn_off_camera" in tools
|
||||
assert "increase_volume" in tools
|
||||
assert "decrease_volume" in tools
|
||||
assert "voice_msg_prompt" in tools
|
||||
assert "calculator" in tools
|
||||
|
||||
def test_get_tool_detail(self, client):
|
||||
"""Test getting a specific tool's details"""
|
||||
response = client.get("/api/tools/list/calculator")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == "计算器"
|
||||
assert "parameters" in data
|
||||
|
||||
def test_get_tool_detail_not_found(self, client):
|
||||
"""Test getting a non-existent tool"""
|
||||
response = client.get("/api/tools/list/non-existent-tool")
|
||||
assert response.status_code == 404
|
||||
|
||||
def test_get_tool_detail_legacy_alias(self, client):
|
||||
"""Legacy tool id should resolve to canonical tool detail."""
|
||||
response = client.get("/api/tools/list/voice_message_prompt")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == "语音消息提示"
|
||||
assert "msg" in data["parameters"]["properties"]
|
||||
|
||||
def test_health_check(self, client):
|
||||
"""Test health check endpoint"""
|
||||
response = client.get("/api/tools/health")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["status"] == "healthy"
|
||||
assert "timestamp" in data
|
||||
assert "tools" in data
|
||||
|
||||
|
||||
class TestAutotestAPI:
|
||||
"""Test cases for Autotest endpoints"""
|
||||
|
||||
def test_autotest_no_models(self, client):
|
||||
"""Test autotest without specifying model IDs"""
|
||||
response = client.post("/api/tools/autotest")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "id" in data
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
# Should have test failures since no models provided
|
||||
assert data["summary"]["total"] > 0
|
||||
|
||||
def test_autotest_with_llm_model(self, client, sample_llm_model_data):
|
||||
"""Test autotest with an LLM model"""
|
||||
# Create an LLM model first
|
||||
create_response = client.post("/api/llm", json=sample_llm_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Run autotest
|
||||
response = client.post(f"/api/tools/autotest?llm_model_id={model_id}&test_asr=false")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
def test_autotest_with_asr_model(self, client, sample_asr_model_data):
|
||||
"""Test autotest with an ASR model"""
|
||||
# Create an ASR model first
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Run autotest
|
||||
response = client.post(f"/api/tools/autotest?asr_model_id={model_id}&test_llm=false")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
def test_autotest_with_both_models(self, client, sample_llm_model_data, sample_asr_model_data):
|
||||
"""Test autotest with both LLM and ASR models"""
|
||||
# Create models
|
||||
llm_response = client.post("/api/llm", json=sample_llm_model_data)
|
||||
llm_id = llm_response.json()["id"]
|
||||
|
||||
asr_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
asr_id = asr_response.json()["id"]
|
||||
|
||||
# Run autotest
|
||||
response = client.post(
|
||||
f"/api/tools/autotest?llm_model_id={llm_id}&asr_model_id={asr_id}"
|
||||
)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
@patch('httpx.Client')
|
||||
def test_autotest_llm_model_success(self, mock_client_class, client, sample_llm_model_data):
|
||||
"""Test autotest for a specific LLM model with successful connection"""
|
||||
# Create an LLM model first
|
||||
create_response = client.post("/api/llm", json=sample_llm_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Mock the HTTP response for successful connection
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = {
|
||||
"choices": [{"message": {"content": "OK"}}]
|
||||
}
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_response.iter_bytes = MagicMock(return_value=[b'chunk1', b'chunk2'])
|
||||
mock_client.post.return_value = mock_response
|
||||
mock_client.__enter__ = MagicMock(return_value=mock_client)
|
||||
mock_client.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch('app.routers.tools.httpx.Client', return_value=mock_client):
|
||||
response = client.post(f"/api/tools/autotest/llm/{model_id}")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
@patch('httpx.Client')
|
||||
def test_autotest_asr_model_success(self, mock_client_class, client, sample_asr_model_data):
|
||||
"""Test autotest for a specific ASR model with successful connection"""
|
||||
# Create an ASR model first
|
||||
create_response = client.post("/api/asr", json=sample_asr_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Mock the HTTP response for successful connection
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_client.get.return_value = mock_response
|
||||
mock_client.__enter__ = MagicMock(return_value=mock_client)
|
||||
mock_client.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch('app.routers.tools.httpx.Client', return_value=mock_client):
|
||||
response = client.post(f"/api/tools/autotest/asr/{model_id}")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
def test_autotest_llm_model_not_found(self, client):
|
||||
"""Test autotest for a non-existent LLM model"""
|
||||
response = client.post("/api/tools/autotest/llm/non-existent-id")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
# Should have a failure test
|
||||
assert any(not t["passed"] for t in data["tests"])
|
||||
|
||||
def test_autotest_asr_model_not_found(self, client):
|
||||
"""Test autotest for a non-existent ASR model"""
|
||||
response = client.post("/api/tools/autotest/asr/non-existent-id")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
# Should have a failure test
|
||||
assert any(not t["passed"] for t in data["tests"])
|
||||
|
||||
@patch('httpx.Client')
|
||||
def test_test_message_success(self, mock_client_class, client, sample_llm_model_data):
|
||||
"""Test sending a test message to an LLM model"""
|
||||
# Create an LLM model first
|
||||
create_response = client.post("/api/llm", json=sample_llm_model_data)
|
||||
model_id = create_response.json()["id"]
|
||||
|
||||
# Mock the HTTP response
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = {
|
||||
"choices": [{"message": {"content": "Hello! This is a test reply."}}],
|
||||
"usage": {"total_tokens": 10}
|
||||
}
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_client.post.return_value = mock_response
|
||||
mock_client.__enter__ = MagicMock(return_value=mock_client)
|
||||
mock_client.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch('app.routers.tools.httpx.Client', return_value=mock_client):
|
||||
response = client.post(
|
||||
f"/api/tools/test-message?llm_model_id={model_id}",
|
||||
json={"message": "Hello!"}
|
||||
)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["success"] == True
|
||||
assert "reply" in data
|
||||
|
||||
def test_test_message_model_not_found(self, client):
|
||||
"""Test sending a test message to a non-existent model"""
|
||||
response = client.post(
|
||||
"/api/tools/test-message?llm_model_id=non-existent",
|
||||
json={"message": "Hello!"}
|
||||
)
|
||||
assert response.status_code == 404
|
||||
|
||||
def test_autotest_result_structure(self, client):
|
||||
"""Test that autotest results have the correct structure"""
|
||||
response = client.post("/api/tools/autotest")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
|
||||
# Check required fields
|
||||
assert "id" in data
|
||||
assert "started_at" in data
|
||||
assert "duration_ms" in data
|
||||
assert "tests" in data
|
||||
assert "summary" in data
|
||||
|
||||
# Check summary structure
|
||||
assert "passed" in data["summary"]
|
||||
assert "failed" in data["summary"]
|
||||
assert "total" in data["summary"]
|
||||
|
||||
# Check test structure
|
||||
if data["tests"]:
|
||||
test = data["tests"][0]
|
||||
assert "name" in test
|
||||
assert "passed" in test
|
||||
assert "message" in test
|
||||
assert "duration_ms" in test
|
||||
|
||||
def test_tools_have_required_fields(self, client):
|
||||
"""Test that all tools have required fields"""
|
||||
response = client.get("/api/tools/list")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
|
||||
for tool_id, tool in data["tools"].items():
|
||||
assert "name" in tool
|
||||
assert "description" in tool
|
||||
assert "parameters" in tool
|
||||
|
||||
# Check parameters structure
|
||||
params = tool["parameters"]
|
||||
assert "type" in params
|
||||
assert "properties" in params
|
||||
|
||||
def test_calculator_tool_parameters(self, client):
|
||||
"""Test calculator tool has correct parameters"""
|
||||
response = client.get("/api/tools/list/calculator")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
|
||||
assert data["name"] == "计算器"
|
||||
assert "expression" in data["parameters"]["properties"]
|
||||
assert "required" in data["parameters"]
|
||||
assert "expression" in data["parameters"]["required"]
|
||||
|
||||
def test_code_interpreter_tool_parameters(self, client):
|
||||
"""Test code_interpreter tool has correct parameters"""
|
||||
response = client.get("/api/tools/list/code_interpreter")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
|
||||
assert data["name"] == "代码执行"
|
||||
assert "code" in data["parameters"]["properties"]
|
||||
|
||||
|
||||
class TestToolResourceCRUD:
|
||||
"""Test cases for persistent tool resource CRUD endpoints."""
|
||||
|
||||
def test_list_tool_resources_contains_system_tools(self, client):
|
||||
response = client.get("/api/tools/resources")
|
||||
assert response.status_code == 200
|
||||
payload = response.json()
|
||||
assert payload["total"] >= 1
|
||||
ids = [item["id"] for item in payload["list"]]
|
||||
assert "calculator" in ids
|
||||
assert "voice_msg_prompt" in ids
|
||||
calculator = next((item for item in payload["list"] if item["id"] == "calculator"), None)
|
||||
assert calculator is not None
|
||||
assert calculator["parameter_schema"]["type"] == "object"
|
||||
|
||||
def test_create_update_delete_tool_resource(self, client):
|
||||
create_resp = client.post("/api/tools/resources", json={
|
||||
"name": "自定义网页抓取",
|
||||
"description": "抓取页面并提取正文",
|
||||
"category": "query",
|
||||
"icon": "Globe",
|
||||
"http_method": "GET",
|
||||
"http_url": "https://example.com/search",
|
||||
"http_headers": {},
|
||||
"http_timeout_ms": 10000,
|
||||
"parameter_schema": {
|
||||
"type": "object",
|
||||
"properties": {"keyword": {"type": "string"}},
|
||||
"required": ["keyword"]
|
||||
},
|
||||
"parameter_defaults": {"limit": 10},
|
||||
"enabled": True,
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
created = create_resp.json()
|
||||
tool_id = created["id"]
|
||||
assert created["name"] == "自定义网页抓取"
|
||||
assert created["is_system"] is False
|
||||
assert created["parameter_schema"]["required"] == ["keyword"]
|
||||
assert created["parameter_defaults"]["limit"] == 10
|
||||
|
||||
update_resp = client.put(f"/api/tools/resources/{tool_id}", json={
|
||||
"name": "自定义网页检索",
|
||||
"category": "system",
|
||||
"parameter_defaults": {"limit": 20},
|
||||
})
|
||||
assert update_resp.status_code == 200
|
||||
updated = update_resp.json()
|
||||
assert updated["name"] == "自定义网页检索"
|
||||
assert updated["category"] == "system"
|
||||
assert updated["parameter_defaults"]["limit"] == 20
|
||||
|
||||
get_resp = client.get(f"/api/tools/resources/{tool_id}")
|
||||
assert get_resp.status_code == 200
|
||||
assert get_resp.json()["id"] == tool_id
|
||||
|
||||
delete_resp = client.delete(f"/api/tools/resources/{tool_id}")
|
||||
assert delete_resp.status_code == 200
|
||||
|
||||
missing_resp = client.get(f"/api/tools/resources/{tool_id}")
|
||||
assert missing_resp.status_code == 404
|
||||
|
||||
def test_create_query_tool_requires_http_url(self, client):
|
||||
resp = client.post("/api/tools/resources", json={
|
||||
"name": "缺失URL的查询工具",
|
||||
"description": "应当失败",
|
||||
"category": "query",
|
||||
"icon": "Globe",
|
||||
"enabled": True,
|
||||
})
|
||||
assert resp.status_code == 400
|
||||
|
||||
def test_system_tool_can_be_updated_and_deleted(self, client):
|
||||
list_resp = client.get("/api/tools/resources")
|
||||
assert list_resp.status_code == 200
|
||||
assert any(item["id"] == "turn_on_camera" for item in list_resp.json()["list"])
|
||||
|
||||
update_resp = client.put("/api/tools/resources/turn_on_camera", json={"name": "更新后的打开摄像头", "category": "system"})
|
||||
assert update_resp.status_code == 200
|
||||
assert update_resp.json()["name"] == "更新后的打开摄像头"
|
||||
|
||||
delete_resp = client.delete("/api/tools/resources/turn_on_camera")
|
||||
assert delete_resp.status_code == 200
|
||||
|
||||
get_resp = client.get("/api/tools/resources/turn_on_camera")
|
||||
assert get_resp.status_code == 404
|
||||
331
api/tests/test_voices.py
Normal file
331
api/tests/test_voices.py
Normal file
@@ -0,0 +1,331 @@
|
||||
"""Tests for Voice API endpoints"""
|
||||
import base64
|
||||
import pytest
|
||||
|
||||
|
||||
class TestVoiceAPI:
|
||||
"""Test cases for Voice endpoints"""
|
||||
|
||||
def test_get_voices_empty(self, client):
|
||||
"""Test getting voices when database is empty"""
|
||||
response = client.get("/api/voices")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert "total" in data
|
||||
assert "list" in data
|
||||
|
||||
def test_create_voice(self, client, sample_voice_data):
|
||||
"""Test creating a new voice"""
|
||||
response = client.post("/api/voices", json=sample_voice_data)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == sample_voice_data["name"]
|
||||
assert data["vendor"] == sample_voice_data["vendor"]
|
||||
assert data["gender"] == sample_voice_data["gender"]
|
||||
assert data["language"] == sample_voice_data["language"]
|
||||
assert "id" in data
|
||||
|
||||
def test_create_voice_minimal(self, client):
|
||||
"""Test creating a voice with minimal data"""
|
||||
data = {
|
||||
"name": "Minimal Voice",
|
||||
"vendor": "Test",
|
||||
"gender": "Male",
|
||||
"language": "en",
|
||||
"description": ""
|
||||
}
|
||||
response = client.post("/api/voices", json=data)
|
||||
assert response.status_code == 200
|
||||
|
||||
def test_get_voice_by_id(self, client, sample_voice_data):
|
||||
"""Test getting a specific voice by ID"""
|
||||
# Create first
|
||||
create_response = client.post("/api/voices", json=sample_voice_data)
|
||||
voice_id = create_response.json()["id"]
|
||||
|
||||
# Get by ID
|
||||
response = client.get(f"/api/voices/{voice_id}")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["id"] == voice_id
|
||||
assert data["name"] == sample_voice_data["name"]
|
||||
|
||||
def test_get_voice_not_found(self, client):
|
||||
"""Test getting a non-existent voice"""
|
||||
response = client.get("/api/voices/non-existent-id")
|
||||
assert response.status_code == 404
|
||||
|
||||
def test_update_voice(self, client, sample_voice_data):
|
||||
"""Test updating a voice"""
|
||||
# Create first
|
||||
create_response = client.post("/api/voices", json=sample_voice_data)
|
||||
voice_id = create_response.json()["id"]
|
||||
|
||||
# Update
|
||||
update_data = {"name": "Updated Voice", "speed": 1.5}
|
||||
response = client.put(f"/api/voices/{voice_id}", json=update_data)
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["name"] == "Updated Voice"
|
||||
assert data["speed"] == 1.5
|
||||
|
||||
def test_delete_voice(self, client, sample_voice_data):
|
||||
"""Test deleting a voice"""
|
||||
# Create first
|
||||
create_response = client.post("/api/voices", json=sample_voice_data)
|
||||
voice_id = create_response.json()["id"]
|
||||
|
||||
# Delete
|
||||
response = client.delete(f"/api/voices/{voice_id}")
|
||||
assert response.status_code == 200
|
||||
|
||||
# Verify deleted
|
||||
get_response = client.get(f"/api/voices/{voice_id}")
|
||||
assert get_response.status_code == 404
|
||||
|
||||
def test_list_voices_with_pagination(self, client, sample_voice_data):
|
||||
"""Test listing voices with pagination"""
|
||||
# Create multiple voices
|
||||
for i in range(3):
|
||||
data = sample_voice_data.copy()
|
||||
data["name"] = f"Voice {i}"
|
||||
client.post("/api/voices", json=data)
|
||||
|
||||
# Test pagination
|
||||
response = client.get("/api/voices?page=1&limit=2")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["total"] == 3
|
||||
assert len(data["list"]) == 2
|
||||
|
||||
def test_filter_voices_by_vendor(self, client, sample_voice_data):
|
||||
"""Test filtering voices by vendor"""
|
||||
# Create voice with specific vendor
|
||||
sample_voice_data["vendor"] = "FilterTestVendor"
|
||||
client.post("/api/voices", json=sample_voice_data)
|
||||
|
||||
response = client.get("/api/voices?vendor=FilterTestVendor")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
for voice in data["list"]:
|
||||
assert voice["vendor"] == "FilterTestVendor"
|
||||
|
||||
def test_filter_voices_by_language(self, client, sample_voice_data):
|
||||
"""Test filtering voices by language"""
|
||||
sample_voice_data["language"] = "en"
|
||||
client.post("/api/voices", json=sample_voice_data)
|
||||
|
||||
response = client.get("/api/voices?language=en")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
for voice in data["list"]:
|
||||
assert voice["language"] == "en"
|
||||
|
||||
def test_filter_voices_by_gender(self, client, sample_voice_data):
|
||||
"""Test filtering voices by gender"""
|
||||
sample_voice_data["gender"] = "Female"
|
||||
client.post("/api/voices", json=sample_voice_data)
|
||||
|
||||
response = client.get("/api/voices?gender=Female")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
for voice in data["list"]:
|
||||
assert voice["gender"] == "Female"
|
||||
|
||||
def test_preview_voice_success(self, client, monkeypatch):
|
||||
"""Test preview voice endpoint returns audio data URL"""
|
||||
from app.routers import voices as voice_router
|
||||
|
||||
class DummyResponse:
|
||||
status_code = 200
|
||||
content = b"fake-mp3-bytes"
|
||||
text = "ok"
|
||||
|
||||
def json(self):
|
||||
return {}
|
||||
|
||||
class DummyClient:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def post(self, *args, **kwargs):
|
||||
return DummyResponse()
|
||||
|
||||
monkeypatch.setenv("SILICONFLOW_API_KEY", "test-key")
|
||||
monkeypatch.setattr(voice_router.httpx, "Client", DummyClient)
|
||||
|
||||
create_resp = client.post("/api/voices", json={
|
||||
"id": "anna",
|
||||
"name": "Anna",
|
||||
"vendor": "SiliconFlow",
|
||||
"gender": "Female",
|
||||
"language": "zh",
|
||||
"description": "system voice",
|
||||
"model": "FunAudioLLM/CosyVoice2-0.5B",
|
||||
"voice_key": "FunAudioLLM/CosyVoice2-0.5B:anna"
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
voice_id = create_resp.json()["id"]
|
||||
|
||||
preview_resp = client.post(f"/api/voices/{voice_id}/preview", json={"text": "你好"})
|
||||
assert preview_resp.status_code == 200
|
||||
payload = preview_resp.json()
|
||||
assert payload["success"] is True
|
||||
assert payload["audio_url"].startswith("data:audio/mpeg;base64,")
|
||||
encoded = payload["audio_url"].split(",", 1)[1]
|
||||
assert base64.b64decode(encoded) == b"fake-mp3-bytes"
|
||||
|
||||
def test_voice_credential_persist_and_preview_use_voice_key(self, client, monkeypatch):
|
||||
"""Test per-voice api_key/base_url persisted and used by preview endpoint"""
|
||||
from app.routers import voices as voice_router
|
||||
|
||||
captured_auth = {"value": ""}
|
||||
captured_url = {"value": ""}
|
||||
|
||||
class DummyResponse:
|
||||
status_code = 200
|
||||
content = b"fake-mp3"
|
||||
text = "ok"
|
||||
|
||||
def json(self):
|
||||
return {}
|
||||
|
||||
class DummyClient:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def post(self, *args, **kwargs):
|
||||
headers = kwargs.get("headers", {})
|
||||
captured_auth["value"] = headers.get("Authorization", "")
|
||||
if args:
|
||||
captured_url["value"] = args[0]
|
||||
return DummyResponse()
|
||||
|
||||
monkeypatch.delenv("SILICONFLOW_API_KEY", raising=False)
|
||||
monkeypatch.setattr(voice_router.httpx, "Client", DummyClient)
|
||||
|
||||
create_resp = client.post("/api/voices", json={
|
||||
"id": "anna2",
|
||||
"name": "Anna 2",
|
||||
"vendor": "SiliconFlow",
|
||||
"gender": "Female",
|
||||
"language": "zh",
|
||||
"description": "voice",
|
||||
"model": "FunAudioLLM/CosyVoice2-0.5B",
|
||||
"voice_key": "FunAudioLLM/CosyVoice2-0.5B:anna",
|
||||
"api_key": "voice-key-123",
|
||||
"base_url": "https://api.siliconflow.cn/v1"
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
voice_id = create_resp.json()["id"]
|
||||
|
||||
preview_resp = client.post(f"/api/voices/{voice_id}/preview", json={"text": "hello"})
|
||||
assert preview_resp.status_code == 200
|
||||
assert captured_auth["value"] == "Bearer voice-key-123"
|
||||
assert captured_url["value"] == "https://api.siliconflow.cn/v1/audio/speech"
|
||||
|
||||
def test_create_voice_dashscope_defaults(self, client):
|
||||
"""Test creating DashScope voice applies model/voice defaults."""
|
||||
create_resp = client.post("/api/voices", json={
|
||||
"name": "DashScope Voice",
|
||||
"vendor": "DashScope",
|
||||
"gender": "Female",
|
||||
"language": "zh",
|
||||
"description": "dashscope",
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
payload = create_resp.json()
|
||||
assert payload["vendor"] == "DashScope"
|
||||
assert payload["model"] == "qwen3-tts-flash-realtime"
|
||||
assert payload["voice_key"] == "Cherry"
|
||||
|
||||
def test_preview_voice_dashscope_success(self, client, monkeypatch):
|
||||
"""DashScope voice preview should return playable wav data url."""
|
||||
from app.routers import voices as voice_router
|
||||
|
||||
captured = {
|
||||
"api_key": "",
|
||||
"model": "",
|
||||
"url": "",
|
||||
"session": {},
|
||||
"text": "",
|
||||
}
|
||||
|
||||
class DummyAudioFormat:
|
||||
PCM_24000HZ_MONO_16BIT = "pcm24k16mono"
|
||||
|
||||
class DummyDashScopeModule:
|
||||
api_key = ""
|
||||
|
||||
class DummyRealtime:
|
||||
def __init__(self, *args, **kwargs):
|
||||
captured["api_key"] = kwargs.get("api_key", "")
|
||||
captured["model"] = kwargs.get("model", "")
|
||||
captured["url"] = kwargs.get("url", "")
|
||||
self.callback = kwargs["callback"]
|
||||
|
||||
def connect(self):
|
||||
self.callback.on_open()
|
||||
|
||||
def update_session(self, **kwargs):
|
||||
captured["session"] = kwargs
|
||||
|
||||
def append_text(self, text):
|
||||
captured["text"] = text
|
||||
|
||||
def commit(self):
|
||||
# 16-bit PCM mono samples
|
||||
raw_pcm = b"\x00\x00\x01\x00\x02\x00\x03\x00"
|
||||
self.callback.on_event({
|
||||
"type": "response.audio.delta",
|
||||
"delta": base64.b64encode(raw_pcm).decode("utf-8"),
|
||||
})
|
||||
self.callback.on_event({"type": "response.done"})
|
||||
|
||||
def finish(self):
|
||||
return None
|
||||
|
||||
def close(self):
|
||||
return None
|
||||
|
||||
monkeypatch.setattr(voice_router, "DASHSCOPE_SDK_AVAILABLE", True)
|
||||
monkeypatch.setattr(voice_router, "AudioFormat", DummyAudioFormat)
|
||||
monkeypatch.setattr(voice_router, "QwenTtsRealtime", DummyRealtime)
|
||||
monkeypatch.setattr(voice_router, "dashscope", DummyDashScopeModule())
|
||||
|
||||
create_resp = client.post("/api/voices", json={
|
||||
"name": "DashScope Voice",
|
||||
"vendor": "DashScope",
|
||||
"gender": "Female",
|
||||
"language": "zh",
|
||||
"description": "dashscope",
|
||||
"api_key": "dashscope-key",
|
||||
"base_url": "wss://dashscope.aliyuncs.com/api-ws/v1/realtime",
|
||||
})
|
||||
assert create_resp.status_code == 200
|
||||
voice_id = create_resp.json()["id"]
|
||||
|
||||
preview_resp = client.post(f"/api/voices/{voice_id}/preview", json={"text": "你好"})
|
||||
assert preview_resp.status_code == 200
|
||||
payload = preview_resp.json()
|
||||
assert payload["success"] is True
|
||||
assert payload["audio_url"].startswith("data:audio/wav;base64,")
|
||||
encoded = payload["audio_url"].split(",", 1)[1]
|
||||
wav_bytes = base64.b64decode(encoded)
|
||||
assert wav_bytes.startswith(b"RIFF")
|
||||
assert captured["model"] == "qwen3-tts-flash-realtime"
|
||||
assert captured["url"] == "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
|
||||
assert captured["text"] == "你好"
|
||||
assert captured["session"]["voice"] == "Cherry"
|
||||
167
api/tests/test_workflows.py
Normal file
167
api/tests/test_workflows.py
Normal file
@@ -0,0 +1,167 @@
|
||||
"""Tests for workflow graph schema and router behavior."""
|
||||
|
||||
|
||||
class TestWorkflowAPI:
|
||||
"""Workflow CRUD and graph validation test cases."""
|
||||
|
||||
def _minimal_nodes(self):
|
||||
return [
|
||||
{
|
||||
"id": "start_1",
|
||||
"name": "start_1",
|
||||
"type": "start",
|
||||
"isStart": True,
|
||||
"metadata": {"position": {"x": 80, "y": 80}},
|
||||
},
|
||||
{
|
||||
"id": "assistant_1",
|
||||
"name": "assistant_1",
|
||||
"type": "assistant",
|
||||
"metadata": {"position": {"x": 280, "y": 80}},
|
||||
"prompt": "You are the first assistant node.",
|
||||
},
|
||||
]
|
||||
|
||||
def test_create_workflow_with_canonical_graph(self, client):
|
||||
payload = {
|
||||
"name": "Canonical Graph",
|
||||
"nodes": self._minimal_nodes(),
|
||||
"edges": [
|
||||
{
|
||||
"id": "edge_start_assistant",
|
||||
"fromNodeId": "start_1",
|
||||
"toNodeId": "assistant_1",
|
||||
"condition": {"type": "always"},
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
resp = client.post("/api/workflows", json=payload)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["name"] == "Canonical Graph"
|
||||
assert data["nodeCount"] == 2
|
||||
assert data["nodes"][0]["id"] == "start_1"
|
||||
assert data["edges"][0]["fromNodeId"] == "start_1"
|
||||
assert data["edges"][0]["toNodeId"] == "assistant_1"
|
||||
|
||||
def test_create_workflow_with_legacy_graph(self, client):
|
||||
payload = {
|
||||
"name": "Legacy Graph",
|
||||
"nodes": [
|
||||
{
|
||||
"name": "legacy_start",
|
||||
"type": "conversation",
|
||||
"isStart": True,
|
||||
"metadata": {"position": {"x": 100, "y": 100}},
|
||||
},
|
||||
{
|
||||
"name": "legacy_human",
|
||||
"type": "human",
|
||||
"metadata": {"position": {"x": 300, "y": 100}},
|
||||
},
|
||||
],
|
||||
"edges": [
|
||||
{
|
||||
"from": "legacy_start",
|
||||
"to": "legacy_human",
|
||||
"label": "人工",
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
resp = client.post("/api/workflows", json=payload)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["nodes"][0]["type"] == "assistant"
|
||||
assert data["nodes"][1]["type"] == "human_transfer"
|
||||
assert data["edges"][0]["fromNodeId"] == "legacy_start"
|
||||
assert data["edges"][0]["toNodeId"] == "legacy_human"
|
||||
assert data["edges"][0]["condition"]["type"] == "contains"
|
||||
|
||||
def test_create_workflow_without_start_node_fails(self, client):
|
||||
payload = {
|
||||
"name": "No Start",
|
||||
"nodes": [
|
||||
{"id": "node_1", "name": "node_1", "type": "assistant", "metadata": {"position": {"x": 0, "y": 0}}},
|
||||
],
|
||||
"edges": [],
|
||||
}
|
||||
resp = client.post("/api/workflows", json=payload)
|
||||
assert resp.status_code == 422
|
||||
|
||||
def test_create_workflow_with_invalid_edge_fails(self, client):
|
||||
payload = {
|
||||
"name": "Bad Edge",
|
||||
"nodes": self._minimal_nodes(),
|
||||
"edges": [
|
||||
{"id": "edge_bad", "fromNodeId": "missing", "toNodeId": "assistant_1", "condition": {"type": "always"}},
|
||||
],
|
||||
}
|
||||
resp = client.post("/api/workflows", json=payload)
|
||||
assert resp.status_code == 422
|
||||
|
||||
def test_update_workflow_nodes_and_edges(self, client):
|
||||
create_payload = {
|
||||
"name": "Before Update",
|
||||
"nodes": self._minimal_nodes(),
|
||||
"edges": [
|
||||
{
|
||||
"id": "edge_start_assistant",
|
||||
"fromNodeId": "start_1",
|
||||
"toNodeId": "assistant_1",
|
||||
"condition": {"type": "always"},
|
||||
}
|
||||
],
|
||||
}
|
||||
create_resp = client.post("/api/workflows", json=create_payload)
|
||||
assert create_resp.status_code == 200
|
||||
workflow_id = create_resp.json()["id"]
|
||||
|
||||
update_payload = {
|
||||
"name": "After Update",
|
||||
"nodes": [
|
||||
{
|
||||
"id": "start_1",
|
||||
"name": "start_1",
|
||||
"type": "start",
|
||||
"isStart": True,
|
||||
"metadata": {"position": {"x": 50, "y": 50}},
|
||||
},
|
||||
{
|
||||
"id": "assistant_2",
|
||||
"name": "assistant_2",
|
||||
"type": "assistant",
|
||||
"metadata": {"position": {"x": 250, "y": 50}},
|
||||
"prompt": "new prompt",
|
||||
},
|
||||
{
|
||||
"id": "end_1",
|
||||
"name": "end_1",
|
||||
"type": "end",
|
||||
"metadata": {"position": {"x": 450, "y": 50}},
|
||||
},
|
||||
],
|
||||
"edges": [
|
||||
{
|
||||
"id": "edge_start_assistant2",
|
||||
"fromNodeId": "start_1",
|
||||
"toNodeId": "assistant_2",
|
||||
"condition": {"type": "always"},
|
||||
},
|
||||
{
|
||||
"id": "edge_assistant2_end",
|
||||
"fromNodeId": "assistant_2",
|
||||
"toNodeId": "end_1",
|
||||
"condition": {"type": "contains", "source": "user", "value": "结束"},
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
update_resp = client.put(f"/api/workflows/{workflow_id}", json=update_payload)
|
||||
assert update_resp.status_code == 200
|
||||
updated = update_resp.json()
|
||||
assert updated["name"] == "After Update"
|
||||
assert updated["nodeCount"] == 3
|
||||
assert len(updated["nodes"]) == 3
|
||||
assert len(updated["edges"]) == 2
|
||||
1
changelog/README.md
Normal file
1
changelog/README.md
Normal file
@@ -0,0 +1 @@
|
||||
# Changelog
|
||||
@@ -1 +1,78 @@
|
||||
# Docker Deployment
|
||||
# Docker Deployment
|
||||
|
||||
This folder contains Docker Compose configuration to run the entire AI VideoAssistant stack.
|
||||
|
||||
## Services
|
||||
|
||||
| Service | Port | Description |
|
||||
|---------|------|-------------|
|
||||
| minio | 9000, 9001 | S3-compatible object storage |
|
||||
| backend | 8100 | FastAPI backend API |
|
||||
| engine | 8001 | Conversation engine (WebSocket) |
|
||||
| frontend | 6000 | React web application |
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. Docker and Docker Compose installed
|
||||
2. The `engine/data/vad/silero_vad.onnx` VAD model file must exist
|
||||
3. Agent configuration in `engine/config/agents/default.yaml`
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
cd docker
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
## Access Points
|
||||
|
||||
- **Frontend**: http://localhost:6000
|
||||
- **Backend API**: http://localhost:8100
|
||||
- **Engine WebSocket**: ws://localhost:8001/ws
|
||||
- **MinIO Console**: http://localhost:9001 (admin / password123)
|
||||
|
||||
## Configuration
|
||||
|
||||
### Engine Environment Variables
|
||||
|
||||
The engine service uses environment variables for configuration. Key variables:
|
||||
|
||||
- `BACKEND_URL`: Backend API URL (default: `http://backend:8100`)
|
||||
- `LOG_LEVEL`: Logging level (default: `INFO`)
|
||||
- `CORS_ORIGINS`: Allowed CORS origins
|
||||
|
||||
Agent-specific settings (LLM, TTS, ASR) are configured via YAML files in `engine/config/agents/`.
|
||||
|
||||
### Volumes
|
||||
|
||||
- `minio_data`: MinIO storage data
|
||||
- `backend_data`: Backend SQLite database
|
||||
- `engine_logs`: Engine log files
|
||||
|
||||
## Development Mode
|
||||
|
||||
To mount source code for hot-reload during development:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
|
||||
```
|
||||
|
||||
## Logs
|
||||
|
||||
```bash
|
||||
# View all logs
|
||||
docker compose logs -f
|
||||
|
||||
# View specific service logs
|
||||
docker compose logs -f engine
|
||||
docker compose logs -f backend
|
||||
```
|
||||
|
||||
## Stopping
|
||||
|
||||
```bash
|
||||
docker compose down
|
||||
|
||||
# Remove volumes as well
|
||||
docker compose down -v
|
||||
```
|
||||
|
||||
@@ -1,13 +1,37 @@
|
||||
version: '3.8'
|
||||
# Project name used as prefix for containers, volumes, and networks
|
||||
name: ras
|
||||
|
||||
# Docker registry mirror for China users (change to empty or "docker.io" if you have direct access)
|
||||
x-registry-mirror: ®istry-mirror docker.1ms.run
|
||||
|
||||
services:
|
||||
# 后端 API
|
||||
# MinIO (S3 compatible storage)
|
||||
minio:
|
||||
image: ${REGISTRY_MIRROR:-docker.1ms.run}/minio/minio
|
||||
ports:
|
||||
- "9000:9000"
|
||||
- "9001:9001"
|
||||
volumes:
|
||||
- minio_data:/data
|
||||
environment:
|
||||
MINIO_ROOT_USER: admin
|
||||
MINIO_ROOT_PASSWORD: password123
|
||||
command: server /data --console-address ":9001"
|
||||
healthcheck:
|
||||
test: ["CMD", "mc", "ready", "local"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# Backend API
|
||||
backend:
|
||||
build:
|
||||
context: ./backend
|
||||
context: ../api
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
REGISTRY_MIRROR: ${REGISTRY_MIRROR:-docker.1ms.run}
|
||||
ports:
|
||||
- "8000:8000"
|
||||
- "8100:8100"
|
||||
environment:
|
||||
- DATABASE_URL=sqlite:///./data/app.db
|
||||
- MINIO_ENDPOINT=minio:9000
|
||||
@@ -15,35 +39,83 @@ services:
|
||||
- MINIO_SECRET_KEY=password123
|
||||
- MINIO_BUCKET=ai-audio
|
||||
volumes:
|
||||
- ./backend:/app
|
||||
- ./backend/data:/app/data
|
||||
- backend_data:/app/data
|
||||
depends_on:
|
||||
- minio
|
||||
minio:
|
||||
condition: service_started
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8100/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 10s
|
||||
|
||||
# 对话引擎 (py-active-call)
|
||||
# Conversation Engine
|
||||
engine:
|
||||
build:
|
||||
context: ../py-active-call
|
||||
context: ../engine
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "8001:8001"
|
||||
environment:
|
||||
- BACKEND_URL=http://backend:8000
|
||||
- HOST=0.0.0.0
|
||||
- PORT=8001
|
||||
- BACKEND_MODE=http
|
||||
- BACKEND_URL=http://backend:8100
|
||||
- LOG_LEVEL=INFO
|
||||
- CORS_ORIGINS=["http://localhost:6000","http://localhost:3000"]
|
||||
volumes:
|
||||
- ../engine/config:/app/config:ro
|
||||
- ../engine/data:/app/data:ro
|
||||
- engine_logs:/app/logs
|
||||
depends_on:
|
||||
backend:
|
||||
condition: service_started
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 15s
|
||||
|
||||
# Frontend (Vite + React) – production: built static files served on 6000
|
||||
frontend:
|
||||
build:
|
||||
context: ../web
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
REGISTRY_MIRROR: ${REGISTRY_MIRROR:-docker.1ms.run}
|
||||
VITE_API_BASE_URL: ${VITE_API_BASE_URL:-http://localhost:8100/api}
|
||||
VITE_ENGINE_WS_URL: ${VITE_ENGINE_WS_URL:-ws://localhost:8001/ws}
|
||||
ports:
|
||||
- "6000:6000"
|
||||
depends_on:
|
||||
- backend
|
||||
- engine
|
||||
|
||||
# MinIO (S3 兼容存储)
|
||||
minio:
|
||||
image: minio/minio
|
||||
# Frontend dev – hot reload on port 3000 (run with: docker compose --profile dev up)
|
||||
frontend-dev:
|
||||
profiles:
|
||||
- dev
|
||||
build:
|
||||
context: ../web
|
||||
dockerfile: Dockerfile.dev
|
||||
args:
|
||||
REGISTRY_MIRROR: ${REGISTRY_MIRROR:-docker.1ms.run}
|
||||
ports:
|
||||
- "9000:9000"
|
||||
- "9001:9001"
|
||||
volumes:
|
||||
- ./storage/minio/data:/data
|
||||
- "3000:3000"
|
||||
environment:
|
||||
MINIO_ROOT_USER: admin
|
||||
MINIO_ROOT_PASSWORD: password123
|
||||
command: server /data --console-address ":9001"
|
||||
- VITE_API_BASE_URL=${VITE_API_BASE_URL:-http://localhost:8100/api}
|
||||
- VITE_ENGINE_WS_URL=${VITE_ENGINE_WS_URL:-ws://localhost:8001/ws}
|
||||
volumes:
|
||||
- ../web:/app
|
||||
- frontend_dev_node_modules:/app/node_modules
|
||||
depends_on:
|
||||
- backend
|
||||
- engine
|
||||
|
||||
volumes:
|
||||
minio-data:
|
||||
minio_data:
|
||||
backend_data:
|
||||
engine_logs:
|
||||
frontend_dev_node_modules:
|
||||
|
||||
@@ -1,7 +1,18 @@
|
||||
# Documentation
|
||||
|
||||
部署 MkDocs:
|
||||
pip install mkdocs
|
||||
mkdocs serve
|
||||
**安装依赖(推荐使用 1.x,避免与 Material 主题不兼容):**
|
||||
|
||||
访问 http://localhost:8000 查看文档网站。
|
||||
```bash
|
||||
cd docs
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
或手动安装:`pip install "mkdocs>=1.6,<2" mkdocs-material`
|
||||
|
||||
**本地预览:**
|
||||
|
||||
```bash
|
||||
mkdocs serve
|
||||
```
|
||||
|
||||
访问终端中显示的地址(如 http://127.0.0.1:8000)查看文档。
|
||||
166
docs/content/analysis/evaluation.md
Normal file
166
docs/content/analysis/evaluation.md
Normal file
@@ -0,0 +1,166 @@
|
||||
# 效果评估
|
||||
|
||||
效果评估帮助你系统地衡量和改进助手的对话质量。
|
||||
|
||||
## 评估维度
|
||||
|
||||
### 核心指标
|
||||
|
||||
| 指标 | 说明 | 计算方式 |
|
||||
|------|------|---------|
|
||||
| **解决率** | 用户问题被成功解决的比例 | 已解决 / 总对话数 |
|
||||
| **准确率** | 回复内容正确的比例 | 正确回复 / 总回复数 |
|
||||
| **满意度** | 用户满意的对话比例 | 满意评价 / 总评价数 |
|
||||
| **转人工率** | 需要人工介入的比例 | 转人工数 / 总对话数 |
|
||||
|
||||
### 性能指标
|
||||
|
||||
| 指标 | 说明 | 建议值 |
|
||||
|------|------|--------|
|
||||
| **首次响应时间** | 用户输入到首次回复的时间 | < 2s |
|
||||
| **平均对话轮次** | 解决问题需要的平均轮数 | < 5 轮 |
|
||||
| **平均对话时长** | 单次对话的平均时长 | 视场景而定 |
|
||||
|
||||
## 配置评估标准
|
||||
|
||||
在助手配置中设置评估标准:
|
||||
|
||||
### 解决标准
|
||||
|
||||
定义什么情况视为"问题已解决":
|
||||
|
||||
```
|
||||
评估标准:solved_inquiry
|
||||
描述:用户的问题得到了满意的解答
|
||||
|
||||
成功条件:
|
||||
- 用户明确表示问题已解决
|
||||
- 用户表示感谢并结束对话
|
||||
- 用户获得了所需信息
|
||||
|
||||
失败条件:
|
||||
- 用户要求转人工
|
||||
- 用户多次重复相同问题
|
||||
- 用户表达不满
|
||||
```
|
||||
|
||||
### 质量标准
|
||||
|
||||
定义回复质量的评估维度:
|
||||
|
||||
```
|
||||
评估维度:
|
||||
1. 准确性 - 信息是否正确
|
||||
2. 完整性 - 是否回答了用户所有问题
|
||||
3. 相关性 - 回复是否切题
|
||||
4. 简洁性 - 是否避免了冗余信息
|
||||
5. 语气 - 是否保持了友好专业的态度
|
||||
```
|
||||
|
||||
## 数据收集
|
||||
|
||||
### 自动收集
|
||||
|
||||
系统自动收集以下数据:
|
||||
|
||||
- 对话内容和时间戳
|
||||
- 工具调用记录
|
||||
- 错误和异常
|
||||
- 转人工事件
|
||||
|
||||
### 用户反馈
|
||||
|
||||
配置用户反馈收集:
|
||||
|
||||
1. 对话结束后显示满意度评价
|
||||
2. 收集用户评分(1-5 分)
|
||||
3. 可选的文字反馈
|
||||
|
||||
### 数据提取
|
||||
|
||||
配置需要从对话中提取的信息:
|
||||
|
||||
```
|
||||
数据提取项:
|
||||
|
||||
1. user_intent
|
||||
描述:用户的主要意图
|
||||
类型:string
|
||||
|
||||
2. issue_category
|
||||
描述:问题分类
|
||||
类型:enum [产品问题, 订单问题, 技术问题, 其他]
|
||||
|
||||
3. resolution_status
|
||||
描述:解决状态
|
||||
类型:enum [已解决, 未解决, 转人工]
|
||||
```
|
||||
|
||||
## 评估报告
|
||||
|
||||
### 查看报告
|
||||
|
||||
在 **数据分析** > **效果评估** 页面查看:
|
||||
|
||||
1. **总体概览** - 核心指标趋势图
|
||||
2. **分类分析** - 按问题类型的评估结果
|
||||
3. **时段分析** - 不同时间段的表现
|
||||
4. **详细记录** - 单条对话的评估结果
|
||||
|
||||
### 报告示例
|
||||
|
||||
```
|
||||
评估报告 - 2025年1月
|
||||
|
||||
总对话数:1,234
|
||||
解决率:78.5%
|
||||
准确率:85.2%
|
||||
平均满意度:4.2/5
|
||||
转人工率:12.3%
|
||||
|
||||
问题分类分布:
|
||||
- 产品问题:45%
|
||||
- 订单问题:30%
|
||||
- 技术问题:15%
|
||||
- 其他:10%
|
||||
|
||||
改进建议:
|
||||
1. 订单问题解决率较低(65%),建议补充订单相关知识库
|
||||
2. 技术问题转人工率高(25%),建议增加技术支持工具
|
||||
```
|
||||
|
||||
## 持续改进
|
||||
|
||||
### 改进流程
|
||||
|
||||
1. **收集数据** - 持续收集对话和评估数据
|
||||
2. **分析问题** - 找出低分对话的共性
|
||||
3. **制定方案** - 针对问题制定改进措施
|
||||
4. **实施改进** - 更新提示词、知识库或工具
|
||||
5. **验证效果** - 观察改进后的指标变化
|
||||
|
||||
### 常见改进措施
|
||||
|
||||
| 问题 | 改进措施 |
|
||||
|------|---------|
|
||||
| 回复不准确 | 优化提示词,补充知识库 |
|
||||
| 无法理解问题 | 增加示例,优化 ASR 热词 |
|
||||
| 回复太长 | 在提示词中限制长度 |
|
||||
| 缺少专业知识 | 上传相关文档到知识库 |
|
||||
| 工具调用失败 | 检查工具配置和 API 状态 |
|
||||
|
||||
### A/B 测试
|
||||
|
||||
对比不同配置的效果:
|
||||
|
||||
1. 创建助手的变体版本
|
||||
2. 按比例分配流量
|
||||
3. 收集两个版本的评估数据
|
||||
4. 比较各项指标
|
||||
5. 选择效果更好的版本
|
||||
|
||||
## 下一步
|
||||
|
||||
- [自动化测试](autotest.md) - 批量测试助手
|
||||
- [历史记录](history.md) - 查看对话详情
|
||||
- [提示词指南](../concepts/assistants/prompts.md) - 优化提示词
|
||||
88
docs/content/api-reference/errors.md
Normal file
88
docs/content/api-reference/errors.md
Normal file
@@ -0,0 +1,88 @@
|
||||
# 错误码
|
||||
|
||||
本文档列出 Realtime Agent Studio (RAS) API 的所有错误码及其说明。
|
||||
|
||||
## 协议错误
|
||||
|
||||
| 错误码 | 说明 | 解决方案 |
|
||||
|---|---|---|
|
||||
| `protocol.invalid_json` | JSON 格式错误 | 检查发送的 JSON 是否合法 |
|
||||
| `protocol.invalid_message` | 消息格式错误 | 检查消息结构是否符合协议 |
|
||||
| `protocol.order` | 消息顺序错误 | 确保先发送 `session.start` |
|
||||
| `protocol.assistant_id_required` | 缺少 `assistant_id` query 参数 | 在连接 URL 中添加 `assistant_id` 参数 |
|
||||
| `protocol.invalid_override` | metadata 覆盖字段不合法 | 检查 overrides 字段是否在白名单内 |
|
||||
|
||||
## 助手错误
|
||||
|
||||
| 错误码 | 说明 | 解决方案 |
|
||||
|---|---|---|
|
||||
| `assistant.not_found` | 助手不存在 | 检查 `assistant_id` 是否正确 |
|
||||
| `assistant.config_unavailable` | 助手配置不可用 | 确认助手已正确配置并发布 |
|
||||
|
||||
## 音频错误
|
||||
|
||||
| 错误码 | 说明 | 解决方案 |
|
||||
|---|---|---|
|
||||
| `audio.invalid_pcm` | PCM 数据无效 | 检查音频格式是否为 `pcm_s16le` |
|
||||
| `audio.frame_size_mismatch` | 音频帧大小不匹配 | 确保帧长度是 640 字节的整数倍 |
|
||||
|
||||
## 服务器错误
|
||||
|
||||
| 错误码 | 说明 | 解决方案 |
|
||||
|---|---|---|
|
||||
| `server.internal` | 服务端内部错误 | 查看服务端日志排查问题 |
|
||||
|
||||
## 错误响应格式
|
||||
|
||||
所有错误都通过 `error` 事件返回:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "error",
|
||||
"timestamp": 1730000000000,
|
||||
"sessionId": "sess_xxx",
|
||||
"data": {
|
||||
"code": "protocol.invalid_json",
|
||||
"message": "Invalid JSON format",
|
||||
"details": {}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## HTTP API 错误
|
||||
|
||||
REST API 使用标准 HTTP 状态码:
|
||||
|
||||
| 状态码 | 说明 |
|
||||
|--------|------|
|
||||
| 200 | 请求成功 |
|
||||
| 201 | 创建成功 |
|
||||
| 400 | 请求参数错误 |
|
||||
| 401 | 未授权(缺少或无效的认证信息) |
|
||||
| 403 | 禁止访问(权限不足) |
|
||||
| 404 | 资源不存在 |
|
||||
| 422 | 请求实体无法处理 |
|
||||
| 500 | 服务器内部错误 |
|
||||
|
||||
### HTTP 错误响应示例
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"error": {
|
||||
"code": "VALIDATION_ERROR",
|
||||
"message": "Invalid request parameters",
|
||||
"details": {
|
||||
"field": "name",
|
||||
"reason": "required"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 错误处理最佳实践
|
||||
|
||||
1. **始终检查错误响应** - 不要假设请求一定成功
|
||||
2. **实现重试机制** - 对于临时性错误(如网络问题)实现指数退避重试
|
||||
3. **记录错误日志** - 保存错误详情用于问题排查
|
||||
4. **友好的用户提示** - 将技术错误转换为用户可理解的提示
|
||||
235
docs/content/api-reference/index.md
Normal file
235
docs/content/api-reference/index.md
Normal file
@@ -0,0 +1,235 @@
|
||||
# API 参考
|
||||
|
||||
本节提供 Realtime Agent Studio (RAS) 的完整 API 文档。
|
||||
|
||||
## API 概览
|
||||
|
||||
Realtime Agent Studio (RAS) 提供两种类型的 API:
|
||||
|
||||
| API 类型 | 用途 | 协议 |
|
||||
|---------|------|------|
|
||||
| **REST API** | 管理助手、模型、知识库等资源 | HTTP |
|
||||
| **WebSocket API** | 实时语音对话 | WebSocket |
|
||||
|
||||
## REST API
|
||||
|
||||
### 基础地址
|
||||
|
||||
```
|
||||
http://localhost:8000/api/v1
|
||||
```
|
||||
|
||||
### 认证
|
||||
|
||||
REST API 使用 Bearer Token 认证:
|
||||
|
||||
```bash
|
||||
curl -H "Authorization: Bearer YOUR_API_KEY" \
|
||||
http://localhost:8000/api/v1/assistants
|
||||
```
|
||||
|
||||
### 通用响应格式
|
||||
|
||||
**成功响应**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"data": { ... }
|
||||
}
|
||||
```
|
||||
|
||||
**列表响应**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"data": {
|
||||
"items": [...],
|
||||
"total": 100,
|
||||
"page": 1,
|
||||
"page_size": 20
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**错误响应**
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"error": {
|
||||
"code": "ERROR_CODE",
|
||||
"message": "错误描述"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 主要端点
|
||||
|
||||
#### 助手管理
|
||||
|
||||
| 方法 | 路径 | 说明 |
|
||||
|------|------|------|
|
||||
| GET | /assistants | 获取助手列表 |
|
||||
| POST | /assistants | 创建助手 |
|
||||
| GET | /assistants/{id} | 获取助手详情 |
|
||||
| PUT | /assistants/{id} | 更新助手 |
|
||||
| DELETE | /assistants/{id} | 删除助手 |
|
||||
| GET | /assistants/{id}/config | 获取引擎配置 |
|
||||
| GET | /assistants/{id}/opener-audio | 获取开场音频状态 |
|
||||
| POST | /assistants/{id}/opener-audio/generate | 生成开场音频 |
|
||||
|
||||
#### 模型管理
|
||||
|
||||
| 方法 | 路径 | 说明 |
|
||||
|------|------|------|
|
||||
| GET | /llm | 获取 LLM 模型列表 |
|
||||
| POST | /llm | 添加 LLM 模型 |
|
||||
| PUT | /llm/{id} | 更新 LLM 模型 |
|
||||
| DELETE | /llm/{id} | 删除 LLM 模型 |
|
||||
| POST | /llm/{id}/test | 测试 LLM 连接 |
|
||||
| POST | /llm/{id}/preview | 预览模型输出 |
|
||||
| GET | /asr | 获取 ASR 模型列表 |
|
||||
| POST | /asr | 添加 ASR 模型 |
|
||||
| PUT | /asr/{id} | 更新 ASR 模型 |
|
||||
| DELETE | /asr/{id} | 删除 ASR 模型 |
|
||||
| POST | /asr/{id}/test | 测试 ASR 连接 |
|
||||
| POST | /asr/{id}/preview | 上传音频预览识别 |
|
||||
| GET | /voices | 获取语音列表 |
|
||||
| POST | /voices | 添加语音配置 |
|
||||
| PUT | /voices/{id} | 更新语音配置 |
|
||||
| DELETE | /voices/{id} | 删除语音配置 |
|
||||
| POST | /voices/{id}/preview | 预览声音 |
|
||||
|
||||
#### 知识库管理
|
||||
|
||||
| 方法 | 路径 | 说明 |
|
||||
|------|------|------|
|
||||
| GET | /knowledge/bases | 获取知识库列表 |
|
||||
| POST | /knowledge/bases | 创建知识库 |
|
||||
| PUT | /knowledge/bases/{id} | 更新知识库 |
|
||||
| DELETE | /knowledge/bases/{id} | 删除知识库 |
|
||||
| POST | /knowledge/bases/{id}/documents | 上传文档 |
|
||||
| POST | /knowledge/bases/{id}/documents/{doc_id}/index | 索引文档内容 |
|
||||
| DELETE | /knowledge/bases/{id}/documents/{doc_id} | 删除文档 |
|
||||
| POST | /knowledge/search | 搜索知识库 |
|
||||
| GET | /knowledge/bases/{id}/stats | 获取统计信息 |
|
||||
|
||||
#### 工具管理
|
||||
|
||||
| 方法 | 路径 | 说明 |
|
||||
|------|------|------|
|
||||
| GET | /tools/list | 获取内置工具列表 |
|
||||
| GET | /tools/resources | 获取工具资源列表 |
|
||||
| POST | /tools/resources | 创建工具资源 |
|
||||
| PUT | /tools/resources/{id} | 更新工具资源 |
|
||||
| DELETE | /tools/resources/{id} | 删除工具资源 |
|
||||
| GET | /tools/health | 健康检查 |
|
||||
| POST | /tools/autotest | 运行自动测试 |
|
||||
| POST | /tools/test-message | 发送测试消息 |
|
||||
|
||||
#### 历史记录
|
||||
|
||||
| 方法 | 路径 | 说明 |
|
||||
|------|------|------|
|
||||
| GET | /history | 获取对话历史 |
|
||||
| GET | /history/{id} | 获取对话详情 |
|
||||
| POST | /history | 创建通话记录 |
|
||||
| PUT | /history/{id} | 更新通话记录 |
|
||||
| DELETE | /history/{id} | 删除通话记录 |
|
||||
| POST | /history/{id}/transcripts | 添加转写片段 |
|
||||
| GET | /history/{id}/audio/{turn_index} | 获取音频文件 |
|
||||
|
||||
## WebSocket API
|
||||
|
||||
### 连接地址
|
||||
|
||||
```
|
||||
ws://localhost:8000/ws?assistant_id=<assistant_id>
|
||||
```
|
||||
|
||||
### 协议概述
|
||||
|
||||
WebSocket API 使用双向消息通信:
|
||||
|
||||
- **文本帧**:JSON 格式的控制消息
|
||||
- **二进制帧**:PCM 音频数据
|
||||
|
||||
### 详细文档
|
||||
|
||||
- [WebSocket 协议](websocket.md) - 完整的消息格式和流程
|
||||
- [错误码](errors.md) - 错误码列表和处理方式
|
||||
|
||||
## SDK
|
||||
|
||||
> 下面的 SDK 包名和类名沿用当前包标识;产品名称在文档中统一使用 Realtime Agent Studio(RAS)。
|
||||
|
||||
### JavaScript SDK
|
||||
|
||||
```bash
|
||||
npm install @ai-video-assistant/sdk
|
||||
```
|
||||
|
||||
```javascript
|
||||
import { AIVideoAssistant } from '@ai-video-assistant/sdk';
|
||||
|
||||
const assistant = new AIVideoAssistant({
|
||||
apiUrl: 'http://localhost:8080',
|
||||
wsUrl: 'ws://localhost:8000'
|
||||
});
|
||||
|
||||
// 创建助手
|
||||
const result = await assistant.create({
|
||||
name: '客服助手',
|
||||
prompt: '你是一个友好的客服助手'
|
||||
});
|
||||
|
||||
// 开始对话
|
||||
const conversation = await assistant.connect(result.id);
|
||||
conversation.on('response', (text) => {
|
||||
console.log('助手回复:', text);
|
||||
});
|
||||
```
|
||||
|
||||
### Python SDK
|
||||
|
||||
```bash
|
||||
pip install ai-video-assistant
|
||||
```
|
||||
|
||||
```python
|
||||
from ai_video_assistant import AIVideoAssistant
|
||||
|
||||
client = AIVideoAssistant(
|
||||
api_url="http://localhost:8080",
|
||||
ws_url="ws://localhost:8000"
|
||||
)
|
||||
|
||||
# 创建助手
|
||||
assistant = client.assistants.create(
|
||||
name="客服助手",
|
||||
prompt="你是一个友好的客服助手"
|
||||
)
|
||||
|
||||
# 开始对话
|
||||
async with client.connect(assistant.id) as conv:
|
||||
response = await conv.send_text("你好")
|
||||
print(f"助手回复: {response}")
|
||||
```
|
||||
|
||||
## 速率限制
|
||||
|
||||
| 端点类型 | 限制 |
|
||||
|---------|------|
|
||||
| REST API | 100 请求/分钟 |
|
||||
| WebSocket | 10 并发连接/用户 |
|
||||
|
||||
超出限制会返回 `429 Too Many Requests`。
|
||||
|
||||
## 下一步
|
||||
|
||||
- [WebSocket 协议](websocket.md) - 实时对话协议详解
|
||||
- [错误码](errors.md) - 错误处理参考
|
||||
- [快速开始](../quickstart/index.md) - 快速创建助手
|
||||
|
||||
880
docs/content/api-reference/websocket.md
Normal file
880
docs/content/api-reference/websocket.md
Normal file
@@ -0,0 +1,880 @@
|
||||
# WebSocket 协议
|
||||
|
||||
WebSocket 端点提供双向实时语音对话能力,支持音频流输入输出和文本消息交互。
|
||||
|
||||
## 连接地址
|
||||
|
||||
```
|
||||
ws://<host>/ws?assistant_id=<assistant_id>
|
||||
```
|
||||
|
||||
- `assistant_id` 为必填 query 参数,用于从数据库加载该助手的运行时配置。
|
||||
|
||||
## 传输规则
|
||||
|
||||
- **文本帧**:JSON 格式控制消息
|
||||
- **二进制帧**:PCM 音频数据(`pcm_s16le`, 16kHz, 单声道)
|
||||
- 帧长度必须是 640 字节的整数倍(20ms 音频 = 640 bytes)
|
||||
|
||||
---
|
||||
|
||||
## 消息流程
|
||||
|
||||
```
|
||||
Client -> session.start
|
||||
Server <- session.started
|
||||
Server <- (optional) config.resolved
|
||||
Client -> (binary pcm frames...)
|
||||
Server <- input.speech_started / transcript.delta / transcript.final
|
||||
Server <- assistant.response.delta / assistant.response.final
|
||||
Server <- output.audio.start
|
||||
Server <- (binary pcm frames...)
|
||||
Server <- output.audio.end
|
||||
Client -> output.audio.played (optional)
|
||||
Client -> session.stop
|
||||
Server <- session.stopped
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 客户端 -> 服务端消息
|
||||
|
||||
`session.start`
|
||||
|
||||
客户端连接后发送的第一个消息,用于启动对话会话。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.start",
|
||||
"audio": {
|
||||
"encoding": "pcm_s16le",
|
||||
"sample_rate_hz": 16000,
|
||||
"channels": 1
|
||||
},
|
||||
"metadata": {
|
||||
"channel": "web",
|
||||
"source": "web_debug",
|
||||
"history": {
|
||||
"userId": 1
|
||||
},
|
||||
"overrides": {
|
||||
"systemPrompt": "你是简洁助手",
|
||||
"greeting": "你好,我能帮你什么?",
|
||||
"output": {
|
||||
"mode": "audio"
|
||||
}
|
||||
},
|
||||
"dynamicVariables": {
|
||||
"customer_name": "Alice",
|
||||
"plan_tier": "Pro"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|---|---|---|---|
|
||||
| `type` | string | 是 | 固定为 `"session.start"` |
|
||||
| `audio` | object | 否 | 音频格式描述 |
|
||||
| `audio.encoding` | string | 否 | 固定为 `"pcm_s16le"` |
|
||||
| `audio.sample_rate_hz` | number | 否 | 固定为 `16000` |
|
||||
| `audio.channels` | number | 否 | 固定为 `1` |
|
||||
| `metadata` | object | 否 | 运行时配置 |
|
||||
|
||||
**metadata 支持的字段**:
|
||||
- `channel` - 渠道标识
|
||||
- `source` - 来源标识
|
||||
- `history.userId` - 历史记录用户 ID
|
||||
- `overrides` - 可覆盖字段(仅限安全白名单)
|
||||
- `dynamicVariables` - 动态变量(支持 `{{variable}}` 占位符)
|
||||
|
||||
**`metadata.overrides` 白名单字段**:
|
||||
- `systemPrompt`
|
||||
- `greeting`
|
||||
- `firstTurnMode`
|
||||
- `generatedOpenerEnabled`
|
||||
- `output`
|
||||
- `bargeIn`
|
||||
- `knowledgeBaseId`
|
||||
- `knowledge`
|
||||
- `tools`
|
||||
- `openerAudio`
|
||||
|
||||
**限制**:
|
||||
- `metadata.workflow` 会被忽略(不触发 workflow 事件)
|
||||
- 禁止提交 `metadata.services`
|
||||
- 禁止提交 `assistantId` / `appId` / `app_id` / `configVersionId` / `config_version_id`
|
||||
- 禁止提交包含密钥语义的字段(如 `apiKey` / `token` / `secret` / `password` / `authorization`)
|
||||
|
||||
---
|
||||
|
||||
`input.text`
|
||||
|
||||
发送文本输入,跳过 ASR 识别,直接触发 LLM 回复。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "input.text",
|
||||
"text": "你能做什么?"
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|---|---|---|---|
|
||||
| `type` | string | 是 | 固定为 `"input.text"` |
|
||||
| `text` | string | 是 | 用户文本内容 |
|
||||
|
||||
---
|
||||
|
||||
`response.cancel`
|
||||
|
||||
请求中断当前回答。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "response.cancel",
|
||||
"graceful": false
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 默认值 | 说明 |
|
||||
|---|---|---|---|---|
|
||||
| `type` | string | 是 | - | 固定为 `"response.cancel"` |
|
||||
| `graceful` | boolean | 否 | `false` | `false` 立即打断 |
|
||||
|
||||
---
|
||||
|
||||
`output.audio.played`
|
||||
|
||||
客户端回执音频已在本地播放完成(含本地 jitter buffer / 播放队列)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "output.audio.played",
|
||||
"tts_id": "tts_001",
|
||||
"response_id": "resp_001",
|
||||
"turn_id": "turn_001",
|
||||
"played_at_ms": 1730000018450,
|
||||
"played_ms": 2520
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|---|---|---|---|
|
||||
| `type` | string | 是 | 固定为 `"output.audio.played"` |
|
||||
| `tts_id` | string | 是 | 已完成播放的 TTS 段 ID |
|
||||
| `response_id` | string | 否 | 所属回复 ID(建议回传) |
|
||||
| `turn_id` | string | 否 | 所属轮次 ID(建议回传) |
|
||||
| `played_at_ms` | number | 否 | 客户端本地播放完成时间戳(毫秒) |
|
||||
| `played_ms` | number | 否 | 本次播放耗时(毫秒) |
|
||||
|
||||
---
|
||||
|
||||
`tool_call.results`
|
||||
|
||||
回传客户端执行的工具结果。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "tool_call.results",
|
||||
"results": [
|
||||
{
|
||||
"tool_call_id": "call_abc123",
|
||||
"name": "weather",
|
||||
"output": { "temp_c": 21, "condition": "sunny" },
|
||||
"status": { "code": 200, "message": "ok" }
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|---|---|---|---|
|
||||
| `type` | string | 是 | 固定为 `"tool_call.results"` |
|
||||
| `results` | array | 否 | 工具结果列表 |
|
||||
| `results[].tool_call_id` | string | 是 | 工具调用 ID |
|
||||
| `results[].name` | string | 是 | 工具名称 |
|
||||
| `results[].output` | any | 否 | 工具输出 |
|
||||
| `results[].status` | object | 是 | 执行状态 |
|
||||
| `results[].status.code` | number | 是 | HTTP 状态码(200-299 表示成功) |
|
||||
| `results[].status.message` | string | 是 | 状态描述 |
|
||||
|
||||
---
|
||||
|
||||
`session.stop`
|
||||
|
||||
结束对话会话。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.stop",
|
||||
"reason": "client_disconnect"
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 必填 | 说明 |
|
||||
|---|---|---|---|
|
||||
| `type` | string | 是 | 固定为 `"session.stop"` |
|
||||
| `reason` | string | 否 | 结束原因 |
|
||||
|
||||
---
|
||||
|
||||
`Binary Audio`
|
||||
|
||||
在 `session.started` 之后可持续发送二进制 PCM 音频。
|
||||
|
||||
- **格式**:`pcm_s16le`
|
||||
- **采样率**:16000 Hz
|
||||
- **声道**:1(单声道)
|
||||
- **帧长**:20ms = 640 bytes
|
||||
|
||||
---
|
||||
|
||||
## 服务端 -> 客户端事件
|
||||
|
||||
### 事件包络
|
||||
|
||||
所有 JSON 事件都包含统一包络字段:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "event.name",
|
||||
"timestamp": 1730000000000,
|
||||
"sessionId": "sess_xxx",
|
||||
"seq": 42,
|
||||
"source": "asr",
|
||||
"trackId": "audio_in",
|
||||
"data": {}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `type` | string | 事件类型 |
|
||||
| `timestamp` | number | 事件时间戳(Unix 毫秒) |
|
||||
| `sessionId` | string | 会话 ID |
|
||||
| `seq` | number | 递增序号(用于重放/恢复) |
|
||||
| `source` | string | 事件来源:`asr` / `llm` / `tts` / `tool` / `system` / `client` / `server` |
|
||||
| `trackId` | string | 事件轨道:`audio_in` / `audio_out` / `control` |
|
||||
| `data` | object | 业务数据(可选) |
|
||||
|
||||
**轨道 ID 说明**:
|
||||
|
||||
| trackId | 说明 | 相关事件 |
|
||||
|---------|------|---------|
|
||||
| `audio_in` | ASR/VAD 输入侧事件 | `input.*`, `transcript.*` |
|
||||
| `audio_out` | 助手输出侧事件 | `assistant.*`, `output.audio.*`, `response.interrupted`, `metrics.ttfb` |
|
||||
| `control` | 会话控制事件 | `session.*`, `error`, `heartbeat`, `(optional) config.resolved` |
|
||||
|
||||
---
|
||||
|
||||
### 会话控制类事件
|
||||
|
||||
#### `session.started`
|
||||
|
||||
会话启动成功,客户端收到此事件后可以开始发送音频。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.started",
|
||||
"timestamp": 1730000000000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 1,
|
||||
"trackId": "control",
|
||||
"tracks": {
|
||||
"audio_in": "audio_in",
|
||||
"audio_out": "audio_out",
|
||||
"control": "control"
|
||||
},
|
||||
"audio": {
|
||||
"encoding": "pcm_s16le",
|
||||
"sample_rate_hz": 16000,
|
||||
"channels": 1
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `sessionId` | string | 会话唯一标识符 |
|
||||
| `trackId` | string | 固定为 `"control"` |
|
||||
| `tracks` | object | 可用轨道列表 |
|
||||
| `tracks.audio_in` | string | 输入轨道 ID |
|
||||
| `tracks.audio_out` | string | 输出轨道 ID |
|
||||
| `tracks.control` | string | 控制轨道 ID |
|
||||
| `audio` | object | 音频格式配置 |
|
||||
| `audio.encoding` | string | 编码格式 |
|
||||
| `audio.sample_rate_hz` | number | 采样率 |
|
||||
| `audio.channels` | number | 声道数 |
|
||||
|
||||
---
|
||||
|
||||
#### `config.resolved`
|
||||
|
||||
服务端返回的**公开配置快照**。
|
||||
默认不发送(SaaS 公网模式建议关闭);仅在 `WS_EMIT_CONFIG_RESOLVED=true` 时发送。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "config.resolved",
|
||||
"timestamp": 1730000000001,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 2,
|
||||
"trackId": "control",
|
||||
"config": {
|
||||
"channel": "web_debug",
|
||||
"output": {
|
||||
"mode": "audio"
|
||||
},
|
||||
"tools": {
|
||||
"enabled": true,
|
||||
"count": 2
|
||||
},
|
||||
"tracks": {
|
||||
"audio_in": "audio_in",
|
||||
"audio_out": "audio_out",
|
||||
"control": "control"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"control"` |
|
||||
| `config` | object | SaaS 安全的公开配置快照 |
|
||||
| `config.channel` | string | 回显 `session.start.metadata.channel`(如提供) |
|
||||
| `config.output` | object | 输出配置 |
|
||||
| `config.output.mode` | string | 输出模式:`"audio"` / `"text"` |
|
||||
| `config.tools.enabled` | boolean | 是否启用工具能力 |
|
||||
| `config.tools.count` | number | 可用工具数量(不暴露工具清单) |
|
||||
| `config.tracks` | object | 可用轨道列表 |
|
||||
|
||||
**不会返回以下内部字段**:
|
||||
- `assistantId` / `appId` / `configVersionId`
|
||||
- `services`(provider/model/baseUrl 等)
|
||||
- 系统提示词原文及其它内部编排细节
|
||||
|
||||
---
|
||||
|
||||
#### `heartbeat`
|
||||
|
||||
保活心跳事件,默认每 50 秒发送一次。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "heartbeat",
|
||||
"timestamp": 1730000050000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 10
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `timestamp` | number | 心跳时间戳 |
|
||||
|
||||
---
|
||||
|
||||
#### `session.stopped`
|
||||
|
||||
会话结束确认。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.stopped",
|
||||
"timestamp": 1730000100000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 50,
|
||||
"reason": "client_requested"
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `reason` | string | 结束原因:`"client_requested"` / `"timeout"` / `"error"` |
|
||||
|
||||
---
|
||||
|
||||
### ASR 识别事件
|
||||
|
||||
#### `input.speech_started`
|
||||
|
||||
检测到语音开始(VAD)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "input.speech_started",
|
||||
"timestamp": 1730000010000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 5,
|
||||
"source": "asr",
|
||||
"trackId": "audio_in",
|
||||
"probability": 0.95
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_in"` |
|
||||
| `probability` | number | 语音检测置信度(0-1) |
|
||||
|
||||
---
|
||||
|
||||
#### `input.speech_stopped`
|
||||
|
||||
检测到语音结束(VAD)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "input.speech_stopped",
|
||||
"timestamp": 1730000012000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 8,
|
||||
"source": "asr",
|
||||
"trackId": "audio_in",
|
||||
"probability": 0.92
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_in"` |
|
||||
| `probability` | number | 静音检测置信度(0-1) |
|
||||
|
||||
---
|
||||
|
||||
#### `transcript.delta`
|
||||
|
||||
ASR 增量识别文本(实时转写)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "transcript.delta",
|
||||
"timestamp": 1730000011000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 6,
|
||||
"source": "asr",
|
||||
"trackId": "audio_in",
|
||||
"text": "你好",
|
||||
"data": {
|
||||
"text": "你好",
|
||||
"turn_id": "turn_001",
|
||||
"utterance_id": "utt_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_in"` |
|
||||
| `text` | string | 增量识别文本 |
|
||||
| `data.text` | string | 增量识别文本(同 `text`) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
| `data.utterance_id` | string | 当前语句 ID |
|
||||
|
||||
**节流说明**:服务端默认每 300ms 合并一次 delta 事件。
|
||||
|
||||
---
|
||||
|
||||
#### `transcript.final`
|
||||
|
||||
ASR 最终识别文本(语句结束)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "transcript.final",
|
||||
"timestamp": 1730000012500,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 9,
|
||||
"source": "asr",
|
||||
"trackId": "audio_in",
|
||||
"text": "你好,请问今天天气怎么样",
|
||||
"data": {
|
||||
"text": "你好,请问今天天气怎么样",
|
||||
"turn_id": "turn_001",
|
||||
"utterance_id": "utt_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_in"` |
|
||||
| `text` | string | 最终识别文本 |
|
||||
| `data.text` | string | 最终识别文本(同 `text`) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
| `data.utterance_id` | string | 当前语句 ID |
|
||||
|
||||
---
|
||||
|
||||
### LLM/TTS 输出事件
|
||||
|
||||
#### `assistant.response.delta`
|
||||
|
||||
助手增量文本输出(流式生成)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "assistant.response.delta",
|
||||
"timestamp": 1730000013000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 12,
|
||||
"source": "llm",
|
||||
"trackId": "audio_out",
|
||||
"text": "今天天气",
|
||||
"data": {
|
||||
"text": "今天天气",
|
||||
"turn_id": "turn_001",
|
||||
"response_id": "resp_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 固定为 `"llm"` |
|
||||
| `text` | string | 增量文本内容 |
|
||||
| `data.text` | string | 增量文本内容(同 `text`) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
| `data.response_id` | string | 当前回复 ID |
|
||||
|
||||
**节流说明**:服务端默认每 80ms 合并一次 delta 事件。
|
||||
|
||||
---
|
||||
|
||||
#### `assistant.response.final`
|
||||
|
||||
助手完整文本输出(回复结束)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "assistant.response.final",
|
||||
"timestamp": 1730000015000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 18,
|
||||
"source": "llm",
|
||||
"trackId": "audio_out",
|
||||
"text": "今天天气晴朗,气温25度,适合外出。",
|
||||
"data": {
|
||||
"text": "今天天气晴朗,气温25度,适合外出。",
|
||||
"turn_id": "turn_001",
|
||||
"response_id": "resp_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 固定为 `"llm"` |
|
||||
| `text` | string | 完整回复文本 |
|
||||
| `data.text` | string | 完整回复文本(同 `text`) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
| `data.response_id` | string | 当前回复 ID |
|
||||
|
||||
---
|
||||
|
||||
#### `assistant.tool_call`
|
||||
|
||||
工具调用通知,通知客户端 LLM 请求调用工具。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "assistant.tool_call",
|
||||
"timestamp": 1730000014000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 14,
|
||||
"source": "llm",
|
||||
"trackId": "audio_out",
|
||||
"tool_call_id": "call_abc123",
|
||||
"tool_name": "weather",
|
||||
"arguments": {
|
||||
"city": "北京"
|
||||
},
|
||||
"executor": "server",
|
||||
"timeout_ms": 30000,
|
||||
"data": {
|
||||
"tool_call": {
|
||||
"id": "call_abc123",
|
||||
"name": "weather",
|
||||
"arguments": "{\"city\":\"北京\"}"
|
||||
},
|
||||
"turn_id": "turn_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 固定为 `"llm"` |
|
||||
| `tool_call_id` | string | 工具调用唯一 ID |
|
||||
| `tool_name` | string | 工具名称 |
|
||||
| `arguments` | object | 工具参数(已解析的 JSON) |
|
||||
| `executor` | string | 执行方:`"server"` 服务端执行 / `"client"` 客户端执行 |
|
||||
| `timeout_ms` | number | 超时时间(毫秒) |
|
||||
| `data.tool_call` | object | 原始工具调用信息 |
|
||||
| `data.tool_call.id` | string | 工具调用 ID |
|
||||
| `data.tool_call.name` | string | 工具名称 |
|
||||
| `data.tool_call.arguments` | string | 工具参数(JSON 字符串) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
|
||||
**注意**:当 `executor = "client"` 时,客户端需要执行工具并返回 `tool_call.results`。
|
||||
|
||||
---
|
||||
|
||||
#### `assistant.tool_result`
|
||||
|
||||
工具执行结果通知。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "assistant.tool_result",
|
||||
"timestamp": 1730000014500,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 15,
|
||||
"source": "server",
|
||||
"trackId": "audio_out",
|
||||
"tool_call_id": "call_abc123",
|
||||
"tool_name": "weather",
|
||||
"tool_display_name": "天气查询",
|
||||
"ok": true,
|
||||
"error": null,
|
||||
"result": {
|
||||
"tool_call_id": "call_abc123",
|
||||
"name": "weather",
|
||||
"output": {
|
||||
"temperature": 25,
|
||||
"condition": "晴",
|
||||
"humidity": 40
|
||||
},
|
||||
"status": {
|
||||
"code": 200,
|
||||
"message": "ok"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 执行方:`"server"` / `"client"` |
|
||||
| `tool_call_id` | string | 工具调用 ID |
|
||||
| `tool_name` | string | 工具名称 |
|
||||
| `tool_display_name` | string | 工具显示名称 |
|
||||
| `ok` | boolean | 执行是否成功(状态码 200-299 为 true) |
|
||||
| `error` | object \| null | 错误信息(`ok=false` 时存在) |
|
||||
| `error.code` | number | 错误状态码 |
|
||||
| `error.message` | string | 错误描述 |
|
||||
| `error.retryable` | boolean | 是否可重试 |
|
||||
| `result` | object | 原始执行结果 |
|
||||
| `result.output` | any | 工具返回数据 |
|
||||
| `result.status` | object | 执行状态 |
|
||||
| `result.status.code` | number | HTTP 状态码 |
|
||||
| `result.status.message` | string | 状态描述 |
|
||||
|
||||
---
|
||||
|
||||
#### `output.audio.start`
|
||||
|
||||
TTS 音频播放开始标记。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "output.audio.start",
|
||||
"timestamp": 1730000015500,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 19,
|
||||
"source": "tts",
|
||||
"trackId": "audio_out",
|
||||
"data": {
|
||||
"tts_id": "tts_001",
|
||||
"turn_id": "turn_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 固定为 `"tts"` |
|
||||
| `data.tts_id` | string | TTS 播放段 ID |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
|
||||
**说明**:此事件后服务端将发送二进制 PCM 音频帧。
|
||||
|
||||
---
|
||||
|
||||
#### `output.audio.end`
|
||||
|
||||
TTS 音频播放结束标记。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "output.audio.end",
|
||||
"timestamp": 1730000018000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 25,
|
||||
"source": "tts",
|
||||
"trackId": "audio_out",
|
||||
"data": {
|
||||
"tts_id": "tts_001",
|
||||
"turn_id": "turn_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `source` | string | 固定为 `"tts"` |
|
||||
| `data.tts_id` | string | TTS 播放段 ID |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
|
||||
**说明**:`output.audio.end` 表示服务端已发送完成,不代表客户端扬声器已播完。若需要“真实播完”信号,客户端应发送 `output.audio.played`。
|
||||
|
||||
---
|
||||
|
||||
#### `response.interrupted`
|
||||
|
||||
回答被打断(用户插话)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "response.interrupted",
|
||||
"timestamp": 1730000016000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 20,
|
||||
"source": "system",
|
||||
"trackId": "audio_out",
|
||||
"data": {
|
||||
"turn_id": "turn_001",
|
||||
"response_id": "resp_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `data.turn_id` | string | 被打断的对话轮次 ID |
|
||||
| `data.response_id` | string | 被打断的回复 ID |
|
||||
|
||||
---
|
||||
|
||||
#### `metrics.ttfb`
|
||||
|
||||
首包音频时延指标(Time To First Byte)。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "metrics.ttfb",
|
||||
"timestamp": 1730000015600,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 21,
|
||||
"source": "system",
|
||||
"trackId": "audio_out",
|
||||
"latencyMs": 1520,
|
||||
"data": {
|
||||
"latencyMs": 1520,
|
||||
"turn_id": "turn_001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `trackId` | string | 固定为 `"audio_out"` |
|
||||
| `latencyMs` | number | 首包音频时延(毫秒) |
|
||||
| `data.latencyMs` | number | 首包音频时延(同 `latencyMs`) |
|
||||
| `data.turn_id` | string | 当前对话轮次 ID |
|
||||
|
||||
**说明**:从用户输入结束到第一个音频包发送的时间。
|
||||
|
||||
---
|
||||
|
||||
### 错误事件
|
||||
|
||||
#### `error`
|
||||
|
||||
统一错误事件。
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "error",
|
||||
"timestamp": 1730000020000,
|
||||
"sessionId": "ea34e1ca-b417-4a57-b03e-f752cb82e97d",
|
||||
"seq": 30,
|
||||
"sender": "server",
|
||||
"code": "llm.timeout",
|
||||
"message": "LLM request timeout",
|
||||
"stage": "llm",
|
||||
"retryable": true,
|
||||
"trackId": "audio_out",
|
||||
"data": {
|
||||
"error": {
|
||||
"stage": "llm",
|
||||
"code": "llm.timeout",
|
||||
"message": "LLM request timeout",
|
||||
"retryable": true
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|---|---|---|
|
||||
| `sender` | string | 错误来源:`"server"` / `"client"` |
|
||||
| `code` | string | 错误码 |
|
||||
| `message` | string | 错误描述 |
|
||||
| `stage` | string | 错误阶段:`"protocol"` / `"asr"` / `"llm"` / `"tts"` / `"tool"` / `"audio"` |
|
||||
| `retryable` | boolean | 是否可重试 |
|
||||
| `trackId` | string | 错误关联的轨道 |
|
||||
| `data.error` | object | 结构化错误信息 |
|
||||
| `data.error.stage` | string | 错误阶段 |
|
||||
| `data.error.code` | string | 错误码 |
|
||||
| `data.error.message` | string | 错误描述 |
|
||||
| `data.error.retryable` | boolean | 是否可重试 |
|
||||
|
||||
**trackId 约定**:
|
||||
- `audio_in`:ASR/音频输入相关错误
|
||||
- `audio_out`:LLM/TTS/工具相关错误
|
||||
- `control`:协议/会话控制相关错误
|
||||
|
||||
---
|
||||
|
||||
## 关联 ID 说明
|
||||
|
||||
事件中的关联 ID 用于追踪对话流程:
|
||||
|
||||
| ID 类型 | 说明 | 生命周期 |
|
||||
|---------|------|---------|
|
||||
| `turn_id` | 对话轮次 ID | 一次用户-助手交互 |
|
||||
| `utterance_id` | 语句 ID | 一次 ASR 最终识别结果 |
|
||||
| `response_id` | 回复 ID | 一次助手回复生成 |
|
||||
| `tool_call_id` | 工具调用 ID | 一次工具调用 |
|
||||
| `tts_id` | TTS 播放段 ID | 一段语音合成播放 |
|
||||
|
||||
---
|
||||
|
||||
## 心跳与超时
|
||||
|
||||
- **心跳间隔**:默认 50 秒(`heartbeat_interval_sec`)
|
||||
- **空闲超时**:默认 60 秒(`inactivity_timeout_sec`)
|
||||
- 客户端应持续发送音频或轻量消息避免被判定闲置
|
||||
|
||||
## 事件节流
|
||||
|
||||
为保持客户端渲染和服务端负载稳定,v1 协议对部分事件进行节流:
|
||||
|
||||
| 事件 | 默认节流间隔 | 说明 |
|
||||
|------|-------------|------|
|
||||
| `transcript.delta` | 300ms | ASR 增量文本 |
|
||||
| `assistant.response.delta` | 80ms | LLM 增量文本 |
|
||||
|
||||
## 错误处理
|
||||
|
||||
详细错误码请参考 [错误码](errors.md)。
|
||||
8
docs/content/assistants/configuration.md
Normal file
8
docs/content/assistants/configuration.md
Normal file
@@ -0,0 +1,8 @@
|
||||
# 配置选项(旧入口)
|
||||
|
||||
本页保留旧链接,用于承接历史导航或外部引用。助手配置的正式文档已经迁移到:
|
||||
|
||||
- [配置选项](../concepts/assistants/configuration.md) - 助手配置界面与运行时配置层说明
|
||||
- [助手概念](../concepts/assistants.md) - 先理解助手对象、会话与动态变量
|
||||
|
||||
如果你是从创建路径进入,也可以直接回到 [快速开始](../quickstart/index.md)。
|
||||
10
docs/content/assistants/index.md
Normal file
10
docs/content/assistants/index.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# 助手管理(旧入口)
|
||||
|
||||
本页保留旧链接,用于承接历史导航或外部引用。助手相关内容已经拆分到更明确的文档中:
|
||||
|
||||
- [助手概念](../concepts/assistants.md) - 了解助手是什么、由哪些部分组成,以及会话如何运行
|
||||
- [配置选项](../concepts/assistants/configuration.md) - 查看控制台和运行时配置项的分工
|
||||
- [提示词指南](../concepts/assistants/prompts.md) - 编写高质量系统提示词
|
||||
- [测试调试](../concepts/assistants/testing.md) - 验证助手行为并排查问题
|
||||
|
||||
如果你是第一次上手,建议直接从 [快速开始](../quickstart/index.md) 进入。
|
||||
8
docs/content/assistants/prompts.md
Normal file
8
docs/content/assistants/prompts.md
Normal file
@@ -0,0 +1,8 @@
|
||||
# 提示词指南(旧入口)
|
||||
|
||||
本页保留旧链接,用于承接历史导航或外部引用。提示词的正式文档已经迁移到:
|
||||
|
||||
- [提示词指南](../concepts/assistants/prompts.md) - 设计角色、任务、限制与风格
|
||||
- [助手概念](../concepts/assistants.md) - 理解提示词在助手体系中的位置
|
||||
|
||||
如果你想先完成最小可用配置,请从 [快速开始](../quickstart/index.md) 继续。
|
||||
8
docs/content/assistants/testing.md
Normal file
8
docs/content/assistants/testing.md
Normal file
@@ -0,0 +1,8 @@
|
||||
# 测试调试(旧入口)
|
||||
|
||||
本页保留旧链接,用于承接历史导航或外部引用。测试与调试的正式文档已经迁移到:
|
||||
|
||||
- [测试调试](../concepts/assistants/testing.md) - 验证助手行为、事件流和常见问题定位
|
||||
- [故障排查](../resources/troubleshooting.md) - 进入更细的链路排查步骤
|
||||
|
||||
如果你还没创建助手,请先完成 [快速开始](../quickstart/index.md)。
|
||||
7
docs/content/assistants/workflow-configuration.md
Normal file
7
docs/content/assistants/workflow-configuration.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# 工作流配置(旧入口)
|
||||
|
||||
本页保留旧链接,用于承接早期草稿和历史引用。工作流的正式文档已收敛到:
|
||||
|
||||
- [工作流](../customization/workflows.md) - 了解工作流的定位、节点结构、设计建议和当前边界
|
||||
|
||||
如果你正在配置助手中的流程能力,请优先阅读上述页面,再结合 [工具](../customization/tools.md) 与 [助手概念](../concepts/assistants.md) 一起使用。
|
||||
81
docs/content/changelog.md
Normal file
81
docs/content/changelog.md
Normal file
@@ -0,0 +1,81 @@
|
||||
# 更新日志
|
||||
|
||||
本文档记录 Realtime Agent Studio 的所有重要变更。
|
||||
|
||||
格式基于 [Keep a Changelog](https://keepachangelog.com/zh-CN/1.0.0/),
|
||||
版本号遵循 [语义化版本](https://semver.org/lang/zh-CN/)。
|
||||
|
||||
---
|
||||
|
||||
## [未发布]
|
||||
|
||||
### 开发中
|
||||
|
||||
- 工作流可视化编辑器
|
||||
- 知识库 RAG 集成
|
||||
- JavaScript/Python SDK
|
||||
- Step Audio 多模态模型支持
|
||||
|
||||
---
|
||||
|
||||
## [0.1.0] - 2025-01-15
|
||||
|
||||
### 新增
|
||||
|
||||
#### 实时交互引擎
|
||||
|
||||
- **管线式全双工引擎** - ASR → LLM → TTS 流水线架构
|
||||
- **智能打断** - 支持 VAD 和 EOU 检测
|
||||
- **OpenAI 兼容接口** - 支持 OpenAI Compatible 的 ASR/TTS 服务
|
||||
- **DashScope TTS** - 阿里云语音合成服务适配
|
||||
|
||||
#### 助手配置
|
||||
|
||||
- **系统提示词** - 支持角色定义和动态变量 `{{variable}}`
|
||||
- **模型管理** - LLM/ASR/TTS 模型统一管理界面
|
||||
- **工具调用** - Webhook 工具和客户端工具配置
|
||||
|
||||
#### 交互测试
|
||||
|
||||
- **实时调试控制台** - 内置 WebSocket 调试工具
|
||||
|
||||
#### 开放接口
|
||||
|
||||
- **WebSocket 协议** - `/ws` 端点,支持二进制音频流
|
||||
- **RESTful API** - 完整的助手/模型/会话 CRUD 接口
|
||||
|
||||
#### 历史监控
|
||||
|
||||
- **会话回放** - 音频 + 转写 + LLM 响应完整记录
|
||||
- **会话筛选** - 按时间、助手、状态多维度检索
|
||||
|
||||
#### 部署
|
||||
|
||||
- **Docker 支持** - 提供 docker-compose 一键部署
|
||||
|
||||
### 技术栈
|
||||
|
||||
- 前端:React 18, TypeScript, Tailwind CSS, Zustand
|
||||
- 后端:FastAPI (Python 3.10+)
|
||||
- 数据库:SQLite(开发)/ PostgreSQL(生产)
|
||||
|
||||
---
|
||||
|
||||
## 版本规划
|
||||
|
||||
| 版本 | 计划发布 | 主要特性 |
|
||||
|------|---------|---------|
|
||||
| 0.2.0 | 2025 Q1 | 工作流编辑器、知识库集成 |
|
||||
| 0.3.0 | 2025 Q2 | SDK 发布、多模态模型 |
|
||||
| 1.0.0 | 2025 H2 | 生产就绪、企业特性 |
|
||||
|
||||
---
|
||||
|
||||
## 贡献者
|
||||
|
||||
感谢所有为 RAS 做出贡献的开发者!
|
||||
|
||||
---
|
||||
|
||||
[未发布]: https://github.com/your-org/AI-VideoAssistant/compare/v0.1.0...HEAD
|
||||
[0.1.0]: https://github.com/your-org/AI-VideoAssistant/releases/tag/v0.1.0
|
||||
147
docs/content/concepts/assistants.md
Normal file
147
docs/content/concepts/assistants.md
Normal file
@@ -0,0 +1,147 @@
|
||||
# 助手概念详解
|
||||
|
||||
助手(Assistant)是 Realtime Agent Studio(RAS)中最核心的配置单元,也是控制台和 API 对外暴露能力的基本对象。
|
||||
|
||||
---
|
||||
|
||||
## 什么是助手
|
||||
|
||||
一个助手代表一个可接入、可测试、可发布的实时 AI 入口。它回答三个问题:
|
||||
|
||||
- **它是谁**:角色、语气、目标、限制、开场方式、静默时的行为(例如静默时主动询问用户的 Ask-on-Idle)
|
||||
- **它能做什么**:语言模型能力、语音模型能力(ASR、TTS、用户打断灵敏度(Barge-in)、语句端点设置(End-of-Utterance))、知识库、记忆、工具(Webhook、客户端工具、系统工具、MCP)、输出模式
|
||||
- **它在一次会话中如何运行**:通过 `assistant_id` 载入配置,并在运行时接收动态变量以及对话过程中的上下文更新
|
||||
|
||||
如果把引擎理解为“运行时”,那么助手就是“运行时要执行的那份定义”。
|
||||
|
||||
## 助手由哪些部分组成
|
||||
|
||||
| 层次 | 负责什么 | 典型内容 |
|
||||
|------|----------|----------|
|
||||
| **身份层** | 定义助手角色和交互风格 | 系统提示词、限制、开场白、静默处理 |
|
||||
| **模型层** | 决定理解与生成能力 | LLM、ASR、TTS、引擎类型、用户打断、语句端点 |
|
||||
| **能力层** | 扩展知识和执行能力 | 知识库、工具、记忆 |
|
||||
| **会话层** | 决定运行时上下文如何注入 | `assistant_id`、动态变量 |
|
||||
|
||||
## 身份层
|
||||
|
||||
助手首先是一个“被约束的角色”,而不是一段孤立的模型调用。
|
||||
|
||||
### 系统提示词
|
||||
|
||||
系统提示词定义助手的角色、任务、边界和风格,是所有能力组合的基础。
|
||||
|
||||
| 要素 | 作用 | 示例 |
|
||||
|------|------|------|
|
||||
| **角色** | 告诉模型“自己是谁” | 客服助手、销售顾问、培训教练 |
|
||||
| **任务** | 指定要完成的结果 | 解答咨询、收集信息、调用工具处理业务 |
|
||||
| **限制** | 明确哪些事不能做 | 不承诺超权限优惠、不输出未经验证的结论 |
|
||||
| **风格** | 约束回答节奏和措辞 | 简洁、口语化、每次 2-3 句 |
|
||||
|
||||
### 开场白
|
||||
|
||||
一个助手还需要定义会话应该如何开始,以及用户静默时如何处理,包括:
|
||||
|
||||
- **首轮模式**:由助手先说,或由用户先说
|
||||
- **开场白**:使用固定开场白,或由 AI 生成开场白
|
||||
|
||||
### 静默处理
|
||||
|
||||
定义用户持续静默时,助手是否主动询问用户是否仍然在线(Ask-on-Idle)。
|
||||
|
||||
## 模型层
|
||||
|
||||
模型决定助手的基础理解、推理和表达能力,但不是助手定义的全部。
|
||||
|
||||
- **LLM** 决定对话推理与文本生成能力
|
||||
- **ASR** 决定语音输入如何被实时转写
|
||||
- **TTS** 决定文本回复如何转成可播放语音
|
||||
- **引擎类型** 决定运行链路是分段可控还是端到端低延迟
|
||||
- **VAD** 语音活动检测(Voice Activity Detection)模型,判断用户是否正在说话
|
||||
- **EOU** 语句端点模型,判断用户是否完成一段语句等待回复
|
||||
- **Barge-In** 用户打断:当检测到用户语音活动或收到手动打断请求时,是否中止助手当前的回复
|
||||
|
||||
## 能力层
|
||||
|
||||
### 知识库
|
||||
|
||||
知识库用于补充私有领域知识,让助手回答超出基础模型常识之外的问题。
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Question[用户问题] --> Retrieval[检索]
|
||||
Retrieval --> KB[(知识库)]
|
||||
KB --> Context[相关片段]
|
||||
Context --> LLM[LLM]
|
||||
LLM --> Answer[回答]
|
||||
```
|
||||
|
||||
知识库适合承载政策、产品资料、流程说明、FAQ 和内部文档,而不是把所有业务知识堆进系统提示词。
|
||||
|
||||
### 工具
|
||||
|
||||
工具让助手从“会说”变成“能做事”。
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
User[用户] --> Assistant[助手]
|
||||
Assistant --> Tool[工具 / 外部系统]
|
||||
Tool --> Assistant
|
||||
Assistant --> User
|
||||
```
|
||||
|
||||
适合用工具处理的任务包括:订单查询、预约、外部搜索、写入业务系统、调用客户端能力等。
|
||||
|
||||
## 会话层
|
||||
|
||||
### `assistant_id` 的作用
|
||||
|
||||
在接入层面,客户端通过 `assistant_id` 指定要加载哪一个助手。引擎据此读取默认配置,并把同一份助手定义应用到当前会话。
|
||||
|
||||
### 会话生命周期
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> Connecting: WebSocket 连接
|
||||
Connecting --> Started: session.started
|
||||
Started --> Active: config.resolved / 开始对话
|
||||
Active --> Active: 多轮交互
|
||||
Active --> Stopped: session.stop 或连接关闭
|
||||
Stopped --> [*]
|
||||
```
|
||||
|
||||
一次会话通常会沉淀以下信息:
|
||||
|
||||
- 用户与助手消息时间线
|
||||
- 音频流、转写结果和模型输出
|
||||
- 工具调用记录与中间事件
|
||||
- 自定义 metadata、渠道和业务上下文
|
||||
|
||||
|
||||
### 动态变量与会话级覆盖
|
||||
|
||||
助手的默认配置不需要为每个用户都重新复制一份。RAS 提供两种常见的运行时注入方式:
|
||||
|
||||
- **动态变量**:在提示词中使用 `{{variable}}` 占位,并在会话开始时传入具体值
|
||||
- **会话级覆盖**:仅对当前会话覆盖部分运行时参数,不回写助手基线配置
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.start",
|
||||
"metadata": {
|
||||
"dynamicVariables": {
|
||||
"company_name": "ABC 公司",
|
||||
"customer_name": "张三",
|
||||
"tier": "VIP"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
这种设计让你既能复用标准助手,又能在每次接入时注入渠道、用户、订单或上下文信息。
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [配置选项](assistants/configuration.md) - 查看助手在控制台和运行时有哪些配置层
|
||||
- [提示词指南](assistants/prompts.md) - 设计角色、任务、限制和语气
|
||||
- [测试调试](assistants/testing.md) - 验证助手质量并定位问题
|
||||
218
docs/content/concepts/assistants/configuration.md
Normal file
218
docs/content/concepts/assistants/configuration.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# 配置选项
|
||||
|
||||
助手配置界面包含多个标签页,每个标签页负责不同方面的配置。
|
||||
|
||||
## 全局设置
|
||||
|
||||
全局设置定义助手的核心对话能力。
|
||||
|
||||
| 配置项 | 说明 | 建议值 |
|
||||
|-------|------|--------|
|
||||
| 助手名称 | 用于标识和管理 | 简洁明确 |
|
||||
| 系统提示词 | 定义角色、任务和约束 | 详见[提示词指南](prompts.md) |
|
||||
| 开场白 | 对话开始时的问候语 | 简短友好 |
|
||||
| 温度参数 | 控制回复随机性 | 0.7(通用)/ 0.3(严谨) |
|
||||
| 上下文长度 | 保留的历史消息数 | 10-20 |
|
||||
|
||||
### 高级选项
|
||||
|
||||
- **首轮模式** - 设置首次对话的触发方式
|
||||
- **打断检测** - 用户打断时的处理策略
|
||||
- **超时设置** - 无响应时的处理
|
||||
|
||||
## 语音配置
|
||||
|
||||
配置语音识别和语音合成参数。
|
||||
|
||||
### TTS 语音合成
|
||||
|
||||
| 配置 | 说明 |
|
||||
|------|------|
|
||||
| TTS 引擎 | 选择语音合成服务(阿里/火山/Minimax) |
|
||||
| 音色 | 选择语音风格和性别 |
|
||||
| 语速 | 语音播放速度(0.5-2.0) |
|
||||
| 音量 | 语音输出音量(0-100) |
|
||||
| 音调 | 语音音调高低(0.5-2.0) |
|
||||
|
||||
### ASR 语音识别
|
||||
|
||||
| 配置 | 说明 |
|
||||
|------|------|
|
||||
| ASR 引擎 | 选择语音识别服务 |
|
||||
| 语言 | 识别语言(中文/英文/多语言) |
|
||||
| 热词 | 提高特定词汇识别准确率 |
|
||||
|
||||
## 工具绑定
|
||||
|
||||
配置助手可调用的外部工具。
|
||||
|
||||
### 可用工具类型
|
||||
|
||||
| 工具 | 说明 |
|
||||
|------|------|
|
||||
| 搜索工具 | 网络搜索获取信息 |
|
||||
| 天气查询 | 查询天气预报 |
|
||||
| 计算器 | 数学计算 |
|
||||
| 知识库检索 | RAG 知识检索 |
|
||||
| 自定义工具 | HTTP 回调外部 API |
|
||||
|
||||
### 配置步骤
|
||||
|
||||
1. 在工具列表中勾选需要的工具
|
||||
2. 配置工具参数(如有)
|
||||
3. 测试工具调用是否正常
|
||||
|
||||
## 知识关联
|
||||
|
||||
关联 RAG 知识库,让助手能够回答专业领域问题。
|
||||
|
||||
### 配置参数
|
||||
|
||||
| 参数 | 说明 | 建议值 |
|
||||
|------|------|--------|
|
||||
| 知识库 | 选择要关联的知识库 | - |
|
||||
| 相似度阈值 | 低于此分数不返回 | 0.7 |
|
||||
| 返回数量 | 单次检索返回条数 | 3 |
|
||||
| 检索策略 | 混合/向量/关键词 | 混合 |
|
||||
|
||||
### 多知识库
|
||||
|
||||
支持关联多个知识库,系统会自动合并检索结果。
|
||||
|
||||
## 外部链接
|
||||
|
||||
配置第三方服务集成和 Webhook 回调。
|
||||
|
||||
### Webhook 配置
|
||||
|
||||
| 字段 | 说明 |
|
||||
|------|------|
|
||||
| 回调 URL | 接收事件的 HTTP 端点 |
|
||||
| 事件类型 | 订阅的事件(对话开始/结束/工具调用等) |
|
||||
| 认证方式 | API Key / Bearer Token / 无 |
|
||||
|
||||
### 支持的事件
|
||||
|
||||
- `conversation.started` - 对话开始
|
||||
- `conversation.ended` - 对话结束
|
||||
- `tool.called` - 工具被调用
|
||||
- `human.transfer` - 转人工
|
||||
|
||||
## 配置持久化与运行时覆盖
|
||||
|
||||
助手配置分为两层:
|
||||
|
||||
1. **数据库持久化配置(基线配置)**:通过助手管理 API 保存,后续会话默认读取这一层。
|
||||
2. **会话级覆盖配置(runtime overrides)**:仅对当前 WebSocket 会话生效,不会写回数据库。
|
||||
|
||||
### 哪些配置会存到数据库
|
||||
|
||||
以下字段会持久化在 `assistants` / `assistant_opener_audio` 等表中(通过创建/更新助手写入):
|
||||
|
||||
| 类别 | 典型字段 |
|
||||
|------|---------|
|
||||
| 对话行为 | `name`、`prompt`、`opener`、`firstTurnMode`、`generatedOpenerEnabled` |
|
||||
| 输出与打断 | `voiceOutputEnabled`、`voice`、`speed`、`botCannotBeInterrupted`、`interruptionSensitivity` |
|
||||
| 工具与知识库 | `tools`、`knowledgeBaseId` |
|
||||
| 模型与外部模式 | `configMode`、`apiUrl`、`apiKey`、`llmModelId`、`asrModelId`、`embeddingModelId`、`rerankModelId` |
|
||||
| 开场音频 | `openerAudioEnabled` 及音频文件状态(`ready`、`durationMs` 等) |
|
||||
|
||||
> 引擎在连接时通过 `assistant_id` 从后端读取该助手的 `sessionStartMetadata` 作为默认运行配置。
|
||||
|
||||
### 哪些配置可以在会话中覆盖
|
||||
|
||||
客户端可在 `session.start.metadata.overrides` 中覆盖以下白名单字段(仅当前会话有效):
|
||||
|
||||
- `systemPrompt`
|
||||
- `greeting`
|
||||
- `firstTurnMode`
|
||||
- `generatedOpenerEnabled`
|
||||
- `output`
|
||||
- `bargeIn`
|
||||
- `knowledgeBaseId`
|
||||
- `knowledge`
|
||||
- `tools`
|
||||
- `openerAudio`
|
||||
|
||||
以下字段不能由客户端覆盖:
|
||||
|
||||
- `services`(模型 provider / apiKey / baseUrl 等)
|
||||
- `assistantId` / `appId` / `configVersionId`(及下划线变体)
|
||||
- 包含密钥语义的字段(如 `apiKey`、`token`、`secret`、`password`、`authorization`)
|
||||
|
||||
### 覆盖示例(代码)
|
||||
|
||||
下面示例展示「数据库基线配置 + 会话 overrides」的最终效果。
|
||||
|
||||
```json
|
||||
// 1) 数据库存储的基线配置(示意)
|
||||
// GET /api/v1/assistants/asst_demo/config -> sessionStartMetadata
|
||||
{
|
||||
"systemPrompt": "你是电商客服助手,回答要简洁。",
|
||||
"greeting": "你好,我是你的客服助手。",
|
||||
"firstTurnMode": "bot_first",
|
||||
"output": { "mode": "audio" },
|
||||
"knowledgeBaseId": "kb_orders",
|
||||
"tools": [
|
||||
{ "type": "function", "function": { "name": "query_order" } }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
// 2) 客户端发起会话时的覆盖
|
||||
{
|
||||
"type": "session.start",
|
||||
"metadata": {
|
||||
"channel": "web",
|
||||
"history": { "userId": 1001 },
|
||||
"overrides": {
|
||||
"greeting": "你好,我来帮你查订单进度。",
|
||||
"output": { "mode": "text" },
|
||||
"knowledgeBaseId": "kb_vip_orders",
|
||||
"tools": [
|
||||
{ "type": "function", "function": { "name": "query_vip_order" } }
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
// 3) 引擎合并后的有效配置(示意)
|
||||
{
|
||||
"assistantId": "asst_demo",
|
||||
"systemPrompt": "你是电商客服助手,回答要简洁。",
|
||||
"greeting": "你好,我来帮你查订单进度。",
|
||||
"firstTurnMode": "bot_first",
|
||||
"output": { "mode": "text" },
|
||||
"knowledgeBaseId": "kb_vip_orders",
|
||||
"tools": [
|
||||
{ "type": "function", "function": { "name": "query_vip_order" } }
|
||||
],
|
||||
"channel": "web",
|
||||
"history": { "userId": 1001 }
|
||||
}
|
||||
```
|
||||
|
||||
合并规则可简化为:
|
||||
|
||||
```python
|
||||
effective = {**db_session_start_metadata, **metadata.overrides}
|
||||
```
|
||||
|
||||
当 `WS_EMIT_CONFIG_RESOLVED=true` 时,服务端会返回 `config.resolved`(公开、安全裁剪后的快照)用于前端调试当前生效配置。
|
||||
|
||||
## 配置导入导出
|
||||
|
||||
### 导出配置
|
||||
|
||||
1. 在助手详情页点击 **更多**
|
||||
2. 选择 **导出配置**
|
||||
3. 下载 JSON 格式的配置文件
|
||||
|
||||
### 导入配置
|
||||
|
||||
1. 点击 **新建助手**
|
||||
2. 选择 **从配置导入**
|
||||
3. 上传配置文件
|
||||
184
docs/content/concepts/assistants/prompts.md
Normal file
184
docs/content/concepts/assistants/prompts.md
Normal file
@@ -0,0 +1,184 @@
|
||||
# 提示词指南
|
||||
|
||||
系统提示词(System Prompt)是定义助手行为的核心配置。本指南介绍如何编写高质量的提示词。
|
||||
|
||||
## 提示词结构
|
||||
|
||||
一个完整的系统提示词通常包含以下部分:
|
||||
|
||||
```
|
||||
[角色定义]
|
||||
[任务描述]
|
||||
[行为约束]
|
||||
[输出格式]
|
||||
[示例(可选)]
|
||||
```
|
||||
|
||||
## 编写原则
|
||||
|
||||
### 1. 明确角色
|
||||
|
||||
告诉助手它是谁:
|
||||
|
||||
```
|
||||
你是一个专业的技术支持工程师,专门负责解答产品使用问题。
|
||||
```
|
||||
|
||||
### 2. 定义任务
|
||||
|
||||
明确助手需要完成什么:
|
||||
|
||||
```
|
||||
你的主要任务是:
|
||||
1. 解答用户关于产品功能的问题
|
||||
2. 提供使用指导和最佳实践
|
||||
3. 帮助用户排查常见故障
|
||||
```
|
||||
|
||||
### 3. 设置约束
|
||||
|
||||
限制不希望出现的行为:
|
||||
|
||||
```
|
||||
请注意:
|
||||
- 不要讨论与产品无关的话题
|
||||
- 不要编造不存在的功能
|
||||
- 如果不确定答案,请建议用户联系人工客服
|
||||
```
|
||||
|
||||
### 4. 指定风格
|
||||
|
||||
定义回复的语气和风格:
|
||||
|
||||
```
|
||||
回复风格要求:
|
||||
- 使用友好、专业的语气
|
||||
- 回答简洁明了,避免冗长
|
||||
- 适当使用列表和步骤说明
|
||||
```
|
||||
|
||||
## 提示词模板
|
||||
|
||||
### 客服助手
|
||||
|
||||
```
|
||||
你是 [公司名称] 的智能客服助手。
|
||||
|
||||
## 你的职责
|
||||
- 解答用户关于产品和服务的问题
|
||||
- 处理常见的投诉和建议
|
||||
- 引导用户完成操作流程
|
||||
|
||||
## 回复要求
|
||||
- 保持友好和耐心
|
||||
- 回答简洁,一般不超过 3 句话
|
||||
- 如果问题复杂,建议转接人工客服
|
||||
|
||||
## 禁止行为
|
||||
- 不要讨论竞争对手
|
||||
- 不要承诺无法兑现的事项
|
||||
- 不要透露内部信息
|
||||
```
|
||||
|
||||
### 技术支持
|
||||
|
||||
```
|
||||
你是一个技术支持工程师,专门帮助用户解决技术问题。
|
||||
|
||||
## 工作流程
|
||||
1. 首先了解用户遇到的具体问题
|
||||
2. 询问必要的环境信息(系统版本、错误信息等)
|
||||
3. 提供分步骤的解决方案
|
||||
4. 确认问题是否解决
|
||||
|
||||
## 回复格式
|
||||
- 使用编号列表说明操作步骤
|
||||
- 提供代码示例时使用代码块
|
||||
- 复杂问题可以分多次回复
|
||||
```
|
||||
|
||||
### 销售顾问
|
||||
|
||||
```
|
||||
你是一个产品销售顾问,帮助用户了解产品并做出购买决策。
|
||||
|
||||
## 沟通策略
|
||||
- 先了解用户需求,再推荐合适的产品
|
||||
- 突出产品优势,但不贬低竞品
|
||||
- 提供真实的价格和优惠信息
|
||||
|
||||
## 目标
|
||||
- 帮助用户找到最适合的方案
|
||||
- 解答购买相关的疑问
|
||||
- 促进成交但不过度推销
|
||||
```
|
||||
|
||||
## 动态变量
|
||||
|
||||
提示词支持动态变量,使用 `{{变量名}}` 语法:
|
||||
|
||||
```
|
||||
你好 {{customer_name}},欢迎来到 {{company_name}}。
|
||||
你当前的会员等级是 {{membership_tier}}。
|
||||
```
|
||||
|
||||
在 `session.start` 时通过 `dynamicVariables` 传入:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "session.start",
|
||||
"metadata": {
|
||||
"dynamicVariables": {
|
||||
"customer_name": "张三",
|
||||
"company_name": "AI 公司",
|
||||
"membership_tier": "黄金会员"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 常见问题
|
||||
|
||||
### 回复太长
|
||||
|
||||
在提示词中明确限制:
|
||||
|
||||
```
|
||||
回复长度要求:
|
||||
- 一般问题:1-2 句话
|
||||
- 复杂问题:不超过 5 句话
|
||||
- 避免重复和冗余内容
|
||||
```
|
||||
|
||||
### 答非所问
|
||||
|
||||
增加任务边界说明:
|
||||
|
||||
```
|
||||
重要提示:
|
||||
- 只回答与 [产品/服务] 相关的问题
|
||||
- 对于无关问题,礼貌地拒绝并引导回正题
|
||||
```
|
||||
|
||||
### 编造信息
|
||||
|
||||
强调诚实原则:
|
||||
|
||||
```
|
||||
信息准确性要求:
|
||||
- 只提供你确定的信息
|
||||
- 不确定时说"我不太确定,建议您..."
|
||||
- 绝对不要编造数据或功能
|
||||
```
|
||||
|
||||
## 最佳实践
|
||||
|
||||
1. **迭代优化** - 根据实际对话效果持续调整
|
||||
2. **测试覆盖** - 用各种场景测试提示词效果
|
||||
3. **版本管理** - 保存历史版本,便于回退
|
||||
4. **定期复盘** - 分析对话记录,发现改进点
|
||||
|
||||
## 下一步
|
||||
|
||||
- [测试调试](testing.md) - 验证提示词效果
|
||||
- [知识库配置](../../customization/knowledge-base.md) - 补充专业知识
|
||||
162
docs/content/concepts/assistants/testing.md
Normal file
162
docs/content/concepts/assistants/testing.md
Normal file
@@ -0,0 +1,162 @@
|
||||
# 测试调试
|
||||
|
||||
本指南介绍如何测试和调试 AI 助手,确保其行为符合预期。
|
||||
|
||||
## 测试面板
|
||||
|
||||
在助手详情页,点击 **测试** 按钮打开测试面板。
|
||||
|
||||
### 功能介绍
|
||||
|
||||
| 功能 | 说明 |
|
||||
|------|------|
|
||||
| 文本对话 | 直接输入文字进行测试 |
|
||||
| 语音测试 | 使用麦克风进行语音对话 |
|
||||
| 查看日志 | 实时查看系统日志 |
|
||||
| 事件追踪 | 查看 WebSocket 事件流 |
|
||||
|
||||
## 测试用例设计
|
||||
|
||||
### 基础功能测试
|
||||
|
||||
| 测试项 | 输入 | 预期结果 |
|
||||
|--------|------|---------|
|
||||
| 问候响应 | "你好" | 友好的问候回复 |
|
||||
| 功能介绍 | "你能做什么?" | 准确描述能力范围 |
|
||||
| 开场白 | 连接后自动 | 播放配置的开场白 |
|
||||
|
||||
### 业务场景测试
|
||||
|
||||
根据助手定位设计测试用例:
|
||||
|
||||
```
|
||||
场景:产品咨询助手
|
||||
|
||||
测试用例 1:常见问题
|
||||
- 输入:"产品有哪些功能?"
|
||||
- 预期:准确列出主要功能
|
||||
|
||||
测试用例 2:价格询问
|
||||
- 输入:"多少钱?"
|
||||
- 预期:提供价格信息或引导方式
|
||||
|
||||
测试用例 3:超出范围
|
||||
- 输入:"帮我写一首诗"
|
||||
- 预期:礼貌拒绝并引导回业务话题
|
||||
```
|
||||
|
||||
### 边界测试
|
||||
|
||||
| 测试项 | 输入 | 预期结果 |
|
||||
|--------|------|---------|
|
||||
| 空输入 | "" | 提示用户输入内容 |
|
||||
| 超长输入 | 1000+ 字符 | 正常处理或提示过长 |
|
||||
| 特殊字符 | "<script>alert(1)</script>" | 安全处理,不执行 |
|
||||
| 敏感内容 | 不当言论 | 拒绝回复并提示 |
|
||||
|
||||
## 日志分析
|
||||
|
||||
### 查看日志
|
||||
|
||||
在测试面板的 **日志** 标签页,可以看到:
|
||||
|
||||
- ASR 识别结果
|
||||
- LLM 推理过程
|
||||
- TTS 合成状态
|
||||
- 工具调用记录
|
||||
|
||||
### 常见日志
|
||||
|
||||
```
|
||||
[ASR] transcript.final: "你好,请问有什么可以帮你"
|
||||
[LLM] request: messages=[...]
|
||||
[LLM] response: "您好!我是..."
|
||||
[TTS] synthesizing: "您好!我是..."
|
||||
[TTS] audio.start
|
||||
[TTS] audio.end
|
||||
```
|
||||
|
||||
## 事件追踪
|
||||
|
||||
在 **事件** 标签页查看完整的 WebSocket 事件流:
|
||||
|
||||
```json
|
||||
{"type": "session.started", "timestamp": 1704067200000}
|
||||
{"type": "input.speech_started", "timestamp": 1704067201000}
|
||||
{"type": "transcript.delta", "data": {"text": "你"}}
|
||||
{"type": "transcript.delta", "data": {"text": "好"}}
|
||||
{"type": "transcript.final", "data": {"text": "你好"}}
|
||||
{"type": "assistant.response.delta", "data": {"text": "您"}}
|
||||
{"type": "assistant.response.final", "data": {"text": "您好!..."}}
|
||||
{"type": "output.audio.start"}
|
||||
{"type": "output.audio.end"}
|
||||
```
|
||||
|
||||
## 性能指标
|
||||
|
||||
关注以下性能指标:
|
||||
|
||||
| 指标 | 说明 | 建议值 |
|
||||
|------|------|--------|
|
||||
| TTFB | 首字节时间 | < 500ms |
|
||||
| 识别延迟 | ASR 处理时间 | < 1s |
|
||||
| 回复延迟 | LLM 推理时间 | < 2s |
|
||||
| 合成延迟 | TTS 处理时间 | < 500ms |
|
||||
|
||||
## 常见问题排查
|
||||
|
||||
### 助手不响应
|
||||
|
||||
1. **检查连接状态**
|
||||
- 确认 WebSocket 连接成功
|
||||
- 查看是否收到 `session.started` 事件
|
||||
|
||||
2. **检查模型配置**
|
||||
- 确认 LLM 模型 API Key 有效
|
||||
- 测试模型连接是否正常
|
||||
|
||||
3. **查看错误日志**
|
||||
- 打开浏览器开发者工具
|
||||
- 检查 Console 和 Network 标签
|
||||
|
||||
### 回复质量差
|
||||
|
||||
1. **优化提示词**
|
||||
- 增加更明确的指令
|
||||
- 添加示例和约束
|
||||
|
||||
2. **调整温度参数**
|
||||
- 降低 temperature 提高一致性
|
||||
- 适当值通常在 0.3-0.7
|
||||
|
||||
3. **补充知识库**
|
||||
- 上传相关文档
|
||||
- 提高检索相关性
|
||||
|
||||
### 语音问题
|
||||
|
||||
1. **ASR 识别不准**
|
||||
- 检查麦克风权限
|
||||
- 尝试更换 ASR 引擎
|
||||
- 添加热词提高识别率
|
||||
|
||||
2. **TTS 不播放**
|
||||
- 检查浏览器自动播放限制
|
||||
- 确认 TTS 配置正确
|
||||
|
||||
## 自动化测试
|
||||
|
||||
使用自动化测试功能进行批量测试:
|
||||
|
||||
1. 进入 **自动化测试** 页面
|
||||
2. 创建测试任务
|
||||
3. 配置测试用例
|
||||
4. 运行测试并查看报告
|
||||
|
||||
详见 [自动化测试](../../analysis/autotest.md)。
|
||||
|
||||
## 下一步
|
||||
|
||||
- [自动化测试](../../analysis/autotest.md) - 批量测试
|
||||
- [历史记录](../../analysis/history.md) - 查看对话记录
|
||||
- [效果评估](../../analysis/evaluation.md) - 评估对话质量
|
||||
107
docs/content/concepts/engines.md
Normal file
107
docs/content/concepts/engines.md
Normal file
@@ -0,0 +1,107 @@
|
||||
# 引擎架构
|
||||
|
||||
RAS 提供两类实时运行时:**Pipeline 引擎** 和 **Realtime 引擎**。本页只回答一个问题:你的助手应该跑在哪种引擎上。
|
||||
|
||||
---
|
||||
|
||||
## 先记住这条判断标准
|
||||
|
||||
- 如果你优先考虑 **可控性、可替换性、成本管理、工具 / 知识 / 流程编排**,优先选 **Pipeline 引擎**
|
||||
- 如果你优先考虑 **超低延迟、更自然的端到端语音体验**,优先选 **Realtime 引擎**
|
||||
|
||||
## 两类引擎的区别
|
||||
|
||||
| 维度 | Pipeline 引擎 | Realtime 引擎 |
|
||||
|------|---------------|---------------|
|
||||
| **交互路径** | VAD → ASR → TD(回合检测)→ LLM → TTS | 端到端实时模型 |
|
||||
| **可控性** | 高,每个环节可替换 | 中,更多依赖模型供应商 |
|
||||
| **延迟** | 中等,通常由多环节累加 | 低,链路更短 |
|
||||
| **能力编排** | 更适合接入工具、知识库、工作流 | 也可接工具,但流程可控性较弱 |
|
||||
| **成本结构** | 可按环节优化 | 往往更依赖单一供应商定价 |
|
||||
| **适合场景** | 企业客服、流程型助手、电话场景、知识问答 | 高拟真语音助手、多模态入口、高自然度体验 |
|
||||
|
||||
## Pipeline 引擎是什么
|
||||
|
||||
Pipeline 引擎把实时语音拆成多个明确环节:
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
VAD[VAD] --> ASR[ASR]
|
||||
ASR --> TD[回合检测]
|
||||
TD --> LLM[LLM]
|
||||
LLM --> TTS[TTS]
|
||||
```
|
||||
|
||||
这样做的好处是:
|
||||
|
||||
- 你可以分别选择 ASR、LLM、TTS 的供应商
|
||||
- 你可以单独优化某一个环节,而不是整体替换
|
||||
- 工具、知识库和工作流更容易插入到链路中
|
||||
|
||||
代价是:
|
||||
|
||||
- 延迟会累加
|
||||
- 系统集成更复杂
|
||||
- 你需要同时管理多类外部依赖
|
||||
|
||||
## Realtime 引擎是什么
|
||||
|
||||
Realtime 引擎直接连接端到端实时模型,让模型同时处理输入、理解、生成与打断。
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Input[音频 / 视频 / 文本输入] --> RT[Realtime Model]
|
||||
RT --> Output[音频 / 文本输出]
|
||||
RT --> Tools[工具]
|
||||
```
|
||||
|
||||
这样做的好处是:
|
||||
|
||||
- 链路更短,延迟更低
|
||||
- 全双工与打断通常更自然
|
||||
- 接入路径更简单,适合强调体验的入口
|
||||
|
||||
代价是:
|
||||
|
||||
- 更依赖特定模型供应商
|
||||
- 对 ASR / TTS / 回合检测的独立控制更弱
|
||||
- 成本和能力边界受实时模型限制更大
|
||||
|
||||
## 怎么选
|
||||
|
||||
### 适合选择 Pipeline 的情况
|
||||
|
||||
- 你要接入特定 ASR 或 TTS 供应商
|
||||
- 你需要知识库、工具、工作流形成稳定业务流程
|
||||
- 你更在意可解释性、观测和分段优化
|
||||
- 你需要把成本按环节精细控制
|
||||
|
||||
### 适合选择 Realtime 的情况
|
||||
|
||||
- 你把“自然对话感”放在首位
|
||||
- 你需要更低的首响和更顺滑的打断体验
|
||||
- 你可以接受对某个模型供应商的依赖
|
||||
- 你的场景更接近语音助手、陪练、虚拟角色或多模态入口
|
||||
|
||||
## 简化决策表
|
||||
|
||||
| 场景 | 推荐引擎 | 原因 |
|
||||
|------|----------|------|
|
||||
| 企业客服 / 电话机器人 | Pipeline | 可控、可审计、易接工具与业务系统 |
|
||||
| 知识问答 / 业务流程助手 | Pipeline | 更适合接知识库与工作流 |
|
||||
| 高拟真语音助手 | Realtime | 更自然、更低延迟 |
|
||||
| 多模态入口 | Realtime | 端到端处理音频 / 视频 / 文本 |
|
||||
| 预算敏感场景 | Pipeline | 更容易逐环节优化成本 |
|
||||
|
||||
## 智能打断的差异
|
||||
|
||||
两类引擎都支持打断,但边界不同:
|
||||
|
||||
- **Pipeline**:由 VAD / 回合检测与 TTS 停止逻辑协同实现,行为更可控
|
||||
- **Realtime**:更多由实时模型内部完成,体验更自然,但可解释性更低
|
||||
|
||||
## 继续阅读
|
||||
|
||||
- [Pipeline 引擎](pipeline-engine.md) - 查看分段链路、延迟构成与配置示例
|
||||
- [Realtime 引擎](realtime-engine.md) - 查看端到端实时模型的交互路径
|
||||
- [系统架构](../overview/architecture.md) - 从服务边界理解引擎在整体系统中的位置
|
||||
49
docs/content/concepts/index.md
Normal file
49
docs/content/concepts/index.md
Normal file
@@ -0,0 +1,49 @@
|
||||
# 核心概念
|
||||
|
||||
本章节只解释 Realtime Agent Studio 的关键心智模型,不重复环境部署或助手构建的操作细节。
|
||||
|
||||
---
|
||||
|
||||
## 先建立这三个概念
|
||||
|
||||
### 1. 助手是“对外提供能力的配置单元”
|
||||
|
||||
助手决定了一个实时 AI 入口对外表现成什么角色:它使用什么提示词、哪些模型、能访问哪些知识和工具、会话如何开始以及运行时如何被覆盖。
|
||||
|
||||
- [助手概念](assistants.md) — 统一理解助手、会话、动态变量与能力边界
|
||||
- [配置选项](assistants/configuration.md) — 了解界面层和运行时配置项如何分工
|
||||
- [提示词指南](assistants/prompts.md) — 学会定义助手的角色、任务、风格与约束
|
||||
- [测试调试](assistants/testing.md) — 理解如何验证助手行为和定位问题
|
||||
|
||||
### 2. 引擎是“承载实时交互的运行时”
|
||||
|
||||
RAS 同时提供 Pipeline 引擎与 Realtime 引擎。它们都能驱动实时助手,但在延迟、可控性、成本和可替换性上各有取舍。
|
||||
|
||||
- [引擎概览](engines.md) — 两类引擎的能力边界与选择建议
|
||||
- [Pipeline 引擎](pipeline-engine.md) — VAD/ASR/TD/LLM/TTS 串联的可组合链路
|
||||
- [Realtime 引擎](realtime-engine.md) — 面向端到端实时模型的低延迟交互路径
|
||||
|
||||
### 3. 工作流是“把复杂业务拆成步骤和分支的方法”
|
||||
|
||||
当单一提示词不足以稳定处理多步骤、多条件、多工具的业务流程时,应使用工作流来显式编排节点、路由和回退策略。
|
||||
|
||||
- [工作流](../customization/workflows.md) — 了解何时需要工作流、它由哪些部分组成、如何设计可维护的流程
|
||||
|
||||
---
|
||||
|
||||
## 本章节不负责什么
|
||||
|
||||
以下内容属于“如何搭建和使用”,不在本章节展开说明:
|
||||
|
||||
- 助手搭建、模型/知识库/工具/工作流配置:从 [助手概览](assistants.md) 进入构建链路
|
||||
- 部署与环境变量:见 [环境与部署](../getting-started/index.md)
|
||||
- 第一个助手的最短操作路径:见 [快速开始](../quickstart/index.md)
|
||||
- 事件格式与接入协议:见 [API 参考](../api-reference/index.md)
|
||||
|
||||
## 建议阅读顺序
|
||||
|
||||
1. 先读 [助手概念](assistants.md),明确你要配置的对象到底是什么
|
||||
2. 再读 [引擎概览](engines.md),决定应该选择 Pipeline 还是 Realtime
|
||||
3. 如果场景涉及多步骤流程,再读 [工作流](../customization/workflows.md)
|
||||
4. 最后回到 [快速开始](../quickstart/index.md) 或 [助手概览](assistants.md) 开始具体配置
|
||||
|
||||
137
docs/content/concepts/pipeline-engine.md
Normal file
137
docs/content/concepts/pipeline-engine.md
Normal file
@@ -0,0 +1,137 @@
|
||||
# Pipeline 引擎
|
||||
|
||||
Pipeline 引擎把实时对话拆成多个清晰环节,适合需要高可控性、可替换外部能力和复杂业务编排的场景。
|
||||
|
||||
---
|
||||
|
||||
## 运行链路
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Input["输入处理"]
|
||||
Audio[用户音频] --> VAD[声音活动检测 VAD]
|
||||
VAD --> ASR[语音识别 ASR]
|
||||
ASR --> TD[回合检测 TD]
|
||||
end
|
||||
|
||||
subgraph Reasoning["语义处理"]
|
||||
TD --> LLM[大语言模型 LLM]
|
||||
LLM --> Tools[工具]
|
||||
LLM --> Text[回复文本]
|
||||
end
|
||||
|
||||
subgraph Output["输出生成"]
|
||||
Text --> TTS[语音合成 TTS]
|
||||
TTS --> AudioOut[助手音频]
|
||||
end
|
||||
```
|
||||
|
||||
Pipeline 的关键价值不在于“环节多”,而在于每个环节都可以被单独选择、单独优化、单独观测。
|
||||
|
||||
## 它适合什么场景
|
||||
|
||||
- 需要接特定 ASR / TTS 供应商
|
||||
- 需要稳定接入知识库、工具和工作流
|
||||
- 需要把问题定位到具体环节,而不是只看到整体失败
|
||||
- 需要按延迟、成本、质量对不同环节分别优化
|
||||
|
||||
## 数据流
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as 用户
|
||||
participant E as 引擎
|
||||
participant ASR as ASR 服务
|
||||
participant LLM as LLM 服务
|
||||
participant TTS as TTS 服务
|
||||
|
||||
U->>E: 音频帧 (PCM)
|
||||
E->>E: VAD / 回合检测
|
||||
E->>ASR: 发送可识别音频
|
||||
ASR-->>E: transcript.delta / transcript.final
|
||||
E->>LLM: 发送对话历史与当前输入
|
||||
LLM-->>E: assistant.response.delta
|
||||
E->>TTS: 文本片段
|
||||
TTS-->>E: 音频片段
|
||||
E-->>U: 音频流与事件
|
||||
```
|
||||
|
||||
## 延迟来自哪里
|
||||
|
||||
| 环节 | 典型影响 | 常见优化点 |
|
||||
|------|----------|------------|
|
||||
| **VAD / EoU** | 用户说完后多久触发回复 | 调整静音阈值和最短语音门限 |
|
||||
| **ASR** | 语音转写速度和准确率 | 选择合适模型、热词和语言设置 |
|
||||
| **LLM** | 首个 token 返回速度 | 选择低延迟模型、优化上下文 |
|
||||
| **TTS** | 文字到音频的生成速度 | 选择流式 TTS,缩短单次回复 |
|
||||
|
||||
Pipeline 的总延迟通常不是单点问题,而是链路总和。因此更适合做“逐环节调优”。
|
||||
|
||||
## EoU(用户说完)为什么重要
|
||||
|
||||
Pipeline 必须决定“什么时候把当前轮输入正式交给 LLM”。这个判断通常由 **EoU**(End of Utterance,即“用户说完”检测)完成。
|
||||
|
||||
- 阈值小:响应更快,但更容易把用户停顿误判为说完
|
||||
- 阈值大:更稳,但首次响应会更慢
|
||||
|
||||
你可以把它理解为 Pipeline 中最直接影响“对话节奏感”的参数之一。
|
||||
|
||||
## 工具、知识库和工作流如何插入
|
||||
|
||||
Pipeline 特别适合把业务能力插入到对话中:
|
||||
|
||||
- **知识库**:在 LLM 生成前补充领域事实
|
||||
- **工具**:在需要外部信息或动作时调用系统能力
|
||||
- **工作流**:在多步骤、多分支流程中决定接下来走哪个节点
|
||||
|
||||
这也是它在企业客服、流程助手和知识问答场景中更常见的原因。
|
||||
|
||||
## 智能打断
|
||||
|
||||
在 Pipeline 中,打断通常由 VAD 检测和 TTS 停止逻辑协同完成:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as 用户
|
||||
participant E as 引擎
|
||||
participant TTS as TTS
|
||||
|
||||
Note over E,TTS: 正在播放回复
|
||||
E->>U: 音频流...
|
||||
U->>E: 用户开始说话
|
||||
E->>E: 判定是否触发打断
|
||||
E->>TTS: 停止合成 / 播放
|
||||
E-->>U: output.audio.interrupted
|
||||
```
|
||||
|
||||
相比端到端实时模型,这种方式更容易解释“为什么打断”以及“在哪个环节发生了问题”。
|
||||
|
||||
## 配置示例
|
||||
|
||||
```json
|
||||
{
|
||||
"engine": "pipeline",
|
||||
"asr": {
|
||||
"provider": "openai-compatible",
|
||||
"model": "FunAudioLLM/SenseVoiceSmall",
|
||||
"language": "zh"
|
||||
},
|
||||
"llm": {
|
||||
"provider": "openai",
|
||||
"model": "gpt-4o-mini",
|
||||
"temperature": 0.7
|
||||
},
|
||||
"tts": {
|
||||
"provider": "openai-compatible",
|
||||
"model": "FunAudioLLM/CosyVoice2-0.5B",
|
||||
"voice": "anna"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [引擎架构](engines.md) - 回到选择指南
|
||||
- [Realtime 引擎](realtime-engine.md) - 对比端到端实时模型路径
|
||||
- [工具](../customization/tools.md) - 设计可被 LLM 安全调用的工具
|
||||
- [知识库](../customization/knowledge-base.md) - 在对话中补充领域知识
|
||||
97
docs/content/concepts/realtime-engine.md
Normal file
97
docs/content/concepts/realtime-engine.md
Normal file
@@ -0,0 +1,97 @@
|
||||
# Realtime 引擎
|
||||
|
||||
Realtime 引擎直接连接端到端实时模型,适合把低延迟和自然语音体验放在第一位的场景。
|
||||
|
||||
---
|
||||
|
||||
## 运行链路
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Input[音频 / 视频 / 文本输入] --> RT[Realtime Model]
|
||||
RT --> Output[音频 / 文本输出]
|
||||
RT --> Tools[工具]
|
||||
```
|
||||
|
||||
与 Pipeline 不同,Realtime 引擎不会把 ASR、回合检测、LLM、TTS 作为独立阶段暴露出来,而是更多依赖实时模型整体处理。
|
||||
|
||||
## 常见后端
|
||||
|
||||
| 后端 | 特点 |
|
||||
|------|------|
|
||||
| **OpenAI Realtime** | 语音交互自然,延迟低 |
|
||||
| **Gemini Live** | 多模态能力强 |
|
||||
| **Doubao 实时交互** | 更适合国内环境与中文场景 |
|
||||
|
||||
## 它适合什么场景
|
||||
|
||||
- 语音助手、陪练、虚拟角色等高自然度体验场景
|
||||
- 对首响和连续打断体验要求高的入口
|
||||
- 希望减少链路拼装复杂度,直接接入端到端模型的团队
|
||||
|
||||
## 数据流
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as 用户
|
||||
participant E as 引擎
|
||||
participant RT as Realtime Model
|
||||
|
||||
U->>E: 音频 / 视频 / 文本输入
|
||||
E->>RT: 转发实时流
|
||||
RT-->>E: 流式文本 / 音频输出
|
||||
E-->>U: 播放或渲染结果
|
||||
```
|
||||
|
||||
## Realtime 的优势
|
||||
|
||||
- **延迟更低**:链路更短,用户感知更自然
|
||||
- **全双工更顺滑**:用户插话时,模型更容易在内部处理打断
|
||||
- **多模态更直接**:适合音频、视频、文本混合输入输出场景
|
||||
|
||||
## Realtime 的取舍
|
||||
|
||||
- 更依赖实时模型供应商的能力边界
|
||||
- 不容易对 ASR / TTS / 回合检测做独立替换
|
||||
- 成本和可观测性往往不如 Pipeline 那样可逐环节拆分
|
||||
|
||||
## 智能打断
|
||||
|
||||
Realtime 模型通常原生支持全双工和打断:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant U as 用户
|
||||
participant E as 引擎
|
||||
participant RT as Realtime Model
|
||||
|
||||
Note over RT: 模型正在输出
|
||||
RT-->>E: 音频流...
|
||||
E-->>U: 播放
|
||||
U->>E: 用户开始说话
|
||||
E->>RT: 转发新输入
|
||||
Note over RT: 模型内部处理中断并切换回复
|
||||
RT-->>E: 新的响应
|
||||
E-->>U: 播放新响应
|
||||
```
|
||||
|
||||
这种方式更自然,但你通常只能看到模型的整体行为,而不是每个中间阶段的细节。
|
||||
|
||||
## 配置示例
|
||||
|
||||
```json
|
||||
{
|
||||
"engine": "multimodal",
|
||||
"model": {
|
||||
"provider": "openai",
|
||||
"model": "gpt-4o-realtime-preview",
|
||||
"voice": "alloy"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [引擎架构](engines.md) - 回到两类引擎的选择指南
|
||||
- [Pipeline 引擎](pipeline-engine.md) - 查看分段可控的运行路径
|
||||
- [WebSocket 协议](../api-reference/websocket.md) - 了解客户端如何与引擎建立会话
|
||||
53
docs/content/customization/asr.md
Normal file
53
docs/content/customization/asr.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# 语音识别
|
||||
|
||||
语音识别(ASR)负责把用户音频实时转写成文本,供引擎继续理解和处理。
|
||||
|
||||
## 关键配置项
|
||||
|
||||
| 配置项 | 说明 |
|
||||
|--------|------|
|
||||
| **ASR 引擎** | 选择语音识别服务提供商或自建服务 |
|
||||
| **模型** | 实际使用的识别模型名称 |
|
||||
| **语言** | 中文、英文或多语言 |
|
||||
| **热词** | 提高业务词汇、品牌词、专有名词识别率 |
|
||||
| **标点与规范化** | 自动补全标点、规范数字和日期等 |
|
||||
|
||||
## 模式
|
||||
|
||||
- `offline`:引擎本地缓冲音频后触发识别(适用于 OpenAI-compatible / SiliconFlow)。
|
||||
- `streaming`:音频分片实时发送到服务端,服务端持续返回转写事件(适用于 DashScope Realtime ASR、Volcengine BigASR)。
|
||||
|
||||
## 详细配置项
|
||||
|
||||
| 配置项 | 说明 |
|
||||
|---|---|
|
||||
| ASR 引擎 | 选择语音识别服务提供商 |
|
||||
| 模型 | 识别模型名称 |
|
||||
| `enable_interim` | 是否开启离线 ASR 中间结果(默认 `false`,仅离线模式生效) |
|
||||
| `app_id` / `resource_id` | Volcengine 等厂商的应用标识与资源标识 |
|
||||
| `request_params` | 厂商原生请求参数透传,例如 `end_window_size`、`force_to_speech_time`、`context` |
|
||||
| 语言 | 中文/英文/多语言 |
|
||||
| 热词 | 提升特定词汇识别准确率 |
|
||||
| 标点与规范化 | 是否自动补全标点、文本规范化 |
|
||||
|
||||
## 选择建议
|
||||
|
||||
- 客服、外呼等业务场景建议维护热词表,并按业务线持续更新
|
||||
- 多语言入口建议显式指定语言,避免模型自动判断带来的波动
|
||||
- 对延迟敏感的场景优先选择流式识别模型
|
||||
- 对准确率敏感的场景,先评估专有名词、数字、地址等样本的识别表现
|
||||
|
||||
## 运行建议
|
||||
|
||||
- 使用与接入端一致的采样率和编码方式,减少额外转换
|
||||
- 在测试阶段准备固定样本,便于对比不同模型或参数的变化
|
||||
- 把“识别准确率”和“识别延迟”一起看,不要只看其中一项
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [声音资源](voices.md) - 完整语音输入输出链路中的 TTS 侧配置
|
||||
- [快速开始](../quickstart/index.md) - 以任务路径接入第一个 ASR 资源
|
||||
- 客服场景建议开启热词并维护业务词表
|
||||
- 多语言场景建议按会话入口显式指定语言
|
||||
- 对延迟敏感场景优先选择流式识别模型
|
||||
- 当前支持提供商:`openai_compatible`、`siliconflow`、`dashscope`、`volcengine`、`buffered`(回退)
|
||||
86
docs/content/customization/knowledge-base.md
Normal file
86
docs/content/customization/knowledge-base.md
Normal file
@@ -0,0 +1,86 @@
|
||||
# 知识库
|
||||
|
||||
知识库负责承载助手需要引用的私有事实、业务资料和长文档内容,是 RAG(检索增强生成)能力的正式说明页。
|
||||
|
||||
## 什么时候应该用知识库
|
||||
|
||||
当问题答案主要来自“稳定文档”而不是实时外部动作时,优先使用知识库:
|
||||
|
||||
- 产品说明、政策条款、操作流程、培训材料
|
||||
- 内部手册、FAQ、规范文档
|
||||
- 需要被多位助手复用的领域知识
|
||||
|
||||
如果任务本质上是“查状态、写数据、执行动作”,那通常更适合 [工具](tools.md),而不是知识库。
|
||||
|
||||
## 工作原理
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Indexing["索引阶段"]
|
||||
Doc[文档] --> Chunk[分块]
|
||||
Chunk --> Embed[向量化]
|
||||
Embed --> Store[(向量数据库)]
|
||||
end
|
||||
|
||||
subgraph Query["查询阶段"]
|
||||
Q[用户问题] --> Search[相似度检索]
|
||||
Store --> Search
|
||||
Search --> Context[相关片段]
|
||||
Context --> LLM[LLM 生成回答]
|
||||
end
|
||||
```
|
||||
|
||||
核心原则很简单:把长文档转成可检索的片段,在用户提问时只把最相关的内容送给模型。
|
||||
|
||||
## 适合放进知识库的内容
|
||||
|
||||
| 适合 | 不适合 |
|
||||
|------|--------|
|
||||
| 稳定规则、标准答案、产品文档 | 高频变化的实时状态 |
|
||||
| 领域术语、说明手册、培训材料 | 需要外部系统写入或变更的动作 |
|
||||
| 需要跨助手复用的内容 | 只在单次会话里临时生成的数据 |
|
||||
|
||||
## 内容准备建议
|
||||
|
||||
- 优先上传结构清晰、主题明确的文档
|
||||
- 对超长文档按主题拆分,减少一次索引的噪声
|
||||
- 标题、章节名和表格说明对召回质量很重要,不要全部删掉格式信息
|
||||
- 与其堆很多相近文档,不如先清理重复、过期和相互冲突的内容
|
||||
|
||||
## 常见配置项
|
||||
|
||||
| 配置项 | 作用 | 常见做法 |
|
||||
|--------|------|----------|
|
||||
| **相似度阈值** | 过滤弱相关结果 | 从保守值起步,再按误召回调 |
|
||||
| **返回数量** | 控制一次送给模型的候选片段数 | 先少后多,避免上下文污染 |
|
||||
| **分块大小** | 决定每个文档片段的长度 | 按文档类型和问题粒度调整 |
|
||||
|
||||
## 创建与维护
|
||||
|
||||
### 最小流程
|
||||
|
||||
1. 新建知识库
|
||||
2. 上传文档
|
||||
3. 完成索引
|
||||
4. 用典型问题测试召回结果
|
||||
5. 绑定到目标助手
|
||||
|
||||
### 日常维护
|
||||
|
||||
- 删除过期或互相矛盾的文档
|
||||
- 当业务口径变化时,优先更新知识库而不是只改提示词
|
||||
- 为关键问题准备固定测试问句,观察召回是否稳定
|
||||
|
||||
## 与助手的关系
|
||||
|
||||
知识库不是独立产品入口,而是助手的能力层:
|
||||
|
||||
- 助手决定是否、何时、以什么风格使用知识
|
||||
- 知识库决定能够提供哪些事实片段
|
||||
- 工作流和工具可以与知识库并用,但承担不同职责
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [助手概念](../concepts/assistants.md) - 知识库在助手能力层中的位置
|
||||
- [LLM 模型](models.md) - 为知识库准备嵌入或重排模型
|
||||
- [工具](tools.md) - 当任务需要执行动作时,优先考虑工具而不是知识库
|
||||
53
docs/content/customization/models.md
Normal file
53
docs/content/customization/models.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# LLM 模型
|
||||
|
||||
本页是资源库中 LLM 模型的正式说明页,聚焦文本生成、嵌入和重排模型的接入与选择。
|
||||
|
||||
## 这页负责什么
|
||||
|
||||
当你需要为助手配置“理解与生成能力”时,请从这里开始决定:
|
||||
|
||||
- 使用哪个供应商或模型家族
|
||||
- 该模型负责文本生成、嵌入还是重排
|
||||
- 接口地址、认证信息和默认参数如何设置
|
||||
|
||||
语音识别和语音合成分别由 [语音识别](asr.md) 与 [声音资源](voices.md) 说明,不在本页重复。
|
||||
|
||||
## 模型类型
|
||||
|
||||
| 类型 | 用途 | 常见场景 |
|
||||
|------|------|----------|
|
||||
| **文本模型** | 生成回复、总结、分类、规划 | 助手主对话、工具调用决策 |
|
||||
| **嵌入模型** | 向量化文档或查询 | 知识库检索 |
|
||||
| **重排模型** | 对检索结果再次排序 | 提升知识召回质量 |
|
||||
|
||||
## 配置清单
|
||||
|
||||
| 配置项 | 说明 | 建议 |
|
||||
|--------|------|------|
|
||||
| **供应商** | OpenAI 兼容、托管平台或自建服务 | 用统一命名规范区分环境 |
|
||||
| **模型名称** | 控制台中的显示名称 | 体现厂商、用途和环境 |
|
||||
| **模型标识** | 请求中实际使用的 model 名称 | 保持与供应商文档一致 |
|
||||
| **Base URL** | 接口地址 | 为不同环境分别配置 |
|
||||
| **API Key / Token** | 鉴权凭证 | 与显示名称配套管理 |
|
||||
| **默认参数** | Temperature、Max Tokens、上下文长度等 | 按业务场景收敛默认值 |
|
||||
|
||||
## 选择建议
|
||||
|
||||
- **先按用途选模型,再按成本和延迟筛选供应商**
|
||||
- **文本模型不要承担知识库检索职责**:检索应交给嵌入与重排模型
|
||||
- **为不同环境建立清晰命名**:如 `prod-gpt4o-mini`、`staging-qwen-text`
|
||||
- **默认参数要保守**:让助手默认稳定,再在单个场景内按需调优
|
||||
|
||||
## 常见组合
|
||||
|
||||
| 目标 | 推荐组合 |
|
||||
|------|----------|
|
||||
| **通用对话助手** | 1 个文本模型 |
|
||||
| **知识问答助手** | 文本模型 + 嵌入模型 |
|
||||
| **高质量知识召回** | 文本模型 + 嵌入模型 + 重排模型 |
|
||||
|
||||
## 下一步
|
||||
|
||||
- [语音识别](asr.md) - 为语音输入选择 ASR
|
||||
- [声音资源](voices.md) - 为语音输出准备 TTS 资源
|
||||
- [知识库](knowledge-base.md) - 把嵌入 / 重排模型接入 RAG 链路
|
||||
108
docs/content/customization/tools.md
Normal file
108
docs/content/customization/tools.md
Normal file
@@ -0,0 +1,108 @@
|
||||
# 工具
|
||||
|
||||
工具让助手从“会回答”扩展成“能执行动作”。本页是工具能力的正式说明页。
|
||||
|
||||
## 什么时候应该用工具
|
||||
|
||||
当用户请求需要依赖外部系统、实时数据或执行某个动作时,应该使用工具,而不是只靠提示词或知识库。
|
||||
|
||||
典型场景包括:
|
||||
|
||||
- 查询订单、库存、物流、天气等实时信息
|
||||
- 创建预约、提交表单、写入业务系统
|
||||
- 获取客户端环境能力,如定位、相机、权限确认
|
||||
|
||||
如果问题本质上是“查阅稳定资料”,优先用 [知识库](knowledge-base.md);如果问题是“执行动作或读写实时状态”,优先用工具。
|
||||
|
||||
## 工具类型
|
||||
|
||||
| 类型 | 说明 | 常见场景 |
|
||||
|------|------|----------|
|
||||
| **Webhook 工具** | 调用外部 HTTP API | 订单查询、CRM 写入、预约服务 |
|
||||
| **客户端工具** | 由接入端在本地执行 | 获取定位、打开相机、请求用户授权 |
|
||||
| **内建工具** | 平台或运行时直接提供 | 搜索、计算、知识检索等 |
|
||||
|
||||
## 工具调用的基本过程
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant User as 用户
|
||||
participant Assistant as 助手 / 模型
|
||||
participant Tool as 工具
|
||||
|
||||
User->>Assistant: 发起请求
|
||||
Assistant->>Assistant: 判断是否需要工具
|
||||
Assistant->>Tool: 发起工具调用
|
||||
Tool-->>Assistant: 返回结构化结果
|
||||
Assistant->>User: 组织最终回复
|
||||
```
|
||||
|
||||
关键点不是“模型会不会调用工具”,而是“工具的定义是否足够清晰,能让模型在正确时机调用”。
|
||||
|
||||
## 如何定义一个好工具
|
||||
|
||||
| 要素 | 为什么重要 |
|
||||
|------|------------|
|
||||
| **清晰名称** | 让模型知道它是做什么的,而不是猜用途 |
|
||||
| **明确描述** | 告诉模型何时调用、何时不要调用 |
|
||||
| **完整参数定义** | 降低缺参、错参和歧义调用 |
|
||||
| **稳定返回结构** | 让模型更容易根据结果组织回复 |
|
||||
| **明确错误语义** | 让失败时也能安全退回用户对话 |
|
||||
|
||||
## Webhook 工具示例
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "query_order",
|
||||
"description": "根据订单号查询当前订单状态,仅用于用户已提供订单号的场景。",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"order_id": {
|
||||
"type": "string",
|
||||
"description": "订单编号"
|
||||
}
|
||||
},
|
||||
"required": ["order_id"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 客户端工具的作用
|
||||
|
||||
某些动作必须在接入端执行,例如:
|
||||
|
||||
- 获取当前位置
|
||||
- 请求麦克风或相机权限
|
||||
- 打开特定页面或原生能力
|
||||
|
||||
这类工具通常通过事件流和客户端配合完成,而不是由后端直接执行。
|
||||
|
||||
## 工具设计建议
|
||||
|
||||
- **一工具一职责**:不要把多个业务动作塞进同一个工具
|
||||
- **名称与描述写给模型看**:必须明确何时用、何时不用
|
||||
- **先设计错误返回**:失败时模型应该知道如何解释给用户
|
||||
- **减少高权限工具暴露面**:不是每个助手、每个工作流节点都需要全部工具
|
||||
- **把业务规则放回系统**:工具负责执行,提示词负责决策边界
|
||||
|
||||
## 与知识库、工作流的分工
|
||||
|
||||
- **知识库**:提供稳定事实
|
||||
- **工具**:执行动作或读取实时状态
|
||||
- **工作流**:决定何时进入某个步骤、调用哪个工具、失败如何回退
|
||||
|
||||
当一个助手开始涉及多步骤、多系统调用时,工具通常应与 [工作流](workflows.md) 一起设计,而不是孤立配置。
|
||||
|
||||
## 安全与治理
|
||||
|
||||
- 校验输入,不直接信任模型生成的参数
|
||||
- 为工具设置最小权限和清晰的可见范围
|
||||
- 记录调用日志,便于审计和回放
|
||||
- 对外部接口增加超时、重试和速率限制策略
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [知识库](knowledge-base.md) - 当问题更适合“查资料”时使用知识库
|
||||
- [工作流](workflows.md) - 当工具调用需要流程控制和分支逻辑时接入工作流
|
||||
- [助手概念](../concepts/assistants.md) - 理解工具在助手能力层中的位置
|
||||
25
docs/content/customization/tts.md
Normal file
25
docs/content/customization/tts.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# TTS 参数
|
||||
|
||||
TTS 参数决定助手语音输出的节奏、音量和听感。本页只讨论参数层面的调优建议。
|
||||
|
||||
## 常用参数
|
||||
|
||||
| 参数 | 说明 | 常见范围 |
|
||||
|------|------|----------|
|
||||
| **语速** | 说话速度 | `0.5 - 2.0` |
|
||||
| **音量 / 增益** | 输出音量强弱 | 供应商自定义 |
|
||||
| **音调** | 声线高低 | 供应商自定义 |
|
||||
| **模型** | 合成模型名称 | 依供应商而定 |
|
||||
| **声音 ID** | 发音人或音色标识 | 依供应商而定 |
|
||||
|
||||
## 调优建议
|
||||
|
||||
- 对话助手通常建议把语速控制在 `0.9 - 1.2`
|
||||
- 需要打断能力的场景,优先选择低延迟流式 TTS,并避免过长的单次回复
|
||||
- 如果业务强调可信度或专业感,先保证清晰度和稳定性,再追求个性化音色
|
||||
- 不要只试听一句问候语,至少用三类文案对比:短答复、长答复、数字或专有名词较多的答复
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [声音资源](voices.md) - 先选择适合的供应商、模型和音色
|
||||
- [语音识别](asr.md) - 结合输入侧延迟一起评估整条语音链路
|
||||
43
docs/content/customization/voices.md
Normal file
43
docs/content/customization/voices.md
Normal file
@@ -0,0 +1,43 @@
|
||||
# 声音资源
|
||||
|
||||
本页是资源库中 TTS 声音与发音人资源的正式说明页,聚焦“选择哪种声音给助手输出”。
|
||||
|
||||
## 这页负责什么
|
||||
|
||||
当你已经决定启用语音输出后,需要在这里完成:
|
||||
|
||||
- 选择供应商、模型和声音资源
|
||||
- 为不同业务或语言准备不同音色
|
||||
- 通过预览和测试确定默认发音人
|
||||
|
||||
更细的速度、音量、音调等参数建议见 [TTS 参数](tts.md)。
|
||||
|
||||
## 选择声音时要考虑什么
|
||||
|
||||
| 维度 | 说明 |
|
||||
|------|------|
|
||||
| **语言与口音** | 是否覆盖目标用户语言与地区口音 |
|
||||
| **风格** | 专业、亲切、活泼、沉稳等输出气质 |
|
||||
| **延迟** | 是否适合实时对话,而不仅是离线合成 |
|
||||
| **稳定性** | 长文本、多轮会话中的音色一致性 |
|
||||
| **成本** | 单次调用成本和高并发可用性 |
|
||||
|
||||
## 推荐做法
|
||||
|
||||
1. 先为每类业务角色确定一条主音色
|
||||
2. 再按语言或渠道补充少量备选音色
|
||||
3. 通过固定测试文案试听,统一比较自然度、节奏和可懂度
|
||||
4. 上线后尽量保持默认音色稳定,避免频繁切换影响用户体验
|
||||
|
||||
## 常见资源组织方式
|
||||
|
||||
| 组织方式 | 适用场景 |
|
||||
|----------|----------|
|
||||
| **按语言区分** | 中英文或多语种助手 |
|
||||
| **按业务角色区分** | 客服、销售、培训、提醒类助手 |
|
||||
| **按环境区分** | 开发、预发、生产使用不同供应商或凭证 |
|
||||
|
||||
## 下一步
|
||||
|
||||
- [TTS 参数](tts.md) - 调整语速、增益、音调等输出参数
|
||||
- [快速开始](../quickstart/index.md) - 把声音资源绑定到第一个助手
|
||||
106
docs/content/customization/workflows.md
Normal file
106
docs/content/customization/workflows.md
Normal file
@@ -0,0 +1,106 @@
|
||||
# 工作流
|
||||
|
||||
工作流用于把复杂业务拆成明确的步骤、分支和回退策略,是 RAS 中承载流程逻辑的正式能力页。
|
||||
|
||||
## 什么时候需要工作流
|
||||
|
||||
当一个助手同时满足以下任一情况时,通常应考虑工作流,而不是继续堆叠单一提示词:
|
||||
|
||||
- 需要多轮收集信息,例如订单号、手机号、预约时间等
|
||||
- 需要按意图或条件走不同分支
|
||||
- 需要串联多个工具或业务系统
|
||||
- 需要在异常或信息不足时统一回退到澄清、兜底或人工节点
|
||||
|
||||
## 工作流与助手的关系
|
||||
|
||||
助手负责对外表现、全局策略和渠道接入;工作流负责把某个业务流程拆成可维护的节点。
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Assistant[助手] --> Workflow[工作流]
|
||||
Workflow --> Nodes[节点与分支]
|
||||
Nodes --> Tools[工具 / 知识库 / 人工]
|
||||
```
|
||||
|
||||
这意味着:
|
||||
|
||||
- 助手定义角色、提示词基线、模型和输出方式
|
||||
- 工作流定义“这类问题该按什么顺序被处理”
|
||||
- 工具和知识库作为节点可调用的能力,被有选择地暴露给流程
|
||||
|
||||
## 关键组成
|
||||
|
||||
| 组成 | 作用 | 设计建议 |
|
||||
|------|------|----------|
|
||||
| **工作流名称** | 区分业务流程 | 用业务语义命名,避免过于技术化 |
|
||||
| **入口节点** | 用户进入后的第一步 | 保持单入口,便于理解和测试 |
|
||||
| **全局提示词** | 对所有节点生效的共性约束 | 保持简短,避免与节点提示词冲突 |
|
||||
| **节点提示词** | 当前节点的任务说明 | 单一职责,明确输入 / 输出 |
|
||||
| **节点工具白名单** | 控制当前节点可调用的工具集合 | 遵循最小权限原则 |
|
||||
| **超时与回退** | 异常、超时、缺信息时的处理方式 | 优先回到澄清、兜底或人工节点 |
|
||||
| **上下文透传** | 在节点之间共享状态 | 只传递后续节点真正需要的信息 |
|
||||
|
||||
## 常见节点类型
|
||||
|
||||
| 节点类型 | 适合做什么 |
|
||||
|----------|------------|
|
||||
| **路由节点** | 判断用户意图并进入不同分支 |
|
||||
| **信息收集节点** | 收集订单号、联系方式、时间等关键信息 |
|
||||
| **处理节点** | 调用工具、执行查询、计算或写入系统 |
|
||||
| **回复节点** | 组织最终答复并控制输出风格 |
|
||||
| **人工节点** | 转接人工、排队或发起通知 |
|
||||
| **结束节点** | 输出结束语并关闭流程 |
|
||||
|
||||
## 推荐编排步骤
|
||||
|
||||
1. 先写清楚流程目标:这条工作流要解决哪一类业务问题
|
||||
2. 画出最小节点图:入口、关键分支、结束和兜底
|
||||
3. 为每个节点定义唯一职责和输入 / 输出
|
||||
4. 再绑定知识库、工具和回退策略
|
||||
5. 在测试面板或流程调试工具中验证每条主路径和异常路径
|
||||
|
||||
## 配置示例
|
||||
|
||||
```yaml
|
||||
workflow:
|
||||
name: "订单咨询流程"
|
||||
entry: "intent_router"
|
||||
global_prompt: "优先给出可执行步骤,必要时先澄清信息。"
|
||||
nodes:
|
||||
- id: "intent_router"
|
||||
type: "router"
|
||||
prompt: "识别用户意图:查订单、退款、投诉"
|
||||
next:
|
||||
- when: "intent == query_order"
|
||||
to: "collect_order_id"
|
||||
- when: "intent == refund"
|
||||
to: "refund_policy"
|
||||
- id: "collect_order_id"
|
||||
type: "collect"
|
||||
prompt: "请用户提供订单号"
|
||||
tools: ["query_order"]
|
||||
fallback: "human_handoff"
|
||||
- id: "human_handoff"
|
||||
type: "end"
|
||||
prompt: "转人工处理"
|
||||
```
|
||||
|
||||
## 设计建议
|
||||
|
||||
- **让每个节点只做一件事**:避免单节点同时负责路由、收集信息和最终回复
|
||||
- **工具按节点授权**:不要把所有工具暴露给整条流程中的每个节点
|
||||
- **把失败路径设计出来**:超时、无结果、参数缺失都应该有明确回退
|
||||
- **优先传状态,不传长文本**:节点之间共享必要结构化信息,比传递大段自然语言更稳
|
||||
- **为流程保留可观测性**:每条主路径都应能在调试时解释“为什么走到这里”
|
||||
|
||||
## 当前边界
|
||||
|
||||
- 文档不会完整覆盖所有表达式或节点字段的最终 Schema
|
||||
- 不同执行引擎下,可用节点字段和运行行为可能存在差异
|
||||
- 可视化编排与底层字段映射可能不会一一对应
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [助手概念](../concepts/assistants.md) - 工作流在助手体系中的位置
|
||||
- [工具](tools.md) - 设计可被流程安全调用的工具
|
||||
- [知识库](knowledge-base.md) - 让流程中的节点使用 RAG 能力
|
||||
@@ -1,95 +0,0 @@
|
||||
# 部署指南
|
||||
|
||||
## 方式一:Docker 部署(推荐)
|
||||
|
||||
### 1. 构建镜像
|
||||
|
||||
```bash
|
||||
docker build -t ai-video-assistant-web ./web
|
||||
```
|
||||
|
||||
### 2. 运行容器
|
||||
|
||||
```bash
|
||||
docker run -d \
|
||||
--name ai-assistant-web \
|
||||
-p 3000:80 \
|
||||
ai-video-assistant-web
|
||||
```
|
||||
|
||||
### 3. 使用 Docker Compose
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
web:
|
||||
build: ./web
|
||||
ports:
|
||||
- "3000:80"
|
||||
environment:
|
||||
- VITE_API_URL=http://api:8080
|
||||
```
|
||||
|
||||
运行:
|
||||
```bash
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
## 方式二:Nginx 部署
|
||||
|
||||
### 1. 构建前端
|
||||
|
||||
```bash
|
||||
cd web
|
||||
npm run build
|
||||
```
|
||||
|
||||
### 2. 配置 Nginx
|
||||
|
||||
```nginx
|
||||
server {
|
||||
listen 80;
|
||||
server_name your-domain.com;
|
||||
root /var/www/ai-assistant/dist;
|
||||
index index.html;
|
||||
|
||||
location / {
|
||||
try_files $uri $uri/ /index.html;
|
||||
}
|
||||
|
||||
location /api {
|
||||
proxy_pass http://localhost:8080;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3. 启动 Nginx
|
||||
|
||||
```bash
|
||||
sudo nginx -t
|
||||
sudo systemctl reload nginx
|
||||
```
|
||||
|
||||
## 环境变量配置
|
||||
|
||||
| 变量 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| VITE_API_URL | 后端 API 地址 | http://localhost:8080 |
|
||||
| VITE_GEMINI_API_KEY | Gemini API Key | - |
|
||||
|
||||
## 验证部署
|
||||
|
||||
1. 访问 http://your-domain.com
|
||||
2. 检查页面是否正常加载
|
||||
3. 验证各功能模块是否可用
|
||||
|
||||
## 故障排查
|
||||
|
||||
| 问题 | 解决方案 |
|
||||
|------|---------|
|
||||
| 页面空白 | 检查浏览器控制台错误 |
|
||||
| API 请求失败 | 确认 VITE_API_URL 配置正确 |
|
||||
| 静态资源 404 | 检查 nginx try_files 配置 |
|
||||
161
docs/content/deployment/docker.md
Normal file
161
docs/content/deployment/docker.md
Normal file
@@ -0,0 +1,161 @@
|
||||
# Docker 部署
|
||||
|
||||
Docker 是推荐的部署方式,可以快速启动服务并确保环境一致性。
|
||||
|
||||
## 前提条件
|
||||
|
||||
- Docker 20.10+
|
||||
- Docker Compose 2.0+(可选)
|
||||
|
||||
## 构建镜像
|
||||
|
||||
### Web 前端
|
||||
|
||||
```bash
|
||||
docker build -t ai-video-assistant-web ./web
|
||||
```
|
||||
|
||||
### API 服务
|
||||
|
||||
```bash
|
||||
docker build -t ai-video-assistant-api ./api
|
||||
```
|
||||
|
||||
### Engine 服务
|
||||
|
||||
```bash
|
||||
docker build -t ai-video-assistant-engine ./engine
|
||||
```
|
||||
|
||||
## 运行容器
|
||||
|
||||
### 单独运行
|
||||
|
||||
```bash
|
||||
# Web 前端
|
||||
docker run -d \
|
||||
--name ai-assistant-web \
|
||||
-p 3000:80 \
|
||||
ai-video-assistant-web
|
||||
|
||||
# API 服务
|
||||
docker run -d \
|
||||
--name ai-assistant-api \
|
||||
-p 8080:8080 \
|
||||
ai-video-assistant-api
|
||||
|
||||
# Engine 服务
|
||||
docker run -d \
|
||||
--name ai-assistant-engine \
|
||||
-p 8000:8000 \
|
||||
ai-video-assistant-engine
|
||||
```
|
||||
|
||||
## Docker Compose
|
||||
|
||||
推荐使用 Docker Compose 管理多个服务:
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
web:
|
||||
build: ./web
|
||||
ports:
|
||||
- "3000:80"
|
||||
environment:
|
||||
- VITE_API_URL=http://api:8080
|
||||
depends_on:
|
||||
- api
|
||||
|
||||
api:
|
||||
build: ./api
|
||||
ports:
|
||||
- "8080:8080"
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://postgres:password@db:5432/ai_assistant
|
||||
depends_on:
|
||||
- db
|
||||
|
||||
engine:
|
||||
build: ./engine
|
||||
ports:
|
||||
- "8000:8000"
|
||||
environment:
|
||||
- BACKEND_URL=http://api:8080
|
||||
|
||||
db:
|
||||
image: postgres:15
|
||||
environment:
|
||||
- POSTGRES_DB=ai_assistant
|
||||
- POSTGRES_PASSWORD=password
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
```
|
||||
|
||||
### 启动服务
|
||||
|
||||
```bash
|
||||
# 启动所有服务
|
||||
docker-compose up -d
|
||||
|
||||
# 查看日志
|
||||
docker-compose logs -f
|
||||
|
||||
# 停止服务
|
||||
docker-compose down
|
||||
```
|
||||
|
||||
## 镜像优化
|
||||
|
||||
### 多阶段构建
|
||||
|
||||
Web 前端 Dockerfile 示例:
|
||||
|
||||
```dockerfile
|
||||
# 构建阶段
|
||||
FROM node:18-alpine AS builder
|
||||
WORKDIR /app
|
||||
COPY package*.json ./
|
||||
RUN npm ci
|
||||
COPY . .
|
||||
RUN npm run build
|
||||
|
||||
# 运行阶段
|
||||
FROM nginx:alpine
|
||||
COPY --from=builder /app/dist /usr/share/nginx/html
|
||||
COPY nginx.conf /etc/nginx/nginx.conf
|
||||
EXPOSE 80
|
||||
CMD ["nginx", "-g", "daemon off;"]
|
||||
```
|
||||
|
||||
## 健康检查
|
||||
|
||||
```yaml
|
||||
services:
|
||||
api:
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
```
|
||||
|
||||
## 常见问题
|
||||
|
||||
### 容器启动失败
|
||||
|
||||
```bash
|
||||
# 查看容器日志
|
||||
docker logs ai-assistant-web
|
||||
|
||||
# 进入容器调试
|
||||
docker exec -it ai-assistant-web sh
|
||||
```
|
||||
|
||||
### 端口冲突
|
||||
|
||||
修改 `docker-compose.yml` 中的端口映射,例如 `3001:80`。
|
||||
41
docs/content/deployment/index.md
Normal file
41
docs/content/deployment/index.md
Normal file
@@ -0,0 +1,41 @@
|
||||
# 部署概览
|
||||
|
||||
本章节介绍如何使用 Docker 部署 Realtime Agent Studio (RAS)。
|
||||
|
||||
## 部署方式
|
||||
|
||||
| 方式 | 适用场景 | 复杂度 |
|
||||
|------|---------|--------|
|
||||
| [Docker 部署](docker.md) | 快速启动、容器化运行 | 简单 |
|
||||
|
||||
## 快速开始
|
||||
|
||||
### Docker 一键部署
|
||||
|
||||
```bash
|
||||
docker build -t ai-video-assistant-web ./web
|
||||
docker run -d -p 3000:80 --name ai-assistant-web ai-video-assistant-web
|
||||
```
|
||||
|
||||
### 验证部署
|
||||
|
||||
1. 访问 http://localhost:3000
|
||||
2. 检查页面是否正常加载
|
||||
3. 验证各功能模块是否可用
|
||||
|
||||
## 环境变量配置
|
||||
|
||||
| 变量 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| VITE_API_URL | 后端 API 地址 | http://localhost:8080 |
|
||||
| VITE_GEMINI_API_KEY | Gemini API Key | - |
|
||||
|
||||
## 故障排查
|
||||
|
||||
| 问题 | 解决方案 |
|
||||
|------|---------|
|
||||
| 页面空白 | 检查浏览器控制台错误 |
|
||||
| API 请求失败 | 确认 VITE_API_URL 配置正确 |
|
||||
| 静态资源 404 | 检查 nginx try_files 配置 |
|
||||
|
||||
更多问题请参考 [故障排查](../resources/troubleshooting.md)。
|
||||
@@ -1,65 +0,0 @@
|
||||
# 助手管理
|
||||
|
||||
助手是 AI Video Assistant 的核心模块,用于创建和配置智能对话机器人。
|
||||
|
||||
## 创建助手
|
||||
|
||||

|
||||
|
||||
### 基本配置
|
||||
|
||||
1. 进入 **助手管理** 页面
|
||||
2. 点击 **新建助手** 按钮
|
||||
3. 填写基本信息:
|
||||
|
||||
| 配置项 | 说明 |
|
||||
|-------|------|
|
||||
| 助手名称 | 唯一标识,用于区分不同助手 |
|
||||
| 提示词 | 定义助手的角色和行为 |
|
||||
| 温度参数 | 控制回复的随机性(0-1) |
|
||||
|
||||
### 配置标签页
|
||||
|
||||
#### 全局设置
|
||||
|
||||
- 设置助手的核心对话能力
|
||||
- 配置上下文长度
|
||||
- 设置对话开场白
|
||||
|
||||
#### 语音配置
|
||||
|
||||
| 配置 | 说明 |
|
||||
|------|------|
|
||||
| TTS 引擎 | 选择语音合成服务(阿里/火山/Minimax) |
|
||||
| 音色 | 选择语音风格和性别 |
|
||||
| 语速 | 语音播放速度 |
|
||||
| 音量 | 语音输出音量 |
|
||||
|
||||
#### 工具绑定
|
||||
|
||||
- 配置助手可调用的外部工具
|
||||
- 启用/禁用特定功能模块
|
||||
|
||||
#### 知识关联
|
||||
|
||||
- 关联 RAG 知识库
|
||||
- 配置检索参数(相似度阈值、返回数量)
|
||||
|
||||
#### 外部链接
|
||||
|
||||
- 配置第三方服务集成
|
||||
- 设置 Webhook 回调
|
||||
|
||||
## 调试助手
|
||||
|
||||
在助手详情页可进行实时调试:
|
||||
- 文本对话测试
|
||||
- 语音输入测试
|
||||
- 工具调用验证
|
||||
|
||||
## 发布助手
|
||||
|
||||
配置完成后:
|
||||
1. 点击 **保存**
|
||||
2. 点击 **发布**
|
||||
3. 获取 API 调用地址
|
||||
@@ -1,53 +0,0 @@
|
||||
# 知识库
|
||||
|
||||
知识库基于 RAG(检索增强生成)技术,让 AI 能够回答私有领域问题。
|
||||
|
||||
## 概述
|
||||
|
||||

|
||||
|
||||
## 创建知识库
|
||||
|
||||
### 步骤
|
||||
|
||||
1. 进入 **知识库** 页面
|
||||
2. 点击 **新建知识库**
|
||||
3. 填写知识库名称
|
||||
4. 上传文档
|
||||
|
||||
### 支持格式
|
||||
|
||||
| 格式 | 说明 |
|
||||
|------|------|
|
||||
| Markdown | 最佳选择,格式清晰 |
|
||||
| PDF | 自动提取文本 |
|
||||
| TXT | 纯文本支持 |
|
||||
| Word | 需转换为其他格式 |
|
||||
|
||||
### 文档上传
|
||||
|
||||
- 拖拽上传或点击选择
|
||||
- 单文件大小限制 10MB
|
||||
- 建议单文档不超过 50000 字
|
||||
|
||||
## 配置检索参数
|
||||
|
||||
| 参数 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| 相似度阈值 | 低于此分数的结果不返回 | 0.7 |
|
||||
| 返回数量 | 单次检索返回的结果数 | 3 |
|
||||
| 分块大小 | 文档分块的最大长度 | 500 |
|
||||
|
||||
## 管理知识库
|
||||
|
||||
- **查看文档** - 浏览已上传的文件
|
||||
- **删除文档** - 移除不需要的内容
|
||||
- **更新文档** - 重新上传覆盖
|
||||
- **测试检索** - 验证知识库效果
|
||||
|
||||
## 关联助手
|
||||
|
||||
在助手配置的 **知识** 标签页中:
|
||||
1. 选择要关联的知识库
|
||||
2. 设置检索策略
|
||||
3. 保存配置
|
||||
@@ -1,44 +0,0 @@
|
||||
# 模型配置
|
||||
|
||||
## LLM 模型库
|
||||
|
||||

|
||||
|
||||
### 支持的模型
|
||||
|
||||
| 供应商 | 模型 | 特点 |
|
||||
|--------|------|------|
|
||||
| **OpenAI** | GPT-4 / GPT-3.5 | 通用能力强 |
|
||||
| **DeepSeek** | DeepSeek Chat | 高性价比 |
|
||||
| **SiliconFlow** | 多种开源模型 | 本地部署友好 |
|
||||
| **Google** | Gemini Pro | 多模态支持 |
|
||||
|
||||
### 配置步骤
|
||||
|
||||
1. 进入 **LLM 库** 页面
|
||||
2. 点击 **添加模型**
|
||||
3. 选择供应商
|
||||
4. 填写 API Key 和 Endpoint
|
||||
5. 设置默认参数
|
||||
|
||||
### 参数说明
|
||||
|
||||
| 参数 | 说明 | 建议值 |
|
||||
|------|------|--------|
|
||||
| Temperature | 随机性 | 0.7 |
|
||||
| Max Tokens | 最大输出长度 | 2048 |
|
||||
| Top P | 核采样 | 0.9 |
|
||||
|
||||
## ASR 语音识别
|
||||
|
||||
### 支持引擎
|
||||
|
||||
- **Whisper** - OpenAI 通用语音识别
|
||||
- **SenseVoice** - 高精度中文语音识别
|
||||
|
||||
### 配置方法
|
||||
|
||||
1. 进入 **ASR 库** 页面
|
||||
2. 选择识别引擎
|
||||
3. 配置音频参数(采样率、编码)
|
||||
4. 测试识别效果
|
||||
@@ -1,58 +0,0 @@
|
||||
# 语音合成
|
||||
|
||||
语音合成(TTS)模块提供自然流畅的语音输出能力。
|
||||
|
||||
## 概述
|
||||
|
||||

|
||||
|
||||
## 支持的引擎
|
||||
|
||||
| 供应商 | 特点 | 适用场景 |
|
||||
|--------|------|---------|
|
||||
| **阿里云** | 多音色、高自然度 | 通用场景 |
|
||||
| **火山引擎** | 低延迟、实时性好 | 实时对话 |
|
||||
| **Minimax** | 高性价比 | 批量合成 |
|
||||
|
||||
## 配置方法
|
||||
|
||||
### 添加语音配置
|
||||
|
||||
1. 进入 **语音库** 页面
|
||||
2. 点击 **添加语音**
|
||||
3. 选择供应商
|
||||
4. 填写 API 凭证
|
||||
5. 保存配置
|
||||
|
||||
### 测试语音
|
||||
|
||||
- 在线预览发音效果
|
||||
- 调整语速和音量
|
||||
- 切换不同音色
|
||||
|
||||
## 音色选择
|
||||
|
||||
### 中文音色
|
||||
|
||||
| 音色 | 风格 |
|
||||
|------|------|
|
||||
| 晓晓 | 标准女声 |
|
||||
| 晓北 | 知性女声 |
|
||||
| 逍遥 | 青年男声 |
|
||||
| 丫丫 | 活泼童声 |
|
||||
|
||||
### 英文音色
|
||||
|
||||
| 音色 | 风格 |
|
||||
|------|------|
|
||||
| Joanna | 专业女声 |
|
||||
| Matthew | 沉稳男声 |
|
||||
| Amy | 亲切女声 |
|
||||
|
||||
## 参数调优
|
||||
|
||||
| 参数 | 范围 | 说明 |
|
||||
|------|------|------|
|
||||
| 语速 | 0.5-2.0 | 1.0 为正常速度 |
|
||||
| 音量 | 0-100 | 输出音量百分比 |
|
||||
| 音调 | 0.5-2.0 | 语音音调高低 |
|
||||
@@ -1,53 +0,0 @@
|
||||
# 工作流管理
|
||||
|
||||
工作流提供可视化的对话流程编排能力,支持复杂的业务场景。
|
||||
|
||||
## 概述
|
||||
|
||||

|
||||
|
||||
## 节点类型
|
||||
|
||||
| 节点 | 图标 | 功能说明 |
|
||||
|------|------|---------|
|
||||
| **对话节点** | 💬 | AI 自动回复,可设置回复策略 |
|
||||
| **工具节点** | 🔧 | 调用外部 API 或自定义工具 |
|
||||
| **人工节点** | 👤 | 转接人工客服 |
|
||||
| **结束节点** | 🏁 | 结束对话流程 |
|
||||
|
||||
## 创建工作流
|
||||
|
||||
### 步骤
|
||||
|
||||
1. 进入 **工作流** 页面
|
||||
2. 点击 **新建工作流**
|
||||
3. 从左侧拖拽节点到画布
|
||||
4. 连接节点建立流程
|
||||
5. 配置各节点参数
|
||||
6. 保存并发布
|
||||
|
||||
### 节点配置
|
||||
|
||||
#### 对话节点配置
|
||||
|
||||
- 回复模板
|
||||
- 条件分支
|
||||
- 知识库检索
|
||||
|
||||
#### 工具节点配置
|
||||
|
||||
- 选择工具类型
|
||||
- 配置输入参数
|
||||
- 设置输出处理
|
||||
|
||||
#### 人工节点配置
|
||||
|
||||
- 转接规则
|
||||
- 排队策略
|
||||
- 通知设置
|
||||
|
||||
## 流程测试
|
||||
|
||||
- 支持单步调试
|
||||
- 可查看执行日志
|
||||
- 实时验证流程逻辑
|
||||
@@ -1,59 +0,0 @@
|
||||
# 快速开始
|
||||
|
||||
## 环境准备
|
||||
|
||||
### 前置条件
|
||||
|
||||
| 软件 | 版本要求 |
|
||||
|------|---------|
|
||||
| Node.js | 18.0 或更高 |
|
||||
| npm/yarn/pnpm | 最新版本 |
|
||||
| 现代浏览器 | Chrome 90+ / Firefox 90+ / Edge 90+ |
|
||||
|
||||
### 检查环境
|
||||
|
||||
```bash
|
||||
node --version
|
||||
npm --version
|
||||
```
|
||||
|
||||
## 安装步骤
|
||||
|
||||
### 1. 克隆项目
|
||||
|
||||
```bash
|
||||
git clone https://github.com/your-repo/AI-VideoAssistant.git
|
||||
cd AI-VideoAssistant
|
||||
```
|
||||
|
||||
### 2. 安装依赖
|
||||
|
||||
```bash
|
||||
cd web
|
||||
npm install
|
||||
```
|
||||
|
||||
### 3. 配置环境变量
|
||||
|
||||
创建 `.env` 文件:
|
||||
|
||||
```env
|
||||
VITE_API_URL=http://localhost:8080
|
||||
VITE_GEMINI_API_KEY=your_api_key_here
|
||||
```
|
||||
|
||||
### 4. 启动开发服务器
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
访问 http://localhost:3000
|
||||
|
||||
## 构建生产版本
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
```
|
||||
|
||||
构建产物在 `dist` 目录。
|
||||
279
docs/content/getting-started/configuration.md
Normal file
279
docs/content/getting-started/configuration.md
Normal file
@@ -0,0 +1,279 @@
|
||||
# 配置说明
|
||||
|
||||
本页面介绍 Realtime Agent Studio 各组件的配置方法。
|
||||
|
||||
---
|
||||
|
||||
## 配置概览
|
||||
|
||||
RAS 采用分层配置,各组件独立配置:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Config["配置层级"]
|
||||
ENV[环境变量]
|
||||
File[配置文件]
|
||||
DB[数据库配置]
|
||||
end
|
||||
|
||||
subgraph Services["服务组件"]
|
||||
Web[Web 前端]
|
||||
API[API 服务]
|
||||
Engine[Engine 服务]
|
||||
end
|
||||
|
||||
ENV --> Web
|
||||
ENV --> API
|
||||
ENV --> Engine
|
||||
File --> API
|
||||
File --> Engine
|
||||
DB --> API
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Web 前端配置
|
||||
|
||||
### 环境变量
|
||||
|
||||
在 `web/` 目录创建 `.env` 文件:
|
||||
|
||||
```env
|
||||
# API 服务地址(必填)
|
||||
VITE_API_URL=http://localhost:8080
|
||||
|
||||
# Engine WebSocket 地址(可选,默认同 API 服务器)
|
||||
VITE_WS_URL=ws://localhost:8000
|
||||
|
||||
# Google Gemini API Key(可选,用于前端直连)
|
||||
VITE_GEMINI_API_KEY=your_api_key
|
||||
```
|
||||
|
||||
### 变量说明
|
||||
|
||||
| 变量 | 必填 | 说明 | 默认值 |
|
||||
|------|:----:|------|--------|
|
||||
| `VITE_API_URL` | ✅ | 后端 API 服务地址 | - |
|
||||
| `VITE_WS_URL` | ❌ | WebSocket 服务地址 | 从 API URL 推断 |
|
||||
| `VITE_GEMINI_API_KEY` | ❌ | Gemini API 密钥 | - |
|
||||
|
||||
### 开发环境配置
|
||||
|
||||
```env
|
||||
# .env.development
|
||||
VITE_API_URL=http://localhost:8080
|
||||
VITE_WS_URL=ws://localhost:8000
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API 服务配置
|
||||
|
||||
### 环境变量
|
||||
|
||||
```env
|
||||
# 数据库配置
|
||||
DATABASE_URL=sqlite:///./data/app.db
|
||||
# 或 PostgreSQL
|
||||
# DATABASE_URL=postgresql://user:pass@localhost:5432/ras
|
||||
|
||||
# Redis 配置(可选)
|
||||
REDIS_URL=redis://localhost:6379/0
|
||||
|
||||
# 安全配置
|
||||
SECRET_KEY=your-secret-key-at-least-32-chars
|
||||
CORS_ORIGINS=http://localhost:3000,https://your-domain.com
|
||||
|
||||
# 日志级别
|
||||
LOG_LEVEL=INFO
|
||||
|
||||
# 文件存储路径
|
||||
UPLOAD_DIR=./uploads
|
||||
```
|
||||
|
||||
### 配置文件
|
||||
|
||||
API 服务支持 YAML 配置文件 `api/config/settings.yaml`:
|
||||
|
||||
```yaml
|
||||
# 服务配置
|
||||
server:
|
||||
host: "0.0.0.0"
|
||||
port: 8080
|
||||
workers: 4
|
||||
|
||||
# 数据库配置
|
||||
database:
|
||||
url: "sqlite:///./data/app.db"
|
||||
pool_size: 5
|
||||
max_overflow: 10
|
||||
|
||||
# Redis 配置
|
||||
redis:
|
||||
url: "redis://localhost:6379/0"
|
||||
|
||||
# 安全配置
|
||||
security:
|
||||
secret_key: "your-secret-key"
|
||||
token_expire_minutes: 1440
|
||||
|
||||
# 日志配置
|
||||
logging:
|
||||
level: "INFO"
|
||||
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Engine 服务配置
|
||||
|
||||
### 环境变量
|
||||
|
||||
```env
|
||||
# 后端 API 地址
|
||||
BACKEND_URL=http://localhost:8080
|
||||
|
||||
# WebSocket 服务配置
|
||||
WS_HOST=0.0.0.0
|
||||
WS_PORT=8000
|
||||
|
||||
# 音频配置
|
||||
AUDIO_SAMPLE_RATE=16000
|
||||
AUDIO_CHANNELS=1
|
||||
|
||||
# 日志级别
|
||||
LOG_LEVEL=INFO
|
||||
```
|
||||
|
||||
### 引擎配置
|
||||
|
||||
Engine 配置文件 `engine/config/engine.yaml`:
|
||||
|
||||
```yaml
|
||||
# WebSocket 服务
|
||||
websocket:
|
||||
host: "0.0.0.0"
|
||||
port: 8000
|
||||
ping_interval: 30
|
||||
ping_timeout: 10
|
||||
|
||||
# 音频处理
|
||||
audio:
|
||||
sample_rate: 16000
|
||||
channels: 1
|
||||
chunk_size: 640 # ≈20ms @ 16kHz、16-bit 单声道(320 采样点 = 640 字节);若单位为采样点则相当于 40ms——请以引擎实现为准
|
||||
|
||||
# VAD 配置
|
||||
vad:
|
||||
enabled: true
|
||||
threshold: 0.5
|
||||
min_speech_duration: 0.25
|
||||
min_silence_duration: 0.5
|
||||
|
||||
# 引擎默认配置
|
||||
defaults:
|
||||
engine_type: "pipeline" # pipeline(管线式引擎)或 multimodal(即文档中所称的 Realtime 实时引擎)
|
||||
max_response_tokens: 512
|
||||
temperature: 0.7
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Docker 配置
|
||||
|
||||
### docker-compose.yml 环境变量
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
web:
|
||||
environment:
|
||||
- VITE_API_URL=http://api:8080
|
||||
|
||||
api:
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://postgres:password@db:5432/ras
|
||||
- REDIS_URL=redis://redis:6379/0
|
||||
- SECRET_KEY=${SECRET_KEY}
|
||||
|
||||
engine:
|
||||
environment:
|
||||
- BACKEND_URL=http://api:8080
|
||||
- LOG_LEVEL=INFO
|
||||
```
|
||||
|
||||
### 使用 .env 文件
|
||||
|
||||
在项目根目录创建 `.env`:
|
||||
|
||||
```env
|
||||
# Docker Compose 会自动加载
|
||||
SECRET_KEY=your-secret-key-at-least-32-chars
|
||||
POSTGRES_PASSWORD=secure-db-password
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 配置优先级
|
||||
|
||||
配置按以下优先级加载(高优先级覆盖低优先级):
|
||||
|
||||
```
|
||||
1. 命令行参数(最高)
|
||||
2. 环境变量
|
||||
3. .env 文件
|
||||
4. 配置文件 (yaml)
|
||||
5. 代码默认值(最低)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 敏感配置管理
|
||||
|
||||
!!! danger "安全提醒"
|
||||
不要将敏感信息提交到代码仓库!
|
||||
|
||||
### 推荐做法
|
||||
|
||||
1. **使用 .env 文件**,并将其加入 `.gitignore`
|
||||
2. **使用环境变量**,通过 CI/CD 注入
|
||||
3. **使用密钥管理服务**,如 AWS Secrets Manager、HashiCorp Vault
|
||||
|
||||
### .gitignore 配置
|
||||
|
||||
```gitignore
|
||||
# 环境配置文件
|
||||
.env
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# 敏感数据目录
|
||||
/secrets/
|
||||
*.pem
|
||||
*.key
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 配置验证
|
||||
|
||||
启动服务前验证配置是否正确:
|
||||
|
||||
```bash
|
||||
# 验证 API 服务配置
|
||||
cd api
|
||||
python -c "from app.config import settings; print(settings)"
|
||||
|
||||
# 验证 Engine 配置
|
||||
cd engine
|
||||
python -c "from config import settings; print(settings)"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 下一步
|
||||
|
||||
- [环境与部署](index.md) - 开始安装服务
|
||||
- [Docker 部署](../deployment/docker.md) - 容器化部署
|
||||
|
||||
115
docs/content/getting-started/index.md
Normal file
115
docs/content/getting-started/index.md
Normal file
@@ -0,0 +1,115 @@
|
||||
# 环境与部署
|
||||
|
||||
本页属于“快速开始”中的环境与部署路径,只负责把服务跑起来、说明配置入口和部署方式。首次创建助手请转到 [创建第一个助手](../quickstart/index.md)。
|
||||
|
||||
---
|
||||
|
||||
## 先理解部署对象
|
||||
|
||||
Realtime Agent Studio(RAS)通常由三个核心服务组成:
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Services["服务组件"]
|
||||
Web[Web 前端<br/>React + TypeScript]
|
||||
API[API 服务<br/>FastAPI]
|
||||
Engine[Engine 服务<br/>WebSocket]
|
||||
end
|
||||
|
||||
subgraph Storage["数据存储"]
|
||||
DB[(SQLite/PostgreSQL)]
|
||||
end
|
||||
|
||||
Web -->|REST| API
|
||||
Web -->|WebSocket| Engine
|
||||
API <--> DB
|
||||
Engine <--> API
|
||||
```
|
||||
|
||||
| 组件 | 默认端口 | 负责什么 |
|
||||
|------|----------|----------|
|
||||
| **Web 前端** | 3000 | 管理控制台与调试界面 |
|
||||
| **API 服务** | 8080 | 资源管理、配置持久化、历史数据 |
|
||||
| **Engine 服务** | 8000 | 实时会话、事件流和音频流 |
|
||||
|
||||
## 选择你的安装方式
|
||||
|
||||
### 方式一:Docker Compose
|
||||
|
||||
适合希望尽快跑通一套完整环境的团队。
|
||||
|
||||
```bash
|
||||
# 仓库目录示例沿用当前代码仓库 slug
|
||||
# 你本地实际目录名可以不同
|
||||
git clone https://github.com/your-org/AI-VideoAssistant.git
|
||||
cd AI-VideoAssistant
|
||||
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
### 方式二:本地开发
|
||||
|
||||
适合需要分别调试前端、API 和 Engine 的开发者。
|
||||
|
||||
#### 启动 API 服务
|
||||
|
||||
```bash
|
||||
cd api
|
||||
python -m venv venv
|
||||
source venv/bin/activate # Windows: venv\Scripts\activate
|
||||
pip install -r requirements.txt
|
||||
uvicorn main:app --host 0.0.0.0 --port 8080 --reload
|
||||
```
|
||||
|
||||
#### 启动 Engine 服务
|
||||
|
||||
```bash
|
||||
cd engine
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
python main.py
|
||||
```
|
||||
|
||||
#### 启动 Web 前端
|
||||
|
||||
```bash
|
||||
cd web
|
||||
npm install
|
||||
npm run dev
|
||||
```
|
||||
|
||||
## 基础验证
|
||||
|
||||
完成安装后,至少确认以下入口可访问:
|
||||
|
||||
| 服务 | 地址 | 用途 |
|
||||
|------|------|------|
|
||||
| Web | `http://localhost:3000` | 打开控制台 |
|
||||
| API | `http://localhost:8080/docs` | 查看管理接口 |
|
||||
| Engine | `http://localhost:8000/health` | 检查实时引擎健康状态 |
|
||||
|
||||
如果你需要更完整的环境变量、配置文件和部署说明,请继续阅读本章节其他页面:
|
||||
|
||||
- [环境要求](requirements.md)
|
||||
- [配置说明](configuration.md)
|
||||
- [部署概览](../deployment/index.md)
|
||||
- [Docker 部署](../deployment/docker.md)
|
||||
|
||||
## 目录结构(阅读导向)
|
||||
|
||||
```text
|
||||
repo/
|
||||
├── web/ # 管理控制台
|
||||
├── api/ # 控制面与管理接口
|
||||
├── engine/ # 实时交互引擎
|
||||
├── docker/ # 部署编排与镜像配置
|
||||
└── docs/ # 当前文档站点
|
||||
```
|
||||
|
||||
## 遇到问题时去哪里
|
||||
|
||||
- 需要“快速判断往哪看”:先看 [常见问题](../resources/faq.md)
|
||||
- 需要“按步骤排查”:直接看 [故障排查](../resources/troubleshooting.md)
|
||||
- 已经跑通环境,准备创建助手:回到 [快速开始](../quickstart/index.md)
|
||||
|
||||
150
docs/content/getting-started/requirements.md
Normal file
150
docs/content/getting-started/requirements.md
Normal file
@@ -0,0 +1,150 @@
|
||||
# 环境要求
|
||||
|
||||
本页面列出运行 Realtime Agent Studio 所需的软件和硬件要求。
|
||||
|
||||
---
|
||||
|
||||
## 软件依赖
|
||||
|
||||
### 必需软件
|
||||
|
||||
| 软件 | 版本要求 | 说明 | 安装命令 |
|
||||
|------|---------|------|---------|
|
||||
| **Node.js** | 18.0+ | 前端构建运行 | `nvm install 18` |
|
||||
| **Python** | 3.10+ | 后端服务 | `pyenv install 3.10` |
|
||||
| **Docker** | 20.10+ | 容器化部署(可选) | [安装指南](https://docs.docker.com/get-docker/) |
|
||||
|
||||
### 可选软件
|
||||
|
||||
| 软件 | 版本要求 | 用途 |
|
||||
|------|---------|------|
|
||||
| **Docker Compose** | 2.0+ | 多服务编排 |
|
||||
| **PostgreSQL** | 14+ | 生产数据库 |
|
||||
| **Redis** | 6.0+ | 缓存与会话 |
|
||||
| **Nginx** | 1.20+ | 反向代理 |
|
||||
|
||||
---
|
||||
|
||||
## 版本检查
|
||||
|
||||
运行以下命令验证环境:
|
||||
|
||||
=== "Node.js"
|
||||
|
||||
```bash
|
||||
node --version
|
||||
# v18.0.0 或更高
|
||||
|
||||
npm --version
|
||||
# 8.0.0 或更高
|
||||
```
|
||||
|
||||
=== "Python"
|
||||
|
||||
```bash
|
||||
python --version
|
||||
# Python 3.10.0 或更高
|
||||
|
||||
pip --version
|
||||
# pip 22.0 或更高
|
||||
```
|
||||
|
||||
=== "Docker"
|
||||
|
||||
```bash
|
||||
docker --version
|
||||
# Docker version 20.10.0 或更高
|
||||
|
||||
docker compose version
|
||||
# Docker Compose version v2.0.0 或更高
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 浏览器支持
|
||||
|
||||
控制台需要现代浏览器支持 WebSocket 和 Web Audio API:
|
||||
|
||||
| 浏览器 | 最低版本 | 推荐版本 |
|
||||
|--------|---------|---------|
|
||||
| Chrome | 90+ | 最新版 |
|
||||
| Firefox | 90+ | 最新版 |
|
||||
| Edge | 90+ | 最新版 |
|
||||
| Safari | 14+ | 最新版 |
|
||||
|
||||
!!! warning "IE 不支持"
|
||||
Internet Explorer 不受支持,请使用现代浏览器。
|
||||
|
||||
---
|
||||
|
||||
## 硬件要求
|
||||
|
||||
### 开发环境
|
||||
|
||||
| 资源 | 最低配置 | 推荐配置 |
|
||||
|------|---------|---------|
|
||||
| **CPU** | 2 核心 | 4 核心+ |
|
||||
| **内存** | 4GB | 8GB+ |
|
||||
| **磁盘** | 10GB | 20GB+ SSD |
|
||||
| **网络** | 10Mbps | 100Mbps |
|
||||
|
||||
---
|
||||
|
||||
## 网络要求
|
||||
|
||||
### 出站访问
|
||||
|
||||
以下外部服务需要网络可达(根据使用的模型供应商):
|
||||
|
||||
| 服务 | 域名 | 端口 | 用途 |
|
||||
|------|------|------|------|
|
||||
| **OpenAI** | api.openai.com | 443 | LLM / TTS |
|
||||
| **Azure OpenAI** | *.openai.azure.com | 443 | LLM / ASR / TTS |
|
||||
| **阿里云** | *.aliyuncs.com | 443 | DashScope TTS |
|
||||
| **SiliconFlow** | api.siliconflow.cn | 443 | ASR / TTS |
|
||||
| **DeepSeek** | api.deepseek.com | 443 | LLM |
|
||||
|
||||
### 端口规划
|
||||
|
||||
| 服务 | 默认端口 | 可配置 |
|
||||
|------|---------|--------|
|
||||
| Web 前端 | 3000 | ✅ |
|
||||
| API 服务 | 8080 | ✅ |
|
||||
| Engine 服务 | 8000 | ✅ |
|
||||
| PostgreSQL | 5432 | ✅ |
|
||||
| Redis | 6379 | ✅ |
|
||||
|
||||
---
|
||||
|
||||
## 操作系统
|
||||
|
||||
### 支持的系统
|
||||
|
||||
| 操作系统 | 版本 | 支持状态 |
|
||||
|---------|------|---------|
|
||||
| **Ubuntu** | 20.04 LTS, 22.04 LTS | ✅ 完全支持 |
|
||||
| **Debian** | 11, 12 | ✅ 完全支持 |
|
||||
| **CentOS** | 8+ | ✅ 完全支持 |
|
||||
| **macOS** | 12+ (Monterey) | ✅ 开发支持 |
|
||||
| **Windows** | 10/11 + WSL2 | ✅ 开发支持 |
|
||||
|
||||
### Windows 注意事项
|
||||
|
||||
推荐使用 WSL2 进行开发:
|
||||
|
||||
```powershell
|
||||
# 安装 WSL2
|
||||
wsl --install
|
||||
|
||||
# 安装 Ubuntu
|
||||
wsl --install -d Ubuntu
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 下一步
|
||||
|
||||
- [配置说明](configuration.md) - 环境变量配置
|
||||
- [环境与部署](index.md) - 开始安装
|
||||
- [Docker 部署](../deployment/docker.md) - 容器化部署
|
||||
|
||||
BIN
docs/content/images/logo.png
Normal file
BIN
docs/content/images/logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.1 MiB |
@@ -1,200 +1,186 @@
|
||||
# AI Video Assistant 使用说明
|
||||
<p align="center">
|
||||
<img src="images/logo.png" alt="Realtime Agent Studio" width="400">
|
||||
</p>
|
||||
|
||||
## 产品概述
|
||||
<p align="center">
|
||||
<strong>通过管理控制台与 API 构建、部署和运营实时多模态助手</strong>
|
||||
</p>
|
||||
|
||||
AI Video Assistant 是一款基于大语言模型的智能对话与工作流管理平台,支持多模型集成、语音合成、自动化测试等功能,帮助企业快速构建智能客服系统。
|
||||
<p align="center">
|
||||
<img src="https://img.shields.io/badge/version-0.1.0-blue" alt="Version">
|
||||
<img src="https://img.shields.io/badge/license-MIT-green" alt="License">
|
||||
<img src="https://img.shields.io/badge/python-3.10+-blue" alt="Python">
|
||||
<img src="https://img.shields.io/badge/node-18+-green" alt="Node">
|
||||
</p>
|
||||
|
||||

|
||||
<p align="center">
|
||||
<a href="overview/index.md">产品概览</a> ·
|
||||
<a href="quickstart/index.md">快速开始</a> ·
|
||||
<a href="concepts/assistants.md">构建助手</a> ·
|
||||
<a href="concepts/index.md">核心概念</a> ·
|
||||
<a href="api-reference/index.md">API 参考</a>
|
||||
</p>
|
||||
|
||||
## 核心功能
|
||||
---
|
||||
|
||||
| 功能模块 | 描述 |
|
||||
|---------|------|
|
||||
| **仪表盘** | 实时数据统计与可视化分析 |
|
||||
| **助手管理** | 创建、配置、测试 AI 助手 |
|
||||
| **工作流** | 可视化流程编排 |
|
||||
| **模型库** | LLM/ASR/语音模型配置 |
|
||||
| **知识库** | RAG 文档知识管理 |
|
||||
| **历史记录** | 对话日志查询与分析 |
|
||||
| **自动化测试** | 批量测试与质量评估 |
|
||||
Realtime Agent Studio (RAS) 是一个通过管理控制台与 API 构建、部署和运营实时多模态助手的开源平台。
|
||||
|
||||
## 快速开始
|
||||
## 适合谁
|
||||
|
||||
### 环境要求
|
||||
- 需要把实时语音或视频助手接入产品、设备或内部系统的开发团队
|
||||
- 需要通过控制台快速配置提示词、模型、知识库、工具和工作流的运营团队
|
||||
- 需要私有化部署、模型可替换、链路可观测的企业场景
|
||||
|
||||
- Node.js 18+
|
||||
- 现代浏览器(Chrome/Firefox/Edge)
|
||||
## 核心能力
|
||||
|
||||
### 启动服务
|
||||
<div class="grid cards" markdown>
|
||||
|
||||
```bash
|
||||
cd web
|
||||
npm install
|
||||
npm run dev
|
||||
- :material-robot-outline: **助手构建**
|
||||
|
||||
---
|
||||
|
||||
用统一的助手对象管理提示词、模型、知识库、工具、开场白和会话策略。
|
||||
|
||||
- :material-pulse: **双引擎运行时**
|
||||
|
||||
---
|
||||
|
||||
同时支持 Pipeline 引擎与 Realtime 引擎,可按延迟、成本和可控性选择运行方式。
|
||||
|
||||
- :material-source-branch: **能力扩展**
|
||||
|
||||
---
|
||||
|
||||
通过资源库、知识库、工具与工作流扩展助手能力,而不是把全部逻辑塞进单一提示词。
|
||||
|
||||
- :material-api: **开放集成**
|
||||
|
||||
---
|
||||
|
||||
使用 REST API 管理资源,使用 WebSocket API 接入实时对话,面向 Web、移动端和第三方系统。
|
||||
|
||||
- :material-shield-lock-outline: **私有化部署**
|
||||
|
||||
---
|
||||
|
||||
支持 Docker 部署、自有模型服务和企业内网运行,便于满足合规与成本要求。
|
||||
|
||||
- :material-chart-line: **可观测与评估**
|
||||
|
||||
---
|
||||
|
||||
提供会话历史、实时指标、自动化测试和效果评估,帮助持续改进助手质量。
|
||||
|
||||
</div>
|
||||
|
||||
## 系统架构
|
||||
|
||||
平台架构层级:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
|
||||
subgraph Access["Access Layer"]
|
||||
API["API"]
|
||||
SDK["SDK"]
|
||||
Browser["Browser UI"]
|
||||
Embed["Web Embed"]
|
||||
end
|
||||
|
||||
subgraph Runtime["Realtime Interaction Engine"]
|
||||
direction LR
|
||||
|
||||
subgraph Duplex["Duplex Interaction Engine"]
|
||||
direction LR
|
||||
|
||||
subgraph Pipeline["Pipeline Engine"]
|
||||
direction LR
|
||||
VAD["VAD"]
|
||||
ASR["ASR"]
|
||||
TD["Turn Detection"]
|
||||
LLM["LLM"]
|
||||
TTS["TTS"]
|
||||
end
|
||||
|
||||
subgraph Multi["Realtime Engine"]
|
||||
MM["Realtime Model"]
|
||||
end
|
||||
end
|
||||
|
||||
subgraph Capability["Agent Capabilities"]
|
||||
subgraph Tools["Tool System"]
|
||||
Webhook["Webhook"]
|
||||
ClientTool["Client Tools"]
|
||||
Builtin["Builtin Tools"]
|
||||
end
|
||||
|
||||
subgraph KB["Knowledge System"]
|
||||
Docs["Documents"]
|
||||
Vector[("Vector Index")]
|
||||
Retrieval["Retrieval"]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
subgraph Platform["Platform Services"]
|
||||
direction TB
|
||||
Backend["Backend Service"]
|
||||
Frontend["Frontend Console"]
|
||||
DB[("Database")]
|
||||
end
|
||||
|
||||
Access --> Runtime
|
||||
Runtime <--> Backend
|
||||
Backend <--> DB
|
||||
Backend <--> Frontend
|
||||
LLM --> Tools
|
||||
MM --> Tools
|
||||
LLM <--> KB
|
||||
MM <--> KB
|
||||
```
|
||||
|
||||
访问 `http://localhost:3000`
|
||||
## 从这里开始
|
||||
|
||||
## 详细使用指南
|
||||
<div class="grid cards" markdown>
|
||||
|
||||
### 1. 仪表盘
|
||||
- :material-compass-outline: **[了解产品](overview/index.md)**
|
||||
|
||||

|
||||
---
|
||||
|
||||
仪表盘展示系统核心指标:
|
||||
- **总对话数** - 累计对话请求数量
|
||||
- **回答率** - 成功回答的对话占比
|
||||
- **平均时长** - 单次对话平均持续时间
|
||||
- **人工转接率** - 需要人工介入的对话比例
|
||||
先看产品定位、核心模块、适用场景,以及 RAS 与其他方案的差异。
|
||||
|
||||
### 2. 助手管理
|
||||
- :material-cog-outline: **[环境与部署](getting-started/index.md)**
|
||||
|
||||

|
||||
---
|
||||
|
||||
#### 创建助手
|
||||
先把服务跑起来,了解环境要求、配置入口和部署方式。
|
||||
|
||||
1. 点击 **创建助手**
|
||||
2. 配置助手基本信息(名称、提示词)
|
||||
3. 选择对话语言与音色
|
||||
4. 绑定知识库和工具
|
||||
- :material-rocket-launch-outline: **[创建第一个助手](quickstart/index.md)**
|
||||
|
||||
#### 配置选项
|
||||
---
|
||||
|
||||
| 标签页 | 配置项 |
|
||||
|-------|--------|
|
||||
| 全局 | 名称、提示词、温度参数 |
|
||||
| 语音 | TTS 引擎、音色、语言 |
|
||||
| 工具 | 可用工具列表 |
|
||||
| 知识 | RAG 知识库关联 |
|
||||
| 链接 | 外部服务配置 |
|
||||
按最短路径准备资源、创建助手、测试效果并拿到接入所需信息。
|
||||
|
||||
### 3. 工作流
|
||||
- :material-tune: **[构建助手](concepts/assistants.md)**
|
||||
|
||||

|
||||
---
|
||||
|
||||
#### 工作流节点类型
|
||||
按完整链路配置助手、提示词、模型、知识库、工具与工作流。
|
||||
|
||||
| 节点 | 功能 |
|
||||
|------|------|
|
||||
| 对话节点 | AI 自动回复 |
|
||||
| 工具节点 | 调用外部工具 |
|
||||
| 人工节点 | 转接人工客服 |
|
||||
| 结束节点 | 结束对话流程 |
|
||||
- :material-connection: **[接入应用](api-reference/index.md)**
|
||||
|
||||
### 4. 模型配置
|
||||
---
|
||||
|
||||

|
||||
查看 REST 与 WebSocket 接口,把助手嵌入到你的 Web、移动端或服务端系统。
|
||||
|
||||
#### 支持的 LLM 模型
|
||||
- :material-lifebuoy: **[排查问题](resources/troubleshooting.md)**
|
||||
|
||||
- **OpenAI** - GPT-4/GPT-3.5
|
||||
- **DeepSeek** - DeepSeek Chat
|
||||
- **SiliconFlow** - 多种开源模型
|
||||
- **Google Gemini** - Gemini Pro
|
||||
---
|
||||
|
||||
#### ASR 语音识别
|
||||
当连接、对话质量或部署链路出现问题时,从这里进入可执行的排查步骤。
|
||||
|
||||
- **Whisper** - OpenAI 语音识别
|
||||
- **SenseVoice** - 高精度中文识别
|
||||
</div>
|
||||
|
||||
### 5. 知识库
|
||||
|
||||

|
||||
|
||||
#### 创建知识库
|
||||
|
||||
1. 进入 **知识库** 页面
|
||||
2. 点击 **新建知识库**
|
||||
3. 上传文档(支持 Markdown/PDF/TXT)
|
||||
4. 配置检索参数
|
||||
|
||||
### 6. 历史记录
|
||||
|
||||

|
||||
|
||||
查询条件:
|
||||
- 按时间范围筛选
|
||||
- 按助手名称搜索
|
||||
- 查看对话详情与统计
|
||||
|
||||
### 7. 自动化测试
|
||||
|
||||

|
||||
|
||||
#### 测试类型
|
||||
|
||||
| 类型 | 说明 |
|
||||
|------|------|
|
||||
| 固定测试 | 预设问答对测试 |
|
||||
| 智能测试 | AI 生成测试用例 |
|
||||
|
||||
#### 评估指标
|
||||
|
||||
- 回复准确率
|
||||
- 回答完整度
|
||||
- 响应时间
|
||||
|
||||
### 8. 语音合成
|
||||
|
||||

|
||||
|
||||
#### 支持的 TTS 引擎
|
||||
|
||||
- **阿里云** - 多音色可选
|
||||
- **火山引擎** - 高自然度
|
||||
- **Minimax** - 低延迟
|
||||
|
||||
### 9. 个人中心
|
||||
|
||||

|
||||
|
||||
管理账户信息与系统设置。
|
||||
|
||||
## 部署指南
|
||||
|
||||
### Docker 部署(推荐)
|
||||
|
||||
```bash
|
||||
# 构建镜像
|
||||
docker build -t ai-video-assistant .
|
||||
|
||||
# 运行容器
|
||||
docker run -d -p 3000:3000 --name ai-assistant ai-video-assistant
|
||||
```
|
||||
|
||||
### Nginx 反向代理
|
||||
|
||||
```nginx
|
||||
server {
|
||||
listen 80;
|
||||
server_name your-domain.com;
|
||||
|
||||
location / {
|
||||
proxy_pass http://localhost:3000;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 常见问题
|
||||
|
||||
### Q: 如何配置 API Key?
|
||||
|
||||
进入 **LLM 库** 或 **语音库** 页面,点击对应模型的配置按钮填写 API Key。
|
||||
|
||||
### Q: 助手无法回复?
|
||||
|
||||
1. 检查模型配置是否正确
|
||||
2. 确认知识库已正确关联
|
||||
3. 查看系统日志排查错误
|
||||
|
||||
### Q: 语音识别不准确?
|
||||
|
||||
- 确认 ASR 模型选择正确
|
||||
- 检查音频采样率(推荐 16kHz)
|
||||
- 确认语言设置匹配
|
||||
|
||||
## 技术支持
|
||||
|
||||
如有问题,请提交 Issue 或联系技术支持团队。
|
||||
|
||||
26
docs/content/javascripts/extra.js
Normal file
26
docs/content/javascripts/extra.js
Normal file
@@ -0,0 +1,26 @@
|
||||
// Realtime Agent Studio - Custom JavaScript
|
||||
|
||||
document.addEventListener("DOMContentLoaded", function () {
|
||||
// Add external link icons
|
||||
document.querySelectorAll('a[href^="http"]').forEach(function (link) {
|
||||
if (!link.hostname.includes(window.location.hostname)) {
|
||||
link.setAttribute("target", "_blank");
|
||||
link.setAttribute("rel", "noopener noreferrer");
|
||||
}
|
||||
});
|
||||
|
||||
// Smooth scroll for anchor links
|
||||
document.querySelectorAll('a[href^="#"]').forEach(function (anchor) {
|
||||
anchor.addEventListener("click", function (e) {
|
||||
const targetId = this.getAttribute("href").slice(1);
|
||||
const targetElement = document.getElementById(targetId);
|
||||
if (targetElement) {
|
||||
e.preventDefault();
|
||||
targetElement.scrollIntoView({
|
||||
behavior: "smooth",
|
||||
block: "start",
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
18
docs/content/javascripts/mermaid.mjs
Normal file
18
docs/content/javascripts/mermaid.mjs
Normal file
@@ -0,0 +1,18 @@
|
||||
/**
|
||||
* Global Mermaid config for consistent diagram sizing across all docs.
|
||||
* Exposed as window.mermaid so Material for MkDocs uses this instance.
|
||||
*/
|
||||
import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs";
|
||||
|
||||
mermaid.initialize({
|
||||
startOnLoad: false,
|
||||
securityLevel: "loose",
|
||||
theme: "base",
|
||||
useMaxWidth: false,
|
||||
themeVariables: {
|
||||
fontSize: "14px",
|
||||
fontFamily: "Inter, sans-serif",
|
||||
},
|
||||
});
|
||||
|
||||
window.mermaid = mermaid;
|
||||
312
docs/content/overview/architecture.md
Normal file
312
docs/content/overview/architecture.md
Normal file
@@ -0,0 +1,312 @@
|
||||
# 系统架构
|
||||
|
||||
本文档只解释 Realtime Agent Studio (RAS) 的服务边界、数据流、部署形态和关键技术选型,不重复产品定位或上手流程。
|
||||
|
||||
---
|
||||
|
||||
## 整体架构
|
||||
|
||||
RAS 采用前后端分离的微服务架构,主要由三个核心服务组成:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Client["客户端"]
|
||||
Browser[Web 浏览器]
|
||||
Mobile[移动应用]
|
||||
ThirdParty[第三方系统]
|
||||
end
|
||||
|
||||
subgraph Frontend["前端服务"]
|
||||
WebApp[React 管理控制台]
|
||||
end
|
||||
|
||||
subgraph Backend["后端服务"]
|
||||
API[API 服务<br/>FastAPI]
|
||||
Engine[实时交互引擎<br/>WebSocket]
|
||||
end
|
||||
|
||||
subgraph Storage["数据存储"]
|
||||
DB[(SQLite/PostgreSQL)]
|
||||
FileStore[文件存储]
|
||||
end
|
||||
|
||||
subgraph External["外部服务"]
|
||||
OpenAI[OpenAI]
|
||||
SiliconFlow[SiliconFlow]
|
||||
DashScope[DashScope]
|
||||
LocalModel[本地模型]
|
||||
end
|
||||
|
||||
subgraph Tools["工具"]
|
||||
Webhook[Webhook]
|
||||
ClientTool[客户端工具]
|
||||
Builtin[内建工具]
|
||||
end
|
||||
|
||||
Browser --> WebApp
|
||||
Mobile -->|WebSocket| Engine
|
||||
ThirdParty -->|REST API| API
|
||||
WebApp -->|REST API| API
|
||||
WebApp -->|WebSocket| Engine
|
||||
API <--> DB
|
||||
API <--> FileStore
|
||||
Engine <--> API
|
||||
Engine --> External
|
||||
Engine --> Tools
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 核心组件
|
||||
|
||||
### 1. Web 前端 (React)
|
||||
|
||||
管理控制台,提供可视化的配置、测试和监控界面。
|
||||
|
||||
| 功能模块 | 说明 |
|
||||
|---------|------|
|
||||
| 助手管理 | 创建、配置、测试智能助手 |
|
||||
| 资源库 | LLM / ASR / TTS 等模型管理 |
|
||||
| 知识库 | RAG 文档上传与管理 |
|
||||
| 历史记录 | 会话日志查询与回放 |
|
||||
| 仪表盘 | 实时数据统计 |
|
||||
| 调试控制台 | WebSocket 实时测试 |
|
||||
|
||||
### 2. API 服务 (FastAPI)
|
||||
|
||||
REST API 后端,处理资源管理、持久化配置和历史数据等控制面能力。
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph API["API 服务"]
|
||||
Router[路由层]
|
||||
Service[业务逻辑层]
|
||||
Model[数据模型层]
|
||||
end
|
||||
|
||||
Client[客户端] --> Router
|
||||
Router --> Service
|
||||
Service --> Model
|
||||
Model --> DB[(数据库)]
|
||||
```
|
||||
|
||||
**主要职责:**
|
||||
|
||||
- 助手 CRUD 操作
|
||||
- 模型资源管理
|
||||
- 知识库管理
|
||||
- 会话记录存储
|
||||
- 认证与授权
|
||||
|
||||
### 3. 实时交互引擎 (Engine)
|
||||
|
||||
处理实时音视频对话、事件流转、模型调用与工具执行。
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Engine["实时交互引擎"]
|
||||
WS[WebSocket Handler]
|
||||
SM[会话管理器]
|
||||
|
||||
subgraph Pipeline["管线式引擎"]
|
||||
VAD[声音活动检测 VAD]
|
||||
ASR[语音识别 ASR]
|
||||
TD[回合检测 TD]
|
||||
LLM[大语言模型 LLM]
|
||||
TTS[语音合成 TTS]
|
||||
end
|
||||
|
||||
subgraph Realtime["实时引擎连接"]
|
||||
RTOpenAI[OpenAI Realtime]
|
||||
RTGemini[Gemini Live]
|
||||
RTDoubao[Doubao 实时交互]
|
||||
end
|
||||
|
||||
subgraph Tools["工具"]
|
||||
Webhook[Webhook]
|
||||
ClientTool[客户端工具]
|
||||
Builtin[内建工具]
|
||||
end
|
||||
end
|
||||
|
||||
Client[客户端] -->|音频流| WS
|
||||
WS --> SM
|
||||
SM --> Pipeline
|
||||
SM --> Realtime
|
||||
Pipeline --> LLM
|
||||
LLM --> Tools
|
||||
Realtime --> Tools
|
||||
Pipeline -->|文本/音频| WS
|
||||
Realtime -->|文本/音频| WS
|
||||
```
|
||||
|
||||
### 外部服务与工具
|
||||
|
||||
| 类别 | 说明 | 可选项 |
|
||||
|------|------|--------|
|
||||
| **外部模型服务** | Pipeline 引擎各环节依赖的云端或本地服务 | OpenAI、SiliconFlow、DashScope、本地模型 |
|
||||
| **实时模型连接** | Realtime 引擎可直接连接的后端 | OpenAI Realtime、Gemini Live、Doubao 实时交互 |
|
||||
| **工具系统** | 由助手或引擎调用的外部执行能力 | Webhook、客户端工具、内建工具 |
|
||||
|
||||
---
|
||||
|
||||
## 引擎架构
|
||||
|
||||
### 管线式全双工引擎
|
||||
|
||||
管线式引擎由 **VAD → ASR → TD → LLM → TTS** 组成。每个环节可替换,适合需要精细控制、工具扩展和较高可解释性的场景。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as 客户端
|
||||
participant E as 引擎
|
||||
participant VAD as VAD
|
||||
participant ASR as 语音识别
|
||||
participant TD as 回合检测
|
||||
participant LLM as 大语言模型
|
||||
participant TTS as 语音合成
|
||||
participant Tools as 工具
|
||||
|
||||
C->>E: 音频流 (PCM)
|
||||
E->>VAD: 检测语音活动
|
||||
VAD-->>E: 有效语音段
|
||||
E->>ASR: 语音转写
|
||||
ASR-->>E: 转写文本
|
||||
E->>TD: 判断回合边界
|
||||
TD-->>E: 可送入 LLM 的输入
|
||||
E->>LLM: 生成回复
|
||||
LLM->>Tools: 可选:调用工具
|
||||
Tools-->>LLM: 工具结果
|
||||
LLM-->>E: 回复文本 (流式)
|
||||
E->>TTS: 文本转语音
|
||||
TTS-->>E: 音频流
|
||||
E->>C: 播放音频
|
||||
```
|
||||
|
||||
**特点:**
|
||||
|
||||
- 各环节可单独替换和优化
|
||||
- 便于接入知识库、工具、工作流等能力
|
||||
- 延迟通常高于端到端实时模型,但可控性更强
|
||||
|
||||
### Realtime 引擎
|
||||
|
||||
Realtime 引擎直接连接端到端实时模型,适合追求更低延迟和更自然多模态交互的场景。
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as 客户端
|
||||
participant E as 引擎
|
||||
participant RT as Realtime Model
|
||||
|
||||
C->>E: 音频/视频/文本输入
|
||||
E->>RT: 实时流输入
|
||||
RT-->>E: 流式文本/音频输出
|
||||
E->>C: 播放或渲染结果
|
||||
```
|
||||
|
||||
**特点:**
|
||||
|
||||
- 交互链路更短,延迟更低
|
||||
- 更依赖具体模型供应商的能力边界
|
||||
- 适合强调自然对话和多模态体验的入口
|
||||
|
||||
---
|
||||
|
||||
## 数据流
|
||||
|
||||
### WebSocket 会话流程
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as 客户端
|
||||
participant E as 引擎
|
||||
participant API as API 服务
|
||||
participant DB as 数据库
|
||||
|
||||
C->>E: 连接 ws://.../ws?assistant_id=xxx
|
||||
E->>API: 获取助手配置
|
||||
API->>DB: 查询助手
|
||||
DB-->>API: 助手数据
|
||||
API-->>E: 配置信息
|
||||
|
||||
C->>E: session.start
|
||||
E-->>C: session.started
|
||||
E-->>C: config.resolved
|
||||
|
||||
loop 对话循环
|
||||
C->>E: 音频帧 (binary)
|
||||
E-->>C: input.speech_started
|
||||
E-->>C: transcript.delta
|
||||
E-->>C: transcript.final
|
||||
E-->>C: assistant.response.delta
|
||||
E-->>C: output.audio.start
|
||||
E-->>C: 音频帧 (binary)
|
||||
E-->>C: output.audio.end
|
||||
end
|
||||
|
||||
C->>E: session.stop
|
||||
E->>API: 保存会话记录
|
||||
API->>DB: 存储
|
||||
E-->>C: session.stopped
|
||||
```
|
||||
|
||||
### 智能打断流程
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as 客户端
|
||||
participant E as 引擎
|
||||
participant TTS as TTS 服务
|
||||
|
||||
Note over E: 正在播放 TTS 音频
|
||||
E->>C: 音频帧...
|
||||
|
||||
C->>E: 用户说话 (VAD 检测)
|
||||
E->>E: 触发打断
|
||||
E->>TTS: 停止合成
|
||||
E-->>C: output.audio.interrupted
|
||||
|
||||
Note over E: 处理新的用户输入
|
||||
E-->>C: input.speech_started
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 部署形态
|
||||
|
||||
### 开发环境
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Local["本地开发"]
|
||||
Web[npm run dev<br/>:3000]
|
||||
API[uvicorn<br/>:8080]
|
||||
Engine[python main.py<br/>:8000]
|
||||
DB[(SQLite)]
|
||||
end
|
||||
|
||||
Web --> API
|
||||
Web --> Engine
|
||||
API --> DB
|
||||
Engine --> API
|
||||
```
|
||||
|
||||
## 技术选型
|
||||
|
||||
| 组件 | 技术 | 说明 |
|
||||
|------|------|------|
|
||||
| **前端框架** | React 18 | 管理控制台与调试界面 |
|
||||
| **状态管理** | Zustand | 前端轻量状态管理 |
|
||||
| **UI 样式** | Tailwind CSS | 快速构建控制台界面 |
|
||||
| **后端框架** | FastAPI | 管理接口与配置持久化 |
|
||||
| **WebSocket** | websockets | 实时事件与音频流通信 |
|
||||
| **数据库** | SQLite / PostgreSQL | 配置与历史数据存储 |
|
||||
|
||||
---
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [产品概览](index.md) - 产品定位、核心模块与适用场景
|
||||
- [引擎架构](../concepts/engines.md) - Pipeline 与 Realtime 的选择指南
|
||||
- [WebSocket 协议](../api-reference/websocket.md) - 实时对话事件和消息格式
|
||||
84
docs/content/overview/index.md
Normal file
84
docs/content/overview/index.md
Normal file
@@ -0,0 +1,84 @@
|
||||
# 产品概览
|
||||
|
||||
Realtime Agent Studio (RAS) 是一个通过管理控制台与 API 构建、部署和运营实时多模态助手的开源平台。
|
||||
|
||||
---
|
||||
|
||||
## 产品定位
|
||||
|
||||
RAS 面向需要构建实时语音或视频助手的团队,目标不是替代你的业务系统,而是提供一套可组合的助手基础设施:
|
||||
|
||||
- **控制台**:让团队快速配置助手、资源库、知识库、工具、工作流与评估策略
|
||||
- **API 与实时运行时**:让应用、设备和第三方系统稳定接入实时对话能力
|
||||
- **运维与分析能力**:让团队能观察会话效果、排查问题并持续迭代助手质量
|
||||
|
||||
如果你把实时助手看作一条完整的产品链路,RAS 负责其中的“构建、接入、运行、观测”四个阶段。
|
||||
|
||||
## 核心模块
|
||||
|
||||
| 模块 | 负责什么 | 适合谁使用 |
|
||||
|------|----------|------------|
|
||||
| **助手** | 定义角色、行为、模型、知识、工具和会话策略 | 产品、运营、算法、开发 |
|
||||
| **引擎** | 承载实时语音/多模态对话,输出事件流和音频流 | 开发、基础设施 |
|
||||
| **资源库** | 管理 LLM、ASR、TTS 等外部能力接入 | 平台管理员、开发 |
|
||||
| **知识库 / 工具 / 工作流** | 让助手获得领域知识、外部执行能力和复杂流程控制 | 业务设计者、开发 |
|
||||
| **分析与评估** | 记录会话、监控指标、做自动化回归和效果评估 | 运营、QA、开发 |
|
||||
|
||||
## 为什么是“控制台 + API”
|
||||
|
||||
RAS 采用“控制台配置 + API 接入”的组合方式,而不是把所有内容都固化在代码里:
|
||||
|
||||
- **控制台负责提效**:让非后端角色也能参与提示词、工具、知识、流程的配置与调优
|
||||
- **API 负责集成**:让产品团队继续用自己的前端、服务端或设备侧应用承载最终体验
|
||||
- **同一套助手配置可复用**:控制台保存的助手定义可以被不同渠道重复接入和评估
|
||||
|
||||
## 典型使用方式
|
||||
|
||||
<div class="grid cards" markdown>
|
||||
|
||||
- :material-headset: **客户服务与运营自动化**
|
||||
|
||||
---
|
||||
|
||||
在客服、外呼、预约、售后等场景中接入实时语音助手,并保留人工接管与工具调用能力。
|
||||
|
||||
- :material-school-outline: **培训、陪练与问答**
|
||||
|
||||
---
|
||||
|
||||
用知识库、提示词和流程编排构建可持续优化的教学、培训或辅导助手。
|
||||
|
||||
- :material-domain: **企业内部助手**
|
||||
|
||||
---
|
||||
|
||||
通过私有部署、内部知识库和业务系统工具,把助手接入内部流程或设备终端。
|
||||
|
||||
- :material-devices: **多端集成**
|
||||
|
||||
---
|
||||
|
||||
通过 WebSocket API 将同一个助手接入 Web、移动端、坐席工作台或自有硬件设备。
|
||||
|
||||
</div>
|
||||
|
||||
## 与其他方案的差异
|
||||
|
||||
本页是站内唯一保留“产品对比”视角的地方,用于帮助你快速判断 RAS 的定位边界。
|
||||
|
||||
| 特性 | RAS | Vapi | Retell | ElevenLabs Agents |
|
||||
|------|-----|------|--------|-------------------|
|
||||
| **开源** | :white_check_mark: | :x: | :x: | :x: |
|
||||
| **私有部署** | :white_check_mark: | :x: | :x: | :x: |
|
||||
| **Pipeline 引擎** | :white_check_mark: | :white_check_mark: | :white_check_mark: | :x: |
|
||||
| **Realtime / 多模态引擎** | :white_check_mark: | :white_check_mark: | :x: | :white_check_mark: |
|
||||
| **自定义 ASR / TTS** | :white_check_mark: | 有限 | 有限 | :x: |
|
||||
| **知识库与工具扩展** | :white_check_mark: | :white_check_mark: | :white_check_mark: | 有限 |
|
||||
| **工作流编排** | 开发中 | :white_check_mark: | :x: | :x: |
|
||||
| **数据与链路可观测** | :white_check_mark: | 有限 | 有限 | 有限 |
|
||||
|
||||
## 继续阅读
|
||||
|
||||
- [系统架构](architecture.md) - 从服务边界、数据流和部署形态理解系统如何组成
|
||||
- [核心概念](../concepts/index.md) - 先建立助手、引擎与工作流的心智模型
|
||||
- [快速开始](../quickstart/index.md) - 以最短路径创建第一个助手
|
||||
44
docs/content/quickstart/dashboard.md
Normal file
44
docs/content/quickstart/dashboard.md
Normal file
@@ -0,0 +1,44 @@
|
||||
# 资源准备清单
|
||||
|
||||
本页保留原“资源库配置详解”链接,但在本轮文档收敛后,它只承担快速开始阶段的资源核对职责。
|
||||
|
||||
## 你至少要准备什么
|
||||
|
||||
在创建第一个助手前,至少确认以下三类资源都已经可用:
|
||||
|
||||
| 资源 | 为什么需要 | 正式说明页 |
|
||||
|------|------------|------------|
|
||||
| **LLM 模型** | 负责理解与生成回复 | [LLM 模型](../customization/models.md) |
|
||||
| **ASR 资源** | 负责把语音输入转写为文本 | [语音识别](../customization/asr.md) |
|
||||
| **TTS 声音资源** | 负责把文本回复合成为语音 | [声音资源](../customization/voices.md) |
|
||||
|
||||
## 上手前自检
|
||||
|
||||
### LLM
|
||||
|
||||
- 已配置供应商、模型名称、Base URL 和凭证
|
||||
- 已明确该模型用于文本生成、嵌入还是重排
|
||||
- 已准备保守的默认参数,而不是先追求极端效果
|
||||
|
||||
### ASR
|
||||
|
||||
- 已确认目标语言与模型匹配
|
||||
- 已准备必要热词或专有名词词表
|
||||
- 已能用固定样本测试识别准确率和延迟
|
||||
|
||||
### TTS
|
||||
|
||||
- 已选择主音色,并完成至少一次试听
|
||||
- 已确认该声音适合实时对话,而不是仅适合离线播报
|
||||
- 已为默认语速、音量等参数设定初始值
|
||||
|
||||
## 不在本页展开的内容
|
||||
|
||||
字段说明、供应商差异、参数建议和最佳实践已经分别收敛到正式能力页:
|
||||
|
||||
- [LLM 模型](../customization/models.md)
|
||||
- [语音识别](../customization/asr.md)
|
||||
- [声音资源](../customization/voices.md)
|
||||
- [TTS 参数](../customization/tts.md)
|
||||
|
||||
准备完成后,请回到 [快速开始](index.md) 继续创建助手。
|
||||
98
docs/content/quickstart/index.md
Normal file
98
docs/content/quickstart/index.md
Normal file
@@ -0,0 +1,98 @@
|
||||
# 快速开始
|
||||
|
||||
本页负责“创建第一个助手”的最短路径。环境要求、配置文件和部署方式统一放在 [环境与部署](../getting-started/index.md)。
|
||||
|
||||
## 目标
|
||||
|
||||
完成本页后,你应该已经:
|
||||
|
||||
1. 准备好 1 个 LLM、1 个 ASR、1 个 TTS 资源
|
||||
2. 创建并保存 1 个助手
|
||||
3. 完成至少 1 轮测试对话
|
||||
4. 拿到接入应用所需的 `assistant_id` 和 WebSocket 地址
|
||||
|
||||
## 前提条件
|
||||
|
||||
- 已部署 Realtime Agent Studio(RAS)服务
|
||||
- 已准备可用的 LLM / ASR / TTS 凭证
|
||||
- 已能访问控制台与 WebSocket 服务
|
||||
|
||||
## 第一步:准备资源
|
||||
|
||||
创建助手之前,先准备三类资源:
|
||||
|
||||
- **LLM 模型**:决定助手如何理解和生成回复。详见 [LLM 模型](../customization/models.md)
|
||||
- **ASR 资源**:决定语音输入如何转写。详见 [语音识别](../customization/asr.md)
|
||||
- **TTS 声音资源**:决定回复如何被合成为语音。详见 [声音资源](../customization/voices.md)
|
||||
|
||||
如果你想先检查“资源是否准备齐”,可以看 [资源准备清单](dashboard.md)。
|
||||
|
||||
## 第二步:创建助手
|
||||
|
||||
1. 进入控制台中的 **助手** 页面
|
||||
2. 新建一个助手,并填写最小必要信息:
|
||||
- **助手名称**:让团队知道它服务于什么场景
|
||||
- **系统提示词**:先定义角色、任务和限制
|
||||
- **首轮模式**:决定由助手先说还是等待用户开口
|
||||
3. 绑定默认模型:
|
||||
- 文本生成使用一个 LLM
|
||||
- 语音输入使用一个 ASR
|
||||
- 语音输出使用一个 TTS 声音资源
|
||||
|
||||
如果你想把助手设计得更稳,继续阅读:
|
||||
|
||||
- [助手概念](../concepts/assistants.md)
|
||||
- [配置选项](../concepts/assistants/configuration.md)
|
||||
- [提示词指南](../concepts/assistants/prompts.md)
|
||||
|
||||
## 第三步:补充能力
|
||||
|
||||
最小助手可以只依赖提示词和模型;更复杂的场景通常还需要以下能力:
|
||||
|
||||
- **知识库**:让助手回答私有领域问题。见 [知识库](../customization/knowledge-base.md)
|
||||
- **工具**:让助手执行查单、预约、查询等外部操作。见 [工具](../customization/tools.md)
|
||||
- **工作流**:让助手处理多步骤、多分支流程。见 [工作流](../customization/workflows.md)
|
||||
|
||||
## 第四步:测试并发布
|
||||
|
||||
1. 打开助手测试面板,先验证文本对话,再验证语音输入输出
|
||||
2. 观察事件流、转写、工具调用和最终回复是否符合预期
|
||||
3. 保存当前配置,并确认该助手已可用于外部接入
|
||||
|
||||
更系统的验证方式见 [测试调试](../concepts/assistants/testing.md)。
|
||||
|
||||
## 第五步:接入应用
|
||||
|
||||
最小接入方式是使用 WebSocket API 建立实时会话:
|
||||
|
||||
```javascript
|
||||
const ws = new WebSocket('ws://your-server/ws?assistant_id=YOUR_ASSISTANT_ID');
|
||||
|
||||
ws.onopen = () => {
|
||||
ws.send(JSON.stringify({
|
||||
type: 'session.start',
|
||||
audio: { encoding: 'pcm_s16le', sample_rate_hz: 16000, channels: 1 }
|
||||
}));
|
||||
};
|
||||
```
|
||||
|
||||
你通常只需要两项信息:
|
||||
|
||||
- `assistant_id`:指定接入哪个助手
|
||||
- WebSocket 地址:由引擎服务提供实时对话入口
|
||||
|
||||
完整协议见 [WebSocket 协议](../api-reference/websocket.md)。
|
||||
|
||||
## 常见卡点
|
||||
|
||||
- 资源配置不生效:回到 [资源准备清单](dashboard.md) 检查三类资源是否都已准备好
|
||||
- 助手不回复:先看 [测试调试](../concepts/assistants/testing.md),再进入 [故障排查](../resources/troubleshooting.md)
|
||||
- 回复质量不稳定:优先检查 [提示词指南](../concepts/assistants/prompts.md) 与 [知识库](../customization/knowledge-base.md)
|
||||
|
||||
## 下一步
|
||||
|
||||
- [环境与部署](../getting-started/index.md) - 补全环境、配置和部署细节
|
||||
- [构建助手](../concepts/assistants.md) - 深入配置助手、模型、知识库、工具与工作流
|
||||
- [API 参考](../api-reference/index.md) - 查看管理接口与实时协议
|
||||
|
||||
|
||||
59
docs/content/resources/faq.md
Normal file
59
docs/content/resources/faq.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# 常见问题
|
||||
|
||||
本页只提供简短回答和跳转建议;如果你需要逐步排查,请直接进入 [故障排查](troubleshooting.md)。
|
||||
|
||||
## Q: 我应该先看哪一部分文档?
|
||||
|
||||
- 想了解产品是什么:看 [产品概览](../overview/index.md)
|
||||
- 想先把服务跑起来:看 [环境与部署](../getting-started/index.md)
|
||||
- 想最快创建第一个助手:看 [快速开始](../quickstart/index.md)
|
||||
- 想系统完成助手配置:从 [助手概览](../concepts/assistants.md) 开始
|
||||
|
||||
## Q: 如何配置模型或 API Key?
|
||||
|
||||
进入对应资源页完成配置:
|
||||
|
||||
- LLM:见 [LLM 模型](../customization/models.md)
|
||||
- ASR:见 [语音识别](../customization/asr.md)
|
||||
- TTS:见 [声音资源](../customization/voices.md)
|
||||
|
||||
## Q: 助手为什么不回复?
|
||||
|
||||
通常先检查三件事:
|
||||
|
||||
- 助手是否已绑定可用的模型资源
|
||||
- 提示词、知识库或工具是否配置完整
|
||||
- WebSocket 会话是否已经正常建立
|
||||
|
||||
下一步:
|
||||
|
||||
- 助手行为验证:看 [测试调试](../concepts/assistants/testing.md)
|
||||
- 逐步排查:看 [故障排查](troubleshooting.md)
|
||||
|
||||
## Q: 回复为什么不准确或不稳定?
|
||||
|
||||
优先检查:
|
||||
|
||||
- 提示词是否明确了角色、任务和限制
|
||||
- 是否应该补充知识库,而不是继续堆叠提示词
|
||||
- 是否需要把复杂业务改成工作流,而不是单轮问答
|
||||
|
||||
相关文档:
|
||||
|
||||
- [提示词指南](../concepts/assistants/prompts.md)
|
||||
- [知识库](../customization/knowledge-base.md)
|
||||
- [工作流](../customization/workflows.md)
|
||||
|
||||
## Q: 语音识别或语音播放效果不好怎么办?
|
||||
|
||||
- 输入侧问题先看 [语音识别](../customization/asr.md)
|
||||
- 输出侧问题先看 [声音资源](../customization/voices.md) 和 [TTS 参数](../customization/tts.md)
|
||||
- 需要逐步定位链路问题时,再看 [故障排查](troubleshooting.md)
|
||||
|
||||
## Q: 页面空白、接口报错或连接不上怎么办?
|
||||
|
||||
这是典型的环境或链路问题:
|
||||
|
||||
- 先确认 [环境与部署](../getting-started/index.md) 中的三个服务都已启动
|
||||
- 再进入 [故障排查](troubleshooting.md) 按连接、API、页面加载或性能问题分类处理
|
||||
|
||||
292
docs/content/resources/troubleshooting.md
Normal file
292
docs/content/resources/troubleshooting.md
Normal file
@@ -0,0 +1,292 @@
|
||||
# 故障排查
|
||||
|
||||
本文档汇总常见问题的排查步骤和解决方案。
|
||||
|
||||
## 连接问题
|
||||
|
||||
### WebSocket 连接失败
|
||||
|
||||
**症状**:无法建立 WebSocket 连接,控制台显示连接错误。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查服务状态**
|
||||
```bash
|
||||
# 检查 Engine 服务是否运行
|
||||
curl http://localhost:8000/health
|
||||
```
|
||||
|
||||
2. **验证连接地址**
|
||||
- 确认 host 和 port 正确
|
||||
- 确认 assistant_id 参数存在
|
||||
|
||||
3. **检查网络**
|
||||
- 确认防火墙未阻止 WebSocket
|
||||
- 检查 Nginx 代理配置(如有)
|
||||
|
||||
4. **查看服务日志**
|
||||
```bash
|
||||
docker logs ai-assistant-engine
|
||||
```
|
||||
|
||||
**常见原因**:
|
||||
- Engine 服务未启动
|
||||
- assistant_id 无效
|
||||
- 防火墙阻止 WebSocket 端口
|
||||
|
||||
---
|
||||
|
||||
### API 请求失败
|
||||
|
||||
**症状**:REST API 返回错误或超时。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查 API 服务**
|
||||
```bash
|
||||
curl http://localhost:8080/health
|
||||
```
|
||||
|
||||
2. **验证请求格式**
|
||||
- Content-Type 是否为 application/json
|
||||
- 请求体是否为有效 JSON
|
||||
|
||||
3. **检查认证**
|
||||
- Authorization header 是否正确
|
||||
- API Key 是否有效
|
||||
|
||||
4. **查看响应详情**
|
||||
```bash
|
||||
curl -v http://localhost:8080/api/v1/assistants
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 助手问题
|
||||
|
||||
### 助手不回复
|
||||
|
||||
**症状**:发送消息后没有收到助手回复。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查会话状态**
|
||||
- 确认收到 `session.started` 事件
|
||||
- 确认没有 `error` 事件
|
||||
|
||||
2. **检查 LLM 配置**
|
||||
- API Key 是否有效
|
||||
- 模型配置是否正确
|
||||
- 测试模型连接
|
||||
|
||||
3. **查看日志**
|
||||
- 检查 LLM 调用是否成功
|
||||
- 查看是否有超时错误
|
||||
|
||||
**常见原因**:
|
||||
- LLM API Key 无效或过期
|
||||
- 模型服务不可用
|
||||
- 请求超时
|
||||
|
||||
---
|
||||
|
||||
### 回复质量差
|
||||
|
||||
**症状**:助手回复不准确、不相关或格式混乱。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查提示词**
|
||||
- 是否有明确的角色定义
|
||||
- 是否有清晰的任务描述
|
||||
- 是否有必要的约束
|
||||
|
||||
2. **调整参数**
|
||||
- 降低 temperature 提高一致性
|
||||
- 调整 max_tokens 控制长度
|
||||
|
||||
3. **检查知识库**
|
||||
- 确认知识库已关联
|
||||
- 测试检索结果是否相关
|
||||
|
||||
4. **查看对话历史**
|
||||
- 分析问题出现的模式
|
||||
- 收集典型的失败案例
|
||||
|
||||
---
|
||||
|
||||
## 语音问题
|
||||
|
||||
### 语音识别不准确
|
||||
|
||||
**症状**:ASR 识别结果与实际说话内容不符。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查音频质量**
|
||||
- 麦克风是否正常工作
|
||||
- 环境是否嘈杂
|
||||
- 采样率是否正确(16kHz)
|
||||
|
||||
2. **验证 ASR 配置**
|
||||
- 语言设置是否正确
|
||||
- 是否配置了热词
|
||||
|
||||
3. **测试不同引擎**
|
||||
- 尝试切换 ASR 服务提供商
|
||||
- 对比识别效果
|
||||
|
||||
**改进建议**:
|
||||
- 添加业务相关的热词
|
||||
- 使用降噪麦克风
|
||||
- 选择针对中文优化的 ASR 引擎
|
||||
|
||||
---
|
||||
|
||||
### 语音无法播放
|
||||
|
||||
**症状**:TTS 合成成功但没有声音输出。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查浏览器设置**
|
||||
- 是否允许自动播放音频
|
||||
- 音量是否静音
|
||||
|
||||
2. **验证音频数据**
|
||||
- 确认收到 `output.audio.start` 事件
|
||||
- 确认收到二进制音频帧
|
||||
- 确认收到 `output.audio.end` 事件
|
||||
|
||||
3. **检查音频解码**
|
||||
- PCM 格式是否正确解析
|
||||
- AudioContext 是否正确初始化
|
||||
|
||||
4. **测试 TTS 服务**
|
||||
- 单独测试 TTS 配置
|
||||
- 检查 TTS API 状态
|
||||
|
||||
---
|
||||
|
||||
## 部署问题
|
||||
|
||||
### Docker 容器启动失败
|
||||
|
||||
**症状**:容器无法启动或立即退出。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **查看容器日志**
|
||||
```bash
|
||||
docker logs <container_name>
|
||||
```
|
||||
|
||||
2. **检查资源限制**
|
||||
```bash
|
||||
docker stats
|
||||
```
|
||||
|
||||
3. **验证配置文件**
|
||||
- 环境变量是否正确
|
||||
- 配置文件路径是否存在
|
||||
|
||||
4. **检查端口冲突**
|
||||
```bash
|
||||
netstat -an | grep <port>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 页面加载空白
|
||||
|
||||
**症状**:浏览器打开页面但内容为空。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **检查浏览器控制台**
|
||||
- 打开 F12 开发者工具
|
||||
- 查看 Console 错误信息
|
||||
|
||||
2. **验证静态资源**
|
||||
- 检查 Network 标签页
|
||||
- 确认 JS/CSS 文件加载成功
|
||||
|
||||
3. **检查 API 连接**
|
||||
- 确认 VITE_API_URL 配置正确
|
||||
- 测试 API 是否可访问
|
||||
|
||||
4. **清除缓存**
|
||||
```bash
|
||||
# 强制刷新
|
||||
Ctrl + Shift + R
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 性能问题
|
||||
|
||||
### 响应延迟高
|
||||
|
||||
**症状**:从发送消息到收到回复时间过长。
|
||||
|
||||
**排查步骤**:
|
||||
|
||||
1. **定位延迟环节**
|
||||
- ASR 处理时间
|
||||
- LLM 推理时间
|
||||
- TTS 合成时间
|
||||
|
||||
2. **查看性能指标**
|
||||
- 检查 `metrics.ttfb` 事件
|
||||
- 分析各环节耗时
|
||||
|
||||
3. **优化配置**
|
||||
- 使用更快的模型
|
||||
- 减少 max_tokens
|
||||
- 启用流式输出
|
||||
|
||||
4. **检查网络**
|
||||
- 测试到各 API 的延迟
|
||||
- 考虑使用更近的服务区域
|
||||
|
||||
---
|
||||
|
||||
## 日志查看
|
||||
|
||||
### 服务端日志
|
||||
|
||||
```bash
|
||||
# Docker 容器日志
|
||||
docker logs -f ai-assistant-engine
|
||||
|
||||
# 查看最近 100 行
|
||||
docker logs --tail 100 ai-assistant-engine
|
||||
```
|
||||
|
||||
### 客户端日志
|
||||
|
||||
在浏览器开发者工具中:
|
||||
|
||||
1. **Console** - 查看 JavaScript 错误和日志
|
||||
2. **Network** - 查看网络请求和响应
|
||||
3. **WebSocket** - 查看 WS 消息(在 Network 标签页)
|
||||
|
||||
### 启用详细日志
|
||||
|
||||
设置环境变量启用调试日志:
|
||||
|
||||
```bash
|
||||
# Engine 服务
|
||||
LOG_LEVEL=debug
|
||||
|
||||
# API 服务
|
||||
DEBUG=true
|
||||
```
|
||||
|
||||
## 获取帮助
|
||||
|
||||
如果以上方法无法解决问题:
|
||||
|
||||
1. 收集相关日志和错误信息
|
||||
2. 描述复现步骤
|
||||
3. 提交 Issue 或联系技术支持
|
||||
110
docs/content/roadmap.md
Normal file
110
docs/content/roadmap.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# 开发路线图
|
||||
|
||||
本页面展示 Realtime Agent Studio 的开发计划和进度。
|
||||
|
||||
---
|
||||
|
||||
## 已完成 :white_check_mark:
|
||||
|
||||
### 实时交互引擎
|
||||
|
||||
- [x] **管线式全双工引擎** - ASR / LLM / TTS 流水线架构
|
||||
- [x] **智能打断处理** - VAD + EOU 检测
|
||||
- [x] **OpenAI 兼容接口** - ASR / TTS 标准接口适配
|
||||
- [x] **DashScope TTS** - 阿里云语音合成适配
|
||||
|
||||
### 助手配置管理
|
||||
|
||||
- [x] **系统提示词编辑** - Prompt 配置,动态变量注入
|
||||
- [x] **模型选择** - LLM / ASR / TTS 模型管理界面
|
||||
- [x] **工具调用配置** - Webhook 工具 + 客户端工具
|
||||
|
||||
### 调试与观察
|
||||
|
||||
- [x] **实时调试控制台** - WebSocket 调试连接示例
|
||||
- [x] **完整会话回放** - 音频 + 转写 + LLM 响应
|
||||
- [x] **会话检索筛选** - 按时间 / 助手 / 状态筛选
|
||||
|
||||
### 开放接口
|
||||
|
||||
- [x] **WebSocket 协议** - `/ws` 端点完整实现
|
||||
- [x] **RESTful 接口** - 完整的 CRUD API
|
||||
|
||||
---
|
||||
|
||||
## 开发中 :construction:
|
||||
|
||||
### 助手与能力编排
|
||||
|
||||
- [ ] **私有化 ASR / TTS 适配** - 本地模型接入
|
||||
- [ ] **工作流编辑** - 可视化流程编排
|
||||
- [ ] **知识库关联** - RAG 文档管理
|
||||
|
||||
### 实时交互引擎
|
||||
|
||||
- [ ] **原生多模态模型** - Step Audio 接入(GPT-4o Realtime / Gemini Live 国内环境受限)
|
||||
- [ ] **WebRTC 协议** - `/webrtc` 端点
|
||||
|
||||
### 开放接口
|
||||
|
||||
- [ ] **SDK 支持** - JavaScript / Python SDK
|
||||
- [ ] **电话接入** - 电话呼入自动接听 / 自动呼出接口和批量呼出
|
||||
|
||||
### 效果评估
|
||||
|
||||
- [ ] **自动化测试工具** - 固定测试 + 智能测试
|
||||
|
||||
---
|
||||
|
||||
## 计划中 :spiral_notepad:
|
||||
|
||||
### 开放接口
|
||||
|
||||
- [ ] **Webhook 回调** - 会话事件通知机制
|
||||
|
||||
### 数据与评估
|
||||
|
||||
- [ ] **实时仪表盘增强** - 完善统计看板功能
|
||||
- [ ] **评估闭环** - 测试、评分、回归与变更追踪
|
||||
|
||||
### 企业能力
|
||||
|
||||
- [ ] **多租户支持** - 团队 / 组织管理
|
||||
- [ ] **权限管理** - RBAC 角色权限控制
|
||||
- [ ] **审计日志** - 操作记录追踪
|
||||
|
||||
### 生态集成
|
||||
|
||||
- [ ] **更多模型供应商** - 讯飞、百度、腾讯等
|
||||
- [ ] **CRM 集成** - Salesforce、HubSpot 等
|
||||
- [ ] **呼叫中心集成** - SIP / PSTN 网关
|
||||
|
||||
---
|
||||
|
||||
## 版本规划
|
||||
|
||||
| 版本 | 目标 | 状态 |
|
||||
|------|------|------|
|
||||
| **v0.1.0** | 核心功能 MVP,管线式引擎 | :white_check_mark: 已发布 |
|
||||
| **v0.2.0** | 工作流编辑器,知识库集成 | :construction: 开发中 |
|
||||
| **v0.3.0** | SDK 发布,多模态模型支持 | :spiral_notepad: 计划中 |
|
||||
| **v1.0.0** | 生产就绪,企业特性 | :spiral_notepad: 计划中 |
|
||||
|
||||
---
|
||||
|
||||
## 生态参考
|
||||
|
||||
### 开源项目
|
||||
|
||||
- [Livekit Agent](https://github.com/livekit/agents)
|
||||
- [Pipecat](https://github.com/pipecat-ai/pipecat)
|
||||
- [Vision Agents](https://github.com/GetStream/Vision-Agents)
|
||||
- [active-call](https://github.com/miuda-ai/active-call)
|
||||
- [TEN](https://github.com/TEN-framework/ten-framework)
|
||||
- [airi](https://github.com/moeru-ai/airi)
|
||||
- [Vocode Core](https://github.com/vocodedev/vocode-core)
|
||||
- [awesome-voice-agents](https://github.com/yzfly/awesome-voice-agents)
|
||||
|
||||
### 文档与研究参考
|
||||
|
||||
- [Voice AI & Voice Agents](https://voiceaiandvoiceagents.com/)
|
||||
160
docs/content/stylesheets/extra.css
Normal file
160
docs/content/stylesheets/extra.css
Normal file
@@ -0,0 +1,160 @@
|
||||
/* Realtime Agent Studio - Custom Styles */
|
||||
|
||||
:root {
|
||||
--md-primary-fg-color: #4f46e5;
|
||||
--md-primary-fg-color--light: #6366f1;
|
||||
--md-primary-fg-color--dark: #4338ca;
|
||||
--md-accent-fg-color: #6366f1;
|
||||
}
|
||||
|
||||
/* Hero Section - Center aligned content */
|
||||
.md-typeset p[align="center"] {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.md-typeset p[align="center"] img {
|
||||
display: inline-block;
|
||||
margin: 0 4px;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.md-typeset p[align="center"] a {
|
||||
margin: 0 8px;
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] {
|
||||
--md-primary-fg-color: #818cf8;
|
||||
--md-primary-fg-color--light: #a5b4fc;
|
||||
--md-primary-fg-color--dark: #6366f1;
|
||||
--md-accent-fg-color: #818cf8;
|
||||
}
|
||||
|
||||
/* Hero Section Styling */
|
||||
.md-content h1 {
|
||||
font-weight: 700;
|
||||
letter-spacing: -0.02em;
|
||||
}
|
||||
|
||||
/* Badge Styling */
|
||||
.md-content img[src*="badge"] {
|
||||
margin: 0 4px;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
/* Grid Cards Enhancement */
|
||||
.md-typeset .grid.cards > ul > li {
|
||||
border: 1px solid var(--md-default-fg-color--lightest);
|
||||
border-radius: 8px;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.md-typeset .grid.cards > ul > li:hover {
|
||||
border-color: var(--md-primary-fg-color);
|
||||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
|
||||
/* Code Block Enhancement */
|
||||
.md-typeset pre > code {
|
||||
border-radius: 8px;
|
||||
}
|
||||
|
||||
.md-typeset .highlight {
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
/* Table Enhancement */
|
||||
.md-typeset table:not([class]) {
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
border: 1px solid var(--md-default-fg-color--lightest);
|
||||
}
|
||||
|
||||
.md-typeset table:not([class]) th {
|
||||
background-color: var(--md-default-fg-color--lightest);
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
/* Admonition Enhancement */
|
||||
.md-typeset .admonition,
|
||||
.md-typeset details {
|
||||
border-radius: 8px;
|
||||
border: none;
|
||||
}
|
||||
|
||||
/* Mermaid Diagram Styling - consistent element size across diagrams */
|
||||
.mermaid {
|
||||
margin: 1.5rem 0;
|
||||
overflow-x: auto;
|
||||
}
|
||||
.mermaid svg {
|
||||
min-width: min-content;
|
||||
}
|
||||
|
||||
/* Navigation Enhancement */
|
||||
.md-nav__link {
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.md-nav__item--active > .md-nav__link {
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
/* Footer Styling */
|
||||
.md-footer {
|
||||
margin-top: 3rem;
|
||||
}
|
||||
|
||||
/* Center align for hero badges */
|
||||
.md-content > .md-typeset > div[align="center"] img {
|
||||
margin: 0.25rem;
|
||||
}
|
||||
|
||||
/* Task list styling */
|
||||
.md-typeset .task-list-item input[type="checkbox"] {
|
||||
margin-right: 0.5rem;
|
||||
}
|
||||
|
||||
/* Improve readability */
|
||||
.md-typeset {
|
||||
font-size: 0.85rem;
|
||||
line-height: 1.75;
|
||||
}
|
||||
|
||||
.md-typeset h2 {
|
||||
margin-top: 2.5rem;
|
||||
padding-bottom: 0.5rem;
|
||||
border-bottom: 1px solid var(--md-default-fg-color--lightest);
|
||||
}
|
||||
|
||||
.md-typeset h3 {
|
||||
margin-top: 1.5rem;
|
||||
}
|
||||
|
||||
/* Responsive improvements */
|
||||
@media screen and (max-width: 76.1875em) {
|
||||
.md-typeset .grid.cards > ul > li {
|
||||
padding: 1rem;
|
||||
}
|
||||
}
|
||||
|
||||
/* Animation for interactive elements */
|
||||
.md-typeset a:not(.md-button) {
|
||||
transition: color 0.15s ease;
|
||||
}
|
||||
|
||||
.md-typeset a:not(.md-button):hover {
|
||||
color: var(--md-accent-fg-color);
|
||||
}
|
||||
|
||||
/* Version selector styling */
|
||||
.md-version {
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
|
||||
/* Search highlight */
|
||||
.md-search-result mark {
|
||||
background-color: var(--md-accent-fg-color--transparent);
|
||||
color: inherit;
|
||||
}
|
||||
168
docs/mkdocs.yml
168
docs/mkdocs.yml
@@ -1,21 +1,157 @@
|
||||
site_name: "AI Video Assistant"
|
||||
site_description: "AI 视频助手 - 智能对话与工作流管理平台"
|
||||
copyright: "2025"
|
||||
site_author: "AI Video Assistant Team"
|
||||
site_name: "Realtime Agent Studio"
|
||||
site_description: "Realtime Agent Studio(RAS)是一个通过管理控制台与 API 构建、部署和运营实时多模态助手的开源平台。"
|
||||
site_url: "https://your-org.github.io/AI-VideoAssistant"
|
||||
copyright: "Copyright © 2025 RAS Team"
|
||||
site_author: "RAS Team"
|
||||
|
||||
docs_dir: "content"
|
||||
site_dir: "site"
|
||||
|
||||
nav:
|
||||
- 首页: "index.md"
|
||||
- 快速开始: "getting-started.md"
|
||||
- 功能介绍:
|
||||
- 仪表盘: "features/dashboard.md"
|
||||
- 助手管理: "features/assistants.md"
|
||||
- 工作流: "features/workflows.md"
|
||||
- 模型配置: "features/models.md"
|
||||
- 知识库: "features/knowledge.md"
|
||||
- 历史记录: "features/history.md"
|
||||
- 自动化测试: "features/autotest.md"
|
||||
- 语音合成: "features/voices.md"
|
||||
- 部署指南: "deployment.md"
|
||||
- 首页: index.md
|
||||
- 快速开始:
|
||||
- 环境与部署: getting-started/index.md
|
||||
- 创建第一个助手: quickstart/index.md
|
||||
- 构建助手:
|
||||
- 助手概览: concepts/assistants.md
|
||||
- 基础配置: concepts/assistants/configuration.md
|
||||
- 提示词: concepts/assistants/prompts.md
|
||||
- LLM 模型: customization/models.md
|
||||
- 语音识别: customization/asr.md
|
||||
- 声音资源: customization/voices.md
|
||||
- TTS 参数: customization/tts.md
|
||||
- 知识库: customization/knowledge-base.md
|
||||
- 工具: customization/tools.md
|
||||
- 工作流: customization/workflows.md
|
||||
- 测试与调试: concepts/assistants/testing.md
|
||||
- 核心概念:
|
||||
- 产品概览: overview/index.md
|
||||
- 概念总览: concepts/index.md
|
||||
- 引擎架构: concepts/engines.md
|
||||
- Pipeline 引擎: concepts/pipeline-engine.md
|
||||
- Realtime 引擎: concepts/realtime-engine.md
|
||||
- 系统架构: overview/architecture.md
|
||||
- 集成:
|
||||
- API 参考: api-reference/index.md
|
||||
- WebSocket 协议: api-reference/websocket.md
|
||||
- 错误码: api-reference/errors.md
|
||||
- 运维:
|
||||
- 仪表盘: analysis/dashboard.md
|
||||
- 历史记录: analysis/history.md
|
||||
- 效果评估: analysis/evaluation.md
|
||||
- 自动化测试: analysis/autotest.md
|
||||
- 常见问题: resources/faq.md
|
||||
- 故障排查: resources/troubleshooting.md
|
||||
- 更新日志: changelog.md
|
||||
- 路线图: roadmap.md
|
||||
theme:
|
||||
name: material
|
||||
language: zh
|
||||
custom_dir: overrides
|
||||
icon:
|
||||
logo: material/robot-outline
|
||||
font:
|
||||
text: Inter
|
||||
code: JetBrains Mono
|
||||
palette:
|
||||
- media: "(prefers-color-scheme: light)"
|
||||
scheme: default
|
||||
primary: indigo
|
||||
accent: indigo
|
||||
toggle:
|
||||
icon: material/brightness-7
|
||||
name: 切换到深色模式
|
||||
- media: "(prefers-color-scheme: dark)"
|
||||
scheme: slate
|
||||
primary: indigo
|
||||
accent: indigo
|
||||
toggle:
|
||||
icon: material/brightness-4
|
||||
name: 切换到浅色模式
|
||||
features:
|
||||
- navigation.instant
|
||||
- navigation.instant.prefetch
|
||||
- navigation.tracking
|
||||
- navigation.tabs
|
||||
- navigation.tabs.sticky
|
||||
- navigation.sections
|
||||
- navigation.expand
|
||||
- navigation.path
|
||||
- navigation.top
|
||||
- navigation.footer
|
||||
- toc.follow
|
||||
- search.suggest
|
||||
- search.highlight
|
||||
- search.share
|
||||
- content.code.copy
|
||||
- content.code.annotate
|
||||
- content.tabs.link
|
||||
markdown_extensions:
|
||||
- abbr
|
||||
- admonition
|
||||
- attr_list
|
||||
- def_list
|
||||
- footnotes
|
||||
- md_in_html
|
||||
- tables
|
||||
- toc:
|
||||
permalink: true
|
||||
toc_depth: 3
|
||||
- pymdownx.arithmatex:
|
||||
generic: true
|
||||
- pymdownx.betterem:
|
||||
smart_enable: all
|
||||
- pymdownx.caret
|
||||
- pymdownx.details
|
||||
- pymdownx.emoji:
|
||||
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
||||
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
||||
- pymdownx.highlight:
|
||||
anchor_linenums: true
|
||||
line_spans: __span
|
||||
pygments_lang_class: true
|
||||
- pymdownx.inlinehilite
|
||||
- pymdownx.keys
|
||||
- pymdownx.magiclink:
|
||||
repo_url_shorthand: true
|
||||
user: your-org
|
||||
repo: AI-VideoAssistant
|
||||
- pymdownx.mark
|
||||
- pymdownx.smartsymbols
|
||||
- pymdownx.snippets
|
||||
- pymdownx.superfences:
|
||||
custom_fences:
|
||||
- name: mermaid
|
||||
class: mermaid
|
||||
format: !!python/name:pymdownx.superfences.fence_code_format
|
||||
- pymdownx.tabbed:
|
||||
alternate_style: true
|
||||
- pymdownx.tasklist:
|
||||
custom_checkbox: true
|
||||
- pymdownx.tilde
|
||||
|
||||
plugins:
|
||||
- search:
|
||||
lang: zh
|
||||
separator: '[\s\-\.]+'
|
||||
- minify:
|
||||
minify_html: true
|
||||
|
||||
extra:
|
||||
social:
|
||||
- icon: fontawesome/brands/github
|
||||
link: https://github.com/your-org/AI-VideoAssistant
|
||||
name: GitHub
|
||||
generator: false
|
||||
analytics:
|
||||
provider: google
|
||||
property: G-XXXXXXXXXX
|
||||
|
||||
extra_css:
|
||||
- stylesheets/extra.css
|
||||
|
||||
extra_javascript:
|
||||
- javascripts/mermaid.mjs
|
||||
- javascripts/extra.js
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user