Files
ai-video-fullstack/backend/services/pipecat/service_factory.py
Xin Wang 809b634420 Enhance AssistantConfig and pipeline for FastGPT integration
- Add new fields in AssistantConfig for FastGPT connection details, including `fastgpt_api_url`, `fastgpt_api_key`, and `fastgpt_app_id`.
- Update the pipeline to utilize the new FastGPT configuration, ensuring proper integration with external services.
- Introduce type handling for different assistant types, including support for realtime modes and external brain management.
- Refactor frontend components to include hints for FastGPT configuration inputs, improving user guidance during setup.
2026-06-16 16:55:51 +08:00

163 lines
7.0 KiB
Python

"""创建 STT / LLM / TTS 服务。
对应 dograh 的 service_factory.py,但只留一套国产栈(OpenAI 兼容),
按 interface_type 扩展时在这里加分支即可——这是未来接更多模型的唯一入口。
"""
import config
from loguru import logger
from models import AssistantConfig
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.openai.stt import OpenAISTTService
from pipecat.services.openai.tts import VALID_VOICES, OpenAITTSService
from pipecat.transcriptions.language import Language
from services.pipecat.xfyun_asr import DEFAULT_XFYUN_ASR_URL, XfyunASRService
from services.pipecat.xfyun_config import websocket_url, xfyun_language, xfyun_speed
from services.pipecat.xfyun_super_tts import (
DEFAULT_XFYUN_SUPER_TTS_URL,
XfyunSuperTTSService,
)
from services.pipecat.xfyun_tts import DEFAULT_XFYUN_TTS_URL, XfyunTTSService
# Idle time after which TTS output is considered "finished speaking". The
# default of 3.0s is too long: once a workflow end node has finished speaking,
# the call would wait roughly 3s more before hanging up, and ordinary turn
# hand-back is slowed down as well. 1.0s still lets the closing text/audio be
# delivered while feeling more responsive.
# Gaps between sentences in streaming TTS audio are usually far below 1s, so a
# multi-sentence reply is not mistaken for the end of speech.
TTS_STOP_FRAME_TIMEOUT_S = 1.0
def _language(value: str) -> Language | None:
    """Convert a language code into a pipecat ``Language`` member.

    Returns ``None`` when ``value`` is empty, or when ``Language`` rejects it
    with ``ValueError``; the rejected value is logged as a warning.
    """
    parsed = None
    if value:
        try:
            parsed = Language(value)
        except ValueError:
            logger.warning(f"忽略不支持的 ASR language: {value}")
    return parsed
def create_stt(cfg: AssistantConfig):
    """Build the speech-to-text service selected by ``cfg.stt_interface_type``.

    * ``"xfyun-asr"`` -> ``XfyunASRService`` configured from
      ``cfg.stt_secrets``, ``cfg.stt_values``, ``cfg.stt_base_url`` and
      ``cfg.stt_language``.
    * ``"openai-asr"`` / ``"dashscope-asr"`` -> ``OpenAISTTService``. Per the
      original note this targets OpenAI-compatible /v1/audio/transcriptions
      backends such as SenseVoice or FunASR. Connection details are taken
      from ``cfg`` first (the original note says config_resolver injects them
      from the DB) and fall back to the ``config`` module defaults when empty.

    Raises:
        ValueError: for any other interface type.
    """
    kind = cfg.stt_interface_type

    if kind == "xfyun-asr":
        secrets = cfg.stt_secrets
        app_id = str(secrets.get("appId") or "")
        api_key = str(secrets.get("apiKey") or "")
        api_secret = str(secrets.get("apiSecret") or "")
        endpoint = websocket_url(cfg.stt_base_url, DEFAULT_XFYUN_ASR_URL)
        language = xfyun_language(cfg.stt_language)
        options = cfg.stt_values
        return XfyunASRService(
            app_id=app_id,
            api_key=api_key,
            api_secret=api_secret,
            url=endpoint,
            language=language,
            sample_rate=16000,
            domain=str(options.get("domain") or "iat"),
            accent=str(options.get("accent") or "mandarin"),
            frame_size=int(options.get("frameSize") or 1280),
            dynamic_correction=bool(options.get("dynamicCorrection", False)),
        )

    if kind in {"openai-asr", "dashscope-asr"}:
        api_key = cfg.stt_api_key or config.STT_API_KEY
        base_url = cfg.stt_base_url or config.STT_BASE_URL
        settings = OpenAISTTService.Settings(
            model=cfg.asr or config.STT_MODEL,
            language=_language(cfg.stt_language),
        )
        return OpenAISTTService(
            api_key=api_key,
            base_url=base_url,
            settings=settings,
        )

    raise ValueError(f"不支持的 ASR 接口类型: {kind}")
def create_llm(cfg: AssistantConfig):
    """Build the LLM service for an OpenAI-compatible chat endpoint.

    Per the original note this targets /v1/chat/completions backends such as
    DeepSeek. Only the ``"openai-llm"`` and ``"dashscope-llm"`` interface types
    are accepted. API key, base URL and model are read from ``cfg`` and fall
    back to the ``config`` module values when empty.

    Raises:
        ValueError: if ``cfg.llm_interface_type`` is not supported.
    """
    interface = cfg.llm_interface_type
    if interface in {"openai-llm", "dashscope-llm"}:
        api_key = cfg.llm_api_key or config.LLM_API_KEY
        base_url = cfg.llm_base_url or config.LLM_BASE_URL
        model = cfg.model or config.LLM_MODEL
        return OpenAILLMService(
            api_key=api_key,
            base_url=base_url,
            settings=OpenAILLMService.Settings(model=model),
        )
    raise ValueError(f"不支持的 LLM 接口类型: {interface}")
def _tts_int_option(values, key: str, default: int) -> int:
    """Read an integer TTS option, using ``default`` only when it is unset.

    Unlike ``int(values.get(key) or default)``, an explicit ``0`` is kept
    rather than being replaced by the default; only ``None`` and ``""`` are
    treated as "not configured".
    """
    raw = values.get(key)
    if raw is None or raw == "":
        return default
    return int(raw)


def create_tts(cfg: AssistantConfig):
    """Build the text-to-speech service selected by ``cfg.tts_interface_type``.

    * ``"xfyun-super-tts"`` -> ``XfyunSuperTTSService``
    * ``"xfyun-tts"`` -> ``XfyunTTSService``
    * ``"openai-tts"`` / ``"dashscope-tts"`` -> ``OpenAITTSService``; per the
      original note this targets OpenAI-compatible /v1/audio/speech backends
      such as CosyVoice.

    The voice is ``cfg.voice`` with ``config.TTS_VOICE`` as fallback. For the
    Xfyun services ``volume`` and ``pitch`` default to 50 only when they are
    not configured, so an explicit ``0`` is passed through unchanged.

    Raises:
        ValueError: if the interface type is none of the above.
    """
    voice = cfg.voice or config.TTS_VOICE
    if cfg.tts_interface_type == "xfyun-super-tts":
        return XfyunSuperTTSService(
            app_id=str(cfg.tts_secrets.get("appId") or ""),
            api_key=str(cfg.tts_secrets.get("apiKey") or ""),
            api_secret=str(cfg.tts_secrets.get("apiSecret") or ""),
            voice=voice,
            url=websocket_url(cfg.tts_base_url, DEFAULT_XFYUN_SUPER_TTS_URL),
            sample_rate=16000,
            # A sample rate of 0 is never meaningful, so `or` is the right fallback.
            source_sample_rate=int(cfg.tts_values.get("sourceSampleRate") or 24000),
            speed=xfyun_speed(cfg.tts_speed),
            volume=_tts_int_option(cfg.tts_values, "volume", 50),
            pitch=_tts_int_option(cfg.tts_values, "pitch", 50),
            oral_level=str(cfg.tts_values.get("oralLevel") or "mid"),
            text_aggregation_mode=str(
                cfg.tts_values.get("textAggregationMode") or "token"
            ),
        )
    if cfg.tts_interface_type == "xfyun-tts":
        return XfyunTTSService(
            app_id=str(cfg.tts_secrets.get("appId") or ""),
            api_key=str(cfg.tts_secrets.get("apiKey") or ""),
            api_secret=str(cfg.tts_secrets.get("apiSecret") or ""),
            voice=voice,
            url=websocket_url(cfg.tts_base_url, DEFAULT_XFYUN_TTS_URL),
            sample_rate=16000,
            # A sample rate of 0 is never meaningful, so `or` is the right fallback.
            source_sample_rate=int(cfg.tts_values.get("sourceSampleRate") or 16000),
            speed=xfyun_speed(cfg.tts_speed),
            volume=_tts_int_option(cfg.tts_values, "volume", 50),
            pitch=_tts_int_option(cfg.tts_values, "pitch", 50),
            push_stop_frames=True,
            stop_frame_timeout_s=TTS_STOP_FRAME_TIMEOUT_S,
        )
    if cfg.tts_interface_type not in {"openai-tts", "dashscope-tts"}:
        raise ValueError(f"不支持的 TTS 接口类型: {cfg.tts_interface_type}")
    # Pipecat only accepts the official OpenAI voices by default. OpenAI-compatible
    # services often use custom voice ids, so register the id as an identity
    # mapping; the OpenAI SDK then passes the string through to the vendor as-is.
    VALID_VOICES.setdefault(voice, voice)
    return OpenAITTSService(
        api_key=cfg.tts_api_key or config.TTS_API_KEY,
        base_url=cfg.tts_base_url or config.TTS_BASE_URL,
        stop_frame_timeout_s=TTS_STOP_FRAME_TIMEOUT_S,
        settings=OpenAITTSService.Settings(
            model=cfg.tts_model or config.TTS_MODEL,
            voice=voice,
            speed=cfg.tts_speed,
        ),
    )
def create_realtime_service(cfg: AssistantConfig):
    """Create a speech-to-speech service that owns STT, LLM, and TTS.

    Only ``"stepfun-realtime"`` is handled; its service class is imported
    lazily, after the interface type has been validated. Numeric options are
    read from ``cfg.realtime_values`` and replaced by their defaults when
    missing or falsy.

    Raises:
        ValueError: if ``cfg.realtime_interface_type`` is not supported.
    """
    if cfg.realtime_interface_type != "stepfun-realtime":
        raise ValueError(f"不支持的 Realtime 接口类型: {cfg.realtime_interface_type}")

    from services.pipecat.stepfun_realtime import StepFunRealtimeService

    def _int_option(key, fallback):
        # Same fallback rule as the inline `int(... or default)` expressions.
        return int(cfg.realtime_values.get(key) or fallback)

    return StepFunRealtimeService(
        api_key=cfg.realtime_api_key,
        model=cfg.realtimeModel,
        base_url=cfg.realtime_base_url,
        instructions=cfg.prompt,
        voice=str(cfg.realtime_values.get("voice") or "linjiajiejie"),
        input_sample_rate=_int_option("inputSampleRate", 24000),
        output_sample_rate=_int_option("outputSampleRate", 24000),
        prefix_padding_ms=_int_option("prefixPaddingMs", 500),
        silence_duration_ms=_int_option("silenceDurationMs", 300),
        energy_awakeness_threshold=_int_option("energyAwakenessThreshold", 2500),
    )