- Introduce new Xfyun ASR and TTS services, enabling integration with iFlytek's voice recognition and synthesis capabilities.
- Update the AssistantConfig model to include interface types for STT and TTS.
- Enhance credential testing to validate Xfyun credentials.
- Modify the service factory to create Xfyun services based on configuration.
- Update the README with new configuration details for the Xfyun integration.
- Add new frontend components for visualizing audio streams and managing user interactions.
127 lines
4.4 KiB
Python
127 lines
4.4 KiB
Python
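"""Create the STT / LLM / TTS services.

Counterpart of dograh's service_factory.py, but keeps only a single
domestic (OpenAI-compatible) stack. To extend by provider, add a branch
here -- this is the single entry point for wiring in more models later.
"""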
|
|
|
|
import config
|
|
from loguru import logger
|
|
from models import AssistantConfig
|
|
|
|
from pipecat.services.openai.llm import OpenAILLMService
|
|
from pipecat.services.openai.stt import OpenAISTTService
|
|
from pipecat.services.openai.tts import VALID_VOICES, OpenAITTSService
|
|
from pipecat.transcriptions.language import Language
|
|
|
|
from services.pipecat.xfyun_asr import DEFAULT_XFYUN_ASR_URL, XfyunASRService
|
|
from services.pipecat.xfyun_config import (
|
|
is_super_tts,
|
|
parse_xfyun_credential,
|
|
websocket_url,
|
|
xfyun_language,
|
|
xfyun_speed,
|
|
)
|
|
from services.pipecat.xfyun_super_tts import (
|
|
DEFAULT_XFYUN_SUPER_TTS_URL,
|
|
XfyunSuperTTSService,
|
|
)
|
|
from services.pipecat.xfyun_tts import DEFAULT_XFYUN_TTS_URL, XfyunTTSService
|
|
|
|
|
|
def _language(value: str) -> Language | None:
    """Convert a configured language code into a pipecat ``Language``.

    Args:
        value: Language code taken from the assistant configuration.

    Returns:
        The matching ``Language`` member, or ``None`` when ``value`` is
        empty or ``Language`` rejects it with ``ValueError`` (a warning is
        logged in the latter case).
    """
    if value:
        try:
            return Language(value)
        except ValueError:
            # An unsupported code is not fatal: log it and fall through so
            # the caller simply gets no explicit language.
            logger.warning(f"忽略不支持的 ASR language: {value}")
    return None
|
|
|
|
|
|
def create_stt(cfg: AssistantConfig):
    """Build the speech-to-text service selected by ``cfg``.

    Two back ends are supported:

    * ``cfg.stt_interface_type == "xfyun"``: an ``XfyunASRService`` built
      from the credential parsed out of ``cfg.stt_api_key``.
    * anything else: an ``OpenAISTTService`` (SenseVoice / FunASR etc.
      behind an OpenAI-compatible ``/v1/audio/transcriptions`` endpoint).

    For the OpenAI-compatible path, connection settings from ``cfg`` take
    precedence and empty values fall back to the defaults in ``config``.
    (Per the original note, ``cfg`` is filled in from the DB by
    config_resolver and ``config`` carries the .env defaults.)

    Args:
        cfg: Assistant configuration describing the STT provider.

    Returns:
        The constructed STT service instance.
    """
    if cfg.stt_interface_type != "xfyun":
        api_key = cfg.stt_api_key or config.STT_API_KEY
        base_url = cfg.stt_base_url or config.STT_BASE_URL
        stt_settings = OpenAISTTService.Settings(
            model=cfg.asr or config.STT_MODEL,
            language=_language(cfg.stt_language),
        )
        return OpenAISTTService(
            api_key=api_key,
            base_url=base_url,
            settings=stt_settings,
        )

    # xfyun path: the single api-key field is parsed into app id / key / secret.
    creds = parse_xfyun_credential(cfg.stt_api_key)
    return XfyunASRService(
        app_id=creds.app_id,
        api_key=creds.api_key,
        api_secret=creds.api_secret,
        url=websocket_url(cfg.stt_base_url, DEFAULT_XFYUN_ASR_URL),
        language=xfyun_language(cfg.stt_language),
        sample_rate=16000,
    )
|
|
|
|
|
|
def create_llm(cfg: AssistantConfig):
    """Build the LLM service (DeepSeek etc.) via OpenAI-compatible /v1/chat/completions.

    Each value is taken from ``cfg`` when set, otherwise from the matching
    ``config.LLM_*`` default.

    Args:
        cfg: Assistant configuration describing the LLM provider.

    Returns:
        The constructed ``OpenAILLMService``.
    """
    llm_key = cfg.llm_api_key or config.LLM_API_KEY
    llm_url = cfg.llm_base_url or config.LLM_BASE_URL
    llm_settings = OpenAILLMService.Settings(model=cfg.model or config.LLM_MODEL)
    return OpenAILLMService(
        api_key=llm_key,
        base_url=llm_url,
        settings=llm_settings,
    )
|
|
|
|
|
|
def create_tts(cfg: AssistantConfig):
    """Build the text-to-speech service selected by ``cfg``.

    * ``cfg.tts_interface_type == "xfyun"``: an ``XfyunSuperTTSService``
      when ``is_super_tts(cfg.tts_model, cfg.tts_base_url)`` is true,
      otherwise an ``XfyunTTSService``. Both receive ``sample_rate=16000``;
      the super variant is given ``source_sample_rate=24000`` and the
      regular one ``source_sample_rate=16000`` plus ``push_stop_frames=True``.
    * anything else: an ``OpenAITTSService`` (CosyVoice etc. behind an
      OpenAI-compatible ``/v1/audio/speech`` endpoint).

    The voice is ``cfg.voice`` when set, otherwise ``config.TTS_VOICE``, for
    every back end.

    Args:
        cfg: Assistant configuration describing the TTS provider.

    Returns:
        The constructed TTS service instance.
    """
    voice = cfg.voice or config.TTS_VOICE

    if cfg.tts_interface_type != "xfyun":
        # Pipecat only accepts the official OpenAI voices by default, while
        # OpenAI-compatible services often use custom voice ids. Registering
        # an identity mapping lets the OpenAI SDK pass the string through to
        # the vendor unchanged.
        VALID_VOICES.setdefault(voice, voice)
        api_key = cfg.tts_api_key or config.TTS_API_KEY
        base_url = cfg.tts_base_url or config.TTS_BASE_URL
        tts_settings = OpenAITTSService.Settings(
            model=cfg.tts_model or config.TTS_MODEL,
            voice=voice,
            speed=cfg.tts_speed,
        )
        return OpenAITTSService(
            api_key=api_key,
            base_url=base_url,
            settings=tts_settings,
        )

    creds = parse_xfyun_credential(cfg.tts_api_key)
    rate = xfyun_speed(cfg.tts_speed)

    # The two xfyun variants differ only in class, default URL, source
    # sample rate and one extra flag, so pick those and share the call.
    if is_super_tts(cfg.tts_model, cfg.tts_base_url):
        service_cls = XfyunSuperTTSService
        fallback_url = DEFAULT_XFYUN_SUPER_TTS_URL
        source_rate = 24000
        extra_kwargs = {}
    else:
        service_cls = XfyunTTSService
        fallback_url = DEFAULT_XFYUN_TTS_URL
        source_rate = 16000
        extra_kwargs = {"push_stop_frames": True}

    return service_cls(
        app_id=creds.app_id,
        api_key=creds.api_key,
        api_secret=creds.api_secret,
        voice=voice,
        url=websocket_url(cfg.tts_base_url, fallback_url),
        sample_rate=16000,
        source_sample_rate=source_rate,
        speed=rate,
        **extra_kwargs,
    )
|
|
|
|
|
|
def create_services(cfg: AssistantConfig):
    """Log the resolved service selection and build all three services.

    Args:
        cfg: Assistant configuration for STT, LLM and TTS.

    Returns:
        A ``(stt, llm, tts)`` tuple, created in that order by
        ``create_stt``, ``create_llm`` and ``create_tts``.
    """
    # Resolve the names shown in the log the same way the factories do:
    # the cfg value when set, otherwise the config default.
    stt_model = cfg.asr or config.STT_MODEL
    llm_model = cfg.model or config.LLM_MODEL
    tts_model = cfg.tts_model or config.TTS_MODEL
    voice = cfg.voice or config.TTS_VOICE
    logger.info(
        f"创建服务: stt={cfg.stt_interface_type}/{stt_model} "
        f"llm={llm_model} "
        f"tts={cfg.tts_interface_type}/{tts_model} "
        f"voice={voice}"
    )

    stt = create_stt(cfg)
    llm = create_llm(cfg)
    tts = create_tts(cfg)
    return stt, llm, tts
|