Refactor backend integration and service architecture

- Removed the backend client compatibility wrapper and associated methods to streamline backend integration.
- Updated session management to use control-plane gateways and runtime configuration providers.
- Adjusted TTS service implementations to remove the EdgeTTS service and simplify service dependencies.
- Enhanced documentation to reflect changes in backend integration and service architecture.
- Updated configuration files to remove deprecated TTS provider options and clarify available settings.
This commit is contained in:
Xin Wang
2026-03-06 09:00:43 +08:00
parent 6b589a1b7c
commit 4e2450e800
22 changed files with 632 additions and 452 deletions

View File

@@ -26,21 +26,25 @@ import aiohttp
from loguru import logger
from app.config import settings
from app.service_factory import DefaultRealtimeServiceFactory
from core.conversation import ConversationManager, ConversationState
from core.events import get_event_bus
from core.ports import (
ASRPort,
ASRServiceSpec,
LLMPort,
LLMServiceSpec,
RealtimeServiceFactory,
TTSPort,
TTSServiceSpec,
)
from core.tool_executor import execute_server_tool
from core.transports import BaseTransport
from models.ws_v1 import ev
from processors.eou import EouDetector
from processors.vad import SileroVAD, VADProcessor
from services.asr import BufferedASRService
from services.base import BaseASRService, BaseLLMService, BaseTTSService, LLMMessage, LLMStreamEvent
from services.dashscope_tts import DashScopeTTSService
from services.llm import MockLLMService, OpenAILLMService
from services.openai_compatible_asr import OpenAICompatibleASRService
from services.openai_compatible_tts import OpenAICompatibleTTSService
from services.base import LLMMessage, LLMStreamEvent
from services.streaming_text import extract_tts_sentence, has_spoken_content
from services.tts import EdgeTTSService, MockTTSService
class DuplexPipeline:
@@ -258,9 +262,9 @@ class DuplexPipeline:
self,
transport: BaseTransport,
session_id: str,
llm_service: Optional[BaseLLMService] = None,
tts_service: Optional[BaseTTSService] = None,
asr_service: Optional[BaseASRService] = None,
llm_service: Optional[LLMPort] = None,
tts_service: Optional[TTSPort] = None,
asr_service: Optional[ASRPort] = None,
system_prompt: Optional[str] = None,
greeting: Optional[str] = None,
knowledge_searcher: Optional[
@@ -272,6 +276,7 @@ class DuplexPipeline:
server_tool_executor: Optional[
Callable[[Dict[str, Any]], Awaitable[Dict[str, Any]]]
] = None,
service_factory: Optional[RealtimeServiceFactory] = None,
):
"""
Initialize duplex pipeline.
@@ -279,8 +284,8 @@ class DuplexPipeline:
Args:
transport: Transport for sending audio/events
session_id: Session identifier
llm_service: LLM service (defaults to OpenAI)
tts_service: TTS service (defaults to EdgeTTS)
llm_service: Optional injected LLM port implementation
tts_service: Optional injected TTS port implementation
asr_service: ASR service (optional)
system_prompt: System prompt for LLM
greeting: Optional greeting to speak on start
@@ -312,6 +317,7 @@ class DuplexPipeline:
self.llm_service = llm_service
self.tts_service = tts_service
self.asr_service = asr_service # Will be initialized in start()
self._service_factory = service_factory or DefaultRealtimeServiceFactory()
self._knowledge_searcher = knowledge_searcher
self._tool_resource_resolver = tool_resource_resolver
self._server_tool_executor = server_tool_executor
@@ -776,21 +782,11 @@ class DuplexPipeline:
return False
return None
@staticmethod
def _is_openai_compatible_provider(provider: Any) -> bool:
normalized = str(provider or "").strip().lower()
return normalized in {"openai_compatible", "openai-compatible", "siliconflow"}
@staticmethod
def _is_dashscope_tts_provider(provider: Any) -> bool:
normalized = str(provider or "").strip().lower()
return normalized == "dashscope"
@staticmethod
def _is_llm_provider_supported(provider: Any) -> bool:
normalized = str(provider or "").strip().lower()
return normalized in {"openai", "openai_compatible", "openai-compatible", "siliconflow"}
@staticmethod
def _default_llm_base_url(provider: Any) -> Optional[str]:
normalized = str(provider or "").strip().lower()
@@ -798,10 +794,6 @@ class DuplexPipeline:
return "https://api.siliconflow.cn/v1"
return None
@staticmethod
def _default_dashscope_tts_realtime_url() -> str:
return "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
@staticmethod
def _default_dashscope_tts_model() -> str:
return "qwen3-tts-flash-realtime"
@@ -900,18 +892,18 @@ class DuplexPipeline:
or self._default_llm_base_url(llm_provider)
)
llm_model = self._runtime_llm.get("model") or settings.llm_model
if self._is_llm_provider_supported(llm_provider) and llm_api_key:
self.llm_service = OpenAILLMService(
api_key=llm_api_key,
base_url=llm_base_url,
model=llm_model,
self.llm_service = self._service_factory.create_llm_service(
LLMServiceSpec(
provider=llm_provider,
model=str(llm_model),
api_key=str(llm_api_key).strip() if llm_api_key else None,
base_url=str(llm_base_url).strip() if llm_base_url else None,
system_prompt=self.conversation.system_prompt,
temperature=settings.llm_temperature,
knowledge_config=self._resolved_knowledge_config(),
knowledge_searcher=self._knowledge_searcher,
)
else:
logger.warning("LLM provider unsupported or API key missing - using mock LLM")
self.llm_service = MockLLMService()
)
if hasattr(self.llm_service, "set_knowledge_config"):
self.llm_service.set_knowledge_config(self._resolved_knowledge_config())
@@ -938,41 +930,29 @@ class DuplexPipeline:
"services.tts.mode is DashScope-only and will be ignored "
f"for provider={tts_provider}"
)
if self._is_dashscope_tts_provider(tts_provider) and tts_api_key:
self.tts_service = DashScopeTTSService(
api_key=tts_api_key,
api_url=tts_api_url or self._default_dashscope_tts_realtime_url(),
voice=tts_voice,
model=tts_model or self._default_dashscope_tts_model(),
self.tts_service = self._service_factory.create_tts_service(
TTSServiceSpec(
provider=tts_provider,
api_key=str(tts_api_key).strip() if tts_api_key else None,
api_url=str(tts_api_url).strip() if tts_api_url else None,
voice=str(tts_voice),
model=str(tts_model).strip() if tts_model else None,
sample_rate=settings.sample_rate,
speed=tts_speed,
mode=str(tts_mode),
sample_rate=settings.sample_rate,
speed=tts_speed
)
logger.info("Using DashScope realtime TTS service")
elif self._is_openai_compatible_provider(tts_provider) and tts_api_key:
self.tts_service = OpenAICompatibleTTSService(
api_key=tts_api_key,
api_url=tts_api_url,
voice=tts_voice,
model=tts_model or "FunAudioLLM/CosyVoice2-0.5B",
sample_rate=settings.sample_rate,
speed=tts_speed
)
logger.info(f"Using OpenAI-compatible TTS service (provider={tts_provider})")
else:
self.tts_service = EdgeTTSService(
voice=tts_voice,
sample_rate=settings.sample_rate
)
logger.info("Using Edge TTS service")
)
try:
await self.tts_service.connect()
except Exception as e:
logger.warning(f"TTS backend unavailable ({e}); falling back to MockTTS")
self.tts_service = MockTTSService(
sample_rate=settings.sample_rate
logger.warning(f"TTS backend unavailable ({e}); falling back to default TTS adapter")
self.tts_service = self._service_factory.create_tts_service(
TTSServiceSpec(
provider="mock",
voice="mock",
sample_rate=settings.sample_rate,
)
)
await self.tts_service.connect()
else:
@@ -988,22 +968,19 @@ class DuplexPipeline:
asr_interim_interval = int(self._runtime_asr.get("interimIntervalMs") or settings.asr_interim_interval_ms)
asr_min_audio_ms = int(self._runtime_asr.get("minAudioMs") or settings.asr_min_audio_ms)
if self._is_openai_compatible_provider(asr_provider) and asr_api_key:
self.asr_service = OpenAICompatibleASRService(
api_key=asr_api_key,
api_url=asr_api_url,
model=asr_model or "FunAudioLLM/SenseVoiceSmall",
self.asr_service = self._service_factory.create_asr_service(
ASRServiceSpec(
provider=asr_provider,
sample_rate=settings.sample_rate,
language="auto",
api_key=str(asr_api_key).strip() if asr_api_key else None,
api_url=str(asr_api_url).strip() if asr_api_url else None,
model=str(asr_model).strip() if asr_model else None,
interim_interval_ms=asr_interim_interval,
min_audio_for_interim_ms=asr_min_audio_ms,
on_transcript=self._on_transcript_callback
on_transcript=self._on_transcript_callback,
)
logger.info(f"Using OpenAI-compatible ASR service (provider={asr_provider})")
else:
self.asr_service = BufferedASRService(
sample_rate=settings.sample_rate
)
logger.info("Using Buffered ASR service (no real transcription)")
)
await self.asr_service.connect()