Refactor backend integration and service architecture
- Removed the backend client compatibility wrapper and associated methods to streamline backend integration.
- Updated session management to utilize control plane gateways and runtime configuration providers.
- Adjusted TTS service implementations to remove the EdgeTTS service and simplify service dependencies.
- Enhanced documentation to reflect changes in backend integration and service architecture.
- Updated configuration files to remove deprecated TTS provider options and clarify available settings.
This commit is contained in:
@@ -26,21 +26,25 @@ import aiohttp
|
||||
from loguru import logger
|
||||
|
||||
from app.config import settings
|
||||
from app.service_factory import DefaultRealtimeServiceFactory
|
||||
from core.conversation import ConversationManager, ConversationState
|
||||
from core.events import get_event_bus
|
||||
from core.ports import (
|
||||
ASRPort,
|
||||
ASRServiceSpec,
|
||||
LLMPort,
|
||||
LLMServiceSpec,
|
||||
RealtimeServiceFactory,
|
||||
TTSPort,
|
||||
TTSServiceSpec,
|
||||
)
|
||||
from core.tool_executor import execute_server_tool
|
||||
from core.transports import BaseTransport
|
||||
from models.ws_v1 import ev
|
||||
from processors.eou import EouDetector
|
||||
from processors.vad import SileroVAD, VADProcessor
|
||||
from services.asr import BufferedASRService
|
||||
from services.base import BaseASRService, BaseLLMService, BaseTTSService, LLMMessage, LLMStreamEvent
|
||||
from services.dashscope_tts import DashScopeTTSService
|
||||
from services.llm import MockLLMService, OpenAILLMService
|
||||
from services.openai_compatible_asr import OpenAICompatibleASRService
|
||||
from services.openai_compatible_tts import OpenAICompatibleTTSService
|
||||
from services.base import LLMMessage, LLMStreamEvent
|
||||
from services.streaming_text import extract_tts_sentence, has_spoken_content
|
||||
from services.tts import EdgeTTSService, MockTTSService
|
||||
|
||||
|
||||
class DuplexPipeline:
|
||||
@@ -258,9 +262,9 @@ class DuplexPipeline:
|
||||
self,
|
||||
transport: BaseTransport,
|
||||
session_id: str,
|
||||
llm_service: Optional[BaseLLMService] = None,
|
||||
tts_service: Optional[BaseTTSService] = None,
|
||||
asr_service: Optional[BaseASRService] = None,
|
||||
llm_service: Optional[LLMPort] = None,
|
||||
tts_service: Optional[TTSPort] = None,
|
||||
asr_service: Optional[ASRPort] = None,
|
||||
system_prompt: Optional[str] = None,
|
||||
greeting: Optional[str] = None,
|
||||
knowledge_searcher: Optional[
|
||||
@@ -272,6 +276,7 @@ class DuplexPipeline:
|
||||
server_tool_executor: Optional[
|
||||
Callable[[Dict[str, Any]], Awaitable[Dict[str, Any]]]
|
||||
] = None,
|
||||
service_factory: Optional[RealtimeServiceFactory] = None,
|
||||
):
|
||||
"""
|
||||
Initialize duplex pipeline.
|
||||
@@ -279,8 +284,8 @@ class DuplexPipeline:
|
||||
Args:
|
||||
transport: Transport for sending audio/events
|
||||
session_id: Session identifier
|
||||
llm_service: LLM service (defaults to OpenAI)
|
||||
tts_service: TTS service (defaults to EdgeTTS)
|
||||
llm_service: Optional injected LLM port implementation
|
||||
tts_service: Optional injected TTS port implementation
|
||||
asr_service: ASR service (optional)
|
||||
system_prompt: System prompt for LLM
|
||||
greeting: Optional greeting to speak on start
|
||||
@@ -312,6 +317,7 @@ class DuplexPipeline:
|
||||
self.llm_service = llm_service
|
||||
self.tts_service = tts_service
|
||||
self.asr_service = asr_service # Will be initialized in start()
|
||||
self._service_factory = service_factory or DefaultRealtimeServiceFactory()
|
||||
self._knowledge_searcher = knowledge_searcher
|
||||
self._tool_resource_resolver = tool_resource_resolver
|
||||
self._server_tool_executor = server_tool_executor
|
||||
@@ -776,21 +782,11 @@ class DuplexPipeline:
|
||||
return False
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _is_openai_compatible_provider(provider: Any) -> bool:
|
||||
normalized = str(provider or "").strip().lower()
|
||||
return normalized in {"openai_compatible", "openai-compatible", "siliconflow"}
|
||||
|
||||
@staticmethod
|
||||
def _is_dashscope_tts_provider(provider: Any) -> bool:
|
||||
normalized = str(provider or "").strip().lower()
|
||||
return normalized == "dashscope"
|
||||
|
||||
@staticmethod
|
||||
def _is_llm_provider_supported(provider: Any) -> bool:
|
||||
normalized = str(provider or "").strip().lower()
|
||||
return normalized in {"openai", "openai_compatible", "openai-compatible", "siliconflow"}
|
||||
|
||||
@staticmethod
|
||||
def _default_llm_base_url(provider: Any) -> Optional[str]:
|
||||
normalized = str(provider or "").strip().lower()
|
||||
@@ -798,10 +794,6 @@ class DuplexPipeline:
|
||||
return "https://api.siliconflow.cn/v1"
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _default_dashscope_tts_realtime_url() -> str:
|
||||
return "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
|
||||
|
||||
@staticmethod
|
||||
def _default_dashscope_tts_model() -> str:
|
||||
return "qwen3-tts-flash-realtime"
|
||||
@@ -900,18 +892,18 @@ class DuplexPipeline:
|
||||
or self._default_llm_base_url(llm_provider)
|
||||
)
|
||||
llm_model = self._runtime_llm.get("model") or settings.llm_model
|
||||
|
||||
if self._is_llm_provider_supported(llm_provider) and llm_api_key:
|
||||
self.llm_service = OpenAILLMService(
|
||||
api_key=llm_api_key,
|
||||
base_url=llm_base_url,
|
||||
model=llm_model,
|
||||
self.llm_service = self._service_factory.create_llm_service(
|
||||
LLMServiceSpec(
|
||||
provider=llm_provider,
|
||||
model=str(llm_model),
|
||||
api_key=str(llm_api_key).strip() if llm_api_key else None,
|
||||
base_url=str(llm_base_url).strip() if llm_base_url else None,
|
||||
system_prompt=self.conversation.system_prompt,
|
||||
temperature=settings.llm_temperature,
|
||||
knowledge_config=self._resolved_knowledge_config(),
|
||||
knowledge_searcher=self._knowledge_searcher,
|
||||
)
|
||||
else:
|
||||
logger.warning("LLM provider unsupported or API key missing - using mock LLM")
|
||||
self.llm_service = MockLLMService()
|
||||
)
|
||||
|
||||
if hasattr(self.llm_service, "set_knowledge_config"):
|
||||
self.llm_service.set_knowledge_config(self._resolved_knowledge_config())
|
||||
@@ -938,41 +930,29 @@ class DuplexPipeline:
|
||||
"services.tts.mode is DashScope-only and will be ignored "
|
||||
f"for provider={tts_provider}"
|
||||
)
|
||||
|
||||
if self._is_dashscope_tts_provider(tts_provider) and tts_api_key:
|
||||
self.tts_service = DashScopeTTSService(
|
||||
api_key=tts_api_key,
|
||||
api_url=tts_api_url or self._default_dashscope_tts_realtime_url(),
|
||||
voice=tts_voice,
|
||||
model=tts_model or self._default_dashscope_tts_model(),
|
||||
self.tts_service = self._service_factory.create_tts_service(
|
||||
TTSServiceSpec(
|
||||
provider=tts_provider,
|
||||
api_key=str(tts_api_key).strip() if tts_api_key else None,
|
||||
api_url=str(tts_api_url).strip() if tts_api_url else None,
|
||||
voice=str(tts_voice),
|
||||
model=str(tts_model).strip() if tts_model else None,
|
||||
sample_rate=settings.sample_rate,
|
||||
speed=tts_speed,
|
||||
mode=str(tts_mode),
|
||||
sample_rate=settings.sample_rate,
|
||||
speed=tts_speed
|
||||
)
|
||||
logger.info("Using DashScope realtime TTS service")
|
||||
elif self._is_openai_compatible_provider(tts_provider) and tts_api_key:
|
||||
self.tts_service = OpenAICompatibleTTSService(
|
||||
api_key=tts_api_key,
|
||||
api_url=tts_api_url,
|
||||
voice=tts_voice,
|
||||
model=tts_model or "FunAudioLLM/CosyVoice2-0.5B",
|
||||
sample_rate=settings.sample_rate,
|
||||
speed=tts_speed
|
||||
)
|
||||
logger.info(f"Using OpenAI-compatible TTS service (provider={tts_provider})")
|
||||
else:
|
||||
self.tts_service = EdgeTTSService(
|
||||
voice=tts_voice,
|
||||
sample_rate=settings.sample_rate
|
||||
)
|
||||
logger.info("Using Edge TTS service")
|
||||
)
|
||||
|
||||
try:
|
||||
await self.tts_service.connect()
|
||||
except Exception as e:
|
||||
logger.warning(f"TTS backend unavailable ({e}); falling back to MockTTS")
|
||||
self.tts_service = MockTTSService(
|
||||
sample_rate=settings.sample_rate
|
||||
logger.warning(f"TTS backend unavailable ({e}); falling back to default TTS adapter")
|
||||
self.tts_service = self._service_factory.create_tts_service(
|
||||
TTSServiceSpec(
|
||||
provider="mock",
|
||||
voice="mock",
|
||||
sample_rate=settings.sample_rate,
|
||||
)
|
||||
)
|
||||
await self.tts_service.connect()
|
||||
else:
|
||||
@@ -988,22 +968,19 @@ class DuplexPipeline:
|
||||
asr_interim_interval = int(self._runtime_asr.get("interimIntervalMs") or settings.asr_interim_interval_ms)
|
||||
asr_min_audio_ms = int(self._runtime_asr.get("minAudioMs") or settings.asr_min_audio_ms)
|
||||
|
||||
if self._is_openai_compatible_provider(asr_provider) and asr_api_key:
|
||||
self.asr_service = OpenAICompatibleASRService(
|
||||
api_key=asr_api_key,
|
||||
api_url=asr_api_url,
|
||||
model=asr_model or "FunAudioLLM/SenseVoiceSmall",
|
||||
self.asr_service = self._service_factory.create_asr_service(
|
||||
ASRServiceSpec(
|
||||
provider=asr_provider,
|
||||
sample_rate=settings.sample_rate,
|
||||
language="auto",
|
||||
api_key=str(asr_api_key).strip() if asr_api_key else None,
|
||||
api_url=str(asr_api_url).strip() if asr_api_url else None,
|
||||
model=str(asr_model).strip() if asr_model else None,
|
||||
interim_interval_ms=asr_interim_interval,
|
||||
min_audio_for_interim_ms=asr_min_audio_ms,
|
||||
on_transcript=self._on_transcript_callback
|
||||
on_transcript=self._on_transcript_callback,
|
||||
)
|
||||
logger.info(f"Using OpenAI-compatible ASR service (provider={asr_provider})")
|
||||
else:
|
||||
self.asr_service = BufferedASRService(
|
||||
sample_rate=settings.sample_rate
|
||||
)
|
||||
logger.info("Using Buffered ASR service (no real transcription)")
|
||||
)
|
||||
|
||||
await self.asr_service.connect()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user