Use openai compatible as vendor

This commit is contained in:
Xin Wang
2026-02-12 18:44:55 +08:00
parent 260ff621bf
commit ff3a03b1ad
23 changed files with 822 additions and 905 deletions

View File

@@ -16,16 +16,22 @@ from ..schemas import (
router = APIRouter(prefix="/asr", tags=["ASR Models"]) router = APIRouter(prefix="/asr", tags=["ASR Models"])
SILICONFLOW_DEFAULT_ASR_MODEL = "FunAudioLLM/SenseVoiceSmall" OPENAI_COMPATIBLE_DEFAULT_ASR_MODEL = "FunAudioLLM/SenseVoiceSmall"
def _is_siliconflow_vendor(vendor: str) -> bool: def _is_openai_compatible_vendor(vendor: str) -> bool:
return (vendor or "").strip().lower() in {"siliconflow", "硅基流动"} normalized = (vendor or "").strip().lower()
return normalized in {
"openai compatible",
"openai-compatible",
"siliconflow", # backward compatibility
"硅基流动", # backward compatibility
}
def _default_asr_model(vendor: str) -> str: def _default_asr_model(vendor: str) -> str:
if _is_siliconflow_vendor(vendor): if _is_openai_compatible_vendor(vendor):
return SILICONFLOW_DEFAULT_ASR_MODEL return OPENAI_COMPATIBLE_DEFAULT_ASR_MODEL
return "whisper-1" return "whisper-1"
@@ -129,7 +135,7 @@ def test_asr_model(
# 连接性测试优先,避免依赖真实音频输入 # 连接性测试优先,避免依赖真实音频输入
headers = {"Authorization": f"Bearer {model.api_key}"} headers = {"Authorization": f"Bearer {model.api_key}"}
with httpx.Client(timeout=60.0) as client: with httpx.Client(timeout=60.0) as client:
if model.vendor.lower() in ["siliconflow", "paraformer"]: if _is_openai_compatible_vendor(model.vendor) or model.vendor.lower() == "paraformer":
response = client.get(f"{model.base_url}/asr", headers=headers) response = client.get(f"{model.base_url}/asr", headers=headers)
elif model.vendor.lower() == "openai": elif model.vendor.lower() == "openai":
response = client.get(f"{model.base_url}/audio/models", headers=headers) response = client.get(f"{model.base_url}/audio/models", headers=headers)
@@ -258,7 +264,7 @@ async def preview_asr_model(
raise HTTPException(status_code=400, detail="Uploaded audio file is empty") raise HTTPException(status_code=400, detail="Uploaded audio file is empty")
effective_api_key = (api_key or "").strip() or (model.api_key or "").strip() effective_api_key = (api_key or "").strip() or (model.api_key or "").strip()
if not effective_api_key and _is_siliconflow_vendor(model.vendor): if not effective_api_key and _is_openai_compatible_vendor(model.vendor):
effective_api_key = os.getenv("SILICONFLOW_API_KEY", "").strip() effective_api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
if not effective_api_key: if not effective_api_key:
raise HTTPException(status_code=400, detail=f"API key is required for ASR model: {model.name}") raise HTTPException(status_code=400, detail=f"API key is required for ASR model: {model.name}")

View File

@@ -13,8 +13,13 @@ from ..schemas import (
router = APIRouter(prefix="/assistants", tags=["Assistants"]) router = APIRouter(prefix="/assistants", tags=["Assistants"])
def _is_siliconflow_vendor(vendor: Optional[str]) -> bool: def _is_openai_compatible_vendor(vendor: Optional[str]) -> bool:
return (vendor or "").strip().lower() in {"siliconflow", "硅基流动"} return (vendor or "").strip().lower() in {
"siliconflow",
"硅基流动",
"openai compatible",
"openai-compatible",
}
def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> dict: def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> dict:
@@ -47,11 +52,11 @@ def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> dict:
if assistant.asr_model_id: if assistant.asr_model_id:
asr = db.query(ASRModel).filter(ASRModel.id == assistant.asr_model_id).first() asr = db.query(ASRModel).filter(ASRModel.id == assistant.asr_model_id).first()
if asr: if asr:
asr_provider = "siliconflow" if _is_siliconflow_vendor(asr.vendor) else "buffered" asr_provider = "openai_compatible" if _is_openai_compatible_vendor(asr.vendor) else "buffered"
metadata["services"]["asr"] = { metadata["services"]["asr"] = {
"provider": asr_provider, "provider": asr_provider,
"model": asr.model_name or asr.name, "model": asr.model_name or asr.name,
"apiKey": asr.api_key if asr_provider == "siliconflow" else None, "apiKey": asr.api_key if asr_provider == "openai_compatible" else None,
} }
else: else:
warnings.append(f"ASR model not found: {assistant.asr_model_id}") warnings.append(f"ASR model not found: {assistant.asr_model_id}")
@@ -61,12 +66,12 @@ def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> dict:
elif assistant.voice: elif assistant.voice:
voice = db.query(Voice).filter(Voice.id == assistant.voice).first() voice = db.query(Voice).filter(Voice.id == assistant.voice).first()
if voice: if voice:
tts_provider = "siliconflow" if _is_siliconflow_vendor(voice.vendor) else "edge" tts_provider = "openai_compatible" if _is_openai_compatible_vendor(voice.vendor) else "edge"
metadata["services"]["tts"] = { metadata["services"]["tts"] = {
"enabled": True, "enabled": True,
"provider": tts_provider, "provider": tts_provider,
"model": voice.model, "model": voice.model,
"apiKey": voice.api_key if tts_provider == "siliconflow" else None, "apiKey": voice.api_key if tts_provider == "openai_compatible" else None,
"voice": voice.voice_key or voice.id, "voice": voice.voice_key or voice.id,
"speed": assistant.speed or voice.speed, "speed": assistant.speed or voice.speed,
} }

View File

@@ -467,7 +467,13 @@ def _test_asr_model(db: Session, model_id: str, result: AutotestResult):
headers = {"Authorization": f"Bearer {model.api_key}"} headers = {"Authorization": f"Bearer {model.api_key}"}
with httpx.Client(timeout=30.0) as client: with httpx.Client(timeout=30.0) as client:
if model.vendor.lower() in ["siliconflow", "paraformer"]: normalized_vendor = (model.vendor or "").strip().lower()
if normalized_vendor in [
"openai compatible",
"openai-compatible",
"siliconflow", # backward compatibility
"paraformer",
]:
response = client.get( response = client.get(
f"{model.base_url}/asr", f"{model.base_url}/asr",
headers=headers headers=headers

View File

@@ -13,20 +13,26 @@ from ..schemas import VoiceCreate, VoiceOut, VoicePreviewRequest, VoicePreviewRe
router = APIRouter(prefix="/voices", tags=["Voices"]) router = APIRouter(prefix="/voices", tags=["Voices"])
SILICONFLOW_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B" OPENAI_COMPATIBLE_DEFAULT_MODEL = "FunAudioLLM/CosyVoice2-0.5B"
def _is_siliconflow_vendor(vendor: str) -> bool: def _is_openai_compatible_vendor(vendor: str) -> bool:
return vendor.strip().lower() in {"siliconflow", "硅基流动"} normalized = (vendor or "").strip().lower()
return normalized in {
"openai compatible",
"openai-compatible",
"siliconflow", # backward compatibility
"硅基流动", # backward compatibility
}
def _default_base_url(vendor: str) -> Optional[str]: def _default_base_url(vendor: str) -> Optional[str]:
if _is_siliconflow_vendor(vendor): if _is_openai_compatible_vendor(vendor):
return "https://api.siliconflow.cn/v1" return "https://api.siliconflow.cn/v1"
return None return None
def _build_siliconflow_voice_key(voice: Voice, model: str) -> str: def _build_openai_compatible_voice_key(voice: Voice, model: str) -> str:
if voice.voice_key: if voice.voice_key:
return voice.voice_key return voice.voice_key
if ":" in voice.id: if ":" in voice.id:
@@ -65,8 +71,8 @@ def create_voice(data: VoiceCreate, db: Session = Depends(get_db)):
model = data.model model = data.model
voice_key = data.voice_key voice_key = data.voice_key
if _is_siliconflow_vendor(vendor): if _is_openai_compatible_vendor(vendor):
model = model or SILICONFLOW_DEFAULT_MODEL model = model or OPENAI_COMPATIBLE_DEFAULT_MODEL
if not voice_key: if not voice_key:
raw_id = (data.id or data.name).strip() raw_id = (data.id or data.name).strip()
voice_key = raw_id if ":" in raw_id else f"{model}:{raw_id}" voice_key = raw_id if ":" in raw_id else f"{model}:{raw_id}"
@@ -115,11 +121,11 @@ def update_voice(id: str, data: VoiceUpdate, db: Session = Depends(get_db)):
update_data["vendor"] = update_data["vendor"].strip() update_data["vendor"] = update_data["vendor"].strip()
vendor_for_defaults = update_data.get("vendor", voice.vendor) vendor_for_defaults = update_data.get("vendor", voice.vendor)
if _is_siliconflow_vendor(vendor_for_defaults): if _is_openai_compatible_vendor(vendor_for_defaults):
model = update_data.get("model") or voice.model or SILICONFLOW_DEFAULT_MODEL model = update_data.get("model") or voice.model or OPENAI_COMPATIBLE_DEFAULT_MODEL
voice_key = update_data.get("voice_key") or voice.voice_key voice_key = update_data.get("voice_key") or voice.voice_key
update_data["model"] = model update_data["model"] = model
update_data["voice_key"] = voice_key or _build_siliconflow_voice_key(voice, model) update_data["voice_key"] = voice_key or _build_openai_compatible_voice_key(voice, model)
for field, value in update_data.items(): for field, value in update_data.items():
setattr(voice, field, value) setattr(voice, field, value)
@@ -152,7 +158,7 @@ def preview_voice(id: str, data: VoicePreviewRequest, db: Session = Depends(get_
raise HTTPException(status_code=400, detail="Preview text cannot be empty") raise HTTPException(status_code=400, detail="Preview text cannot be empty")
api_key = (data.api_key or "").strip() or (voice.api_key or "").strip() api_key = (data.api_key or "").strip() or (voice.api_key or "").strip()
if not api_key and _is_siliconflow_vendor(voice.vendor): if not api_key and _is_openai_compatible_vendor(voice.vendor):
api_key = os.getenv("SILICONFLOW_API_KEY", "").strip() api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
if not api_key: if not api_key:
raise HTTPException(status_code=400, detail=f"API key is required for voice: {voice.name}") raise HTTPException(status_code=400, detail=f"API key is required for voice: {voice.name}")
@@ -161,11 +167,11 @@ def preview_voice(id: str, data: VoicePreviewRequest, db: Session = Depends(get_
if not base_url: if not base_url:
raise HTTPException(status_code=400, detail=f"Base URL is required for voice: {voice.name}") raise HTTPException(status_code=400, detail=f"Base URL is required for voice: {voice.name}")
model = voice.model or SILICONFLOW_DEFAULT_MODEL model = voice.model or OPENAI_COMPATIBLE_DEFAULT_MODEL
payload = { payload = {
"model": model, "model": model,
"input": text, "input": text,
"voice": voice.voice_key or _build_siliconflow_voice_key(voice, model), "voice": voice.voice_key or _build_openai_compatible_voice_key(voice, model),
"response_format": "mp3", "response_format": "mp3",
"speed": data.speed if data.speed is not None else voice.speed, "speed": data.speed if data.speed is not None else voice.speed,
} }

View File

@@ -20,7 +20,7 @@ interface ASRModel {
id: string; // 模型唯一标识 (8位UUID) id: string; // 模型唯一标识 (8位UUID)
user_id: number; // 所属用户ID user_id: number; // 所属用户ID
name: string; // 模型显示名称 name: string; // 模型显示名称
vendor: string; // 供应商: "OpenAI" | "SiliconFlow" | "Paraformer" | 等 vendor: string; // 供应商: "OpenAI Compatible" | "Paraformer" | 等
language: string; // 识别语言: "zh" | "en" | "Multi-lingual" language: string; // 识别语言: "zh" | "en" | "Multi-lingual"
base_url: string; // API Base URL base_url: string; // API Base URL
api_key: string; // API Key api_key: string; // API Key
@@ -64,7 +64,7 @@ GET /api/v1/asr
"id": "abc12345", "id": "abc12345",
"user_id": 1, "user_id": 1,
"name": "Whisper 多语种识别", "name": "Whisper 多语种识别",
"vendor": "OpenAI", "vendor": "OpenAI Compatible",
"language": "Multi-lingual", "language": "Multi-lingual",
"base_url": "https://api.openai.com/v1", "base_url": "https://api.openai.com/v1",
"api_key": "sk-***", "api_key": "sk-***",
@@ -78,7 +78,7 @@ GET /api/v1/asr
"id": "def67890", "id": "def67890",
"user_id": 1, "user_id": 1,
"name": "SenseVoice 中文识别", "name": "SenseVoice 中文识别",
"vendor": "SiliconFlow", "vendor": "OpenAI Compatible",
"language": "zh", "language": "zh",
"base_url": "https://api.siliconflow.cn/v1", "base_url": "https://api.siliconflow.cn/v1",
"api_key": "sf-***", "api_key": "sf-***",
@@ -114,7 +114,7 @@ GET /api/v1/asr/{id}
"id": "abc12345", "id": "abc12345",
"user_id": 1, "user_id": 1,
"name": "Whisper 多语种识别", "name": "Whisper 多语种识别",
"vendor": "OpenAI", "vendor": "OpenAI Compatible",
"language": "Multi-lingual", "language": "Multi-lingual",
"base_url": "https://api.openai.com/v1", "base_url": "https://api.openai.com/v1",
"api_key": "sk-***", "api_key": "sk-***",
@@ -140,7 +140,7 @@ POST /api/v1/asr
```json ```json
{ {
"name": "SenseVoice 中文识别", "name": "SenseVoice 中文识别",
"vendor": "SiliconFlow", "vendor": "OpenAI Compatible",
"language": "zh", "language": "zh",
"base_url": "https://api.siliconflow.cn/v1", "base_url": "https://api.siliconflow.cn/v1",
"api_key": "sk-your-api-key", "api_key": "sk-your-api-key",
@@ -157,7 +157,7 @@ POST /api/v1/asr
| 字段 | 类型 | 必填 | 说明 | | 字段 | 类型 | 必填 | 说明 |
|------|------|------|------| |------|------|------|------|
| name | string | 是 | 模型显示名称 | | name | string | 是 | 模型显示名称 |
| vendor | string | 是 | 供应商: "OpenAI" / "SiliconFlow" / "Paraformer" | | vendor | string | 是 | 供应商: "OpenAI Compatible" / "Paraformer" |
| language | string | 是 | 语言: "zh" / "en" / "Multi-lingual" | | language | string | 是 | 语言: "zh" / "en" / "Multi-lingual" |
| base_url | string | 是 | API Base URL | | base_url | string | 是 | API Base URL |
| api_key | string | 是 | API Key | | api_key | string | 是 | API Key |
@@ -347,7 +347,7 @@ class ASRTestResponse(BaseModel):
```json ```json
{ {
"vendor": "OpenAI", "vendor": "OpenAI Compatible",
"base_url": "https://api.openai.com/v1", "base_url": "https://api.openai.com/v1",
"api_key": "sk-xxx", "api_key": "sk-xxx",
"model_name": "whisper-1", "model_name": "whisper-1",
@@ -357,11 +357,11 @@ class ASRTestResponse(BaseModel):
} }
``` ```
### SiliconFlow Paraformer ### OpenAI Compatible Paraformer
```json ```json
{ {
"vendor": "SiliconFlow", "vendor": "OpenAI Compatible",
"base_url": "https://api.siliconflow.cn/v1", "base_url": "https://api.siliconflow.cn/v1",
"api_key": "sf-xxx", "api_key": "sf-xxx",
"model_name": "paraformer-v2", "model_name": "paraformer-v2",
@@ -393,7 +393,7 @@ class ASRTestResponse(BaseModel):
| test_filter_asr_models_by_language | 按语言过滤测试 | | test_filter_asr_models_by_language | 按语言过滤测试 |
| test_filter_asr_models_by_enabled | 按启用状态过滤测试 | | test_filter_asr_models_by_enabled | 按启用状态过滤测试 |
| test_create_asr_model_with_hotwords | 热词配置测试 | | test_create_asr_model_with_hotwords | 热词配置测试 |
| test_test_asr_model_siliconflow | SiliconFlow 供应商测试 | | test_test_asr_model_siliconflow | OpenAI Compatible 供应商测试 |
| test_test_asr_model_openai | OpenAI 供应商测试 | | test_test_asr_model_openai | OpenAI 供应商测试 |
| test_different_asr_languages | 多语言测试 | | test_different_asr_languages | 多语言测试 |
| test_different_asr_vendors | 多供应商测试 | | test_different_asr_vendors | 多供应商测试 |

View File

@@ -20,7 +20,7 @@ interface LLMModel {
id: string; // 模型唯一标识 (8位UUID) id: string; // 模型唯一标识 (8位UUID)
user_id: number; // 所属用户ID user_id: number; // 所属用户ID
name: string; // 模型显示名称 name: string; // 模型显示名称
vendor: string; // 供应商: "OpenAI" | "SiliconFlow" | "Dify" | "FastGPT" | 等 vendor: string; // 供应商: "OpenAI Compatible" | "Dify" | "FastGPT" | 等
type: string; // 类型: "text" | "embedding" | "rerank" type: string; // 类型: "text" | "embedding" | "rerank"
base_url: string; // API Base URL base_url: string; // API Base URL
api_key: string; // API Key api_key: string; // API Key
@@ -64,7 +64,7 @@ GET /api/v1/llm
"id": "abc12345", "id": "abc12345",
"user_id": 1, "user_id": 1,
"name": "GPT-4o", "name": "GPT-4o",
"vendor": "OpenAI", "vendor": "OpenAI Compatible",
"type": "text", "type": "text",
"base_url": "https://api.openai.com/v1", "base_url": "https://api.openai.com/v1",
"api_key": "sk-***", "api_key": "sk-***",
@@ -79,7 +79,7 @@ GET /api/v1/llm
"id": "def67890", "id": "def67890",
"user_id": 1, "user_id": 1,
"name": "Embedding-3-Small", "name": "Embedding-3-Small",
"vendor": "OpenAI", "vendor": "OpenAI Compatible",
"type": "embedding", "type": "embedding",
"base_url": "https://api.openai.com/v1", "base_url": "https://api.openai.com/v1",
"api_key": "sk-***", "api_key": "sk-***",
@@ -111,7 +111,7 @@ GET /api/v1/llm/{id}
"id": "abc12345", "id": "abc12345",
"user_id": 1, "user_id": 1,
"name": "GPT-4o", "name": "GPT-4o",
"vendor": "OpenAI", "vendor": "OpenAI Compatible",
"type": "text", "type": "text",
"base_url": "https://api.openai.com/v1", "base_url": "https://api.openai.com/v1",
"api_key": "sk-***", "api_key": "sk-***",
@@ -137,7 +137,7 @@ POST /api/v1/llm
```json ```json
{ {
"name": "GPT-4o", "name": "GPT-4o",
"vendor": "OpenAI", "vendor": "OpenAI Compatible",
"type": "text", "type": "text",
"base_url": "https://api.openai.com/v1", "base_url": "https://api.openai.com/v1",
"api_key": "sk-your-api-key", "api_key": "sk-your-api-key",
@@ -314,11 +314,11 @@ class LLMModelTestResponse(BaseModel):
## 供应商配置示例 ## 供应商配置示例
### OpenAI ### OpenAI Compatible (OpenAI Endpoint)
```json ```json
{ {
"vendor": "OpenAI", "vendor": "OpenAI Compatible",
"base_url": "https://api.openai.com/v1", "base_url": "https://api.openai.com/v1",
"api_key": "sk-xxx", "api_key": "sk-xxx",
"model_name": "gpt-4o", "model_name": "gpt-4o",
@@ -327,11 +327,11 @@ class LLMModelTestResponse(BaseModel):
} }
``` ```
### SiliconFlow ### OpenAI Compatible
```json ```json
{ {
"vendor": "SiliconFlow", "vendor": "OpenAI Compatible",
"base_url": "https://api.siliconflow.com/v1", "base_url": "https://api.siliconflow.com/v1",
"api_key": "sf-xxx", "api_key": "sf-xxx",
"model_name": "deepseek-v3", "model_name": "deepseek-v3",
@@ -356,7 +356,7 @@ class LLMModelTestResponse(BaseModel):
```json ```json
{ {
"vendor": "OpenAI", "vendor": "OpenAI Compatible",
"base_url": "https://api.openai.com/v1", "base_url": "https://api.openai.com/v1",
"api_key": "sk-xxx", "api_key": "sk-xxx",
"model_name": "text-embedding-3-small", "model_name": "text-embedding-3-small",

View File

@@ -20,7 +20,7 @@ interface LLMModel {
id: string; // 模型唯一标识 id: string; // 模型唯一标识
user_id: number; // 所属用户ID user_id: number; // 所属用户ID
name: string; // 模型显示名称 name: string; // 模型显示名称
vendor: string; // 供应商: "OpenAI Compatible" | "SiliconFlow" | "Dify" | "FastGPT" vendor: string; // 供应商: "OpenAI Compatible" | "Dify" | "FastGPT"
type: string; // 类型: "text" | "embedding" | "rerank" type: string; // 类型: "text" | "embedding" | "rerank"
base_url: string; // API Base URL base_url: string; // API Base URL
api_key: string; // API Key api_key: string; // API Key
@@ -57,7 +57,7 @@ interface TTSModel {
id: string; id: string;
user_id: number; user_id: number;
name: string; name: string;
vendor: string; // "Ali" | "Volcano" | "Minimax" | "硅基流动" vendor: string; // "OpenAI Compatible" | "Ali" | "Volcano" | "Minimax"
language: string; // "zh" | "en" language: string; // "zh" | "en"
voice_list?: string[]; // 支持的声音列表 voice_list?: string[]; // 支持的声音列表
enabled: boolean; enabled: boolean;
@@ -316,7 +316,6 @@ class LLMModelType(str, Enum):
class LLMModelVendor(str, Enum): class LLMModelVendor(str, Enum):
OPENAI_COMPATIBLE = "OpenAI Compatible" OPENAI_COMPATIBLE = "OpenAI Compatible"
SILICONFLOW = "SiliconFlow"
DIFY = "Dify" DIFY = "Dify"
FASTGPT = "FastGPT" FASTGPT = "FastGPT"
@@ -389,11 +388,11 @@ class ASRModelOut(ASRModelBase):
} }
``` ```
### SiliconFlow ### OpenAI Compatible
```json ```json
{ {
"vendor": "SiliconFlow", "vendor": "OpenAI Compatible",
"base_url": "https://api.siliconflow.com/v1", "base_url": "https://api.siliconflow.com/v1",
"api_key": "sf-xxx", "api_key": "sf-xxx",
"model_name": "deepseek-v3" "model_name": "deepseek-v3"

View File

@@ -135,21 +135,21 @@ def rebuild_vector_store(reset_doc_status: bool = True):
def init_default_data(): def init_default_data():
with db_session() as db: with db_session() as db:
# 检查是否已有数据 # 检查是否已有数据
# SiliconFlow CosyVoice 2.0 预设声音 (8个) # OpenAI Compatible (SiliconFlow API) CosyVoice 2.0 预设声音 (8个)
# 参考: https://docs.siliconflow.cn/cn/api-reference/audio/create-speech # 参考: https://docs.siliconflow.cn/cn/api-reference/audio/create-speech
voices = [ voices = [
# 男声 (Male Voices) # 男声 (Male Voices)
Voice(id="alex", name="Alex", vendor="SiliconFlow", gender="Male", language="en", Voice(id="alex", name="Alex", vendor="OpenAI Compatible", gender="Male", language="en",
description="Steady male voice.", is_system=True), description="Steady male voice.", is_system=True),
Voice(id="david", name="David", vendor="SiliconFlow", gender="Male", language="en", Voice(id="david", name="David", vendor="OpenAI Compatible", gender="Male", language="en",
description="Cheerful male voice.", is_system=True), description="Cheerful male voice.", is_system=True),
# 女声 (Female Voices) # 女声 (Female Voices)
Voice(id="bella", name="Bella", vendor="SiliconFlow", gender="Female", language="en", Voice(id="bella", name="Bella", vendor="OpenAI Compatible", gender="Female", language="en",
description="Passionate female voice.", is_system=True), description="Passionate female voice.", is_system=True),
Voice(id="claire", name="Claire", vendor="SiliconFlow", gender="Female", language="en", Voice(id="claire", name="Claire", vendor="OpenAI Compatible", gender="Female", language="en",
description="Gentle female voice.", is_system=True), description="Gentle female voice.", is_system=True),
] ]
seed_if_empty(db, Voice, voices, "✅ 默认声音数据已初始化 (SiliconFlow CosyVoice 2.0)") seed_if_empty(db, Voice, voices, "✅ 默认声音数据已初始化 (OpenAI Compatible CosyVoice 2.0)")
def init_default_tools(recreate: bool = False): def init_default_tools(recreate: bool = False):
@@ -181,7 +181,7 @@ def init_default_assistants():
voice="anna", voice="anna",
speed=1.0, speed=1.0,
hotwords=[], hotwords=[],
tools=["calculator", "current_time"], tools=["current_time"],
interruption_sensitivity=500, interruption_sensitivity=500,
config_mode="platform", config_mode="platform",
llm_model_id="deepseek-chat", llm_model_id="deepseek-chat",
@@ -215,7 +215,7 @@ def init_default_assistants():
voice="alex", voice="alex",
speed=1.0, speed=1.0,
hotwords=["grammar", "vocabulary", "practice"], hotwords=["grammar", "vocabulary", "practice"],
tools=["calculator"], tools=["current_time"],
interruption_sensitivity=400, interruption_sensitivity=400,
config_mode="platform", config_mode="platform",
), ),
@@ -294,7 +294,7 @@ def init_default_llm_models():
id="deepseek-chat", id="deepseek-chat",
user_id=1, user_id=1,
name="DeepSeek Chat", name="DeepSeek Chat",
vendor="SiliconFlow", vendor="OpenAI Compatible",
type="text", type="text",
base_url="https://api.deepseek.com", base_url="https://api.deepseek.com",
api_key="YOUR_API_KEY", # 用户需替换 api_key="YOUR_API_KEY", # 用户需替换
@@ -320,7 +320,7 @@ def init_default_llm_models():
id="text-embedding-3-small", id="text-embedding-3-small",
user_id=1, user_id=1,
name="Embedding 3 Small", name="Embedding 3 Small",
vendor="OpenAI", vendor="OpenAI Compatible",
type="embedding", type="embedding",
base_url="https://api.openai.com/v1", base_url="https://api.openai.com/v1",
api_key="YOUR_API_KEY", api_key="YOUR_API_KEY",
@@ -339,7 +339,7 @@ def init_default_asr_models():
id="FunAudioLLM/SenseVoiceSmall", id="FunAudioLLM/SenseVoiceSmall",
user_id=1, user_id=1,
name="FunAudioLLM/SenseVoiceSmall", name="FunAudioLLM/SenseVoiceSmall",
vendor="SiliconFlow", vendor="OpenAI Compatible",
language="Multi-lingual", language="Multi-lingual",
base_url="https://api.siliconflow.cn/v1", base_url="https://api.siliconflow.cn/v1",
api_key="YOUR_API_KEY", api_key="YOUR_API_KEY",
@@ -353,7 +353,7 @@ def init_default_asr_models():
id="TeleAI/TeleSpeechASR", id="TeleAI/TeleSpeechASR",
user_id=1, user_id=1,
name="TeleAI/TeleSpeechASR", name="TeleAI/TeleSpeechASR",
vendor="SiliconFlow", vendor="OpenAI Compatible",
language="Multi-lingual", language="Multi-lingual",
base_url="https://api.siliconflow.cn/v1", base_url="https://api.siliconflow.cn/v1",
api_key="YOUR_API_KEY", api_key="YOUR_API_KEY",

View File

@@ -41,19 +41,19 @@ LLM_MODEL=gpt-4o-mini
LLM_TEMPERATURE=0.7 LLM_TEMPERATURE=0.7
# TTS # TTS
# edge: no SiliconFlow key needed # edge: no API key needed
# siliconflow: requires SILICONFLOW_API_KEY # openai_compatible: compatible with SiliconFlow-style endpoints
TTS_PROVIDER=siliconflow TTS_PROVIDER=openai_compatible
TTS_VOICE=anna TTS_VOICE=anna
TTS_SPEED=1.0 TTS_SPEED=1.0
# SiliconFlow (used by TTS and/or ASR when provider=siliconflow) # SiliconFlow (used by TTS and/or ASR when provider=openai_compatible)
SILICONFLOW_API_KEY=your_siliconflow_api_key_here SILICONFLOW_API_KEY=your_siliconflow_api_key_here
SILICONFLOW_TTS_MODEL=FunAudioLLM/CosyVoice2-0.5B SILICONFLOW_TTS_MODEL=FunAudioLLM/CosyVoice2-0.5B
SILICONFLOW_ASR_MODEL=FunAudioLLM/SenseVoiceSmall SILICONFLOW_ASR_MODEL=FunAudioLLM/SenseVoiceSmall
# ASR # ASR
ASR_PROVIDER=siliconflow ASR_PROVIDER=openai_compatible
# Interim cadence and minimum audio before interim decode. # Interim cadence and minimum audio before interim decode.
ASR_INTERIM_INTERVAL_MS=500 ASR_INTERIM_INTERVAL_MS=500
ASR_MIN_AUDIO_MS=300 ASR_MIN_AUDIO_MS=300

View File

@@ -44,7 +44,10 @@ class Settings(BaseSettings):
llm_temperature: float = Field(default=0.7, description="LLM temperature for response generation") llm_temperature: float = Field(default=0.7, description="LLM temperature for response generation")
# TTS Configuration # TTS Configuration
tts_provider: str = Field(default="siliconflow", description="TTS provider (edge, siliconflow)") tts_provider: str = Field(
default="openai_compatible",
description="TTS provider (edge, openai_compatible; siliconflow alias supported)"
)
tts_voice: str = Field(default="anna", description="TTS voice name") tts_voice: str = Field(default="anna", description="TTS voice name")
tts_speed: float = Field(default=1.0, description="TTS speech speed multiplier") tts_speed: float = Field(default=1.0, description="TTS speech speed multiplier")
@@ -53,7 +56,10 @@ class Settings(BaseSettings):
siliconflow_tts_model: str = Field(default="FunAudioLLM/CosyVoice2-0.5B", description="SiliconFlow TTS model") siliconflow_tts_model: str = Field(default="FunAudioLLM/CosyVoice2-0.5B", description="SiliconFlow TTS model")
# ASR Configuration # ASR Configuration
asr_provider: str = Field(default="siliconflow", description="ASR provider (siliconflow, buffered)") asr_provider: str = Field(
default="openai_compatible",
description="ASR provider (openai_compatible, buffered; siliconflow alias supported)"
)
siliconflow_asr_model: str = Field(default="FunAudioLLM/SenseVoiceSmall", description="SiliconFlow ASR model") siliconflow_asr_model: str = Field(default="FunAudioLLM/SenseVoiceSmall", description="SiliconFlow ASR model")
asr_interim_interval_ms: int = Field(default=500, description="Interval for interim ASR results in ms") asr_interim_interval_ms: int = Field(default=500, description="Interval for interim ASR results in ms")
asr_min_audio_ms: int = Field(default=300, description="Minimum audio duration before first ASR result") asr_min_audio_ms: int = Field(default=300, description="Minimum audio duration before first ASR result")

View File

@@ -30,8 +30,8 @@ from processors.vad import SileroVAD, VADProcessor
from services.asr import BufferedASRService from services.asr import BufferedASRService
from services.base import BaseASRService, BaseLLMService, BaseTTSService, LLMMessage, LLMStreamEvent from services.base import BaseASRService, BaseLLMService, BaseTTSService, LLMMessage, LLMStreamEvent
from services.llm import MockLLMService, OpenAILLMService from services.llm import MockLLMService, OpenAILLMService
from services.siliconflow_asr import SiliconFlowASRService from services.openai_compatible_asr import OpenAICompatibleASRService
from services.siliconflow_tts import SiliconFlowTTSService from services.openai_compatible_tts import OpenAICompatibleTTSService
from services.streaming_text import extract_tts_sentence, has_spoken_content from services.streaming_text import extract_tts_sentence, has_spoken_content
from services.tts import EdgeTTSService, MockTTSService from services.tts import EdgeTTSService, MockTTSService
@@ -60,57 +60,6 @@ class DuplexPipeline:
_TOOL_WAIT_TIMEOUT_SECONDS = 15.0 _TOOL_WAIT_TIMEOUT_SECONDS = 15.0
_SERVER_TOOL_TIMEOUT_SECONDS = 15.0 _SERVER_TOOL_TIMEOUT_SECONDS = 15.0
_DEFAULT_TOOL_SCHEMAS: Dict[str, Dict[str, Any]] = { _DEFAULT_TOOL_SCHEMAS: Dict[str, Dict[str, Any]] = {
"search": {
"name": "search",
"description": "Search the internet for recent information",
"parameters": {
"type": "object",
"properties": {"query": {"type": "string"}},
"required": ["query"],
},
},
"calculator": {
"name": "calculator",
"description": "Evaluate a math expression",
"parameters": {
"type": "object",
"properties": {"expression": {"type": "string"}},
"required": ["expression"],
},
},
"weather": {
"name": "weather",
"description": "Get weather by city name",
"parameters": {
"type": "object",
"properties": {"city": {"type": "string"}},
"required": ["city"],
},
},
"translate": {
"name": "translate",
"description": "Translate text to target language",
"parameters": {
"type": "object",
"properties": {
"text": {"type": "string"},
"target_lang": {"type": "string"},
},
"required": ["text", "target_lang"],
},
},
"knowledge": {
"name": "knowledge",
"description": "Query knowledge base by question",
"parameters": {
"type": "object",
"properties": {
"query": {"type": "string"},
"kb_id": {"type": "string"},
},
"required": ["query"],
},
},
"current_time": { "current_time": {
"name": "current_time", "name": "current_time",
"description": "Get current local time", "description": "Get current local time",
@@ -120,51 +69,6 @@ class DuplexPipeline:
"required": [], "required": [],
}, },
}, },
"code_interpreter": {
"name": "code_interpreter",
"description": "Execute Python code in a controlled environment",
"parameters": {
"type": "object",
"properties": {"code": {"type": "string"}},
"required": ["code"],
},
},
"turn_on_camera": {
"name": "turn_on_camera",
"description": "Turn on camera on client device",
"parameters": {
"type": "object",
"properties": {},
"required": [],
},
},
"turn_off_camera": {
"name": "turn_off_camera",
"description": "Turn off camera on client device",
"parameters": {
"type": "object",
"properties": {},
"required": [],
},
},
"increase_volume": {
"name": "increase_volume",
"description": "Increase speaker volume",
"parameters": {
"type": "object",
"properties": {"step": {"type": "integer"}},
"required": [],
},
},
"decrease_volume": {
"name": "decrease_volume",
"description": "Decrease speaker volume",
"parameters": {
"type": "object",
"properties": {"step": {"type": "integer"}},
"required": [],
},
},
} }
def __init__( def __init__(
@@ -386,6 +290,11 @@ class DuplexPipeline:
return False return False
return None return None
@staticmethod
def _is_openai_compatible_provider(provider: Any) -> bool:
normalized = str(provider or "").strip().lower()
return normalized in {"openai_compatible", "openai-compatible", "siliconflow"}
def _tts_output_enabled(self) -> bool: def _tts_output_enabled(self) -> bool:
enabled = self._coerce_bool(self._runtime_tts.get("enabled")) enabled = self._coerce_bool(self._runtime_tts.get("enabled"))
if enabled is not None: if enabled is not None:
@@ -495,15 +404,15 @@ class DuplexPipeline:
tts_model = self._runtime_tts.get("model") or settings.siliconflow_tts_model tts_model = self._runtime_tts.get("model") or settings.siliconflow_tts_model
tts_speed = float(self._runtime_tts.get("speed") or settings.tts_speed) tts_speed = float(self._runtime_tts.get("speed") or settings.tts_speed)
if tts_provider == "siliconflow" and tts_api_key: if self._is_openai_compatible_provider(tts_provider) and tts_api_key:
self.tts_service = SiliconFlowTTSService( self.tts_service = OpenAICompatibleTTSService(
api_key=tts_api_key, api_key=tts_api_key,
voice=tts_voice, voice=tts_voice,
model=tts_model, model=tts_model,
sample_rate=settings.sample_rate, sample_rate=settings.sample_rate,
speed=tts_speed speed=tts_speed
) )
logger.info("Using SiliconFlow TTS service") logger.info("Using OpenAI-compatible TTS service (SiliconFlow implementation)")
else: else:
self.tts_service = EdgeTTSService( self.tts_service = EdgeTTSService(
voice=tts_voice, voice=tts_voice,
@@ -531,8 +440,8 @@ class DuplexPipeline:
asr_interim_interval = int(self._runtime_asr.get("interimIntervalMs") or settings.asr_interim_interval_ms) asr_interim_interval = int(self._runtime_asr.get("interimIntervalMs") or settings.asr_interim_interval_ms)
asr_min_audio_ms = int(self._runtime_asr.get("minAudioMs") or settings.asr_min_audio_ms) asr_min_audio_ms = int(self._runtime_asr.get("minAudioMs") or settings.asr_min_audio_ms)
if asr_provider == "siliconflow" and asr_api_key: if self._is_openai_compatible_provider(asr_provider) and asr_api_key:
self.asr_service = SiliconFlowASRService( self.asr_service = OpenAICompatibleASRService(
api_key=asr_api_key, api_key=asr_api_key,
model=asr_model, model=asr_model,
sample_rate=settings.sample_rate, sample_rate=settings.sample_rate,
@@ -540,7 +449,7 @@ class DuplexPipeline:
min_audio_for_interim_ms=asr_min_audio_ms, min_audio_for_interim_ms=asr_min_audio_ms,
on_transcript=self._on_transcript_callback on_transcript=self._on_transcript_callback
) )
logger.info("Using SiliconFlow ASR service") logger.info("Using OpenAI-compatible ASR service (SiliconFlow implementation)")
else: else:
self.asr_service = BufferedASRService( self.asr_service = BufferedASRService(
sample_rate=settings.sample_rate sample_rate=settings.sample_rate

View File

@@ -66,7 +66,7 @@ Rules:
"baseUrl": "https://api.openai.com/v1" "baseUrl": "https://api.openai.com/v1"
}, },
"asr": { "asr": {
"provider": "siliconflow", "provider": "openai_compatible",
"model": "FunAudioLLM/SenseVoiceSmall", "model": "FunAudioLLM/SenseVoiceSmall",
"apiKey": "sf-...", "apiKey": "sf-...",
"interimIntervalMs": 500, "interimIntervalMs": 500,
@@ -74,7 +74,7 @@ Rules:
}, },
"tts": { "tts": {
"enabled": true, "enabled": true,
"provider": "siliconflow", "provider": "openai_compatible",
"model": "FunAudioLLM/CosyVoice2-0.5B", "model": "FunAudioLLM/CosyVoice2-0.5B",
"apiKey": "sf-...", "apiKey": "sf-...",
"voice": "anna", "voice": "anna",

View File

@@ -15,8 +15,8 @@ from services.base import (
from services.llm import OpenAILLMService, MockLLMService from services.llm import OpenAILLMService, MockLLMService
from services.tts import EdgeTTSService, MockTTSService from services.tts import EdgeTTSService, MockTTSService
from services.asr import BufferedASRService, MockASRService from services.asr import BufferedASRService, MockASRService
from services.siliconflow_asr import SiliconFlowASRService from services.openai_compatible_asr import OpenAICompatibleASRService, SiliconFlowASRService
from services.siliconflow_tts import SiliconFlowTTSService from services.openai_compatible_tts import OpenAICompatibleTTSService, SiliconFlowTTSService
from services.streaming_tts_adapter import StreamingTTSAdapter from services.streaming_tts_adapter import StreamingTTSAdapter
from services.realtime import RealtimeService, RealtimeConfig, RealtimePipeline from services.realtime import RealtimeService, RealtimeConfig, RealtimePipeline
@@ -38,8 +38,10 @@ __all__ = [
# ASR # ASR
"BufferedASRService", "BufferedASRService",
"MockASRService", "MockASRService",
"OpenAICompatibleASRService",
"SiliconFlowASRService", "SiliconFlowASRService",
# TTS (SiliconFlow) # TTS (SiliconFlow)
"OpenAICompatibleTTSService",
"SiliconFlowTTSService", "SiliconFlowTTSService",
"StreamingTTSAdapter", "StreamingTTSAdapter",
# Realtime # Realtime

View File

@@ -0,0 +1,321 @@
"""OpenAI-compatible ASR (Automatic Speech Recognition) Service.
Uses the SiliconFlow API for speech-to-text transcription.
API: https://docs.siliconflow.cn/cn/api-reference/audio/create-audio-transcriptions
"""
import asyncio
import io
import wave
from typing import AsyncIterator, Optional, Callable, Awaitable
from loguru import logger
try:
import aiohttp
AIOHTTP_AVAILABLE = True
except ImportError:
AIOHTTP_AVAILABLE = False
logger.warning("aiohttp not available - OpenAICompatibleASRService will not work")
from services.base import BaseASRService, ASRResult, ServiceState
class OpenAICompatibleASRService(BaseASRService):
    """
    OpenAI-compatible ASR service for speech-to-text transcription.
    Features:
    - Buffers incoming audio chunks
    - Provides interim transcriptions periodically (for streaming to client)
    - Final transcription on EOU
    API Details:
    - Endpoint: POST https://api.siliconflow.cn/v1/audio/transcriptions
    - Models: FunAudioLLM/SenseVoiceSmall (default), TeleAI/TeleSpeechASR
    - Input: Audio file (multipart/form-data)
    - Output: {"text": "transcribed text"}
    """

    # Supported models: short alias -> fully qualified model id.
    MODELS = {
        "sensevoice": "FunAudioLLM/SenseVoiceSmall",
        "telespeech": "TeleAI/TeleSpeechASR",
    }

    # NOTE(review): despite the vendor-neutral class name, the endpoint is
    # hard-pinned to SiliconFlow's transcription URL — confirm whether a
    # configurable base URL is intended for other OpenAI-compatible vendors.
    API_URL = "https://api.siliconflow.cn/v1/audio/transcriptions"

    def __init__(
        self,
        api_key: str,
        model: str = "FunAudioLLM/SenseVoiceSmall",
        sample_rate: int = 16000,
        language: str = "auto",
        interim_interval_ms: int = 500,  # How often to send interim results
        min_audio_for_interim_ms: int = 300,  # Min audio before first interim
        on_transcript: Optional[Callable[[str, bool], Awaitable[None]]] = None
    ) -> None:
        """
        Initialize OpenAI-compatible ASR service.
        Args:
            api_key: Provider API key
            model: ASR model name or alias
            sample_rate: Audio sample rate (16000 recommended)
            language: Language code (auto for automatic detection)
            interim_interval_ms: How often to generate interim transcriptions
            min_audio_for_interim_ms: Minimum audio duration before first interim
            on_transcript: Callback for transcription results (text, is_final)

        Raises:
            RuntimeError: If aiohttp is not installed.
        """
        super().__init__(sample_rate=sample_rate, language=language)
        if not AIOHTTP_AVAILABLE:
            raise RuntimeError("aiohttp is required for OpenAICompatibleASRService")
        self.api_key = api_key
        # Resolve a short alias ("sensevoice", "telespeech") to the full model
        # id; anything not in MODELS is passed through to the API unchanged.
        self.model = self.MODELS.get(model.lower(), model)
        self.interim_interval_ms = interim_interval_ms
        self.min_audio_for_interim_ms = min_audio_for_interim_ms
        self.on_transcript = on_transcript
        # Session (created in connect(), closed in disconnect())
        self._session: Optional[aiohttp.ClientSession] = None
        # Audio buffer: raw PCM bytes (16-bit mono, per the WAV header written
        # in transcribe_buffer); grows via send_audio() until cleared.
        self._audio_buffer: bytes = b""
        self._current_text: str = ""
        self._last_interim_time: float = 0
        # Transcript queue for async iteration (consumed by receive_transcripts)
        self._transcript_queue: asyncio.Queue[ASRResult] = asyncio.Queue()
        # Background task for interim results
        self._interim_task: Optional[asyncio.Task] = None
        self._running: bool = False
        logger.info(f"OpenAICompatibleASRService initialized with model: {self.model}")

    async def connect(self) -> None:
        """Connect to the service.

        Creates the HTTP session with bearer auth and marks the service
        running so receive_transcripts() and the interim loop can operate.
        """
        self._session = aiohttp.ClientSession(
            headers={
                "Authorization": f"Bearer {self.api_key}"
            }
        )
        self._running = True
        self.state = ServiceState.CONNECTED
        logger.info("OpenAICompatibleASRService connected")

    async def disconnect(self) -> None:
        """Disconnect and cleanup.

        Cancels the interim task (awaiting its cancellation), closes the
        HTTP session, and clears audio/text buffers.
        """
        self._running = False
        if self._interim_task:
            self._interim_task.cancel()
            try:
                await self._interim_task
            except asyncio.CancelledError:
                pass
            self._interim_task = None
        if self._session:
            await self._session.close()
            self._session = None
        self._audio_buffer = b""
        self._current_text = ""
        self.state = ServiceState.DISCONNECTED
        logger.info("OpenAICompatibleASRService disconnected")

    async def send_audio(self, audio: bytes) -> None:
        """
        Buffer incoming audio data.
        Args:
            audio: PCM audio data (16-bit, mono)
        """
        self._audio_buffer += audio

    async def transcribe_buffer(self, is_final: bool = False) -> Optional[str]:
        """
        Transcribe current audio buffer.
        Args:
            is_final: Whether this is the final transcription
        Returns:
            Transcribed text or None if not enough audio

        Note: each call re-sends the ENTIRE accumulated buffer, so interim
        results are full re-transcriptions, not incremental deltas.
        """
        if not self._session:
            logger.warning("ASR session not connected")
            return None
        # Check minimum audio duration: bytes / (rate * 2 bytes per 16-bit
        # sample) gives seconds; * 1000 gives milliseconds.
        audio_duration_ms = len(self._audio_buffer) / (self.sample_rate * 2) * 1000
        if not is_final and audio_duration_ms < self.min_audio_for_interim_ms:
            return None
        if audio_duration_ms < 100:  # Less than 100ms - too short
            return None
        try:
            # Convert PCM to WAV in memory (API expects a container, not raw PCM)
            wav_buffer = io.BytesIO()
            with wave.open(wav_buffer, 'wb') as wav_file:
                wav_file.setnchannels(1)
                wav_file.setsampwidth(2)  # 16-bit
                wav_file.setframerate(self.sample_rate)
                wav_file.writeframes(self._audio_buffer)
            wav_buffer.seek(0)
            wav_data = wav_buffer.read()
            # Send to API as multipart/form-data
            form_data = aiohttp.FormData()
            form_data.add_field(
                'file',
                wav_data,
                filename='audio.wav',
                content_type='audio/wav'
            )
            form_data.add_field('model', self.model)
            async with self._session.post(self.API_URL, data=form_data) as response:
                if response.status == 200:
                    result = await response.json()
                    text = result.get("text", "").strip()
                    if text:
                        self._current_text = text
                        # Notify via callback
                        if self.on_transcript:
                            await self.on_transcript(text, is_final)
                        # Queue result for receive_transcripts() consumers
                        await self._transcript_queue.put(
                            ASRResult(text=text, is_final=is_final)
                        )
                        logger.debug(f"ASR {'final' if is_final else 'interim'}: {text[:50]}...")
                    return text
                else:
                    # Non-200 responses are logged and swallowed; callers see None.
                    error_text = await response.text()
                    logger.error(f"ASR API error {response.status}: {error_text}")
                    return None
        except Exception as e:
            logger.error(f"ASR transcription error: {e}")
            return None

    async def get_final_transcription(self) -> str:
        """
        Get final transcription and clear buffer.
        Call this when EOU is detected.
        Returns:
            Final transcribed text
        """
        # Transcribe full buffer as final
        text = await self.transcribe_buffer(is_final=True)
        # Fall back to the last interim text if the final request failed or
        # returned nothing; then clear both buffers for the next turn.
        result = text or self._current_text
        self._audio_buffer = b""
        self._current_text = ""
        return result

    def get_and_clear_text(self) -> str:
        """
        Get accumulated text and clear buffer.
        Compatible with BufferedASRService interface.
        """
        text = self._current_text
        self._current_text = ""
        self._audio_buffer = b""
        return text

    def get_audio_buffer(self) -> bytes:
        """Get current audio buffer."""
        return self._audio_buffer

    def get_audio_duration_ms(self) -> float:
        """Get current audio buffer duration in milliseconds."""
        # 2 bytes per sample (16-bit PCM, mono).
        return len(self._audio_buffer) / (self.sample_rate * 2) * 1000

    def clear_buffer(self) -> None:
        """Clear audio and text buffers."""
        self._audio_buffer = b""
        self._current_text = ""

    async def receive_transcripts(self) -> AsyncIterator[ASRResult]:
        """
        Async iterator for transcription results.
        Yields:
            ASRResult with text and is_final flag
        """
        while self._running:
            try:
                # Short timeout so the loop re-checks _running and can exit
                # promptly after disconnect() instead of blocking on get().
                result = await asyncio.wait_for(
                    self._transcript_queue.get(),
                    timeout=0.1
                )
                yield result
            except asyncio.TimeoutError:
                continue
            except asyncio.CancelledError:
                break

    async def start_interim_transcription(self) -> None:
        """
        Start background task for interim transcriptions.
        This periodically transcribes buffered audio for
        real-time feedback to the user.

        Idempotent: a no-op if the interim task is already running.
        """
        if self._interim_task and not self._interim_task.done():
            return
        self._interim_task = asyncio.create_task(self._interim_loop())

    async def stop_interim_transcription(self) -> None:
        """Stop interim transcription task."""
        if self._interim_task:
            self._interim_task.cancel()
            try:
                await self._interim_task
            except asyncio.CancelledError:
                pass
            self._interim_task = None

    async def _interim_loop(self) -> None:
        """Background loop for interim transcriptions.

        Sleeps interim_interval_ms between iterations, then transcribes the
        buffer if enough wall-clock time has passed AND enough audio has
        accumulated. Exits on task cancellation or when _running goes False.
        """
        import time
        while self._running:
            try:
                await asyncio.sleep(self.interim_interval_ms / 1000)
                # Check if we have enough new audio
                current_time = time.time()
                time_since_last = (current_time - self._last_interim_time) * 1000
                if time_since_last >= self.interim_interval_ms:
                    audio_duration = self.get_audio_duration_ms()
                    if audio_duration >= self.min_audio_for_interim_ms:
                        await self.transcribe_buffer(is_final=False)
                        self._last_interim_time = current_time
            except asyncio.CancelledError:
                break
            except Exception as e:
                # Keep the loop alive on transient errors (e.g. network).
                logger.error(f"Interim transcription error: {e}")


# Backward-compatible alias
SiliconFlowASRService = OpenAICompatibleASRService

View File

@@ -0,0 +1,315 @@
"""OpenAI-compatible TTS Service with streaming support.
Uses SiliconFlow's CosyVoice2 or MOSS-TTSD models for low-latency
text-to-speech synthesis with streaming.
API Docs: https://docs.siliconflow.cn/cn/api-reference/audio/create-speech
"""
import os
import asyncio
import aiohttp
from typing import AsyncIterator, Optional
from loguru import logger
from services.base import BaseTTSService, TTSChunk, ServiceState
from services.streaming_tts_adapter import StreamingTTSAdapter # backward-compatible re-export
class OpenAICompatibleTTSService(BaseTTSService):
    """
    OpenAI-compatible TTS service with streaming support.
    Supports CosyVoice2-0.5B and MOSS-TTSD-v0.5 models.

    Streams raw PCM from an OpenAI-style ``/audio/speech`` endpoint
    (currently pinned to SiliconFlow's URL).
    """

    # Available voices: short alias -> fully-qualified "model:voice" id.
    VOICES = {
        "alex": "FunAudioLLM/CosyVoice2-0.5B:alex",
        "anna": "FunAudioLLM/CosyVoice2-0.5B:anna",
        "bella": "FunAudioLLM/CosyVoice2-0.5B:bella",
        "benjamin": "FunAudioLLM/CosyVoice2-0.5B:benjamin",
        "charles": "FunAudioLLM/CosyVoice2-0.5B:charles",
        "claire": "FunAudioLLM/CosyVoice2-0.5B:claire",
        "david": "FunAudioLLM/CosyVoice2-0.5B:david",
        "diana": "FunAudioLLM/CosyVoice2-0.5B:diana",
    }

    def __init__(
        self,
        api_key: Optional[str] = None,
        voice: str = "anna",
        model: str = "FunAudioLLM/CosyVoice2-0.5B",
        sample_rate: int = 16000,
        speed: float = 1.0
    ) -> None:
        """
        Initialize OpenAI-compatible TTS service.
        Args:
            api_key: Provider API key (defaults to SILICONFLOW_API_KEY env var)
            voice: Voice name (alex, anna, bella, benjamin, charles, claire, david, diana)
            model: Model name
            sample_rate: Output sample rate (8000, 16000, 24000, 32000, 44100)
            speed: Speech speed (0.25 to 4.0)
        """
        # Resolve short voice aliases to their fully-qualified form; names
        # not present in the alias table are passed through unchanged.
        full_voice = self.VOICES.get(voice, voice)
        super().__init__(voice=full_voice, sample_rate=sample_rate, speed=speed)
        self.api_key = api_key or os.getenv("SILICONFLOW_API_KEY")
        self.model = model
        self.api_url = "https://api.siliconflow.cn/v1/audio/speech"
        self._session: Optional[aiohttp.ClientSession] = None
        # Set by cancel() to abort an in-flight synthesize_stream() between chunks.
        self._cancel_event = asyncio.Event()

    async def connect(self) -> None:
        """Initialize HTTP session.

        Raises:
            ValueError: If no API key was provided or found in the environment.
        """
        if not self.api_key:
            raise ValueError("SiliconFlow API key not provided. Set SILICONFLOW_API_KEY env var.")
        self._session = aiohttp.ClientSession(
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }
        )
        self.state = ServiceState.CONNECTED
        # Log label made vendor-neutral for consistency with
        # OpenAICompatibleASRService after the class rename.
        logger.info(f"OpenAI-compatible TTS service ready: voice={self.voice}, model={self.model}")

    async def disconnect(self) -> None:
        """Close HTTP session."""
        if self._session:
            await self._session.close()
            self._session = None
        self.state = ServiceState.DISCONNECTED
        logger.info("OpenAI-compatible TTS service disconnected")

    async def synthesize(self, text: str) -> bytes:
        """Synthesize complete audio for text.

        Convenience wrapper that concatenates all streamed chunks.
        """
        audio_data = b""
        async for chunk in self.synthesize_stream(text):
            audio_data += chunk.audio
        return audio_data

    async def synthesize_stream(self, text: str) -> AsyncIterator[TTSChunk]:
        """
        Synthesize audio in streaming mode.
        Args:
            text: Text to synthesize
        Yields:
            TTSChunk objects with PCM audio; the last chunk has is_final=True.

        Raises:
            RuntimeError: If connect() was not called first.

        Note: API errors (non-200) are logged and end the stream early
        without raising, so callers may receive no chunks at all.
        """
        if not self._session:
            raise RuntimeError("TTS service not connected")
        if not text.strip():
            return
        self._cancel_event.clear()
        payload = {
            "model": self.model,
            "input": text,
            "voice": self.voice,
            "response_format": "pcm",
            "sample_rate": self.sample_rate,
            "stream": True,
            "speed": self.speed
        }
        try:
            async with self._session.post(self.api_url, json=payload) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"OpenAI-compatible TTS error: {response.status} - {error_text}")
                    return
                # Stream audio chunks: 100ms of 16-bit mono PCM per chunk.
                chunk_size = self.sample_rate * 2 // 10
                buffer = b""
                pending_chunk = None
                async for chunk in response.content.iter_any():
                    if self._cancel_event.is_set():
                        logger.info("TTS synthesis cancelled")
                        return
                    buffer += chunk
                    # Yield complete chunks
                    while len(buffer) >= chunk_size:
                        audio_chunk = buffer[:chunk_size]
                        buffer = buffer[chunk_size:]
                        # Keep one full chunk buffered so we can always tag the true
                        # last full chunk as final when stream length is an exact multiple.
                        if pending_chunk is not None:
                            yield TTSChunk(
                                audio=pending_chunk,
                                sample_rate=self.sample_rate,
                                is_final=False
                            )
                        pending_chunk = audio_chunk
                # Flush pending chunk(s) and remaining tail.
                if pending_chunk is not None:
                    if buffer:
                        # A partial tail remains: the held chunk is not final.
                        yield TTSChunk(
                            audio=pending_chunk,
                            sample_rate=self.sample_rate,
                            is_final=False
                        )
                        pending_chunk = None
                    else:
                        # Stream ended on an exact chunk boundary: held chunk is last.
                        yield TTSChunk(
                            audio=pending_chunk,
                            sample_rate=self.sample_rate,
                            is_final=True
                        )
                        pending_chunk = None
                if buffer:
                    yield TTSChunk(
                        audio=buffer,
                        sample_rate=self.sample_rate,
                        is_final=True
                    )
        except asyncio.CancelledError:
            logger.info("TTS synthesis cancelled via asyncio")
            raise
        except Exception as e:
            logger.error(f"TTS synthesis error: {e}")
            raise

    async def cancel(self) -> None:
        """Cancel ongoing synthesis.

        Takes effect between streamed chunks in synthesize_stream().
        """
        self._cancel_event.set()
class StreamingTTSAdapter:
    """
    Adapter for streaming LLM text to TTS with sentence-level chunking.
    This reduces latency by starting TTS as soon as a complete sentence
    is received from the LLM, rather than waiting for the full response.
    """
    # NOTE(review): this local definition shadows the StreamingTTSAdapter
    # imported at the top of this module from services.streaming_tts_adapter
    # (the import is labelled a "backward-compatible re-export") — confirm
    # which implementation is meant to be exported and drop the other.

    # Sentence delimiters
    # NOTE(review): the empty-string entries below appear to be CJK sentence
    # punctuation lost to an encoding problem. A single character can never
    # equal '', so only '.', '!', '?' and '\n' are effective as delimiters
    # here — confirm and restore the intended characters.
    SENTENCE_ENDS = {'', '', '', '', '.', '!', '?', '\n'}

    def __init__(self, tts_service: BaseTTSService, transport, session_id: str) -> None:
        self.tts_service = tts_service
        self.transport = transport
        self.session_id = session_id
        # Accumulates LLM text until a sentence delimiter is seen.
        self._buffer: str = ""
        # Set by cancel(); checked between chunks to stop speaking promptly.
        self._cancel_event = asyncio.Event()
        self._is_speaking: bool = False

    def _is_non_sentence_period(self, text: str, idx: int) -> bool:
        """Check whether '.' should NOT be treated as a sentence delimiter."""
        if text[idx] != ".":
            return False
        # Decimal/version segment: 1.2, v1.2.3
        if idx > 0 and idx < len(text) - 1 and text[idx - 1].isdigit() and text[idx + 1].isdigit():
            return True
        # Number abbreviations: No.1 / No. 1
        # Scan left over letters to extract the token just before the period.
        left_start = idx - 1
        while left_start >= 0 and text[left_start].isalpha():
            left_start -= 1
        left_token = text[left_start + 1:idx].lower()
        if left_token == "no":
            # Skip whitespace after the period, then require a digit.
            j = idx + 1
            while j < len(text) and text[j].isspace():
                j += 1
            if j < len(text) and text[j].isdigit():
                return True
        return False

    async def process_text_chunk(self, text_chunk: str) -> None:
        """
        Process a text chunk from LLM and trigger TTS when sentence is complete.
        Args:
            text_chunk: Text chunk from LLM streaming
        """
        if self._cancel_event.is_set():
            return
        self._buffer += text_chunk
        # Check for sentence completion: repeatedly carve off the leading
        # complete sentence until no delimiter remains in the buffer.
        while True:
            split_idx = -1
            for i, char in enumerate(self._buffer):
                if char == "." and self._is_non_sentence_period(self._buffer, i):
                    continue
                if char in self.SENTENCE_ENDS:
                    split_idx = i
                    break
            if split_idx < 0:
                break
            # Extend the cut past any run of consecutive delimiters ("?!", "...").
            end_idx = split_idx + 1
            while end_idx < len(self._buffer) and self._buffer[end_idx] in self.SENTENCE_ENDS:
                end_idx += 1
            sentence = self._buffer[:end_idx].strip()
            self._buffer = self._buffer[end_idx:]
            # Skip punctuation-only fragments: speak only if there is content.
            if sentence and any(ch.isalnum() for ch in sentence):
                await self._speak_sentence(sentence)

    async def flush(self) -> None:
        """Flush remaining buffer."""
        if self._buffer.strip() and not self._cancel_event.is_set():
            await self._speak_sentence(self._buffer.strip())
        self._buffer = ""

    async def _speak_sentence(self, text: str) -> None:
        """Synthesize and send a sentence."""
        if not text or self._cancel_event.is_set():
            return
        self._is_speaking = True
        try:
            async for chunk in self.tts_service.synthesize_stream(text):
                if self._cancel_event.is_set():
                    break
                await self.transport.send_audio(chunk.audio)
                await asyncio.sleep(0.01)  # Prevent flooding
        except Exception as e:
            # Best-effort: log and continue; a failed sentence does not abort the turn.
            logger.error(f"TTS speak error: {e}")
        finally:
            self._is_speaking = False

    def cancel(self) -> None:
        """Cancel ongoing speech."""
        self._cancel_event.set()
        self._buffer = ""

    def reset(self) -> None:
        """Reset for new turn."""
        self._cancel_event.clear()
        self._buffer = ""
        self._is_speaking = False

    @property
    def is_speaking(self) -> bool:
        # True only while _speak_sentence is actively streaming audio.
        return self._is_speaking


# Backward-compatible alias
SiliconFlowTTSService = OpenAICompatibleTTSService

View File

@@ -1,317 +1,8 @@
"""SiliconFlow ASR (Automatic Speech Recognition) Service. """Backward-compatible imports for legacy siliconflow_asr module."""
Uses the SiliconFlow API for speech-to-text transcription. from services.openai_compatible_asr import OpenAICompatibleASRService
API: https://docs.siliconflow.cn/cn/api-reference/audio/create-audio-transcriptions
"""
import asyncio # Backward-compatible alias
import io SiliconFlowASRService = OpenAICompatibleASRService
import wave
from typing import AsyncIterator, Optional, Callable, Awaitable
from loguru import logger
try: __all__ = ["OpenAICompatibleASRService", "SiliconFlowASRService"]
import aiohttp
AIOHTTP_AVAILABLE = True
except ImportError:
AIOHTTP_AVAILABLE = False
logger.warning("aiohttp not available - SiliconFlowASRService will not work")
from services.base import BaseASRService, ASRResult, ServiceState
class SiliconFlowASRService(BaseASRService):
"""
SiliconFlow ASR service for speech-to-text transcription.
Features:
- Buffers incoming audio chunks
- Provides interim transcriptions periodically (for streaming to client)
- Final transcription on EOU
API Details:
- Endpoint: POST https://api.siliconflow.cn/v1/audio/transcriptions
- Models: FunAudioLLM/SenseVoiceSmall (default), TeleAI/TeleSpeechASR
- Input: Audio file (multipart/form-data)
- Output: {"text": "transcribed text"}
"""
# Supported models
MODELS = {
"sensevoice": "FunAudioLLM/SenseVoiceSmall",
"telespeech": "TeleAI/TeleSpeechASR",
}
API_URL = "https://api.siliconflow.cn/v1/audio/transcriptions"
def __init__(
self,
api_key: str,
model: str = "FunAudioLLM/SenseVoiceSmall",
sample_rate: int = 16000,
language: str = "auto",
interim_interval_ms: int = 500, # How often to send interim results
min_audio_for_interim_ms: int = 300, # Min audio before first interim
on_transcript: Optional[Callable[[str, bool], Awaitable[None]]] = None
):
"""
Initialize SiliconFlow ASR service.
Args:
api_key: SiliconFlow API key
model: ASR model name or alias
sample_rate: Audio sample rate (16000 recommended)
language: Language code (auto for automatic detection)
interim_interval_ms: How often to generate interim transcriptions
min_audio_for_interim_ms: Minimum audio duration before first interim
on_transcript: Callback for transcription results (text, is_final)
"""
super().__init__(sample_rate=sample_rate, language=language)
if not AIOHTTP_AVAILABLE:
raise RuntimeError("aiohttp is required for SiliconFlowASRService")
self.api_key = api_key
self.model = self.MODELS.get(model.lower(), model)
self.interim_interval_ms = interim_interval_ms
self.min_audio_for_interim_ms = min_audio_for_interim_ms
self.on_transcript = on_transcript
# Session
self._session: Optional[aiohttp.ClientSession] = None
# Audio buffer
self._audio_buffer: bytes = b""
self._current_text: str = ""
self._last_interim_time: float = 0
# Transcript queue for async iteration
self._transcript_queue: asyncio.Queue[ASRResult] = asyncio.Queue()
# Background task for interim results
self._interim_task: Optional[asyncio.Task] = None
self._running = False
logger.info(f"SiliconFlowASRService initialized with model: {self.model}")
async def connect(self) -> None:
"""Connect to the service."""
self._session = aiohttp.ClientSession(
headers={
"Authorization": f"Bearer {self.api_key}"
}
)
self._running = True
self.state = ServiceState.CONNECTED
logger.info("SiliconFlowASRService connected")
async def disconnect(self) -> None:
"""Disconnect and cleanup."""
self._running = False
if self._interim_task:
self._interim_task.cancel()
try:
await self._interim_task
except asyncio.CancelledError:
pass
self._interim_task = None
if self._session:
await self._session.close()
self._session = None
self._audio_buffer = b""
self._current_text = ""
self.state = ServiceState.DISCONNECTED
logger.info("SiliconFlowASRService disconnected")
async def send_audio(self, audio: bytes) -> None:
"""
Buffer incoming audio data.
Args:
audio: PCM audio data (16-bit, mono)
"""
self._audio_buffer += audio
async def transcribe_buffer(self, is_final: bool = False) -> Optional[str]:
"""
Transcribe current audio buffer.
Args:
is_final: Whether this is the final transcription
Returns:
Transcribed text or None if not enough audio
"""
if not self._session:
logger.warning("ASR session not connected")
return None
# Check minimum audio duration
audio_duration_ms = len(self._audio_buffer) / (self.sample_rate * 2) * 1000
if not is_final and audio_duration_ms < self.min_audio_for_interim_ms:
return None
if audio_duration_ms < 100: # Less than 100ms - too short
return None
try:
# Convert PCM to WAV in memory
wav_buffer = io.BytesIO()
with wave.open(wav_buffer, 'wb') as wav_file:
wav_file.setnchannels(1)
wav_file.setsampwidth(2) # 16-bit
wav_file.setframerate(self.sample_rate)
wav_file.writeframes(self._audio_buffer)
wav_buffer.seek(0)
wav_data = wav_buffer.read()
# Send to API
form_data = aiohttp.FormData()
form_data.add_field(
'file',
wav_data,
filename='audio.wav',
content_type='audio/wav'
)
form_data.add_field('model', self.model)
async with self._session.post(self.API_URL, data=form_data) as response:
if response.status == 200:
result = await response.json()
text = result.get("text", "").strip()
if text:
self._current_text = text
# Notify via callback
if self.on_transcript:
await self.on_transcript(text, is_final)
# Queue result
await self._transcript_queue.put(
ASRResult(text=text, is_final=is_final)
)
logger.debug(f"ASR {'final' if is_final else 'interim'}: {text[:50]}...")
return text
else:
error_text = await response.text()
logger.error(f"ASR API error {response.status}: {error_text}")
return None
except Exception as e:
logger.error(f"ASR transcription error: {e}")
return None
async def get_final_transcription(self) -> str:
"""
Get final transcription and clear buffer.
Call this when EOU is detected.
Returns:
Final transcribed text
"""
# Transcribe full buffer as final
text = await self.transcribe_buffer(is_final=True)
# Clear buffer
result = text or self._current_text
self._audio_buffer = b""
self._current_text = ""
return result
def get_and_clear_text(self) -> str:
"""
Get accumulated text and clear buffer.
Compatible with BufferedASRService interface.
"""
text = self._current_text
self._current_text = ""
self._audio_buffer = b""
return text
def get_audio_buffer(self) -> bytes:
"""Get current audio buffer."""
return self._audio_buffer
def get_audio_duration_ms(self) -> float:
"""Get current audio buffer duration in milliseconds."""
return len(self._audio_buffer) / (self.sample_rate * 2) * 1000
def clear_buffer(self) -> None:
"""Clear audio and text buffers."""
self._audio_buffer = b""
self._current_text = ""
async def receive_transcripts(self) -> AsyncIterator[ASRResult]:
"""
Async iterator for transcription results.
Yields:
ASRResult with text and is_final flag
"""
while self._running:
try:
result = await asyncio.wait_for(
self._transcript_queue.get(),
timeout=0.1
)
yield result
except asyncio.TimeoutError:
continue
except asyncio.CancelledError:
break
async def start_interim_transcription(self) -> None:
"""
Start background task for interim transcriptions.
This periodically transcribes buffered audio for
real-time feedback to the user.
"""
if self._interim_task and not self._interim_task.done():
return
self._interim_task = asyncio.create_task(self._interim_loop())
async def stop_interim_transcription(self) -> None:
"""Stop interim transcription task."""
if self._interim_task:
self._interim_task.cancel()
try:
await self._interim_task
except asyncio.CancelledError:
pass
self._interim_task = None
async def _interim_loop(self) -> None:
"""Background loop for interim transcriptions."""
import time
while self._running:
try:
await asyncio.sleep(self.interim_interval_ms / 1000)
# Check if we have enough new audio
current_time = time.time()
time_since_last = (current_time - self._last_interim_time) * 1000
if time_since_last >= self.interim_interval_ms:
audio_duration = self.get_audio_duration_ms()
if audio_duration >= self.min_audio_for_interim_ms:
await self.transcribe_buffer(is_final=False)
self._last_interim_time = current_time
except asyncio.CancelledError:
break
except Exception as e:
logger.error(f"Interim transcription error: {e}")

View File

@@ -1,311 +1,8 @@
"""SiliconFlow TTS Service with streaming support. """Backward-compatible imports for legacy siliconflow_tts module."""
Uses SiliconFlow's CosyVoice2 or MOSS-TTSD models for low-latency from services.openai_compatible_tts import OpenAICompatibleTTSService, StreamingTTSAdapter
text-to-speech synthesis with streaming.
API Docs: https://docs.siliconflow.cn/cn/api-reference/audio/create-speech # Backward-compatible alias
""" SiliconFlowTTSService = OpenAICompatibleTTSService
import os __all__ = ["OpenAICompatibleTTSService", "SiliconFlowTTSService", "StreamingTTSAdapter"]
import asyncio
import aiohttp
from typing import AsyncIterator, Optional
from loguru import logger
from services.base import BaseTTSService, TTSChunk, ServiceState
from services.streaming_tts_adapter import StreamingTTSAdapter # backward-compatible re-export
class SiliconFlowTTSService(BaseTTSService):
"""
SiliconFlow TTS service with streaming support.
Supports CosyVoice2-0.5B and MOSS-TTSD-v0.5 models.
"""
# Available voices
VOICES = {
"alex": "FunAudioLLM/CosyVoice2-0.5B:alex",
"anna": "FunAudioLLM/CosyVoice2-0.5B:anna",
"bella": "FunAudioLLM/CosyVoice2-0.5B:bella",
"benjamin": "FunAudioLLM/CosyVoice2-0.5B:benjamin",
"charles": "FunAudioLLM/CosyVoice2-0.5B:charles",
"claire": "FunAudioLLM/CosyVoice2-0.5B:claire",
"david": "FunAudioLLM/CosyVoice2-0.5B:david",
"diana": "FunAudioLLM/CosyVoice2-0.5B:diana",
}
def __init__(
self,
api_key: Optional[str] = None,
voice: str = "anna",
model: str = "FunAudioLLM/CosyVoice2-0.5B",
sample_rate: int = 16000,
speed: float = 1.0
):
"""
Initialize SiliconFlow TTS service.
Args:
api_key: SiliconFlow API key (defaults to SILICONFLOW_API_KEY env var)
voice: Voice name (alex, anna, bella, benjamin, charles, claire, david, diana)
model: Model name
sample_rate: Output sample rate (8000, 16000, 24000, 32000, 44100)
speed: Speech speed (0.25 to 4.0)
"""
# Resolve voice name
if voice in self.VOICES:
full_voice = self.VOICES[voice]
else:
full_voice = voice
super().__init__(voice=full_voice, sample_rate=sample_rate, speed=speed)
self.api_key = api_key or os.getenv("SILICONFLOW_API_KEY")
self.model = model
self.api_url = "https://api.siliconflow.cn/v1/audio/speech"
self._session: Optional[aiohttp.ClientSession] = None
self._cancel_event = asyncio.Event()
async def connect(self) -> None:
"""Initialize HTTP session."""
if not self.api_key:
raise ValueError("SiliconFlow API key not provided. Set SILICONFLOW_API_KEY env var.")
self._session = aiohttp.ClientSession(
headers={
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
}
)
self.state = ServiceState.CONNECTED
logger.info(f"SiliconFlow TTS service ready: voice={self.voice}, model={self.model}")
async def disconnect(self) -> None:
"""Close HTTP session."""
if self._session:
await self._session.close()
self._session = None
self.state = ServiceState.DISCONNECTED
logger.info("SiliconFlow TTS service disconnected")
async def synthesize(self, text: str) -> bytes:
"""Synthesize complete audio for text."""
audio_data = b""
async for chunk in self.synthesize_stream(text):
audio_data += chunk.audio
return audio_data
    async def synthesize_stream(self, text: str) -> AsyncIterator[TTSChunk]:
        """
        Synthesize audio in streaming mode.

        Issues one streaming POST to the SiliconFlow speech endpoint and
        re-chunks the raw PCM response into ~100ms pieces.  A one-chunk
        lookahead buffer guarantees the true last full chunk is tagged
        ``is_final=True`` even when the stream length is an exact multiple
        of the chunk size.

        Args:
            text: Text to synthesize.  Whitespace-only input is ignored.

        Yields:
            TTSChunk objects with PCM audio; exactly one chunk carries
            ``is_final=True`` (unless the stream is empty or cancelled).

        Raises:
            RuntimeError: if ``connect()`` has not been called.
            asyncio.CancelledError: if the surrounding task is cancelled.
            Exception: any transport-level failure is logged and re-raised.
        """
        if not self._session:
            raise RuntimeError("TTS service not connected")
        if not text.strip():
            return
        # New synthesis run: clear any cancellation left over from a prior turn.
        self._cancel_event.clear()
        payload = {
            "model": self.model,
            "input": text,
            "voice": self.voice,
            "response_format": "pcm",
            "sample_rate": self.sample_rate,
            "stream": True,
            "speed": self.speed
        }
        try:
            async with self._session.post(self.api_url, json=payload) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"SiliconFlow TTS error: {response.status} - {error_text}")
                    # Best-effort: API errors end the stream silently (no chunks)
                    # rather than raising to the caller.
                    return
                # Stream audio chunks.
                # NOTE(review): assumes 16-bit mono PCM (2 bytes per sample) —
                # confirm against the endpoint's "pcm" response format.
                chunk_size = self.sample_rate * 2 // 10  # 100ms chunks
                buffer: bytes = b""
                pending_chunk: Optional[bytes] = None
                async for chunk in response.content.iter_any():
                    if self._cancel_event.is_set():
                        logger.info("TTS synthesis cancelled")
                        return
                    buffer += chunk
                    # Yield complete chunks.
                    while len(buffer) >= chunk_size:
                        audio_chunk = buffer[:chunk_size]
                        buffer = buffer[chunk_size:]
                        # Keep one full chunk buffered so we can always tag the true
                        # last full chunk as final when stream length is an exact multiple.
                        if pending_chunk is not None:
                            yield TTSChunk(
                                audio=pending_chunk,
                                sample_rate=self.sample_rate,
                                is_final=False
                            )
                        pending_chunk = audio_chunk
                # Flush pending chunk(s) and remaining tail.  If a partial tail
                # exists it becomes the final chunk; otherwise the held-back
                # full chunk is the final one.
                if pending_chunk is not None:
                    if buffer:
                        yield TTSChunk(
                            audio=pending_chunk,
                            sample_rate=self.sample_rate,
                            is_final=False
                        )
                        pending_chunk = None
                    else:
                        yield TTSChunk(
                            audio=pending_chunk,
                            sample_rate=self.sample_rate,
                            is_final=True
                        )
                        pending_chunk = None
                if buffer:
                    yield TTSChunk(
                        audio=buffer,
                        sample_rate=self.sample_rate,
                        is_final=True
                    )
        except asyncio.CancelledError:
            logger.info("TTS synthesis cancelled via asyncio")
            raise
        except Exception as e:
            logger.error(f"TTS synthesis error: {e}")
            raise
async def cancel(self) -> None:
"""Cancel ongoing synthesis."""
self._cancel_event.set()
class StreamingTTSAdapter:
    """
    Bridges streaming LLM text to TTS with sentence-granular dispatch.

    Rather than waiting for the LLM's full response, incoming text is
    buffered and handed to the TTS service as soon as a complete sentence
    is available, which cuts time-to-first-audio.
    """

    # Sentence delimiters (ASCII plus full-width punctuation).
    SENTENCE_ENDS = {'', '', '', '', '.', '!', '?', '\n'}

    def __init__(self, tts_service: BaseTTSService, transport, session_id: str):
        self.tts_service = tts_service
        self.transport = transport
        self.session_id = session_id
        self._buffer = ""
        self._cancel_event = asyncio.Event()
        self._is_speaking = False

    def _is_non_sentence_period(self, text: str, idx: int) -> bool:
        """Return True when the '.' at *idx* should NOT end a sentence.

        Two cases are recognized: digit-dot-digit runs (decimals, version
        numbers like 1.2 or v1.2.3) and the "No." abbreviation followed by
        a number (No.1 / No. 1).
        """
        if text[idx] != ".":
            return False
        # Decimal / version segment: a digit on both sides of the dot.
        if 0 < idx < len(text) - 1:
            if text[idx - 1].isdigit() and text[idx + 1].isdigit():
                return True
        # "No." abbreviation: collect the alphabetic run just before the dot.
        start = idx
        while start > 0 and text[start - 1].isalpha():
            start -= 1
        if text[start:idx].lower() == "no":
            after = idx + 1
            while after < len(text) and text[after].isspace():
                after += 1
            if after < len(text) and text[after].isdigit():
                return True
        return False

    async def process_text_chunk(self, text_chunk: str) -> None:
        """
        Accumulate a streamed LLM chunk and speak each completed sentence.

        Args:
            text_chunk: Text chunk from LLM streaming
        """
        if self._cancel_event.is_set():
            return
        self._buffer += text_chunk
        # Repeatedly peel complete sentences off the front of the buffer.
        while True:
            cut = -1
            for pos, ch in enumerate(self._buffer):
                if ch == "." and self._is_non_sentence_period(self._buffer, pos):
                    continue
                if ch in self.SENTENCE_ENDS:
                    cut = pos
                    break
            if cut < 0:
                break
            # Swallow any run of trailing delimiters ("?!", "...", etc.).
            tail = cut + 1
            while tail < len(self._buffer) and self._buffer[tail] in self.SENTENCE_ENDS:
                tail += 1
            sentence, self._buffer = self._buffer[:tail].strip(), self._buffer[tail:]
            # Skip punctuation-only fragments.
            if sentence and any(c.isalnum() for c in sentence):
                await self._speak_sentence(sentence)

    async def flush(self) -> None:
        """Speak whatever is left in the buffer (end of the LLM turn)."""
        leftover = self._buffer.strip()
        if leftover and not self._cancel_event.is_set():
            await self._speak_sentence(leftover)
        self._buffer = ""

    async def _speak_sentence(self, text: str) -> None:
        """Stream one sentence through TTS and forward its audio to the transport."""
        if not text or self._cancel_event.is_set():
            return
        self._is_speaking = True
        try:
            async for piece in self.tts_service.synthesize_stream(text):
                if self._cancel_event.is_set():
                    break
                await self.transport.send_audio(piece.audio)
                # Small pause keeps the transport from being flooded.
                await asyncio.sleep(0.01)
        except Exception as e:
            logger.error(f"TTS speak error: {e}")
        finally:
            self._is_speaking = False

    def cancel(self) -> None:
        """Abort current speech and drop any buffered text."""
        self._cancel_event.set()
        self._buffer = ""

    def reset(self) -> None:
        """Prepare the adapter for a fresh conversational turn."""
        self._cancel_event.clear()
        self._buffer = ""
        self._is_speaking = False

    @property
    def is_speaking(self) -> bool:
        return self._is_speaking

View File

@@ -85,7 +85,7 @@ const convertRecordedBlobToWav = async (blob: Blob): Promise<File> => {
export const ASRLibraryPage: React.FC = () => { export const ASRLibraryPage: React.FC = () => {
const [models, setModels] = useState<ASRModel[]>([]); const [models, setModels] = useState<ASRModel[]>([]);
const [searchTerm, setSearchTerm] = useState(''); const [searchTerm, setSearchTerm] = useState('');
const [vendorFilter, setVendorFilter] = useState<string>('all'); const [vendorFilter, setVendorFilter] = useState<string>('OpenAI Compatible');
const [langFilter, setLangFilter] = useState<string>('all'); const [langFilter, setLangFilter] = useState<string>('all');
const [isAddModalOpen, setIsAddModalOpen] = useState(false); const [isAddModalOpen, setIsAddModalOpen] = useState(false);
const [editingModel, setEditingModel] = useState<ASRModel | null>(null); const [editingModel, setEditingModel] = useState<ASRModel | null>(null);
@@ -111,7 +111,7 @@ export const ASRLibraryPage: React.FC = () => {
const filteredModels = models.filter((m) => { const filteredModels = models.filter((m) => {
const q = searchTerm.toLowerCase(); const q = searchTerm.toLowerCase();
const matchesSearch = m.name.toLowerCase().includes(q) || (m.modelName || '').toLowerCase().includes(q); const matchesSearch = m.name.toLowerCase().includes(q) || (m.modelName || '').toLowerCase().includes(q);
const matchesVendor = vendorFilter === 'all' || m.vendor === vendorFilter; const matchesVendor = m.vendor === vendorFilter;
const matchesLang = langFilter === 'all' || m.language === langFilter || (langFilter !== 'all' && m.language === 'Multi-lingual'); const matchesLang = langFilter === 'all' || m.language === langFilter || (langFilter !== 'all' && m.language === 'Multi-lingual');
return matchesSearch && matchesVendor && matchesLang; return matchesSearch && matchesVendor && matchesLang;
}); });
@@ -134,8 +134,6 @@ export const ASRLibraryPage: React.FC = () => {
setModels((prev) => prev.filter((m) => m.id !== id)); setModels((prev) => prev.filter((m) => m.id !== id));
}; };
const vendorOptions = Array.from(new Set(models.map((m) => m.vendor).filter(Boolean)));
return ( return (
<div className="space-y-6 animate-in fade-in py-4 pb-10"> <div className="space-y-6 animate-in fade-in py-4 pb-10">
<div className="flex items-center justify-between"> <div className="flex items-center justify-between">
@@ -162,10 +160,7 @@ export const ASRLibraryPage: React.FC = () => {
value={vendorFilter} value={vendorFilter}
onChange={(e) => setVendorFilter(e.target.value)} onChange={(e) => setVendorFilter(e.target.value)}
> >
<option value="all"></option> <option value="OpenAI Compatible">OpenAI Compatible</option>
{vendorOptions.map((vendor) => (
<option key={vendor} value={vendor}>{vendor}</option>
))}
</select> </select>
</div> </div>
<div className="flex items-center space-x-2"> <div className="flex items-center space-x-2">
@@ -371,7 +366,6 @@ const ASRModelModal: React.FC<{
onChange={(e) => setVendor(e.target.value)} onChange={(e) => setVendor(e.target.value)}
> >
<option value="OpenAI Compatible">OpenAI Compatible</option> <option value="OpenAI Compatible">OpenAI Compatible</option>
<option value="SiliconFlow">SiliconFlow</option>
</select> </select>
</div> </div>
<div className="space-y-1.5"> <div className="space-y-1.5">

View File

@@ -5,32 +5,37 @@ import { Button, Input, Badge, Drawer, Dialog } from '../components/UI';
import { ASRModel, Assistant, KnowledgeBase, LLMModel, TabValue, Tool, Voice } from '../types'; import { ASRModel, Assistant, KnowledgeBase, LLMModel, TabValue, Tool, Voice } from '../types';
import { createAssistant, deleteAssistant, fetchASRModels, fetchAssistants, fetchKnowledgeBases, fetchLLMModels, fetchTools, fetchVoices, updateAssistant as updateAssistantApi } from '../services/backendApi'; import { createAssistant, deleteAssistant, fetchASRModels, fetchAssistants, fetchKnowledgeBases, fetchLLMModels, fetchTools, fetchVoices, updateAssistant as updateAssistantApi } from '../services/backendApi';
const isSiliconflowVendor = (vendor?: string) => { const isOpenAICompatibleVendor = (vendor?: string) => {
const normalized = String(vendor || '').trim().toLowerCase(); const normalized = String(vendor || '').trim().toLowerCase();
return normalized === 'siliconflow' || normalized === '硅基流动'; return (
normalized === 'siliconflow' ||
normalized === '硅基流动' ||
normalized === 'openai compatible' ||
normalized === 'openai-compatible'
);
}; };
const SILICONFLOW_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B'; const OPENAI_COMPATIBLE_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B';
const buildSiliconflowVoiceKey = (voiceId: string, model?: string) => { const buildOpenAICompatibleVoiceKey = (voiceId: string, model?: string) => {
const id = String(voiceId || '').trim(); const id = String(voiceId || '').trim();
if (!id) return ''; if (!id) return '';
if (id.includes(':')) return id; if (id.includes(':')) return id;
return `${model || SILICONFLOW_DEFAULT_MODEL}:${id}`; return `${model || OPENAI_COMPATIBLE_DEFAULT_MODEL}:${id}`;
}; };
const resolveRuntimeTtsVoice = (selectedVoiceId: string, voice: Voice) => { const resolveRuntimeTtsVoice = (selectedVoiceId: string, voice: Voice) => {
const explicitKey = String(voice.voiceKey || '').trim(); const explicitKey = String(voice.voiceKey || '').trim();
if (!isSiliconflowVendor(voice.vendor)) { if (!isOpenAICompatibleVendor(voice.vendor)) {
return explicitKey || selectedVoiceId; return explicitKey || selectedVoiceId;
} }
if (voice.isSystem) { if (voice.isSystem) {
const canonical = buildSiliconflowVoiceKey(selectedVoiceId, voice.model); const canonical = buildOpenAICompatibleVoiceKey(selectedVoiceId, voice.model);
if (!explicitKey) return canonical; if (!explicitKey) return canonical;
const explicitSuffix = explicitKey.includes(':') ? explicitKey.split(':').pop() : explicitKey; const explicitSuffix = explicitKey.includes(':') ? explicitKey.split(':').pop() : explicitKey;
if (explicitSuffix && explicitSuffix !== selectedVoiceId) return canonical; if (explicitSuffix && explicitSuffix !== selectedVoiceId) return canonical;
} }
return explicitKey || buildSiliconflowVoiceKey(selectedVoiceId, voice.model); return explicitKey || buildOpenAICompatibleVoiceKey(selectedVoiceId, voice.model);
}; };
const renderToolIcon = (icon: string) => { const renderToolIcon = (icon: string) => {
@@ -1830,11 +1835,11 @@ export const DebugDrawer: React.FC<{
if (assistant.asrModelId) { if (assistant.asrModelId) {
const asr = asrModels.find((item) => item.id === assistant.asrModelId); const asr = asrModels.find((item) => item.id === assistant.asrModelId);
if (asr) { if (asr) {
const asrProvider = isSiliconflowVendor(asr.vendor) ? 'siliconflow' : 'buffered'; const asrProvider = isOpenAICompatibleVendor(asr.vendor) ? 'openai_compatible' : 'buffered';
services.asr = { services.asr = {
provider: asrProvider, provider: asrProvider,
model: asr.modelName || asr.name, model: asr.modelName || asr.name,
apiKey: asrProvider === 'siliconflow' ? asr.apiKey : null, apiKey: asrProvider === 'openai_compatible' ? asr.apiKey : null,
}; };
} else { } else {
warnings.push(`ASR model not found in loaded list: ${assistant.asrModelId}`); warnings.push(`ASR model not found in loaded list: ${assistant.asrModelId}`);
@@ -1844,12 +1849,12 @@ export const DebugDrawer: React.FC<{
if (assistant.voice) { if (assistant.voice) {
const voice = voices.find((item) => item.id === assistant.voice); const voice = voices.find((item) => item.id === assistant.voice);
if (voice) { if (voice) {
const ttsProvider = isSiliconflowVendor(voice.vendor) ? 'siliconflow' : 'edge'; const ttsProvider = isOpenAICompatibleVendor(voice.vendor) ? 'openai_compatible' : 'edge';
services.tts = { services.tts = {
enabled: ttsEnabled, enabled: ttsEnabled,
provider: ttsProvider, provider: ttsProvider,
model: voice.model, model: voice.model,
apiKey: ttsProvider === 'siliconflow' ? voice.apiKey : null, apiKey: ttsProvider === 'openai_compatible' ? voice.apiKey : null,
voice: resolveRuntimeTtsVoice(assistant.voice, voice), voice: resolveRuntimeTtsVoice(assistant.voice, voice),
speed: assistant.speed || voice.speed || 1.0, speed: assistant.speed || voice.speed || 1.0,
}; };

View File

@@ -13,7 +13,7 @@ const maskApiKey = (key?: string) => {
export const LLMLibraryPage: React.FC = () => { export const LLMLibraryPage: React.FC = () => {
const [models, setModels] = useState<LLMModel[]>([]); const [models, setModels] = useState<LLMModel[]>([]);
const [searchTerm, setSearchTerm] = useState(''); const [searchTerm, setSearchTerm] = useState('');
const [vendorFilter, setVendorFilter] = useState<string>('all'); const [vendorFilter, setVendorFilter] = useState<string>('OpenAI Compatible');
const [typeFilter, setTypeFilter] = useState<string>('all'); const [typeFilter, setTypeFilter] = useState<string>('all');
const [isAddModalOpen, setIsAddModalOpen] = useState(false); const [isAddModalOpen, setIsAddModalOpen] = useState(false);
const [editingModel, setEditingModel] = useState<LLMModel | null>(null); const [editingModel, setEditingModel] = useState<LLMModel | null>(null);
@@ -41,7 +41,7 @@ export const LLMLibraryPage: React.FC = () => {
m.name.toLowerCase().includes(q) || m.name.toLowerCase().includes(q) ||
(m.modelName || '').toLowerCase().includes(q) || (m.modelName || '').toLowerCase().includes(q) ||
(m.baseUrl || '').toLowerCase().includes(q); (m.baseUrl || '').toLowerCase().includes(q);
const matchesVendor = vendorFilter === 'all' || m.vendor === vendorFilter; const matchesVendor = m.vendor === vendorFilter;
const matchesType = typeFilter === 'all' || m.type === typeFilter; const matchesType = typeFilter === 'all' || m.type === typeFilter;
return matchesSearch && matchesVendor && matchesType; return matchesSearch && matchesVendor && matchesType;
}); });
@@ -64,8 +64,6 @@ export const LLMLibraryPage: React.FC = () => {
setModels((prev) => prev.filter((item) => item.id !== id)); setModels((prev) => prev.filter((item) => item.id !== id));
}; };
const vendorOptions = Array.from(new Set(models.map((m) => m.vendor).filter(Boolean)));
return ( return (
<div className="space-y-6 animate-in fade-in py-4 pb-10"> <div className="space-y-6 animate-in fade-in py-4 pb-10">
<div className="flex items-center justify-between"> <div className="flex items-center justify-between">
@@ -92,10 +90,7 @@ export const LLMLibraryPage: React.FC = () => {
value={vendorFilter} value={vendorFilter}
onChange={(e) => setVendorFilter(e.target.value)} onChange={(e) => setVendorFilter(e.target.value)}
> >
<option value="all"></option> <option value="OpenAI Compatible">OpenAI Compatible</option>
{vendorOptions.map((vendor) => (
<option key={vendor} value={vendor}>{vendor}</option>
))}
</select> </select>
</div> </div>
<div className="flex items-center space-x-2"> <div className="flex items-center space-x-2">
@@ -284,8 +279,6 @@ const LLMModelModal: React.FC<{
onChange={(e) => setVendor(e.target.value)} onChange={(e) => setVendor(e.target.value)}
> >
<option value="OpenAI Compatible">OpenAI Compatible</option> <option value="OpenAI Compatible">OpenAI Compatible</option>
<option value="OpenAI">OpenAI</option>
<option value="SiliconFlow">SiliconFlow</option>
</select> </select>
</div> </div>

View File

@@ -1,12 +1,12 @@
import React, { useEffect, useState, useRef } from 'react'; import React, { useEffect, useState, useRef } from 'react';
import { Search, Mic2, Play, Pause, Upload, Filter, Plus, Volume2, Sparkles, ChevronDown, Pencil, Trash2 } from 'lucide-react'; import { Search, Mic2, Play, Pause, Upload, Filter, Plus, Volume2, Pencil, Trash2 } from 'lucide-react';
import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI'; import { Button, Input, TableHeader, TableRow, TableHead, TableCell, Dialog, Badge } from '../components/UI';
import { Voice } from '../types'; import { Voice } from '../types';
import { createVoice, deleteVoice, fetchVoices, previewVoice, updateVoice } from '../services/backendApi'; import { createVoice, deleteVoice, fetchVoices, previewVoice, updateVoice } from '../services/backendApi';
const SILICONFLOW_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B'; const OPENAI_COMPATIBLE_DEFAULT_MODEL = 'FunAudioLLM/CosyVoice2-0.5B';
const buildSiliconflowVoiceKey = (rawId: string, model: string): string => { const buildOpenAICompatibleVoiceKey = (rawId: string, model: string): string => {
const id = (rawId || '').trim(); const id = (rawId || '').trim();
if (!id) return `${model}:anna`; if (!id) return `${model}:anna`;
return id.includes(':') ? id : `${model}:${id}`; return id.includes(':') ? id : `${model}:${id}`;
@@ -15,7 +15,7 @@ const buildSiliconflowVoiceKey = (rawId: string, model: string): string => {
export const VoiceLibraryPage: React.FC = () => { export const VoiceLibraryPage: React.FC = () => {
const [voices, setVoices] = useState<Voice[]>([]); const [voices, setVoices] = useState<Voice[]>([]);
const [searchTerm, setSearchTerm] = useState(''); const [searchTerm, setSearchTerm] = useState('');
const [vendorFilter, setVendorFilter] = useState<'all' | 'Ali' | 'Volcano' | 'Minimax' | '硅基流动' | 'SiliconFlow'>('all'); const [vendorFilter, setVendorFilter] = useState<'OpenAI Compatible'>('OpenAI Compatible');
const [genderFilter, setGenderFilter] = useState<'all' | 'Male' | 'Female'>('all'); const [genderFilter, setGenderFilter] = useState<'all' | 'Male' | 'Female'>('all');
const [langFilter, setLangFilter] = useState<'all' | 'zh' | 'en'>('all'); const [langFilter, setLangFilter] = useState<'all' | 'zh' | 'en'>('all');
@@ -44,7 +44,7 @@ export const VoiceLibraryPage: React.FC = () => {
const filteredVoices = voices.filter((voice) => { const filteredVoices = voices.filter((voice) => {
const matchesSearch = voice.name.toLowerCase().includes(searchTerm.toLowerCase()); const matchesSearch = voice.name.toLowerCase().includes(searchTerm.toLowerCase());
const matchesVendor = vendorFilter === 'all' || voice.vendor === vendorFilter; const matchesVendor = voice.vendor === vendorFilter;
const matchesGender = genderFilter === 'all' || voice.gender === genderFilter; const matchesGender = genderFilter === 'all' || voice.gender === genderFilter;
const matchesLang = langFilter === 'all' || voice.language === langFilter; const matchesLang = langFilter === 'all' || voice.language === langFilter;
return matchesSearch && matchesVendor && matchesGender && matchesLang; return matchesSearch && matchesVendor && matchesGender && matchesLang;
@@ -138,12 +138,7 @@ export const VoiceLibraryPage: React.FC = () => {
value={vendorFilter} value={vendorFilter}
onChange={(e) => setVendorFilter(e.target.value as any)} onChange={(e) => setVendorFilter(e.target.value as any)}
> >
<option value="all"></option> <option value="OpenAI Compatible">OpenAI Compatible</option>
<option value="硅基流动"> (SiliconFlow)</option>
<option value="SiliconFlow">SiliconFlow</option>
<option value="Ali"> (Ali)</option>
<option value="Volcano"> (Volcano)</option>
<option value="Minimax">Minimax</option>
</select> </select>
</div> </div>
<div className="flex items-center space-x-2"> <div className="flex items-center space-x-2">
@@ -187,15 +182,12 @@ export const VoiceLibraryPage: React.FC = () => {
<TableRow key={voice.id}> <TableRow key={voice.id}>
<TableCell className="font-medium"> <TableCell className="font-medium">
<div className="flex flex-col"> <div className="flex flex-col">
<span className="flex items-center text-white"> <span className="flex items-center text-white">{voice.name}</span>
{voice.vendor === '硅基流动' && <Sparkles className="w-3 h-3 text-primary mr-1.5" />}
{voice.name}
</span>
{voice.description && <span className="text-xs text-muted-foreground">{voice.description}</span>} {voice.description && <span className="text-xs text-muted-foreground">{voice.description}</span>}
</div> </div>
</TableCell> </TableCell>
<TableCell> <TableCell>
<Badge variant={voice.vendor === '硅基流动' ? 'default' : 'outline'}>{voice.vendor}</Badge> <Badge variant="outline">{voice.vendor}</Badge>
</TableCell> </TableCell>
<TableCell className="text-muted-foreground">{voice.gender === 'Male' ? '男' : '女'}</TableCell> <TableCell className="text-muted-foreground">{voice.gender === 'Male' ? '男' : '女'}</TableCell>
<TableCell className="text-muted-foreground">{voice.language === 'zh' ? '中文' : 'English'}</TableCell> <TableCell className="text-muted-foreground">{voice.language === 'zh' ? '中文' : 'English'}</TableCell>
@@ -254,17 +246,15 @@ const AddVoiceModal: React.FC<{
onSuccess: (voice: Voice) => Promise<void>; onSuccess: (voice: Voice) => Promise<void>;
initialVoice?: Voice; initialVoice?: Voice;
}> = ({ isOpen, onClose, onSuccess, initialVoice }) => { }> = ({ isOpen, onClose, onSuccess, initialVoice }) => {
const [vendor, setVendor] = useState<'硅基流动' | 'Ali' | 'Volcano' | 'Minimax'>('硅基流动'); const [vendor, setVendor] = useState<'OpenAI Compatible'>('OpenAI Compatible');
const [name, setName] = useState(''); const [name, setName] = useState('');
const [sfModel, setSfModel] = useState(SILICONFLOW_DEFAULT_MODEL); const [openaiCompatibleModel, setOpenaiCompatibleModel] = useState(OPENAI_COMPATIBLE_DEFAULT_MODEL);
const [sfVoiceId, setSfVoiceId] = useState('FunAudioLLM/CosyVoice2-0.5B:anna'); const [sfVoiceId, setSfVoiceId] = useState('FunAudioLLM/CosyVoice2-0.5B:anna');
const [sfSpeed, setSfSpeed] = useState(1); const [sfSpeed, setSfSpeed] = useState(1);
const [sfGain, setSfGain] = useState(0); const [sfGain, setSfGain] = useState(0);
const [sfPitch, setSfPitch] = useState(0); const [sfPitch, setSfPitch] = useState(0);
const [model, setModel] = useState('');
const [voiceKey, setVoiceKey] = useState('');
const [gender, setGender] = useState('Female'); const [gender, setGender] = useState('Female');
const [language, setLanguage] = useState('zh'); const [language, setLanguage] = useState('zh');
const [description, setDescription] = useState(''); const [description, setDescription] = useState('');
@@ -278,17 +268,15 @@ const AddVoiceModal: React.FC<{
useEffect(() => { useEffect(() => {
if (!initialVoice) return; if (!initialVoice) return;
const nextVendor = initialVoice.vendor === 'SiliconFlow' ? '硅基流动' : initialVoice.vendor; const nextVendor = 'OpenAI Compatible';
const nextModel = initialVoice.model || SILICONFLOW_DEFAULT_MODEL; const nextModel = initialVoice.model || OPENAI_COMPATIBLE_DEFAULT_MODEL;
const defaultVoiceKey = buildSiliconflowVoiceKey(initialVoice.id || initialVoice.name || '', nextModel); const defaultVoiceKey = buildOpenAICompatibleVoiceKey(initialVoice.id || initialVoice.name || '', nextModel);
setVendor((nextVendor as any) || '硅基流动'); setVendor(nextVendor);
setName(initialVoice.name || ''); setName(initialVoice.name || '');
setGender(initialVoice.gender || 'Female'); setGender(initialVoice.gender || 'Female');
setLanguage(initialVoice.language || 'zh'); setLanguage(initialVoice.language || 'zh');
setDescription(initialVoice.description || ''); setDescription(initialVoice.description || '');
setModel(initialVoice.model || ''); setOpenaiCompatibleModel(nextModel);
setVoiceKey(initialVoice.voiceKey || '');
setSfModel(nextModel);
setSfVoiceId((initialVoice.voiceKey || '').trim() || defaultVoiceKey); setSfVoiceId((initialVoice.voiceKey || '').trim() || defaultVoiceKey);
setSfSpeed(initialVoice.speed ?? 1); setSfSpeed(initialVoice.speed ?? 1);
setSfGain(initialVoice.gain ?? 0); setSfGain(initialVoice.gain ?? 0);
@@ -325,21 +313,21 @@ const AddVoiceModal: React.FC<{
return; return;
} }
const resolvedSiliconflowVoiceKey = (() => { const resolvedVoiceKey = (() => {
const current = (sfVoiceId || '').trim(); const current = (sfVoiceId || '').trim();
if (current) return current; if (current) return current;
return buildSiliconflowVoiceKey(initialVoice?.id || name, sfModel || SILICONFLOW_DEFAULT_MODEL); return buildOpenAICompatibleVoiceKey(initialVoice?.id || name, openaiCompatibleModel || OPENAI_COMPATIBLE_DEFAULT_MODEL);
})(); })();
const newVoice: Voice = { const newVoice: Voice = {
id: initialVoice?.id || `${vendor === '硅基流动' ? 'sf' : 'gen'}-${Date.now()}`, id: initialVoice?.id || `oa-${Date.now()}`,
name, name,
vendor, vendor,
gender, gender,
language, language,
description: description || (vendor === '硅基流动' ? `Model: ${sfModel}` : `Model: ${model}`), description: description || `Model: ${openaiCompatibleModel}`,
model: vendor === '硅基流动' ? sfModel : model, model: openaiCompatibleModel,
voiceKey: vendor === '硅基流动' ? resolvedSiliconflowVoiceKey : voiceKey, voiceKey: resolvedVoiceKey,
apiKey, apiKey,
baseUrl, baseUrl,
speed: sfSpeed, speed: sfSpeed,
@@ -351,10 +339,8 @@ const AddVoiceModal: React.FC<{
setIsSaving(true); setIsSaving(true);
await onSuccess(newVoice); await onSuccess(newVoice);
setName(''); setName('');
setVendor('硅基流动'); setVendor('OpenAI Compatible');
setDescription(''); setDescription('');
setModel('');
setVoiceKey('');
setApiKey(''); setApiKey('');
setBaseUrl(''); setBaseUrl('');
} catch (error: any) { } catch (error: any) {
@@ -381,19 +367,7 @@ const AddVoiceModal: React.FC<{
<div className="space-y-4 max-h-[75vh] overflow-y-auto px-1 custom-scrollbar"> <div className="space-y-4 max-h-[75vh] overflow-y-auto px-1 custom-scrollbar">
<div className="space-y-1.5"> <div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Vendor)</label> <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Vendor)</label>
<div className="relative"> <Input value={vendor} readOnly className="h-10 border border-white/10 bg-white/5" />
<select
className="flex h-10 w-full rounded-md border border-white/10 bg-white/5 px-3 py-1 text-sm shadow-sm transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-primary/50 text-foreground appearance-none cursor-pointer [&>option]:bg-card"
value={vendor}
onChange={(e) => setVendor(e.target.value as any)}
>
<option value="硅基流动"> (SiliconFlow)</option>
<option value="Ali"> (Ali)</option>
<option value="Volcano"> (Volcano)</option>
<option value="Minimax">Minimax</option>
</select>
<ChevronDown className="absolute right-3 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground pointer-events-none" />
</div>
</div> </div>
<div className="h-px bg-white/5"></div> <div className="h-px bg-white/5"></div>
@@ -403,15 +377,14 @@ const AddVoiceModal: React.FC<{
<Input value={name} onChange={(e) => setName(e.target.value)} placeholder="例如: 客服小美" /> <Input value={name} onChange={(e) => setName(e.target.value)} placeholder="例如: 客服小美" />
</div> </div>
{vendor === '硅基流动' ? (
<div className="space-y-4 animate-in fade-in slide-in-from-top-1 duration-200"> <div className="space-y-4 animate-in fade-in slide-in-from-top-1 duration-200">
<div className="grid grid-cols-2 gap-4"> <div className="grid grid-cols-2 gap-4">
<div className="space-y-1.5"> <div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Model)</label> <label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"> (Model)</label>
<Input <Input
className="font-mono text-xs" className="font-mono text-xs"
value={sfModel} value={openaiCompatibleModel}
onChange={(e) => setSfModel(e.target.value)} onChange={(e) => setOpenaiCompatibleModel(e.target.value)}
placeholder="例如: FunAudioLLM/CosyVoice2-0.5B" placeholder="例如: FunAudioLLM/CosyVoice2-0.5B"
/> />
</div> </div>
@@ -445,20 +418,6 @@ const AddVoiceModal: React.FC<{
</div> </div>
</div> </div>
</div> </div>
) : (
<div className="space-y-4 animate-in fade-in slide-in-from-top-1 duration-200">
<div className="grid grid-cols-2 gap-4">
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
<Input value={model} onChange={(e) => setModel(e.target.value)} placeholder="API Model Key" />
</div>
<div className="space-y-1.5">
<label className="text-[10px] font-black text-muted-foreground uppercase tracking-widest block"></label>
<Input value={voiceKey} onChange={(e) => setVoiceKey(e.target.value)} placeholder="Voice Key" />
</div>
</div>
</div>
)}
<div className="grid grid-cols-2 gap-4"> <div className="grid grid-cols-2 gap-4">
<div className="space-y-1.5"> <div className="space-y-1.5">
@@ -560,7 +519,7 @@ const CloneVoiceModal: React.FC<{
const newVoice: Voice = { const newVoice: Voice = {
id: `v-${Date.now()}`, id: `v-${Date.now()}`,
name, name,
vendor: 'Volcano', vendor: 'OpenAI Compatible',
gender: 'Female', gender: 'Female',
language: 'zh', language: 'zh',
description: description || 'User cloned voice', description: description || 'User cloned voice',

View File

@@ -55,8 +55,11 @@ const mapVoice = (raw: AnyRecord): Voice => ({
id: String(readField(raw, ['id'], '')), id: String(readField(raw, ['id'], '')),
name: readField(raw, ['name'], ''), name: readField(raw, ['name'], ''),
vendor: ((): string => { vendor: ((): string => {
const vendor = String(readField(raw, ['vendor'], '')); const vendor = String(readField(raw, ['vendor'], '')).trim().toLowerCase();
return vendor.toLowerCase() === 'siliconflow' ? '硅基流动' : vendor; if (vendor === 'siliconflow' || vendor === '硅基流动' || vendor === 'openai-compatible') {
return 'OpenAI Compatible';
}
return String(readField(raw, ['vendor'], 'OpenAI Compatible')) || 'OpenAI Compatible';
})(), })(),
gender: readField(raw, ['gender'], ''), gender: readField(raw, ['gender'], ''),
language: readField(raw, ['language'], ''), language: readField(raw, ['language'], ''),
@@ -296,7 +299,7 @@ export const createVoice = async (data: Partial<Voice>): Promise<Voice> => {
const payload = { const payload = {
id: data.id || undefined, id: data.id || undefined,
name: data.name || 'New Voice', name: data.name || 'New Voice',
vendor: data.vendor === '硅基流动' ? 'SiliconFlow' : (data.vendor || 'SiliconFlow'), vendor: data.vendor || 'OpenAI Compatible',
gender: data.gender || 'Female', gender: data.gender || 'Female',
language: data.language || 'zh', language: data.language || 'zh',
description: data.description || '', description: data.description || '',
@@ -316,7 +319,7 @@ export const createVoice = async (data: Partial<Voice>): Promise<Voice> => {
export const updateVoice = async (id: string, data: Partial<Voice>): Promise<Voice> => { export const updateVoice = async (id: string, data: Partial<Voice>): Promise<Voice> => {
const payload = { const payload = {
name: data.name, name: data.name,
vendor: data.vendor === '硅基流动' ? 'SiliconFlow' : data.vendor, vendor: data.vendor,
gender: data.gender, gender: data.gender,
language: data.language, language: data.language,
description: data.description, description: data.description,

View File

@@ -200,7 +200,7 @@ export const mockLLMModels: LLMModel[] = [
{ id: 'm1', name: 'GPT-4o', vendor: 'OpenAI Compatible', type: 'text', baseUrl: 'https://api.openai.com/v1', apiKey: 'sk-***', temperature: 0.7 }, { id: 'm1', name: 'GPT-4o', vendor: 'OpenAI Compatible', type: 'text', baseUrl: 'https://api.openai.com/v1', apiKey: 'sk-***', temperature: 0.7 },
{ id: 'm2', name: 'DeepSeek-V3', vendor: 'OpenAI Compatible', type: 'text', baseUrl: 'https://api.deepseek.com', apiKey: 'sk-***', temperature: 0.5 }, { id: 'm2', name: 'DeepSeek-V3', vendor: 'OpenAI Compatible', type: 'text', baseUrl: 'https://api.deepseek.com', apiKey: 'sk-***', temperature: 0.5 },
{ id: 'm3', name: 'text-embedding-3-small', vendor: 'OpenAI Compatible', type: 'embedding', baseUrl: 'https://api.openai.com/v1', apiKey: 'sk-***' }, { id: 'm3', name: 'text-embedding-3-small', vendor: 'OpenAI Compatible', type: 'embedding', baseUrl: 'https://api.openai.com/v1', apiKey: 'sk-***' },
{ id: 'm4', name: 'bge-reranker-v2-m3', vendor: 'SiliconFlow', type: 'rerank', baseUrl: 'https://api.siliconflow.cn/v1', apiKey: 'sk-***' }, { id: 'm4', name: 'bge-reranker-v2-m3', vendor: 'OpenAI Compatible', type: 'rerank', baseUrl: 'https://api.siliconflow.cn/v1', apiKey: 'sk-***' },
]; ];
export const mockASRModels: ASRModel[] = [ export const mockASRModels: ASRModel[] = [