From 21f6c2afa59fb90a9dfd2ae06eeb815c14a78837 Mon Sep 17 00:00:00 2001
From: Sathwika Reddy Geereddy <sathwika77@gmail.com>
Date: Thu, 23 Apr 2026 18:31:27 +0530
Subject: [PATCH] Update NVIDIA STT services for Nemotron Speech defaults and
 config parity (#4269)

* Update NVIDIA STT services for Nemotron Speech defaults and config parity

* Add changelog entry for PR #4269

* Initialize boosted LM settings defaults in streaming STT

* Align NVIDIA STT language handling with other STT services

* Add `finalized` flag to NVIDIA STT final transcripts; remove processing latency logs

* Change interim transcription logging to trace level.

---------

Co-authored-by: sathwika <geereddysath@nvidia.com>
Co-authored-by: filipi87 <filipi87@gmail.com>
---
 changelog/4269.changed.md          |   2 +
 src/pipecat/services/nvidia/stt.py | 321 +++++++++++++++++++----------
 2 files changed, 212 insertions(+), 111 deletions(-)
 create mode 100644 changelog/4269.changed.md

diff --git a/changelog/4269.changed.md b/changelog/4269.changed.md
new file mode 100644
index 000000000..e88c45b2d
--- /dev/null
+++ b/changelog/4269.changed.md
@@ -0,0 +1,2 @@
+- Updated NVIDIA STT services to align with Nemotron Speech defaults and configuration: `api_key` is now optional for local deployments, additional recognition settings are available (including alternatives, word offsets, and diarization), and streaming/segmented docs now reflect Nemotron Speech APIs.
+- NVIDIA streaming STT now sets `TranscriptionFrame.finalized=True` when the provider marks a result as final, and preserves `language` on both `TranscriptionFrame` and `InterimTranscriptionFrame`.
diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py
index 3181b53ba..d8b87aab7 100644
--- a/src/pipecat/services/nvidia/stt.py
+++ b/src/pipecat/services/nvidia/stt.py
@@ -2,9 +2,15 @@
 # Copyright (c) 2024-2026, Daily
 #
 # SPDX-License-Identifier: BSD 2-Clause License
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
 #
 
-"""NVIDIA Riva Speech-to-Text service implementations for real-time and batch transcription."""
+"""NVIDIA Nemotron Speech-to-Text service implementations for real-time and batch transcription.
+
+Refer to the NVIDIA ASR NIM documentation for usage, customization,
+and local deployment steps:
+https://docs.nvidia.com/nim/speech/latest/asr/
+"""
 
 import asyncio
 from collections.abc import AsyncGenerator, Mapping
@@ -32,25 +38,28 @@ from pipecat.utils.time import time_now_iso8601
 from pipecat.utils.tracing.service_decorators import traced_stt
 
 try:
+    import grpc
     import riva.client
 
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
-    logger.error("In order to use NVIDIA Riva STT, you need to `pip install pipecat-ai[nvidia]`.")
+    logger.error(
+        "In order to use NVIDIA Nemotron Speech STT, you need to `pip install pipecat-ai[nvidia]`."
+    )
     raise Exception(f"Missing module: {e}")
 
 
-def language_to_nvidia_riva_language(language: Language) -> str | None:
-    """Maps Language enum to NVIDIA Riva ASR language codes.
+def language_to_nvidia_nemotron_speech_language(language: Language) -> str | None:
+    """Maps Language enum to NVIDIA Nemotron Speech ASR language codes.
 
     Source:
-    https://docs.nvidia.com/deeplearning/riva/user-guide/docs/asr/asr-riva-build-table.html?highlight=fr%20fr
+    https://docs.nvidia.com/nim/speech/latest/reference/support-matrix/asr.html#supported-languages-by-model-type
 
     Args:
         language: Language enum value.
 
     Returns:
-        Optional[str]: NVIDIA Riva language code or None if not supported.
+        str | None: NVIDIA Nemotron Speech language code or None if not supported.
     """
     LANGUAGE_MAP = {
         # Arabic
@@ -93,15 +102,8 @@ def language_to_nvidia_riva_language(language: Language) -> str | None:
 
 
 @dataclass
-class NvidiaSTTSettings(STTSettings):
-    """Settings for NvidiaSTTService."""
-
-    pass
-
-
-@dataclass
-class NvidiaSegmentedSTTSettings(STTSettings):
-    """Settings for NvidiaSegmentedSTTService.
+class _NvidiaBaseSTTSettings(STTSettings):
+    """Shared settings for NVIDIA Nemotron Speech STT services.
 
     Parameters:
         profanity_filter: Whether to filter profanity from results.
@@ -109,6 +111,10 @@ class NvidiaSegmentedSTTSettings(STTSettings):
         verbatim_transcripts: Whether to return verbatim transcripts.
         boosted_lm_words: List of words to boost in language model.
         boosted_lm_score: Score boost for specified words.
+        max_alternatives: Maximum number of recognition alternatives.
+        word_time_offsets: Whether to include word-level time offsets.
+        speaker_diarization: Whether to enable speaker diarization.
+        diarization_max_speakers: Maximum number of speakers for diarization.
     """
 
     profanity_filter: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
@@ -116,12 +122,34 @@ class NvidiaSegmentedSTTSettings(STTSettings):
     verbatim_transcripts: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
     boosted_lm_words: list[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
     boosted_lm_score: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+    max_alternatives: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+    word_time_offsets: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+    speaker_diarization: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+    diarization_max_speakers: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+
+
+@dataclass
+class NvidiaSTTSettings(_NvidiaBaseSTTSettings):
+    """Settings for NvidiaSTTService.
+
+    Parameters:
+        interim_results: Whether to return interim (partial) results.
+    """
+
+    interim_results: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+
+
+@dataclass
+class NvidiaSegmentedSTTSettings(_NvidiaBaseSTTSettings):
+    """Settings for NvidiaSegmentedSTTService."""
+
+    pass
 
 
 class NvidiaSTTService(STTService):
-    """Real-time speech-to-text service using NVIDIA Riva streaming ASR.
+    """Real-time speech-to-text service using NVIDIA Nemotron Speech streaming ASR.
 
-    Provides real-time transcription capabilities using NVIDIA's Riva ASR models
+    Provides real-time transcription capabilities using NVIDIA's Nemotron Speech ASR models
     through streaming recognition. Supports interim results and continuous audio
     processing for low-latency applications.
     """
@@ -130,7 +158,7 @@ class NvidiaSTTService(STTService):
     _settings: Settings
 
     class InputParams(BaseModel):
-        """Configuration parameters for NVIDIA Riva STT service.
+        """Configuration parameters for NVIDIA Nemotron Speech STT service.
 
         .. deprecated:: 0.0.105
             Use ``settings=NvidiaSTTService.Settings(...)`` instead.
@@ -144,32 +172,52 @@ class NvidiaSTTService(STTService):
     def __init__(
         self,
         *,
-        api_key: str,
+        api_key: str | None = None,
         server: str = "grpc.nvcf.nvidia.com:443",
         model_function_map: Mapping[str, str] = {
-            "function_id": "1598d209-5e27-4d3c-8079-4751568b1081",
-            "model_name": "parakeet-ctc-1.1b-asr",
+            "function_id": "bb0837de-8c7b-481f-9ec8-ef5663e9c1fa",
+            "model_name": "nemotron-asr-streaming",
         },
         sample_rate: int | None = None,
         params: InputParams | None = None,
         use_ssl: bool = True,
+        audio_channel_count: int = 1,
+        start_history: int = -1,
+        start_threshold: float = -1.0,
+        stop_history: int = 320,
+        stop_threshold: float = -1.0,
+        stop_history_eou: int = -1,
+        stop_threshold_eou: float = -1.0,
+        custom_configuration: str = "",
         settings: Settings | None = None,
         ttfs_p99_latency: float | None = NVIDIA_TTFS_P99,
         **kwargs,
     ):
-        """Initialize the NVIDIA Riva STT service.
+        """Initialize the NVIDIA Nemotron Speech STT service.
 
         Args:
-            api_key: NVIDIA API key for authentication.
-            server: NVIDIA Riva server address. Defaults to NVIDIA Cloud Function endpoint.
+            api_key: NVIDIA API key for authentication. Required when using the
+                cloud endpoint. Not needed for local deployments.
+            server: NVIDIA Nemotron Speech server address. Defaults to NVIDIA Cloud Function endpoint.
+                For local deployments, pass the local address (e.g. ``localhost:50051``).
             model_function_map: Mapping containing 'function_id' and 'model_name' for the ASR model.
             sample_rate: Audio sample rate in Hz. If None, uses pipeline default.
-            params: Additional configuration parameters for NVIDIA Riva.
+            params: Additional configuration parameters for NVIDIA Nemotron Speech.
 
                 .. deprecated:: 0.0.105
                     Use ``settings=NvidiaSTTService.Settings(...)`` instead.
 
-            use_ssl: Whether to use SSL for the NVIDIA Riva server. Defaults to True.
+            use_ssl: Whether to use SSL for the gRPC connection. Defaults to True
+                for the NVIDIA cloud endpoint. Set to False for local deployments.
+            audio_channel_count: Number of audio channels.
+            start_history: VAD start history in frames. Use -1 for Nemotron Speech default.
+            start_threshold: VAD start threshold. Use -1.0 for Nemotron Speech default.
+            stop_history: VAD stop history in frames. Defaults to 320; use -1 for Nemotron Speech default.
+            stop_threshold: VAD stop threshold. Use -1.0 for Nemotron Speech default.
+            stop_history_eou: End-of-utterance stop history in frames. Use -1 for Nemotron Speech default.
+            stop_threshold_eou: End-of-utterance stop threshold. Use -1.0 for Nemotron Speech default.
+            custom_configuration: Custom Nemotron Speech configuration string
+                (e.g. ``"enable_vad_endpointing:true,neural_vad.onset:0.65"``).
             settings: Runtime-updatable settings. When provided alongside deprecated
                 parameters, ``settings`` values take precedence.
             ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
@@ -180,6 +228,16 @@ class NvidiaSTTService(STTService):
         default_settings = self.Settings(
             model=model_function_map.get("model_name"),
             language=Language.EN_US,
+            profanity_filter=False,
+            automatic_punctuation=True,
+            verbatim_transcripts=True,
+            boosted_lm_words=None,
+            boosted_lm_score=4.0,
+            max_alternatives=1,
+            interim_results=True,
+            word_time_offsets=False,
+            speaker_diarization=False,
+            diarization_max_speakers=0,
         )
 
         # 2. (no deprecated direct args for this service)
@@ -204,13 +262,14 @@ class NvidiaSTTService(STTService):
         self._server = server
         self._api_key = api_key
         self._use_ssl = use_ssl
-        self._start_history = -1
-        self._start_threshold = -1.0
-        self._stop_history = -1
-        self._stop_threshold = -1.0
-        self._stop_history_eou = -1
-        self._stop_threshold_eou = -1.0
-        self._custom_configuration = ""
+        self._audio_channel_count = audio_channel_count
+        self._start_history = start_history
+        self._start_threshold = start_threshold
+        self._stop_history = stop_history
+        self._stop_threshold = stop_threshold
+        self._stop_history_eou = stop_history_eou
+        self._stop_threshold_eou = stop_threshold_eou
+        self._custom_configuration = custom_configuration
         self._function_id = model_function_map.get("function_id")
 
         self._asr_service = None
@@ -219,31 +278,38 @@ class NvidiaSTTService(STTService):
         self._thread_task = None
 
     def _initialize_client(self):
-        metadata = [
-            ["function-id", self._function_id],
-            ["authorization", f"Bearer {self._api_key}"],
-        ]
+        """Initialize the NVIDIA Nemotron Speech ASR client with authentication metadata."""
+        metadata = []
+        if self._function_id:
+            metadata.append(["function-id", self._function_id])
+        if self._api_key:
+            metadata.append(["authorization", f"Bearer {self._api_key}"])
         auth = riva.client.Auth(None, self._use_ssl, self._server, metadata)
 
         self._asr_service = riva.client.ASRService(auth)
 
     def _create_recognition_config(self):
-        """Create the NVIDIA Riva ASR recognition configuration."""
+        """Create the NVIDIA Nemotron Speech ASR recognition configuration."""
+        s = self._settings
         config = riva.client.StreamingRecognitionConfig(
             config=riva.client.RecognitionConfig(
                 encoding=riva.client.AudioEncoding.LINEAR_PCM,
-                language_code=self._settings.language,
+                language_code=s.language,
                 model="",
-                max_alternatives=1,
-                profanity_filter=False,
-                enable_automatic_punctuation=True,
-                verbatim_transcripts=True,
+                max_alternatives=s.max_alternatives,
+                profanity_filter=s.profanity_filter,
+                enable_automatic_punctuation=s.automatic_punctuation,
+                verbatim_transcripts=s.verbatim_transcripts,
                 sample_rate_hertz=self.sample_rate,
-                audio_channel_count=1,
+                audio_channel_count=self._audio_channel_count,
+                enable_word_time_offsets=s.word_time_offsets,
             ),
-            interim_results=True,
+            interim_results=s.interim_results,
         )
 
+        if s.boosted_lm_words:
+            riva.client.add_word_boosting_to_config(config, s.boosted_lm_words, s.boosted_lm_score)
+
         riva.client.add_endpoint_parameters_to_config(
             config,
             self._start_history,
@@ -253,7 +319,14 @@ class NvidiaSTTService(STTService):
             self._stop_threshold,
             self._stop_threshold_eou,
         )
-        riva.client.add_custom_configuration_to_config(config, self._custom_configuration)
+
+        if self._custom_configuration:
+            riva.client.add_custom_configuration_to_config(config, self._custom_configuration)
+
+        if s.speaker_diarization:
+            riva.client.add_speaker_diarization_to_config(
+                config, s.speaker_diarization, s.diarization_max_speakers
+            )
 
         return config
 
@@ -261,15 +334,31 @@ class NvidiaSTTService(STTService):
         """Check if this service can generate processing metrics.
 
         Returns:
-            False - this service does not support metrics generation.
+            True - this service supports metrics generation.
         """
-        return False
+        return True
+
+    async def _update_settings(self, delta: STTSettings) -> dict[str, Any]:
+        """Apply a settings delta and sync internal state.
+
+        Args:
+            delta: A :class:`STTSettings` (or ``NvidiaSTTService.Settings``) delta.
+
+        Returns:
+            Dict mapping changed field names to their previous values.
+        """
+        changed = await super()._update_settings(delta)
+
+        if changed and self._config is not None:
+            self._config = self._create_recognition_config()
+
+        return changed
 
     async def set_model(self, model: str):
         """Set the ASR model for transcription.
 
         .. deprecated:: 0.0.104
-            Model cannot be changed after initialization for NVIDIA Riva streaming STT.
+            Model cannot be changed after initialization for NVIDIA Nemotron Speech streaming STT.
             Set model and function id in the constructor instead.
 
             Example::
@@ -288,7 +377,7 @@ class NvidiaSTTService(STTService):
             warnings.simplefilter("always")
             warnings.warn(
                 "'set_model' is deprecated. Model cannot be changed after initialization"
-                " for NVIDIA Riva streaming STT. Set model and function id in the"
+                " for NVIDIA Nemotron Speech streaming STT. Set model and function id in the"
                 " constructor instead, e.g.:"
                 " NvidiaSTTService(api_key=..., model_function_map="
                 "{'function_id': '<UUID>', 'model_name': '<model_name>'})",
@@ -297,7 +386,7 @@ class NvidiaSTTService(STTService):
             )
 
     async def start(self, frame: StartFrame):
-        """Start the NVIDIA Riva STT service and initialize streaming configuration.
+        """Start the NVIDIA Nemotron Speech STT service and initialize streaming configuration.
 
         Args:
             frame: StartFrame indicating pipeline start.
@@ -314,7 +403,7 @@ class NvidiaSTTService(STTService):
         logger.debug(f"Initialized NvidiaSTTService with model: {self._settings.model}")
 
     async def stop(self, frame: EndFrame):
-        """Stop the NVIDIA Riva STT service and clean up resources.
+        """Stop the NVIDIA Nemotron Speech STT service and clean up resources.
 
         Args:
             frame: EndFrame indicating pipeline stop.
@@ -323,7 +412,7 @@ class NvidiaSTTService(STTService):
         await self._stop_tasks()
 
     async def cancel(self, frame: CancelFrame):
-        """Cancel the NVIDIA Riva STT service operation.
+        """Cancel the NVIDIA Nemotron Speech STT service operation.
 
         Args:
             frame: CancelFrame indicating operation cancellation.
@@ -337,14 +426,25 @@ class NvidiaSTTService(STTService):
             self._thread_task = None
 
     def _response_handler(self):
-        responses = self._asr_service.streaming_response_generator(
-            audio_chunks=self,
-            streaming_config=self._config,
-        )
-        for response in responses:
-            if not response.results:
-                continue
-            asyncio.run_coroutine_threadsafe(self._handle_response(response), self.get_event_loop())
+        try:
+            responses = self._asr_service.streaming_response_generator(
+                audio_chunks=self,
+                streaming_config=self._config,
+            )
+            for response in responses:
+                if not response.results:
+                    continue
+                asyncio.run_coroutine_threadsafe(
+                    self._handle_response(response), self.get_event_loop()
+                )
+        except grpc.RpcError as e:
+            status = e.code().name if hasattr(e, "code") else "UNKNOWN"
+            details = e.details() if hasattr(e, "details") else str(e)
+            logger.error(f"{self} gRPC streaming error ({status}): {details}")
+            asyncio.run_coroutine_threadsafe(
+                self.push_error(f"{self} STT streaming failed (gRPC {status}): {details}"),
+                self.get_event_loop(),
+            )
 
     async def _thread_task_handler(self):
         try:
@@ -370,6 +470,7 @@ class NvidiaSTTService(STTService):
             if transcript and len(transcript) > 0:
                 if result.is_final:
                     await self.stop_processing_metrics()
+                    logger.debug(f"Transcription: [{transcript}]")
                     await self.push_frame(
                         TranscriptionFrame(
                             transcript,
@@ -377,6 +478,7 @@ class NvidiaSTTService(STTService):
                             time_now_iso8601(),
                             self._settings.language,
                             result=result,
+                            finalized=True,
                         )
                     )
                     await self._handle_transcription(
@@ -394,6 +496,7 @@ class NvidiaSTTService(STTService):
                             result=result,
                         )
                     )
+                    logger.trace(f"Interim Transcription: [{transcript}]")
 
     async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
         """Process audio data for speech-to-text transcription.
@@ -409,7 +512,7 @@ class NvidiaSTTService(STTService):
         yield None
 
     def __next__(self) -> bytes:
-        """Get the next audio chunk for NVIDIA Riva processing.
+        """Get the next audio chunk for NVIDIA Nemotron Speech processing.
 
         Returns:
             Audio bytes from the queue.
@@ -422,7 +525,8 @@ class NvidiaSTTService(STTService):
 
         try:
             future = asyncio.run_coroutine_threadsafe(self._queue.get(), self.get_event_loop())
-            return future.result()
+            audio = future.result()
+            return audio
         except FuturesCancelledError:
             raise StopIteration
 
@@ -436,9 +540,9 @@ class NvidiaSTTService(STTService):
 
 
 class NvidiaSegmentedSTTService(SegmentedSTTService):
-    """Speech-to-text service using NVIDIA Riva's offline/batch models.
+    """Speech-to-text service using NVIDIA Nemotron Speech's offline/batch models.
 
-    By default, his service uses NVIDIA's Riva Canary ASR API to perform speech-to-text
+    By default, this service uses NVIDIA's Nemotron Speech Canary ASR API to perform speech-to-text
     transcription on audio segments. It inherits from SegmentedSTTService to handle
     audio buffering and speech detection.
     """
@@ -447,7 +551,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
     _settings: Settings
 
     class InputParams(BaseModel):
-        """Configuration parameters for NVIDIA Riva segmented STT service.
+        """Configuration parameters for NVIDIA Nemotron Speech segmented STT service.
 
         .. deprecated:: 0.0.105
             Use ``settings=NvidiaSegmentedSTTService.Settings(...)`` instead.
@@ -471,7 +575,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
     def __init__(
         self,
         *,
-        api_key: str,
+        api_key: str | None = None,
         server: str = "grpc.nvcf.nvidia.com:443",
         model_function_map: Mapping[str, str] = {
             "function_id": "ee8dc628-76de-4acc-8595-1836e7e857bd",
@@ -480,28 +584,34 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
         sample_rate: int | None = None,
         params: InputParams | None = None,
         use_ssl: bool = True,
+        custom_configuration: str = "",
         settings: Settings | None = None,
         ttfs_p99_latency: float | None = NVIDIA_TTFS_P99,
         **kwargs,
     ):
-        """Initialize the NVIDIA Riva segmented STT service.
+        """Initialize the NVIDIA Nemotron Speech segmented STT service.
 
         Args:
-            api_key: NVIDIA API key for authentication
-            server: NVIDIA Riva server address (defaults to NVIDIA Cloud Function endpoint)
-            model_function_map: Mapping of model name and its corresponding NVIDIA Cloud Function ID
-            sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate
-            params: Additional configuration parameters for NVIDIA Riva
+            api_key: NVIDIA API key for authentication. Required when using the
+                cloud endpoint. Not needed for local deployments.
+            server: NVIDIA Nemotron Speech server address. Defaults to NVIDIA Cloud Function endpoint.
+                For local deployments, pass the local address (e.g. ``localhost:50051``).
+            model_function_map: Mapping of model name and its corresponding NVIDIA Cloud Function ID.
+            sample_rate: Audio sample rate in Hz. If not provided, uses the pipeline's rate.
+            params: Additional configuration parameters for NVIDIA Nemotron Speech.
 
                 .. deprecated:: 0.0.105
                     Use ``settings=NvidiaSegmentedSTTService.Settings(...)`` instead.
 
-            use_ssl: Whether to use SSL for the NVIDIA Riva server. Defaults to True.
+            use_ssl: Whether to use SSL for the gRPC connection. Defaults to True
+                for the NVIDIA cloud endpoint. Set to False for local deployments.
+            custom_configuration: Custom Nemotron Speech configuration string
+                (e.g. ``"enable_vad_endpointing:true,neural_vad.onset:0.65"``).
             settings: Runtime-updatable settings. When provided alongside deprecated
                 parameters, ``settings`` values take precedence.
             ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
                 Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
-            **kwargs: Additional arguments passed to SegmentedSTTService
+            **kwargs: Additional arguments passed to SegmentedSTTService.
         """
         # 1. Initialize default_settings with hardcoded defaults
         default_settings = self.Settings(
@@ -512,6 +622,8 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
             verbatim_transcripts=False,
             boosted_lm_words=None,
             boosted_lm_score=4.0,
+            max_alternatives=1,
+            word_time_offsets=False,
         )
 
         # 2. (no deprecated direct args for this service)
@@ -538,81 +650,64 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
             **kwargs,
         )
 
-        # Initialize NVIDIA Riva settings
+        # Initialize NVIDIA Nemotron Speech settings
         self._api_key = api_key
         self._server = server
         self._use_ssl = use_ssl
         self._function_id = model_function_map.get("function_id")
-
-        # Voice activity detection thresholds (use NVIDIA Riva defaults)
-        self._start_history = -1
-        self._start_threshold = -1.0
-        self._stop_history = -1
-        self._stop_threshold = -1.0
-        self._stop_history_eou = -1
-        self._stop_threshold_eou = -1.0
-        self._custom_configuration = ""
+        self._custom_configuration = custom_configuration
 
         self._config = None
         self._asr_service = None
 
     def language_to_service_language(self, language: Language) -> str | None:
-        """Convert pipecat Language enum to NVIDIA Riva's language code.
+        """Convert pipecat Language enum to NVIDIA Nemotron Speech's language code.
 
         Args:
             language: Language enum value.
 
         Returns:
-            NVIDIA Riva language code or None if not supported.
+            NVIDIA Nemotron Speech language code or None if not supported.
         """
-        return language_to_nvidia_riva_language(language)
+        return language_to_nvidia_nemotron_speech_language(language)
 
     def _initialize_client(self):
-        """Initialize the NVIDIA Riva ASR client with authentication metadata."""
+        """Initialize the NVIDIA Nemotron Speech ASR client with authentication metadata."""
         if self._asr_service is not None:
             return
 
         # Set up authentication metadata for NVIDIA Cloud Functions
-        metadata = [
-            ["function-id", self._function_id],
-            ["authorization", f"Bearer {self._api_key}"],
-        ]
+        metadata = []
+        if self._function_id:
+            metadata.append(["function-id", self._function_id])
+        if self._api_key:
+            metadata.append(["authorization", f"Bearer {self._api_key}"])
 
         # Create authenticated client
         auth = riva.client.Auth(None, self._use_ssl, self._server, metadata)
         self._asr_service = riva.client.ASRService(auth)
 
     def _get_language_code(self) -> str:
-        """Get the current NVIDIA Riva language code string."""
+        """Get the current NVIDIA Nemotron Speech language code string."""
         return self._settings.language or "en-US"
 
     def _create_recognition_config(self):
-        """Create the NVIDIA Riva ASR recognition configuration."""
+        """Create the NVIDIA Nemotron Speech ASR recognition configuration."""
         # Create base configuration
         s = self._settings
         config = riva.client.RecognitionConfig(
             language_code=self._get_language_code(),
-            max_alternatives=1,
+            max_alternatives=s.max_alternatives,
             profanity_filter=s.profanity_filter,
             enable_automatic_punctuation=s.automatic_punctuation,
             verbatim_transcripts=s.verbatim_transcripts,
+            enable_word_time_offsets=s.word_time_offsets,
         )
 
         # Add word boosting if specified
         if s.boosted_lm_words:
             riva.client.add_word_boosting_to_config(config, s.boosted_lm_words, s.boosted_lm_score)
 
-        # Add voice activity detection parameters
-        riva.client.add_endpoint_parameters_to_config(
-            config,
-            self._start_history,
-            self._start_threshold,
-            self._stop_history,
-            self._stop_history_eou,
-            self._stop_threshold,
-            self._stop_threshold_eou,
-        )
-
         # Add any custom configuration
         if self._custom_configuration:
             riva.client.add_custom_configuration_to_config(config, self._custom_configuration)
@@ -676,7 +771,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
 
             await self.start_processing_metrics()
 
-            # Process audio with NVIDIA Riva ASR - explicitly request non-future response
+            # Process audio with NVIDIA Nemotron Speech ASR - explicitly request non-future response
             raw_response = self._asr_service.offline_recognize(audio, self._config, future=False)
 
             await self.stop_processing_metrics()
@@ -712,10 +807,14 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
                         await self._handle_transcription(text, True, self._settings.language)
 
             if not transcription_found:
-                logger.debug(f"{self}: No transcription results found in NVIDIA Riva response")
+                logger.debug(
+                    f"{self}: No transcription results found in NVIDIA Nemotron Speech response"
+                )
         except AttributeError as ae:
-            logger.error(f"{self}: Unexpected response structure from NVIDIA Riva: {ae}")
-            yield ErrorFrame(f"{self}: Unexpected NVIDIA Riva response format: {str(ae)}")
+            logger.error(f"{self}: Unexpected response structure from NVIDIA Nemotron Speech: {ae}")
+            yield ErrorFrame(
+                error=f"{self}: Unexpected NVIDIA Nemotron Speech response format: {str(ae)}"
+            )
         except Exception as e:
             logger.error(f"{self} exception: {e}")
             yield ErrorFrame(error=f"{self} error: {e}")