Compare commits
2 Commits
main
...
jh/aws-aut
| Author | SHA1 | Date | |
|---|---|---|---|
| | b6da5c18b7 | | |
| | 4b6881b81d | | |
9
changelog/4389.fixed.md
Normal file
9
changelog/4389.fixed.md
Normal file
@@ -0,0 +1,9 @@
- Fixed AWS services failing silently on missing or invalid credentials.
  `AWSNovaSonicLLMService`, `AWSBedrockLLMService`, `AWSPollyTTSService`,
  and `AWSTranscribeSTTService` now push a fatal `ErrorFrame` with a
  "check AWS credentials and region" hint on auth-class failures, so the
  pipeline cancels promptly instead of continuing to run with no output.
- Fixed `AWSNovaSonicLLMService._disconnect` raising `InvalidStateError`
  from `awscrt/aio/http.py` when cleanup ran on a stream from a failed
  `invoke_model_with_bidirectional_stream` call. The error was masking
  the real connect-time auth failure in the logs.
@@ -43,7 +43,7 @@ from pipecat.utils.tracing.service_decorators import traced_llm
|
||||
try:
|
||||
import aioboto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ReadTimeoutError
|
||||
from botocore.exceptions import ClientError, ReadTimeoutError
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error(
|
||||
@@ -52,6 +52,23 @@ except ModuleNotFoundError as e:
|
||||
raise Exception(f"Missing module: {e}")
|
||||
|
||||
|
||||
# AWS error codes that indicate the service won't work until creds/region are
|
||||
# fixed. We treat these as fatal so the pipeline stops instead of silently
|
||||
# degrading.
|
||||
_AWS_AUTH_ERROR_CODES = frozenset(
|
||||
{
|
||||
"UnrecognizedClientException",
|
||||
"InvalidSignatureException",
|
||||
"AccessDeniedException",
|
||||
"ExpiredTokenException",
|
||||
"InvalidAccessKeyId",
|
||||
"SignatureDoesNotMatch",
|
||||
"MissingAuthenticationTokenException",
|
||||
"AuthFailure",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class AWSBedrockLLMSettings(LLMSettings):
|
||||
"""Settings for AWSBedrockLLMService.
|
||||
@@ -555,6 +572,20 @@ class AWSBedrockLLMService(LLMService):
|
||||
raise
|
||||
except (TimeoutError, ReadTimeoutError):
|
||||
await self._call_event_handler("on_completion_timeout")
|
||||
except ClientError as e:
|
||||
error_code = e.response.get("Error", {}).get("Code", "")
|
||||
if error_code in _AWS_AUTH_ERROR_CODES:
|
||||
await self.push_error(
|
||||
error_msg=(
|
||||
"AWS Bedrock authentication failed. "
|
||||
"Check AWS credentials and region. "
|
||||
f"Underlying error: {e}"
|
||||
),
|
||||
exception=e,
|
||||
fatal=True,
|
||||
)
|
||||
else:
|
||||
await self.push_error(error_msg=f"AWS Bedrock client error: {e}", exception=e)
|
||||
except Exception as e:
|
||||
await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
|
||||
finally:
|
||||
|
||||
@@ -602,7 +602,19 @@ class AWSNovaSonicLLMService(LLMService):
|
||||
self._ready_to_send_context = True
|
||||
await self._finish_connecting_if_context_available()
|
||||
except Exception as e:
|
||||
await self.push_error(error_msg=f"Initialization error: {e}", exception=e)
|
||||
# Connect-time failures (most commonly bad/missing AWS credentials or
|
||||
# an unsupported region) leave the bidirectional stream in a partial
|
||||
# state and produce no audio output. Treat them as fatal so the
|
||||
# pipeline cancels with a clear ERROR rather than continuing silently.
|
||||
await self.push_error(
|
||||
error_msg=(
|
||||
"AWS Nova Sonic failed to start. "
|
||||
"Check AWS credentials and region. "
|
||||
f"Underlying error: {e}"
|
||||
),
|
||||
exception=e,
|
||||
fatal=True,
|
||||
)
|
||||
await self._disconnect()
|
||||
|
||||
async def _process_completed_function_calls(self, send_new_results: bool):
|
||||
@@ -703,17 +715,28 @@ class AWSNovaSonicLLMService(LLMService):
|
||||
# NOTE: see explanation of HACK, below
|
||||
self._disconnecting = True
|
||||
|
||||
# Clean up client
|
||||
# Clean up client. If connect failed (e.g. bad credentials), the
|
||||
# session may not have started, so end events can fail. Don't let
|
||||
# that mask the real error or block cleanup.
|
||||
if self._client:
|
||||
await self._send_session_end_events()
|
||||
try:
|
||||
await self._send_session_end_events()
|
||||
except Exception as e:
|
||||
logger.debug(f"Ignoring error while sending session-end events: {e}")
|
||||
self._client = None
|
||||
|
||||
# Clean up context
|
||||
self._context = None
|
||||
|
||||
# Clean up stream
|
||||
# Clean up stream. A stream from a failed
|
||||
# invoke_model_with_bidirectional_stream call has an already-
|
||||
# cancelled awscrt future; closing it raises InvalidStateError that
|
||||
# otherwise drowns out the real connect error in the logs.
|
||||
if self._stream:
|
||||
await self._stream.close()
|
||||
try:
|
||||
await self._stream.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Ignoring error while closing partial stream: {e}")
|
||||
self._stream = None
|
||||
|
||||
# NOTE: see explanation of HACK, below
|
||||
|
||||
@@ -323,8 +323,18 @@ class AWSTranscribeSTTService(WebsocketSTTService):
|
||||
await self._call_event_handler("on_connected")
|
||||
logger.info(f"{self} Successfully connected to AWS Transcribe")
|
||||
except Exception as e:
|
||||
# Connect-time failures (most commonly bad/missing AWS credentials,
|
||||
# an unsupported region, or a 403 from the presigned URL) won't
|
||||
# recover on retry. Treat them as fatal so the pipeline cancels
|
||||
# with a clear ERROR rather than silently producing no transcripts.
|
||||
await self.push_error(
|
||||
error_msg=f"Unable to connect to AWS Transcribe: {e}", exception=e
|
||||
error_msg=(
|
||||
"Unable to connect to AWS Transcribe. "
|
||||
"Check AWS credentials and region. "
|
||||
f"Underlying error: {e}"
|
||||
),
|
||||
exception=e,
|
||||
fatal=True,
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
@@ -37,6 +37,23 @@ except ModuleNotFoundError as e:
|
||||
raise Exception(f"Missing module: {e}")
|
||||
|
||||
|
||||
# AWS error codes that indicate the service won't work until creds/region are
|
||||
# fixed. We treat these as fatal so the pipeline stops instead of silently
|
||||
# degrading.
|
||||
_AWS_AUTH_ERROR_CODES = frozenset(
|
||||
{
|
||||
"UnrecognizedClientException",
|
||||
"InvalidSignatureException",
|
||||
"AccessDeniedException",
|
||||
"ExpiredTokenException",
|
||||
"InvalidAccessKeyId",
|
||||
"SignatureDoesNotMatch",
|
||||
"MissingAuthenticationTokenException",
|
||||
"AuthFailure",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def language_to_aws_language(language: Language) -> str | None:
|
||||
"""Convert a Language enum to AWS Polly language code.
|
||||
|
||||
@@ -366,6 +383,21 @@ class AWSPollyTTSService(TTSService):
|
||||
frame = TTSAudioRawFrame(chunk, self.sample_rate, 1, context_id=context_id)
|
||||
yield frame
|
||||
|
||||
except (BotoCoreError, ClientError) as error:
|
||||
error_message = f"AWS Polly TTS error: {str(error)}"
|
||||
yield ErrorFrame(error=error_message)
|
||||
except ClientError as error:
|
||||
error_code = error.response.get("Error", {}).get("Code", "")
|
||||
if error_code in _AWS_AUTH_ERROR_CODES:
|
||||
# Bad/missing credentials won't fix themselves between calls.
|
||||
# Stop the pipeline so the failure surfaces clearly.
|
||||
await self.push_error(
|
||||
error_msg=(
|
||||
"AWS Polly authentication failed. "
|
||||
"Check AWS credentials and region. "
|
||||
f"Underlying error: {error}"
|
||||
),
|
||||
exception=error,
|
||||
fatal=True,
|
||||
)
|
||||
else:
|
||||
yield ErrorFrame(error=f"AWS Polly TTS error: {error}")
|
||||
except BotoCoreError as error:
|
||||
yield ErrorFrame(error=f"AWS Polly TTS error: {error}")
|
||||
|
||||
Reference in New Issue
Block a user