add documents

docs/advanced/detail_mode.md (new file, 343 lines)
@@ -0,0 +1,343 @@

# Detail Mode

Learn how to use FastGPT's detail mode to get comprehensive execution data for your requests.

## What is Detail Mode?

When `detail=True`, FastGPT returns extensive execution information, including:

- Module-by-module execution details
- Token usage per module
- Execution time for each node
- Knowledge base citations
- Complete message contexts
- Cost information

## Enabling Detail Mode

```python
from fastgpt_client import ChatClient

with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "Explain AI"}],
        detail=True,  # Enable detail mode
        stream=False
    )
    result = response.json()
```

## Response Structure with Detail Mode

### Basic Response (detail=False)

```json
{
  "id": "chatcmpl-xxx",
  "choices": [{
    "message": {"content": "AI is..."}
  }],
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 50,
    "total_tokens": 60
  }
}
```

### Detailed Response (detail=True)

```json
{
  "id": "chatcmpl-xxx",
  "choices": [{
    "message": {"content": "AI is..."}
  }],
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 50,
    "total_tokens": 60
  },
  "responseData": [
    {
      "moduleName": "Chat Node",
      "moduleType": "chatNode",
      "tokens": 60,
      "price": 0.0012,
      "runningTime": 1.5,
      "quoteList": [
        {
          "sourceId": "kb_123",
          "sourceName": "AI Knowledge Base",
          "text": "AI stands for Artificial Intelligence..."
        }
      ],
      "completeMessages": [...]
    },
    {
      "moduleName": "Dataset Search",
      "moduleType": "datasetSearchNode",
      "tokens": 20,
      "price": 0.0004,
      "runningTime": 0.5
    }
  ]
}
```

## Parsing Detailed Responses

```python
def parse_detail_response(response_data: dict):
    """Parse and display detailed execution data."""

    # Basic response info
    content = response_data['choices'][0]['message']['content']
    usage = response_data.get('usage', {})
    print(f"Response: {content[:100]}...")
    print(f"Total Tokens: {usage.get('total_tokens', 0)}")

    # Detailed execution data
    response_details = response_data.get('responseData', [])

    if response_details:
        print("\n=== Execution Details ===")

        for module in response_details:
            module_name = module.get('moduleName', 'Unknown')
            module_type = module.get('moduleType', 'Unknown')
            tokens = module.get('tokens', 0)
            price = module.get('price', 0)
            running_time = module.get('runningTime', 0)

            print(f"\nModule: {module_name} ({module_type})")
            print(f"  Tokens: {tokens}")
            print(f"  Price: ${price:.6f}")
            print(f"  Runtime: {running_time}s")

            # Knowledge base citations
            quote_list = module.get('quoteList', [])
            if quote_list:
                print("  Citations:")
                for quote in quote_list:
                    source = quote.get('sourceName', 'Unknown')
                    text = quote.get('text', '')[:100]
                    print(f"    - {source}: {text}...")


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "What is AI?"}],
        detail=True,
        stream=False
    )
    result = response.json()
    parse_detail_response(result)
```

## Streaming with Detail Mode

```python
import json
from fastgpt_client import ChatClient

def stream_with_detail(client, messages):
    """Stream with detail mode events."""

    response = client.create_chat_completion(
        messages=messages,
        detail=True,
        stream=True
    )

    modules = []
    current_event = None  # SSE sends the event name and its data on separate lines

    for line in response.iter_lines():
        if line.startswith("event:"):
            # Remember the event type; its payload arrives on the next data line
            current_event = line[6:].strip()

        elif line.startswith("data:"):
            data = line[5:].strip()
            if not data or data == "[DONE]":
                current_event = None
                continue

            payload = json.loads(data)

            if current_event == "flowNodeStatus":
                # Node status updates
                status = payload.get('status')
                node = payload.get('moduleName')
                print(f"[{status.upper()}] {node}")

            elif current_event == "flowResponses":
                # Complete module execution data
                modules.append(payload)

            elif "choices" in payload and payload["choices"]:
                # Standard response chunks
                delta = payload["choices"][0].get("delta", {})
                content = delta.get("content", "")
                if content:
                    print(content, end="", flush=True)

            current_event = None

    print("\n\n=== Module Summary ===")
    for module in modules:
        print(f"{module.get('moduleName')}: {module.get('tokens')} tokens")


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    stream_with_detail(
        client,
        [{"role": "user", "content": "Explain quantum computing"}]
    )
```

## Extracting Knowledge Base Citations

```python
def get_citations(response_data: dict) -> list[dict]:
    """Extract knowledge base citations from detailed response."""

    citations = []
    response_details = response_data.get('responseData', [])

    for module in response_details:
        quote_list = module.get('quoteList', [])
        for quote in quote_list:
            citations.append({
                'source': quote.get('sourceName', 'Unknown'),
                'source_id': quote.get('sourceId', ''),
                'text': quote.get('text', ''),
                'module': module.get('moduleName', 'Unknown')
            })

    return citations


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "What is machine learning?"}],
        detail=True,
        stream=False
    )
    result = response.json()

    citations = get_citations(result)
    print(f"Found {len(citations)} citations:")
    for i, citation in enumerate(citations, 1):
        print(f"\n{i}. {citation['source']}")
        print(f"   {citation['text'][:150]}...")
```

## Calculating Costs

```python
def calculate_costs(response_data: dict) -> dict:
    """Calculate total and per-module costs."""

    total_cost = 0
    total_tokens = 0
    module_costs = []

    response_details = response_data.get('responseData', [])

    for module in response_details:
        cost = module.get('price', 0)
        tokens = module.get('tokens', 0)
        module_name = module.get('moduleName', 'Unknown')

        total_cost += cost
        total_tokens += tokens

        module_costs.append({
            'module': module_name,
            'cost': cost,
            'tokens': tokens
        })

    return {
        'total_cost': total_cost,
        'total_tokens': total_tokens,
        'modules': module_costs
    }


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "Tell me about AI"}],
        detail=True,
        stream=False
    )
    result = response.json()

    costs = calculate_costs(result)
    print(f"Total Cost: ${costs['total_cost']:.6f}")
    print(f"Total Tokens: {costs['total_tokens']}")
    print("\nPer-Module Costs:")
    for module in costs['modules']:
        print(f"  {module['module']}: ${module['cost']:.6f} ({module['tokens']} tokens)")
```

## Analyzing Execution Time

```python
def analyze_performance(response_data: dict) -> dict:
    """Analyze module execution performance."""

    total_time = 0
    modules = []

    for module in response_data.get('responseData', []):
        runtime = module.get('runningTime', 0)
        module_name = module.get('moduleName', 'Unknown')

        total_time += runtime
        modules.append({
            'module': module_name,
            'runtime': runtime
        })

    # Sort by runtime, slowest first
    modules.sort(key=lambda x: x['runtime'], reverse=True)

    return {
        'total_time': total_time,
        'modules': modules
    }


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "Analyze this data"}],
        detail=True,
        stream=False
    )
    result = response.json()

    perf = analyze_performance(result)
    print(f"Total Runtime: {perf['total_time']:.2f}s")
    print("\nModule Execution Times:")
    for module in perf['modules']:
        print(f"  {module['module']}: {module['runtime']:.2f}s")
```

## Use Cases

1. **Debugging** - Identify slow or expensive modules
2. **Cost Optimization** - Track token usage and costs
3. **Transparency** - Show sources and reasoning to users
4. **Analytics** - Monitor application performance
5. **Compliance** - Track AI-generated content sources

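The helpers defined above compose naturally. As a small sketch, one function can cover debugging, cost tracking, and citation transparency in a single report:

```python
def session_report(response_data: dict) -> None:
    """Combine get_citations, calculate_costs and analyze_performance
    into one debugging/analytics report."""
    costs = calculate_costs(response_data)
    perf = analyze_performance(response_data)
    citations = get_citations(response_data)

    print(f"Cost: ${costs['total_cost']:.6f} ({costs['total_tokens']} tokens)")
    print(f"Runtime: {perf['total_time']:.2f}s")
    print(f"Citations: {len(citations)}")
    for module in perf['modules']:
        print(f"  {module['module']}: {module['runtime']:.2f}s")
```
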
## Best Practices

1. **Use selectively** - Detail mode adds overhead
2. **Cache results** - Store detailed data for analysis (see the sketch below)
3. **Monitor costs** - Track token usage over time
4. **Optimize workflows** - Use performance data to improve

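A minimal sketch of practice 2, appending each detailed response to a local JSONL log for later analysis (the file name and record layout are illustrative, not part of the SDK):

```python
import json
from pathlib import Path

LOG_FILE = Path("detail_log.jsonl")  # illustrative location

def cache_detail(response_data: dict) -> None:
    """Append a detailed response's usage and module data to a JSONL log."""
    record = {
        "usage": response_data.get("usage", {}),
        "responseData": response_data.get("responseData", []),
    }
    with LOG_FILE.open("a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")
```
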
## See Also

- [Streaming Events](streaming_events.md) - Real-time execution events
- [Record Detail API](../api/chat_client.md#get_record_detail) - Get details for past records

docs/advanced/error_handling.md (new file, 319 lines)
@@ -0,0 +1,319 @@

# Error Handling

A comprehensive guide to handling errors in the FastGPT Python SDK.

## Exception Types

The SDK provides specific exceptions for different error scenarios:

| Exception | Status Code | When to Use |
|-----------|-------------|-------------|
| `AuthenticationError` | 401 | Invalid API key |
| `RateLimitError` | 429 | Too many requests |
| `ValidationError` | 422 | Invalid parameters |
| `APIError` | 4xx/5xx | General API errors |

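As a quick illustration of the table, a handler can also branch on the exception's `status_code` attribute (used throughout this guide) after catching the base class. A sketch, assuming `APIError` is the common base of the rows above:

```python
from fastgpt_client.exceptions import APIError

def describe_error(e: APIError) -> str:
    """Translate a caught exception into a short label, per the table above."""
    if e.status_code == 401:
        return "Invalid API key"
    if e.status_code == 429:
        return "Too many requests"
    if e.status_code == 422:
        return "Invalid parameters"
    if e.status_code and e.status_code >= 500:
        return "Server error"
    return "General API error"
```
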
## Basic Error Handling

```python
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import (
    APIError,
    AuthenticationError,
    RateLimitError,
    ValidationError
)

try:
    with ChatClient(api_key="fastgpt-xxxxx") as client:
        response = client.create_chat_completion(
            messages=[{"role": "user", "content": "Hello"}],
            stream=False
        )
        response.raise_for_status()
        result = response.json()
        print(result['choices'][0]['message']['content'])

except AuthenticationError:
    print("Authentication failed. Check your API key.")

except RateLimitError as e:
    print(f"Rate limit exceeded. Retry after: {e.retry_after}")

except ValidationError as e:
    print(f"Invalid parameters: {e.message}")

except APIError as e:
    print(f"API error: {e.message}")
```

## Comprehensive Error Handler

```python
import logging

from fastgpt_client import ChatClient
from fastgpt_client.exceptions import (
    APIError,
    AuthenticationError,
    RateLimitError,
    ValidationError
)

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)


class ChatService:
    """Chat service with comprehensive error handling."""

    def __init__(self, api_key: str, base_url: str):
        self.client = ChatClient(api_key=api_key, base_url=base_url)

    def send_message(self, message: str) -> str | None:
        """Send a message with error handling."""
        try:
            response = self.client.create_chat_completion(
                messages=[{"role": "user", "content": message}],
                stream=False
            )
            response.raise_for_status()
            result = response.json()
            return result['choices'][0]['message']['content']

        except AuthenticationError:
            logger.error("Invalid API key")
            return "Error: Authentication failed. Please check your API key."

        except RateLimitError as e:
            logger.error(f"Rate limit exceeded: {e}")
            wait_time = int(e.retry_after) if e.retry_after else 5
            return f"Error: Too many requests. Please wait {wait_time} seconds."

        except ValidationError as e:
            logger.error(f"Validation error: {e}")
            return f"Error: Invalid request - {e.message}"

        except APIError as e:
            logger.error(f"API error: {e}")
            return f"Error: Server error - {e.message}"

        except Exception as e:
            logger.exception(f"Unexpected error: {e}")
            return "Error: An unexpected error occurred."

    def close(self):
        """Close the client."""
        self.client.close()
```

## Retry Logic

### Simple Retry

```python
import time
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import RateLimitError

def chat_with_retry(client, messages, max_retries=3):
    """Retry chat completion on rate limit errors."""
    for attempt in range(max_retries):
        try:
            response = client.create_chat_completion(
                messages=messages,
                stream=False
            )
            response.raise_for_status()
            return response.json()

        except RateLimitError as e:
            if attempt < max_retries - 1:
                wait_time = int(e.retry_after) if e.retry_after else 5
                print(f"Rate limited. Waiting {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                raise
```

### Exponential Backoff

```python
import time
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import APIError

def chat_with_backoff(client, messages, max_retries=5):
    """Retry with exponential backoff."""
    base_delay = 1  # seconds

    for attempt in range(max_retries):
        try:
            response = client.create_chat_completion(
                messages=messages,
                stream=False
            )
            response.raise_for_status()
            return response.json()

        except APIError as e:
            if attempt < max_retries - 1 and e.status_code >= 500:
                # Exponential backoff for server errors
                delay = base_delay * (2 ** attempt)
                print(f"Server error. Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                raise
```

## Streaming Error Handling

```python
import json
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import FastGPTError

def stream_chat_safely(client, messages):
    """Handle streaming with error recovery."""
    try:
        response = client.create_chat_completion(
            messages=messages,
            stream=True
        )

        for line in response.iter_lines():
            try:
                if line.startswith("data:"):
                    data = line[5:].strip()
                    if data and data != "[DONE]":
                        chunk = json.loads(data)
                        if "choices" in chunk and chunk["choices"]:
                            delta = chunk["choices"][0].get("delta", {})
                            content = delta.get("content", "")
                            if content:
                                print(content, end="", flush=True)
            except json.JSONDecodeError:
                # Skip malformed JSON chunks
                continue

    except FastGPTError as e:
        print(f"\nStream error: {e}")
```

## Response Validation

```python
def validate_response(response_data: dict) -> bool:
    """Validate API response structure."""
    if "choices" not in response_data:
        raise ValueError("Response missing 'choices' field")

    if not response_data["choices"]:
        raise ValueError("Empty choices array")

    choice = response_data["choices"][0]
    if "message" not in choice:
        raise ValueError("Choice missing 'message' field")

    message = choice["message"]
    if "content" not in message:
        raise ValueError("Message missing 'content' field")

    return True


def safe_chat_completion(client, messages):
    """Chat with response validation."""
    response = client.create_chat_completion(
        messages=messages,
        stream=False
    )
    response.raise_for_status()
    result = response.json()

    try:
        validate_response(result)
        return result['choices'][0]['message']['content']
    except ValueError as e:
        print(f"Invalid response format: {e}")
        return None
```

## Logging Errors

```python
import logging
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import FastGPTError

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def log_errors(client, messages):
    """Log errors with context."""
    try:
        response = client.create_chat_completion(
            messages=messages,
            stream=False
        )
        response.raise_for_status()
        return response.json()

    except FastGPTError as e:
        logger.error(
            f"API Error: {type(e).__name__} - {e.message}",
            extra={
                "status_code": e.status_code,
                "response_data": e.response_data,
            }
        )
        raise
```

## Custom Exception Handler

```python
from functools import wraps
from fastgpt_client.exceptions import FastGPTError

def handle_fastgpt_errors(func):
    """Decorator for handling FastGPT errors."""

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except FastGPTError as e:
            print(f"FastGPT Error: {e}")
            return None
        except Exception as e:
            print(f"Unexpected Error: {e}")
            return None

    return wrapper


@handle_fastgpt_errors
def send_message(client, message: str):
    """Send message with automatic error handling."""
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": message}],
        stream=False
    )
    response.raise_for_status()
    result = response.json()
    return result['choices'][0]['message']['content']
```

## Best Practices

1. **Always use `raise_for_status()`** - Catches HTTP errors early
2. **Handle specific exceptions** - Catch known error types in dedicated `except` blocks
3. **Log all errors** - Helps with debugging and monitoring
4. **Provide user feedback** - Show meaningful error messages
5. **Implement retries** - For transient errors (rate limits, server errors)
6. **Validate responses** - Ensure the data structure is correct
7. **Use context managers** - Ensures proper cleanup

## See Also

- [Exceptions Reference](../api/exceptions.md) - Exception types and attributes
- [Rate Limiting](rate_limiting.md) - Handling rate limits effectively

docs/advanced/rate_limiting.md (new file, 298 lines)
@@ -0,0 +1,298 @@

# Rate Limiting

Understanding and handling rate limits in the FastGPT API.

## Understanding Rate Limits

The FastGPT API may enforce rate limits to:

- Prevent API abuse
- Ensure fair resource allocation
- Maintain system stability

When you exceed the rate limit, you'll receive a `429 Too Many Requests` response.

## RateLimitError

The SDK raises `RateLimitError` when rate limits are exceeded:

```python
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import RateLimitError

try:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "Hello"}]
    )
except RateLimitError as e:
    print("Rate limit exceeded!")
    print(f"Status code: {e.status_code}")  # 429
    print(f"Retry after: {e.retry_after}")  # Suggested wait time
```

## Handling Rate Limits

### 1. Simple Retry with Delay

```python
import time
from fastgpt_client.exceptions import RateLimitError

def chat_with_retry(client, messages, max_retries=3):
    """Retry on rate limit with fixed delay."""
    for attempt in range(max_retries):
        try:
            response = client.create_chat_completion(
                messages=messages,
                stream=False
            )
            response.raise_for_status()
            return response.json()

        except RateLimitError as e:
            if attempt < max_retries - 1:
                # Use Retry-After header or default to 5 seconds
                wait_time = int(e.retry_after) if e.retry_after else 5
                print(f"Rate limited. Waiting {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                print("Max retries exceeded")
                raise
```

### 2. Exponential Backoff

```python
import random
import time

from fastgpt_client.exceptions import RateLimitError

def chat_with_backoff(client, messages, max_retries=5):
    """Retry with exponential backoff."""
    base_delay = 1  # Start with 1 second

    for attempt in range(max_retries):
        try:
            response = client.create_chat_completion(
                messages=messages,
                stream=False
            )
            response.raise_for_status()
            return response.json()

        except RateLimitError:
            if attempt < max_retries - 1:
                # Exponential backoff with jitter to avoid thundering herd
                delay = base_delay * (2 ** attempt)
                jitter = random.uniform(0, 0.5 * delay)
                wait_time = delay + jitter

                print(f"Rate limited. Waiting {wait_time:.1f} seconds...")
                time.sleep(wait_time)
            else:
                raise
```

### 3. Async Retry with Backoff

```python
import asyncio

from fastgpt_client.exceptions import RateLimitError

async def async_chat_with_retry(client, messages, max_retries=5):
    """Async retry with exponential backoff.

    Assumes `client` exposes an awaitable create_chat_completion.
    """
    base_delay = 1

    for attempt in range(max_retries):
        try:
            response = await client.create_chat_completion(
                messages=messages,
                stream=False
            )
            response.raise_for_status()
            return response.json()

        except RateLimitError:
            if attempt < max_retries - 1:
                delay = base_delay * (2 ** attempt)
                print(f"Rate limited. Waiting {delay} seconds...")
                await asyncio.sleep(delay)
            else:
                raise
```

### 4. Rate Limiter Class

```python
import time
from threading import Lock


class RateLimiter:
    """Token bucket rate limiter."""

    def __init__(self, rate: int, per: float = 60.0):
        """
        Args:
            rate: Number of requests allowed
            per: Time period in seconds
        """
        self.rate = rate
        self.per = per
        self.allowance = rate
        self.last_check = time.time()
        self.lock = Lock()

    def acquire(self, block: bool = True, timeout: float | None = None) -> bool:
        """Acquire a token from the bucket."""
        with self.lock:
            current = time.time()
            time_passed = current - self.last_check
            self.last_check = current

            # Refill bucket
            self.allowance += time_passed * (self.rate / self.per)

            if self.allowance > self.rate:
                self.allowance = self.rate

            if self.allowance < 1.0:
                if not block:
                    return False

                # Calculate wait time
                sleep_time = (1.0 - self.allowance) * (self.per / self.rate)

                if timeout is not None and sleep_time > timeout:
                    return False

                time.sleep(sleep_time)
                self.allowance = 0.0
            else:
                self.allowance -= 1.0

            return True


# Usage
rate_limiter = RateLimiter(rate=10, per=60)  # 10 requests per minute

for i in range(15):
    # Non-blocking: skip the request when no token is available
    if rate_limiter.acquire(block=False):
        response = client.create_chat_completion(
            messages=[{"role": "user", "content": f"Message {i}"}]
        )
        print(f"Sent message {i}")
    else:
        print(f"Rate limited, skipping message {i}")
```

### 5. Decorator for Rate Limiting

```python
import functools
import time

from fastgpt_client.exceptions import RateLimitError

def rate_limit_retry(max_retries=3, base_delay=1):
    """Decorator to handle rate limiting with retries."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except RateLimitError as e:
                    if attempt < max_retries - 1:
                        delay = base_delay * (2 ** attempt)
                        wait_time = int(e.retry_after) if e.retry_after else delay
                        print(f"Rate limited. Waiting {wait_time} seconds...")
                        time.sleep(wait_time)
                    else:
                        raise
        return wrapper
    return decorator


# Usage
@rate_limit_retry(max_retries=3, base_delay=2)
def send_message(client, message: str):
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": message}],
        stream=False
    )
    response.raise_for_status()
    return response.json()
```

## Monitoring Rate Limits

```python
import time
from collections import defaultdict
from threading import Lock


class RequestMonitor:
    """Monitor API request rates."""

    def __init__(self, window_seconds=60):
        self.window = window_seconds
        self.requests = defaultdict(list)
        self.lock = Lock()

    def record_request(self, endpoint: str):
        """Record an API request."""
        with self.lock:
            now = time.time()
            self.requests[endpoint].append(now)

            # Remove old requests outside the window
            cutoff = now - self.window
            self.requests[endpoint] = [
                t for t in self.requests[endpoint] if t > cutoff
            ]

    def get_rate(self, endpoint: str) -> float:
        """Get requests per second for an endpoint."""
        with self.lock:
            recent = self.requests[endpoint]
            if not recent:
                return 0.0
            return len(recent) / self.window

    def is_rate_limited(self, endpoint: str, limit: int) -> bool:
        """Check if an endpoint is at its request limit."""
        with self.lock:
            cutoff = time.time() - self.window
            recent = [t for t in self.requests[endpoint] if t > cutoff]
            return len(recent) >= limit


# Usage
monitor = RequestMonitor(window_seconds=60)

def make_request(client, messages):
    endpoint = "/api/v1/chat/completions"

    # Check if we're approaching the rate limit
    if monitor.is_rate_limited(endpoint, limit=100):
        print("Approaching rate limit, slowing down...")
        time.sleep(1)

    monitor.record_request(endpoint)
    response = client.create_chat_completion(messages=messages)
    return response
```

## Best Practices

1. **Implement backoff** - Use exponential backoff for retries
2. **Respect Retry-After** - Use the `retry_after` value when available
3. **Monitor usage** - Track request rates to avoid hitting limits
4. **Queue requests** - For batch operations, use rate limiting
5. **Handle gracefully** - Show user-friendly messages when rate limited
6. **Use async** - Better resource utilization for concurrent requests (see the sketch below)

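A sketch of practice 6: capping concurrent requests with an `asyncio.Semaphore`, assuming the awaitable client from the async retry example above:

```python
import asyncio

async def send_batch(client, prompts, max_concurrent=5):
    """Send many prompts concurrently while capping in-flight requests."""
    semaphore = asyncio.Semaphore(max_concurrent)

    async def send(prompt):
        async with semaphore:  # At most max_concurrent requests at once
            response = await client.create_chat_completion(
                messages=[{"role": "user", "content": prompt}],
                stream=False
            )
            response.raise_for_status()
            return response.json()

    return await asyncio.gather(*(send(p) for p in prompts))
```
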
## See Also

- [Error Handling](error_handling.md) - Comprehensive error handling guide
- [Exceptions Reference](../api/exceptions.md) - Exception types and attributes

docs/advanced/streaming_events.md (new file, 284 lines)
@@ -0,0 +1,284 @@

# Streaming Events

FastGPT uses Server-Sent Events (SSE) for streaming responses. This guide covers all event types you may encounter.

## SSE Format

FastGPT sends events in this format:

```
event: eventType
data: {"key": "value"}

event: anotherEvent
data: {"key": "value"}
```

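Because the event name and its payload arrive on separate lines, a small helper that pairs them up keeps stream handling simple. A sketch (`iter_sse_events` is illustrative, not part of the SDK; the per-event examples below inline the same logic):

```python
import json

def iter_sse_events(lines):
    """Yield (event, payload) pairs from an SSE line stream.

    `event` is None for plain data lines (OpenAI-compatible chunks).
    """
    current_event = None
    for line in lines:
        if line.startswith("event:"):
            current_event = line[6:].strip()
        elif line.startswith("data:"):
            data = line[5:].strip()
            if not data or data == "[DONE]":
                current_event = None
                continue
            try:
                payload = json.loads(data)
            except json.JSONDecodeError:
                continue
            yield current_event, payload
            current_event = None
```
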
## Event Types

### 1. `data` Event

The main streaming event, compatible with OpenAI's format:

```python
import json

for line in response.iter_lines():
    if line.startswith("data:"):
        data = line[5:].strip()
        if data and data != "[DONE]":
            chunk = json.loads(data)
            # Process OpenAI-compatible response
            if "choices" in chunk and chunk["choices"]:
                delta = chunk["choices"][0].get("delta", {})
                content = delta.get("content", "")
                if content:
                    print(content, end="", flush=True)
```

### 2. `answer` Event

Main chat response content (alternative format). The data line follows the event line, so track the current event:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "answer":
        answer_data = json.loads(line[5:])
        print(answer_data.get("text", ""), end="", flush=True)
        current_event = None
```

### 3. `fastAnswer` Event

Quick reply content:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "fastAnswer":
        fast_answer_data = json.loads(line[5:])
        print(f"Quick reply: {fast_answer_data}")
        current_event = None
```

### 4. `flowNodeStatus` Event

Workflow node status updates:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "flowNodeStatus":
        status_data = json.loads(line[5:])
        status = status_data.get("status")  # "running", "completed", "error"
        node_name = status_data.get("nodeName")
        print(f"[{status.upper()}] {node_name}")
        current_event = None
```

### 5. `flowResponses` Event

Complete node response data (requires `detail=True`):

```python
response = client.create_chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
    detail=True  # Enable detailed responses
)

# Then in the stream:
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "flowResponses":
        response_data = json.loads(line[5:])
        # Contains module execution details
        module_name = response_data.get("moduleName")
        tokens = response_data.get("tokens")
        print(f"Module: {module_name}, Tokens: {tokens}")
        current_event = None
```

### 6. `interactive` Event

Interactive node (requires user input or selection):

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "interactive":
        interactive_data = json.loads(line[5:])
        interactive_type = interactive_data.get("type")

        if interactive_type == "userSelect":
            options = interactive_data["params"]["userSelectOptions"]
            print("Please select an option:")
            for i, option in enumerate(options):
                print(f"{i + 1}. {option['value']}")

        elif interactive_type == "userInput":
            form_fields = interactive_data["params"]["inputForm"]
            print("Please provide the following information:")
            for field in form_fields:
                print(f"- {field['label']}")

        current_event = None
```

### 7. `updateVariables` Event

Variable updates during execution:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "updateVariables":
        var_data = json.loads(line[5:])
        variables = var_data.get("variables", {})
        print(f"Variables updated: {variables}")
        current_event = None
```

### 8. `error` Event

Error events:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "error":
        error_data = json.loads(line[5:])
        error_message = error_data.get("message", "Unknown error")
        error_type = error_data.get("type", "Error")
        print(f"Error [{error_type}]: {error_message}")
        current_event = None
```

### 9. `toolCall`, `toolParams`, `toolResponse` Events

Tool/agent operation events:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:"):
        data = line[5:].strip()
        if not data or data == "[DONE]":
            current_event = None
            continue
        payload = json.loads(data)

        if current_event == "toolCall":
            print(f"Tool called: {payload.get('toolName')}")
        elif current_event == "toolParams":
            print(f"Tool parameters: {payload}")
        elif current_event == "toolResponse":
            print(f"Tool response: {payload}")

        current_event = None
```

## Complete Event Handler

```python
import json
from fastgpt_client import ChatClient

def handle_all_events(response):
    """Handle all streaming event types."""

    buffer = ""
    current_event = None

    for line in response.iter_lines():
        if not line:
            continue

        # Event type line
        if line.startswith("event:"):
            current_event = line[6:].strip()

        # Data line
        elif line.startswith("data:"):
            data = line[5:].strip()
            if not data or data == "[DONE]":
                continue

            try:
                data_obj = json.loads(data)
            except json.JSONDecodeError:
                continue

            # Handle based on event type
            if current_event is None:
                # Default: OpenAI-compatible format
                if "choices" in data_obj and data_obj["choices"]:
                    delta = data_obj["choices"][0].get("delta", {})
                    content = delta.get("content", "")
                    if content:
                        buffer += content
                        print(content, end="", flush=True)

            elif current_event == "answer":
                text = data_obj.get("text", "")
                if text:
                    buffer += text
                    print(text, end="", flush=True)

            elif current_event == "flowNodeStatus":
                status = data_obj.get("status")
                node = data_obj.get("nodeName", "Unknown")
                print(f"\n[{status.upper()}] {node}")

            elif current_event == "interactive":
                interactive_type = data_obj.get("type")
                print(f"\n[INTERACTIVE] {interactive_type}")
                print(f"Details: {data_obj.get('params', {})}")

            elif current_event == "error":
                print(f"\n[ERROR] {data_obj.get('message', 'Unknown error')}")

            elif current_event == "toolCall":
                print(f"\n[TOOL] Calling: {data_obj.get('toolName')}")

            # Reset event after its data line has been handled
            current_event = None

    return buffer


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
        detail=True  # Enable flow responses
    )

    full_response = handle_all_events(response)
    print(f"\n\nFull response: {full_response}")
```

## Event Flow Example

A typical streaming conversation might generate events like:

```
event:flowNodeStatus
data:{"status": "running", "nodeName": "Chat Node"}

event:answer
data:{"text": "Hello"}

event:answer
data:{"text": "! How"}

event:answer
data:{"text": " can I help"}

event:flowNodeStatus
data:{"status": "completed", "nodeName": "Chat Node"}

event:flowResponses
data:{"moduleName": "Chat Node", "tokens": 50}

data:{"choices": [{"delta": {"content": "Hello! How can I help"}}], "usage": {"total_tokens": 50}}

data:[DONE]
```

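To experiment without an API key, the stream above can be replayed through `handle_all_events` with a minimal stand-in for the response object (a test sketch; `FakeResponse` is not part of the SDK):

```python
class FakeResponse:
    """Stand-in for a streaming response, for local experimentation."""

    def __init__(self, lines):
        self._lines = lines

    def iter_lines(self):
        yield from self._lines


sample = [
    'event:flowNodeStatus',
    'data:{"status": "running", "nodeName": "Chat Node"}',
    'event:answer',
    'data:{"text": "Hello"}',
    'event:answer',
    'data:{"text": "! How can I help"}',
    'event:flowNodeStatus',
    'data:{"status": "completed", "nodeName": "Chat Node"}',
    'data:[DONE]',
]

# Prints the status lines and text chunks, then the accumulated buffer
print(handle_all_events(FakeResponse(sample)))
```
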
## Best Practices

1. **Handle `[DONE]`** - Check for the end of the stream
2. **Validate JSON** - Use try/except for parsing
3. **Buffer content** - Accumulate text for display
4. **Handle errors** - Watch for error events
5. **Check event types** - Use `startswith("event:")` to detect events

## See Also

- [Streaming Example](../examples/streaming.md) - Basic streaming usage
- [Detail Mode](detail_mode.md) - Enable detailed execution data