add documents

docs/advanced/detail_mode.md (new file, 343 lines)
@@ -0,0 +1,343 @@

# Detail Mode

Learn how to use FastGPT's detail mode to get comprehensive execution data for your requests.

## What is Detail Mode?

When `detail=True`, FastGPT returns extensive execution information, including:

- Module-by-module execution details
- Token usage per module
- Execution time for each node
- Knowledge base citations
- Complete message contexts
- Cost information

## Enabling Detail Mode

```python
from fastgpt_client import ChatClient

with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "Explain AI"}],
        detail=True,  # Enable detail mode
        stream=False
    )
    result = response.json()
```

## Response Structure with Detail Mode

### Basic Response (detail=False)

```json
{
  "id": "chatcmpl-xxx",
  "choices": [{
    "message": {"content": "AI is..."}
  }],
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 50,
    "total_tokens": 60
  }
}
```

### Detailed Response (detail=True)

```json
{
  "id": "chatcmpl-xxx",
  "choices": [{
    "message": {"content": "AI is..."}
  }],
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 50,
    "total_tokens": 60
  },
  "responseData": [
    {
      "moduleName": "Chat Node",
      "moduleType": "chatNode",
      "tokens": 60,
      "price": 0.0012,
      "runningTime": 1.5,
      "quoteList": [
        {
          "sourceId": "kb_123",
          "sourceName": "AI Knowledge Base",
          "text": "AI stands for Artificial Intelligence..."
        }
      ],
      "completeMessages": [...]
    },
    {
      "moduleName": "Dataset Search",
      "moduleType": "datasetSearchNode",
      "tokens": 20,
      "price": 0.0004,
      "runningTime": 0.5
    }
  ]
}
```

## Parsing Detailed Responses

```python
def parse_detail_response(response_data: dict):
    """Parse and display detailed execution data."""

    # Basic response info
    content = response_data['choices'][0]['message']['content']
    usage = response_data.get('usage', {})
    print(f"Response: {content[:100]}...")
    print(f"Total Tokens: {usage.get('total_tokens', 0)}")

    # Detailed execution data
    response_details = response_data.get('responseData', [])

    if response_details:
        print("\n=== Execution Details ===")

        for module in response_details:
            module_name = module.get('moduleName', 'Unknown')
            module_type = module.get('moduleType', 'Unknown')
            tokens = module.get('tokens', 0)
            price = module.get('price', 0)
            running_time = module.get('runningTime', 0)

            print(f"\nModule: {module_name} ({module_type})")
            print(f"  Tokens: {tokens}")
            print(f"  Price: ${price:.6f}")
            print(f"  Runtime: {running_time}s")

            # Knowledge base citations
            quote_list = module.get('quoteList', [])
            if quote_list:
                print("  Citations:")
                for quote in quote_list:
                    source = quote.get('sourceName', 'Unknown')
                    text = quote.get('text', '')[:100]
                    print(f"    - {source}: {text}...")


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "What is AI?"}],
        detail=True,
        stream=False
    )
    result = response.json()
    parse_detail_response(result)
```

## Streaming with Detail Mode

```python
import json
from fastgpt_client import ChatClient

def stream_with_detail(client, messages):
    """Stream with detail mode events."""

    response = client.create_chat_completion(
        messages=messages,
        detail=True,
        stream=True
    )

    modules = []
    current_event = None  # SSE sends the event name and its data on separate lines

    for line in response.iter_lines():
        if line.startswith("event:"):
            # Remember the event type; its payload arrives on the next data line
            current_event = line[6:].strip()

        elif line.startswith("data:"):
            data = line[5:].strip()
            if not data or data == "[DONE]":
                current_event = None
                continue

            payload = json.loads(data)

            if current_event == "flowNodeStatus":
                # Node status updates
                status = payload.get('status')
                node = payload.get('moduleName')
                print(f"[{status.upper()}] {node}")

            elif current_event == "flowResponses":
                # Complete module execution data
                modules.append(payload)

            elif "choices" in payload and payload["choices"]:
                # Standard response chunks
                delta = payload["choices"][0].get("delta", {})
                content = delta.get("content", "")
                if content:
                    print(content, end="", flush=True)

            current_event = None

    print("\n\n=== Module Summary ===")
    for module in modules:
        print(f"{module.get('moduleName')}: {module.get('tokens')} tokens")


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    stream_with_detail(
        client,
        [{"role": "user", "content": "Explain quantum computing"}]
    )
```

## Extracting Knowledge Base Citations

```python
def get_citations(response_data: dict) -> list[dict]:
    """Extract knowledge base citations from detailed response."""

    citations = []
    response_details = response_data.get('responseData', [])

    for module in response_details:
        quote_list = module.get('quoteList', [])
        for quote in quote_list:
            citations.append({
                'source': quote.get('sourceName', 'Unknown'),
                'source_id': quote.get('sourceId', ''),
                'text': quote.get('text', ''),
                'module': module.get('moduleName', 'Unknown')
            })

    return citations


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "What is machine learning?"}],
        detail=True,
        stream=False
    )
    result = response.json()

    citations = get_citations(result)
    print(f"Found {len(citations)} citations:")
    for i, citation in enumerate(citations, 1):
        print(f"\n{i}. {citation['source']}")
        print(f"   {citation['text'][:150]}...")
```

## Calculating Costs

```python
def calculate_costs(response_data: dict) -> dict:
    """Calculate total and per-module costs."""

    total_cost = 0
    total_tokens = 0
    module_costs = []

    response_details = response_data.get('responseData', [])

    for module in response_details:
        cost = module.get('price', 0)
        tokens = module.get('tokens', 0)
        module_name = module.get('moduleName', 'Unknown')

        total_cost += cost
        total_tokens += tokens

        module_costs.append({
            'module': module_name,
            'cost': cost,
            'tokens': tokens
        })

    return {
        'total_cost': total_cost,
        'total_tokens': total_tokens,
        'modules': module_costs
    }


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "Tell me about AI"}],
        detail=True,
        stream=False
    )
    result = response.json()

    costs = calculate_costs(result)
    print(f"Total Cost: ${costs['total_cost']:.6f}")
    print(f"Total Tokens: {costs['total_tokens']}")
    print("\nPer-Module Costs:")
    for module in costs['modules']:
        print(f"  {module['module']}: ${module['cost']:.6f} ({module['tokens']} tokens)")
```

## Analyzing Execution Time

```python
def analyze_performance(response_data: dict) -> dict:
    """Analyze module execution performance."""

    total_time = 0
    modules = []

    for module in response_data.get('responseData', []):
        runtime = module.get('runningTime', 0)
        module_name = module.get('moduleName', 'Unknown')

        total_time += runtime
        modules.append({
            'module': module_name,
            'runtime': runtime
        })

    # Sort by runtime, slowest first
    modules.sort(key=lambda x: x['runtime'], reverse=True)

    return {
        'total_time': total_time,
        'modules': modules
    }


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "Analyze this data"}],
        detail=True,
        stream=False
    )
    result = response.json()

    perf = analyze_performance(result)
    print(f"Total Runtime: {perf['total_time']:.2f}s")
    print("\nModule Execution Times:")
    for module in perf['modules']:
        print(f"  {module['module']}: {module['runtime']:.2f}s")
```

## Use Cases

1. **Debugging** - Identify slow or expensive modules
2. **Cost Optimization** - Track token usage and costs
3. **Transparency** - Show sources and reasoning to users
4. **Analytics** - Monitor application performance
5. **Compliance** - Track AI-generated content sources

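The helpers defined above compose naturally. As a small sketch, one function can cover debugging, cost tracking, and citation transparency in a single report:

```python
def session_report(response_data: dict) -> None:
    """Combine get_citations, calculate_costs and analyze_performance
    into one debugging/analytics report."""
    costs = calculate_costs(response_data)
    perf = analyze_performance(response_data)
    citations = get_citations(response_data)

    print(f"Cost: ${costs['total_cost']:.6f} ({costs['total_tokens']} tokens)")
    print(f"Runtime: {perf['total_time']:.2f}s")
    print(f"Citations: {len(citations)}")
    for module in perf['modules']:
        print(f"  {module['module']}: {module['runtime']:.2f}s")
```
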
## Best Practices

1. **Use selectively** - Detail mode adds overhead
2. **Cache results** - Store detailed data for analysis (see the sketch below)
3. **Monitor costs** - Track token usage over time
4. **Optimize workflows** - Use performance data to improve

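A minimal sketch of practice 2, appending each detailed response to a local JSONL log for later analysis (the file name and record layout are illustrative, not part of the SDK):

```python
import json
from pathlib import Path

LOG_FILE = Path("detail_log.jsonl")  # illustrative location

def cache_detail(response_data: dict) -> None:
    """Append a detailed response's usage and module data to a JSONL log."""
    record = {
        "usage": response_data.get("usage", {}),
        "responseData": response_data.get("responseData", []),
    }
    with LOG_FILE.open("a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")
```
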
## See Also

- [Streaming Events](streaming_events.md) - Real-time execution events
- [Record Detail API](../api/chat_client.md#get_record_detail) - Get details for past records

docs/advanced/error_handling.md (new file, 319 lines)
@@ -0,0 +1,319 @@

# Error Handling

A comprehensive guide to handling errors in the FastGPT Python SDK.

## Exception Types

The SDK provides specific exceptions for different error scenarios:

| Exception | Status Code | When to Use |
|-----------|-------------|-------------|
| `AuthenticationError` | 401 | Invalid API key |
| `RateLimitError` | 429 | Too many requests |
| `ValidationError` | 422 | Invalid parameters |
| `APIError` | 4xx/5xx | General API errors |

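As a quick illustration of the table, a handler can also branch on the exception's `status_code` attribute (used throughout this guide) after catching the base class. A sketch, assuming `APIError` is the common base of the rows above:

```python
from fastgpt_client.exceptions import APIError

def describe_error(e: APIError) -> str:
    """Translate a caught exception into a short label, per the table above."""
    if e.status_code == 401:
        return "Invalid API key"
    if e.status_code == 429:
        return "Too many requests"
    if e.status_code == 422:
        return "Invalid parameters"
    if e.status_code and e.status_code >= 500:
        return "Server error"
    return "General API error"
```
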
## Basic Error Handling

```python
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import (
    APIError,
    AuthenticationError,
    RateLimitError,
    ValidationError
)

try:
    with ChatClient(api_key="fastgpt-xxxxx") as client:
        response = client.create_chat_completion(
            messages=[{"role": "user", "content": "Hello"}],
            stream=False
        )
        response.raise_for_status()
        result = response.json()
        print(result['choices'][0]['message']['content'])

except AuthenticationError:
    print("Authentication failed. Check your API key.")

except RateLimitError as e:
    print(f"Rate limit exceeded. Retry after: {e.retry_after}")

except ValidationError as e:
    print(f"Invalid parameters: {e.message}")

except APIError as e:
    print(f"API error: {e.message}")
```

## Comprehensive Error Handler

```python
import logging

from fastgpt_client import ChatClient
from fastgpt_client.exceptions import (
    APIError,
    AuthenticationError,
    RateLimitError,
    ValidationError
)

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)


class ChatService:
    """Chat service with comprehensive error handling."""

    def __init__(self, api_key: str, base_url: str):
        self.client = ChatClient(api_key=api_key, base_url=base_url)

    def send_message(self, message: str) -> str | None:
        """Send a message with error handling."""
        try:
            response = self.client.create_chat_completion(
                messages=[{"role": "user", "content": message}],
                stream=False
            )
            response.raise_for_status()
            result = response.json()
            return result['choices'][0]['message']['content']

        except AuthenticationError:
            logger.error("Invalid API key")
            return "Error: Authentication failed. Please check your API key."

        except RateLimitError as e:
            logger.error(f"Rate limit exceeded: {e}")
            wait_time = int(e.retry_after) if e.retry_after else 5
            return f"Error: Too many requests. Please wait {wait_time} seconds."

        except ValidationError as e:
            logger.error(f"Validation error: {e}")
            return f"Error: Invalid request - {e.message}"

        except APIError as e:
            logger.error(f"API error: {e}")
            return f"Error: Server error - {e.message}"

        except Exception as e:
            logger.exception(f"Unexpected error: {e}")
            return "Error: An unexpected error occurred."

    def close(self):
        """Close the client."""
        self.client.close()
```

## Retry Logic

### Simple Retry

```python
import time
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import RateLimitError

def chat_with_retry(client, messages, max_retries=3):
    """Retry chat completion on rate limit errors."""
    for attempt in range(max_retries):
        try:
            response = client.create_chat_completion(
                messages=messages,
                stream=False
            )
            response.raise_for_status()
            return response.json()

        except RateLimitError as e:
            if attempt < max_retries - 1:
                wait_time = int(e.retry_after) if e.retry_after else 5
                print(f"Rate limited. Waiting {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                raise
```

### Exponential Backoff

```python
import time
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import APIError

def chat_with_backoff(client, messages, max_retries=5):
    """Retry with exponential backoff."""
    base_delay = 1  # seconds

    for attempt in range(max_retries):
        try:
            response = client.create_chat_completion(
                messages=messages,
                stream=False
            )
            response.raise_for_status()
            return response.json()

        except APIError as e:
            if attempt < max_retries - 1 and e.status_code >= 500:
                # Exponential backoff for server errors
                delay = base_delay * (2 ** attempt)
                print(f"Server error. Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                raise
```

## Streaming Error Handling

```python
import json
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import FastGPTError

def stream_chat_safely(client, messages):
    """Handle streaming with error recovery."""
    try:
        response = client.create_chat_completion(
            messages=messages,
            stream=True
        )

        for line in response.iter_lines():
            try:
                if line.startswith("data:"):
                    data = line[5:].strip()
                    if data and data != "[DONE]":
                        chunk = json.loads(data)
                        if "choices" in chunk and chunk["choices"]:
                            delta = chunk["choices"][0].get("delta", {})
                            content = delta.get("content", "")
                            if content:
                                print(content, end="", flush=True)
            except json.JSONDecodeError:
                # Skip malformed JSON chunks
                continue

    except FastGPTError as e:
        print(f"\nStream error: {e}")
```

## Response Validation

```python
def validate_response(response_data: dict) -> bool:
    """Validate API response structure."""
    if "choices" not in response_data:
        raise ValueError("Response missing 'choices' field")

    if not response_data["choices"]:
        raise ValueError("Empty choices array")

    choice = response_data["choices"][0]
    if "message" not in choice:
        raise ValueError("Choice missing 'message' field")

    message = choice["message"]
    if "content" not in message:
        raise ValueError("Message missing 'content' field")

    return True


def safe_chat_completion(client, messages):
    """Chat with response validation."""
    response = client.create_chat_completion(
        messages=messages,
        stream=False
    )
    response.raise_for_status()
    result = response.json()

    try:
        validate_response(result)
        return result['choices'][0]['message']['content']
    except ValueError as e:
        print(f"Invalid response format: {e}")
        return None
```

## Logging Errors

```python
import logging
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import FastGPTError

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def log_errors(client, messages):
    """Log errors with context."""
    try:
        response = client.create_chat_completion(
            messages=messages,
            stream=False
        )
        response.raise_for_status()
        return response.json()

    except FastGPTError as e:
        logger.error(
            f"API Error: {type(e).__name__} - {e.message}",
            extra={
                "status_code": e.status_code,
                "response_data": e.response_data,
            }
        )
        raise
```

## Custom Exception Handler

```python
from functools import wraps
from fastgpt_client.exceptions import FastGPTError

def handle_fastgpt_errors(func):
    """Decorator for handling FastGPT errors."""

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except FastGPTError as e:
            print(f"FastGPT Error: {e}")
            return None
        except Exception as e:
            print(f"Unexpected Error: {e}")
            return None

    return wrapper


@handle_fastgpt_errors
def send_message(client, message: str):
    """Send message with automatic error handling."""
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": message}],
        stream=False
    )
    response.raise_for_status()
    result = response.json()
    return result['choices'][0]['message']['content']
```

## Best Practices

1. **Always use `raise_for_status()`** - Catches HTTP errors early
2. **Handle specific exceptions** - Catch known error types in dedicated `except` blocks
3. **Log all errors** - Helps with debugging and monitoring
4. **Provide user feedback** - Show meaningful error messages
5. **Implement retries** - For transient errors (rate limits, server errors)
6. **Validate responses** - Ensure the data structure is correct
7. **Use context managers** - Ensures proper cleanup

## See Also

- [Exceptions Reference](../api/exceptions.md) - Exception types and attributes
- [Rate Limiting](rate_limiting.md) - Handling rate limits effectively

docs/advanced/rate_limiting.md (new file, 298 lines)
@@ -0,0 +1,298 @@

# Rate Limiting

Understanding and handling rate limits in the FastGPT API.

## Understanding Rate Limits

The FastGPT API may enforce rate limits to:

- Prevent API abuse
- Ensure fair resource allocation
- Maintain system stability

When you exceed the rate limit, you'll receive a `429 Too Many Requests` response.

## RateLimitError

The SDK raises `RateLimitError` when rate limits are exceeded:

```python
from fastgpt_client import ChatClient
from fastgpt_client.exceptions import RateLimitError

try:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "Hello"}]
    )
except RateLimitError as e:
    print("Rate limit exceeded!")
    print(f"Status code: {e.status_code}")  # 429
    print(f"Retry after: {e.retry_after}")  # Suggested wait time
```

## Handling Rate Limits

### 1. Simple Retry with Delay

```python
import time
from fastgpt_client.exceptions import RateLimitError

def chat_with_retry(client, messages, max_retries=3):
    """Retry on rate limit with fixed delay."""
    for attempt in range(max_retries):
        try:
            response = client.create_chat_completion(
                messages=messages,
                stream=False
            )
            response.raise_for_status()
            return response.json()

        except RateLimitError as e:
            if attempt < max_retries - 1:
                # Use Retry-After header or default to 5 seconds
                wait_time = int(e.retry_after) if e.retry_after else 5
                print(f"Rate limited. Waiting {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                print("Max retries exceeded")
                raise
```

### 2. Exponential Backoff

```python
import random
import time

from fastgpt_client.exceptions import RateLimitError

def chat_with_backoff(client, messages, max_retries=5):
    """Retry with exponential backoff."""
    base_delay = 1  # Start with 1 second

    for attempt in range(max_retries):
        try:
            response = client.create_chat_completion(
                messages=messages,
                stream=False
            )
            response.raise_for_status()
            return response.json()

        except RateLimitError:
            if attempt < max_retries - 1:
                # Exponential backoff with jitter to avoid thundering herd
                delay = base_delay * (2 ** attempt)
                jitter = random.uniform(0, 0.5 * delay)
                wait_time = delay + jitter

                print(f"Rate limited. Waiting {wait_time:.1f} seconds...")
                time.sleep(wait_time)
            else:
                raise
```

### 3. Async Retry with Backoff

```python
import asyncio

from fastgpt_client.exceptions import RateLimitError

async def async_chat_with_retry(client, messages, max_retries=5):
    """Async retry with exponential backoff.

    Assumes `client` exposes an awaitable create_chat_completion.
    """
    base_delay = 1

    for attempt in range(max_retries):
        try:
            response = await client.create_chat_completion(
                messages=messages,
                stream=False
            )
            response.raise_for_status()
            return response.json()

        except RateLimitError:
            if attempt < max_retries - 1:
                delay = base_delay * (2 ** attempt)
                print(f"Rate limited. Waiting {delay} seconds...")
                await asyncio.sleep(delay)
            else:
                raise
```

### 4. Rate Limiter Class

```python
import time
from threading import Lock


class RateLimiter:
    """Token bucket rate limiter."""

    def __init__(self, rate: int, per: float = 60.0):
        """
        Args:
            rate: Number of requests allowed
            per: Time period in seconds
        """
        self.rate = rate
        self.per = per
        self.allowance = rate
        self.last_check = time.time()
        self.lock = Lock()

    def acquire(self, block: bool = True, timeout: float | None = None) -> bool:
        """Acquire a token from the bucket."""
        with self.lock:
            current = time.time()
            time_passed = current - self.last_check
            self.last_check = current

            # Refill bucket
            self.allowance += time_passed * (self.rate / self.per)

            if self.allowance > self.rate:
                self.allowance = self.rate

            if self.allowance < 1.0:
                if not block:
                    return False

                # Calculate wait time
                sleep_time = (1.0 - self.allowance) * (self.per / self.rate)

                if timeout is not None and sleep_time > timeout:
                    return False

                time.sleep(sleep_time)
                self.allowance = 0.0
            else:
                self.allowance -= 1.0

            return True


# Usage
rate_limiter = RateLimiter(rate=10, per=60)  # 10 requests per minute

for i in range(15):
    # Non-blocking: skip the request when no token is available
    if rate_limiter.acquire(block=False):
        response = client.create_chat_completion(
            messages=[{"role": "user", "content": f"Message {i}"}]
        )
        print(f"Sent message {i}")
    else:
        print(f"Rate limited, skipping message {i}")
```

### 5. Decorator for Rate Limiting

```python
import functools
import time

from fastgpt_client.exceptions import RateLimitError

def rate_limit_retry(max_retries=3, base_delay=1):
    """Decorator to handle rate limiting with retries."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except RateLimitError as e:
                    if attempt < max_retries - 1:
                        delay = base_delay * (2 ** attempt)
                        wait_time = int(e.retry_after) if e.retry_after else delay
                        print(f"Rate limited. Waiting {wait_time} seconds...")
                        time.sleep(wait_time)
                    else:
                        raise
        return wrapper
    return decorator


# Usage
@rate_limit_retry(max_retries=3, base_delay=2)
def send_message(client, message: str):
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": message}],
        stream=False
    )
    response.raise_for_status()
    return response.json()
```

## Monitoring Rate Limits

```python
import time
from collections import defaultdict
from threading import Lock


class RequestMonitor:
    """Monitor API request rates."""

    def __init__(self, window_seconds=60):
        self.window = window_seconds
        self.requests = defaultdict(list)
        self.lock = Lock()

    def record_request(self, endpoint: str):
        """Record an API request."""
        with self.lock:
            now = time.time()
            self.requests[endpoint].append(now)

            # Remove old requests outside the window
            cutoff = now - self.window
            self.requests[endpoint] = [
                t for t in self.requests[endpoint] if t > cutoff
            ]

    def get_rate(self, endpoint: str) -> float:
        """Get requests per second for an endpoint."""
        with self.lock:
            recent = self.requests[endpoint]
            if not recent:
                return 0.0
            return len(recent) / self.window

    def is_rate_limited(self, endpoint: str, limit: int) -> bool:
        """Check if an endpoint is at its request limit."""
        with self.lock:
            cutoff = time.time() - self.window
            recent = [t for t in self.requests[endpoint] if t > cutoff]
            return len(recent) >= limit


# Usage
monitor = RequestMonitor(window_seconds=60)

def make_request(client, messages):
    endpoint = "/api/v1/chat/completions"

    # Check if we're approaching the rate limit
    if monitor.is_rate_limited(endpoint, limit=100):
        print("Approaching rate limit, slowing down...")
        time.sleep(1)

    monitor.record_request(endpoint)
    response = client.create_chat_completion(messages=messages)
    return response
```

## Best Practices

1. **Implement backoff** - Use exponential backoff for retries
2. **Respect Retry-After** - Use the `retry_after` value when available
3. **Monitor usage** - Track request rates to avoid hitting limits
4. **Queue requests** - For batch operations, use rate limiting
5. **Handle gracefully** - Show user-friendly messages when rate limited
6. **Use async** - Better resource utilization for concurrent requests (see the sketch below)

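A sketch of practice 6: capping concurrent requests with an `asyncio.Semaphore`, assuming the awaitable client from the async retry example above:

```python
import asyncio

async def send_batch(client, prompts, max_concurrent=5):
    """Send many prompts concurrently while capping in-flight requests."""
    semaphore = asyncio.Semaphore(max_concurrent)

    async def send(prompt):
        async with semaphore:  # At most max_concurrent requests at once
            response = await client.create_chat_completion(
                messages=[{"role": "user", "content": prompt}],
                stream=False
            )
            response.raise_for_status()
            return response.json()

    return await asyncio.gather(*(send(p) for p in prompts))
```
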
## See Also

- [Error Handling](error_handling.md) - Comprehensive error handling guide
- [Exceptions Reference](../api/exceptions.md) - Exception types and attributes

docs/advanced/streaming_events.md (new file, 284 lines)
@@ -0,0 +1,284 @@

# Streaming Events

FastGPT uses Server-Sent Events (SSE) for streaming responses. This guide covers all event types you may encounter.

## SSE Format

FastGPT sends events in this format:

```
event: eventType
data: {"key": "value"}

event: anotherEvent
data: {"key": "value"}
```

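Because the event name and its payload arrive on separate lines, a small helper that pairs them up keeps stream handling simple. A sketch (`iter_sse_events` is illustrative, not part of the SDK; the per-event examples below inline the same logic):

```python
import json

def iter_sse_events(lines):
    """Yield (event, payload) pairs from an SSE line stream.

    `event` is None for plain data lines (OpenAI-compatible chunks).
    """
    current_event = None
    for line in lines:
        if line.startswith("event:"):
            current_event = line[6:].strip()
        elif line.startswith("data:"):
            data = line[5:].strip()
            if not data or data == "[DONE]":
                current_event = None
                continue
            try:
                payload = json.loads(data)
            except json.JSONDecodeError:
                continue
            yield current_event, payload
            current_event = None
```
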
## Event Types

### 1. `data` Event

The main streaming event, compatible with OpenAI's format:

```python
import json

for line in response.iter_lines():
    if line.startswith("data:"):
        data = line[5:].strip()
        if data and data != "[DONE]":
            chunk = json.loads(data)
            # Process OpenAI-compatible response
            if "choices" in chunk and chunk["choices"]:
                delta = chunk["choices"][0].get("delta", {})
                content = delta.get("content", "")
                if content:
                    print(content, end="", flush=True)
```

### 2. `answer` Event

Main chat response content (alternative format). The data line follows the event line, so track the current event:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "answer":
        answer_data = json.loads(line[5:])
        print(answer_data.get("text", ""), end="", flush=True)
        current_event = None
```

### 3. `fastAnswer` Event

Quick reply content:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "fastAnswer":
        fast_answer_data = json.loads(line[5:])
        print(f"Quick reply: {fast_answer_data}")
        current_event = None
```

### 4. `flowNodeStatus` Event

Workflow node status updates:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "flowNodeStatus":
        status_data = json.loads(line[5:])
        status = status_data.get("status")  # "running", "completed", "error"
        node_name = status_data.get("nodeName")
        print(f"[{status.upper()}] {node_name}")
        current_event = None
```

### 5. `flowResponses` Event

Complete node response data (requires `detail=True`):

```python
response = client.create_chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
    detail=True  # Enable detailed responses
)

# Then in the stream:
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "flowResponses":
        response_data = json.loads(line[5:])
        # Contains module execution details
        module_name = response_data.get("moduleName")
        tokens = response_data.get("tokens")
        print(f"Module: {module_name}, Tokens: {tokens}")
        current_event = None
```

### 6. `interactive` Event

Interactive node (requires user input or selection):

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "interactive":
        interactive_data = json.loads(line[5:])
        interactive_type = interactive_data.get("type")

        if interactive_type == "userSelect":
            options = interactive_data["params"]["userSelectOptions"]
            print("Please select an option:")
            for i, option in enumerate(options):
                print(f"{i + 1}. {option['value']}")

        elif interactive_type == "userInput":
            form_fields = interactive_data["params"]["inputForm"]
            print("Please provide the following information:")
            for field in form_fields:
                print(f"- {field['label']}")

        current_event = None
```

### 7. `updateVariables` Event

Variable updates during execution:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "updateVariables":
        var_data = json.loads(line[5:])
        variables = var_data.get("variables", {})
        print(f"Variables updated: {variables}")
        current_event = None
```

### 8. `error` Event

Error events:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:") and current_event == "error":
        error_data = json.loads(line[5:])
        error_message = error_data.get("message", "Unknown error")
        error_type = error_data.get("type", "Error")
        print(f"Error [{error_type}]: {error_message}")
        current_event = None
```

### 9. `toolCall`, `toolParams`, `toolResponse` Events

Tool/agent operation events:

```python
current_event = None
for line in response.iter_lines():
    if line.startswith("event:"):
        current_event = line[6:].strip()
    elif line.startswith("data:"):
        data = line[5:].strip()
        if not data or data == "[DONE]":
            current_event = None
            continue
        payload = json.loads(data)

        if current_event == "toolCall":
            print(f"Tool called: {payload.get('toolName')}")
        elif current_event == "toolParams":
            print(f"Tool parameters: {payload}")
        elif current_event == "toolResponse":
            print(f"Tool response: {payload}")

        current_event = None
```

## Complete Event Handler

```python
import json
from fastgpt_client import ChatClient

def handle_all_events(response):
    """Handle all streaming event types."""

    buffer = ""
    current_event = None

    for line in response.iter_lines():
        if not line:
            continue

        # Event type line
        if line.startswith("event:"):
            current_event = line[6:].strip()

        # Data line
        elif line.startswith("data:"):
            data = line[5:].strip()
            if not data or data == "[DONE]":
                continue

            try:
                data_obj = json.loads(data)
            except json.JSONDecodeError:
                continue

            # Handle based on event type
            if current_event is None:
                # Default: OpenAI-compatible format
                if "choices" in data_obj and data_obj["choices"]:
                    delta = data_obj["choices"][0].get("delta", {})
                    content = delta.get("content", "")
                    if content:
                        buffer += content
                        print(content, end="", flush=True)

            elif current_event == "answer":
                text = data_obj.get("text", "")
                if text:
                    buffer += text
                    print(text, end="", flush=True)

            elif current_event == "flowNodeStatus":
                status = data_obj.get("status")
                node = data_obj.get("nodeName", "Unknown")
                print(f"\n[{status.upper()}] {node}")

            elif current_event == "interactive":
                interactive_type = data_obj.get("type")
                print(f"\n[INTERACTIVE] {interactive_type}")
                print(f"Details: {data_obj.get('params', {})}")

            elif current_event == "error":
                print(f"\n[ERROR] {data_obj.get('message', 'Unknown error')}")

            elif current_event == "toolCall":
                print(f"\n[TOOL] Calling: {data_obj.get('toolName')}")

            # Reset event after its data line has been handled
            current_event = None

    return buffer


# Usage
with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
        detail=True  # Enable flow responses
    )

    full_response = handle_all_events(response)
    print(f"\n\nFull response: {full_response}")
```

## Event Flow Example

A typical streaming conversation might generate events like:

```
event:flowNodeStatus
data:{"status": "running", "nodeName": "Chat Node"}

event:answer
data:{"text": "Hello"}

event:answer
data:{"text": "! How"}

event:answer
data:{"text": " can I help"}

event:flowNodeStatus
data:{"status": "completed", "nodeName": "Chat Node"}

event:flowResponses
data:{"moduleName": "Chat Node", "tokens": 50}

data:{"choices": [{"delta": {"content": "Hello! How can I help"}}], "usage": {"total_tokens": 50}}

data:[DONE]
```

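To experiment without an API key, the stream above can be replayed through `handle_all_events` with a minimal stand-in for the response object (a test sketch; `FakeResponse` is not part of the SDK):

```python
class FakeResponse:
    """Stand-in for a streaming response, for local experimentation."""

    def __init__(self, lines):
        self._lines = lines

    def iter_lines(self):
        yield from self._lines


sample = [
    'event:flowNodeStatus',
    'data:{"status": "running", "nodeName": "Chat Node"}',
    'event:answer',
    'data:{"text": "Hello"}',
    'event:answer',
    'data:{"text": "! How can I help"}',
    'event:flowNodeStatus',
    'data:{"status": "completed", "nodeName": "Chat Node"}',
    'data:[DONE]',
]

# Prints the status lines and text chunks, then the accumulated buffer
print(handle_all_events(FakeResponse(sample)))
```
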
## Best Practices

1. **Handle `[DONE]`** - Check for the end of the stream
2. **Validate JSON** - Use try/except for parsing
3. **Buffer content** - Accumulate text for display
4. **Handle errors** - Watch for error events
5. **Check event types** - Use `startswith("event:")` to detect events

## See Also

- [Streaming Example](../examples/streaming.md) - Basic streaming usage
- [Detail Mode](detail_mode.md) - Enable detailed execution data