Update agent flow and frontend chat experience

Xin Wang
2026-03-31 21:58:24 +08:00
parent e34e569de4
commit 0e2dafe440
13 changed files with 345 additions and 372 deletions

View File

@@ -1 +1,3 @@
-# GEMINI_API_KEY=
+# OPENAI_API_KEY=
+# OPENAI_BASE_URL=
+# TAVILY_API_KEY=
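
A minimal sketch of how these keys are consumed at startup, assuming python-dotenv (already a dependency here) and the fail-fast behavior of `require_env` in `graph.py`; `OPENAI_BASE_URL` stays optional and falls back to the library default:

```python
import os

from dotenv import load_dotenv

# Load OPENAI_API_KEY, OPENAI_BASE_URL, and TAVILY_API_KEY from .env.
load_dotenv()

# Fail fast on the required keys; OPENAI_BASE_URL may be unset.
for key in ("OPENAI_API_KEY", "TAVILY_API_KEY"):
    if not os.getenv(key):
        raise ValueError(f"{key} is not set")
```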

View File

@@ -21,7 +21,7 @@ def main() -> None:
     )
     parser.add_argument(
         "--reasoning-model",
-        default="gemini-2.5-pro-preview-05-06",
+        default="openai/gpt-5.4",
         help="Model for the final answer",
     )
     args = parser.parse_args()

View File

@@ -11,13 +11,13 @@ requires-python = ">=3.11,<4.0"
 dependencies = [
     "langgraph>=0.2.6",
     "langchain>=0.3.19",
-    "langchain-google-genai",
+    "langchain-openai",
     "python-dotenv>=1.0.1",
     "langgraph-sdk>=0.1.57",
     "langgraph-cli",
     "langgraph-api",
     "fastapi",
-    "google-genai",
+    "tavily-python",
 ]

View File

@@ -9,26 +9,33 @@ class Configuration(BaseModel):
"""The configuration for the agent.""" """The configuration for the agent."""
query_generator_model: str = Field( query_generator_model: str = Field(
default="gemini-2.0-flash", default="openai/gpt-5.4",
metadata={ metadata={
"description": "The name of the language model to use for the agent's query generation." "description": "The name of the language model to use for the agent's query generation."
}, },
) )
reflection_model: str = Field( reflection_model: str = Field(
default="gemini-2.5-flash", default="openai/gpt-5.4",
metadata={ metadata={
"description": "The name of the language model to use for the agent's reflection." "description": "The name of the language model to use for the agent's reflection."
}, },
) )
answer_model: str = Field( answer_model: str = Field(
default="gemini-2.5-pro", default="openai/gpt-5.4",
metadata={ metadata={
"description": "The name of the language model to use for the agent's answer." "description": "The name of the language model to use for the agent's answer."
}, },
) )
reasoning_model: str = Field(
default="openai/gpt-5.4",
metadata={
"description": "Fallback model used when the client does not provide a supported reasoning model."
},
)
number_of_initial_queries: int = Field( number_of_initial_queries: int = Field(
default=3, default=3,
metadata={"description": "The number of initial search queries to generate."}, metadata={"description": "The number of initial search queries to generate."},

View File

@@ -1,91 +1,134 @@
 import os
-from agent.tools_and_schemas import SearchQueryList, Reflection
-from dotenv import load_dotenv
-from langchain_core.messages import AIMessage
-from langgraph.types import Send
-from langgraph.graph import StateGraph
-from langgraph.graph import START, END
-from langchain_core.runnables import RunnableConfig
-from google.genai import Client
+from typing import Any
+
+from agent.configuration import Configuration
+from agent.prompts import (
+    answer_instructions,
+    get_current_date,
+    query_writer_instructions,
+    reflection_instructions,
+)
 from agent.state import (
     OverallState,
     QueryGenerationState,
     ReflectionState,
     WebSearchState,
 )
-from agent.configuration import Configuration
-from agent.prompts import (
-    get_current_date,
-    query_writer_instructions,
-    web_searcher_instructions,
-    reflection_instructions,
-    answer_instructions,
-)
-from langchain_google_genai import ChatGoogleGenerativeAI
+from agent.tools_and_schemas import Reflection, SearchQueryList
 from agent.utils import (
-    get_citations,
+    format_sources_for_prompt,
     get_research_topic,
-    insert_citation_markers,
-    resolve_urls,
+    normalize_model_name,
+    normalize_tavily_sources,
+    shorten_search_query,
 )
+from dotenv import load_dotenv
+from langchain_core.messages import AIMessage
+from langchain_core.runnables import RunnableConfig
+from langchain_openai import ChatOpenAI
+from langgraph.graph import END, START, StateGraph
+from langgraph.types import Send
+from tavily import TavilyClient

 load_dotenv()

-if os.getenv("GEMINI_API_KEY") is None:
-    raise ValueError("GEMINI_API_KEY is not set")
-
-# Used for Google Search API
-genai_client = Client(api_key=os.getenv("GEMINI_API_KEY"))
+TAVILY_TOPIC = "general"
+TAVILY_SEARCH_DEPTH = "advanced"
+TAVILY_MAX_RESULTS = 5
+TAVILY_CHUNKS_PER_SOURCE = 3
+
+
+def require_env(name: str) -> str:
+    """Return an environment variable or raise a clear error."""
+    value = os.getenv(name)
+    if not value:
+        raise ValueError(f"{name} is not set")
+    return value
+
+
+def get_chat_model(model_name: str, temperature: float = 0) -> ChatOpenAI:
+    """Create the OpenAI chat model lazily for easier testing."""
+    base_url = os.getenv("OPENAI_BASE_URL")
+    return ChatOpenAI(
+        model=normalize_model_name(model_name),
+        api_key=require_env("OPENAI_API_KEY"),
+        base_url=base_url or None,
+        temperature=temperature,
+    )
+
+
+def get_tavily_client() -> TavilyClient:
+    """Create the Tavily client lazily for easier testing."""
+    return TavilyClient(api_key=require_env("TAVILY_API_KEY"))
+
+
+def create_structured_response(
+    prompt: str,
+    model_name: str,
+    schema_model: type[Any],
+    temperature: float = 1,
+) -> Any:
+    """Call the LangChain OpenAI chat model and parse structured output."""
+    llm = get_chat_model(model_name, temperature=temperature)
+    return llm.with_structured_output(
+        schema_model,
+        method="function_calling",
+    ).invoke(prompt)
+
+
+def extract_text_content(content: Any) -> str:
+    """Normalize LangChain message content into a string."""
+    if isinstance(content, str):
+        return content.strip()
+    if isinstance(content, list):
+        text_chunks = []
+        for chunk in content:
+            if isinstance(chunk, dict) and chunk.get("type") == "text":
+                text_chunks.append(str(chunk.get("text", "")))
+        return "\n".join(text_chunks).strip()
+    return str(content).strip()
+
+
+def create_text_response(prompt: str, model_name: str, temperature: float = 0) -> str:
+    """Call the LangChain OpenAI chat model and return markdown output."""
+    response = get_chat_model(model_name, temperature=temperature).invoke(prompt)
+    text_output = extract_text_content(response.content)
+    if not text_output:
+        raise ValueError("OpenAI response did not include text output")
+    return text_output


 # Nodes
 def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState:
-    """LangGraph node that generates search queries based on the User's question.
-
-    Uses Gemini 2.0 Flash to create an optimized search queries for web research based on
-    the User's question.
-
-    Args:
-        state: Current graph state containing the User's question
-        config: Configuration for the runnable, including LLM provider settings
-
-    Returns:
-        Dictionary with state update, including search_query key containing the generated queries
-    """
+    """Generate initial search queries from the user's request."""
     configurable = Configuration.from_runnable_config(config)
+    reasoning_model = normalize_model_name(
+        state.get("reasoning_model"),
+        configurable.reasoning_model or configurable.query_generator_model,
+    )

-    # check for custom initial search query count
     if state.get("initial_search_query_count") is None:
         state["initial_search_query_count"] = configurable.number_of_initial_queries

-    # init Gemini 2.0 Flash
-    llm = ChatGoogleGenerativeAI(
-        model=configurable.query_generator_model,
-        temperature=1.0,
-        max_retries=2,
-        api_key=os.getenv("GEMINI_API_KEY"),
-    )
-    structured_llm = llm.with_structured_output(SearchQueryList)
-
-    # Format the prompt
-    current_date = get_current_date()
     formatted_prompt = query_writer_instructions.format(
-        current_date=current_date,
+        current_date=get_current_date(),
         research_topic=get_research_topic(state["messages"]),
         number_queries=state["initial_search_query_count"],
     )
-    # Generate the search queries
-    result = structured_llm.invoke(formatted_prompt)
+    result = create_structured_response(
+        formatted_prompt,
+        reasoning_model,
+        SearchQueryList,
+        temperature=1,
+    )
     return {"search_query": result.query}


-def continue_to_web_research(state: QueryGenerationState):
-    """LangGraph node that sends the search queries to the web research node.
-
-    This is used to spawn n number of web research nodes, one for each search query.
-    """
+def continue_to_web_research(state: QueryGenerationState) -> list[Send]:
+    """Fan out search queries into parallel web research nodes."""
     return [
         Send("web_research", {"search_query": search_query, "id": int(idx)})
         for idx, search_query in enumerate(state["search_query"])
@@ -93,83 +136,63 @@ def continue_to_web_research(state: QueryGenerationState):
 def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
-    """LangGraph node that performs web research using the native Google Search API tool.
-
-    Executes a web search using the native Google Search API tool in combination with Gemini 2.0 Flash.
-
-    Args:
-        state: Current graph state containing the search query and research loop count
-        config: Configuration for the runnable, including search API settings
-
-    Returns:
-        Dictionary with state update, including sources_gathered, research_loop_count, and web_research_results
-    """
-    # Configure
-    configurable = Configuration.from_runnable_config(config)
-    formatted_prompt = web_searcher_instructions.format(
-        current_date=get_current_date(),
-        research_topic=state["search_query"],
-    )
-
-    # Uses the google genai client as the langchain client doesn't return grounding metadata
-    response = genai_client.models.generate_content(
-        model=configurable.query_generator_model,
-        contents=formatted_prompt,
-        config={
-            "tools": [{"google_search": {}}],
-            "temperature": 0,
-        },
-    )
-    # resolve the urls to short urls for saving tokens and time
-    resolved_urls = resolve_urls(
-        response.candidates[0].grounding_metadata.grounding_chunks, state["id"]
-    )
-    # Gets the citations and adds them to the generated text
-    citations = get_citations(response, resolved_urls)
-    modified_text = insert_citation_markers(response.text, citations)
-    sources_gathered = [item for citation in citations for item in citation["segments"]]
+    """Execute Tavily search and return raw evidence for one query."""
+    search_query = shorten_search_query(state["search_query"])
+    tavily_response = get_tavily_client().search(
+        query=search_query,
+        topic=TAVILY_TOPIC,
+        search_depth=TAVILY_SEARCH_DEPTH,
+        chunks_per_source=TAVILY_CHUNKS_PER_SOURCE,
+        max_results=TAVILY_MAX_RESULTS,
+        include_answer=False,
+        include_raw_content=False,
+    )
+    sources = normalize_tavily_sources(tavily_response.get("results", []))
+    if not sources:
+        return {
+            "sources_gathered": [],
+            "search_query": [search_query],
+            "web_research_result": [
+                f'No Tavily results were returned for "{search_query}".'
+            ],
+        }
+
+    evidence = "\n\n".join(
+        [
+            f"Search Query: {search_query}",
+            "Source Evidence:",
+            format_sources_for_prompt(sources),
+        ]
+    )

     return {
-        "sources_gathered": sources_gathered,
-        "search_query": [state["search_query"]],
-        "web_research_result": [modified_text],
+        "sources_gathered": sources,
+        "search_query": [search_query],
+        "web_research_result": [evidence],
     }


 def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
-    """LangGraph node that identifies knowledge gaps and generates potential follow-up queries.
-
-    Analyzes the current summary to identify areas for further research and generates
-    potential follow-up queries. Uses structured output to extract
-    the follow-up query in JSON format.
-
-    Args:
-        state: Current graph state containing the running summary and research topic
-        config: Configuration for the runnable, including LLM provider settings
-
-    Returns:
-        Dictionary with state update, including search_query key containing the generated follow-up query
-    """
+    """Decide whether additional research is needed."""
     configurable = Configuration.from_runnable_config(config)
-    # Increment the research loop count and get the reasoning model
     state["research_loop_count"] = state.get("research_loop_count", 0) + 1
-    reasoning_model = state.get("reasoning_model", configurable.reflection_model)
+    reasoning_model = normalize_model_name(
+        state.get("reasoning_model"),
+        configurable.reasoning_model,
+    )

-    # Format the prompt
-    current_date = get_current_date()
     formatted_prompt = reflection_instructions.format(
-        current_date=current_date,
+        current_date=get_current_date(),
         research_topic=get_research_topic(state["messages"]),
         summaries="\n\n---\n\n".join(state["web_research_result"]),
     )
-    # init Reasoning Model
-    llm = ChatGoogleGenerativeAI(
-        model=reasoning_model,
-        temperature=1.0,
-        max_retries=2,
-        api_key=os.getenv("GEMINI_API_KEY"),
-    )
-    result = llm.with_structured_output(Reflection).invoke(formatted_prompt)
+    result = create_structured_response(
+        formatted_prompt,
+        reasoning_model,
+        Reflection,
+        temperature=1,
+    )

     return {
         "is_sufficient": result.is_sufficient,
@@ -183,19 +206,8 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
 def evaluate_research(
     state: ReflectionState,
     config: RunnableConfig,
-) -> OverallState:
-    """LangGraph routing function that determines the next step in the research flow.
-
-    Controls the research loop by deciding whether to continue gathering information
-    or to finalize the summary based on the configured maximum number of research loops.
-
-    Args:
-        state: Current graph state containing the research loop count
-        config: Configuration for the runnable, including max_research_loops setting
-
-    Returns:
-        String literal indicating the next node to visit ("web_research" or "finalize_summary")
-    """
+) -> OverallState | list[Send]:
+    """Route either back into research or into final answer synthesis."""
     configurable = Configuration.from_runnable_config(config)
     max_research_loops = (
         state.get("max_research_loops")
@@ -204,90 +216,53 @@ def evaluate_research(
     )
     if state["is_sufficient"] or state["research_loop_count"] >= max_research_loops:
         return "finalize_answer"
-    else:
-        return [
-            Send(
-                "web_research",
-                {
-                    "search_query": follow_up_query,
-                    "id": state["number_of_ran_queries"] + int(idx),
-                },
-            )
-            for idx, follow_up_query in enumerate(state["follow_up_queries"])
-        ]
+    return [
+        Send(
+            "web_research",
+            {
+                "search_query": follow_up_query,
+                "id": state["number_of_ran_queries"] + int(idx),
+            },
+        )
+        for idx, follow_up_query in enumerate(state["follow_up_queries"])
+    ]


-def finalize_answer(state: OverallState, config: RunnableConfig):
-    """LangGraph node that finalizes the research summary.
-
-    Prepares the final output by deduplicating and formatting sources, then
-    combining them with the running summary to create a well-structured
-    research report with proper citations.
-
-    Args:
-        state: Current graph state containing the running summary and sources gathered
-
-    Returns:
-        Dictionary with state update, including running_summary key containing the formatted final summary with sources
-    """
+def finalize_answer(state: OverallState, config: RunnableConfig) -> OverallState:
+    """Generate the final cited answer from the accumulated research summaries."""
     configurable = Configuration.from_runnable_config(config)
-    reasoning_model = state.get("reasoning_model") or configurable.answer_model
+    reasoning_model = normalize_model_name(
+        state.get("reasoning_model"),
+        configurable.reasoning_model or configurable.answer_model,
+    )

-    # Format the prompt
-    current_date = get_current_date()
     formatted_prompt = answer_instructions.format(
-        current_date=current_date,
+        current_date=get_current_date(),
         research_topic=get_research_topic(state["messages"]),
         summaries="\n---\n\n".join(state["web_research_result"]),
     )
+    answer = create_text_response(formatted_prompt, reasoning_model, temperature=0)

-    # init Reasoning Model, default to Gemini 2.5 Flash
-    llm = ChatGoogleGenerativeAI(
-        model=reasoning_model,
-        temperature=0,
-        max_retries=2,
-        api_key=os.getenv("GEMINI_API_KEY"),
-    )
-    result = llm.invoke(formatted_prompt)
-
-    # Replace the short urls with the original urls and add all used urls to the sources_gathered
-    unique_sources = []
-    for source in state["sources_gathered"]:
-        if source["short_url"] in result.content:
-            result.content = result.content.replace(
-                source["short_url"], source["value"]
-            )
-            unique_sources.append(source)

     return {
-        "messages": [AIMessage(content=result.content)],
-        "sources_gathered": unique_sources,
+        "messages": [AIMessage(content=answer)],
+        "sources_gathered": state.get("sources_gathered", []),
     }


-# Create our Agent Graph
 builder = StateGraph(OverallState, config_schema=Configuration)

-# Define the nodes we will cycle between
 builder.add_node("generate_query", generate_query)
 builder.add_node("web_research", web_research)
 builder.add_node("reflection", reflection)
 builder.add_node("finalize_answer", finalize_answer)

-# Set the entrypoint as `generate_query`
-# This means that this node is the first one called
 builder.add_edge(START, "generate_query")
-# Add conditional edge to continue with search queries in a parallel branch
 builder.add_conditional_edges(
     "generate_query", continue_to_web_research, ["web_research"]
 )
-# Reflect on the web research
 builder.add_edge("web_research", "reflection")
-# Evaluate the research
 builder.add_conditional_edges(
     "reflection", evaluate_research, ["web_research", "finalize_answer"]
 )
-# Finalize the answer
 builder.add_edge("finalize_answer", END)

 graph = builder.compile(name="pro-search-agent")
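
A sketch of a one-shot invocation of the compiled graph; the input keys mirror the state fields the nodes above read, and the question is illustrative:

```python
from langchain_core.messages import HumanMessage

from agent.graph import graph

final_state = graph.invoke(
    {
        "messages": [HumanMessage(content="How does LangGraph handle parallel fan-out?")],
        "initial_search_query_count": 3,      # read by generate_query
        "max_research_loops": 2,              # read by evaluate_research
        "reasoning_model": "openai/gpt-5.4",  # normalized via normalize_model_name
    }
)

# finalize_answer places the cited answer in the last AIMessage.
print(final_state["messages"][-1].content)
```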

View File

@@ -15,6 +15,7 @@ Instructions:
 - Queries should be diverse, if the topic is broad, generate more than 1 query.
 - Don't generate multiple similar queries, 1 is enough.
 - Query should ensure that the most current information is gathered. The current date is {current_date}.
+- Keep every individual query under 400 characters. Prefer concise search terms over full-sentence restatements.

 Format:
 - Format your response as a JSON object with ALL two of these exact keys:
@@ -34,26 +35,31 @@ Topic: What revenue grew more last year apple stock or the number of people buyi
 Context: {research_topic}"""

-web_searcher_instructions = """Conduct targeted Google Searches to gather the most recent, credible information on "{research_topic}" and synthesize it into a verifiable text artifact.
+web_searcher_instructions = """Review the provided Tavily search results for "{research_topic}" and synthesize them into a verifiable research note.

 Instructions:
 - Query should ensure that the most current information is gathered. The current date is {current_date}.
-- Conduct multiple, diverse searches to gather comprehensive information.
-- Consolidate key findings while meticulously tracking the source(s) for each specific piece of information.
-- The output should be a well-written summary or report based on your search findings.
-- Only include the information found in the search results, don't make up any information.
+- Use only the provided Tavily search results. Do not invent or infer facts that are not supported by those results.
+- Consolidate the key findings into a concise research note for this query.
+- Every factual paragraph or bullet must include at least one markdown citation using the exact title and URL from the provided sources, for example: [Reuters](https://www.reuters.com/example).
+- Preserve full URLs in citations. Do not use placeholders, IDs, or shortened URLs.
+- If the results are insufficient, explicitly say what is missing and cite the closest available sources.

 Research Topic:
 {research_topic}
+
+Search Results:
+{search_results}
 """

-reflection_instructions = """You are an expert research assistant analyzing summaries about "{research_topic}".
+reflection_instructions = """You are an expert research assistant analyzing collected research evidence about "{research_topic}".

 Instructions:
 - Identify knowledge gaps or areas that need deeper exploration and generate a follow-up query. (1 or multiple).
-- If provided summaries are sufficient to answer the user's question, don't generate a follow-up query.
+- If the provided evidence is sufficient to answer the user's question, don't generate a follow-up query.
 - If there is a knowledge gap, generate a follow-up query that would help expand your understanding.
 - Focus on technical details, implementation specifics, or emerging trends that weren't fully covered.
+- Keep every follow-up query under 400 characters. Prefer compact search phrases over long natural-language questions.

 Requirements:
 - Ensure the follow-up query is self-contained and includes necessary context for web search.
@@ -73,24 +79,26 @@ Example:
 }}
 ```

-Reflect carefully on the Summaries to identify knowledge gaps and produce a follow-up query. Then, produce your output following this JSON format:
+Reflect carefully on the Research Evidence to identify knowledge gaps and produce a follow-up query. Then, produce your output following this JSON format:

-Summaries:
+Research Evidence:
 {summaries}
 """

-answer_instructions = """Generate a high-quality answer to the user's question based on the provided summaries.
+answer_instructions = """Generate a high-quality answer to the user's question based on the provided research evidence.

 Instructions:
 - The current date is {current_date}.
 - You are the final step of a multi-step research process, don't mention that you are the final step.
 - You have access to all the information gathered from the previous steps.
 - You have access to the user's question.
-- Generate a high-quality answer to the user's question based on the provided summaries and the user's question.
-- Include the sources you used from the Summaries in the answer correctly, use markdown format (e.g. [apnews](https://vertexaisearch.cloud.google.com/id/1-0)). THIS IS A MUST.
+- Generate a high-quality answer to the user's question based on the provided research evidence and the user's question.
+- Use only the evidence present in the Research Evidence.
+- Preserve or reuse source citations from the Research Evidence as full markdown links. Every factual paragraph should include at least one citation.
+- Do not invent sources, placeholders, or shortened URLs.

 User Context:
 - {research_topic}

-Summaries:
+Research Evidence:
 {summaries}"""
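
These templates are plain `str.format` strings, so every placeholder must be supplied by the caller; a sketch of how `generate_query` fills the first one (the topic is illustrative):

```python
from agent.prompts import get_current_date, query_writer_instructions

prompt = query_writer_instructions.format(
    current_date=get_current_date(),
    research_topic="impact of the EU AI Act on open-source model releases",
    number_queries=3,
)
# Note: the new {search_results} placeholder in web_searcher_instructions means
# any caller of that template must also pass the formatted Tavily evidence.
```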

View File

@@ -29,13 +29,8 @@ class ReflectionState(TypedDict):
     number_of_ran_queries: int


-class Query(TypedDict):
-    query: str
-    rationale: str
-
-
 class QueryGenerationState(TypedDict):
-    search_query: list[Query]
+    search_query: list[str]


 class WebSearchState(TypedDict):

View File

@@ -1,166 +1,106 @@
-from typing import Any, Dict, List
-from langchain_core.messages import AnyMessage, AIMessage, HumanMessage
+from __future__ import annotations
+
+import re
+from typing import Any, List
+
+from langchain_core.messages import AIMessage, AnyMessage, HumanMessage
+
+DEFAULT_REASONING_MODEL = "openai/gpt-5.4"
+SUPPORTED_MODEL_PREFIXES = ("gpt-", "deepseek-", "glm-")
+MAX_TAVILY_QUERY_LENGTH = 380


 def get_research_topic(messages: List[AnyMessage]) -> str:
-    """
-    Get the research topic from the messages.
-    """
-    # check if request has a history and combine the messages into a single string
+    """Get the research topic from the messages."""
     if len(messages) == 1:
-        research_topic = messages[-1].content
-    else:
-        research_topic = ""
-        for message in messages:
-            if isinstance(message, HumanMessage):
-                research_topic += f"User: {message.content}\n"
-            elif isinstance(message, AIMessage):
-                research_topic += f"Assistant: {message.content}\n"
+        return str(messages[-1].content)
+
+    research_topic = ""
+    for message in messages:
+        if isinstance(message, HumanMessage):
+            research_topic += f"User: {message.content}\n"
+        elif isinstance(message, AIMessage):
+            research_topic += f"Assistant: {message.content}\n"
     return research_topic


-def resolve_urls(urls_to_resolve: List[Any], id: int) -> Dict[str, str]:
-    """
-    Create a map of the vertex ai search urls (very long) to a short url with a unique id for each url.
-    Ensures each original URL gets a consistent shortened form while maintaining uniqueness.
-    """
-    prefix = f"https://vertexaisearch.cloud.google.com/id/"
-    urls = [site.web.uri for site in urls_to_resolve]
-
-    # Create a dictionary that maps each unique URL to its first occurrence index
-    resolved_map = {}
-    for idx, url in enumerate(urls):
-        if url not in resolved_map:
-            resolved_map[url] = f"{prefix}{id}-{idx}"
-
-    return resolved_map
+def normalize_model_name(
+    model_name: str | None,
+    fallback: str = DEFAULT_REASONING_MODEL,
+) -> str:
+    """Normalize stale or unsupported model names to a supported default."""
+    candidate = (model_name or "").strip()
+    if candidate.startswith(SUPPORTED_MODEL_PREFIXES):
+        return candidate
+
+    provider, _, model = candidate.partition("/")
+    if provider and model.startswith(SUPPORTED_MODEL_PREFIXES):
+        return candidate
+
+    return fallback
+
+
+def _normalize_source_title(source: dict[str, Any], index: int) -> str:
+    title = (source.get("title") or "").strip()
+    return title or f"Source {index}"


-def insert_citation_markers(text, citations_list):
-    """
-    Inserts citation markers into a text string based on start and end indices.
-
-    Args:
-        text (str): The original text string.
-        citations_list (list): A list of dictionaries, where each dictionary
-                               contains 'start_index', 'end_index', and
-                               'segment_string' (the marker to insert).
-                               Indices are assumed to be for the original text.
-
-    Returns:
-        str: The text with citation markers inserted.
-    """
-    # Sort citations by end_index in descending order.
-    # If end_index is the same, secondary sort by start_index descending.
-    # This ensures that insertions at the end of the string don't affect
-    # the indices of earlier parts of the string that still need to be processed.
-    sorted_citations = sorted(
-        citations_list, key=lambda c: (c["end_index"], c["start_index"]), reverse=True
-    )
-
-    modified_text = text
-    for citation_info in sorted_citations:
-        # These indices refer to positions in the *original* text,
-        # but since we iterate from the end, they remain valid for insertion
-        # relative to the parts of the string already processed.
-        end_idx = citation_info["end_index"]
-        marker_to_insert = ""
-        for segment in citation_info["segments"]:
-            marker_to_insert += f" [{segment['label']}]({segment['short_url']})"
-        # Insert the citation marker at the original end_idx position
-        modified_text = (
-            modified_text[:end_idx] + marker_to_insert + modified_text[end_idx:]
-        )
-
-    return modified_text
+def normalize_tavily_sources(results: list[dict[str, Any]]) -> list[dict[str, str]]:
+    """Normalize Tavily results into a stable source shape."""
+    normalized_sources: list[dict[str, str]] = []
+    seen_urls: set[str] = set()
+
+    for index, result in enumerate(results, start=1):
+        url = (result.get("url") or "").strip()
+        if not url or url in seen_urls:
+            continue
+
+        normalized_sources.append(
+            {
+                "label": f"S{len(normalized_sources) + 1}",
+                "title": _normalize_source_title(result, index),
+                "value": url,
+                "content": (result.get("content") or "").strip(),
+            }
+        )
+        seen_urls.add(url)
+
+    return normalized_sources
+
+
+def format_sources_for_prompt(sources: list[dict[str, str]]) -> str:
+    """Format normalized search results for prompt injection."""
+    if not sources:
+        return "No search results were returned."
+
+    formatted_sources = []
+    for source in sources:
+        snippet = source.get("content") or "No snippet available."
+        formatted_sources.append(
+            "\n".join(
+                [
+                    f"{source['label']}: {source['title']}",
+                    f"URL: {source['value']}",
+                    f"Snippet: {snippet}",
+                ]
+            )
+        )
+
+    return "\n\n".join(formatted_sources)


-def get_citations(response, resolved_urls_map):
-    """
-    Extracts and formats citation information from a Gemini model's response.
-
-    This function processes the grounding metadata provided in the response to
-    construct a list of citation objects. Each citation object includes the
-    start and end indices of the text segment it refers to, and a string
-    containing formatted markdown links to the supporting web chunks.
-
-    Args:
-        response: The response object from the Gemini model, expected to have
-                  a structure including `candidates[0].grounding_metadata`.
-                  It also relies on a `resolved_map` being available in its
-                  scope to map chunk URIs to resolved URLs.
-
-    Returns:
-        list: A list of dictionaries, where each dictionary represents a citation
-              and has the following keys:
-              - "start_index" (int): The starting character index of the cited
-                                     segment in the original text. Defaults to 0
-                                     if not specified.
-              - "end_index" (int): The character index immediately after the
-                                   end of the cited segment (exclusive).
-              - "segments" (list[str]): A list of individual markdown-formatted
-                                        links for each grounding chunk.
-              - "segment_string" (str): A concatenated string of all markdown-
-                                        formatted links for the citation.
-              Returns an empty list if no valid candidates or grounding supports
-              are found, or if essential data is missing.
-    """
-    citations = []
-
-    # Ensure response and necessary nested structures are present
-    if not response or not response.candidates:
-        return citations
-
-    candidate = response.candidates[0]
-    if (
-        not hasattr(candidate, "grounding_metadata")
-        or not candidate.grounding_metadata
-        or not hasattr(candidate.grounding_metadata, "grounding_supports")
-    ):
-        return citations
-
-    for support in candidate.grounding_metadata.grounding_supports:
-        citation = {}
-
-        # Ensure segment information is present
-        if not hasattr(support, "segment") or support.segment is None:
-            continue  # Skip this support if segment info is missing
-
-        start_index = (
-            support.segment.start_index
-            if support.segment.start_index is not None
-            else 0
-        )
-
-        # Ensure end_index is present to form a valid segment
-        if support.segment.end_index is None:
-            continue  # Skip if end_index is missing, as it's crucial
-
-        # Add 1 to end_index to make it an exclusive end for slicing/range purposes
-        # (assuming the API provides an inclusive end_index)
-        citation["start_index"] = start_index
-        citation["end_index"] = support.segment.end_index
-
-        citation["segments"] = []
-        if (
-            hasattr(support, "grounding_chunk_indices")
-            and support.grounding_chunk_indices
-        ):
-            for ind in support.grounding_chunk_indices:
-                try:
-                    chunk = candidate.grounding_metadata.grounding_chunks[ind]
-                    resolved_url = resolved_urls_map.get(chunk.web.uri, None)
-                    citation["segments"].append(
-                        {
-                            "label": chunk.web.title.split(".")[:-1][0],
-                            "short_url": resolved_url,
-                            "value": chunk.web.uri,
-                        }
-                    )
-                except (IndexError, AttributeError, NameError):
-                    # Handle cases where chunk, web, uri, or resolved_map might be problematic
-                    # For simplicity, we'll just skip adding this particular segment link
-                    # In a production system, you might want to log this.
-                    pass
-
-        citations.append(citation)
-
-    return citations
+def shorten_search_query(
+    query: str,
+    max_length: int = MAX_TAVILY_QUERY_LENGTH,
+) -> str:
+    """Normalize and trim search queries to fit Tavily's length limit."""
+    normalized_query = re.sub(r"\s+", " ", query).strip()
+    if len(normalized_query) <= max_length:
+        return normalized_query
+
+    truncated_query = normalized_query[:max_length]
+    last_space = truncated_query.rfind(" ")
+    if last_space > max_length // 2:
+        truncated_query = truncated_query[:last_space]
+
+    return truncated_query.rstrip(" ,;:-")
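
A quick behavioral sketch of the new helpers, using the constants defined above; the assertions follow directly from the code:

```python
from agent.utils import normalize_model_name, shorten_search_query

# Bare and provider-prefixed supported names pass through unchanged.
assert normalize_model_name("gpt-5.4") == "gpt-5.4"
assert normalize_model_name("deepseek-v3") == "deepseek-v3"
assert normalize_model_name("openai/gpt-5.4") == "openai/gpt-5.4"

# Stale Gemini names (and None) fall back to the default reasoning model.
assert normalize_model_name("gemini-2.5-pro") == "openai/gpt-5.4"
assert normalize_model_name(None) == "openai/gpt-5.4"

# Over-long queries are whitespace-collapsed and trimmed under 380 characters,
# preferring a word boundary.
long_query = "langgraph   fan-out " * 40
assert len(shorten_search_query(long_query)) <= 380
```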

View File

@@ -38,7 +38,9 @@ services:
       langgraph-postgres:
         condition: service_healthy
     environment:
-      GEMINI_API_KEY: ${GEMINI_API_KEY}
+      OPENAI_API_KEY: ${OPENAI_API_KEY}
+      OPENAI_BASE_URL: ${OPENAI_BASE_URL}
+      TAVILY_API_KEY: ${TAVILY_API_KEY}
       LANGSMITH_API_KEY: ${LANGSMITH_API_KEY}
       REDIS_URI: redis://langgraph-redis:6379
       POSTGRES_URI: postgres://postgres:postgres@langgraph-postgres:5432/postgres?sslmode=disable

View File

@@ -6,10 +6,27 @@ import { WelcomeScreen } from "@/components/WelcomeScreen";
 import { ChatMessagesView } from "@/components/ChatMessagesView";
 import { Button } from "@/components/ui/button";

+const getSourceDisplayName = (source: any): string | null => {
+  if (source?.title) {
+    return source.title;
+  }
+  if (source?.value) {
+    try {
+      return new URL(source.value).hostname.replace(/^www\./, "");
+    } catch {
+      return null;
+    }
+  }
+  return source?.label || null;
+};
+
 export default function App() {
   const [processedEventsTimeline, setProcessedEventsTimeline] = useState<
     ProcessedEvent[]
   >([]);
+  const [selectedModel, setSelectedModel] = useState("openai/gpt-5.4");
   const [historicalActivities, setHistoricalActivities] = useState<
     Record<string, ProcessedEvent[]>
   >({});
@@ -38,7 +55,7 @@ export default function App() {
       const sources = event.web_research.sources_gathered || [];
       const numSources = sources.length;
       const uniqueLabels = [
-        ...new Set(sources.map((s: any) => s.label).filter(Boolean)),
+        ...new Set(sources.map(getSourceDisplayName).filter(Boolean)),
       ];
       const exampleLabels = uniqueLabels.slice(0, 3).join(", ");
       processedEvent = {
@@ -157,6 +174,8 @@ export default function App() {
             handleSubmit={handleSubmit}
             isLoading={thread.isLoading}
             onCancel={handleCancel}
+            model={selectedModel}
+            onModelChange={setSelectedModel}
           />
         ) : error ? (
           <div className="flex flex-col items-center justify-center h-full">
@@ -181,6 +200,8 @@ export default function App() {
             onCancel={handleCancel}
             liveActivityEvents={processedEventsTimeline}
             historicalActivities={historicalActivities}
+            model={selectedModel}
+            onModelChange={setSelectedModel}
           />
         )}
       </main>

View File

@@ -230,6 +230,8 @@ interface ChatMessagesViewProps {
   onCancel: () => void;
   liveActivityEvents: ProcessedEvent[];
   historicalActivities: Record<string, ProcessedEvent[]>;
+  model: string;
+  onModelChange: (model: string) => void;
 }

 export function ChatMessagesView({
@@ -240,6 +242,8 @@ export function ChatMessagesView({
   onCancel,
   liveActivityEvents,
   historicalActivities,
+  model,
+  onModelChange,
 }: ChatMessagesViewProps) {
   const [copiedMessageId, setCopiedMessageId] = useState<string | null>(null);
@@ -316,6 +320,8 @@ export function ChatMessagesView({
           isLoading={isLoading}
           onCancel={onCancel}
           hasHistory={messages.length > 0}
+          model={model}
+          onModelChange={onModelChange}
         />
       </div>
   );

View File

@@ -16,6 +16,8 @@ interface InputFormProps {
   onCancel: () => void;
   isLoading: boolean;
   hasHistory: boolean;
+  model: string;
+  onModelChange: (model: string) => void;
 }

 export const InputForm: React.FC<InputFormProps> = ({
@@ -23,10 +25,11 @@ export const InputForm: React.FC<InputFormProps> = ({
   onCancel,
   isLoading,
   hasHistory,
+  model,
+  onModelChange,
 }) => {
   const [internalInputValue, setInternalInputValue] = useState("");
   const [effort, setEffort] = useState("medium");
-  const [model, setModel] = useState("gemini-2.5-flash-preview-04-17");

   const handleInternalSubmit = (e?: React.FormEvent) => {
     if (e) e.preventDefault();
@@ -130,33 +133,41 @@ export const InputForm: React.FC<InputFormProps> = ({
               <Cpu className="h-4 w-4 mr-2" />
               Model
             </div>
-            <Select value={model} onValueChange={setModel}>
+            <Select value={model} onValueChange={onModelChange}>
               <SelectTrigger className="w-[150px] bg-transparent border-none cursor-pointer">
                 <SelectValue placeholder="Model" />
               </SelectTrigger>
               <SelectContent className="bg-neutral-700 border-neutral-600 text-neutral-300 cursor-pointer">
                 <SelectItem
-                  value="gemini-2.0-flash"
+                  value="openai/gpt-5.4"
                   className="hover:bg-neutral-600 focus:bg-neutral-600 cursor-pointer"
                 >
                   <div className="flex items-center">
-                    <Zap className="h-4 w-4 mr-2 text-yellow-400" /> 2.0 Flash
+                    <Zap className="h-4 w-4 mr-2 text-emerald-400" /> openai/gpt-5.4
                   </div>
                 </SelectItem>
                 <SelectItem
-                  value="gemini-2.5-flash-preview-04-17"
+                  value="deepseek-v3"
                   className="hover:bg-neutral-600 focus:bg-neutral-600 cursor-pointer"
                 >
                   <div className="flex items-center">
-                    <Zap className="h-4 w-4 mr-2 text-orange-400" /> 2.5 Flash
+                    <Cpu className="h-4 w-4 mr-2 text-cyan-400" /> deepseek-v3
                   </div>
                 </SelectItem>
                 <SelectItem
-                  value="gemini-2.5-pro-preview-05-06"
+                  value="glm-4.5-air"
                   className="hover:bg-neutral-600 focus:bg-neutral-600 cursor-pointer"
                 >
                   <div className="flex items-center">
-                    <Cpu className="h-4 w-4 mr-2 text-purple-400" /> 2.5 Pro
+                    <Cpu className="h-4 w-4 mr-2 text-sky-400" /> glm-4.5-air
+                  </div>
+                </SelectItem>
+                <SelectItem
+                  value="glm-4.7"
+                  className="hover:bg-neutral-600 focus:bg-neutral-600 cursor-pointer"
+                >
+                  <div className="flex items-center">
+                    <Cpu className="h-4 w-4 mr-2 text-violet-400" /> glm-4.7
                   </div>
                 </SelectItem>
               </SelectContent>

View File

@@ -8,12 +8,16 @@ interface WelcomeScreenProps {
   ) => void;
   onCancel: () => void;
   isLoading: boolean;
+  model: string;
+  onModelChange: (model: string) => void;
 }

 export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({
   handleSubmit,
   onCancel,
   isLoading,
+  model,
+  onModelChange,
 }) => (
   <div className="h-full flex flex-col items-center justify-center text-center px-4 flex-1 w-full max-w-3xl mx-auto gap-4">
     <div>
@@ -30,6 +34,8 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({
         isLoading={isLoading}
         onCancel={onCancel}
         hasHistory={false}
+        model={model}
+        onModelChange={onModelChange}
       />
     </div>
     <p className="text-xs text-neutral-500">