New rag agent #1727

Closed · wants to merge 74 commits

Changes from 50 commits

Commits (74)
bfe40ad
Added new rag core functionalities
thinkall Feb 8, 2024
cbc1a24
Update docstring
thinkall Feb 19, 2024
948ae5e
Add get docs by ids
thinkall Feb 20, 2024
07e206f
Keep order in merge_documents, good for no reranker
thinkall Feb 20, 2024
2b5bf10
Add more docstrings
thinkall Feb 20, 2024
986be00
Update get_docs_by_ids, add include and kwargs
thinkall Feb 20, 2024
1a9acd8
Add test_chromadb and fix errors
thinkall Feb 20, 2024
3f6d133
Add test_datamodel
thinkall Feb 20, 2024
706c886
Add test_encoder
thinkall Feb 20, 2024
a8319b1
Add test_promptgenerator
thinkall Feb 20, 2024
a99882e
Add test_reranker
thinkall Feb 20, 2024
66a09c9
Add test_retriever
thinkall Feb 20, 2024
9d9bc59
Update test_retriever
thinkall Feb 20, 2024
7f76f03
Add test_splitter and fix splitter for one line file
thinkall Feb 20, 2024
a4f2f4b
Add test_utils and fix utils
thinkall Feb 20, 2024
5320e45
Add test_rag_agent
thinkall Feb 20, 2024
e3b42a6
Fix test splitter
thinkall Feb 20, 2024
dd13a29
Add test_rag_openai
thinkall Feb 20, 2024
35fc80c
Merge branch 'main' into new_rag
thinkall Feb 20, 2024
8311940
Merge branch 'main' into new_rag
thinkall Feb 21, 2024
02d9e80
Fix test configs
thinkall Feb 21, 2024
dec1e56
Fix __all__
thinkall Feb 21, 2024
fbc37cf
Update docstring
thinkall Feb 21, 2024
0e43b69
Merge branch 'main' into new_rag
thinkall Feb 22, 2024
f73740e
Merge branch 'main' into new_rag
thinkall Feb 23, 2024
1b30dc2
Reformat and fix typo
thinkall Feb 23, 2024
18e8886
Add readme, update docstrings, improvements
thinkall Feb 23, 2024
f9e367b
Use Protocol for encoder and reranker
thinkall Feb 23, 2024
aff1459
Use Protocol for vector db, keep using received_raw_message for llm c…
thinkall Feb 23, 2024
0d6d074
Use jupyter-kernel-gateway for ipython executor (#1748)
jackgerrits Feb 23, 2024
46fed07
Handle azure_deployment Parameter Issue in GPTAssistantAgent to Maint…
IANTHEREAL Feb 24, 2024
6542943
Update parameter name
thinkall Feb 24, 2024
2f741d8
Update hash, add deduplication function for chunks
thinkall Feb 24, 2024
b5bae5e
Merge branch 'main' into new_rag
thinkall Feb 24, 2024
a071ab4
Merge branch 'main' into new_rag
thinkall Feb 25, 2024
e3fbda8
Update context
thinkall Feb 26, 2024
bd9f722
Improve docstrings
thinkall Feb 26, 2024
ad774f8
Merge remote-tracking branch 'origin/main' into new_rag
thinkall Feb 26, 2024
00439a0
Update default llm_model
thinkall Feb 26, 2024
72a5fa4
Add notebook example
thinkall Feb 26, 2024
e26dd4c
Update source, keep original url
thinkall Feb 26, 2024
90674c2
Fix a typo
thinkall Feb 26, 2024
f4d0db4
Add RAG capability
thinkall Feb 26, 2024
a66613f
Update notebook
thinkall Feb 26, 2024
19340ee
Update Readme
thinkall Feb 26, 2024
3b94241
Update readme
thinkall Feb 27, 2024
b354e0d
Merge branch 'main' into new_rag
thinkall Feb 27, 2024
a4517bc
Fix tests
thinkall Feb 27, 2024
829e47e
Fix tests
thinkall Feb 27, 2024
63b43de
Fix test_reranker
thinkall Feb 27, 2024
8915243
Merge branch 'main' into new_rag
thinkall Feb 28, 2024
efd441d
Improve inner loop, fix some bugs
thinkall Feb 28, 2024
a192c98
Remove run code
thinkall Feb 28, 2024
e5fd3bb
Merge branch 'main' into new_rag
thinkall Feb 28, 2024
98f5ab4
Merge branch 'main' into new_rag
thinkall Feb 29, 2024
8e1db8d
Merge branch 'main' into new_rag
thinkall Feb 29, 2024
b197421
Add installation
thinkall Feb 28, 2024
aac8cc9
Update prompts and promptgenerator
thinkall Feb 29, 2024
a0a0db4
Update prompts and LLM system message
thinkall Feb 29, 2024
b48d255
Improve performance of multi-hop question
thinkall Feb 29, 2024
d14cadf
Add example of multi-round conversation
thinkall Feb 29, 2024
8a107cd
Update TERMINATE_TRIGGER_WORDS
thinkall Feb 29, 2024
0762afe
Update readme
thinkall Feb 29, 2024
7704c75
Add sequence uml
thinkall Feb 29, 2024
1fdb16a
Update sequence uml
thinkall Feb 29, 2024
686f140
Update readme
thinkall Feb 29, 2024
14b1896
Merge branch 'main' into new_rag
thinkall Mar 1, 2024
7cf36ab
Merge branch 'main' into new_rag
thinkall Mar 1, 2024
d21aaf4
Merge branch 'main' into new_rag
thinkall Mar 1, 2024
63acc2f
Merge branch 'main' into new_rag
thinkall Mar 12, 2024
e95684d
Update readme
thinkall Mar 1, 2024
8d97414
Add temp doc for use cases
thinkall Mar 12, 2024
6fa479d
Merge remote-tracking branch 'origin/main' into new_rag
thinkall Mar 14, 2024
66de003
Add sequence uml for retriever
thinkall Mar 14, 2024
4 changes: 2 additions & 2 deletions .github/workflows/contrib-openai.yml
@@ -45,15 +45,15 @@ jobs:
       run: |
         pip install docker
         pip install qdrant_client[fastembed]
-        pip install -e .[retrievechat]
+        pip install -e .[retrievechat,rag]
     - name: Coverage
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
         AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
         AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
         OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
       run: |
-        coverage run -a -m pytest test/agentchat/contrib/test_retrievechat.py test/agentchat/contrib/test_qdrant_retrievechat.py
+        coverage run -a -m pytest test/agentchat/contrib/test_retrievechat.py test/agentchat/contrib/test_qdrant_retrievechat.py test/agentchat/contrib/rag
         coverage xml
     - name: Upload coverage to Codecov
       uses: codecov/codecov-action@v3
6 changes: 3 additions & 3 deletions .github/workflows/contrib-tests.yml
@@ -48,7 +48,7 @@ jobs:
         pip install unstructured[all-docs]
     - name: Install packages and dependencies for RetrieveChat
       run: |
-        pip install -e .[retrievechat]
+        pip install -e .[retrievechat,rag]
     - name: Set AUTOGEN_USE_DOCKER based on OS
       shell: bash
       run: |
@@ -57,11 +57,11 @@
         fi
     - name: Test RetrieveChat
       run: |
-        pytest test/test_retrieve_utils.py test/agentchat/contrib/test_retrievechat.py test/agentchat/contrib/test_qdrant_retrievechat.py --skip-openai
+        pytest test/test_retrieve_utils.py test/agentchat/contrib/test_retrievechat.py test/agentchat/contrib/test_qdrant_retrievechat.py test/agentchat/contrib/rag --skip-openai
     - name: Coverage
       run: |
         pip install coverage>=5.3
-        coverage run -a -m pytest test/test_retrieve_utils.py test/agentchat/contrib/test_retrievechat.py test/agentchat/contrib/test_qdrant_retrievechat.py --skip-openai
+        coverage run -a -m pytest test/test_retrieve_utils.py test/agentchat/contrib/test_retrievechat.py test/agentchat/contrib/test_qdrant_retrievechat.py test/agentchat/contrib/rag --skip-openai
         coverage xml
     - name: Upload coverage to Codecov
       uses: codecov/codecov-action@v3
3 changes: 3 additions & 0 deletions .gitignore
@@ -178,3 +178,6 @@ test/agentchat/test_agent_scripts/*

 # test cache
 .cache_test
+
+# RAG DB folders
+.db/
149 changes: 149 additions & 0 deletions autogen/agentchat/contrib/capabilities/rag_capability.py
@@ -0,0 +1,149 @@
import os
from autogen.agentchat.assistant_agent import ConversableAgent
from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability
from autogen.agentchat.contrib.rag import RagAgent, logger
from autogen.agentchat.contrib.rag.prompts import PROMPT_CAPABILITIES
from typing import Dict, Optional, Union, List, Tuple, Any, Literal


class Ragability(AgentCapability):
    """
    Ragability gives an agent the ability to reply with RAG (Retrieval-Augmented Generation),
    where the user is any caller (human or not) sending messages to the ragable agent.
    Ragability is designed to be composable with other agent capabilities.
    To make any conversable agent ragable, instantiate both the agent and the Ragability class,
    then pass the agent to ragability.add_to_agent(agent).
    """

    def __init__(
        self,
        verbose: int = 1,
        max_consecutive_auto_reply: Optional[int] = 5,
        llm_config: Optional[Union[Dict, Literal[False]]] = False,
        rag_config: Optional[Dict] = None,  # config for the ragability
        rag_to_agent_prompt: Optional[str] = None,
    ):
"""
Args:
verbose (Optional, int): 0 for basic info, 1 for RAG agent info, 2 for debug info. Default is 1.
max_consecutive_auto_reply (int): the maximum number of consecutive auto replies for the RAG agent. Default is 5.
llm_config (dict or False or None): llm inference configuration.
Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
for available options.
To disable llm-based auto reply, set to False.
rag_config (dict): config for the rag agent.
- llm_model (str): the language model to use for the RAG agent, it's used to count tokens.
Default is llm_config["config_list"][0]["model"] or "gpt-3.5-turbo-0613".
- promptgen_n (int): the number of refined messages to generate for each message. Default is 2.
- top_k (int): the number of documents to retrieve for each refined message. Default is 10.
- filter_document (str): the filter for the documents, the usage would differ for different vector database.
Default is None. For chromadb, `{"$contains": "spark"}` means to retrieve documents that contain "spark".
- filter_metadata (str): the filter for the metadata, the usage would differ for different vector database.
Default is None. For chromadb, `{"color" : "red"}` means to retrieve documents with metadata "color" equals to "red".
- include (str): the attributes to include in the query results. Default is ["metadatas", "documents", "distances"]
- rag_llm_config (dict): the llm config for the RAG agent inner loop such as promptgenerator. Default is
the same as the llm_config. Set to False to disable promptgenerator (prompts selection and message refinement).
- max_token_ratio_for_context (float): the maximum token ratio for the context, used to control the
number of tokens in the input of LLM calls. Default is 0.8.
- splitter (str or Splitter): the splitter to use for the RAG agent. Default is "textline" which will use
the built-in `TextLineSplitter` to split the text into lines. The splitter can be set to an instance
of `Splitter` as well. Extend the `Splitter` class to create a custom splitter.
- docs_path (str): the path to the raw files for building the knowledge base. Default is None. If not
provided, it will use the existing collection if it exists, otherwise it will raise an ValueError.
- recursive (bool): whether to recursively search for files in the `docs_path`. Default is True.
- chunk_size (int): the maximum number of tokens of each chunk. Default is 1024.
- chunk_mode (str): the chunk mode. Default is "multi_lines". Other option is "one_line".
- must_break_at_empty_line (bool): whether to break at empty line. Default is True.
- overlap (int): the number of overlapping lines. Default is 1.
- token_count_function (callable): the function to count the tokens. Default is `autogen.token_count_utils.count_token`.
Pass a custom function to count the tokens if needed.
- max_token_limit (int): the maximum token limit of the conversation. Default is the maximum token limit for the llm model.
- custom_text_split_function (callable): the custom text split function. Default is None.
- embedding_function (str or EmbeddingFunction): the embedding function to use. Default is "sentence_transformer".
- retriever (str or Retriever): the retriever to use. Default is "chroma", will use the built-in `ChromaRetriever`.
The retriever can be set to an instance of `Retriever` as well. Extend the `Retriever` class to create a custom retriever.
- collection_name (str): the collection name for the vector database. Default is "autogen-rag".
- db_path (str): the database path. Default is "./tmp/{retriever}". Invalid if retriever is an instance of `Retriever`.
- db_config (dict): the database config. Default is {}. The value will be different for different vector database.
- overwrite (bool): whether to overwrite the collection. Default is False. If True, will overwrite the
collection if it exists or create a new collection if it doesn't exist.
- get_or_create (bool): whether to get or create the collection. Default is True. If True, will reuse the
existing collection if it exists, otherwise will create a new collection. If False, will create a new
collection if it doesn't exist, otherwise will raise an ValueError. Invalid if overwrite is True.
- upsert (bool): whether to upsert the documents. Default is True. If False, existing documents will not be updated.
- reranker (str or Reranker): the reranker to use. Default is "tfidf", which uses the built-in `TfidfReranker`.
The reranker can be set to an instance of `Reranker` as well. Extend the `Reranker` class to create a custom reranker.
- post_process_func (callable): the post process function. Default is `add_source_to_reply` which simply
adds the sources of the context to the end of the reply.
- prompt_generator_post_process_func (callable): the post process function for PromptGenerator. Default is None,
will use the built-in `promptgenerator.extract_refined_questions`.
- prompt_refine (str): the prompt for refining the received message. Default is None, will use the
built-in `prompts.PROMPTS_GENERATOR["refine"]`.
- prompt_select (str): the prompt for selecting the best prompts for replying the received message.
Default is None, will use the built-in `prompts.PROMPTS_GENERATOR["select"]`.
- prompt_rag (str): the prompt for sending requests to LLM backend. Default is None, one of the built-in
`prompts.PROMPTS_RAG` will be selected by `PromptGenerator`.
- enable_update_context (bool): whether to enable update context. Default is True. If True, the context will
be updated if the message starts or ends with the trigger words.
- customized_trigger_words (Union[str, List[str]]): the customized trigger words, case insensitive.
Default is ["update context", "question"]. If the message starts or ends with the trigger words,
the context will be updated.
- vector_db_get_is_fast (bool): whether the vector db get is fast. If True, will save some memory w/o
introducing much latency. Default is True. Set to False if the vector db has high latency.
rag_to_agent_prompt (Optional, str): the prompt for refine the rag reply to the agent. Default is built-in
`PROMPT_CAPABILITIES` in rag module. The prompt should contain `{text}` and `{rag_reply}`.
"""
        self.llm_config = llm_config
        self.rag_config = rag_config
        self.max_consecutive_auto_reply = max_consecutive_auto_reply
        self.ragagent = None
        self.ragable_agent = None
        self.verbose = verbose
        self.prompt = rag_to_agent_prompt or PROMPT_CAPABILITIES
        if "{text}" not in self.prompt or "{rag_reply}" not in self.prompt:
            raise ValueError("rag_to_agent_prompt should contain both {text} and {rag_reply}.")
        if verbose >= 2:
            logger.setLevel("DEBUG")

    def add_to_agent(self, agent: ConversableAgent):
        """Adds ragability to the given agent."""

        # Register a hook for processing the last message.
        agent.register_hook(hookable_method="process_last_received_message", hook=self.process_last_received_message)

        # Was an llm_config passed to the constructor?
        if self.llm_config is None:
            # No. Use the agent's llm_config.
            self.llm_config = agent.llm_config
        assert self.llm_config, "Ragability requires a valid llm_config."

        # Create the rag agent.
        self.ragagent = RagAgent(
            llm_config=self.llm_config,
            rag_config=self.rag_config,
            max_consecutive_auto_reply=self.max_consecutive_auto_reply,
            code_execution_config=False,
        )

        # Append extra info to the system message.
        agent.update_system_message(
            agent.system_message
            + "\nYou've been given the special ability to perform retrieval augmented generation (RAG) when replying to a message. "
            + "You can answer questions and solve problems based on a knowledge base."
        )
        self.ragable_agent = agent

    def process_last_received_message(self, text: str) -> str:
        """Generates a response to the last received message using the RAG agent."""

        if not text:
            return text

        self.ragagent.reset()  # Reset the RAG agent.
        self.ragable_agent.send(recipient=self.ragagent, message=text, request_reply=True, silent=(self.verbose < 1))
        rag_reply = self.ragable_agent.last_message(self.ragagent).get("content")
        rag_reply = self.prompt.format(text=text, rag_reply=rag_reply)
        # logger.debug(f"Ragability RAG agent replied with: {rag_reply}")
        return rag_reply
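The long `rag_config` option list above is easier to digest with a concrete instantiation. The following is a minimal usage sketch, not part of this PR's diff: it uses only keys documented in the docstring, with arbitrary example values, and assumes a local `OAI_CONFIG_LIST` file as in the README below.

```python
import autogen
from autogen.agentchat.contrib.capabilities.rag_capability import Ragability

# Assumes an OAI_CONFIG_LIST file in the working directory (see the README below).
config_list = autogen.config_list_from_json("OAI_CONFIG_LIST")
llm_config = {"timeout": 60, "config_list": config_list}

# Illustrative rag_config: every key is documented in the docstring above,
# but the values here are arbitrary examples, not recommended defaults.
rag_config = {
    "docs_path": "./website/docs",     # raw files for building the knowledge base
    "chunk_size": 512,                 # max tokens per chunk
    "top_k": 5,                        # documents retrieved per refined message
    "collection_name": "autogen-rag",  # vector database collection
    "overwrite": False,                # reuse the collection if it already exists
}

assistant = autogen.AssistantAgent(name="assistant", llm_config=llm_config)
ragability = Ragability(llm_config=llm_config, rag_config=rag_config, verbose=1)
ragability.add_to_agent(assistant)  # assistant now replies via the inner RagAgent
```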
112 changes: 112 additions & 0 deletions autogen/agentchat/contrib/rag/README.md
@@ -0,0 +1,112 @@
# AutoGen Retrieval-Augmented Generation Agent (RagAgent) and Capability (Ragability)

Introducing an agent capable of performing Retrieval-Augmented Generation (RAG) for the given message.

Upon receipt of a message, the agent employs RAG to generate a reply. It retrieves documents based on the message, then generates a reply using both the retrieved documents and the message itself. Additionally, it supports automatic context updates during the conversation, either autonomously or at the user's request.

We also support enabling the RAG capability for any conversable agent with `Ragability`.

## Overall Design
The overall architecture of the agent is outlined below:

![architecture](images/autogen-rag-overall.png)

It consists of two main workflows: processing raw documents and responding to user input.

Given raw documents encompassing text, code, metadata (such as tables or databases), and even images, we use a `Splitter` to segment the documents into `Chunks`. These Chunks are then encoded with an `Encoder` to compute embeddings, which are stored as `Documents` in a vector database. This builds a comprehensive knowledge base for subsequent retrieval operations.
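To make the flow concrete, here is a tiny self-contained sketch of the same ingestion idea in plain Python. It is a toy stand-in, not this module's API: a hash-based pseudo-embedding replaces a real `Encoder`, and a plain list replaces the vector database.

```python
import hashlib
from typing import List, Tuple

def split_into_chunks(text: str, max_lines: int = 4) -> List[str]:
    """Toy splitter: group consecutive lines, in the spirit of a line-based TextLineSplitter."""
    lines = [ln for ln in text.splitlines() if ln.strip()]
    return ["\n".join(lines[i : i + max_lines]) for i in range(0, len(lines), max_lines)]

def embed(chunk: str, dim: int = 8) -> List[float]:
    """Toy encoder: deterministic pseudo-embedding from a hash (the real module uses sentence-transformers)."""
    digest = hashlib.sha256(chunk.encode()).digest()
    return [b / 255.0 for b in digest[:dim]]

knowledge_base: List[Tuple[List[float], str]] = []  # stand-in for the vector database
raw_document = "AutoGen is a framework...\nIt supports multi-agent chat...\n"  # placeholder text
for chunk in split_into_chunks(raw_document):
    knowledge_base.append((embed(chunk), chunk))  # store (embedding, chunk) pairs
```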

Once the knowledge base is established, we can enhance responses to user input. Upon receiving a message, the `Prompt Generator` classifies it into a type such as `qa`, `code`, or `unknown`, and selects an appropriate prompt; it also refines the message into multiple query strings for retrieval. The `Retriever` searches the vector database with these refined queries, yielding `Retrieved Chunks`. The `Reranker` then reorders them by relevance into `Reranked Chunks`. A final prompt is assembled from the `Selected Prompt` and the `Reranked Chunks` and sent to the backend language model via the `LLM Caller`. Finally, the `Post Processor` produces the `Final Response`.
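The response side, continuing the toy sketch above: cosine similarity stands in for the `Retriever`, and a keyword-overlap score stands in for the `Reranker` (the real module defaults to TF-IDF and delegates every step to its own classes).

```python
import math

def cosine(a: List[float], b: List[float]) -> float:
    """Cosine similarity between two embedding vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norm if norm else 0.0

def retrieve(query: str, top_k: int = 10) -> List[str]:
    """Toy retriever: rank stored chunks by embedding similarity to the query."""
    q = embed(query)
    scored = sorted(knowledge_base, key=lambda kv: cosine(q, kv[0]), reverse=True)
    return [chunk for _, chunk in scored[:top_k]]

def rerank(query: str, chunks: List[str]) -> List[str]:
    """Toy reranker: reorder retrieved chunks by keyword overlap with the query."""
    words = set(query.lower().split())
    return sorted(chunks, key=lambda c: len(words & set(c.lower().split())), reverse=True)

question = "What is AutoGen?"
context = "\n".join(rerank(question, retrieve(question)))
final_prompt = f"Answer based on the context.\n\nContext:\n{context}\n\nQuestion: {question}"
# final_prompt would then go to the LLM caller; a post processor can append sources to the reply.
```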

## Roadmap
[Roadmap for RAG #1657](https://github.com/microsoft/autogen/issues/1657)

- [x] core functionalities
- [x] notebook example
- [x] capability for any agent
- [ ] blog
- [ ] unstructured as splitter
- [ ] qdrant as vectordb
- [ ] lancedb as vectordb
- [ ] benchmark
- [ ] async

## Class Diagram
<div align="center"><img src="https://raw.githubusercontent.com/thinkall/imgbed/master/img/autogen-class-uml.png" /></div>

## Demo Code

- RagAgent

```python
import autogen
from autogen.agentchat.contrib.rag import RagAgent, logger
import logging

logger.setLevel(logging.DEBUG)

config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    file_location=".",
    filter_dict={
        "model": ["gpt-3.5-turbo", "gpt-35-turbo", "gpt-35-turbo-0613", "gpt-4", "gpt4", "gpt-4-32k"],
    },
)

print("LLM models: ", [config_list[i]["model"] for i in range(len(config_list))])

llm_config = {
    "timeout": 60,
    "config_list": config_list,
}


def termination_msg(x):
    return isinstance(x, dict) and "TERMINATE" == str(x.get("content", ""))[-9:].upper()


userproxy = autogen.UserProxyAgent(
    name="userproxy",
    is_termination_msg=termination_msg,
    human_input_mode="ALWAYS",
    code_execution_config={"use_docker": False, "work_dir": ".tmp"},
    default_auto_reply="Reply `TERMINATE` if the task is done.",
    description="The boss who asks questions and gives tasks.",
)


rag_config = {
    "docs_path": "./website/docs",
}

rag = RagAgent(
    name="rag",
    is_termination_msg=termination_msg,
    human_input_mode="NEVER",
    max_consecutive_auto_reply=5,
    llm_config=llm_config,
    rag_config=rag_config,
    code_execution_config=False,
    description="Assistant who has extra content retrieval power for solving difficult problems.",
)

userproxy.initiate_chat(recipient=rag, message="What is AutoGen?")
```

<div align="center"><img src="https://raw.githubusercontent.com/thinkall/imgbed/master/img/demo-rag.gif" /></div>

- Ragability

To make any conversable agent ragable, instantiate both the agent and the `Ragability` class, then pass the agent to `ragability.add_to_agent(agent)`.

```python
from autogen.agentchat.contrib.capabilities.rag_capability import Ragability

normal_assistant = autogen.AssistantAgent(name="normal assistant", llm_config=llm_config, max_consecutive_auto_reply=3)

ragability = Ragability(llm_config=llm_config, rag_config=rag_config, verbose=2)
ragability.add_to_agent(normal_assistant)

message = "What is AutoGen?"  # reuses the userproxy and rag_config defined in the RagAgent example above
_ = userproxy.initiate_chat(normal_assistant, message=message)
```

## Notebook Example
For more examples of RAG, please check [RAG notebook](../../../../notebook/agentchat_RAG_new.ipynb).
32 changes: 32 additions & 0 deletions autogen/agentchat/contrib/rag/__init__.py
@@ -0,0 +1,32 @@
from .datamodel import Chunk, Document, Query, QueryResults
from .encoder import EmbeddingFunction, EmbeddingFunctionFactory, Encoder
from .promptgenerator import PromptGenerator
from .rag_agent import RagAgent
from .reranker import Reranker, RerankerFactory
from .retriever import Retriever, RetrieverFactory
from .splitter import Splitter, SplitterFactory, TextLineSplitter
from .utils import logger, timer
from .vectordb import VectorDB, VectorDBFactory

__all__ = [
    "Chunk",
    "Document",
    "Encoder",
    "EmbeddingFunction",
    "EmbeddingFunctionFactory",
    "PromptGenerator",
    "Reranker",
    "RerankerFactory",
    "Retriever",
    "RetrieverFactory",
    "Splitter",
    "SplitterFactory",
    "TextLineSplitter",
    "VectorDB",
    "VectorDBFactory",
    "timer",
    "logger",
    "RagAgent",
    "QueryResults",
    "Query",
]
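For orientation, the public surface above can be pulled in like this. This is a sketch: the names are exactly those exported by `__all__`, and the logger usage mirrors the README demo.

```python
import logging

from autogen.agentchat.contrib.rag import (
    PromptGenerator,  # message classification and refinement
    RagAgent,         # the end-to-end RAG agent
    Reranker,         # reordering of retrieved chunks
    Retriever,        # vector database search
    Splitter,         # chunking of raw documents
    logger,           # module-level logger
)

logger.setLevel(logging.INFO)  # raise to DEBUG for verbose inner-loop traces
```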