BabyAGI 用户指南

本笔记本演示了如何实现 Yohei Nakajima 提出的 BabyAGI。BabyAGI 是一个可以根据给定目标生成并模拟执行任务的 AI 代理。

本指南将帮助您了解创建自己的递归代理的组件。

尽管 BabyAGI 使用特定的向量存储/模型提供程序（Pinecone、OpenAI），但使用 LangChain 实现的一个好处是您可以轻松地将其替换为其他选项。在此实现中，我们使用了一个 FAISS 向量存储（因为它在本地运行且免费）。

安装和导入所需模块 (Install and Import Required Modules)

import os
from collections import deque
from typing import Dict, List, Optional, Any

from langchain import LLMChain, OpenAI, PromptTemplate
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import BaseLLM
from langchain.vectorstores.base import VectorStore
from pydantic import BaseModel, Field
from langchain.chains.base import Chain

连接到向量存储 (Connect to the Vector Store)

根据您使用的向量存储不同，此步骤可能会有所不同。

from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore

import faiss

# Embedding model used to vectorise texts and queries.
embeddings_model = OpenAIEmbeddings()

# Start from an empty vector store: a fresh FAISS index, an empty in-memory
# docstore and an empty index-to-docstore-id mapping.
# NOTE(review): 1536 is presumably the output dimension of the default
# OpenAI embedding model -- confirm if the embedding model is swapped.
embedding_size = 1536
index = faiss.IndexFlatL2(embedding_size)
vectorstore = FAISS(
    embeddings_model.embed_query,
    index,
    InMemoryDocstore({}),
    {},
)

定义链

BabyAGI依赖于三个LLM链：

任务创建链，用于选择要添加到列表中的新任务
任务优先级链，用于重新设置任务的优先级
执行链，用于执行任务

class TaskCreationChain(LLMChain):
    """LLM chain that proposes new tasks based on the last task's result."""

    @classmethod
    def from_llm(cls, llm: BaseLLM, verbose: bool = True) -> LLMChain:
        """Build the task-creation chain around ``llm``.

        The prompt is filled with the overall objective, the result and
        description of the task that just finished, and the tasks that are
        still incomplete; it asks the model to return new, non-overlapping
        tasks.

        Args:
            llm: Language model that will answer the prompt.
            verbose: Forwarded to the chain constructor.

        Returns:
            A chain instance of this class wired to the creation prompt.
        """
        template_pieces = [
            "你是一个任务创建AI，使用执行代理的结果来创建具有以下目标的新任务：{objective}，",
            "最后完成的任务的结果是：{result}。",
            "这个结果是基于以下任务描述的：{task_description}。",
            "这些是未完成的任务：{incomplete_tasks}。",
            "根据结果，创建新的任务供AI系统完成，这些任务不与未完成的任务重叠。",
            "将任务作为数组返回。",
        ]
        creation_prompt = PromptTemplate(
            template="".join(template_pieces),
            input_variables=[
                "result",
                "task_description",
                "incomplete_tasks",
                "objective",
            ],
        )
        return cls(prompt=creation_prompt, llm=llm, verbose=verbose)

class TaskPrioritizationChain(LLMChain):
    """LLM chain that cleans up and re-orders the pending task list."""

    @classmethod
    def from_llm(cls, llm: BaseLLM, verbose: bool = True) -> LLMChain:
        """Build the task-prioritization chain around ``llm``.

        The prompt receives the current task names, the overall objective and
        the number the returned list should start with, and asks for a
        numbered list without dropping any task.

        Args:
            llm: Language model that will answer the prompt.
            verbose: Forwarded to the chain constructor.

        Returns:
            A chain instance of this class wired to the prioritization prompt.
        """
        template_pieces = [
            "你是一个任务优先级AI，负责清理格式并重新设置以下任务的优先级：{task_names}。",
            "考虑你团队的最终目标：{objective}。",
            "不要删除任何任务。将结果作为编号列表返回，例如：",
            "#. 第一个任务",
            "#. 第二个任务",
            "以数字{next_task_id}开始任务列表。",
        ]
        prioritization_prompt = PromptTemplate(
            template="".join(template_pieces),
            input_variables=["task_names", "next_task_id", "objective"],
        )
        return cls(prompt=prioritization_prompt, llm=llm, verbose=verbose)

class ExecutionChain(LLMChain):
    """LLM chain that carries out a single task."""

    @classmethod
    def from_llm(cls, llm: BaseLLM, verbose: bool = True) -> LLMChain:
        """Build the execution chain around ``llm``.

        The prompt receives the overall objective, previously completed tasks
        as context, and the task to perform.

        Args:
            llm: Language model that will answer the prompt.
            verbose: Forwarded to the chain constructor.

        Returns:
            A chain instance of this class wired to the execution prompt.
        """
        template_pieces = [
            "你是一个执行任务的AI，根据以下目标执行一个任务：{objective}。",
            "考虑这些先前完成的任务：{context}。",
            "你的任务是：{task}。",
            "回答：",
        ]
        execution_prompt = PromptTemplate(
            template="".join(template_pieces),
            input_variables=["objective", "context", "task"],
        )
        return cls(prompt=execution_prompt, llm=llm, verbose=verbose)

定义BabyAGI控制器 (Define the BabyAGI Controller)

BabyAGI 将上面定义的链组合在一个（可能是无限的）循环中。

def get_next_task(
    task_creation_chain: LLMChain,
    result: Dict,
    task_description: str,
    task_list: List[str],
    objective: str,
) -> List[Dict]:
    """Ask the task-creation chain for follow-up tasks.

    Args:
        task_creation_chain: Chain whose ``run`` method produces the new tasks.
        result: Result of the task that was just executed.
        task_description: Description of the task that produced ``result``.
        task_list: Names of the tasks that are still incomplete.
        objective: Overall objective the agent is working towards.

    Returns:
        One ``{"task_name": ...}`` dict per non-blank line of the chain's
        response, in response order. Lines are kept unstripped.
    """
    pending = ", ".join(task_list)
    raw_response = task_creation_chain.run(
        result=result,
        task_description=task_description,
        incomplete_tasks=pending,
        objective=objective,
    )
    created = []
    for line in raw_response.split("\n"):
        # Blank / whitespace-only lines carry no task.
        if line.strip():
            created.append({"task_name": line})
    return created

def prioritize_tasks(
    task_prioritization_chain: LLMChain,
    this_task_id: int,
    task_list: List[Dict],
    objective: str,
) -> List[Dict]:
    """Let the prioritization chain re-order the pending tasks.

    Args:
        task_prioritization_chain: Chain whose ``run`` method returns the
            re-ordered tasks as text, one ``<id>. <name>`` entry per line.
        this_task_id: Id of the task that was just executed; the chain is
            told to start numbering at this value plus one.
        task_list: Pending tasks; only each entry's ``"task_name"`` is sent.
        objective: Overall objective the agent is working towards.

    Returns:
        A list of ``{"task_id": str, "task_name": str}`` dicts in the order
        given by the response. Lines without a ``"."`` are dropped. The id is
        kept as the (stripped) text before the first ``"."``.
    """
    names = [entry["task_name"] for entry in task_list]
    following_id = int(this_task_id) + 1
    raw_response = task_prioritization_chain.run(
        task_names=names, next_task_id=following_id, objective=objective
    )

    reordered = []
    for line in raw_response.split("\n"):
        stripped = line.strip()
        if not stripped:
            continue
        # Split on the first "." only, so task names may contain dots.
        id_text, dot, name_text = stripped.partition(".")
        if dot:
            reordered.append(
                {"task_id": id_text.strip(), "task_name": name_text.strip()}
            )
    return reordered

def _get_top_tasks(vectorstore, query: str, k: int) -> List[str]:
    """Return the task names of the top ``k`` vector-store hits for ``query``.

    Hits come from ``vectorstore.similarity_search_with_score`` and are
    ordered by descending score; each hit's ``metadata["task"]`` is returned
    as a string. An empty search result yields an empty list.
    """
    hits = vectorstore.similarity_search_with_score(query, k=k)
    if not hits:
        return []
    ranked = sorted(hits, key=lambda hit: hit[1], reverse=True)
    return [str(document.metadata["task"]) for document, _score in ranked]


def execute_task(
    vectorstore, execution_chain: LLMChain, objective: str, task: str, k: int = 5
) -> str:
    """Run a single task through the execution chain.

    Up to ``k`` task names are looked up in ``vectorstore`` (using the
    objective as the query) and passed to the chain as context.

    Args:
        vectorstore: Store queried for previously recorded tasks.
        execution_chain: Chain whose ``run`` method performs the task.
        objective: Overall objective the agent is working towards.
        task: Name of the task to execute.
        k: Number of context entries to retrieve.

    Returns:
        Whatever ``execution_chain.run`` returns for this task.
    """
    related_tasks = _get_top_tasks(vectorstore, query=objective, k=k)
    answer = execution_chain.run(
        objective=objective,
        context=related_tasks,
        task=task,
    )
    return answer

class BabyAGI(Chain, BaseModel):
    """Controller chain that drives the BabyAGI loop.

    Each iteration pops the task at the front of ``task_list``, executes it
    with ``execution_chain``, stores the result in ``vectorstore``, asks
    ``task_creation_chain`` for follow-up tasks and finally lets
    ``task_prioritization_chain`` rebuild the queue.
    """

    # Queue of pending tasks; entries are dicts with "task_id" and "task_name".
    task_list: deque = Field(default_factory=deque)
    # The three LLM chains the loop is built from.
    task_creation_chain: TaskCreationChain = Field(...)
    task_prioritization_chain: TaskPrioritizationChain = Field(...)
    execution_chain: ExecutionChain = Field(...)
    # Last id handed out to a newly created task; bumped before each use.
    task_id_counter: int = Field(1)
    # Store that receives every task result and supplies execution context.
    vectorstore: VectorStore = Field(init=False)
    # Upper bound on executed tasks per run; None means "no limit".
    max_iterations: Optional[int] = None

    class Config:
        """Pydantic configuration for this model."""

        arbitrary_types_allowed = True

    def add_task(self, task: Dict):
        """Append ``task`` to the end of the pending queue."""
        self.task_list.append(task)

    def print_task_list(self):
        """Print a coloured banner followed by every pending task."""
        print("\033[95m\033[1m" + "\n*****任务列表*****\n" + "\033[0m\033[0m")
        for t in self.task_list:
            print(str(t["task_id"]) + ": " + t["task_name"])

    def print_next_task(self, task: Dict):
        """Print a coloured banner followed by the task about to run."""
        print("\033[92m\033[1m" + "\n*****下一个任务*****\n" + "\033[0m\033[0m")
        print(str(task["task_id"]) + ": " + task["task_name"])

    def print_task_result(self, result: str):
        """Print a coloured banner followed by a task's result."""
        print("\033[93m\033[1m" + "\n*****任务结果*****\n" + "\033[0m\033[0m")
        print(result)

    @property
    def input_keys(self) -> List[str]:
        """Input keys required by this chain."""
        return ["objective"]

    @property
    def output_keys(self) -> List[str]:
        """This chain produces no output keys."""
        return []

    def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Run the agent loop.

        The loop ends when ``max_iterations`` tasks have been executed or when
        the task queue becomes empty. Nothing else enqueues work while this
        method runs, so continuing with an empty queue would only spin
        (forever when ``max_iterations`` is ``None``).

        Args:
            inputs: Must contain ``"objective"``; may contain ``"first_task"``
                to override the name of the initial task.

        Returns:
            An empty dict, matching ``output_keys``.
        """
        objective = inputs["objective"]
        first_task = inputs.get("first_task", "制作待办事项清单")
        self.add_task({"task_id": 1, "task_name": first_task})
        num_iters = 0
        while self.task_list and (
            self.max_iterations is None or num_iters < self.max_iterations
        ):
            self.print_task_list()

            # Step 1: take the task at the front of the queue.
            task = self.task_list.popleft()
            self.print_next_task(task)

            # Step 2: execute the task.
            result = execute_task(
                self.vectorstore, self.execution_chain, objective, task["task_name"]
            )
            this_task_id = int(task["task_id"])
            self.print_task_result(result)

            # Step 3: store the result in the configured vector store.
            result_id = f"result_{task['task_id']}_{num_iters}"
            self.vectorstore.add_texts(
                texts=[result],
                metadatas=[{"task": task["task_name"]}],
                ids=[result_id],
            )

            # Step 4: create follow-up tasks and re-prioritize the queue.
            new_tasks = get_next_task(
                self.task_creation_chain,
                result,
                task["task_name"],
                [t["task_name"] for t in self.task_list],
                objective,
            )
            for new_task in new_tasks:
                self.task_id_counter += 1
                new_task.update({"task_id": self.task_id_counter})
                self.add_task(new_task)
            self.task_list = deque(
                prioritize_tasks(
                    self.task_prioritization_chain,
                    this_task_id,
                    list(self.task_list),
                    objective,
                )
            )
            num_iters += 1

        print("\033[91m\033[1m" + "\n*****任务结束*****\n" + "\033[0m\033[0m")
        return {}

    @classmethod
    def from_llm(
        cls, llm: BaseLLM, vectorstore: VectorStore, verbose: bool = False, **kwargs
    ) -> "BabyAGI":
        """Build a controller whose three chains all use ``llm``.

        Args:
            llm: Language model shared by the creation, prioritization and
                execution chains.
            vectorstore: Store used for task results and execution context.
            verbose: Forwarded to each chain's ``from_llm``.
            **kwargs: Extra field values for the controller (for example
                ``max_iterations``).

        Returns:
            A configured ``BabyAGI`` instance.
        """
        task_creation_chain = TaskCreationChain.from_llm(llm, verbose=verbose)
        task_prioritization_chain = TaskPrioritizationChain.from_llm(
            llm, verbose=verbose
        )
        execution_chain = ExecutionChain.from_llm(llm, verbose=verbose)
        return cls(
            task_creation_chain=task_creation_chain,
            task_prioritization_chain=task_prioritization_chain,
            execution_chain=execution_chain,
            vectorstore=vectorstore,
            **kwargs,
        )

运行BabyAGI

现在是时候创建BabyAGI控制器并观察它尝试完成您的目标了。

OBJECTIVE = "编写今天的旧金山天气报告"

llm = OpenAI(temperature=0)

# Verbosity flag forwarded to the LLM chains.
verbose = False
# Iteration cap for the agent; with None the agent keeps going.
max_iterations: Optional[int] = 3
baby_agi = BabyAGI.from_llm(
    llm=llm,
    vectorstore=vectorstore,
    verbose=verbose,
    max_iterations=max_iterations,
)

baby_agi({"objective": OBJECTIVE})

    
    *****任务列表*****
    
    1: 制作待办事项清单
    
    *****下一个任务*****
    
    1: 制作待办事项清单
    
    *****任务结果*****
    
    1. 检查当天的温度范围。
    2. 收集今天旧金山的温度数据。
    3. 分析温度数据并创建天气报告。
    4. 发布天气报告。
    
    *****任务列表*****
    
    2: 收集当天预期温度范围的数据。
    3: 收集当天预期降水量的数据。
    4: 分析数据并创建天气报告。
    5: 检查旧金山的当前天气状况。
    6: 发布天气报告。
    
    *****下一个任务*****
    
    2: 收集当天预期温度范围的数据。
    
    *****任务结果*****
    
    我已经收集到了旧金山当天预期温度范围的数据。预测的温度范围为华氏55度到68度。
    
    *****任务列表*****
    
    3: 检查旧金山的当前天气状况。
    4: 计算旧金山当天的平均温度。
    5: 确定旧金山当天的降水概率。
    6: 查找旧金山当天的任何潜在天气警报或建议。
    7: 研究旧金山当天的任何历史天气模式。
    8: 将预期温度范围与旧金山当天的历史平均值进行比较。
    9: 收集当天预期降水量的数据。
    10: 分析数据并创建天气报告。
    11: 发布天气报告。
    
    *****下一个任务*****
    
    3: 检查旧金山的当前天气状况。
    
    *****任务结果*****
    
    我正在检查旧金山的当前天气状况。根据我收集到的数据，今天旧金山的温度目前约为华氏65度，天空晴朗。预计当天的温度范围将在60到70华氏度之间。
    
    *****任务结束*****
    
    {'objective': '编写今天的旧金山天气报告'}

BabyAGI 用户指南

安装和导入所需模块 (Install and Import Required Modules)​

连接到向量存储 (Connect to the Vector Store)​

定义链​

定义BabyAGI控制器 (Define the BabyAGI Controller)​

运行BabyAGI​

安装和导入所需模块 (Install and Import Required Modules)

连接到向量存储 (Connect to the Vector Store)

定义链

定义BabyAGI控制器 (Define the BabyAGI Controller)

运行BabyAGI