Atlas(阿特拉斯)
Atlas 是由 Nomic 开发的平台,用于与从小规模到互联网规模的非结构化数据集进行交互。
本笔记本展示了如何使用与 AtlasDB 向量存储相关的功能。
pip install spacy
python3 -m spacy download en_core_web_sm
pip install nomic
import time
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import SpacyTextSplitter
from langchain.vectorstores import AtlasDB
from langchain.document_loaders import TextLoader
# Credential passed to AtlasDB below.
# NOTE(review): the key is hard-coded; presumably a shared test key (per its name) —
# confirm before reuse, and load real keys from the environment instead.
ATLAS_TEST_API_KEY = "7xDPkYXSYDc1_ErdTPIcoAR9RNd8YDlkS3nVNXcVoIMZ6"

# Load the source text from disk.
loader = TextLoader("../../../state_of_the_union.txt")
documents = loader.load()

# Split the documents with "|" as the splitter's separator, then break every
# resulting chunk apart on that same "|" so each piece becomes its own
# whitespace-stripped text entry.
text_splitter = SpacyTextSplitter(separator="|")
texts = [
    piece.strip()
    for chunk in text_splitter.split_documents(documents)
    for piece in chunk.page_content.split("|")
]

# Build the Atlas-backed vector store directly from the text pieces.
db = AtlasDB.from_texts(
    texts=texts,
    name=f"test_index_{time.time()}",  # timestamp suffix gives the vector store a unique name
    description="test_index",  # description for the vector store
    api_key=ATLAS_TEST_API_KEY,
    index_kwargs={"build_topic_model": True},
)

# wait_for_project_lock() is called before the project is displayed.
db.project.wait_for_project_lock()
# Bare expression: in a notebook this renders the project as the cell output.
db.project
<strong><a href="https://atlas.nomic.ai/dashboard/project/ee2354a3-7f9a-4c6b-af43-b0cda09d7198">test_index_1677255228.136989</a></strong>
            <br>
            为您的项目指定一个描述。已插入 508 条数据。
            <br>
            构建了1个索引。
            <br><strong>投影</strong>
<ul>
<li>test_index_1677255228.136989_index。状态已完成。 <a target="_blank" href="https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541">在线查看</a></li></ul><hr><script>
            destroy = function() {
                document.getElementById("iframedb996d77-8981-48a0-897a-ff2c22bbf541").remove()
            }
        </script>
        <h4>投影ID:db996d77-8981-48a0-897a-ff2c22bbf541</h4>
        <div class="actions">
            <div id="hide" class="action" onclick="destroy()">隐藏嵌入式项目</div>
            <div class="action" id="out">
                <a href="https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541" target="_blank">在atlas.nomic.ai上探索</a>
            </div>
        </div>
        <iframe class="iframe" id="iframedb996d77-8981-48a0-897a-ff2c22bbf541" allow="clipboard-read; clipboard-write" src="https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541">
        </iframe>
        <style>
            .iframe {
                /* vh can be **very** large in vscode html. */
                height: min(75vh, 66vw);
                width: 100%;
            }
        </style>
        <style>
            .actions {
              display: block;
            }
            .action {
              min-height: 18px;
              margin: 5px;
              transition: all 500ms ease-in-out;
            }
            .action:hover {
              cursor: pointer;
            }
            #hide:hover::after {
                content: " X";
            }
            #out:hover::after {
                content: "";
            }
        </style>
