from langchain_openai import OpenAIEmbeddings
# Embed a handful of sample chat utterances and one query with OpenAIEmbeddings.
e_model = OpenAIEmbeddings()

# Sample dialogue lines passed to embed_documents as a single batch.
sample_texts = [
    "你好",
    "你好啊",
    "你叫什么名字?",
    "我叫王大锤",
    "很高兴认识你大锤",
]
# NOTE(review): `ebeddings` is probably a typo for `embeddings`; the name is
# kept as-is because other code may refer to it.
ebeddings = e_model.embed_documents(sample_texts)
ebeddings

# Embed a single query string and peek at the first five entries of the result.
query_text = "这段对话中提到了什么名字?"
embedded_query = e_model.embed_query(query_text)
embedded_query[:5]
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
# Wrap an OpenAI embedder with a cache backed by a local file store.
u_embeddings = OpenAIEmbeddings()
fs = LocalFileStore("./cache/")

# The cache namespace is the embedding model's name (presumably so that
# vectors produced by different models do not share cache entries).
cache_namespace = u_embeddings.model
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    u_embeddings, fs, namespace=cache_namespace
)

# Show the keys currently held in the store, before anything is embedded
# through `cached_embeddings`.
[*fs.yield_keys()]
# Load the document and split it into chunks; the chunks are vectorized and
# stored in the cache further below.
loader = TextLoader("letter.txt")
raw_documents = loader.load()
text_splitter = CharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=0,
)
documents = text_splitter.split_documents(raw_documents)
from langchain.vectorstores import FAISS
# Build the FAISS index through the cache-backed embedder and time one run.
#
# A plain timer replaces the IPython `%timeit -r 1 -n 1` magic for two reasons:
#   * the magic is not valid Python outside IPython (SyntaxError in a script);
#   * names assigned inside `%timeit` are not kept in the namespace, so `db`
#     would not exist after the measurement.
# As with `-r 1 -n 1`, the statement is executed exactly once.
import time

_build_start = time.perf_counter()
db = FAISS.from_documents(documents, cached_embeddings)
_build_elapsed = time.perf_counter() - _build_start
print(f"FAISS.from_documents: {_build_elapsed:.3f} s (1 run, 1 loop)")

# Inspect the keys in the cache store after the documents were embedded.
list(fs.yield_keys())