"""Build a llama_index vector index from the ``data`` directory and query it.

Running this file as a script builds the index and saves it to
``index.json``; ``query()`` loads that saved index and asks it a question.
"""

import logging
import os
import sys

from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex

# basicConfig already attaches a stdout handler to the root logger; adding a
# second StreamHandler on top of it would print every log record twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# 'sk-xx' is only a placeholder. setdefault keeps a key that is already
# exported in the environment instead of overwriting it with the placeholder.
os.environ.setdefault('OPENAI_API_KEY', 'sk-xx')


def build_index(data_dir='data', index_path='index.json', chunk_size_limit=500):
    """Build a vector index over the documents in *data_dir* and save it.

    Args:
        data_dir: Directory handed to ``SimpleDirectoryReader``.
        index_path: File the index is written to via ``save_to_disk``.
        chunk_size_limit: Maximum number of tokens per chunk.
    """
    documents = SimpleDirectoryReader(data_dir).load_data()
    # Split the source documents into small chunks of at most
    # `chunk_size_limit` tokens, embed each chunk as a vector, and build the
    # index over them.
    index = GPTSimpleVectorIndex(documents, chunk_size_limit=chunk_size_limit)
    # Persist the index so query() can reuse it without rebuilding.
    index.save_to_disk(index_path)


def query(question="What did the author do in 9th grade?",
          index_path='index.json'):
    """Load the saved index, ask it *question*, and print the answer.

    Args:
        question: Query string passed to the index.
        index_path: File previously written by ``build_index``.

    Returns:
        The object returned by the index's ``query`` call (also printed).
    """
    # Load the index that build_index() saved.
    new_index = GPTSimpleVectorIndex.load_from_disk(index_path)
    # Query the index.
    response = new_index.query(question)
    # Print the answer.
    print(response)
    return response


if __name__ == '__main__':
    build_index()