# build_index.py (860 B)

  1. import os
  2. import logging
  3. import sys
  4. from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex
  5. logging.basicConfig(stream=sys.stdout, level=logging.INFO)
  6. logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
  7. os.environ['OPENAI_API_KEY'] = 'sk-xx'
  8. def build_index():
  9. documents = SimpleDirectoryReader('data').load_data()
  10. # 按最大token数500来把原文档切分为多个小的chunk,每个chunk转为向量,并构建索引
  11. index = GPTSimpleVectorIndex(documents, chunk_size_limit=500)
  12. # 保存索引
  13. index.save_to_disk('index.json')
  14. def query():
  15. # 加载索引
  16. new_index = GPTSimpleVectorIndex.load_from_disk('index.json')
  17. # 查询索引
  18. response = new_index.query("What did the author do in 9th grade?")
  19. # 打印答案
  20. print(response)
  21. if __name__=='__main__':
  22. build_index()