12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- from typing import List, Dict
- from libs.import_chunc import ImportChunc
- from libs.chunc_helper import ChuncHelper
- import json
- import sys
- import os
- from dotenv import load_dotenv
- from utils.es import ElasticsearchOperations
- from utils.factors import FactorsHelper
- load_dotenv()
# Runtime configuration, read from environment variables (a .env file, if
# present, is loaded by the load_dotenv() call above). Each value is None
# when the corresponding variable is not set.
# NOTE(review): this section used to be labelled "DeepSeek API configuration",
# but none of these values are API settings; they are paths and index names.
TRUNC_OUTPUT_PATH = os.getenv("TRUNC_OUTPUT_PATH")  # passed to ImportChunc as data_dir
DOC_STORAGE_PATH = os.getenv("DOC_STORAGE_PATH")  # not referenced by the functions below
DOC_PATH = os.getenv("DOC_PATH")  # presumably the source document file; verify against ChuncHelper
JIEBA_USER_DICT = os.getenv("JIEBA_USER_DICT")  # presumably a jieba user dictionary path; verify
# Index names handed to ElasticsearchOperations.
WORD_INDEX = os.getenv("WORD_INDEX")
TITLE_INDEX = os.getenv("TITLE_INDEX")
CHUNC_INDEX = os.getenv("CHUNC_INDEX")
-
def build_test() -> None:
    """Run the chunk reverse-index import against ``TRUNC_OUTPUT_PATH``.

    Creates an ``ImportChunc`` over the configured output directory and calls
    its ``import_chunc_reverse_index()`` method. This performs the same calls
    as ``build_chunc`` and is not wired to any command-line action.
    """
    importer = ImportChunc(data_dir=TRUNC_OUTPUT_PATH)
    importer.import_chunc_reverse_index()
def build_index() -> None:
    """Import the word and title reverse indices from ``TRUNC_OUTPUT_PATH``.

    Calls ``import_word_reverse_index()`` and then
    ``import_title_reverse_index()`` on a single ``ImportChunc`` instance.
    The chunk index is deliberately not built here; use ``build_chunc`` for
    that step.
    """
    importer = ImportChunc(data_dir=TRUNC_OUTPUT_PATH)
    importer.import_word_reverse_index()
    importer.import_title_reverse_index()
def build_chunc() -> None:
    """Import the chunk reverse index from ``TRUNC_OUTPUT_PATH``.

    Creates an ``ImportChunc`` over the configured output directory and calls
    its ``import_chunc_reverse_index()`` method.
    """
    importer = ImportChunc(data_dir=TRUNC_OUTPUT_PATH)
    importer.import_chunc_reverse_index()
def delete_index() -> None:
    """Delete every index this script works with.

    Removes, in order, the indices named by ``WORD_INDEX``, ``TITLE_INDEX``
    and ``CHUNC_INDEX``, followed by the hard-coded ``"text_chunks"`` index.
    An exception raised by any deletion propagates immediately, so later
    indices are left untouched in that case.
    """
    es_ops = ElasticsearchOperations()
    print(">>> delete index")
    for index_name in (WORD_INDEX, TITLE_INDEX, CHUNC_INDEX, "text_chunks"):
        es_ops.delete_index(index_name)
- from functions.basic_function import search_document
def test_index() -> None:
    """Run one fixed sample query through ``search_document`` and print the result.

    This is a manual smoke test: it has no assertions and returns nothing.
    Any exception raised by ``search_document`` propagates unchanged to the
    caller.
    """
    # Sample query: "regulations on banks selling insurance products".
    question = "银行销售保险产品的规定"
    result = search_document(question)
    print(result)
# Command-line entry point. Usage: python <script> {test|build|delete|chunc}
if __name__ == "__main__":
    # Map each supported action name to the function that implements it.
    actions = {
        "test": test_index,
        "build": build_index,
        "delete": delete_index,
        "chunc": build_chunc,
    }
    # Exactly one argument (the action name) is expected after the script name.
    handler = actions.get(sys.argv[1]) if len(sys.argv) == 2 else None
    if handler is None:
        # Report misuse instead of silently doing nothing.
        choices = "|".join(actions)
        print(f"usage: {sys.argv[0]} {{{choices}}}", file=sys.stderr)
        sys.exit(2)
    handler()
|