"""Command-line helper for building, deleting and smoke-testing search indexes.

Usage: ``python <this script> <action>`` where ``<action>`` is one of
``test``, ``build``, ``delete`` or ``chunc``.
"""
import json
import os
import sys
from typing import Dict, List

from dotenv import load_dotenv

from libs.chunc_helper import ChuncHelper
from libs.import_chunc import ImportChunc
from utils.es import ElasticsearchOperations
from utils.factors import FactorsHelper

load_dotenv()

# NOTE(review): kept after load_dotenv() on purpose, matching the original
# import order -- this module may read environment variables at import time;
# confirm before hoisting it into the import group above.
from functions.basic_function import search_document  # noqa: E402

# Environment-driven configuration: data paths and index names.
TRUNC_OUTPUT_PATH = os.getenv("TRUNC_OUTPUT_PATH")
DOC_STORAGE_PATH = os.getenv("DOC_STORAGE_PATH")
DOC_PATH = os.getenv("DOC_PATH")
JIEBA_USER_DICT = os.getenv("JIEBA_USER_DICT")
WORD_INDEX = os.getenv("WORD_INDEX")
TITLE_INDEX = os.getenv("TITLE_INDEX")
CHUNC_INDEX = os.getenv("CHUNC_INDEX")


def build_chunc() -> None:
    """Import the chunk reverse index from the data under TRUNC_OUTPUT_PATH."""
    helper = ImportChunc(data_dir=TRUNC_OUTPUT_PATH)
    helper.import_chunc_reverse_index()


def build_test() -> None:
    """Import the chunk reverse index (same operation as ``build_chunc``)."""
    build_chunc()


def build_index() -> None:
    """Import the word and title reverse indexes from TRUNC_OUTPUT_PATH.

    The chunk index is not built here; use ``build_chunc`` for that.
    """
    helper = ImportChunc(data_dir=TRUNC_OUTPUT_PATH)
    helper.import_word_reverse_index()
    helper.import_title_reverse_index()
    # helper.import_chunc_reverse_index()


def delete_index() -> None:
    """Delete the word, title and chunk indexes plus the "text_chunks" index."""
    helper = ElasticsearchOperations()
    print(">>> delete index")
    for index_name in (WORD_INDEX, TITLE_INDEX, CHUNC_INDEX, "text_chunks"):
        helper.delete_index(index_name)


def test_index() -> None:
    """Run a sample question through ``search_document`` and print the result.

    Any exception raised by the search propagates to the caller unchanged.
    """
    question = "银行销售保险产品的规定"
    result = search_document(question)
    print(result)

    # Disabled manual checks of the individual indexes, kept for reference.
    # helper = ElasticsearchOperations()
    # articles = FactorsHelper()
    # chunc_helper = ChuncHelper(data_file=DOC_PATH, output_dir=TRUNC_OUTPUT_PATH, user_dict=JIEBA_USER_DICT)
    # print(">>> question: test word index")
    # words = chunc_helper.cut_word(question)
    # data = helper.search_word_index(WORD_INDEX, [question])  # words)
    # for item in data:
    #     print(f"{item['word']} {item['score']}")
    #     for art in item["articles"]:
    #         articles.add_factors(art, item['score'])
    #
    # print(">>> test title index")
    # data = helper.search_title_index(TITLE_INDEX, question)
    # for item in data:
    #     print(f"{item['title']} {item['score']}")
    #     articles.add_factors(item['title'], item['score'])
    #
    # print(">>> test chunc index")
    # data = helper.search_title_index(CHUNC_INDEX, question)
    # for item in data:
    #     print(f"{item['title']} {item['score']}")
    #     articles.add_factors(item['title'], item['score'])
    #
    # print(">>> test factors calc")
    # sorted_articals = articles.sort_factors()
    # for key in sorted_articals:
    #     print(key)
    #
    # data = helper.get_document(TITLE_INDEX, helper.get_doc_id("保险代理人监管规定"))
    # print(data)


# Maps each command-line action name to the function that performs it.
_ACTIONS = {
    "test": test_index,
    "build": build_index,
    "delete": delete_index,
    "chunc": build_chunc,
}


def main(argv: List[str]) -> None:
    """Dispatch ``argv[1]`` to the matching action.

    Args:
        argv: Full argument vector, program name first (normally ``sys.argv``).

    Raises:
        SystemExit: With a usage message when the argument count is not
            exactly one action, or the action name is unknown.
    """
    if len(argv) != 2 or argv[1] not in _ACTIONS:
        prog = os.path.basename(argv[0]) if argv and argv[0] else "script"
        choices = "|".join(_ACTIONS)
        # A silent no-op would hide typos such as "delte"; fail loudly instead.
        sys.exit(f"usage: {prog} <{choices}>")
    _ACTIONS[argv[1]]()


# Usage example
if __name__ == "__main__":
    main(sys.argv)