# coding=utf-8
"""Tool schema and handler functions for searching laws-and-regulations text.

The module exposes ``basic_functions`` (the tool definitions offered to the
model) together with the Python callables that implement them.  All lookups
go through a shared ``ElasticsearchOperations`` instance.
"""
import os

from dotenv import load_dotenv

from chunc.chunc_helper import ChuncHelper
from utils.es import ElasticsearchOperations
from utils.factors import FactorsHelper

# NOTE(review): the client is created before load_dotenv() runs, which keeps
# the original statement order.  Confirm that ElasticsearchOperations does not
# need values from the .env file at construction time.
es_ops = ElasticsearchOperations()

# Tool definitions, one dict per callable tool ("type": "function" plus a JSON
# Schema for the parameters).  Only ``search_document`` is registered; the two
# keyword-based entries below are disabled and kept verbatim for reference.
basic_functions = [
    #{
    #    "type":"function",
    #    "function":{
    #        "name": "get_document_by_keyword",
    #        "description": "按照关键词查询法律法规文档。指定的关键词会作为搜索条件。返回的是法律法规的全文。",
    #        "parameters": {
    #            "type": "object",
    #            "properties": {
    #                "keywords": {
    #                    "type": "string",
    #                    "description": "用空格分隔的关键词,如‘关键词 关键词 关键词’"
    #                }
    #            },
    #            "required": ["keywords"]
    #        }
    #    }
    #},
    # {
    #    "type":"function",
    #    "function":{
    #        "name": "get_chunk_by_keyword",
    #        "description": "按照关键词查询法律法规文本。指定的关键词会作为搜索条件。返回的是文本的片段。",
    #        "parameters": {
    #            "type": "object",
    #            "properties": {
    #                "keywords": {
    #                    "type": "string",
    #                    "description": "用空格分隔的关键词,如‘关键词 关键词 关键词’"
    #                }
    #            },
    #            "required": ["keywords"]
    #        }
    #    }
    # },
    # NOTE(review): the schema names the argument "keywords" while the handler
    # search_document() names its parameter "question".  This only works if the
    # dispatcher passes the value positionally -- verify against the caller.
    {
        "type": "function",
        "function": {
            "name": "search_document",
            "description": "按照关键词搜索法律法规文件。参数是指定的关键词,多个关键词需要用空格分开。返回的文章标题列表。",
            "parameters": {
                "type": "object",
                "properties": {
                    "keywords": {
                        "type": "string",
                        "description": "关键词信息,多个关键词需要用空格分开",
                    }
                },
                "required": ["keywords"],
            },
        },
    },
]

load_dotenv()

# Paths and index names are read from the environment (optionally from .env).
# Each value is None when the corresponding variable is not set.
TRUNC_OUTPUT_PATH = os.getenv("TRUNC_OUTPUT_PATH")
DOC_STORAGE_PATH = os.getenv("DOC_STORAGE_PATH")
DOC_PATH = os.getenv("DOC_PATH")
JIEBA_USER_DICT = os.getenv("JIEBA_USER_DICT")
WORD_INDEX = os.getenv("WORD_INDEX")
TITLE_INDEX = os.getenv("TITLE_INDEX")
CHUNC_INDEX = os.getenv("CHUNC_INDEX")

# Maximum number of titles listed by search_document().
MAX_LISTED_TITLES = 20

# Hits from search_similar_texts() must score strictly above this to be kept.
SIMILARITY_SCORE_THRESHOLD = 1.62

# Titles containing one of these markers ("repealed" / "no longer in force")
# are never listed.
_INVALID_TITLE_MARKERS = ("已废止", "已失效")


def _is_excluded_title(title: str) -> bool:
    """Return True when *title* must be left out of the search result list.

    A title is excluded when it is marked as repealed or no longer in force,
    or when it is a banking-only document, i.e. it mentions the banking
    regulator or banks without also mentioning the insurance regulator or
    insurance.
    """
    if any(marker in title for marker in _INVALID_TITLE_MARKERS):
        return True

    mentions_circ = "保监会" in title      # insurance regulator (abbreviation)
    mentions_insurance = "保险" in title   # "insurance"

    # Drop documents that mention the banking regulator's abbreviation (银监会)
    # but not the insurance regulator (保监会).
    if "银监会" in title and not mentions_circ:
        return True
    # Same rule for the banking regulator's full name, unless the title is
    # also about insurance.
    if "银行业监督管理委员会" in title and not (mentions_circ or mentions_insurance):
        return True
    # Generic "bank" titles are dropped too, except "non-bank" (非银行) ones.
    if (
        "银行" in title
        and not (mentions_circ or mentions_insurance)
        and "非银行" not in title
    ):
        return True
    return False


def _similar_texts_above_threshold(keywords: str, log_tag: str) -> str:
    """Join the texts of all similar-text hits scoring above the threshold.

    Args:
        keywords: Query string forwarded to ``es_ops.search_similar_texts``.
        log_tag: Prefix printed in front of the first 100 characters of every
            accepted hit.

    Returns:
        The accepted texts joined with newlines; an empty string when no hit
        scores above ``SIMILARITY_SCORE_THRESHOLD``.
    """
    accepted = []
    for hit in es_ops.search_similar_texts(keywords):
        if hit['score'] > SIMILARITY_SCORE_THRESHOLD:
            print(log_tag, hit['text'][:100])
            accepted.append(hit['text'])
    return "\n".join(accepted)


def get_document(title: str):
    """Return the stored content of the document best matching *title*.

    The title index is queried for a single best hit (``top_k=1``).  When
    exactly one hit comes back, its title is looked up through ``ChuncHelper``.

    Args:
        title: Title (or approximate title) to search for.

    Returns:
        Whatever ``ChuncHelper.get`` returns for the matched title, or the
        message "没有找到文件内容" when the search does not yield exactly one hit.
    """
    print(">>>>>> get_document ", title)
    response = es_ops.search_title_index(index=TITLE_INDEX, title=title, top_k=1)
    if len(response) == 1:
        matched_title = response[0]["title"]
        print(">>>> get document response: ", matched_title)
        # The helper is only built once a match is known, so a miss does not
        # pay for its construction.
        helper = ChuncHelper(
            data_file=DOC_PATH,
            output_dir=TRUNC_OUTPUT_PATH,
            user_dict=JIEBA_USER_DICT,
        )
        return helper.get(matched_title)
    return "没有找到文件内容"


def search_document(question: str):
    """Search the word, title and chunk indices and list the best titles.

    Scores from the three searches are accumulated per title in a
    ``FactorsHelper``; its ``sort_factors()`` result is then filtered with
    ``_is_excluded_title`` and cut off after ``MAX_LISTED_TITLES`` entries.

    Args:
        question: Search text.  It is sent as one un-segmented term; the
            original word-segmentation step (``ChuncHelper.cut_word``) was
            already disabled.

    Returns:
        A newline-joined string: a header stating how many titles are listed,
        followed by one "<n>: <title>" line per title.  Returns
        "没有找到任何资料" when no title survives filtering or when any step
        raises.
    """
    print(">>>>>>>>> search_document")
    try:
        articles = FactorsHelper()
        print(f">>> question: {question}")

        for item in es_ops.search_word_index(WORD_INDEX, [question]):
            print(f"{item['word']} {item['score']}")
            for art in item["articles"]:
                articles.add_factors(art, item['score'])

        print(">>> test title index")
        for item in es_ops.search_title_index(TITLE_INDEX, question):
            print(f"{item['title']} {item['score']}")
            articles.add_factors(item['title'], item['score'])

        print(">>> test chunc index")
        # NOTE(review): the chunk index is queried through search_title_index,
        # as in the original; confirm this is intended.
        for item in es_ops.search_title_index(CHUNC_INDEX, question):
            print(f"{item['title']} {item['score']}")
            articles.add_factors(item['title'], item['score'])

        print(">>> test factors calc")
        # sort_factors() yields (title, score) pairs -- presumably best first.
        listed_titles = []
        for title, _score in articles.sort_factors():
            if _is_excluded_title(title):
                continue
            listed_titles.append(title)
            if len(listed_titles) >= MAX_LISTED_TITLES:
                break

        if not listed_titles:
            return "没有找到任何资料"

        # The header count is taken after filtering so that it always matches
        # the number of titles actually listed below it.
        output = [f"共找到{len(listed_titles)}篇资料,以下是他们的标题和链接"]
        output.extend(
            f"{index}: {title}"
            for index, title in enumerate(listed_titles, start=1)
        )
        return "\n".join(output)
    except Exception as exc:
        # Best-effort tool call: report the failure and fall back to the
        # "nothing found" message instead of propagating to the caller.
        print(f">>> search_document failed: {exc!r}")
        return "没有找到任何资料"


def get_document_by_keyword(keywords: str) -> str:
    """Return the texts of similar-text hits for *keywords*.

    Args:
        keywords: Query string passed to ``es_ops.search_similar_texts``.

    Returns:
        Newline-joined texts of all hits scoring above
        ``SIMILARITY_SCORE_THRESHOLD``; an empty string when there are none.
    """
    print(">>> get_document_by_keyword ", keywords)
    return _similar_texts_above_threshold(keywords, ">>> get_document_by_keyword ")


def get_chunk_by_keyword(keywords: str) -> str:
    """Return the texts of similar-text hits for *keywords*.

    Performs the same search as ``get_document_by_keyword``; only the log
    prefix differs.

    Args:
        keywords: Query string passed to ``es_ops.search_similar_texts``.

    Returns:
        Newline-joined texts of all hits scoring above
        ``SIMILARITY_SCORE_THRESHOLD``; an empty string when there are none.
    """
    return _similar_texts_above_threshold(keywords, ">>> get_chunk_by_keyword ")


def get_weather_by_city(keywords: str) -> str:
    """Return a fixed weather sentence (placeholder implementation).

    Args:
        keywords: Only logged; it does not influence the result.

    Returns:
        A hard-coded forecast string for Nanjing.
    """
    print(">>> get_weather_by_city ", keywords)
    return "南京今日天气为大雨,最高温度11度,最低温度8度。"