# coding=utf-8
-
- from utils.es import ElasticsearchOperations
- es_ops = ElasticsearchOperations()
# Tool schemas advertised to the model. Each entry has the shape
# {"type": "function", "function": {"name", "description", "parameters"}}.
#
# Only ``search_document`` is registered. Schemas for
# ``get_document_by_keyword`` and ``get_chunk_by_keyword`` (each taking one
# required, space-separated ``keywords`` string) were previously listed here
# but had been commented out, so those two tools are not advertised.
basic_functions = [
    {
        "type": "function",
        "function": {
            "name": "search_document",
            "description": "按照关键词搜索法律法规文件。参数是指定的关键词,多个关键词需要用空格分开。返回的文章标题列表。",
            "parameters": {
                "type": "object",
                "properties": {
                    "keywords": {
                        "type": "string",
                        "description": "关键词信息,多个关键词需要用空格分开",
                    },
                },
                "required": ["keywords"],
            },
        },
    },
]
- from utils.factors import FactorsHelper
- from chunc.chunc_helper import ChuncHelper
- import os
- from dotenv import load_dotenv
# Pull settings from a .env file (if present) into the process environment
# before they are read below.
load_dotenv()

# Every setting below is None when its environment variable is not set.

# Handed to ChuncHelper as data_file, output_dir and user_dict respectively.
DOC_PATH = os.environ.get("DOC_PATH")
TRUNC_OUTPUT_PATH = os.environ.get("TRUNC_OUTPUT_PATH")
JIEBA_USER_DICT = os.environ.get("JIEBA_USER_DICT")

# Not referenced by the functions visible in this part of the file.
DOC_STORAGE_PATH = os.environ.get("DOC_STORAGE_PATH")

# Index names passed to the ElasticsearchOperations search helpers.
WORD_INDEX = os.environ.get("WORD_INDEX")
TITLE_INDEX = os.environ.get("TITLE_INDEX")
CHUNC_INDEX = os.environ.get("CHUNC_INDEX")
- from chunc.chunc_helper import ChuncHelper
def get_document(title: str):
    """Fetch the content of the document matching *title*.

    The title index is queried with ``top_k=1``; the title of that hit is
    then passed to ``ChuncHelper.get`` and its result is returned.

    Args:
        title: Title text to look up in the title index.

    Returns:
        Whatever ``ChuncHelper.get`` returns for the matched title, or a
        fixed Chinese "document content not found" message when the search
        does not yield exactly one hit.
    """
    print(">>>>>> get_document ", title)
    chunc_helper = ChuncHelper(
        data_file=DOC_PATH,
        output_dir=TRUNC_OUTPUT_PATH,
        user_dict=JIEBA_USER_DICT,
    )
    hits = es_ops.search_title_index(index=TITLE_INDEX, title=title, top_k=1)

    # Anything other than exactly one hit is treated as "not found".
    if len(hits) != 1:
        return "没有找到文件内容"

    matched_title = hits[0]["title"]
    print(">>>> get document response: ", matched_title)
    return chunc_helper.get(matched_title)
-
# Upper bound on how many titles a single search reply lists.
_MAX_LISTED_TITLES = 20


def _is_excluded_title(title: str) -> bool:
    """Return True when *title* must be left out of the search reply."""
    # Documents marked as repealed ("已废止") or no longer in force ("已失效").
    if "已废止" in title or "已失效" in title:
        return True

    mentions_circ = "保监会" in title
    # Filter out documents that mention "银监会" but not "保监会".
    if "银监会" in title and not mentions_circ:
        return True

    mentions_insurance = mentions_circ or "保险" in title
    if "银行业监督管理委员会" in title and not mentions_insurance:
        return True

    # Remaining "银行" titles are dropped unless they also mention
    # "保监会", "保险" or "非银行".
    return "银行" in title and not mentions_insurance and "非银行" not in title


def search_document(question: str):
    """Search the indexes for documents matching *question* and list titles.

    Scores from the word index, the title index and the chunk index are
    accumulated per article in a ``FactorsHelper``; the ranked titles are
    then filtered with ``_is_excluded_title`` and at most
    ``_MAX_LISTED_TITLES`` of them are listed.

    The count in the header line is the number of titles actually listed
    (it is computed after filtering, so it cannot overstate the list).

    NOTE(review): the tool schema in ``basic_functions`` names this argument
    ``keywords`` while the parameter here is ``question`` -- confirm the
    dispatcher passes it positionally or maps the name.

    Args:
        question: Search text; it is sent to each index as-is.

    Returns:
        A newline-joined string: a header line followed by numbered titles.
        A fixed Chinese "nothing found" message is returned when no title
        survives filtering or when any lookup raises.
    """
    print(">>>>>>>>> search_document")
    try:
        ranking = FactorsHelper()
        print(f">>> question: {question}")

        # Word index: each hit carries the articles that contain the word.
        for hit in es_ops.search_word_index(WORD_INDEX, [question]):
            print(f"{hit['word']} {hit['score']}")
            for article in hit["articles"]:
                ranking.add_factors(article, hit["score"])

        # Title and chunk indexes are both queried through
        # search_title_index and contribute one score per hit title.
        for label, index_name in (("title", TITLE_INDEX), ("chunc", CHUNC_INDEX)):
            print(f">>> test {label} index")
            for hit in es_ops.search_title_index(index_name, question):
                print(f"{hit['title']} {hit['score']}")
                ranking.add_factors(hit["title"], hit["score"])

        print(">>> test factors calc")
        titles = []
        for title, _score in ranking.sort_factors():
            if _is_excluded_title(title):
                continue
            titles.append(title)
            if len(titles) >= _MAX_LISTED_TITLES:
                break
    except Exception as e:
        # Best-effort tool call: report the problem and answer "not found"
        # rather than propagating the failure to the caller.
        print(e)
        return "没有找到任何资料"

    if not titles:
        # Avoid emitting a header that announces a list with nothing in it.
        return "没有找到任何资料"

    lines = [f"共找到{len(titles)}篇资料,以下是他们的标题和链接"]
    lines.extend(f"{number}: {title}" for number, title in enumerate(titles, start=1))
    return "\n".join(lines)
-
-
# Hits must score strictly above this value to be returned. The origin of
# the number is not documented in this file.
_MIN_SIMILARITY_SCORE = 1.62


def _similar_texts_above_threshold(keywords, log_label, min_score=_MIN_SIMILARITY_SCORE):
    """Join the texts of similar-text hits whose score exceeds *min_score*.

    Args:
        keywords: Query passed unchanged to ``es_ops.search_similar_texts``.
        log_label: Name printed in front of each kept hit's preview.
        min_score: Exclusive lower bound on a hit's ``score``.

    Returns:
        The kept texts joined with newlines; an empty string when no hit
        qualifies.
    """
    kept = []
    for hit in es_ops.search_similar_texts(keywords):
        if hit["score"] > min_score:
            # Log only a short preview of each kept text.
            print(f">>> {log_label} ", hit["text"][:100])
            kept.append(hit["text"])
    return "\n".join(kept)


def get_document_by_keyword(keywords: str):
    """Return the texts similar to *keywords*, one per line.

    Apart from logging the incoming keywords, this does the same lookup as
    ``get_chunk_by_keyword``.

    Args:
        keywords: Query passed to the similar-text search.

    Returns:
        Newline-joined texts of the hits scoring above the threshold, or an
        empty string when there are none.
    """
    print(">>> get_document_by_keyword ", keywords)
    return _similar_texts_above_threshold(keywords, "get_document_by_keyword")


def get_chunk_by_keyword(keywords: str):
    """Return the text chunks similar to *keywords*, one per line.

    Args:
        keywords: Query passed to the similar-text search.

    Returns:
        Newline-joined texts of the hits scoring above the threshold, or an
        empty string when there are none.
    """
    return _similar_texts_above_threshold(keywords, "get_chunk_by_keyword")
def get_weather_by_city(keywords: str):
    """Return a canned weather report (placeholder tool).

    The argument is only logged; the reply is the same fixed Chinese
    sentence about Nanjing whatever city is passed in.

    Args:
        keywords: City name as supplied by the caller.

    Returns:
        The fixed weather sentence.
    """
    print(">>> get_weather_by_city ", keywords)
    canned_report = "南京今日天气为大雨,最高温度11度,最低温度8度。"
    return canned_report
-
|