basic_function.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. # coding=utf-8
  2. from utils.es import ElasticsearchOperations
  3. es_ops = ElasticsearchOperations()
  4. basic_functions = [
  5. #{
  6. # "type":"function",
  7. # "function":{
  8. # "name": "get_document_by_keyword",
  9. # "description": "按照关键词查询法律法规文档。指定的关键词会作为搜索条件。返回的是法律法规的全文。",
  10. # "parameters": {
  11. # "type": "object",
  12. # "properties": {
  13. # "keywords": {
  14. # "type": "string",
  15. # "description": "用空格分隔的关键词,如‘关键词 关键词 关键词’"
  16. # }
  17. # },
  18. # "required": ["keywords"]
  19. # }
  20. # }
  21. # },
  22. # {
  23. # "type":"function",
  24. # "function":{
  25. # "name": "get_chunk_by_keyword",
  26. # "description": "按照关键词查询法律法规文本。指定的关键词会作为搜索条件。返回的是文本的片段。",
  27. # "parameters": {
  28. # "type": "object",
  29. # "properties": {
  30. # "keywords": {
  31. # "type": "string",
  32. # "description": "用空格分隔的关键词,如‘关键词 关键词 关键词’"
  33. # }
  34. # },
  35. # "required": ["keywords"]
  36. # }
  37. # }
  38. # },
  39. {
  40. "type":"function",
  41. "function":{
  42. "name": "search_document",
  43. "description": "按照关键词搜索法律法规文件。参数是指定的关键词,多个关键词需要用空格分开。返回的文章标题列表。",
  44. "parameters": {
  45. "type": "object",
  46. "properties": {
  47. "keywords": {
  48. "type": "string",
  49. "description": "关键词信息,多个关键词需要用空格分开"
  50. }
  51. },
  52. "required": ["keywords"]
  53. }
  54. }
  55. }
  56. ]
  57. from utils.factors import FactorsHelper
  58. from chunc.chunc_helper import ChuncHelper
  59. import os
  60. from dotenv import load_dotenv
  61. load_dotenv()
  62. TRUNC_OUTPUT_PATH = os.getenv("TRUNC_OUTPUT_PATH")
  63. DOC_STORAGE_PATH = os.getenv("DOC_STORAGE_PATH")
  64. DOC_PATH = os.getenv("DOC_PATH")
  65. JIEBA_USER_DICT = os.getenv("JIEBA_USER_DICT")
  66. WORD_INDEX = os.getenv("WORD_INDEX")
  67. TITLE_INDEX = os.getenv("TITLE_INDEX")
  68. CHUNC_INDEX = os.getenv("CHUNC_INDEX")
  69. from chunc.chunc_helper import ChuncHelper
  70. def get_document(title: str):
  71. print(">>>>>> get_document ", title)
  72. helper = ChuncHelper(data_file=DOC_PATH, output_dir=TRUNC_OUTPUT_PATH, user_dict=JIEBA_USER_DICT)
  73. response = es_ops.search_title_index(index=TITLE_INDEX, title=title, top_k=1)
  74. if len(response) == 1:
  75. print(">>>> get document response: ", response[0]["title"])
  76. return helper.get(response[0]["title"])
  77. return "没有找到文件内容"
  78. def search_document(question: str):
  79. print(">>>>>>>>> search_document")
  80. output = []
  81. helper = es_ops
  82. try:
  83. articles = FactorsHelper()
  84. chunc_helper = ChuncHelper(data_file=DOC_PATH, output_dir=TRUNC_OUTPUT_PATH, user_dict=JIEBA_USER_DICT)
  85. print(f">>> question: {question}")
  86. #words = chunc_helper.cut_word(question)
  87. data = helper.search_word_index(WORD_INDEX, [question]) #words)
  88. for item in data:
  89. print(f"{item['word']} {item['score']}")
  90. for art in item["articles"]:
  91. articles.add_factors(art, item['score'])
  92. print(">>> test title index")
  93. data = helper.search_title_index(TITLE_INDEX, question)
  94. for item in data:
  95. print(f"{item['title']} {item['score']}")
  96. articles.add_factors(item['title'], item['score'])
  97. print(">>> test chunc index")
  98. data = helper.search_title_index(CHUNC_INDEX, question)
  99. for item in data:
  100. print(f"{item['title']} {item['score']}")
  101. articles.add_factors(item['title'], item['score'])
  102. print(">>> test factors calc")
  103. sorted_articals = articles.sort_factors()
  104. size = len(sorted_articals)
  105. if size > 20:
  106. size = 20
  107. output.append(f"共找到{size}篇资料,以下是他们的标题和链接")
  108. index = 1
  109. for key in sorted_articals:
  110. title, score = key
  111. if "已废止" in title:
  112. continue
  113. if "已失效" in title:
  114. continue
  115. # 过滤掉包含“银监会”但不包含“保监会”的文件
  116. if "银监会" in title and "保监会" not in title:
  117. continue
  118. if "银行业监督管理委员会" in title and "保监会" not in title and "保险" not in title:
  119. continue
  120. if "银行" in title and "保监会" not in title and "保险" not in title and "非银行" not in title:
  121. continue
  122. output.append(f"{index}: {title}")
  123. index +=1
  124. if index>=21:
  125. break
  126. return "\n".join(output)
  127. except Exception as e:
  128. print(e)
  129. return "没有找到任何资料"
  130. def get_document_by_keyword(keywords:str):
  131. print(">>> get_document_by_keyword ", keywords)
  132. results = es_ops.search_similar_texts(keywords)
  133. text = []
  134. for result in results:
  135. if result['score'] > 1.62:
  136. print(">>> get_document_by_keyword ", result['text'][:100])
  137. text.append(result['text'])
  138. return "\n".join(text)
  139. def get_chunk_by_keyword(keywords):
  140. results = es_ops.search_similar_texts(keywords)
  141. text = []
  142. for result in results:
  143. if result['score'] > 1.62:
  144. print(">>> get_chunk_by_keyword ", result['text'][:100])
  145. text.append(result['text'])
  146. return "\n".join(text)
  147. def get_weather_by_city(keywords:str):
  148. print(">>> get_weather_by_city ", keywords)
  149. return "南京今日天气为大雨,最高温度11度,最低温度8度。"