# python / self-constructing_graph
							# coding=utf-8
 
from utils.es import ElasticsearchOperations
# Module-wide ElasticsearchOperations instance, created at import time and
# shared by every lookup function defined in this file.
es_ops = ElasticsearchOperations()

# Tool schemas in the {"type": "function", "function": {...}} layout;
# presumably handed to an LLM chat API as its tool list -- confirm against the
# caller. Only search_document is currently exposed. Schemas for
# get_document_by_keyword and get_chunk_by_keyword (each taking one required,
# space-separated "keywords" string) were previously listed here and are
# disabled.
_KEYWORDS_PARAMETER = {
    "type": "string",
    "description": "关键词信息，多个关键词需要用空格分开",
}

_SEARCH_DOCUMENT_SCHEMA = {
    "name": "search_document",
    "description": "按照关键词搜索法律法规文件。参数是指定的关键词，多个关键词需要用空格分开。返回的文章标题列表。",
    "parameters": {
        "type": "object",
        "properties": {"keywords": _KEYWORDS_PARAMETER},
        "required": ["keywords"],
    },
}

basic_functions = [
    {"type": "function", "function": _SEARCH_DOCUMENT_SCHEMA},
]

from utils.factors import FactorsHelper
from chunc.chunc_helper import ChuncHelper
import os
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process
# environment before the settings below are read.
load_dotenv()

# Settings taken from the environment. os.getenv returns None for any variable
# that is not set, so each of these may be None at runtime.
TRUNC_OUTPUT_PATH = os.getenv("TRUNC_OUTPUT_PATH")  # passed to ChuncHelper as output_dir
DOC_STORAGE_PATH = os.getenv("DOC_STORAGE_PATH")  # not referenced in the visible part of this file
DOC_PATH = os.getenv("DOC_PATH")  # passed to ChuncHelper as data_file
JIEBA_USER_DICT = os.getenv("JIEBA_USER_DICT")  # passed to ChuncHelper as user_dict
WORD_INDEX = os.getenv("WORD_INDEX")  # index argument for es_ops.search_word_index
TITLE_INDEX = os.getenv("TITLE_INDEX")  # index argument for es_ops.search_title_index
CHUNC_INDEX = os.getenv("CHUNC_INDEX")  # also queried through es_ops.search_title_index


from chunc.chunc_helper import ChuncHelper
def get_document(title: str):
    """Fetch the stored document whose title best matches *title*.

    Asks the title index for a single best hit (top_k=1) and, when exactly one
    hit comes back, returns whatever ``ChuncHelper.get`` yields for the matched
    title (presumably the document text -- confirm against ChuncHelper).
    Otherwise returns a fixed "document content not found" message.
    """
    print(">>>>>> get_document ", title)
    # Built before the search, as in the original statement order.
    doc_store = ChuncHelper(
        data_file=DOC_PATH,
        output_dir=TRUNC_OUTPUT_PATH,
        user_dict=JIEBA_USER_DICT,
    )
    hits = es_ops.search_title_index(index=TITLE_INDEX, title=title, top_k=1)
    if len(hits) != 1:
        return "没有找到文件内容"

    matched_title = hits[0]["title"]
    print(">>>> get document response: ", matched_title)
    return doc_store.get(matched_title)
    
# Upper bound on the number of titles listed in one search_document reply.
_MAX_SEARCH_RESULTS = 20


def _is_out_of_scope_title(title: str) -> bool:
    """Return True when *title* must be left out of the search listing."""
    # Repealed ("已废止") or no-longer-in-force ("已失效") documents.
    if "已废止" in title or "已失效" in title:
        return True

    names_insurance_regulator = "保监会" in title
    names_insurance = names_insurance_regulator or "保险" in title

    # Drop titles naming the banking regulator ("银监会") unless they also
    # name the insurance regulator ("保监会").
    if "银监会" in title and not names_insurance_regulator:
        return True
    # Same for the regulator's full name, unless insurance is mentioned at all.
    if "银行业监督管理委员会" in title and not names_insurance:
        return True
    # Generic bank ("银行") titles are dropped unless they mention insurance
    # or concern non-bank ("非银行") institutions.
    return "银行" in title and not names_insurance and "非银行" not in title


def search_document(question: str):
    """Search the word, title and chunk indexes and list matching titles.

    Every hit from the three Elasticsearch lookups is fed into a
    ``FactorsHelper``; the titles are then listed in the order returned by
    ``sort_factors()``, skipping titles rejected by
    ``_is_out_of_scope_title`` and stopping after ``_MAX_SEARCH_RESULTS``.

    Returns a newline-joined string: a header line stating how many titles
    are listed, followed by one ``"<n>: <title>"`` line per title. If any
    step raises, the error is printed and a fixed "nothing found" message is
    returned instead.

    NOTE(review): the tool schema in ``basic_functions`` names this argument
    ``keywords`` while the parameter here is ``question``; if the dispatcher
    passes tool arguments by keyword the call would fail -- verify against
    the caller.
    """
    print(">>>>>>>>> search_document")
    helper = es_ops
    try:
        articles = FactorsHelper()
        # NOTE(review): only the disabled cut_word() path below uses this
        # object; the construction is kept because its side effects are not
        # visible from this file.
        chunc_helper = ChuncHelper(data_file=DOC_PATH, output_dir=TRUNC_OUTPUT_PATH, user_dict=JIEBA_USER_DICT)
        print(f">>> question: {question}")
        # words = chunc_helper.cut_word(question)
        data = helper.search_word_index(WORD_INDEX, [question])  # words)

        for item in data:
            print(f"{item['word']} {item['score']}")
            for art in item["articles"]:
                articles.add_factors(art, item['score'])

        print(">>> test title index")
        data = helper.search_title_index(TITLE_INDEX, question)
        for item in data:
            print(f"{item['title']} {item['score']}")
            articles.add_factors(item['title'], item['score'])

        print(">>> test chunc index")
        data = helper.search_title_index(CHUNC_INDEX, question)
        for item in data:
            print(f"{item['title']} {item['score']}")
            articles.add_factors(item['title'], item['score'])

        print(">>> test factors calc")
        sorted_articals = articles.sort_factors()

        # Filter first, then count: the header must report the number of
        # titles actually listed, not the number of raw hits before the
        # out-of-scope titles were removed.
        titles = []
        for title, _score in sorted_articals:
            if _is_out_of_scope_title(title):
                continue
            titles.append(title)
            if len(titles) >= _MAX_SEARCH_RESULTS:
                break

        output = [f"共找到{len(titles)}篇资料，以下是他们的标题和链接"]
        output.extend(
            f"{index}: {title}" for index, title in enumerate(titles, start=1)
        )
        return "\n".join(output)
    except Exception as e:
        # Deliberate best-effort: report the failure and fall through to the
        # fixed "nothing found" reply rather than propagating to the caller.
        print(e)
    return "没有找到任何资料"
    
    
def get_document_by_keyword(keywords: str):
    """Return the texts of similar-text hits scoring above 1.62.

    Logs the query, runs ``es_ops.search_similar_texts(keywords)`` and keeps
    every hit whose ``score`` is strictly greater than 1.62, logging the first
    100 characters of each kept text. The kept texts are joined with newlines;
    the result is an empty string when no hit passes the threshold.
    """
    print(">>> get_document_by_keyword ", keywords)
    kept_texts = []
    for hit in es_ops.search_similar_texts(keywords):
        if not (hit['score'] > 1.62):
            continue
        body = hit['text']
        print(">>> get_document_by_keyword ", body[:100])
        kept_texts.append(body)

    return "\n".join(kept_texts)


def get_chunk_by_keyword(keywords):
    """Return the texts of similar-text hits scoring above 1.62.

    Runs ``es_ops.search_similar_texts(keywords)`` and keeps every hit whose
    ``score`` is strictly greater than 1.62, logging the first 100 characters
    of each kept text. The kept texts are joined with newlines; the result is
    an empty string when no hit passes the threshold.
    """
    kept_texts = []
    for hit in es_ops.search_similar_texts(keywords):
        if not (hit['score'] > 1.62):
            continue
        body = hit['text']
        print(">>> get_chunk_by_keyword ", body[:100])
        kept_texts.append(body)

    return "\n".join(kept_texts)

def get_weather_by_city(keywords: str):
    """Return a fixed weather report; the argument is only logged.

    This is a stub: the same hard-coded sentence is returned whatever value
    is passed in.
    """
    print(">>> get_weather_by_city ", keywords)
    canned_report = "南京今日天气为大雨，最高温度11度，最低温度8度。"
    return canned_report