"""Embedding/search smoke tests and builder for the disease index cache.

Run with a single argument:

* ``test``  - exercise the embedding helper and the Elasticsearch search.
* ``build`` - export one JSON document per Disease entity, including an
  embedding for each of its long-enough attributes.

All data files are resolved relative to the current working directory.
"""
import json
import os
import sys

# The working directory is appended to sys.path *before* the project-local
# import below so that ``libs`` / ``utils`` resolve when the script is
# launched from the project root.
current_path = os.getcwd()
sys.path.append(current_path)

from libs.embed_helper import EmbedHelper  # noqa: E402

# Attributes that never get their own embedding.
_SKIPPED_ATTRS = ("type", "description")


def _cache_path(*parts):
    """Return a path below ``<current_path>/web/cached_data``.

    Built with ``os.path.join`` so the same code works with both Windows
    and POSIX path separators.
    """
    return os.path.join(current_path, "web", "cached_data", *parts)


def embed_test():
    """Embed a short sample text and print the result and its length."""
    embed_helper = EmbedHelper()
    result = embed_helper.embed_text("你好")
    print(f"result length: {len(result)}")
    print(result)


def search_test():
    """Run a sample query against ``graph_entity_index`` and print each hit."""
    # Imported lazily so that only the "test" action needs the ES client.
    from utils.es import ElasticsearchOperations

    es = ElasticsearchOperations()
    # NOTE(review): the third argument is presumably the maximum number of
    # hits - confirm against ElasticsearchOperations.search.
    result = es.search("graph_entity_index", "上呼吸道感染", 10)
    for item in result:
        print(item)


def load_entities():
    """Load and return the parsed content of ``entities_med.json``."""
    print("load entity data")
    with open(_cache_path("entities_med.json"), "r", encoding="utf-8") as f:
        return json.load(f)


def load_relationships():
    """Load and return the parsed content of ``relationship_med.json``."""
    print("load relationship data")
    with open(_cache_path("relationship_med.json"), "r", encoding="utf-8") as f:
        return json.load(f)


def write_data_file(file_name, data):
    """Write *data* to *file_name* as pretty-printed UTF-8 JSON.

    Nothing is written when *data* is empty. The parent directory is
    created on demand so a missing output folder does not abort a run.
    """
    if not data:
        return
    print("write data file", file_name)
    parent = os.path.dirname(file_name)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(file_name, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)


def _build_disease_doc(embed_helper, node_id, attrs):
    """Build the export document for one Disease entity.

    Every attribute whose value is longer than 3 and that is not listed in
    ``_SKIPPED_ATTRS`` contributes its text and an embedding of
    ``"<node_id>-<attr>-<value>"``; all other attributes are reported as
    skipped.
    """
    texts = []
    attr_embed_list = []
    # NOTE(review): assumes attribute values are strings - confirm against
    # the entity data; len() and the concatenation below rely on it.
    for attr, value in attrs.items():
        if len(value) > 3 and attr not in _SKIPPED_ATTRS:
            texts.append(value)
            attr_embed = embed_helper.embed_text(node_id + "-" + attr + "-" + value)
            attr_embed_list.append(
                {
                    "title": node_id + "-" + attr,
                    "text": value,
                    "embedding": attr_embed,
                }
            )
        else:
            print("skip", attr)
    return {
        "title": node_id,
        "text": "\n".join(texts),
        "embedding": attr_embed_list,
    }


def build_index():
    """Export every Disease entity to ``web/cached_data/diseases/<n>.json``.

    Entities are read via ``load_entities``; each one is treated as a
    ``(node_id, attrs)`` pair. Only entities whose ``attrs["type"]`` is
    ``"Disease"`` are exported, numbered consecutively from 0.
    """
    print("build index")
    embed_helper = EmbedHelper()
    entities = load_entities()
    exported = 0  # number of documents written so far == next file index
    for item in entities:
        node_id = item[0]
        attrs = item[1]
        print("process node: ", exported, node_id)
        if attrs["type"] != "Disease":
            continue
        doc = _build_disease_doc(embed_helper, node_id, attrs)
        write_data_file(_cache_path("diseases", f"{exported}.json"), doc)
        exported += 1


# Usage example
if __name__ == "__main__":
    action = sys.argv[1] if len(sys.argv) == 2 else None
    if action == "test":
        embed_test()
        search_test()
    elif action == "build":
        build_index()
    else:
        print(f"usage: {sys.argv[0]} test|build", file=sys.stderr)