import os import sys current_path = os.getcwd() sys.path.append(current_path) from community.graph_helper import GraphHelper from typing import List from cdss.models.schemas import CDSSInput class CDSSHelper(GraphHelper): def check_sex_allowed(self, node, sex): #性别过滤,假设疾病节点有一个属性叫做allowed_sex_type,值为“0,1,2”,分别代表未知,男,女 sex_allowed = self.graph.nodes[node].get('allowed_sex_list', None) if sex_allowed: sex_allowed_list = sex_allowed.split(',') if sex not in sex_allowed_list: #如果性别不匹配,跳过 return False return True def check_age_allowed(self, node, age): #年龄过滤,假设疾病节点有一个属性叫做allowed_age_range,值为“6-88”,代表年龄在0-88月之间是允许的 #如果说年龄小于6岁,那么我们就认为是儿童,所以儿童的年龄范围是0-6月 age_allowed = self.graph.nodes[node].get('allowed_age_range', None) if age_allowed: age_allowed_list = age_allowed.split('-') age_min = int(age_allowed_list[0]) age_max = int(age_allowed_list[-1]) if age >= age_min and age < age_max: #如果年龄范围正常,那么返回True return True else: #如果没有设置年龄范围,那么默认返回True return True return False def cdss_travel(self, input:CDSSInput, start_nodes:List, max_hops=3): #这里设置了节点的type取值范围,可以根据实际情况进行修改,允许出现多个类型 DEPARTMENT=['科室'] DIESEASE=['疾病'] DRUG=['药品'] CHECK=['检查'] SYMPTOM=['症状'] #allowed_types = ['科室', '疾病', '药品', '检查', '症状'] allowed_types = DEPARTMENT + DIESEASE+ DRUG + CHECK + SYMPTOM #这里设置了边的type取值范围,可以根据实际情况进行修改,允许出现多个类型 #不过后面的代码里面没有对边的type进行过滤,所以这里是留做以后扩展的 allowed_links = ['has_symptom', 'need_check', 'recommend_drug', 'belongs_to'] #详细解释下面一行代码 #queue是一个队列,里面存放了待遍历的节点,每个节点都有一个depth,表示当前节点的深度, #一个path,表示当前节点的路径,一个data,表示当前节点的一些额外信息,比如allowed_types和allowed_links #allowed_types表示当前节点的类型,allowed_links表示当前节点的边的类型 #这里的start_nodes是一个列表,里面存放了起始节点,每个起始节点都有一个depth为0,一个path为"/",一个data为{'allowed_types': allowed_types, 'allowed_links':allowed_links} #这里的"/"表示根节点,因为根节点没有父节点,所以路径为"/" #这里的data是一个字典,里面存放了allowed_types和allowed_links,这两个值都是列表,里面存放了允许的类型 queue = [(node, 0, "/", {'allowed_types': allowed_types, 'allowed_links':allowed_links}) for node in start_nodes] visited = set() results = {} #整理input的数据,这里主要是要检查输入数据是否正确,也需要做转换 if input.pat_age.value > 0 and input.pat_age.type == 'year': #这里将年龄从年转换为月,因为我们的图里面的年龄都是以月为单位的 input.pat_age.value = input.pat_age.value * 12 input.pat_age.type = 'month' #STEP 1: 假设start_nodes里面都是症状,第一步我们先找到这些症状对应的疾病 #由于这部分是按照症状逐一去寻找疾病,所以实际应用中可以缓存这些结果 while queue: node, depth, path, data = queue.pop(0) #allowed_types = data['allowed_types'] #allowed_links = data['allowed_links'] indent = depth * 4 node_type = self.graph.nodes[node].get('type') if node_type in DIESEASE: if self.check_sex_allowed(node, input.pat_sex.value) == False: continue if self.check_age_allowed(node, input.pat_age.value) == False: continue if node in results.keys(): results[node]["count"] = results[node]["count"] + 1 #print("疾病", node, "出现的次数", results[node]["count"]) else: results[node] = {"type": node_type, "count":1, 'path':path} continue if node in visited or depth > max_hops: #print(">>> already visited or reach max hops") continue visited.add(node) for edge in self.graph.edges(node, data=True): src, dest, edge_data = edge #if edge_data.get('type') not in allowed_links: # continue if edge[1] not in visited and depth + 1 < max_hops: queue.append((edge[1], depth + 1, path+"/"+src, data)) #print("-" * (indent+4), f"start travel from {src} to {dest}") #STEP 2: 找到这些疾病对应的科室,检查和药品 #由于这部分是按照疾病逐一去寻找,所以实际应用中可以缓存这些结果 for disease in results.keys(): queue = [(disease, 0, {'allowed_types': DEPARTMENT, 'allowed_links':['belongs_to']})] visited = set() while queue: node, depth, data = queue.pop(0) indent = depth * 4 if node in visited or depth > max_hops: #print(">>> already visited or reach max hops") continue visited.add(node) node_type = self.graph.nodes[node].get('type') if node_type in DEPARTMENT: #展开科室,重复次数为疾病出现的次数,为了方便后续统计 department_data = [node] * results[disease]["count"] # if results[disease]["count"] > 1: # print("展开了科室", node, "次数", results[disease]["count"], "次") if 'department' in results[disease].keys(): results[disease]["department"] = results[disease]["department"] + department_data else: results[disease]["department"] = department_data continue if node_type in CHECK: if 'check' in results[disease].keys(): results[disease]["check"] = list(set(results[disease]["check"]+[node])) else: results[disease]["check"] = [node] continue if node_type in DRUG: if 'drug' in results[disease].keys(): results[disease]["drug"] = list(set(results[disease]["drug"]+[node])) else: results[disease]["drug"] = [node] continue for edge in self.graph.edges(node, data=True): src, dest, edge_data = edge #if edge_data.get('type') not in allowed_links: # continue if edge[1] not in visited and depth + 1 < max_hops: queue.append((edge[1], depth + 1, data)) #print("-" * (indent+4), f"start travel from {src} to {dest}") #STEP 3: 对于结果按照科室维度进行汇总 final_results = {} total = 0 for disease in results.keys(): if 'department' in results[disease].keys(): total += 1 for department in results[disease]["department"]: if not department in final_results.keys(): final_results[department] = { "diseases": [disease], "checks": results[disease].get("check",[]), "drugs": results[disease].get("drug",[]), "count": 1 } else: final_results[department]["diseases"] = final_results[department]["diseases"]+[disease] final_results[department]["checks"] = final_results[department]["checks"]+results[disease].get("check",[]) final_results[department]["drugs"] = final_results[department]["drugs"]+results[disease].get("drug",[]) final_results[department]["count"] += 1 for department in final_results.keys(): final_results[department]["score"] = final_results[department]["count"] / total #STEP 4: 对于final_results里面的disease,checks和durgs统计出现的次数并且按照次数降序排序 def sort_data(data, count=5): tmp = {} for item in data: if item in tmp.keys(): tmp[item]["count"] +=1 else: tmp[item] = {"count":1} sorted_data = sorted(tmp.items(), key=lambda x:x[1]["count"],reverse=True) return sorted_data[:count] for department in final_results.keys(): final_results[department]['name'] = department final_results[department]["diseases"] = sort_data(final_results[department]["diseases"]) final_results[department]["checks"] = sort_data(final_results[department]["checks"]) final_results[department]["drugs"] = sort_data(final_results[department]["drugs"]) sorted_final_results = sorted(final_results.items(), key=lambda x:x[1]["count"],reverse=True) #STEP 5: 对于final_results里面的checks和durgs统计全局出现的次数并且按照次数降序排序 checks = {} drugs ={} total_check = 0 total_drug = 0 for department in final_results.keys(): for check, data in final_results[department]["checks"]: total_check += 1 if check in checks.keys(): checks[check]["count"] += data["count"] else: checks[check] = {"count":data["count"]} for drug, data in final_results[department]["drugs"]: total_drug += 1 if drug in drugs.keys(): drugs[drug]["count"] += data["count"] else: drugs[drug] = {"count":data["count"]} sorted_checks = sorted(checks.items(), key=lambda x:x[1]["count"],reverse=True) sorted_drugs = sorted(drugs.items(), key=lambda x:x[1]["count"],reverse=True) #STEP 6: 整合数据并返回 # if "department" in item.keys(): # final_results["department"] = list(set(final_results["department"]+item["department"])) # if "diseases" in item.keys(): # final_results["diseases"] = list(set(final_results["diseases"]+item["diseases"])) # if "checks" in item.keys(): # final_results["checks"] = list(set(final_results["checks"]+item["checks"])) # if "drugs" in item.keys(): # final_results["drugs"] = list(set(final_results["drugs"]+item["drugs"])) # if "symptoms" in item.keys(): # final_results["symptoms"] = list(set(final_results["symptoms"]+item["symptoms"])) return {"details":sorted_final_results, "checks":sorted_checks, "drugs":sorted_drugs, "total_checks":total_check, "total_drugs":total_drug}