SGTY 3 months ago
parent
commit
1bc6ca70cc
3 changed files with 26 additions and 82 deletions
  1. + 2 - 2
      agent/cdss/capbility.py
  2. + 23 - 79
      agent/cdss/libs/cdss_helper.py
  3. + 1 - 1
      tests/test.py

+ 2 - 2
agent/cdss/capbility.py

@@ -23,8 +23,8 @@ class CDSSCapability:
                     keyword, limit=10, node_type="word"
                 )
                 for item in results:
-                    if item['score']>1.9:
-                        start_nodes.append(item['id'])
+                    #if item['score']>1.9:
+                    start_nodes.append(item['id'])
             end_time = time.time()
             print(f"node_search finished, took {end_time - start_time:.2f}s")
             logger.info(f"cdss start from {start_nodes}")    
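
Note on the hunk above: with the score threshold commented out, every node_search hit now becomes a start node. Below is a minimal, standalone sketch of the before/after behaviour, assuming node_search() returns dicts with 'id' and 'score' keys as in the surrounding code; collect_start_nodes is a hypothetical helper, not part of this repository.

    def collect_start_nodes(results, score_threshold=None):
        """Collect start-node ids, applying a score threshold only when one is given."""
        start_nodes = []
        for item in results:
            # The old code hard-coded the threshold to 1.9; passing None
            # reproduces the new behaviour of keeping every result.
            if score_threshold is not None and item['score'] <= score_threshold:
                continue
            start_nodes.append(item['id'])
        return start_nodes

    # collect_start_nodes(results)       -> new behaviour (keep everything)
    # collect_start_nodes(results, 1.9)  -> old behaviour (score > 1.9 only)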

+ 23 - 79
agent/cdss/libs/cdss_helper.py

@@ -189,9 +189,7 @@ class CDSSHelper(GraphHelper):
 
     def check_sex_allowed(self, node, sex):
         # Gender filter: assume the disease node has an attribute allowed_sex_type whose value "0,1,2" means unknown, male or female
-
         sex_allowed = self.graph.nodes[node].get('sex', None)
-
         #sexProps = self.propService.get_props_by_ref_id(node, 'sex')
         #if len(sexProps) > 0 and sexProps[0]['prop_value'] is not None and sexProps[0][
             #'prop_value'] != input.pat_sex.value:
@@ -307,10 +305,9 @@ class CDSSHelper(GraphHelper):
         for node in node_ids:
             visited = set()
             temp_results = {}
-            cache_key = f"symptom_{node}"
+            cache_key = f"symptom_ref_disease_{node}"
             cache_data = self.cache.get(cache_key)
             if cache_data:
-                logger.debug(f"cache hit for {cache_key}")
                 temp_results = cache_data
            
                 if results=={}:
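
The hunk above renames the cache key from symptom_{node} to symptom_ref_disease_{node}, so entries stored under the old name are no longer read, and drops the cache-hit debug log. Below is a rough sketch of the get-or-compute pattern involved, using a plain dict in place of self.cache and a placeholder compute function, since the cache write is not shown in this hunk; get_symptom_ref_disease is illustrative, not the project's API.

    def get_symptom_ref_disease(cache, node, compute):
        """Return cached symptom->disease data for a node, computing and storing it on a miss."""
        cache_key = f"symptom_ref_disease_{node}"  # renamed key; the old code used f"symptom_{node}"
        cached = cache.get(cache_key)
        if cached:
            return cached
        value = compute(node)       # placeholder for the real graph lookup
        cache[cache_key] = value    # a plain dict stands in for self.cache here
        return value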
@@ -394,7 +391,7 @@ class CDSSHelper(GraphHelper):
 
         # Here we need to filter the results and drop diseases that do not meet the conditions
         new_results = {}
-        print(len(results))
+
         for item in results:
             if self.check_sex_allowed(node, input.pat_sex.value) == False:
                 continue
@@ -439,7 +436,7 @@ class CDSSHelper(GraphHelper):
             log_data.append(f"|{data['name']}|{','.join(data['path'])}|{data['count']}|{data['relevant']}|")
 
         content = "Disease-symptom relevance statistics table\n" + "\n".join(log_data)
-        #print(f"\n{content}")
+        print(f"\n{content}")
         # STEP 2: find the departments, checks and drugs associated with these diseases
         # Since this part searches disease by disease, these results can be cached in a real deployment
         start_time = time.time()
@@ -449,76 +446,23 @@ class CDSSHelper(GraphHelper):
             # TODO: load/cache the departments, checks and drugs for each disease here; performance would improve a lot
             if results[disease]["relevant"] == False:
                 continue
-            print(f"search data for {disease}:{results[disease]['name']}")
-            queue = []
-            queue.append((disease, 0, disease, {'allowed_types': DEPARTMENT, 'allowed_links': ['belongs_to']}))
-
-            # We tried moving visited outside the for-disease loop, but it caused problems and the performance gain was small, so it stays inside the loop
-            visited = set()
-
-            while queue:
-                node, depth, disease, data = queue.pop(0)
 
-                if node in visited or depth > max_hops:
+            out_edges = self.graph.out_edges(disease, data=True)
+            for edge in out_edges:
+                src, dest, edge_data = edge
+                if edge_data["type"] != 'belongs_to':
                     continue
-                visited.add(node)
-
-                entity_data = self.entity_data[self.entity_data.index == node]
-
-                # Skip if the node does not exist
-                if entity_data.empty:
+                dest_data = self.entity_data[self.entity_data.index == dest]
+                if dest_data.empty:
                     continue
-                node_type = self.entity_data[self.entity_data.index == node]['type'].tolist()[0]
-                node_name = self.entity_data[self.entity_data.index == node]['name'].tolist()[0]
+                department_name = self.entity_data[self.entity_data.index == dest]['name'].tolist()[0]
 
-                # print(f"node {results[disease].get("name", disease)} {node_name} type {node_type}")
-                # node_type = self.graph.nodes[node].get('type')
-                if node_type in DEPARTMENT:
-                    # Expand the department, repeated once per occurrence of the disease, to simplify later statistics
-                    department_data = [node_name] * results[disease]["count"]
-                    if 'department' in results[disease].keys():
-                        results[disease]["department"] = results[disease]["department"] + department_data
-                    else:
-                        results[disease]["department"] = department_data
-                    continue
-                # if node_type in CHECK:
-                #     if 'check' in results[disease].keys():
-                #         results[disease]["check"] = list(set(results[disease]["check"]+[node_name]))
-                #     else:
-                #         results[disease]["check"] = [node_name]
-                #     continue
-                # if node_type in DRUG:
-                #     if 'drug' in results[disease].keys():
-                #         results[disease]["drug"] = list(set(results[disease]["drug"]+[node_name]))
-                #     else:
-                #         results[disease]["drug"] = [node_name]
-                #     continue
-                out_edges = self.graph.out_edges(node, data=True)
-
-                for edge in out_edges:
-                    src, dest, edge_data = edge
-                    src_data = self.entity_data[self.entity_data.index == src]
-                    if src_data.empty:
-                        continue
-                    dest_data = self.entity_data[self.entity_data.index == dest]
-                    if dest_data.empty:
-                        continue
-                    src_name = self.entity_data[self.entity_data.index == src]['name'].tolist()[0]
-                    dest_name = self.entity_data[self.entity_data.index == dest]['name'].tolist()[0]
-                    dest_type = self.entity_data[self.entity_data.index == dest]['type'].tolist()[0]
-
-                    if dest_type in allowed_types:
-                        if dest not in visited and depth + 1 < max_hops:
-                            # print(f"put travel request in queue from {src}:{src_name} to {dest}:{dest_name}")
-                            queue.append((edge[1], depth + 1, disease, data))
-
-                            # TODO: the departments, checks and drugs for each disease in results could be cached here for later use
-        # for item in results.keys():
-        #     department_data = results[item].get("department", [])
-        #     count_data = results[item].get("count")
-        #     check_data = results[item].get("check", [])
-        #     drug_data = results[item].get("drug", [])
-        #     # caching code would go here
+                # Expand the department, repeated once per occurrence of the disease, to simplify later statistics
+                department_data = [department_name] * results[disease]["count"]
+                if 'department' in results[disease].keys():
+                    results[disease]["department"] = results[disease]["department"] + department_data
+                else:
+                    results[disease]["department"] = department_data
 
         print(f"STEP 2 finished")
         end_time = time.time()
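
The replacement above drops the breadth-first walk (queue, visited set, max_hops) in favour of scanning each disease's direct outgoing 'belongs_to' edges. Below is a minimal sketch of that simplified lookup, assuming a networkx.DiGraph whose edges carry a "type" attribute, a names dict mapping node id to display name (the project uses a pandas entity_data frame instead), and a results dict shaped like {disease_id: {"count": int, ...}}; attach_departments and names are illustrative, not the project's API.

    import networkx as nx

    def attach_departments(graph: nx.DiGraph, names: dict, results: dict) -> None:
        """Attach department names to each disease via its direct 'belongs_to' edges."""
        for disease, data in results.items():
            for _src, dest, edge_data in graph.out_edges(disease, data=True):
                if edge_data.get("type") != "belongs_to":
                    continue
                department_name = names.get(dest)
                if department_name is None:
                    continue
                # Repeat the department once per occurrence of the disease so
                # later aggregation can count by frequency, as in the diff.
                data.setdefault("department", []).extend([department_name] * data["count"])

    # Example:
    # g = nx.DiGraph(); g.add_edge("d1", "dept1", type="belongs_to")
    # attach_departments(g, {"dept1": "Internal Medicine"}, {"d1": {"count": 2}})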
@@ -534,7 +478,7 @@ class CDSSHelper(GraphHelper):
             log_data.append(
                 f"|{results[item].get("name", item)}|{count_data}|{','.join(department_data)}|{','.join(check_data)}|{','.join(drug_data)}|")
 
-        #print("Disease/department/check/drug statistics\n" + "\n".join(log_data))
+        print("Disease/department/check/drug statistics\n" + "\n".join(log_data))
         # Logging finished
 
         # STEP 3: aggregate the results by department
@@ -592,7 +536,7 @@ class CDSSHelper(GraphHelper):
             log_data.append(
                 f"|{department}|{','.join(diesease_data)}|{','.join(check_data)}|{','.join(drug_data)}|{count_data}|{score_data}|")
 
-        #print("\n" + "\n".join(log_data))
+        print("\n" + "\n".join(log_data))
 
         # STEP 4: count the occurrences of diseases, checks and drugs in final_results and sort them by count in descending order
         print(f"STEP 4 start")
@@ -630,7 +574,7 @@ class CDSSHelper(GraphHelper):
             score_data = final_results[department].get("score", 0)
             log_data.append(f"|{department}|{diesease_data}|{check_data}|{drug_data}|{count_data}|{score_data}|")
 
-        #print("\n" + "\n".join(log_data))
+        print("\n" + "\n".join(log_data))
         # STEP 5: count the global occurrences of diseases, checks and drugs in final_results and sort them by count in descending order
         print(f"STEP 5 start")
         start_time = time.time()
@@ -654,9 +598,9 @@ class CDSSHelper(GraphHelper):
             for disease, data in final_results[department]["diseases"]:
                 total_diags += 1
                 key = 'disease_name_parent_' + disease
-                cached_value = self.cache.get(key)
-                if cached_value is not None:
-                    disease = cached_value
+                # cached_value = self.cache.get(key)
+                # if cached_value is not None:
+                #     disease = cached_value
                 if disease in diags.keys():
                     diags[disease]["count"] += data["count"]
                     diags[disease]["score"] += data["count"] * department_factor
@@ -694,7 +638,7 @@ class CDSSHelper(GraphHelper):
             score_data = final_results[department].get("score", 0)
             log_data.append(f"|{department}|{diesease_data}|{check_data}|{drug_data}|{count_data}|{score_data}|")
 
-        #print("Sorted data\n" + "\n".join(log_data))
+        print("Sorted data\n" + "\n".join(log_data))
         # STEP 6: merge the data and return it
         # if "department" in item.keys():
         #     final_results["department"] = list(set(final_results["department"]+item["department"]))
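
In the STEP 5 hunk above, the parent-disease cache rewrite is commented out, while per-disease counts are still summed across departments and the score is weighted by department_factor. Below is a minimal sketch of that aggregation, assuming final_results maps department -> {"diseases": [(disease_name, {"count": int}), ...]} and a single numeric department_factor (in the real code the factor may differ per department); aggregate_diagnoses is an illustrative helper, not the repository's code.

    def aggregate_diagnoses(final_results, department_factor=1.0):
        """Sum disease counts across departments and weight the score by department_factor."""
        diags = {}
        for department, payload in final_results.items():
            for disease, data in payload.get("diseases", []):
                entry = diags.setdefault(disease, {"count": 0, "score": 0.0})
                entry["count"] += data["count"]
                entry["score"] += data["count"] * department_factor
        # Sort by count, descending, as the STEP 5 comment describes.
        return sorted(diags.items(), key=lambda kv: kv[1]["count"], reverse=True)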

+ 1 - 1
tests/test.py

@@ -6,7 +6,7 @@ capability = CDSSCapability()
 record = CDSSInput(
     pat_age=CDSSInt(type="month", value=24),
     pat_sex=CDSSText(type="sex", value="男"),
-    #chief_complaint=["腹痛", "发热", "腹泻"],
+    chief_complaint=["右下腹痛", "恶心", "呕吐"],
     #chief_complaint=["呕血", "黑便", "头晕", "心悸"],
     #chief_complaint=["流鼻涕"],