Procházet zdrojové kódy

对关系抽取输出的内容做预处理(根据已有诊断依据过滤)

hujing před 5 roky
rodič
revize
851e41e86b

+ 33 - 0
common-push/src/main/java/org/diagbot/common/push/cache/ApplicationCacheUtil.java

@@ -9,8 +9,10 @@ import org.diagbot.nlp.util.NlpCache;
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 public class ApplicationCacheUtil {
 
@@ -24,6 +26,8 @@ public class ApplicationCacheUtil {
     public static Map<String, List<Rule>> kl_rule_filter_map = null;
     //危险值提醒
     public static Map<String, RuleApp> kl_rule_app_filter_map = null;
+    //pacs关系抽取过滤
+    public static Map<String, Map<String, String>> kl_diagnose_detail_filter_map = null;
 
     public static Map<String, Map<String, String>> getStandard_info_synonym_map() {
         if (standard_info_synonym_map == null) {
@@ -154,4 +158,33 @@ public class ApplicationCacheUtil {
             }
         }
     }
+
+    /**
+     * Returns the cached diagnose-detail filter map, building it on first access.
+     *
+     * @return the map held in {@code kl_diagnose_detail_filter_map}; when that field is
+     *         {@code null} it is first populated via {@code create_kl_diagnose_detail_filter_map()}
+     */
+    public static Map<String, Map<String, String>> getKl_diagnose_detail_filter_map() {
+        if (kl_diagnose_detail_filter_map != null) {
+            return kl_diagnose_detail_filter_map;
+        }
+        // Cache miss: load the dictionary file into the static field, then hand it out.
+        create_kl_diagnose_detail_filter_map();
+        return kl_diagnose_detail_filter_map;
+    }
+
+    /**
+     * Rebuilds {@code kl_diagnose_detail_filter_map} from the dictionary file
+     * "bigdata_diagnose_detail_filter.dict".
+     *
+     * <p>Each line is split on '|' into a type (first field) and a list of relations
+     * (second field, separated by "、"). The result maps type -> (relation -> relation).
+     * Every type gets its own inner map, so relations of one type never leak into another
+     * and a relation shared by several types is kept for each of them.
+     *
+     * <p>The map is assembled in a local variable and assigned to the static field only
+     * once complete, so readers never observe a half-filled map.
+     *
+     * <p>NOTE(review): assumes {@code readFileContents} returns already-decoded lines in
+     * "type|relations" form — confirm against the writer of this dictionary file.
+     */
+    public static void create_kl_diagnose_detail_filter_map() {
+        Map<String, Map<String, String>> filterMap = new HashMap<>();
+        Configuration configuration = new DefaultConfig();
+        List<String> fileContents = configuration.readFileContents("bigdata_diagnose_detail_filter.dict");
+        for (String line : fileContents) {
+            String[] content = line.split("\\|", -1);
+            if (content.length < 2) {
+                // Line without a '|' separator (e.g. blank line): nothing to index.
+                continue;
+            }
+            Map<String, String> relationsOfType = filterMap.get(content[0]);
+            if (relationsOfType == null) {
+                relationsOfType = new HashMap<>();
+                filterMap.put(content[0], relationsOfType);
+            }
+            for (String relation : content[1].split("、")) {
+                // Map.put is idempotent for identical key/value, so no separate de-dup set is needed.
+                relationsOfType.put(relation, relation);
+            }
+        }
+        kl_diagnose_detail_filter_map = filterMap;
+    }
 }

+ 12 - 0
common-push/src/main/java/org/diagbot/common/push/cache/CacheFileManager.java

@@ -398,6 +398,18 @@ public class CacheFileManager {
                 fw.write("\n");
             }
             fw.close();
+
+            sql = "SELECT type,relation FROM `kl_diagnose_detail` WHERE type = 4 AND LENGTH(relation) > 0 GROUP BY relation";
+            st = conn.createStatement();
+            rs = st.executeQuery(sql);
+            fw = new FileWriter(path + "bigdata_diagnose_detail_filter.dict");
+            while (rs.next()) {
+                r1 = String.valueOf(rs.getInt(1));
+                r2 = rs.getString(2);
+                fw.write(encrypDES.encrytor(r1+ "|" + r2));
+                fw.write("\n");
+            }
+            fw.close();
         } catch (IOException ioe) {
             ioe.printStackTrace();
         } catch (SQLException sqle) {

+ 27 - 9
common-push/src/main/java/org/diagbot/common/push/work/RelationExtractionUtil.java

@@ -3,6 +3,7 @@ package org.diagbot.common.push.work;
 import org.algorithm.core.cnn.entity.Lemma;
 import org.algorithm.core.cnn.entity.Triad;
 import org.diagbot.common.push.bean.SearchData;
+import org.diagbot.common.push.cache.ApplicationCacheUtil;
 import org.diagbot.common.push.util.PushConstants;
 import org.diagbot.nlp.participle.ParticipleUtil;
 import org.diagbot.nlp.participle.word.Lexeme;
@@ -58,17 +59,34 @@ public class RelationExtractionUtil {
             for (String content : contents) {
                 sb.append(content);
             }
-            Map<String, String> map = new HashMap<>();
-            map.put("featureType", "4");
-            map.put("featureName", sb.toString());
-            map.put("property", "17");
-            map.put("concept", sb.toString());
-            //全是有
-            map.put("negative", Constants.default_negative);
-            if (searchData.getInputs().get(map.get("featureName")) == null) {
-                searchData.getInputs().put(map.get("featureName"), map);
+            if (IsExist(sb.toString())) {
+                Map<String, String> map = new HashMap<>();
+                map.put("featureType", "5");
+                map.put("featureName", sb.toString());
+                map.put("property", "17");
+                map.put("concept", sb.toString());
+                //全是有
+                map.put("negative", Constants.default_negative);
+                if (searchData.getInputs().get(map.get("featureName")) == null) {
+                    searchData.getInputs().put(map.get("featureName"), map);
+                }
+            }
+        }
+    }
+
+    /**
+     * Checks whether a relation-extraction output string exists among the known diagnosis criteria.
+     * The lookup is done against the inner map stored under key "4" of the cached
+     * diagnose-detail filter map obtained from {@code ApplicationCacheUtil}.
+     *
+     * @param content the relation-extraction output text to look up
+     * @return {@code true} if the "4" entry exists and contains {@code content} as a key;
+     *         {@code false} otherwise (including when there is no "4" entry)
+     */
+    public boolean IsExist(String content){
+        Map<String, Map<String, String>> kl_diagnose_detail_filter_map = ApplicationCacheUtil.getKl_diagnose_detail_filter_map();
+        if (kl_diagnose_detail_filter_map.get("4") != null){
+            if (kl_diagnose_detail_filter_map.get("4").containsKey(content)){
+                return true;
             }
         }
+        return false;
     }
 
 }