Browse Source

Merge remote-tracking branch 'origin/push-dev' into push-dev

hujing 5 years ago
parent
commit
4e00372551

+ 121 - 0
common-push/src/main/java/org/diagbot/common/push/cache/ApplicationCacheUtil.java

@@ -28,6 +28,10 @@ public class ApplicationCacheUtil {
     public static Map<String, RuleApp> kl_rule_app_filter_map = null;
     //pacs关系抽取过滤
     public static Map<String, Map<String, String>> kl_diagnose_detail_filter_map = null;
+    // Naive Bayes probability table; filled by create_doc_feature_naivebayes_prob_map() (outer key: diagnosis, inner key: feature name or "priorProb")
+    public static Map<String, Map<String, Double>> doc_feature_naivebayes_prob_map = null;
+    // Naive Bayes rule filter; filled by createRelevant_feature_map() (outer key: diagnosis, inner key: related feature, value always 0.00)
+    public static Map<String, Map<String, Double>> relevant_feature_map = null;
 
     public static Map<String, Map<String, String>> getStandard_info_synonym_map() {
         if (standard_info_synonym_map == null) {
@@ -187,4 +191,121 @@ public class ApplicationCacheUtil {
             }
         }
     }
+
+    /**
+     * Returns the cached naive Bayes probability table, building it on first use.
+     *
+     * @return the current value of {@code doc_feature_naivebayes_prob_map}, after calling
+     *         {@link #create_doc_feature_naivebayes_prob_map()} if the field was still {@code null}
+     */
+    public static Map<String, Map<String, Double>> getDoc_feature_naivebayes_prob_map() {
+        if (doc_feature_naivebayes_prob_map != null) {
+            return doc_feature_naivebayes_prob_map;
+        }
+        create_doc_feature_naivebayes_prob_map();
+        return doc_feature_naivebayes_prob_map;
+    }
+
+    public static void create_doc_feature_naivebayes_prob_map() {
+        doc_feature_naivebayes_prob_map = new HashMap<>();
+        //<rdn,[feature...]> 存储每个rdn对应的特征List
+        Map<String, List<String>> featureMap = new HashMap<>();
+        List<String> featureList = null;
+        Configuration configuration = new DefaultConfig();
+        List<String> fileFeatureContents = configuration.readFileContents("bigdata_naivebayes_features.dict");
+        for (String line : fileFeatureContents) {
+            String[] content = line.split("\\|", -1);
+            if (featureMap.get(content[0]) == null) {
+                featureList = new ArrayList<>();
+                for (String feature : content[1].split(" ")) {
+                    featureList.add(feature);
+                }
+                featureMap.put(content[0], featureList);
+            }
+        }
+
+        //<rdn,diagnose> 存每个rdn对应疾病
+        Map<String, String> diagnoseMap = new HashMap<>();
+        //<diagnose,count> 存每个疾病的数量
+        Map<String, Integer> diagnoseCount = new HashMap<>();
+        List<String> fileDiagnoseContents = configuration.readFileContents("bigdata_naivebayes_diagnose.dict");
+        diagnoseCount.put("diagnoseCount", fileDiagnoseContents.size());
+        for (String line : fileDiagnoseContents) {
+            String[] content = line.split("\\|", -1);
+            if (diagnoseMap.get(content[0]) == null) {
+                diagnoseMap.put(content[0], content[1]);
+            }
+            if (diagnoseCount.get(content[1]) == null) {
+                diagnoseCount.put(content[1], 1);
+            } else {
+                diagnoseCount.put(content[1], diagnoseCount.get(content[1]) + 1);
+            }
+        }
+
+        Map<String, Map<String, Integer>> diagnose2featureCount = new HashMap<>();
+        Map<String, Integer> featureCount = new HashMap<>();
+        for (Map.Entry<String, String> diagnoseMapEntry : diagnoseMap.entrySet()) {
+            //featureMap -> <1000000_144 , [咳嗽,咳痰,1周,气管炎]>
+            if (featureMap.get(diagnoseMapEntry.getKey()) == null) {
+                continue;
+            }
+            for (String feature : featureMap.get(diagnoseMapEntry.getKey())) {
+                /**
+                 diagnoseMapEntry <1596386_9,鼻炎> -> <rdn,diagnose>
+                 如果疾病对应特征列表为空 diagnoseMapEntry.getValue()->疾病
+                 */
+                if (diagnose2featureCount.get(diagnoseMapEntry.getValue()) == null) {
+                    featureCount = new HashMap<>();
+                    //featureMap -> <1000000_144 , [咳嗽,咳痰,1周,气管炎]>
+                    if (featureCount.get(feature) == null) {
+                        featureCount.put(feature, 1);
+                    } else {
+                        featureCount.put(feature, featureCount.get(feature) + 1);
+                    }
+                    //疾病对应病历数
+                    featureCount.put("diagnoseCount", diagnoseCount.get(diagnoseMapEntry.getValue()));
+                    diagnose2featureCount.put(diagnoseMapEntry.getValue(), featureCount);
+                } else {
+                    if (diagnose2featureCount.get(diagnoseMapEntry.getValue()).get(feature) == null) {
+                        diagnose2featureCount.get(diagnoseMapEntry.getValue()).put(feature, 1);
+                    } else {
+                        diagnose2featureCount.get(diagnoseMapEntry.getValue())
+                                .put(feature, diagnose2featureCount.get(diagnoseMapEntry.getValue()).get(feature) + 1);
+                    }
+                }
+            }
+        }
+
+        Map<String, Double> prob = null;
+        for (Map.Entry<String, Map<String, Integer>> diagnose2featureCountEntry : diagnose2featureCount.entrySet()) {
+            prob = new HashMap<>();
+            //计算先验概率
+            double priorProb = (double) diagnose2featureCountEntry.getValue().get("diagnoseCount") / diagnoseCount.get("diagnoseCount");
+            prob.put("priorProb", priorProb);
+            //计算条件概率
+            for (Map.Entry<String, Integer> featuresCount : diagnose2featureCountEntry.getValue().entrySet()) {
+                double conditionProb = (double) featuresCount.getValue() / diagnose2featureCountEntry.getValue().get("diagnoseCount");
+                prob.put(featuresCount.getKey(), conditionProb);
+            }
+            doc_feature_naivebayes_prob_map.put(diagnose2featureCountEntry.getKey(), prob);
+        }
+    }
+
+    /**
+     * Returns the cached relevant-feature table, building it on first use.
+     *
+     * @return the current value of {@code relevant_feature_map}, after calling
+     *         {@link #createRelevant_feature_map()} if the field was still {@code null}
+     */
+    public static Map<String, Map<String,Double>> getRelevant_feature_map() {
+        if (relevant_feature_map != null) {
+            return relevant_feature_map;
+        }
+        createRelevant_feature_map();
+        return relevant_feature_map;
+    }
+
+    /**
+     * Builds the relevant-feature table from {@code bigdata_relevant_feature.dict} and stores it
+     * in {@code relevant_feature_map}.
+     *
+     * <p>Each dictionary line has the form {@code diagnose|feature}. The result maps every
+     * diagnosis to the features listed for it; each feature is stored with the value
+     * {@code 0.00}.
+     *
+     * <p>Lines without a {@code |} separator are skipped instead of raising
+     * {@code ArrayIndexOutOfBoundsException}. The table is assembled in a local map and assigned
+     * to the static field only once complete, so the getter's null check never hands out a
+     * partially filled table.
+     *
+     * @return the newly built table (the same instance that is stored in the static field)
+     */
+    public static Map<String, Map<String,Double>> createRelevant_feature_map() {
+        Map<String, Map<String, Double>> relevantFeatures = new HashMap<>();
+        Configuration configuration = new DefaultConfig();
+        for (String relevantFeature : configuration.readFileContents("bigdata_relevant_feature.dict")) {
+            String[] content = relevantFeature.split("\\|", -1);
+            if (content.length < 2) {
+                continue;
+            }
+            relevantFeatures
+                    .computeIfAbsent(content[0], k -> new HashMap<>())
+                    .put(content[1], 0.00);
+        }
+        // Publish only the fully built table.
+        relevant_feature_map = relevantFeatures;
+        return relevantFeatures;
+    }
 }

+ 37 - 1
common-push/src/main/java/org/diagbot/common/push/cache/CacheFileManager.java

@@ -24,7 +24,7 @@ public class CacheFileManager {
 
     private String user = "root";
     private String password = "lantone";
-    private String url = "jdbc:mysql://192.168.2.121:3306/med?useUnicode=true&characterEncoding=UTF-8";
+    private String url = "jdbc:mysql://192.168.2.236:3306/med?useUnicode=true&characterEncoding=UTF-8";
 
     private String path = "";
 
@@ -410,6 +410,42 @@ public class CacheFileManager {
                 fw.write("\n");
             }
             fw.close();
+
+            sql = "SELECT rdn, GROUP_CONCAT(feature_name ORDER BY sn SEPARATOR ' ') AS features FROM doc_feature WHERE feature_type = 9 GROUP BY rdn;";
+            st = conn.createStatement();
+            rs = st.executeQuery(sql);
+            fw = new FileWriter(path + "bigdata_naivebayes_features.dict");
+            while (rs.next()) {
+                r1 = rs.getString(1);
+                r2 = rs.getString(2);
+                fw.write(encrypDES.encrytor(r1+ "|" + r2));
+                fw.write("\n");
+            }
+            fw.close();
+
+            sql = "select rdn, feature_name as diagnose from doc_feature where feature_type=2";
+            st = conn.createStatement();
+            rs = st.executeQuery(sql);
+            fw = new FileWriter(path + "bigdata_naivebayes_diagnose.dict");
+            while (rs.next()) {
+                r1 = rs.getString(1);
+                r2 = rs.getString(2);
+                fw.write(encrypDES.encrytor(r1+ "|" + r2));
+                fw.write("\n");
+            }
+            fw.close();
+
+            sql = "SELECT diagnose,feature FROM relevant_feature;";
+            st = conn.createStatement();
+            rs = st.executeQuery(sql);
+            fw = new FileWriter(path + "bigdata_relevant_feature.dict");
+            while (rs.next()) {
+                r1 = rs.getString(1);
+                r2 = rs.getString(2);
+                fw.write(encrypDES.encrytor(r1+ "|" + r2));
+                fw.write("\n");
+            }
+            fw.close();
         } catch (IOException ioe) {
             ioe.printStackTrace();
         } catch (SQLException sqle) {

+ 87 - 0
common-push/src/main/java/org/diagbot/common/push/naivebayes/AlgorithmNaiveBayesExecutor.java

@@ -0,0 +1,87 @@
+package org.diagbot.common.push.naivebayes;
+
+import org.diagbot.common.push.cache.ApplicationCacheUtil;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * @Description:
+ * @Author: HUJING
+ * @Date: 2019/10/11 14:25
+ */
+public class AlgorithmNaiveBayesExecutor {
+    private double e = Math.E;
+    private static double unknownProbWithRelevant = -2; //已知有关,但未在病历中统计出来的特征
+    private static double unknownProbWithoutRelevant = -6;  //无关事件间的共现概率
+    private static double denominator = 0.00;
+
+    public Map<String, Double> execute(Map<String, Map<String, String>> inputs) {
+        return softmax(probCalc(inputs));
+    }
+
+    public Map<String, Double> probCalc(Map<String, Map<String, String>> inputs) {
+        Map<String, Map<String, Double>> doc_feature_naivebayes_prob_map = ApplicationCacheUtil.getDoc_feature_naivebayes_prob_map();
+        Map<String, Map<String, Double>> relevant_feature_map = ApplicationCacheUtil.getRelevant_feature_map();
+        Map<String, Double> naivebayesResult = new HashMap<>();
+        for (Map.Entry<String, Map<String, Double>> naivebayesProb : doc_feature_naivebayes_prob_map.entrySet()) {
+            double sum = 0.00;
+            int i = 1;
+            for (String input : inputs.keySet()) {
+                //先验概率表里有该特征,就使用该特征的先验概率
+                if (naivebayesProb.getValue().containsKey(input)) {
+                    sum += Math.log10(naivebayesProb.getValue().get(input));
+                } else if (relevant_feature_map.get(naivebayesProb.getKey()) != null &&
+                        relevant_feature_map.get(naivebayesProb.getKey()).containsKey(input)) {
+                    //先验概率表里没有该特征 但 关联规则表里有该特征,则平滑处理(默认此时先验概率为10^-2)
+                    sum += unknownProbWithRelevant;
+                } else {
+                    sum += unknownProbWithoutRelevant;
+                }
+
+                if (i == inputs.size()) {
+                    sum += Math.log10(naivebayesProb.getValue().get("priorProb"));
+                    naivebayesResult.put(naivebayesProb.getKey(), sum);
+                }
+                i++;
+            }
+        }
+        naivebayesResult = sortMap(naivebayesResult);
+        return naivebayesResult;
+    }
+
+    public Map<String, Double> softmax(Map<String, Double> naivebayesResultMap) {
+        Map<String, Double> softmaxResult = new HashMap<>();
+        if (denominator == 0) {
+            for (Map.Entry<String, Double> naivebayesResult : naivebayesResultMap.entrySet()) {
+                //计算softmax算法分母
+                denominator += Math.pow(this.e, naivebayesResult.getValue());
+            }
+        }
+
+        for (Map.Entry<String, Double> naivebayesResult : naivebayesResultMap.entrySet()) {
+            softmaxResult.put(naivebayesResult.getKey(), Math.pow(this.e, naivebayesResult.getValue()) / denominator);
+        }
+
+        softmaxResult = sortMap(softmaxResult);
+        return softmaxResult;
+    }
+
+    public Map<String, Double> sortMap(Map<String, Double> ResultMap) {
+        ArrayList<Map.Entry<String, Double>> softmaxResultList = new ArrayList<>(ResultMap.entrySet());
+        softmaxResultList.sort(new Comparator<Map.Entry<String, Double>>() {
+            @Override
+            public int compare(Map.Entry<String, Double> o1, Map.Entry<String, Double> o2) {
+                return o2.getValue().compareTo(o1.getValue());
+            }
+        });
+        ResultMap = new LinkedHashMap<>();
+        for (Map.Entry<String, Double> softmaxResultMap : softmaxResultList) {
+            ResultMap.put(softmaxResultMap.getKey(), softmaxResultMap.getValue());
+        }
+        return ResultMap;
+    }
+}

+ 28 - 0
common-push/src/main/java/org/diagbot/common/push/naivebayes/NaiveBayesTest.java

@@ -0,0 +1,28 @@
+package org.diagbot.common.push.naivebayes;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Manual smoke run for {@link AlgorithmNaiveBayesExecutor}: scores a single input feature,
+ * prints every diagnosis whose softmax value is exactly zero, then prints the sum of all
+ * softmax values.
+ *
+ * @Description: manual check of the naive Bayes executor
+ * @Author: HUJING
+ * @Date: 2019/10/11 14:30
+ */
+public class NaiveBayesTest {
+    public static void main(String[] args) {
+        AlgorithmNaiveBayesExecutor executor = new AlgorithmNaiveBayesExecutor();
+
+        // Only the keys are read as feature names; further features such as "腹胀" or "乏力"
+        // can be added the same way.
+        Map<String, Map<String, String>> inputs = new HashMap<>();
+        inputs.put("咽部异物感", new HashMap<>());
+
+        Map<String, Double> logScores = executor.probCalc(inputs);
+        Map<String, Double> normalised = executor.softmax(logScores);
+
+        double total = 0.00;
+        for (Map.Entry<String, Double> entry : normalised.entrySet()) {
+            double value = entry.getValue();
+            total += value;
+            // Report diagnoses whose normalised score came out as zero.
+            if (value == 0) {
+                System.out.println(entry.getKey());
+            }
+        }
+        System.out.println(total);
+    }
+}