Browse Source

Revert "1.doc_feature做缓存,为朴素贝叶斯算法提供数据支持 2.添加朴素贝叶斯算法"

This reverts commit c6c41b0
hujing 5 years ago
parent
commit
8513cd5243

+ 0 - 121
common-push/src/main/java/org/diagbot/common/push/cache/ApplicationCacheUtil.java

@@ -28,10 +28,6 @@ public class ApplicationCacheUtil {
     public static Map<String, RuleApp> kl_rule_app_filter_map = null;
     //pacs关系抽取过滤
     public static Map<String, Map<String, String>> kl_diagnose_detail_filter_map = null;
-    //朴素贝叶斯
-    public static Map<String, Map<String, Double>> doc_feature_naivebayes_prob_map = null;
-    //朴素贝叶斯规则过滤
-    public static Map<String, Map<String, Double>> relevant_feature_map = null;
 
     public static Map<String, Map<String, String>> getStandard_info_synonym_map() {
         if (standard_info_synonym_map == null) {
@@ -191,121 +187,4 @@ public class ApplicationCacheUtil {
             }
         }
     }
-
-    public static Map<String, Map<String, Double>> getDoc_feature_naivebayes_prob_map() {
-        if (doc_feature_naivebayes_prob_map == null) {
-            create_doc_feature_naivebayes_prob_map();
-        }
-        return doc_feature_naivebayes_prob_map;
-    }
-
-    public static void create_doc_feature_naivebayes_prob_map() {
-        doc_feature_naivebayes_prob_map = new HashMap<>();
-        //<rdn,[feature...]> 存储每个rdn对应的特征List
-        Map<String, List<String>> featureMap = new HashMap<>();
-        List<String> featureList = null;
-        Configuration configuration = new DefaultConfig();
-        List<String> fileFeatureContents = configuration.readFileContents("bigdata_naivebayes_features.dict");
-        for (String line : fileFeatureContents) {
-            String[] content = line.split("\\|", -1);
-            if (featureMap.get(content[0]) == null) {
-                featureList = new ArrayList<>();
-                for (String feature : content[1].split(" ")) {
-                    featureList.add(feature);
-                }
-                featureMap.put(content[0], featureList);
-            }
-        }
-
-        //<rdn,diagnose> 存每个rdn对应疾病
-        Map<String, String> diagnoseMap = new HashMap<>();
-        //<diagnose,count> 存每个疾病的数量
-        Map<String, Integer> diagnoseCount = new HashMap<>();
-        List<String> fileDiagnoseContents = configuration.readFileContents("bigdata_naivebayes_diagnose.dict");
-        diagnoseCount.put("diagnoseCount", fileDiagnoseContents.size());
-        for (String line : fileDiagnoseContents) {
-            String[] content = line.split("\\|", -1);
-            if (diagnoseMap.get(content[0]) == null) {
-                diagnoseMap.put(content[0], content[1]);
-            }
-            if (diagnoseCount.get(content[1]) == null) {
-                diagnoseCount.put(content[1], 1);
-            } else {
-                diagnoseCount.put(content[1], diagnoseCount.get(content[1]) + 1);
-            }
-        }
-
-        Map<String, Map<String, Integer>> diagnose2featureCount = new HashMap<>();
-        Map<String, Integer> featureCount = new HashMap<>();
-        for (Map.Entry<String, String> diagnoseMapEntry : diagnoseMap.entrySet()) {
-            //featureMap -> <1000000_144 , [咳嗽,咳痰,1周,气管炎]>
-            if (featureMap.get(diagnoseMapEntry.getKey()) == null) {
-                continue;
-            }
-            for (String feature : featureMap.get(diagnoseMapEntry.getKey())) {
-                /**
-                 diagnoseMapEntry <1596386_9,鼻炎> -> <rdn,diagnose>
-                 如果疾病对应特征列表为空 diagnoseMapEntry.getValue()->疾病
-                 */
-                if (diagnose2featureCount.get(diagnoseMapEntry.getValue()) == null) {
-                    featureCount = new HashMap<>();
-                    //featureMap -> <1000000_144 , [咳嗽,咳痰,1周,气管炎]>
-                    if (featureCount.get(feature) == null) {
-                        featureCount.put(feature, 1);
-                    } else {
-                        featureCount.put(feature, featureCount.get(feature) + 1);
-                    }
-                    //疾病对应病历数
-                    featureCount.put("diagnoseCount", diagnoseCount.get(diagnoseMapEntry.getValue()));
-                    diagnose2featureCount.put(diagnoseMapEntry.getValue(), featureCount);
-                } else {
-                    if (diagnose2featureCount.get(diagnoseMapEntry.getValue()).get(feature) == null) {
-                        diagnose2featureCount.get(diagnoseMapEntry.getValue()).put(feature, 1);
-                    } else {
-                        diagnose2featureCount.get(diagnoseMapEntry.getValue())
-                                .put(feature, diagnose2featureCount.get(diagnoseMapEntry.getValue()).get(feature) + 1);
-                    }
-                }
-            }
-        }
-
-        Map<String, Double> prob = null;
-        for (Map.Entry<String, Map<String, Integer>> diagnose2featureCountEntry : diagnose2featureCount.entrySet()) {
-            prob = new HashMap<>();
-            //计算先验概率
-            double priorProb = (double) diagnose2featureCountEntry.getValue().get("diagnoseCount") / diagnoseCount.get("diagnoseCount");
-            prob.put("priorProb", priorProb);
-            //计算条件概率
-            for (Map.Entry<String, Integer> featuresCount : diagnose2featureCountEntry.getValue().entrySet()) {
-                double conditionProb = (double) featuresCount.getValue() / diagnose2featureCountEntry.getValue().get("diagnoseCount");
-                prob.put(featuresCount.getKey(), conditionProb);
-            }
-            doc_feature_naivebayes_prob_map.put(diagnose2featureCountEntry.getKey(), prob);
-        }
-    }
-
-    public static Map<String, Map<String,Double>> getRelevant_feature_map() {
-        if (relevant_feature_map == null) {
-            createRelevant_feature_map();
-        }
-        return relevant_feature_map;
-    }
-
-    public static Map<String, Map<String,Double>> createRelevant_feature_map() {
-        relevant_feature_map = new HashMap<>();
-        Map<String,Double> relevantFeatureProb = null;
-        Configuration configuration = new DefaultConfig();
-        List<String> relevantFeatureList = configuration.readFileContents("bigdata_relevant_feature.dict");
-        for (String relevantFeature:relevantFeatureList) {
-            String[] content = relevantFeature.split("\\|", -1);
-            if (relevant_feature_map.get(content[0]) == null){
-                relevantFeatureProb = new HashMap<>();
-                relevantFeatureProb.put(content[1],0.00);
-                relevant_feature_map.put(content[0],relevantFeatureProb);
-            } else {
-                relevant_feature_map.get(content[0]).put(content[1],0.00);
-            }
-        }
-        return relevant_feature_map;
-    }
 }

+ 1 - 37
common-push/src/main/java/org/diagbot/common/push/cache/CacheFileManager.java

@@ -24,7 +24,7 @@ public class CacheFileManager {
 
     private String user = "root";
     private String password = "lantone";
-    private String url = "jdbc:mysql://192.168.2.236:3306/med?useUnicode=true&characterEncoding=UTF-8";
+    private String url = "jdbc:mysql://192.168.2.121:3306/med?useUnicode=true&characterEncoding=UTF-8";
 
     private String path = "";
 
@@ -410,42 +410,6 @@ public class CacheFileManager {
                 fw.write("\n");
             }
             fw.close();
-
-            sql = "SELECT rdn, GROUP_CONCAT(feature_name ORDER BY sn SEPARATOR ' ') AS features FROM doc_feature WHERE feature_type = 9 GROUP BY rdn;";
-            st = conn.createStatement();
-            rs = st.executeQuery(sql);
-            fw = new FileWriter(path + "bigdata_naivebayes_features.dict");
-            while (rs.next()) {
-                r1 = rs.getString(1);
-                r2 = rs.getString(2);
-                fw.write(encrypDES.encrytor(r1+ "|" + r2));
-                fw.write("\n");
-            }
-            fw.close();
-
-            sql = "select rdn, feature_name as diagnose from doc_feature where feature_type=2";
-            st = conn.createStatement();
-            rs = st.executeQuery(sql);
-            fw = new FileWriter(path + "bigdata_naivebayes_diagnose.dict");
-            while (rs.next()) {
-                r1 = rs.getString(1);
-                r2 = rs.getString(2);
-                fw.write(encrypDES.encrytor(r1+ "|" + r2));
-                fw.write("\n");
-            }
-            fw.close();
-
-            sql = "SELECT diagnose,feature FROM relevant_feature;";
-            st = conn.createStatement();
-            rs = st.executeQuery(sql);
-            fw = new FileWriter(path + "bigdata_relevant_feature.dict");
-            while (rs.next()) {
-                r1 = rs.getString(1);
-                r2 = rs.getString(2);
-                fw.write(encrypDES.encrytor(r1+ "|" + r2));
-                fw.write("\n");
-            }
-            fw.close();
         } catch (IOException ioe) {
             ioe.printStackTrace();
         } catch (SQLException sqle) {

+ 0 - 87
common-push/src/main/java/org/diagbot/common/push/naivebayes/AlgorithmNaiveBayesExecutor.java

@@ -1,87 +0,0 @@
-package org.diagbot.common.push.naivebayes;
-
-import org.diagbot.common.push.cache.ApplicationCacheUtil;
-
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-/**
- * @Description:
- * @Author: HUJING
- * @Date: 2019/10/11 14:25
- */
-public class AlgorithmNaiveBayesExecutor {
-    private double e = Math.E;
-    private static double unknownProbWithRelevant = -2; //已知有关,但未在病历中统计出来的特征
-    private static double unknownProbWithoutRelevant = -6;  //无关事件间的共现概率
-    private static double denominator = 0.00;
-
-    public Map<String, Double> execute(Map<String, Map<String, String>> inputs) {
-        return softmax(probCalc(inputs));
-    }
-
-    public Map<String, Double> probCalc(Map<String, Map<String, String>> inputs) {
-        Map<String, Map<String, Double>> doc_feature_naivebayes_prob_map = ApplicationCacheUtil.getDoc_feature_naivebayes_prob_map();
-        Map<String, Map<String, Double>> relevant_feature_map = ApplicationCacheUtil.getRelevant_feature_map();
-        Map<String, Double> naivebayesResult = new HashMap<>();
-        for (Map.Entry<String, Map<String, Double>> naivebayesProb : doc_feature_naivebayes_prob_map.entrySet()) {
-            double sum = 0.00;
-            int i = 1;
-            for (String input : inputs.keySet()) {
-                //先验概率表里有该特征,就使用该特征的先验概率
-                if (naivebayesProb.getValue().containsKey(input)) {
-                    sum += Math.log10(naivebayesProb.getValue().get(input));
-                } else if (relevant_feature_map.get(naivebayesProb.getKey()) != null &&
-                        relevant_feature_map.get(naivebayesProb.getKey()).containsKey(input)) {
-                    //先验概率表里没有该特征 但 关联规则表里有该特征,则平滑处理(默认此时先验概率为10^-2)
-                    sum += unknownProbWithRelevant;
-                } else {
-                    sum += unknownProbWithoutRelevant;
-                }
-
-                if (i == inputs.size()) {
-                    sum += Math.log10(naivebayesProb.getValue().get("priorProb"));
-                    naivebayesResult.put(naivebayesProb.getKey(), sum);
-                }
-                i++;
-            }
-        }
-        naivebayesResult = sortMap(naivebayesResult);
-        return naivebayesResult;
-    }
-
-    public Map<String, Double> softmax(Map<String, Double> naivebayesResultMap) {
-        Map<String, Double> softmaxResult = new HashMap<>();
-        if (denominator == 0) {
-            for (Map.Entry<String, Double> naivebayesResult : naivebayesResultMap.entrySet()) {
-                //计算softmax算法分母
-                denominator += Math.pow(this.e, naivebayesResult.getValue());
-            }
-        }
-
-        for (Map.Entry<String, Double> naivebayesResult : naivebayesResultMap.entrySet()) {
-            softmaxResult.put(naivebayesResult.getKey(), Math.pow(this.e, naivebayesResult.getValue()) / denominator);
-        }
-
-        softmaxResult = sortMap(softmaxResult);
-        return softmaxResult;
-    }
-
-    public Map<String, Double> sortMap(Map<String, Double> ResultMap) {
-        ArrayList<Map.Entry<String, Double>> softmaxResultList = new ArrayList<>(ResultMap.entrySet());
-        softmaxResultList.sort(new Comparator<Map.Entry<String, Double>>() {
-            @Override
-            public int compare(Map.Entry<String, Double> o1, Map.Entry<String, Double> o2) {
-                return o2.getValue().compareTo(o1.getValue());
-            }
-        });
-        ResultMap = new LinkedHashMap<>();
-        for (Map.Entry<String, Double> softmaxResultMap : softmaxResultList) {
-            ResultMap.put(softmaxResultMap.getKey(), softmaxResultMap.getValue());
-        }
-        return ResultMap;
-    }
-}

+ 0 - 28
common-push/src/main/java/org/diagbot/common/push/naivebayes/NaiveBayesTest.java

@@ -1,28 +0,0 @@
-package org.diagbot.common.push.naivebayes;
-
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * @Description:
- * @Author: HUJING
- * @Date: 2019/10/11 14:30
- */
-public class NaiveBayesTest {
-    public static void main(String[] args) {
-        AlgorithmNaiveBayesExecutor a = new AlgorithmNaiveBayesExecutor();
-        Map<String, Map<String, String>> inputs = new HashMap<>();
-        inputs.put("咽部异物感",new HashMap<>());
-//        inputs.put("腹胀",new HashMap<>());
-//        inputs.put("乏力",new HashMap<>());
-        Map<String, Double> softmax = a.softmax(a.probCalc(inputs));
-        double i = 0.00;
-        for (Map.Entry<String, Double> s:softmax.entrySet()) {
-            i += s.getValue();
-            if (s.getValue() == 0){
-                System.out.println(s.getKey());
-            }
-        }
-        System.out.println(i);
-    }
-}