|
@@ -30,6 +30,10 @@ public class ApplicationCacheUtil {
|
|
|
public static Map<String, RuleApp> kl_rule_app_filter_map = null;
|
|
|
// PACS relation-extraction filter
|
|
|
public static Map<String, Map<String, String>> kl_diagnose_detail_filter_map = null;
|
|
|
+ // Naive Bayes probability table
|
|
|
+ public static Map<String, Map<String, Float>> doc_feature_naivebayes_prob_map = null;
|
|
|
+ // Naive Bayes rule filter
|
|
|
+ public static Map<String, Map<String, Float>> relevant_feature_bayes_map = null;
|
|
|
|
|
|
public static Map<String, Map<String, String>> getStandard_info_synonym_map() {
|
|
|
if (standard_info_synonym_map == null) {
|
|
@@ -237,4 +241,121 @@ public class ApplicationCacheUtil {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ public static Map<String, Map<String, Float>> getDoc_feature_naivebayes_prob_map() {
|
|
|
+ if (doc_feature_naivebayes_prob_map == null) {
|
|
|
+ create_doc_feature_naivebayes_prob_map();
|
|
|
+ }
|
|
|
+ return doc_feature_naivebayes_prob_map;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void create_doc_feature_naivebayes_prob_map() {
|
|
|
+ doc_feature_naivebayes_prob_map = new HashMap<>();
|
|
|
+ //<rdn,[feature...]> 存储每个rdn对应的特征List
|
|
|
+ Map<String, List<String>> featureMap = new HashMap<>();
|
|
|
+ List<String> featureList = null;
|
|
|
+ Configuration configuration = new DefaultConfig();
|
|
|
+ List<String> fileFeatureContents = configuration.readFileContents("bigdata_naivebayes_features.dict");
|
|
|
+ for (String line : fileFeatureContents) {
|
|
|
+ String[] content = line.split("\\|", -1);
|
|
|
+ if (featureMap.get(content[0]) == null) {
|
|
|
+ featureList = new ArrayList<>();
|
|
|
+ for (String feature : content[1].split(" ")) {
|
|
|
+ featureList.add(feature);
|
|
|
+ }
|
|
|
+ featureMap.put(content[0], featureList);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //<rdn,diagnose> 存每个rdn对应疾病
|
|
|
+ Map<String, String> diagnoseMap = new HashMap<>();
|
|
|
+ //<diagnose,count> 存每个疾病的数量
|
|
|
+ Map<String, Integer> diagnoseCount = new HashMap<>();
|
|
|
+ List<String> fileDiagnoseContents = configuration.readFileContents("bigdata_naivebayes_diagnose.dict");
|
|
|
+ diagnoseCount.put("diagnoseCount", fileDiagnoseContents.size());
|
|
|
+ for (String line : fileDiagnoseContents) {
|
|
|
+ String[] content = line.split("\\|", -1);
|
|
|
+ if (diagnoseMap.get(content[0]) == null) {
|
|
|
+ diagnoseMap.put(content[0], content[1]);
|
|
|
+ }
|
|
|
+ if (diagnoseCount.get(content[1]) == null) {
|
|
|
+ diagnoseCount.put(content[1], 1);
|
|
|
+ } else {
|
|
|
+ diagnoseCount.put(content[1], diagnoseCount.get(content[1]) + 1);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Map<String, Map<String, Integer>> diagnose2featureCount = new HashMap<>();
|
|
|
+ Map<String, Integer> featureCount = new HashMap<>();
|
|
|
+ for (Map.Entry<String, String> diagnoseMapEntry : diagnoseMap.entrySet()) {
|
|
|
+ //featureMap -> <1000000_144 , [咳嗽,咳痰,1周,气管炎]>
|
|
|
+ if (featureMap.get(diagnoseMapEntry.getKey()) == null) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ for (String feature : featureMap.get(diagnoseMapEntry.getKey())) {
|
|
|
+ /**
|
|
|
+ diagnoseMapEntry <1596386_9,鼻炎> -> <rdn,diagnose>
|
|
|
+ 如果疾病对应特征列表为空 diagnoseMapEntry.getValue()->疾病
|
|
|
+ */
|
|
|
+ if (diagnose2featureCount.get(diagnoseMapEntry.getValue()) == null) {
|
|
|
+ featureCount = new HashMap<>();
|
|
|
+ //featureMap -> <1000000_144 , [咳嗽,咳痰,1周,气管炎]>
|
|
|
+ if (featureCount.get(feature) == null) {
|
|
|
+ featureCount.put(feature, 1);
|
|
|
+ } else {
|
|
|
+ featureCount.put(feature, featureCount.get(feature) + 1);
|
|
|
+ }
|
|
|
+ //疾病对应病历数
|
|
|
+ featureCount.put("diagnoseCount", diagnoseCount.get(diagnoseMapEntry.getValue()));
|
|
|
+ diagnose2featureCount.put(diagnoseMapEntry.getValue(), featureCount);
|
|
|
+ } else {
|
|
|
+ if (diagnose2featureCount.get(diagnoseMapEntry.getValue()).get(feature) == null) {
|
|
|
+ diagnose2featureCount.get(diagnoseMapEntry.getValue()).put(feature, 1);
|
|
|
+ } else {
|
|
|
+ diagnose2featureCount.get(diagnoseMapEntry.getValue())
|
|
|
+ .put(feature, diagnose2featureCount.get(diagnoseMapEntry.getValue()).get(feature) + 1);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Map<String, Float> prob = null;
|
|
|
+ for (Map.Entry<String, Map<String, Integer>> diagnose2featureCountEntry : diagnose2featureCount.entrySet()) {
|
|
|
+ prob = new HashMap<>();
|
|
|
+ //计算先验概率
|
|
|
+ float priorProb = (float) diagnose2featureCountEntry.getValue().get("diagnoseCount") / diagnoseCount.get("diagnoseCount");
|
|
|
+ prob.put("priorProb", priorProb);
|
|
|
+ //计算条件概率
|
|
|
+ for (Map.Entry<String, Integer> featuresCount : diagnose2featureCountEntry.getValue().entrySet()) {
|
|
|
+ float conditionProb = (float) featuresCount.getValue() / diagnose2featureCountEntry.getValue().get("diagnoseCount");
|
|
|
+ prob.put(featuresCount.getKey(), conditionProb);
|
|
|
+ }
|
|
|
+ doc_feature_naivebayes_prob_map.put(diagnose2featureCountEntry.getKey(), prob);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static Map<String, Map<String,Float>> getRelevant_feature_map() {
|
|
|
+ if (relevant_feature_bayes_map == null) {
|
|
|
+ createRelevant_feature_map();
|
|
|
+ }
|
|
|
+ return relevant_feature_bayes_map;
|
|
|
+ }
|
|
|
+
|
|
|
+ public static Map<String, Map<String,Float>> createRelevant_feature_map() {
|
|
|
+ relevant_feature_bayes_map = new HashMap<>();
|
|
|
+ Map<String,Float> relevantFeatureProb = null;
|
|
|
+ Configuration configuration = new DefaultConfig();
|
|
|
+ List<String> relevantFeatureList = configuration.readFileContents("bigdata_relevant_feature.dict");
|
|
|
+ for (String relevantFeature:relevantFeatureList) {
|
|
|
+ String[] content = relevantFeature.split("\\|", -1);
|
|
|
+ if (relevant_feature_bayes_map.get(content[0]) == null){
|
|
|
+ relevantFeatureProb = new HashMap<>();
|
|
|
+ relevantFeatureProb.put(content[1],0.00f);
|
|
|
+ relevant_feature_bayes_map.put(content[0],relevantFeatureProb);
|
|
|
+ } else {
|
|
|
+ relevant_feature_bayes_map.get(content[0]).put(content[1],0.00f);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return relevant_feature_bayes_map;
|
|
|
+ }
|
|
|
}
|