|
@@ -28,10 +28,6 @@ public class ApplicationCacheUtil {
|
|
|
public static Map<String, RuleApp> kl_rule_app_filter_map = null;
|
|
|
//pacs关系抽取过滤
|
|
|
public static Map<String, Map<String, String>> kl_diagnose_detail_filter_map = null;
|
|
|
- //朴素贝叶斯
|
|
|
- public static Map<String, Map<String, Double>> doc_feature_naivebayes_prob_map = null;
|
|
|
- //朴素贝叶斯规则过滤
|
|
|
- public static Map<String, Map<String, Double>> relevant_feature_map = null;
|
|
|
|
|
|
public static Map<String, Map<String, String>> getStandard_info_synonym_map() {
|
|
|
if (standard_info_synonym_map == null) {
|
|
@@ -191,121 +187,4 @@ public class ApplicationCacheUtil {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- public static Map<String, Map<String, Double>> getDoc_feature_naivebayes_prob_map() {
|
|
|
- if (doc_feature_naivebayes_prob_map == null) {
|
|
|
- create_doc_feature_naivebayes_prob_map();
|
|
|
- }
|
|
|
- return doc_feature_naivebayes_prob_map;
|
|
|
- }
|
|
|
-
|
|
|
- public static void create_doc_feature_naivebayes_prob_map() {
|
|
|
- doc_feature_naivebayes_prob_map = new HashMap<>();
|
|
|
- //<rdn,[feature...]> 存储每个rdn对应的特征List
|
|
|
- Map<String, List<String>> featureMap = new HashMap<>();
|
|
|
- List<String> featureList = null;
|
|
|
- Configuration configuration = new DefaultConfig();
|
|
|
- List<String> fileFeatureContents = configuration.readFileContents("bigdata_naivebayes_features.dict");
|
|
|
- for (String line : fileFeatureContents) {
|
|
|
- String[] content = line.split("\\|", -1);
|
|
|
- if (featureMap.get(content[0]) == null) {
|
|
|
- featureList = new ArrayList<>();
|
|
|
- for (String feature : content[1].split(" ")) {
|
|
|
- featureList.add(feature);
|
|
|
- }
|
|
|
- featureMap.put(content[0], featureList);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- //<rdn,diagnose> 存每个rdn对应疾病
|
|
|
- Map<String, String> diagnoseMap = new HashMap<>();
|
|
|
- //<diagnose,count> 存每个疾病的数量
|
|
|
- Map<String, Integer> diagnoseCount = new HashMap<>();
|
|
|
- List<String> fileDiagnoseContents = configuration.readFileContents("bigdata_naivebayes_diagnose.dict");
|
|
|
- diagnoseCount.put("diagnoseCount", fileDiagnoseContents.size());
|
|
|
- for (String line : fileDiagnoseContents) {
|
|
|
- String[] content = line.split("\\|", -1);
|
|
|
- if (diagnoseMap.get(content[0]) == null) {
|
|
|
- diagnoseMap.put(content[0], content[1]);
|
|
|
- }
|
|
|
- if (diagnoseCount.get(content[1]) == null) {
|
|
|
- diagnoseCount.put(content[1], 1);
|
|
|
- } else {
|
|
|
- diagnoseCount.put(content[1], diagnoseCount.get(content[1]) + 1);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- Map<String, Map<String, Integer>> diagnose2featureCount = new HashMap<>();
|
|
|
- Map<String, Integer> featureCount = new HashMap<>();
|
|
|
- for (Map.Entry<String, String> diagnoseMapEntry : diagnoseMap.entrySet()) {
|
|
|
- //featureMap -> <1000000_144 , [咳嗽,咳痰,1周,气管炎]>
|
|
|
- if (featureMap.get(diagnoseMapEntry.getKey()) == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- for (String feature : featureMap.get(diagnoseMapEntry.getKey())) {
|
|
|
- /**
|
|
|
- diagnoseMapEntry <1596386_9,鼻炎> -> <rdn,diagnose>
|
|
|
- 如果疾病对应特征列表为空 diagnoseMapEntry.getValue()->疾病
|
|
|
- */
|
|
|
- if (diagnose2featureCount.get(diagnoseMapEntry.getValue()) == null) {
|
|
|
- featureCount = new HashMap<>();
|
|
|
- //featureMap -> <1000000_144 , [咳嗽,咳痰,1周,气管炎]>
|
|
|
- if (featureCount.get(feature) == null) {
|
|
|
- featureCount.put(feature, 1);
|
|
|
- } else {
|
|
|
- featureCount.put(feature, featureCount.get(feature) + 1);
|
|
|
- }
|
|
|
- //疾病对应病历数
|
|
|
- featureCount.put("diagnoseCount", diagnoseCount.get(diagnoseMapEntry.getValue()));
|
|
|
- diagnose2featureCount.put(diagnoseMapEntry.getValue(), featureCount);
|
|
|
- } else {
|
|
|
- if (diagnose2featureCount.get(diagnoseMapEntry.getValue()).get(feature) == null) {
|
|
|
- diagnose2featureCount.get(diagnoseMapEntry.getValue()).put(feature, 1);
|
|
|
- } else {
|
|
|
- diagnose2featureCount.get(diagnoseMapEntry.getValue())
|
|
|
- .put(feature, diagnose2featureCount.get(diagnoseMapEntry.getValue()).get(feature) + 1);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- Map<String, Double> prob = null;
|
|
|
- for (Map.Entry<String, Map<String, Integer>> diagnose2featureCountEntry : diagnose2featureCount.entrySet()) {
|
|
|
- prob = new HashMap<>();
|
|
|
- //计算先验概率
|
|
|
- double priorProb = (double) diagnose2featureCountEntry.getValue().get("diagnoseCount") / diagnoseCount.get("diagnoseCount");
|
|
|
- prob.put("priorProb", priorProb);
|
|
|
- //计算条件概率
|
|
|
- for (Map.Entry<String, Integer> featuresCount : diagnose2featureCountEntry.getValue().entrySet()) {
|
|
|
- double conditionProb = (double) featuresCount.getValue() / diagnose2featureCountEntry.getValue().get("diagnoseCount");
|
|
|
- prob.put(featuresCount.getKey(), conditionProb);
|
|
|
- }
|
|
|
- doc_feature_naivebayes_prob_map.put(diagnose2featureCountEntry.getKey(), prob);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- public static Map<String, Map<String,Double>> getRelevant_feature_map() {
|
|
|
- if (relevant_feature_map == null) {
|
|
|
- createRelevant_feature_map();
|
|
|
- }
|
|
|
- return relevant_feature_map;
|
|
|
- }
|
|
|
-
|
|
|
- public static Map<String, Map<String,Double>> createRelevant_feature_map() {
|
|
|
- relevant_feature_map = new HashMap<>();
|
|
|
- Map<String,Double> relevantFeatureProb = null;
|
|
|
- Configuration configuration = new DefaultConfig();
|
|
|
- List<String> relevantFeatureList = configuration.readFileContents("bigdata_relevant_feature.dict");
|
|
|
- for (String relevantFeature:relevantFeatureList) {
|
|
|
- String[] content = relevantFeature.split("\\|", -1);
|
|
|
- if (relevant_feature_map.get(content[0]) == null){
|
|
|
- relevantFeatureProb = new HashMap<>();
|
|
|
- relevantFeatureProb.put(content[1],0.00);
|
|
|
- relevant_feature_map.put(content[0],relevantFeatureProb);
|
|
|
- } else {
|
|
|
- relevant_feature_map.get(content[0]).put(content[1],0.00);
|
|
|
- }
|
|
|
- }
|
|
|
- return relevant_feature_map;
|
|
|
- }
|
|
|
}
|