Browse Source

特征提取

wangyu 5 years ago
parent
commit
c7e84d0f05

+ 63 - 0
common-push/src/main/java/org/diagbot/common/push/bean/RelevantFeature.java

@@ -0,0 +1,63 @@
+package org.diagbot.common.push.bean;
+
/**
 * Row model for one entry of the diagnosis/feature comparison table
 * ({@code doc_relevant_feature}), loaded from the
 * {@code bigdata_diagnose_feature_filter.dict} cache file and used to
 * filter extracted features per diagnosis.
 *
 * <p>Field names intentionally mirror the snake_case database columns, so the
 * non-standard accessor names ({@code getFeature_type()} etc.) are part of the
 * public interface and must not be renamed.
 *
 * @author wangyu
 * @since 2019/10/14
 */
public class RelevantFeature {
    // Primary key of the source table row.
    private String id;
    // Diagnosis (disease) name this feature row belongs to.
    private String diagnose;
    // Feature text compared against extracted feature names.
    private String feature;
    // Feature category code — semantics defined by the source table; TODO confirm value set.
    private String feature_type;
    // Value category code — semantics defined by the source table; TODO confirm value set.
    private String value_type;
    // Flag/text for suspect-diagnosis lookup — TODO confirm meaning against the source table.
    private String find_suspect_diagnose;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getDiagnose() {
        return diagnose;
    }

    public void setDiagnose(String diagnose) {
        this.diagnose = diagnose;
    }

    public String getFeature() {
        return feature;
    }

    public void setFeature(String feature) {
        this.feature = feature;
    }

    public String getFeature_type() {
        return feature_type;
    }

    public void setFeature_type(String feature_type) {
        this.feature_type = feature_type;
    }

    public String getValue_type() {
        return value_type;
    }

    public void setValue_type(String value_type) {
        this.value_type = value_type;
    }

    public String getFind_suspect_diagnose() {
        return find_suspect_diagnose;
    }

    public void setFind_suspect_diagnose(String find_suspect_diagnose) {
        this.find_suspect_diagnose = find_suspect_diagnose;
    }

    /** Debug-friendly rendering of all fields (additive; no caller relies on it). */
    @Override
    public String toString() {
        return "RelevantFeature{id=" + id
                + ", diagnose=" + diagnose
                + ", feature=" + feature
                + ", feature_type=" + feature_type
                + ", value_type=" + value_type
                + ", find_suspect_diagnose=" + find_suspect_diagnose
                + "}";
    }
}

+ 34 - 0
common-push/src/main/java/org/diagbot/common/push/cache/ApplicationCacheUtil.java

@@ -1,5 +1,6 @@
 package org.diagbot.common.push.cache;
 
+import org.diagbot.common.push.bean.RelevantFeature;
 import org.diagbot.common.push.bean.ResultMappingFilter;
 import org.diagbot.common.push.bean.Rule;
 import org.diagbot.common.push.bean.RuleApp;
@@ -34,6 +35,8 @@ public class ApplicationCacheUtil {
     public static Map<String, Map<String, Float>> doc_feature_naivebayes_prob_map = null;
     //朴素贝叶斯规则过滤
     public static Map<String, Map<String, Float>> relevant_feature_bayes_map = null;
+    //体征过滤对比表信息
+    public static Map<String, RelevantFeature> relevant_feature_map = null;
 
     public static Map<String, Map<String, String>> getStandard_info_synonym_map() {
         if (standard_info_synonym_map == null) {
@@ -358,4 +361,35 @@ public class ApplicationCacheUtil {
         }
         return relevant_feature_bayes_map;
     }
+
+    /**
+     * 体征过滤获取对比表信息
+     *
+     * @return
+     */
+    public static Map<String, RelevantFeature> get_relevant_feature() {
+        if (relevant_feature_map == null) {
+            create_get_relevant_feature();
+        }
+        return relevant_feature_map;
+    }
+
+    public static void create_get_relevant_feature(){
+        relevant_feature_map = new HashMap<>();
+        Configuration configuration = new DefaultConfig();
+        List<String> fileContents = configuration.readFileContents("bigdata_diagnose_feature_filter.dict");
+        for (String line : fileContents) {
+            String[] content = line.split("\\|", -1);
+            RelevantFeature relevantFeature = new RelevantFeature();
+            if (content.length == 6) {
+                relevantFeature.setId(content[0] == null ? "" : content[0]);
+                relevantFeature.setDiagnose(content[1] == null ? "" : content[1]);
+                relevantFeature.setFeature(content[2] == null ? "" : content[2]);
+                relevantFeature.setFeature_type(content[3] == null ? "" : content[3]);
+                relevantFeature.setFind_suspect_diagnose(content[4] == null ? "" : content[4]);
+                relevantFeature.setValue_type(content[5] == null ? "" : content[5]);
+                relevant_feature_map.put(relevantFeature.getDiagnose(),relevantFeature);
+            }
+        }
+    }
 }

+ 24 - 0
common-push/src/main/java/org/diagbot/common/push/cache/CacheFileManager.java

@@ -478,6 +478,30 @@ public class CacheFileManager {
             }
             fw.close();
 
+            //特征提取过滤参照表信息
+            sql = "SELECT id,diagnose,feature,feature_type,value_type,find_suspect_diagnose FROM `doc_relevant_feature`";
+            st = conn.createStatement();
+            rs = st.executeQuery(sql);
+            fw = new FileWriter(path + "bigdata_diagnose_feature_filter.dict");
+            while (rs.next()) {
+                r1 = String.valueOf(rs.getInt(1));
+                r2 = rs.getString(2);
+                r3 = rs.getString(3);
+                r4 = rs.getString(4);
+                r5 = rs.getString(5);
+                r6 = rs.getString(6);
+                r1 = StringUtils.isEmpty(r1) ? "" : r1;
+                r2 = StringUtils.isEmpty(r2) ? "" : r2;
+                r3 = StringUtils.isEmpty(r3) ? "" : r3;
+                r4 = StringUtils.isEmpty(r4) ? "" : r4;
+                r5 = StringUtils.isEmpty(r5) ? "" : r5;
+                r6 = StringUtils.isEmpty(r6) ? "" : r6;
+                fw.write(encrypDES.encrytor(r1 + "|" + r2 + "|" + r3 + "|" + r4 + "|" + r5
+                        + "|" + r6 ));
+                fw.write("\n");
+            }
+            fw.close();
+
         } catch (IOException ioe) {
             ioe.printStackTrace();
         } catch (SQLException sqle) {

+ 11 - 9
common-push/src/main/java/org/diagbot/common/push/work/ParamsDataProxy.java

@@ -1,25 +1,22 @@
 package org.diagbot.common.push.work;
 
-import com.alibaba.fastjson.JSON;
-import org.algorithm.core.cnn.AlgorithmCNNExecutor;
 import org.algorithm.core.cnn.AlgorithmCNNExecutorPacs;
 import org.algorithm.factory.RelationExtractionFactory;
 import org.apache.commons.lang3.StringUtils;
-import org.diagbot.common.push.bean.FeatureRate;
-import org.diagbot.common.push.bean.ResponseData;
-import org.diagbot.common.push.bean.ResultMappingFilter;
 import org.diagbot.common.push.bean.SearchData;
-import org.diagbot.common.push.cache.ApplicationCacheUtil;
 import org.diagbot.common.push.util.PushConstants;
 import org.diagbot.nlp.feature.FeatureAnalyze;
 import org.diagbot.nlp.feature.FeatureType;
 import org.diagbot.nlp.util.Constants;
-import org.diagbot.nlp.util.NegativeEnum;
+import org.diagbot.pub.utils.PropertiesUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import javax.servlet.http.HttpServletRequest;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 
 /**
  * @ClassName org.diagbot.bigdata.work.ParamsDataProxy
@@ -30,6 +27,7 @@ import java.util.*;
  **/
 public class ParamsDataProxy {
     Logger logger = LoggerFactory.getLogger(ParamsDataProxy.class);
+    private String featureNum = "";//特征提取范围
 
     public void createNormalInfo(SearchData searchData) throws Exception {
         //计算年龄区间
@@ -84,6 +82,10 @@ public class ParamsDataProxy {
         //获取入参中的特征信息
         FeatureAnalyze fa = new FeatureAnalyze();
         List<Map<String, Object>> featuresList = new ArrayList<>();
+        //获取配置文件中的特征范围
+        PropertiesUtil propertiesUtil = new PropertiesUtil("nlp.properties");
+        featureNum = propertiesUtil.getProperty("push.feature.num");
+        fa.setFeatureNum(featureNum);
         if (!StringUtils.isEmpty(searchData.getSymptom())) {
             //提取现病史
             featuresList = fa.start(searchData.getSymptom(), FeatureType.FEATURE);

+ 5 - 0
nlp-web/pom.xml

@@ -40,6 +40,11 @@
 			<artifactId>algorithm</artifactId>
 			<version>1.0.0</version>
 		</dependency>
+		<dependency>
+			<groupId>org.diagbot</groupId>
+			<artifactId>common-push</artifactId>
+			<version>1.0.0</version>
+		</dependency>
 		<dependency>
 			<groupId>org.diagbot</groupId>
 			<artifactId>common-service</artifactId>

+ 25 - 4
nlp-web/src/main/java/org/diagbot/nlp/controller/FeatureController.java

@@ -2,6 +2,8 @@ package org.diagbot.nlp.controller;
 
 import com.github.pagehelper.PageInfo;
 import org.apache.commons.lang3.StringUtils;
+import org.diagbot.common.push.bean.RelevantFeature;
+import org.diagbot.common.push.cache.ApplicationCacheUtil;
 import org.diagbot.nlp.common.NlpWebConstants;
 import org.diagbot.nlp.dao.model.Feature;
 import org.diagbot.nlp.dao.model.Info;
@@ -19,6 +21,7 @@ import org.diagbot.nlp.util.NlpCache;
 import org.diagbot.nlp.util.NlpUtil;
 import org.diagbot.pub.api.Response;
 import org.diagbot.pub.jdbc.MysqlJdbc;
+import org.diagbot.pub.utils.PropertiesUtil;
 import org.diagbot.pub.web.BaseController;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.core.env.Environment;
@@ -45,6 +48,7 @@ public class FeatureController extends BaseController<Feature, FeatureWrapper, L
     private Map<String, String> propelVital= null;
     private Map<String, String> propelLis = null;
     private Map<String, String> propelPacs = null;
+    private Map<String, RelevantFeature> relevant_feature_map = null;
 
     {
         listView = "pages/doc/feature/list";
@@ -151,12 +155,19 @@ public class FeatureController extends BaseController<Feature, FeatureWrapper, L
             Map<String, String> propel = null;
             List<Map<String, Object>> data = new ArrayList<>();
 
-           /* PropertiesUtil propertiesUtil = new PropertiesUtil("nlp.properties");
-            featureNum = propertiesUtil.getProperty("push.feature.num");*/
+            PropertiesUtil propertiesUtil = new PropertiesUtil("nlp.properties");
+            featureNum = propertiesUtil.getProperty("push.feature.num");
+            Boolean filter = false;//判断是否需要过滤
+            if(propertiesUtil.getProperty("extract.feature.filter").equals("1")){//配置为1时过滤
+                filter = true;
+            }
+            if (filter){
+                relevant_feature_map = ApplicationCacheUtil.get_relevant_feature();
+            }
             FeatureAnalyze sa = new FeatureAnalyze();
             for (String featureType : featureTypes.split(",")) {
                 for (String feature : featureNum.split(",")) {
-                    /*sa.setFeatureNum(feature);*/
+                    sa.setFeatureNum(feature);
                     for (Info info : docInfos) {
                         switch (FeatureType.parse(featureType)) {
                             case SYMPTOM:
@@ -203,7 +214,17 @@ public class FeatureController extends BaseController<Feature, FeatureWrapper, L
                             } else {
                                 featureMap.put("is_push", "1");
                             }
-                            data.add(featureMap);
+                            if(filter){//过滤过的结果
+                                if(relevant_feature_map != null){
+                                    if(relevant_feature_map.get(info.getDiag()) != null){//获取对应疾病特征信息
+                                        if(relevant_feature_map.get(info.getDiag()).getFeature().equals(featureMap.get("feature_name"))){//与特征信息作对比
+                                            data.add(featureMap);
+                                        }
+                                    }
+                                }
+                            }else {
+                                data.add(featureMap);
+                            }
                         }
 
                     }

+ 4 - 4
nlp/src/main/java/org/diagbot/nlp/feature/FeatureAnalyze.java

@@ -18,7 +18,7 @@ import java.util.Map;
 
 public class FeatureAnalyze {
     private LexemePath<Lexeme> lexemePath = null;
-/*    private String featureNum = "";//特征提取范围*/
+    private String featureNum = "";//特征提取范围
 
     Logger logger = LoggerFactory.getLogger(FeatureAnalyze.class);
 
@@ -62,7 +62,7 @@ public class FeatureAnalyze {
 //        }
 //        logger.info("分词文本结果:" + lexeme_text);
         lexemePath = replaceLexeme(lexemePath);
-        /*caseToken.getFeatureSize(featureNum);*/
+        caseToken.getFeatureSize(featureNum);
         return caseToken.analyze(lexemePath);
     }
 
@@ -101,8 +101,8 @@ public class FeatureAnalyze {
         return lexemePath;
     }
 
-  /*  public String setFeatureNum(String featureNum){
+    public String setFeatureNum(String featureNum){
         this.featureNum = featureNum;
         return featureNum;
-    }*/
+    }
 }

+ 6 - 13
nlp/src/main/java/org/diagbot/nlp/feature/extract/CaseToken.java

@@ -1,5 +1,6 @@
 package org.diagbot.nlp.feature.extract;
 
+import org.apache.commons.lang3.StringUtils;
 import org.diagbot.nlp.participle.word.Lexeme;
 import org.diagbot.nlp.participle.word.LexemePath;
 import org.diagbot.nlp.util.Constants;
@@ -22,7 +23,7 @@ public abstract class CaseToken {
     protected Lexeme leftFeatureLexeme = null;
     protected Lexeme rightFeatureLexeme = null;
     protected int sn = 0;
-/*    protected String featureSize = "";*/
+    protected String featureSize = "";
 
     static {
         Arrays.sort(ignore_symbol);
@@ -103,7 +104,7 @@ public abstract class CaseToken {
             }
         }
         if (!hasFeature) {
- /*           if (StringUtils.isNotEmpty(featureSize)) {
+            if (StringUtils.isNotEmpty(featureSize)) {
                 if(featureSize.equals("all")){//featureSize为all时提取所有特征
                     Map<String, Object> fMap = new HashMap<>(10);
                     fMap.put("feature_name", lexeme.getText());
@@ -125,19 +126,11 @@ public abstract class CaseToken {
                         featuresList.add(fMap);
                     }
                 }
-            }*/
-            Map<String, Object> fMap = new HashMap<>(10);
-            fMap.put("feature_name", lexeme.getText());
-            fMap.put("feature_type", featureType);
-            fMap.put("negative", key);
-            fMap.put("sn", String.valueOf(sn++));
-            fMap.put("property", lexeme.getProperty());
-            fMap.put("concept", lexeme.getConcept());
-            featuresList.add(fMap);
+            }
         }
     }
-  /*  public void getFeatureSize(String fetureSize){
+    public void getFeatureSize(String fetureSize){
         this.featureSize = fetureSize;
-    }*/
+    }
 }
 

+ 8 - 4
nlp/src/main/resources/nlp.properties

@@ -1,6 +1,10 @@
 #数据文件存放路径
-cache.file.dir=/opt/diagbot-push/cache_file/
-#cache.file.dir=e:\\cache_file\\
+#cache.file.dir=/opt/diagbot-push/cache_file/
+# NOTE(review): a local Windows dev path was committed here — restore the server path above before deploying
+cache.file.dir=e:\\cache_file\\
 
-#特征提取范围(不限制范围时配置:all)
-#push.feature.num=all
+# Feature-extraction range during extraction ("all" = unlimited)
+extract.feature.num=all
+# Feature-extraction range during push ("all" = unlimited)
+push.feature.num=5
+# Whether to filter extracted features (0 = no filter, 1 = filter)
+extract.feature.filter=0