Prechádzať zdrojové kódy

特征提取范围限制

wangyu 5 rokov pred
rodič
commit
7dafbfb1b4

+ 56 - 50
nlp-web/src/main/java/org/diagbot/nlp/controller/FeatureController.java

@@ -19,6 +19,7 @@ import org.diagbot.nlp.util.NlpCache;
 import org.diagbot.nlp.util.NlpUtil;
 import org.diagbot.pub.api.Response;
 import org.diagbot.pub.jdbc.MysqlJdbc;
+import org.diagbot.pub.utils.PropertiesUtil;
 import org.diagbot.pub.web.BaseController;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.core.env.Environment;
@@ -34,7 +35,8 @@ import java.util.*;
 @RequestMapping("/feature")
 public class FeatureController extends BaseController<Feature, FeatureWrapper, Long> {
     private String[] negative_words = Constants.negative_words;
-
+    // Feature extraction range, loaded from the push.feature.num property
+    private String featureNum;
     private Map<String, String> propelSymptom = null;
     private Map<String, String> propelDiag = null;
     private Map<String, String> propelVital= null;
@@ -146,60 +148,64 @@ public class FeatureController extends BaseController<Feature, FeatureWrapper, L
             Map<String, String> propel = null;
             List<Map<String, Object>> data = new ArrayList<>();
 
+            PropertiesUtil propertiesUtil = new PropertiesUtil("nlp.properties");
+            featureNum = propertiesUtil.getProperty("push.feature.num");
             FeatureAnalyze sa = new FeatureAnalyze();
             for (String featureType : featureTypes.split(",")) {
-                for (Info info : docInfos) {
-                    switch (FeatureType.parse(featureType)) {
-                        case SYMPTOM:
-                            content = info.getPresent();
-                            propel = propelSymptom;
-                            break;
-                        case TIME:
-                            content = info.getPresent();
-                            propel = propelSymptom;
-                            break;
-                        case FEATURE:
-                            content = info.getPresent();
-                            propel = propelSymptom;
-                            break;
-                        case VITAL:
-                            content = info.getVital();
-                            propel = propelVital;
-                            break;
-                        case PACS:
-                            content = info.getPacs();
-                            propel = propelPacs;
-                            break;
-                        case LIS:
-                            content = info.getLis();
-                            propel = propelLis;
-                            break;
-                        case DIAG:
-                            content = info.getDiag();
-                            propel = propelDiag;
-                            break;
-                    }
-                    featureList = sa.start(content, FeatureType.parse(featureType));
-
-                    if (featureList == null) {
-                        continue;
-                    }
-                    for (int i = 0; i < featureList.size(); i++) {
-                        featureMap = featureList.get(i);
-                        featureMap.put("rdn", info.getRdn());
-                        featureMap.put("age", StringUtils.isEmpty(info.getAge())?"0":info.getAge());
-                        featureMap.put("sex", info.getSex());
-                        featureMap.put("resource_type", info.getResourceType());
-
-                        if (propel.get(featureMap.get("feature_name")) == null) {
-                            featureMap.put("is_push", "0");
-                        } else {
-                            featureMap.put("is_push", "1");
+                for (String feature : featureNum.split(",")) {
+                    sa.setFeatureNum(feature);
+                    for (Info info : docInfos) {
+                        switch (FeatureType.parse(featureType)) {
+                            case SYMPTOM:
+                                content = info.getPresent();
+                                propel = propelSymptom;
+                                break;
+                            case TIME:
+                                content = info.getPresent();
+                                propel = propelSymptom;
+                                break;
+                            case FEATURE:
+                                content = info.getPresent();
+                                propel = propelSymptom;
+                                break;
+                            case VITAL:
+                                content = info.getVital();
+                                propel = propelVital;
+                                break;
+                            case PACS:
+                                content = info.getPacs();
+                                propel = propelPacs;
+                                break;
+                            case LIS:
+                                content = info.getLis();
+                                propel = propelLis;
+                                break;
+                            case DIAG:
+                                content = info.getDiag();
+                                propel = propelDiag;
+                                break;
+                        }
+                        featureList = sa.start(content, FeatureType.parse(featureType));
+                        if (featureList == null) {
+                            continue;
+                        }
+                        for (int i = 0; i < featureList.size(); i++) {
+                            featureMap = featureList.get(i);
+                            featureMap.put("rdn", info.getRdn() + "_" + feature);
+                            featureMap.put("age", StringUtils.isEmpty(info.getAge()) ? "0" : info.getAge());
+                            featureMap.put("sex", info.getSex());
+                            featureMap.put("resource_type", info.getResourceType());
+                            if (propel.get(featureMap.get("feature_name")) == null) {
+                                featureMap.put("is_push", "0");
+                            } else {
+                                featureMap.put("is_push", "1");
+                            }
+                            data.add(featureMap);
                         }
-                        data.add(featureMap);
-                    }
 
+                    }
                 }
+
             }
 
             MysqlJdbc jdbc = new MysqlJdbc(env.getProperty("spring.datasource.username"),

+ 7 - 0
nlp/src/main/java/org/diagbot/nlp/feature/FeatureAnalyze.java

@@ -18,6 +18,7 @@ import java.util.Map;
 
 public class FeatureAnalyze {
     private LexemePath<Lexeme> lexemePath = null;
+    private String featureNum = "";//特征提取范围
 
     Logger logger = LoggerFactory.getLogger(FeatureAnalyze.class);
 
@@ -61,6 +62,7 @@ public class FeatureAnalyze {
 //        }
 //        logger.info("分词文本结果:" + lexeme_text);
         lexemePath = replaceLexeme(lexemePath);
+        caseToken.getFeatureSize(featureNum);
         return caseToken.analyze(lexemePath);
     }
 
@@ -98,4 +100,9 @@ public class FeatureAnalyze {
         }
         return lexemePath;
     }
+
+    public String setFeatureNum(String featureNum){ // stores the feature extraction range on this analyzer
+        this.featureNum = featureNum;
+        return featureNum; // hands the argument back to the caller unchanged
+    }
 }

+ 34 - 10
nlp/src/main/java/org/diagbot/nlp/feature/extract/CaseToken.java

@@ -1,13 +1,18 @@
 package org.diagbot.nlp.feature.extract;
 
+import org.apache.commons.lang3.StringUtils;
 import org.diagbot.nlp.participle.word.Lexeme;
 import org.diagbot.nlp.participle.word.LexemePath;
 import org.diagbot.nlp.util.Constants;
 import org.diagbot.nlp.util.NegativeEnum;
 import org.diagbot.nlp.util.NlpUtil;
+import org.springframework.stereotype.Component;
 
-import java.util.*;
-
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+@Component
 public abstract class CaseToken {
     protected static String[] ignore_symbol = new String[]{"、", "."};
     protected static String[] stop_symbol = new String[]{"。", ";", "?", ";", "?", "“", "”", "\r", "\n", "[", "]", "{", "}"};
@@ -20,6 +25,7 @@ public abstract class CaseToken {
     protected Lexeme leftFeatureLexeme = null;
     protected Lexeme rightFeatureLexeme = null;
     protected int sn = 0;
+    protected String featureSize = "";
 
     static {
         Arrays.sort(ignore_symbol);
@@ -100,15 +106,33 @@ public abstract class CaseToken {
             }
         }
         if (!hasFeature) {
-            Map<String, Object> fMap = new HashMap<>(10);
-            fMap.put("feature_name", lexeme.getText());
-            fMap.put("feature_type", featureType);
-            fMap.put("negative", key);
-            fMap.put("sn", String.valueOf(sn++));
-            fMap.put("property", lexeme.getProperty());
-            fMap.put("concept", lexeme.getConcept());
-            featuresList.add(fMap);
+            if (StringUtils.isNotEmpty(featureSize)) {
+                if(featureSize.equals("all")){//featureSize为all时提取所有特征
+                    Map<String, Object> fMap = new HashMap<>(10);
+                    fMap.put("feature_name", lexeme.getText());
+                    fMap.put("feature_type", featureType);
+                    fMap.put("negative", key);
+                    fMap.put("sn", String.valueOf(sn++));
+                    fMap.put("property", lexeme.getProperty());
+                    fMap.put("concept", lexeme.getConcept());
+                    featuresList.add(fMap);
+                }else {
+                    if (sn < Integer.parseInt(featureSize)){
+                        Map<String, Object> fMap = new HashMap<>(10);
+                        fMap.put("feature_name", lexeme.getText());
+                        fMap.put("feature_type", featureType);
+                        fMap.put("negative", key);
+                        fMap.put("sn", String.valueOf(sn++));
+                        fMap.put("property", lexeme.getProperty());
+                        fMap.put("concept", lexeme.getConcept());
+                        featuresList.add(fMap);
+                    }
+                }
+            }
         }
     }
+    public void getFeatureSize(String featureSize){ // NOTE: a setter despite the get- prefix; name kept because existing callers invoke it
+        this.featureSize = featureSize; // "all" keeps every feature; any other non-empty value is parsed with Integer.parseInt as a limit
+    }
 }
 

+ 4 - 1
nlp/src/main/resources/nlp.properties

@@ -1,3 +1,6 @@
 #数据文件存放路径
 cache.file.dir=/opt/diagbot-push/cache_file/
-#cache.file.dir=d:\\cache_file\\
+#cache.file.dir=e:\\cache_file\\
+
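+# Feature extraction range: "all" = no limit, or an integer limit; separate several values with commas to run one extraction pass per value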
+push.feature.num=all