Browse Source

修改症状末尾添加诊断依据标准词方法

hujing 6 years ago
parent
commit
1231b6855b

+ 38 - 17
bigdata-web/src/main/java/org/diagbot/bigdata/common/ApplicationCacheUtil.java

@@ -109,7 +109,6 @@ public class ApplicationCacheUtil {
         if (kl_result_mapping_standword_map == null) {
             kl_result_mapping_standword_map = new HashMap<>();
             Configuration configuration = new DefaultConfig();
-            ParticipleUtil util = new ParticipleUtil();
             List<String> fileContents = configuration.readFileContents("kl_result_mapping_standword.dict");
             List<Map<String, String>> standWordObjValList = null;
             Map<String, String> standWordObjVal = null;
@@ -120,26 +119,31 @@ public class ApplicationCacheUtil {
                     LexemePath<Lexeme> lexemes = null;
                     String op = "";
                     String[] fileContentSplit = null;
+                    //每一个标准词根据大于小于符号切开,不然进行分词时还是会得到原本的标准词
                     if (fileContent.contains(">=") || fileContent.contains("≥")
                             || fileContent.contains(">") || fileContent.contains("大于")
-                            || fileContent.contains(">") || fileContent.contains("超过")){
+                            || fileContent.contains(">") || fileContent.contains("超过")) {
                         op = ">";
                         fileContentSplit = fileContent.split(operationGreater);
                     } else if (fileContent.contains("<=") || fileContent.contains("≤")
                             || fileContent.contains("<") || fileContent.contains("小于")
-                            || fileContent.contains("<") || fileContent.contains("少于")){
+                            || fileContent.contains("<") || fileContent.contains("少于")) {
                         op = "<";
                         fileContentSplit = fileContent.split(operationSmaller);
                     } else {
                         continue;
                     }
-                    for (String fileContentWords:fileContentSplit){
-                        LexemePath<Lexeme> lexemeWord = util.participle(fileContentWords);
-                        if (null == lexemes) {
-                            lexemes = lexemeWord;
-                        } else {
-                            for (Lexeme lexeme:lexemeWord) {
-                                lexemes.add(lexeme);
+                    LexemePath<Lexeme> lexemeWord = null;
+                    //每一个标准词切开后进行分词
+                    for (String fileContentWords : fileContentSplit) {
+                        lexemeWord = ParticipleUtil.participle(fileContentWords);
+                        if (lexemeWord != null) {
+                            if (null == lexemes) {
+                                lexemes = lexemeWord;
+                            } else {
+                                for (Lexeme lexeme : lexemeWord) {
+                                    lexemes.add(lexeme);
+                                }
                             }
                         }
                     }
@@ -147,8 +151,11 @@ public class ApplicationCacheUtil {
                     standWordObjValList = new ArrayList<>();
                     standWordObjVal = new HashMap<>();
                     int i = 0;
-                    for (Lexeme lexeme:lexemes) {
+                    for (Lexeme lexeme : lexemes) {
                         i++;
+                        if (lexeme.getProperty().contains(",")) {
+                            setProterty(lexeme); //如果分词后词性有多个,只选一个(暂时只处理症状,体征)
+                        }
                         NegativeEnum lexemeNegativeEnum = NegativeEnum.parseOfValue(lexeme.getProperty());
                         if (lexemeNegativeEnum == NegativeEnum.SYMPTOM || lexemeNegativeEnum == NegativeEnum.CAUSE
                                 || lexemeNegativeEnum == NegativeEnum.VITAL_INDEX) {
@@ -157,14 +164,15 @@ public class ApplicationCacheUtil {
                             } else {
                                 standWordObjKey = lexeme.getText();
                             }
-                        } else if (lexemeNegativeEnum == NegativeEnum.DIGITS ) {
-                                standWordObjVal.put("value", lexeme.getText());
-                        } else if (lexemeNegativeEnum == NegativeEnum.UNIT) {
-                                standWordObjVal.put("unit", lexeme.getText());
+                        } else if (lexemeNegativeEnum == NegativeEnum.DIGITS) {
+                            standWordObjVal.put("value", lexeme.getText());
+                        } else if (lexemeNegativeEnum == NegativeEnum.UNIT
+                                || lexemeNegativeEnum == NegativeEnum.EVENT_TIME) {
+                            standWordObjVal.put("unit", lexeme.getText());
                         }
                         if (lexemes.size() == i) {
-                            standWordObjVal.put("op",op);
-                            standWordObjVal.put("standword",fileContent);
+                            standWordObjVal.put("op", op);
+                            standWordObjVal.put("standword", fileContent);
                             if (kl_result_mapping_standword_map.containsKey(standWordObjKey)) {
                                 kl_result_mapping_standword_map.get(standWordObjKey).add(standWordObjVal);
                             } else {
@@ -179,4 +187,17 @@ public class ApplicationCacheUtil {
         }
         return kl_result_mapping_standword_map;
     }
+
+    public static void setProterty(Lexeme lexeme) {
+        for (String featureType : lexeme.getProperty().split(",")) {
+            switch (featureType) {
+                case "1":
+                    lexeme.setProperty("1");
+                    break;
+                case "33":
+                    lexeme.setProperty("33");
+                    break;
+            }
+        }
+    }
 }

+ 67 - 42
bigdata-web/src/main/java/org/diagbot/bigdata/work/ParamsDataProxy.java

@@ -20,6 +20,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 /**
  * @ClassName org.diagbot.bigdata.work.ParamsDataProxy
@@ -240,8 +241,11 @@ public class ParamsDataProxy {
         List<Lexeme> feature = new ArrayList<>();
         //收集分词结果中体征指标或体征指标值(数字)
         for (Lexeme lexeme : lexemes) {
+            if (lexeme.getProperty().contains(",")) {
+                ApplicationCacheUtil.setProterty(lexeme); //如果分词后词性有多个,只选一个(暂时只处理症状,体征)
+            }
             NegativeEnum lexemeNegativeEnum = NegativeEnum.parseOfValue(lexeme.getProperty());
-            if (lexemeNegativeEnum == NegativeEnum.VITAL_INDEX || lexemeNegativeEnum == NegativeEnum.DIGITS
+            if (lexemeNegativeEnum == NegativeEnum.VITAL_INDEX || lexemeNegativeEnum == NegativeEnum.SYMPTOM || lexemeNegativeEnum == NegativeEnum.DIGITS
                     || lexemeNegativeEnum == NegativeEnum.EVENT_TIME || lexemeNegativeEnum == NegativeEnum.UNIT
                     ) {
                 feature.add(lexeme);
@@ -254,19 +258,30 @@ public class ParamsDataProxy {
             if (i < feature.size() - 2) {
                 if ((NegativeEnum.parseOfValue(feature.get(i).getProperty()) == NegativeEnum.VITAL_INDEX
                         && NegativeEnum.parseOfValue(feature.get(i + 1).getProperty()) == NegativeEnum.DIGITS
-                        && NegativeEnum.parseOfValue(feature.get(i + 2).getProperty()) == NegativeEnum.EVENT_TIME)
+                        && NegativeEnum.parseOfValue(feature.get(i + 2).getProperty()) == NegativeEnum.UNIT)
                         ||
                         (NegativeEnum.parseOfValue(feature.get(i).getProperty()) == NegativeEnum.VITAL_INDEX
                                 && NegativeEnum.parseOfValue(feature.get(i + 1).getProperty()) == NegativeEnum.DIGITS
-                                && NegativeEnum.parseOfValue(feature.get(i + 2).getProperty()) == NegativeEnum.UNIT)) {
+                                && NegativeEnum.parseOfValue(feature.get(i + 2).getProperty()) == NegativeEnum.EVENT_TIME)
+                        ||
+                        (NegativeEnum.parseOfValue(feature.get(i).getProperty()) == NegativeEnum.SYMPTOM
+                                && NegativeEnum.parseOfValue(feature.get(i + 1).getProperty()) == NegativeEnum.DIGITS
+                                && NegativeEnum.parseOfValue(feature.get(i + 2).getProperty()) == NegativeEnum.UNIT)
+                        ||
+                        (NegativeEnum.parseOfValue(feature.get(i).getProperty()) == NegativeEnum.SYMPTOM
+                                && NegativeEnum.parseOfValue(feature.get(i + 1).getProperty()) == NegativeEnum.DIGITS
+                                && NegativeEnum.parseOfValue(feature.get(i + 2).getProperty()) == NegativeEnum.EVENT_TIME)) {
                     featureType.add(feature.get(i).getText() + "\t" + feature.get(i + 1).getText() + "\t"
                             + feature.get(i + 2).getText());
                     featureTypeState = false;
                 }
             }
-            if (i < feature.size() - 1 && featureTypeState) {
-                if (NegativeEnum.parseOfValue(feature.get(i).getProperty()) == NegativeEnum.VITAL_INDEX
-                        && NegativeEnum.parseOfValue(feature.get(i + 1).getProperty()) == NegativeEnum.DIGITS) {
+            if (featureTypeState && i < feature.size() - 1) {
+                if ((NegativeEnum.parseOfValue(feature.get(i).getProperty()) == NegativeEnum.VITAL_INDEX
+                        && NegativeEnum.parseOfValue(feature.get(i + 1).getProperty()) == NegativeEnum.DIGITS)
+                        ||
+                        (NegativeEnum.parseOfValue(feature.get(i).getProperty()) == NegativeEnum.SYMPTOM
+                                && NegativeEnum.parseOfValue(feature.get(i + 1).getProperty()) == NegativeEnum.DIGITS)) {
                     featureType.add(feature.get(i).getText() + "\t" + feature.get(i + 1).getText());
                 }
             }
@@ -298,56 +313,66 @@ public class ParamsDataProxy {
 
     //Compares the value in features[1] with the standard word's "value" according to its "op" and, on a match, appends the "standword" to sData's symptom text
     private void judgment(SearchData sData, String[] features, Map<String, String> standWordMap) {
-        if (">".equals(standWordMap.get("op"))) {
-            //special-case slash-separated readings such as "blood pressure ≥140/90mmHg"
-            if (features[1].contains("/")) {
-                if (standWordMap.get("value").contains("/")) {
-                    String[] feature = features[1].split("/");
-                    Integer featuresSBP = Integer.valueOf(feature[0]); //systolic pressure from the tokenized input
-                    Integer featuresDBP = Integer.valueOf(feature[1]); //diastolic pressure from the tokenized input
+        if (hasDigit(features[1])) { //only compare when the value token actually contains a digit
+            if (">".equals(standWordMap.get("op"))) {
+                //special-case slash-separated readings such as "blood pressure ≥140/90mmHg"
+                if (features[1].contains("/")) {
+                    if (standWordMap.get("value").contains("/")) {
+                        String[] feature = features[1].split("/");
+                        Integer featuresSBP = Integer.valueOf(feature[0]); //systolic pressure from the tokenized input
+                        Integer featuresDBP = Integer.valueOf(feature[1]); //diastolic pressure from the tokenized input
 
-                    String[] values = standWordMap.get("value").split("/");
-                    Integer standWordSBP = Integer.valueOf(values[0]); //systolic pressure from the standard word
-                    Integer standWordDBP = Integer.valueOf(values[1]); //diastolic pressure from the standard word
-                    if (featuresSBP > standWordSBP && featuresDBP > standWordDBP) {
+                        String[] values = standWordMap.get("value").split("/");
+                        Integer standWordSBP = Integer.valueOf(values[0]); //systolic pressure from the standard word
+                        Integer standWordDBP = Integer.valueOf(values[1]); //diastolic pressure from the standard word
+                        if (featuresSBP > standWordSBP && featuresDBP > standWordDBP) { //both components must exceed their thresholds
+                            sData.setSymptom(sData.getSymptom() + "," + standWordMap.get("standword"));
+                            System.out.println(sData.getSymptom());
+                        }
+                    }
+                } else {
+                    String num = getNum(standWordMap.get("value"));
+                    if (Double.valueOf(getNum(features[1])) > Double.valueOf(num)) {
                         sData.setSymptom(sData.getSymptom() + "," + standWordMap.get("standword"));
                         System.out.println(sData.getSymptom());
-                    }
-                }
-            } else {
-                String num = getNum(standWordMap.get("value"));
-                if (Double.valueOf(getNum(features[1])) > Double.valueOf(num)) {
-                    sData.setSymptom(sData.getSymptom() + "," + standWordMap.get("standword"));
-                    System.out.println(sData.getSymptom());
 
+                    }
                 }
-            }
-        } else if ("<".equals(standWordMap.get("op"))) {
-            //special-case slash-separated readings such as "blood pressure below 90/60mmHg"
-            if (standWordMap.get("value").contains("/")) {
-                if (features[1].contains("/")) {
-                    String[] feature = features[1].split("/");
-                    Integer featuresSBP = Integer.valueOf(feature[0]); //systolic pressure
-                    Integer featuresDBP = Integer.valueOf(feature[1]); //diastolic pressure
+            } else if ("<".equals(standWordMap.get("op"))) {
+                //special-case slash-separated readings such as "blood pressure below 90/60mmHg"
+                if (standWordMap.get("value").contains("/")) {
+                    if (features[1].contains("/")) {
+                        String[] feature = features[1].split("/");
+                        Integer featuresSBP = Integer.valueOf(feature[0]); //systolic pressure
+                        Integer featuresDBP = Integer.valueOf(feature[1]); //diastolic pressure
 
-                    String[] values = standWordMap.get("value").split("/");
-                    Integer standWordSBP = Integer.valueOf(values[0]); //systolic pressure
-                    Integer standWordDBP = Integer.valueOf(values[1]); //diastolic pressure
-                    if (featuresSBP < standWordSBP && featuresDBP < standWordDBP) {
+                        String[] values = standWordMap.get("value").split("/");
+                        Integer standWordSBP = Integer.valueOf(values[0]); //systolic pressure
+                        Integer standWordDBP = Integer.valueOf(values[1]); //diastolic pressure
+                        if (featuresSBP < standWordSBP && featuresDBP < standWordDBP) { //both components must fall below their thresholds
+                            sData.setSymptom(sData.getSymptom() + "," + standWordMap.get("standword"));
+                            System.out.println(sData.getSymptom());
+                        }
+                    }
+                } else {
+                    String num = getNum(standWordMap.get("value"));
+                    if (Double.valueOf(getNum(features[1])) < Double.valueOf(num)) {
                         sData.setSymptom(sData.getSymptom() + "," + standWordMap.get("standword"));
                         System.out.println(sData.getSymptom());
                     }
                 }
-            } else {
-                String num = getNum(standWordMap.get("value"));
-                if (Double.valueOf(getNum(features[1])) < Double.valueOf(num)) {
-                    sData.setSymptom(sData.getSymptom() + "," + standWordMap.get("standword"));
-                    System.out.println(sData.getSymptom());
-                }
             }
         }
     }
 
+    private boolean hasDigit(String content) {
+        boolean flag = false;
+        if (Pattern.compile(".*\\d+.*").matcher(content).matches()) {
+            flag = true;
+        }
+        return flag;
+    }
+
     private String getNum(String standWord) {
         StringBuffer sb = new StringBuffer();
         for (String num : standWord.replaceAll("[^0-9]", ",").split(",")) {

+ 74 - 30
bigdata-web/src/test/java/org/diagbot/AddStandWordTest.java

@@ -9,9 +9,12 @@ import org.diagbot.nlp.participle.cfg.DefaultConfig;
 import org.diagbot.nlp.participle.word.Lexeme;
 import org.diagbot.nlp.participle.word.LexemePath;
 
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * @Description:
@@ -20,39 +23,80 @@ import java.util.Set;
  */
 public class AddStandWordTest {
     public static void main(String[] args) throws Exception { // manual timing harness: loads the stand-word cache, then runs addStandWord on one sample symptom
-//        Map<String, List<Map<String, String>>> kl_result_mapping_standword_map = ApplicationCacheUtil.getKl_result_mapping_standword_map();
-//        Set<Map.Entry<String, List<Map<String, String>>>> entries = kl_result_mapping_standword_map.entrySet();
-//        for (Map.Entry<String, List<Map<String, String>>> entry:entries) {
-//            System.out.println(entry.getKey() + "\t"+ entry.getValue());
-//        }
-//        Configuration configuration = new DefaultConfig();
-        ParticipleUtil util = new ParticipleUtil();
-//        List<String> fileContents = configuration.readFileContents("kl_result_mapping_standword.dict");
-//        for (String fileContent:fileContents) {
-//            if (fileContent.contains(">=") || fileContent.contains("≥")
-//                    || fileContent.contains(">") || fileContent.contains("大于")
-//                    || fileContent.contains(">") || fileContent.contains("超过")
-//                    || fileContent.contains("<=") || fileContent.contains("≤")
-//                    || fileContent.contains("<") || fileContent.contains("小于")
-//                    || fileContent.contains("<") || fileContent.contains("少于")){
-//                String[] fileContentSplit = fileContent.split("<=|≤|<|小于|<|少于|>=|≥|>|大于|>|超过");
-//                for (String s:fileContentSplit) {
-//                    LexemePath<Lexeme> lexemes = util.participle(s);
-//                    for (Lexeme lexeme:lexemes) {
-//                        if (lexeme.getProperty().equals("2")) {
-//                            System.out.println(lexeme.getText() + "\t" + lexeme.getProperty());
-//                        }
-//                    }
-//                }
-//            }
-//        }
+        //        Map<String, List<Map<String, String>>> kl_result_mapping_standword_map = ApplicationCacheUtil.getKl_result_mapping_standword_map();
+        //        Set<Map.Entry<String, List<Map<String, String>>>> entries = kl_result_mapping_standword_map.entrySet();
+        //        for (Map.Entry<String, List<Map<String, String>>> entry:entries) {
+        //            System.out.println(entry.getKey() + "\t"+ entry.getValue());
+        //        }
+//        getStandWordFeatureType();
+//        ApplicationCacheUtil.getKl_result_mapping_standword_map();
+        long startTime = System.currentTimeMillis();
         ApplicationCacheUtil.getKl_result_mapping_standword_map(); // builds the stand-word map when it is still null, so this interval covers the dictionary load
+        long endCacheTime = System.currentTimeMillis();
+        long cacheTime = endCacheTime - startTime;
+        System.out.println("处理标准词最终用了:" + cacheTime + "毫秒.");
         SearchData searchData = new SearchData();
-        searchData.setSymptom("收缩压大于170mmHg,R 29次/分钟,额破格配好");
-        LexemePath<Lexeme> data = util.participle(searchData.getSymptom());
+        searchData.setSymptom("关节僵硬3min");
+        long splitStartTime = System.currentTimeMillis(); // start of the tokenize + addStandWord measurement
+        LexemePath<Lexeme> data = ParticipleUtil.participle(searchData.getSymptom());
         ParamsDataProxy paramsDataProxy = new ParamsDataProxy();
-
-        paramsDataProxy.addStandWord(data,ApplicationCacheUtil.kl_result_mapping_standword_map,searchData);
+        paramsDataProxy.addStandWord(data, ApplicationCacheUtil.kl_result_mapping_standword_map, searchData);
+        long endTime = System.currentTimeMillis();
+        long splitSeconds = endTime - splitStartTime; // despite the name, this is a millisecond difference
+        long seconds = endTime - startTime; // despite the name, this is a millisecond difference (cache load included)
+        System.out.println("添加标准词使用了:"+splitSeconds + "毫秒.");
+        System.out.println("处理文本总共使用了:"+seconds + "毫秒.");
+        AddStandWordTest addStandWordTest = new AddStandWordTest(); // NOTE(review): this instance is never used
     }
 
+    /**
+     * Debug helper that prints, for every comparison-style entry of
+     * {@code kl_result_mapping_standword.dict}, its tokens and their property codes in the form
+     * {@code text(property)|text(property)|...}, followed by the total elapsed time in seconds.
+     *
+     * <p>Entries without a greater-than or less-than operator are skipped. Entries that yield no
+     * tokens are skipped as well instead of failing with a {@code NullPointerException}.
+     *
+     * @throws IOException if the dictionary file cannot be read
+     */
+    public static void getStandWordFeatureType() throws IOException {
+        Configuration configuration = new DefaultConfig();
+        List<String> fileContents = configuration.readFileContents("kl_result_mapping_standword.dict");
+        String operationGreater = ">=|≥|>|大于|>|超过";
+        String operationSmaller = "<=|≤|<|小于|<|少于";
+        long startTime = System.currentTimeMillis();
+        for (String fileContent : fileContents) {
+            // Cut the entry at its comparison operator before tokenizing, so the pieces are
+            // segmented separately rather than the entry as a whole.
+            String[] fileContentSplit;
+            if (fileContent.contains(">=") || fileContent.contains("≥")
+                    || fileContent.contains(">") || fileContent.contains("大于")
+                    || fileContent.contains(">") || fileContent.contains("超过")) {
+                fileContentSplit = fileContent.split(operationGreater);
+            } else if (fileContent.contains("<=") || fileContent.contains("≤")
+                    || fileContent.contains("<") || fileContent.contains("小于")
+                    || fileContent.contains("<") || fileContent.contains("少于")) {
+                fileContentSplit = fileContent.split(operationSmaller);
+            } else {
+                continue;
+            }
+            LexemePath<Lexeme> lexemes = null;
+            for (String fileContentWords : fileContentSplit) {
+                LexemePath<Lexeme> lexemeWord = ParticipleUtil.participle(fileContentWords);
+                // Same guard as the cache-building loop in ApplicationCacheUtil: a piece may
+                // produce no segmentation result.
+                if (lexemeWord == null) {
+                    continue;
+                }
+                if (null == lexemes) {
+                    lexemes = lexemeWord;
+                } else {
+                    for (Lexeme lexeme : lexemeWord) {
+                        lexemes.add(lexeme);
+                    }
+                }
+            }
+            if (lexemes == null) {
+                // Nothing was tokenized for this entry; there is nothing to print.
+                continue;
+            }
+            int i = 1;
+            for (Lexeme lexeme : lexemes) {
+                // A comma means several property codes were assigned; reduce them to one first.
+                if (lexeme.getProperty().contains(",")) {
+                    ApplicationCacheUtil.setProterty(lexeme);
+                }
+                if (lexemes.size() != i) {
+                    System.out.print(lexeme.getText() + "(" + lexeme.getProperty() + ")|");
+                } else {
+                    // Last token of the entry: terminate the output line.
+                    System.out.println(lexeme.getText() + "(" + lexeme.getProperty() + ")");
+                }
+                i++;
+            }
+        }
+        long endTime = System.currentTimeMillis();
+        float seconds = (endTime - startTime) / 1000F;
+        System.out.println(Float.toString(seconds) + "秒.");
+    }
 }

+ 31 - 0
bigdata-web/src/test/java/org/diagbot/CacheFileManagerTest.java

@@ -96,6 +96,37 @@ public class CacheFileManagerTest {
         }
     }
 
+    /**
+     * Regenerates {@code src/main/resources/kl_result_mapping_standword.dict} from the database.
+     *
+     * <p>Selects every non-deleted {@code kl_concept.lib_name} with {@code lib_type = 70} whose
+     * name contains a digit, encrypts each name with {@link EncrypDES#encrytor} and writes one
+     * encrypted entry per line. The project root is derived from the class-loader resource path
+     * by cutting it at the first occurrence of {@code "target"}.
+     *
+     * <p>Failures are printed to standard error and otherwise ignored; the JDBC resources are
+     * always released.
+     */
+    public void standwordMappingInitialized() {
+        // NOTE(review): database host and credentials are hard-coded in source; they should be
+        // moved to external configuration.
+        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/med-s?useUnicode=true&characterEncoding=UTF-8");
+        Connection conn = nlpJdbc.connect();
+        Statement st = null;
+        ResultSet rs = null;
+        try {
+            EncrypDES encrypDES = new EncrypDES();
+
+            String path = this.getClass().getClassLoader().getResource("").getPath();
+            path = path.substring(0, path.indexOf("target"));
+            String sql = "SELECT lib_name FROM kl_concept WHERE is_deleted = 'N' AND lib_type = 70 AND lib_name regexp '[0-9]'";
+            st = conn.createStatement();
+            rs = st.executeQuery(sql);
+
+            // try-with-resources flushes and closes the writer even when reading, encrypting or
+            // writing a row fails part-way through; previously the writer leaked in that case.
+            try (FileWriter fw = new FileWriter(path + "src/main/resources/kl_result_mapping_standword.dict")) {
+                while (rs.next()) {
+                    fw.write(encrypDES.encrytor(rs.getString(1)));
+                    fw.write("\n");
+                }
+            }
+        } catch (Exception e) {
+            // Every failure type was handled identically, so a single handler is sufficient.
+            e.printStackTrace();
+        } finally {
+            nlpJdbc.close(rs, st, conn);
+        }
+    }
+
     private List<Map.Entry<String, String>> rsToMap(ResultSet rs, boolean isJoin) throws SQLException{
         String r1 = "";
         String r2 = "";