Browse Source

性别年龄过滤数据来源修改至标准术语库,特征提取增加标准术语转化

louhr 6 years ago
parent
commit
eeded1db81

File diff suppressed because it is too large
+ 476 - 805
bigdata-web/src/main/resources/doc_result_mapping_diag.dict


File diff suppressed because it is too large
+ 890 - 1376
bigdata-web/src/main/resources/doc_result_mapping_filter.dict


+ 24 - 17
bigdata-web/src/test/java/org/diagbot/CacheFileManagerTest.java

@@ -26,7 +26,7 @@ public class CacheFileManagerTest {
     }
     }
 
 
     public void contextMappingInitialized() {
     public void contextMappingInitialized() {
-        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/bigdata-web?useUnicode=true&characterEncoding=UTF-8");
+        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/med-s?useUnicode=true&characterEncoding=UTF-8");
         Connection conn = nlpJdbc.connect();
         Connection conn = nlpJdbc.connect();
         Statement st = null;
         Statement st = null;
         ResultSet rs = null;
         ResultSet rs = null;
@@ -36,31 +36,36 @@ public class CacheFileManagerTest {
             String path = this.getClass().getClassLoader().getResource("").getPath();
             String path = this.getClass().getClassLoader().getResource("").getPath();
             path = path.substring(0, path.indexOf("target"));
             path = path.substring(0, path.indexOf("target"));
             //体征衍射
             //体征衍射
-            String sql = "select distinct name, name_mapping from doc_result_mapping_vital order by name";
-            st = conn.createStatement();
-            rs = st.executeQuery(sql);
-            List<Map.Entry<String, String>> libraryList = rsToMap(rs, true);
-
-            FileWriter fw = new FileWriter(path + "src/main/resources/doc_result_mapping_vital.dict");
-            for (Map.Entry<String, String> entry : libraryList) {
-                fw.write(encrypDES.encrytor(entry.getKey() + "|" + entry.getValue()));
-                fw.write("\n");
-            }
-            fw.close();
+//            String sql = "select distinct name, name_mapping from doc_result_mapping_vital order by name";
+//            st = conn.createStatement();
+//            rs = st.executeQuery(sql);
+//            List<Map.Entry<String, String>> libraryList = rsToMap(rs, true);
+//
+//            FileWriter fw = new FileWriter(path + "src/main/resources/doc_result_mapping_vital.dict");
+//            for (Map.Entry<String, String> entry : libraryList) {
+//                fw.write(encrypDES.encrytor(entry.getKey() + "|" + entry.getValue()));
+//                fw.write("\n");
+//            }
+//            fw.close();
             //疾病科室
             //疾病科室
-            sql = "select distinct diag_name, dept_name from doc_result_mapping_diag order by diag_name";
+            String sql = "SELECT k1.lib_name diag_name, k2.lib_name dept_name FROM kl_concept_common kcc, kl_concept k1, kl_concept k2 " +
+                    "where kcc.concept_id = k1.id and kcc.dept_id = k2.id " +
+                    "and k1.lib_type = 18 and kcc.dept_id  is not null";
             st = conn.createStatement();
             st = conn.createStatement();
             rs = st.executeQuery(sql);
             rs = st.executeQuery(sql);
-            libraryList = rsToMap(rs, true);
+            List<Map.Entry<String, String>> libraryList = rsToMap(rs, true);
 
 
-            fw = new FileWriter(path + "src/main/resources/doc_result_mapping_diag.dict");
+            FileWriter fw = new FileWriter(path + "src/main/resources/doc_result_mapping_diag.dict");
             for (Map.Entry<String, String> entry : libraryList) {
             for (Map.Entry<String, String> entry : libraryList) {
                 fw.write(encrypDES.encrytor(entry.getKey() + "|" + entry.getValue()));
                 fw.write(encrypDES.encrytor(entry.getKey() + "|" + entry.getValue()));
                 fw.write("\n");
                 fw.write("\n");
             }
             }
             fw.close();
             fw.close();
             //性别年龄
             //性别年龄
-            sql = "select feature_name, feature_type, sex, age_start, age_end from doc_result_mapping_filter";
+            sql = "SELECT k1.lib_name, k1.lib_type, kcc.sex_type, kcc.min_age, kcc.max_age " +
+                    "FROM kl_concept_common kcc, kl_concept k1 " +
+                    "where kcc.concept_id = k1.id " +
+                    "and k1.lib_type in (1, 18) and kcc.dept_id  is not null;";
             st = conn.createStatement();
             st = conn.createStatement();
             rs = st.executeQuery(sql);
             rs = st.executeQuery(sql);
             fw = new FileWriter(path + "src/main/resources/doc_result_mapping_filter.dict");
             fw = new FileWriter(path + "src/main/resources/doc_result_mapping_filter.dict");
@@ -72,7 +77,9 @@ public class CacheFileManagerTest {
                 r3 = rs.getString(3);
                 r3 = rs.getString(3);
                 r4 = rs.getString(4);
                 r4 = rs.getString(4);
                 r5 = rs.getString(5);
                 r5 = rs.getString(5);
-
+                if ("18".equals(r2)) {
+                    r2 = "2";
+                }
                 fw.write(encrypDES.encrytor(r1 + "|" + r2 + "|" + r3 + "|" + r4 + "|" + r5));
                 fw.write(encrypDES.encrytor(r1 + "|" + r2 + "|" + r3 + "|" + r4 + "|" + r5));
                 fw.write("\n");
                 fw.write("\n");
             }
             }

+ 1 - 0
nlp-web/src/main/java/org/diagbot/nlp/controller/FeatureController.java

@@ -154,6 +154,7 @@ public class FeatureController extends BaseController<Feature, FeatureWrapper, L
                             content = info.getPresent();
                             content = info.getPresent();
                             propel = propelSymptom;
                             propel = propelSymptom;
                             break;
                             break;
+
                         case FEATURE:
                         case FEATURE:
                             content = info.getPresent();
                             content = info.getPresent();
                             propel = propelSymptom;
                             propel = propelSymptom;

+ 25 - 11
nlp/src/main/java/org/diagbot/nlp/feature/FeatureAnalyze.java

@@ -52,15 +52,15 @@ public class FeatureAnalyze {
         if (StringUtils.isEmpty(content)) {
         if (StringUtils.isEmpty(content)) {
             return null;
             return null;
         }
         }
-        logger.info("待分词文本:" + content);
+//        logger.info("待分词文本:" + content);
         lexemePath = ParticipleUtil.participle(content);
         lexemePath = ParticipleUtil.participle(content);
 
 
-        String lexeme_text = "";
-        for (int i = 0; i < lexemePath.size(); i++) {
-            lexeme_text = lexeme_text + "|" + lexemePath.get(i).getText() + "^" + lexemePath.get(i).getProperty();
-        }
-        logger.info("分词文本结果:" + lexeme_text);
-//        lexemePath = replaceLexeme(lexemePath);
+//        String lexeme_text = "";
+//        for (int i = 0; i < lexemePath.size(); i++) {
+//            lexeme_text = lexeme_text + "|" + lexemePath.get(i).getText() + "^" + lexemePath.get(i).getProperty();
+//        }
+//        logger.info("分词文本结果:" + lexeme_text);
+        lexemePath = replaceLexeme(lexemePath);
         return caseToken.analyze(lexemePath);
         return caseToken.analyze(lexemePath);
     }
     }
 
 
@@ -72,10 +72,24 @@ public class FeatureAnalyze {
     private LexemePath replaceLexeme(LexemePath<Lexeme> lexemePath) {
     private LexemePath replaceLexeme(LexemePath<Lexeme> lexemePath) {
         if (NlpCache.standard_info_synonym_map == null) NlpCache.createSynonymCache();
         if (NlpCache.standard_info_synonym_map == null) NlpCache.createSynonymCache();
         for (Lexeme l : lexemePath) {
         for (Lexeme l : lexemePath) {
-
-            if (NlpCache.standard_info_synonym_map.get(l.getProperty()) != null &&
-                    NlpCache.standard_info_synonym_map.get(l.getProperty()).get(l.getText()) != null) {
-                l.setText(NlpCache.standard_info_synonym_map.get(l.getProperty()).get(l.getText()));
+            String[] props = l.getProperty().split(",");
+            l.setConcept(null);
+            for (int i = 0; i < props.length; i++) {
+                if (NlpCache.standard_info_synonym_map.get(props[i]) == null
+                        || NlpCache.standard_info_synonym_map.get(props[i]).get(l.getText()) == null) {
+                    if (StringUtils.isEmpty(l.getConcept())) {
+                        l.setConcept(l.getText());
+                    } else {
+                        l.setConcept(l.getConcept() + "," + l.getText());
+                    }
+                    continue;
+                } else {
+                    if (i == 0) {
+                        l.setConcept(NlpCache.standard_info_synonym_map.get(props[i]).get(l.getText()));
+                    } else {
+                        l.setConcept(l.getConcept() + "," + NlpCache.standard_info_synonym_map.get(props[i]).get(l.getText()));
+                    }
+                }
             }
             }
         }
         }
         return lexemePath;
         return lexemePath;

+ 25 - 9
nlp/src/main/java/org/diagbot/nlp/feature/extract/CaseTokenFactory.java

@@ -19,21 +19,37 @@ public class CaseTokenFactory {
         try {
         try {
             switch (featureType) {
             switch (featureType) {
                 case SYMPTOM:
                 case SYMPTOM:
-                    return (CaseTokenSymptom) create(caseTokenSymptom, CaseTokenSymptom.class);
+                    if (caseTokenSymptom == null) {
+                        caseTokenSymptom = new CaseTokenSymptom();
+                    }
+                    return caseTokenSymptom;
                 case FEATURE:
                 case FEATURE:
-                    return (CaseTokenFeature) create(caseTokenFeature, CaseTokenFeature.class);
+                    if (caseTokenFeature == null) {
+                        caseTokenFeature = new CaseTokenFeature();
+                    }
+                    return caseTokenFeature;
                 case VITAL:
                 case VITAL:
-                    return (CaseTokenVital) create(caseTokenVital, CaseTokenVital.class);
+                    if (caseTokenVital == null) {
+                        caseTokenVital = new CaseTokenVital();
+                    }
+                    return caseTokenVital;
                 case PACS:
                 case PACS:
-                    return (CaseTokenPACS) create(caseTokenPacs, CaseTokenPACS.class);
+                    if (caseTokenPacs == null) {
+                        caseTokenPacs = new CaseTokenPACS();
+                    }
+                    return caseTokenPacs;
                 case LIS:
                 case LIS:
-                    return (CaseTokenLIS) create(caseTokenLis, CaseTokenLIS.class);
+                    if (caseTokenLis == null) {
+                        caseTokenLis = new CaseTokenLIS();
+                    }
+                    return caseTokenLis;
                 case DIAG:
                 case DIAG:
-                    return (CaseTokenDiag) create(caseTokenDiag, CaseTokenDiag.class);
+                    if (caseTokenDiag == null) {
+                        caseTokenDiag = new CaseTokenDiag();
+                    }
+                    return caseTokenDiag;
             }
             }
-        } catch (java.lang.InstantiationException inst) {
-            throw inst;
-        } catch (java.lang.IllegalAccessException ille) {
+        }catch (Exception ille) {
             throw ille;
             throw ille;
         }
         }
         return null;
         return null;

+ 5 - 3
nlp/src/main/java/org/diagbot/nlp/util/DictUtil.java

@@ -36,8 +36,10 @@ public class DictUtil {
         try {
         try {
             EncrypDES encrypDES = new EncrypDES();
             EncrypDES encrypDES = new EncrypDES();
             //所有词典库 不能用concat_group 大小写不区分
             //所有词典库 不能用concat_group 大小写不区分
-            String sql = "select l_1.name l_1_name, l_1.type_id type_id, l_2.name l_2_name, l_1.concept_id from kl_library_info l_1 " +
-                    "left join kl_library_info l_2 on l_1.concept_id = l_2.concept_id and l_2.is_concept = 1 ";
+            String sql = "select l_1.name l_1_name, l_1.type_id type_id, l_2.name l_2_name, l_1.concept_id from kl_library_info l_1\n" +
+                    "                    left join kl_library_info l_2 on l_1.concept_id = l_2.concept_id and l_2.is_concept = 1\n" +
+                    "left join kl_concept kc on l_1.concept_id = kc.id\n" +
+                    "where kc.is_deleted = 'N' ";
             st = conn.createStatement();
             st = conn.createStatement();
             rs = st.executeQuery(sql);
             rs = st.executeQuery(sql);
             FileWriter fw = new FileWriter(path + "tc.dict");
             FileWriter fw = new FileWriter(path + "tc.dict");
@@ -111,7 +113,7 @@ public class DictUtil {
             sql = "select k1.lib_name k1_lib_name, k2.lib_name k2_lib_name from kl_relation r " +
             sql = "select k1.lib_name k1_lib_name, k2.lib_name k2_lib_name from kl_relation r " +
                     "left join kl_concept k1 on k1.id = r.start_id " +
                     "left join kl_concept k1 on k1.id = r.start_id " +
                     "left join kl_concept k2 on k2.id = r.end_id " +
                     "left join kl_concept k2 on k2.id = r.end_id " +
-                    "where r.relation_id = 19";
+                    "where r.relation_id = 19 and k1.is_deleted = 'N' and k2.is_deleted = 'N'";
             st = conn.createStatement();
             st = conn.createStatement();
             rs = st.executeQuery(sql);
             rs = st.executeQuery(sql);
             libraryList = rsToMap(rs, true);
             libraryList = rsToMap(rs, true);

+ 1 - 1
nlp/src/main/resources/push-tc.dict

@@ -2371,7 +2371,7 @@ m69a4x/0L74hGqmV/bsQkQ==
 o+64/6lWlVwbsVzvksU32UvBAr4WBXPTXnxmhPdKEuw=
 o+64/6lWlVwbsVzvksU32UvBAr4WBXPTXnxmhPdKEuw=
 smBLYgNa4yz1csFOl7z+mA==
 smBLYgNa4yz1csFOl7z+mA==
 912O8QwqajrYLVA30XnYOA4mcPdV+/L/B451+nWReBM=
 912O8QwqajrYLVA30XnYOA4mcPdV+/L/B451+nWReBM=
-MioQfYpcsS5MUwH8OdE0LldZqE11ANohg5hJgFXoh/wt6xeKUlvDXIFfp2pKpPLB
+MioQfYpcsS5MUwH8OdE0LldZqE11ANohg5hJgFXoh/yHvkBDsy1Bug==
 E54guvu04Mc576fFk05fLDgDx8qs11DG
 E54guvu04Mc576fFk05fLDgDx8qs11DG
 oGp4lSS5zorXHcaEgjO75KBqeJUkuc6KbLUhJW/WJt4=
 oGp4lSS5zorXHcaEgjO75KBqeJUkuc6KbLUhJW/WJt4=
 221a9lsaxVFAs/ILVSahm5VGC0afuaicA200GsQgR08=
 221a9lsaxVFAs/ILVSahm5VGC0afuaicA200GsQgR08=

File diff suppressed because it is too large
+ 9023 - 8711
nlp/src/main/resources/synonym.dict


File diff suppressed because it is too large
+ 16579 - 15617
nlp/src/main/resources/tc.dict


+ 16 - 5
nlp/src/test/java/org/diagbot/nlp/test/ConceptTest.java

@@ -28,17 +28,18 @@ public class ConceptTest {
     private static final String EXCEL_XLS = "xls";
     private static final String EXCEL_XLS = "xls";
     private static final String EXCEL_XLSX = "xlsx";
     private static final String EXCEL_XLSX = "xlsx";
 
 
-    private static final String lis_path = "E:\\git\\docs\\医学知识库\\化验\\化验标准库第一期-邵启华0617.xlsx";
-    private static final String lis_public_path = "E:\\git\\docs\\医学知识库\\化验\\化验标准库第一期-邵启华化验套餐公表项.xlsx";
+    private static final String lis_path = "E:\\git\\docs\\医学知识库\\化验\\化验标准库第一期-邵启华0701.xlsx";
     private static final String pacs_path = "E:\\git\\docs\\医学知识库\\辅检\\辅检标准库第一期-邵青华0612.xlsx";
     private static final String pacs_path = "E:\\git\\docs\\医学知识库\\辅检\\辅检标准库第一期-邵青华0612.xlsx";
     private static final String symptom_path = "E:\\git\\docs\\医学知识库\\症状\\症状标准库-王玲0625.xlsx";
     private static final String symptom_path = "E:\\git\\docs\\医学知识库\\症状\\症状标准库-王玲0625.xlsx";
     private static final String drug_path = "E:\\git\\docs\\医学知识库\\药品\\药品标准库-邵启华0618.xlsx";
     private static final String drug_path = "E:\\git\\docs\\医学知识库\\药品\\药品标准库-邵启华0618.xlsx";
-    private static final String vital_path = "E:\\git\\docs\\医学知识库\\体征\\体征标准库第一阶段-吕纯0618.xlsx";
+    private static final String vital_path = "E:\\git\\docs\\医学知识库\\体征\\体征标准库第一阶段-吕纯0701.xlsx";
     private static final String part_path = "E:\\git\\docs\\医学知识库\\部位和科室\\部位标准库-最新结构0621.xlsx";
     private static final String part_path = "E:\\git\\docs\\医学知识库\\部位和科室\\部位标准库-最新结构0621.xlsx";
     private static final String disease_path = "E:\\git\\docs\\医学知识库\\诊断名和治疗方案\\知识库标准疾病名称和疾病治疗方案-邵青华0605.xlsx";
     private static final String disease_path = "E:\\git\\docs\\医学知识库\\诊断名和治疗方案\\知识库标准疾病名称和疾病治疗方案-邵青华0605.xlsx";
     private static final String dept_path = "E:\\git\\docs\\医学知识库\\部位和科室\\科室标准库-邵启华0521.xlsx";
     private static final String dept_path = "E:\\git\\docs\\医学知识库\\部位和科室\\科室标准库-邵启华0521.xlsx";
     private static final String scale_path = "E:\\git\\docs\\医学知识库\\量表\\量表标准词-邵青华0618.xlsx";
     private static final String scale_path = "E:\\git\\docs\\医学知识库\\量表\\量表标准词-邵青华0618.xlsx";
     private static final String other_path = "E:\\git\\docs\\医学知识库\\其他史\\其他史-邵青华0619.xls";
     private static final String other_path = "E:\\git\\docs\\医学知识库\\其他史\\其他史-邵青华0619.xls";
+    private static final String zdyj_path = "E:\\git\\docs\\医学知识库\\诊断依据\\全部依据\\带数字的词\\带数字的词.xlsx";
+    private static final String zdyjqd_path = "E:\\git\\docs\\医学知识库\\诊断依据\\全部依据\\其他有问题的词\\诊断依据问题词0701.xlsx";
 
 
     public static void main(String[] args) {
     public static void main(String[] args) {
         try {
         try {
@@ -56,7 +57,8 @@ public class ConceptTest {
 
 
 //            insertStandardInfo();
 //            insertStandardInfo();
 //            validateSplit();
 //            validateSplit();
-//            insertLisMealPublic();
+//            insertZdyj();
+            insertZdyjQt();
 
 
 //            insertRelationSymptom();
 //            insertRelationSymptom();
 
 
@@ -128,6 +130,7 @@ public class ConceptTest {
         insertLisMeal();
         insertLisMeal();
         insertLisDetail();
         insertLisDetail();
         insertLisPublic();
         insertLisPublic();
+        insertLisMealPublic();
     }
     }
 
 
     public static void insertLisMeal() throws Exception {
     public static void insertLisMeal() throws Exception {
@@ -143,7 +146,7 @@ public class ConceptTest {
     }
     }
 
 
     public static void insertLisMealPublic() throws Exception {
     public static void insertLisMealPublic() throws Exception {
-        insertOneColumn(lis_public_path, 0, "46", "化验公表项");
+        insertOneColumn(lis_path, 5, "46", "化验公表项");
     }
     }
 
 
     public static void insertOther() throws Exception {
     public static void insertOther() throws Exception {
@@ -163,6 +166,14 @@ public class ConceptTest {
 
 
     }
     }
 
 
+    public static void insertZdyj() throws Exception {
+        insertOneColumn(zdyj_path, 4, "69", "诊断依据条件");
+    }
+
+    public static void insertZdyjQt() throws Exception {
+        insertOneColumn(zdyjqd_path, 4, "70", "诊断依据其他");
+    }
+
     public static void insertOneColumn(String path, int columnIndex, String typeId, String typeName) throws Exception {
     public static void insertOneColumn(String path, int columnIndex, String typeId, String typeName) throws Exception {
         insertOneColumn(0, path, columnIndex, typeId, typeName);
         insertOneColumn(0, path, columnIndex, typeId, typeName);
     }
     }