Browse Source

关系抽取使用Mark入口

louhr 6 years ago
parent
commit
9ca48122fa

+ 849 - 0
nlp/src/test/java/org/diagbot/nlp/test/ConceptTest.java

@@ -0,0 +1,849 @@
+package org.diagbot.nlp.test;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.time.DateFormatUtils;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.diagbot.pub.jdbc.MysqlJdbc;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.*;
+
+/**
+ * @ClassName org.diagbot.nlp.test.ConceptTest
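+ * @Description One-off import utility: reads the standard vocabulary Excel workbooks and writes their words into kl_library_info / kl_concept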
+ * @Author fyeman
+ * @Date 2019/6/6/006 10:18
+ * @Version 1.0
+ **/
+public class ConceptTest {
+    private static final String EXCEL_XLS = "xls"; // file-name suffix that getWorkbok opens as HSSFWorkbook (Excel 2003)
+    private static final String EXCEL_XLSX = "xlsx"; // file-name suffix that getWorkbok opens as XSSFWorkbook (Excel 2007/2010)
+
+    private static final String lis_path = "E:\\git\\docs\\医学知识库\\化验\\化验标准库第一期-邵启华0614.xlsx"; // read by insertLisMeal, insertLisDetail and insertLisPublic
+    private static final String pacs_path = "E:\\git\\docs\\医学知识库\\辅检\\辅检标准库第一期-邵青华0612.xlsx"; // read by insertPacs
+    private static final String symptom_path = "E:\\git\\docs\\医学知识库\\症状\\症状标准库-王玲0613.xlsx"; // read by insertSymptom
+    private static final String drug_path = "E:\\git\\docs\\医学知识库\\药品\\药品标准库-邵启华0515.xlsx"; // not read by any method of this class (insertDrug is an empty stub)
+    private static final String vital_path = "E:\\git\\docs\\医学知识库\\体征\\体征标准库第一阶段-吕纯0614.xlsx"; // read by insertVitalIndex, insertVitalValue and insertVitalResult
+    private static final String part_path = "E:\\git\\docs\\医学知识库\\部位和科室\\部位标准库-最新结构0613.xlsx"; // read by insertPart
+    private static final String disease_path = "E:\\git\\docs\\医学知识库\\诊断名和治疗方案\\知识库标准疾病名称和疾病治疗方案-邵青华0605.xlsx"; // read by insertDisease
+    private static final String dept_path = "E:\\git\\docs\\医学知识库\\部位和科室\\科室标准库-邵启华0521.xlsx"; // read by insertDept
+
+    public static void main(String[] args) { // manual entry point: every import step below is commented out, so running it does nothing
+        try {
+//            insertSymptom();
+//            insertPart();
+//            insertVital();
+//            insertLis();
+//            insertPacs();
+//            insertDisease();
+//            insertDept();
+
+//            insertStandardInfo();
+//            validateSplit();
+
+        } catch (Exception e) { // a failing step is only dumped to stderr; nothing is rethrown
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Collects every comma-separated word of {@code medicine_rule_split.split_word} that has no
+     * matching {@code library_info.name} and inserts those words into {@code kl_standard_notin}.
+     * Each missing word is recorded once, in the order it is first encountered.
+     */
+    public static void validateSplit() {
+        MysqlJdbc libraryJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/diagbot-med-0606?useUnicode=true&characterEncoding=UTF-8");
+        Set<String> seen = new HashSet<>();
+        for (Map<String, String> row : libraryJdbc.query("library_info", new String[]{"id", "name"}, "")) {
+            seen.add(row.get("name"));
+        }
+
+        MysqlJdbc ruleJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/diagbot-rule?useUnicode=true&characterEncoding=UTF-8");
+        List<Map<String, Object>> missingRows = new ArrayList<>();
+        for (Map<String, String> row : ruleJdbc.query("medicine_rule_split", new String[]{"id", "split_word"}, "")) {
+            for (String word : row.get("split_word").split(",")) {
+                // add() is true only for a word seen for the first time, which also de-duplicates the output
+                if (seen.add(word)) {
+                    Map<String, Object> line = new HashMap<>();
+                    line.put("name", word);
+                    missingRows.add(line);
+                }
+            }
+        }
+
+        MysqlJdbc targetJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/diagbot-med-0606?useUnicode=true&characterEncoding=UTF-8");
+        targetJdbc.insert(missingRows, "kl_standard_notin", new String[]{"name"});
+
+    }
+
+    /**
+     * Imports department words from the first sheet of {@code dept_path} as library type 47 ("科室").
+     * Column 0 holds the concept word and column 1 an alias that is mapped to it; the first row is
+     * skipped. A row that fails (for example an alias without a concept cell) is reported by its
+     * row counter and the import continues.
+     *
+     * @throws Exception if the workbook cannot be opened or read
+     */
+    public static void insertDept() throws Exception {
+        Map<String, String> concept_words = new HashMap<>();
+        Map<String, String> all_words = new HashMap<>();
+
+        File excelFile = new File(dept_path);
+        // try-with-resources: the stream was previously never closed
+        try (FileInputStream in = new FileInputStream(excelFile)) {
+            Sheet sheet = getWorkbok(in, excelFile).getSheetAt(0);
+            int count = 0;
+            for (Row row : sheet) {
+                if (count++ < 1) {
+                    continue; // skip the first row
+                }
+                try {
+                    Cell cell0 = row.getCell(0);
+                    if (cell0 != null && StringUtils.isNotEmpty(cell0.getStringCellValue())) {
+                        concept_words.put(cell0.getStringCellValue(), cell0.getStringCellValue());
+                        all_words.put(cell0.getStringCellValue(), cell0.getStringCellValue());
+                    }
+                    Cell cell1 = row.getCell(1);
+                    if (cell1 != null && !"".equals(cell1.getStringCellValue())) {
+                        all_words.put(cell1.getStringCellValue(), cell0.getStringCellValue());
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    System.out.println("第" + count + "出现错误!");
+                }
+            }
+        }
+
+        List<Map<String, Object>> all_words_list = new ArrayList<>();
+        for (String word : all_words.keySet()) {
+            if (StringUtils.isNotEmpty(word)) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("name", word);
+                line.put("type_id", "47");
+                line.put("type", "科室");
+                all_words_list.add(line);
+            }
+        }
+
+        insertLibraryInfo(all_words_list, all_words, concept_words);
+    }
+
+    /**
+     * Imports symptom words from the second sheet (index 1) of {@code symptom_path} as library
+     * type 1 ("症状"). Column 0 holds the concept word; its "、"-separated parts are registered as
+     * concepts. Column 1 holds an alias mapped to the full column-0 value. The first row is
+     * skipped; a failing row is reported by its row counter and the import continues.
+     *
+     * @throws Exception if the workbook cannot be opened or read
+     */
+    public static void insertSymptom() throws Exception {
+        Map<String, String> concept_words = new HashMap<>();
+        Map<String, String> all_words = new HashMap<>();
+
+        File excelFile = new File(symptom_path);
+        // try-with-resources: the stream was previously never closed
+        try (FileInputStream in = new FileInputStream(excelFile)) {
+            Sheet sheet = getWorkbok(in, excelFile).getSheetAt(1);
+            int count = 0;
+            for (Row row : sheet) {
+                if (count++ < 1) {
+                    continue; // skip the first row
+                }
+                try {
+                    Cell cell0 = row.getCell(0);
+                    if (cell0 != null && !"".equals(cell0.getStringCellValue())) {
+                        put(concept_words, cell0);
+                        all_words.put(cell0.getStringCellValue(), cell0.getStringCellValue());
+                    }
+                    Cell cell1 = row.getCell(1);
+                    if (cell1 != null && !"".equals(cell1.getStringCellValue())) {
+                        all_words.put(cell1.getStringCellValue(), cell0.getStringCellValue());
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    System.out.println("第" + count + "出现错误!");
+                }
+            }
+        }
+
+        List<Map<String, Object>> all_words_list = new ArrayList<>();
+        for (String word : all_words.keySet()) {
+            if (StringUtils.isNotEmpty(word)) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("name", word);
+                line.put("type_id", "1");
+                line.put("type", "症状");
+                all_words_list.add(line);
+            }
+        }
+
+        insertLibraryInfo(all_words_list, all_words, concept_words);
+    }
+
+    /**
+     * Imports body-part words from the first sheet of {@code part_path} as library type 3 ("部位").
+     * Here the concept word sits in column 1 and its alias in column 0. The first row is skipped;
+     * a failing row is reported by its row counter and the import continues.
+     *
+     * @throws Exception if the workbook cannot be opened or read
+     */
+    public static void insertPart() throws Exception {
+        Map<String, String> concept_words = new HashMap<>();
+        Map<String, String> all_words = new HashMap<>();
+
+        File excelFile = new File(part_path);
+        // try-with-resources: the stream was previously never closed
+        try (FileInputStream in = new FileInputStream(excelFile)) {
+            Sheet sheet = getWorkbok(in, excelFile).getSheetAt(0);
+            int count = 0;
+            for (Row row : sheet) {
+                if (count++ < 1) {
+                    continue; // skip the first row
+                }
+                try {
+                    Cell conceptCell = row.getCell(1);
+                    if (conceptCell != null && !"".equals(conceptCell.getStringCellValue())) {
+                        concept_words.put(conceptCell.getStringCellValue(), conceptCell.getStringCellValue());
+                        all_words.put(conceptCell.getStringCellValue(), conceptCell.getStringCellValue());
+                    }
+                    Cell aliasCell = row.getCell(0);
+                    if (aliasCell != null && !"".equals(aliasCell.getStringCellValue())) {
+                        all_words.put(aliasCell.getStringCellValue(), conceptCell.getStringCellValue());
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    System.out.println("第" + count + "出现错误!");
+                }
+            }
+        }
+
+        List<Map<String, Object>> all_words_list = new ArrayList<>();
+        for (String word : all_words.keySet()) {
+            if (StringUtils.isNotEmpty(word)) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("name", word);
+                line.put("type_id", "3");
+                line.put("type", "部位");
+                all_words_list.add(line);
+            }
+        }
+
+        insertLibraryInfo(all_words_list, all_words, concept_words);
+    }
+
+    public static void insertVital() throws Exception { // runs the three vital-sign imports one after another
+        insertVitalIndex(); // inserts words as type 33 ("体征指标")
+        insertVitalValue(); // inserts words as type 34 ("体征指标值")
+        insertVitalResult(); // inserts words as type 35 ("体征结果")
+    }
+
+    public static void insertLis() throws Exception { // runs the three lab-test imports one after another
+        insertLisMeal(); // inserts words as type 12 ("化验套餐")
+        insertLisDetail(); // inserts words as type 13 ("化验明细")
+        insertLisPublic(); // inserts words as type 46 ("化验公表项")
+    }
+
+    /**
+     * Imports lab-test package words from the first sheet of {@code lis_path} as library type 12
+     * ("化验套餐"). Column 0 holds the concept word; column 1 holds "、"-separated aliases mapped to
+     * it. The first row is skipped; a failing row is reported by its row counter and the import
+     * continues.
+     *
+     * @throws Exception if the workbook cannot be opened or read
+     */
+    public static void insertLisMeal() throws Exception {
+        Map<String, String> concept_words = new HashMap<>();
+        Map<String, String> all_words = new HashMap<>();
+
+        File excelFile = new File(lis_path);
+        // try-with-resources: the stream was previously never closed
+        try (FileInputStream in = new FileInputStream(excelFile)) {
+            Sheet sheet = getWorkbok(in, excelFile).getSheetAt(0);
+            int count = 0;
+            for (Row row : sheet) {
+                if (count++ < 1) {
+                    continue; // skip the first row
+                }
+                try {
+                    Cell cell0 = row.getCell(0);
+                    Cell cell1 = row.getCell(1);
+                    if (cell0 != null && !"".equals(cell0.getStringCellValue())) {
+                        put(concept_words, cell0);
+                        all_words.put(cell0.getStringCellValue(), cell0.getStringCellValue());
+                    }
+                    if (cell1 != null && StringUtils.isNotEmpty(cell1.getStringCellValue())) {
+                        for (String alias : cell1.getStringCellValue().split("、")) {
+                            all_words.put(alias, cell0.getStringCellValue());
+                        }
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    System.out.println("第" + count + "出现错误!");
+                }
+            }
+        }
+
+        List<Map<String, Object>> all_words_list = new ArrayList<>();
+        for (String word : all_words.keySet()) {
+            if (StringUtils.isNotEmpty(word)) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("name", word);
+                line.put("type_id", "12");
+                line.put("type", "化验套餐");
+                all_words_list.add(line);
+            }
+        }
+
+        insertLibraryInfo(all_words_list, all_words, concept_words);
+    }
+
+    /**
+     * Imports lab-test detail words from the first sheet of {@code lis_path} as library type 13
+     * ("化验明细"). Column 2 holds the concept word; column 3 holds "、"-separated aliases mapped to
+     * it. The first row is skipped; a failing row is reported by its row counter and the import
+     * continues.
+     *
+     * @throws Exception if the workbook cannot be opened or read
+     */
+    public static void insertLisDetail() throws Exception {
+        Map<String, String> concept_words = new HashMap<>();
+        Map<String, String> all_words = new HashMap<>();
+
+        File excelFile = new File(lis_path);
+        // try-with-resources: the stream was previously never closed
+        try (FileInputStream in = new FileInputStream(excelFile)) {
+            Sheet sheet = getWorkbok(in, excelFile).getSheetAt(0);
+            int count = 0;
+            for (Row row : sheet) {
+                if (count++ < 1) {
+                    continue; // skip the first row
+                }
+                try {
+                    Cell cell2 = row.getCell(2);
+                    Cell cell3 = row.getCell(3);
+                    if (cell2 != null && StringUtils.isNotEmpty(cell2.getStringCellValue())) {
+                        put(concept_words, cell2);
+                        all_words.put(cell2.getStringCellValue(), cell2.getStringCellValue());
+                    }
+                    if (cell3 != null && StringUtils.isNotEmpty(cell3.getStringCellValue())) {
+                        for (String alias : cell3.getStringCellValue().split("、")) {
+                            all_words.put(alias, cell2.getStringCellValue());
+                        }
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    System.out.println("第" + count + "出现错误!");
+                }
+            }
+        }
+
+        List<Map<String, Object>> all_words_list = new ArrayList<>();
+        for (String word : all_words.keySet()) {
+            if (StringUtils.isNotEmpty(word)) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("name", word);
+                line.put("type_id", "13");
+                line.put("type", "化验明细");
+                all_words_list.add(line);
+            }
+        }
+
+        insertLibraryInfo(all_words_list, all_words, concept_words);
+    }
+
+    /**
+     * Imports the words in column 4 of the first sheet of {@code lis_path} as library type 46
+     * ("化验公表项"). Every non-empty value is its own concept; there are no aliases. The first row
+     * is skipped; a failing row is reported by its row counter and the import continues.
+     *
+     * @throws Exception if the workbook cannot be opened or read
+     */
+    public static void insertLisPublic() throws Exception {
+        Map<String, String> concept_words = new HashMap<>();
+        Map<String, String> all_words = new HashMap<>();
+
+        File excelFile = new File(lis_path);
+        // try-with-resources: the stream was previously never closed
+        try (FileInputStream in = new FileInputStream(excelFile)) {
+            Sheet sheet = getWorkbok(in, excelFile).getSheetAt(0);
+            int count = 0;
+            for (Row row : sheet) {
+                if (count++ < 1) {
+                    continue; // skip the first row
+                }
+                try {
+                    Cell cell4 = row.getCell(4);
+                    if (cell4 != null && StringUtils.isNotEmpty(cell4.getStringCellValue())) {
+                        concept_words.put(cell4.getStringCellValue(), cell4.getStringCellValue());
+                        all_words.put(cell4.getStringCellValue(), cell4.getStringCellValue());
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    System.out.println("第" + count + "出现错误!");
+                }
+            }
+        }
+
+        List<Map<String, Object>> all_words_list = new ArrayList<>();
+        for (String word : all_words.keySet()) {
+            if (StringUtils.isNotEmpty(word)) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("name", word);
+                line.put("type_id", "46");
+                line.put("type", "化验公表项");
+                all_words_list.add(line);
+            }
+        }
+
+        insertLibraryInfo(all_words_list, all_words, concept_words);
+    }
+
+    /**
+     * Imports auxiliary-examination words from the first sheet of {@code pacs_path} as library
+     * type 16 ("辅检项目"). The concept word of a row is the right-most non-empty cell among
+     * columns 0-3; column 4 holds "、"-separated aliases mapped to it. Rows with no value in
+     * columns 0-3 contribute nothing. The first row is skipped; a failing row is reported by its
+     * row counter and the import continues.
+     *
+     * @throws Exception if the workbook cannot be opened or read
+     */
+    public static void insertPacs() throws Exception {
+        Map<String, String> concept_words = new HashMap<>();
+        Map<String, String> all_words = new HashMap<>();
+
+        File excelFile = new File(pacs_path);
+        // try-with-resources: the stream was previously never closed
+        try (FileInputStream in = new FileInputStream(excelFile)) {
+            Sheet sheet = getWorkbok(in, excelFile).getSheetAt(0);
+            int count = 0;
+            for (Row row : sheet) {
+                if (count++ < 1) {
+                    continue; // skip the first row
+                }
+                try {
+                    // later columns override earlier ones, so the right-most filled cell wins
+                    Cell cell = null;
+                    for (int column = 0; column <= 3; column++) {
+                        Cell candidate = row.getCell(column);
+                        if (candidate != null && StringUtils.isNotEmpty(candidate.getStringCellValue())) {
+                            cell = candidate;
+                        }
+                    }
+                    Cell cell4 = row.getCell(4);
+                    if (cell != null) {
+                        put(concept_words, cell);
+                        all_words.put(cell.getStringCellValue(), cell.getStringCellValue());
+                    }
+                    if (cell4 != null && StringUtils.isNotEmpty(cell4.getStringCellValue()) && cell != null) {
+                        for (String alias : cell4.getStringCellValue().split("、")) {
+                            all_words.put(alias, cell.getStringCellValue());
+                        }
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    System.out.println("第" + count + "出现错误!");
+                }
+            }
+        }
+
+        List<Map<String, Object>> all_words_list = new ArrayList<>();
+        for (String word : all_words.keySet()) {
+            if (StringUtils.isNotEmpty(word)) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("name", word);
+                line.put("type_id", "16");
+                line.put("type", "辅检项目");
+                all_words_list.add(line);
+            }
+        }
+
+        insertLibraryInfo(all_words_list, all_words, concept_words);
+    }
+
+    /**
+     * Imports disease words from the first sheet of {@code disease_path} as library type 18
+     * ("疾病"). Column 0 holds the concept word; column 1 holds "、"-separated aliases mapped to
+     * it, and aliases are ignored when the row has no column-0 cell. The first row is skipped; a
+     * failing row is reported by its row counter and the import continues.
+     *
+     * @throws Exception if the workbook cannot be opened or read
+     */
+    public static void insertDisease() throws Exception {
+        Map<String, String> concept_words = new HashMap<>();
+        Map<String, String> all_words = new HashMap<>();
+
+        File excelFile = new File(disease_path);
+        // try-with-resources: the stream was previously never closed
+        try (FileInputStream in = new FileInputStream(excelFile)) {
+            Sheet sheet = getWorkbok(in, excelFile).getSheetAt(0);
+            int count = 0;
+            for (Row row : sheet) {
+                if (count++ < 1) {
+                    continue; // skip the first row
+                }
+                try {
+                    Cell cell0 = row.getCell(0);
+                    Cell cell1 = row.getCell(1);
+                    if (cell0 != null && StringUtils.isNotEmpty(cell0.getStringCellValue())) {
+                        concept_words.put(cell0.getStringCellValue(), cell0.getStringCellValue());
+                        all_words.put(cell0.getStringCellValue(), cell0.getStringCellValue());
+                    }
+                    if (cell1 != null && StringUtils.isNotEmpty(cell1.getStringCellValue()) && cell0 != null) {
+                        for (String alias : cell1.getStringCellValue().split("、")) {
+                            all_words.put(alias, cell0.getStringCellValue());
+                        }
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    System.out.println("第" + count + "出现错误!");
+                }
+            }
+        }
+
+        List<Map<String, Object>> all_words_list = new ArrayList<>();
+        for (String word : all_words.keySet()) {
+            if (StringUtils.isNotEmpty(word)) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("name", word);
+                line.put("type_id", "18");
+                line.put("type", "疾病");
+                all_words_list.add(line);
+            }
+        }
+
+        insertLibraryInfo(all_words_list, all_words, concept_words);
+    }
+
+    public static void insertDrug() throws Exception { // empty stub: no drug import is implemented, drug_path is not read
+
+    }
+
+    public static void insertVitalIndex() throws Exception {
+        File excelFile = new File(vital_path); // 创建文件对象
+        FileInputStream in = new FileInputStream(excelFile); // 文件流
+        Workbook workbook = getWorkbok(in, excelFile);
+        Sheet sheet = workbook.getSheetAt(0);
+
+        int count = 0;
+        Map<String, String> concept_words = new HashMap<>();
+        Map<String, String> all_words = new HashMap<>();
+
+        String[] self = null;
+        for (Row row : sheet) {
+            // 跳过第一和第二行的目录
+            if (count < 1) {
+                count++;
+                continue;
+            }
+            count++;
+            try {
+                Cell cell1 = row.getCell(1);
+                Cell cell3 = row.getCell(3);
+
+                String concept_word = "";
+                if (cell1 != null && !"".equals(cell1.getStringCellValue())) {
+                    self = cell1.getStringCellValue().split("、");
+                    concept_word = self[0];
+                    if (concept_words.get(self[0]) == null) {
+                        put(concept_words, self[0], null, null);
+                        for (int i = 0; i < self.length; i++) {
+                            if (all_words.get(self[i]) == null) {
+                                all_words.put(self[i], self[0]);
+                            }
+                        }
+                    } else {
+                        for (int i = 0; i < self.length; i++) {
+                            if (all_words.get(self[i]) == null) {
+                                all_words.put(self[i], self[0]);
+                            }
+                        }
+                    }
+                }
+
+                if (cell3 != null && StringUtils.isNotEmpty(cell3.getStringCellValue())) {
+                    self = cell3.getStringCellValue().split("、");
+                    for (int i = 0; i < self.length; i++) {
+                        if (all_words.get(self[i]) == null) {
+                            all_words.put(self[i], concept_word);
+                        }
+                    }
+                }
+            } catch (Exception e) {
+                e.printStackTrace();
+                System.out.println("第" + count + "出现错误!");
+            }
+        }
+
+        List<Map<String, Object>> all_words_list = new ArrayList<>();
+
+        for (Map.Entry<String, String> entry : all_words.entrySet()) {
+            Map<String, Object> line = new HashMap<>();
+            if (StringUtils.isNotEmpty(entry.getKey())) {
+                line.put("name", entry.getKey());
+                line.put("type_id", "33");
+                line.put("type", "体征指标");
+                all_words_list.add(line);
+            }
+        }
+        insertLibraryInfo(all_words_list, all_words, concept_words);
+    }
+
+    /**
+     * Imports vital-sign value words from the first sheet of {@code vital_path} as library type 34
+     * ("体征指标值"). Every "、"-separated word in column 2 is its own concept. The first row is
+     * skipped; a failing row is reported by its row counter and the import continues.
+     *
+     * @throws Exception if the workbook cannot be opened or read
+     */
+    public static void insertVitalValue() throws Exception {
+        Map<String, String> concept_words = new HashMap<>();
+        Map<String, String> all_words = new HashMap<>();
+
+        File excelFile = new File(vital_path);
+        // try-with-resources: the stream was previously never closed
+        try (FileInputStream in = new FileInputStream(excelFile)) {
+            Sheet sheet = getWorkbok(in, excelFile).getSheetAt(0);
+            int count = 0;
+            for (Row row : sheet) {
+                if (count++ < 1) {
+                    continue; // skip the first row
+                }
+                try {
+                    Cell cell2 = row.getCell(2);
+                    if (cell2 != null && !"".equals(cell2.getStringCellValue())) {
+                        for (String word : cell2.getStringCellValue().split("、")) {
+                            put(concept_words, word, null, null);
+                            all_words.put(word, word);
+                        }
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    System.out.println("第" + count + "出现错误!");
+                }
+            }
+        }
+
+        List<Map<String, Object>> all_words_list = new ArrayList<>();
+        for (String word : all_words.keySet()) {
+            if (StringUtils.isNotEmpty(word)) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("name", word);
+                line.put("type_id", "34");
+                line.put("type", "体征指标值");
+                all_words_list.add(line);
+            }
+        }
+
+        insertLibraryInfo(all_words_list, all_words, concept_words);
+    }
+
+    public static void insertVitalResult() throws Exception {
+        File excelFile = new File(vital_path); // 创建文件对象
+        FileInputStream in = new FileInputStream(excelFile); // 文件流
+        Workbook workbook = getWorkbok(in, excelFile);
+        Sheet sheet = workbook.getSheetAt(0);
+
+        int count = 0;
+        Map<String, String> concept_words = new HashMap<>();
+        Map<String, String> all_words = new HashMap<>();
+
+        String[] self = null;
+        String[] suffix = null;
+        String[] prefix = null;
+        for (Row row : sheet) {
+            // 跳过第一和第二行的目录
+            if (count < 1) {
+                count++;
+                continue;
+            }
+            count++;
+            try {
+                Cell cell0 = row.getCell(0);
+                Cell cell3 = row.getCell(3);
+                Cell cell4 = row.getCell(4);
+                Cell cell5 = row.getCell(5);
+                Cell cell6 = row.getCell(6);
+                if (cell0 != null && StringUtils.isNotEmpty(cell0.getStringCellValue())) {
+                    put(concept_words, cell0);
+                    all_words.put(cell0.getStringCellValue(), cell0.getStringCellValue());
+                }
+                if (cell3 != null && StringUtils.isNotEmpty(cell3.getStringCellValue())) {
+                    self = cell3.getStringCellValue().split("、");
+                    if (cell4 != null && StringUtils.isNotEmpty(cell4.getStringCellValue())) {
+                        suffix = cell4.getStringCellValue().split("、");
+                        for (int i = 0; i < self.length; i++) {
+                            for (int j = 0; j < suffix.length; j++) {
+                                all_words.put(self[i] + suffix[j], cell0.getStringCellValue());
+                            }
+                        }
+                    }
+                    if (cell5 != null && StringUtils.isNotEmpty(cell5.getStringCellValue())) {
+                        prefix = cell5.getStringCellValue().split("、");
+                        for (int i = 0; i < self.length; i++) {
+                            for (int j = 0; j < prefix.length; j++) {
+                                all_words.put(prefix[j] + self[i], cell0.getStringCellValue());
+                            }
+                        }
+                    }
+                }
+                if (cell6 != null && StringUtils.isNotEmpty(cell6.getStringCellValue())) {
+                    self = cell6.getStringCellValue().split("、");
+                    for (int i = 0; i < self.length; i++) {
+                        all_words.put(self[i], cell0.getStringCellValue());
+                    }
+                }
+
+            } catch (Exception e) {
+                e.printStackTrace();
+                System.out.println("第" + count + "出现错误!");
+            }
+            prefix = null;
+            suffix = null;
+        }
+
+        List<Map<String, Object>> all_words_list = new ArrayList<>();
+        for (Map.Entry<String, String> entry : all_words.entrySet()) {
+            Map<String, Object> line = new HashMap<>();
+            if (StringUtils.isNotEmpty(entry.getKey())) {
+                line.put("name", entry.getKey());
+                line.put("type_id", "35");
+                line.put("type", "体征结果");
+                all_words_list.add(line);
+            }
+        }
+
+        insertLibraryInfo(all_words_list, all_words, concept_words);
+    }
+
+    /**
+     * Copies the rows of {@code kl_standard_info_0318} whose category is not in the excluded list
+     * into the library tables. Every name is treated as its own concept, and the row's
+     * {@code category_id} / {@code category} become the library {@code type_id} / {@code type}.
+     *
+     * @throws Exception declared for callers; nothing in the body is visibly checked
+     */
+    public static void insertStandardInfo() throws Exception {
+        MysqlJdbc sourceJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/diagbot-med-0606?useUnicode=true&characterEncoding=UTF-8");
+        List<Map<String, String>> rows = sourceJdbc.query("kl_standard_info_0318", new String[]{"id", "name", "category_id", "category"}, " where category_id not in (1, 3, 13,14,15, 17,18,19,60,61,62, 10)");
+
+        Map<String, String> concepts = new HashMap<>();
+        Map<String, String> wordToConcept = new HashMap<>();
+        List<Map<String, Object>> libraryRows = new ArrayList<>();
+        for (Map<String, String> row : rows) {
+            String name = row.get("name");
+
+            Map<String, Object> line = new HashMap<>();
+            line.put("name", name);
+            line.put("type_id", row.get("category_id"));
+            line.put("type", row.get("category"));
+            libraryRows.add(line);
+
+            // each standard word is a concept that maps to itself
+            concepts.put(name, name);
+            wordToConcept.put(name, name);
+        }
+
+        insertLibraryInfo(libraryRows, wordToConcept, concepts);
+    }
+
+    /**
+     * Writes a batch of words to {@code kl_library_info}, creates a {@code kl_concept} row for each
+     * concept word, then updates every library row with its concept id and concept flag.
+     *
+     * @param list          rows to insert; each must carry "name" and should carry "type_id".
+     *                      The maps are modified in place (audit columns, "order_no", "is_concept").
+     * @param all_words     maps every word to the concept word it belongs to
+     * @param concept_words words that are concepts themselves (only key presence is used)
+     */
+    public static void insertLibraryInfo(List<Map<String, Object>> list, Map<String, String> all_words, Map<String, String> concept_words) {
+        String updateDate = DateFormatUtils.format(new Date(), "yyyy-MM-dd HH:mm:ss");
+        for (Map<String, Object> row : list) {
+            row.put("order_no", "0");
+            row.put("is_concept", "0");
+            initInsertLine(row, updateDate);
+        }
+        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/med-s?useUnicode=true&characterEncoding=UTF-8");
+        // presumably the generated ids, index-aligned with list (verify against MysqlJdbc.insertBatch)
+        int[] infoIds = nlpJdbc.insertBatch(list, "kl_library_info", new String[]{"name", "type_id", "is_concept", "creator", "modifier", "gmt_create", "gmt_modified", "is_deleted"});
+
+        // step 2: one kl_concept row per inserted word that is a concept
+        List<Map<String, Object>> conceptRows = new ArrayList<>();
+        for (int i = 0; i < infoIds.length; i++) {
+            Map<String, Object> source = list.get(i);
+            String name = source.get("name").toString();
+            if (concept_words.get(name) == null) {
+                continue;
+            }
+            Map<String, Object> conceptRow = new HashMap<>();
+            conceptRow.put("lib_id", infoIds[i]);
+            conceptRow.put("lib_name", name);
+            conceptRow.put("lib_type", source.get("type_id"));
+            initInsertLine(conceptRow, updateDate);
+            conceptRows.add(conceptRow);
+        }
+
+        int[] conceptIds = nlpJdbc.insertBatch(conceptRows, "kl_concept", new String[]{"lib_id", "lib_name", "lib_type", "creator", "modifier", "gmt_create", "gmt_modified", "is_deleted"});
+        Map<String, String> conceptIdByName = new HashMap<>();
+        for (int i = 0; i < conceptIds.length; i++) {
+            conceptIdByName.put(conceptRows.get(i).get("lib_name").toString(), String.valueOf(conceptIds[i]));
+        }
+
+        // step 3: point every library row at its concept and flag the concept rows
+        List<Map<String, Object>> updates = new ArrayList<>();
+        List<Map<String, Object>> wheres = new ArrayList<>();
+        for (int i = 0; i < infoIds.length; i++) {
+            String name = list.get(i).get("name").toString();
+            String conceptName = all_words.get(name);
+            String conceptId = conceptIdByName.get(conceptName);
+            if (conceptId == null) {
+                // diagnostic output: the word has no resolvable concept id
+                System.out.println(name);
+                System.out.println(conceptName);
+            }
+
+            Map<String, Object> update = new HashMap<>();
+            update.put("concept_id", conceptId);
+            update.put("is_concept", concept_words.get(name) != null ? "1" : "0");
+            updates.add(update);
+
+            Map<String, Object> where = new HashMap<>();
+            where.put("id", infoIds[i]);
+            wheres.add(where);
+        }
+        nlpJdbc.update("kl_library_info", updates, wheres);
+    }
+
+    /**
+     * Fills the audit columns of a row about to be inserted: not deleted, a fixed creator and
+     * modifier, and the same timestamp for creation and modification.
+     *
+     * @param line       row to fill in place
+     * @param updateDate timestamp stored in both "gmt_create" and "gmt_modified"
+     */
+    public static void initInsertLine(Map<String, Object> line, String updateDate) {
+        final String author = "楼辉荣";
+        line.put("is_deleted", "N");
+        line.put("creator", author);
+        line.put("modifier", author);
+        line.put("gmt_create", updateDate);
+        line.put("gmt_modified", updateDate);
+    }
+
+    /**
+     * Opens a workbook with the POI implementation that matches the file-name suffix.
+     *
+     * @param in   stream positioned at the start of the workbook
+     * @param file file the stream was opened from; only its name is inspected
+     * @return an {@code HSSFWorkbook} for names ending in "xls", an {@code XSSFWorkbook} for names
+     *         ending in "xlsx", or {@code null} for any other name
+     * @throws IOException if the workbook constructor fails to read the stream
+     */
+    public static Workbook getWorkbok(InputStream in, File file) throws IOException {
+        String fileName = file.getName();
+        if (fileName.endsWith(EXCEL_XLS)) {  // Excel 2003
+            return new HSSFWorkbook(in);
+        }
+        if (fileName.endsWith(EXCEL_XLSX)) {  // Excel 2007/2010
+            return new XSSFWorkbook(in);
+        }
+        return null;
+    }
+
+    public static Map<String, String> put(Map<String, String> words, String word, String[] prefix, String[] suffix) {
+        if (words.get(word) == null) {
+            words.put(word, word);
+        }
+        if (prefix != null) {
+            for (int i = 0; i < prefix.length; i++) {
+                if (words.get(prefix[i] + word) == null) {
+                    words.put(prefix[i] + word, prefix[i] + word);
+                }
+            }
+        }
+        if (suffix != null) {
+            for (int i = 0; i < suffix.length; i++) {
+                if (words.get(word + suffix[i]) == null) {
+                    words.put(word + suffix[i], word + suffix[i]);
+                }
+            }
+        }
+        if (prefix != null && suffix != null) {
+            for (int i = 0; i < prefix.length; i++) {
+                for (int j = 0; j < suffix.length; j++) {
+                    if (words.get(prefix[i] + word + suffix[j]) == null) {
+                        words.put(prefix[i] + word + suffix[j], prefix[i] + word + suffix[j]);
+                    }
+                }
+            }
+        }
+        return words;
+    }
+
+    public static Map<String, String> put(Map<String, String> words, Cell cell) { // registers the "、"-separated parts of the cell without any suffix
+        return put(words, cell, null); // a null suffix makes the three-argument overload store each part unchanged
+    }
+
+    public static Map<String, String> put(Map<String, String> words, Cell cell, String suffix) {
+        if (cell == null) {
+            return words;
+        }
+        String[] key = cell.getStringCellValue().split("、");
+        for (int i = 0; i < key.length; i++) {
+            if (words.get(key[i]) == null) {
+                if (suffix == null) {
+                    words.put(key[i], key[i]);
+                } else {
+                    words.put(key[i] + suffix, key[i] + suffix);
+                }
+            }
+        }
+        return words;
+    }
+}

+ 779 - 0
nlp/src/test/java/org/diagbot/nlp/test/ExcelReadTest.java

@@ -0,0 +1,779 @@
+package org.diagbot.nlp.test;
+
+import org.apache.commons.lang3.time.DateFormatUtils;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.ss.usermodel.*;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.diagbot.pub.jdbc.MysqlJdbc;
+import org.springframework.util.StringUtils;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.NumberFormat;
+import java.util.*;
+
+/**
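+ * @ClassName org.diagbot.nlp.test.ExcelReadTest
+ * @Description Ad-hoc routines that read the medical knowledge-base Excel workbooks into term dictionaries and check rule spreadsheets and database tables against them.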
+ * @Author fyeman
+ * @Date 2019/5/28/028 13:28
+ * @Version 1.0
+ **/
+public class ExcelReadTest {
+    // File-name suffixes used by getWorkbok to choose between the HSSF (.xls) and XSSF (.xlsx) readers.
+    private static final String EXCEL_XLS = "xls";
+    private static final String EXCEL_XLSX = "xlsx";
+
+    // Term dictionaries (term -> term) filled by readLis, readPacs, readSymptom and readVital
+    // and merged by readPart1.
+    private static Map<String, String> lis_words = new HashMap<>();
+    private static Map<String, String> pacs_words = new HashMap<>();
+    private static Map<String, String> symptom_words = new HashMap<>();
+    private static Map<String, String> vital_words = new HashMap<>();
+
+    // Absolute, machine-specific locations of the source workbooks.
+    private static final String lis_path  = "E:\\git\\docs\\医学知识库\\化验\\化验标准库第一期-邵启华0612.xlsx";
+    private static final String pacs_path  = "E:\\git\\docs\\医学知识库\\辅检\\辅检标准库第一期-邵青华0612.xlsx";
+    private static final String symptom_path  = "E:\\git\\docs\\医学知识库\\症状\\症状标准库-王玲0613.xlsx";
+    private static final String drug_path  = "E:\\git\\docs\\医学知识库\\药品\\药品标准库-邵启华0515.xlsx";
+    private static final String vital_path  = "E:\\git\\docs\\医学知识库\\体征\\体征标准库第一阶段-吕纯0613.xlsx";
+    private static final String part_path  = "E:\\git\\docs\\医学知识库\\部位和科室\\部位标准库-邵启华0605.xlsx";
+    private static final String disease_path = "E:\\git\\docs\\医学知识库\\诊断名和治疗方案\\知识库标准疾病名称和疾病治疗方案-邵青华0605.xlsx";
+
+    /**
+     * Entry point: runs {@code readPart1()} and prints the stack trace of any failure.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        // Other one-off jobs, enabled by hand when needed:
+        //     readLis();
+        //     readDrug();
+        //     readDisease();
+        //     readPart();
+        //     readPacs();
+        try {
+            readPart1();
+        } catch (Exception failure) {
+            failure.printStackTrace();
+        }
+    }
+
+    public static void readLis() throws Exception {
+        File excelFile = new File(lis_path); // 创建文件对象
+        FileInputStream in = new FileInputStream(excelFile); // 文件流
+        Workbook workbook = getWorkbok(in, excelFile);
+        Sheet sheet = workbook.getSheetAt(0);
+
+        Map<String, String> notin_words = new HashMap<>();
+        int count = 0;
+        for (Row row : sheet) {
+            // 跳过第一和第二行的目录
+            if (count < 1) {
+                count++;
+                continue;
+            }
+            count++;
+            try {
+                Cell cell0 = row.getCell(0);
+                Cell cell1 = row.getCell(1);
+                Cell cell2 = row.getCell(2);
+                Cell cell3 = row.getCell(3);
+                Cell cell4 = row.getCell(4);
+                Cell cell12 = row.getCell(12);
+                put(lis_words, cell0);
+                put(lis_words, cell2);
+                put(lis_words, cell4);
+
+                if (cell1 != null) {
+                    String[] val =  cell1.getStringCellValue().split("、");
+                    for (int i = 0; i < val.length; i++) {
+                        put(lis_words, val[i], null, null);
+                    }
+                }
+                if (cell3 != null) {
+                    String[] val =  cell3.getStringCellValue().split("、");
+                    for (int i = 0; i < val.length; i++) {
+                        put(lis_words, val[i], null, null);
+                    }
+                }
+
+                if (cell12 == null || StringUtils.isEmpty(cell12.getStringCellValue())) {
+                    put(lis_words, cell2, "增高");
+                    put(lis_words, cell2, "降低");
+                } else {
+                    if (cell12.getStringCellValue().indexOf("阴性") > -1) {
+                        put(lis_words, cell2, "阴性");
+                        put(lis_words, cell2, "阳性");
+                    }
+                }
+            }catch (Exception e) {
+                System.out.println("第" + count + "出现错误!");
+            }
+        }
+
+//        List<Map<String, Object>> lists = new ArrayList<>();
+//        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/diagbot-rule?useUnicode=true&characterEncoding=UTF-8");
+//        List<Map<String, String>> data = nlpJdbc.query("medicine_rule_part1", new String[]{"id","standWord","correlationWord","result"}, " where number like '3.%'");
+//        for (Map<String, String> map : data) {
+//            String standWord = map.get("standWord");
+//            String correlationWord = map.get("correlationWord");
+//            if (lis_words.get(standWord) == null) {
+//                Map<String, Object> line = new HashMap<>();
+//                line.put("meal_name", standWord);
+//                lists.add(line);
+//                lis_words.put(standWord, standWord);
+//            }
+//            if (lis_words.get(correlationWord) == null) {
+//                Map<String, Object> line = new HashMap<>();
+//                line.put("meal_name", correlationWord);
+//                lists.add(line);
+//                lis_words.put(correlationWord, correlationWord);
+//            }
+//        }
+//
+//        nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/diagbot-rule?useUnicode=true&characterEncoding=UTF-8");
+//        data = nlpJdbc.query("kl_standard_info", new String[]{"id","name"}, " where category_id in (13, 14)");
+//        for (Map<String, String> map : data) {
+//            String name = map.get("name");
+//            if (lis_words.get(name) == null) {
+//                Map<String, Object> line = new HashMap<>();
+//                line.put("meal_name", name);
+//                lists.add(line);
+//                lis_words.put(name, name);
+//            }
+//        }
+//
+//        nlpJdbc.insert(lists, "lis_notin", new String[]{"meal_name"});
+    }
+
+    public static void readDrug() throws Exception {
+        File excelFile = new File(drug_path); // 创建文件对象
+        FileInputStream in = new FileInputStream(excelFile); // 文件流
+        Workbook workbook = getWorkbok(in, excelFile);
+        Sheet sheet = workbook.getSheetAt(0);
+
+        Map<String, String> words = new HashMap<>();
+        words = loadDataFromSheet(sheet, words, new int[]{0, 1, 2, 3, 4});
+
+        sheet = workbook.getSheetAt(1);
+        words = loadDataFromSheet(sheet, words, new int[]{0});
+
+        List<Map<String, Object>> lists = new ArrayList<>();
+        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/diagbot-rule?useUnicode=true&characterEncoding=UTF-8");
+        List<Map<String, String>> data = nlpJdbc.query("kl_standard_info", new String[]{"id","name"}, " where category_id = 10");
+        for (Map<String, String> map : data) {
+            String name = map.get("name");
+            if (words.get(name) == null) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("drug_name", name);
+                lists.add(line);
+                words.put(name, name);
+            }
+        }
+
+        nlpJdbc.insert(lists, "drug_notin", new String[]{"drug_name"});
+    }
+
+    public static void readDisease() throws Exception {
+        File excelFile = new File(disease_path); // 创建文件对象
+        FileInputStream in = new FileInputStream(excelFile); // 文件流
+        Workbook workbook = getWorkbok(in, excelFile);
+        Sheet sheet = workbook.getSheetAt(0);
+
+        Map<String, String> words = new HashMap<>();
+        words = loadDataFromSheet(sheet, words, new int[]{0, 1});
+
+        List<Map<String, Object>> lists = new ArrayList<>();
+        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/diagbot-rule?useUnicode=true&characterEncoding=UTF-8");
+        List<Map<String, String>> data = nlpJdbc.query("kl_standard_info", new String[]{"id","name"}, " where category_id = 19");
+        for (Map<String, String> map : data) {
+            String name = map.get("name");
+            if (words.get(name) == null) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("disease", name);
+                lists.add(line);
+                words.put(name, name);
+            }
+        }
+
+        nlpJdbc.insert(lists, "disease_notin", new String[]{"disease"});
+    }
+
+    public static void readPart() throws Exception {
+        File excelFile = new File(part_path); // 创建文件对象
+        FileInputStream in = new FileInputStream(excelFile); // 文件流
+        Workbook workbook = getWorkbok(in, excelFile);
+        Sheet sheet = workbook.getSheetAt(0);
+
+        Map<String, String> words = new HashMap<>();
+        int count = 0;
+        Cell cell = null;
+        String[] prefix = null;
+        String[] suffix = null;
+        for (Row row : sheet) {
+            // 跳过第一和第二行的目录
+            if (count < 1) {
+                count++;
+                continue;
+            }
+            count++;
+            try {
+                Cell cell0 = row.getCell(0);
+                Cell cell1 = row.getCell(1);
+                Cell cell2 = row.getCell(2);
+                Cell cell3 = row.getCell(3);
+                Cell cell4 = row.getCell(4);
+                Cell cell5 = row.getCell(5);
+                Cell cell6 = row.getCell(6);
+                Cell cell7 = row.getCell(7);
+                Cell cell8 = row.getCell(8);
+                if (cell0 != null && !"".equals(cell0.getStringCellValue())) {
+                    cell = cell0;
+                } else if (cell1 != null && !"".equals(cell1.getStringCellValue())) {
+                    cell = cell1;
+                } else if (cell2 != null && !"".equals(cell2.getStringCellValue())) {
+                    cell = cell2;
+                } else if (cell3 != null && !"".equals(cell3.getStringCellValue())) {
+                    cell = cell3;
+                } else if (cell4 != null && !"".equals(cell4.getStringCellValue())) {
+                    cell = cell4;
+                } else if (cell5 != null && !"".equals(cell5.getStringCellValue())) {
+                    cell = cell5;
+                } else if (cell6 != null && !"".equals(cell6.getStringCellValue())) {
+                    cell = cell6;
+                }
+
+                if (cell7 != null) {
+                    prefix  = cell7.getStringCellValue().split("、");
+                }
+                if (cell8 != null) {
+                    suffix  = cell8.getStringCellValue().split("、");
+                }
+                String part = cell.getStringCellValue();
+                if (part.equals("")) {
+                    System.out.println(count + part);
+                }
+                if (part.indexOf("(") > -1) {
+                    String part1 = part.substring(0, part.indexOf("("));
+                    String part2 = part.substring(part.indexOf("(") + 1, part.indexOf(")"));
+                    put(words, part1, prefix, suffix);
+                    put(words, part2, prefix, suffix);
+                } else {
+                    put(words, part, prefix, suffix);
+                }
+                prefix = null;
+                suffix = null;
+            }catch (Exception e) {
+                e.printStackTrace();
+                System.out.println("第" + count + "出现错误!");
+            }
+        }
+
+        List<Map<String, Object>> lists = new ArrayList<>();
+
+        for (Map.Entry<String, String> entry : words.entrySet()) {
+            Map<String, Object> line = new HashMap<>();
+            line.put("part", entry.getKey());
+            line.put("concept", entry.getKey());
+            lists.add(line);
+        }
+
+        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/diagbot-rule?useUnicode=true&characterEncoding=UTF-8");
+        List<Map<String, String>> data = nlpJdbc.query("kl_standard_info", new String[]{"id","name"}, " where category_id = 3");
+        for (Map<String, String> map : data) {
+            String name = map.get("name");
+            if (words.get(name) == null) {
+                Map<String, Object> line = new HashMap<>();
+                line.put("part", name);
+                lists.add(line);
+                words.put(name, name);
+            }
+        }
+
+        nlpJdbc.insert(lists, "part_notin", new String[]{"part", "concept"});
+    }
+
+    public static void readPacs() throws Exception {
+        File excelFile = new File(pacs_path); // 创建文件对象
+        FileInputStream in = new FileInputStream(excelFile); // 文件流
+        Workbook workbook = getWorkbok(in, excelFile);
+        Sheet sheet = workbook.getSheetAt(0);
+
+        int count = 0;
+        String[] alias = null;
+        for (Row row : sheet) {
+            // 跳过第一和第二行的目录
+            if (count < 1) {
+                count++;
+                continue;
+            }
+            count++;
+            try {
+                Cell cell0 = row.getCell(0);
+                Cell cell1 = row.getCell(1);
+                Cell cell2 = row.getCell(2);
+                Cell cell3 = row.getCell(3);
+                Cell cell4 = row.getCell(4);
+                if (cell0 != null && !"".equals(cell0.getStringCellValue())) {
+                    put(pacs_words, cell0);
+                }
+                if (cell1 != null && !"".equals(cell1.getStringCellValue())) {
+                    put(pacs_words, cell1);
+                }
+                if (cell2 != null && !"".equals(cell2.getStringCellValue())) {
+                    put(pacs_words, cell2);
+                }
+                if (cell3 != null && !"".equals(cell3.getStringCellValue())) {
+                    put(pacs_words, cell3);
+                }
+                if (cell4 != null && !"".equals(cell4.getStringCellValue())) {
+                    String val = cell4.getStringCellValue();
+                    if (val.indexOf("、") >  -1) {
+                        alias = val.split("、");
+                        for (int i = 0; i < alias.length; i++) {
+                            put(pacs_words, alias[i], null,  null);
+                        }
+                    } else {
+                        put(pacs_words, cell4);
+                    }
+                }
+            }catch (Exception e) {
+                e.printStackTrace();
+                System.out.println("第" + count + "出现错误!");
+            }
+        }
+
+//        List<Map<String, Object>> lists = new ArrayList<>();
+//        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/diagbot-rule?useUnicode=true&characterEncoding=UTF-8");
+//        List<Map<String, String>> data = nlpJdbc.query("kl_standard_info", new String[]{"id","name"}, " where category_id = 17");
+//        for (Map<String, String> map : data) {
+//            String name = map.get("name");
+//            if (pacs_words.get(name) == null) {
+//                Map<String, Object> line = new HashMap<>();
+//                line.put("pacs_name", name);
+//                lists.add(line);
+//                pacs_words.put(name, name);
+//            }
+//        }
+//
+//        nlpJdbc.insert(lists, "pacs_notin", new String[]{"pacs_name"});
+    }
+
+    public static void readSymptom() throws Exception {
+        File excelFile = new File(symptom_path); // 创建文件对象
+        FileInputStream in = new FileInputStream(excelFile); // 文件流
+        Workbook workbook = getWorkbok(in, excelFile);
+        Sheet sheet = workbook.getSheetAt(1);
+
+        int count = 0;
+        String[] alias = null;
+        for (Row row : sheet) {
+            // 跳过第一和第二行的目录
+            if (count < 1) {
+                count++;
+                continue;
+            }
+            count++;
+            try {
+                Cell cell1 = row.getCell(1);
+                if (cell1 != null && !"".equals(cell1.getStringCellValue())) {
+                    put(symptom_words, cell1);
+                }
+            }catch (Exception e) {
+                e.printStackTrace();
+                System.out.println("第" + count + "出现错误!");
+            }
+        }
+    }
+
+    public static void readVital() throws Exception {
+        File excelFile = new File(vital_path); // 创建文件对象
+        FileInputStream in = new FileInputStream(excelFile); // 文件流
+        Workbook workbook = getWorkbok(in, excelFile);
+        Sheet sheet = workbook.getSheetAt(0);
+
+        Map<String, String> words = new HashMap<>();
+        int count = 0;
+        Cell cell = null;
+        String[] prefix = null;
+        String[] suffix = null;
+        String[] self = null;
+        for (Row row : sheet) {
+            // 跳过第一和第二行的目录
+            if (count < 1) {
+                count++;
+                continue;
+            }
+            count++;
+            try {
+                Cell cell0 = row.getCell(0);
+                Cell cell1 = row.getCell(1);
+                Cell cell2 = row.getCell(2);
+                Cell cell3 = row.getCell(3);
+                Cell cell4 = row.getCell(4);
+                Cell cell5 = row.getCell(5);
+                Cell cell6 = row.getCell(6);
+                if (cell0 != null && !"".equals(cell0.getStringCellValue())) {
+                    put(vital_words, cell0);
+                }
+                if (cell1 != null && !"".equals(cell1.getStringCellValue())) {
+                    self = cell1.getStringCellValue().split("、");
+                    for (int i = 0; i  < self.length; i++) {
+                        put(vital_words, self[i], null, null);
+                    }
+                }
+                if (cell2 != null && !"".equals(cell2.getStringCellValue())) {
+                    self = cell2.getStringCellValue().split("、");
+                    for (int i = 0; i  < self.length; i++) {
+                        put(vital_words, self[i], null, null);
+                    }
+                }
+                if (cell6 != null && !"".equals(cell6.getStringCellValue())) {
+                    put(vital_words, cell6);
+                }
+                if (cell3 != null && !"".equals(cell3.getStringCellValue())) {
+                    self = cell3.getStringCellValue().split("、");
+                    if (cell4 != null && !"".equals(cell4.getStringCellValue())) {
+                        suffix = cell4.getStringCellValue().split("、");
+                        for (int i = 0; i  < self.length; i++) {
+                            put(vital_words, self[i], null, suffix);
+                        }
+                    }
+                    if (cell5 != null && !"".equals(cell5.getStringCellValue())) {
+                        prefix = cell5.getStringCellValue().split("、");
+                        for (int i = 0; i  < self.length; i++) {
+                            put(vital_words, self[i], prefix, null);
+                        }
+                    }
+                }
+                prefix = null;
+                suffix = null;
+            }catch (Exception e) {
+                e.printStackTrace();
+                System.out.println("第" + count + "出现错误!");
+            }
+        }
+    }
+
+    public static void readPart1() throws Exception {
+        readPacs();
+        readLis();
+        readSymptom();
+        readVital();
+
+        Map<String, String> all_words = new HashMap<>();
+
+        String key = "";
+        String value = "";
+        for (Map.Entry<String, String> entry : symptom_words.entrySet()) {
+            key = entry.getKey().trim().replace("(", "(").replace(")",")");
+            value = entry.getValue().trim().replace("(", "(").replace(")",")");
+            all_words.put(key, value);
+        }
+        for (Map.Entry<String, String> entry : vital_words.entrySet()) {
+            key = entry.getKey().trim().replace("(", "(").replace(")",")");
+            value = entry.getValue().trim().replace("(", "(").replace(")",")");
+            all_words.put(key, value);
+        }
+        for (Map.Entry<String, String> entry : lis_words.entrySet()) {
+            if (key.equals("血常规")) {
+                System.out.println("血常规");
+            }
+            key = entry.getKey().trim().replace("(", "(").replace(")",")");
+            value = entry.getValue().trim().replace("(", "(").replace(")",")");
+            all_words.put(key, value);
+        }
+        for (Map.Entry<String, String> entry : pacs_words.entrySet()) {
+            key = entry.getKey().trim().replace("(", "(").replace(")",")");
+            value = entry.getValue().trim().replace("(", "(").replace(")",")");
+            all_words.put(key, value);
+        }
+
+
+
+        File directFile = new File("E:\\git\\docs\\医学知识库\\诊断依据\\全部依据\\part1"); // 创建文件对象
+
+
+        String updateDate = DateFormatUtils.format(new Date(), "yyyy-MM-dd HH:mm:ss");
+
+        NumberFormat nf = NumberFormat.getInstance();
+        nf.setMaximumFractionDigits(1);
+
+        String diag = "";
+        List<Map<String, Object>> lists = new ArrayList<>();
+
+        List<Map<String, Object>> not_in_lists = new ArrayList<>();
+        if (directFile.isDirectory()) {
+            File[] files = directFile.listFiles();
+            for (File file : files) {
+                File[] excelFile = file.listFiles();
+                for (int i = 0; i < excelFile.length; i++) {
+                    FileInputStream in = new FileInputStream(excelFile[i]); // 文件流
+                    Workbook workbook = getWorkbok(in, excelFile[i]);
+                    Sheet sheet = workbook.getSheetAt(0);
+
+                    diag = excelFile[i].getName().substring(0, excelFile[i].getName().indexOf(".xlsx"));
+                    Map<String, String> words = new HashMap<>();
+                    int count = 0;
+                    String[] alias = null;
+                    for (Row row : sheet) {
+                        // 跳过第一和第二行的目录
+                        if (count < 1) {
+                            count++;
+                            continue;
+                        }
+                        count++;
+                        try {
+                            Cell cell0 = row.getCell(0);
+                            Cell cell1 = row.getCell(1);
+                            Cell cell2 = row.getCell(2);
+                            Cell cell3 = row.getCell(3);
+                            Cell cell4 = row.getCell(4);
+
+                            if (cell0 == null || StringUtils.isEmpty(cell0.getStringCellValue())) {
+                                break;
+                            }
+
+                            String number = "";
+                            if (CellType.STRING == cell1.getCellTypeEnum()) {
+                                number = cell1.getStringCellValue();
+                            } else if (CellType.NUMERIC == cell1.getCellTypeEnum()) {
+                                number = nf.format(cell1.getNumericCellValue());
+                            }
+                            if (number.indexOf("1.") > -1 || number.indexOf("2.") > -1) {
+                                if (cell2 != null && !"".equals(cell2.getStringCellValue())) {
+                                    Map<String, Object> line = new HashMap<>();
+                                    line.put("disease", diag);
+                                    line.put("type", cell0.getStringCellValue());
+                                    line.put("number", number);
+                                    line.put("standWord", cell2.getStringCellValue().trim());
+                                    line.put("update_date", updateDate);
+                                    lists.add(line);
+
+                                    notinFound(all_words, not_in_lists, line);
+                                }
+                                if (cell3 != null && !"".equals(cell3.getStringCellValue())) {
+                                    String val = cell3.getStringCellValue();
+                                    if (val.indexOf("、") > -1) {
+                                        alias = val.split("、");
+                                        for (int j = 0; j < alias.length; j++) {
+                                            Map<String, Object> line = new HashMap<>();
+                                            line.put("disease", diag);
+                                            line.put("type", cell0.getStringCellValue());
+                                            line.put("number", number);
+                                            line.put("standWord", alias[j].trim());
+                                            line.put("update_date", updateDate);
+                                            lists.add(line);
+
+                                            notinFound(all_words, not_in_lists, line);
+                                        }
+                                    } else {
+                                        Map<String, Object> line = new HashMap<>();
+                                        line.put("disease", diag);
+                                        line.put("type", cell0.getStringCellValue());
+                                        line.put("number", number);
+                                        line.put("standWord", val.trim());
+                                        line.put("update_date", updateDate);
+                                        lists.add(line);
+
+                                        notinFound(all_words, not_in_lists, line);
+                                    }
+                                }
+                            }else if (number.indexOf("3.") > -1) {
+                                if (cell3 != null && cell4 != null) {
+                                    String val3 = cell3.getStringCellValue();
+                                    String val4 = cell4.getStringCellValue();
+                                    if (val4.indexOf("、") > -1) {
+                                        alias = val4.split("、");
+                                        for (int j = 0; j < alias.length; j++) {
+                                            Map<String, Object> line = new HashMap<>();
+                                            line.put("disease", diag);
+                                            line.put("type", cell0.getStringCellValue());
+                                            line.put("number", number);
+                                            line.put("standWord", cell2.getStringCellValue().trim());
+                                            line.put("correlationWord", val3.trim());
+                                            line.put("result", alias[j].trim());
+                                            line.put("update_date", updateDate);
+                                            lists.add(line);
+
+                                            notinFound(all_words, not_in_lists, line);
+                                        }
+                                    } else {
+                                        Map<String, Object> line = new HashMap<>();
+                                        line.put("disease", diag);
+                                        line.put("type", cell0.getStringCellValue());
+                                        line.put("number", number);
+                                        line.put("standWord", cell2.getStringCellValue().trim());
+                                        line.put("correlationWord", val3.trim());
+                                        line.put("result", val4.trim());
+                                        line.put("update_date", updateDate);
+                                        lists.add(line);
+
+                                        notinFound(all_words, not_in_lists, line);
+                                    }
+                                } else {
+                                    if (cell2 != null) {
+                                        Map<String, Object> line = new HashMap<>();
+                                        line.put("disease", diag);
+                                        line.put("type", cell0.getStringCellValue());
+                                        line.put("number", number);
+                                        line.put("standWord", cell2.getStringCellValue().trim());
+                                        line.put("check_label", "4");
+                                        line.put("update_date", updateDate);
+                                        not_in_lists.add(line);
+                                    }
+                                }
+                            } else {
+                                Map<String, Object> line = new HashMap<>();
+                                line.put("disease", diag);
+                                line.put("type", cell0.getStringCellValue());
+                                line.put("number", number);
+                                line.put("standWord", cell2.getStringCellValue().trim());
+                                if (cell3 != null) {
+                                    line.put("correlationWord", cell3.getStringCellValue().trim());
+                                }
+                                if (cell4 != null) {
+                                    line.put("result", cell4.getStringCellValue().trim());
+                                }
+                                line.put("update_date", updateDate);
+                                lists.add(line);
+
+                                notinFound(all_words, not_in_lists, line);
+                            }
+                        }catch (Exception e) {
+                            e.printStackTrace();
+                            System.out.println("第" + count + "出现错误!" + excelFile[i].getName());
+                        }
+                    }
+                }
+            }
+        }
+
+        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/diagbot-rule?useUnicode=true&characterEncoding=UTF-8");
+        nlpJdbc.insert(not_in_lists, "medicine_rule_part1_notin", new String[]{"disease", "type", "number", "standWord", "correlationWord", "result", "update_date", "check_label"});
+
+    }
+
+    /**
+     * Flags the terms of a rule line that are missing from {@code words} and, when at least one
+     * flag is set, appends the line to {@code not_in_list}.
+     *
+     * The comma-separated "check_label" value collects: "1" when "standWord" is unknown, "2"
+     * when "correlationWord" is present and unknown (skipped when "number" contains "4."), and
+     * "3" when "result" is present and unknown. A "check_label" already on the line is kept and
+     * extended. A missing "standWord" is looked up as the text "null".
+     *
+     * @param words       dictionary of known terms
+     * @param not_in_list receives the line when it carries a "check_label"
+     * @param line        rule line to inspect; its "check_label" entry may be set or extended
+     */
+    public static void notinFound(Map<String, String> words, List<Map<String, Object>> not_in_list, Map<String, Object> line) {
+        // NOTE(review): the replace() calls in this method are no-ops as written; presumably they
+        // were meant to map full-width parentheses to ASCII ones - confirm against the original file.
+        String standWord = String.valueOf(line.get("standWord")).replace("(", "(").replace(")",")");
+        if (words.get(standWord) == null) {
+            line.put("check_label", "1");
+        }
+
+        if (line.get("correlationWord") != null) {
+            String correlationWord = String.valueOf(line.get("correlationWord")).replace("(", "(").replace(")",")");
+            String number = String.valueOf(line.get("number"));
+            // Rules numbered "4." are exempt from the correlation-word check.
+            if (number.indexOf("4.") < 0 && words.get(correlationWord) == null) {
+                appendCheckLabel(line, "2");
+            }
+        }
+
+        if (line.get("result") != null) {
+            String result = String.valueOf(line.get("result")).replace("(", "(").replace(")",")");
+            if (words.get(result) == null) {
+                appendCheckLabel(line, "3");
+            }
+        }
+
+        if (line.get("check_label") !=  null) {
+            not_in_list.add(line);
+        }
+    }
+
+    /** Sets "check_label" to {@code code}, or appends ",code" when a label is already present. */
+    private static void appendCheckLabel(Map<String, Object> line, String code) {
+        Object existing = line.get("check_label");
+        line.put("check_label", existing != null ? existing + "," + code : code);
+    }
+
+    /**
+     * Adds the terms found in the given columns of every data row of {@code sheet} to
+     * {@code words}. The first row is treated as a header and skipped; a row that fails to parse
+     * is reported on standard output and the remaining rows are still processed.
+     *
+     * @param sheet  sheet to read
+     * @param words  dictionary updated in place
+     * @param indexs column indexes to read from each row
+     * @return the same {@code words} instance
+     */
+    public static Map<String, String> loadDataFromSheet(Sheet sheet, Map<String, String> words, int[] indexs) {
+        int rowNo = 0;
+        for (Row row : sheet) {
+            rowNo++;
+            if (rowNo == 1) {
+                // Header row.
+                continue;
+            }
+            try {
+                for (int column : indexs) {
+                    put(words, row.getCell(column));
+                }
+            } catch (Exception e) {
+                System.out.println("第" + rowNo + "出现错误!");
+            }
+        }
+        return words;
+    }
+
+    /**
+     * Opens {@code in} as an Excel workbook, choosing the reader from the extension of
+     * {@code file}: names ending in "xlsx" use {@code XSSFWorkbook}, names ending in "xls" use
+     * {@code HSSFWorkbook}. The extension is matched case-insensitively.
+     *
+     * @param in   stream positioned at the start of the workbook data
+     * @param file file the stream was opened from; only its name is inspected
+     * @return the opened workbook, never {@code null}
+     * @throws IOException if the extension is not supported or the workbook cannot be read
+     */
+    public static Workbook getWorkbok(InputStream in, File file) throws IOException {
+        String name = file.getName().toLowerCase(Locale.ROOT);
+        if (name.endsWith(EXCEL_XLSX)) {  // Excel 2007/2010
+            return new XSSFWorkbook(in);
+        }
+        if (name.endsWith(EXCEL_XLS)) {  // Excel 2003
+            return new HSSFWorkbook(in);
+        }
+        // Fail with a clear message instead of returning null, which every caller dereferences.
+        throw new IOException("Unsupported Excel file type: " + file.getName());
+    }
+
+    public static Map<String, String> put(Map<String, String> words,  String word, String[] prefix, String[] suffix) {
+        if (words.get(word) == null) {
+            words.put(word, word);
+        }
+        if (prefix != null) {
+            for (int i = 0; i < prefix.length; i++) {
+                if (words.get(prefix[i] + word) == null) {
+                    words.put(prefix[i] + word, prefix[i] + word);
+                }
+            }
+        }
+        if (suffix != null) {
+            for (int i = 0; i < suffix.length; i++) {
+                if (words.get(word + suffix[i]) == null) {
+                    words.put(word + suffix[i], word + suffix[i]);
+                }
+            }
+        }
+        if (prefix != null && suffix != null) {
+            for (int i = 0; i < prefix.length; i++) {
+                for (int j = 0; j < suffix.length; j++) {
+                    if (words.get(prefix[i] + word + suffix[j]) == null) {
+                        words.put(prefix[i] + word + suffix[j], prefix[i] + word + suffix[j]);
+                    }
+                }
+            }
+        }
+        return words;
+    }
+
+    public static Map<String, String> put(Map<String, String> words, Cell cell) {
+        return put(words, cell, null);
+    }
+
+    public static Map<String, String> put(Map<String, String> words, Cell cell, String suffix) {
+        if (cell == null) {
+            return words;
+        }
+        String[] key = cell.getStringCellValue().split("、");
+        for (int i = 0; i < key.length; i++) {
+            if (words.get(key[i]) == null) {
+                if (suffix == null) {
+                    words.put(key[i], key[i]);
+                } else {
+                    words.put(key[i] + suffix, key[i] + suffix);
+                }
+            }
+        }
+        return words;
+    }
+}

+ 1 - 1
nlp/src/test/java/org/diagbot/nlp/test/LexemeDicTest.java

@@ -38,7 +38,7 @@ public class LexemeDicTest {
 
             FileWriter fw = new FileWriter(path + "src/main/resources/tc.dict");
             for (Map.Entry<String, String> entry : libraryList) {
-                fw.write(encrypDES.encrytor(entry.getKey() + "|9|" + entry.getValue()));
+                fw.write(encrypDES.encrytor(entry.getKey() + "|9|" + entry.getValue() + "|9"));
                 fw.write("\n");
             }
             fw.close();

+ 150 - 0
nlp/src/test/java/org/diagbot/nlp/test/SplitDicTest.java

@@ -0,0 +1,150 @@
+package org.diagbot.nlp.test;
+
+import org.diagbot.nlp.participle.ParticipleUtil;
+import org.diagbot.nlp.participle.word.Lexeme;
+import org.diagbot.pub.jdbc.MysqlJdbc;
+import org.diagbot.pub.utils.security.EncrypDES;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Manually run maintenance tool for the rule-splitting tables of the {@code diagbot-rule}
+ * database. Each public method is an independent step; {@link #main(String[])} selects which
+ * step is executed.
+ *
+ * @ClassName org.diagbot.nlp.test.SplitDicTest
+ * @Author fyeman
+ * @Date 2019/4/25/025 14:36
+ * @Version 1.0
+ **/
+public class SplitDicTest {
+    // NOTE(review): connection settings and credentials are hard-coded in source; consider
+    // moving them to external configuration.
+    private static final String DB_USER = "root";
+    private static final String DB_PASSWORD = "diagbot@20180822";
+    private static final String DB_URL = "jdbc:mysql://192.168.2.235:3306/diagbot-rule?useUnicode=true&characterEncoding=UTF-8";
+
+    /**
+     * Entry point. Only the dictionary generation step is enabled; the other steps are kept
+     * as commented-out calls so they can be switched on by hand.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        SplitDicTest test = new SplitDicTest();
+//        test.split();
+
+        test.createDic();
+//        test.copy();
+    }
+
+    /**
+     * Reads every non-null {@code split_word} from {@code medicine_rule_split}, collects the
+     * distinct comma-separated words and writes them, one encrypted line per word, to
+     * {@code src/main/resources/tc.dict} of the module the class was loaded from.
+     *
+     * <p>This method only writes the dictionary file; it does not persist the words to the
+     * database.
+     */
+    public void createDic() {
+        MysqlJdbc nlpJdbc = openRuleDb();
+        List<Map<String, String>> data = nlpJdbc.query("medicine_rule_split", new String[]{"standword", "split_word"}, " where split_word is not null");
+
+        Map<String, String> words = new HashMap<>();
+        for (Map<String, String> map : data) {
+            String splitWord = map.get("split_word");
+            if (splitWord == null) {
+                continue;
+            }
+            // split() yields the whole value as a single element when no comma is present.
+            for (String part : splitWord.split(",")) {
+                words.put(part, part);
+            }
+        }
+
+        System.out.println("肌紧张:"+words.get("肌紧张"));
+        try {
+            String path = this.getClass().getClassLoader().getResource("").getPath();
+            path = path.substring(0, path.indexOf("target"));
+
+            // Build the cipher before opening the file so a failure here does not truncate
+            // the existing dictionary.
+            EncrypDES encrypDES = new EncrypDES();
+            // try-with-resources closes the writer even when encryption or a write fails.
+            try (FileWriter fw = new FileWriter(path + "src/main/resources/tc.dict")) {
+                for (String key : words.keySet()) {
+                    fw.write(encrypDES.encrytor(key + "|9|9|9"));
+                    fw.write("\n");
+                }
+            }
+        } catch (Exception ioe) {
+            ioe.printStackTrace();
+        }
+    }
+
+    /**
+     * Copies the rows of {@code medicine_rule_origin} whose number starts with "1." or "2."
+     * into {@code medicine_rule_origin_split}, emitting one target row per "、"-separated
+     * part of {@code extract_word}. A row whose {@code extract_word} is null is copied once
+     * with a null {@code extract_word}.
+     */
+    public void copy() {
+        MysqlJdbc nlpJdbc = openRuleDb();
+        List<Map<String, String>> data = nlpJdbc.query("medicine_rule_origin", new String[]{"id", "disease", "type", "number", "standword", "extract_word"}, " where number like '1.%' or number like '2.%'");
+
+        List<Map<String, Object>> lists = new ArrayList<>();
+        for (Map<String, String> map : data) {
+            String extractWord = map.get("extract_word");
+            if (extractWord == null) {
+                lists.add(copyRow(map, null));
+                continue;
+            }
+            for (String part : extractWord.split("、")) {
+                lists.add(copyRow(map, part));
+            }
+        }
+
+        nlpJdbc.insert(lists, "medicine_rule_origin_split", new String[]{"disease", "type", "number", "standword", "extract_word"});
+    }
+
+    /**
+     * Segments the {@code standWord} of every selected {@code medicine_rule_part1} row with
+     * {@link ParticipleUtil} and stores the comma-joined segments in {@code split_word}.
+     */
+    public void split() {
+        MysqlJdbc nlpJdbc = openRuleDb();
+        List<Map<String, String>> data = nlpJdbc.query("medicine_rule_part1", new String[]{"id", "standWord"}, " where number like '1.%' or number like '2.%'");
+        try {
+            List<String> sqls = new ArrayList<>();
+            for (Map<String, String> map : data) {
+                String standWord = map.get("standWord");
+                if (standWord == null) {
+                    continue;
+                }
+                List<Lexeme> lexemes = ParticipleUtil.participle(standWord, false);
+                // A fresh builder per row: a row that yields no lexemes must produce an empty
+                // value rather than inherit the previous row's text.
+                StringBuilder joined = new StringBuilder();
+                for (int i = 0; i < lexemes.size(); i++) {
+                    if (i > 0) {
+                        joined.append(",");
+                    }
+                    joined.append(lexemes.get(i).getText());
+                }
+                sqls.add("update medicine_rule_part1 set split_word = '" + escapeSqlLiteral(joined.toString()) + "' where id = " + map.get("id"));
+            }
+            nlpJdbc.update(sqls);
+        } catch (IOException ioe) {
+            ioe.printStackTrace();
+        }
+    }
+
+    /** Opens the JDBC helper for the rule database used by every step of this tool. */
+    private static MysqlJdbc openRuleDb() {
+        return new MysqlJdbc(DB_USER, DB_PASSWORD, DB_URL);
+    }
+
+    /** Builds one target row for {@link #copy()} from a source row and a single extract word. */
+    private static Map<String, Object> copyRow(Map<String, String> source, String extractWord) {
+        Map<String, Object> line = new HashMap<>();
+        line.put("disease", source.get("disease"));
+        line.put("type", source.get("type"));
+        line.put("number", source.get("number"));
+        line.put("standword", source.get("standword"));
+        line.put("extract_word", extractWord);
+        return line;
+    }
+
+    /**
+     * Escapes text for use inside a single-quoted SQL string literal.
+     * NOTE(review): backslash doubling assumes the server runs without NO_BACKSLASH_ESCAPES
+     * (the MySQL default) - confirm against the server's sql_mode.
+     */
+    private static String escapeSqlLiteral(String text) {
+        return text.replace("\\", "\\\\").replace("'", "''");
+    }
+}

+ 59 - 0
public/src/main/java/org/diagbot/pub/jdbc/MysqlJdbc.java

@@ -167,6 +167,65 @@ public class MysqlJdbc {
         this.close(null, st, conn);
     }
 
+    /**
+     * Inserts every row of {@code data} into {@code table} inside one transaction, sending
+     * the rows to the server in batches of 1000, and returns the generated keys.
+     *
+     * <p>Values are bound through a prepared statement as strings (their {@code toString()}
+     * form), so quotes or other special characters in the data cannot break or alter the
+     * statement. A missing or {@code null} column value is inserted as SQL NULL.
+     *
+     * <p>On an {@link SQLException} the stack trace is printed, the transaction is rolled
+     * back and the keys collected so far are returned; slots without a key stay 0.
+     *
+     * @param data    rows to insert, each mapping column name to value
+     * @param table   target table name
+     * @param columns columns to populate, in statement order
+     * @return generated keys in row order; an empty array when {@code data} is null or empty;
+     *         {@code null} when no connection could be opened
+     */
+    public int[] insertBatch(List<Map<String, Object>> data, String table, String[] columns) {
+        if (data == null || data.isEmpty()) {
+            return new int[0];
+        }
+        Connection conn = connect();
+        if (conn == null) return null;
+
+        final int batchSize = 1000;
+        int[] keys = new int[data.size()];
+        java.sql.PreparedStatement st = null;
+        try {
+            conn.setAutoCommit(false);
+
+            // One "?" placeholder per column, appended to the shared insert prefix.
+            StringBuilder sql = new StringBuilder(joinInsetSql(table, columns)).append("(");
+            for (int i = 0; i < columns.length; i++) {
+                sql.append(i > 0 ? ",?" : "?");
+            }
+            sql.append(")");
+            st = conn.prepareStatement(sql.toString(), Statement.RETURN_GENERATED_KEYS);
+
+            int flushed = 0; // number of rows already executed; also the next key offset
+            for (int cursor = 0; cursor < data.size(); cursor++) {
+                Map<String, Object> row = data.get(cursor);
+                for (int i = 0; i < columns.length; i++) {
+                    Object value = row.get(columns[i]);
+                    if (value == null) {
+                        st.setNull(i + 1, java.sql.Types.VARCHAR);
+                    } else {
+                        st.setString(i + 1, value.toString());
+                    }
+                }
+                st.addBatch();
+                if ((cursor + 1) % batchSize == 0) {
+                    st.executeBatch();
+                    readGeneratedKeysInto(st, keys, flushed, cursor + 1);
+                    flushed = cursor + 1;
+                }
+            }
+            if (flushed < data.size()) {
+                st.executeBatch();
+                readGeneratedKeysInto(st, keys, flushed, data.size());
+            }
+            conn.commit();
+        } catch (SQLException sqle) {
+            sqle.printStackTrace();
+            try {
+                conn.rollback();
+            } catch (SQLException rollbackFailure) {
+                rollbackFailure.printStackTrace();
+            }
+        } finally {
+            // Always release the statement and connection, whatever went wrong above.
+            this.close(null, st, conn);
+        }
+        return keys;
+    }
+
+    /**
+     * Copies the keys generated by the last executed batch of {@code st} into
+     * {@code keys[from..to)}; any keys beyond that range are ignored.
+     */
+    private static void readGeneratedKeysInto(Statement st, int[] keys, int from, int to) throws SQLException {
+        try (ResultSet rs = st.getGeneratedKeys()) {
+            int index = from;
+            while (index < to && rs.next()) {
+                keys[index++] = rs.getInt(1);
+            }
+        }
+    }
+
     public void update(String table, List<Map<String, Object>> data, List<Map<String, Object>> wheres) {
         Connection conn = connect();
         if (conn == null) return;