4 years ago · 2c2dfcc26c
--- a/trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunConsultationHtmlAnalysis.java
+++ b/trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunConsultationHtmlAnalysis.java
@@ -0,0 +1,69 @@
 
				+package com.lantone.qc.trans.beilun.util;
			
 
				+
			
 
				+import com.google.common.collect.Lists;
			
 
				+import com.google.common.collect.Maps;
			
 
				+import com.lantone.qc.pub.util.StringUtil;
			
 
				+import org.springframework.beans.factory.annotation.Autowired;
			
 
				+
			
 
				+import java.util.List;
			
 
				+import java.util.Map;
			
 
				+import java.util.stream.Collectors;
			
 
				+
			
 
				+/**
				+ * Parses consultation-record HTML into an insertion-ordered map of section title to text.
				+ *
				+ * @author: rengb
				+ * @time: 2020/9/12 13:53
				+ */
				+public class BeiLunConsultationHtmlAnalysis implements BeiLunHtmlAnalysis {
				+    @Autowired
				+    CommonAnalysisUtil commonAnalysisUtil;
				+
				+    /**
				+     * Extracts the titled sections of one consultation record.
				+     *
				+     * @param args {@code args[0]} is the raw HTML; {@code args[1]} is the record-title code that
				+     *             selects the parsing strategy ("377", "7883", "8084"; any other value takes
				+     *             the default branch). Both elements are read unconditionally.
				+     * @return map of title to extracted text, plus a "rec_title=" entry holding the code
				+     */
				+    @Override
				+    public Map<String, String> analysis(String... args) {
				+        // Section titles looked for by the colon-based helpers (replaced in the "8084" branch).
				+        List<String> titles = Lists.newArrayList("会诊类型", "被邀医院", "被邀科室", "申请时间",
				+                "患者病情及诊疗经过、申请会诊的理由及目的", "申请会诊科别", "被邀会诊科别", "申请会诊医师", "会诊意见", "会诊时间", "科主任",
				+                "会诊到达时间", "查体", "会诊建议", "会诊诊断", "会诊科室", "会诊医师", "外院会诊医师所在医疗机构名称", "会诊医师所在医疗机构名称");
				+        String html = args[0];
				+        String recTitle = args[1];
				+        Map<String, String> structureMap = Maps.newLinkedHashMap();
				+        List<String> htmlText = null;
				+        switch (recTitle) {
				+            case "377":
				+                // existHr = true — presumably the body has an <hr> separator; confirm against html2List.
				+                htmlText = commonAnalysisUtil.html2List(html, true);
				+                // Drop entries whose text contains both of the two entries that follow them.
				+                commonAnalysisUtil.removeRepeat(htmlText);
				+                commonAnalysisUtil.html2StructureMap(titles, htmlText, structureMap);
				+                structureMap.put("rec_title=", recTitle);
				+                break;
				+            case "7883":
				+                // existHr = false: nodes are read without waiting for the marker node.
				+                htmlText = commonAnalysisUtil.html2List(html, false);
				+                commonAnalysisUtil.html2StructureMap(titles, htmlText, structureMap);
				+                // The "会诊时间" value is split on spaces: its last token is stored as
				+                // "会诊意见" and its first token replaces "会诊时间".
				+                if (StringUtil.isNotBlank(structureMap.get("会诊时间"))) {
				+                    String[] strArr = structureMap.get("会诊时间").split(" ");
				+                    structureMap.put("会诊意见", strArr[strArr.length - 1]);
				+                    structureMap.put("会诊时间", strArr[0]);
				+                }
				+                structureMap.put("rec_title=", recTitle);
				+                break;
				+            case "8084":
				+                // This layout uses its own title set; text comes from table cells and
				+                // titles are matched without a trailing colon.
				+                titles = Lists.newArrayList("姓名", "性别", "出生日期", "联系电话",
				+                        "申请科室","入院/首诊时间", "住院号", "病情概述（含主诉、病史、诊断、诊治过程等）", "拟申请MDT时间、地点",
				+                        "拟请MDT参加科室", "MDT目的", "申请人签名", "申请递交时间", "科主任签字", "专家诊治建议", "专家科室", "签名",
				+                        "填写时间","主持科室小结（MDT的最终诊治决议）","科主任（主持人）签名","记录人（主管医师）签字","日期");
				+                htmlText = Lists.newArrayList();
				+                commonAnalysisUtil.html2ListByTable(html, htmlText);
				+                // Remove duplicate cell texts, keeping the first occurrence of each.
				+                htmlText = htmlText.stream().distinct().collect(Collectors.toList());
				+                commonAnalysisUtil.html2StructureMapNoColon(titles, htmlText, structureMap);
				+                structureMap.put("rec_title=", recTitle);
				+                break;
				+            default:
				+                // Any other code is parsed with the default titles and recorded as "371".
				+                htmlText = commonAnalysisUtil.html2List(html, true);
				+                commonAnalysisUtil.html2StructureMap(titles, htmlText, structureMap);
				+                structureMap.put("rec_title=", "371");
				+                break;
				+        }
				+        // Shared post-processing: reduce "会诊到达时间" to the first timestamp found in it, and
				+        // "会诊类型" to the option carrying the U+F0FE marker.
				+        commonAnalysisUtil.extractDateByTitle(structureMap, "会诊到达时间");
				+        commonAnalysisUtil.processType(structureMap, "会诊类型");
				+        return structureMap;
				+    }
				+
				+}
			
--- a/trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunFirstCourseRecordHtmlAnalysis.java
+++ b/trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunFirstCourseRecordHtmlAnalysis.java
@@ -1,6 +1,7 @@
 
				 package com.lantone.qc.trans.beilun.util;
			
 
				 
			
 
				 import com.google.common.collect.Lists;
			
 
				+import com.google.common.collect.Maps;
			
 
				 import com.lantone.qc.pub.util.StringUtil;
			
 
				 import org.springframework.beans.factory.annotation.Autowired;
			
 
				 
			
@@ -9,7 +10,7 @@ import java.util.Map;
 
				 
			
 
				 /**
			
 
				  * @Description:
			
 
				- * @author: rengb
			
 
				+ * @author: HUJING
			
 
				  * @time: 2020/9/9 11:24
			
 
				  */
			
 
				 public class BeiLunFirstCourseRecordHtmlAnalysis implements BeiLunHtmlAnalysis {
			
@@ -22,8 +23,9 @@ public class BeiLunFirstCourseRecordHtmlAnalysis implements BeiLunHtmlAnalysis {
 
				                 "（四）初步诊断", "（五）诊疗计划", "医生签名");
			
 
				         String html = args[0];
			
 
				         String recTitle = args[1];
			
 
				-        List<String> htmlText = commonAnalysisUtil.html2List(html);
			
 
				-        Map<String, String> structureMap = commonAnalysisUtil.html2StructureMap(titles, htmlText);
			
 
				+        Map<String, String> structureMap = Maps.newLinkedHashMap();
			
 
				+        List<String> htmlText = commonAnalysisUtil.html2List(html, true);
			
 
				+        commonAnalysisUtil.html2StructureMap(titles, htmlText, structureMap);
			
 
				         String date = commonAnalysisUtil.extractDate(htmlText.get(0));
			
 
				         if (StringUtil.isNotBlank(date)) {
			
 
				             structureMap.put("时间", date);
			
--- a/trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunLeaveHospitalHtmlAnalysis.java
+++ b/trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunLeaveHospitalHtmlAnalysis.java
@@ -1,6 +1,7 @@
 
				 package com.lantone.qc.trans.beilun.util;
			
 
				 
			
 
				 import com.google.common.collect.Lists;
			
 
				+import com.google.common.collect.Maps;
			
 
				 import org.springframework.beans.factory.annotation.Autowired;
			
 
				 
			
 
				 import java.util.List;
			
@@ -8,7 +9,7 @@ import java.util.Map;
 
				 
			
 
				 /**
			
 
				  * @Description:
			
 
				- * @author: rengb
			
 
				+ * @author: HUJING
			
 
				  * @time: 2020/9/9 11:24
			
 
				  */
			
 
				 public class BeiLunLeaveHospitalHtmlAnalysis implements BeiLunHtmlAnalysis {
			
@@ -21,9 +22,10 @@ public class BeiLunLeaveHospitalHtmlAnalysis implements BeiLunHtmlAnalysis {
 
				                 "入院情况", "入院后检查化验结果", "诊疗经过", "出院计划", "病理检查结果", "出院情况", "治疗效果", "出院医嘱", "医师签名", "时间");
			
 
				         String html = args[0];
			
 
				         String recTitle = args[1];
			
 
				-        List<String> htmlText = commonAnalysisUtil.html2List(html);
			
 
				+        Map<String, String> structureMap = Maps.newLinkedHashMap();
			
 
				+        List<String> htmlText = commonAnalysisUtil.html2List(html, true);
			
 
				         htmlText.remove(0);//去除第一个div内容
			
 
				-        Map<String, String> structureMap = commonAnalysisUtil.html2StructureMap(titles, htmlText);
			
 
				+        commonAnalysisUtil.html2StructureMap(titles, htmlText, structureMap);
			
 
				         structureMap.put("rec_title=", "183");
			
 
				         return structureMap;
			
 
				     }
			
--- a/trans/src/main/java/com/lantone/qc/trans/beilun/util/CommonAnalysisUtil.java
+++ b/trans/src/main/java/com/lantone/qc/trans/beilun/util/CommonAnalysisUtil.java
@@ -1,7 +1,7 @@
 
				 package com.lantone.qc.trans.beilun.util;
			
 
				 
			
 
				 import com.google.common.collect.Lists;
			
 
				-import com.google.common.collect.Maps;
			
 
				+import com.lantone.qc.pub.util.StringUtil;
			
 
				 import org.jsoup.Jsoup;
			
 
				 import org.jsoup.nodes.Document;
			
 
				 import org.jsoup.nodes.Element;
			
@@ -10,6 +10,7 @@ import org.jsoup.select.Elements;
 
				 
			
 
				 import java.util.List;
			
 
				 import java.util.Map;
			
 
				+import java.util.TreeMap;
			
 
				 import java.util.regex.Matcher;
			
 
				 import java.util.regex.Pattern;
			
 
				 
			
@@ -26,7 +27,7 @@ public class CommonAnalysisUtil {
 
				      * @param html 原始html内容
			
 
				      * @return
			
 
				      */
			
 
				-    public List<String> html2List(String html) {
			
 
				+    public List<String> html2List(String html, boolean existHr) {
			
 
				         List<String> htmlText = Lists.newArrayList();
			
 
				         Document document = Jsoup.parse(html);
			
 
				         Element body = document.select("body").first();
			
@@ -38,11 +39,15 @@ public class CommonAnalysisUtil {
 
				                 findNode = true;
			
 
				                 continue;
			
 
				             }
			
 
				-            if (findNode) {
			
 
				+            if (findNode || !existHr) {
			
 
				                 Element element = (Element) node;
			
 
				                 Elements elements = element.select("div");
			
 
				                 for (Element e : elements) {
			
 
				-                    htmlText.add(e.text());
			
 
				+                    String text = e.text();
			
 
				+                    if (text.length() > 150) {
			
 
				+                        continue;
			
 
				+                    }
			
 
				+                    htmlText.add(text);
			
 
				                 }
			
 
				             }
			
 
				         }
			
@@ -52,15 +57,32 @@ public class CommonAnalysisUtil {
 
				         return htmlText;
			
 
				     }
			
 
				 
			
 
				+    /**
				+     * Collects the text of table cells into a list.
				+     *
				+     * <p>Every {@code tr} under a {@code table} is visited, and the text of each of its
				+     * {@code td} elements is appended to {@code htmlText}.
				+     *
				+     * @param html     raw HTML content
				+     * @param htmlText list that receives the cell texts; modified in place
				+     */
				+    public void html2ListByTable(String html, List<String> htmlText) {
				+        Document parsed = Jsoup.parse(html);
				+        for (Element row : parsed.select("table").select("tr")) {
				+            for (Element cell : row.select("td")) {
				+                htmlText.add(cell.text());
				+            }
				+        }
				+    }
			
 
				+
			
 
				     /**
			
 
				      * 将list中html内容转换成structureMap
			
 
				      *
			
 
				-     * @param titles   文书各标题
			
 
				-     * @param htmlText html内容以行的形式存储list
			
 
				+     * @param titles       文书各标题
			
 
				+     * @param htmlText     html内容以行的形式存储list
			
 
				+     * @param structureMap
			
 
				      * @return
			
 
				      */
			
 
				-    public Map<String, String> html2StructureMap(List<String> titles, List<String> htmlText) {
			
 
				-        Map<String, String> structureMap = Maps.newLinkedHashMap();
			
 
				+    public void html2StructureMap(List<String> titles, List<String> htmlText, Map<String, String> structureMap) {
			
 
				         StringBuffer sb = new StringBuffer();
			
 
				         for (String line : htmlText) {
			
 
				             String text = line.replaceAll("[   ]", " ");
			
@@ -69,8 +91,9 @@ public class CommonAnalysisUtil {
 
				             }
			
 
				             sb.append(text).append("\n");
			
 
				         }
			
 
				+        String content = sb.toString();
			
 
				+        sortTitles(titles, content);
			
 
				         cutByTitles(sb.toString(), titles, 0, structureMap);
			
 
				-        return structureMap;
			
 
				     }
			
 
				 
			
 
				     /**
			
@@ -88,13 +111,17 @@ public class CommonAnalysisUtil {
 
				         String beforeTitle = null, title = null, newTitle = null, value = null;
			
 
				         beforeTitle = titles.get(Math.max(depth - 1, 0));
			
 
				         title = titles.get(Math.min(depth, titles.size() - 1));
			
 
				-        newTitle = title + "：";
			
 
				         if (depth == titles.size()) {
			
 
				             value = line.substring(0, line.indexOf("\n"));
			
 
				             structureMap.put(beforeTitle, value.trim());
			
 
				             return;
			
 
				         }
			
 
				-        if (line.contains(newTitle)) {
			
 
				+        if (line.contains(title + "：") || line.contains(title + ":")) {
			
 
				+            if (line.contains(title + "：")) {
			
 
				+                newTitle = title + "：";
			
 
				+            } else {
			
 
				+                newTitle = title + ":";
			
 
				+            }
			
 
				             if (depth > 0) {
			
 
				                 value = line.substring(0, line.indexOf(newTitle));
			
 
				                 structureMap.put(beforeTitle, value.trim());
			
@@ -107,6 +134,92 @@ public class CommonAnalysisUtil {
 
				         cutByTitles(line, titles, depth, structureMap);
			
 
				     }
			
 
				 
			
 
				+    /**
				+     * Orders the titles by where they occur in the text.
				+     *
				+     * <p>A title counts as present when it is immediately followed by a full-width ("：") or
				+     * half-width (":") colon. Titles that do not occur are left out of the result. The list
				+     * passed in is not modified; callers must use the returned list.
				+     *
				+     * @param titles  candidate titles, examined in list order
				+     * @param content text to search
				+     * @return a new list of the titles that were found, ordered by position in {@code content}
				+     */
				+    public List<String> sortTitles(List<String> titles, String content) {
				+        Map<Integer, String> titleIndex = new TreeMap<>();
				+        // Matched spans are blanked in a working copy instead of being cut out. A later title
				+        // still cannot match inside text that is already claimed, and every recorded offset
				+        // keeps referring to the original text. Cutting the span out shifted later offsets,
				+        // which could misorder titles or overwrite an existing map entry.
				+        StringBuilder remaining = new StringBuilder(content);
				+        for (String title : titles) {
				+            int fullWidth = remaining.indexOf(title + "：");
				+            int halfWidth = remaining.indexOf(title + ":");
				+            // When both colon forms occur, the later position is the one recorded.
				+            int index = Math.max(fullWidth, halfWidth);
				+            if (index == -1) {
				+                continue;
				+            }
				+            titleIndex.put(index, title);
				+            // Blank the title together with its one-character colon.
				+            for (int i = index; i <= index + title.length(); i++) {
				+                remaining.setCharAt(i, '\0');
				+            }
				+        }
				+        return Lists.newArrayList(titleIndex.values());
				+    }
			
 
				+
			
 
				+    /**
				+     * Variant of html2StructureMap for documents whose titles are not followed by a colon.
				+     */
			
 
				+    public void html2StructureMapNoColon(List<String> titles, List<String> htmlText, Map<String, String> structureMap) {
			
 
				+        StringBuffer sb = new StringBuffer();
			
 
				+        for (String line : htmlText) {
			
 
				+            String text = line.replaceAll("[   ]", " ");
			
 
				+            if (text.length() == 0) {
			
 
				+                continue;
			
 
				+            }
			
 
				+            sb.append(text).append("\n");
			
 
				+        }
			
 
				+        String content = sb.toString();
			
 
				+        List<String> sortTitles = sortTitlesNoColon(titles, content);
			
 
				+        cutByTitlesNoColon(sb.toString(), sortTitles, 0, structureMap);
			
 
				+    }
			
 
				+
			
 
				+    /**
				+     * Cuts the text at each title in turn and stores the text between consecutive titles
				+     * (colon-less variant).
				+     *
				+     * <p>Starting at {@code titles.get(depth)}, each title is searched for in the remaining
				+     * text. The text in front of it becomes the trimmed value of the previous title, and the
				+     * search continues after it. A title that is not found is removed from {@code titles},
				+     * so the list is modified. The value of the last title is the remaining text up to the
				+     * first line break, or all of it when there is no line break.
				+     *
				+     * <p>Nothing is stored when {@code titles} is or becomes empty; earlier this case threw
				+     * an IndexOutOfBoundsException.
				+     *
				+     * @param line         text still to be cut
				+     * @param titles       titles in the order they are expected to occur; mutable
				+     * @param depth        index of the next title to look for
				+     * @param structureMap receives title to value entries
				+     */
				+    private void cutByTitlesNoColon(String line, List<String> titles, int depth, Map<String, String> structureMap) {
				+        while (depth < titles.size()) {
				+            String title = titles.get(depth);
				+            int start = line.indexOf(title);
				+            if (start < 0) {
				+                // Title absent from the remaining text: drop it and retry the same position.
				+                titles.remove(depth);
				+                continue;
				+            }
				+            if (depth > 0) {
				+                structureMap.put(titles.get(depth - 1), line.substring(0, start).trim());
				+            }
				+            line = line.substring(start + title.length());
				+            depth++;
				+        }
				+        // Only a fully consumed, non-empty title list has a trailing value to store.
				+        if (titles.isEmpty() || depth != titles.size()) {
				+            return;
				+        }
				+        int lineEnd = line.indexOf("\n");
				+        String value = lineEnd >= 0 ? line.substring(0, lineEnd) : line;
				+        structureMap.put(titles.get(depth - 1), value.trim());
				+    }
			
 
				+
			
 
				+    /**
				+     * Orders the titles by where they first occur in the text (colon-less variant).
				+     *
				+     * <p>Titles that do not occur are left out of the result, and empty titles are ignored.
				+     * Once a title has matched, all of its occurrences are blanked so that a later, shorter
				+     * title cannot match inside them. The list passed in is not modified.
				+     *
				+     * @param titles  candidate titles, examined in list order
				+     * @param content text to search
				+     * @return a new list of the titles that were found, ordered by position in {@code content}
				+     */
				+    public List<String> sortTitlesNoColon(List<String> titles, String content) {
				+        Map<Integer, String> titleIndex = new TreeMap<>();
				+        // Occurrences are overwritten with a filler of the same length instead of being
				+        // deleted. Deleting them shifted the offsets of titles found later, which could
				+        // misorder titles or overwrite an existing map entry.
				+        StringBuilder remaining = new StringBuilder(content);
				+        for (String title : titles) {
				+            if (title.isEmpty()) {
				+                continue;
				+            }
				+            int first = remaining.indexOf(title);
				+            if (first == -1) {
				+                continue;
				+            }
				+            titleIndex.put(first, title);
				+            for (int at = first; at != -1; at = remaining.indexOf(title, at + title.length())) {
				+                for (int i = at; i < at + title.length(); i++) {
				+                    remaining.setCharAt(i, '\0');
				+                }
				+            }
				+        }
				+        return Lists.newArrayList(titleIndex.values());
				+    }
			
 
				+
			
 
				     /**
			
 
				      * 抽取文本中的第一个时间
			
 
				      *
			
@@ -114,11 +227,67 @@ public class CommonAnalysisUtil {
 
				      * @return
			
 
				      */
			
 
				     public String extractDate(String top) {
			
 
				-        Pattern pattern = Pattern.compile("[0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[ ][0-9]{1,2}[:][0-9]{1,2}([:][0-9]{1,2})?");
			
 
				+        Pattern pattern = Pattern.compile("[0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[ ][0-9]{1,2}[:][0-9]{1,2}([:][0-9]{1,2})?");
			
 
				         Matcher matcher = pattern.matcher(top);
			
 
				         if (matcher.find()) {
			
 
				             return matcher.group(0);
			
 
				         }
			
 
				         return null;
			
 
				     }
			
 
				+
			
 
				+    /**
				+     * Replaces the value stored under a title with the first date-time found in that value.
				+     *
				+     * <p>The entry is left unchanged when the title is absent or when no date-time is found.
				+     *
				+     * @param structmap map of title to text; modified in place
				+     * @param title     key whose value should be reduced to a date-time
				+     */
				+    public void extractDateByTitle(Map<String, String> structmap, String title) {
				+        if (!structmap.containsKey(title)) {
				+            return;
				+        }
				+        String extracted = extractDate(structmap.get(title));
				+        if (StringUtil.isNotBlank(extracted)) {
				+            structmap.put(title, extracted);
				+        }
				+    }
			
 
				+
			
 
				+    /**
				+     * Reduces a multiple-choice value to the selected option.
				+     *
				+     * <p>The value is split on spaces. The first token containing U+F0FE (presumably the
				+     * ticked-box glyph of the source document — confirm) replaces the whole value, with the
				+     * marker character removed. The entry is left unchanged when the title is absent or no
				+     * token carries the marker.
				+     *
				+     * @param structureMap map of title to text; modified in place
				+     * @param title        key of the multiple-choice entry
				+     */
				+    public void processType(Map<String, String> structureMap, String title) {
				+        if (!structureMap.containsKey(title)) {
				+            return;
				+        }
				+        for (String option : structureMap.get(title).split(" ")) {
				+            if (!option.contains("\uF0FE")) {
				+                continue;
				+            }
				+            structureMap.put(title, option.replace("\uF0FE", ""));
				+            return;
				+        }
				+    }
			
 
				+
			
 
				+    /**
				+     * Removes every element whose text contains both of the two elements that follow it.
				+     *
				+     * <p>All matches are determined against the list as passed in. Removal then runs from the
				+     * highest index down, so an earlier removal cannot shift the position of an element that
				+     * is still to be removed. Lists with fewer than three elements are left unchanged.
				+     *
				+     * @param htmlList mutable list of text lines; modified in place
				+     */
				+    public void removeRepeat(List<String> htmlList) {
				+        int size = htmlList.size();
				+        if (size < 3) {
				+            return;
				+        }
				+        boolean[] redundant = new boolean[size];
				+        for (int i = 0; i < size - 2; i++) {
				+            String current = htmlList.get(i);
				+            redundant[i] = current.contains(htmlList.get(i + 1)) && current.contains(htmlList.get(i + 2));
				+        }
				+        // Back to front: removing a lower index first would move every later element down by one.
				+        for (int i = size - 3; i >= 0; i--) {
				+            if (redundant[i]) {
				+                htmlList.remove(i);
				+            }
				+        }
				+    }
			
 
				 }