Bläddra i källkod

入院记录解析

rengb 4 år sedan
förälder
incheckning
2ee6e857ed

+ 109 - 0
trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunBeHospitalizedHtmlAnalysis.java

@@ -0,0 +1,109 @@
+package com.lantone.qc.trans.beilun.util;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.lantone.qc.pub.util.FileUtil;
+import com.lantone.qc.pub.util.StringUtil;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import java.util.Map;
+
+/**
+ * Parses an admission-record HTML page into an insertion-ordered key/value map.
+ * <p>
+ * The page layout is addressed through fixed element ids (see the constants below).
+ * A table that is absent from the page is skipped instead of aborting the whole parse.
+ *
+ * @author: rengb
+ * @time: 2020/9/15 16:28
+ */
+public class BeiLunBeHospitalizedHtmlAnalysis implements BeiLunHtmlAnalysis {
+
+    /** Id of the table holding the personal basic information. */
+    private static final String BASIC_INFO_TABLE_ID = "table1";
+    /** Id of the table holding chief complaint, history of present illness, etc. */
+    private static final String HISTORY_TABLE_ID = "table6";
+    /** Id of the "physical examination (1)" table. */
+    private static final String PHYSICAL_EXAM_TABLE_ID = "table3";
+    /** Id of the diagnosis table; also used as the stop marker when collecting the exam text. */
+    private static final String DIAGNOSIS_TABLE_ID = "table7_2_0_0_1_1_2_0_1_4_5_6_7_0_1";
+    /** Record title whose diagnosis table uses a different id. */
+    private static final String GYNECOLOGY_REC_TITLE = "妇科大病历";
+    /** Id of the diagnosis table for {@link #GYNECOLOGY_REC_TITLE} records. */
+    private static final String GYNECOLOGY_DIAGNOSIS_TABLE_ID = DIAGNOSIS_TABLE_ID + "_37";
+
+    /**
+     * Extracts the structured fields of one admission record.
+     *
+     * @param args {@code args[0]} is the HTML text, {@code args[1]} is the record title
+     * @return the extracted fields, in the order they were found
+     */
+    @Override
+    public Map<String, String> analysis(String... args) {
+        String html = args[0];
+        String recTitle = args[1];
+        Document doc = Jsoup.parse(html);
+        Map<String, String> map = Maps.newLinkedHashMap();
+
+        // Personal basic information table.
+        Element basicInfoTable = doc.getElementById(BASIC_INFO_TABLE_ID);
+        if (basicInfoTable != null) {
+            BeiLunHtmlAnalysisUtil.tableStyle1InsertMap(basicInfoTable, map);
+        }
+        // Chief complaint, history of present illness, etc.
+        Element historyTable = doc.getElementById(HISTORY_TABLE_ID);
+        if (historyTable != null) {
+            BeiLunHtmlAnalysisUtil.tableStyle2InsertMap(historyTable, map);
+        }
+        // Physical examination table (1).
+        Element tgjcTableElement = doc.getElementById(PHYSICAL_EXAM_TABLE_ID);
+        if (tgjcTableElement != null) {
+            BeiLunHtmlAnalysisUtil.tableStyle2InsertMap(tgjcTableElement, map);
+        }
+
+        // Diagnosis table. Constant-first equals keeps a null title from throwing.
+        String disTableElementId = GYNECOLOGY_REC_TITLE.equals(recTitle)
+                ? GYNECOLOGY_DIAGNOSIS_TABLE_ID
+                : DIAGNOSIS_TABLE_ID;
+        Element disTableElement = doc.getElementById(disTableElementId);
+        if (disTableElement != null) {
+            BeiLunHtmlAnalysisUtil.tableStyle1InsertMap(disTableElement, map);
+        }
+        disHandleExt(map);
+
+        // Doctor signature and signature time; must run before the exam section,
+        // because that section moves nodes out of the parsed document.
+        if (disTableElement != null) {
+            putDoctorSignature(disTableElement, map);
+        }
+
+        // Specialty examination and auxiliary examination.
+        if (tgjcTableElement != null) {
+            putExaminationSections(tgjcTableElement, map);
+        }
+        return map;
+    }
+
+    /**
+     * Stores the doctor's signature image and the signature time.
+     * <p>
+     * The signature is the first {@code image}/{@code img} found in a sibling that follows the
+     * diagnosis table; the signature time is read from the element right after that sibling.
+     *
+     * @param disTableElement the diagnosis table, never {@code null}
+     * @param map             receives the entries "医生签名" and "医生签名时间" when present
+     */
+    private static void putDoctorSignature(Element disTableElement, Map<String, String> map) {
+        Element docSignElement = null;
+        Element docSignTimeElement = null;
+        for (Element sibling : disTableElement.nextElementSiblings()) {
+            docSignElement = sibling.selectFirst("image,img");
+            if (docSignElement != null) {
+                docSignTimeElement = sibling.nextElementSibling();
+                break;
+            }
+        }
+        if (docSignElement != null) {
+            map.put("医生签名", docSignElement.outerHtml());
+        }
+        if (docSignTimeElement != null) {
+            map.put("医生签名时间", BeiLunHtmlAnalysisUtil.elementLayer1ToStr(docSignTimeElement, false).trim());
+        }
+    }
+
+    /**
+     * Collects the non-table elements between the physical examination table and the diagnosis
+     * table, flattens them to text and cuts that text by the section titles.
+     *
+     * @param tgjcTableElement the physical examination table, never {@code null}
+     * @param map              receives the entries produced by {@code CommonAnalysisUtil.cutByTitles}
+     */
+    private static void putExaminationSections(Element tgjcTableElement, Map<String, String> map) {
+        Element yuElement = new Element("div");
+        for (Element sibling : tgjcTableElement.nextElementSiblings()) {
+            if (sibling.id().contains(DIAGNOSIS_TABLE_ID)) {
+                break;
+            }
+            if (sibling.tagName().equals("table")) {
+                continue;
+            }
+            // appendTo re-parents the node, i.e. it is removed from the parsed document.
+            sibling.appendTo(yuElement);
+        }
+        String yuText = BeiLunHtmlAnalysisUtil.blockDivToStr(yuElement, true)
+                .replace("体  格  检  查  表 (二)", "")
+                .replace("(补充及专科情况)", "")
+                .replace("辅  助  检  查", "辅助检查:")
+                .replace("诊断:", "")
+                .replace("补充专科情况", "补充专科情况:")
+                .trim();
+        CommonAnalysisUtil.cutByTitles(yuText, Lists.newArrayList("专科检查", "辅助检查"), 0, map);
+    }
+
+    /**
+     * Splits a diagnosis value that still contains the other diagnosis heading.
+     * <p>
+     * When the value stored under one of the keys contains one of the keys at a position greater
+     * than zero, the text before that heading stays with the original key and the text after the
+     * heading (plus one separator character) is stored under the embedded key.
+     *
+     * @param map the map to read from and update in place
+     */
+    private void disHandleExt(Map<String, String> map) {
+        String[] keys = { "补充诊断", "修正诊断" };
+        for (String key : keys) {
+            String value = map.get(key);
+            if (StringUtil.isNotBlank(value)) {
+                for (String embeddedKey : keys) {
+                    int index = value.indexOf(embeddedKey);
+                    if (index > 0) {
+                        // Skip the heading and its separator; clamp so a heading at the very
+                        // end of the value yields "" instead of an out-of-range substring.
+                        int valueStart = Math.min(index + embeddedKey.length() + 1, value.length());
+                        map.put(embeddedKey, value.substring(valueStart));
+                        map.put(key, value.substring(0, index));
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Manual smoke test: parses a local sample file and prints every extracted entry.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        String html = FileUtil.fileRead("C:/Users/Administrator/Desktop/宁波/病例导出/大病历-神经内科-神经内科-2258458.html");
+        String recTitle = "大病历-神经内科";
+        BeiLunBeHospitalizedHtmlAnalysis test = new BeiLunBeHospitalizedHtmlAnalysis();
+        Map<String, String> map = test.analysis(html, recTitle);
+        map.forEach((key, value) -> System.out.println(key + "-----" + value));
+    }
+
+}

+ 141 - 0
trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunHtmlAnalysisUtil.java

@@ -0,0 +1,141 @@
+package com.lantone.qc.trans.beilun.util;
+
+import com.google.common.collect.Lists;
+import com.lantone.qc.pub.util.StringUtil;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Static helpers that flatten jsoup elements to text and read table cells into a map.
+ *
+ * @author: rengb
+ * @time: 2020/9/15 16:31
+ */
+public class BeiLunHtmlAnalysisUtil {
+
+    /** Full-width colon (U+FF1A), accepted as a key/value separator besides the ASCII colon. */
+    private static final char FULL_WIDTH_COLON = '\uFF1A';
+    /** Non-breaking space (U+00A0). */
+    private static final char NBSP = '\u00A0';
+    /** Characters stripped from a key cell: ASCII colon, full-width colon and space. */
+    private static final String KEY_NOISE_REGEX = "[:\uFF1A ]";
+
+    /**
+     * Table style 1: key and value share one {@code td}, separated by a colon; line breaks
+     * are kept.
+     * <p>
+     * A cell is skipped when it has no child element, when its text is blank, or when no colon
+     * is found after at least one leading character. A {@code null} table or a table without
+     * {@code tbody} adds nothing.
+     *
+     * @param tableElement the table to read, may be {@code null}
+     * @param map          receives one entry per parsed cell
+     */
+    public static void tableStyle1InsertMap(Element tableElement, Map<String, String> map) {
+        for (Element tdElement : collectCells(tableElement)) {
+            // children() only counts elements; childNodeSize() would also count bare text
+            // nodes and let child(0) throw on a text-only cell.
+            if (tdElement.children().isEmpty()) {
+                continue;
+            }
+            String tdText = blockDivToStr(tdElement.child(0), true);
+            if (tdText.endsWith("\n")) {
+                tdText = tdText.substring(0, tdText.length() - 1);
+            }
+            if (StringUtil.isBlank(tdText)) {
+                continue;
+            }
+            int colonIndex = tdText.indexOf(':');
+            if (colonIndex == -1) {
+                // NOTE(review): the previous fallback searched for the very same character
+                // again; the full-width colon is the presumed intent - confirm.
+                colonIndex = tdText.indexOf(FULL_WIDTH_COLON);
+            }
+            if (colonIndex < 1) {
+                continue;
+            }
+            map.put(
+                    StringUtil.removeBlank(tdText.substring(0, colonIndex)),
+                    tdText.substring(colonIndex + 1)
+            );
+        }
+    }
+
+    /**
+     * Table style 2: cells are paired in document order, the first of each pair is the key and
+     * the second is the value; line breaks are dropped.
+     * <p>
+     * A cell without child elements counts as an empty string. A pair whose key is blank is
+     * not stored. A {@code null} table or a table without {@code tbody} adds nothing.
+     *
+     * @param tableElement the table to read, may be {@code null}
+     * @param map          receives one entry per key/value pair
+     */
+    public static void tableStyle2InsertMap(Element tableElement, Map<String, String> map) {
+        String key = null;
+        int index = 0;
+        for (Element tdElement : collectCells(tableElement)) {
+            String text = tdElement.children().isEmpty()
+                    ? ""
+                    : blockDivToStr(tdElement.child(0), false);
+            if (index % 2 == 0) {
+                key = text.replaceAll(KEY_NOISE_REGEX, "");
+            } else if (StringUtil.isNotBlank(key)) {
+                map.put(key, text);
+            }
+            index++;
+        }
+    }
+
+    /**
+     * Flattens a block element whose children are line-like elements.
+     *
+     * @param divElement  the block element
+     * @param isLineBreak {@code true} to append a line feed after every child; {@code false} to
+     *                    trim every child's text and concatenate without separators
+     * @return the concatenated text
+     */
+    public static String blockDivToStr(Element divElement, boolean isLineBreak) {
+        StringBuilder text = new StringBuilder();
+        for (Element childElement : divElement.children()) {
+            String line = elementLayer1ToStr(childElement, false);
+            if (isLineBreak) {
+                text.append(line).append("\n");
+            } else {
+                text.append(line.trim());
+            }
+        }
+        return text.toString();
+    }
+
+    /**
+     * Converts an element to a string by visiting only its direct child elements (or the
+     * element itself when it has none).
+     * <p>
+     * {@code img}/{@code image} elements are emitted as their outer HTML, everything else as
+     * its text.
+     *
+     * @param element     the element to convert
+     * @param isLineBreak {@code true} to append a line feed after every visited element
+     * @return the resulting string with non-breaking spaces replaced by plain spaces
+     */
+    public static String elementLayer1ToStr(Element element, boolean isLineBreak) {
+        Elements childElements = element.children();
+        List<Element> parts = Lists.newArrayList();
+        if (childElements.isEmpty()) {
+            parts.add(element);
+        } else {
+            parts.addAll(childElements);
+        }
+        StringBuilder text = new StringBuilder();
+        for (Element part : parts) {
+            String tagName = part.tagName();
+            if (tagName.equals("img") || tagName.equals("image")) {
+                text.append(part.outerHtml());
+            } else {
+                text.append(part.text());
+            }
+            if (isLineBreak) {
+                text.append("\n");
+            }
+        }
+        // NOTE(review): the previous replacement mapped a plain space to a plain space (a
+        // no-op); normalising U+00A0 to a plain space is the presumed intent - confirm.
+        return text.toString().replace(NBSP, ' ');
+    }
+
+    /**
+     * Returns all cells of a table: the children of every child of its first {@code tbody}.
+     *
+     * @param tableElement the table, may be {@code null}
+     * @return the cells in document order; empty when the table or its {@code tbody} is missing
+     */
+    private static List<Element> collectCells(Element tableElement) {
+        List<Element> cells = Lists.newArrayList();
+        if (tableElement == null) {
+            return cells;
+        }
+        Element tbodyElement = tableElement.selectFirst("tbody");
+        if (tbodyElement == null) {
+            return cells;
+        }
+        for (Element trElement : tbodyElement.children()) {
+            cells.addAll(trElement.children());
+        }
+        return cells;
+    }
+
+}

+ 1 - 1
trans/src/main/java/com/lantone/qc/trans/beilun/util/CommonAnalysisUtil.java

@@ -126,7 +126,7 @@ public class CommonAnalysisUtil {
      * @param depth        递归深度,也就是titles取值时的下标值
      * @param structureMap 存储结构化数据
      */
-    private static void cutByTitles(String line, List<String> titles, int depth, Map<String, String> structureMap) {
+    public static void cutByTitles(String line, List<String> titles, int depth, Map<String, String> structureMap) {
         if (depth > titles.size()) {
             return;
         }