Sfoglia il codice sorgente

北仑医院首次病程录、出院记录(出院小结)解析html

huj 4 anni fa
parent
commit
659066f961

+ 58 - 0
trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunFirstCourseRecordHtmlAnalysis.java

@@ -0,0 +1,58 @@
+package com.lantone.qc.trans.beilun.util;
+
+import com.google.common.collect.Lists;
+import com.lantone.qc.pub.util.StringUtil;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Parses the HTML of a Beilun Hospital first course record into a structured map.
+ * <p>
+ * The text of the content {@code div} elements is collected and passed to
+ * {@link CommonAnalysisUtil#html2StructureMap(List, List)}, which splits it by the
+ * section titles of the record.
+ *
+ * @author: rengb
+ * @time: 2020/9/9 11:24
+ */
+public class BeiLunFirstCourseRecordHtmlAnalysis implements BeiLunHtmlAnalysis {
+    @Autowired
+    CommonAnalysisUtil commonAnalysisUtil;
+
+    /**
+     * Extracts the sections of a first course record from its HTML.
+     *
+     * @param args {@code args[0]} is the HTML of the record; further arguments are
+     *             not used by this implementation
+     * @return the map produced by {@code html2StructureMap} for the collected text,
+     *         with the additional entry {@code "rec_title=" -> "107"}
+     */
+    @Override
+    public Map<String, String> analysis(String... args) {
+        List<String> titles = Lists.newArrayList("(一)病历特点", "(二)诊断依据", "(三)鉴别诊断",
+                "(四)初步诊断", "(五)诊疗计划", "医生签名");
+        String html = args[0];
+        Document document = Jsoup.parse(html);
+        List<String> htmlText = Lists.newArrayList();
+        boolean findTitleMain = false;
+        for (Element element : document.select("div")) {
+            // Element.attr returns "" (never null) when the attribute is absent.
+            String style = element.attr("style");
+            String title = element.attr("title");
+            // The marker checks must run before the blank filter below. Markers are
+            // recognised by an empty style, so filtering blank styles first made both
+            // branches unreachable and no text was ever collected.
+            // NOTE(review): assumes the ".odt" marker div follows the "main" div in
+            // document order - confirm against a sample record.
+            if ("".equals(style) && title.contains(".odt")) {
+                // Unstyled div titled with an .odt name: stop collecting.
+                break;
+            }
+            if ("".equals(style) && "main".equals(title)) {
+                // Unstyled div titled "main": collection starts with the divs after it.
+                findTitleMain = true;
+                continue;
+            }
+            // Only divs that carry both a style and a title are treated as content.
+            if (StringUtil.isBlank(style) || StringUtil.isBlank(title)) {
+                continue;
+            }
+            if (findTitleMain) {
+                htmlText.add(element.text());
+            }
+        }
+        Map<String, String> structureMap = commonAnalysisUtil.html2StructureMap(titles, htmlText);
+        structureMap.put("rec_title=", "107");
+
+        return structureMap;
+    }
+
+}

+ 6 - 44
trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunLeaveHospitalHtmlAnalysis.java

@@ -6,6 +6,7 @@ import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
+import org.springframework.beans.factory.annotation.Autowired;
 
 import java.util.List;
 import java.util.Map;
@@ -16,12 +17,13 @@ import java.util.Map;
  * @time: 2020/9/9 11:24
  */
 public class BeiLunLeaveHospitalHtmlAnalysis implements BeiLunHtmlAnalysis {
+    @Autowired
+    CommonAnalysisUtil commonAnalysisUtil;
 
     @Override
     public Map<String, String> analysis(String... args) {
         List<String> titles = Lists.newArrayList("入院日期", "出院日期", "住院天数", "入院诊断", "出院诊断",
                 "入院情况", "入院后检查化验结果", "诊疗经过", "出院计划", "病理检查结果", "出院情况", "治疗效果", "出院医嘱", "医师签名", "时间");
-        Map<String, String> structmap = Maps.newLinkedHashMap();
         String html = args[0];
         String recTitle = args[1];
         Document document = Jsoup.parse(html);
@@ -32,49 +34,9 @@ public class BeiLunLeaveHospitalHtmlAnalysis implements BeiLunHtmlAnalysis {
             htmlText.add(text);
         }
         htmlText.remove(0);//去除第一个div内容
-        StringBuffer sb = new StringBuffer();
-        for (String line : htmlText) {
-            String text = line.replaceAll("[   ]", "");
-            if (text.length() == 0) {
-                continue;
-            }
-            sb.append(text).append("\n");
-        }
-        cutByTitles(sb.toString(), titles, 0, structmap);
-        structmap.put("rec_title=", "183");
-        return structmap;
-    }
-
-    /**
-     * 根据文书各标题截取相应文本,存入structmap中
-     *
-     * @param line      原始文本
-     * @param titles    文书各标题
-     * @param depth     递归深度,也就是titles取值时的下标值
-     * @param structmap 存储结构化数据
-     */
-    private void cutByTitles(String line, List<String> titles, int depth, Map<String, String> structmap) {
-        if (depth > titles.size()) {
-            return;
-        }
-        String beforeTitle = null, title = null, newTitle = null, value = null;
-        beforeTitle = titles.get(Math.max(depth - 1, 0));
-        title = titles.get(Math.min(depth, titles.size() - 1));
-        newTitle = title + ":";
-        if (depth == titles.size()) {
-            value = line.substring(0, line.indexOf("\n"));
-            structmap.put(beforeTitle, value.trim());
-            return;
-        }
-        if (line.contains(newTitle)) {
-            if (depth > 0) {
-                value = line.substring(0, line.indexOf(newTitle));
-                structmap.put(beforeTitle, value.trim());
-            }
-            line = line.substring(line.indexOf(newTitle) + newTitle.length());
-            depth++;
-            cutByTitles(line, titles, depth, structmap);
-        }
+        Map<String, String> structureMap = commonAnalysisUtil.html2StructureMap(titles, htmlText);
+        structureMap.put("rec_title=", "183");
+        return structureMap;
     }
 
 }

+ 68 - 0
trans/src/main/java/com/lantone/qc/trans/beilun/util/CommonAnalysisUtil.java

@@ -0,0 +1,68 @@
+package com.lantone.qc.trans.beilun.util;
+
+import com.google.common.collect.Maps;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * @Description :
+ * @Author : HUJING
+ * @Date: 2020/9/10 13:48
+ */
+public class CommonAnalysisUtil {
+    /**
+     * 将html内容转换成structureMap
+     *
+     * @param titles
+     * @param htmlText
+     * @return
+     */
+    public Map<String, String> html2StructureMap(List<String> titles, List<String> htmlText) {
+        Map<String, String> structmap = Maps.newLinkedHashMap();
+        StringBuffer sb = new StringBuffer();
+        for (String line : htmlText) {
+            String text = line.replaceAll("[   ]", "");
+            if (text.length() == 0) {
+                continue;
+            }
+            sb.append(text).append("\n");
+        }
+        cutByTitles(sb.toString(), titles, 0, structmap);
+        return structmap;
+    }
+
+    /**
+     * 根据文书各标题截取相应文本,存入structmap中
+     *
+     * @param line      原始文本
+     * @param titles    文书各标题
+     * @param depth     递归深度,也就是titles取值时的下标值
+     * @param structmap 存储结构化数据
+     */
+    private void cutByTitles(String line, List<String> titles, int depth, Map<String, String> structmap) {
+        if (depth > titles.size()) {
+            return;
+        }
+        String beforeTitle = null, title = null, newTitle = null, value = null;
+        beforeTitle = titles.get(Math.max(depth - 1, 0));
+        title = titles.get(Math.min(depth, titles.size() - 1));
+        newTitle = title + ":";
+        if (depth == titles.size()) {
+            value = line.substring(0, line.indexOf("\n"));
+            structmap.put(beforeTitle, value.trim());
+            return;
+        }
+        if (line.contains(newTitle)) {
+            if (depth > 0) {
+                value = line.substring(0, line.indexOf(newTitle));
+                structmap.put(beforeTitle, value.trim());
+            }
+            line = line.substring(line.indexOf(newTitle) + newTitle.length());
+            depth++;
+        } else {
+            titles.remove(depth);
+        }
+        cutByTitles(line, titles, depth, structmap);
+    }
+}