Browse Source

北仑医院全科医学科出院记录(出院小结)解析html

huj 4 years ago
parent
commit
50ac6e4349

+ 64 - 1
trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunLeaveHospitalHtmlAnalysis.java

@@ -1,5 +1,13 @@
 package com.lantone.qc.trans.beilun.util;
 
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -11,7 +19,62 @@ public class BeiLunLeaveHospitalHtmlAnalysis implements BeiLunHtmlAnalysis {
 
     @Override
     public Map<String, String> analysis(String... args) {
-        return null;
+        // Parses a discharge-record HTML page into an insertion-ordered map of section title -> text.
+        // args[0] is the HTML source and args[1] is the record title; both are required.
+        // Returns the map filled by cutByTitles plus one extra entry holding the record title.
+        List<String> titles = Lists.newArrayList("入院日期", "出院日期", "住院天数", "入院诊断", "出院诊断",
+                "入院情况", "入院后检查化验结果", "诊疗经过", "出院计划", "病理检查结果", "出院情况", "治疗效果", "出院医嘱", "医师签名", "时间");
+        Map<String, String> structmap = Maps.newLinkedHashMap();
+        String html = args[0];
+        String recTitle = args[1];
+        Document document = Jsoup.parse(html);
+        // Collect the text of every div element, in document order.
+        List<String> htmlText = Lists.newArrayList();
+        Elements elements = document.select("div");
+        for (Element element : elements) {
+            htmlText.add(element.text());
+        }
+        // Drop the first div's content. Guarded so that HTML without any div
+        // yields an empty result instead of an IndexOutOfBoundsException.
+        if (!htmlText.isEmpty()) {
+            htmlText.remove(0);
+        }
+        // Join the non-empty lines with '\n' so cutByTitles can work on a single string.
+        StringBuilder sb = new StringBuilder();
+        for (String line : htmlText) {
+            // NOTE(review): the character class presumably lists several whitespace variants
+            // (e.g. plain / non-breaking / full-width space) - verify against the original file.
+            String text = line.replaceAll("[   ]", "");
+            if (text.isEmpty()) {
+                continue;
+            }
+            sb.append(text).append("\n");
+        }
+        cutByTitles(sb.toString(), titles, 0, structmap);
+        // NOTE(review): the trailing '=' in this key looks unintended - confirm with the consumers
+        // of this map before changing it.
+        structmap.put("rec_title=", recTitle);
+        return structmap;
+    }
+
+    /**
+     * Splits the text at each title marker (the title followed by ":") and stores the text
+     * following every located title in structmap, keyed by that title.
+     * <p>
+     * Titles are searched in list order, each one only in the text after the previous match.
+     * A title whose marker cannot be found is skipped, so the remaining titles are still
+     * processed and the text simply stays with the last title that was located.
+     * <p>
+     * The value of the final title in the list is cut at the first line break; for any other
+     * title that ends up being the last one located, the whole remaining text is stored.
+     *
+     * @param line      raw text to split
+     * @param titles    section titles of the record, in the order they are expected to appear
+     * @param depth     index in titles of the first title to search for; when greater than 0,
+     *                  the text before the next located title is stored under titles[depth - 1]
+     * @param structmap receives the extracted title -> value pairs (values are trimmed)
+     */
+    private void cutByTitles(String line, List<String> titles, int depth, Map<String, String> structmap) {
+        if (line == null || titles == null || titles.isEmpty() || depth < 0 || depth > titles.size()) {
+            return;
+        }
+        String remaining = line;
+        // Index of the title that owns the text currently at the head of 'remaining'; -1 if none yet.
+        int ownerIndex = depth - 1;
+        for (int i = depth; i < titles.size(); i++) {
+            String marker = titles.get(i) + ":";
+            int markerStart = remaining.indexOf(marker);
+            if (markerStart < 0) {
+                // Title missing from the text: skip it instead of abandoning all later titles.
+                continue;
+            }
+            if (ownerIndex >= 0) {
+                structmap.put(titles.get(ownerIndex), remaining.substring(0, markerStart).trim());
+            }
+            ownerIndex = i;
+            remaining = remaining.substring(markerStart + marker.length());
+        }
+        if (ownerIndex < 0) {
+            // No title was located at all; nothing to store.
+            return;
+        }
+        String value = remaining;
+        if (ownerIndex == titles.size() - 1) {
+            // The final title only keeps its first line; tolerate text without a line break.
+            int lineEnd = remaining.indexOf("\n");
+            if (lineEnd >= 0) {
+                value = remaining.substring(0, lineEnd);
+            }
+        }
+        structmap.put(titles.get(ownerIndex), value.trim());
     }
 
 }