|
@@ -1,5 +1,13 @@
|
|
|
package com.lantone.qc.trans.beilun.util;
|
|
|
|
|
|
+import com.google.common.collect.Lists;
|
|
|
+import com.google.common.collect.Maps;
|
|
|
+import org.jsoup.Jsoup;
|
|
|
+import org.jsoup.nodes.Document;
|
|
|
+import org.jsoup.nodes.Element;
|
|
|
+import org.jsoup.select.Elements;
|
|
|
+
|
|
|
+import java.util.List;
|
|
|
import java.util.Map;
|
|
|
|
|
|
/**
|
|
@@ -11,7 +19,62 @@ public class BeiLunLeaveHospitalHtmlAnalysis implements BeiLunHtmlAnalysis {
|
|
|
|
|
|
@Override
|
|
|
public Map<String, String> analysis(String... args) {
|
|
|
- return null;
|
|
|
+ List<String> titles = Lists.newArrayList("入院日期", "出院日期", "住院天数", "入院诊断", "出院诊断",
|
|
|
+ "入院情况", "入院后检查化验结果", "诊疗经过", "出院计划", "病理检查结果", "出院情况", "治疗效果", "出院医嘱", "医师签名", "时间");
|
|
|
+ Map<String, String> structmap = Maps.newLinkedHashMap();
|
|
|
+ String html = args[0];
|
|
|
+ String recTitle = args[1];
|
|
|
+ Document document = Jsoup.parse(html);
|
|
|
+ List<String> htmlText = Lists.newArrayList();
|
|
|
+ Elements elements = document.select("div");
|
|
|
+ for (Element element : elements) {
|
|
|
+ String text = element.text();
|
|
|
+ htmlText.add(text);
|
|
|
+ }
|
|
|
+ htmlText.remove(0);//去除第一个div内容
|
|
|
+ StringBuffer sb = new StringBuffer();
|
|
|
+ for (String line : htmlText) {
|
|
|
+ String text = line.replaceAll("[ ]", "");
|
|
|
+ if (text.length() == 0) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ sb.append(text).append("\n");
|
|
|
+ }
|
|
|
+ cutByTitles(sb.toString(), titles, 0, structmap);
|
|
|
+ structmap.put("rec_title=", recTitle);
|
|
|
+ return structmap;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 根据文书各标题截取相应文本,存入structmap中
|
|
|
+ *
|
|
|
+ * @param line 原始文本
|
|
|
+ * @param titles 文书各标题
|
|
|
+ * @param depth 递归深度,也就是titles取值时的下标值
|
|
|
+ * @param structmap 存储结构化数据
|
|
|
+ */
|
|
|
+ private void cutByTitles(String line, List<String> titles, int depth, Map<String, String> structmap) {
|
|
|
+ if (depth > titles.size()) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ String beforeTitle = null, title = null, newTitle = null, value = null;
|
|
|
+ beforeTitle = titles.get(Math.max(depth - 1, 0));
|
|
|
+ title = titles.get(Math.min(depth, titles.size() - 1));
|
|
|
+ newTitle = title + ":";
|
|
|
+ if (depth == titles.size()) {
|
|
|
+ value = line.substring(0, line.indexOf("\n"));
|
|
|
+ structmap.put(beforeTitle, value.trim());
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ if (line.contains(newTitle)) {
|
|
|
+ if (depth > 0) {
|
|
|
+ value = line.substring(0, line.indexOf(newTitle));
|
|
|
+ structmap.put(beforeTitle, value.trim());
|
|
|
+ }
|
|
|
+ line = line.substring(line.indexOf(newTitle) + newTitle.length());
|
|
|
+ depth++;
|
|
|
+ cutByTitles(line, titles, depth, structmap);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
}
|