|
@@ -6,6 +6,7 @@ import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Element;
|
|
import org.jsoup.nodes.Element;
|
|
import org.jsoup.select.Elements;
|
|
import org.jsoup.select.Elements;
|
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
|
|
import java.util.List;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Map;
|
|
@@ -16,12 +17,13 @@ import java.util.Map;
|
|
* @time: 2020/9/9 11:24
|
|
* @time: 2020/9/9 11:24
|
|
*/
|
|
*/
|
|
public class BeiLunLeaveHospitalHtmlAnalysis implements BeiLunHtmlAnalysis {
|
|
public class BeiLunLeaveHospitalHtmlAnalysis implements BeiLunHtmlAnalysis {
|
|
|
|
+ @Autowired
|
|
|
|
+ CommonAnalysisUtil commonAnalysisUtil;
|
|
|
|
|
|
@Override
|
|
@Override
|
|
public Map<String, String> analysis(String... args) {
|
|
public Map<String, String> analysis(String... args) {
|
|
List<String> titles = Lists.newArrayList("入院日期", "出院日期", "住院天数", "入院诊断", "出院诊断",
|
|
List<String> titles = Lists.newArrayList("入院日期", "出院日期", "住院天数", "入院诊断", "出院诊断",
|
|
"入院情况", "入院后检查化验结果", "诊疗经过", "出院计划", "病理检查结果", "出院情况", "治疗效果", "出院医嘱", "医师签名", "时间");
|
|
"入院情况", "入院后检查化验结果", "诊疗经过", "出院计划", "病理检查结果", "出院情况", "治疗效果", "出院医嘱", "医师签名", "时间");
|
|
- Map<String, String> structmap = Maps.newLinkedHashMap();
|
|
|
|
String html = args[0];
|
|
String html = args[0];
|
|
String recTitle = args[1];
|
|
String recTitle = args[1];
|
|
Document document = Jsoup.parse(html);
|
|
Document document = Jsoup.parse(html);
|
|
@@ -32,49 +34,9 @@ public class BeiLunLeaveHospitalHtmlAnalysis implements BeiLunHtmlAnalysis {
|
|
htmlText.add(text);
|
|
htmlText.add(text);
|
|
}
|
|
}
|
|
htmlText.remove(0);//去除第一个div内容
|
|
htmlText.remove(0);//去除第一个div内容
|
|
- StringBuffer sb = new StringBuffer();
|
|
|
|
- for (String line : htmlText) {
|
|
|
|
- String text = line.replaceAll("[ ]", "");
|
|
|
|
- if (text.length() == 0) {
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
- sb.append(text).append("\n");
|
|
|
|
- }
|
|
|
|
- cutByTitles(sb.toString(), titles, 0, structmap);
|
|
|
|
- structmap.put("rec_title=", "183");
|
|
|
|
- return structmap;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /**
|
|
|
|
- * 根据文书各标题截取相应文本,存入structmap中
|
|
|
|
- *
|
|
|
|
- * @param line 原始文本
|
|
|
|
- * @param titles 文书各标题
|
|
|
|
- * @param depth 递归深度,也就是titles取值时的下标值
|
|
|
|
- * @param structmap 存储结构化数据
|
|
|
|
- */
|
|
|
|
- private void cutByTitles(String line, List<String> titles, int depth, Map<String, String> structmap) {
|
|
|
|
- if (depth > titles.size()) {
|
|
|
|
- return;
|
|
|
|
- }
|
|
|
|
- String beforeTitle = null, title = null, newTitle = null, value = null;
|
|
|
|
- beforeTitle = titles.get(Math.max(depth - 1, 0));
|
|
|
|
- title = titles.get(Math.min(depth, titles.size() - 1));
|
|
|
|
- newTitle = title + ":";
|
|
|
|
- if (depth == titles.size()) {
|
|
|
|
- value = line.substring(0, line.indexOf("\n"));
|
|
|
|
- structmap.put(beforeTitle, value.trim());
|
|
|
|
- return;
|
|
|
|
- }
|
|
|
|
- if (line.contains(newTitle)) {
|
|
|
|
- if (depth > 0) {
|
|
|
|
- value = line.substring(0, line.indexOf(newTitle));
|
|
|
|
- structmap.put(beforeTitle, value.trim());
|
|
|
|
- }
|
|
|
|
- line = line.substring(line.indexOf(newTitle) + newTitle.length());
|
|
|
|
- depth++;
|
|
|
|
- cutByTitles(line, titles, depth, structmap);
|
|
|
|
- }
|
|
|
|
|
|
+ Map<String, String> structureMap = commonAnalysisUtil.html2StructureMap(titles, htmlText);
|
|
|
|
+ structureMap.put("rec_title=", "183");
|
|
|
|
+ return structureMap;
|
|
}
|
|
}
|
|
|
|
|
|
}
|
|
}
|