|
@@ -1,292 +0,0 @@
|
|
|
-package com.lantone.qc.trans.hangzhoufubao.util;
|
|
|
-
|
|
|
-import com.google.common.collect.Lists;
|
|
|
-import com.lantone.qc.dbanaly.util.ModuleMappingUtil;
|
|
|
-import com.lantone.qc.pub.util.StringUtil;
|
|
|
-import org.jsoup.nodes.Element;
|
|
|
-import org.jsoup.select.Elements;
|
|
|
-
|
|
|
-import java.util.List;
|
|
|
-import java.util.Map;
|
|
|
-
|
|
|
-/**
|
|
|
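- * @Description: Static helpers that turn jsoup table/div elements of Hangzhou Fubao record pages into map entries and plain text.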
|
|
|
- * @author: rengb
|
|
|
- * @time: 2020/9/15 16:31
|
|
|
- */
|
|
|
-public class hangzhoufubaoHtmlAnalysisUtil {
|
|
|
-
|
|
|
- /**
|
|
|
- * table样式:key和value在同一个td中,以冒号分开;保留换行
|
|
|
- *
|
|
|
- * @param tableElement
|
|
|
- * @param map
|
|
|
- */
|
|
|
- public static void tableStyle1InsertMap(Element tableElement, Map<String, String> map) {
|
|
|
- if (tableElement == null || map == null) {
|
|
|
- return;
|
|
|
- }
|
|
|
- List<Element> tdElements = Lists.newArrayList();
|
|
|
- tableElement.selectFirst("tbody").children().forEach(trElement -> {
|
|
|
- trElement.children().forEach(tdElement -> {
|
|
|
- tdElements.add(tdElement);
|
|
|
- });
|
|
|
- });
|
|
|
- String tdText = null;
|
|
|
- int tdMhIndex = 0;
|
|
|
- for (Element tdElement : tdElements) {
|
|
|
- if (tdElement.childNodeSize() == 0) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- tdText = blockDivToStr(tdElement.child(0), true);
|
|
|
- if (tdText.endsWith("\n")) {
|
|
|
- tdText = tdText.substring(0, tdText.length() - 1);
|
|
|
- }
|
|
|
- if (StringUtil.isBlank(tdText)) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- tdMhIndex = tdText.indexOf(":");
|
|
|
- if (tdMhIndex == -1) {
|
|
|
- tdMhIndex = tdText.indexOf(":");
|
|
|
- }
|
|
|
- if (tdMhIndex < 1) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- if (tdText.contains("姓 名") && tdText.contains("年 龄")) {
|
|
|
- multiColonResolve(tdMhIndex, tdText, "年 龄", map);
|
|
|
- } else if (tdText.contains("姓 名") && tdText.contains("年 龄")) {
|
|
|
- multiColonResolve(tdMhIndex, tdText, "年 龄", map);
|
|
|
- } else if (tdText.contains("姓 名") && tdText.contains("年 龄")) {
|
|
|
- multiColonResolve(tdMhIndex, tdText, "年 龄", map);
|
|
|
- } else if (tdText.contains("姓 名") && tdText.contains("年 龄")) {
|
|
|
- multiColonResolve(tdMhIndex, tdText, "年 龄", map);
|
|
|
- } else if (tdText.contains("产前检查") && tdText.contains("孕/产次")) {
|
|
|
- multiColonResolve(tdMhIndex, tdText, "孕/产次", map);
|
|
|
- } else if (tdText.contains("末次月经") && tdText.contains("预产期")) {
|
|
|
- multiColonResolve(tdMhIndex, tdText, "预产期", map);
|
|
|
- } else {
|
|
|
- map.put(
|
|
|
- StringUtil.removeBlank(tdText.substring(0, tdMhIndex)),
|
|
|
- tdText.length() - 1 <= tdMhIndex ? "" : tdText.substring(tdMhIndex + 1)
|
|
|
- );
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 取得多个字段时,分别存储
|
|
|
- *
|
|
|
- * @param tdText
|
|
|
- * @param text
|
|
|
- */
|
|
|
- public static void multiColonResolve(int tdMhIndex, String tdText, String text, Map<String, String> map) {
|
|
|
- String firstText = tdText.split(text)[0];
|
|
|
- map.put(
|
|
|
- StringUtil.removeBlank(firstText.substring(0, tdMhIndex)),
|
|
|
- firstText.length() - 1 <= tdMhIndex ? "" : firstText.substring(tdMhIndex + 1)
|
|
|
- );
|
|
|
- String secondText = tdText.split(text)[1];
|
|
|
- secondText = secondText.replace(":", "");
|
|
|
- secondText = secondText.replace(":", "");
|
|
|
- map.put(StringUtil.removeBlank(text), secondText);
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * table样式:td两两配对,一个key,一个value;不保留换行
|
|
|
- *
|
|
|
- * @param tableElement
|
|
|
- * @param map
|
|
|
- */
|
|
|
- public static void tableStyle2InsertMap(Element tableElement, Map<String, String> map) {
|
|
|
- if (tableElement == null || map == null) {
|
|
|
- return;
|
|
|
- }
|
|
|
- List<Element> tdElements = Lists.newArrayList();
|
|
|
- tableElement.selectFirst("tbody").children().forEach(trElement -> {
|
|
|
- if (trElement.childNodeSize() != 2) {
|
|
|
- return;
|
|
|
- }
|
|
|
- trElement.children().forEach(tdElement -> {
|
|
|
- tdElements.add(tdElement);
|
|
|
- });
|
|
|
- });
|
|
|
- int index = 0;
|
|
|
- String key = null, value = null, text = null;
|
|
|
- for (Element tdElement : tdElements) {
|
|
|
- if (tdElement.childNodeSize() == 0) {
|
|
|
- text = "";
|
|
|
- } else {
|
|
|
- text = blockDivToStr(tdElement.child(0), false);
|
|
|
- }
|
|
|
- if (index % 2 == 0) {
|
|
|
- key = text.replaceAll("[:: ]", "");
|
|
|
- }
|
|
|
- if (index % 2 == 1) {
|
|
|
- value = text;
|
|
|
- if (StringUtil.isNotBlank(key)) {
|
|
|
- map.put(key, value);
|
|
|
- }
|
|
|
- }
|
|
|
- index++;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 一个大的块状div下包含很多行行状div
|
|
|
- *
|
|
|
- * @param divElement
|
|
|
- * @param isLineBreak 是否保留换行
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static String blockDivToStr(Element divElement, boolean isLineBreak) {
|
|
|
- if (divElement == null) {
|
|
|
- return "";
|
|
|
- }
|
|
|
- StringBuffer sbf = new StringBuffer();
|
|
|
- for (Element childElement : divElement.children()) {
|
|
|
- if (isLineBreak) {
|
|
|
- sbf.append(elementLayer1ToStr(childElement, false)).append("\n");
|
|
|
- } else {
|
|
|
- sbf.append(elementLayer1ToStr(childElement, false).trim());
|
|
|
- }
|
|
|
- }
|
|
|
- return sbf.toString();
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 标签仅遍历第一子层级后转字符串
|
|
|
- *
|
|
|
- * @param element
|
|
|
- * @param isLineBreak 是否保留换行
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static String elementLayer1ToStr(Element element, boolean isLineBreak) {
|
|
|
- if (element == null) {
|
|
|
- return "";
|
|
|
- }
|
|
|
- StringBuffer sbf = new StringBuffer();
|
|
|
- List<Element> elements = Lists.newArrayList();
|
|
|
- Elements childElements = element.children();
|
|
|
- if (childElements.size() == 0) {
|
|
|
- elements.add(element);
|
|
|
- }
|
|
|
- for (Element childElement : childElements) {
|
|
|
- elements.add(childElement);
|
|
|
- }
|
|
|
- for (Element childElement : elements) {
|
|
|
- if (childElement.tagName().equals("img") || childElement.tagName().equals("image")) {
|
|
|
- // sbf.append(childElement.outerHtml());
|
|
|
- sbf.append("—");
|
|
|
- } else {
|
|
|
- sbf.append(childElement.text());
|
|
|
- }
|
|
|
- if (isLineBreak) {
|
|
|
- sbf.append("\n");
|
|
|
- }
|
|
|
- }
|
|
|
- String sbfString = removeSex(sbf.toString().replaceAll(" ", " "));
|
|
|
- return sbfString;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 查询并插入页面模板id
|
|
|
- *
|
|
|
- * @param modeId
|
|
|
- * @param recTitle
|
|
|
- * @param map
|
|
|
- */
|
|
|
- public static void insertModuleId(String modeId, String recTitle, Map<String, String> map) {
|
|
|
- if ((StringUtil.isBlank(modeId) && StringUtil.isBlank(recTitle)) || map == null) {
|
|
|
- return;
|
|
|
- }
|
|
|
- String moduleId = ModuleMappingUtil.getHtmlDataTypeModuleId(recTitle);
|
|
|
- if (StringUtil.isBlank(moduleId)) {
|
|
|
- moduleId = ModuleMappingUtil.getStandardModuleId(modeId);
|
|
|
- }
|
|
|
- map.put("mode_id", moduleId);
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * map中有的value以冒号开头,用此方法来去掉冒号
|
|
|
- *
|
|
|
- * @param map
|
|
|
- */
|
|
|
- public static void mapValueRemoveStartColon(Map<String, String> map) {
|
|
|
- if (map == null) {
|
|
|
- return;
|
|
|
- }
|
|
|
- map.keySet().forEach(key -> {
|
|
|
- if (map.get(key).startsWith(":") || map.get(key).startsWith(":")) {
|
|
|
- map.put(key, map.get(key).replaceFirst("[::]", ""));
|
|
|
- }
|
|
|
- });
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 北仑:入院记录中性别取得去除"性"
|
|
|
- * e.g.(性 别:男性 ->性 别:男)
|
|
|
- *
|
|
|
- * @param sbfString
|
|
|
- */
|
|
|
- public static String removeSex(String sbfString) {
|
|
|
- if (sbfString.contains("性 别:")) {
|
|
|
- sbfString = sbfString.substring(0, sbfString.length() - 1);
|
|
|
- }
|
|
|
- return sbfString;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 杭州妇保自定义xml解析模板(入院记录)
|
|
|
- * @param divElement
|
|
|
- * @param isLineBreak 是否保留换行
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static String blockDivToStr2(Element divElement, boolean isLineBreak) {
|
|
|
- if (divElement == null) {
|
|
|
- return "";
|
|
|
- }
|
|
|
- StringBuffer sbf = new StringBuffer();
|
|
|
- for (Element childElement : divElement.children()) {
|
|
|
- if (isLineBreak) {
|
|
|
- sbf.append(elementLayer1ToStr2(childElement, false)).append("\n");
|
|
|
- } else {
|
|
|
- sbf.append(elementLayer1ToStr2(childElement, false).trim());
|
|
|
- }
|
|
|
- }
|
|
|
- return sbf.toString();
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 余杭三院自定义xml解析模板(入院记录)
|
|
|
- * @param element
|
|
|
- * @param isLineBreak 是否保留换行
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static String elementLayer1ToStr2(Element element, boolean isLineBreak) {
|
|
|
- if (element == null) {
|
|
|
- return "";
|
|
|
- }
|
|
|
- StringBuffer sbf = new StringBuffer();
|
|
|
- List<Element> elements = Lists.newArrayList();
|
|
|
- Elements childElements = element.children();
|
|
|
- if (childElements.size() == 0) {
|
|
|
- elements.add(element);
|
|
|
- }
|
|
|
- for (Element childElement : childElements) {
|
|
|
- elements.add(childElement);
|
|
|
- }
|
|
|
- for (Element childElement : elements) {
|
|
|
- if (childElement.tagName().equals("img") || childElement.tagName().equals("image")) {
|
|
|
- // sbf.append(childElement.outerHtml());
|
|
|
- sbf.append("—");
|
|
|
- } else {
|
|
|
- sbf.append(childElement.text());
|
|
|
- }
|
|
|
- if (isLineBreak) {
|
|
|
- sbf.append("\n");
|
|
|
- }
|
|
|
- }
|
|
|
- String sbfString = removeSex(sbf.toString().replaceAll(" ", " "));
|
|
|
- return sbfString;
|
|
|
- }
|
|
|
-}
|