Jelajahi Sumber

1.解析html优化

huj 4 tahun lalu
induk
komit
37964d8a0b

+ 18 - 2
trans/src/main/java/com/lantone/qc/trans/beilun/util/BeiLunConsultationHtmlAnalysis.java

@@ -6,6 +6,7 @@ import com.lantone.qc.pub.util.StringUtil;
 
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 /**
  * @Description:
@@ -53,9 +54,24 @@ public class BeiLunConsultationHtmlAnalysis implements BeiLunHtmlAnalysis {
                         "填写时间", "主持科室小结(MDT的最终诊治决议)", "科主任(主持人)签名", "记录人(主管医师)签字", "日期");
                 htmlContent = CommonAnalysisUtil.html2String(html);
                 if (StringUtil.isNotBlank(htmlContent)) {
-                    htmlContent = htmlContent.replaceAll("[   ]", " ");
-                    CommonAnalysisUtil.html2StructureMap(titles, htmlContent, structureMap);
+                    htmlContent = htmlContent.replaceAll("[   ]", " ").replace("住院/门诊号", "");
+                    StringBuffer sbf = new StringBuffer();
+                    List<String> distinctText = Lists.newArrayList(htmlContent.split(" ")).stream().distinct().collect(Collectors.toList());
+                    for (String text : distinctText) {
+                        sbf.append(text).append(" ");
+                    }
+                    CommonAnalysisUtil.html2StructureMapNoColon(titles, sbf.toString(), structureMap);
                 }
+                if (StringUtil.isNotBlank(structureMap.get("主持科室小结(MDT的最终诊治决议)"))) {
+                    String[] valueArray = structureMap.get("主持科室小结(MDT的最终诊治决议)").split(" ");
+                    structureMap.put("主持科室小结(MDT的最终诊治决议)", valueArray[1]);
+                    String v = valueArray[2];
+                    String[] vArr = v.split(":");
+                    if (vArr.length > 1) {
+                        structureMap.put(vArr[0], vArr[1]);
+                    }
+                }
+
                 structureMap.put("rec_title=", recTitle);
                 break;
             default:

+ 10 - 19
trans/src/main/java/com/lantone/qc/trans/beilun/util/CommonAnalysisUtil.java

@@ -127,7 +127,7 @@ public class CommonAnalysisUtil {
      * @param structureMap 存储结构化数据
      */
     public static void cutByTitles(String line, List<String> titles, int depth, Map<String, String> structureMap) {
-        if (depth > titles.size()) {
+        if (depth > titles.size() || titles.size() == 0) {
             return;
         }
         String beforeTitle = null, title = null, newTitle = null, value = null;
@@ -146,7 +146,7 @@ public class CommonAnalysisUtil {
             }
             if (depth > 0) {
                 value = line.substring(0, line.indexOf(newTitle));
-                structureMap.put(beforeTitle, value.trim());
+                structureMap.put(beforeTitle.replace(" ", ""), value.trim());
             }
             line = line.substring(line.indexOf(newTitle) + newTitle.length());
             depth++;
@@ -182,32 +182,23 @@ public class CommonAnalysisUtil {
     /**
      * 标题没有冒号版本
      */
-    public void html2StructureMapNoColon(List<String> titles, List<String> htmlText, Map<String, String> structureMap) {
-        StringBuffer sb = new StringBuffer();
-        for (String line : htmlText) {
-            String text = line.replaceAll("[   ]", " ");
-            if (text.length() == 0) {
-                continue;
-            }
-            sb.append(text).append("\n");
-        }
-        String content = sb.toString();
-        List<String> sortTitles = sortTitlesNoColon(titles, content);
-        cutByTitlesNoColon(sb.toString(), sortTitles, 0, structureMap);
+    public static void html2StructureMapNoColon(List<String> titles, String htmlText, Map<String, String> structureMap) {
+        List<String> sortTitlesNoColon = sortTitlesNoColon(titles, htmlText);
+        cutByTitlesNoColon(htmlText, sortTitlesNoColon, 0, structureMap);
     }
 
     /**
      * 标题没有冒号版本
      */
-    private void cutByTitlesNoColon(String line, List<String> titles, int depth, Map<String, String> structureMap) {
-        if (depth > titles.size()) {
+    private static void cutByTitlesNoColon(String line, List<String> titles, int depth, Map<String, String> structureMap) {
+        if (depth > titles.size() || titles.size() == 0) {
             return;
         }
         String beforeTitle = null, title = null, newTitle = null, value = null;
         beforeTitle = titles.get(Math.max(depth - 1, 0));
         title = titles.get(Math.min(depth, titles.size() - 1));
         if (depth == titles.size()) {
-            value = line.substring(0, line.indexOf("\n"));
+            value = line;
             structureMap.put(beforeTitle, value.trim());
             return;
         }
@@ -215,7 +206,7 @@ public class CommonAnalysisUtil {
             newTitle = title;
             if (depth > 0) {
                 value = line.substring(0, line.indexOf(newTitle));
-                structureMap.put(beforeTitle, value.trim());
+                structureMap.put(beforeTitle.replace(" ", ""), value.trim());
             }
             line = line.substring(line.indexOf(newTitle) + newTitle.length());
             depth++;
@@ -228,7 +219,7 @@ public class CommonAnalysisUtil {
     /**
      * 标题没有冒号版本
      */
-    public List<String> sortTitlesNoColon(List<String> titles, String content) {
+    public static List<String> sortTitlesNoColon(List<String> titles, String content) {
         Map<Integer, String> titleIndex = new TreeMap<>();
         int index;
         for (String title : titles) {