zhaops 3 года назад
Родитель
Коммит
5d528543b4

+ 57 - 0
trans/src/main/java/com/lantone/qc/trans/ningbozhenhai/util/CommonAnalysisUtil.java

@@ -0,0 +1,57 @@
+package com.lantone.qc.trans.ningbozhenhai.util;
+
+import com.google.common.collect.Lists;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * @Description : Utility holding {@code sortTitles}, which orders section titles by
+ *                the position at which they occur in a text.
+ * @Author : zhaops
+ * @Date 2022/7/25 15:56
+ */
+public class CommonAnalysisUtil {
+    /**
+     * Orders titles by where they appear in the text.
+     *
+     * A title counts as present only where it is immediately followed by ":".
+     * Titles are tried longest first; every hit is recorded under its character
+     * offset and the matched title text (not the colon) is then overwritten with
+     * '*' in a local copy of the text, so the same characters cannot be matched
+     * again by a later search.
+     *
+     * The input list is not modified: sorting is done on a freshly collected list.
+     *
+     * NOTE(review): the two indexOf calls in the loop are identical as shown here;
+     * presumably one of them was meant to look for a different delimiter (for
+     * example a full-width colon) - TODO confirm against the repository.
+     *
+     * NOTE(review): an empty title, or a title made only of '*', leaves the text
+     * unchanged after masking, so the search finds the same offset again and the
+     * loop does not advance. Callers should not pass such titles.
+     *
+     * @param titles  candidate titles; must not be null and must not contain null
+     * @param content text to search; must not be null
+     * @return a new list of the matched titles in ascending order of match offset;
+     *         a title appears once per recorded occurrence, and titles that are
+     *         never found followed by ":" are omitted
+     */
+    public static List<String> sortTitles(List<String> titles, String content) {
+        titles = titles.stream().sorted(Comparator.comparing(i -> i.length())) // ascending by length, into a new list
+                .collect(Collectors.toList());
+        Collections.reverse(titles); // longest titles are now processed first
+
+        Map<Integer, String> titleIndex = new TreeMap<>(); // match offset -> title, iterated in ascending offset order
+        int index, index_1, index_2, firstIndex;
+        for (String title : titles) {
+            index = 0; // each title is searched from the start of the (already masked) text
+            firstIndex = -1;
+            Boolean firstMatch = false;
+            while (index >= 0 && index < content.length()) {
+                index_1 = content.indexOf(title + ":", index);
+                index_2 = content.indexOf(title + ":", index); // NOTE(review): same search as the line above - confirm intended delimiter
+                index = Math.max(index_1, index_2); // -1 only when both searches fail
+                if (index < 0) {
+                    break;
+                }
+                if (firstIndex == -1 && index > firstIndex) { // true on the first hit for this title (index >= 0 here)
+                    firstIndex = index;
+                    firstMatch = true;
+                }
+                if (firstMatch) { // stays true after the first hit, so every hit is recorded
+                    titleIndex.put(index, title);
+                }
+                StringBuffer sb = new StringBuffer(title.length()); // mask of '*' with the same length as the title
+                for (int i = 0; i < title.length(); i++) {
+                    sb.append('*');
+                }
+                content = content.substring(0, index) + sb.toString() + content.substring(index + title.length()); // mask the title, keep the colon; offsets are unchanged
+
+            }
+        }
+        titles = Lists.newArrayList(titleIndex.values()); // values come out in ascending key (offset) order
+        return titles;
+    }
+}

+ 1 - 1
trans/src/main/java/com/lantone/qc/trans/ningbozhenhai/util/FirstCourseRecordHtmlAnalysis.java

@@ -34,7 +34,7 @@ public class FirstCourseRecordHtmlAnalysis implements HtmlAnalysis {
                     .replace("第1页", "");
             CommonAnalysisUtil.extractWardInfo(recTitle, htmlContent, structureMap);
             if (StringUtil.isNotBlank(htmlContent)) {
-                titles = CommonAnalysisUtil.sortTitles(titles, htmlContent);
+                titles = com.lantone.qc.trans.ningbozhenhai.util.CommonAnalysisUtil.sortTitles(titles, htmlContent);
                 CommonAnalysisUtil.cutByTitles(htmlContent, titles, 0, structureMap);
                 if (!structureMap.containsKey("诊断依据")) {
                     String zdyj = "";