|
@@ -1,6 +1,8 @@
|
|
package com.lantone.qc.trans.ningbozhenhai.util;
|
|
package com.lantone.qc.trans.ningbozhenhai.util;
|
|
|
|
|
|
import com.google.common.collect.Lists;
|
|
import com.google.common.collect.Lists;
|
|
|
|
+import com.lantone.qc.pub.util.StringUtil;
|
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
|
|
|
import java.util.*;
|
|
import java.util.*;
|
|
import java.util.stream.Collectors;
|
|
import java.util.stream.Collectors;
|
|
@@ -10,6 +12,7 @@ import java.util.stream.Collectors;
|
|
* @Author : zhaops
|
|
* @Author : zhaops
|
|
* @Date 2022/7/25 15:56
|
|
* @Date 2022/7/25 15:56
|
|
*/
|
|
*/
|
|
|
|
+@Slf4j
|
|
public class CommonAnalysisUtil {
|
|
public class CommonAnalysisUtil {
|
|
/**
|
|
/**
|
|
* 将title根据在文本中的位置排序
|
|
* 将title根据在文本中的位置排序
|
|
@@ -50,4 +53,52 @@ public class CommonAnalysisUtil {
|
|
titles = Lists.newArrayList(titleIndex.values());
|
|
titles = Lists.newArrayList(titleIndex.values());
|
|
return titles;
|
|
return titles;
|
|
}
|
|
}
|
|
-}
|
|
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 根据文书各标题截取相应文本,存入structmap中
|
|
|
|
+ *
|
|
|
|
+ * @param line 原始文本
|
|
|
|
+ * @param titles 文书各标题
|
|
|
|
+ * @param depth 递归深度,也就是titles取值时的下标值
|
|
|
|
+ * @param structureMap 存储结构化数据
|
|
|
|
+ */
|
|
|
|
+ public static void cutByTitles(String line, List<String> titles, int depth, Map<String, String> structureMap) {
|
|
|
|
+ if (depth > titles.size() || titles.size() == 0) {
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+ String beforeTitle = null, title = null, newTitle = null, value = null;
|
|
|
|
+ beforeTitle = StringUtil.removeBlank(titles.get(Math.max(depth - 1, 0)));
|
|
|
|
+ title = titles.get(Math.min(depth, titles.size() - 1));
|
|
|
|
+ if (depth == titles.size()) {
|
|
|
|
+ /*if (line.contains("\n")) {
|
|
|
|
+ line = line.split("\n")[0];
|
|
|
|
+ }
|
|
|
|
+ */
|
|
|
|
+ value = line.replace("\n", "");
|
|
|
|
+ if (StringUtil.isBlank(structureMap.get(beforeTitle))) {
|
|
|
|
+ log.error("key:" + beforeTitle + "\n value:" + StringUtil.trim(value));
|
|
|
|
+ structureMap.put(beforeTitle, StringUtil.trim(value));
|
|
|
|
+ }
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+ if (line.contains(title + ":") || line.contains(title + ":")) {
|
|
|
|
+ if (line.contains(title + ":")) {
|
|
|
|
+ newTitle = title + ":";
|
|
|
|
+ } else {
|
|
|
|
+ newTitle = title + ":";
|
|
|
|
+ }
|
|
|
|
+ if (depth > 0) {
|
|
|
|
+ value = line.substring(0, line.indexOf(newTitle));
|
|
|
|
+ if (StringUtil.isBlank(structureMap.get(beforeTitle))) {
|
|
|
|
+ log.error("key:" + beforeTitle + "\n value:" + StringUtil.trim(value).replace("\n", ""));
|
|
|
|
+ structureMap.put(beforeTitle, StringUtil.trim(value).replace("\n", ""));
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ line = line.substring(line.indexOf(newTitle) + newTitle.length());
|
|
|
|
+ depth++;
|
|
|
|
+ } else {
|
|
|
|
+ titles.remove(depth);
|
|
|
|
+ }
|
|
|
|
+ cutByTitles(line, titles, depth, structureMap);
|
|
|
|
+ }
|
|
|
|
+}
|