Browse Source

切词工具优化修改

rengb 5 years ago
parent
commit
2c183d5269
1 changed files with 32 additions and 0 deletions
  1. 32 0
      trans/src/main/java/com/lantone/qc/trans/comsis/Preproc.java

+ 32 - 0
trans/src/main/java/com/lantone/qc/trans/comsis/Preproc.java

@@ -1,6 +1,8 @@
 package com.lantone.qc.trans.comsis;
 
+import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.lantone.qc.pub.util.ListUtil;
 import com.lantone.qc.pub.util.StringUtil;
 
 import java.util.HashMap;
@@ -110,4 +112,34 @@ public class Preproc {
         return sectionsNew;
     }
 
+    /**
+     * key特殊处理然后切词,除了首次病程,其他模块可用
+     *
+     * @param isProgress
+     * @param content
+     * @param sourceTitles
+     * @return
+     */
+    public static Map<String, String> getCutWordMap(boolean isProgress, List<String> sourceTitles, String content) {
+        if (StringUtil.isBlank(content)) {
+            return Maps.newHashMap();
+        }
+        List<String> targetTitles = Lists.newArrayList();
+        sourceTitles.forEach(sourceTitle -> {
+            String targetTitle = "";
+            for (int index = 0; index < sourceTitle.length(); index++) {
+                if (index == sourceTitle.length() - 1) {
+                    targetTitle += sourceTitle.substring(index, index + 1);
+                } else {
+                    targetTitle += sourceTitle.substring(index, index + 1) + "[\\s\\p{Zs}]*";
+                }
+            }
+            targetTitles.add(targetTitle);
+        });
+        if (ListUtil.isEmpty(targetTitles)) {
+            return Maps.newHashMap();
+        }
+        return extract_doc_pub(isProgress, targetTitles, content);
+    }
+
 }