|
@@ -1,9 +1,16 @@
|
|
package com.lantone.qc.trans.comsis;
|
|
package com.lantone.qc.trans.comsis;
|
|
|
|
|
|
|
|
+import com.google.common.collect.Lists;
|
|
|
|
+import com.lantone.qc.pub.model.keys.ModelStandardKeys;
|
|
|
|
+import com.lantone.qc.pub.util.FileUtil;
|
|
|
|
+import com.lantone.qc.pub.util.ListUtil;
|
|
|
|
+
|
|
import java.util.HashMap;
|
|
import java.util.HashMap;
|
|
import java.util.LinkedHashMap;
|
|
import java.util.LinkedHashMap;
|
|
import java.util.List;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Map;
|
|
|
|
+import java.util.regex.Matcher;
|
|
|
|
+import java.util.regex.Pattern;
|
|
|
|
|
|
import static java.util.Map.Entry.comparingByValue;
|
|
import static java.util.Map.Entry.comparingByValue;
|
|
import static java.util.stream.Collectors.toMap;
|
|
import static java.util.stream.Collectors.toMap;
|
|
@@ -26,7 +33,7 @@ public class Preproc {
|
|
* @return
|
|
* @return
|
|
*/
|
|
*/
|
|
public static Map<String, String> extract_doc_pub(boolean isProgress, List<String> title, String line) {
|
|
public static Map<String, String> extract_doc_pub(boolean isProgress, List<String> title, String line) {
|
|
- line = line.replaceAll("[ *| *| *]*", "");
|
|
|
|
|
|
+// line = line.replaceAll("[ *| *| *]*", "");
|
|
int pos = 0;
|
|
int pos = 0;
|
|
int ln_pos = 0;
|
|
int ln_pos = 0;
|
|
String item = "";
|
|
String item = "";
|
|
@@ -37,6 +44,12 @@ public class Preproc {
|
|
Map<String, Integer> sorted = new HashMap<>();
|
|
Map<String, Integer> sorted = new HashMap<>();
|
|
|
|
|
|
for (String key : title) {
|
|
for (String key : title) {
|
|
|
|
+ Pattern pattern = Pattern.compile(key);
|
|
|
|
+ Matcher matcher=pattern.matcher(line);
|
|
|
|
+ while(matcher.find()){
|
|
|
|
+ key = matcher.group(0);
|
|
|
|
+ }
|
|
|
|
+
|
|
pos = 0;
|
|
pos = 0;
|
|
while (line.indexOf(key, pos) >= 0 && null == sorted.get(key)) {
|
|
while (line.indexOf(key, pos) >= 0 && null == sorted.get(key)) {
|
|
pos = line.indexOf(key, pos);
|
|
pos = line.indexOf(key, pos);
|