|
@@ -23,20 +23,7 @@ public class ParticipleUtil {
|
|
|
LexemePath<Lexeme> lexemes = participle(content);
|
|
|
if (isCombineUnit) {
|
|
|
ParticipleUtil util = new ParticipleUtil();
|
|
|
- lexemes = util.combineValidate(lexemes);
|
|
|
- lexemes = util.joinTime(lexemes);
|
|
|
-
|
|
|
- String year_pattern = "([1-2][0-9]{3}|[0-9]{2})";
|
|
|
- String mouth_day_pattern = "([0-9]{2}|[0-9])";
|
|
|
- String join_pattern = "([-/.]?)";
|
|
|
- String pattern_string = year_pattern + join_pattern + mouth_day_pattern + join_pattern + mouth_day_pattern;
|
|
|
- for (Lexeme l : lexemes) {
|
|
|
- if (l.getProperty().equals(Constants.word_property_number)) {
|
|
|
- if (Pattern.matches(pattern_string,l.getText())) {
|
|
|
- l.setProperty(Constants.word_property_time);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ util.combine(util, lexemes);
|
|
|
}
|
|
|
return lexemes;
|
|
|
}
|
|
@@ -48,6 +35,19 @@ public class ParticipleUtil {
|
|
|
return participle(content, NlpCache.segment_cache);
|
|
|
}
|
|
|
|
|
|
+    /**
+     * Segments {@code content} with the PACS segment cache and then runs the
+     * combine pass over the resulting lexeme path.
+     *
+     * <p>The PACS cache is created on first use, i.e. when
+     * {@code NlpCache.segment_cache_pacs} is still {@code null}.
+     *
+     * <p>NOTE(review): {@code combine} returns {@code void} and reassigns its
+     * own parameter, so the path returned here only reflects the combine pass
+     * if {@code combineValidate}/{@code joinTime} mutate the path in place --
+     * confirm against those methods.
+     *
+     * @param content text to segment
+     * @return the lexeme path obtained from the tokenizer
+     * @throws IOException propagated unchanged from the underlying cache or
+     *         tokenizer calls
+     */
+    public static LexemePath<Lexeme> participlePacs(String content) throws IOException {
+        if (NlpCache.segment_cache_pacs == null) {
+            NlpCache.createSegmentCachePacs();
+        }
+        ParticipleToken token = new ParticipleToken();
+        token.start(new StringReader(content), NlpCache.segment_cache_pacs);
+        try {
+            LexemePath<Lexeme> lexemePath = token.getLexemePath();
+            ParticipleUtil util = new ParticipleUtil();
+            util.combine(util, lexemePath);
+            return lexemePath;
+        } finally {
+            // Always end the tokenizer session, even when the combine pass throws;
+            // previously end() was skipped on any exception after start().
+            token.end();
+        }
+    }
|
|
|
+
|
|
|
public static LexemePath<Lexeme> participle(String content, String path) throws IOException {
|
|
|
Configuration configuration = new DefaultConfig();
|
|
|
NlpCache.segment_cache = configuration.loadMainDict(path);
|
|
@@ -65,6 +65,23 @@ public class ParticipleUtil {
|
|
|
return lexemePath;
|
|
|
}
|
|
|
|
|
|
+    /**
+     * Runs the post-segmentation passes over {@code lexemes}: first
+     * {@code combineValidate}, then {@code joinTime}, and finally re-tags
+     * number lexemes whose text looks like a date.
+     *
+     * <p>A lexeme whose property equals {@code Constants.word_property_number}
+     * is switched to {@code Constants.word_property_time} when its entire text
+     * matches: a year (four digits starting with 1 or 2, or any two digits),
+     * an optional separator ({@code -}, {@code /} or {@code .}), one or two
+     * digits, another optional separator, and one or two digits.
+     *
+     * <p>NOTE(review): the results of {@code combineValidate} and
+     * {@code joinTime} are only assigned to the local parameter. Callers see
+     * them only if those methods mutate and return the same path instance --
+     * confirm, otherwise this method should return the final path.
+     *
+     * @param util    instance whose {@code combineValidate} and {@code joinTime}
+     *                are invoked
+     * @param lexemes lexeme path to post-process
+     */
+    private void combine(ParticipleUtil util, LexemePath<Lexeme> lexemes) {
+        lexemes = util.combineValidate(lexemes);
+        lexemes = util.joinTime(lexemes);
+
+        String year_pattern = "([1-2][0-9]{3}|[0-9]{2})";
+        String mouth_day_pattern = "([0-9]{2}|[0-9])";
+        String join_pattern = "([-/.]?)";
+        // Compile the constant regex once instead of on every loop iteration
+        // (Pattern.matches recompiles its argument on each call).
+        Pattern datePattern = Pattern.compile(
+                year_pattern + join_pattern + mouth_day_pattern + join_pattern + mouth_day_pattern);
+        for (Lexeme l : lexemes) {
+            if (l.getProperty().equals(Constants.word_property_number)
+                    && datePattern.matcher(l.getText()).matches()) {
+                l.setProperty(Constants.word_property_time);
+            }
+        }
+    }
|
|
|
+
|
|
|
public static String participleAndHighlight(String content) throws IOException {
|
|
|
LexemePath<Lexeme> lexemePath = participle(content, false);
|
|
|
String separator = " * ";
|