|
@@ -0,0 +1,108 @@
|
|
|
+package org.diagbot.nlp.feature.extract;
|
|
|
+
|
|
|
+import org.diagbot.nlp.participle.word.Lexeme;
|
|
|
+import org.diagbot.nlp.participle.word.LexemePath;
|
|
|
+import org.diagbot.nlp.util.Constants;
|
|
|
+import org.diagbot.nlp.util.NegativeEnum;
|
|
|
+import org.diagbot.nlp.util.NlpUtil;
|
|
|
+
|
|
|
+import java.math.BigDecimal;
|
|
|
+import java.util.*;
|
|
|
+
|
|
|
+public class CaseTokenTime extends CaseToken {
|
|
|
+ {
|
|
|
+ stop_symbol = NlpUtil.extendsSymbol(stop_symbol, new String[]{",", ",", ":", ":"});
|
|
|
+ }
|
|
|
+
|
|
|
+ private List numtextList = new ArrayList(Arrays.asList("数", "多", "半", "一", "二", "三", "四", "五", "六", "七", "八", "九", "十"));
|
|
|
+ private Map<String, String> numtextMap = new HashMap<String, String>(){{
|
|
|
+ put("一","1");
|
|
|
+ put("二","2");
|
|
|
+ put("三","3");
|
|
|
+ put("四","4");
|
|
|
+ put("五","5");
|
|
|
+ put("六","6");
|
|
|
+ put("七","7");
|
|
|
+ put("八","8");
|
|
|
+ put("九","9");
|
|
|
+ put("十","10");
|
|
|
+ }};
|
|
|
+
|
|
|
+ public List<Map<String, Object>> analyze(LexemePath<Lexeme> lexemePath) throws Exception {
|
|
|
+ super.sn = 0;
|
|
|
+ int max_offset = 10; //只取前10个词元中的时间信息
|
|
|
+ //词性
|
|
|
+ String property = null;
|
|
|
+ NegativeEnum[] nees = new NegativeEnum[]{NegativeEnum.EVENT_TIME};
|
|
|
+ List<Map<String, Object>> featuresList = new ArrayList<>();
|
|
|
+
|
|
|
+ Lexeme leftLexeme = null;
|
|
|
+
|
|
|
+ double time_value = 0.0;
|
|
|
+ for (int index = 0; index < lexemePath.size(); index++) {
|
|
|
+ Lexeme lexeme = lexemePath.get(index);
|
|
|
+ property = lexeme.getProperty();
|
|
|
+ if (NlpUtil.isFeature(property, nees)) { //特征词 化验
|
|
|
+ if (index > 0) {
|
|
|
+ leftLexeme = lexemePath.get(index - 1);
|
|
|
+ } else {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ if (numtextList.contains(leftLexeme.getText())) {
|
|
|
+ if ("数".equals(leftLexeme.getText()) || "多".equals(leftLexeme.getText())) { //数年直接按5年处理
|
|
|
+ time_value = 5;
|
|
|
+ } else if ("半".equals(leftLexeme.getText())) {
|
|
|
+ time_value = 0.5;
|
|
|
+ } else {
|
|
|
+ time_value = Double.valueOf(numtextMap.get(leftLexeme.getText()));
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ try {
|
|
|
+ time_value = Double.valueOf(leftLexeme.getText());
|
|
|
+ } catch (Exception nfe) {
|
|
|
+ if (leftLexeme.getText().indexOf("-") > -1) {
|
|
|
+ try {
|
|
|
+ time_value = Double.valueOf(leftLexeme.getText().split("-")[0]);
|
|
|
+ } catch (Exception e) {
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (time_value > 0) {
|
|
|
+ if ("年".equals(lexeme.getText()) || "年余".equals(lexeme.getText())) {
|
|
|
+ time_value = time_value * 365;
|
|
|
+ } else if ("月".equals(lexeme.getText()) || "月余".equals(lexeme.getText())) {
|
|
|
+ time_value = time_value * 30;
|
|
|
+ } else if ("天".equals(lexeme.getText()) || "天余".equals(lexeme.getText()) || "日".equals(lexeme.getText())) {
|
|
|
+ time_value = time_value;
|
|
|
+ } else if ("周".equals(lexeme.getText()) || "周余".equals(lexeme.getText())) {
|
|
|
+ time_value = time_value * 7;
|
|
|
+ } else if ("小时".equals(lexeme.getText())) {
|
|
|
+ time_value = time_value / 24;
|
|
|
+ } else if ("分钟".equals(lexeme.getText())) {
|
|
|
+ time_value = time_value / (24 * 60);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (time_value > 0) {
|
|
|
+ BigDecimal bd = new BigDecimal(time_value);
|
|
|
+ time_value = bd.setScale(2,BigDecimal.ROUND_HALF_UP).doubleValue();
|
|
|
+
|
|
|
+ Map<String, Object> fMap = new HashMap<>(10);
|
|
|
+ fMap.put("feature_name", time_value);
|
|
|
+ fMap.put("feature_type", Constants.feature_type_time);
|
|
|
+ fMap.put("negative", "有");
|
|
|
+ fMap.put("sn", String.valueOf(sn++));
|
|
|
+ fMap.put("property", lexeme.getProperty());
|
|
|
+ fMap.put("concept", lexeme.getConcept());
|
|
|
+
|
|
|
+ fMap.put("time_label", "1");
|
|
|
+ featuresList.add(fMap);
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return featuresList;
|
|
|
+ }
|
|
|
+}
|