Bläddra i källkod

关系抽取页面展示修改

louhr 6 år sedan
förälder
incheckning
6bafe1057e
32 ändrade filer med 16821 tillägg och 4189 borttagningar
  1. 2 2
      nlp-web/src/main/java/org/diagbot/nlp/controller/RelationExtractionController.java
  2. 9 1
      nlp/src/main/java/org/diagbot/nlp/participle/ParticipleToken.java
  3. 2 1
      nlp/src/main/java/org/diagbot/nlp/participle/ParticipleUtil.java
  4. 74 0
      nlp/src/main/java/org/diagbot/nlp/relation/analyze/RelationAnalyze.java
  5. 10 3
      nlp/src/main/java/org/diagbot/nlp/relation/extract/PresentExtract.java
  6. 3 3
      nlp/src/main/java/org/diagbot/nlp/relation/extract/VitalExtract.java
  7. 3 3
      nlp/src/main/java/org/diagbot/nlp/relation/extract/module/Symptom.java
  8. 5 5
      nlp/src/main/java/org/diagbot/nlp/relation/extract/module/Vital.java
  9. 1 1
      nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/BodyPart.java
  10. 1 1
      nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Cause.java
  11. 1 1
      nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Degree.java
  12. 1 1
      nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Desc.java
  13. 1 1
      nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Factor.java
  14. 1 1
      nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Frequency.java
  15. 1 1
      nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Item.java
  16. 1 1
      nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Negative.java
  17. 1 1
      nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/PD.java
  18. 1 1
      nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Position.java
  19. 1 1
      nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Property.java
  20. 22 57
      nlp/src/main/java/org/diagbot/nlp/relation/RelationAnalyze.java
  21. 3 3
      nlp/src/main/java/org/diagbot/nlp/relation/extract/output/OutputInfo.java
  22. 1 1
      nlp/src/main/java/org/diagbot/nlp/util/Constants.java
  23. 12 3
      nlp/src/main/java/org/diagbot/nlp/util/NegativeEnum.java
  24. 1 1
      nlp/src/main/java/org/diagbot/nlp/util/NlpUtil.java
  25. 1 1
      nlp/src/main/resources/classify.dict
  26. 54 39
      nlp/src/main/resources/push-tc.dict
  27. 9 5
      nlp/src/main/resources/synonym.dict
  28. 16232 3988
      nlp/src/main/resources/tc.dict
  29. 269 0
      nlp/src/test/java/org/diagbot/nlp/test/EntityExtractTest.java
  30. 1 1
      nlp/src/test/java/org/diagbot/nlp/test/LexemeDicTest.java
  31. 2 2
      public/src/main/java/org/diagbot/pub/jdbc/MysqlJdbc.java
  32. 95 59
      push-web/src/main/resources/static/pages/relation/sample.html

+ 2 - 2
nlp-web/src/main/java/org/diagbot/nlp/controller/RelationExtractionController.java

@@ -1,8 +1,8 @@
 package org.diagbot.nlp.controller;
 
 import org.diagbot.nlp.feature.FeatureType;
-import org.diagbot.nlp.relation.RelationAnalyze;
-import org.diagbot.nlp.relation.extract.output.OutputInfo;
+import org.diagbot.nlp.relation.analyze.RelationAnalyze;
+import org.diagbot.nlp.relation.util.OutputInfo;
 import org.diagbot.pub.api.Response;
 import org.springframework.stereotype.Controller;
 import org.springframework.web.bind.annotation.RequestMapping;

+ 9 - 1
nlp/src/main/java/org/diagbot/nlp/participle/ParticipleToken.java

@@ -135,12 +135,20 @@ public class ParticipleToken {
 
     private void matchARABIC() throws IOException {
         int position;
+        int cur_offset = this.offset;
         while (cursor < available) {
             position = Arrays.binarySearch(join_symbols, buffer[cursor]);
             if (CharacterUtil.identifyCharType(buffer[cursor]) == CharacterUtil.CHAR_ARABIC        //数字后跟'-'、'/'、'.'作为数字处理
 //                    || CharacterUtil.identifyCharType(buffer[cursor]) == CharacterUtil.CHAR_ENGLISH
                     || position > -1) {
-                cursor++;
+//                cursor++;
+                cur_offset++;
+                //先依据词库是否成词判断
+                this.matchCHN(segment, cursor, 0, false);
+                if (cursor - cur_offset > 1) {
+                    this.cursor = cur_offset;
+                    break;
+                }
             } else if (CharacterUtil.identifyCharType(buffer[cursor]) == CharacterUtil.CHAR_CHINESE) {    //数字后跟中文单位
                 break;
             } else {

+ 2 - 1
nlp/src/main/java/org/diagbot/nlp/participle/ParticipleUtil.java

@@ -8,6 +8,7 @@ import org.diagbot.nlp.participle.word.Segment;
 import org.diagbot.nlp.util.Constants;
 import org.diagbot.nlp.util.NlpUtil;
 import org.diagbot.nlp.util.NlpCache;
+import org.springframework.util.StringUtils;
 
 import java.io.IOException;
 import java.io.StringReader;
@@ -142,7 +143,7 @@ public class ParticipleUtil {
         if (index > 0) {
             index--;
             last_l = lexemes.get(index);
-            if ("×".equals(last_l.getText()) && index > 0) {
+            if (("×".equals(last_l.getText()) || StringUtils.isEmpty(last_l.getText())) && index > 0) {
                 lexeme.setOffset(last_l.getOffset());
                 lexeme.setLength(last_l.getLength() + lexeme.getLength());
                 lexeme.setText(last_l.getText() + lexeme.getText());

+ 74 - 0
nlp/src/main/java/org/diagbot/nlp/relation/analyze/RelationAnalyze.java

@@ -0,0 +1,74 @@
+package org.diagbot.nlp.relation.analyze;
+
+import org.algorithm.core.cnn.AlgorithmCNNExecutor;
+import org.algorithm.core.cnn.entity.Lemma;
+import org.algorithm.core.cnn.entity.Triad;
+import org.algorithm.core.cnn.model.impl.RelationExtractionModelImpl;
+import org.diagbot.nlp.feature.FeatureType;
+import org.diagbot.nlp.participle.ParticipleUtil;
+import org.diagbot.nlp.participle.word.Lexeme;
+import org.diagbot.nlp.participle.word.LexemePath;
+import org.diagbot.nlp.relation.extract.PresentExtract;
+import org.diagbot.nlp.relation.extract.VitalExtract;
+import org.diagbot.nlp.relation.util.OutputInfo;
+import org.diagbot.nlp.relation.util.LemmaUtil;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @ClassName org.diagbot.nlp.relation.analyze.RelationAnalyze
+ * @Description Entry point for relation extraction: splits the text into segments, runs participle + the CNN relation model on each segment, and collects one OutputInfo per segment
+ * @Author fyeman
+ * @Date 2019/1/18/018 14:36
+ * @Version 1.0
+ **/
+public class RelationAnalyze {
+    public List<OutputInfo> analyze(String content, FeatureType featureType) throws Exception {
+        String[] part_contents = content.split("\\。|\\;\\\r|\\\n|\\;"); // delimiters: 。 | ; followed by CR | LF | ;
+
+        List<OutputInfo> outputInfos = new ArrayList<>();
+
+        LemmaUtil lemmaUtil = new LemmaUtil();
+        for (String part_content : part_contents) {
+            LexemePath<Lexeme> lexemes = ParticipleUtil.participle(part_content, true);
+            // Convert the participle result into lemma structures, keeping only tokens that carry part-of-speech info; some feature info is not present in the triads, so the participle result is passed along as well
+            List<Lemma> lemmaParticiple = lemmaUtil.lexemeToTriadLemma(lexemes);
+            // Invoke the CNN model
+            AlgorithmCNNExecutor executor = new RelationExtractionModelImpl(); // NOTE(review): a new model object is constructed for every segment
+            List<Triad> triads = executor.execute(part_content, lemmaParticiple);
+            // Convert the triads returned by the model into a tree structure
+            List<Lemma> lemmaTree = lemmaUtil.traidToTree(triads, featureType);
+
+            OutputInfo outputInfo = new OutputInfo();
+            switch (featureType) { // only SYMPTOM is handled; no default branch, so other feature types add an untouched new OutputInfo
+                case SYMPTOM:
+                    this.lemmaPresentExtract(outputInfo, lemmaTree, lemmaParticiple); // return value is discarded; NOTE(review): relies on extract() filling the passed-in outputInfo - confirm it never returns a different instance
+            }
+            if (outputInfo != null) { // always true here: outputInfo was constructed above and is never reassigned in this method
+                outputInfos.add(outputInfo);
+            }
+        }
+        return outputInfos;
+    }
+
+    public OutputInfo lemmaPresentExtract(OutputInfo outputInfo, List<Lemma> lemmaTree, List<Lemma> lemmaParticiple) {
+        // Extract symptom information from the present-illness history; delegates to a new PresentExtract
+        PresentExtract presentExtract = new PresentExtract();
+        outputInfo = presentExtract.extract(outputInfo, lemmaTree, lemmaParticiple);
+        return outputInfo;
+    }
+
+    /**
+     * Vital-sign feature extraction; delegates to a new VitalExtract. Not called from analyze() in this class.
+     * @param outputInfo result holder passed through to VitalExtract.extract
+     * @param lemmaTree lemma tree passed through to VitalExtract.extract
+     * @param lemmaParticiple participle lemmas passed through to VitalExtract.extract
+     * @return whatever VitalExtract.extract returns
+     */
+    public OutputInfo lemmaVitalExtract(OutputInfo outputInfo, List<Lemma> lemmaTree, List<Lemma> lemmaParticiple) {
+        VitalExtract vitalExtract = new VitalExtract();
+        outputInfo = vitalExtract.extract(outputInfo, lemmaTree, lemmaParticiple);
+        return outputInfo;
+    }
+}

+ 10 - 3
nlp/src/main/java/org/diagbot/nlp/relation/extract/PresentExtract.java

@@ -1,12 +1,13 @@
 package org.diagbot.nlp.relation.extract;
 
 import org.algorithm.core.cnn.entity.Lemma;
-import org.diagbot.nlp.relation.extract.cell.*;
-import org.diagbot.nlp.relation.extract.module.Symptom;
-import org.diagbot.nlp.relation.extract.output.OutputInfo;
+import org.diagbot.nlp.relation.module.Symptom;
+import org.diagbot.nlp.relation.util.OutputInfo;
+import org.diagbot.nlp.relation.module.cell.*;
 import org.diagbot.nlp.util.Constants;
 import org.diagbot.nlp.util.NlpUtil;
 
+import java.util.ArrayList;
 import java.util.List;
 
 /**
@@ -18,12 +19,18 @@ import java.util.List;
  **/
 public class PresentExtract extends BaseExtract {
     public OutputInfo extract(OutputInfo outputInfo, List<Lemma> lemmaTree, List<Lemma> lemmaParticiple) {
+        List<String> repeatSymptomsList = new ArrayList<>();
         for (int index = 0; index < lemmaParticiple.size(); index++) {
             Lemma participle_lemma = lemmaParticiple.get(index);
             String property = participle_lemma.getProperty();
             if (NlpUtil.isFeature(property, Constants.symptom_type)) {          //特征词 症状
+                if (repeatSymptomsList.contains(participle_lemma.getText())) {
+                    continue;
+                }
                 Symptom symptom = lookSymptomRelations(participle_lemma, lemmaTree);
                 outputInfo.getSymptoms().add(symptom);
+
+                repeatSymptomsList.add(participle_lemma.getText());
             }
         }
         lemmaTree.removeAll(containsLemmaTree);

+ 3 - 3
nlp/src/main/java/org/diagbot/nlp/relation/extract/VitalExtract.java

@@ -1,9 +1,9 @@
 package org.diagbot.nlp.relation.extract;
 
 import org.algorithm.core.cnn.entity.Lemma;
-import org.diagbot.nlp.relation.extract.cell.*;
-import org.diagbot.nlp.relation.extract.module.Vital;
-import org.diagbot.nlp.relation.extract.output.OutputInfo;
+import org.diagbot.nlp.relation.module.Vital;
+import org.diagbot.nlp.relation.util.OutputInfo;
+import org.diagbot.nlp.relation.module.cell.*;
 import org.diagbot.nlp.util.Constants;
 import org.diagbot.nlp.util.NlpUtil;
 

+ 3 - 3
nlp/src/main/java/org/diagbot/nlp/relation/extract/module/Symptom.java

@@ -1,6 +1,6 @@
-package org.diagbot.nlp.relation.extract.module;
+package org.diagbot.nlp.relation.module;
 
-import org.diagbot.nlp.relation.extract.cell.*;
+import org.diagbot.nlp.relation.module.cell.*;
 
 /**
  * @ClassName org.diagbot.nlp.relation.extract.cell.Symptom
@@ -13,7 +13,7 @@ public class Symptom {
     private String symptomName;
     private Negative negative;
 
-    private BodyPart  bodyPart;
+    private BodyPart bodyPart;
     private Degree degree;
     private Cause cause;
     private Property property;

+ 5 - 5
nlp/src/main/java/org/diagbot/nlp/relation/extract/module/Vital.java

@@ -1,9 +1,9 @@
-package org.diagbot.nlp.relation.extract.module;
+package org.diagbot.nlp.relation.module;
 
-import org.diagbot.nlp.relation.extract.cell.BodyPart;
-import org.diagbot.nlp.relation.extract.cell.Degree;
-import org.diagbot.nlp.relation.extract.cell.Item;
-import org.diagbot.nlp.relation.extract.cell.Negative;
+import org.diagbot.nlp.relation.module.cell.BodyPart;
+import org.diagbot.nlp.relation.module.cell.Degree;
+import org.diagbot.nlp.relation.module.cell.Item;
+import org.diagbot.nlp.relation.module.cell.Negative;
 
 /**
  * @ClassName org.diagbot.nlp.relation.extract.module.Vital

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/BodyPart.java

@@ -1,4 +1,4 @@
-package org.diagbot.nlp.relation.extract.cell;
+package org.diagbot.nlp.relation.module.cell;
 
 public class BodyPart {
     private String partBodyName;

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Cause.java

@@ -1,4 +1,4 @@
-package org.diagbot.nlp.relation.extract.cell;
+package org.diagbot.nlp.relation.module.cell;
 
 public class Cause {
     private String causeName;

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Degree.java

@@ -1,4 +1,4 @@
-package org.diagbot.nlp.relation.extract.cell;
+package org.diagbot.nlp.relation.module.cell;
 
 public class Degree {
     private String degreeName;

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Desc.java

@@ -1,4 +1,4 @@
-package org.diagbot.nlp.relation.extract.cell;
+package org.diagbot.nlp.relation.module.cell;
 
 /**
  * @ClassName org.diagbot.nlp.relation.extract.cell.Desc

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Factor.java

@@ -1,4 +1,4 @@
-package org.diagbot.nlp.relation.extract.cell;
+package org.diagbot.nlp.relation.module.cell;
 
 /**
  * @ClassName org.diagbot.nlp.relation.extract.cell.Factor

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Frequency.java

@@ -1,4 +1,4 @@
-package org.diagbot.nlp.relation.extract.cell;
+package org.diagbot.nlp.relation.module.cell;
 
 public class Frequency {
     private String freName;

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Item.java

@@ -1,4 +1,4 @@
-package org.diagbot.nlp.relation.extract.cell;
+package org.diagbot.nlp.relation.module.cell;
 
 /**
  * @ClassName org.diagbot.nlp.relation.extract.cell.Item

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Negative.java

@@ -1,4 +1,4 @@
-package org.diagbot.nlp.relation.extract.cell;
+package org.diagbot.nlp.relation.module.cell;
 
 /**
  * @ClassName org.diagbot.nlp.relation.extract.cell.Negative

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/PD.java

@@ -1,4 +1,4 @@
-package org.diagbot.nlp.relation.extract.cell;
+package org.diagbot.nlp.relation.module.cell;
 
 public class PD {
     private String value;

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Position.java

@@ -1,4 +1,4 @@
-package org.diagbot.nlp.relation.extract.cell;
+package org.diagbot.nlp.relation.module.cell;
 
 /**
  * @ClassName org.diagbot.nlp.relation.extract.cell.Position

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/relation/extract/cell/Property.java

@@ -1,4 +1,4 @@
-package org.diagbot.nlp.relation.extract.cell;
+package org.diagbot.nlp.relation.module.cell;
 
 public class Property {
     private String propertyName;

+ 22 - 57
nlp/src/main/java/org/diagbot/nlp/relation/RelationAnalyze.java

@@ -1,16 +1,10 @@
-package org.diagbot.nlp.relation;
+package org.diagbot.nlp.relation.util;
 
-import org.algorithm.core.cnn.AlgorithmCNNExecutor;
 import org.algorithm.core.cnn.entity.Lemma;
 import org.algorithm.core.cnn.entity.Triad;
-import org.algorithm.core.cnn.model.impl.RelationExtractionModelImpl;
 import org.diagbot.nlp.feature.FeatureType;
-import org.diagbot.nlp.participle.ParticipleUtil;
 import org.diagbot.nlp.participle.word.Lexeme;
 import org.diagbot.nlp.participle.word.LexemePath;
-import org.diagbot.nlp.relation.extract.PresentExtract;
-import org.diagbot.nlp.relation.extract.VitalExtract;
-import org.diagbot.nlp.relation.extract.output.OutputInfo;
 import org.diagbot.nlp.util.Constants;
 import org.diagbot.nlp.util.NegativeEnum;
 import org.diagbot.nlp.util.NlpUtil;
@@ -19,48 +13,19 @@ import java.util.ArrayList;
 import java.util.List;
 
 /**
- * @ClassName org.diagbot.nlp.relation.RelationAnalyze
- * @Description 关系抽取入口
+ * @ClassName org.diagbot.nlp.relation.util.LemmaUtil
+ * @Description TODO
  * @Author fyeman
- * @Date 2019/1/18/018 14:36
+ * @Date 2019/3/21/021 14:25
  * @Version 1.0
  **/
-public class RelationAnalyze {
-    public List<OutputInfo> analyze(String content, FeatureType featureType) throws Exception {
-        String[] part_contents = content.split("\\。|\\;\\\r|\\\n|\\;");
-
-        List<OutputInfo> outputInfos = new ArrayList<>();
-        for (String part_content : part_contents) {
-            LexemePath<Lexeme> lexemes = ParticipleUtil.participle(part_content, true);
-            //分词结果转词元结构,只提取有词性信息,因为部分特征信息未在三元组中,所以需要把分词结果也一并传入
-            List<Lemma> lemmaParticiple = lexemeToTriadLemma(lexemes);
-            //调用CNN模型
-            AlgorithmCNNExecutor executor = new RelationExtractionModelImpl();
-            List<Triad> triads = executor.execute(part_content, lemmaParticiple);
-            //模型返回的三元组转树形结构
-            List<Lemma> lemmaTree = traidToTree(triads, featureType);
-            OutputInfo outputInfo = new OutputInfo();
-            switch (featureType) {
-                case SYMPTOM:
-                    PresentExtract presentExtract = new PresentExtract();
-                    outputInfo = presentExtract.extract(outputInfo, lemmaTree, lemmaParticiple);
-                    //如果还有体征信息 也需要抽取
-                    VitalExtract vitalExtract = new VitalExtract();
-                    outputInfo = vitalExtract.extract(outputInfo, lemmaTree, lemmaParticiple);
-            }
-            if (outputInfo != null) {
-                outputInfos.add(outputInfo);
-            }
-        }
-        return outputInfos;
-    }
-
+public class LemmaUtil {
     /**
      * 分词结果转词元结构
      * @param lexemes
      * @return
      */
-    private List<Lemma> lexemeToTriadLemma(LexemePath<Lexeme> lexemes) {
+    public List<Lemma> lexemeToTriadLemma(LexemePath<Lexeme> lexemes) {
         List<Lemma> lemmas = new ArrayList<>();
         for (Lexeme lexeme : lexemes) {
             if (!lexeme.getProperty().equals("99")) {
@@ -81,7 +46,7 @@ public class RelationAnalyze {
      * @param featureType
      * @return
      */
-    private List<Lemma> traidToTree(List<Triad> triads, FeatureType featureType) {
+    public List<Lemma> traidToTree(List<Triad> triads, FeatureType featureType) {
         List<Lemma> lemmaTree = new ArrayList<>();
         switch (featureType) {
             case SYMPTOM:
@@ -91,32 +56,32 @@ public class RelationAnalyze {
         return lemmaTree;
     }
 
-    private void select(List<Lemma> lemmaTree, List<Triad> triads, NegativeEnum[] type) {
+    public void select(List<Lemma> lemmaTree, List<Triad> triads, NegativeEnum[] type) {
         List<Triad> has_add_triads = new ArrayList<>();
         for (Triad triad : triads) {
-            Lemma origin_l = null;
-            Lemma relation_l = null;
+            Lemma left_l = null;
+            Lemma right_l = null;
             if (NlpUtil.isFeature(triad.getL_1().getProperty(), type)) {
-                origin_l = this.copyProperties(triad.getL_1());
-                relation_l = this.copyProperties(triad.getL_2());
+                left_l = this.copyProperties(triad.getL_1());
+                right_l = this.copyProperties(triad.getL_2());
             }
             if (NlpUtil.isFeature(triad.getL_2().getProperty(), type)) {
-                origin_l = this.copyProperties(triad.getL_2());
-                relation_l = this.copyProperties(triad.getL_1());
+                left_l = this.copyProperties(triad.getL_2());
+                right_l = this.copyProperties(triad.getL_1());
             }
-            if (origin_l != null && relation_l != null) {
+            if (left_l != null && right_l != null) {
                 boolean isFindFromTree = false;
                 for (Lemma l : lemmaTree) {
-                    if (l.getText().equals(origin_l.getText()) && l.getPosition().equals(origin_l.getPosition())) {
-                        origin_l = l;
+                    if (l.getText().equals(left_l.getText()) && l.getPosition().equals(left_l.getPosition())) {
+                        left_l = l;
                         isFindFromTree = true;
                         break;
                     }
                 }
-                this.findRelationTriad(triads, triad, has_add_triads, relation_l, type);
-                origin_l.add(relation_l);
+                this.findRelationTriad(triads, triad, has_add_triads, right_l, type);
+                left_l.add(right_l);
                 if (!isFindFromTree) {      //树上已有节点 不需要添加
-                    lemmaTree.add(origin_l);
+                    lemmaTree.add(left_l);
                 }
                 //已添加到树中的三元组
                 has_add_triads.add(triad);
@@ -125,7 +90,7 @@ public class RelationAnalyze {
         triads.removeAll(has_add_triads);
     }
 
-    private boolean findRelationTriad(List<Triad> triads, Triad current_triad, List<Triad> has_add_triads, Lemma lemma, NegativeEnum[] type) {
+    public boolean findRelationTriad(List<Triad> triads, Triad current_triad, List<Triad> has_add_triads, Lemma lemma, NegativeEnum[] type) {
         boolean isFindRelation = false;
         for (Triad triad : triads) {
             if (current_triad == triad) {
@@ -151,7 +116,7 @@ public class RelationAnalyze {
         return isFindRelation;
     }
 
-    private Lemma copyProperties(Lemma lemma) {
+    public Lemma copyProperties(Lemma lemma) {
         Lemma l = new Lemma();
         l.setLen(lemma.getLen());
         l.setPosition(lemma.getPosition());

+ 3 - 3
nlp/src/main/java/org/diagbot/nlp/relation/extract/output/OutputInfo.java

@@ -1,7 +1,7 @@
-package org.diagbot.nlp.relation.extract.output;
+package org.diagbot.nlp.relation.util;
 
-import org.diagbot.nlp.relation.extract.module.Symptom;
-import org.diagbot.nlp.relation.extract.module.Vital;
+import org.diagbot.nlp.relation.module.Symptom;
+import org.diagbot.nlp.relation.module.Vital;
 
 import java.util.ArrayList;
 import java.util.List;

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/util/Constants.java

@@ -17,7 +17,7 @@ public class Constants {
     public final static String feature_type_history = "7";       //历史
     public final static String feature_type_feature = "9"; //症状描述中的特征信息 如部位、性质等
 
-    public static NegativeEnum[] symptom_type = new NegativeEnum[]{NegativeEnum.SYMPTOM};
+    public static NegativeEnum[] symptom_type = new NegativeEnum[]{NegativeEnum.SYMPTOM, NegativeEnum.SYMPTOM_INDEX, NegativeEnum.SYMPTOM_PERFORMANCE};
     public static NegativeEnum[] unit_time_type = new NegativeEnum[]{NegativeEnum.EVENT_TIME, NegativeEnum.UNIT};
     public static NegativeEnum[] vital_type = new NegativeEnum[]{NegativeEnum.VITAL_INDEX_VALUE, NegativeEnum.VITAL_RESULT};
     public static NegativeEnum[] vital_index_type = new NegativeEnum[]{NegativeEnum.VITAL_INDEX};

+ 12 - 3
nlp/src/main/java/org/diagbot/nlp/util/NegativeEnum.java

@@ -4,12 +4,12 @@ package org.diagbot.nlp.util;
  * Created by Administrator on 2017/11/8.
  */
 public enum NegativeEnum {
-    NUMBER("0"), SYMPTOM("1"), EVENT_TIME("2"), BODY_PART("3"), PROPERTY("4"), CAUSE("5"), DEEP("6"), FEMININE("7"), POSITIVE("8"),
+    NUMBER("0"), SYMPTOM("1"), SYMPTOM_INDEX("75"), EVENT_TIME("2"), EVENT_TIME_DESC("74"), BODY_PART("3"), PROPERTY("4"), CAUSE("5"), DEEP("6"), DEEP_QUANTIFIER("80"), FEMININE("7"), POSITIVE("8"),
     UNIT("9"), MEDICINE("10"), TREATMENT("11"),
     LIS_TYPE("13"), LIS_NAME("14"), LIS_RESULT("15"), WAY("16"),
     PACS_NAME("17"), PACS_RESULT("18"),
     DISEASE("19"), JOIN("20"), POSITION("21"), FAMILY("24"), FOOD("27"), PAST("28"), OPERATION("29"),
-    HABIT("25"), SYMPTOM_PERFORMANCE("32"), EVENT_TIME_VALUE("33"), FREQUENCY("34"),
+    HABIT("25"), SYMPTOM_PERFORMANCE("32"), NUMBER_QUANTIFIER("33"), FREQUENCY("34"),
     OTHER("99"),
     VITAL_INDEX("60"), VITAL_INDEX_VALUE("61"), VITAL_RESULT("62"), VITAL_BODY_PART("63"),
     ADDRESS("70"), PERSON("72"), PERSON_FEATURE_DESC("73");
@@ -109,7 +109,7 @@ public enum NegativeEnum {
                 negativeEnum = NegativeEnum.SYMPTOM_PERFORMANCE;
                 break;
             case "33":
-                negativeEnum = NegativeEnum.EVENT_TIME_VALUE;
+                negativeEnum = NegativeEnum.NUMBER_QUANTIFIER;
                 break;
             case "34":
                 negativeEnum = NegativeEnum.FREQUENCY;
@@ -135,6 +135,15 @@ public enum NegativeEnum {
             case "73":
                 negativeEnum = NegativeEnum.PERSON_FEATURE_DESC;
                 break;
+            case "74":
+                negativeEnum = NegativeEnum.EVENT_TIME_DESC;
+                break;
+            case "75":
+                negativeEnum = NegativeEnum.SYMPTOM_INDEX;
+                break;
+            case "80":
+                negativeEnum = NegativeEnum.DEEP_QUANTIFIER;
+                break;
             case "99":
                 negativeEnum = org.diagbot.nlp.util.NegativeEnum.OTHER;
                 break;

+ 1 - 1
nlp/src/main/java/org/diagbot/nlp/util/NlpUtil.java

@@ -22,7 +22,7 @@ public class NlpUtil {
 
     public static boolean isNumberString(Lexeme l) {
         if (l == null) return false;
-        if (NlpUtil.isFeature(l.getProperty(), new NegativeEnum[]{NegativeEnum.EVENT_TIME_VALUE})) {
+        if (NlpUtil.isFeature(l.getProperty(), new NegativeEnum[]{NegativeEnum.EVENT_TIME_DESC})) {
             return true;
         }
         for (char c : l.getText().toCharArray()) {

+ 1 - 1
nlp/src/main/resources/classify.dict

@@ -166,7 +166,6 @@ ZS17Dgm0g5sr9CAO6fuC/753qq2yF2cWRRuFBtx13KM=
 ZS17Dgm0g5uWR6gm1zbExfAZVFfu7NX1kG57ceWMY4U=
 ZS17Dgm0g5tIBVGJQ5x2a7I5g1QSIqIgK4DYmKUtYzA=
 ZS17Dgm0g5tIBVGJQ5x2a7LMlg0j5ZRh9csoaYhecJCqne2lkXR8xlSdFN30dJby
-ZS17Dgm0g5vdPmsNsdD0Um4OXV6Tw9PG7V4GyMOt7zhW9jHJ+r/xLg==
 ZS17Dgm0g5vdPmsNsdD0UiKD3zwhl3gh7V4GyMOt7zhW9jHJ+r/xLg==
 ZS17Dgm0g5vdPmsNsdD0Umh6VPbMFwmv7V4GyMOt7zhW9jHJ+r/xLg==
 ZS17Dgm0g5ua8k4aUM5PKiW+LiiN/JuVK4DYmKUtYzA=
@@ -652,6 +651,7 @@ VdEaXpjHPMhE3KrXtZ4jFnyx141wnrJVrK5K/t8x433+cEOlbfXBvIw+DoB3tNkx
 VdEaXpjHPMhE3KrXtZ4jFjbl5mp04vwp7V4GyMOt7zhW9jHJ+r/xLg==
 nfxM7lh8NBwr9CAO6fuC/+y3tyd/IYsPRRuFBtx13KM=
 nfxM7lh8NBxIBVGJQ5x2a7LMlg0j5ZRh9csoaYhecJCqne2lkXR8xlSdFN30dJby
+nfxM7lh8NBzdPmsNsdD0Um4OXV6Tw9PG7V4GyMOt7zhW9jHJ+r/xLg==
 nfxM7lh8NBzdPmsNsdD0Umh6VPbMFwmv7V4GyMOt7zhW9jHJ+r/xLg==
 kQcn4rJ5+wSIw8AEuG/52HRYmXGVWc2IbApsV2kPNEPyHOff8EOfXEpgEZ36UZf7
 kQcn4rJ5+wRQbAZnM3n7QqFv4anH8q04vk1evuCxkYg=

+ 54 - 39
nlp/src/main/resources/push-tc.dict

@@ -1896,7 +1896,7 @@ EeeyFfQIQ+QOyE8u4SYtLkpNB1cQdS7NSmARnfpRl/s=
 EeeyFfQIQ+QOyE8u4SYtLmGz6CAYmPcvC6BfePje8sMPMD6GPM2/yn67Ys5AgpywEDNnZ6hZT6I=
 EeeyFfQIQ+Rxs2nUVlCWYioNXRmLZt8b
 EeeyFfQIQ+Rd97LzHvX2QX/+MLIrd4qt
-TAitzbWFTrFKYBGd+lGX+w==
+dCVasxBHmj6ia0min0kp8g==
 m1laWtXWeo9KYBGd+lGX+w==
 a3wGfO7OpDIZ2wSwovQtcZ0sz95qSyMlbNbHA2ZM5iIpEmOL2tK7/UpgEZ36UZf7
 0E3e6Vg79Nebp2Brp5WMh6OEd0aUO5IDLbw2UTXX6rwnHOlkm0sxA3Lm8EIUKRZm
@@ -2429,7 +2429,7 @@ iECkdToulZmyQs0Sm++kou03CmF6iLCV
 78/lFzNy/WnfSSD55uAt33o/nXYKyXOMZjX4sbTUNtEcASvwS7pUqdNT95SkLHLI66toUiLrDtQ=
 78/lFzNy/WniOdeLvNRe6AfJhFlwVnqNIKAjxToDxNAnHOlkm0sxA3Lm8EIUKRZm
 B01WKDNRsPRKYBGd+lGX+w==
-gXZ0Xg5zcV8i1yUAroeK3w==
+gXZ0Xg5zcV+gvr24VGyRVg==
 QQbRCPAAGuvvWZ8OmWlkVPOhSUxpVSf2aU2f+f9Ufpcgq939Z08kXHLm8EIUKRZm
 1O0Luas38+MJlrmsDncTEx3q8u66+3iHSmARnfpRl/s=
 1O0Luas38+OfLZRUgAEVq1NOoLFK3FBiwGKphuLh3q8=
@@ -3029,6 +3029,7 @@ QZTfiq+Edqu3SJVMN26PGuS34x2ZooD3
 C5JIOQ7EAeFKYBGd+lGX+w==
 olifDSGaxrVm/cafNAAK6w==
 IO1oQEFa/v2E8xWYl4CDQw==
+U2OBEFuQKIQFnN6jc1veZw==
 fOrTNgGFmibE4XgYXHJt8Q==
 fOrTNgGFmibJtO97b+Pp1A==
 fOrTNgGFmibuxn+1KvIdVAPZnLTJIPKK
@@ -3067,7 +3068,7 @@ td9+nqHLT++/y1AeMTgzNb4pbaLhJ3IT7uVotICMCGo=
 BO+IhRN30Bb9rm+rwXFyIQ==
 O0BiPXvwwKzXSdV1MvjB2HLm8EIUKRZm
 KR9m/266GcczVdtZ/oVfkg==
-H6QrL3+VEZxKYBGd+lGX+w==
+o6GbM0ANni2ia0min0kp8g==
 zm9kx7yJ+LxKYBGd+lGX+w==
 FcQfCvhsoJtpb/c9CGc9Hg==
 Dl/TEMXTgtsZ2wSwovQtcZ0sz95qSyMlbNbHA2ZM5iIpEmOL2tK7/UpgEZ36UZf7
@@ -5042,9 +5043,9 @@ ONcLL9XJ8ciQUbVc8RwRvqMamx73jYKr+sYUsedNRj8=
 jXxUWA+YZdfnocFDIteUBMuqPGxX8ABoVwvh73CMiD1TkPDwKrgh/SbsY5jA4ToE/2uUryF3sNzk884IkEpUNw==
 jXxUWA+YZdfnocFDIteUBMuqPGxX8ABoVwvh73CMiD0art+omaj7dbGF3x7+tZQ9oRpZFK+ovSioFowRzYDHOHLm8EIUKRZm
 xazckZA2QbQO0fuuZbNSN3Lm8EIUKRZm
-sNAy/qRT7h5KYBGd+lGX+w==
-DabC6aKzPEtKYBGd+lGX+w==
-F5XMJE5rrfJKYBGd+lGX+w==
+k4eH46O6oTiia0min0kp8g==
+oAhA7k8zgq+ia0min0kp8g==
+qNnfkHASfvOia0min0kp8g==
 8c90gHt8/GbgyDleQPRbwK4oPTGzZT+Kl1WLP5ukp4EjTG+EEze1DSn0rIW8Pwe8
 8c90gHt8/GbgyDleQPRbwK4oPTGzZT+KhW0W25kZgAAlCZcPlHTkoPCKUPg/OCl7SmARnfpRl/s=
 a0WPukO1EbigTRzJ0SMiOSn0rIW8Pwe8
@@ -8619,9 +8620,9 @@ iP5Xip9aWM7fIbPNVARXB+dxaGPqYEjC
 OMqr42Wlz1UP9pt6KOiNhw==
 ASZYZd2fCgG1f4daHIwZR3Lm8EIUKRZm
 FsTFz0EWhLs7aXf36GTtvA==
-KBkG08VWEkNKYBGd+lGX+w==
-q5OtbiMDyIpgfWBkdUEehg==
-q5OtbiMDyIr3B/NgOaulbw==
+k4x+32uVFRqia0min0kp8g==
+q5OtbiMDyIppMiD9jqsB5w==
+q5OtbiMDyIoC8sgVkwZABw==
 2xpYzF2n2IczNqbgkStTWsBiqYbi4d6v
 91Tcvp6nLFIiDDzNUIFh7GOJJl8swme/hw48h4qdOM5y5vBCFCkWZg==
 qhpCRtjIPPCFpHfK3oEItdAwH6T4L0xqcubwQhQpFmY=
@@ -9672,7 +9673,7 @@ akIGiqCH/qmOO6EBvxnDtg==
 2aDJllB28JJlRWwDw/Rte3Lm8EIUKRZm
 /bJL7h9rpxSbO1L2fO2XRW+Bk7qt5BlMSmARnfpRl/s=
 /bJL7h9rpxSbO1L2fO2XReUtC7idx4Fv4F+k/KnXynlKYBGd+lGX+w==
-fevU2qGfgXhKYBGd+lGX+w==
+dHLIaguN0DCia0min0kp8g==
 URFNeCAd/rgPwBAHXejTGcaGDCQGNoon2VWC+M3su9c=
 yQLOuEV2p6ppb/c9CGc9Hg==
 1+f2NAqZx5tKYBGd+lGX+w==
@@ -10496,7 +10497,7 @@ phH8RYGLM5RZhHXMEtaFUYKoWEtpIM8v
 phH8RYGLM5QP9ZMiwTHaHjnPZ59ynUfV
 zt3iHa0EhQTeUiPI/JiJPw==
 ZPtlkT+G6zcsqzd+cZvBXg==
-JrYNtocoggBKYBGd+lGX+w==
+Py47TBdRZUGia0min0kp8g==
 Pe9Nlo+K7UqiTnVBHMKJ9joHoy7uDJrR
 C02i5ai6k4w=
 PvcWOf0Ib7U4ZJAoPGYRhEp0V1O+je9SSmARnfpRl/s=
@@ -11111,7 +11112,7 @@ EyVtvYLLJ02aqbaiczO/F0kJoYyjhE5N28520TzlaKk=
 EyVtvYLLJ02aqbaiczO/F4Gp3u4FzA7VSmARnfpRl/s=
 k78Z3wzz3ZoQM2dnqFlPog==
 a7W1BLQQYlcX6fQT2YnqfQ==
-Z+ysuBtQ1StKYBGd+lGX+w==
+qpLLQBJEijqia0min0kp8g==
 1+e6Bicr5DBpb/c9CGc9Hg==
 NrKoxwJxgv8WdDjXo9CY3A==
 NrKoxwJxgv9XzApHvzDfhf1rt8UoytdE
@@ -11131,8 +11132,6 @@ SawDDCX5L5jdTmlOCW5Gyw==
 TdsPMKL8V6kX6fQT2YnqfQ==
 ncbpEBm03yAQM2dnqFlPog==
 ZKA/6UDNB84=
-RyJ51E2E9GpKYBGd+lGX+w==
-YUUEJiHMYPRKYBGd+lGX+w==
 K9XIds1HWL0sqzd+cZvBXg==
 aTQISAJyX9vVFz4nMqc4ZU5NCDzaSygN
 HC6LRxOLBr0sK/7yhgICY05NCDzaSygN
@@ -11701,6 +11700,7 @@ BWgVkFaqPPs0cTTYYTCEKQ==
 DL/j+Tnkq6NKYBGd+lGX+w==
 AIvTjMd3sKv8C+9+zz2f9w==
 LSPkZOEPD3jTd16vLxLlMQ==
+ZZW35Jl3SZl63qE4SatoZA==
 fsMqsWBrhmGHSoHBmV4iZA==
 Zo2NAYGvF8iebSVc2Xwv+w==
 mpaFzePG/5ouGiQDDW+woA==
@@ -12357,7 +12357,7 @@ dNv6qr7SXTui5OjhmEZ6BaZZOOz+9CeESmARnfpRl/s=
 STc8NC97THSW64PK7OzRclYApecTfaA2vujCWb3vNig=
 aLFigvlmokv4P3PtJT/4oSKMEqgfyDxXp4H7BA++0r0QM2dnqFlPog==
 BhsjQLQoO72gEefC98cHfg==
-e7fHFUBNrI5KYBGd+lGX+w==
+EEN1nHQrjxSia0min0kp8g==
 oVJkClU0BvirB8cdRCRsJQ==
 oVJkClU0BviNU+AGHPtPt8zoFrwV2QdimZRe1dTw7jMQM2dnqFlPog==
 oVJkClU0Bviq3zbusJnpvQEbAw1oCNhqSmARnfpRl/s=
@@ -12735,6 +12735,7 @@ vOhzvdhSIygpq5ZXcfxL4q7sh8wB7jb0UGti1nv2jtD9EzGcSjHIoYZyWOqulxmg
 xm0YhMMvUrSLes/lB+2ecp1+GU06p7/j
 xm0YhMMvUrRKc6rkwgZMj3Lm8EIUKRZm
 xm0YhMMvUrSmNzM3zXLoyAkP+NGYGXKBwGKphuLh3q8=
+xm0YhMMvUrSuiiDFYdR0WXLm8EIUKRZm
 xm0YhMMvUrTv6eQ9seoGjEUzv8FEiuOYwGKphuLh3q8=
 xm0YhMMvUrTv6eQ9seoGjEUzv8FEiuOYOknbIYzP/rjAYqmG4uHerw==
 KA3vo7pJXIdLNMBz3Xx/8Q==
@@ -12900,6 +12901,7 @@ l0FzfX6DT5k98wBfoVyh8S7pg2orELiIcd3yzO2f4692ParSe7MGMUpgEZ36UZf7
 3nu5sG93VQOwr6HZTL6b+1KSWhccc/Zvy7S5rCgbe+o=
 3nu5sG93VQOwr6HZTL6b+8tOysr2ug/xWp4o6Bg9nsUQM2dnqFlPog==
 3nu5sG93VQOf/f1dgAIVgXLm8EIUKRZm
+3nu5sG93VQO4Lq8l4HGsqMCw/LDJGBUlWHMDjlQBJag=
 3nu5sG93VQOu+WjmnkVfz6Fc+7PkMCoimfIWc3kpHfVrpcSX5i+cpw==
 3nu5sG93VQM/wBG7jQncUHLm8EIUKRZm
 3nu5sG93VQNWcuiK3UNznJN4GHpQAT1oG2CpDFniRyJa16C/EBF3HjHYQZx01Mctm+zgM9xNjMU=
@@ -14467,7 +14469,6 @@ DefAwZ4n13SXuNGxONPgs3Lm8EIUKRZm
 DefAwZ4n13RTbT/8RG7ptJ/BpKWov8L8cubwQhQpFmY=
 DefAwZ4n13RTbT/8RG7ptCHoUhwn+q4AcubwQhQpFmY=
 cofx9bzGF2U=
-yPETRZHkVwNKYBGd+lGX+w==
 pLznVzHhzIQToqtXFzcysA==
 RMvBGSygzAjpPzQ7ZvTEjA==
 RMvBGSygzAjNPS3iTjaK8XLm8EIUKRZm
@@ -14709,8 +14710,8 @@ xGS9wP8CKk3OY4cRxnhx73XSHzIeHfBtKg1dGYtm3xs=
 xGS9wP8CKk3OY4cRxnhx7/hMXPM39kgjZ+Vo+M4S6bc=
 zZwsIXJHVQReJcDAcAR/kiSsMvDnQ18OUszWlbhdMM5mGYM5U40MHw==
 YCXqOLh/MzUARlZLRV5OJaml1UESJglfESwbP6vCmCQ=
-YjJGJbpmXTpKYBGd+lGX+w==
-RACWYLAQOrJKYBGd+lGX+w==
+5FFt71oqon+ia0min0kp8g==
+WYlzBrNzPrOia0min0kp8g==
 z5d2zrqZFoG+HuTiJLjB/uI1vMasM53IlWmTDQJzOUIQM2dnqFlPog==
 SfDVzLMNdQr7Aur4rhXoBg==
 FTcV6Jv369s9WC2fOrpOtg==
@@ -15365,7 +15366,7 @@ P0+rKaCrMyBKYBGd+lGX+w==
 4WX/eFMf4vdKYBGd+lGX+w==
 ZMacpUvqRo4cdrB251kWVA==
 8D+66Vaar/EaOH9NcRRmow==
-7HDUCavDhdZKYBGd+lGX+w==
+Z32/XXQxQamia0min0kp8g==
 6xj4EIvwPU5XrrLCZAbA0GkaXFUkWOQH
 Yvu81CxXgVmIPRuTXeItYg==
 srKnbt8NRTF20O4BpJ5ZFg==
@@ -23037,6 +23038,7 @@ SA93hp4kW7BqT0qLyKNF3gm+ik3u28cBfcnA0FNZxlUQM2dnqFlPog==
 Wpt1csc/kGLQPmivElcbxdGAmH6Ls5rh
 Ui7dOySkHe92Cx/aeSRB80LIMNT0PSrHSmARnfpRl/s=
 /ntJmZAyiNOS3PxErN3g5b/k15zfmd5H
+pGY+NbyU+NmdLlE5Fk9noQ==
 w0WmywVpDHSNrqDg67izJmlv9z0IZz0e
 QQ6kZzWcKiS1QXPpBNd8kb3l/aZqooYt
 QQ6kZzWcKiS1QXPpBNd8kVhwc9eBN/mHMk9eUV3+aNs=
@@ -25286,7 +25288,7 @@ SKaLpgRak/9zUFAUG+b9Fo0u6jneTTO+
 CggpZP4i6gM7aXf36GTtvA==
 KSHApaED76Qsqzd+cZvBXg==
 yeeO+iS3tm5U/TFXt7v0EBE6VH0VzrXUPKqfh50y5/0JdzcrK7kAQg==
-Cu1i9BhkVrTRH/foyQXBDg==
+Cu1i9BhkVrTZdsrW9hMwsA==
 naGYg3EsvcUQM2dnqFlPog==
 KvIefntUCxU/STBEVGP9VpieOWDwV1mo
 KvIefntUCxWBDXTJgEXlefwj3uuHDUo/SmARnfpRl/s=
@@ -25305,7 +25307,7 @@ c7JvGVRxtQ4QhBqkNz+qAg==
 K2LLo4COUm+11xDERmrjMQ==
 7dWJoDG2iVMQM2dnqFlPog==
 oljEGEel4pfLbtrnPva6PxAzZ2eoWU+i
-ZoQsKUQaLm5KYBGd+lGX+w==
+UQvajZrxpkmia0min0kp8g==
 LLc7kujy/IjxFTYm96DwEVBx98x3dSKjliogyqLe5MqrB8cdRCRsJQ==
 NeOoAxjiNI6eVQOebOiQLXp8re9DpYyKSmARnfpRl/s=
 WhfKS1q74jWEluyWqfzfcR/gyjF7/BPVID/4Un545b9/Z0l4vhEgzHLm8EIUKRZm
@@ -25327,8 +25329,8 @@ Kx0qZRJbca+2eQd7E/7L83Lm8EIUKRZm
 C1DGev9SjeMcdrB251kWVA==
 p31+a5NG7DgQM2dnqFlPog==
 rXUzcf7ihatKYBGd+lGX+w==
-l/CslbOdg8TbDs7pB5LQQQ==
-9DjofUiQbHvh9oL6MDPr2A==
+l/CslbOdg8Rp/MatfzEhhw==
+9DjofUiQbHt4NsHPl6bQeg==
 OOUymHEFv51KYBGd+lGX+w==
 /C8Lr0zoZE4sqzd+cZvBXg==
 sCgHxCvaCms7aXf36GTtvA==
@@ -25342,7 +25344,7 @@ ABcuigAAL3osqzd+cZvBXg==
 X3ixzFx7yngWhWLcoWylFQ==
 2DepqsVduQtKYBGd+lGX+w==
 wnBrmsLm/mpGqTcZNjIJwaszD9OIZHG0
-3jnD7B7QJC1KYBGd+lGX+w==
+dENj0tf7WIqia0min0kp8g==
 f23pgwJG2QYKXP3UGzNj3DoHoy7uDJrR
 lRNC1vGqAbkqd2Goqc7wTzoHoy7uDJrR
 g8TDPKcyiFo8mfYH2Nh/ybfhY9DhSuhsL6dWAYmH3+V//jCyK3eKrQ==
@@ -25369,16 +25371,16 @@ TCMBORDYsg4rsn6clmF5Kg==
 7rbgI/6/HULkZt3omYVUSw==
 2C6NWkYCMWcsqzd+cZvBXg==
 3u2+n5V8RrCxk2CN0xr93g==
-VfoH7W5xhGVKYBGd+lGX+w==
-DyCnq7N7FKRKYBGd+lGX+w==
+M9hBu5u+xdiia0min0kp8g==
+wy6hr5qj6Y6ia0min0kp8g==
 w42kmaslleJfHwiwz57oOyjIGXGb0pxhwGKphuLh3q8=
 aXw1GW7pz8CognNgz49kBnLm8EIUKRZm
 5LcEM5njK2nmSsvRAWjhow==
 U3e63H117g4sqzd+cZvBXg==
-EMIutVmjdCpKYBGd+lGX+w==
-p1czHCDmIV1KYBGd+lGX+w==
-1Jp+vmnzo1NKYBGd+lGX+w==
-aRPuvpm4W8ZKYBGd+lGX+w==
+0evsgDvT2tqia0min0kp8g==
+g32zCbxpD/Sia0min0kp8g==
+dLYgssmYPteia0min0kp8g==
+DYnbm6yPyAuia0min0kp8g==
 XywdATLRbqLWPst56W2zySaoH8dT1RizSmARnfpRl/s=
 b7V5e/be8HgQM2dnqFlPog==
 XbvQXQRFF/GolFTTM3O6Wbl5dWa20q2FlnjCcVB4n4U=
@@ -25397,7 +25399,7 @@ ZIWA7GufHVYgNz6JlUZhkhAzZ2eoWU+i
 PfdaDBocFQixm2LbXcoIDXLm8EIUKRZm
 +ABo3FbTzJYQM2dnqFlPog==
 2yXLHU3QQIVKYBGd+lGX+w==
-cqDEz88YCSxKYBGd+lGX+w==
+QPjCS+GSiayia0min0kp8g==
 /pxd3Yx5vTGCM8IVzV6Bc+RW3m0CLirswGKphuLh3q8=
 /pxd3Yx5vTG/2nP6jR2w3ytKOLKlA1ksMvC5rExawa7DJrGIZ4OTkvHYk0wbVf7/
 Qfvo4xhb4z6eVQOebOiQLXp8re9DpYyKSmARnfpRl/s=
@@ -29471,7 +29473,7 @@ LAphuzvX0+0X8YapN452Ho0u6jneTTO+
 LAphuzvX0+3zwqVLoNh+dmFRVgZLBsUGPbYt1gxECy9s1scDZkzmIikSY4va0rv9SmARnfpRl/s=
 LAphuzvX0+3zwqVLoNh+dtNT95SkLHLIN/krNlxwCQs=
 LAphuzvX0+3zwqVLoNh+dtNT95SkLHLIpppiaGA6ggmgIQ8fFlKSlQ==
-LAphuzvX0+2kcrqjTBL39hP5J5z5twJM
+LAphuzvX0+2kcrqjTBL39nLm8EIUKRZm
 LAphuzvX0+01QUb6+CD5OJ/BpKWov8L8cubwQhQpFmY=
 LAphuzvX0+01QUb6+CD5OP274M6YLa3pXybsefm54Pdy5vBCFCkWZg==
 LAphuzvX0+01QUb6+CD5OF8m7Hn5ueD3cubwQhQpFmY=
@@ -32889,7 +32891,9 @@ rTVqZVyiuAQQM2dnqFlPog==
 3CPos7luhu7LbtrnPva6PxAzZ2eoWU+i
 RWZidHYgxmhKYBGd+lGX+w==
 BIigBbV4HWg=
+wmRvRyr6N5znSqCSJwV+1A==
 wmRvRyr6N5wB9qAjL/gYSwPZnLTJIPKK
+5jM/3b9i31Gkm28pQc1SWg==
 2HzoVL+pQpW3+Ne/e20ajg==
 bXsWdeWJzKLeJQK1AkjI0g==
 bXsWdeWJzKIS54+vO/aTq7UH/wUl4qQhsv2P+NuGnx5KYBGd+lGX+w==
@@ -33137,7 +33141,10 @@ FeneHV0jniWepEyWM2W9LOiSMOY/emDTwGKphuLh3q8=
 8zBWmzELTdpZ/Gf1ijcv/g==
 rz0nWFz/+dJpb/c9CGc9Hg==
 Q+cjzMmYU+5rKvAfWeSp+3usW9c2GnwrwGKphuLh3q8=
+bFjFv7/SHH8F2H5RglN53doLKd/tt4Tz
+xl0cXP48zHtuP63zm+kBpA==
 o2DNf6DIEKim+5yUHRENOw==
+LfX+of1ooG7dWagCg5zjQw==
 IdoVi5ODsNcsqzd+cZvBXg==
 Jl99d9fk2PXijE3Rt9R/1w==
 Jl99d9fk2PUetRuWQ4GjsDoHoy7uDJrR
@@ -33932,7 +33939,7 @@ j0xYRyAbdp7BjqlaOqk1O3Lm8EIUKRZm
 BWDQhwjZ4blKYBGd+lGX+w==
 sfwRYmDHiW0712JQoXGx/33e0abfH1gYcmkudNOoEtc=
 UM40XQVfmLkXboT3r2LHWsHPtfBlLggR
-civNsxQq391KYBGd+lGX+w==
+zM2vKIqlMoOia0min0kp8g==
 GrrmmLNKBogu8zzbg9omzXC7pAaDLGxS4F+k/KnXynlKYBGd+lGX+w==
 5YYpg49pmJdKYBGd+lGX+w==
 bO4KsD5esDEDnMIjB96FIQ==
@@ -35565,9 +35572,9 @@ dOfqECpm90QCiZaWvqrvxA==
 YPZlJ8WEap9KYBGd+lGX+w==
 +F/O6Qdj0/0QM2dnqFlPog==
 apmpsZ/b7LXFpI25Ovug3g==
-oz+AtqQBU6JKYBGd+lGX+w==
-OdjRkL2nWudgfWBkdUEehg==
-OdjRkL2nWuf3B/NgOaulbw==
+QIVNCpgUyhaia0min0kp8g==
+OdjRkL2nWudpMiD9jqsB5w==
+OdjRkL2nWucC8sgVkwZABw==
 LgLYSIr7v4NV2hXSPzoUsA==
 inCRAGoAxqXFZcTf1i9DHQ==
 inCRAGoAxqW7j9dcwukeEw==
@@ -36065,6 +36072,7 @@ BvUQsO1TZjQToqtXFzcysA==
 BvUQsO1TZjSe6UM0b8H+gHLm8EIUKRZm
 /gqwzzkYXZxpb/c9CGc9Hg==
 4uadpArGqCfkd5xNWvPtLA==
+4uadpArGqCfeoIvuUniQ8mlv9z0IZz0e
 FvlQDKy1PUA=
 aIUqclbk0VcmoSggkU8jx+OAzA7s56n83iUCtQJIyNI=
 glWnjAaoDOcQM2dnqFlPog==
@@ -36580,8 +36588,7 @@ MkfhBWQmHjXjZwmcBeDtzHLm8EIUKRZm
 yAuyeT3nXGeuZEovE8UTJXLm8EIUKRZm
 p13WWal0B8StJBkgZKQx7w==
 p13WWal0B8TcIAXYxxl/2joHoy7uDJrR
-cXFQRYCOBadKYBGd+lGX+w==
-fGbs93rwRcxPiFA97DSzTQ==
+Mhs+T6q8paVpb/c9CGc9Hg==
 VBRgeKuJ7PjLbtrnPva6PxAzZ2eoWU+i
 YaL4CY9/yRF4p8i85ltYYHLm8EIUKRZm
 d2hP8EZe5TYsqzd+cZvBXg==
@@ -36855,6 +36862,7 @@ Bp/ohwpUXwle16w2SHzIIQ==
 Bp/ohwpUXwm3nq2S+4vXtMyizcisX1Ri
 Bp/ohwpUXwkoFvtOzPImIkbTdiaUqZ5I
 HHbAtE/jDF3cjOCdaMLPog==
+irWj3KIqRc+uCUEEtjBkR25lP48L0fdm
 irWj3KIqRc+RRDXq3aNA4NcoR/eDzRLxSmARnfpRl/s=
 irWj3KIqRc+CYXeHDvyQgA==
 irWj3KIqRc+7RL4mJkqRKJqfsKs17czAJ/BhAHsD6vw=
@@ -36982,7 +36990,7 @@ TwAEuKuuKnYsqzd+cZvBXg==
 7Avjlt3+xsDYr+BG9GVYOw==
 2u/jQTV366fkAx5jkygXaw==
 Vhw178ue65Lr7JgQKsnKW35fD8r1fWpMSmARnfpRl/s=
-sOt8Hu+fh1e0ukbhmbNtBWlv9z0IZz0e
+sOt8Hu+fh1fV1SWbE894fw==
 QUqpPWFj+1MQM2dnqFlPog==
 nbsEibqKgSIQM2dnqFlPog==
 HgzusXpRvqQ7Zu5JSk43RQ==
@@ -41612,6 +41620,7 @@ g06wHkmXiWrAzJDjwRpL9Q==
 1XzDXoXAd+/CgeHeyn5s2nLm8EIUKRZm
 1XzDXoXAd++7Xn+3foyNznLm8EIUKRZm
 j4kALvDkWQfWli/rvnRLUg==
+TtyaRnokW861u1jAVBYViw==
 TtyaRnokW86W6FHd+/iTeQ==
 vBv0+XanS1wew9MR7vr9Uw==
 3Us4hTxwFHUaOH9NcRRmow==
@@ -46500,6 +46509,7 @@ ZH7wI+6Qkac8RUQBssvb4w==
 O5zhOyFPgFpKYBGd+lGX+w==
 nFr6GLdmGqQQM2dnqFlPog==
 HjbsMpneZqL7lTMAXPNP6w==
+c+wjQVVQgq4So3KR0aB14g==
 wJmfybgGo/VKYBGd+lGX+w==
 ntKwejfnuA3sSzyDpSDlkzf4pB1EBiRkLDJOV5gdVTc=
 ntKwejfnuA2A2cuUErSanCRuLhUHGmXzJk4Mr5/kH7iBaVzkuveTQH/+MLIrd4qt
@@ -47784,6 +47794,7 @@ z+rfsyfdeCiODtLdQmG7PCoNXRmLZt8b
 wu4NEuCivVRnj112Q3SecQ==
 wu4NEuCivVTF+pz6J/mJTQ==
 mHFe98NUmvdewW5mC/j/CgPZnLTJIPKK
++wqnb2WGYmY+qWb8rE2y5Wlv9z0IZz0e
 klSUum1bm6uZdaggjnN/Hg==
 L0I8ODrd0DP630U06cFg6jGcSveFLEG+UGCdjGW1vUmylNZ/XO3c4g==
 c471dKxW8Ar3k/Hp6csG1DoHoy7uDJrR
@@ -51423,11 +51434,13 @@ M+D5UX+4YYSSbgk4aWLLvw==
 cydb1bYl5SGtJBkgZKQx7w==
 cydb1bYl5SHcIAXYxxl/2joHoy7uDJrR
 Lgo+ZxJicvw=
+fynuWiHDwwe9KvCUUwojqA==
 /T5dIKAtQzrFpI25Ovug3g==
 VvbceNnyFVZpb/c9CGc9Hg==
 bbTLrfY0C/VKYBGd+lGX+w==
 I9GtloEBcjq9KvCUUwojqA==
 QM3AYSUh0u9syxEf39iAPw==
+OcwzoWVT6xC9KvCUUwojqA==
 Fsy/czP00tvN3HeskrJ5Yw==
 Ysd94Yy3DrkiMGE1Cy3gtw==
 d87FdJ/Jf9YKmof5Ey59pw==
@@ -51967,6 +51980,7 @@ d/ZleGAg5P8yNNp06LfsToxTXACE8LdHSmARnfpRl/s=
 6oWfREbXhfIVV+cw45Wt+s0zkcWZgizB
 1zfwSAiGDC74bwE0hIREnYKoWEtpIM8v
 ricLfGNY6sakrADDVvnyM3Lm8EIUKRZm
+iGEVqbed1RePZQHJos4lZg==
 z+V2rYSIrHkQM2dnqFlPog==
 Kne5s2y3NYPLbtrnPva6PxAzZ2eoWU+i
 CDN90Vuo5BvqOXMnLfjosw==
@@ -55407,6 +55421,7 @@ WNjeFQwjzuzEbLfUC84OzlmFRiH3DkGLOdMWg3lvN5g=
 WNjeFQwjzuzIpckGJQIjMcFSg9C4JVRE06LTK0O421Msqzd+cZvBXg==
 WNjeFQwjzuwehZV332a7SJj9lZ2f8vtz/0m5yTkB2mk=
 WNjeFQwjzuzmb8O/SBOki2Yv/AGrgmcIro8I+L5UXjejp8scWHeh3w==
+WNjeFQwjzuzmb8O/SBOki2Yv/AGrgmcIqf+GCGq/L1Msqzd+cZvBXg==
 WNjeFQwjzuwSrAnZvH1yR8ITvKPerSSs/0m5yTkB2mk=
 WNjeFQwjzuwSrAnZvH1yR8ITvKPerSSs+64wun+ooeQDw0XhjkbqWQ==
 WNjeFQwjzuxWQjRh3naeuXIfXtHDSMQkSmARnfpRl/s=

+ 9 - 5
nlp/src/main/resources/synonym.dict

@@ -629,7 +629,6 @@ ZS17Dgm0g5sr9CAO6fuC/8hcc+w9cvGzRNyq17WeIxYgKxfKdv/2STQt0BAiEkw1
 ZS17Dgm0g5sr9CAO6fuC/35sZlzOzlcLRNyq17WeIxYgKxfKdv/2STQt0BAiEkw1
 ZS17Dgm0g5uWR6gm1zbExWUtew4JtIObSAVRiUOcdmuyzJYNI+WUYfXLKGmIXnCQVu1VNingzjo=
 ZS17Dgm0g5tIBVGJQ5x2aycHVxupfSwiA3e/hrLacJyeWEPZoG2eWrhsKNhJ+U8g
-ZS17Dgm0g5vdPmsNsdD0Um4OXV6Tw9PGaXOWE5tzEK+IJZr3POR7Sf5pqy+r9yUWOxgqIHpj3G0=
 ZS17Dgm0g5vdPmsNsdD0Umh6VPbMFwmvaXOWE5tzEK+IJZr3POR7SRHISdV4Hc+fydPG0/OzNPR3RjFcViAD/g==
 ZS17Dgm0g5ua8k4aUM5PKq73S6nwnkppA3e/hrLacJwXL5Tq4h/ozb5l3nVmjNgjQwipD9IysUYUEksYV8b9Bw==
 +hIX2IIIwtSEneK88ldV/r7DLJTv6yKM
@@ -1180,7 +1179,6 @@ qN3qWT2bGk0lhEi58dRQU0MLaMkQGL5p
 0NdbQ1JU67PxmnQ46QAyrL5mrHhC2c0D
 MIqhjxJmusPrfgC7wGdnmKCUMFUcmUqK
 rn8GV7gaU3UD7zrtOhTM7UpgEZ36UZf7
-6PVclh97Li9kZ3B7iyrR4IK6LuRYaN4w
 shc7AW2Xd8aiCoIwYr9UnIK6LuRYaN4w
 ptgLW3r/PWnU3bM62p5k2+VSu8PfUGPHZbG8KUhskwQ=
 SZw+xPbMTLaoJwnEzmAuk0pgEZ36UZf7
@@ -1612,6 +1610,7 @@ dlvIGPFnmQpX8enb6BlG6BnbBLCi9C1xdig3icfaFQlw2EZUYZ9TOUs17/uBe6qF
 L8PH6RYvQ4lWgHE3Y5h4cldsnAssKndjhawXDHx/XgCbj/LPAa7CXNgUKjvPI0f/
 kb80gvcxzar3vBhNkU59YyzV5/OP8SC7ryKysOD+KUY=
 AIvTjMd3sKt841qlXVGmBBlLFlAQ+u3i
+ZZW35Jl3SZkAqosWyE+MhGf4A5nRUkII
 cZarH8MV8sEWjRnZhCj020pgEZ36UZf7
 H0UnDFxpQ1VWgHE3Y5h4cldsnAssKndjiOTt1VlHverHofGbR/+lo9gUKjvPI0f/
 MfArfnKi0sbS/VhD/5yr6fCKLkTxTRsjQsjDFyh0etI=
@@ -3540,6 +3539,7 @@ bK8Zpab/PptNYls6bckCKxinOyXiON3Htew4/25sQfh7zD+13Aep3lpFaNgH43hC
 bK8Zpab/PptNYls6bckCKxinOyXiON3HgDYFNCmIYdO17Dj/bmxB+HvMP7XcB6neWkVo2AfjeEI=
 bK8Zpab/PptNYls6bckCKxinOyXiON3HgDYFNCmIYdOANgU0KYhh07XsOP9ubEH4e8w/tdwHqd5aRWjYB+N4Qg==
 gJj1miT0Dpc1Teioz1asRWmZ459ubdRU3yRSpwZ0fTK1nwJu9BVlioqjLl2lbokkftghrsieFJwwNgYSse52/Q==
+pGY+NbyU+NkrrFGAy9hf932uwRjCoH5r51xSaTWBb1w=
 w0WmywVpDHSNrqDg67izJrYnEsXqsKL9
 QQ6kZzWcKiS1QXPpBNd8kVhwc9eBN/mHj7bszohDE5Tg6njkDcx/a3rgmStR4H73
 QQ6kZzWcKiS1QXPpBNd8kVNMtFmODhCbtdI2dzduC63g6njkDcx/a3rgmStR4H73
@@ -4517,7 +4517,7 @@ vex7K0bOHIGOR+IyZWBLhGkKT3CtulN5
 KI6VTaqubnu4RS5mEn/iyx4LqGcyn5L82/ac/I99K9htexZ15YnMopUfeC/WJulCXZ/HAkHq0qN1+RhYXqM100OrEUSwHU6W/106SjBTEu2XcnkrfHl5wRO2icIlBY3e
 KI6VTaqubnu4RS5mEn/iyybITkU+QsAQ8ovKqaT50Bhjbi6oMwq51ieSpmlySTag31VqLkIaWnU=
 YpvY+G8a1eSDWVXzQCYSoOnsg1FMZCBUg1lV80AmEqD8C0Ms3Yu4qA==
-wmRvRyr6N5yAmEiSg+1tM8I503QdVi5x
+5jM/3b9i31E9a96tOwWxlkZqz4/zUosBFWN3xRThYNo=
 zF+QtFd3/zOZlLyuqFrbpnNh2kJa4EYYd/dwi31nZXg=
 wY62Cw7bf9UPQGI0Um6WxIZc9HnDKZwuWWEtRbtOPFA=
 bXsWdeWJzKIhj0+n6MoMdJdyeSt8eXnBE7aJwiUFjd4=
@@ -4597,7 +4597,9 @@ FeneHV0jniV+QJMdObX42idOUF2fo3Vfdaak9DBsX0/1IQBYBPMjZZ2chw2JdeqnK1E/wxTJGka2KqzI
 FeneHV0jniViKLplWTxde0aGKBDe6KHgFwOrsCFAWPttd7Uiq/ivKg==
 FeneHV0jniXH6G7p2o7OmE9fABqP9Xek+hh3HD/b6EpEi+MQl+GiV54vUh3E8iWx
 Q+cjzMmYU+6yrAI8b6C0g0pgEZ36UZf7
+bFjFv7/SHH8F2H5RglN53b5U44NPPMQwfkRC94tU/To=
 OYu/p+Z1BL8nOyzdN3BUYEpgEZ36UZf7
+LfX+of1ooG7dIWWmQpCPJGrAhdGym+dGfkRC94tU/To=
 BrdHo2/KIq/ZSNPYgFwKJwa3R6NvyiKvDJMY4fCtDFY=
 BrdHo2/KIq/8ABeAT8ORjLR/UMlmTa53QFfN5c9EqNhvKO84YTnyQ4EruWqVpiHh
 HWLb/nfuHAkpw9nl6LwU96G5PfH0Oralbz39Xy2erFk=
@@ -5225,6 +5227,7 @@ ySmvEQkp+QnDVB+FOWxfQqIfbBaKuOTwesVRDwyG1+rvq8gD9jrJi33hFtJfBaSgaOaImY1ru44=
 ySmvEQkp+QnDVB+FOWxfQm4kpY+Uw9aXX4TGvKe7ChFT0jPxNVIYaq/YuAHzCZ7mgAre32dOKcEAN2vwm/8xqVN/DqrmR/+o3C2dAq296HPP8GBhzPctH2rERi9wNKpAYVVeUs9o0SM=
 ySmvEQkp+QlYsYlOjJf65juETX9TuLx5dEofaAkvq0zWeNdZ3kO++W2Nuxn2SU1l9GBUoa5JGn3O7tfDrONcIAESvSwUZDzP
 TTZnVqSyUmHOmew3/oTijw==
+4uadpArGqCfeoIvuUniQ8uW7BrAdxwgo
 2s+Nci9wvwdZqPGtQn/hwCr2WNh4R9dDKQFzrAJDaZc=
 x1YYrWRa5HvjsXZ3A7sitUpgEZ36UZf7
 zMEGGathOY5DKMBhndIq8GK633mBPuhHfXpg2jZoGZlnlaMm6UyUQQ==
@@ -5294,7 +5297,6 @@ NxH/k5Kw1zRFMs6dfy4e+g==
 hU2JsvCVd8h/G+pSk/DU1J/BpKWov8L8hU2JsvCVd8jFsQ/2b03/Ag==
 QRRawhtQjw9SNf5GnzWF/A==
 kUUgvamYK4HrilGZfSe60Q==
-fGbs93rwRczzEpRMHp+90w==
 VBRgeKuJ7PjWeNdZ3kO++ZI/hKRWCsMo
 /xGMK7LNshCdpnC0XD4MGw==
 IsVzTv93VpAJXKD0DIc8rg==
@@ -5357,7 +5359,7 @@ jDc+I5DLhjfCoj1DBrmTZb07UWCqWi6/Le6jWvY+EOw6FxNEsc+CmUKPm+UN1WcG
 jDc+I5DLhjcgtkRUkcx1/C3uo1r2PhDsOhcTRLHPgplCj5vlDdVnBg==
 jDc+I5DLhjcgtkRUkcx1/NbvkPoUxHEmOhcTRLHPgplCj5vlDdVnBg==
 jDc+I5DLhjev5CnjxWLdWUzkJFOBOazAFSRIAu1hEegKJZuCHMXeWEpgEZ36UZf7
-sOt8Hu+fh1fZRWVDAXGH/F9FQ+5x9IFZlkKQ4jx5xBc=
+sOt8Hu+fh1do/VvBBU1qybDrfB7vn4dXkSlT2wHlmBI=
 3V3869ulMfYcAs4LRBtWOfOgtC3urvRzHOlg6a72ToWwQJXgvPwD7slFVJy9t4QF
 p9xqtUeJYuVeJKC1Gwo1gZ/BpKWov8L8p9xqtUeJYuXCArlCzKmjAw==
 am6mi0e3+dmx5mhrEdvgkRApXgoL452bSJAqyeGLlGQ=
@@ -6205,6 +6207,7 @@ RmdvwfLNayZfroQ+x6I5zJ/Js1I36o/IR4E0DI/N9m/KSqm/hVqrhg==
 1XzDXoXAd++hFsW90x0cOF8m7Hn5ueD31XzDXoXAd+9pbo+u8k2ZvA==
 1XzDXoXAd++hFsW90x0cOCHoUhwn+q4A1XzDXoXAd+9pbo+u8k2ZvA==
 1XzDXoXAd+8Nk3vYyUw3rFKaXqLtcC4+JdoLa5V/XCa5OeBLEXi0fJcDBSLClcIN
+TtyaRnokW84a0HQISHomR6XKNvciEBnB
 TtyaRnokW87Z5+YxrRuIUOHr9YA0+LL7FWN3xRThYNo=
 vBv0+XanS1xy02rvkOmlZtg0B8HVH9dkB451+nWReBM=
 3Us4hTxwFHUTedVcW+1ACu0liRe9U4t8
@@ -7827,6 +7830,7 @@ IktfsfyMWm7clP9Ii58LNHI1r7s51gHJBCwP8JTwb7rM1vPXRZ/8mVOlBV102mLp9iDZhXhDlM4=
 IktfsfyMWm57ko0EuedelZ/BpKWov8L8IktfsfyMWm4EHpBBsvMLUA==
 +QcUSjkdonhy40or12/sGgQRLw+xuWJZah2H2V59QN4=
 tFAlzsMnb+LIAHh1qZ4IWtfY3AGky2hs
+iGEVqbed1ReJ+SxO/CjoZ8ydZx9MvdRI
 UmfuMXcjH+3SUR91SkRm53XSEWqDKmJuNC0MX2gH6Hs=
 mdJR4ErXZk7fvd05/i8Dew==
 Kne5s2y3NYP2aXMD1JQvMqCdw0eXUe4I

Filskillnaden har hållts tillbaka eftersom den är för stor
+ 16232 - 3988
nlp/src/main/resources/tc.dict


+ 269 - 0
nlp/src/test/java/org/diagbot/nlp/test/EntityExtractTest.java

@@ -0,0 +1,269 @@
+package org.diagbot.nlp.test;
+
+import org.diagbot.nlp.participle.ParticipleUtil;
+import org.diagbot.nlp.participle.cfg.Configuration;
+import org.diagbot.nlp.participle.cfg.DefaultConfig;
+import org.diagbot.nlp.participle.word.Lexeme;
+import org.diagbot.nlp.participle.word.LexemePath;
+import org.diagbot.nlp.participle.word.Segment;
+import org.diagbot.nlp.util.Constants;
+import org.diagbot.nlp.util.NegativeEnum;
+import org.diagbot.pub.jdbc.MysqlJdbc;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class EntityExtractTest {
+    public static void main(String[] args) {
+
+        Configuration configuration = new DefaultConfig();
+        Segment segment = configuration.loadMainDict("tc.dict");
+
+        EntityExtractTest entityExtractTest = new EntityExtractTest();
+        List<Map<String, String>> data = entityExtractTest.searchData();
+
+        Map<String, List<String>> pairList =  entityExtractTest.searchPropertyPair();
+
+        Map<String, String> lexiconTypeMap = entityExtractTest.searchLexiconType();
+
+        String present = "";
+        String[] partPresents;
+        String sentenceId = "";
+        LexemePath<Lexeme> lexemes = null;
+        List<Map<String, Object>> results_pair = new ArrayList<>();
+        List<Map<String, Object>> results_none_pair = new ArrayList<>();
+        try {
+            for (int i = 0; i < data.size(); i++) {
+                present = data.get(i).get("xbs");
+                present = present.replaceAll("\r\n", "");
+                sentenceId = data.get(i).get("zyxh");
+                partPresents = present.split(";|;|。");
+                for (int k = 0; k < partPresents.length; k++) {
+                    if (partPresents[k].length() == 0) {
+                        continue;
+                    }
+                    lexemes = ParticipleUtil.participle(partPresents[k], segment);
+
+                    lexemes = entityExtractTest.combineValidate(lexemes);
+                    for (int l_1 = 0; l_1 < lexemes.size(); l_1++) {
+                        Lexeme lexeme_1 = lexemes.get(l_1);
+                        if ("99".equals(lexeme_1.getProperty())) {
+                            continue;
+                        } else {
+                            for (int l_2 = l_1 + 1; l_2 < lexemes.size(); l_2++) {
+                                Lexeme lexeme_2 = lexemes.get(l_2);
+                                if (!"99".equals(lexeme_2.getProperty())) {
+                                    boolean isPair = entityExtractTest.isPair(pairList, lexeme_1.getProperty(), lexeme_2.getProperty());
+                                    Map<String, Object> result = new HashMap<>();
+                                    result.put("sentence_id", sentenceId);
+                                    result.put("sentence_uuid", sentenceId + "_" + k);
+                                    result.put("sentence", partPresents[k]);
+                                    result.put("entity_1_name", lexeme_1.getText());
+                                    result.put("entity_1_position", lexeme_1.getOffset() + "," + (lexeme_1.getOffset() + lexeme_1.getLength() - 1));
+                                    result.put("entity_1_prop", lexeme_1.getProperty());
+                                    result.put("entity_1_prop_name", entityExtractTest.propId2Name(lexiconTypeMap, lexeme_1.getProperty()));
+                                    result.put("entity_2_name", lexeme_2.getText());
+                                    result.put("entity_2_position", lexeme_2.getOffset() + "," + (lexeme_2.getOffset() + lexeme_2.getLength() - 1));
+                                    result.put("entity_2_prop", lexeme_2.getProperty());
+                                    result.put("entity_2_prop_name", entityExtractTest.propId2Name(lexiconTypeMap, lexeme_2.getProperty()));
+                                    result.put("relation", "0");
+                                    if (isPair) {
+                                        results_pair.add(result);
+                                    } else {
+                                        results_none_pair.add(result);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            if (results_pair.size() > 0) {
+                MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/nlp-web?useUnicode=true&characterEncoding=UTF-8");
+                nlpJdbc.insert(results_pair, "re_tagging_result_part", new String[]{"sentence_id", "sentence_uuid", "sentence", "entity_1_position", "entity_2_position",
+                        "entity_1_name", "entity_2_name", "entity_1_prop", "entity_2_prop", "entity_1_prop_name", "entity_2_prop_name", "relation"});
+            }
+
+            if (results_none_pair.size() > 0) {
+                MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/nlp-web?useUnicode=true&characterEncoding=UTF-8");
+                nlpJdbc.insert(results_none_pair, "re_tagging_result_none_part", new String[]{"sentence_id", "sentence_uuid", "sentence", "entity_1_position", "entity_2_position",
+                        "entity_1_name", "entity_2_name", "entity_1_prop", "entity_2_prop", "entity_1_prop_name", "entity_2_prop_name", "relation"});
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    public List<Map<String, String>> searchData() {
+        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/nlp-web?useUnicode=true&characterEncoding=UTF-8");
+        List<Map<String, String>> data = nlpJdbc.query("tb_ryjl_extract", new String[]{"zyxh", "xbs"}, " limit 0, 3");
+        return data;
+    }
+
+    public Map<String, String> searchLexiconType() {
+        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/nlp-web?useUnicode=true&characterEncoding=UTF-8");
+        List<Map<String, String>> data = nlpJdbc.query("re_lexicon_type", new String[]{"id", "name"}, " limit 0, 1000");
+
+        Map<String, String> result = new HashMap<>();
+        for (Map<String, String> map : data) {
+            result.put(map.get("id"), map.get("name"));
+        }
+        return result;
+    }
+
+    public Map<String, List<String>> searchPropertyPair() {
+        MysqlJdbc nlpJdbc = new MysqlJdbc("root", "diagbot@20180822", "jdbc:mysql://192.168.2.235:3306/nlp-web?useUnicode=true&characterEncoding=UTF-8");
+        List<Map<String, String>> data = nlpJdbc.query("re_lexicon_property_pair", new String[]{"prop1_id", "prop2_id"}, " where has_relation = 1");
+
+        Map<String, List<String>> result = new HashMap<>();
+        List<String> list = null;
+        for (Map<String, String> map : data) {
+            String prop1_id = map.get("prop1_id");
+            String prop2_id = map.get("prop2_id");
+
+            list = result.get(prop1_id);
+            if (list == null) {
+                list = new ArrayList<>();
+                list.add(prop2_id);
+                result.put(prop1_id, list);
+            } else {
+                if (!list.contains(prop2_id)) {
+                    list.add(prop2_id);
+                    result.put(prop1_id, list);
+                }
+            }
+
+            list = result.get(prop2_id);
+            if (list == null) {
+                list = new ArrayList<>();
+                list.add(prop1_id);
+                result.put(prop2_id, list);
+            } else {
+                if (!list.contains(prop1_id)) {
+                    list.add(prop1_id);
+                    result.put(prop2_id, list);
+                }
+            }
+        }
+        return result;
+    }
+
+    private boolean isPair(Map<String, List<String>> result, String prop1_id, String prop2_id) {
+        String[] prop1_ids = prop1_id.split(",");
+        String[] prop2_ids = prop2_id.split(",");
+        List<String> list = null;
+        for (int i = 0; i < prop1_ids.length; i++) {
+            list = result.get(prop1_ids[i]);
+            if (list == null) continue;
+            for (int j = 0; j < prop2_ids.length; j++) {
+                if (list.contains(prop2_ids[j])) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    public String propId2Name(Map<String, String> map, String prop_id) {
+        String[] prop_ids = prop_id.split(",");
+
+        String prop_name = "";
+        for (int i = 0; i < prop_ids.length; i++) {
+            if (i == 0) {
+                prop_name = map.get(prop_ids[i]);
+            } else {
+                prop_name = prop_name + "," + map.get(prop_ids[i]);
+            }
+        }
+        return prop_name;
+    }
+
+    public String lexeme2Text(List<Lexeme> lexemes) {
+        String text = "";
+        for (Lexeme l : lexemes) {
+            text = text + l.getText();
+        }
+        return text;
+    }
+
+    private LexemePath<Lexeme> combineValidate(LexemePath<Lexeme> lexemes) {
+        Lexeme l = null;
+        LexemePath<Lexeme> results = new LexemePath<>();
+        for (int i = 0, len = lexemes.size(); i < len; i++) {
+            l = lexemes.get(i);
+            if (l.getProperty() != null
+                    && (l.getProperty().equals(Constants.word_property_time) || l.getProperty().equals(Constants.word_property_unit))) {
+                findLast(lexemes, i, l, results);
+            } else {
+                results.add(l);
+            }
+        }
+        return results;
+    }
+
+    private void findLast(LexemePath<Lexeme> lexemes, int index, Lexeme lexeme, LexemePath<Lexeme> results) {
+        Lexeme last_l = null;
+        if (index > 0) {
+            index--;
+            last_l = lexemes.get(index);
+            if ("×".equals(last_l.getText()) && index > 0) {
+                lexeme.setOffset(last_l.getOffset());
+                lexeme.setLength(last_l.getLength() + lexeme.getLength());
+                lexeme.setText(last_l.getText() + lexeme.getText());
+                results.remove(results.size() - 1);
+                index--;
+                last_l = lexemes.get(index);
+            }
+            if (isNumberString(last_l)) {
+                lexeme.setOffset(last_l.getOffset());
+                lexeme.setLength(last_l.getLength() + lexeme.getLength());
+                lexeme.setText(last_l.getText() + lexeme.getText());
+                results.remove(results.size() - 1);
+            }
+            results.add(lexeme);
+        }
+    }
+
+    public static boolean isNumberString(Lexeme l) {
+        if (l == null) return false;
+        if (isFeature(l.getProperty(), new NegativeEnum[]{NegativeEnum.EVENT_TIME_DESC})) {
+            return true;
+        }
+        for (char c : l.getText().toCharArray()) {
+            if (c >= '0' && c <= '9') {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public static boolean isFeature(String property, NegativeEnum[] features) {
+        if (property == null) {
+            return false;
+        }
+        if (features == null || features.length == 0) {
+            return true;
+        }
+        if (property.indexOf(",") > 0) {
+            String[] properties = property.split(",");
+            for (int i = 0; i < properties.length; i++) {
+                for (NegativeEnum nenum : features) {
+                    if (NegativeEnum.parseOfValue(properties[i]) == nenum) {
+                        return true;
+                    }
+                }
+            }
+        } else {
+            for (NegativeEnum nenum : features) {
+                if (NegativeEnum.parseOfValue(property) == nenum) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+}

+ 1 - 1
nlp/src/test/java/org/diagbot/nlp/test/LexemeDicTest.java

@@ -31,7 +31,7 @@ public class LexemeDicTest {
             String path = this.getClass().getClassLoader().getResource("").getPath();
             path = path.substring(0, path.indexOf("target"));
             //所有词典库
-            String sql = "select distinct name, category_id, status from kl_standard_info where category_id != 100 and category_id is not null && category is not null order by name";
+            String sql = "select distinct name, category_id, status from kl_standard_info_0318 where category_id != 100 and category_id is not null && category is not null order by name";
             st = conn.createStatement();
             rs = st.executeQuery(sql);
             List<Map.Entry<String, String>> libraryList = rsToMap(rs, true);

+ 2 - 2
public/src/main/java/org/diagbot/pub/jdbc/MysqlJdbc.java

@@ -151,13 +151,13 @@ public class MysqlJdbc {
                 sb.append(")");
                 first_index++;
 
-                if ((cursor + 1) % 10000 == 0) {
+                if ((cursor + 1) % 1000 == 0) {
                     st.executeUpdate(sql + sb.toString());
                     sb = new StringBuffer();
                     first_index = 0;
                 }
             }
-            if (data.size() % 10000 != 0) {
+            if (data.size() % 1000 != 0) {
                 st.executeUpdate(sql + sb.toString());
             }
             conn.commit();

Filskillnaden har hållts tillbaka eftersom den är för stor
+ 95 - 59
push-web/src/main/resources/static/pages/relation/sample.html