|
@@ -1,16 +1,15 @@
|
|
|
package org.diagbot.nlp.relation;
|
|
|
|
|
|
-import com.alibaba.fastjson.JSON;
|
|
|
import org.algorithm.core.cnn.AlgorithmCNNExecutor;
|
|
|
import org.algorithm.core.cnn.entity.Lemma;
|
|
|
import org.algorithm.core.cnn.entity.Triad;
|
|
|
-import org.algorithm.core.cnn.model.RelationExtractionModel;
|
|
|
import org.algorithm.core.cnn.model.impl.RelationExtractionModelImpl;
|
|
|
import org.diagbot.nlp.feature.FeatureType;
|
|
|
import org.diagbot.nlp.participle.ParticipleUtil;
|
|
|
import org.diagbot.nlp.participle.word.Lexeme;
|
|
|
import org.diagbot.nlp.participle.word.LexemePath;
|
|
|
import org.diagbot.nlp.relation.extract.PresentExtract;
|
|
|
+import org.diagbot.nlp.relation.extract.VitalExtract;
|
|
|
import org.diagbot.nlp.relation.extract.output.OutputInfo;
|
|
|
import org.diagbot.nlp.util.Constants;
|
|
|
import org.diagbot.nlp.util.NegativeEnum;
|
|
@@ -28,7 +27,7 @@ import java.util.List;
|
|
|
**/
|
|
|
public class RelationAnalyze {
|
|
|
public List<OutputInfo> analyze(String content, FeatureType featureType) throws Exception {
|
|
|
- String[] part_contents = content.split("\\。|\\;|\\;");
|
|
|
+ String[] part_contents = content.split("\\。|\\;|\\\r|\\\n|\\;"); // each delimiter is its own alternative: 。 ; CR LF ;
|
|
|
|
|
|
List<OutputInfo> outputInfos = new ArrayList<>();
|
|
|
for (String part_content : part_contents) {
|
|
@@ -37,14 +36,17 @@ public class RelationAnalyze {
|
|
|
List<Lemma> lemmaParticiple = lexemeToTriadLemma(lexemes);
|
|
|
// Invoke the CNN model
|
|
|
AlgorithmCNNExecutor executor = new RelationExtractionModelImpl();
|
|
|
- List<Triad> triads = executor.execute(content, lemmaParticiple);
|
|
|
+ List<Triad> triads = executor.execute(part_content, lemmaParticiple); // run on the current split segment, not the whole content
|
|
|
// Convert the triads returned by the model into a tree structure
|
|
|
List<Lemma> lemmaTree = traidToTree(triads, featureType);
|
|
|
- OutputInfo outputInfo = null;
|
|
|
+ OutputInfo outputInfo = new OutputInfo(); // created up front and passed into each extractor below
|
|
|
switch (featureType) {
|
|
|
case SYMPTOM:
|
|
|
PresentExtract presentExtract = new PresentExtract();
|
|
|
- outputInfo = presentExtract.extract(lemmaTree, lemmaParticiple);
|
|
|
+ outputInfo = presentExtract.extract(outputInfo, lemmaTree, lemmaParticiple);
|
|
|
+ // If vital-sign information is also present, it needs to be extracted as well
|
|
|
+ VitalExtract vitalExtract = new VitalExtract();
|
|
|
+ outputInfo = vitalExtract.extract(outputInfo, lemmaTree, lemmaParticiple);
|
|
|
}
|
|
|
if (outputInfo != null) {
|
|
|
outputInfos.add(outputInfo);
|
|
@@ -64,7 +66,7 @@ public class RelationAnalyze {
|
|
|
if (!lexeme.getProperty().equals("99")) {
|
|
|
Lemma lemma = new Lemma();
|
|
|
lemma.setLen(lexeme.getLength());
|
|
|
- lemma.setPosition(lexeme.getOffset() + "," + (lexeme.getOffset() + lexeme.getLength()));
|
|
|
+ lemma.setPosition(lexeme.getOffset() + "," + (lexeme.getOffset() + lexeme.getLength() - 1));
|
|
|
lemma.setText(lexeme.getText());
|
|
|
lemma.setProperty(lexeme.getProperty());
|
|
|
lemmas.add(lemma);
|
|
@@ -92,18 +94,30 @@ public class RelationAnalyze {
|
|
|
private void select(List<Lemma> lemmaTree, List<Triad> triads, NegativeEnum[] type) {
|
|
|
List<Triad> has_add_triads = new ArrayList<>();
|
|
|
for (Triad triad : triads) {
|
|
|
+ Lemma origin_l = null; // copy of the lemma whose property matches `type`
|
|
|
+ Lemma relation_l = null; // copy of the other lemma of the same triad
|
|
|
if (NlpUtil.isFeature(triad.getL_1().getProperty(), type)) {
|
|
|
- Lemma lemma = triad.getL_1();
|
|
|
- lemma.add(this.findRelationTriad(triads, has_add_triads, triad.getL_2()));
|
|
|
- lemmaTree.add(lemma);
|
|
|
- // Triad already added to the tree
|
|
|
- has_add_triads.add(triad);
|
|
|
+ origin_l = this.copyProperties(triad.getL_1());
|
|
|
+ relation_l = this.copyProperties(triad.getL_2());
|
|
|
}
|
|
|
if (NlpUtil.isFeature(triad.getL_2().getProperty(), type)) {
|
|
|
- Lemma lemma = triad.getL_2();
|
|
|
- lemma.add(this.findRelationTriad(triads, has_add_triads, triad.getL_1()));
|
|
|
- lemmaTree.add(lemma);
|
|
|
-
|
|
|
+ origin_l = this.copyProperties(triad.getL_2()); // when both lemmas match `type`, this branch overwrites the assignment above
|
|
|
+ relation_l = this.copyProperties(triad.getL_1());
|
|
|
+ }
|
|
|
+ if (origin_l != null && relation_l != null) {
|
|
|
+ boolean isFindFromTree = false;
|
|
|
+ for (Lemma l : lemmaTree) {
|
|
|
+ if (l.getText().equals(origin_l.getText()) && l.getPosition().equals(origin_l.getPosition())) {
|
|
|
+ origin_l = l; // reuse the node already in the tree (same text and position)
|
|
|
+ isFindFromTree = true;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ this.findRelationTriad(triads, triad, has_add_triads, relation_l, type); // attach lemmas related to relation_l found in the other triads
|
|
|
+ origin_l.add(relation_l);
|
|
|
+ if (!isFindFromTree) { // only add when the node is not already in the tree
|
|
|
+ lemmaTree.add(origin_l);
|
|
|
+ }
|
|
|
// Triad already added to the tree
|
|
|
has_add_triads.add(triad);
|
|
|
}
|
|
@@ -111,21 +125,38 @@ public class RelationAnalyze {
|
|
|
triads.removeAll(has_add_triads);
|
|
|
}
|
|
|
|
|
|
- private Lemma findRelationTriad(List<Triad> triads, List<Triad> has_add_triads, Lemma lemma) {
|
|
|
+ private boolean findRelationTriad(List<Triad> triads, Triad current_triad, List<Triad> has_add_triads, Lemma lemma, NegativeEnum[] type) { // returns true if at least one lemma was attached to `lemma`
|
|
|
+ boolean isFindRelation = false;
|
|
|
for (Triad triad : triads) {
|
|
|
+ if (current_triad == triad) { // identity check: skip the triad passed in as current_triad
|
|
|
+ continue;
|
|
|
+ }
|
|
|
if (triad.getL_1().getText().equals(lemma.getText())
|
|
|
- && triad.getL_1().getPosition().equals(lemma.getPosition())) {
|
|
|
- lemma.add(triad.getL_2());
|
|
|
+ && triad.getL_1().getPosition().equals(lemma.getPosition())
|
|
|
+ && !NlpUtil.isFeature(triad.getL_2().getProperty(), type)) { // only attach lemmas whose property does not match `type`
|
|
|
+ lemma.add(this.copyProperties(triad.getL_2())); // attach a copy, not the triad's own Lemma instance
|
|
|
+ isFindRelation = true;
|
|
|
// Triad already added to the tree
|
|
|
has_add_triads.add(triad);
|
|
|
}
|
|
|
if (triad.getL_2().getText().equals(lemma.getText())
|
|
|
- && triad.getL_2().getPosition().equals(lemma.getPosition())) {
|
|
|
- lemma.add(triad.getL_1());
|
|
|
+ && triad.getL_2().getPosition().equals(lemma.getPosition())
|
|
|
+ && !NlpUtil.isFeature(triad.getL_1().getProperty(), type)) { // only attach lemmas whose property does not match `type`
|
|
|
+ lemma.add(this.copyProperties(triad.getL_1())); // attach a copy, not the triad's own Lemma instance
|
|
|
+ isFindRelation = true;
|
|
|
// Triad already added to the tree
|
|
|
has_add_triads.add(triad);
|
|
|
}
|
|
|
}
|
|
|
- return lemma;
|
|
|
+ return isFindRelation;
|
|
|
+ }
|
|
|
+
|
|
|
+ private Lemma copyProperties(Lemma lemma) { // copies len, position, property and text into a fresh Lemma; no other fields are set here
|
|
|
+ Lemma l = new Lemma();
|
|
|
+ l.setLen(lemma.getLen());
|
|
|
+ l.setPosition(lemma.getPosition());
|
|
|
+ l.setProperty(lemma.getProperty());
|
|
|
+ l.setText(lemma.getText());
|
|
|
+ return l;
|
|
|
}
|
|
|
}
|