|
@@ -0,0 +1,411 @@
|
|
|
+package org.diagbot.nlp.relation.analyze;
|
|
|
+
|
|
|
+import org.algorithm.core.cnn.AlgorithmCNNExecutor;
|
|
|
+import org.algorithm.core.cnn.entity.Lemma;
|
|
|
+import org.algorithm.core.cnn.entity.Triad;
|
|
|
+import org.algorithm.core.cnn.model.impl.RelationExtractionModelImpl;
|
|
|
+import org.diagbot.nlp.feature.FeatureType;
|
|
|
+import org.diagbot.nlp.participle.word.Lexeme;
|
|
|
+import org.diagbot.nlp.participle.word.LexemePath;
|
|
|
+import org.diagbot.nlp.relation.extract.*;
|
|
|
+import org.diagbot.nlp.relation.module.*;
|
|
|
+import org.diagbot.nlp.relation.module.cell.*;
|
|
|
+import org.diagbot.nlp.relation.util.LemmaUtil;
|
|
|
+import org.diagbot.nlp.relation.util.OutputInfo;
|
|
|
+import org.diagbot.nlp.util.Constants;
|
|
|
+
|
|
|
+import java.io.IOException;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.List;
|
|
|
+
|
|
|
+import static org.diagbot.nlp.participle.ParticipleUtil.participle;
|
|
|
+
|
|
|
+/**
|
|
|
+ * @ClassName org.diagbot.nlp.relation.StructureAnalyze
|
|
|
+ * @Description 结构分析入口
|
|
|
+ * @Author Mark Huang
|
|
|
+ * @Date 2019/4/30 9:07
|
|
|
+ * @Version 1.0
|
|
|
+ **/
|
|
|
+public class StructureAnalyze {
|
|
|
+
|
|
|
+ private List<OutputInfo> outputInfos = new ArrayList<>();
|
|
|
+// private static OutputInfo outputInfo = null;
|
|
|
+ private static boolean updated = false;
|
|
|
+ private static Object current = null;
|
|
|
+ private static String num = "";
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 分词,提取词性,封装
|
|
|
+ */
|
|
|
+ public List<OutputInfo> extract(String content) {
|
|
|
+
|
|
|
+ Lexeme lexeme = null;
|
|
|
+ String timestamp = "";
|
|
|
+ String[] tokens = content.split(",|。|;");
|
|
|
+
|
|
|
+ try {
|
|
|
+// outputInfos.add(initOutputInfo());
|
|
|
+ for (String token:tokens) {
|
|
|
+ updated = false;
|
|
|
+ outputInfos.add(initOutputInfo());
|
|
|
+ OutputInfo outputInfo = outputInfos.get(outputInfos.size()-1);
|
|
|
+ System.out.println(token);
|
|
|
+ LexemePath<Lexeme> lexemePath = participle(token, false);
|
|
|
+ for (int i = 0; i < lexemePath.size(); i++) {
|
|
|
+ lexeme = lexemePath.get(i);
|
|
|
+ updateLexeme(lexeme);
|
|
|
+ updateInfo(outputInfo, lexeme);
|
|
|
+ }
|
|
|
+// outputInfo.getSymptoms().get(outputInfo.getSymptoms().size()-1).setSymptomName(token);
|
|
|
+
|
|
|
+ Symptom symptom = outputInfos.get(outputInfos.size() - 1).getSymptoms().get(0);
|
|
|
+// if (symptom.getNegative() != null && symptom.getSymptomName() == "") {
|
|
|
+// outputInfos.get(outputInfos.size() - 1).setSymptoms(new ArrayList<>());
|
|
|
+// }
|
|
|
+ if (updated == false) {
|
|
|
+ outputInfos.remove(outputInfos.size() - 1);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ catch (IOException ioe) {
|
|
|
+ ioe.printStackTrace();
|
|
|
+ }
|
|
|
+ finally {
|
|
|
+ current = null;
|
|
|
+ return outputInfos;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public void updateLexeme(Lexeme lexeme) {
|
|
|
+ String prop = lexeme.getProperty();
|
|
|
+ String[] lexprop = lexeme.getProperty().split(",");
|
|
|
+
|
|
|
+ if (lexprop.length > 1) {
|
|
|
+ prop = lexprop[0];
|
|
|
+ for (int i=0;i<lexprop.length;i++) {
|
|
|
+ if (Integer.parseInt(lexprop[i]) < Integer.parseInt(prop)) {
|
|
|
+ prop = lexprop[i];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ lexeme.setProperty(prop);
|
|
|
+ }
|
|
|
+
|
|
|
+ public void updateInfo(OutputInfo outputInfo, Lexeme lexeme) {
|
|
|
+ String con = "伴";
|
|
|
+ Symptom symptom = outputInfo.getSymptoms().get(outputInfo.getSymptoms().size() - 1);;
|
|
|
+ Lis lis = null;
|
|
|
+ Pacs pacs = null;
|
|
|
+ Vital vital = null;
|
|
|
+ Treat treat = null;
|
|
|
+ PD pd1 = null;
|
|
|
+
|
|
|
+ try {
|
|
|
+
|
|
|
+ switch (lexeme.getProperty()) {
|
|
|
+ case Constants.word_property_cause:
|
|
|
+ Cause cause = new Cause();
|
|
|
+ cause.setCauseName(lexeme.getText());
|
|
|
+ if (current instanceof Symptom) {
|
|
|
+ symptom = (Symptom)current;
|
|
|
+ }
|
|
|
+ symptom.setCause(cause);
|
|
|
+ current = symptom;
|
|
|
+ updated = true;
|
|
|
+ break;
|
|
|
+ case Constants.word_property_symptom:
|
|
|
+ case Constants.word_property_vital_idx:
|
|
|
+ current = updateClinicalInfo(outputInfo, lexeme.getText(), lexeme.getProperty());
|
|
|
+ updated = true;
|
|
|
+ break;
|
|
|
+ case Constants.word_property_prop:
|
|
|
+ Property property = new Property();
|
|
|
+ property.setPropertyName(lexeme.getText());
|
|
|
+ if (symptom != null) {
|
|
|
+ symptom.setProperty(property);
|
|
|
+ }
|
|
|
+ updated = true;
|
|
|
+ break;
|
|
|
+ case Constants.word_property_degree:
|
|
|
+ Degree degree = new Degree();
|
|
|
+ degree.setDegreeName(lexeme.getText());
|
|
|
+ if (current instanceof Symptom) {
|
|
|
+ ((Symptom)current).setDegree(degree);
|
|
|
+ }
|
|
|
+ updated = true;
|
|
|
+ break;
|
|
|
+ case Constants.word_property_LIS:
|
|
|
+ case Constants.word_property_LIS_Detail:
|
|
|
+ lis = new Lis();
|
|
|
+ lis.setLisName(lexeme.getText());
|
|
|
+ outputInfo.getLises().add(lis);
|
|
|
+ current = lis;
|
|
|
+ updated = true;
|
|
|
+ break;
|
|
|
+ case Constants.word_property_LIS_Result:
|
|
|
+ if (current instanceof Lis) {
|
|
|
+ if (((Lis)current).getPd() == null) {
|
|
|
+ ((Lis)current).setPd(new PD());
|
|
|
+ pd1 = ((Lis)current).getPd();
|
|
|
+ }
|
|
|
+ pd1.setValue(lexeme.getText());
|
|
|
+ pd1.setUnit("");
|
|
|
+ updated = true;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case Constants.word_property_PACS:
|
|
|
+ pacs = new Pacs();
|
|
|
+ pacs.setPacsName(lexeme.getText());
|
|
|
+ outputInfo.getPacses().add(pacs);
|
|
|
+ current = pacs;
|
|
|
+ updated = true;
|
|
|
+ break;
|
|
|
+ case Constants.word_property_PACS_Result:
|
|
|
+ if (current instanceof Pacs) {
|
|
|
+ if (((Pacs) current).getValue() == null) {
|
|
|
+ ((Pacs) current).setValue("");
|
|
|
+ }
|
|
|
+ ((Pacs) current).setValue(((Pacs) current).getValue() + lexeme.getText() + ", ");
|
|
|
+ updated = true;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case Constants.word_property_bodypart:
|
|
|
+ case Constants.word_property_direction:
|
|
|
+ if (current instanceof Pacs) {
|
|
|
+ if (((Pacs)current).getBodypart()==null) {
|
|
|
+ ((Pacs)current).setBodypart("");
|
|
|
+ }
|
|
|
+ ((Pacs) current).setBodypart(((Pacs)current).getBodypart()+lexeme.getText() + ", ");
|
|
|
+ updated = true;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case Constants.word_property_neg:
|
|
|
+ Negative negative = new Negative();
|
|
|
+ negative.setNegaName(lexeme.getText());
|
|
|
+ symptom.setNegative(negative);
|
|
|
+ updated = true;
|
|
|
+ break;
|
|
|
+ case Constants.word_property_timestamp:
|
|
|
+ PD pd = new PD();
|
|
|
+ pd.setValue(lexeme.getText());
|
|
|
+ pd.setUnit("");
|
|
|
+ List<PD> pds = new ArrayList<>();
|
|
|
+ pds.add(pd);
|
|
|
+ if (current instanceof Symptom) {
|
|
|
+ symptom = (Symptom)current;
|
|
|
+ }
|
|
|
+ if (symptom != null) {
|
|
|
+ symptom.setTimestamp(pds);
|
|
|
+ updated = true;
|
|
|
+ current = symptom;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case Constants.word_property_freq:
|
|
|
+ case Constants.word_property_number:
|
|
|
+ num = lexeme.getText();
|
|
|
+
|
|
|
+ updated = true;
|
|
|
+
|
|
|
+ break;
|
|
|
+ case Constants.word_property_unit:
|
|
|
+ if (num.length() > 0) {
|
|
|
+ if (current instanceof Symptom) {
|
|
|
+ symptom = (Symptom) current;
|
|
|
+ if (symptom.getPds() == null) {
|
|
|
+ List<PD> pds1 = new ArrayList<>();
|
|
|
+ pds1.add(new PD());
|
|
|
+ symptom.setPds(pds1);
|
|
|
+ }
|
|
|
+ pd1 = symptom.getPds().get(0);
|
|
|
+ } else if (current instanceof Vital) {
|
|
|
+ vital = (Vital) current;
|
|
|
+ if (vital.getPd() == null) {
|
|
|
+ vital.setPd(new PD());
|
|
|
+ }
|
|
|
+ pd1 = vital.getPd();
|
|
|
+ } else if (current instanceof Treat) {
|
|
|
+ treat = (Treat)current;
|
|
|
+ if (treat.getPds() == null) {
|
|
|
+ List<PD> pds1 = new ArrayList<>();
|
|
|
+ pds1.add(new PD());
|
|
|
+ treat.setPds(pds1);
|
|
|
+ }
|
|
|
+ pd1 = treat.getPds().get(0);
|
|
|
+ } else if (current instanceof Lis) {
|
|
|
+ lis = (Lis)current;
|
|
|
+ if (lis.getPd() == null) {
|
|
|
+ lis.setPd(new PD());
|
|
|
+ }
|
|
|
+ pd1 = lis.getPd();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ pd1.setValue(num);
|
|
|
+ pd1.setUnit(lexeme.getText());
|
|
|
+ updated = true;
|
|
|
+ num = "";
|
|
|
+ break;
|
|
|
+ case Constants.word_property_time:
|
|
|
+ if (num.length() > 0) {
|
|
|
+ if (symptom.getTimestamp() == null) {
|
|
|
+ List<PD> pds1 = new ArrayList<>();
|
|
|
+ pds1.add(new PD());
|
|
|
+ symptom.setTimestamp(pds1);
|
|
|
+ }
|
|
|
+ pd1 = symptom.getTimestamp().get(0);
|
|
|
+ pd1.setValue(num);
|
|
|
+ pd1.setUnit(lexeme.getText());
|
|
|
+ updated = true;
|
|
|
+ num = "";
|
|
|
+ current = symptom;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ case Constants.word_property_diagnose:
|
|
|
+ current = updateClinicalInfo(outputInfo, lexeme.getText(), lexeme.getProperty());
|
|
|
+ updated = true;
|
|
|
+ break;
|
|
|
+ case Constants.word_property_med:
|
|
|
+ if (!(current instanceof Treat)) {
|
|
|
+ current = updateClinicalInfo(outputInfo, "", Constants.word_property_diagnose);
|
|
|
+ }
|
|
|
+ treat = (Treat) current;
|
|
|
+
|
|
|
+ if (treat.getTreatName() == null || treat.getTreatName().trim().length() == 0) {
|
|
|
+ treat.setTreatName(lexeme.getText());
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ treat.setTreatName(treat.getTreatName() + "," + lexeme.getText());
|
|
|
+ }
|
|
|
+ updated = true;
|
|
|
+ break;
|
|
|
+ case Constants.word_property_treat:
|
|
|
+ if (current instanceof Treat) {
|
|
|
+ treat = (Treat)current;
|
|
|
+ if (treat.getValue() == null || treat.getValue().trim().length() == 0) {
|
|
|
+ treat.setValue(lexeme.getText());
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ treat.setValue(treat.getValue() + "," + lexeme.getText());
|
|
|
+ }
|
|
|
+ updated = true;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ catch (Exception ex) {
|
|
|
+ ex.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public OutputInfo initOutputInfo() {
|
|
|
+ OutputInfo outputInfo = new OutputInfo();
|
|
|
+
|
|
|
+ outputInfo.setSymptoms(new ArrayList<>());
|
|
|
+ outputInfo.getSymptoms().add(new Symptom());
|
|
|
+ outputInfo.getSymptoms().get(outputInfo.getSymptoms().size()-1).setSymptomName("");
|
|
|
+
|
|
|
+ outputInfo.setVitals(new ArrayList<>());
|
|
|
+
|
|
|
+ outputInfo.setLises(new ArrayList<>());
|
|
|
+
|
|
|
+ outputInfo.setPacses(new ArrayList<>());
|
|
|
+
|
|
|
+ outputInfo.setTreats(new ArrayList<>());
|
|
|
+
|
|
|
+ return outputInfo;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ public Object updateClinicalInfo(OutputInfo outputInfo, String name, String type) {
|
|
|
+
|
|
|
+ Object obj = null;
|
|
|
+
|
|
|
+ switch (type) {
|
|
|
+ case Constants.word_property_symptom:
|
|
|
+ Symptom symptom = outputInfo.getSymptoms().get(outputInfo.getSymptoms().size() - 1);
|
|
|
+
|
|
|
+ if (symptom.getSymptomName() == null || symptom.getSymptomName().trim().length() == 0) {
|
|
|
+ symptom.setSymptomName(name);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ symptom.setSymptomName(symptom.getSymptomName() + ", " + name);
|
|
|
+ }
|
|
|
+
|
|
|
+ obj = symptom;
|
|
|
+ break;
|
|
|
+ case Constants.word_property_vital_idx:
|
|
|
+ if (outputInfo.getVitals().size() == 0) {
|
|
|
+ outputInfo.getVitals().add(new Vital());
|
|
|
+ }
|
|
|
+
|
|
|
+ Vital vital = outputInfo.getVitals().get(outputInfo.getVitals().size()-1);
|
|
|
+
|
|
|
+ if (vital.getVitalName() == null || vital.getVitalName().trim().length() == 0) {
|
|
|
+ vital.setVitalName(name);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ vital.setVitalName(vital.getVitalName() + "," + name);
|
|
|
+ }
|
|
|
+
|
|
|
+ obj = vital;
|
|
|
+ break;
|
|
|
+ case Constants.word_property_diagnose:
|
|
|
+ if (outputInfo.getTreats().size() == 0) {
|
|
|
+ outputInfo.getTreats().add(new Treat());
|
|
|
+ }
|
|
|
+
|
|
|
+ Treat treat = outputInfo.getTreats().get(outputInfo.getTreats().size()-1);
|
|
|
+
|
|
|
+ if (treat.getDiagnose() == null || treat.getDiagnose().trim().length() == 0) {
|
|
|
+ treat.setDiagnose(name);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ treat.setDiagnose(treat.getDiagnose() + "," + name);
|
|
|
+ }
|
|
|
+ obj = treat;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ /*
|
|
|
+// if (symptom.getSymptomName().trim().length() == 0) {
|
|
|
+ if (outputInfos.size() >= 2 ) {
|
|
|
+ if (symptom.getNegative() == null &&
|
|
|
+ (outputInfos.get(outputInfos.size() - 2).getSymptoms().get(0).getSymptomName().length() > 0 ||
|
|
|
+ outputInfos.get(outputInfos.size() - 2).getSymptoms().get(0).getNegative() != null)) {
|
|
|
+ symptom = outputInfos.get(outputInfos.size() - 2).getSymptoms().get(0);
|
|
|
+ if (symptom.getSymptomName().indexOf(name) == -1) {
|
|
|
+ symptom.setSymptomName(symptom.getSymptomName() + ", " + name);
|
|
|
+ }
|
|
|
+ outputInfos.remove(outputInfos.get(outputInfos.size() - 1));
|
|
|
+ }
|
|
|
+ else if (symptom.getNegative() != null &&
|
|
|
+ outputInfos.get(outputInfos.size() - 2).getSymptoms().get(0).getNegative() != null) {
|
|
|
+ symptom = outputInfos.get(outputInfos.size() - 2).getSymptoms().get(0);
|
|
|
+ if (symptom.getSymptomName().indexOf(name) == -1) {
|
|
|
+ symptom.setSymptomName(symptom.getSymptomName() + ", " + name);
|
|
|
+ }
|
|
|
+ outputInfos.remove(outputInfos.get(outputInfos.size() - 1));
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ symptom.setSymptomName(name);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else {
|
|
|
+
|
|
|
+ if (symptom.getSymptomName().trim().length() == 0) {
|
|
|
+ symptom.setSymptomName(name);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ symptom.setSymptomName(symptom.getSymptomName() + ", " + name);
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+// }
|
|
|
+// else {
|
|
|
+// symptom.setSymptomName(symptom.getSymptomName() + ", " + name);
|
|
|
+// }
|
|
|
+*/
|
|
|
+// current = symptom;
|
|
|
+ return obj;
|
|
|
+ }
|
|
|
+
|
|
|
+}
|