Prechádzať zdrojové kódy

三级查房文本切割重新处理

hujing 5 rokov pred
rodič
commit
6d5235b304

+ 1 - 1
kernel/src/main/java/com/lantone/qc/kernel/structure/ai/BeHospitalizedAI.java

@@ -37,7 +37,7 @@ public class BeHospitalizedAI extends ModelAI {
      * HPIForCX_cx[主诉、现病史、专科检查]
      * GeneralVital_cx[一般体格检查]
      * chief_present[邵逸夫医院主诉]
-     * primary_diag[诊断]
+     * Diagnoses_cx[诊断]
      * Present_cx[现病史]
      */
     public static List<String> medicalTextType = Arrays.asList("FirstCourseRecord_cx", "PastFamily_cx", "PersonalHistory_cx", "HPIForCX_cx",

+ 2 - 2
kernel/src/main/java/com/lantone/qc/kernel/structure/ai/model/EntityEnum.java

@@ -23,8 +23,8 @@ public enum EntityEnum {
     CONTACT_HISTORY("接触史"), MARITAL_HISTORY("冶游史"), MARITAL_STATUS("婚姻情况"), MARRYIAGE("结婚年龄"), NEAR_RELATION("近亲史"),
     CURE_AIM("治疗目的"), OTHER("其他"), OUTERCOURTYARD("外院"), NURSINGLEVEL("护理级别"), CHIEF("主诉"), NOTES("注意事项"),
     KEY_WORD_FOR_DOCTOR_ADVICES("出院医嘱标题"), DOCTORADVICE("医嘱"), KEY_WORD_FOR_PART("大标题"), OUTCOME("转归情况"),
-    PHYSICAL_EXAMINATION("查体"),TITLE_FOR_DIAG_BASIS("诊断依据标题"),TITLE_FOR_DIFF("鉴别诊断标题"),TITLE_FOR_TREAT("诊疗计划标题"),
-    TREATMENT_PLAN("诊疗计划");
+    PHYSICAL_EXAMINATION("查体"),TITLE_FOR_SIGN("查体标题"),TITLE_FOR_DIAG("诊断标题"),TITLE_FOR_DIAG_BASIS("诊断依据标题"),
+    TITLE_FOR_DIFF("鉴别诊断标题"),TITLE_FOR_TREAT("诊疗计划标题"),TITLE("其他标题"), TREATMENT_PLAN("诊疗计划");
 
     private String value;
 

+ 1 - 1
kernel/src/main/java/com/lantone/qc/kernel/structure/ai/process/EntityProcess.java

@@ -62,7 +62,7 @@ public class EntityProcess {
      * @param entitys
      * @return
      */
-    private List<Lemma> loadAllLemmaList(JSONArray entitys) {
+    protected List<Lemma> loadAllLemmaList(JSONArray entitys) {
         List<Lemma> allLemmaList = new ArrayList<>();
         //所有实体读取
         for (int i = 0; i < entitys.size(); i++) {

+ 148 - 49
kernel/src/main/java/com/lantone/qc/kernel/structure/ai/process/EntityProcessThreeLevelWard.java

@@ -10,7 +10,10 @@ import com.lantone.qc.pub.model.entity.TreatmentPlan;
 import com.lantone.qc.pub.model.label.ThreeLevelWardLabel;
 
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * @ClassName : EntityProcessThreeLevelWard
@@ -19,40 +22,46 @@ import java.util.List;
  * @Date: 2020-03-20 11:20
  */
 public class EntityProcessThreeLevelWard extends EntityProcess {
+    private List<String> titleList = Arrays.asList( // entity types that subWithTitle treats as section titles (matched against Lemma.getProperty())
+            EntityEnum.TITLE_FOR_SIGN.toString(), // physical-examination title
+            EntityEnum.TITLE_FOR_DIAG.toString(), // diagnosis title
+            EntityEnum.TITLE_FOR_DIAG_BASIS.toString(), // diagnosis-basis title
+            EntityEnum.TITLE_FOR_DIFF.toString(), // differential-diagnosis title
+            EntityEnum.TITLE_FOR_TREAT.toString()); // treatment-plan title
+
     public ThreeLevelWardLabel extractEntity(JSONObject aiOut) {
         ThreeLevelWardLabel threeLevelWardLabel = new ThreeLevelWardLabel();
         String content = aiOut.getString("content");
-        int titleForDiagBasisIndex = 0, titleForDiffIndex = 0, titleForTreatIndex = 0;
-        List<Lemma> diagBasisLemmas = createEntityTree(aiOut, EntityEnum.TITLE_FOR_DIAG_BASIS.toString());
-        List<Lemma> diagDiffLemmas = createEntityTree(aiOut, EntityEnum.TITLE_FOR_DIFF.toString());
-        List<Lemma> treatLemmas = createEntityTree(aiOut, EntityEnum.TITLE_FOR_TREAT.toString());
-        if (diagBasisLemmas.size() > 0) {
-            titleForDiagBasisIndex = content.indexOf(diagBasisLemmas.get(0).getText());//诊断依据标题起始位置
-        }
-        if (diagDiffLemmas.size() > 0) {
-            titleForDiffIndex = content.indexOf(diagDiffLemmas.get(0).getText());//鉴别诊断标题起始位置
-        }
-        if (treatLemmas.size() > 0) {
-            titleForTreatIndex = content.indexOf(treatLemmas.get(0).getText());//诊疗计划标题起始位置
-        }
 
-        //诊断依据文本
-        if (titleForDiagBasisIndex > 0 && titleForDiffIndex > 0) {
-            threeLevelWardLabel.setDiagBasisText(content.substring(titleForDiagBasisIndex, titleForDiffIndex));
+        Map<String, List<String>> titleText = subWithTitle(aiOut);
+        //全部临床表现实体
+        List<Lemma> clinicalLemmas = createEntityTree(aiOut, EntityEnum.CLINICAL_FEATURE.toString());
+        //全部诊断
+        List<Lemma> diagLemmas = createEntityTree(aiOut, EntityEnum.DIEASE.toString());
+        //诊断依据
+        if (titleText.get(EntityEnum.TITLE_FOR_DIAG_BASIS.toString()) != null) {
+            List<String> diagBasisList = titleText.get(EntityEnum.TITLE_FOR_DIAG_BASIS.toString());
+            String diagBasis = String.join(",", diagBasisList);
+            String[] diagBasisSplit = diagBasis.split(",");
+            //诊断依据文本
+            threeLevelWardLabel.setDiagBasisText(content.substring(
+                    Integer.parseInt(diagBasisSplit[0]),
+                    Integer.parseInt(diagBasisSplit[diagBasisSplit.length - 1])));
+            //病史(在 诊断依据 以外的片段中出现 任一 临床表现)
+            addClinical(threeLevelWardLabel, diagBasisSplit, clinicalLemmas);
+            //补充诊断/初步诊断/修正诊断
+            addDiag(threeLevelWardLabel, diagBasisSplit, diagLemmas);
         }
-        //病史(在 诊断依据,鉴别诊断 以外的片段中出现 任一 临床表现)
-        if (titleForDiagBasisIndex > 0) {
-            List<Lemma> clinicalLemmas = createEntityTree(aiOut, EntityEnum.CLINICAL_FEATURE.toString());
-            List<Clinical> clinicals = new ArrayList<>();
-            for (Lemma lemma : clinicalLemmas) {
-                if (Integer.parseInt(lemma.getPosition()) > titleForDiagBasisIndex) {
-                    continue;
-                }
-                Clinical clinical = new Clinical();
-                clinical.setName(lemma.getText());
-                clinicals.add(clinical);
-            }
-            threeLevelWardLabel.setClinicals(clinicals);
+
+        //鉴别诊断
+        if (titleText.get(EntityEnum.TITLE_FOR_DIFF.toString()) != null) {
+            List<String> diffDiagList = titleText.get(EntityEnum.TITLE_FOR_DIFF.toString());
+            String diagBasis = String.join(",", diffDiagList);
+            String[] diffDiagSplit = diagBasis.split(",");
+            //病史(在 鉴别诊断 以外的片段中出现 任一 临床表现)
+            addClinical(threeLevelWardLabel, diffDiagSplit, clinicalLemmas);
+            //补充诊断/初步诊断/修正诊断
+            addDiag(threeLevelWardLabel, diffDiagSplit, diagLemmas);
         }
 
         //查体
@@ -66,12 +75,15 @@ public class EntityProcessThreeLevelWard extends EntityProcess {
         threeLevelWardLabel.setSigns(signs);
 
         //鉴别诊断
-        if (titleForDiffIndex > 0) {
-            List<Diag> diags = new ArrayList<>();
+        if (titleText.get(EntityEnum.TITLE_FOR_DIFF.toString()) != null) {
+            List<String> diffDiagList = titleText.get(EntityEnum.TITLE_FOR_DIFF.toString());
+            String diagBasis = String.join(",", diffDiagList);
+            String[] diffDiagSplit = diagBasis.split(",");
             List<Lemma> dieaseLemmas = createEntityTree(aiOut, EntityEnum.DIEASE.toString());
+            List<Diag> diags = new ArrayList<>();
             for (Lemma dieaseLemma : dieaseLemmas) {
-                if (Integer.parseInt(dieaseLemma.getPosition()) < titleForDiffIndex
-                        || Integer.parseInt(dieaseLemma.getPosition()) > titleForTreatIndex) {
+                if (Integer.parseInt(dieaseLemma.getPosition()) < Integer.parseInt(diffDiagSplit[0])
+                        || Integer.parseInt(dieaseLemma.getPosition()) > Integer.parseInt(diffDiagSplit[diffDiagSplit.length - 1])) {
                     continue;
                 }
                 Diag diag = new Diag();
@@ -79,7 +91,7 @@ public class EntityProcessThreeLevelWard extends EntityProcess {
                 diags.add(diag);
             }
             threeLevelWardLabel.setDiffDiag(diags);
-
+            List<Lemma> diagDiffLemmas = createEntityTree(aiOut, EntityEnum.TITLE_FOR_DIFF.toString());
             for (Lemma lemma : diagDiffLemmas) {
                 if (lemma.getText().contains("诊断明确") || lemma.getText().contains("无需鉴别")) {
                     threeLevelWardLabel.setDiffDiagText(lemma.getText());
@@ -88,21 +100,6 @@ public class EntityProcessThreeLevelWard extends EntityProcess {
             }
         }
 
-        //补充诊断/初步诊断/修正诊断
-        if (titleForDiagBasisIndex > 0 && titleForTreatIndex > 0) {
-            List<Lemma> dieaseLemmas = createEntityTree(aiOut, EntityEnum.DIEASE.toString());
-            List<Diag> diags = new ArrayList<>();
-            for (Lemma dieaseLemma : dieaseLemmas) {
-                if (Integer.parseInt(dieaseLemma.getPosition()) < titleForDiagBasisIndex
-                        || Integer.parseInt(dieaseLemma.getPosition()) > titleForTreatIndex) {
-                    Diag diag = new Diag();
-                    diag.setName(dieaseLemma.getText());
-                    diags.add(diag);
-                }
-            }
-            threeLevelWardLabel.setDiags(diags);
-        }
-
         //诊疗计划
         List<Lemma> treatmentPlanLemmas = createEntityTree(aiOut, EntityEnum.TREATMENT_PLAN.toString());
         List<TreatmentPlan> treatmentPlans = new ArrayList<>();
@@ -114,4 +111,106 @@ public class EntityProcessThreeLevelWard extends EntityProcess {
         threeLevelWardLabel.setTreatmentPlans(treatmentPlans);
         return threeLevelWardLabel;
     }
+
+    /**
+     * Computes, for each section title found in the AI output, the index range(s) of the text
+     * that follows that title.
+     * <p>
+     * All lemmas are loaded from {@code annotation.T}. Each lemma whose property is listed in
+     * {@code titleList} closes the previous section and opens a new one. A range is stored as the
+     * string {@code "begin,end"}, where {@code begin} is the title position plus the title text
+     * length and {@code end} is the position of the next title lemma. The last lemma in the list
+     * always closes the currently open section at {@code content.length() - 1}.
+     * <p>
+     * NOTE(review): entries are keyed by the title lemma's text ({@code getText()}), while
+     * {@code extractEntity} looks them up with {@code EntityEnum.X.toString()} - confirm that these
+     * two strings are really expected to match.
+     * NOTE(review): {@code start == 0} doubles as "no title seen yet", so a title located at
+     * offset 0 is treated as unseen; and when no title is seen at all, the final branch stores a
+     * range under the empty-string key.
+     *
+     * @param aiOut AI output object; its {@code content} string and {@code annotation.T} array are read
+     * @return insertion-ordered map from title text to the list of {@code "begin,end"} range strings
+     */
+    private Map<String, List<String>> subWithTitle(JSONObject aiOut) {
+        Map<String, List<String>> titleText = new LinkedHashMap<>();
+        String content = aiOut.getString("content");
+        List<Lemma> lemmaList = loadAllLemmaList(aiOut.getJSONObject("annotation").getJSONArray("T"));
+        Lemma lemma;
+        int start = 0; // position of the title that opened the current section; 0 means none yet
+        String subContentIndex = "", title = "";
+        for (int i = 0; i < lemmaList.size(); i++) {
+            lemma = lemmaList.get(i);
+            if (!titleList.contains(lemma.getProperty()) && i != lemmaList.size() - 1) { // only title lemmas and the very last lemma are processed
+                continue;
+            }
+            if (i != lemmaList.size() - 1) {
+                if (start == 0) {
+                    start = Integer.parseInt(lemma.getPosition());
+                    title = lemma.getText();//title of the text segment being cut
+                } else {
+                    //corresponding text: content.substring(start + title.length(), Integer.parseInt(lemma.getPosition()))
+                    subContentIndex = start + title.length() + "," + Integer.parseInt(lemma.getPosition());
+                    putSubContent(titleText, title, subContentIndex);
+                    start = Integer.parseInt(lemma.getPosition());
+                    title = lemma.getText();//title of the text segment being cut
+                }
+            } else {
+                //last lemma: the open section runs from start to the end of the content (this branch runs even if the last lemma is itself a title)
+                //NOTE(review): extractEntity passes this end value to substring (exclusive end), so the final character is left out - confirm intended
+                int lastIndex = content.length() - 1;
+                subContentIndex = start + title.length() + "," + lastIndex;
+                putSubContent(titleText, title, subContentIndex);
+            }
+        }
+        return titleText;
+    }
+
+    /**
+     * Appends an index-range string to the list stored under the given title, creating that
+     * list the first time the title is seen.
+     *
+     * @param titleText  map from title text to its collected range strings; modified in place
+     * @param text       title text used as the map key
+     * @param subContent range string to append for this title
+     */
+    private void putSubContent(Map<String, List<String>> titleText, String text, String subContent) {
+        // Make sure a bucket exists for this title, then append to it.
+        if (!titleText.containsKey(text)) {
+            titleText.put(text, new ArrayList<>());
+        }
+        titleText.get(text).add(subContent);
+    }
+
+    /**
+     * Adds medical-history entries (clinical features) that lie outside a titled section.
+     * <p>
+     * The section span runs from {@code indexArray[0]} (first start offset) to
+     * {@code indexArray[indexArray.length - 1]} (last end offset), both inclusive. Lemmas
+     * positioned inside that span are skipped; every other lemma becomes a {@link Clinical}
+     * named after the lemma text. New entries are appended to any clinicals already on the label.
+     *
+     * @param threeLevelWardLabel label that receives the clinical entries
+     * @param indexArray          start and end offsets of all text fragments belonging to the title
+     * @param clinicalLemmas      clinical-feature lemmas to filter
+     */
+    private void addClinical(ThreeLevelWardLabel threeLevelWardLabel, String[] indexArray, List<Lemma> clinicalLemmas) {
+        // The section bounds do not change inside the loop, so parse them once.
+        int sectionStart = Integer.parseInt(indexArray[0]);
+        int sectionEnd = Integer.parseInt(indexArray[indexArray.length - 1]);
+        List<Clinical> clinicals = new ArrayList<>();
+        for (Lemma lemma : clinicalLemmas) {
+            int position = Integer.parseInt(lemma.getPosition());
+            // Skip lemmas inside the section. The earlier test (position > start || position < end)
+            // was true for every position whenever start < end, so no entry was ever added.
+            if (position >= sectionStart && position <= sectionEnd) {
+                continue;
+            }
+            Clinical clinical = new Clinical();
+            clinical.setName(lemma.getText());
+            clinicals.add(clinical);
+        }
+        // Guard against a label whose clinical list has not been initialised yet.
+        List<Clinical> existing = threeLevelWardLabel.getClinicals();
+        if (existing == null || existing.isEmpty()) {
+            threeLevelWardLabel.setClinicals(clinicals);
+        } else {
+            existing.addAll(clinicals);
+        }
+    }
+
+    /**
+     * Adds supplementary / preliminary / revised diagnoses that lie outside a titled section.
+     * <p>
+     * The section span runs from {@code indexArray[0]} (first start offset) to
+     * {@code indexArray[indexArray.length - 1]} (last end offset), both inclusive. Lemmas
+     * positioned inside that span are skipped; every other lemma becomes a {@link Diag} named
+     * after the lemma text. New entries are appended to any diagnoses already on the label.
+     *
+     * @param threeLevelWardLabel label that receives the diagnosis entries
+     * @param indexArray          start and end offsets of all text fragments belonging to the title
+     * @param diagLemmas          disease lemmas to filter
+     */
+    private void addDiag(ThreeLevelWardLabel threeLevelWardLabel, String[] indexArray, List<Lemma> diagLemmas) {
+        // The section bounds do not change inside the loop, so parse them once.
+        int sectionStart = Integer.parseInt(indexArray[0]);
+        int sectionEnd = Integer.parseInt(indexArray[indexArray.length - 1]);
+        List<Diag> diags = new ArrayList<>();
+        for (Lemma lemma : diagLemmas) {
+            int position = Integer.parseInt(lemma.getPosition());
+            // Skip lemmas inside the section. The earlier test (position > start || position < end)
+            // was true for every position whenever start < end, so no entry was ever added.
+            if (position >= sectionStart && position <= sectionEnd) {
+                continue;
+            }
+            Diag diag = new Diag();
+            diag.setName(lemma.getText());
+            diags.add(diag);
+        }
+        // Guard against a label whose diagnosis list has not been initialised yet.
+        List<Diag> existing = threeLevelWardLabel.getDiags();
+        if (existing == null || existing.isEmpty()) {
+            threeLevelWardLabel.setDiags(diags);
+        } else {
+            existing.addAll(diags);
+        }
+    }
+
 }