Browse Source

Merge branch 'push-dev-pacs' of http://192.168.2.236:10080/louhr/push into push-dev-pacs

louhr 5 years ago
parent
commit
a2b4903a17

+ 130 - 0
algorithm/src/main/java/org/algorithm/core/FilterRule.java

@@ -0,0 +1,130 @@
+package org.algorithm.core;
+
+import java.util.Map;
+
+/**
+ * 过滤规则
+ *
+ * @Author: bijl
+ * @Date: 2019/9/5 20:21
+ * @Description:
+ */
+public class FilterRule {
+
+    private Integer uuid;
+
+    private String key_1;
+    private String type_1;
+
+    private String key_2;
+    private String type_2;
+
+    private String inside;
+    private String insideType;
+
+    private String despite;
+    private String despiteInside;
+
+    public FilterRule(Map<String, String> aMap) {
+
+        this.key_1 = aMap.get("key_1");
+        this.type_1 = aMap.get("type_1");
+
+        this.key_2 = aMap.get("key_2");
+        this.type_2 = aMap.get("type_2");
+
+        this.inside = aMap.get("inside");
+        this.insideType = aMap.get("inside_type");
+
+        this.despite = aMap.get("despite");
+        this.despiteInside = aMap.get("despite_inside");
+    }
+
+    public Integer getUuid() {
+        return uuid;
+    }
+
+    public void setUuid(Integer uuid) {
+        this.uuid = uuid;
+    }
+
+
+    public String getKey_1() {
+        return key_1;
+    }
+
+    public void setKey_1(String key_1) {
+        this.key_1 = key_1;
+    }
+
+    public String getType_1() {
+        return type_1;
+    }
+
+    public void setType_1(String type_1) {
+        this.type_1 = type_1;
+    }
+
+    public String getKey_2() {
+        return key_2;
+    }
+
+    public void setKey_2(String key_2) {
+        this.key_2 = key_2;
+    }
+
+    public String getType_2() {
+        return type_2;
+    }
+
+    public void setType_2(String type_2) {
+        this.type_2 = type_2;
+    }
+
+    public String getInside() {
+        return inside;
+    }
+
+    public void setInside(String inside) {
+        this.inside = inside;
+    }
+
+    public String getInsideType() {
+        return insideType;
+    }
+
+    public void setInsideType(String insideType) {
+        this.insideType = insideType;
+    }
+
+    public String getDespite() {
+        return despite;
+    }
+
+    public void setDespite(String despite) {
+        this.despite = despite;
+    }
+
+    public String getDespiteInside() {
+        return despiteInside;
+    }
+
+    public void setDespiteInside(String despiteInside) {
+        this.despiteInside = despiteInside;
+    }
+
+    @Override
+    public String toString() {
+        return "FilterRule{" +
+                "uuid=" + uuid +
+                ", key_1='" + key_1 + '\'' +
+                ", type_1='" + type_1 + '\'' +
+                ", key_2='" + key_2 + '\'' +
+                ", type_2='" + type_2 + '\'' +
+                ", inside='" + inside + '\'' +
+                ", insideType='" + insideType + '\'' +
+                ", despite='" + despite + '\'' +
+                ", despiteInside='" + despiteInside + '\'' +
+                '}';
+    }
+}

+ 211 - 0
algorithm/src/main/java/org/algorithm/core/RelationTreeUtils.java

@@ -0,0 +1,211 @@
+package org.algorithm.core;
+
+import org.algorithm.core.cnn.entity.Lemma;
+import org.algorithm.core.cnn.entity.Triad;
+
+import java.util.*;
+
+/**
+ * Utility methods for turning a list of related entity pairs (triads) into a
+ * relation tree and for enumerating the branches of that tree.
+ *
+ * @Author: bijl
+ * @Date: 2019/9/5 15:16
+ * @Description:
+ */
+public class RelationTreeUtils {
+
+    /**
+     * Merges entities (lemmas) that share the same text.
+     * Rule: every occurrence is replaced by the one with the smallest start position.
+     * <p>
+     * After this call, both sides of every triad reference the shared, earliest
+     * {@link Lemma} instance for their text.
+     *
+     * @param triads list of entity pairs; modified in place
+     */
+    public static void sameTextLemmaMerge(List<Triad> triads) {
+        Map<String, Lemma> earliestByText = new HashMap<>();
+
+        // Pass 1: find the earliest lemma for every distinct text.
+        for (Triad triad : triads) {
+            rememberEarliest(earliestByText, triad.getL_1());
+            rememberEarliest(earliestByText, triad.getL_2());
+        }
+
+        // Pass 2: point every triad at the shared earliest instance.
+        for (Triad triad : triads) {
+            triad.setL_1(earliestByText.get(triad.getL_1().getText()));
+            triad.setL_2(earliestByText.get(triad.getL_2().getText()));
+        }
+    }
+
+    /**
+     * Records {@code lemma} as the representative of its text when no representative
+     * exists yet or when it starts before the current one.
+     */
+    private static void rememberEarliest(Map<String, Lemma> earliestByText, Lemma lemma) {
+        Lemma known = earliestByText.get(lemma.getText());
+        if (known == null || known.getStartPosition() > lemma.getStartPosition()) {
+            earliestByText.put(lemma.getText(), lemma);
+        }
+    }
+
+    /**
+     * Builds the relation tree.
+     * Rule: of two related entities, the one that starts first is the parent and
+     * the other one is the child.
+     *
+     * @param triads list of related triads; the lemmas are modified in place
+     */
+    public static void buildRelationTree(List<Triad> triads) {
+        for (Triad triad : triads) {
+            Lemma l1 = triad.getL_1();
+            Lemma l2 = triad.getL_2();
+            if (l1.getStartPosition() < l2.getStartPosition()) {  // the earlier one is the parent
+                l1.setHasChildren(true);
+                l2.setParent(l1);
+            } else {
+                l2.setHasChildren(true);
+                l1.setParent(l2);
+            }
+        }
+    }
+
+    /**
+     * Collects the branches of the relation tree.
+     * <p>
+     * Every lemma without children is treated as a leaf; its chain of parents is
+     * collected (texts only) and all permutations of that chain are added to the
+     * result. A leaf referenced by several triads contributes once per reference.
+     *
+     * @param projectName project name, e.g. 核磁共振
+     * @param triads      related triads whose parent/child links are already set
+     * @return a two-element array: {@code [projectName, List<List<String>> branches]}
+     */
+    public static Object[] getRelationTreeBranches(String projectName, List<Triad> triads) {
+        List<Lemma> leaves = new ArrayList<>();
+        for (Triad triad : triads) {
+            if (!triad.getL_1().isHasChildren()) {
+                leaves.add(triad.getL_1());
+            }
+            if (!triad.getL_2().isHasChildren()) {
+                leaves.add(triad.getL_2());
+            }
+        }
+
+        List<List<String>> branches = new ArrayList<>();
+        for (Lemma leaf : leaves) {
+            List<String> aBranch = new ArrayList<>();
+            // Identity-based guard: a lemma that is its own ancestor (possible when both
+            // sides of a triad were merged into one instance, or when start positions
+            // are equal) would otherwise make this walk loop forever.
+            Set<Lemma> visited = Collections.newSetFromMap(new IdentityHashMap<Lemma, Boolean>());
+            Lemma node = leaf;
+            while (node != null && visited.add(node)) {
+                aBranch.add(node.getText());  // only the text is collected
+                node = node.getParent();
+            }
+            branches.addAll(permute(aBranch));  // all orderings of the branch
+        }
+
+        return new Object[]{projectName, branches};
+    }
+
+    /**
+     * Runs the whole pipeline: merge same-text lemmas, build the tree, collect branches.
+     *
+     * @param projectName project name placed in the first slot of the result
+     * @param triads      list of triads; modified in place
+     * @return a two-element array: {@code [projectName, List<List<String>> branches]}
+     */
+    public static Object[] triadsToRelationTreeBranches(String projectName, List<Triad> triads) {
+        sameTextLemmaMerge(triads);
+        buildRelationTree(triads);
+        // Use the caller's project name; it was previously replaced by a fixed literal.
+        return getRelationTreeBranches(projectName, triads);
+    }
+
+    /**
+     * Computes all permutations of a list by inserting each element at every
+     * position of every permutation built so far.
+     * <p>
+     * The result has {@code n!} entries, so this is only suitable for short lists.
+     *
+     * @param stringList list of strings
+     * @return every ordering of {@code stringList}; a single empty list for empty input
+     */
+    public static ArrayList<ArrayList<String>> permute(List<String> stringList) {
+        ArrayList<ArrayList<String>> result = new ArrayList<ArrayList<String>>();
+        result.add(new ArrayList<String>());
+
+        for (String item : stringList) {
+            // permutations that include the current item
+            ArrayList<ArrayList<String>> current = new ArrayList<ArrayList<String>>();
+
+            for (ArrayList<String> partial : result) {
+                // number of insertion slots is largest index + 1
+                for (int slot = 0; slot <= partial.size(); slot++) {
+                    ArrayList<String> extended = new ArrayList<String>(partial);
+                    extended.add(slot, item);
+                    current.add(extended);
+                }
+            }
+
+            result = current;
+        }
+
+        return result;
+    }
+
+
+    /**
+     * Manual smoke test: builds three triads, runs the pipeline and prints the result.
+     */
+    public static void test() {
+
+        List<Triad> triads = new ArrayList<>();
+        Lemma l1_1 = new Lemma();
+        Lemma l1_2 = new Lemma();
+        l1_1.setText("子宫");
+        l1_1.setPosition("0,2");
+
+        l1_2.setText("内膜");
+        l1_2.setPosition("5,8");
+
+        Triad triad_1 = new Triad();
+        triad_1.setL_1(l1_1);
+        triad_1.setL_2(l1_2);
+        triads.add(triad_1);
+
+        Lemma l2_1 = new Lemma();
+        Lemma l2_2 = new Lemma();
+        l2_1.setText("宫颈线");
+        l2_1.setPosition("11,13");
+
+        l2_2.setText("很长");
+        l2_2.setPosition("15,18");
+
+        Triad triad_2 = new Triad();
+        triad_2.setL_1(l2_1);
+        triad_2.setL_2(l2_2);
+        triads.add(triad_2);
+
+
+        Lemma l3_1 = new Lemma();
+        Lemma l3_2 = new Lemma();
+
+        l3_1.setText("内膜");
+        l3_1.setPosition("5,8");
+
+        l3_2.setText("出血");
+        l3_2.setPosition("9,10");
+
+        Triad triad_3 = new Triad();
+        triad_3.setL_1(l3_1);
+        triad_3.setL_2(l3_2);
+        triads.add(triad_3);
+
+        sameTextLemmaMerge(triads);
+        buildRelationTree(triads);
+        Object[] obj = getRelationTreeBranches("胃造影", triads);
+
+        System.out.println(obj[0]);
+        System.out.println(obj[1]);
+    }
+
+}

+ 486 - 0
algorithm/src/main/java/org/algorithm/core/RuleCheckMachine.java

@@ -0,0 +1,486 @@
+package org.algorithm.core;
+
+import org.algorithm.core.cnn.entity.Lemma;
+import org.algorithm.core.cnn.entity.Triad;
+import org.algorithm.util.MysqlConnector;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.*;
+
+/**
+ * Rule-checking machine: loads negative-relation filter rules, indexes them, and
+ * removes triads that still satisfy at least one rule after all checks.
+ * <p>
+ * A rule applies to a triad when its entity-1 condition, its entity-2 condition
+ * and its in-between condition all match, and none of its exception values occurs.
+ *
+ * @Author: bijl
+ * @Date: 2019/9/6 10:32
+ * @Description:
+ */
+public class RuleCheckMachine {
+    private final List<FilterRule> filterRules = new ArrayList<>();
+    // condition type ("", "type", "word") -> key -> rule uuids, for entity 1
+    private Map<String, Map<String, Set<Integer>>> key_1_map = null;
+    // condition type ("", "type", "word") -> key -> rule uuids, for entity 2
+    private Map<String, Map<String, Set<Integer>>> key_2_map = null;
+    // punctuation strings used by rules whose inside type is "punc"
+    private Map<String, String> punctuations = new HashMap<>();
+    private Map<String, Set<Integer>> despiteMap = null;  // entity name -> rule uuids
+    private Map<String, Set<Integer>> despiteInsideMap = null; // entity name -> rule uuids
+    // "punc" / "type" / "typePunctuation" / "word" -> key -> rule uuids
+    private Map<String, Map<String, Set<Integer>>> insideMap = null;
+
+
+    /**
+     * Loads the rules and builds every lookup index.
+     */
+    public RuleCheckMachine() {
+        this.loadRules();
+        this.makeKey1Map();
+        this.makeKey2Map();
+        this.makeInsideMap();
+        this.makeDespiteMap();
+        this.makeDespiteInsideMap();
+    }
+
+
+    /**
+     * Loads the active rules from the database.
+     * <p>
+     * The {@code despite} and {@code despite_inside} columns are comma-separated;
+     * one {@link FilterRule} is created per combination of their values, each with
+     * its own sequential uuid.
+     *
+     * @throws RuntimeException if the rules cannot be read
+     */
+    public void loadRules() {
+        // NOTE(review): the connection URL and credentials are hard-coded here;
+        // they should come from external configuration.
+        String url = "jdbc:mysql://192.168.2.235/test_case?user=root&password=diagbot@20180822";
+        MysqlConnector connector = new MysqlConnector(url);
+        String querySql =
+                "SELECT rr.key_1, rr.type_1, rr.key_2, rr.type_2, rr.inside, rr.inside_type, " +
+                        "rr.despite, rr.despite_inside " +
+                        "FROM relation_neg_rules AS rr " +
+                        "WHERE rr.`status` = 1";
+
+        int uuid = 0;
+        try {
+            // Query inside the try block so the connector is closed even if it fails.
+            ResultSet rs = connector.query(querySql);
+            if (rs == null) {
+                throw new RuntimeException("加载规则字典失败");
+            }
+            while (rs.next()) {
+                String key_1 = rs.getString("key_1");
+                String type_1 = rs.getString("type_1");
+
+                String key_2 = rs.getString("key_2");
+                String type_2 = rs.getString("type_2");
+
+                String inside = rs.getString("inside");
+                String inside_type = rs.getString("inside_type");
+
+                // A NULL column means "no exception"; treat it like an empty string
+                // instead of failing on split().
+                String[] despiteSplit = nullToEmpty(rs.getString("despite")).split(",");
+                String[] despiteInsideSplit = nullToEmpty(rs.getString("despite_inside")).split(",");
+
+                for (String despite : despiteSplit) {
+                    for (String despiteInside : despiteInsideSplit) {
+                        Map<String, String> variableMap = new HashMap<>();
+                        variableMap.put("key_1", key_1);
+                        variableMap.put("type_1", type_1);
+
+                        variableMap.put("key_2", key_2);
+                        variableMap.put("type_2", type_2);
+
+                        variableMap.put("inside", inside);
+                        variableMap.put("inside_type", inside_type);
+
+                        variableMap.put("despite", despite);
+                        variableMap.put("despite_inside", despiteInside);
+
+                        FilterRule filterRule = new FilterRule(variableMap);
+                        filterRule.setUuid(uuid);
+                        this.filterRules.add(filterRule);
+
+                        uuid += 1;
+                    }
+                }
+            }
+        } catch (SQLException e) {
+            // Keep the original message but preserve the cause for diagnosis.
+            throw new RuntimeException("加载规则字典失败", e);
+        } finally {
+            connector.close();
+        }
+    }
+
+    /**
+     * Returns {@code value}, or the empty string when it is {@code null}.
+     */
+    private static String nullToEmpty(String value) {
+        return value == null ? "" : value;
+    }
+
+    /**
+     * Creates the three-slot index ("", "type", "word") used for entity conditions.
+     */
+    private static Map<String, Map<String, Set<Integer>>> newEntityIndex() {
+        Map<String, Map<String, Set<Integer>>> index = new HashMap<>();
+        index.put("", new HashMap<String, Set<Integer>>());
+        index.put("type", new HashMap<String, Set<Integer>>());
+        index.put("word", new HashMap<String, Set<Integer>>());
+        return index;
+    }
+
+    /**
+     * Builds the index for the entity-1 conditions.
+     */
+    private void makeKey1Map() {
+        Map<String, Map<String, Set<Integer>>> index = newEntityIndex();
+        for (FilterRule rule : this.filterRules) {
+            this.inputMaps(rule.getKey_1(), rule.getType_1(), rule.getUuid(),
+                    index.get(""), index.get("type"), index.get("word"), null);
+        }
+        this.key_1_map = index;
+    }
+
+
+    /**
+     * Builds the index for the entity-2 conditions.
+     */
+    private void makeKey2Map() {
+        Map<String, Map<String, Set<Integer>>> index = newEntityIndex();
+        for (FilterRule rule : this.filterRules) {
+            this.inputMaps(rule.getKey_2(), rule.getType_2(), rule.getUuid(),
+                    index.get(""), index.get("type"), index.get("word"), null);
+        }
+        this.key_2_map = index;
+    }
+
+    /**
+     * Builds the index for the in-between conditions.
+     * <p>
+     * Type conditions whose key starts with a comma go to the "typePunctuation"
+     * slot; all other type conditions go to the "type" slot. Punctuation keys are
+     * additionally recorded in {@link #punctuations}.
+     */
+    private void makeInsideMap() {
+        Map<String, Map<String, Set<Integer>>> insideMap_ = new HashMap<>();
+        Map<String, Set<Integer>> punctuationMap = new HashMap<>();
+        Map<String, Set<Integer>> typeMap = new HashMap<>();
+        Map<String, Set<Integer>> typePunctuationMap = new HashMap<>();
+        Map<String, Set<Integer>> wordMap = new HashMap<>();
+        insideMap_.put("punc", punctuationMap);
+        insideMap_.put("type", typeMap);
+        insideMap_.put("typePunctuation", typePunctuationMap);
+        insideMap_.put("word", wordMap);
+
+        for (FilterRule rule : this.filterRules) {
+            String inside = rule.getInside();
+            String insideType = rule.getInsideType();
+            Integer uuid = rule.getUuid();
+            if ("punc".equals(insideType)) {
+                this.punctuations.put(inside, inside);
+            }
+
+            // startsWith is safe for empty strings, unlike substring(0, 1).
+            boolean commaPrefixed = inside != null && inside.startsWith(",");
+            Map<String, Set<Integer>> typeTarget = commaPrefixed ? typePunctuationMap : typeMap;
+            this.inputMaps(inside, insideType, uuid, null, typeTarget, wordMap, punctuationMap);
+        }
+        this.insideMap = insideMap_;
+    }
+
+    /**
+     * Registers a rule uuid under {@code key} in the map selected by {@code type}.
+     *
+     * @param key            condition key
+     * @param type           condition type: "", "type", "word" or "punc"
+     * @param uuid           rule uuid
+     * @param emptyMap       target for type ""; may be null when unsupported
+     * @param typeMap        target for type "type"
+     * @param wordMap        target for type "word"
+     * @param punctuationMap target for type "punc"; may be null when unsupported
+     * @throws RuntimeException if the type is unknown or not supported by the caller
+     */
+    private void inputMaps(String key, String type, Integer uuid, Map<String, Set<Integer>> emptyMap,
+                           Map<String, Set<Integer>> typeMap, Map<String, Set<Integer>> wordMap,
+                           Map<String, Set<Integer>> punctuationMap) {
+
+        Map<String, Set<Integer>> target;
+        if ("".equals(type)) {
+            target = emptyMap;
+        } else if ("type".equals(type)) {
+            target = typeMap;
+        } else if ("word".equals(type)) {
+            target = wordMap;
+        } else if ("punc".equals(type)) {
+            target = punctuationMap;
+        } else {
+            target = null;
+        }
+
+        // Unknown type, or a type the caller passed no map for.
+        if (target == null) {
+            throw new RuntimeException("出现了位置新type");
+        }
+        target.computeIfAbsent(key, k -> new HashSet<>()).add(uuid);
+    }
+
+
+    /**
+     * Builds the exception index: entity name -> uuids of rules that do not apply
+     * when that entity is one of the pair. Blank values are not indexed.
+     */
+    private void makeDespiteMap() {
+        Map<String, Set<Integer>> index = new HashMap<>();
+        for (FilterRule rule : this.filterRules) {
+            String despite = rule.getDespite();
+            if (despite != null && !despite.isEmpty()) {
+                index.computeIfAbsent(despite, k -> new HashSet<>()).add(rule.getUuid());
+            }
+        }
+        this.despiteMap = index;
+    }
+
+
+    /**
+     * Builds the in-between exception index: entity name -> uuids of rules that do
+     * not apply when that entity lies in the checked range. Blank values are not indexed.
+     */
+    private void makeDespiteInsideMap() {
+        Map<String, Set<Integer>> index = new HashMap<>();
+        for (FilterRule rule : this.filterRules) {
+            String despiteInside = rule.getDespiteInside();
+            if (despiteInside != null && !despiteInside.isEmpty()) {
+                index.computeIfAbsent(despiteInside, k -> new HashSet<>()).add(rule.getUuid());
+            }
+        }
+        this.despiteInsideMap = index;
+    }
+
+    /**
+     * Name / type / start position of one entity; ordered by start position.
+     */
+    class NameTypeStartPosition implements Comparable<NameTypeStartPosition> {
+        private String name;
+        private String type;
+        private int startPosition;
+
+        public NameTypeStartPosition(String name, String type, int startPosition) {
+            this.name = name;
+            this.type = type;
+            this.startPosition = startPosition;
+        }
+
+        @Override
+        public int compareTo(NameTypeStartPosition o) {
+            // Integer.compare avoids the overflow risk of subtraction.
+            return Integer.compare(this.startPosition, o.getStartPosition());
+        }
+
+        public String getName() {
+            return name;
+        }
+
+        public void setName(String name) {
+            this.name = name;
+        }
+
+        public String getType() {
+            return type;
+        }
+
+        public void setType(String type) {
+            this.type = type;
+        }
+
+        public int getStartPosition() {
+            return startPosition;
+        }
+
+        public void setStartPosition(int startPosition) {
+            this.startPosition = startPosition;
+        }
+
+        @Override
+        public String toString() {
+            return "NameTypeStartPosition{" +
+                    "name='" + name + '\'' +
+                    ", type='" + type + '\'' +
+                    ", startPosition=" + startPosition +
+                    '}';
+        }
+
+    }
+
+    /**
+     * Collects (name, type, start position) for both entities of every triad,
+     * sorted by start position.
+     *
+     * @param triads list of triads
+     * @return sorted list with two entries per triad
+     */
+    public List<NameTypeStartPosition> getSortedNameTypeByPosition(List<Triad> triads) {
+        List<NameTypeStartPosition> nameTypeStartPositions = new ArrayList<>();
+        for (Triad triad : triads) {
+            Lemma l1 = triad.getL_1();
+            Lemma l2 = triad.getL_2();
+            nameTypeStartPositions.add(
+                    new NameTypeStartPosition(l1.getText(), l1.getProperty(), l1.getStartPosition()));
+            nameTypeStartPositions.add(
+                    new NameTypeStartPosition(l2.getText(), l2.getProperty(), l2.getStartPosition()));
+        }
+        nameTypeStartPositions.sort(Comparator.naturalOrder());
+
+        return nameTypeStartPositions;
+    }
+
+    /**
+     * Decides whether the entity pair at {@code startIndex}/{@code endIndex} should be removed.
+     * <p>
+     * Starting from all rule uuids, rules whose exception values occur are dropped,
+     * then the remaining set is intersected with the rules matching entity 1, the
+     * rules matching entity 2 and the rules matching something between the two
+     * entities. The pair is removed when at least one rule survives.
+     *
+     * @param nameTypeStartPositions entities sorted by start position
+     * @param startIndex             index of entity 1 in the sorted list
+     * @param endIndex               index of entity 2 in the sorted list
+     * @param sentence               the sentence the positions refer to
+     * @return {@code true} if some rule still applies, i.e. the pair should be filtered out
+     */
+    public boolean isRemove(List<NameTypeStartPosition> nameTypeStartPositions, int startIndex, int endIndex,
+                            String sentence) {
+        Set<Integer> remainUuids = new HashSet<>();  // uuids of the rules still in play
+        for (FilterRule rule : this.filterRules) {
+            remainUuids.add(rule.getUuid());
+        }
+
+        NameTypeStartPosition entity1 = nameTypeStartPositions.get(startIndex);
+        NameTypeStartPosition entity2 = nameTypeStartPositions.get(endIndex);
+
+        String entity_1_name = entity1.getName();
+        String entity_1_type = entity1.getType();
+
+        // The name must come from getName(); it was previously read from getType().
+        String entity_2_name = entity2.getName();
+        String entity_2_type = entity2.getType();
+
+        // Drop rules for which either entity name is listed as an exception.
+        this.removeAll(remainUuids, this.despiteMap.get(entity_1_name));
+        this.removeAll(remainUuids, this.despiteMap.get(entity_2_name));
+
+        // Drop rules for which an entity in the range is listed as an in-between exception.
+        for (int i = startIndex; i <= endIndex; i++) {
+            this.removeAll(remainUuids, this.despiteInsideMap.get(nameTypeStartPositions.get(i).getName()));
+        }
+
+        // Entity 1: rules with an empty condition, a matching type or a matching word.
+        Set<Integer> set = new HashSet<>();
+        this.addAll(set, this.key_1_map.get("").get(""));
+        this.addAll(set, this.key_1_map.get("type").get(entity_1_type));
+        this.addAll(set, this.key_1_map.get("word").get(entity_1_name));
+        this.retainAll(remainUuids, set);
+        if (remainUuids.isEmpty()) {
+            return false;
+        }
+
+        // Entity 2: same three lookups.
+        set = new HashSet<>();
+        this.addAll(set, this.key_2_map.get("").get(""));
+        this.addAll(set, this.key_2_map.get("type").get(entity_2_type));
+        this.addAll(set, this.key_2_map.get("word").get(entity_2_name));
+        this.retainAll(remainUuids, set);
+        if (remainUuids.isEmpty()) {
+            return false;
+        }
+
+        // In-between entities: matching word or matching (comma-free) type.
+        set = new HashSet<>();
+        for (int i = startIndex; i <= endIndex; i++) {
+            NameTypeStartPosition current = nameTypeStartPositions.get(i);
+            this.addAll(set, this.insideMap.get("word").get(current.getName()));
+            this.addAll(set, this.insideMap.get("type").get(current.getType()));
+        }
+
+        int entity_1_start = entity1.getStartPosition();
+        int entity_2_start = entity2.getStartPosition();
+
+        // Punctuation between the two entity starts; never read past the sentence.
+        int scanEnd = Math.min(entity_2_start, sentence.length());
+        for (int i = Math.max(entity_1_start, 0); i < scanEnd; i++) {
+            String aPunc = sentence.substring(i, i + 1);
+            if (this.punctuations.get(aPunc) != null) {
+                this.addAll(set, this.insideMap.get("punc").get(aPunc));
+            }
+        }
+
+        // Comma + type: for every comma between the entities, entities that start
+        // after that comma contribute the rules keyed by "," + their type.
+        int commaIndex = sentence.indexOf(",", entity_1_start + 1);
+        while (commaIndex > -1 && commaIndex < entity_2_start) {
+            for (int i = startIndex; i <= endIndex; i++) {
+                NameTypeStartPosition current = nameTypeStartPositions.get(i);
+                if (current.getStartPosition() > commaIndex) {
+                    String commaPadType = "," + current.getType();
+                    // null-safe helper: there may be no rule for this key
+                    this.addAll(set, this.insideMap.get("typePunctuation").get(commaPadType));
+                }
+            }
+            // Advance to the next comma; without this the loop never terminates.
+            commaIndex = sentence.indexOf(",", commaIndex + 1);
+        }
+        this.retainAll(remainUuids, set);
+
+        return !remainUuids.isEmpty();  // a surviving rule means the pair is filtered out
+
+    }
+
+    /**
+     * Set difference: removes {@code set} from {@code basicSet}; a null or empty
+     * {@code set} removes nothing.
+     *
+     * @param basicSet set to modify
+     * @param set      elements to remove; may be null
+     */
+    private void removeAll(Set<Integer> basicSet, Set<Integer> set) {
+        if (set != null && !set.isEmpty()) {
+            basicSet.removeAll(set);
+        }
+    }
+
+    /**
+     * Set union: adds {@code set} to {@code basicSet}; a null or empty {@code set}
+     * adds nothing.
+     *
+     * @param basicSet set to modify
+     * @param set      elements to add; may be null
+     */
+    private void addAll(Set<Integer> basicSet, Set<Integer> set) {
+        if (set != null && !set.isEmpty()) {
+            basicSet.addAll(set);
+        }
+    }
+
+    /**
+     * Set intersection: keeps only the elements of {@code basicSet} that are in
+     * {@code set}. A null or empty {@code set} means nothing matched, so
+     * {@code basicSet} is emptied; skipping the intersection would leave every
+     * rule in play when no rule matched at all.
+     *
+     * @param basicSet set to modify
+     * @param set      elements to keep; may be null
+     */
+    private void retainAll(Set<Integer> basicSet, Set<Integer> set) {
+        if (set == null || set.isEmpty()) {
+            basicSet.clear();
+        } else {
+            basicSet.retainAll(set);
+        }
+    }
+
+    /**
+     * Checks every triad against the rules and removes those that match.
+     *
+     * @param sentence the sentence
+     * @param triads   list of triads; modified in place
+     */
+    public void checkAndRemove(String sentence, List<Triad> triads) {
+        List<NameTypeStartPosition> nameTypeStartPositions = this.getSortedNameTypeByPosition(triads);
+        // start position -> index in the sorted list (the last index wins for equal positions)
+        Map<Integer, Integer> startPositionToIndexMap = new HashMap<>();
+        for (int i = 0; i < nameTypeStartPositions.size(); i++) {
+            startPositionToIndexMap.put(nameTypeStartPositions.get(i).getStartPosition(), i);
+        }
+
+        Iterator<Triad> it = triads.iterator();
+        while (it.hasNext()) {  // walk the triads and remove those matching a filter rule
+            Triad triad = it.next();
+            int startIndex = startPositionToIndexMap.get(triad.getL_1().getStartPosition());
+            int endIndex = startPositionToIndexMap.get(triad.getL_2().getStartPosition());
+            if (isRemove(nameTypeStartPositions, startIndex, endIndex, sentence)) {
+                it.remove();
+            }
+        }
+    }
+}

+ 1 - 1
algorithm/src/main/java/org/algorithm/core/cnn/dataset/RelationExtractionDataSet.java

@@ -17,7 +17,7 @@ import com.alibaba.fastjson.JSONObject;
 public class RelationExtractionDataSet {
 
     private Map<String, Integer> char2id = new HashMap<>();
-    public final int MAX_LEN = 512;
+    public final int MAX_LEN = 256;
 
 
     public RelationExtractionDataSet(String dir) {

+ 26 - 0
algorithm/src/main/java/org/algorithm/core/cnn/entity/Lemma.java

@@ -16,6 +16,32 @@ public class Lemma {
     private int len;
     private String property;
 
+    /** Parent node in the relation tree; {@code null} until one is assigned. */
+    private Lemma parent;
+    /** Whether this lemma has been marked as having child nodes. */
+    private boolean hasChildren;
+
+    /** @return the parent lemma, or {@code null} if none was set */
+    public Lemma getParent() {
+        return this.parent;
+    }
+
+    /** @param parent the lemma to use as this lemma's parent */
+    public void setParent(Lemma parent) {
+        this.parent = parent;
+    }
+
+
+    /** @return {@code true} if this lemma was marked as having children */
+    public boolean isHasChildren() {
+        return this.hasChildren;
+    }
+
+    /** @param hasChildren whether this lemma has children */
+    public void setHasChildren(boolean hasChildren) {
+        this.hasChildren = hasChildren;
+    }
+
+    /**
+     * Parses the start position from the comma-separated {@code position} string.
+     *
+     * @return the integer value of the first comma-separated field of {@code position}
+     */
+    public int getStartPosition(){
+        final String firstField = this.position.split(",")[0];
+        return Integer.parseInt(firstField);
+    }
+
+
     private List<Lemma> relationLemmas = new ArrayList<>();
 
     public String getText() {

+ 37 - 8
algorithm/src/main/java/org/algorithm/core/cnn/model/RelationExtractionEnsembleModel.java

@@ -1,5 +1,7 @@
 package org.algorithm.core.cnn.model;
 
+import org.algorithm.core.RelationTreeUtils;
+import org.algorithm.core.RuleCheckMachine;
 import org.algorithm.core.cnn.AlgorithmCNNExecutor;
 import org.algorithm.core.cnn.dataset.RelationExtractionDataSet;
 import org.algorithm.core.cnn.entity.Triad;
@@ -30,8 +32,10 @@ public class RelationExtractionEnsembleModel extends AlgorithmCNNExecutor {
     private RelationExtractionDataSet dataSet;
     private RelationExtractionSubModel[] subModels = new RelationExtractionSubModel[2];
     private ExecutorService executorService = Executors.newCachedThreadPool();
+    private final RuleCheckMachine ruleCheckMachine = new RuleCheckMachine();
 
     public RelationExtractionEnsembleModel() {
+        // 解析路径
         PropertiesUtil prop = new PropertiesUtil("/algorithm.properties");
 
         String modelsPath = prop.getProperty("basicPath");  // 模型基本路径
@@ -39,18 +43,20 @@ public class RelationExtractionEnsembleModel extends AlgorithmCNNExecutor {
         dataSetPath = dataSetPath + File.separator + "char2id.json";
         String exportDir = modelsPath.replace("model_version_replacement", "ensemble_model_2");
 
+        // 加载数据集和初始化集成模型
         this.dataSet = new RelationExtractionDataSet(dataSetPath);
         this.init(exportDir);
 
+        // 添加子模型系数,并加载子模型cnn_1d_low
         Map<String, Tensor<Float>> cnn_1d_low_map = new HashMap<>();
-        cnn_1d_low_map.put("keep_prob",Tensor.create(1.0f, Float.class));
+        cnn_1d_low_map.put("keep_prob", Tensor.create(1.0f, Float.class));
         subModels[0] = new RelationExtractionSubModel("cnn_1d_low", cnn_1d_low_map);
-//        subModels[1] = new RelationExtractionSubModel("cnn_1d_lstm_low");
 
+        // 添加子模型系数,并加载子模型lstm_low_api
         Map<String, Tensor<Float>> lstm_low_api_map = new HashMap<>();
-        lstm_low_api_map.put("input_keep_prob",Tensor.create(1.0f, Float.class));
-        lstm_low_api_map.put("output_keep_prob",Tensor.create(1.0f, Float.class));
-        lstm_low_api_map.put("state_keep_prob",Tensor.create(1.0f, Float.class));
+        lstm_low_api_map.put("input_keep_prob", Tensor.create(1.0f, Float.class));
+        lstm_low_api_map.put("output_keep_prob", Tensor.create(1.0f, Float.class));
+        lstm_low_api_map.put("state_keep_prob", Tensor.create(1.0f, Float.class));
         subModels[1] = new RelationExtractionSubModel("lstm_low_api", lstm_low_api_map);
     }
 
@@ -92,12 +98,24 @@ public class RelationExtractionEnsembleModel extends AlgorithmCNNExecutor {
         return inputValues;
     }
 
+
+    /**
+     * Pre-processing step: runs the rule check, which may remove triads from the list.
+     * Nothing happens when the sentence is longer than MAX_LEN or there are no triads.
+     *
+     * @param content the sentence
+     * @param triads  list of triads; may be modified in place
+     */
+    private void preProcess(String content, List<Triad> triads){
+        boolean withinLength = content.length() <= this.dataSet.MAX_LEN;
+        if (withinLength && !triads.isEmpty()) {
+            this.ruleCheckMachine.checkAndRemove(content, triads);
+        }
+    }
+
     @Override
     public List<Triad> execute(String content, List<Triad> triads) {
-        // 句子长度不超过MAX_LEN,有三元组
-        if (content.length() > this.dataSet.MAX_LEN || triads.size() < 1) {
+        // 预处理
+        this.preProcess(content, triads);
+        if (content.length() > this.dataSet.MAX_LEN || triads.size() < 1)  // 句子长度不超过MAX_LEN,有三元组
             return new ArrayList<>();
-        }
+
         int[][] inputValues = this.convertData(content, triads);  // shape = [3, batchSize * this.subModels.length]
         int batchSize = triads.size();
 
@@ -162,6 +180,17 @@ public class RelationExtractionEnsembleModel extends AlgorithmCNNExecutor {
         return triads;
     }
 
+    /**
+     * Converts a list of triads into relation-tree branches by delegating to
+     * {@link RelationTreeUtils#triadsToRelationTreeBranches(String, List)}.
+     *  TODO: the real integration with external callers is not done yet, including
+     *  the case where there are no entity pairs
+     *
+     * @param projectName project name passed through to the utility
+     * @param triads      list of triads passed through to the utility
+     * @return the array returned by the utility
+     */
+    public Object[] triadsToRelationTreeBranches(String projectName, List<Triad> triads) {
+        final Object[] branches = RelationTreeUtils.triadsToRelationTreeBranches(projectName, triads);
+        return branches;
+    }
+
 
     /**
      * @param inputValues 字符id,相对于实体1位置,相对于实体2位置

+ 55 - 38
algorithm/src/main/java/org/algorithm/test/Test.java

@@ -1,49 +1,66 @@
 package org.algorithm.test;
 
+import java.util.*;
 
 public class Test {
-    
+
+
     public static void main(String[] args) {
-        
-//        Integer aa = new Integer(53);
-//        Integer bb = new Integer(954);
-//        float xx = 1.0f;
-//        for(int i=1; i< 955; i++) {
-//            xx = (float)(Math.round(1.0f * i / bb*100000))/100000;
-//            System.out.println(i+":"+xx);
-////        }
-//        String filePath = "/opt/models/model_version_replacement/model";
-//        int index = filePath.indexOf("model_version_replacement");
-//
-//        System.out.println(filePath.substring(0, index));
-//            public static void testJSONStrToJavaBeanObj(){
-//
-//        Student student = JSON.parseObject(JSON_OBJ_STR, new TypeReference<Student>() {});
-//        //Student student1 = JSONObject.parseObject(JSON_OBJ_STR, new TypeReference<Student>() {});//因为JSONObject继承了JSON,所以这样也是可以的
-//
-//        System.out.println(student.getStudentName()+":"+student.getStudentAge());
-//
-        String JSON_ARRAY_STR = "[{\"length\":4,\"offset\":0,\"property\":\"1\",\"text\":\"剑突下痛\",\"threshold\":0.0},{\"length\":2,\"offset\":4,\"property\":\"1\",\"text\":\"胀痛\",\"threshold\":0.0},{\"length\":2,\"offset\":6,\"property\":\"2\",\"text\":\"1天\",\"threshold\":0.0},{\"length\":1,\"offset\":8,\"text\":\",\",\"threshold\":0.0}]\n";
-//        JSONArray jsonArray = JSONArray.parseArray(JSON_ARRAY_STR);
-////        String jsonString = "{\"length\":4,\"offset\":0,\"property\":\"1\",\"text\":\"剑突下痛\",\"threshold\":0.0}";
-//
-//       for (int i = 0; i < jsonArray.size(); i++){
-//           JSONObject job = jsonArray.getJSONObject(i);
-//           LemmaInfo info = JSON.parseObject(job.toJSONString(), new TypeReference<LemmaInfo>() {});
-//           //Student student1 = JSONObject.parseObject(JSON_OBJ_STR, new TypeReference<Student>() {});//因为JSONObject继承了JSON,所以这样也是可以的
-//
-//           System.out.println(info.getLength()+":"+info.getText());
-//       }
-
-        int index = 0;
-        for (int i=0; i<5; i++)
-            for (int j = i+1; j< 6; j++){
-                System.out.println(i + "," + j);
-                index ++;
+        Set<String> result = new HashSet<String>();  // scratch demo: reused accumulator for the set operations below
+        Set<String> set1 = new HashSet<String>() {  // anonymous subclass with an instance initializer that fills the set
+            {
+                add("王者荣耀");
+                add("英雄联盟");
+                add("穿越火线");
+                add("地下城与勇士");
+            }
+        };
+
+        Set<String> set2 = new HashSet<String>() {  // shares two elements with set1 and has one of its own
+            {
+                add("王者荣耀");
+                add("地下城与勇士");
+                add("魔兽世界");
             }
+        };
+
+        result.clear();
+        result.addAll(set1);
+        result.retainAll(set2);  // intersection: keep only elements that are also in set2
+        System.out.println("交集:" + result);
+
+        result.clear();
+        result.addAll(set1);
+        result.removeAll(set2);  // difference: elements of set1 that are not in set2
+        System.out.println("差集:" + result);
 
-        System.out.println(index);
+        result.clear();
+        result.addAll(set1);
+        result.addAll(set2);  // union: every element of either set
+        System.out.println("并集:" + result);
 
+        List<Integer> aList = new ArrayList<>();  // 1..6, used to demonstrate removal while iterating
+        aList.add(1);
+        aList.add(2);
+        aList.add(3);
+        aList.add(4);
+        aList.add(5);
+        aList.add(6);
+
+        Iterator<Integer> it = aList.iterator();
+
+        while (it.hasNext()){
+            int ll = it.next();
+            if (ll % 2 == 0){
+                it.remove();  // drop even values through the iterator itself
+            }
+        }
+        System.out.println(aList);  // only the odd values remain
+
+        String xx = "I have an apple, I have a pen.";
+        System.out.println(xx.indexOf("have", 19));  // search starts at index 19, where the second "have" begins; prints 19
+        System.out.println(xx.substring(19, 22));  // end index is exclusive; prints "hav"
     }
 
 }
+

+ 15 - 0
algorithm/src/main/java/org/algorithm/test/TestRelationTreeUtils.java

@@ -0,0 +1,15 @@
+package org.algorithm.test;
+
+import org.algorithm.core.RelationTreeUtils;
+
+/**
+ * @Author: bijl
+ * @Date: 2019/9/5 17:07
+ * @Description: Manual entry point for RelationTreeUtils; the only call is commented out, so main currently does nothing.
+ */
+public class TestRelationTreeUtils {
+
+    public static void main(String[] args) {
+//        RelationTreeUtils.test();
+    }
+}

+ 122 - 0
algorithm/src/main/java/org/algorithm/test/TestRuleCheckMachine.java

@@ -0,0 +1,122 @@
+package org.algorithm.test;
+
+import org.algorithm.core.RuleCheckMachine;
+import org.algorithm.core.cnn.entity.Lemma;
+import org.algorithm.core.cnn.entity.Triad;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @Author: bijl
+ * @Date: 2019/9/7 15:49
+ * @Description: Manual driver that runs RuleCheckMachine.checkAndRemove on one hand-built triad and prints the list size before/after plus the elapsed time.
+ */
+public class TestRuleCheckMachine {
+
+    public static void main(String[] args) {
+        List<Triad> triads = new ArrayList<>();
+
+        Triad triad = null;
+        Lemma l_1 = null;
+        Lemma l_2 = null;
+
+//        l_1 = new Lemma();
+//        l_1.setPosition("3,4");
+//        l_1.setText("剧烈");
+//
+//        l_2 = new Lemma();
+//        l_2.setPosition("5,6");
+//        l_2.setText("胸痛");
+//
+//        triad = new Triad();
+//        triad.setL_1(l_1);
+//        triad.setL_2(l_2);
+//        triads.add(triad);
+//
+//        l_1 = new Lemma();
+//        l_1.setPosition("7,8");
+//        l_1.setText("头痛");
+//        l_1.setProperty("部位");
+//
+//        l_2 = new Lemma();
+//        l_2.setPosition("9,10");
+//        l_2.setText("失眠");
+//        l_1.setProperty("反义");
+//
+//        triad = new Triad();
+//        triad.setL_1(l_1);
+//        triad.setL_2(l_2);
+//        triads.add(triad);
+//
+//        for (int i=0; i < 500; i++)
+//            triads.add(triad);
+
+//        String sentence = "肝脏外形饱满,包膜光整,肝实质回声增强细密,分布欠均匀,血管网显示欠清晰,未见明显占位,左右肝内胆管未见明显扩张";
+//        l_1 = new Lemma();
+//        l_1.setPosition("0,1");
+//        l_1.setText("肝脏");
+//        l_1.setProperty("部位");
+//
+//        l_2 = new Lemma();
+//        l_2.setPosition("9,10");
+//        l_2.setText("光整");
+//        l_2.setProperty("属性值");
+//
+//        triad = new Triad();
+//        triad.setL_1(l_1);
+//        triad.setL_2(l_2);
+//        triads.add(triad);
+
+//        String sentence = "双卵巢大小正常,内各见十数个小卵泡回声,沿周边排列,大小约0.5-0.7cm";
+//        l_1 = new Lemma();
+//        l_1.setText("十");
+//        l_1.setPosition("11,11");
+//        l_1.setProperty("");
+//
+//        l_2 = new Lemma();
+//        l_2.setText("排列");
+//        l_2.setPosition("23,24");
+//        l_2.setProperty("辅检其他");
+//
+//        triad = new Triad();
+//        triad.setL_1(l_1);
+//        triad.setL_2(l_2);
+//        triads.add(triad);
+
+        String sentence = "粘液糊澄清,量少";  // active fixture: 8 characters, indices 0..7
+        l_1 = new Lemma();
+        l_2 = new Lemma();
+
+        l_1.setText("澄清");
+        l_2.setText("量少");
+
+        l_1.setPosition("3,4");  // presumably inclusive "start,end" character offsets into sentence - TODO confirm against Lemma
+        l_2.setPosition("6,7");
+
+        l_1.setProperty("属性值");
+        l_2.setProperty("辅检其他");
+
+        triad = new Triad();
+        triad.setL_1(l_1);
+        triad.setL_2(l_2);
+        triads.add(triad);
+
+//        for (int i=0; i < 500; i++)
+//            triads.add(triad);
+
+        for (int i = 0; i < sentence.length(); i++)  // print every character with its index, including the last one
+            System.out.print("" + i + sentence.substring(i, i + 1) + " ");
+
+        System.out.println();
+        System.out.println("size of triads " + triads.size());  // size before the rule check
+        RuleCheckMachine ruleCheckMachine = new RuleCheckMachine();
+        long startTime=System.currentTimeMillis();  // time only the checkAndRemove call
+        ruleCheckMachine.checkAndRemove(sentence, triads);
+        long endTime=System.currentTimeMillis();
+
+        System.out.println("当前程序耗时:"+(endTime-startTime) +"ms");
+
+        System.out.println("size of triads " + triads.size());  // size after the rule check
+    }
+}

+ 1 - 1
algorithm/src/main/java/org/algorithm/util/MysqlConnector.java

@@ -45,7 +45,7 @@ public class MysqlConnector {
     
     /**
      * 执行sql语句
-     * @param sql
+     * @param sqls
      */
     public void executeBatch(List<String> sqls) {
         Statement stmt = null;

+ 2 - 2
algorithm/src/main/resources/algorithm.properties

@@ -1,8 +1,8 @@
 ################################ model basic url ###################################
 
 #basicPath=E:/project/push/algorithm/src/main/models/model_version_replacement/model
-basicPath=/opt/models/dev/models/model_version_replacement/model
-#basicPath=E:/xxx/model_version_replacement/model
+#basicPath=/opt/models/dev/models/model_version_replacement/model
+basicPath=E:/re_models/model_version_replacement/model
 
 ############################### current model version ################################
 diagnosisPredict.version=outpatient_556_IOE_1