Browse Source

1- 添加三元组过滤规则。

bijl 5 years ago
parent
commit
87495659f8

+ 130 - 0
algorithm/src/main/java/org/algorithm/core/FilterRule.java

@@ -0,0 +1,130 @@
+package org.algorithm.core;
+
+import java.util.Map;
+
+/**
+ * 过滤规则
+ *
+ * @Author: bijl
+ * @Date: 2019/9/5 20:21
+ * @Description:
+ */
+public class FilterRule {
+
+    private Integer uuid;
+
+    private String key_1;
+    private String type_1;
+
+    private String key_2;
+    private String type_2;
+
+    private String inside;
+    private String insideType;
+
+    private String despite;
+    private String despiteInside;
+
+    public FilterRule(Map<String, String> aMap) {
+
+        this.key_1 = aMap.get("key_1");
+        this.type_1 = aMap.get("type_1");
+
+        this.key_2 = aMap.get("key_2");
+        this.type_2 = aMap.get("type_2");
+
+        this.inside = aMap.get("inside");
+        this.insideType = aMap.get("inside_type");
+
+        this.despite = aMap.get("despite");
+        this.despiteInside = aMap.get("despite_inside");
+    }
+
+    public Integer getUuid() {
+        return uuid;
+    }
+
+    public void setUuid(Integer uuid) {
+        this.uuid = uuid;
+    }
+
+
+    public String getKey_1() {
+        return key_1;
+    }
+
+    public void setKey_1(String key_1) {
+        this.key_1 = key_1;
+    }
+
+    public String getType_1() {
+        return type_1;
+    }
+
+    public void setType_1(String type_1) {
+        this.type_1 = type_1;
+    }
+
+    public String getKey_2() {
+        return key_2;
+    }
+
+    public void setKey_2(String key_2) {
+        this.key_2 = key_2;
+    }
+
+    public String getType_2() {
+        return type_2;
+    }
+
+    public void setType_2(String type_2) {
+        this.type_2 = type_2;
+    }
+
+    public String getInside() {
+        return inside;
+    }
+
+    public void setInside(String inside) {
+        this.inside = inside;
+    }
+
+    public String getInsideType() {
+        return insideType;
+    }
+
+    public void setInsideType(String insideType) {
+        this.insideType = insideType;
+    }
+
+    public String getDespite() {
+        return despite;
+    }
+
+    public void setDespite(String despite) {
+        this.despite = despite;
+    }
+
+    public String getDespiteInside() {
+        return despiteInside;
+    }
+
+    public void setDespiteInside(String despiteInside) {
+        this.despiteInside = despiteInside;
+    }
+
+    @Override
+    public String toString() {
+        return "FilterRule{" +
+                "uuid=" + uuid +
+                ", key_1='" + key_1 + '\'' +
+                ", type_1='" + type_1 + '\'' +
+                ", key_2='" + key_2 + '\'' +
+                ", type_2='" + type_2 + '\'' +
+                ", inside='" + inside + '\'' +
+                ", insideType='" + insideType + '\'' +
+                ", despite='" + despite + '\'' +
+                ", despiteInside='" + despiteInside + '\'' +
+                '}';
+    }
+}

+ 432 - 0
algorithm/src/main/java/org/algorithm/core/RuleCheckMachine.java

@@ -0,0 +1,432 @@
+package org.algorithm.core;
+
+import org.algorithm.core.cnn.entity.Lemma;
+import org.algorithm.core.cnn.entity.Triad;
+import org.algorithm.util.MysqlConnector;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.*;
+
+/**
+ * 规则检查机器
+ *
+ * @Author: bijl
+ * @Date: 2019/9/6 10:32
+ * @Description:
+ */
+public class RuleCheckMachine {
+    private final List<FilterRule> filterRules = new ArrayList<>();
+    private Map<String, Map<String, Set<Integer>>> key_1_map = null;
+    private Map<String, Map<String, Set<Integer>>> key_2_map = null;
+    private Map<String, Set<Integer>> despiteMap = null;  // 实体名:[规则uuid列表]
+    private Map<String, Set<Integer>> despiteInsideMap = null; // 实体名:[规则uuid列表]
+    private Map<String, Map<String, Set<Integer>>> insideMap = null;
+
+
+    public RuleCheckMachine() {
+        this.loadRules();
+        this.makeKey1Map();
+        this.makeKey2Map();
+        this.makeInsideMap();
+        this.makeDespiteMap();
+        this.makeDespiteInsideMap();
+    }
+
+
+    /**
+     * 加载规则
+     */
+    public void loadRules() {
+        /**
+         * 连接数据库
+         */
+        String url = "jdbc:mysql://192.168.2.235/test_case?user=root&password=diagbot@20180822";
+        MysqlConnector connector = new MysqlConnector(url);
+        String querySql =
+                "SELECT rr.key_1, rr.type_1, rr.key_2, rr.type_2, rr.inside, rr.inside_type, " +
+                        "rr.despite, rr.despite_inside " +
+                        "FROM relation_neg_rules AS rr " +
+                        "WHERE rr.`status` = 1";
+
+        ResultSet rs = connector.query(querySql);
+        Integer uuid = 0;
+        try {
+            while (rs.next()) {
+                String key_1 = rs.getString("key_1");
+                String type_1 = rs.getString("type_1");
+
+                String key_2 = rs.getString("key_2");
+                String type_2 = rs.getString("type_2");
+
+                String inside = rs.getString("inside");
+                String inside_type = rs.getString("inside_type");
+
+                String despite = rs.getString("despite");
+                String despite_inside = rs.getString("despite_inside");
+
+                String[] insideSplit = inside.split("或");
+                String[] despiteSplit = despite.split(",");
+                String[] despiteInsideSplit = despite_inside.split(",");
+                for (int i = 0; i < insideSplit.length; i++) {
+                    for (int j = 0; j < despiteSplit.length; j++) {
+                        for (int k = 0; k < despiteInsideSplit.length; k++) {
+                            Map<String, String> variableMap = new HashMap<>();
+                            variableMap.put("key_1", key_1);
+                            variableMap.put("type_1", type_1);
+
+                            variableMap.put("key_2", key_2);
+                            variableMap.put("type_2", type_2);
+
+                            variableMap.put("inside", insideSplit[i]);
+                            variableMap.put("inside_type", inside_type);
+
+                            variableMap.put("despite", despiteSplit[j]);
+                            variableMap.put("despite_inside", despiteInsideSplit[k]);
+
+                            FilterRule filterRule = new FilterRule(variableMap);
+                            filterRule.setUuid(uuid);
+                            this.filterRules.add(filterRule);
+
+                            // TODO:delete
+                            System.out.println(filterRule);
+
+                            uuid += 1;
+                        }
+                    }
+                }
+            }
+
+        } catch (SQLException e) {
+            e.printStackTrace();
+            throw new RuntimeException("加载规则字典失败");
+        } finally {
+            connector.close();
+        }
+    }
+
+    /**
+     * 制作实体1相关信息字典
+     */
+    private void makeKey1Map() {
+        Map<String, Map<String, Set<Integer>>> key_1_map_ = new HashMap<>();
+        Map<String, Set<Integer>> emptyMap = new HashMap<>();
+        Map<String, Set<Integer>> typeMap = new HashMap<>();
+        Map<String, Set<Integer>> wordMap = new HashMap<>();
+        key_1_map_.put("", emptyMap);
+        key_1_map_.put("type", typeMap);
+        key_1_map_.put("word", wordMap);
+
+        for (FilterRule rule : this.filterRules) {
+            String key_1 = rule.getKey_1();
+            String type_1 = rule.getType_1();
+            Integer uuid = rule.getUuid();
+
+            this.inputMaps(key_1, type_1, uuid, emptyMap, typeMap, wordMap, null);
+        }
+        this.key_1_map = key_1_map_;
+    }
+
+
+    /**
+     * 制作实体2相关信息字典
+     */
+    private void makeKey2Map() {
+        Map<String, Map<String, Set<Integer>>> key_2_map_ = new HashMap<>();
+        Map<String, Set<Integer>> emptyMap = new HashMap<>();
+        Map<String, Set<Integer>> typeMap = new HashMap<>();
+        Map<String, Set<Integer>> wordMap = new HashMap<>();
+        key_2_map_.put("", emptyMap);
+        key_2_map_.put("type", typeMap);
+        key_2_map_.put("word", wordMap);
+
+        for (FilterRule rule : this.filterRules) {
+            String key_2 = rule.getKey_2();
+            String type_2 = rule.getType_2();
+            Integer uuid = rule.getUuid();
+
+            this.inputMaps(key_2, type_2, uuid, emptyMap, typeMap, wordMap, null);
+        }
+        this.key_2_map = key_2_map_;
+    }
+
+    /**
+     * 制作内部实体相关信息字典
+     */
+    private void makeInsideMap() {
+        Map<String, Map<String, Set<Integer>>> insideMap_ = new HashMap<>();
+        Map<String, Set<Integer>> punctuationMap = new HashMap<>();
+        Map<String, Set<Integer>> typeMap = new HashMap<>();
+        Map<String, Set<Integer>> typePunctuationMap = new HashMap<>();
+        Map<String, Set<Integer>> wordMap = new HashMap<>();
+        insideMap_.put("punc", punctuationMap);
+        insideMap_.put("type", typeMap);
+        insideMap_.put("typePunctuation", typePunctuationMap);
+        insideMap_.put("word", wordMap);
+
+        for (FilterRule rule : this.filterRules) {
+            String inside = rule.getInside();
+            String insideType = rule.getInsideType();
+            Integer uuid = rule.getUuid();
+            if (",".equals(inside.substring(0, 1)))
+                this.inputMaps(inside, inside, uuid, null, punctuationMap, typePunctuationMap, wordMap);
+            else
+                this.inputMaps(inside, inside, uuid, null, punctuationMap, typeMap, wordMap);
+        }
+        this.insideMap = insideMap_;
+    }
+
+    /**
+     * maps输入
+     *
+     * @param key
+     * @param type
+     * @param uuid
+     * @param emptyMap
+     * @param typeMap
+     * @param wordMap
+     */
+    private void inputMaps(String key, String type, Integer uuid, Map<String, Set<Integer>> emptyMap,
+                           Map<String, Set<Integer>> typeMap, Map<String, Set<Integer>> wordMap,
+                           Map<String, Set<Integer>> punctuationMap) {
+
+        if ("".equals(type)) {
+            if (emptyMap.get(key) == null)
+                emptyMap.put(key, new HashSet<>());
+            emptyMap.get(key).add(uuid);
+        } else if ("type".equals(type)) {
+            if (typeMap.get(key) == null)
+                typeMap.put(key, new HashSet<>());
+            typeMap.get(key).add(uuid);
+        } else if ("word".equals(type)) {
+            if (wordMap.get(key) == null)
+                wordMap.put(key, new HashSet<>());
+            wordMap.get(key).add(uuid);
+        } else if ("punc".equals(type)) {
+            if (punctuationMap.get(key) == null)
+                punctuationMap.put(key, new HashSet<>());
+            punctuationMap.get(key).add(uuid);
+        } else {
+            throw new RuntimeException("出现了位置新type");
+        }
+
+    }
+
+
+    /**
+     * 制作例外字典
+     */
+    private void makeDespiteMap() {
+        Map<String, Set<Integer>> despiteMap = new HashMap<>();
+        for (FilterRule rule : this.filterRules) {
+            String despite = rule.getDespite();
+            if (!despite.equals("")) {  // 空白不收录
+                if (despiteMap.get(despite) == null) {
+                    despiteMap.put(despite, new HashSet<>());
+                }
+                despiteMap.get(despite).add(rule.getUuid());  //
+            }
+        }
+        this.despiteMap = despiteMap;
+    }
+
+
+    /**
+     * 制作例外_内部字典
+     */
+    private void makeDespiteInsideMap() {
+        Map<String, Set<Integer>> despiteInsideMap = new HashMap<>();
+        for (FilterRule rule : this.filterRules) {
+            String despiteInside = rule.getDespiteInside();
+            if (!despiteInside.equals("")) {  // 空白不收录
+                if (despiteInsideMap.get(despiteInside) == null) {
+                    despiteInsideMap.put(despiteInside, new HashSet<>());
+                }
+                despiteInsideMap.get(despiteInside).add(rule.getUuid());  //
+            }
+        }
+        this.despiteInsideMap = despiteInsideMap;
+    }
+
+    /**
+     * 名称—类别—开始位置类
+     */
+    class NameTypeStartPosition implements Comparable<NameTypeStartPosition> {
+        private String name;
+        private String type;
+        private int startPosition;
+
+        public NameTypeStartPosition(String name, String type, int startPosition) {
+            this.name = name;
+            this.type = type;
+            this.startPosition = startPosition;
+        }
+
+        @Override
+        public int compareTo(NameTypeStartPosition o) {
+            return this.startPosition - o.getStartPosition();
+        }
+
+        public String getName() {
+            return name;
+        }
+
+        public void setName(String name) {
+            this.name = name;
+        }
+
+        public String getType() {
+            return type;
+        }
+
+        public void setType(String type) {
+            this.type = type;
+        }
+
+        public int getStartPosition() {
+            return startPosition;
+        }
+
+        public void setStartPosition(int startPosition) {
+            this.startPosition = startPosition;
+        }
+
+        @Override
+        public String toString() {
+            return "NameTypeStartPosition{" +
+                    "name='" + name + '\'' +
+                    ", type='" + type + '\'' +
+                    ", startPosition=" + startPosition +
+                    '}';
+        }
+
+    }
+
+    /**
+     * 获取已排序的(名称,类别,开始位置)对象
+     *
+     * @param triads
+     * @return
+     */
+    public List<NameTypeStartPosition> getSortedNameTypeByPosition(List<Triad> triads) {
+        List<NameTypeStartPosition> nameTypeStartPositions = new ArrayList<>();
+        for (Triad triad : triads) {
+            Lemma l1 = triad.getL_1();
+            Lemma l2 = triad.getL_2();
+            nameTypeStartPositions.add(
+                    new NameTypeStartPosition(l1.getText(), l1.getProperty(), l1.getStartPosition()));
+            nameTypeStartPositions.add(
+                    new NameTypeStartPosition(l2.getText(), l2.getProperty(), l2.getStartPosition()));
+        }
+        nameTypeStartPositions.sort(Comparator.naturalOrder());
+
+        return nameTypeStartPositions;
+    }
+
+    /**
+     * 是否移除
+     *
+     * @param nameTypeStartPositions
+     * @param startIndex
+     * @param endIndex
+     * @return
+     */
+    public boolean isRemove(List<NameTypeStartPosition> nameTypeStartPositions, int startIndex, int endIndex,
+                            String sentence) {
+        Set<Integer> remainUuids = new HashSet<>();  // 剩余规则的uuid
+        for (FilterRule rule : this.filterRules)
+            remainUuids.add(rule.getUuid());
+
+        // 过滤实体名称触发例外条件情况
+        String entity_1_name = nameTypeStartPositions.get(startIndex).getName();
+        String entity_1_type = nameTypeStartPositions.get(startIndex).getType();
+
+        String entity_2_name = nameTypeStartPositions.get(endIndex).getType();
+        String entity_2_type = nameTypeStartPositions.get(endIndex).getType();
+
+        Set<Integer> set = null;
+        set = this.despiteMap.get(entity_1_name);  // 过滤有实体1名为例外情况(即,不成立)的规则(的uuid)
+        remainUuids.removeAll(set);
+
+        set = this.despiteMap.get(entity_2_name);  // 过滤有实体2名为例外情况(即,不成立)的规则(的uuid)
+        remainUuids.removeAll(set);
+
+        // 过滤中间实体的名称触发例外条件情况
+        for (int i = startIndex; i <= endIndex; i++) {
+            NameTypeStartPosition nameTypeStartPosition = nameTypeStartPositions.get(i);
+            set = this.despiteInsideMap.get(nameTypeStartPosition.getName());
+            remainUuids.remove(set);
+        }
+
+        // 三板斧过滤
+        // 实体1,过滤
+        set = new HashSet<>();
+        set.addAll(this.key_1_map.get("").get(""));
+        set.addAll(this.key_1_map.get("type").get(entity_1_type)); // 满足,形如("形容词", "type") 过滤条件的规则
+        set.addAll(this.key_1_map.get("word").get(entity_1_name)); // 满足,形如("胸痛", "word") 过滤条件的规则
+        remainUuids.retainAll(set);  // 求交集,同事满足实体1相关的过滤条件,且不不满足例外情况
+        if (remainUuids.size() == 0)
+            return false;
+
+        // 实体2,过滤
+        set = new HashSet<>();
+        set.addAll(this.key_2_map.get("").get(""));
+        set.addAll(this.key_2_map.get("type").get(entity_2_type)); // 满足,形如("形容词", "type") 过滤条件的规则
+        set.addAll(this.key_2_map.get("word").get(entity_2_name)); // 满足,形如("胸痛", "word") 过滤条件的规则
+        remainUuids.retainAll(set);  // 求交集,同事满足实体2相关的过滤条件,且不不满足例外情况
+        if (remainUuids.size() == 0)
+            return false;
+
+        // 中间实体过滤
+        set = new HashSet<>();
+        set.addAll(this.insideMap.get("").get(""));
+        for (int i = startIndex; i <= endIndex; i++) {
+            NameTypeStartPosition nameTypeStartPosition = nameTypeStartPositions.get(i);
+            // 中间实体满足,形如("胸痛", "word") 过滤条件的规则
+            set.addAll(this.insideMap.get("word").get(nameTypeStartPosition.getName()));
+            set.addAll(this.insideMap.get("type").get(nameTypeStartPosition.getType()));  // 没有逗号的
+        }
+
+        int entity_1_start = nameTypeStartPositions.get(startIndex).getStartPosition();
+        int entity_2_start = nameTypeStartPositions.get(endIndex).getStartPosition();
+
+        int commaIndex = sentence.indexOf(",", entity_1_start + 1);  // 逗号位置
+        String commaPadType = "";  // 逗号拼接上类型
+        while (commaIndex > -1 && commaIndex < entity_2_start) {
+            for (int i = startIndex; i <= endIndex; i++) {
+                NameTypeStartPosition nameTypeStartPosition = nameTypeStartPositions.get(i);
+                if (nameTypeStartPosition.getStartPosition() > commaIndex) {
+                    commaPadType = "," + nameTypeStartPosition.getType();
+                    set.addAll(this.insideMap.get("typePunctuation").get(commaPadType));
+                }
+
+            }
+        }
+        remainUuids.retainAll(set);  // 求交集,同事中间实体相关的过滤条件,且不不满足例外情况
+
+        return remainUuids.size() > 0;  // 还有规则满足,则过滤
+
+    }
+
+    /**
+     * 检查并移除
+     *
+     * @param triads 三元组列表
+     */
+    public void checkAndRemove(List<Triad> triads, String sentence) {
+        List<NameTypeStartPosition> nameTypeStartPositions = this.getSortedNameTypeByPosition(triads);
+        Map<Integer, Integer> startPositionToIndexMap = new HashMap<>();
+        for (int i = 0; i < nameTypeStartPositions.size(); i++)
+            startPositionToIndexMap.put(nameTypeStartPositions.get(i).getStartPosition(), i);
+
+        Iterator<Triad> it = triads.iterator();
+        while (it.hasNext()) {  // 遍历三元组,移除满足过滤规则的
+            Triad triad = it.next();
+            int startIndex = startPositionToIndexMap.get(triad.getL_1().getStartPosition());
+            int endIndex = startPositionToIndexMap.get(triad.getL_2().getStartPosition());
+            if (isRemove(nameTypeStartPositions, startIndex, endIndex, sentence)) {
+                it.remove();
+            }
+        }
+    }
+}

+ 54 - 3
algorithm/src/main/java/org/algorithm/test/Test.java

@@ -1,14 +1,65 @@
 package org.algorithm.test;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
+import java.util.*;
 
 public class Test {
 
 
     public static void main(String[] args) {
+        Set<String> result = new HashSet<String>();
+        Set<String> set1 = new HashSet<String>() {
+            {
+                add("王者荣耀");
+                add("英雄联盟");
+                add("穿越火线");
+                add("地下城与勇士");
+            }
+        };
 
+        Set<String> set2 = new HashSet<String>() {
+            {
+                add("王者荣耀");
+                add("地下城与勇士");
+                add("魔兽世界");
+            }
+        };
+
+        result.clear();
+        result.addAll(set1);
+        result.retainAll(set2);
+        System.out.println("交集:" + result);
+
+        result.clear();
+        result.addAll(set1);
+        result.removeAll(set2);
+        System.out.println("差集:" + result);
+
+        result.clear();
+        result.addAll(set1);
+        result.addAll(set2);
+        System.out.println("并集:" + result);
+
+        List<Integer> aList = new ArrayList<>();
+        aList.add(1);
+        aList.add(2);
+        aList.add(3);
+        aList.add(4);
+        aList.add(5);
+        aList.add(6);
+
+        Iterator<Integer> it = aList.iterator();
+
+        while (it.hasNext()){
+            int ll = it.next();
+            if (ll % 2 == 0){
+                it.remove();
+            }
+        }
+        System.out.println(aList);
+
+        String xx = "I have an apple, I have a pen.";
+        System.out.println(xx.indexOf("have", 19));
+        System.out.println(xx.substring(19, 22));
     }
 
 }

+ 1 - 1
algorithm/src/main/java/org/algorithm/test/TestRelationTreeUtils.java

@@ -10,6 +10,6 @@ import org.algorithm.core.RelationTreeUtils;
 public class TestRelationTreeUtils {
 
     public static void main(String[] args) {
-        RelationTreeUtils.test();
+//        RelationTreeUtils.test();
     }
 }

+ 63 - 0
algorithm/src/main/java/org/algorithm/test/TestRuleCheckMachine.java

@@ -0,0 +1,63 @@
+package org.algorithm.test;
+
+import org.algorithm.core.RuleCheckMachine;
+import org.algorithm.core.cnn.entity.Lemma;
+import org.algorithm.core.cnn.entity.Triad;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @Author: bijl
+ * @Date: 2019/9/7 15:49
+ * @Description:
+ */
+public class TestRuleCheckMachine {
+
+    public static void main(String[] args) {
+        List<Triad> triads = new ArrayList<>();
+
+        Triad triad = null;
+        Lemma l_1 = null;
+        Lemma l_2 = null;
+
+        l_1 = new Lemma();
+        l_1.setPosition("3,4");
+        l_1.setText("剧烈");
+
+        l_2 = new Lemma();
+        l_2.setPosition("5,6");
+        l_2.setText("胸痛");
+
+        triad = new Triad();
+        triad.setL_1(l_1);
+        triad.setL_2(l_2);
+        triads.add(triad);
+
+        l_1 = new Lemma();
+        l_1.setPosition("7,8");
+        l_1.setText("头痛");
+        l_1.setProperty("部位");
+
+        l_2 = new Lemma();
+        l_2.setPosition("9,10");
+        l_2.setText("失眠");
+        l_1.setProperty("反义");
+
+        triad = new Triad();
+        triad.setL_1(l_1);
+        triad.setL_2(l_2);
+        triads.add(triad);
+
+
+        String sentence = "患者剧烈胸痛,头痛失眠不安";
+        for (int i=0;i<sentence.length() - 1; i++)
+            System.out.print("" + i +  sentence.substring(i, i+1) + " ");
+
+        System.out.println("size of triads" + triads.size());
+        RuleCheckMachine ruleCheckMachine = new RuleCheckMachine();
+        ruleCheckMachine.checkAndRemove(triads, sentence);
+
+        System.out.println("size of triads" + triads.size());
+    }
+}