@@ -3,6 +3,7 @@ package org.algorithm.core.neural.dataset;
 import org.algorithm.util.TextFileReader;
 import org.diagbot.pub.utils.PropertiesUtil;
 
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -10,7 +11,7 @@ import java.util.Map.Entry;
 
 /**
  * 门诊诊断推送用数据集
- * 
+ *
  * @Author: bijl
  * @Date: 2018年7月26日-上午10:19:43
  * @Description:
@@ -25,6 +26,8 @@ public class NNDataSetImpl extends NNDataSet {
 
     @Override
     public float[] toFeatureVector(Map<String, Map<String, String>> inputs) {
+
+        this.reSplitWord(inputs); // 再分词
         float[] featureVector = new float[this.NUM_FEATURE];
 
         Iterator<Entry<String, Map<String, String>>> entries = inputs.entrySet().iterator();
@@ -32,13 +35,9 @@ public class NNDataSetImpl extends NNDataSet {
         String featureName = "";
         Integer position = -1;
         Integer negative = 0;
-        // Integer partbodyValue = 0;
         float positive_value = 1.0f;
         float negative_value = -1.0f;
         Map<String, String> featureValues = null;
-        // String partbody = null;
-        // String[] partbodys = null;
-        // String sn = null;
 
         /**
          * 数据方案设计
@@ -51,11 +50,6 @@ public class NNDataSetImpl extends NNDataSet {
             featureValues = entry.getValue();
             position = this.FEATURE_DICT.get(featureName);
             negative = NEGATIVE_DICT.get(featureValues.get("negative"));
-            // 突出主症状的数据方案
-            // sn = featureValues.get("sn");
-            // if("0".equals(sn)) {
-            // negative = negative * 10;
-            // }
 
             if (position != null)
                 if (negative == 1)
@@ -65,91 +59,36 @@ public class NNDataSetImpl extends NNDataSet {
                 else
                     System.out.println("New Nagetive! This may lead to an error.");
 
-
-
-            /**
-             * 部位附属症状数据表示方案 partbodyValue = this.PARTBODY_DICT.get(featureValues.get("partbody"));
-             * if(partbodyValue != null) { value = 1.0f * partbodyValue /
-             * this.PARTBODY_DICT.get("NULL"); // 部位值表示 value = (float)(Math.round(value *
-             * 100000))/100000; // 保留5位有效数字 } value = negative * value; featureVector[position] =
-             * value;
-             *
-             */
-
         }
 
         return featureVector;
     }
 
-
-    /**
-     * 读取字典
-     */
-//    @Override
-//    public void readDict(String modelAndVersion) {
-//
-//        PropertiesUtil prop = new PropertiesUtil("/algorithm.properties");
-//        String model_version = prop.getProperty(modelAndVersion);
-//        model_version = model_version.trim();
-//
-//        String url = "jdbc:mysql://192.168.2.235/diagbot-app?user=root&password=diagbot@20180822";
-//        MysqlConnector connector = new MysqlConnector(url);
-//        String querySql = "SELECT md._name, md._index, md.type_id " + "FROM model_dictionary AS md "
-//                + "WHERE md.belong_model = 'outpatient_model'";
-//
-//        querySql = querySql.replace("outpatient_model", model_version);
-//        ResultSet rs = connector.query(querySql);
-//        try {
-//            while (rs.next()) {
-//                int type_id = rs.getInt("type_id");
-//                int _index = rs.getInt("_index");
-//                String _name = rs.getString("_name");
-//
-//                if (type_id == 1)
-//                    this.FEATURE_DICT.put(_name, _index);
-//                else if (type_id == 2)
-//                    this.LABEL_DICT.put(_name, _index);
-//                else if (type_id == 8)
-//                    this.NEGATIVE_DICT.put(_name, _index);
-//
-//            }
-//
-//            System.out.println("feature size:"+this.FEATURE_DICT.size());
-//
-//        } catch (SQLException e) {
-//            e.printStackTrace();
-//            throw new RuntimeException("加载特征和类别字典失败");
-//        } finally {
-//            connector.close();
-//        }
-//
-//    }
-
     @Override
     public void readDict(String modelAndVersion) {
-        
+
         PropertiesUtil prop = new PropertiesUtil("/algorithm.properties");
         String model_version = prop.getProperty(modelAndVersion);
 
         String filePath = prop.getProperty("basicPath"); // 基本目录
         filePath = filePath.substring(0, filePath.indexOf("model_version_replacement"));
-        
+
         filePath = filePath + "dictionaries.bin"; // 字典文件位置
-        
+
         List<String> lines = TextFileReader.readLines(filePath);
 
         boolean firstLine = true;
-        
+
         String[] temp = null;
         for (String line : lines) {
             if (firstLine) { // 去除第一行
                 firstLine = false;
                 continue;
             }
-            
+
             temp = line.split("\\|");
-            
-            if(temp[3].equals(model_version)){
+
+            if (temp[3].equals(model_version)) {
                 int type_id = Integer.parseInt(temp[2]);
                 int _index = Integer.parseInt(temp[1]);
                 String _name = temp[0];
@@ -168,4 +107,66 @@ public class NNDataSetImpl extends NNDataSet {
 
     }
 
+    /**
+     * 再分词:
+     * 基本操作:
+     * 如果再分词表中有某一词项,则移除它,并添加该词项对应的细分词项
+     *
+     * @param inputs 输入
+     */
+    public void reSplitWord(Map<String, Map<String, String>> inputs) {
+        Iterator<Entry<String, Map<String, String>>> entries = inputs.entrySet().iterator();
+
+        String featureName = "";
+        String[] splitWords = null;
+        Integer negative = 1;
+        Map<String, String> featureValues = null;
+        while (entries.hasNext()) {
+            Entry<String, Map<String, String>> entry = entries.next();
+            featureName = entry.getKey();
+            if (this.RE_SPLIT_WORD_DICT.get(featureName) != null) {
+                entries.remove(); // 移除该词项
+                splitWords = this.RE_SPLIT_WORD_DICT.get(featureName).split("_");
+                for (String word : splitWords) { // 添加细分词项
+                    featureValues = new HashMap<>();
+                    featureValues.put("negative", "有"); // 设置为阳性词
+                    inputs.put(word, featureValues);
+                }
+
+            }
+
+        }
+    }
+
+    @Override
+    public void readReSplitWordDict() {
+        PropertiesUtil prop = new PropertiesUtil("/algorithm.properties");
+        String filePath = prop.getProperty("basicPath"); // 基本目录
+        filePath = filePath.substring(0, filePath.indexOf("model_version_replacement"));
+
+        filePath = filePath + "dictionaries.bin"; // 字典文件位置
+
+        List<String> lines = TextFileReader.readLines(filePath);
+
+        boolean firstLine = true;
+
+        String[] temp = null;
+        Map<String, String> feature_map = null;
+        for (String line : lines) {
+            if (firstLine) { // 去除第一行
+                firstLine = false;
+                continue;
+            }
+
+            temp = line.split("\\|");
+
+            this.RE_SPLIT_WORD_DICT.put(temp[0], temp[1]);
+
+        }
+
+        System.out.println("再分词,词条数:" + this.RE_SPLIT_WORD_DICT.size());
+
+    }
+
+
 }
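
For context on the new 再分词 (word re-splitting) step above, here is a minimal standalone sketch of the transformation that reSplitWord() applies to the inputs map before vectorization, assuming RE_SPLIT_WORD_DICT maps a compound term to its sub-terms joined by "_". The class name, the dictionary entry, and the sample terms below are hypothetical illustrations, not taken from dictionaries.bin.

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class ReSplitWordSketch {

    // Hypothetical stand-in for NNDataSet.RE_SPLIT_WORD_DICT as filled by readReSplitWordDict()
    static final Map<String, String> RE_SPLIT_WORD_DICT = new HashMap<>();

    public static void main(String[] args) {
        RE_SPLIT_WORD_DICT.put("头痛头晕", "头痛_头晕"); // compound term -> sub-terms joined by "_"

        // Feature map in the same shape as the inputs of toFeatureVector()
        Map<String, Map<String, String>> inputs = new HashMap<>();
        Map<String, String> values = new HashMap<>();
        values.put("negative", "有");
        inputs.put("头痛头晕", values);
        inputs.put("发热", new HashMap<>(values));

        // Same rewrite as reSplitWord(): drop the compound entry,
        // then add one positive ("有") entry per sub-term.
        Map<String, Map<String, String>> added = new HashMap<>();
        Iterator<Map.Entry<String, Map<String, String>>> it = inputs.entrySet().iterator();
        while (it.hasNext()) {
            String term = it.next().getKey();
            String split = RE_SPLIT_WORD_DICT.get(term);
            if (split != null) {
                it.remove(); // remove the compound term
                for (String word : split.split("_")) {
                    Map<String, String> fv = new HashMap<>();
                    fv.put("negative", "有"); // mark each sub-term as positive
                    added.put(word, fv);
                }
            }
        }
        inputs.putAll(added); // merge after iteration finishes

        System.out.println(inputs.keySet()); // e.g. [发热, 头晕, 头痛]
    }
}

One design note: the sketch collects the new sub-term entries in a separate map and merges them with putAll() only after the iteration ends; putting new keys into the map that is still being iterated, as the patched reSplitWord() does, can raise a ConcurrentModificationException on the next call to entries.next() when other entries remain.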