il y a 5 ans · df2aaaffbf
--- a/algorithm/src/main/java/org/algorithm/core/neural/dataset/NNDataSet.java
+++ b/algorithm/src/main/java/org/algorithm/core/neural/dataset/NNDataSet.java
@@ -21,6 +21,7 @@ public abstract class NNDataSet {
 
				     
			
 
				     protected final Map<String, Integer> LABEL_DICT = new HashMap<>();
			
 
				     protected final Map<String, Integer> NEGATIVE_DICT = new HashMap<>();
			
 
				+    protected final Map<String, String>  RE_SPLIT_WORD_DICT = new HashMap<>();
			
 
				     private final String[] FEATURE_DICT_ARRAY;
			
 
				     private final String[] LABEL_DICT_ARRAY;
			
 
				 
			
@@ -32,6 +33,7 @@ public abstract class NNDataSet {
 
				         this.FEATURE_DICT_ARRAY = new String[this.NUM_FEATURE];
			
 
				         this.LABEL_DICT_ARRAY = new String[this.NUM_LABEL];
			
 
				         this.makeDictArr();
			
 
				+        this.readReSplitWordDict();
			
 
				     }
			
 
				     
			
 
				     /**
			
@@ -45,6 +47,11 @@ public abstract class NNDataSet {
 
				      * 读取特征和类别字典
			
 
				      */
			
 
				     public abstract void readDict(String modelAndVersion);
			
 
				+
			
 
				+    /**
			
 
				+     * Reads the re-split (secondary word segmentation) dictionary.
			
 
				+     * NOTE(review): implementations presumably populate RE_SPLIT_WORD_DICT — confirm.
			
 
				+     */
			
 
				+    public abstract void readReSplitWordDict();
			
 
				     
			
 
				     /**
			
 
				      * 生成字典列表
			
--- a/algorithm/src/main/java/org/algorithm/core/neural/dataset/NNDataSetImpl.java
+++ b/algorithm/src/main/java/org/algorithm/core/neural/dataset/NNDataSetImpl.java
@@ -3,6 +3,7 @@ package org.algorithm.core.neural.dataset;
 
				 import org.algorithm.util.TextFileReader;
			
 
				 import org.diagbot.pub.utils.PropertiesUtil;
			
 
				 
			
 
				+import java.util.HashMap;
			
 
				 import java.util.Iterator;
			
 
				 import java.util.List;
			
 
				 import java.util.Map;
			
@@ -10,7 +11,7 @@ import java.util.Map.Entry;
 
				 
			
 
				 /**
			
 
				  * 门诊诊断推送用数据集
			
 
				- * 
			
 
				+ *
			
 
				  * @Author: bijl
			
 
				  * @Date: 2018年7月26日-上午10:19:43
			
 
				  * @Description:
			
@@ -25,6 +26,8 @@ public class NNDataSetImpl extends NNDataSet {
 
				 
			
 
				     @Override
			
 
				     public float[] toFeatureVector(Map<String, Map<String, String>> inputs) {
			
 
				+
			
 
				+        this.reSplitWord(inputs);  // 再分词
			
 
				         float[] featureVector = new float[this.NUM_FEATURE];
			
 
				 
			
 
				         Iterator<Entry<String, Map<String, String>>> entries = inputs.entrySet().iterator();
			
@@ -32,13 +35,9 @@ public class NNDataSetImpl extends NNDataSet {
 
				         String featureName = "";
			
 
				         Integer position = -1;
			
 
				         Integer negative = 0;
			
 
				-        // Integer partbodyValue = 0;
			
 
				         float positive_value = 1.0f;
			
 
				         float negative_value = -1.0f;
			
 
				         Map<String, String> featureValues = null;
			
 
				-        // String partbody = null;
			
 
				-        // String[] partbodys = null;
			
 
				-        // String sn = null;
			
 
				 
			
 
				         /**
			
 
				          * 数据方案设计
			
@@ -51,11 +50,6 @@ public class NNDataSetImpl extends NNDataSet {
 
				             featureValues = entry.getValue();
			
 
				             position = this.FEATURE_DICT.get(featureName);
			
 
				             negative = NEGATIVE_DICT.get(featureValues.get("negative"));
			
 
				-            // 突出主症状的数据方案
			
 
				-            // sn = featureValues.get("sn");
			
 
				-            // if("0".equals(sn)) {
			
 
				-            // negative = negative * 10;
			
 
				-            // }
			
 
				 
			
 
				             if (position != null)
			
 
				                 if (negative == 1)
			
@@ -65,91 +59,36 @@ public class NNDataSetImpl extends NNDataSet {
 
				                 else
			
 
				                     System.out.println("New Nagetive! This may lead to an error.");
			
 
				 
			
 
				-
			
 
				-
			
 
				-            /**
			
 
				-             * 部位附属症状数据表示方案 partbodyValue = this.PARTBODY_DICT.get(featureValues.get("partbody"));
			
 
				-             * if(partbodyValue != null) { value = 1.0f * partbodyValue /
			
 
				-             * this.PARTBODY_DICT.get("NULL"); // 部位值表示 value = (float)(Math.round(value *
			
 
				-             * 100000))/100000; // 保留5位有效数字 } value = negative * value; featureVector[position] =
			
 
				-             * value;
			
 
				-             * 
			
 
				-             */
			
 
				-
			
 
				         }
			
 
				 
			
 
				         return featureVector;
			
 
				     }
			
 
				 
			
 
				-
			
 
				-    /**
			
 
				-     * 读取字典
			
 
				-     */
			
 
				-//     @Override
			
 
				-//     public void readDict(String modelAndVersion) {
			
 
				-//    
			
 
				-//     PropertiesUtil prop = new PropertiesUtil("/algorithm.properties");
			
 
				-//     String model_version = prop.getProperty(modelAndVersion);
			
 
				-//     model_version = model_version.trim();
			
 
				-//    
			
 
				-//     String url = "jdbc:mysql://192.168.2.235/diagbot-app?user=root&password=diagbot@20180822";
			
 
				-//     MysqlConnector connector = new MysqlConnector(url);
			
 
				-//     String querySql = "SELECT md._name, md._index, md.type_id " + "FROM model_dictionary AS md "
			
 
				-//     + "WHERE md.belong_model = 'outpatient_model'";
			
 
				-//    
			
 
				-//     querySql = querySql.replace("outpatient_model", model_version);
			
 
				-//     ResultSet rs = connector.query(querySql);
			
 
				-//     try {
			
 
				-//     while (rs.next()) {
			
 
				-//     int type_id = rs.getInt("type_id");
			
 
				-//     int _index = rs.getInt("_index");
			
 
				-//     String _name = rs.getString("_name");
			
 
				-//    
			
 
				-//     if (type_id == 1)
			
 
				-//     this.FEATURE_DICT.put(_name, _index);
			
 
				-//     else if (type_id == 2)
			
 
				-//     this.LABEL_DICT.put(_name, _index);
			
 
				-//     else if (type_id == 8)
			
 
				-//     this.NEGATIVE_DICT.put(_name, _index);
			
 
				-//    
			
 
				-//     }
			
 
				-//    
			
 
				-//     System.out.println("feature size:"+this.FEATURE_DICT.size());
			
 
				-//    
			
 
				-//     } catch (SQLException e) {
			
 
				-//     e.printStackTrace();
			
 
				-//     throw new RuntimeException("加载特征和类别字典失败");
			
 
				-//     } finally {
			
 
				-//     connector.close();
			
 
				-//     }
			
 
				-//    
			
 
				-//     }
			
 
				-
			
 
				     @Override
			
 
				     public void readDict(String modelAndVersion) {
			
 
				-        
			
 
				+
			
 
				         PropertiesUtil prop = new PropertiesUtil("/algorithm.properties");
			
 
				         String model_version = prop.getProperty(modelAndVersion);
			
 
				 
			
 
				         String filePath = prop.getProperty("basicPath");  // 基本目录
			
 
				         filePath = filePath.substring(0, filePath.indexOf("model_version_replacement"));
			
 
				-        
			
 
				+
			
 
				         filePath = filePath + "dictionaries.bin";  // 字典文件位置
			
 
				-        
			
 
				+
			
 
				         List<String> lines = TextFileReader.readLines(filePath);
			
 
				 
			
 
				         boolean firstLine = true;
			
 
				-        
			
 
				+
			
 
				         String[] temp = null;
			
 
				         for (String line : lines) {
			
 
				             if (firstLine) {  // 去除第一行
			
 
				                 firstLine = false;
			
 
				                 continue;
			
 
				             }
			
 
				-            
			
 
				+
			
 
				             temp = line.split("\\|");
			
 
				-            
			
 
				-            if(temp[3].equals(model_version)){
			
 
				+
			
 
				+            if (temp[3].equals(model_version)) {
			
 
				                 int type_id = Integer.parseInt(temp[2]);
			
 
				                 int _index = Integer.parseInt(temp[1]);
			
 
				                 String _name = temp[0];
			
@@ -168,4 +107,66 @@ public class NNDataSetImpl extends NNDataSet {
 
				 
			
 
				     }
			
 
				 
			
 
				+    /**
				+     * Re-splits coarse terms into their finer-grained terms, modifying {@code inputs} in place.
				+     *
				+     * <p>Every key of {@code inputs} that has an entry in {@code RE_SPLIT_WORD_DICT} is removed,
				+     * and each {@code _}-separated word of its dictionary value is added as a new key whose
				+     * value map contains only {@code "negative" -> "有"} (i.e. the finer term is marked as
				+     * positive). If a finer term is already present in {@code inputs}, its value map is
				+     * replaced. Finer terms added by this call are not themselves re-split.
				+     *
				+     * @param inputs feature name to feature attributes; must be a mutable map
				+     */
				+    public void reSplitWord(Map<String, Map<String, String>> inputs) {
				+        // Finer terms are buffered and inserted only after the scan: calling inputs.put()
				+        // while iterating over inputs.entrySet() is a structural modification and makes the
				+        // iterator throw ConcurrentModificationException on its next step.
				+        Map<String, Map<String, String>> additions = new HashMap<>();
				+
				+        Iterator<Entry<String, Map<String, String>>> entries = inputs.entrySet().iterator();
				+        while (entries.hasNext()) {
				+            String splitSpec = this.RE_SPLIT_WORD_DICT.get(entries.next().getKey());
				+            if (splitSpec == null) {
				+                continue;  // not a re-splittable term, keep it as is
				+            }
				+
				+            entries.remove();  // drop the coarse term
				+            for (String word : splitSpec.split("_")) {  // queue the finer terms
				+                Map<String, String> featureValues = new HashMap<>();
				+                featureValues.put("negative", "有");  // mark as a positive term
				+                additions.put(word, featureValues);
				+            }
				+        }
				+
				+        inputs.putAll(additions);
				+    }
			
 
				+
			
 
				+    /**
				+     * Loads the re-split dictionary into {@code RE_SPLIT_WORD_DICT}.
				+     *
				+     * <p>The file location is built from the {@code basicPath} property of
				+     * {@code /algorithm.properties}: the part before the {@code model_version_replacement}
				+     * placeholder, followed by {@code dictionaries.bin}. The first line of the file is
				+     * skipped; every other line is split on {@code |}, field 0 becoming the key and field 1
				+     * the value. Lines with fewer than two fields are ignored.
				+     *
				+     * <p>NOTE(review): this is the same file {@code readDict} loads, where field 1 is parsed
				+     * as an integer index. Confirm that the re-split dictionary is really stored there and
				+     * not in a dedicated file.
				+     */
				+    @Override
				+    public void readReSplitWordDict() {
				+        PropertiesUtil prop = new PropertiesUtil("/algorithm.properties");
				+        String basicPath = prop.getProperty("basicPath");  // base directory
				+        String filePath = basicPath.substring(0, basicPath.indexOf("model_version_replacement"))
				+                + "dictionaries.bin";  // dictionary file location
				+
				+        Iterator<String> lines = TextFileReader.readLines(filePath).iterator();
				+        if (lines.hasNext()) {
				+            lines.next();  // skip the first line
				+        }
				+
				+        while (lines.hasNext()) {
				+            String[] fields = lines.next().split("\\|");
				+            if (fields.length < 2) {
				+                continue;  // blank or malformed line: nothing to map
				+            }
				+            this.RE_SPLIT_WORD_DICT.put(fields[0], fields[1]);
				+        }
				+
				+        System.out.println("再分词，词条数:" + this.RE_SPLIT_WORD_DICT.size());
				+    }
			
 
				+
			
 
				+
			
 
				 }