@@ -3,6 +3,7 @@ package org.algorithm.core.neural.dataset;
 import org.algorithm.util.TextFileReader;
 import org.diagbot.pub.utils.PropertiesUtil;
 
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -10,7 +11,7 @@ import java.util.Map.Entry;
 
 /**
  * 门诊诊断推送用数据集
- * 
+ *
  * @Author: bijl
  * @Date: 2018年7月26日-上午10:19:43
  * @Description:
@@ -25,6 +26,8 @@ public class NNDataSetImpl extends NNDataSet {
 
     @Override
     public float[] toFeatureVector(Map<String, Map<String, String>> inputs) {
+
+        this.reSplitWord(inputs); // 再分词
         float[] featureVector = new float[this.NUM_FEATURE];
 
         Iterator<Entry<String, Map<String, String>>> entries = inputs.entrySet().iterator();
@@ -32,13 +35,9 @@ public class NNDataSetImpl extends NNDataSet {
         String featureName = "";
         Integer position = -1;
         Integer negative = 0;
-        // Integer partbodyValue = 0;
         float positive_value = 1.0f;
         float negative_value = -1.0f;
         Map<String, String> featureValues = null;
-        // String partbody = null;
-        // String[] partbodys = null;
-        // String sn = null;
 
         /**
          * 数据方案设计
@@ -51,11 +50,6 @@ public class NNDataSetImpl extends NNDataSet {
             featureValues = entry.getValue();
             position = this.FEATURE_DICT.get(featureName);
             negative = NEGATIVE_DICT.get(featureValues.get("negative"));
-            // 突出主症状的数据方案
-            // sn = featureValues.get("sn");
-            // if("0".equals(sn)) {
-            // negative = negative * 10;
-            // }
 
             if (position != null)
                 if (negative == 1)
@@ -65,91 +59,36 @@ public class NNDataSetImpl extends NNDataSet {
                 else
                     System.out.println("New Nagetive! This may lead to an error.");
 
-
-
-            /**
-             * 部位附属症状数据表示方案 partbodyValue = this.PARTBODY_DICT.get(featureValues.get("partbody"));
-             * if(partbodyValue != null) { value = 1.0f * partbodyValue /
-             * this.PARTBODY_DICT.get("NULL"); // 部位值表示 value = (float)(Math.round(value *
-             * 100000))/100000; // 保留5位有效数字 } value = negative * value; featureVector[position] =
-             * value;
-             *
-             */
-
         }
 
         return featureVector;
     }
 
-
-    /**
-     * 读取字典
-     */
-//    @Override
-//    public void readDict(String modelAndVersion) {
-//
-//        PropertiesUtil prop = new PropertiesUtil("/algorithm.properties");
-//        String model_version = prop.getProperty(modelAndVersion);
-//        model_version = model_version.trim();
-//
-//        String url = "jdbc:mysql://192.168.2.235/diagbot-app?user=root&password=diagbot@20180822";
-//        MysqlConnector connector = new MysqlConnector(url);
-//        String querySql = "SELECT md._name, md._index, md.type_id " + "FROM model_dictionary AS md "
-//                + "WHERE md.belong_model = 'outpatient_model'";
-//
-//        querySql = querySql.replace("outpatient_model", model_version);
-//        ResultSet rs = connector.query(querySql);
-//        try {
-//            while (rs.next()) {
-//                int type_id = rs.getInt("type_id");
-//                int _index = rs.getInt("_index");
-//                String _name = rs.getString("_name");
-//
-//                if (type_id == 1)
-//                    this.FEATURE_DICT.put(_name, _index);
-//                else if (type_id == 2)
-//                    this.LABEL_DICT.put(_name, _index);
-//                else if (type_id == 8)
-//                    this.NEGATIVE_DICT.put(_name, _index);
-//
-//            }
-//
-//            System.out.println("feature size:"+this.FEATURE_DICT.size());
-//
-//        } catch (SQLException e) {
-//            e.printStackTrace();
-//            throw new RuntimeException("加载特征和类别字典失败");
-//        } finally {
-//            connector.close();
-//        }
-//
-//    }
-
     @Override
     public void readDict(String modelAndVersion) {
-        
+
         PropertiesUtil prop = new PropertiesUtil("/algorithm.properties");
         String model_version = prop.getProperty(modelAndVersion);
 
         String filePath = prop.getProperty("basicPath"); // 基本目录
         filePath = filePath.substring(0, filePath.indexOf("model_version_replacement"));
-        
+
         filePath = filePath + "dictionaries.bin"; // 字典文件位置
-        
+
         List<String> lines = TextFileReader.readLines(filePath);
 
         boolean firstLine = true;
-        
+
         String[] temp = null;
         for (String line : lines) {
             if (firstLine) { // 去除第一行
                 firstLine = false;
                 continue;
             }
-            
+
             temp = line.split("\\|");
-            
-            if(temp[3].equals(model_version)){
+
+            if (temp[3].equals(model_version)) {
                 int type_id = Integer.parseInt(temp[2]);
                 int _index = Integer.parseInt(temp[1]);
                 String _name = temp[0];
@@ -168,4 +107,66 @@ public class NNDataSetImpl extends NNDataSet {
 
     }
 
+    /**
+     * 再分词:
+     * 基本操作:
+     * 如果再分词表中有某一词项,则移除它,并添加该词项对应的细分词项
+     *
+     * @param inputs 输入
+     */
+    public void reSplitWord(Map<String, Map<String, String>> inputs) {
+        Iterator<Entry<String, Map<String, String>>> entries = inputs.entrySet().iterator();
+
+        String featureName = "";
+        String[] splitWords = null;
+        Integer negative = 1;
+        Map<String, String> featureValues = null;
+        while (entries.hasNext()) {
+            Entry<String, Map<String, String>> entry = entries.next();
+            featureName = entry.getKey();
+            if (this.RE_SPLIT_WORD_DICT.get(featureName) != null) {
+                entries.remove(); // 移除该词项
+                splitWords = this.RE_SPLIT_WORD_DICT.get(featureName).split("_");
+                for (String word : splitWords) { // 添加细分词项
+                    featureValues = new HashMap<>();
+                    featureValues.put("negative", "有"); // 设置为阳性词
+                    inputs.put(word, featureValues);
+                }
+
+            }
+
+        }
+    }
+
+    @Override
+    public void readReSplitWordDict() {
+        PropertiesUtil prop = new PropertiesUtil("/algorithm.properties");
+        String filePath = prop.getProperty("basicPath"); // 基本目录
+        filePath = filePath.substring(0, filePath.indexOf("model_version_replacement"));
+
+        filePath = filePath + "dictionaries.bin"; // 字典文件位置
+
+        List<String> lines = TextFileReader.readLines(filePath);
+
+        boolean firstLine = true;
+
+        String[] temp = null;
+        Map<String, String> feature_map = null;
+        for (String line : lines) {
+            if (firstLine) { // 去除第一行
+                firstLine = false;
+                continue;
+            }
+
+            temp = line.split("\\|");
+
+            this.RE_SPLIT_WORD_DICT.put(temp[0], temp[1]);
+
+        }
+
+        System.out.println("再分词,词条数:" + this.RE_SPLIT_WORD_DICT.size());
+
+    }
+
+
 }
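
For context on the new 再分词 (word re-splitting) step above, here is a minimal standalone sketch of the transformation that reSplitWord() applies to the inputs map before vectorization, assuming RE_SPLIT_WORD_DICT maps a compound term to its sub-terms joined by "_". The class name, the dictionary entry, and the sample terms below are hypothetical illustrations, not taken from dictionaries.bin.

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class ReSplitWordSketch {

    // Hypothetical stand-in for NNDataSet.RE_SPLIT_WORD_DICT as filled by readReSplitWordDict()
    static final Map<String, String> RE_SPLIT_WORD_DICT = new HashMap<>();

    public static void main(String[] args) {
        RE_SPLIT_WORD_DICT.put("头痛头晕", "头痛_头晕"); // compound term -> sub-terms joined by "_"

        // Feature map in the same shape as the inputs of toFeatureVector()
        Map<String, Map<String, String>> inputs = new HashMap<>();
        Map<String, String> values = new HashMap<>();
        values.put("negative", "有");
        inputs.put("头痛头晕", values);
        inputs.put("发热", new HashMap<>(values));

        // Same rewrite as reSplitWord(): drop the compound entry,
        // then add one positive ("有") entry per sub-term.
        Map<String, Map<String, String>> added = new HashMap<>();
        Iterator<Map.Entry<String, Map<String, String>>> it = inputs.entrySet().iterator();
        while (it.hasNext()) {
            String term = it.next().getKey();
            String split = RE_SPLIT_WORD_DICT.get(term);
            if (split != null) {
                it.remove(); // remove the compound term
                for (String word : split.split("_")) {
                    Map<String, String> fv = new HashMap<>();
                    fv.put("negative", "有"); // mark each sub-term as positive
                    added.put(word, fv);
                }
            }
        }
        inputs.putAll(added); // merge after iteration finishes

        System.out.println(inputs.keySet()); // e.g. [发热, 头晕, 头痛]
    }
}

One design note: the sketch collects the new sub-term entries in a separate map and merges them with putAll() only after the iteration ends; putting new keys into the map that is still being iterated, as the patched reSplitWord() does, can raise a ConcurrentModificationException on the next call to entries.next() when other entries remain.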