|
@@ -1,7 +1,5 @@
|
|
|
package org.algorithm.core.neural.dataset;
|
|
|
|
|
|
-import com.alibaba.fastjson.JSON;
|
|
|
-import com.alibaba.fastjson.JSONObject;
|
|
|
import org.algorithm.util.TextFileReader;
|
|
|
import org.diagbot.pub.utils.PropertiesUtil;
|
|
|
|
|
@@ -79,9 +77,9 @@ public class NNDataSetImpl extends NNDataSet {
|
|
|
ch = sentence.charAt(i);
|
|
|
id = this.CHAR2ID_DICT.get(String.valueOf(ch));
|
|
|
if (id == null) {
|
|
|
- id = this.CHAR2ID_DICT.get("<UKC>");
|
|
|
+ id = this.CHAR2ID_DICT.get("<UNC>");
|
|
|
}
|
|
|
- ids[i] = id.intValue();
|
|
|
+ ids[i] = id;
|
|
|
}
|
|
|
for (int i = sentence.length(); i < max_len; i++) // padding
|
|
|
ids[i] = this.CHAR2ID_DICT.get("<PAD>");
|
|
@@ -143,7 +141,7 @@ public class NNDataSetImpl extends NNDataSet {
|
|
|
|
|
|
}
|
|
|
|
|
|
- System.out.println("feature size:" + this.FEATURE_DICT.size());
|
|
|
+// System.out.println("feature size:" + this.FEATURE_DICT.size());
|
|
|
|
|
|
}
|
|
|
|
|
@@ -161,12 +159,14 @@ public class NNDataSetImpl extends NNDataSet {
|
|
|
BufferedReader br = null;
|
|
|
try {
|
|
|
br = new BufferedReader(new FileReader(filePath)); // 读取原始json文件
|
|
|
- String s = null;
|
|
|
- while ((s = br.readLine()) != null) {
|
|
|
- JSONObject jsonObject = (JSONObject) JSON.parse(s);
|
|
|
- Set<Entry<String, Object>> entries = jsonObject.entrySet();
|
|
|
- for (Map.Entry<String, Object> entry : entries)
|
|
|
- this.CHAR2ID_DICT.put(entry.getKey(), (Integer) entry.getValue());
|
|
|
+ String line = null;
|
|
|
+ String[] pair = null;
|
|
|
+ while ((line = br.readLine()) != null) {
|
|
|
+ line = line.trim();
|
|
|
+ if (line.indexOf("_|_") > -1){
|
|
|
+ pair = line.split("_\\|_");
|
|
|
+ this.CHAR2ID_DICT.put(pair[0], Integer.parseInt(pair[1]));
|
|
|
+ }
|
|
|
}
|
|
|
} catch (Exception e) {
|
|
|
e.printStackTrace();
|
|
@@ -242,7 +242,7 @@ public class NNDataSetImpl extends NNDataSet {
|
|
|
|
|
|
}
|
|
|
|
|
|
- System.out.println("再分词,词条数:" + this.RE_SPLIT_WORD_DICT.size());
|
|
|
+// System.out.println("再分词,词条数:" + this.RE_SPLIT_WORD_DICT.size());
|
|
|
|
|
|
}
|
|
|
|
|
@@ -275,7 +275,7 @@ public class NNDataSetImpl extends NNDataSet {
|
|
|
this.RELATED_DIAGNOSIS_DICT.put(temp[0], diagnosis_map);
|
|
|
}
|
|
|
|
|
|
- System.out.println("疾病过滤字典大小:" + this.RELATED_DIAGNOSIS_DICT.size());
|
|
|
+// System.out.println("疾病过滤字典大小:" + this.RELATED_DIAGNOSIS_DICT.size());
|
|
|
}
|
|
|
|
|
|
|