Browse Source

更新分词,在分词时根据不同类别,将词性无关的词剔除

MarkHuang 6 years ago
parent
commit
15d4c56642

+ 47 - 4
bigdata-web/src/main/java/org/diagbot/bigdata/work/ParamsDataProxy.java

@@ -16,10 +16,7 @@ import org.slf4j.LoggerFactory;
 import org.springframework.util.StringUtils;
 
 import javax.servlet.http.HttpServletRequest;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 import java.util.regex.Pattern;
 
 /**
@@ -83,6 +80,9 @@ public class ParamsDataProxy {
             List<Map<String, Object>> featuresList = fa.start(searchData.getSymptom(), FeatureType.FEATURE);
             paramFeatureInit(searchData, featuresList);
 
+            // 清洗特征词,去除词性不匹配的词
+            searchData = cleanFeature(featuresList, fa, searchData);
+
             //如果既往史中诊断信息,需要提取这个特征
             featuresList = fa.start(searchData.getOther(), FeatureType.DIAG);
             paramFeatureInit(searchData, featuresList);
@@ -383,5 +383,48 @@ public class ParamsDataProxy {
         return sb.toString();
     }
 
+    /**
+     * Runs {@link #removeFeature} twice over {@code searchData}: first with the PACS-related
+     * word properties, then with the LIS-related word properties.
+     *
+     * <p>NOTE(review): both passes are delegated to {@code removeFeature}, which analyses
+     * {@code searchData.getPacs()} regardless of the property group passed in — confirm that
+     * the LIS pass is meant to read the same text.
+     *
+     * @param featuresList forwarded unchanged to {@code removeFeature}
+     * @param fa           analyzer forwarded to {@code removeFeature}
+     * @param searchData   request data to clean
+     * @return the value returned by the second {@code removeFeature} call
+     */
+    private SearchData cleanFeature(List<Map<String, Object>> featuresList, FeatureAnalyze fa,
+                                    SearchData searchData) {
+        // Pass 1: property group describing auxiliary-examination (PACS) words.
+        String[] pacsProperties = new String[] {
+                Constants.word_property_PACS,
+                Constants.word_property_PACS_Detail,
+                Constants.word_property_PACS_Result
+        };
+        SearchData afterPacsPass = removeFeature(featuresList, fa, searchData, pacsProperties);
+
+        // Pass 2: property group describing laboratory-test (LIS) words.
+        String[] lisProperties = new String[] {
+                Constants.word_property_LIS,
+                Constants.word_property_LIS_Detail,
+                Constants.word_property_LIS_Result
+        };
+        return removeFeature(featuresList, fa, afterPacsPass, lisProperties);
+    }
+
+    /**
+     * Analyses {@code searchData.getPacs()} and, for every extracted word whose comma-separated
+     * {@code "property"} value contains none of the entries in {@code Feature}, calls
+     * {@code searchData.getInputs().remove(...)} with that word's {@code "feature_name"}.
+     *
+     * <p>NOTE(review): the analysed text is always {@code getPacs()}, also when the caller
+     * passes the LIS property group — confirm whether a different field should be read there.
+     *
+     * @param featureList ignored; the word list is always recomputed from the PACS text
+     *                    (parameter kept so existing callers keep compiling)
+     * @param fa          analyzer used to extract words from the text
+     * @param searchData  request data whose inputs are pruned
+     * @param Feature     word properties that mark a word as relevant
+     * @return the same {@code searchData} instance that was passed in
+     */
+    private SearchData removeFeature(List<Map<String, Object>> featureList, FeatureAnalyze fa,
+                                     SearchData searchData, String[] Feature) {
+        try {
+            // Built once instead of re-wrapping the array for every property of every word.
+            Set<String> accepted = new HashSet<>(Arrays.asList(Feature));
+
+            List<Map<String, Object>> extracted =
+                    fa.start(searchData.getPacs(), FeatureType.FEATURE);
+            for (Map<String, Object> item : extracted) {
+                Object name = item.get("feature_name");
+                Object property = item.get("property");
+                if (name == null || property == null) {
+                    // Incomplete entry: nothing reliable to match on, so leave the inputs
+                    // alone instead of aborting the whole pass with a NullPointerException.
+                    continue;
+                }
+
+                // Decided per word: a match on an earlier word must not shield later words.
+                boolean related = false;
+                for (String prop : property.toString().split(",")) {
+                    if (accepted.contains(prop)) {
+                        related = true;
+                        break;
+                    }
+                }
+
+                if (!related) {
+                    searchData.getInputs().remove(name.toString());
+                }
+            }
 
+        } catch (Exception ex) {
+            // Best effort: a failed analysis leaves the remaining inputs untouched.
+            // NOTE(review): should go through the class logger instead of stderr.
+            ex.printStackTrace();
+        }
+        // Returned after the try block (not from finally) so Errors are no longer swallowed.
+        return searchData;
+    }
 }