Przeglądaj źródła

1、针对pacs解析使用特定词库

louhr 5 lat temu
rodzic
commit
8bf0cb7337

+ 12 - 0
common-push/src/main/java/org/diagbot/common/push/cache/CacheFileManager.java

@@ -163,6 +163,18 @@ public class CacheFileManager {
 
             fw = new FileWriter(path + "classify.dict");
             fw.close();
+
+            sql = "select name, type from kl_library_info_pacs order by name";
+            st = conn.createStatement();
+            rs = st.executeQuery(sql);
+            fw = new FileWriter(path + "pacs-tc.dict");
+            while (rs.next()) {
+                r1 = rs.getString(1);
+                r2 = rs.getString(2);
+                fw.write(encrypDES.encrytor(r1 + "|9|"+ r2 + "|" + r1));
+                fw.write("\n");
+            }
+            fw.close();
         } catch (IOException ioe) {
             ioe.printStackTrace();
         } catch (SQLException sqle) {

+ 31 - 14
nlp/src/main/java/org/diagbot/nlp/participle/ParticipleUtil.java

@@ -23,20 +23,7 @@ public class ParticipleUtil {
         LexemePath<Lexeme> lexemes = participle(content);
         if (isCombineUnit) {
             ParticipleUtil util = new ParticipleUtil();
-            lexemes = util.combineValidate(lexemes);
-            lexemes = util.joinTime(lexemes);
-
-            String year_pattern = "([1-2][0-9]{3}|[0-9]{2})";
-            String mouth_day_pattern = "([0-9]{2}|[0-9])";
-            String join_pattern = "([-/.]?)";
-            String pattern_string = year_pattern + join_pattern + mouth_day_pattern + join_pattern + mouth_day_pattern;
-            for (Lexeme l : lexemes) {
-                if (l.getProperty().equals(Constants.word_property_number)) {
-                    if (Pattern.matches(pattern_string,l.getText())) {
-                        l.setProperty(Constants.word_property_time);
-                    }
-                }
-            }
+            util.combine(util, lexemes);
         }
         return lexemes;
     }
@@ -48,6 +35,19 @@ public class ParticipleUtil {
         return participle(content, NlpCache.segment_cache);
     }
 
+    /**
+     * Segments {@code content} using the PACS segment cache and then applies the
+     * {@code combine} post-processing step to the resulting lexeme path.
+     * The PACS cache is created on first use, when {@code NlpCache.segment_cache_pacs} is null.
+     *
+     * @param content text to segment; {@code null} is treated as an empty string
+     * @return the lexeme path produced by the tokenizer
+     * @throws IOException propagated from the underlying cache or tokenizer calls
+     */
+    public static LexemePath<Lexeme> participlePacs(String content) throws IOException {
+        if (NlpCache.segment_cache_pacs == null) {
+            NlpCache.createSegmentCachePacs();
+        }
+        ParticipleToken token = new ParticipleToken();
+        // new StringReader(null) throws NullPointerException, so a missing input is read as empty text.
+        token.start(new StringReader(content == null ? "" : content), NlpCache.segment_cache_pacs);
+        try {
+            LexemePath<Lexeme> lexemePath = token.getLexemePath();
+            ParticipleUtil util = new ParticipleUtil();
+            util.combine(util, lexemePath);
+            return lexemePath;
+        } finally {
+            // Always pair start() with end(), even when the post-processing above throws.
+            token.end();
+        }
+    }
+
     public static LexemePath<Lexeme> participle(String content, String path) throws IOException {
         Configuration configuration = new DefaultConfig();
         NlpCache.segment_cache = configuration.loadMainDict(path);
@@ -65,6 +65,23 @@ public class ParticipleUtil {
         return lexemePath;
     }
 
+    /**
+     * Post-processes a segmented lexeme path: runs {@code combineValidate} and
+     * {@code joinTime} on it, then re-tags every number lexeme whose text looks like a
+     * date (a 4-digit year starting with 1 or 2, or a 2-digit year, followed by a
+     * 1-2 digit month and day, each optionally preceded by '-', '/' or '.') as a time lexeme.
+     *
+     * NOTE(review): the paths returned by {@code combineValidate} and {@code joinTime} are
+     * only assigned to the local parameter, so callers keep the reference they passed in.
+     * If those helpers return a new path instead of mutating the given one, callers
+     * never see the combined result -- confirm against their implementations.
+     *
+     * @param util    instance whose {@code combineValidate} and {@code joinTime} are invoked
+     * @param lexemes path to post-process
+     */
+    private void combine(ParticipleUtil util, LexemePath<Lexeme> lexemes) {
+        lexemes = util.combineValidate(lexemes);
+        lexemes = util.joinTime(lexemes);
+
+        String year_pattern = "([1-2][0-9]{3}|[0-9]{2})";
+        String mouth_day_pattern = "([0-9]{2}|[0-9])";
+        String join_pattern = "([-/.]?)";
+        // Compile once per call; Pattern.matches(regex, input) would recompile the regex for every lexeme.
+        Pattern datePattern = Pattern.compile(
+                year_pattern + join_pattern + mouth_day_pattern + join_pattern + mouth_day_pattern);
+        for (Lexeme l : lexemes) {
+            if (l.getProperty().equals(Constants.word_property_number)
+                    && datePattern.matcher(l.getText()).matches()) {
+                l.setProperty(Constants.word_property_time);
+            }
+        }
+    }
+
     public static String participleAndHighlight(String content) throws IOException {
         LexemePath<Lexeme> lexemePath = participle(content, false);
         String separator = "&nbsp;*&nbsp;";

+ 11 - 0
push-web/src/main/java/org/diagbot/push/controller/ParticipleController.java

@@ -20,6 +20,17 @@ public class ParticipleController extends BaseController {
         return listView;
     }
 
+    /**
+     * Handles {@code /split_pacs}: segments {@code content} with
+     * {@code ParticipleUtil.participlePacs}, stores the result as the response data and
+     * prints the elapsed wall-clock milliseconds of the segmentation call to standard output.
+     *
+     * @param content text to segment
+     * @return response whose data is the lexeme path returned by the segmenter
+     * @throws IOException propagated from {@code ParticipleUtil.participlePacs}
+     */
+    @ResponseBody
+    @RequestMapping("/split_pacs")
+    public Response split(String content) throws IOException {
+        final Response result = new Response();
+        final long startedAt = System.currentTimeMillis();
+        result.setData(ParticipleUtil.participlePacs(content));
+        final long elapsedMillis = System.currentTimeMillis() - startedAt;
+        System.out.println("总耗时:........." + elapsedMillis);
+        return result;
+    }
+
     @ResponseBody
     @RequestMapping("/split_and_highlight")
     public Response splitAndHighlight(String content) throws IOException {