瀏覽代碼

hanlp试做

gaodm 4 年之前
父節點
當前提交
08424bf71c
共有 2 個文件被更改,包括 32 次插入和 0 次删除
  1. 7 0
      pom.xml
  2. 25 0
      src/main/java/com/diagbot/util/NLPUtil.java

+ 7 - 0
pom.xml

@@ -34,6 +34,7 @@
         <logstash.version>5.2</logstash.version>
         <docker-maven-plugin.version>1.2.1</docker-maven-plugin.version>
         <aggregator.version>1.1.3</aggregator.version>
+        <hanlp.version>portable-1.8.1</hanlp.version>
         <docker.image.prefix>192.168.2.236:5000/diagbotcloud</docker.image.prefix>
         <registryUrl>http://192.168.2.236:5000/repository/diagbotcloud/</registryUrl>
     </properties>
@@ -215,6 +216,12 @@
             <artifactId>spring-boot-data-aggregator-starter</artifactId>
             <version>${aggregator.version}</version>
         </dependency>
+
+        <dependency>
+            <groupId>com.hankcs</groupId>
+            <artifactId>hanlp</artifactId>
+            <version>${hanlp.version}</version>
+        </dependency>
     </dependencies>
 
     <!-- 私有仓库 -->

+ 25 - 0
src/main/java/com/diagbot/util/NLPUtil.java

@@ -0,0 +1,25 @@
+package com.diagbot.util;
+
+import com.hankcs.hanlp.tokenizer.SpeedTokenizer;
+
+/**
+ * NLP utility class. Currently holds only a HanLP tokenizer demo entry point.
+ *
+ * @author gaodm
+ * @time 2021/5/25 17:13
+ */
+public class NLPUtil {
+
+    /**
+     * Segments a fixed sample sentence with {@link SpeedTokenizer} and prints
+     * the result to standard output.
+     *
+     * @param args command-line arguments; not read
+     */
+    public static void main(String[] args) {
+        final String sample = "江西鄱阳湖干枯,中国最大淡水湖变成大草原";
+        // A throughput benchmark (segmenting the sample 1,000,000 times and
+        // printing characters per second) was left disabled by the author.
+        System.out.println(SpeedTokenizer.segment(sample));
+    }
+}