|
@@ -0,0 +1,249 @@
|
|
|
+package com.qizhen.healsphere;
|
|
|
+
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
+import com.qizhen.healsphere.common.ai.BaidubceUtil;
|
|
|
+import com.qizhen.healsphere.common.ai.Knowlege;
|
|
|
+import com.qizhen.healsphere.repository.neo4j.entity.BaseEntity;
|
|
|
+import com.qizhen.healsphere.service.EntityService;
|
|
|
+import com.qizhen.healsphere.service.RelationshipService;
|
|
|
+import com.qizhen.healsphere.web.vo.CreateEntityVO;
|
|
|
+import com.qizhen.healsphere.web.vo.RelationshipVO;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFRow;
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFSheet;
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
|
|
+import org.junit.Test;
|
|
|
+import org.junit.runner.RunWith;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.boot.test.context.SpringBootTest;
|
|
|
+import org.springframework.context.annotation.ComponentScan;
|
|
|
+import org.springframework.test.context.junit4.SpringRunner;
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
+
|
|
|
+import java.io.*;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Objects;
|
|
|
+
|
|
|
+/**
|
|
|
+ * 别名
|
|
|
+ */
|
|
|
+@RunWith(SpringRunner.class)
|
|
|
+@ComponentScan(basePackages = {"com.qizhen.healsphere.model", "com.qizhen.healsphere.repository"})
|
|
|
+@SpringBootTest
|
|
|
+public class JianchaBmTest {
|
|
|
+ @Autowired
|
|
|
+ RelationshipService relationshipService;
|
|
|
+ @Autowired
|
|
|
+ EntityService entityService;
|
|
|
+ private static int maxCount= 100;
|
|
|
+ private static String directoryPath = "C:\\Users\\17664\\Desktop\\邵逸夫医院爬取数据\\检查";
|
|
|
+ static HSSFWorkbook workbook;
|
|
|
+ static String startLabel = "辅助检查";
|
|
|
+ @Test
|
|
|
+ public void writeNeo4j() throws Exception {
|
|
|
+ String propertys = "别名" +
|
|
|
+ ",英文名称" +
|
|
|
+ ",参考区间";
|
|
|
+ workbook = new HSSFWorkbook();//这里也可以设置sheet的Name
|
|
|
+ String accessToken = BaidubceUtil.getAccessToken();
|
|
|
+ String[] split = propertys.split(",");
|
|
|
+ for(String property:split) {
|
|
|
+ HSSFSheet sheet = workbook.createSheet(property);
|
|
|
+ List<Knowlege> knowleges = saveExecl(property, accessToken, sheet);
|
|
|
+
|
|
|
+ for (Knowlege temp:knowleges) {
|
|
|
+ String value = temp.getValue();
|
|
|
+ if(StringUtils.isBlank(value)){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ try{
|
|
|
+ JSONArray jsonArray = JSONArray.parseArray(value);
|
|
|
+ BaseEntity startEntity = createNoExists(startLabel, temp.getEntity());
|
|
|
+ long startId = startEntity.getId();
|
|
|
+ List<RelationshipVO> relationshipList = new ArrayList<>();
|
|
|
+ for(int i=0;i<jsonArray.size();i++){
|
|
|
+ String name = jsonArray.getString(i);
|
|
|
+ if(StringUtils.isEmpty(name)){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ BaseEntity endEntity = createNoExists(startLabel+property, name);
|
|
|
+ Long endId = endEntity.getId();
|
|
|
+ RelationshipVO relationshipVO = new RelationshipVO();
|
|
|
+ relationshipVO.setStartId(startId);
|
|
|
+ relationshipVO.setEndId(endId);
|
|
|
+ relationshipVO.setStartLabel(startLabel);
|
|
|
+ relationshipVO.setEndLabel(startLabel+property);
|
|
|
+ relationshipVO.setRelationshipType(startLabel+"相关"+property);
|
|
|
+ relationshipList.add(relationshipVO);
|
|
|
+ }
|
|
|
+ if(!CollectionUtils.isEmpty(relationshipList)) {
|
|
|
+ System.out.println(relationshipService.createRelationship(relationshipList));
|
|
|
+ }
|
|
|
+ }catch (Exception e){
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private BaseEntity createNoExists(String labelName, String name) {
|
|
|
+ BaseEntity nodeByName = entityService.findNodeByName(labelName, name);
|
|
|
+ if(Objects.nonNull(nodeByName)){//节点不存在
|
|
|
+ return nodeByName;
|
|
|
+ }
|
|
|
+ CreateEntityVO createEntity = new CreateEntityVO();
|
|
|
+ createEntity.setName(name);
|
|
|
+ createEntity.setLabel(labelName);
|
|
|
+ return entityService.create(createEntity);
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void main(String[] args) throws Exception {
|
|
|
+ String propertys ="别名或别称";
|
|
|
+ workbook = new HSSFWorkbook();//这里也可以设置sheet的Name
|
|
|
+ String accessToken = BaidubceUtil.getAccessToken();
|
|
|
+ String[] split = propertys.split(",");
|
|
|
+ for(String property:split) {
|
|
|
+ HSSFSheet sheet = workbook.createSheet(property);
|
|
|
+ saveExecl(property,accessToken, sheet);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static String readTxtFile(File txtFile) {
|
|
|
+ StringBuilder content = new StringBuilder();
|
|
|
+ try (BufferedReader reader = new BufferedReader(new FileReader(txtFile))) {
|
|
|
+ String line;
|
|
|
+ // 逐行读取文件内容
|
|
|
+ while ((line = reader.readLine()) != null) {
|
|
|
+ // 输出读取到的每一行内容
|
|
|
+ content.append(line+"\n");
|
|
|
+ }
|
|
|
+ } catch (IOException e) {
|
|
|
+ System.out.println("读取文件时发生错误: " + e.getMessage());
|
|
|
+ }
|
|
|
+ return content.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ private static List<Knowlege> saveExecl(String property, String accessToken, HSSFSheet sheet) throws Exception {
|
|
|
+ int curCount = 0;
|
|
|
+ List<Knowlege> result = new ArrayList<>();
|
|
|
+ File directory = new File(directoryPath);
|
|
|
+ // 获取目录下的所有文件和子目录
|
|
|
+ File[] files = directory.listFiles();
|
|
|
+
|
|
|
+ // 检查files是否为null,避免NullPointerException
|
|
|
+ if (files != null) {
|
|
|
+ // 循环遍历目录下的每个文件和子目录
|
|
|
+ for (File file : files) {
|
|
|
+ // 检查文件是否是.txt文件
|
|
|
+ if (file.isFile() && file.getName().endsWith(".txt")) {
|
|
|
+ // 读取.txt文件的内容
|
|
|
+ String s = readTxtFile(file);
|
|
|
+ String name = file.getName().split("_")[1].replace(".txt","");
|
|
|
+ /* if(!s.contains(property)){
|
|
|
+ continue;
|
|
|
+ }*/
|
|
|
+ String value = getValues(s, property, accessToken);
|
|
|
+
|
|
|
+ HSSFRow writeRow = sheet.createRow(curCount);
|
|
|
+ name = (name == null ? "" : name);
|
|
|
+ value = (value == null ? "" : value);
|
|
|
+ writeRow.createCell(0).setCellValue(name);
|
|
|
+ writeRow.createCell(1).setCellValue(value);
|
|
|
+ writeRow.createCell(2).setCellValue(s == null ? "" : s);
|
|
|
+ Knowlege knowlege = new Knowlege();
|
|
|
+ knowlege.setEntity(name);
|
|
|
+ knowlege.setProperty(property);
|
|
|
+ knowlege.setValue(value);
|
|
|
+
|
|
|
+ result.add(knowlege);
|
|
|
+ curCount++;
|
|
|
+ if (maxCount > 0) {
|
|
|
+ if (curCount >= maxCount) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ System.out.println("指定的目录不存在或无法访问: " + directoryPath);
|
|
|
+ }
|
|
|
+ save(property);
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String getValues(String zhaiyao, String property, String accessToken) {
|
|
|
+ if (StringUtils.isEmpty(zhaiyao)) {
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+ String format = "你是医学检查领域的专家。你将在指定的文本中抽取其中“" + property + "”。\n" +
|
|
|
+ "\n" +
|
|
|
+ "#要求\n" +
|
|
|
+ "1、抽取的结果将以JSON数组的形式呈现。每个抽取的“" + property + "”高度简洁、高度概括,不要要描述性的文字,文字尽量保持在20个字符以内!\n" +
|
|
|
+ "2、“" + property + "”可以是英文缩写或英文简称,但不能是英文名称!\n" +
|
|
|
+ "\n" +
|
|
|
+ "#示例1\n" +
|
|
|
+ "以抽取“肱骨骨折的分期”为例\n" +
|
|
|
+ "文本:\n" +
|
|
|
+ "肱骨骨折如果是**肱骨头坏死**则有Cruess分期,包括I期、Ⅱ期、Ⅲ期、IV期、V期^[1]^。\n" +
|
|
|
+ "\n" +
|
|
|
+ "如果是肱骨近端骨折则有Neer分型和AO分型^[3]^。\n" +
|
|
|
+ "输出:[\"Cruess分期I期\",\"Cruess分期Ⅱ期\",\"Cruess分期Ⅲ期\",\"Cruess分期IV期\",\"Cruess分期V期\"]\n\n" +
|
|
|
+ "#示例2\n" +
|
|
|
+ "以抽取“别名或别称”为例\n" +
|
|
|
+ "文本:\n" +
|
|
|
+ "\n" +
|
|
|
+ "名称:呼气中期流量测定\n" +
|
|
|
+ "英文名称:forced expiratory flow 25%~75%\n" +
|
|
|
+ "英文缩写:FEF25%~75%\n" +
|
|
|
+ "别称:最大呼气中段流量\n" +
|
|
|
+ "\n" +
|
|
|
+ "输出:[\"FEF25%~75%\",\"最大呼气中段流量\"]\n\n"+
|
|
|
+ "#示例3\n" +
|
|
|
+ "以抽取“"+property+"”为例\n" +
|
|
|
+ "文本:\n" +
|
|
|
+ "\n" +
|
|
|
+ "名称:伴有中央颞区棘波的儿童良性癫痫的脑电图改变\n" +
|
|
|
+ "概述:BECT又称为儿童良性Rolandic癫痫,是儿童期最常见的部分性癫痫,是一种特殊类型的部分性癫痫综合征。发病年龄为3~13岁。\n" +
|
|
|
+ "检查前准备:\n" +
|
|
|
+ "检查前嘱咐患者进食,检查时使患者放松,取卧位或坐位,在闭目安静状态下描记。已接受癫痫治疗的患者,特殊情况下为增加痫性放电的记录机会,可停服抗癫痫药1~2日。 \n" +
|
|
|
+ "\n" +
|
|
|
+ "输出:[]\n\n"+
|
|
|
+ "3、没有可抽取的“" + property + "”,则返回空json数组。\n\n" +
|
|
|
+
|
|
|
+ "本次抽取的文本如下:\n\n";
|
|
|
+
|
|
|
+ String zhiling = format + zhaiyao;
|
|
|
+ System.out.println(zhiling);
|
|
|
+ String chatResponse = BaidubceUtil.getChatResponse(zhiling, accessToken);
|
|
|
+ chatResponse = filte(chatResponse);
|
|
|
+ System.out.println(chatResponse);
|
|
|
+ return chatResponse;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static synchronized void save(String fileName) {
|
|
|
+ try {
|
|
|
+ fileName="C:\\Users\\17664\\Desktop\\"+fileName+System.currentTimeMillis()+".xlsx";
|
|
|
+ //文档输出
|
|
|
+ FileOutputStream out = new FileOutputStream(new File(fileName));
|
|
|
+ workbook.write(out);
|
|
|
+ out.close();
|
|
|
+ System.out.println(fileName + "存储完毕");
|
|
|
+ } catch (IOException e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String filte(String chatResponse) {
|
|
|
+ if (chatResponse.startsWith("```json")) {
|
|
|
+ chatResponse = chatResponse.substring(7);
|
|
|
+ }
|
|
|
+ if (chatResponse.endsWith("```")) {
|
|
|
+ chatResponse = chatResponse.substring(0, chatResponse.length() - 3);
|
|
|
+ }
|
|
|
+ return chatResponse;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+}
|
|
|
+
|