|
@@ -0,0 +1,265 @@
|
|
|
+package com.qizhen.healsphere;
|
|
|
+
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
+import com.qizhen.healsphere.common.ai.BaidubceUtil;
|
|
|
+import com.qizhen.healsphere.common.ai.Knowlege;
|
|
|
+import com.qizhen.healsphere.repository.neo4j.entity.BaseEntity;
|
|
|
+import com.qizhen.healsphere.service.EntityService;
|
|
|
+import com.qizhen.healsphere.service.RelationshipService;
|
|
|
+import com.qizhen.healsphere.web.vo.CreateEntityVO;
|
|
|
+import com.qizhen.healsphere.web.vo.RelationshipVO;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFRow;
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFSheet;
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
|
|
+import org.junit.Test;
|
|
|
+import org.junit.runner.RunWith;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.boot.test.context.SpringBootTest;
|
|
|
+import org.springframework.context.annotation.ComponentScan;
|
|
|
+import org.springframework.test.context.junit4.SpringRunner;
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
+
|
|
|
+import java.io.*;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Objects;
|
|
|
+
|
|
|
+@RunWith(SpringRunner.class)
|
|
|
+@ComponentScan(basePackages = {"com.qizhen.healsphere.model", "com.qizhen.healsphere.repository"})
|
|
|
+@SpringBootTest
|
|
|
+public class JianyanTest10 {
|
|
|
+ @Autowired
|
|
|
+ RelationshipService relationshipService;
|
|
|
+ @Autowired
|
|
|
+ EntityService entityService;
|
|
|
+ private static int maxCount= 10;
|
|
|
+ private static String directoryPath = "C:\\Users\\17664\\Desktop\\邵逸夫医院爬取数据\\检验";
|
|
|
+ static HSSFWorkbook workbook;
|
|
|
+ static String startLabel = "检验";
|
|
|
+ @Test
|
|
|
+ public void writeNeo4j() throws Exception {
|
|
|
+ String propertys = "别名" +
|
|
|
+ ",英文名称" +
|
|
|
+ ",参考区间";
|
|
|
+ workbook = new HSSFWorkbook();//这里也可以设置sheet的Name
|
|
|
+ String accessToken = BaidubceUtil.getAccessToken();
|
|
|
+ String[] split = propertys.split(",");
|
|
|
+ for(String property:split) {
|
|
|
+ HSSFSheet sheet = workbook.createSheet(property);
|
|
|
+ List<Knowlege> knowleges = saveExecl(property, accessToken, sheet);
|
|
|
+
|
|
|
+ for (Knowlege temp:knowleges) {
|
|
|
+ String value = temp.getValue();
|
|
|
+ if(StringUtils.isBlank(value)){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ try{
|
|
|
+ JSONArray jsonArray = JSONArray.parseArray(value);
|
|
|
+ BaseEntity startEntity = createNoExists(startLabel, temp.getEntity());
|
|
|
+ long startId = startEntity.getId();
|
|
|
+ List<RelationshipVO> relationshipList = new ArrayList<>();
|
|
|
+ for(int i=0;i<jsonArray.size();i++){
|
|
|
+ String name = jsonArray.getString(i);
|
|
|
+ if(StringUtils.isEmpty(name)){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ BaseEntity endEntity = createNoExists(startLabel+property, name);
|
|
|
+ Long endId = endEntity.getId();
|
|
|
+ RelationshipVO relationshipVO = new RelationshipVO();
|
|
|
+ relationshipVO.setStartId(startId);
|
|
|
+ relationshipVO.setEndId(endId);
|
|
|
+ relationshipVO.setStartLabel(startLabel);
|
|
|
+ relationshipVO.setEndLabel(startLabel+property);
|
|
|
+ relationshipVO.setRelationshipType(startLabel+"相关"+property);
|
|
|
+ relationshipList.add(relationshipVO);
|
|
|
+ }
|
|
|
+ if(!CollectionUtils.isEmpty(relationshipList)) {
|
|
|
+ System.out.println(relationshipService.createRelationship(relationshipList));
|
|
|
+ }
|
|
|
+ }catch (Exception e){
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private BaseEntity createNoExists(String labelName, String name) {
|
|
|
+ BaseEntity nodeByName = entityService.findNodeByName(labelName, name);
|
|
|
+ if(Objects.nonNull(nodeByName)){//节点不存在
|
|
|
+ return nodeByName;
|
|
|
+ }
|
|
|
+ CreateEntityVO createEntity = new CreateEntityVO();
|
|
|
+ createEntity.setName(name);
|
|
|
+ createEntity.setLabel(labelName);
|
|
|
+ return entityService.create(createEntity);
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void main(String[] args) throws Exception {
|
|
|
+ String propertys = /*"别名" +
|
|
|
+ ",英文名称" +
|
|
|
+ ",参考区间" +
|
|
|
+ ",升高的临床意义" +*/
|
|
|
+ "降低的临床意义" +
|
|
|
+ ",危急高值" +
|
|
|
+ ",危急低值" +
|
|
|
+ ",适用性别" +
|
|
|
+ ",是否空腹";
|
|
|
+ workbook = new HSSFWorkbook();//这里也可以设置sheet的Name
|
|
|
+ String accessToken = BaidubceUtil.getAccessToken();
|
|
|
+ String[] split = propertys.split(",");
|
|
|
+ for(String property:split) {
|
|
|
+ HSSFSheet sheet = workbook.createSheet(property);
|
|
|
+ saveExecl(property,accessToken, sheet);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public static String readTxtFile(File txtFile) {
|
|
|
+ StringBuilder content = new StringBuilder();
|
|
|
+ try (BufferedReader reader = new BufferedReader(new FileReader(txtFile))) {
|
|
|
+ String line;
|
|
|
+ // 逐行读取文件内容
|
|
|
+ while ((line = reader.readLine()) != null) {
|
|
|
+ // 输出读取到的每一行内容
|
|
|
+ content.append(line+"\n");
|
|
|
+ }
|
|
|
+ } catch (IOException e) {
|
|
|
+ System.out.println("读取文件时发生错误: " + e.getMessage());
|
|
|
+ }
|
|
|
+ return content.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ private static List<Knowlege> saveExecl(String property, String accessToken, HSSFSheet sheet) throws Exception {
|
|
|
+ int curCount = 0;
|
|
|
+ List<Knowlege> result = new ArrayList<>();
|
|
|
+ File directory = new File(directoryPath);
|
|
|
+ // 获取目录下的所有文件和子目录
|
|
|
+ File[] files = directory.listFiles();
|
|
|
+
|
|
|
+ // 检查files是否为null,避免NullPointerException
|
|
|
+ if (files != null) {
|
|
|
+ // 循环遍历目录下的每个文件和子目录
|
|
|
+ for (File file : files) {
|
|
|
+ // 检查文件是否是.txt文件
|
|
|
+ if (file.isFile() && file.getName().endsWith(".txt")) {
|
|
|
+ // 读取.txt文件的内容
|
|
|
+ String s = readTxtFile(file);
|
|
|
+ String name = file.getName().split("_")[1].replace(".txt","");
|
|
|
+
|
|
|
+ String value = getValues(s, property, accessToken);
|
|
|
+
|
|
|
+ HSSFRow writeRow = sheet.createRow(curCount);
|
|
|
+ name = (name == null ? "" : name);
|
|
|
+ value = (value == null ? "" : value);
|
|
|
+ writeRow.createCell(0).setCellValue(name);
|
|
|
+ writeRow.createCell(1).setCellValue(value);
|
|
|
+ writeRow.createCell(2).setCellValue(s == null ? "" : s);
|
|
|
+ Knowlege knowlege = new Knowlege();
|
|
|
+ knowlege.setEntity(name);
|
|
|
+ knowlege.setProperty(property);
|
|
|
+ knowlege.setValue(value);
|
|
|
+
|
|
|
+ result.add(knowlege);
|
|
|
+ curCount++;
|
|
|
+ if (maxCount > 0) {
|
|
|
+ if (curCount >= maxCount) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ System.out.println("指定的目录不存在或无法访问: " + directoryPath);
|
|
|
+ }
|
|
|
+ save(property);
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String getValues(String zhaiyao, String property, String accessToken) {
|
|
|
+ if (StringUtils.isEmpty(zhaiyao)) {
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+ String format = "你是医学检验领域的专家。你将在指定的文本中抽取其中“" + property + "”。\n" +
|
|
|
+ "\n" +
|
|
|
+ "#要求\n" +
|
|
|
+ "1、抽取的结果将以JSON数组的形式呈现。每个抽取的“" + property + "”高度简洁、高度概括,不要要描述性的文字,文字尽量保持在20个字符以内!\n" +
|
|
|
+ "\n" +
|
|
|
+ "#示例1\n" +
|
|
|
+ "以抽取“分期”为例\n" +
|
|
|
+ "文本:\n" +
|
|
|
+ "肱骨骨折如果是**肱骨头坏死**则有Cruess分期,包括I期、Ⅱ期、Ⅲ期、IV期、V期^[1]^。\n" +
|
|
|
+ "\n" +
|
|
|
+ "如果是肱骨近端骨折则有Neer分型和AO分型^[3]^。\n" +
|
|
|
+ "输出:[\"Cruess分期I期\",\"Cruess分期Ⅱ期\",\"Cruess分期Ⅲ期\",\"Cruess分期IV期\",\"Cruess分期V期\"]\n" +
|
|
|
+ "\n" +
|
|
|
+ "#示例2\n" +
|
|
|
+ "以抽取“英文名称”为例\n" +
|
|
|
+ "文本:\n" +
|
|
|
+ "通用名称:双氯芬酸钠缓释片\n" +
|
|
|
+ "商品名称:迪根\n" +
|
|
|
+ "英文名称:DiclofenacSodium Sustained Release Tablets\n" +
|
|
|
+ "汉语拼音:ShuanglvfensuannaHuanshiPian\n" +
|
|
|
+ "\n" +
|
|
|
+ "输出:[\"DiclofenacSodium Sustained Release Tablet\"]\n\n"+
|
|
|
+ "#示例3\n" +
|
|
|
+ "以抽取“参考区间”为例\n" +
|
|
|
+ "文本:\n" +
|
|
|
+ "\n" +
|
|
|
+ "参考区间:\n" +
|
|
|
+ " 1. \n" +
|
|
|
+ "成年男性HDL-C为1.16~1.42mmol/L(45~55mg/dl);女性为1.29~1.55mmol/L(50~60mg/dl)。正常人HDL-C约占TC的25%~30%。\n" +
|
|
|
+ "\n" +
|
|
|
+ " 2. 我国《中国成人血脂异常防治建议》提出的标准(2007)为: \n" +
|
|
|
+ "\n" +
|
|
|
+ " 理想范围:>1.04mmol/L(>40mg/dl)。 \n" +
|
|
|
+ "\n" +
|
|
|
+ " 升高:≥1.55mmol/L(60mg/dl)。 \n" +
|
|
|
+ "\n" +
|
|
|
+ " 降低:<1.04mmol/L(<40mg/dl)。 \n" +
|
|
|
+ "\n" +
|
|
|
+ " 3. NCEPATPⅢ提出的医学决定水平: \n" +
|
|
|
+ "\n" +
|
|
|
+ " <1.03mmol/L(40mg/dl)为降低,CHD发生风险增高。 \n" +
|
|
|
+ "\n" +
|
|
|
+ " ≥1.55mmol/L(60mg/dl),CHD发生风险降低。\n" +
|
|
|
+ "\n" +
|
|
|
+ "输出:[\"成年男性HDL-C为1.16~1.42mmol/L(45~55mg/dl)\",\"女性为1.29~1.55mmol/L(50~60mg/dl)\",\"正常人HDL-C约占TC的25%~30%\",\"理想范围:>1.04mmol/L(>40mg/dl)\",\"升高:≥1.55mmol/L(60mg/dl)\",\"降低:<1.04mmol/L(<40mg/dl)\",\"≥1.55mmol/L(60mg/dl),CHD发生风险降低\",\"≥1.55mmol/L(60mg/dl),CHD发生风险降低\"]\n\n"+
|
|
|
+
|
|
|
+ "2、没有可抽取的“" + property + "”,则返回空json数组。\n" +
|
|
|
+ "\n" +
|
|
|
+ "本次抽取的文本如下:\n\n";
|
|
|
+
|
|
|
+ String zhiling = format + zhaiyao;
|
|
|
+ System.out.println(zhiling);
|
|
|
+ String chatResponse = BaidubceUtil.getChatResponse(zhiling, accessToken);
|
|
|
+ chatResponse = filte(chatResponse);
|
|
|
+ System.out.println(chatResponse);
|
|
|
+ return chatResponse;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static synchronized void save(String fileName) {
|
|
|
+ try {
|
|
|
+ fileName="C:\\Users\\17664\\Desktop\\"+fileName+System.currentTimeMillis()+".xlsx";
|
|
|
+ //文档输出
|
|
|
+ FileOutputStream out = new FileOutputStream(new File(fileName));
|
|
|
+ workbook.write(out);
|
|
|
+ out.close();
|
|
|
+ System.out.println(fileName + "存储完毕");
|
|
|
+ } catch (IOException e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String filte(String chatResponse) {
|
|
|
+ if (chatResponse.startsWith("```json")) {
|
|
|
+ chatResponse = chatResponse.substring(7);
|
|
|
+ }
|
|
|
+ if (chatResponse.endsWith("```")) {
|
|
|
+ chatResponse = chatResponse.substring(0, chatResponse.length() - 3);
|
|
|
+ }
|
|
|
+ return chatResponse;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+}
|
|
|
+
|