|
@@ -0,0 +1,347 @@
|
|
|
+package com.qizhen.healsphere;
|
|
|
+
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
+import com.qizhen.healsphere.common.ai.BaidubceUtil;
|
|
|
+import com.qizhen.healsphere.common.ai.Knowlege;
|
|
|
+import com.qizhen.healsphere.common.ai.QizhenAssistant;
|
|
|
+import com.qizhen.healsphere.repository.neo4j.entity.BaseEntity;
|
|
|
+import com.qizhen.healsphere.service.EntityService;
|
|
|
+import com.qizhen.healsphere.service.RelationshipService;
|
|
|
+import com.qizhen.healsphere.web.vo.CreateEntityVO;
|
|
|
+import com.qizhen.healsphere.web.vo.RelationshipVO;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFRow;
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFSheet;
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
|
|
+import org.apache.poi.ss.usermodel.Row;
|
|
|
+import org.apache.poi.ss.usermodel.Sheet;
|
|
|
+import org.apache.poi.ss.usermodel.Workbook;
|
|
|
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
|
|
+import org.junit.Test;
|
|
|
+import org.junit.runner.RunWith;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.boot.test.context.SpringBootTest;
|
|
|
+import org.springframework.context.annotation.ComponentScan;
|
|
|
+import org.springframework.test.context.junit4.SpringRunner;
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
+
|
|
|
+import java.io.*;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
+import java.util.Objects;
|
|
|
+
|
|
|
+@RunWith(SpringRunner.class)
|
|
|
+@ComponentScan(basePackages = {"com.qizhen.healsphere.model","com.qizhen.healsphere.repository"})
|
|
|
+@SpringBootTest
|
|
|
+public class DataWriteTestDSV3 {
|
|
|
+ // Service used by writeNeo4j to create the relationships between graph nodes.
+ @Autowired
|
|
|
+ RelationshipService relationshipService;
|
|
|
+ // Service used by createNoExists to look up nodes by name and to create missing ones.
+ @Autowired
|
|
|
+ EntityService entityService;
|
|
|
+ // When greater than 0, getData stops reading the source sheet once the row index reaches this value.
+ private static int maxCount= 10;
|
|
|
+ // When true, getData asks the model a yes/no verification question for every extracted item.
+ private static boolean selfCheck = true;
|
|
|
+ // Path of the .xlsx workbook opened by getData; column 0 of its first sheet is read as the disease name.
+ private static String urlExcelPath = "C:\\Users\\17664\\Desktop\\疾病.xlsx";
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void writeNeo4j() {
|
|
|
+ String startLabel = "疾病";
|
|
|
+ String propertyStr = "并发症\t的并发症有哪些疾病";
|
|
|
+ String[] properties = propertyStr.split(",");
|
|
|
+ workbook = new HSSFWorkbook();//这里也可以设置sheet的Name
|
|
|
+ String fileName = "temp";//
|
|
|
+ HSSFSheet sheet = workbook.createSheet(fileName);
|
|
|
+ int rows = 0;
|
|
|
+ for(String property:properties){
|
|
|
+ String[] split = property.split("\t");
|
|
|
+ if(split.length<2){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ String endLabel = split[0];
|
|
|
+ List<Knowlege> data = getData(BaidubceUtil.getAccessToken(), property);
|
|
|
+ if(!CollectionUtils.isEmpty(data)) {
|
|
|
+
|
|
|
+ for (Knowlege temp:data) {
|
|
|
+ HSSFRow row = sheet.createRow(rows++);
|
|
|
+ row.createCell(0).setCellValue(temp.getEntity() == null ? "" : temp.getEntity());
|
|
|
+ row.createCell(1).setCellValue(temp.getProperty() == null ? "" : temp.getProperty());
|
|
|
+ row.createCell(2).setCellValue(temp.getValue() == null ? "" : temp.getValue());
|
|
|
+ row.createCell(3).setCellValue(temp.getQuestion() == null ? "" : temp.getQuestion());
|
|
|
+ row.createCell(4).setCellValue(temp.getAnswer() == null ? "" : temp.getAnswer());
|
|
|
+ row.createCell(5).setCellValue(temp.getChunk() == null ? "" : temp.getChunk());
|
|
|
+ row.createCell(6).setCellValue(temp.getRefenrece() == null ? "" : temp.getRefenrece());
|
|
|
+ }
|
|
|
+ save(fileName);
|
|
|
+
|
|
|
+ for (Knowlege temp:data) {
|
|
|
+ String value = temp.getValue();
|
|
|
+ if(StringUtils.isBlank(value)){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ try{
|
|
|
+ JSONArray jsonArray = JSONArray.parseArray(value);
|
|
|
+ BaseEntity startEntity = createNoExists(startLabel, temp.getEntity());
|
|
|
+ long startId = startEntity.getId();
|
|
|
+ List<RelationshipVO> relationshipList = new ArrayList<>();
|
|
|
+ for(int i=0;i<jsonArray.size();i++){
|
|
|
+ String name = jsonArray.getString(i);
|
|
|
+ if(StringUtils.isEmpty(name)){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ BaseEntity endEntity = createNoExists(endLabel, name);
|
|
|
+ Long endId = endEntity.getId();
|
|
|
+ RelationshipVO relationshipVO = new RelationshipVO();
|
|
|
+ relationshipVO.setStartId(startId);
|
|
|
+ relationshipVO.setEndId(endId);
|
|
|
+ relationshipVO.setStartLabel(startLabel);
|
|
|
+ relationshipVO.setEndLabel(endLabel);
|
|
|
+ relationshipVO.setRelationshipType(startLabel+"相关"+endLabel);
|
|
|
+ relationshipList.add(relationshipVO);
|
|
|
+ }
|
|
|
+ if(!CollectionUtils.isEmpty(relationshipList)) {
|
|
|
+ System.out.println( relationshipService.createRelationship(relationshipList));
|
|
|
+ }
|
|
|
+ }catch (Exception e){
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+    /**
+     * Returns the node with the given label and name, creating it first when the lookup finds
+     * nothing.
+     *
+     * @param labelName label passed to the lookup and set on a newly created node
+     * @param name      name passed to the lookup and set on a newly created node
+     * @return the node found by {@code entityService.findNodeByName}, otherwise the result of
+     *         {@code entityService.create}
+     */
+    private BaseEntity createNoExists(String labelName, String name) {
+        BaseEntity existing = entityService.findNodeByName(labelName, name);
+        if (existing == null) {
+            // No such node yet: build the creation request and create it.
+            CreateEntityVO request = new CreateEntityVO();
+            request.setName(name);
+            request.setLabel(labelName);
+            return entityService.create(request);
+        }
+        // The node already exists, reuse it.
+        return existing;
+    }
|
|
|
+
|
|
|
+ public static void main(String[] args) throws Exception {
|
|
|
+
|
|
|
+ String accessToken = BaidubceUtil.getAccessToken();
|
|
|
+ String propertyStr = "并发症\t的并发症有哪些疾病" +
|
|
|
+ ",常见并发症\t的常见并发症或常并发的疾病有哪些疾病" +
|
|
|
+ ",提示病情加重或进展的检查指标\t的相关检查中可以提示病情加重或进展的指标变化有哪些" +
|
|
|
+ ",早期预警指标\t疾病早期的诊疗过程或病程中出现哪些体检指标的异常或临床症状的恶化,可能提示疾病进一步进展、加重或恶化" +
|
|
|
+ ",病因\t的病因或常见病因有哪些病因或致病原因" +
|
|
|
+ ",危险因素\t的危险因素有哪些危险因素(不要病因)" +
|
|
|
+ ",遗传方式\t的遗传方式有哪些遗传方式(如果该病无遗传方式,可回答“无”)" +
|
|
|
+ ",遗传基因\t的遗传基因名称有哪些遗传基因名称" +
|
|
|
+ ",诱因\t的诱因或诱发因素或常见诱因有哪些诱因" +
|
|
|
+ ",出院标准\t的出院标准有哪些";
|
|
|
+ saveExel(propertyStr, accessToken,"v3");
|
|
|
+ }
|
|
|
+
|
|
|
+ // Workbook currently being filled; re-created by writeNeo4j and saveExel and written to disk by save.
+ static HSSFWorkbook workbook;
|
|
|
+ // Number of self-check questions asked by getData since the last reset in saveExel.
+ static Long totalCount = 0l;
|
|
|
+ // Self-check answers that getData counted as "yes".
+ static Long successCount = 0l;
|
|
|
+ // Self-check answers that getData counted as "no".
+ static Long failCount = 0l;
|
|
|
+ // Self-check answers that getData counted as unknown.
+ static Long unkonwCount = 0l;
|
|
|
+    /**
+     * Runs the extraction for every property in {@code propertyStr} and collects the rows in a
+     * new workbook whose single sheet is named {@code fileName}.
+     *
+     * <p>After each property the workbook is saved and the self-check counters are reset. When
+     * the property produced rows and at least one self-check question was asked, the success and
+     * unknown rates (two decimals) are written into cells 7 and 8 of the last row produced for
+     * that property.
+     *
+     * @param propertyStr comma-separated property entries, each handed to {@code getData} as is
+     * @param accessToken token forwarded to {@code getData}
+     * @param fileName    sheet name; also forwarded to {@code save}
+     */
+    private static void saveExel(String propertyStr, String accessToken, String fileName) {
+        String[] properties = propertyStr.split(",");
+        workbook = new HSSFWorkbook();
+        HSSFSheet sheet = workbook.createSheet(fileName);
+        int rows = 0;
+        for (String property : properties) {
+            int firstRowOfProperty = rows;
+            List<Knowlege> data = getData(accessToken, property);
+            if (!CollectionUtils.isEmpty(data)) {
+                for (Knowlege temp : data) {
+                    HSSFRow row = sheet.createRow(rows++);
+                    String[] cells = {
+                        temp.getEntity(),
+                        temp.getProperty(),
+                        temp.getValue(),
+                        temp.getQuestion(),
+                        temp.getAnswer(),
+                        temp.getChunk(),
+                        temp.getRefenrece()
+                    };
+                    for (int col = 0; col < cells.length; col++) {
+                        row.createCell(col).setCellValue(cells[col] == null ? "" : cells[col]);
+                    }
+                }
+            }
+
+            // Only write statistics when this property actually produced a row and a self-check
+            // ran. Otherwise there is no row to attach them to (getRow(-1) is null), the rates
+            // would be NaN, or they would overwrite the statistics of the previous property.
+            if (rows > firstRowOfProperty && totalCount > 0) {
+                HSSFRow lastRow = sheet.getRow(rows - 1);
+                String successRate = String.format("%.2f", (double) successCount / totalCount);
+                String unkonwRate = String.format("%.2f", (double) unkonwCount / totalCount);
+                lastRow.createCell(7).setCellValue(successRate);
+                lastRow.createCell(8).setCellValue(unkonwRate);
+            }
+            save(fileName);
+
+            // Start the next property with clean counters.
+            totalCount = 0L;
+            successCount = 0L;
+            failCount = 0L;
+            unkonwCount = 0L;
+        }
+    }
|
|
|
+
|
|
|
+    /**
+     * Writes the shared {@code workbook} to a new, timestamped file on the desktop.
+     *
+     * <p>The workbook is an {@code HSSFWorkbook}, i.e. the binary Excel format, so the file is
+     * given the {@code .xls} extension; the same bytes under an {@code .xlsx} name are rejected
+     * by Excel. I/O failures are printed and otherwise ignored.
+     *
+     * @param fileName base name of the output file; the current time in milliseconds and the
+     *                 extension are appended
+     */
+    private static synchronized void save(String fileName) {
+        String path = "C:\\Users\\17664\\Desktop\\" + fileName + System.currentTimeMillis() + ".xls";
+        // try-with-resources closes the stream even when write() fails.
+        try (FileOutputStream out = new FileOutputStream(new File(path))) {
+            workbook.write(out);
+        } catch (IOException e) {
+            e.printStackTrace();
+            return;
+        }
+        System.out.println(path + "存储完毕");
+    }
|
|
|
+
|
|
|
+ private static List<Knowlege> getData(String accessToken,String property) {
|
|
|
+ List<Knowlege> list = new ArrayList<>();
|
|
|
+ try {
|
|
|
+ String appId= "5d271a65-754b-460a-9f9b-28b566d40538";
|
|
|
+ InputStream fis = new FileInputStream(urlExcelPath);
|
|
|
+ Workbook urlWorkbook = new XSSFWorkbook(fis);
|
|
|
+ Sheet urlSheet = urlWorkbook.getSheetAt(0);
|
|
|
+
|
|
|
+ for (int rowNum = 0; rowNum <= urlSheet.getLastRowNum(); rowNum++) {
|
|
|
+ if (maxCount > 0) {
|
|
|
+ if (rowNum >= maxCount) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ Row row = urlSheet.getRow(rowNum);
|
|
|
+ String disease = row.getCell(0).getStringCellValue();
|
|
|
+ if(StringUtils.isEmpty(disease)){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ disease = disease.trim();
|
|
|
+ String[] split = property.split("\t");
|
|
|
+ if(split.length<2){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ String quetionParty = split[1];
|
|
|
+ String relation = split[0];
|
|
|
+ String question = disease + quetionParty + "?";
|
|
|
+
|
|
|
+ Map<String, String> result = QizhenAssistant.getChatResponse(question, QizhenAssistant.getConversationId(appId),appId);
|
|
|
+ String answer = result.get("answer");
|
|
|
+ String references = result.get("references");
|
|
|
+ String defaultReferences = result.get("defaultReferences");
|
|
|
+ String chatResponse = "";
|
|
|
+ if (!("failed".equals(answer) || answer.contains(QizhenAssistant.noAnswer))) {
|
|
|
+ String format = "你是专门处理医学领域文本的关系抽取专家。你将在指定的文本中抽取其中“"+disease+"的"+relation+"”。\n" +
|
|
|
+ "\n" +
|
|
|
+ "#要求\n" +
|
|
|
+ "1、抽取的结果将以JSON数组的形式呈现。每个抽取的“"+relation+"”高度简洁、高度概括,不要要描述性的文字,文字尽量保持在12个字符以内!\n" +
|
|
|
+ "\n" +
|
|
|
+ "#示例1\n" +
|
|
|
+ "以抽取“分期”为例\n" +
|
|
|
+ "文本:\n" +
|
|
|
+ "肱骨骨折如果是**肱骨头坏死**则有Cruess分期,包括I期、Ⅱ期、Ⅲ期、IV期、V期^[1]^。\n" +
|
|
|
+ "\n" +
|
|
|
+ "如果是肱骨近端骨折则有Neer分型和AO分型^[3]^。\n" +
|
|
|
+ "输出:[\"Cruess分期I期\",\"Cruess分期Ⅱ期\",\"Cruess分期Ⅲ期\",\"Cruess分期IV期\",\"Cruess分期V期\"]\n" +
|
|
|
+ "\n" +
|
|
|
+ "#示例2\n" +
|
|
|
+ "以抽取“是否传染病”为例\n" +
|
|
|
+ "文本:\n" +
|
|
|
+ "**是**^[2][4][6]^。\n" +
|
|
|
+ "\n" +
|
|
|
+ "输出:[\"是\"]\n\n"+
|
|
|
+ "2、没有可抽取的“"+relation+"”,则返回空json数组。\n" +
|
|
|
+ "\n" +
|
|
|
+ "本次抽取的文本如下:\n\n";
|
|
|
+
|
|
|
+ String zhiling = format+ answer;
|
|
|
+ System.out.println(zhiling);
|
|
|
+ chatResponse = BaidubceUtil.getChatResponse(zhiling, accessToken);
|
|
|
+ chatResponse = filte(chatResponse);
|
|
|
+ JSONArray jsonResult = new JSONArray();
|
|
|
+ if(!StringUtils.isBlank(chatResponse)){
|
|
|
+ try{
|
|
|
+ JSONArray jsonArray = JSONArray.parseArray(chatResponse);
|
|
|
+ JSONArray referenceJA = JSONArray.parseArray(references);
|
|
|
+ for(int t=0;t<jsonArray.size();t++){
|
|
|
+ String name = jsonArray.getString(t);
|
|
|
+ JSONObject temp = new JSONObject();
|
|
|
+ temp.put("name", name);
|
|
|
+ JSONArray tempReferenceJA = new JSONArray();
|
|
|
+ for(int r=0;r<referenceJA.size();r++){
|
|
|
+ JSONObject referenceJO = referenceJA.getJSONObject(r);
|
|
|
+ String content = referenceJO.getString("content");
|
|
|
+ String contentFilted = content.replaceAll("\\s+", "");
|
|
|
+ if(contentFilted.contains(name)){
|
|
|
+ //JSONObject clone = referenceJO.clone();
|
|
|
+ tempReferenceJA.add(referenceJO.getString("title"));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(selfCheck) {
|
|
|
+ String llmQuestion = "你是一个资深的医学专家,请用“是”、“否”和“不确定”回答用户的问题回答。\n" +
|
|
|
+ "\n" +
|
|
|
+ "#要求\n" +
|
|
|
+ "1、只回答“是”、“否”和“不确定”,不要有额外的信息。\n" +
|
|
|
+ "\n" +
|
|
|
+ "#示例\n" +
|
|
|
+ "用户输入:急性上呼吸道感染的治疗药物是否包括“利巴韦林”?\n" +
|
|
|
+ "输出:是\n\n" +
|
|
|
+ "请回答:" + disease + "的" + relation + "是否包括“" + name + "”?";
|
|
|
+ String llmAnswer = BaidubceUtil.getChatResponse(llmQuestion, accessToken);
|
|
|
+ totalCount++;
|
|
|
+ temp.put("LLM-question", llmQuestion);
|
|
|
+ temp.put("LLM-answer", llmAnswer);
|
|
|
+ if (!StringUtils.isEmpty(llmAnswer) && llmAnswer.length() < 7) {
|
|
|
+ if (llmAnswer.contains("是")) {
|
|
|
+ successCount++;
|
|
|
+ } else if (llmAnswer.contains("否")) {
|
|
|
+ failCount++;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ unkonwCount++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ temp.put("reference", tempReferenceJA);
|
|
|
+ if(tempReferenceJA.size()<1){
|
|
|
+ temp.put("defaultReferences", defaultReferences);
|
|
|
+ }
|
|
|
+ jsonResult.add(temp);
|
|
|
+ }
|
|
|
+ }catch (Exception e){
|
|
|
+ System.out.println("######"+chatResponse);
|
|
|
+ e.printStackTrace();
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ addNode(disease, relation, chatResponse,jsonResult.toJSONString(), answer, question, result, list);
|
|
|
+ }else {
|
|
|
+ addNode(disease, relation, "","", answer, question, result, list);
|
|
|
+ }
|
|
|
+ }catch (Exception e){
|
|
|
+ System.out.println("抽取三元组失败!");
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return list;
|
|
|
+ } catch (Exception e) {
|
|
|
+ System.out.println("未知错误!");
|
|
|
+ e.printStackTrace();
|
|
|
+ }finally {
|
|
|
+ return list;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+    /**
+     * Packs one extraction result into a {@code Knowlege} and appends it to {@code list}.
+     *
+     * @param disease      stored as the entity
+     * @param property     stored as the property
+     * @param chatResponse stored as the value
+     * @param refences     stored as the reference field
+     * @param answer       stored as the answer
+     * @param question     stored as the question
+     * @param result       map whose {@code "references"} entry is stored as the chunk
+     * @param list         list that receives the new element
+     */
+    private static void addNode(String disease, String property, String chatResponse,String refences, String answer, String question, Map<String, String> result, List<Knowlege> list) {
+        String chunk = result.get("references");
+        Knowlege entry = new Knowlege();
+        // What was extracted.
+        entry.setEntity(disease);
+        entry.setProperty(property);
+        entry.setValue(chatResponse);
+        // Where it came from.
+        entry.setQuestion(question);
+        entry.setAnswer(answer);
+        entry.setChunk(chunk);
+        entry.setRefenrece(refences);
+        list.add(entry);
+    }
|
|
|
+
|
|
|
+    /**
+     * Strips a surrounding Markdown code fence from a model reply.
+     *
+     * <p>Leading and trailing whitespace is removed first, so a reply that ends with a newline
+     * after the closing fence is unwrapped as well. An opening fence may be either "```json" or
+     * a bare "```".
+     *
+     * @param chatResponse raw model reply, may be {@code null}
+     * @return the reply without the fence and without surrounding whitespace; an empty string
+     *         when {@code chatResponse} is {@code null}
+     */
+    private static String filte(String chatResponse) {
+        if (chatResponse == null) {
+            return "";
+        }
+        String text = chatResponse.trim();
+        if (text.startsWith("```json")) {
+            text = text.substring(7);
+        } else if (text.startsWith("```")) {
+            text = text.substring(3);
+        }
+        if (text.endsWith("```")) {
+            text = text.substring(0, text.length() - 3);
+        }
+        return text.trim();
+    }
|
|
|
+}
|
|
|
+
|