|
@@ -0,0 +1,351 @@
|
|
|
|
+package com.qizhen.healsphere;
|
|
|
|
+
|
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
|
+import com.qizhen.healsphere.common.ai.BaidubceUtil;
|
|
|
|
+import com.qizhen.healsphere.common.ai.Knowlege;
|
|
|
|
+import com.qizhen.healsphere.common.ai.QizhenAssistant;
|
|
|
|
+import com.qizhen.healsphere.repository.neo4j.entity.BaseEntity;
|
|
|
|
+import com.qizhen.healsphere.service.EntityService;
|
|
|
|
+import com.qizhen.healsphere.service.RelationshipService;
|
|
|
|
+import com.qizhen.healsphere.web.vo.CreateEntityVO;
|
|
|
|
+import com.qizhen.healsphere.web.vo.RelationshipVO;
|
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFRow;
|
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFSheet;
|
|
|
|
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
|
|
|
+import org.apache.poi.ss.usermodel.Row;
|
|
|
|
+import org.apache.poi.ss.usermodel.Sheet;
|
|
|
|
+import org.apache.poi.ss.usermodel.Workbook;
|
|
|
|
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
|
|
|
+import org.junit.Test;
|
|
|
|
+import org.junit.runner.RunWith;
|
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
+import org.springframework.boot.test.context.SpringBootTest;
|
|
|
|
+import org.springframework.context.annotation.ComponentScan;
|
|
|
|
+import org.springframework.test.context.junit4.SpringRunner;
|
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
|
+
|
|
|
|
+import java.io.*;
|
|
|
|
+import java.util.ArrayList;
|
|
|
|
+import java.util.List;
|
|
|
|
+import java.util.Map;
|
|
|
|
+import java.util.Objects;
|
|
|
|
+
|
|
|
|
+@RunWith(SpringRunner.class)
|
|
|
|
+@ComponentScan(basePackages = {"com.qizhen.healsphere.model","com.qizhen.healsphere.repository"})
|
|
|
|
+@SpringBootTest
|
|
|
|
+public class ZhengzhuangDataWriteTest31 {
|
|
|
|
+ @Autowired
|
|
|
|
+ RelationshipService relationshipService;
|
|
|
|
+ @Autowired
|
|
|
|
+ EntityService entityService;
|
|
|
|
+ private static int maxCount= 2;
|
|
|
|
+ private static boolean selfCheck = false;
|
|
|
|
+ private static String urlExcelPath = "C:\\Users\\17664\\Desktop\\症状列表.xlsx";
|
|
|
|
+ @Test
|
|
|
|
+ public void writeNeo4j() {
|
|
|
|
+ String startLabel = "症状";
|
|
|
|
+ String propertyStr = "别名##症状\t“#症状#”此处作为一种“症状”,“#症状#”的别称或别名有哪些(请更加严格的遵照文献原文,不要把子类名称当做别称给我)" +
|
|
|
|
+ ",常伴随的症状##症状\t“#症状#”此处作为一种“症状”,“#症状#”常伴随的症状有哪些症状" +
|
|
|
|
+ ",常见病因\t“#症状#”此处作为一种“症状”,“#症状#”的常见病因有哪些" +
|
|
|
|
+ ",常见疾病##疾病\t“#症状#”此处作为一种“症状”,“#症状#”常见于哪些疾病" +
|
|
|
|
+ ",子类或分类##症状\t“#症状#”此处作为一种“症状”,“#症状#”的症状子类或症状分类有哪些症状(答案必须包含“#症状#”几个字,否则不提取)";
|
|
|
|
+ String[] properties = propertyStr.split(",");
|
|
|
|
+ workbook = new HSSFWorkbook();//这里也可以设置sheet的Name
|
|
|
|
+ String fileName = "temp";//
|
|
|
|
+ HSSFSheet sheet = workbook.createSheet(fileName);
|
|
|
|
+ int rows = 0;
|
|
|
|
+ for(String property:properties){
|
|
|
|
+ String[] split = property.split("\t");
|
|
|
|
+ if(split.length<2){
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+ List<Knowlege> data = getData(BaidubceUtil.getAccessToken(), property);
|
|
|
|
+ if(!CollectionUtils.isEmpty(data)) {
|
|
|
|
+
|
|
|
|
+ for (Knowlege temp:data) {
|
|
|
|
+ HSSFRow row = sheet.createRow(rows++);
|
|
|
|
+ row.createCell(0).setCellValue(temp.getEntity() == null ? "" : temp.getEntity());
|
|
|
|
+ row.createCell(1).setCellValue(temp.getProperty() == null ? "" : temp.getProperty());
|
|
|
|
+ row.createCell(2).setCellValue(temp.getValue() == null ? "" : temp.getValue());
|
|
|
|
+ row.createCell(3).setCellValue(temp.getQuestion() == null ? "" : temp.getQuestion());
|
|
|
|
+ row.createCell(4).setCellValue(temp.getAnswer() == null ? "" : temp.getAnswer());
|
|
|
|
+ row.createCell(5).setCellValue(temp.getChunk() == null ? "" : temp.getChunk());
|
|
|
|
+ row.createCell(6).setCellValue(temp.getRefenrece() == null ? "" : temp.getRefenrece());
|
|
|
|
+ }
|
|
|
|
+ save(fileName);
|
|
|
|
+ String endLabel = split[0];
|
|
|
|
+ String relationShip = split[0];
|
|
|
|
+ String[] endLabelSplit = endLabel.split("##");
|
|
|
|
+ if(endLabelSplit.length==2){
|
|
|
|
+ endLabel = endLabelSplit[1];
|
|
|
|
+ relationShip= endLabelSplit[0];
|
|
|
|
+ }
|
|
|
|
+ if(!startLabel.equals(endLabel)){
|
|
|
|
+ endLabel = startLabel+endLabel;
|
|
|
|
+ }
|
|
|
|
+ for (Knowlege temp:data) {
|
|
|
|
+ String value = temp.getValue();
|
|
|
|
+ if(StringUtils.isBlank(value)){
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+ try{
|
|
|
|
+ JSONArray jsonArray = JSONArray.parseArray(value);
|
|
|
|
+ BaseEntity startEntity = createNoExists(startLabel, temp.getEntity());
|
|
|
|
+ long startId = startEntity.getId();
|
|
|
|
+ List<RelationshipVO> relationshipList = new ArrayList<>();
|
|
|
|
+ for(int i=0;i<jsonArray.size();i++){
|
|
|
|
+ String name = jsonArray.getString(i);
|
|
|
|
+ if(StringUtils.isEmpty(name)){
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+ BaseEntity endEntity = createNoExists(endLabel, name);
|
|
|
|
+ Long endId = endEntity.getId();
|
|
|
|
+ RelationshipVO relationshipVO = new RelationshipVO();
|
|
|
|
+ relationshipVO.setStartId(startId);
|
|
|
|
+ relationshipVO.setEndId(endId);
|
|
|
|
+ relationshipVO.setStartLabel(startLabel);
|
|
|
|
+ relationshipVO.setEndLabel(endLabel);
|
|
|
|
+ relationshipVO.setRelationshipType(startLabel+"相关"+relationShip);
|
|
|
|
+ relationshipList.add(relationshipVO);
|
|
|
|
+ }
|
|
|
|
+ if(!CollectionUtils.isEmpty(relationshipList)) {
|
|
|
|
+ System.out.println( relationshipService.createRelationship(relationshipList));
|
|
|
|
+ }
|
|
|
|
+ }catch (Exception e){
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private BaseEntity createNoExists(String labelName, String name) {
|
|
|
|
+ BaseEntity nodeByName = entityService.findNodeByName(labelName, name);
|
|
|
|
+ if(Objects.nonNull(nodeByName)){//节点不存在
|
|
|
|
+ return nodeByName;
|
|
|
|
+ }
|
|
|
|
+ CreateEntityVO createEntity = new CreateEntityVO();
|
|
|
|
+ createEntity.setName(name);
|
|
|
|
+ createEntity.setLabel(labelName);
|
|
|
|
+ return entityService.create(createEntity);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public static void main(String[] args) throws Exception {
|
|
|
|
+
|
|
|
|
+ String accessToken = BaidubceUtil.getAccessToken();
|
|
|
|
+ String propertyStr = "症状的子类或分类(按性质、特点等细分)\t“#症状#”此处作为一种“症状”,“#症状#”的子类症状或子分类症状有哪些症状?(答案必须包含“#症状#”几个字!)";
|
|
|
|
+
|
|
|
|
+ saveExel(propertyStr, accessToken,"检查");
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ static HSSFWorkbook workbook;
|
|
|
|
+ static Long totalCount = 0l;
|
|
|
|
+ static Long successCount = 0l;
|
|
|
|
+ static Long failCount = 0l;
|
|
|
|
+ static Long unkonwCount = 0l;
|
|
|
|
+ private static void saveExel(String propertyStr, String accessToken,String fileName) {
|
|
|
|
+ String[] properties = propertyStr.split(",");
|
|
|
|
+ workbook = new HSSFWorkbook();//这里也可以设置sheet的Name
|
|
|
|
+ HSSFSheet sheet = workbook.createSheet(fileName);
|
|
|
|
+ int rows = 0;
|
|
|
|
+ for(String property:properties){
|
|
|
|
+ List<Knowlege> data = getData(accessToken, property);
|
|
|
|
+ if(!CollectionUtils.isEmpty(data)) {
|
|
|
|
+ for (Knowlege temp:data) {
|
|
|
|
+ HSSFRow row = sheet.createRow(rows++);
|
|
|
|
+ row.createCell(0).setCellValue(temp.getEntity() == null ? "" : temp.getEntity());
|
|
|
|
+ row.createCell(1).setCellValue(temp.getProperty() == null ? "" : temp.getProperty());
|
|
|
|
+ row.createCell(2).setCellValue(temp.getValue() == null ? "" : temp.getValue());
|
|
|
|
+ row.createCell(3).setCellValue(temp.getQuestion() == null ? "" : temp.getQuestion());
|
|
|
|
+ row.createCell(4).setCellValue(temp.getAnswer() == null ? "" : temp.getAnswer());
|
|
|
|
+ row.createCell(5).setCellValue(temp.getChunk() == null ? "" : temp.getChunk());
|
|
|
|
+ row.createCell(6).setCellValue(temp.getRefenrece() == null ? "" : temp.getRefenrece());
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ String successRate = String.format("%.2f", Double.valueOf(successCount)/ Double.valueOf(totalCount));
|
|
|
|
+ String unkonwRate = String.format("%.2f", Double.valueOf(unkonwCount)/ Double.valueOf(totalCount));
|
|
|
|
+ sheet.getRow(rows-1).createCell(7).setCellValue(successRate);
|
|
|
|
+ sheet.getRow(rows-1).createCell(8).setCellValue(unkonwRate);
|
|
|
|
+ save(fileName);
|
|
|
|
+ totalCount = 0l;
|
|
|
|
+ successCount = 0l;
|
|
|
|
+ failCount = 0l;
|
|
|
|
+ unkonwCount = 0l;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static synchronized void save(String fileName) {
|
|
|
|
+ try {
|
|
|
|
+ fileName="C:\\Users\\17664\\Desktop\\"+fileName+System.currentTimeMillis()+".xlsx";
|
|
|
|
+ //文档输出
|
|
|
|
+ FileOutputStream out = new FileOutputStream(new File(fileName));
|
|
|
|
+ workbook.write(out);
|
|
|
|
+ out.close();
|
|
|
|
+ System.out.println(fileName + "存储完毕");
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static List<Knowlege> getData(String accessToken,String property) {
|
|
|
|
+ List<Knowlege> list = new ArrayList<>();
|
|
|
|
+ try {
|
|
|
|
+ String appId= "3b615957-f9b1-4811-9dfc-13dcee6e0a37";
|
|
|
|
+ InputStream fis = new FileInputStream(urlExcelPath);
|
|
|
|
+ Workbook urlWorkbook = new XSSFWorkbook(fis);
|
|
|
|
+ Sheet urlSheet = urlWorkbook.getSheetAt(0);
|
|
|
|
+
|
|
|
|
+ for (int rowNum = 0; rowNum <= urlSheet.getLastRowNum(); rowNum++) {
|
|
|
|
+ try {
|
|
|
|
+ if(maxCount>0) {
|
|
|
|
+ if (rowNum > maxCount) {
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ Row row = urlSheet.getRow(rowNum);
|
|
|
|
+ String disease = row.getCell(0).getStringCellValue();
|
|
|
|
+ if(StringUtils.isEmpty(disease)){
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+ disease = disease.trim();
|
|
|
|
+ String[] split = property.split("\t");
|
|
|
|
+ if(split.length<2){
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+ String quetionParty = split[1];
|
|
|
|
+ String question = quetionParty.replaceAll("#症状#",disease)+"?";
|
|
|
|
+
|
|
|
|
+ Map<String, String> result = QizhenAssistant.getChatResponse(question, QizhenAssistant.getConversationId(appId),appId);
|
|
|
|
+ String answer = result.get("answer");
|
|
|
|
+ String references = result.get("references");
|
|
|
|
+ String defaultReferences = result.get("defaultReferences");
|
|
|
|
+ String chatResponse = "";
|
|
|
|
+ String relation = split[0].split("##")[0];
|
|
|
|
+ if (!("failed".equals(answer) || answer.contains(QizhenAssistant.noAnswer))) {
|
|
|
|
+ String format = "你是专门处理医学领域文本的关系抽取专家。你将在指定的文本中抽取其中“"+disease+"的"+relation+"”。\n" +
|
|
|
|
+ "\n" +
|
|
|
|
+ "#要求\n" +
|
|
|
|
+ "1、抽取的结果将以JSON数组的形式呈现。每个抽取的“"+relation+"”高度简洁、高度概括,不要要描述性的文字,文字尽量保持在12个字符以内!\n" +
|
|
|
|
+ "\n" +
|
|
|
|
+ "#示例1\n" +
|
|
|
|
+ "以抽取“分期”为例\n" +
|
|
|
|
+ "文本:\n" +
|
|
|
|
+ "肱骨骨折如果是**肱骨头坏死**则有Cruess分期,包括I期、Ⅱ期、Ⅲ期、IV期、V期^[1]^。\n" +
|
|
|
|
+ "\n" +
|
|
|
|
+ "如果是肱骨近端骨折则有Neer分型和AO分型^[3]^。\n" +
|
|
|
|
+ "输出:[\"Cruess分期I期\",\"Cruess分期Ⅱ期\",\"Cruess分期Ⅲ期\",\"Cruess分期IV期\",\"Cruess分期V期\"]\n" +
|
|
|
|
+ "\n" +
|
|
|
|
+ "#示例2\n" +
|
|
|
|
+ "以抽取“是否传染病”为例\n" +
|
|
|
|
+ "文本:\n" +
|
|
|
|
+ "**是**^[2][4][6]^。\n" +
|
|
|
|
+ "\n" +
|
|
|
|
+ "输出:[\"是\"]\n\n"+
|
|
|
|
+ "2、没有可抽取的“"+relation+"”,则返回空json数组。\n" +
|
|
|
|
+ "\n" +
|
|
|
|
+ "本次抽取的文本如下:\n\n";
|
|
|
|
+
|
|
|
|
+ String zhiling = format+ answer;
|
|
|
|
+ System.out.println(zhiling);
|
|
|
|
+ chatResponse = BaidubceUtil.getChatResponse(zhiling, accessToken);
|
|
|
|
+ chatResponse = filte(chatResponse);
|
|
|
|
+ JSONArray jsonResult = new JSONArray();
|
|
|
|
+ if(!StringUtils.isBlank(chatResponse)){
|
|
|
|
+ try{
|
|
|
|
+ JSONArray jsonArray = JSONArray.parseArray(chatResponse);
|
|
|
|
+ JSONArray referenceJA = JSONArray.parseArray(references);
|
|
|
|
+ for(int t=0;t<jsonArray.size();t++){
|
|
|
|
+ String name = jsonArray.getString(t);
|
|
|
|
+ JSONObject temp = new JSONObject();
|
|
|
|
+ temp.put("name", name);
|
|
|
|
+ JSONArray tempReferenceJA = new JSONArray();
|
|
|
|
+ for(int r=0;r<referenceJA.size();r++){
|
|
|
|
+ JSONObject referenceJO = referenceJA.getJSONObject(r);
|
|
|
|
+ String content = referenceJO.getString("content");
|
|
|
|
+ String contentFilted = content.replaceAll("\\s+", "");
|
|
|
|
+ if(contentFilted.contains(name)){
|
|
|
|
+ //JSONObject clone = referenceJO.clone();
|
|
|
|
+ tempReferenceJA.add(referenceJO.getString("title"));
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if(selfCheck) {
|
|
|
|
+ String llmQuestion = "你是一个资深的医学专家,请用“是”、“否”和“不确定”回答用户的问题回答。\n" +
|
|
|
|
+ "\n" +
|
|
|
|
+ "#要求\n" +
|
|
|
|
+ "1、只回答“是”、“否”和“不确定”,不要有额外的信息。\n" +
|
|
|
|
+ "\n" +
|
|
|
|
+ "#示例\n" +
|
|
|
|
+ "用户输入:急性上呼吸道感染的治疗药物是否包括“利巴韦林”?\n" +
|
|
|
|
+ "输出:是\n\n" +
|
|
|
|
+ "请回答:" + disease + "的" + relation + "是否包括“" + name + "”?";
|
|
|
|
+ String llmAnswer = BaidubceUtil.getChatResponse(llmQuestion, accessToken);
|
|
|
|
+ totalCount++;
|
|
|
|
+ temp.put("LLM-question", llmQuestion);
|
|
|
|
+ temp.put("LLM-answer", llmAnswer);
|
|
|
|
+ if (!StringUtils.isEmpty(llmAnswer) && llmAnswer.length() < 7) {
|
|
|
|
+ if (llmAnswer.contains("是")) {
|
|
|
|
+ successCount++;
|
|
|
|
+ } else if (llmAnswer.contains("否")) {
|
|
|
|
+ failCount++;
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ unkonwCount++;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ temp.put("reference", tempReferenceJA);
|
|
|
|
+ if(tempReferenceJA.size()<1){
|
|
|
|
+ temp.put("defaultReferences", defaultReferences);
|
|
|
|
+ }
|
|
|
|
+ jsonResult.add(temp);
|
|
|
|
+ }
|
|
|
|
+ }catch (Exception e){
|
|
|
|
+ System.out.println("######"+chatResponse);
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ addNode(disease, relation, chatResponse,jsonResult.toJSONString(), answer, question, result, list);
|
|
|
|
+ }else {
|
|
|
|
+ addNode(disease, relation, "","", answer, question, result, list);
|
|
|
|
+ }
|
|
|
|
+ }catch (Exception e){
|
|
|
|
+ System.out.println("抽取三元组失败!");
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return list;
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ System.out.println("未知错误!");
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }finally {
|
|
|
|
+ return list;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static void addNode(String disease, String property, String chatResponse,String refences, String answer, String question, Map<String, String> result, List<Knowlege> list) {
|
|
|
|
+ Knowlege knowlege = new Knowlege();
|
|
|
|
+ knowlege.setEntity(disease);
|
|
|
|
+ knowlege.setProperty(property);
|
|
|
|
+ knowlege.setValue(chatResponse);
|
|
|
|
+ knowlege.setAnswer(answer);
|
|
|
|
+ knowlege.setQuestion(question);
|
|
|
|
+ knowlege.setChunk(result.get("references"));
|
|
|
|
+ knowlege.setRefenrece(refences);
|
|
|
|
+ list.add(knowlege);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static String filte(String chatResponse) {
|
|
|
|
+ if (chatResponse.startsWith("```json")) {
|
|
|
|
+ chatResponse = chatResponse.substring(7);
|
|
|
|
+ }
|
|
|
|
+ if (chatResponse.endsWith("```")) {
|
|
|
|
+ chatResponse = chatResponse.substring(0, chatResponse.length() - 3);
|
|
|
|
+ }
|
|
|
|
+ return chatResponse;
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|