|
@@ -14,6 +14,10 @@ import org.apache.commons.lang3.StringUtils;
|
|
|
import org.apache.poi.hssf.usermodel.HSSFRow;
|
|
|
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
|
|
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
|
|
+import org.apache.poi.ss.usermodel.Row;
|
|
|
+import org.apache.poi.ss.usermodel.Sheet;
|
|
|
+import org.apache.poi.ss.usermodel.Workbook;
|
|
|
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
|
|
import org.junit.Test;
|
|
|
import org.junit.runner.RunWith;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
@@ -22,9 +26,7 @@ import org.springframework.context.annotation.ComponentScan;
|
|
|
import org.springframework.test.context.junit4.SpringRunner;
|
|
|
import org.springframework.util.CollectionUtils;
|
|
|
|
|
|
-import java.io.File;
|
|
|
-import java.io.FileOutputStream;
|
|
|
-import java.io.IOException;
|
|
|
+import java.io.*;
|
|
|
import java.util.ArrayList;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
@@ -39,107 +41,8 @@ public class DataWriteTest31 {
|
|
|
@Autowired
|
|
|
EntityService entityService;
|
|
|
|
|
|
- private static String diseaseStr = "风湿性心脏病" +
|
|
|
- ",原发性高血压" +
|
|
|
- ",急性心肌梗死" +
|
|
|
- ",急性冠脉综合征" +
|
|
|
- ",冠状动脉粥样硬化性心脏病" +
|
|
|
- ",心力衰竭" +
|
|
|
- ",心源性休克" +
|
|
|
- ",酒精中毒" +
|
|
|
- ",咽炎" +
|
|
|
- ",急性扁桃体炎" +
|
|
|
- ",变应性鼻炎" +
|
|
|
- ",慢性鼻窦炎" +
|
|
|
- ",鼻出血" +
|
|
|
- ",急性牙髓炎" +
|
|
|
- ",牙周病" +
|
|
|
- ",溃疡性口炎" +
|
|
|
- ",反流性食管炎" +
|
|
|
- ",胃溃疡" +
|
|
|
- ",慢性萎缩性胃炎" +
|
|
|
- ",功能性消化不良" +
|
|
|
- ",急性阑尾炎" +
|
|
|
- ",克罗恩病" +
|
|
|
- ",肠梗阻" +
|
|
|
- ",肝硬化" +
|
|
|
- ",急性胰腺炎" +
|
|
|
- ",过敏性皮炎" +
|
|
|
- ",湿疹" +
|
|
|
- ",银屑病" +
|
|
|
- ",带状疱疹" +
|
|
|
- ",类风湿性关节炎" +
|
|
|
- ",系统性红斑狼疮" +
|
|
|
- ",慢性肾炎" +
|
|
|
- ",肾病综合征" +
|
|
|
- ",慢性肾衰竭" +
|
|
|
- ",肾结石" +
|
|
|
- ",输尿管结石" +
|
|
|
- ",膀胱炎" +
|
|
|
- ",前列腺增生" +
|
|
|
- ",子宫内膜异位症" +
|
|
|
- ",卵巢囊肿" +
|
|
|
- ",痛经" +
|
|
|
- ",女性更年期综合征" +
|
|
|
- ",异位妊娠" +
|
|
|
- ",妊娠剧吐" +
|
|
|
- ",鼻咽恶性肿瘤" +
|
|
|
- ",肝恶性肿瘤" +
|
|
|
- ",乳房恶性肿瘤" +
|
|
|
- ",宫颈恶性肿瘤" +
|
|
|
- ",多发性骨髓瘤" +
|
|
|
- ",急性白血病" +
|
|
|
- ",血管瘤" +
|
|
|
- ",子宫平滑肌瘤" +
|
|
|
- ",新生儿黄疸" +
|
|
|
- ",新生儿腹泻" +
|
|
|
- ",缺铁性贫血" +
|
|
|
- ",地中海贫血" +
|
|
|
- ",血友病" +
|
|
|
- ",甲状腺功能减退症" +
|
|
|
- ",甲状腺功能亢进症" +
|
|
|
- ",桥本甲状腺炎" +
|
|
|
- ",2型糖尿病" +
|
|
|
- ",糖尿病" +
|
|
|
- ",甲状旁腺功能减退症" +
|
|
|
- ",卵巢早衰" +
|
|
|
- ",坏血病" +
|
|
|
- ",高脂血症" +
|
|
|
- ",高尿酸血症" +
|
|
|
- ",精神分裂症" +
|
|
|
- ",抑郁症" +
|
|
|
- ",化脓性脑膜炎" +
|
|
|
- ",帕金森病" +
|
|
|
- ",癫痫" +
|
|
|
- ",偏头痛" +
|
|
|
- ",脑梗死" +
|
|
|
- ",脑卒中" +
|
|
|
- ",阻塞性睡眠呼吸暂停综合征" +
|
|
|
- ",肺栓塞" +
|
|
|
- ",慢性肺源性心脏病" +
|
|
|
- ",急性上呼吸道感染" +
|
|
|
- ",病毒性肺炎" +
|
|
|
- ",社区获得性肺炎" +
|
|
|
- ",支气管肺炎" +
|
|
|
- ",肺炎" +
|
|
|
- ",支气管哮喘" +
|
|
|
- ",呼吸衰竭" +
|
|
|
- ",肺部感染" +
|
|
|
- ",急性泪腺炎" +
|
|
|
- ",急性泪囊炎" +
|
|
|
- ",巩膜炎" +
|
|
|
- ",角膜炎" +
|
|
|
- ",中耳炎" +
|
|
|
- ",风湿性关节炎" +
|
|
|
- ",颈椎病" +
|
|
|
- ",颈肩综合征" +
|
|
|
- ",坐骨神经痛" +
|
|
|
- ",肩周炎" +
|
|
|
- ",骨质疏松" +
|
|
|
- ",锁骨骨折" +
|
|
|
- ",肱骨骨折" +
|
|
|
- ",肩关节脱位";
|
|
|
-
|
|
|
+ private static boolean selfCheck = false; // When true, getData issues an extra LLM yes/no/uncertain verification call per extracted item and updates the success/fail/unknown counters; off by default.
|
|
|
+ private static String urlExcelPath = "C:\\Users\\17664\\Desktop\\疾病.xlsx"; // Path of the .xlsx workbook opened by getData; disease names are read from column 0 of its first sheet. NOTE(review): machine-specific absolute Windows path - will not resolve on other hosts.
|
|
|
|
|
|
@Test
|
|
|
public void writeNeo4j() {
|
|
@@ -147,7 +50,7 @@ public class DataWriteTest31 {
|
|
|
String propertyStr = "并发症\t的并发症有哪些疾病";
|
|
|
String[] properties = propertyStr.split(",");
|
|
|
workbook = new HSSFWorkbook();//这里也可以设置sheet的Name
|
|
|
- String fileName = "";//
|
|
|
+ String fileName = "temp";//
|
|
|
HSSFSheet sheet = workbook.createSheet(fileName);
|
|
|
int rows = 0;
|
|
|
for(String property:properties){
|
|
@@ -218,56 +121,24 @@ public class DataWriteTest31 {
|
|
|
return entityService.create(createEntity);
|
|
|
}
|
|
|
|
|
|
- public static void main(String[] args) {
|
|
|
- /*String disease = "咽炎";
|
|
|
- String relation = "临床表现";
|
|
|
- String answer = "**咽炎的临床表现为急性咽炎**^[1]^。";
|
|
|
- String format = "你是专门处理医学领域文本的关系抽取专家。你将在指定的文本中抽取其中“"+disease+"的"+relation+"”。\n" +
|
|
|
- "\n" +
|
|
|
- "#要求\n" +
|
|
|
- "1、抽取的结果将以JSON数组的形式呈现。每个抽取的“"+relation+"”高度简洁、高度概括,不要要描述性的文字,文字尽量保持在12个字符以内!\n" +
|
|
|
- "\n" +
|
|
|
- "#示例1\n" +
|
|
|
- "以抽取“主要症状”为例\n" +
|
|
|
- "文本:\n" +
|
|
|
- "臭汗症的主要症状有全身或局部多汗且有臭味^[1][3][4]^。\n" +
|
|
|
- "\n" +
|
|
|
- "局部性臭汗症多发于大汗腺所在部位,如腋窝、腹股沟、足部、肛周、外阴、脐部及女性乳房下等处,以足部、腋部臭汗症最为多见。腋窝臭汗症俗称狐臭,是一种特殊的刺鼻臭味。足部臭汗症常与足部多汗伴发,有刺鼻的臭味^[1][4]^。\n" +
|
|
|
- "\n" +
|
|
|
- "全身性臭汗症为一种与种族有关的生理现象,也可见于卫生习惯不良者,服食某些食物(如葱、蒜、芥末)或某些药物(如麝香)后,在个别人中可产生臭汗^[4]^。\n" +
|
|
|
- "\n" +
|
|
|
- "输出:[\"全身或局部多汗且有臭味\",\"局部性臭汗症\",\"腋窝臭汗症\",\"足部臭汗症常\",\"全身性臭汗症\"]\n" +
|
|
|
- "\n" +
|
|
|
- "#示例2\n" +
|
|
|
- "以抽取“是否传染病”为例\n" +
|
|
|
- "文本:\n" +
|
|
|
- "**是**^[2][4][6]^。\n" +
|
|
|
- "\n" +
|
|
|
- "输出:[\"是\"]\n\n"+
|
|
|
- "2、没有可抽取的“"+relation+"”,则返回空json数组。\n" +
|
|
|
- "\n" +
|
|
|
- "本次抽取的文本如下:\n\n";
|
|
|
-
|
|
|
-
|
|
|
- String zhiling = format+ answer;
|
|
|
- System.out.println(zhiling);
|
|
|
- System.out.println(BaidubceUtil.getChatResponse(zhiling, BaidubceUtil.getAccessToken()));*/
|
|
|
-
|
|
|
+ public static void main(String[] args) throws Exception { // Manual entry point: obtains an access token via BaidubceUtil.getAccessToken(), then runs a single saveExel pass for one property.
|
|
|
|
|
|
String accessToken = BaidubceUtil.getAccessToken();
|
|
|
- String propertyStr = "分期\t的分期有哪些分期";
|
|
|
+ String propertyStr = "就诊科室\t有哪些“相关科室”或“就诊科室”或“所属科室”(有多个科室时,选相关性最高的,最多给我三个科室)?"; // Format: <relation name> TAB <question suffix>. saveExel splits this value on the ASCII comma "," and getData splits each part on the tab, so the text must only use full-width commas. NOTE(review): getData appends another "?" after this suffix, so the final question ends with two question marks.
|
|
|
|
|
|
- saveExel(propertyStr, accessToken,"分期3.1");
|
|
|
+ saveExel(propertyStr, accessToken,"就诊科室"); // Third argument is used as the sheet name and is also passed to save(...).
|
|
|
|
|
|
}
|
|
|
|
|
|
static HSSFWorkbook workbook;
|
|
|
-
|
|
|
+ static Long totalCount = 0l; // Number of LLM self-check calls made by getData (incremented once per call when selfCheck is true); reset to 0 at the end of saveExel. NOTE(review): stays 0 when selfCheck is false, so the rate division in saveExel is 0/0.
|
|
|
+ static Long successCount = 0l; // Self-check answers that are non-empty, shorter than 7 characters, and contain "是" (yes); reset at the end of saveExel.
|
|
|
+ static Long failCount = 0l; // Self-check answers that are non-empty, shorter than 7 characters, and contain "否" (no) but not "是"; reset at the end of saveExel.
|
|
|
+ static Long unkonwCount = 0l; // Self-check answers that are empty/null or 7+ characters long (name is a misspelling of "unknown", kept because other code references it). NOTE(review): short answers containing neither "是" nor "否" are not counted in any bucket.
|
|
|
private static void saveExel(String propertyStr, String accessToken,String fileName) {
|
|
|
String[] properties = propertyStr.split(",");
|
|
|
workbook = new HSSFWorkbook();//这里也可以设置sheet的Name
|
|
|
HSSFSheet sheet = workbook.createSheet(fileName);
|
|
|
- ;//工作表
|
|
|
int rows = 0;
|
|
|
for(String property:properties){
|
|
|
List<Knowlege> data = getData(accessToken, property);
|
|
@@ -283,7 +154,16 @@ public class DataWriteTest31 {
|
|
|
row.createCell(6).setCellValue(temp.getRefenrece() == null ? "" : temp.getRefenrece());
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ String successRate = String.format("%.2f", Double.valueOf(successCount)/ Double.valueOf(totalCount));
|
|
|
+ String unkonwRate = String.format("%.2f", Double.valueOf(unkonwCount)/ Double.valueOf(totalCount));
|
|
|
+ sheet.getRow(rows-1).createCell(7).setCellValue(successRate);
|
|
|
+ sheet.getRow(rows-1).createCell(8).setCellValue(unkonwRate);
|
|
|
save(fileName);
|
|
|
+ totalCount = 0l;
|
|
|
+ successCount = 0l;
|
|
|
+ failCount = 0l;
|
|
|
+ unkonwCount = 0l;
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -303,13 +183,18 @@ public class DataWriteTest31 {
|
|
|
private static List<Knowlege> getData(String accessToken,String property) {
|
|
|
List<Knowlege> list = new ArrayList<>();
|
|
|
try {
|
|
|
- BaidubceUtil baidubceUtil = new BaidubceUtil();
|
|
|
-
|
|
|
- String[] diseases = diseaseStr.split(",");
|
|
|
String appId= "3b615957-f9b1-4811-9dfc-13dcee6e0a37";
|
|
|
- for (int i=0;i<diseases.length;i++) {
|
|
|
+ InputStream fis = new FileInputStream(urlExcelPath);
|
|
|
+ Workbook urlWorkbook = new XSSFWorkbook(fis);
|
|
|
+ Sheet urlSheet = urlWorkbook.getSheetAt(0);
|
|
|
+
|
|
|
+ for (int rowNum = 0; rowNum <= urlSheet.getLastRowNum(); rowNum++) {
|
|
|
try {
|
|
|
- String disease = diseases[i];
|
|
|
+ Row row = urlSheet.getRow(rowNum);
|
|
|
+ String disease = row.getCell(0).getStringCellValue();
|
|
|
+ if(StringUtils.isEmpty(disease)){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
disease = disease.trim();
|
|
|
String[] split = property.split("\t");
|
|
|
if(split.length<2){
|
|
@@ -317,14 +202,13 @@ public class DataWriteTest31 {
|
|
|
}
|
|
|
String quetionParty = split[1];
|
|
|
String relation = split[0];
|
|
|
- String question = disease + quetionParty + "?";
|
|
|
+ String question = "“"+disease+"”" + quetionParty + "?";
|
|
|
|
|
|
Map<String, String> result = QizhenAssistant.getChatResponse(question, QizhenAssistant.getConversationId(appId),appId);
|
|
|
String answer = result.get("answer");
|
|
|
String references = result.get("references");
|
|
|
String defaultReferences = result.get("defaultReferences");
|
|
|
String chatResponse = "";
|
|
|
- System.out.println(answer);
|
|
|
if (!("failed".equals(answer) || answer.contains(QizhenAssistant.noAnswer))) {
|
|
|
String format = "你是专门处理医学领域文本的关系抽取专家。你将在指定的文本中抽取其中“"+disease+"的"+relation+"”。\n" +
|
|
|
"\n" +
|
|
@@ -351,7 +235,7 @@ public class DataWriteTest31 {
|
|
|
|
|
|
String zhiling = format+ answer;
|
|
|
System.out.println(zhiling);
|
|
|
- chatResponse = baidubceUtil.getChatResponse(zhiling, accessToken);
|
|
|
+ chatResponse = BaidubceUtil.getChatResponse(zhiling, accessToken);
|
|
|
chatResponse = filte(chatResponse);
|
|
|
JSONArray jsonResult = new JSONArray();
|
|
|
if(!StringUtils.isBlank(chatResponse)){
|
|
@@ -372,6 +256,30 @@ public class DataWriteTest31 {
|
|
|
tempReferenceJA.add(referenceJO.getString("title"));
|
|
|
}
|
|
|
}
|
|
|
+ if(selfCheck) {
|
|
|
+ String llmQuestion = "你是一个资深的医学专家,请用“是”、“否”和“不确定”回答用户的问题回答。\n" +
|
|
|
+ "\n" +
|
|
|
+ "#要求\n" +
|
|
|
+ "1、只回答“是”、“否”和“不确定”,不要有额外的信息。\n" +
|
|
|
+ "\n" +
|
|
|
+ "#示例\n" +
|
|
|
+ "用户输入:急性上呼吸道感染的治疗药物是否包括“利巴韦林”?\n" +
|
|
|
+ "输出:是\n\n" +
|
|
|
+ "请回答:" + disease + "的" + relation + "是否包括“" + name + "”?";
|
|
|
+ String llmAnswer = BaidubceUtil.getChatResponse(llmQuestion, accessToken);
|
|
|
+ totalCount++;
|
|
|
+ temp.put("LLM-question", llmQuestion);
|
|
|
+ temp.put("LLM-answer", llmAnswer);
|
|
|
+ if (!StringUtils.isEmpty(llmAnswer) && llmAnswer.length() < 7) {
|
|
|
+ if (llmAnswer.contains("是")) {
|
|
|
+ successCount++;
|
|
|
+ } else if (llmAnswer.contains("否")) {
|
|
|
+ failCount++;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ unkonwCount++;
|
|
|
+ }
|
|
|
+ }
|
|
|
temp.put("reference", tempReferenceJA);
|
|
|
if(tempReferenceJA.size()<1){
|
|
|
temp.put("defaultReferences", defaultReferences);
|