Browse Source

临床表现症状标记1

yuchengwei 3 weeks ago
parent
commit
b2a52f3c3b
1 changed file with 90 additions and 82 deletions
  1. 90 82
      router/text_search.py

+ 90 - 82
router/text_search.py

@@ -434,8 +434,6 @@ def _process_sentence_search(node_name: str, prop_title: str, sentences: list, t
             limit=500,
             type='trunk'
         )
-        # 查询1000条切片数据
-        # db_trunks = db.query(Trunks).filter(Trunks.type == 'trunk').limit(1000).all()
 
         # 准备语料库数据
         trunk_texts = []
@@ -495,6 +493,48 @@ def _process_sentence_search(node_name: str, prop_title: str, sentences: list, t
     
     return result_sentences, all_references
 
+def _mark_symptoms(text: str, symptom_list: List[str]) -> str:
+    """处理症状标记"""
+    if not symptom_list:
+        return text
+        
+    marked_sentence = text
+    # 创建一个标记位置的列表,记录每个位置是否已被标记
+    marked_positions = [False] * len(marked_sentence)
+    
+    # 创建一个列表来存储已处理的症状
+    processed_symptoms = []
+    
+    for symptom in symptom_list:
+        # 检查是否已处理过该症状或其子集
+        if any(symptom in processed_sym or processed_sym in symptom for processed_sym in processed_symptoms):
+            continue
+            
+        # 查找所有匹配位置
+        start_pos = 0
+        while True:
+            pos = marked_sentence.find(symptom, start_pos)
+            if pos == -1:
+                break
+                
+            # 检查这个位置是否已被标记
+            if not any(marked_positions[pos:pos + len(symptom)]):
+                # 标记这个范围的所有位置
+                for i in range(pos, pos + len(symptom)):
+                    marked_positions[i] = True
+                # 替换文本
+                marked_sentence = marked_sentence[:pos] + f'<i style="color:red;">{symptom}</i>' + marked_sentence[pos + len(symptom):]
+                # 将成功标记的症状添加到已处理列表中
+                if symptom not in processed_symptoms:
+                    processed_symptoms.append(symptom)
+                # 更新标记位置数组以适应新插入的标签
+                new_positions = [False] * (len('<i style="color:red;">') + len('</i>'))
+                marked_positions = marked_positions[:pos] + new_positions + marked_positions[pos:]
+            
+            start_pos = pos + len('<i style="color:red;">') + len(symptom) + len('</i>')
+            
+    return marked_sentence
+
 @router.post("/kgrt_api/text/eb_search", response_model=StandardResponse)
 @router.post("/knowledge/text/eb_search", response_model=StandardResponse)
 async def node_props_search(request: NodePropsSearchRequest, db: Session = Depends(get_db)):
@@ -504,6 +544,42 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
         # 检查缓存
         cached_result = _check_cache(request.node_id)
         if cached_result:
+            # 如果有症状列表,处理症状标记
+            if request.symptoms:
+                symptom_list = []
+                try:
+                    # 初始化服务
+                    node_service = KGNodeService(db)
+                    edge_service = KGEdgeService(db)
+                    
+                    for symptom in request.symptoms:
+                        # 添加原始症状
+                        symptom_list.append(symptom)
+                        try:
+                            # 获取症状节点
+                            symptom_node = node_service.get_node_by_name_category(symptom, '症状')
+                            # 获取症状相关同义词
+                            edges = edge_service.get_edges_by_nodes(src_id=symptom_node['id'], category='症状同义词')
+                            if edges:
+                                # 添加同义词
+                                for edge in edges:
+                                    if edge['dest_node'] and edge['dest_node'].get('name'):
+                                        symptom_list.append(edge['dest_node']['name'])
+                        except ValueError:
+                            # 如果找不到节点,只添加原始症状
+                            continue
+                    
+                    # 按照字符长度进行倒序排序
+                    symptom_list.sort(key=len, reverse=True)
+                    
+                    # 处理缓存结果中的症状标记
+                    for prop in cached_result.get('props', []):
+                        if prop.get('prop_title') == '临床表现' and 'answer' in prop:
+                            for answer in prop['answer']:
+                                answer['sentence'] = _mark_symptoms(answer['sentence'], symptom_list)
+                except Exception as e:
+                    logger.error(f"处理症状标记失败: {str(e)}")
+            
             return StandardResponse(success=True, data=cached_result)
 
         # 初始化服务
@@ -534,7 +610,7 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
                                 symptom_list.append(edge['dest_node']['name'])
                 except ValueError:
                     # 如果找不到节点,只添加原始症状
-                    symptom_list.append(symptom)
+                    continue
             
             # 按照字符长度进行倒序排序
             symptom_list.sort(key=len, reverse=True)
@@ -619,44 +695,8 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
                     
                     reference, _ = _process_search_result(search_result, 1)
                     prop_result["references"] = [reference]
-                    # 处理症状标记
-                    marked_sentence = prop_value
-                    if prop_title == '临床表现' and symptom_list:
-                        # 创建一个标记位置的列表,记录每个位置是否已被标记
-                        marked_positions = [False] * len(marked_sentence)
-                        
-                        # 创建一个列表来存储已处理的症状
-                        processed_symptoms = []
-                        
-                        for symptom in symptom_list:
-                            # 检查是否已处理过该症状或其子集
-                            if any(symptom in processed_sym or processed_sym in symptom for processed_sym in processed_symptoms):
-                                continue
-                                
-                            # 查找所有匹配位置
-                            start_pos = 0
-                            while True:
-                                pos = marked_sentence.find(symptom, start_pos)
-                                if pos == -1:
-                                    break
-                                    
-                                # 检查这个位置是否已被标记
-                                if not any(marked_positions[pos:pos + len(symptom)]):
-                                    # 标记这个范围的所有位置
-                                    for i in range(pos, pos + len(symptom)):
-                                        marked_positions[i] = True
-                                    # 替换文本
-                                    marked_sentence = marked_sentence[:pos] + f'<i style="color:red;">{symptom}</i>' + marked_sentence[pos + len(symptom):]
-                                    # 将成功标记的症状添加到已处理列表中
-                                    if symptom not in processed_symptoms:
-                                        processed_symptoms.append(symptom)
-                                    # 更新标记位置数组以适应新插入的标签
-                                    new_positions = [False] * (len('<i style="color:red;">') + len('</i>'))
-                                    marked_positions = marked_positions[:pos] + new_positions + marked_positions[pos:]
-                                
-                                start_pos = pos + len('<i style="color:red;">') + len(symptom) + len('</i>')
                     prop_result["answer"] = [{
-                        "sentence": marked_sentence,
+                        "sentence": prop_value,
                         "flag": "1"
                     }]
                 else:
@@ -668,45 +708,6 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
                 result_sentences, references = _process_sentence_search(
                     node_name, prop_title, sentences, trunks_service
                 )
-                # 处理症状标记
-                if prop_title == '临床表现' and symptom_list and result_sentences:
-                    for sentence in result_sentences:
-                        marked_sentence = sentence["sentence"]
-                        # 创建一个标记位置的列表,记录每个位置是否已被标记
-                        marked_positions = [False] * len(marked_sentence)
-
-                        # 创建一个列表来存储已处理的症状
-                        processed_symptoms = []
-                        
-                        for symptom in symptom_list:
-                            # 检查是否已处理过该症状或其子集
-                            if any(symptom in processed_sym or processed_sym in symptom for processed_sym in processed_symptoms):
-                                continue
-                                
-                            # 查找所有匹配位置
-                            start_pos = 0
-                            while True:
-                                pos = marked_sentence.find(symptom, start_pos)
-                                if pos == -1:
-                                    break
-
-                                # 检查这个位置是否已被标记
-                                if not any(marked_positions[pos:pos + len(symptom)]):
-                                    # 标记这个范围的所有位置
-                                    for i in range(pos, pos + len(symptom)):
-                                        marked_positions[i] = True
-                                    # 替换文本
-                                    marked_sentence = marked_sentence[:pos] + f'<i style="color:red;">{symptom}</i>' + marked_sentence[pos + len(symptom):]
-                                    # 将成功标记的症状添加到已处理列表中
-                                    if symptom not in processed_symptoms:
-                                        processed_symptoms.append(symptom)
-                                    # 更新标记位置数组以适应新插入的标签
-                                    new_positions = [False] * (len('<i style="color:red;">') + len('</i>'))
-                                    marked_positions = marked_positions[:pos] + new_positions + marked_positions[pos:]
-
-                                start_pos = pos + len('<i style="color:red;">') + len(symptom) + len('</i>')
-
-                        sentence["sentence"] = marked_sentence
                 if references:
                     prop_result["references"] = references
                 if result_sentences:
@@ -761,13 +762,20 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
             "index": str(file_index_map[file_name])
         } for file_name, file_type in all_files], key=lambda x: int(x["index"]))
 
-        end_time = time.time()
-        logger.info(f"node_props_search接口耗时: {(end_time - start_time) * 1000:.2f}ms")
-
         # 缓存结果
         cache_key = f"xunzheng_{request.node_id}"
         cache[cache_key] = result
 
+        # 处理症状标记
+        if request.symptoms:
+            for prop in result.get('props', []):
+                if prop.get('prop_title') == '临床表现' and 'answer' in prop:
+                    for answer in prop['answer']:
+                        answer['sentence'] = _mark_symptoms(answer['sentence'], symptom_list)
+
+        end_time = time.time()
+        logger.info(f"node_props_search接口耗时: {(end_time - start_time) * 1000:.2f}ms")
+
         return StandardResponse(success=True, data=result)
     except Exception as e:
         logger.error(f"Node props search failed: {str(e)}")