Explorar o código

1、查询节点列表属性排序修改
2、分句的flag拆分、增加文件列表字段

yuchengwei hai 3 meses
pai
achega
e16518f908
Modificáronse 2 ficheiros con 101 adicións e 18 borrados
  1. 22 0
      router/knowledge_saas.py
  2. 79 18
      router/text_search.py

+ 22 - 0
router/knowledge_saas.py

@@ -64,6 +64,28 @@ async def paginated_search(
             'load_props': True
         }
         result = service.paginated_search(search_params)
+        
+        # 定义prop_title的排序顺序
+        prop_title_order = [
+            '基础信息', '概述', '病因学', '流行病学', '发病机制', '病理学',
+            '临床表现', '辅助检查', '诊断', '鉴别诊断', '并发症', '治疗', '护理', '预后', '预防'
+        ]
+        
+        # 处理每个记录的props,过滤并排序
+        for record in result['records']:
+            if 'props' in record:
+                # 只保留指定的prop_title
+                filtered_props = [prop for prop in record['props'] if prop.get('prop_title') in prop_title_order]
+                
+                # 按照指定顺序排序
+                sorted_props = sorted(
+                    filtered_props,
+                    key=lambda x: prop_title_order.index(x.get('prop_title')) if x.get('prop_title') in prop_title_order else len(prop_title_order)
+                )
+                
+                # 更新记录中的props
+                record['props'] = sorted_props
+        
         return StandardResponse(
             success=True,
             data={

+ 79 - 18
router/text_search.py

@@ -333,6 +333,7 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
             "name": node_name,
             "category": node.get('category', ''),
             "props": [],
+            "files": [],
             "distance": 0
         }
 
@@ -370,15 +371,36 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
             reference_index = 1
 
             # 对每个句子进行向量搜索
-            for sentence in sentences:
-                original_sentence = sentence
-                sentence = sentence.replace("\n", "<br>")
-                if len(sentence) < 10:
-                    result_sentences.append(sentence)
+            i = 0
+            while i < len(sentences):
+                original_sentence = sentences[i]
+                sentence = original_sentence.replace("\n", "<br>")
+                
+                # 如果当前句子长度小于10且不是最后一句,则与下一句合并
+                if len(sentence) < 10 and i + 1 < len(sentences):
+                    next_sentence = sentences[i + 1].replace("\n", "<br>")
+                    combined_sentence = sentence + " " + next_sentence
+                    # 添加原短句到结果,flag为空
+                    result_sentences.append({
+                        "sentence": sentence,
+                        "flag": ""
+                    })
+                    # 使用合并后的句子进行搜索
+                    search_text = f"{node_name}:{prop_title}:{combined_sentence}"
+                    i += 1  # 跳过下一句,因为已经合并使用
+                elif len(sentence) < 10:
+                    # 如果是最后一句且长度小于10,直接添加到结果,flag为空
+                    result_sentences.append({
+                        "sentence": sentence,
+                        "flag": ""
+                    })
+                    i += 1
                     continue
-
-                # 构建搜索文本
-                search_text = f"{node_name}:{prop_title}:{sentence}"
+                else:
+                    # 句子长度足够,直接使用
+                    search_text = f"{node_name}:{prop_title}:{sentence}"
+                
+                i += 1
 
                 # 检查缓存
                 if cached_results:
@@ -407,10 +429,22 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
                     )
 
                 # 处理搜索结果
+                if not search_results:
+                    # 没有搜索结果,添加原句子,flag为空
+                    result_sentences.append({
+                        "sentence": sentence,
+                        "flag": ""
+                    })
+                    continue
+                    
                 for search_result in search_results:
                     distance = search_result.get("distance", DISTANCE_THRESHOLD)
                     if distance >= DISTANCE_THRESHOLD:
-                        result_sentences.append(sentence)
+                        # 距离过大,添加原句子,flag为空
+                        result_sentences.append({
+                            "sentence": sentence,
+                            "flag": ""
+                        })
                         continue
 
                     # 检查是否已存在相同引用
@@ -432,15 +466,11 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
                         all_references.append(reference)
                         reference_index += 1
 
-                    # 添加引用标记
-                    if sentence.endswith('<br>'):
-                        # 如果有多个<br>,在所有<br>前添加^[current_index]^
-                        result_sentence = sentence.replace('<br>', f'^[{current_index}]^<br>')
-                    else:
-                        # 直接在句子末尾添加^[current_index]^
-                        result_sentence = f'{sentence}^[{current_index}]^'
-
-                    result_sentences.append(result_sentence)
+                    # 添加句子和引用标记(作为单独的flag字段)
+                    result_sentences.append({
+                        "sentence": sentence,
+                        "flag": str(current_index)
+                    })
 
             # 更新属性值,添加引用信息
             if all_references:
@@ -450,6 +480,37 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
             if result_sentences:
                 prop_result["answer"] = result_sentences
 
+        # 处理所有引用中的文件信息
+        all_files = set()
+        for prop_result in result["props"]:
+            if "references" in prop_result:
+                for ref in prop_result["references"]:
+                    referrence = ref.get("referrence", "")
+                    if referrence and "/books/" in referrence:
+                        # 提取/books/后面的文件名
+                        file_name = referrence.split("/books/")[-1]
+                        if file_name:
+                            # 根据文件名后缀确定文件类型
+                            file_type = ""
+                            if file_name.lower().endswith(".pdf"):
+                                file_type = "pdf"
+                            elif file_name.lower().endswith(".doc") or file_name.lower().endswith(".docx"):
+                                file_type = "doc"
+                            elif file_name.lower().endswith(".xls") or file_name.lower().endswith(".xlsx"):
+                                file_type = "excel"
+                            elif file_name.lower().endswith(".ppt") or file_name.lower().endswith(".pptx"):
+                                file_type = "ppt"
+                            else:
+                                file_type = "other"
+                            
+                            all_files.add((file_name, file_type))
+        
+        # 将文件信息添加到结果中
+        result["files"] = [{
+            "file_name": file_name,
+            "file_type": file_type
+        } for file_name, file_type in all_files]
+        
         end_time = time.time()
         logger.info(f"node_props_search接口耗时: {(end_time - start_time) * 1000:.2f}ms")
         return StandardResponse(success=True, data=result)