浏览代码

新增循证接口

SGTY 2 周之前
父节点
当前提交
644931044a
共有 2 个文件被更改,包括 225 次插入和 23 次删除
  1. 197 0
      app.log
  2. 28 23
      router/text_search.py

文件差异内容过多而无法显示
+ 197 - 0
app.log


+ 28 - 23
router/text_search.py

@@ -406,6 +406,10 @@ def _get_file_type(file_name: str) -> str:
     return "other"
 
 def _process_sentence_search(node_name: str, prop_title: str, sentences: list, trunks_service: TrunksService) -> tuple[list, list]:
+    keywords = [node_name, prop_title] if node_name and prop_title else None
+    return _process_sentence_search_keywords(sentences, trunks_service,keywords=keywords)
+    
+def _process_sentence_search_keywords(sentences: list, trunks_service: TrunksService,keywords: Optional[List[str]] = None) -> tuple[list, list]:
     """处理句子搜索,返回结果句子和引用列表"""
     result_sentences = []
     all_references = []
@@ -414,8 +418,9 @@ def _process_sentence_search(node_name: str, prop_title: str, sentences: list, t
     
     while i < len(sentences):
         sentence = sentences[i]
-
-        search_text = f"{node_name}:{prop_title}:{sentence}"
+        search_text = sentence
+        if keywords:
+            search_text = f"{keywords}:{sentence}"
         # if len(sentence) < 10 and i + 1 < len(sentences):
         #     next_sentence = sentences[i + 1]
         #     # result_sentences.append({"sentence": sentence, "flag": ""})
@@ -754,12 +759,12 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
                     if file_name in file_index_map:
                         ref["index"] = f"{file_index_map[file_name]}-{ref['index']}"
 
-            # 更新answer中的flag
+            # 更新answer中的index
             if "answer" in prop_result:
                 for sentence in prop_result["answer"]:
-                    if sentence["flag"]:
+                    if sentence["index"]:
                         for ref in prop_result["references"]:
-                            if ref["index"].endswith(f"-{sentence['flag']}"):
+                            if ref["index"].endswith(f"-{sentence['index']}"):
                                 sentence["flag"] = ref["index"]
                                 break
 
@@ -790,12 +795,15 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
         raise HTTPException(status_code=500, detail=str(e))
 
 class FindSimilarTexts(BaseModel):
+    keywords:Optional[List[str]] = None
     search_text: str
 
 @router.post("/knowledge/text/find_similar_texts", response_model=StandardResponse)
-async def node_props_search(request: FindSimilarTexts, db: Session = Depends(get_db)):
+async def find_similar_texts(request: FindSimilarTexts, db: Session = Depends(get_db)):
     trunks_service = TrunksService()
     search_text = request.search_text
+    if request.keywords:
+        search_text = f"{request.keywords}:{search_text}"
     # 使用向量搜索获取相似内容
     search_results = trunks_service.search_by_vector(
         text=search_text,
@@ -832,7 +840,7 @@ async def node_props_search(request: FindSimilarTexts, db: Session = Depends(get
     similarity_finder.load_corpus(trunk_texts, trunk_ids)
 
     similar_results = similarity_finder.find_most_similar(search_text, top_n=1)
-
+    prop_result = {}
     # 处理搜索结果
     if similar_results and similar_results[0]['similarity'] >= 0.3:  # 设置相似度阈值
         # 获取最相似的文本对应的trunk_id
@@ -848,25 +856,22 @@ async def node_props_search(request: FindSimilarTexts, db: Session = Depends(get
             }
 
             reference, _ = _process_search_result(search_result, 1)
-            # prop_result["references"] = [reference]
-            # prop_result["answer"] = [{
-            #     "sentence": prop_value,
-            #     "flag": "1"
-            # }]
-        else:
-            # 如果整体搜索没有找到匹配结果,则进行句子拆分搜索
-            sentences = SentenceUtil.split_text(search_text, 10)
+            prop_result["references"] = [reference]
+            prop_result["answer"] = [{
+                 "sentence": request.search_text,
+                 "flag": "1"
+            }]
     else:
         # 如果整体搜索没有找到匹配结果,则进行句子拆分搜索
-        sentences = SentenceUtil.split_text(search_text, 10)
-        result_sentences, references = _process_sentence_search(
-            '', '', sentences, trunks_service
+        sentences = SentenceUtil.split_text(request.search_text, 10)
+        result_sentences, references = _process_sentence_search_keywords(
+            sentences, trunks_service,keywords=request.keywords
         )
-        # if references:
-        #     prop_result["references"] = references
-        # if result_sentences:
-        #     prop_result["answer"] = result_sentences
-    return StandardResponse(success=True)
+        if references:
+            prop_result["references"] = references
+        if result_sentences:
+            prop_result["answer"] = result_sentences
+    return StandardResponse(success=True,data=prop_result)
 
 
 text_search_router = router