Add evidence-based search interface

SGTY 3 weeks ago
Parent
commit
3cf0fa2dca
4 files changed with 159 additions and 2 deletions
  1. agent/cdss/capbility.py (+2, -1)
  2. app.log (+76, -0)
  3. db/session.py (+1, -1)
  4. router/text_search.py (+80, -0)

+ 2 - 1
agent/cdss/capbility.py

@@ -8,7 +8,8 @@ logger = logging.getLogger(__name__)
 class CDSSCapability:
     cdss_helper: CDSSHelper = None
     def __init__(self):
-        self.cdss_helper = CDSSHelper()
+        #self.cdss_helper = CDSSHelper()
+        self.cdss_helper = None
         logger.debug("CDSSCapability initialized")
     
     def process(self, input: CDSSInput, embeding_search:bool = True) -> CDSSOutput:        
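
The constructor now leaves cdss_helper unset, so CDSSHelper has to be created by whoever needs it. A minimal sketch of a lazy-initialization accessor, assuming the helper is expensive to construct; the get_helper name and the import path are illustrative, not part of this commit:

    # Sketch only: build CDSSHelper lazily on first use instead of in __init__.
    # The import path below is an assumption about the project layout.
    from agent.cdss.libs.cdss_helper import CDSSHelper

    class CDSSCapability:
        cdss_helper: CDSSHelper = None

        def get_helper(self) -> CDSSHelper:
            # Hypothetical accessor: pay the construction cost only when needed.
            if self.cdss_helper is None:
                self.cdss_helper = CDSSHelper()
            return self.cdss_helper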

+ 76 - 0
app.log

File diff suppressed because it is too large

+ 1 - 1
db/session.py

@@ -11,7 +11,7 @@ DB_HOST = os.getenv("DB_HOST", "173.18.12.203")
 DB_PORT = os.getenv("DB_PORT", "5432")
 DB_USER = os.getenv("DB_USER", "knowledge")
 DB_PASS = os.getenv("DB_PASSWORD", "qwer1234.")
-DB_NAME = os.getenv("DB_NAME", "postgres")
+DB_NAME = os.getenv("DB_NAME", "medkg")
 
 DATABASE_URL = f"postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
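
For context, a minimal sketch of how a DATABASE_URL like the one above is typically turned into the get_db dependency used by the routers; the engine options are assumptions and may differ from the actual db/session.py:

    # Sketch only: standard SQLAlchemy setup for the URL assembled above.
    # pool_pre_ping is an assumed option, not confirmed from db/session.py.
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    # Default values taken from the diff above.
    DATABASE_URL = "postgresql://knowledge:qwer1234.@173.18.12.203:5432/medkg"

    engine = create_engine(DATABASE_URL, pool_pre_ping=True)
    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

    def get_db():
        # Yield one session per request and always close it afterwards.
        db = SessionLocal()
        try:
            yield db
        finally:
            db.close()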
 

+ 80 - 0
router/text_search.py

@@ -789,4 +789,84 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
         logger.error(f"Node props search failed: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 
+class FindSimilarTexts(BaseModel):
+    search_text: str
+
+@router.post("/knowledge/text/find_similar_texts", response_model=StandardResponse)
+async def find_similar_texts(request: FindSimilarTexts, db: Session = Depends(get_db)):
+    trunks_service = TrunksService()
+    search_text = request.search_text
+    # Use vector search to retrieve similar content
+    search_results = trunks_service.search_by_vector(
+        text=search_text,
+        limit=500,
+        type='trunk',
+        distance=0.7
+    )
+
+    # Prepare the corpus data
+    trunk_texts = []
+    trunk_ids = []
+
+    # Dictionary that caches each trunk's details
+    trunk_details = {}
+
+    for trunk in search_results:
+        trunk_texts.append(trunk.get('content'))
+        trunk_ids.append(trunk.get('id'))
+        # Cache this trunk's details
+        trunk_details[trunk.get('id')] = {
+            'id': trunk.get('id'),
+            'content': trunk.get('content'),
+            'file_path': trunk.get('file_path'),
+            'title': trunk.get('title'),
+            'referrence': trunk.get('referrence'),
+            'page_no': trunk.get('page_no')
+        }
+
+    if len(trunk_texts) == 0:
+        # No candidates from the vector search; return an empty successful response
+        # rather than None, which would not satisfy response_model=StandardResponse
+        return StandardResponse(success=True)
+
+    # Initialize TextSimilarityFinder and load the corpus
+    similarity_finder = TextSimilarityFinder(method='tfidf', use_jieba=True)
+    similarity_finder.load_corpus(trunk_texts, trunk_ids)
+
+    similar_results = similarity_finder.find_most_similar(search_text, top_n=1)
+
+    # Process the search results
+    if similar_results and similar_results[0]['similarity'] >= 0.3:  # similarity threshold
+        # Get the trunk_id of the most similar text
+        trunk_id = similar_results[0]['path']
+
+        # Fetch the trunk details from the cache
+        trunk_info = trunk_details.get(trunk_id)
+
+        if trunk_info:
+            search_result = {
+                **trunk_info,
+                'distance': similar_results[0]['similarity']  # report the similarity score in the 'distance' field
+            }
+
+            reference, _ = _process_search_result(search_result, 1)
+            # prop_result["references"] = [reference]
+            # prop_result["answer"] = [{
+            #     "sentence": prop_value,
+            #     "flag": "1"
+            # }]
+        else:
+            # Trunk details missing from the cache; fall back to sentence-split search
+            sentences = SentenceUtil.split_text(search_text, 10)
+            result_sentences, references = _process_sentence_search(
+                '', '', sentences, trunks_service
+            )
+    else:
+        # The whole-text search found no match; fall back to sentence-split search
+        sentences = SentenceUtil.split_text(search_text, 10)
+        result_sentences, references = _process_sentence_search(
+            '', '', sentences, trunks_service
+        )
+        # if references:
+        #     prop_result["references"] = references
+        # if result_sentences:
+        #     prop_result["answer"] = result_sentences
+    return StandardResponse(success=True)
+
+
 text_search_router = router
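
A minimal sketch of exercising the new /knowledge/text/find_similar_texts route; the base URL and the sample query are placeholders:

    # Sketch only: call the new endpoint with the requests library.
    import requests

    resp = requests.post(
        "http://localhost:8000/knowledge/text/find_similar_texts",
        json={"search_text": "sample query to match against stored trunks"},
    )
    resp.raise_for_status()
    print(resp.json())  # StandardResponse body; only the success flag is populated for now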