Add evidence-based search interface

SGTY 3 weeks ago
Parent
commit
3cf0fa2dca
4 files changed with 159 additions and 2 deletions
  1. agent/cdss/capbility.py (+2, -1)
  2. app.log (+76, -0)
  3. db/session.py (+1, -1)
  4. router/text_search.py (+80, -0)

+ 2 - 1
agent/cdss/capbility.py

@@ -8,7 +8,8 @@ logger = logging.getLogger(__name__)
 class CDSSCapability:
     cdss_helper: CDSSHelper = None
     def __init__(self):
-        self.cdss_helper = CDSSHelper()
+        #self.cdss_helper = CDSSHelper()
+        self.cdss_helper = None
         logger.debug("CDSSCapability initialized")
     
     def process(self, input: CDSSInput, embeding_search:bool = True) -> CDSSOutput:        
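
The constructor now leaves cdss_helper unset, so CDSSHelper has to be created by whoever needs it. A minimal sketch of a lazy-initialization accessor, assuming the helper is expensive to construct; the get_helper name and the import path are illustrative, not part of this commit:

    # Sketch only: build CDSSHelper lazily on first use instead of in __init__.
    # The import path below is an assumption about the project layout.
    from agent.cdss.libs.cdss_helper import CDSSHelper

    class CDSSCapability:
        cdss_helper: CDSSHelper = None

        def get_helper(self) -> CDSSHelper:
            # Hypothetical accessor: pay the construction cost only when needed.
            if self.cdss_helper is None:
                self.cdss_helper = CDSSHelper()
            return self.cdss_helper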

+ 76 - 0
app.log

File diff suppressed because it is too large

+ 1 - 1
db/session.py

@@ -11,7 +11,7 @@ DB_HOST = os.getenv("DB_HOST", "173.18.12.203")
 DB_PORT = os.getenv("DB_PORT", "5432")
 DB_USER = os.getenv("DB_USER", "knowledge")
 DB_PASS = os.getenv("DB_PASSWORD", "qwer1234.")
-DB_NAME = os.getenv("DB_NAME", "postgres")
+DB_NAME = os.getenv("DB_NAME", "medkg")
 
 DATABASE_URL = f"postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
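
For context, a minimal sketch of how a DATABASE_URL like the one above is typically turned into the get_db dependency used by the routers; the engine options are assumptions and may differ from the actual db/session.py:

    # Sketch only: standard SQLAlchemy setup for the URL assembled above.
    # pool_pre_ping is an assumed option, not confirmed from db/session.py.
    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    # Default values taken from the diff above.
    DATABASE_URL = "postgresql://knowledge:qwer1234.@173.18.12.203:5432/medkg"

    engine = create_engine(DATABASE_URL, pool_pre_ping=True)
    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

    def get_db():
        # Yield one session per request and always close it afterwards.
        db = SessionLocal()
        try:
            yield db
        finally:
            db.close()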
 

+ 80 - 0
router/text_search.py

@@ -789,4 +789,84 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
         logger.error(f"Node props search failed: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 
+class FindSimilarTexts(BaseModel):
+    search_text: str
+
+@router.post("/knowledge/text/find_similar_texts", response_model=StandardResponse)
+async def find_similar_texts(request: FindSimilarTexts, db: Session = Depends(get_db)):
+    trunks_service = TrunksService()
+    search_text = request.search_text
+    # Use vector search to retrieve similar content
+    search_results = trunks_service.search_by_vector(
+        text=search_text,
+        limit=500,
+        type='trunk',
+        distance=0.7
+    )
+
+    # Prepare the corpus data
+    trunk_texts = []
+    trunk_ids = []
+
+    # Dictionary that caches each trunk's details
+    trunk_details = {}
+
+    for trunk in search_results:
+        trunk_texts.append(trunk.get('content'))
+        trunk_ids.append(trunk.get('id'))
+        # Cache this trunk's details
+        trunk_details[trunk.get('id')] = {
+            'id': trunk.get('id'),
+            'content': trunk.get('content'),
+            'file_path': trunk.get('file_path'),
+            'title': trunk.get('title'),
+            'referrence': trunk.get('referrence'),
+            'page_no': trunk.get('page_no')
+        }
+
+    if len(trunk_texts) == 0:
+        # No candidates from the vector search; return an empty successful response
+        # rather than None, which would not satisfy response_model=StandardResponse
+        return StandardResponse(success=True)
+
+    # Initialize TextSimilarityFinder and load the corpus
+    similarity_finder = TextSimilarityFinder(method='tfidf', use_jieba=True)
+    similarity_finder.load_corpus(trunk_texts, trunk_ids)
+
+    similar_results = similarity_finder.find_most_similar(search_text, top_n=1)
+
+    # Process the search results
+    if similar_results and similar_results[0]['similarity'] >= 0.3:  # similarity threshold
+        # Get the trunk_id of the most similar text
+        trunk_id = similar_results[0]['path']
+
+        # Fetch the trunk details from the cache
+        trunk_info = trunk_details.get(trunk_id)
+
+        if trunk_info:
+            search_result = {
+                **trunk_info,
+                'distance': similar_results[0]['similarity']  # report the similarity score in the 'distance' field
+            }
+
+            reference, _ = _process_search_result(search_result, 1)
+            # prop_result["references"] = [reference]
+            # prop_result["answer"] = [{
+            #     "sentence": prop_value,
+            #     "flag": "1"
+            # }]
+        else:
+            # Trunk details missing from the cache; fall back to sentence-split search
+            sentences = SentenceUtil.split_text(search_text, 10)
+            result_sentences, references = _process_sentence_search(
+                '', '', sentences, trunks_service
+            )
+    else:
+        # The whole-text search found no match; fall back to sentence-split search
+        sentences = SentenceUtil.split_text(search_text, 10)
+        result_sentences, references = _process_sentence_search(
+            '', '', sentences, trunks_service
+        )
+        # if references:
+        #     prop_result["references"] = references
+        # if result_sentences:
+        #     prop_result["answer"] = result_sentences
+    return StandardResponse(success=True)
+
+
 text_search_router = router
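
A minimal sketch of exercising the new /knowledge/text/find_similar_texts route; the base URL and the sample query are placeholders:

    # Sketch only: call the new endpoint with the requests library.
    import requests

    resp = requests.post(
        "http://localhost:8000/knowledge/text/find_similar_texts",
        json={"search_text": "sample query to match against stored trunks"},
    )
    resp.raise_for_status()
    print(resp.json())  # StandardResponse body; only the success flag is populated for now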