3 周之前 · 3cf0fa2dca
--- a/agent/cdss/capbility.py
+++ b/agent/cdss/capbility.py
@@ -8,7 +8,8 @@ logger = logging.getLogger(__name__)
 
																 class CDSSCapability:
															
 
																     cdss_helper: CDSSHelper = None
															
 
																     def __init__(self):
															
 
																-        self.cdss_helper = CDSSHelper()
															
 
																+        """Create the capability; no CDSSHelper is constructed here."""
																+        # NOTE(review): eager CDSSHelper() construction was disabled by this change, so cdss_helper stays None — confirm nothing dereferences it before it is assigned.
															
 
																+        self.cdss_helper = None
															
 
																         logger.debug("CDSSCapability initialized")
															
 
																     def process(self, input: CDSSInput, embeding_search:bool = True) -> CDSSOutput:        
															
--- a/app.log
+++ b/app.log
--- a/db/session.py
+++ b/db/session.py
@@ -11,7 +11,7 @@ DB_HOST = os.getenv("DB_HOST", "173.18.12.203")
 
																+# Connection settings: each value is read from the environment, with a hard-coded fallback.
																 DB_PORT = os.getenv("DB_PORT", "5432")
															
 
																 DB_USER = os.getenv("DB_USER", "knowledge")
															
 
																+# NOTE(review): a literal password (and, above, a literal host IP) is committed as the fallback default — consider requiring DB_PASSWORD from the environment.
																 DB_PASS = os.getenv("DB_PASSWORD", "qwer1234.")
															
 
																-DB_NAME = os.getenv("DB_NAME", "postgres")
															
 
																+# This change switches the fallback database name from "postgres" to "medkg".
																+DB_NAME = os.getenv("DB_NAME", "medkg")
															
 
																+# The parts are interpolated verbatim; NOTE(review): special characters in the user or password are not URL-escaped here.
																 DATABASE_URL = f"postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
															
--- a/router/text_search.py
+++ b/router/text_search.py
@@ -789,4 +789,84 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
 
																         logger.error(f"Node props search failed: {str(e)}")
															
 
																         raise HTTPException(status_code=500, detail=str(e))
															
 
																+class FindSimilarTexts(BaseModel):
															
 
																+    """Request body for POST /knowledge/text/find_similar_texts."""
																+    # Text for which similar stored trunk content is looked up.
																+    search_text: str
															
 
																+
															
 
																+# Tuning values for the find_similar_texts endpoint (previously inlined literals).
																+_SIMILAR_VECTOR_LIMIT = 500      # `limit` passed to the vector search
																+_SIMILAR_VECTOR_DISTANCE = 0.7   # `distance` passed to the vector search
																+_SIMILAR_MIN_SCORE = 0.3         # minimum TF-IDF similarity for a whole-text match
																+_SIMILAR_SENTENCE_LENGTH = 10    # second argument passed to SentenceUtil.split_text
																+
																+
																+# NOTE(review): this handler reuses the name `node_props_search`, which an earlier
																+# handler in this module already carries (NodePropsSearchRequest variant). The later
																+# definition rebinds the module-level name; consider renaming it (e.g.
																+# `find_similar_texts`) once no caller relies on the current name.
																+@router.post("/knowledge/text/find_similar_texts", response_model=StandardResponse)
																+async def node_props_search(request: FindSimilarTexts, db: Session = Depends(get_db)):
																+    """Look up stored trunk content similar to ``request.search_text``.
																+
																+    Steps:
																+      1. Fetch candidate trunks with ``TrunksService.search_by_vector``.
																+      2. Rank the candidates against the full text with a TF-IDF
																+         ``TextSimilarityFinder`` and keep the single best hit.
																+      3. If the best hit reaches ``_SIMILAR_MIN_SCORE`` and its trunk is known,
																+         pass it to ``_process_search_result``; otherwise split the text into
																+         sentences and run ``_process_sentence_search``.
																+
																+    Args:
																+        request: Body carrying the text to search for.
																+        db: Injected session; not used by this handler.
																+
																+    Returns:
																+        ``StandardResponse(success=True)`` on every non-error path, including
																+        when the vector search yields no candidates.
																+
																+    Raises:
																+        HTTPException: status 500 with the error text if any step fails.
																+    """
																+    try:
																+        trunks_service = TrunksService()
																+        search_text = request.search_text
																+
																+        # Vector search for candidate content; tolerate a None result.
																+        candidates = trunks_service.search_by_vector(
																+            text=search_text,
																+            limit=_SIMILAR_VECTOR_LIMIT,
																+            type='trunk',
																+            distance=_SIMILAR_VECTOR_DISTANCE,
																+        ) or []
																+
																+        # Corpus for the TF-IDF ranking, plus a per-id cache of trunk details.
																+        trunk_texts = []
																+        trunk_ids = []
																+        trunk_details = {}
																+        for trunk in candidates:
																+            trunk_id = trunk.get('id')
																+            trunk_texts.append(trunk.get('content'))
																+            trunk_ids.append(trunk_id)
																+            trunk_details[trunk_id] = {
																+                'id': trunk_id,
																+                'content': trunk.get('content'),
																+                'file_path': trunk.get('file_path'),
																+                'title': trunk.get('title'),
																+                'referrence': trunk.get('referrence'),
																+                'page_no': trunk.get('page_no'),
																+            }
																+
																+        if not trunk_texts:
																+            # Return a model instance rather than None so the declared
																+            # response_model is honoured.
																+            return StandardResponse(success=True)
																+
																+        similarity_finder = TextSimilarityFinder(method='tfidf', use_jieba=True)
																+        similarity_finder.load_corpus(trunk_texts, trunk_ids)
																+        similar_results = similarity_finder.find_most_similar(search_text, top_n=1)
																+
																+        # The finder reports the matched corpus id under the 'path' key.
																+        best = similar_results[0] if similar_results else None
																+        trunk_info = None
																+        if best and best['similarity'] >= _SIMILAR_MIN_SCORE:
																+            trunk_info = trunk_details.get(best['path'])
																+
																+        if trunk_info:
																+            # The similarity score is stored under 'distance' unchanged (no conversion).
																+            search_result = {**trunk_info, 'distance': best['similarity']}
																+            _process_search_result(search_result, 1)
																+        else:
																+            # No acceptable whole-text match: fall back to a sentence-level search.
																+            sentences = SentenceUtil.split_text(search_text, _SIMILAR_SENTENCE_LENGTH)
																+            _process_sentence_search('', '', sentences, trunks_service)
																+
																+        # TODO(review): the reference / sentence results produced above are not yet
																+        # attached to the response; wire them into StandardResponse once its
																+        # payload shape for this endpoint is decided.
																+        return StandardResponse(success=True)
																+    except HTTPException:
																+        raise
																+    except Exception as e:
																+        logger.error(f"Find similar texts failed: {str(e)}")
																+        raise HTTPException(status_code=500, detail=str(e)) from e
															
 
																+
															
 
																+
															
 
																 text_search_router = router