|
@@ -406,6 +406,10 @@ def _get_file_type(file_name: str) -> str:
|
|
return "other"
|
|
return "other"
|
|
|
|
|
|
def _process_sentence_search(node_name: str, prop_title: str, sentences: list, trunks_service: TrunksService) -> tuple[list, list]:
|
|
def _process_sentence_search(node_name: str, prop_title: str, sentences: list, trunks_service: TrunksService) -> tuple[list, list]:
|
|
|
|
+ keywords = [node_name, prop_title] if node_name and prop_title else None
|
|
|
|
+    return _process_sentence_search_keywords(sentences, trunks_service, keywords=keywords)
|
|
|
|
+
|
|
|
|
+def _process_sentence_search_keywords(sentences: list, trunks_service: TrunksService, keywords: Optional[List[str]] = None) -> tuple[list, list]:
|
|
"""处理句子搜索,返回结果句子和引用列表"""
|
|
"""处理句子搜索,返回结果句子和引用列表"""
|
|
result_sentences = []
|
|
result_sentences = []
|
|
all_references = []
|
|
all_references = []
|
|
@@ -414,8 +418,9 @@ def _process_sentence_search(node_name: str, prop_title: str, sentences: list, t
|
|
|
|
|
|
while i < len(sentences):
|
|
while i < len(sentences):
|
|
sentence = sentences[i]
|
|
sentence = sentences[i]
|
|
-
|
|
|
|
- search_text = f"{node_name}:{prop_title}:{sentence}"
|
|
|
|
|
|
+ search_text = sentence
|
|
|
|
+ if keywords:
|
|
|
|
+            search_text = f"{':'.join(keywords)}:{sentence}"
|
|
# if len(sentence) < 10 and i + 1 < len(sentences):
|
|
# if len(sentence) < 10 and i + 1 < len(sentences):
|
|
# next_sentence = sentences[i + 1]
|
|
# next_sentence = sentences[i + 1]
|
|
# # result_sentences.append({"sentence": sentence, "flag": ""})
|
|
# # result_sentences.append({"sentence": sentence, "flag": ""})
|
|
@@ -754,12 +759,12 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
|
|
if file_name in file_index_map:
|
|
if file_name in file_index_map:
|
|
ref["index"] = f"{file_index_map[file_name]}-{ref['index']}"
|
|
ref["index"] = f"{file_index_map[file_name]}-{ref['index']}"
|
|
|
|
|
|
- # 更新answer中的flag
|
|
|
|
|
|
+ # 更新answer中的index
|
|
if "answer" in prop_result:
|
|
if "answer" in prop_result:
|
|
for sentence in prop_result["answer"]:
|
|
for sentence in prop_result["answer"]:
|
|
- if sentence["flag"]:
|
|
|
|
|
|
+ if sentence["index"]:
|
|
for ref in prop_result["references"]:
|
|
for ref in prop_result["references"]:
|
|
- if ref["index"].endswith(f"-{sentence['flag']}"):
|
|
|
|
|
|
+ if ref["index"].endswith(f"-{sentence['index']}"):
|
|
sentence["flag"] = ref["index"]
|
|
sentence["flag"] = ref["index"]
|
|
break
|
|
break
|
|
|
|
|
|
@@ -790,12 +795,15 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
class FindSimilarTexts(BaseModel):
|
|
class FindSimilarTexts(BaseModel):
|
|
|
|
+    keywords: Optional[List[str]] = None
|
|
search_text: str
|
|
search_text: str
|
|
|
|
|
|
@router.post("/knowledge/text/find_similar_texts", response_model=StandardResponse)
|
|
@router.post("/knowledge/text/find_similar_texts", response_model=StandardResponse)
|
|
-async def node_props_search(request: FindSimilarTexts, db: Session = Depends(get_db)):
|
|
|
|
|
|
+async def find_similar_texts(request: FindSimilarTexts, db: Session = Depends(get_db)):
|
|
trunks_service = TrunksService()
|
|
trunks_service = TrunksService()
|
|
search_text = request.search_text
|
|
search_text = request.search_text
|
|
|
|
+ if request.keywords:
|
|
|
|
+        search_text = f"{':'.join(request.keywords)}:{search_text}"
|
|
# 使用向量搜索获取相似内容
|
|
# 使用向量搜索获取相似内容
|
|
search_results = trunks_service.search_by_vector(
|
|
search_results = trunks_service.search_by_vector(
|
|
text=search_text,
|
|
text=search_text,
|
|
@@ -832,7 +840,7 @@ async def node_props_search(request: FindSimilarTexts, db: Session = Depends(get
|
|
similarity_finder.load_corpus(trunk_texts, trunk_ids)
|
|
similarity_finder.load_corpus(trunk_texts, trunk_ids)
|
|
|
|
|
|
similar_results = similarity_finder.find_most_similar(search_text, top_n=1)
|
|
similar_results = similarity_finder.find_most_similar(search_text, top_n=1)
|
|
-
|
|
|
|
|
|
+ prop_result = {}
|
|
# 处理搜索结果
|
|
# 处理搜索结果
|
|
if similar_results and similar_results[0]['similarity'] >= 0.3: # 设置相似度阈值
|
|
if similar_results and similar_results[0]['similarity'] >= 0.3: # 设置相似度阈值
|
|
# 获取最相似的文本对应的trunk_id
|
|
# 获取最相似的文本对应的trunk_id
|
|
@@ -848,25 +856,22 @@ async def node_props_search(request: FindSimilarTexts, db: Session = Depends(get
|
|
}
|
|
}
|
|
|
|
|
|
reference, _ = _process_search_result(search_result, 1)
|
|
reference, _ = _process_search_result(search_result, 1)
|
|
- # prop_result["references"] = [reference]
|
|
|
|
- # prop_result["answer"] = [{
|
|
|
|
- # "sentence": prop_value,
|
|
|
|
- # "flag": "1"
|
|
|
|
- # }]
|
|
|
|
- else:
|
|
|
|
- # 如果整体搜索没有找到匹配结果,则进行句子拆分搜索
|
|
|
|
- sentences = SentenceUtil.split_text(search_text, 10)
|
|
|
|
|
|
+ prop_result["references"] = [reference]
|
|
|
|
+ prop_result["answer"] = [{
|
|
|
|
+ "sentence": request.search_text,
|
|
|
|
+ "flag": "1"
|
|
|
|
+ }]
|
|
else:
|
|
else:
|
|
# 如果整体搜索没有找到匹配结果,则进行句子拆分搜索
|
|
# 如果整体搜索没有找到匹配结果,则进行句子拆分搜索
|
|
- sentences = SentenceUtil.split_text(search_text, 10)
|
|
|
|
- result_sentences, references = _process_sentence_search(
|
|
|
|
- '', '', sentences, trunks_service
|
|
|
|
|
|
+ sentences = SentenceUtil.split_text(request.search_text, 10)
|
|
|
|
+ result_sentences, references = _process_sentence_search_keywords(
|
|
|
|
+                sentences, trunks_service, keywords=request.keywords
|
|
)
|
|
)
|
|
- # if references:
|
|
|
|
- # prop_result["references"] = references
|
|
|
|
- # if result_sentences:
|
|
|
|
- # prop_result["answer"] = result_sentences
|
|
|
|
- return StandardResponse(success=True)
|
|
|
|
|
|
+ if references:
|
|
|
|
+ prop_result["references"] = references
|
|
|
|
+ if result_sentences:
|
|
|
|
+ prop_result["answer"] = result_sentences
|
|
|
|
+        return StandardResponse(success=True, data=prop_result)
|
|
|
|
|
|
|
|
|
|
text_search_router = router
|
|
text_search_router = router
|