|
@@ -333,6 +333,7 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
|
|
"name": node_name,
|
|
"name": node_name,
|
|
"category": node.get('category', ''),
|
|
"category": node.get('category', ''),
|
|
"props": [],
|
|
"props": [],
|
|
|
|
+ "files": [],
|
|
"distance": 0
|
|
"distance": 0
|
|
}
|
|
}
|
|
|
|
|
|
@@ -370,15 +371,36 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
|
|
reference_index = 1
|
|
reference_index = 1
|
|
|
|
|
|
# 对每个句子进行向量搜索
|
|
# 对每个句子进行向量搜索
|
|
- for sentence in sentences:
|
|
|
|
- original_sentence = sentence
|
|
|
|
- sentence = sentence.replace("\n", "<br>")
|
|
|
|
- if len(sentence) < 10:
|
|
|
|
- result_sentences.append(sentence)
|
|
|
|
|
|
+ i = 0
|
|
|
|
+ while i < len(sentences):
|
|
|
|
+ original_sentence = sentences[i]
|
|
|
|
+ sentence = original_sentence.replace("\n", "<br>")
|
|
|
|
+
|
|
|
|
+ # 如果当前句子长度小于10且不是最后一句,则与下一句合并
|
|
|
|
+ if len(sentence) < 10 and i + 1 < len(sentences):
|
|
|
|
+ next_sentence = sentences[i + 1].replace("\n", "<br>")
|
|
|
|
+ combined_sentence = sentence + " " + next_sentence
|
|
|
|
+ # 添加原短句到结果,flag为空
|
|
|
|
+ result_sentences.append({
|
|
|
|
+ "sentence": sentence,
|
|
|
|
+ "flag": ""
|
|
|
|
+ })
|
|
|
|
+ # 使用合并后的句子进行搜索
|
|
|
|
+ search_text = f"{node_name}:{prop_title}:{combined_sentence}"
|
|
|
|
+ i += 1 # 跳过下一句,因为已经合并使用
|
|
|
|
+ elif len(sentence) < 10:
|
|
|
|
+ # 如果是最后一句且长度小于10,直接添加到结果,flag为空
|
|
|
|
+ result_sentences.append({
|
|
|
|
+ "sentence": sentence,
|
|
|
|
+ "flag": ""
|
|
|
|
+ })
|
|
|
|
+ i += 1
|
|
continue
|
|
continue
|
|
-
|
|
|
|
- # 构建搜索文本
|
|
|
|
- search_text = f"{node_name}:{prop_title}:{sentence}"
|
|
|
|
|
|
+ else:
|
|
|
|
+ # 句子长度足够,直接使用
|
|
|
|
+ search_text = f"{node_name}:{prop_title}:{sentence}"
|
|
|
|
+
|
|
|
|
+ i += 1
|
|
|
|
|
|
# 检查缓存
|
|
# 检查缓存
|
|
if cached_results:
|
|
if cached_results:
|
|
@@ -407,10 +429,22 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
|
|
)
|
|
)
|
|
|
|
|
|
# 处理搜索结果
|
|
# 处理搜索结果
|
|
|
|
+ if not search_results:
|
|
|
|
+ # 没有搜索结果,添加原句子,flag为空
|
|
|
|
+ result_sentences.append({
|
|
|
|
+ "sentence": sentence,
|
|
|
|
+ "flag": ""
|
|
|
|
+ })
|
|
|
|
+ continue
|
|
|
|
+
|
|
for search_result in search_results:
|
|
for search_result in search_results:
|
|
distance = search_result.get("distance", DISTANCE_THRESHOLD)
|
|
distance = search_result.get("distance", DISTANCE_THRESHOLD)
|
|
if distance >= DISTANCE_THRESHOLD:
|
|
if distance >= DISTANCE_THRESHOLD:
|
|
- result_sentences.append(sentence)
|
|
|
|
|
|
+ # 距离过大,添加原句子,flag为空
|
|
|
|
+ result_sentences.append({
|
|
|
|
+ "sentence": sentence,
|
|
|
|
+ "flag": ""
|
|
|
|
+ })
|
|
continue
|
|
continue
|
|
|
|
|
|
# 检查是否已存在相同引用
|
|
# 检查是否已存在相同引用
|
|
@@ -432,15 +466,11 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
|
|
all_references.append(reference)
|
|
all_references.append(reference)
|
|
reference_index += 1
|
|
reference_index += 1
|
|
|
|
|
|
- # 添加引用标记
|
|
|
|
- if sentence.endswith('<br>'):
|
|
|
|
- # 如果有多个<br>,在所有<br>前添加^[current_index]^
|
|
|
|
- result_sentence = sentence.replace('<br>', f'^[{current_index}]^<br>')
|
|
|
|
- else:
|
|
|
|
- # 直接在句子末尾添加^[current_index]^
|
|
|
|
- result_sentence = f'{sentence}^[{current_index}]^'
|
|
|
|
-
|
|
|
|
- result_sentences.append(result_sentence)
|
|
|
|
|
|
+ # 添加句子和引用标记(作为单独的flag字段)
|
|
|
|
+ result_sentences.append({
|
|
|
|
+ "sentence": sentence,
|
|
|
|
+ "flag": str(current_index)
|
|
|
|
+ })
|
|
|
|
|
|
# 更新属性值,添加引用信息
|
|
# 更新属性值,添加引用信息
|
|
if all_references:
|
|
if all_references:
|
|
@@ -450,6 +480,37 @@ async def node_props_search(request: NodePropsSearchRequest, db: Session = Depen
|
|
if result_sentences:
|
|
if result_sentences:
|
|
prop_result["answer"] = result_sentences
|
|
prop_result["answer"] = result_sentences
|
|
|
|
|
|
|
|
+ # 处理所有引用中的文件信息
|
|
|
|
+ all_files = set()
|
|
|
|
+ for prop_result in result["props"]:
|
|
|
|
+ if "references" in prop_result:
|
|
|
|
+ for ref in prop_result["references"]:
|
|
|
|
+ referrence = ref.get("referrence", "")
|
|
|
|
+ if referrence and "/books/" in referrence:
|
|
|
|
+ # 提取/books/后面的文件名
|
|
|
|
+ file_name = referrence.split("/books/")[-1]
|
|
|
|
+ if file_name:
|
|
|
|
+ # 根据文件名后缀确定文件类型
|
|
|
|
+ file_type = ""
|
|
|
|
+ if file_name.lower().endswith(".pdf"):
|
|
|
|
+ file_type = "pdf"
|
|
|
|
+ elif file_name.lower().endswith(".doc") or file_name.lower().endswith(".docx"):
|
|
|
|
+ file_type = "doc"
|
|
|
|
+ elif file_name.lower().endswith(".xls") or file_name.lower().endswith(".xlsx"):
|
|
|
|
+ file_type = "excel"
|
|
|
|
+ elif file_name.lower().endswith(".ppt") or file_name.lower().endswith(".pptx"):
|
|
|
|
+ file_type = "ppt"
|
|
|
|
+ else:
|
|
|
|
+ file_type = "other"
|
|
|
|
+
|
|
|
|
+ all_files.add((file_name, file_type))
|
|
|
|
+
|
|
|
|
+ # 将文件信息添加到结果中
|
|
|
|
+ result["files"] = [{
|
|
|
|
+ "file_name": file_name,
|
|
|
|
+ "file_type": file_type
|
|
|
|
+ } for file_name, file_type in all_files]
|
|
|
|
+
|
|
end_time = time.time()
|
|
end_time = time.time()
|
|
logger.info(f"node_props_search接口耗时: {(end_time - start_time) * 1000:.2f}ms")
|
|
logger.info(f"node_props_search接口耗时: {(end_time - start_time) * 1000:.2f}ms")
|
|
return StandardResponse(success=True, data=result)
|
|
return StandardResponse(success=True, data=result)
|