# xwiki_search.py
#
# Search an XWiki instance and fetch the content of the documents it returns.
  1. import requests
  2. import re
  3. import os
  4. import json
  5. from dotenv import load_dotenv
  6. load_dotenv()
  7. DOC_STORAGE_PATH = os.getenv("DOC_STORAGE_PATH")
  8. # XWiki 基础 URL 和认证信息
  9. base_url = "http://localhost:8081"
  10. username = "ZHU"
  11. password = "p@ssw0rd"
  12. def wiki_search_documents(search_query):
  13. # 搜索文档
  14. search_url = f"{base_url}/bin/view/Main/Search?text={search_query}&f_type=DOCUMENT&f_locale=zh_CN&f_locale=&r=1"
  15. #search_url = f"{base_url}/xwiki/rest/wikis/query?q={search_query}"
  16. #search_url = f"{base_url}/wikis/XWiki/spaces/Main/documents?query={search_query}"
  17. # 发起请求
  18. response = requests.get(search_url, auth=(username, password))
  19. ret = []
  20. if response.status_code == 200:
  21. search_results = response.text
  22. match_result = re.compile(r'<span class="fa fa-file-o" aria-hidden="true"></span>\n<a href="(.*?)">(.*?)</a>').findall(search_results)
  23. if match_result:
  24. index = 1000
  25. for k,v in match_result:
  26. doc = { "id": f"{index}", "title": v, "url": k, "pub_date":""}
  27. ret.append(doc)
  28. index = index + 1
  29. else:
  30. print(f"搜索失败,状态码:{response.status_code}")
  31. if len(ret) > 0:
  32. with open(f"{DOC_STORAGE_PATH}\wiki_cache.json", "w", encoding="utf-8") as f:
  33. f.write(json.dumps({"data":ret}, ensure_ascii=False, indent=4))
  34. return ret
  35. def get_wiki_document(title: str, url: str):
  36. get_url = f"{base_url}{url}?xpage=plain"
  37. print(f"get_wiki_document {get_url}")
  38. response = requests.get(get_url, auth=(username, password))
  39. if response.status_code == 200:
  40. search_results = response.text
  41. return {"url": url,"title":title, "text": search_results}
  42. return None
  43. def wiki_get_document(title: str):
  44. print(">>>>>> wiki_get_document ", title)
  45. with open(f"{DOC_STORAGE_PATH}\wiki_cache.json", "r", encoding="utf-8") as f:
  46. data = json.load(f)
  47. for v in data["data"]:
  48. if v["title"] == title or v["id"] == title:
  49. return get_wiki_document(v["title"] , v["url"])
  50. return None
  51. if __name__ == "__main__":
  52. ret = wiki_search_documents("保险公司 大模型")
  53. print(ret)
  54. content = wiki_get_document("在保险公司实施大模型项目的规定")
  55. print(content)