12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364 |
- import requests
- import re
- import os
- import json
- from dotenv import load_dotenv
- load_dotenv()
# Directory that holds the wiki search cache file (wiki_cache.json).
# os.getenv returns None when DOC_STORAGE_PATH is not set.
DOC_STORAGE_PATH = os.getenv("DOC_STORAGE_PATH")

# XWiki base URL and authentication info.
# Each value can be overridden through the environment (or the .env file) so
# that real credentials do not have to live in source control; the literals
# below are only fallbacks used when the variable is unset.
base_url = os.getenv("XWIKI_BASE_URL", "http://localhost:8081")
username = os.getenv("XWIKI_USERNAME", "ZHU")
password = os.getenv("XWIKI_PASSWORD", "p@ssw0rd")
def wiki_search_documents(search_query, timeout=30):
    """Search XWiki for documents and cache the hits on disk.

    Requests the XWiki search page, scrapes the document links out of the
    returned HTML and, when at least one hit was found, writes them to
    ``wiki_cache.json`` under ``DOC_STORAGE_PATH``.

    Args:
        search_query: Free-text query sent as the ``text`` URL parameter.
        timeout: Seconds to wait for the server; without a timeout the
            request could block indefinitely. ``requests`` raises its
            timeout exception when the limit is exceeded.

    Returns:
        A list of dicts with the keys ``id`` (a string counter starting at
        "1000"), ``title`` (link text), ``url`` (link href) and ``pub_date``
        (always ""). The list is empty when the server does not answer 200
        or when no link matches.
    """
    search_url = f"{base_url}/bin/view/Main/Search"
    # Hand the query to requests as parameters so it is URL-encoded; building
    # the query string by hand breaks on characters such as "&", "#" or "=".
    # A list of pairs is used because "f_locale" is deliberately sent twice.
    query_params = [
        ("text", search_query),
        ("f_type", "DOCUMENT"),
        ("f_locale", "zh_CN"),
        ("f_locale", ""),
        ("r", "1"),
    ]
    response = requests.get(
        search_url,
        params=query_params,
        auth=(username, password),
        timeout=timeout,
    )

    if response.status_code != 200:
        print(f"搜索失败,状态码:{response.status_code}")
        return []

    # Each hit is rendered as a file icon <span> followed, on the next line,
    # by the link to the document; capture (href, link text).
    hit_pattern = re.compile(
        r'<span class="fa fa-file-o" aria-hidden="true"></span>\n<a href="(.*?)">(.*?)</a>'
    )
    ret = [
        {"id": f"{index}", "title": link_text, "url": href, "pub_date": ""}
        for index, (href, link_text) in enumerate(
            hit_pattern.findall(response.text), start=1000
        )
    ]

    # Only overwrite the cache when there is something to store.
    if ret:
        # The backslash is escaped explicitly ("\w" is an invalid escape
        # sequence); the resulting path is the same one wiki_get_document
        # reads the cache from.
        with open(f"{DOC_STORAGE_PATH}\\wiki_cache.json", "w", encoding="utf-8") as f:
            f.write(json.dumps({"data": ret}, ensure_ascii=False, indent=4))
    return ret
def get_wiki_document(title: str, url: str, timeout: float = 30):
    """Fetch the plain rendering of a single wiki page.

    Args:
        title: Document title; returned unchanged in the result.
        url: Page path that is appended directly to ``base_url``.
        timeout: Seconds to wait for the server; without a timeout the
            request could block indefinitely. ``requests`` raises its
            timeout exception when the limit is exceeded.

    Returns:
        ``{"url": url, "title": title, "text": <response body>}`` when the
        server answers 200, otherwise ``None``.
    """
    # "xpage=plain" is added to the page URL as its query string.
    get_url = f"{base_url}{url}?xpage=plain"
    print(f"get_wiki_document {get_url}")
    response = requests.get(get_url, auth=(username, password), timeout=timeout)
    if response.status_code != 200:
        return None
    return {"url": url, "title": title, "text": response.text}
def wiki_get_document(title: str):
    """Look up a cached search hit by title or id and fetch its content.

    Reads ``wiki_cache.json`` under ``DOC_STORAGE_PATH`` and passes the first
    entry whose ``title`` or ``id`` equals ``title`` to ``get_wiki_document``.

    Args:
        title: Either the document title or the string id stored in the
            cache entry.

    Returns:
        Whatever ``get_wiki_document`` returns for the matching entry, or
        ``None`` when no entry matches or the cache file does not exist.
    """
    print(">>>>>> wiki_get_document ", title)
    try:
        # The backslash is escaped explicitly ("\w" is an invalid escape
        # sequence); the resulting path string is unchanged.
        with open(f"{DOC_STORAGE_PATH}\\wiki_cache.json", "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # The cache only exists after a search produced at least one hit;
        # without it nothing can match, so report "not found".
        return None

    # The cache file is already closed here, so it is not held open while
    # the document is downloaded.
    for entry in data["data"]:
        if entry["title"] == title or entry["id"] == title:
            return get_wiki_document(entry["title"], entry["url"])
    return None
if __name__ == "__main__":
    # Manual smoke run: search the wiki (which refreshes the on-disk cache
    # when there are hits), then fetch one document by its title.
    print(wiki_search_documents("保险公司 大模型"))
    print(wiki_get_document("在保险公司实施大模型项目的规定"))
|