generate_doc_abstract.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
# Analyze each article and generate the analysis result (an abstract) for it.
  2. import asyncio
  3. import os
  4. import time
  5. from test.deepseek_chat import chat_with_deepseek
  6. def load_prompt(filename):
  7. with open(filename, "r", encoding="utf-8") as f:
  8. return "".join(f.readlines())
  9. async def chat(prompt: str) -> str:
  10. message = [{'role':'user', 'content': prompt}]
  11. call_deepseek = chat_with_deepseek(message)
  12. output = ""
  13. async for chunk in call_deepseek:
  14. output = output + chunk
  15. print(chunk, end="")
  16. print("\n")
  17. return output
  18. if __name__ == "__main__":
  19. #
  20. #
  21. prompt_template = load_prompt("kb/prompt_4_abstract.txt")
  22. path = "./docs"
  23. for root, dirs, files in os.walk(path):
  24. for file in files:
  25. file_path = os.path.join(root, file)
  26. print(">>> process", file_path)
  27. text = load_prompt(file_path)
  28. prompt = prompt_template.format(text=text)
  29. count = 0
  30. while count < 3:
  31. try:
  32. coro = chat(prompt)
  33. output = asyncio.run(coro)
  34. if os.path.exists(f"./doc_abstract/{file}"):
  35. print("abstract file already exists, skip")
  36. else:
  37. with open(f"./doc_abstract/{file}", "w", encoding="utf-8") as f:
  38. f.write(output)
  39. count = 3
  40. except Exception as e:
  41. print(e)
  42. print(">>> process", file_path, "failed, retry", count)
  43. count = count + 1
  44. time.sleep(3)