file_reader2.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. import os
  2. import urllib
  3. from service.trunks_service import TrunksService
  4. class FileReader2:
  5. @staticmethod
  6. def find_and_print_split_files(directory):
  7. for root, dirs, files in os.walk(directory):
  8. for file in files:
  9. if '_split_' in file and file.endswith('.txt'):
  10. #if file.endswith('.md'):
  11. try:
  12. file_path = os.path.join(root, file)
  13. relative_path = os.path.relpath(file_path, directory)
  14. #relative_path的\trunk\前面的部分去除掉
  15. relative_path = relative_path.split('\\trunk\\')[1]
  16. relative_path='\\report\\trunk2\\'+relative_path
  17. with open(file_path, 'r', encoding='utf-8') as f:
  18. lines = f.readlines()
  19. meta_header = lines[0]
  20. content = ''.join(lines[1:])
  21. filename = os.path.dirname(file_path).split('\\trunk\\')[0]
  22. #filename取到最后一个\后面的部分
  23. filename = filename.split('\\')[-1]
  24. newfilename = urllib.parse.quote(filename)
  25. TrunksService().create_trunk({'file_path': relative_path, 'content': content,'type':'trunk','meta_header':meta_header,'referrence':'http://173.18.12.205:8001/books/'+newfilename+'.pdf'})
  26. except Exception as e:
  27. print(f'Error processing file {file_path}: {str(e)}')
  28. @staticmethod
  29. def process_txt_files(directory):
  30. for root, dirs, files in os.walk(directory):
  31. for file in files:
  32. if file.endswith('.txt'):
  33. file_path = os.path.join(root, file)
  34. with open(file_path, 'r', encoding='utf-8') as f:
  35. content = f.read()
  36. title = os.path.splitext(file)[0]
  37. TrunksService().create_trunk({'file_path': file_path, 'content': content, 'type': 'trunk', 'title': title})
  38. if __name__ == '__main__':
  39. directory = 'E:\急诊科资料\中华医学期刊数据库'
  40. FileReader2.find_and_print_split_files(directory)