1234567891011121314151617181920212223242526272829303132333435363738394041424344 |
- import os
- import urllib
- from service.trunks_service import TrunksService
class FileReader2:
    """Walk a directory tree and submit text files as "trunk" records.

    Each record is handed to ``TrunksService().create_trunk`` as a dict.
    The path handling used for split files relies on literal backslash
    separators, so it only matches paths as produced on Windows.
    """

    # Directory component separating a book folder from its split files.
    _TRUNK_MARKER = '\\trunk\\'
    # Prefix put in front of the stored path of every split file.
    _REPORT_PREFIX = '\\report\\trunk2\\'
    # Base URL used to build the 'referrence' link of a split file.
    _BOOKS_URL = 'http://173.18.12.205:8001/books/'

    @staticmethod
    def _stored_path(relative_path):
        """Return the path recorded for a split file.

        Everything up to and including the first ``trunk`` directory
        component is dropped and the report prefix is put in front of the
        remainder.

        Raises:
            ValueError: if *relative_path* has no ``trunk`` component.
        """
        # partition() keeps the whole tail even when a deeper folder is also
        # called "trunk"; indexing the result of an unbounded split() would
        # silently truncate the path in that case.
        _, marker, tail = relative_path.partition(FileReader2._TRUNK_MARKER)
        if not marker:
            raise ValueError(
                f'no {FileReader2._TRUNK_MARKER!r} component in {relative_path!r}'
            )
        return FileReader2._REPORT_PREFIX + tail

    @staticmethod
    def _reference_url(file_path):
        """Return the PDF link for the book folder that contains *file_path*.

        The book name is the last backslash-separated component of whatever
        precedes the first ``trunk`` directory component; it is
        percent-encoded before being placed in the URL.
        """
        # Imported explicitly: a bare ``import urllib`` does not guarantee
        # that the ``urllib.parse`` submodule has been loaded.
        from urllib.parse import quote

        # Split the full file path (not its dirname) so that files located
        # directly inside the trunk folder still resolve to the book folder
        # instead of to "trunk" itself.
        book_dir = file_path.split(FileReader2._TRUNK_MARKER, 1)[0]
        book_name = book_dir.split('\\')[-1]
        return FileReader2._BOOKS_URL + quote(book_name) + '.pdf'

    @staticmethod
    def find_and_print_split_files(directory):
        """Submit every ``*_split_*.txt`` file below *directory* as a trunk.

        The first line of each file is sent as ``meta_header`` and the rest
        as ``content``. A failure on one file is printed and the walk
        continues with the next file.

        Args:
            directory: root folder to scan recursively.
        """
        for root, _dirs, files in os.walk(directory):
            for file in files:
                if '_split_' not in file or not file.endswith('.txt'):
                    continue
                # Bound before the try block so the error message below can
                # always refer to it.
                file_path = os.path.join(root, file)
                try:
                    relative_path = FileReader2._stored_path(
                        os.path.relpath(file_path, directory)
                    )
                    with open(file_path, 'r', encoding='utf-8') as f:
                        meta_header = f.readline()
                        content = f.read()
                    if not meta_header:
                        raise ValueError('file is empty, no meta header line')

                    TrunksService().create_trunk({
                        'file_path': relative_path,
                        'content': content,
                        'type': 'trunk',
                        'meta_header': meta_header,
                        # Key spelling is kept as-is; it is part of the
                        # payload sent to the service.
                        'referrence': FileReader2._reference_url(file_path),
                    })
                except Exception as e:
                    # Best-effort batch import: report the file and move on.
                    print(f'Error processing file {file_path}: {str(e)}')

    @staticmethod
    def process_txt_files(directory):
        """Submit every ``.txt`` file below *directory* as a trunk.

        The whole file is sent as ``content`` and the file name without its
        extension as ``title``. Errors are not caught here; the first
        failure stops the walk and propagates to the caller.

        Args:
            directory: root folder to scan recursively.
        """
        for root, _dirs, files in os.walk(directory):
            for file in files:
                if not file.endswith('.txt'):
                    continue
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                title = os.path.splitext(file)[0]
                TrunksService().create_trunk({
                    'file_path': file_path,
                    'content': content,
                    'type': 'trunk',
                    'title': title,
                })
if __name__ == '__main__':
    # Raw string: the Windows path contains backslashes followed by
    # characters that do not form valid escape sequences, which a normal
    # string literal warns about. The resulting value is unchanged.
    directory = r'E:\急诊科资料\中华医学期刊数据库'
    FileReader2.find_and_print_split_files(directory)
|