python
/
knowledge


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344
import os
import urllib
import urllib.parse

from service.trunks_service import TrunksService

class FileReader2:
    r"""Walk a directory tree and register text files through ``TrunksService``.

    Both methods are static; the class only serves as a namespace. Path
    handling in ``find_and_print_split_files`` is written against
    Windows-style ``\`` separators (the ``\trunk\`` marker and the
    ``\report\trunk2\`` prefix are literal strings).
    """

    @staticmethod
    def find_and_print_split_files(directory):
        r"""Register every ``*_split_*.txt`` file under *directory* as a trunk.

        For each matching file the first line is sent as ``meta_header`` and
        the remaining lines as ``content``. The stored ``file_path`` is the
        part of the path below the ``\trunk\`` folder, re-rooted under
        ``\report\trunk2\``. The ``referrence`` URL points at a PDF named
        after the folder that contains the ``trunk`` folder.

        Args:
            directory: Root directory to walk recursively.

        Returns:
            None. Failures on a single file are printed and the walk
            continues with the next file.
        """
        marker = '\\trunk\\'
        for root, _dirs, files in os.walk(directory):
            for file in files:
                if not ('_split_' in file and file.endswith('.txt')):
                    continue
                # Built outside the try so the error message below can
                # always reference it.
                file_path = os.path.join(root, file)
                try:
                    relative_path = os.path.relpath(file_path, directory)
                    # Drop everything up to and including the first \trunk\;
                    # partition keeps the whole tail even if the marker
                    # occurs again further down the path.
                    _, found, below_trunk = relative_path.partition(marker)
                    if not found:
                        raise ValueError(
                            f'path does not contain {marker!r}: {relative_path}'
                        )
                    relative_path = '\\report\\trunk2\\' + below_trunk

                    with open(file_path, 'r', encoding='utf-8') as f:
                        lines = f.readlines()
                    if not lines:
                        raise ValueError('file is empty, no meta header line')
                    meta_header = lines[0]
                    content = ''.join(lines[1:])

                    # The book name is the last path component before
                    # \trunk\. The full path is used (not its dirname) so a
                    # file sitting directly inside the trunk folder still
                    # resolves to the book folder instead of "trunk".
                    book_dir = file_path.partition(marker)[0]
                    filename = book_dir.split('\\')[-1]
                    newfilename = urllib.parse.quote(filename)
                    # NOTE: the 'referrence' key spelling is kept as-is; it
                    # is the key this code has always sent to the service.
                    TrunksService().create_trunk({
                        'file_path': relative_path,
                        'content': content,
                        'type': 'trunk',
                        'meta_header': meta_header,
                        'referrence': 'http://173.18.12.205:8001/books/' + newfilename + '.pdf',
                    })
                except Exception as e:
                    # Best-effort batch import: report and move on.
                    print(f'Error processing file {file_path}: {str(e)}')

    @staticmethod
    def process_txt_files(directory):
        """Register every ``.txt`` file under *directory* as a trunk.

        The whole file is sent as ``content``, the file name without its
        extension as ``title``, and the joined on-disk path as ``file_path``.

        Args:
            directory: Root directory to walk recursively.

        Returns:
            None.

        Raises:
            Any exception raised while reading a file or by
            ``TrunksService.create_trunk`` propagates and stops the walk.
        """
        for root, _dirs, files in os.walk(directory):
            for file in files:
                if not file.endswith('.txt'):
                    continue
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                title = os.path.splitext(file)[0]
                TrunksService().create_trunk({
                    'file_path': file_path,
                    'content': content,
                    'type': 'trunk',
                    'title': title,
                })

if __name__ == '__main__':
    # Raw string: the Windows path contains backslashes followed by
    # non-escape characters, which a normal literal flags as invalid escape
    # sequences. The resulting value is unchanged.
    directory = r'E:\急诊科资料\中华医学期刊数据库'
    FileReader2.find_and_print_split_files(directory)