SGTY il y a 2 semaines
Parent
commit
18d31efad2
1 fichier modifié avec 189 ajouts et 0 suppressions
  1. 189 0
      src/main/java/com/qizhen/healsphere/WordFileProcessor.java

+ 189 - 0
src/main/java/com/qizhen/healsphere/WordFileProcessor.java

@@ -0,0 +1,189 @@
+package com.qizhen.healsphere;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.ss.usermodel.*;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class WordFileProcessor {
+
+    // 递归查找Word文件并处理
+    public void processWordFiles(String rootDir) {
+        File root = new File(rootDir);
+        if (!root.exists() || !root.isDirectory()) {
+            System.out.println("Invalid directory: " + rootDir);
+            return;
+        }
+
+        findAndProcessWordFiles(root);
+    }
+
+    private void findAndProcessWordFiles(File dir) {
+        File[] files = dir.listFiles();
+        if (files == null) return;
+
+        for (File file : files) {
+            if (file.isDirectory()) {
+                findAndProcessWordFiles(file); // 递归查找子目录
+            } else if (isWordFile(file)) {
+                processWordFile(file); // 处理Word文件
+            }
+        }
+    }
+
+    private boolean isWordFile(File file) {
+        String name = file.getName().toLowerCase();
+        return name.endsWith(".doc") || name.endsWith(".docx");
+    }
+
+    private void processWordFile(File wordFile) {
+        String fileNameWithoutExt = removeExtension(wordFile.getName());
+        File parentDir = wordFile.getParentFile();
+
+        // 查找与Word文件名匹配的文件夹
+        File matchingFolder = findMatchingFolder(parentDir, fileNameWithoutExt);
+        if (matchingFolder == null) {
+            System.out.println("No matching folder found for: " + wordFile.getName());
+            return;
+        }
+
+        // 查找_split_+数字的txt文件
+        List<File> splitTxtFiles = findSplitTxtFiles(matchingFolder);
+        if (splitTxtFiles.isEmpty()) {
+            System.out.println("No _split_ txt files found in folder: " + matchingFolder.getName());
+            return;
+        }
+
+        // 按数字升序排序
+        //splitTxtFiles.sort((f1, f2) -> extractNumber(f1.getName()) - extractNumber(f2.getName()));
+
+        // 读取内容并保存到Excel
+        saveToExcel(splitTxtFiles, parentDir, fileNameWithoutExt);
+    }
+
+    private File findMatchingFolder(File parentDir, String fileNameWithoutExt) {
+        File[] folders = parentDir.listFiles(File::isDirectory);
+        if (folders == null) return null;
+
+        for (File folder : folders) {
+            if (folder.getName().equalsIgnoreCase(fileNameWithoutExt)) {
+                return folder;
+            }
+        }
+        return null;
+    }
+
+    private List<File> findSplitTxtFiles(File folder) {
+        List<File> result = new ArrayList<>();
+        Pattern pattern = Pattern.compile("_split_(\\d+)\\.txt", Pattern.CASE_INSENSITIVE);
+        recursiveFindSplitTxtFiles(folder, pattern, result);
+        return result;
+    }
+
+    private void recursiveFindSplitTxtFiles(File folder, Pattern pattern, List<File> result) {
+        File[] files = folder.listFiles();
+        if (files == null) return;
+
+        for (File file : files) {
+            if (file.isDirectory()) {
+                recursiveFindSplitTxtFiles(file, pattern, result); // 递归查找子目录
+            } else if (pattern.matcher(file.getName()).find()) {
+                result.add(file); // 匹配的文件加入结果列表
+            }
+        }
+    }
+
+    private int extractNumber(String fileName) {
+        Pattern pattern = Pattern.compile("_split_(\\d+)", Pattern.CASE_INSENSITIVE);
+        Matcher matcher = pattern.matcher(fileName);
+        if (matcher.find()) {
+            return Integer.parseInt(matcher.group(1));
+        }
+        return Integer.MAX_VALUE; // 默认值,确保排序正确
+    }
+
+    private void saveToExcel(List<File> txtFiles, File parentDir, String fileNameWithoutExt) {
+        Workbook workbook = new XSSFWorkbook();
+        Sheet sheet = workbook.createSheet("Content");
+
+        int rowNum = 0;
+        for (File txtFile : txtFiles) {
+            try (FileInputStream fis = new FileInputStream(txtFile)) {
+                StringBuilder content = new StringBuilder();
+                byte[] buffer = new byte[1024];
+                int bytesRead;
+                while ((bytesRead = fis.read(buffer)) != -1) {
+                    content.append(new String(buffer, 0, bytesRead));
+                }
+
+                // 构造相对路径
+                String relativePath = getRelativePath(txtFile, parentDir);
+                relativePath = relativePath.replace(fileNameWithoutExt, "");
+                Row row = sheet.createRow(rowNum++);
+                Cell pathCell = row.createCell(0);
+                pathCell.setCellValue(relativePath); // 第一列:文件路径
+
+                Cell contentCell = row.createCell(1);
+                contentCell.setCellValue(content.toString()); // 第二列:文件内容
+            } catch (Exception e) {
+                System.err.println("Error reading file: " + txtFile.getName());
+                e.printStackTrace();
+            }
+        }
+
+        // 保存Excel文件
+        String excelFilePath = parentDir.getAbsolutePath() + File.separator + fileNameWithoutExt + ".xlsx";
+        File excelFile = new File(excelFilePath);
+
+        // 检查文件是否存在并尝试覆盖
+        if (excelFile.exists()) {
+            if (!excelFile.canWrite()) {
+                throw new RuntimeException("Excel file exists but cannot be overwritten: " + excelFilePath);
+            }
+            // 删除现有文件以确保覆盖
+            if (!excelFile.delete()) {
+                throw new RuntimeException("Failed to delete existing Excel file: " + excelFilePath);
+            }
+        }
+
+        try (FileOutputStream fos = new FileOutputStream(excelFilePath)) {
+            workbook.write(fos);
+            System.out.println("Excel file saved to: " + excelFilePath);
+        } catch (Exception e) {
+            System.err.println("Error saving Excel file.");
+            e.printStackTrace();
+        } finally {
+            try {
+                workbook.close();
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+    // 新增方法:获取相对于parentDir的相对路径
+    private String getRelativePath(File file, File parentDir) {
+        String filePath = file.getAbsolutePath();
+        String parentPath = parentDir.getAbsolutePath();
+        return filePath.substring(parentPath.length()).replaceFirst("^\\\\", ""); // 去掉开头的斜杠
+    }
+
+    private String removeExtension(String fileName) {
+        int lastIndexOf = fileName.lastIndexOf(".");
+        if (lastIndexOf == -1) return fileName;
+        return fileName.substring(0, lastIndexOf);
+    }
+
+    public static void main(String[] args) {
+        WordFileProcessor processor = new WordFileProcessor();
+        processor.processWordFiles("E:\\打标资料\\"); // 替换为实际根目录路径
+    }
+}