|
@@ -0,0 +1,189 @@
|
|
|
+package com.qizhen.healsphere;
|
|
|
+
|
|
|
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
|
|
|
+
|
|
|
+public class WordFileProcessor {
|
|
|
+
|
|
|
+ // 递归查找Word文件并处理
|
|
|
+ public void processWordFiles(String rootDir) {
|
|
|
+ File root = new File(rootDir);
|
|
|
+ if (!root.exists() || !root.isDirectory()) {
|
|
|
+ System.out.println("Invalid directory: " + rootDir);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ findAndProcessWordFiles(root);
|
|
|
+ }
|
|
|
+
|
|
|
+ private void findAndProcessWordFiles(File dir) {
|
|
|
+ File[] files = dir.listFiles();
|
|
|
+ if (files == null) return;
|
|
|
+
|
|
|
+ for (File file : files) {
|
|
|
+ if (file.isDirectory()) {
|
|
|
+ findAndProcessWordFiles(file); // 递归查找子目录
|
|
|
+ } else if (isWordFile(file)) {
|
|
|
+ processWordFile(file); // 处理Word文件
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private boolean isWordFile(File file) {
|
|
|
+ String name = file.getName().toLowerCase();
|
|
|
+ return name.endsWith(".doc") || name.endsWith(".docx");
|
|
|
+ }
|
|
|
+
|
|
|
+ private void processWordFile(File wordFile) {
|
|
|
+ String fileNameWithoutExt = removeExtension(wordFile.getName());
|
|
|
+ File parentDir = wordFile.getParentFile();
|
|
|
+
|
|
|
+ // 查找与Word文件名匹配的文件夹
|
|
|
+ File matchingFolder = findMatchingFolder(parentDir, fileNameWithoutExt);
|
|
|
+ if (matchingFolder == null) {
|
|
|
+ System.out.println("No matching folder found for: " + wordFile.getName());
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 查找_split_+数字的txt文件
|
|
|
+ List<File> splitTxtFiles = findSplitTxtFiles(matchingFolder);
|
|
|
+ if (splitTxtFiles.isEmpty()) {
|
|
|
+ System.out.println("No _split_ txt files found in folder: " + matchingFolder.getName());
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 按数字升序排序
|
|
|
+ //splitTxtFiles.sort((f1, f2) -> extractNumber(f1.getName()) - extractNumber(f2.getName()));
|
|
|
+
|
|
|
+ // 读取内容并保存到Excel
|
|
|
+ saveToExcel(splitTxtFiles, parentDir, fileNameWithoutExt);
|
|
|
+ }
|
|
|
+
|
|
|
+ private File findMatchingFolder(File parentDir, String fileNameWithoutExt) {
|
|
|
+ File[] folders = parentDir.listFiles(File::isDirectory);
|
|
|
+ if (folders == null) return null;
|
|
|
+
|
|
|
+ for (File folder : folders) {
|
|
|
+ if (folder.getName().equalsIgnoreCase(fileNameWithoutExt)) {
|
|
|
+ return folder;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ private List<File> findSplitTxtFiles(File folder) {
|
|
|
+ List<File> result = new ArrayList<>();
|
|
|
+ Pattern pattern = Pattern.compile("_split_(\\d+)\\.txt", Pattern.CASE_INSENSITIVE);
|
|
|
+ recursiveFindSplitTxtFiles(folder, pattern, result);
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ private void recursiveFindSplitTxtFiles(File folder, Pattern pattern, List<File> result) {
|
|
|
+ File[] files = folder.listFiles();
|
|
|
+ if (files == null) return;
|
|
|
+
|
|
|
+ for (File file : files) {
|
|
|
+ if (file.isDirectory()) {
|
|
|
+ recursiveFindSplitTxtFiles(file, pattern, result); // 递归查找子目录
|
|
|
+ } else if (pattern.matcher(file.getName()).find()) {
|
|
|
+ result.add(file); // 匹配的文件加入结果列表
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private int extractNumber(String fileName) {
|
|
|
+ Pattern pattern = Pattern.compile("_split_(\\d+)", Pattern.CASE_INSENSITIVE);
|
|
|
+ Matcher matcher = pattern.matcher(fileName);
|
|
|
+ if (matcher.find()) {
|
|
|
+ return Integer.parseInt(matcher.group(1));
|
|
|
+ }
|
|
|
+ return Integer.MAX_VALUE; // 默认值,确保排序正确
|
|
|
+ }
|
|
|
+
|
|
|
+ private void saveToExcel(List<File> txtFiles, File parentDir, String fileNameWithoutExt) {
|
|
|
+ Workbook workbook = new XSSFWorkbook();
|
|
|
+ Sheet sheet = workbook.createSheet("Content");
|
|
|
+
|
|
|
+ int rowNum = 0;
|
|
|
+ for (File txtFile : txtFiles) {
|
|
|
+ try (FileInputStream fis = new FileInputStream(txtFile)) {
|
|
|
+ StringBuilder content = new StringBuilder();
|
|
|
+ byte[] buffer = new byte[1024];
|
|
|
+ int bytesRead;
|
|
|
+ while ((bytesRead = fis.read(buffer)) != -1) {
|
|
|
+ content.append(new String(buffer, 0, bytesRead));
|
|
|
+ }
|
|
|
+
|
|
|
+ // 构造相对路径
|
|
|
+ String relativePath = getRelativePath(txtFile, parentDir);
|
|
|
+ relativePath = relativePath.replace(fileNameWithoutExt, "");
|
|
|
+ Row row = sheet.createRow(rowNum++);
|
|
|
+ Cell pathCell = row.createCell(0);
|
|
|
+ pathCell.setCellValue(relativePath); // 第一列:文件路径
|
|
|
+
|
|
|
+ Cell contentCell = row.createCell(1);
|
|
|
+ contentCell.setCellValue(content.toString()); // 第二列:文件内容
|
|
|
+ } catch (Exception e) {
|
|
|
+ System.err.println("Error reading file: " + txtFile.getName());
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 保存Excel文件
|
|
|
+ String excelFilePath = parentDir.getAbsolutePath() + File.separator + fileNameWithoutExt + ".xlsx";
|
|
|
+ File excelFile = new File(excelFilePath);
|
|
|
+
|
|
|
+ // 检查文件是否存在并尝试覆盖
|
|
|
+ if (excelFile.exists()) {
|
|
|
+ if (!excelFile.canWrite()) {
|
|
|
+ throw new RuntimeException("Excel file exists but cannot be overwritten: " + excelFilePath);
|
|
|
+ }
|
|
|
+ // 删除现有文件以确保覆盖
|
|
|
+ if (!excelFile.delete()) {
|
|
|
+ throw new RuntimeException("Failed to delete existing Excel file: " + excelFilePath);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ try (FileOutputStream fos = new FileOutputStream(excelFilePath)) {
|
|
|
+ workbook.write(fos);
|
|
|
+ System.out.println("Excel file saved to: " + excelFilePath);
|
|
|
+ } catch (Exception e) {
|
|
|
+ System.err.println("Error saving Excel file.");
|
|
|
+ e.printStackTrace();
|
|
|
+ } finally {
|
|
|
+ try {
|
|
|
+ workbook.close();
|
|
|
+ } catch (Exception e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 新增方法:获取相对于parentDir的相对路径
|
|
|
+ private String getRelativePath(File file, File parentDir) {
|
|
|
+ String filePath = file.getAbsolutePath();
|
|
|
+ String parentPath = parentDir.getAbsolutePath();
|
|
|
+ return filePath.substring(parentPath.length()).replaceFirst("^\\\\", ""); // 去掉开头的斜杠
|
|
|
+ }
|
|
|
+
|
|
|
+ private String removeExtension(String fileName) {
|
|
|
+ int lastIndexOf = fileName.lastIndexOf(".");
|
|
|
+ if (lastIndexOf == -1) return fileName;
|
|
|
+ return fileName.substring(0, lastIndexOf);
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void main(String[] args) {
|
|
|
+ WordFileProcessor processor = new WordFileProcessor();
|
|
|
+ processor.processWordFiles("E:\\打标资料\\"); // 替换为实际根目录路径
|
|
|
+ }
|
|
|
+}
|