file_converter.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. import os
  2. import sys
  3. import platform
  4. from typing import Optional
  5. import subprocess
  6. def convert_doc_to_docx(input_path: str, output_path: Optional[str] = None) -> str:
  7. """
  8. 将DOC文件转换为DOCX格式,兼容Windows和Linux系统
  9. 参数:
  10. input_path: 输入的DOC文件路径
  11. output_path: 输出的DOCX文件路径(可选),如果不指定则自动生成
  12. 返回:
  13. 转换后的文件路径
  14. """
  15. # 检查输入文件是否存在
  16. if not os.path.isfile(input_path):
  17. raise FileNotFoundError(f"输入文件不存在: {input_path}")
  18. # 检查文件扩展名
  19. if not input_path.lower().endswith('.doc'):
  20. raise ValueError("输入文件必须是DOC格式")
  21. # 设置默认输出路径
  22. if output_path is None:
  23. output_path = os.path.splitext(input_path)[0] + '.docx'
  24. # 根据操作系统选择转换方法
  25. system = platform.system().lower()
  26. if system == 'windows':
  27. _convert_using_comtypes(input_path, output_path)
  28. elif system == 'linux':
  29. _convert_using_libreoffice(input_path, output_path)
  30. else:
  31. raise NotImplementedError(f"不支持的操作系统: {system}")
  32. # 检查输出文件是否创建成功
  33. if not os.path.isfile(output_path):
  34. raise RuntimeError("文件转换失败,输出文件未生成")
  35. return output_path
  36. def _convert_using_comtypes(input_path: str, output_path: str):
  37. """在Windows上使用comtypes和MS Word进行转换"""
  38. try:
  39. import comtypes.client
  40. except ImportError:
  41. raise ImportError("comtypes库未安装,请使用: pip install comtypes")
  42. # 确保路径是绝对路径
  43. input_path = os.path.abspath(input_path)
  44. output_path = os.path.abspath(output_path)
  45. try:
  46. word = comtypes.client.CreateObject('Word.Application')
  47. doc = word.Documents.Open(input_path)
  48. doc.SaveAs(output_path, FileFormat=16) # 16表示docx格式
  49. doc.Close()
  50. word.Quit()
  51. except Exception as e:
  52. raise RuntimeError(f"使用MS Word转换失败: {str(e)}")
  53. def _convert_using_libreoffice(input_path: str, output_path: str):
  54. """在Linux上使用LibreOffice进行转换"""
  55. # 检查unoconv是否可用
  56. try:
  57. # 方法1: 尝试使用unoconv (推荐)
  58. subprocess.run(['unoconv', '-f', 'docx', '-o', output_path, input_path],
  59. check=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
  60. except (subprocess.CalledProcessError, FileNotFoundError):
  61. try:
  62. # 方法2: 直接使用LibreOffice
  63. output_dir = os.path.dirname(output_path)
  64. subprocess.run([
  65. 'libreoffice', '--headless', '--convert-to', 'docx',
  66. '--outdir', output_dir, input_path
  67. ], check=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
  68. # LibreOffice的输出文件名可能与预期不同,需要处理
  69. expected_path = os.path.splitext(input_path)[0] + '.docx'
  70. if os.path.exists(expected_path) and expected_path != output_path:
  71. os.rename(expected_path, output_path)
  72. except subprocess.CalledProcessError as e:
  73. raise RuntimeError(f"LibreOffice转换失败: {e.stderr.decode('utf-8')}")
  74. except FileNotFoundError:
  75. raise RuntimeError("未找到LibreOffice或unoconv,请确保已安装")
  76. if __name__ == '__main__':
  77. input_file = 'C:\\Users\\17664\\Desktop\\test\\test.doc'
  78. output_file = 'C:\\Users\\17664\\Desktop\\test\\test.docx'
  79. try:
  80. result = convert_doc_to_docx(input_file, output_file)
  81. print(f"转换成功: {result}")
  82. except Exception as e:
  83. print(f"转换失败: {str(e)}", file=sys.stderr)
  84. sys.exit(1)