import os
import sys
import platform
from typing import Optional
import subprocess

# Word's WdSaveFormat value for the default Office Open XML format (.docx).
_WD_FORMAT_DOCX = 16


def convert_doc_to_docx(input_path: str, output_path: Optional[str] = None) -> str:
    """Convert a legacy Word ``.doc`` file to ``.docx`` on Windows or Linux.

    On Windows the conversion is delegated to MS Word through ``comtypes``;
    on Linux it is delegated to ``unoconv`` with a fallback to the
    ``libreoffice`` command line.

    Args:
        input_path: Path of the ``.doc`` file to convert.
        output_path: Path of the ``.docx`` file to write. When omitted, the
            input path with its extension replaced by ``.docx`` is used.

    Returns:
        The path of the converted file (``output_path``).

    Raises:
        FileNotFoundError: ``input_path`` does not exist or is not a file.
        ValueError: ``input_path`` does not end with ``.doc``.
        NotImplementedError: The operating system is neither Windows nor Linux.
        RuntimeError: The conversion failed or produced no output file.
    """
    # Validate the input before touching any external tool.
    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"输入文件不存在: {input_path}")

    if not input_path.lower().endswith('.doc'):
        raise ValueError("输入文件必须是DOC格式")

    # Default output: same location and stem as the input, with a .docx suffix.
    if output_path is None:
        output_path = os.path.splitext(input_path)[0] + '.docx'

    # Pick the conversion backend for the current operating system.
    system = platform.system().lower()
    if system == 'windows':
        _convert_using_comtypes(input_path, output_path)
    elif system == 'linux':
        _convert_using_libreoffice(input_path, output_path)
    else:
        raise NotImplementedError(f"不支持的操作系统: {system}")

    # The backends may exit cleanly without writing anything, so verify.
    if not os.path.isfile(output_path):
        raise RuntimeError("文件转换失败,输出文件未生成")

    return output_path


def _convert_using_comtypes(input_path: str, output_path: str):
    """Convert with MS Word via ``comtypes`` (Windows only).

    Raises:
        ImportError: ``comtypes`` is not installed.
        RuntimeError: Word could not be started or the conversion failed.
    """
    try:
        import comtypes.client
    except ImportError as e:
        raise ImportError("comtypes库未安装,请使用: pip install comtypes") from e

    # Word resolves relative paths against its own working directory, not
    # ours, so hand it absolute paths.
    input_path = os.path.abspath(input_path)
    output_path = os.path.abspath(output_path)

    try:
        word = comtypes.client.CreateObject('Word.Application')
        try:
            doc = word.Documents.Open(input_path)
            try:
                doc.SaveAs(output_path, FileFormat=_WD_FORMAT_DOCX)
            finally:
                # Close the document even when SaveAs fails so the source
                # file is not left locked.
                doc.Close()
        finally:
            # Always shut Word down; otherwise a failed conversion leaves a
            # hidden Word process running.
            word.Quit()
    except Exception as e:
        raise RuntimeError(f"使用MS Word转换失败: {str(e)}") from e


def _convert_using_libreoffice(input_path: str, output_path: str):
    """Convert with ``unoconv``, falling back to ``libreoffice`` (Linux).

    Raises:
        RuntimeError: LibreOffice failed, or neither tool is installed.
    """
    # Absolute paths guarantee a non-empty --outdir even when the caller
    # passed a bare file name.
    input_path = os.path.abspath(input_path)
    output_path = os.path.abspath(output_path)

    # Method 1: unoconv (preferred) writes directly to the requested path.
    try:
        subprocess.run(
            ['unoconv', '-f', 'docx', '-o', output_path, input_path],
            check=True,
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE,
        )
        return
    except (subprocess.CalledProcessError, FileNotFoundError):
        # unoconv is missing or failed; fall through to LibreOffice.
        pass

    # Method 2: call LibreOffice directly.
    output_dir = os.path.dirname(output_path)
    try:
        subprocess.run(
            [
                'libreoffice', '--headless', '--convert-to', 'docx',
                '--outdir', output_dir, input_path,
            ],
            check=True,
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE,
        )
    except subprocess.CalledProcessError as e:
        # stderr may not be valid UTF-8 under some locales; never let the
        # decoding itself mask the real failure.
        detail = e.stderr.decode('utf-8', errors='replace') if e.stderr else ''
        raise RuntimeError(f"LibreOffice转换失败: {detail}") from e
    except FileNotFoundError as e:
        raise RuntimeError("未找到LibreOffice或unoconv,请确保已安装") from e

    # LibreOffice names its output "<input stem>.docx" inside --outdir, which
    # may differ from the requested file name; move it into place if so.
    input_stem = os.path.splitext(os.path.basename(input_path))[0]
    produced_path = os.path.join(output_dir, input_stem + '.docx')
    if produced_path != output_path and os.path.exists(produced_path):
        os.replace(produced_path, output_path)


if __name__ == '__main__':
    input_file = 'C:\\Users\\17664\\Desktop\\test\\test.doc'
    output_file = 'C:\\Users\\17664\\Desktop\\test\\test.docx'

    try:
        result = convert_doc_to_docx(input_file, output_file)
        print(f"转换成功: {result}")
    except Exception as e:
        print(f"转换失败: {str(e)}", file=sys.stderr)
        sys.exit(1)