123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
import os
import platform
import subprocess
import sys
import tempfile
from typing import Optional
def _file_fingerprint(path: str) -> Optional[tuple]:
    """Return ``(mtime_ns, size)`` for *path*, or ``None`` if it cannot be stat'ed."""
    try:
        info = os.stat(path)
    except OSError:
        return None
    return (info.st_mtime_ns, info.st_size)


def convert_doc_to_docx(input_path: str, output_path: Optional[str] = None) -> str:
    """Convert a DOC file to DOCX format on Windows or Linux.

    On Windows the conversion is delegated to ``_convert_using_comtypes``
    (MS Word automation); on Linux to ``_convert_using_libreoffice``.

    Args:
        input_path: Path of the DOC file to convert.
        output_path: Path of the DOCX file to write. When omitted, the input
            path with its extension replaced by ``.docx`` is used.

    Returns:
        The path of the converted file.

    Raises:
        FileNotFoundError: If ``input_path`` is not an existing file.
        ValueError: If ``input_path`` does not end in ``.doc``.
        NotImplementedError: If the operating system is neither Windows nor
            Linux.
        RuntimeError: If the conversion did not create (or rewrite) the
            output file.
    """
    # Validate the input before touching any converter.
    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"输入文件不存在: {input_path}")
    if not input_path.lower().endswith('.doc'):
        raise ValueError("输入文件必须是DOC格式")

    # Default output: same location and stem as the input, with .docx.
    if output_path is None:
        output_path = os.path.splitext(input_path)[0] + '.docx'

    # Remember what (if anything) already sits at the target, so that a file
    # left over from an earlier run is not mistaken for a fresh result.
    stale_fingerprint = _file_fingerprint(output_path)

    # Pick the converter for the current operating system.
    system = platform.system().lower()
    if system == 'windows':
        _convert_using_comtypes(input_path, output_path)
    elif system == 'linux':
        _convert_using_libreoffice(input_path, output_path)
    else:
        raise NotImplementedError(f"不支持的操作系统: {system}")

    # The converters can return without error yet write nothing, so verify
    # that the output exists and was actually (re)written by this call.
    if (not os.path.isfile(output_path)
            or _file_fingerprint(output_path) == stale_fingerprint):
        raise RuntimeError("文件转换失败,输出文件未生成")
    return output_path
def _convert_using_comtypes(input_path: str, output_path: str) -> None:
    """Convert a DOC file to DOCX on Windows by automating MS Word via comtypes.

    Args:
        input_path: Path of the DOC file to open in Word.
        output_path: Path the DOCX file is saved to.

    Raises:
        ImportError: If the ``comtypes`` package is not installed.
        RuntimeError: If starting Word, opening, saving, closing or quitting
            fails; the underlying error is attached as ``__cause__``.
    """
    try:
        import comtypes.client
    except ImportError as err:
        raise ImportError("comtypes库未安装,请使用: pip install comtypes") from err

    # Pass absolute paths: Word runs out-of-process and does not share this
    # script's working directory.
    input_path = os.path.abspath(input_path)
    output_path = os.path.abspath(output_path)

    try:
        word = comtypes.client.CreateObject('Word.Application')
        try:
            doc = word.Documents.Open(input_path)
            try:
                # 16 = wdFormatDocumentDefault, i.e. DOCX.
                doc.SaveAs(output_path, FileFormat=16)
            finally:
                # Close the document even when saving failed.
                doc.Close()
        finally:
            # Always shut Word down so a failed conversion does not leave a
            # background Word process behind.
            word.Quit()
    except Exception as e:
        raise RuntimeError(f"使用MS Word转换失败: {str(e)}") from e
def _convert_using_libreoffice(input_path: str, output_path: str) -> None:
    """Convert a DOC file to DOCX on Linux via unoconv, falling back to LibreOffice.

    ``unoconv`` is tried first because it is given the exact output file
    name. If it is missing or exits with an error, headless ``libreoffice``
    is run instead. LibreOffice is only given an output *directory*, so the
    conversion runs in a scratch directory created inside the target
    directory, and the resulting ``.docx`` is then moved onto
    ``output_path``. This keeps unrelated files in the target directory from
    being overwritten and avoids guessing the produced file name.

    If LibreOffice exits successfully but produces no ``.docx``, nothing is
    written and no error is raised here; ``convert_doc_to_docx`` verifies the
    output file after this function returns.

    Args:
        input_path: Path of the DOC file to convert.
        output_path: Path the DOCX file should be written to.

    Raises:
        RuntimeError: If LibreOffice exits with an error, or if the
            ``libreoffice`` executable cannot be found after unoconv was
            also unavailable or failed.
    """
    # Route 1 (preferred): unoconv.
    try:
        subprocess.run(
            ['unoconv', '-f', 'docx', '-o', output_path, input_path],
            check=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # unoconv is missing or failed: deliberately fall back to LibreOffice.
        pass
    else:
        return

    # Route 2: LibreOffice itself. Resolve the target directory from the
    # absolute path so a bare file name does not yield an empty --outdir.
    output_dir = os.path.dirname(os.path.abspath(output_path))
    os.makedirs(output_dir, exist_ok=True)

    # The scratch directory lives inside the target directory so the final
    # move is a same-filesystem rename.
    with tempfile.TemporaryDirectory(dir=output_dir) as scratch_dir:
        try:
            subprocess.run(
                [
                    'libreoffice', '--headless', '--convert-to', 'docx',
                    '--outdir', scratch_dir, input_path,
                ],
                check=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE,
            )
        except subprocess.CalledProcessError as e:
            # errors='replace': a non-UTF-8 byte in stderr must not hide the
            # real failure behind a UnicodeDecodeError.
            detail = e.stderr.decode('utf-8', errors='replace')
            raise RuntimeError(f"LibreOffice转换失败: {detail}") from e
        except FileNotFoundError as e:
            raise RuntimeError("未找到LibreOffice或unoconv,请确保已安装") from e

        # Move whatever .docx LibreOffice produced onto the requested path.
        produced = sorted(
            name for name in os.listdir(scratch_dir)
            if name.lower().endswith('.docx')
        )
        if produced:
            os.replace(os.path.join(scratch_dir, produced[0]), output_path)
if __name__ == '__main__':
    # Usage: script.py [INPUT.doc [OUTPUT.docx]]
    # With no arguments the original sample paths below are used; with only
    # an input path, the output path is derived by convert_doc_to_docx.
    cli_args = sys.argv[1:]
    if cli_args:
        input_file = cli_args[0]
        output_file = cli_args[1] if len(cli_args) > 1 else None
    else:
        input_file = 'C:\\Users\\17664\\Desktop\\test\\test.doc'
        output_file = 'C:\\Users\\17664\\Desktop\\test\\test.docx'

    try:
        result = convert_doc_to_docx(input_file, output_file)
        print(f"转换成功: {result}")
    except Exception as e:
        # Top-level boundary: report any failure and exit non-zero.
        print(f"转换失败: {str(e)}", file=sys.stderr)
        sys.exit(1)
|