# DOCXtoPDF.py
# Author: Vasudev Ram - http://www.dancingbison.com
# Copyright 2012 Vasudev Ram, http://www.dancingbison.com
# This is open source code, released under the New BSD License -
# see http://www.opensource.org/licenses/bsd-license.php .
# This program uses the python-docx library, available at:
# https://github.com/mikemaccana/python-docx

import sys
import os
import os.path
import string
from textwrap import TextWrapper

from docx import opendocx, getdocumenttext

from PDFWriter import PDFWriter


def docx_to_pdf(infilename, outfilename):
    """Extract the text of the DOCX file infilename and write it to a PDF.

    infilename  -- path of the DOCX file to read.
    outfilename -- path of the PDF file to create.

    If the DOCX file cannot be opened, an error message is printed and
    the process exits with status 1 (SystemExit is raised).
    """
    try:
        infil = opendocx(infilename)
    except Exception as e:
        # Report the actual file name, not the literal word "infilename".
        print("Error opening " + infilename)
        print("Exception: " + repr(e) + "\n")
        sys.exit(1)

    paragraphs = getdocumenttext(infil)

    pw = PDFWriter(outfilename)
    pw.setFont("Courier", 12)
    pw.setHeader("DOCXtoPDF - convert text in DOCX file to PDF")
    pw.setFooter("Generated by xtopdf and python-docx")
    wrapper = TextWrapper(width=70, drop_whitespace=False)

    for paragraph in paragraphs:
        # For Unicode handling: wrap the text first and encode each
        # wrapped line afterwards.  Wrapping already-encoded UTF-8 would
        # measure the width in bytes and could break a long word in the
        # middle of a multi-byte character.
        for line in wrapper.wrap(paragraph):
            pw.writeLine(line.encode("utf-8"))
        # Blank line between paragraphs.
        pw.writeLine("")

    pw.savePage()
    pw.close()


def usage():
    """Return the command-line usage message, ending with a newline."""
    return "Usage: python DOCXtoPDF.py infile.docx outfile.pdf\n"


def main():
    """Validate the command-line arguments and run the conversion.

    Expects exactly two arguments: an input file name ending in .docx
    and an output file name ending in .pdf (case-insensitive).  On a
    wrong argument count or extension, prints a message and the usage
    text, then exits with status 1.  Any other exception is reported on
    stderr and also results in exit status 1.
    """
    try:
        # Check for correct number of command-line arguments.
        if len(sys.argv) != 3:
            print("Wrong number of arguments")
            print(usage())
            sys.exit(1)
        infilename = sys.argv[1]
        outfilename = sys.argv[2]

        # Check for right infilename extension.
        infile_ext = os.path.splitext(infilename)[1]
        if infile_ext.upper() != ".DOCX":
            print("Input filename extension should be .DOCX")
            print(usage())
            sys.exit(1)

        # Check for right outfilename extension.
        outfile_ext = os.path.splitext(outfilename)[1]
        if outfile_ext.upper() != ".PDF":
            print("Output filename extension should be .PDF")
            print(usage())
            sys.exit(1)

        docx_to_pdf(infilename, outfilename)
    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception
        # subclass, so the exits above are not intercepted here.
        sys.stderr.write("Error: " + repr(e) + "\n")
        sys.exit(1)


if __name__ == '__main__':
    main()

# EOF

# Run the program with a command of the form:
#     python DOCXtoPDF.py infilename.docx outfilename.pdf