Welcome, guest | Sign In | My Account | Store | Cart
# DOCXtoPDF.py

# Author: Vasudev Ram - http://www.dancingbison.com
# Copyright 2012 Vasudev Ram, http://www.dancingbison.com

# This is open source code, released under the New BSD License -
# see http://www.opensource.org/licenses/bsd-license.php .

# This program uses the python-docx library, available at:
# https://github.com/mikemaccana/python-docx

import sys
import os
import os.path
import string
from textwrap import TextWrapper
from docx import opendocx, getdocumenttext
from PDFWriter import PDFWriter

def docx_to_pdf(infilename, outfilename):
    """Extract the text of a DOCX file and write it to a PDF file.

    infilename  -- path of the DOCX file to read.
    outfilename -- path of the PDF file to create.

    Each paragraph is wrapped to 70 columns and written line by line,
    followed by one empty line as a paragraph separator.

    If the DOCX file cannot be opened, an error message naming the file
    is printed and the process exits with status 1.
    """

    try:
        infil = opendocx(infilename)
    except Exception as e:
        # Report the actual file name so the user can see which path failed.
        print("Error opening %s" % (infilename,))
        print("Exception: " + repr(e) + "\n")
        sys.exit(1)

    paragraphs = getdocumenttext(infil)

    pw = PDFWriter(outfilename)
    pw.setFont("Courier", 12)
    pw.setHeader("DOCXtoPDF - convert text in DOCX file to PDF")
    pw.setFooter("Generated by xtopdf and python-docx")
    wrapper = TextWrapper(width=70, drop_whitespace=False)

    for paragraph in paragraphs:
        # For Unicode handling: wrap the text first and encode each wrapped
        # line afterwards.  Wrapping already-encoded UTF-8 would measure the
        # width in bytes instead of characters and could break a long word
        # in the middle of a multi-byte sequence.
        # NOTE(review): assumes getdocumenttext() yields unicode strings,
        # as the original per-paragraph encode() implies -- confirm.
        for line in wrapper.wrap(paragraph):
            pw.writeLine(line.encode("utf-8"))
        # Empty line between paragraphs.
        pw.writeLine("")

    pw.savePage()
    pw.close()
    
def usage():
    """Return the one-line command-line usage message for this script.

    The output file is shown with a .pdf extension because main() rejects
    any output filename whose extension is not .PDF (case-insensitive).
    """

    return "Usage: python DOCXtoPDF.py infile.docx outfile.pdf\n"

def main():
    """Command-line entry point: validate the arguments, then convert.

    Expects exactly two arguments: an input filename ending in .docx and
    an output filename ending in .pdf (extensions compared without regard
    to case).  On a validation failure the reason and the usage text are
    printed and the process exits with status 1.  Any other exception is
    reported on standard error and also ends the process with status 1.
    """

    def reject(reason):
        # Print why the arguments were refused, show the usage, and exit.
        print(reason)
        print(usage())
        sys.exit(1)

    try:
        argv = sys.argv

        # Script name plus the two filenames.
        if len(argv) != 3:
            reject("Wrong number of arguments")
        infilename, outfilename = argv[1], argv[2]

        # The input must be a DOCX file.
        _, infile_ext = os.path.splitext(infilename)
        if infile_ext.upper() != ".DOCX":
            reject("Input filename extension should be .DOCX")

        # The output must be a PDF file.
        _, outfile_ext = os.path.splitext(outfilename)
        if outfile_ext.upper() != ".PDF":
            reject("Output filename extension should be .PDF")

        docx_to_pdf(infilename, outfilename)

    except Exception as e:
        sys.stderr.write("Error: " + repr(e) + "\n")
        sys.exit(1)

# Run the conversion only when this file is executed as a script, so that
# importing the module (e.g. to call docx_to_pdf directly) has no side effects.
if __name__ == '__main__':
    main()

# EOF

Run the program with a command of the form:

python DOCXtoPDF.py infilename.docx outfilename.pdf

History