Welcome, guest | Sign In | My Account | Store | Cart
# DOCXtoPDF.py

# Author: Vasudev Ram - http://www.dancingbison.com
# Copyright 2012 Vasudev Ram, http://www.dancingbison.com

# This is open source code, released under the New BSD License -
# see http://www.opensource.org/licenses/bsd-license.php .

# This program uses the python-docx library, available at:
# https://github.com/mikemaccana/python-docx

import sys
import os
import os.path
import string
from textwrap import TextWrapper
from docx import opendocx, getdocumenttext
from PDFWriter import PDFWriter

def docx_to_pdf(infilename, outfilename):
    """Extract the text from a DOCX file and write it to a PDF file.

    Arguments:
    infilename  -- path of the DOCX file to read.
    outfilename -- path of the PDF file to create.

    If the DOCX file cannot be opened, an error message naming the file is
    printed and the process exits with status 1.
    """
    try:
        infil = opendocx(infilename)
    except Exception as e:
        # Name the file that failed, so the user can see which path was bad.
        print("Error opening " + infilename)
        print("Exception: " + repr(e) + "\n")
        sys.exit(1)

    paragraphs = getdocumenttext(infil)

    pw = PDFWriter(outfilename)
    pw.setFont("Courier", 12)
    pw.setHeader("DOCXtoPDF - convert text in DOCX file to PDF")
    pw.setFooter("Generated by xtopdf and python-docx")
    wrapper = TextWrapper(width=70, drop_whitespace=False)

    for paragraph in paragraphs:
        # Wrap first, then encode each line for Unicode handling.  Wrapping
        # already-encoded UTF-8 would measure the width in bytes and could
        # cut a long word in the middle of a multi-byte character.
        for line in wrapper.wrap(paragraph):
            pw.writeLine(line.encode("utf-8"))
        # Blank line between paragraphs.
        pw.writeLine("")

    pw.savePage()
    pw.close()
   
def usage():
    """Return the command-line usage message (newline-terminated)."""
    # The output file must be named *.pdf: main() rejects any other
    # extension, so the usage text must not suggest a .txt output.
    return "Usage: python DOCXtoPDF.py infile.docx outfile.pdf\n"

def main():
    """Command-line entry point.

    Expects exactly two arguments: an input filename ending in .docx and
    an output filename ending in .pdf (extensions are compared without
    regard to case).  On a bad command line, prints the problem followed
    by the usage message and exits with status 1.  Any exception raised
    while converting is reported on stderr and also exits with status 1.
    """
    try:
        complaint = None

        if len(sys.argv) != 3:
            # Check for correct number of command-line arguments.
            complaint = "Wrong number of arguments"
        else:
            infilename, outfilename = sys.argv[1:3]
            # Check the input extension first, then the output extension.
            if os.path.splitext(infilename)[1].upper() != ".DOCX":
                complaint = "Input filename extension should be .DOCX"
            elif os.path.splitext(outfilename)[1].upper() != ".PDF":
                complaint = "Output filename extension should be .PDF"

        if complaint is not None:
            print(complaint)
            print(usage())
            sys.exit(1)

        docx_to_pdf(infilename, outfilename)

    except Exception as e:
        sys.stderr.write("Error: " + repr(e) + "\n")
        sys.exit(1)

# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()

# EOF

Run the program with a command of the form:

python DOCXtoPDF.py infilename.docx outfilename.pdf

History