This recipe shows how to publish delimiter-separated values (a commonly used tabular data format) to PDF, using the xtopdf toolkit for PDF creation. It lets the user specify the delimiter via one of two command-line options - an ASCII code or an ASCII character. As Unix filters tend to do, it can operate either on standard input or on input filenames given as command-line arguments. In the case of multiple inputs via files, each input goes to a separate PDF output file.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 | from __future__ import print_function
"""
DSVToPDF.py
Author: Vasudev Ram
Web site: https://vasudevram.github.io
Blog: https://jugad2.blogspot.com
Product store: https://gumroad.com/vasudevram
Twitter: https://mobile.twitter.com/vasudevram
Purpose: Show how to publish DSV data (Delimiter-Separated Values)
to PDF, using the xtopdf toolkit.
Requires:
- ReportLab: https://www.reportlab.com/ftp/reportlab-1.21.1.tar.gz
- xtopdf: https://bitbucket.org/vasudevram/xtopdf
First install ReportLab, then install xtopdf, using instructions here:
http://jugad2.blogspot.in/2012/07/guide-to-installing-and-using-xtopdf.html
The DSV data can be read from either files or standard input.
The delimiter character is configurable by the user and can
be specified as either a character or its ASCII code.
References:
DSV format: https://en.wikipedia.org/wiki/Delimiter-separated_values
TAOUP (The Art Of Unix Programming): Data File Metaformats:
http://www.catb.org/esr/writings/taoup/html/ch05s02.html
ASCII table: http://www.asciitable.com/
"""
import sys
import string
from PDFWriter import PDFWriter
def err_write(message):
sys.stderr.write(message)
def error_exit(message):
err_write(message)
sys.exit(1)
def usage(argv, verbose=False):
usage1 = \
"{}: publish DSV (Delimiter-Separated-Values) data to PDF.\n".format(argv[0])
usage2 = "Usage: python" + \
" {} [ -c delim_char | -n delim_code ] [ dsv_file ] ...\n".format(argv[0])
usage3 = [
"where one of either the -c or -n option must be given,\n",
"delim_char is a single ASCII delimiter character, and\n",
"delim_code is a delimiter character's ASCII code.\n",
"Text lines will be read from specified DSV file(s) or\n",
"from standard input, split on the specified delimiter\n",
"specified by either the -c or -n option, processed, and\n",
"written, formatted, to PDF files with the name dsv_file.pdf.\n",
]
usage4 = "Use the -h or --help option for a more detailed help message.\n"
err_write(usage1)
err_write(usage2)
if verbose:
'''
for line in usage3:
err_write(line)
'''
err_write(''.join(usage3))
if not verbose:
err_write(usage4)
def str_to_int(s):
try:
return int(s)
except ValueError as ve:
error_exit(repr(ve))
def valid_delimiter(delim_code):
return not invalid_delimiter(delim_code)
def invalid_delimiter(delim_code):
# Non-ASCII codes not allowed, i.e. codes outside
# the range 0 to 255.
if delim_code < 0 or delim_code > 255:
return True
# Also, don't allow some specific ASCII codes;
# add more, if it turns out they are needed.
if delim_code in (10, 13):
return True
return False
def dsv_to_pdf(dsv_fil, delim_char, pdf_filename):
with PDFWriter(pdf_filename) as pw:
pw.setFont("Courier", 12)
pw.setHeader(pdf_filename[:-4] + " => " + pdf_filename)
pw.setFooter("Generated by xtopdf: https://google.com/search?q=xtopdf")
for idx, lin in enumerate(dsv_fil):
fields = lin.split(delim_char)
assert len(fields) > 0
# Knock off the newline at the end of the last field,
# since it is the line terminator, not part of the field.
if fields[-1][-1] == '\n':
fields[-1] = fields[-1][:-1]
# Treat a blank line as a line with one field,
# an empty string (that is what split returns).
pw.writeLine(' - '.join(fields))
def main():
# Get and check validity of arguments.
sa = sys.argv
lsa = len(sa)
if lsa == 1:
usage(sa)
sys.exit(0)
elif lsa == 2:
# Allow the help option with any letter case.
if sa[1].lower() in ("-h", "--help"):
usage(sa, verbose=True)
sys.exit(0)
else:
usage(sa)
sys.exit(0)
# If we reach here, lsa is >= 3.
# Check for valid mandatory options (sic).
if not sa[1] in ("-c", "-n"):
usage(sa, verbose=True)
sys.exit(0)
# If -c option given ...
if sa[1] == "-c":
# If next token is not a single character ...
if len(sa[2]) != 1:
error_exit(
"{}: Error: -c option needs a single character after it.".format(sa[0]))
if not sa[2] in string.printable:
error_exit(
"{}: Error: -c option needs a printable ASCII character after it.".format(\
sa[0]))
delim_char = sa[2]
# else if -n option given ...
elif sa[1] == "-n":
delim_code = str_to_int(sa[2])
if invalid_delimiter(delim_code):
error_exit(
"{}: Error: invalid delimiter code {} given for -n option.".format(\
sa[0], delim_code))
delim_char = chr(delim_code)
else:
# Checking for what should not happen ... a bit of defensive programming here.
error_exit("{}: Program error: neither -c nor -n option given.".format(sa[0]))
try:
# If no filenames given, do sys.stdin to PDF ...
if lsa == 3:
print("Converting content of standard input to PDF.")
dsv_fil = sys.stdin
dsv_to_pdf(dsv_fil, delim_char, "dsv_output.pdf")
dsv_fil.close()
print("Output is in dsv_output.pdf")
# else (filenames given), convert them to PDFs ...
else:
for dsv_filename in sa[3:]:
pdf_filename = dsv_filename + ".pdf"
print("Converting file {} to PDF.", dsv_filename)
dsv_fil = open(dsv_filename, 'r')
dsv_to_pdf(dsv_fil, delim_char, pdf_filename)
dsv_fil.close()
print("Output is in {}".format(pdf_filename))
except IOError as ioe:
error_exit("{}: Error: {}".format(sa[0], repr(ioe)))
if __name__ == '__main__':
main()
|
More details, and sample input and output, available here:
http://jugad2.blogspot.in/2016/12/xtopdf-publish-dsv-data-delimiter.html