This script populates Cheetah text templates (http://www.cheetahtemplate.org/) with XML or CSV input data.
It is useful in my day-to-day work, where I often need to quickly generate a bunch of files from a template and some data.
Cheetah templates are very easy to write and understand, and I find them easy to use with CSV or XML data.
#!/usr/bin/python
import getopt, sys
# Help text printed for -h/--help and whenever the command line is invalid.
# It only advertises the input formats the script actually implements
# (XML and CSV), matching the values accepted by the -t option.
usage = """
This script will populate a Cheetah template (http://www.cheetahtemplate.org/)
with some input data (XML or CSV format).
By default, the output is directed to stdout.
USAGE:
template [ -o outputFile ] [options] <template_file> <data_file>
template [-h/--help]
ARGUMENTS:
<template_file> : Filename for the template file. Can be "stdin"
<data_file> : Filename for input data. Can be "stdin"
OPTIONS:
-o <output file> Direct output in a file instead of stdout.
-c 'commentChar' Change the character used to begin comments in the template.
-d 'directiveChar' Change the character used for directives in the template.
-t XML|CSV Input type (or guessed with file extension)
"""
def dieWith(msg):
    """Write `msg` followed by a newline to stderr, then exit with status -1."""
    sys.stderr.write(''.join((msg, '\n')))
    raise SystemExit(-1)
# Enum of data type
TYPE_NONE = 0
TYPE_XML = 2
TYPE_CSV = 3

# Parse options
try:
    opts, args = getopt.getopt(sys.argv[1:], "hc:d:t:o:", ["help"])
except getopt.GetoptError as err:
    # Exit here: otherwise `opts` and `args` stay undefined and the option
    # loop below fails with a NameError instead of a readable message.
    sys.stderr.write("%s\n" % err)
    print(usage)
    sys.exit(2)

# Init arguments / options
compilerSettings = {}     # Passed as-is to the Cheetah Template constructor
inputType = TYPE_NONE     # TYPE_NONE means "guess from the data file extension"
outFilename = None        # None means "write to stdout"

# Switch on options
for opt, arg in opts:
    # Help
    if opt in ("-h", "--help"):
        print(usage)
        sys.exit(0)
    # Comment char
    elif opt == "-c":
        compilerSettings['commentStartToken'] = arg
    # Directive char
    elif opt == "-d":
        compilerSettings['directiveStartToken'] = arg
    # Output file
    elif opt == "-o":
        outFilename = arg
    # Input type (case-insensitive)
    elif opt == "-t":
        arg = arg.lower()
        if arg == "csv":
            inputType = TYPE_CSV
        elif arg == "xml":
            inputType = TYPE_XML
        else:
            dieWith("Invalid input type. Valid options are : CSV, XML")

# 2 mandatory arguments
if len(args) != 2:
    print(usage)
    sys.exit(2)
(templateFile, dataFile) = args
# --------------------------------------------------------------------------
# XML to Python Object parser
# --------------------------------------------------------------------------
## {{{ http://code.activestate.com/recipes/534109/ (r8)
## Created by Wai Yip Tung on Sat, 13 Oct 2007
import re
import xml.sax.handler
def xml2obj(src):
    """
    Convert XML data into native Python objects.

    `src` is either a string containing the XML document (parsed with
    xml.sax.parseString) or any other source, which is handed unchanged to
    xml.sax.parse.

    Returns the object built for the root element of the document:
      - a DataNode when the element has XML attributes or child elements;
        attributes and children are reachable as `node.name` or
        `node['name']`, the element text as `node.data` / `str(node)`;
      - the stripped text (possibly '') when the element has neither.
    Names are mangled so that every character outside [_0-9a-zA-Z] becomes
    '_'. Several attributes/children with the same name are stored as a list.
    """
    # `basestring` only exists on Python 2; fall back to the Python 3 types
    # so the function runs on both major versions.
    try:
        string_types = basestring
    except NameError:
        string_types = (str, bytes)

    non_id_char = re.compile('[^_0-9a-zA-Z]')

    def _name_mangle(name):
        return non_id_char.sub('_', name)

    class DataNode(object):
        def __init__(self):
            self._attrs = {}    # XML attributes and child elements
            self.data = None    # child text data

        def __len__(self):
            # treat single element as a list of 1
            return 1

        def __getitem__(self, key):
            if isinstance(key, string_types):
                # String key: attribute / child lookup, None when missing
                return self._attrs.get(key, None)
            else:
                # Integer or slice: behave like the one-element list [self],
                # which also makes `for item in node` yield the node itself
                return [self][key]

        def __contains__(self, name):
            return name in self._attrs

        def __nonzero__(self):
            return bool(self._attrs or self.data)

        # Python 3 spells the truth-value hook __bool__
        __bool__ = __nonzero__

        def __getattr__(self, name):
            if name.startswith('__'):
                # need to do this for Python special methods???
                raise AttributeError(name)
            # Unknown names yield None instead of raising
            return self._attrs.get(name, None)

        def _add_xml_attr(self, name, value):
            if name in self._attrs:
                # multiple attribute of the same name are represented by a list
                children = self._attrs[name]
                if not isinstance(children, list):
                    children = [children]
                    self._attrs[name] = children
                children.append(value)
            else:
                self._attrs[name] = value

        def __str__(self):
            return self.data or ''

        def __repr__(self):
            items = sorted(self._attrs.items())
            if self.data:
                items.append(('data', self.data))
            return u'{%s}' % ', '.join([u'%s:%s' % (k, repr(v)) for k, v in items])

    class TreeBuilder(xml.sax.handler.ContentHandler):
        def __init__(self):
            # Stack of (node, text_parts) for the elements currently open
            self.stack = []
            self.root = DataNode()
            self.current = self.root
            self.text_parts = []

        def startElement(self, name, attrs):
            self.stack.append((self.current, self.text_parts))
            self.current = DataNode()
            self.text_parts = []
            # xml attributes --> python attributes
            for k, v in attrs.items():
                self.current._add_xml_attr(_name_mangle(k), v)

        def endElement(self, name):
            text = ''.join(self.text_parts).strip()
            if text:
                self.current.data = text
            if self.current._attrs:
                obj = self.current
            else:
                # a text only node is simply represented by the string
                obj = text
            self.current, self.text_parts = self.stack.pop()
            self.current._add_xml_attr(_name_mangle(name), obj)

        def characters(self, content):
            self.text_parts.append(content)

    builder = TreeBuilder()
    if isinstance(src, string_types):
        xml.sax.parseString(src, builder)
    else:
        xml.sax.parse(src, builder)
    # The synthetic root holds exactly one entry: the document element.
    # list() is needed because dict.values() is not indexable on Python 3.
    return list(builder.root._attrs.values())[0]
## end of http://code.activestate.com/recipes/534109/ }}}
# -------------------------------------------------
# Read input data file
# -------------------------------------------------
# Open input file
import csv
# "stdin" is a reserved name meaning "read the data from standard input"
if dataFile == "stdin":
    dataStream = sys.stdin
else:
    dataStream = open(dataFile)

try:
    # Guess input type if not set in options
    if inputType == TYPE_NONE:
        import os.path as path
        ext = path.splitext(dataFile)[1].lower()
        if ext == '.csv':
            inputType = TYPE_CSV
        elif ext == '.xml':
            inputType = TYPE_XML
        elif ext == '.json':
            # No JSON reader exists in this script; fail with a clear message
            # rather than referencing an undefined type constant.
            dieWith("JSON input is not supported. Valid input types are : CSV, XML")

    # Switch on input type
    if inputType == TYPE_NONE:
        dieWith("No input data type specified. Failed to guess it.")

    # CSV
    elif inputType == TYPE_CSV:
        reader = csv.DictReader(dataStream, delimiter=";")

        # Almost empty: attributes are added dynamically below
        class Container:
            def __init__(self):
                self.lines = []

        data = Container()

        # Names set by the script itself; a CSV column with one of these
        # names must not overwrite or pollute them.
        reservedNames = ('lines', 'columns')

        # Loop on lines
        for line in reader:
            # Each row stays available as a dict through 'lines'
            data.lines.append(line)
            # Additionally expose each column as a list of its values
            for key, value in line.items():
                if key in reservedNames:
                    continue
                data.__dict__.setdefault(key, []).append(value)

        # Make 'columns' accessible as a global name in the template
        data.columns = reader.fieldnames

    # XML
    elif inputType == TYPE_XML:
        # Transform XML into Python object
        data = xml2obj(dataStream)

    else:
        dieWith('Input data type not supported')
finally:
    # The input is fully consumed at this point; never close stdin
    if dataStream is not sys.stdin:
        dataStream.close()
# --------------------------------------------
# Read template
# --------------------------------------------
from Cheetah.Template import Template
# 'stdin' is a reserved name: the template is then read from standard input
templateStream = sys.stdin if templateFile == 'stdin' else open(templateFile)

# Build the template, with the parsed input data as its only search-list
# namespace and the compiler settings collected from the command line
template = Template(
    file=templateStream,
    searchList=[data],
    compilerSettings=compilerSettings,
)
# -------------------------------------------
# Output result
# -------------------------------------------
# Render before opening the output file, so that a template error does not
# leave a truncated, empty file behind.
rendered = str(template)

if outFilename is None:
    sys.stdout.write(rendered)
else:
    # The context manager flushes and closes the file even if the write fails
    with open(outFilename, 'w') as out:
        out.write(rendered)
|
Usage
Here is the usage of this script.
USAGE:
template [ -o outputFile ] [options] <template_file> <data_file>
template [-h/--help]
ARGUMENTS:
<template_file> : Filename for the template file. Can be "stdin"
<data_file> : Filename for input data. Can be "stdin"
OPTIONS:
-o <output file> Set the output file (stdout by default)
-c 'commentChar' Change the character used to begin comments in the template.
-d 'directiveChar' Change the character used for directives in the template.
-t XML|CSV Input type (or guessed with input file extension)
XML input data
Here is an example of a template file:
#for $currContact in $contact
Name : $currContact.firstName $currContact.name
Tel: #echo ', '.join(map(lambda number : '%s (%s)' % (number, number.type), $currContact.number))
Emails:
#for $email in $currContact.email
$email.type : $email
#end for
#end for
And its associated input file :
<address-book>
<contact firstName="John" name="Doh" >
<address>10, Red Street. 088990 SpringField</address>
<email type="work" >john.doh@gmail.com</email>
<email type="personal" >john.doh@hotmail.com</email>
<number type="work">0102030506</number>
<number type="mobile">0506030506</number>
</contact>
<contact firstName="Barak" name="Obama" >
<address>White House, Washington</address>
<email type="work" >barak.obama@whitehouse.com</email>
<email type="personal" >barak.obama@hotmail.com</email>
<number type="work">0102030506</number>
</contact>
</address-book>
And here is the result output :
Name : John Doh
Tel: 0102030506 (work), 0506030506 (mobile)
Emails:
work : john.doh@gmail.com
personal : john.doh@hotmail.com
Name : Barak Obama
Tel: 0102030506 (work)
Emails:
work : barak.obama@whitehouse.com
personal : barak.obama@hotmail.com
CSV input data
CSV files should use the semicolon ';' as separator. They should start with one header line giving the names of the columns. In the template, the lines of data are accessed through the global variable "$lines" and the list of column names through the global variable "$columns". For each line, the value of a specific column can be accessed either by "$line.columnName" or "$line['columnName']".
Here is an example of a template written for CSV data :
#for $line in $lines
Name : $line.firstName $line.name
Tel : $line.tel
Email : $line.email
Address :
$line.address
#end for
And the corresponding CSV input data :
firstName;name;address;tel;email
Barack;Obama;White house;01020304;barack.oabama@whitehouse.com
John;Doh;Springfield;99999999;john.doe@hotmail.com
And here is the result :
Name : Barack Obama
Tel : 01020304
Email : barack.oabama@whitehouse.com
Address :
White house
Name : John Doh
Tel : 99999999
Email : john.doe@hotmail.com
Address :
Springfield