Ever found "str" % (a,b) a bit limited? Do you want to supply some arguments now, and fill the rest in later? Or mix positional and named arguments, i.e. supply a tuple and a dict? Do you want to put string templates in a configuration file to be read with ConfigParser? I hope you like this recipe.
import re
# Match any "%" formatting stanza accepted by Python's "%" operator.
# Any character except parentheses is allowed in names (the pattern cannot
# follow nested parentheses); unicode names work in python 2.2+.
# The name can be an empty string.
# Group 1 is the name without its parentheses, or None for a positional
# stanza.  The pattern is compiled with re.VERBOSE, so whitespace and
# "#" comments outside character classes are not part of the match.
safmt_pat = re.compile(r'''
    %                       # Start with percent,
    (?:\( ([^()]*) \))?     # optional name in parens (do not capture parens),
    [-+ #0]*                # zero or more flags
    (?:\*|[0-9]*)           # optional minimum field width
    (?:\.(?:\*|[0-9]*))?    # optional dot and precision
    [hlL]?                  # optional length modifier (Python accepts and ignores it)
    [EFGXcdefgiorsux%]      # type code (or [formatted] percent character)
    ''', re.VERBOSE)
# Convenience front end for safmtb(), e.g. safmt("%blah", 10, 20, spam='eggs')
def safmt(*args, **kw):
    """Interpolate a template with mixed positional and keyword arguments.

    The first positional argument must be the template; the remaining
    positional arguments feed unnamed stanzas and the keyword arguments
    feed named stanzas.  The template is taken from *args rather than
    declared as a named parameter, presumably so that every keyword name
    stays available for named stanzas.
    """
    template, positional = args[0], args[1:]
    return safmtb(template, positional, kw)
# Safe and augmented "%" string interpolation:
# - preserve % stanzas in case of missing argument or key
# - allow mixed positional and named arguments
# Note: TypeError exceptions can still happen, e.g. safmt("%d", "abc")
def safmtb(template, args=(), kw=None, savepc=0, verb=0):
    """Interpolate *template*, leaving stanzas that cannot be filled as-is.

    Function arguments:
      template: a string containing "%" format specifiers
      args    : sequence arguments for format string (any sequence; it is
                converted to a tuple so each stanza receives its own items)
      kw      : mapping arguments for format string
      savepc  : optionally preserve "escaped percent" stanzas
                (parameterised positional stanzas always eat args)
      verb    : verbose execution, prints debug output to stdout

    Returns the interpolated string.
    """
    if verb:
        # Written through sys.stdout.write so the trace works on Python 2
        # and Python 3 alike; each stanza still produces one line.
        import sys
        trace = sys.stdout.write
        trace("safmt(%r)\n" % (template,))
    if kw is None:
        kw = {}
    # "%" only unpacks tuples: a list slice would be formatted as a single
    # object (or rejected by "%d"), so normalise the positional arguments.
    args = tuple(args)
    ret = []
    last = i = 0    # last: end of text already emitted; i: search position
    di = 0          # index of the next unused positional argument
    while 1:
        mo = safmt_pat.search(template, i)
        if not mo:
            # End of string
            ret.append(template[last:])
            break
        i = mo.end(0)
        stanza, name = mo.group(0, 1)
        if verb:
            trace("%s %s " % (mo.start(), (stanza, name)))
        # The stanza is never empty here, so indexing cannot fail.
        is_percent = stanza[-1] == "%"
        if name is not None:
            if is_percent:
                if savepc:
                    if verb:
                        trace("saving stanza\n")
                    continue
                # Workaround weird behaviour in python2.1-2.5: a named
                # argument that is just a percent escape still raises
                # KeyError, even though a positional escaped percent eats
                # no args and is happy with an empty sequence.
                # Workaround: provide a dummy key which never gets used.
                dat = stanza % {name: None}
            else:
                try:
                    dat = stanza % kw
                except KeyError:
                    # Skipping leaves 'last' unchanged, so the stanza text
                    # is copied verbatim with the following literal text.
                    if verb:
                        trace("ignore missing key\n")
                    continue
        else:
            # %<blah>% does not use up arguments, but "%*.*%" does
            numargs = not is_percent
            if verb:
                trace("args=%s " % numargs)
            # Allow for "*" parameterisation (uses up to 2)
            numargs += stanza.count("*")
            if verb:
                trace("args=%s " % numargs)
            p = args[di:di + numargs]
            # Arguments are consumed even when the stanza ends up preserved.
            di += numargs
            if verb:
                trace("p=%s " % (p,))
            if len(p) != numargs:
                if verb:
                    trace("not enough pos args\n")
                continue
            if savepc and is_percent:
                if verb:
                    trace("saving stanza\n")
                continue
            dat = stanza % p
        if verb:
            trace("fmt %r\n" % dat)
        ret.append(template[last:mo.start()])
        ret.append(dat)
        last = i
    return ''.join(ret)
# ****** Related Recipe ******
from ConfigParser import *
from ConfigParser import DEFAULTSECT
class SafeConfigParser(ConfigParser):
    """ConfigParser whose get() interpolates values through safmtb()."""

    # Override get() method to use safe string interpolation
    def get(self, section, option, raw=0, vars=None):
        """Return the value of *option* in *section*.

        Lookup uses the defaults, overlaid by the section's own entries,
        overlaid by *vars* when given.  With a true *raw* the stored
        value is returned untouched.  Otherwise the value is run through
        safmtb() with savepc=1 until a pass leaves it unchanged.

        Raises NoSectionError for an unknown section (other than
        DEFAULTSECT), NoOptionError for an unknown option, and
        InterpolationDepthError when the value is still changing after
        ten passes.
        """
        # In python2.3, the name changed from __sections to _sections
        if hasattr(self, '_sections'):
            all_sections, base = self._sections, self._defaults
        else:
            all_sections = self._ConfigParser__sections
            base = self._ConfigParser__defaults

        try:
            section_items = all_sections[section]
        except KeyError:
            if section != DEFAULTSECT:
                raise NoSectionError(section)
            section_items = {}

        namespace = base.copy()
        namespace.update(section_items)
        # Entry specific variables take precedence over stored values
        if vars:
            namespace.update(vars)

        option = self.optionxform(option)
        try:
            rawval = namespace[option]
        except KeyError:
            raise NoOptionError(option, section)
        if raw:
            return rawval

        # Repeat the interpolation until it reaches a fixed point
        value = rawval
        for _ in range(10):
            previous = value
            value = safmtb(previous, kw=namespace, savepc=1)
            if value == previous:
                return value
        raise InterpolationDepthError(option, section, rawval)
The safmt functions are useful on their own, and the SafeConfigParser class shows one very useful (to me at least) application. It allows including format templates in config files.
If named formats are used in a file read by ConfigParser, it assumes all format stanzas are named and supplied, and that all percents in the file are valid format stanzas. This is not helpful when you want to do some interpolation from configuration keys, and preserve other templates for later use at runtime, and include non-formatting percents in a string. SafeConfigParser is the answer. The 'savepc' flag was added for this application, where converting "%%" to "%" is never what is wanted (AFAICT); "%%" is preserved for later runtime formatting.
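Caveat: Balanced open and close parentheses are allowed in Python's string formats, e.g. "%(())s", "%(()())s", "%((()))s", but Python's regex engine cannot match nested constructs. The pattern used here therefore only recognises names that contain no parentheses; a stanza whose name contains parentheses is not matched and is left untouched in the output.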
This quirky behaviour is not preserved: "%s %(abc)s" % {"abc":10} ==> "{'abc': 10} 10".
The trailing "b" in the function name safmtb() means 'base' implementation, with the expectation that the safmt() wrapper will be preferred.
References: Python/Objects/stringobject.c
Some ideas for enhancement: 1) Optimise if args or kw empty: search only for positional or named 2) Check for huge field widths, and limit for safety 3) Return information about how many pos+named args were used, or missing 4) Optimise if only kw given, by constructing a regex to only find given names 5) Take a callback function(stanza, num_args|name), implement PEP3101 using % syntax 6) Allow parameterised named formatting e.g. safmt("%(name).f", name=(10, 5, 3.142))
Please tell me if this recipe is useful to you, or would be if it had any of these enhancements or others you may think of.