Welcome, guest | Sign In | My Account | Store | Cart

Ever found "str" % (a,b) a bit limited? Do you want to supply some arguments now, and fill the rest in later? Or mix positional and named arguments, i.e. supply a tuple and a dict? Do you want to put string templates in a configuration file to be read with ConfigParser? I hope you like this recipe.

Python, 147 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import re

# Match any % formatting stanza
# Any character is allowed in names, unicode names work in python 2.2+
# The name can be an empty string
# The "%" operator accepts (and ignores) a C-style length modifier, so
# stanzas such as "%ld" or "%hd" must be recognised as a single unit
safmt_pat = re.compile(r'''
    %                     # Start with percent,
    (?:\( ([^()]*) \))?   # optional name in parens (do not capture parens),
    [-+ #0]*              # zero or more flags
    (?:\*|[0-9]*)         # optional minimum field width
    (?:\.(?:\*|[0-9]*))?  # optional dot and precision
    [hlL]?                # optional length modifier (ignored by python)
    [EFGXcdefgiorsux%]    # type code (or [formatted] percent character)
    ''', re.VERBOSE)

# Convenience front end for safmtb(), e.g. safmt("%blah", 10, 20, spam='eggs')
# The template is always the first positional argument
def safmt(*args, **kw):
    """Format args[0] using the remaining positional and keyword arguments.

    Everything after the template is handed to safmtb() as the positional
    sequence, and the keyword arguments are handed over as the mapping.
    Because the template is taken from *args rather than from a named
    parameter, any keyword name may be used as a format key.

    Indexing args[0] raises IndexError when no template is supplied.
    """
    template = args[0]
    positional = args[1:]
    return safmtb(template, positional, kw)

# Format a single stanza found by safmt_pat (private helper of safmtb)
def _safmt_stanza(mo, args, di, kw, savepc, trace):
    """Format one matched stanza.

    Arguments:
      mo     : match object produced by safmt_pat
      args   : tuple of all positional arguments
      di     : index into args of the next unused positional argument
      kw     : mapping used for named stanzas
      savepc : when true, "escaped percent" stanzas are left in place
      trace  : list collecting debug fragments, or None when not verbose

    Returns a pair (text, di).  text is the formatted replacement, or None
    when the stanza must be left untouched in the template; di is the
    (possibly advanced) positional index.
    """
    verb = trace is not None
    stanza, name = mo.group(0, 1)
    if verb:
        trace.append(str(mo.start()))
        trace.append(str((stanza, name)))

    # stanza always starts with "%", so it is never empty here
    is_percent = stanza[-1] == "%"

    if name is not None:
        if is_percent:
            if savepc:
                if verb:
                    trace.append('saving stanza')
                return None, di
            # A named stanza that is just a percent escape still makes "%"
            # look the key up, so supply a dummy key which never gets used.
            dat = stanza % {name: None}
        else:
            try:
                dat = stanza % kw
            except KeyError:
                if verb:
                    trace.append('ignore missing key')
                return None, di
        if verb:
            trace.append("fmt %r" % dat)
        return dat, di

    # %<blah>% does not use up arguments, but "%*.*%" does
    numargs = not is_percent
    if verb:
        trace.append("args=%s" % numargs)
    # Allow for "*" parameterisation (uses up to 2)
    numargs += stanza.count("*")
    if verb:
        trace.append("args=%s" % numargs)

    p = args[di: di + numargs]
    # Arguments are consumed even if the stanza ends up being preserved
    di += numargs
    if verb:
        trace.append("p=%s" % (p,))
    if len(p) != numargs:
        if verb:
            trace.append("not enough pos args")
        return None, di
    if savepc and is_percent:
        if verb:
            trace.append('saving stanza')
        return None, di
    dat = stanza % p
    if verb:
        trace.append("fmt %r" % dat)
    return dat, di


# Safe and augmented "%" string interpolation:
# - preserve % stanzas in case of missing argument or key
# - allow mixed positional and named arguments
# Note: TypeError exceptions can still happen, e.g. safmt("%d", "abc")
def safmtb(template, args=(), kw=None, savepc=0, verb=0):
    """Interpolate template with whatever arguments are available.

    Stanzas whose positional argument or mapping key is missing are copied
    to the result unchanged, so they can be filled in by a later call.

    Arguments:
      template: a string containing "%" format specifiers
      args    : sequence arguments for format string (any iterable; it is
                converted to a tuple so that lists behave like tuples)
      kw      : mapping arguments for format string
      savepc  : optionally preserve "escaped percent" stanzas
                (parameterised positional stanzas always eat args)
      verb    : verbose execution, prints debug output to stdout

    Returns the formatted string.  Exceptions raised by the "%" operator
    other than KeyError for a missing name (e.g. TypeError for a value of
    the wrong type) are not caught.
    """
    if verb:
        print("safmt(%r)" % (template,))

    if kw is None:
        kw = {}
    # "%" only unpacks tuples; a slice of a list would be formatted as one
    # single value (its repr) instead of supplying the stanza's arguments.
    args = tuple(args)

    pieces = []
    last = 0    # end of the most recent stanza that was replaced
    di = 0      # index of the next unused positional argument
    for mo in safmt_pat.finditer(template):
        if verb:
            trace = []
        else:
            trace = None
        try:
            dat, di = _safmt_stanza(mo, args, di, kw, savepc, trace)
        finally:
            # One debug line per stanza, emitted even if formatting raised
            if verb:
                print(" ".join(trace))
        if dat is None:
            # Stanza preserved: it is copied along with the literal text
            # the next time a slice of the template is appended.
            continue
        pieces.append(template[last:mo.start()])
        pieces.append(dat)
        last = mo.end()

    # Whatever follows the last replaced stanza
    pieces.append(template[last:])
    return ''.join(pieces)


# ****** Related Recipe ******

from ConfigParser import *
from ConfigParser import DEFAULTSECT
class SafeConfigParser(ConfigParser):
    """ConfigParser whose get() interpolates values with safmtb()."""

    # Override get() method to use safe string interpolation
    def get(self, section, option, raw=0, vars=None):
        """Return the value of option in section.

        The lookup namespace is built from the parser defaults, then the
        section's own entries, then vars (if given), later ones winning.
        Unless raw is true, the value is repeatedly passed through
        safmtb() with that namespace (and savepc=1) until it stops
        changing.

        Raises NoSectionError for an unknown section other than
        DEFAULTSECT, NoOptionError when the option is not in the
        namespace, and InterpolationDepthError when the value is still
        changing after 10 interpolation passes.
        """
        # In python2.3, the name changed from __sections to _sections
        if hasattr(self, '_sections'):
            all_sections, base = self._sections, self._defaults
        else:
            all_sections = self._ConfigParser__sections
            base = self._ConfigParser__defaults

        try:
            own = all_sections[section]
        except KeyError:
            if section != DEFAULTSECT:
                raise NoSectionError(section)
            own = {}

        namespace = base.copy()
        namespace.update(own)
        # Entry specific variables take precedence over everything else
        if vars:
            namespace.update(vars)

        key = self.optionxform(option)
        try:
            rawval = namespace[key]
        except KeyError:
            raise NoOptionError(key, section)

        if raw:
            return rawval

        # Interpolate until a pass leaves the value unchanged
        current = rawval
        passes = 0
        while passes < 10:
            expanded = safmtb(current, kw=namespace, savepc=1)
            if expanded == current:
                return expanded
            current = expanded
            passes += 1
        raise InterpolationDepthError(key, section, rawval)

The safmt functions are useful on their own, and the SafeConfigParser class shows one very useful (to me at least) application. It allows including format templates in config files.

If named formats are used in a file read by ConfigParser, it assumes all format stanzas are named and supplied, and that all percents in the file are valid format stanzas. This is not helpful when you want to do some interpolation from configuration keys, and preserve other templates for later use at runtime, and include non-formatting percents in a string. SafeConfigParser is the answer. The 'savepc' flag was added for this application, where converting "%%" to "%" is never what is wanted (AFAICT); "%%" is preserved for later runtime formatting.

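Caveat: Balanced open and close parentheses are allowed in python's string formats, e.g. "%(())s", "%(()())s", "%((()))s", but Python's regex engine cannot match nested constructs. The pattern used here therefore only accepts names without parentheses; a stanza whose name contains parentheses is not recognised and is left in the template untouched.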

This quirky behaviour is not preserved: "%s %(abc)s" % {"abc":10} ==> "{'abc': 10} 10".

The trailing "b" in the function name safmtb() means 'base' implementation, with the expectation that the safmt() wrapper will be preferred.

References: Python/Objects/stringobject.c

Some ideas for enhancement: 1) Optimise if args or kw empty: search only for positional or named 2) Check for huge field widths, and limit for safety 3) Return information about how many pos+named args were used, or missing 4) Optimise if only kw given, by constructing a regex to only find given names 5) Take a callback function(stanza, num_args|name), implement PEP3101 using % syntax 6) Allow parameterised named formatting e.g. safmt("%(name)*.*f", name=(10, 5, 3.142))

Please tell me if this recipe is useful to you, or would be if it had any of these enhancements or others you may think of.

Created by Graham Horler on Mon, 29 Oct 2007 (PSF)
Python recipes (4591)
Graham Horler's recipes (1)

Required Modules

Other Information and Tasks