# (web page navigation residue) Welcome, guest | Sign In | My Account | Store | Cart
import re

class MultiRegex(object):
    '''
    apply several regexes in a single substitution pass.

    subclasses list their patterns in ``regexes``, wrapping each one in a
    named group ``(?P<name>...)``, and provide an attribute called ``name``
    for each group.  if that attribute is callable it is called with the
    match object and its return value is the replacement; otherwise the
    attribute itself is used as the replacement.
    '''
    # flags handed to re.compile for the combined pattern
    flags = re.DOTALL
    # sub-patterns; they are joined with "|" so earlier entries win
    regexes = ()

    def __init__(self):
        '''
        compile a disjunction of regexes, in order
        '''
        self._regex = re.compile("|".join(self.regexes), self.flags)

    def sub(self, s):
        '''
        return ``s`` with every match of the combined pattern replaced
        by whatever the handler of the matching sub-regex yields.
        '''
        return self._regex.sub(self._sub, s)

    def _sub(self, mo):
        '''
        determine which partial regex matched, and
        dispatch on self accordingly.

        raises AttributeError when no named group took part in the match
        (or when no attribute exists for the group that did).
        '''
        # groups that did not participate in the match are None; a group
        # that matched the empty string is '' and still identifies its
        # sub-regex, so it must not be discarded.
        participating = [
            (name, text)
            for name, text in mo.groupdict().items()
            if text is not None
        ]
        if not participating:
            raise AttributeError(
                'nothing captured, matching sub-regex could not be identified')

        # prefer a group that captured text; only fall back to an
        # empty capture when nothing else is available.
        chosen = next(
            (name for name, text in participating if text),
            participating[0][0],
        )
        handler = getattr(self, chosen)
        if callable(handler):
            return handler(mo)
        return handler


class TrivialExample(MultiRegex):
    '''
    small demonstration of MultiRegex: two of the named groups are
    handled by callables, the third by a constant replacement string.
    '''
    # tried left to right, so runs of lower-case or upper-case letters
    # are claimed before the catch-all "mixed" pattern sees them
    regexes = (
        r'(?P<lower>[a-z]{2,})',
        r'(?P<upper>[A-Z]{2,})',
        r'(?P<mixed>[A-Za-z]+)'
    )

    # constant replacement: used as-is because it is not callable
    mixed = 'stuff'

    def lower(self, mo):
        '''tag a lower-case run with a "lower:" prefix.'''
        matched = mo.group(0)
        return 'lower:' + matched

    def upper(self, mo):
        '''tag an upper-case run with an "upper:" prefix.'''
        matched = mo.group(0)
        return 'upper:' + matched


class TrivialExample2(TrivialExample):
    '''
    same handlers as TrivialExample, but with the patterns reordered to
    show that the order of regexes is important: the first alternative
    that matches at a position wins.
    '''
    regexes = (
        # listed first, so it claims every run of letters ...
        r'(?P<mixed>[a-zA-Z]+)',
        # ... and these two never get a chance to match
        r'(?P<lower>[a-z]{2,})',
        r'(?P<upper>[A-Z]{2,})',
    )

# demo: run both example classes over the same sentence.
a = 'That cake was AWESOME, dude!'
# print(...) with a single argument gives the same output on
# python 2 (parenthesised expression) and python 3 (function call).
print(TrivialExample().sub(a))
print(TrivialExample2().sub(a))

# produces:
#   stuff lower:cake lower:was upper:AWESOME, lower:dude!
#   stuff stuff stuff stuff, stuff!

# History
#
#   - revision 5 (15 years ago)
#   - previous revisions are not available