This is an implementation of an easy to use unrestricted grammar. It could be used for quickly prototyping a DSL, testing a post-formal system, or parsing test in a way that requires context. It would however be extremely slow for simple substitutions.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 | #!/usr/bin/env python
#
# unresistricted grammar with dictionary interface
# v.11
# Written by Shea Kauffman
#
# based upon the thue langauage by:
# John Colagioia
# and the interpreter by:
# Frederic van der plancke
#
#
import string, re
def find_all(s, pattern):
"""
return ordered list of indexes where [pattern] appears in [s];
"""
shift_on_match = 1
indexes = []
i = re.search(pattern, s)
if i:
i, j, k = i.start(), i.end(), i.groups()
indexes.append((i,j,k))
return indexes
class Rule:
def __init__(self, lhs, rhs, output=None):
self.lhs = lhs
self.rhs = rhs
def __str__(self):
return "%s :- %s" % (self.lhs, self.rhs)
def __repr__(self):
return "Rule(%s,%s)" % (repr(self.lhs), repr(self.rhs))
class grammar(object):
rulebase = ()
def __init__(self, rules=()):
self.rulebase = tuple(rules)
self.trace = False
def __call__(self, item):
prev = None
while prev != item:
prev = item
item = self[item]
return item
def __getitem__(self, tape):
(match, rule) = self._matches(tape)
if not match:
'''If there are no matches then we're done'''
if self.trace: print tape
return tape
startpos, endpos, matched = match
middledata = self._transform(matched, rule)
dataspace = tape[ : startpos] + middledata + tape[endpos : ]
if self.trace:
print tape, '->', dataspace, '\t\t::\t', rule
return dataspace
def __setitem__(self, lhs, rhs):
self.rulebase = self.rulebase + (Rule(lhs, rhs), )
def __repr__(self):
return '\n'.join([rule.__str__() for rule in self.rulebase])
def __add__(self, other):
newg = grammar(self.rulebase+other.rulebase)
newg.trace = self.trace and other.trace
return newg
def _matches(self, tape):
'''iterate through rules returning first match'''
for rule in self.rulebase:
for i in find_all(tape, rule.lhs):
return i, rule
return False, None
def _transform(self, matched, rule):
if not matched:
return rule.rhs
return rule.rhs(*matched) if callable(rule.rhs) else rule.rhs % matched
if __name__ == '__main__':
TRACES = True
#TESTS:
#lengthdiv3
lengthdiv3 = grammar()
lengthdiv3['ZZZ'] = '...'
lengthdiv3.trace = TRACES
assert lengthdiv3('ZZZZZZZZZZZZ') == '............'
#bitshift
bitshift = grammar()
bitshift['0_']='0--'
bitshift['1_']='0'
bitshift['10--']='01'
bitshift['00--']='0--1'
bitshift['_1--']='@'
bitshift['_0--']='1'
bitshift['_0']=''
bitshift.trace = TRACES
assert bitshift('_1000000000000_') == '111111111111'
#helloworld
helloworld = grammar()
helloworld['__']='Hello, World!'
helloworld.trace = TRACES
assert helloworld('__') == 'Hello, World!'
#binarysucc
binarysucc = grammar()
binarysucc['1_']='1++'
binarysucc['0_']='1'
binarysucc['01\+\+']='10'
binarysucc['11\+\+']='1++0'
binarysucc['_0']='_'
binarysucc['_1\+\+']='10'
binarysucc.trace = TRACES
assert binarysucc('_1111111111_') == '10000000000'
#binaryadd
binaryadd = grammar()
binaryadd[r'_\+_'] = '<+|+>'
binaryadd[r'\+\>0'] = '0?+>'
binaryadd[r'\+\>1'] = '1?+>'
binaryadd[r'\+\>_'] = '<@0C_'
binaryadd[r'0\<\+'] = '<+0?'
binaryadd[r'1\<\+'] = '<+1?'
binaryadd[r'_\<\+'] = '_'
binaryadd[r'0\@0\?'] = '0?0@'
binaryadd[r'0\@1\?'] = '1?0@'
binaryadd[r'1\@0\?'] = '0?1@'
binaryadd[r'1\@1\?'] = '1?1@'
binaryadd[r'0\@\|'] = '|0@'
binaryadd[r'1\@\|'] = '|1@'
binaryadd[r'0\@0\@0C'] = '<@0C0'
binaryadd[r'0\@0\@1C'] = '<@0C1'
binaryadd[r'0\@1\@0C'] = '<@0C1'
binaryadd[r'0\@1\@1C'] = '<@1C0'
binaryadd[r'1\@0\@0C'] = '<@0C1'
binaryadd[r'1\@0\@1C'] = '<@1C0'
binaryadd[r'1\@1\@0C'] = '<@1C0'
binaryadd[r'1\@1\@1C'] = '<@1C1'
binaryadd[r'0\?\<\@'] = '<?0@'
binaryadd[r'1\?\<\@'] = '<?1@'
binaryadd[r'0\?\<\?'] = '<?0?'
binaryadd[r'1\?\<\?'] = '<?1?'
binaryadd[r'\|\<\?'] = '<@|'
binaryadd[r'_\<\?'] = '_'
binaryadd[r'0\?\|\<\@'] = '|0@0@'
binaryadd[r'1\?\|\<\@'] = '|1@0@'
binaryadd[r'_\<\@\|'] = '_|0@'
binaryadd[r'_\|\<\@'] = '_'
binaryadd[r'_0C'] = '_'
binaryadd[r'_1C'] = '_1'
binaryadd.trace = TRACES
assert binaryadd('_111100_+_10010_') == '_1001110_'
palindrome = grammar()
palindrome[r'(?:^1)([01]*)(?:1$)'] = '%s'
palindrome[r'(?:^0)([01]*)(?:0$)'] = '%s'
palindrome[r'^1$'] = ''
palindrome[r'^0$'] = ''
palindrome.trace = True
assert palindrome('000111000') == ''
assert palindrome('000101000') == ''
assert palindrome('0001110') == '00111'
assert palindrome('101100101') == '100'
dictionary = grammar()
dictionary["database"]="master"
dictionary["server"]="mpilgrim"
dictionary.trace = TRACES
assert dictionary["server"] == 'mpilgrim'
assert dictionary["database"] == 'master'
assert dictionary["server is offline"] == 'mpilgrim is offline'
assert dictionary["database server is offline"] == 'master server is offline'
assert dictionary("database server is offline") == 'master mpilgrim is offline'
#print re.search(r"(\w+)\+(\w+)", "3+6", 1).groups()
addition = grammar()
addition.trace = TRACES
addition[r'(\w+)\+(\w+)'] = '+(%s, %s)'
addition[r'\+\((\w+), (\w+)\)'] = lambda x,y: str(int(x)+int(y))
assert addition('2+3') == '5'
assert int(addition('26435+33333')) == 26435+33333
multiply = grammar()
multiply.trace = TRACES
multiply[r'(\w+)\s*\*\s*(\w+)'] = '*(%s, %s)'
multiply[r'\*\((\w+),\s*(\w+)\)'] = lambda x,y: str(int(x)*int(y))
assert multiply('2*3') == '6'
assert int(multiply('26435*33333')) == 26435*33333
math = multiply+addition
math['\((\w+)\)'] = lambda x: math(x) #recursive step
math.trace = TRACES
assert int(math('26435*33333')) == 26435*33333
assert int(math('(26435+26435)*33333')) == (26435+26435)*33333
assert int(math('26435+(26435*33333)')) == 26435+(26435*33333)
assert math('26435+(26435*33333)') == math('26435+26435*33333')
whitespace = grammar()
whitespace.trace = TRACES
whitespace['\n(?P<ws>\s+)(.*)(?P=ws)'] = lambda x,y: ',%s' % y
whitespace['\|,(.*)'] = lambda x: 'where(%s)' % x
assert whitespace('''
z = y |
baz = bar
foo = blurk
moo = cow
''').strip() == 'z = y where(baz = bar,foo = blurk,moo = cow)'
assert whitespace('''
z = y |
baz = bar |
foo = blurk
moo = cow
''').strip() == 'z = y where(baz = bar where(foo = blurk,moo = cow))'
assert whitespace('''
z = y |
baz = bar
foo = blurk
moo = cow
z = y |
baz = bar |
foo = blurk
moo = cow
''').strip() == '''
z = y where(baz = bar,foo = blurk,moo = cow)
z = y where(baz = bar where(foo = blurk,moo = cow))'''.strip()
|