Welcome, guest | Sign In | My Account | Store | Cart

AWK is a text processing language that makes it easy to "search files for lines [...] that contain certain patterns. When a line matches one of the patterns, awk performs specified actions on that line." (GNU Awk User's Guide) This recipe provides a way to do the same thing in Python.

Python, 56 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import re

class AwkUnhandledLine(RuntimeError):
    """Raised when an input line matches none of the registered patterns."""

class Awk:
    """Ordered table of regular expressions and their handlers, awk-style.

    Rules are tried in the order they were added; the first pattern that
    matches at the start of a line decides what happens to that line.
    """

    def __init__(self):
        # No rules registered yet.
        self.pats = []

    def add(self, pattern, handler=None):
        """Register *pattern* (compiled once, here) together with *handler*.

        *handler* may be a callable, which is invoked with the pattern's
        named groups as keyword arguments, or any other value (None by
        default), which is returned as-is when the pattern matches.
        """
        compiled = re.compile(pattern)
        self.pats.append((compiled, handler))

    def process(self, line):
        """Apply the first matching rule to *line* and return its result.

        Raises AwkUnhandledLine, carrying the line, when no rule matches.
        """
        for regex, action in self.pats:
            found = regex.match(line)
            if not found:
                continue
            if not callable(action):
                # Plain values (including None) are handed straight back.
                return action
            return action(**found.groupdict())
        raise AwkUnhandledLine(line)

class AwkFileInput(Awk):
    """Awk rule table that takes its input lines from the fileinput module."""

    def __init__(self):
        # The module is imported locally and kept on the instance, so every
        # later use has to go through self.fileinput rather than a bare
        # "fileinput" name.
        import fileinput
        self.fileinput = fileinput
        Awk.__init__(self)

    def processinput(self):
        """Run process() on every line yielded by fileinput.input().

        Raises AwkUnhandledLine for the first line that no pattern matches;
        the message names the line number within the current file, the
        file name, and the offending line.
        """
        for line in self.fileinput.input():
            try:
                self.process(line)
            except AwkUnhandledLine:
                # Re-raise with location details. The lookups must use
                # self.fileinput: the bare module name is not defined in
                # this scope and would turn the error into a NameError.
                raise AwkUnhandledLine(
                    "Don't understand line %d of file %s: %s" %
                    (self.fileinput.filelineno(),
                     self.fileinput.filename(),
                     line))

# example:

def handle_thing(name=None, num=0):
    """Print *num* as a float, labelled with *name* when one is given.

    name: label to print before the number; a falsy value (None, "")
        selects the "has no name" message instead.
    num: anything float() accepts, such as the digit string captured by a
        regular expression group.

    Raises whatever float() raises for an unconvertible *num*. The
    conversion happens before anything is printed, so a bad value never
    leaves a partial line on the output.
    """
    value = float(num)
    # Single-argument print(...) with %-formatting yields the same text
    # whether print is a statement (Python 2) or a function (Python 3).
    if name:
        print("%s = %s" % (name, value))
    else:
        print("%s has no name" % value)

def example():
    """Build a three-rule table and run it over the fileinput lines.

    Lines starting with "#" are ignored, lines starting with a decimal
    number are printed as unnamed numbers, and lines of the form
    "<word> <digits>" are printed as named numbers. Any other line makes
    processinput() raise AwkUnhandledLine.
    """
    a = AwkFileInput()
    # Raw strings keep \d, \w and \s as regex escapes rather than (invalid)
    # string escapes; the compiled patterns are unchanged.
    a.add(r"^#") # Ignore comments (handler==None)
    a.add(r"^(?P<num>\d+\.\d+)", handle_thing) # Print numbers
    a.add(r"^(?P<name>\w+)\s+(?P<num>\d+)", handle_thing) # Print named numbers
    a.processinput()

This is useful for processing a mini-language for data input.

1 comment

Erik Knowles 13 years, 8 months ago  # | flag

Error handling incorrect.

raise AwkUnhandledLine(
    "Don't understand line %d of file %s: %s" %
    (fileinput.filelineno(),
     fileinput.filename(),
     line) )

should be:

raise AwkUnhandledLine(
    "Don't understand line %d of file %s: %s" %
    (self.fileinput.filelineno(),
     self.fileinput.filename(),
     line) )
Created by Ian Bygrave on Wed, 22 Mar 2006 (PSF)
Python recipes (4591)
Ian Bygrave's recipes (1)

Required Modules

Other Information and Tasks