This recipe implements a simple Table class which allows searching in a syntax similar to SQLs WHERE statement. So if t is Table, t.a == 3 finds all rows, where the column named "a" has the value 3. Intersection and unions of search results are possible too, for examples look at the __main__ part below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | class RowObject(dict):
""" allows access to dict via attributes as keys """
def __init__(self, *l, **kw):
dict.__init__(self, *l, **kw)
def __setattr__(self, k, v):
dict.__setitem__(self, k, v)
def __getattr__(self, k):
return self.get(k)
class Table(object):
""" represents table and set operations on tables """
def __init__(self, rows=[]):
self.rows = rows[:]
def append(self, obj):
self.rows.append(obj)
def __iter__(self):
return iter(self.rows)
def __getattr__(self, column):
""" constructs intermediate table when accessing t.column """
return ITable(self.rows, column)
def __and__(self, other):
""" computes intersection of two tables """
tmp = [ r for r in self.rows if r in other.rows ]
return Table(tmp)
def __or__(self, other):
""" computes union of two tables """
tmp = self.rows[:]
tmp.extend(other.rows[:]) # use copys of lists !
return Table(tmp)
def __str__(self):
""" quite stupid, just for demonstration purposes """
txt = "\t".join(self.rows[0].keys()).expandtabs()
txt += "\n"
txt += "-"*len(txt)
for r in self.rows:
txt += "\n"
txt += "\t".join([str(v) for v in r.values()])
return txt
class ITable(object):
""" intermediate table for storing fixed column name internally """
def __init__(self, rows, column):
self.rows=rows[:]
self.column=column
def _extract(self, fun):
return Table([ row for row in self.rows if fun(row.get(self.column)) ])
def __le__(self, limit):
return self._extract(lambda v: v<=limit)
def __ge__(self, limit):
return self._extract(lambda v: v>=limit)
def __eq__(self, other):
return self._extract(lambda v: v==other)
# store original max function for later usage
__savedMax = max
def max(*what):
""" computes max of a given column if argument is of type ITable
and regular max elsewise. """
if isinstance(what[0], ITable):
it = what[0]
return __savedMax([row.get(it.column) for row in it.rows])
return __savedMax(what)
if __name__ == "__main__":
t=Table()
t.append(RowObject(a=1, b=1))
t.append(RowObject(a=2, b=2))
t.append(RowObject(a=3, b=1))
t.append(RowObject(a=4, b=2))
print
print ">>> Table:"
print t
print
print ">>> max of column b:"
print max(t.b)
print
print ">>> rows with maximal b value:"
print t.b == max(t.b)
print
print ">>> rows with a>=2 and a<=3:"
print (t.a>=2) & (t.a <=3)
print
print ">>> rows with a==1 or b==2:"
print (t.a==1) | (t.b ==2)
|
The RowObject is a helper class and decorates dict where 'd.a' is translated to 'd["a"]'.
For a Table object t, accessing t.b constructs an intermediate table where the name "b" is stored internally: tmp = ITable(rows=t.rows, column="b"). Comparing "t.b <= 2" then calls tmp.__le__(2) which selects all rows r with r["b"] <=2.
One can easily extend these classes, eg implementing __not__ for building the complement to a given set of rows.
I use these classes with some extensions for accessing a csv file.
SQL Generation. sqlbuilder, a module in SQLObject (http://sqlobject.org) implements something like this. You can use Python expressions to create a where clause, and generate SQL or evaluate the expression with bound variables. There is a (somewhat obsolete) backend to SQLObject using dbm files, which simply iterates through a table evaluating the expression with each row, and returning those rows for which the expression is true. Anyway, similar idea.