""" matcher() makes a string matcher function from any of: "RE pattern string" re.compile() a function, i.e. callable a dict / list / tuple / set / container This uniformity is simple, useful, a Good Thing. Usage: matchf = matcher( "re pattern" / re / func / dict / list / tuple / set ) ... if matchf( str ): -- re.search( str ) / func( str ) / str in the container A few example functions using matchers are here too: grep( matcher(), afile ) -- print matching lines + header and trailer, in a file or iterable getfields( "kw kw2 ...".split(), afile ) -- lines starting "kw:" -> [ ("kw", "kw: line") ... ] kwgrep, combined grep and getfields: kwgrep( "name: ^mp3 version: ^1.2.3" -- match these "home-page: summary:" -- and get these too ) -> lines, nhit, nre e.g. [( "name", "name: mp3xx" )], 1, 2 -- 1 kw match only, not both """ # care: re.match( "end$" ) doesn't match "end " "end\n" "end\r" # so rstrip lines early # see also: pls egg-info list + search, gmatch goopat # 5may: matcher( ">= NJ" ) -> lambda x: x >= "NJ" -- str not num import re, sys __version__ = "2009-05-06-May" __author_email__ = "denis-bz-py@t-online.de" __credits__ = "BeautifulSoup" Test = 0 _re_type = type( re.compile( "" )) _relop_re = re.compile( r" ([<=>] =?) \s* (.*) ", re.X ) #------------------------------------------------------------------------------- def matcher( x, negate=False ): """ matcher( "string" / compiled re / func / dict / list / tuple / set ) -> a uniform match func, see above """ if x in ( "", ".*", "*" ): # always match f = lambda _: True elif isinstance( x, basestring ): if x[0] in "<=>": f = compare_func( x ) # ">= NJ" else: f = re.compile( x ) .search # not BSoup lambda s: s == x elif isinstance( x, _re_type ): f = x.search elif callable( x ): # e.g. re.compile().match f = x elif isinstance( x, (list, tuple) ): f = set( x ).__contains__ elif hasattr( x, "__contains__" ): # dict, set -- care if they change later ?! 
f = x.__contains__ elif x in ( 0, 1, True, False, None ): f = lambda _: x # matcher( matcher) == matcher else: assert 0, "matcher: %s must be one of: str re callable dict list tuple" % x if negate: return lambda x: not f( x ) # match this - that, cf goopat return f #............................................................................... def grep( matchf, afile, header="", indent="", trailer="", out=sys.stdout ): """ print lines matching matchf ("re" / re / func / dict) + header and trailer -> nmatch out None: just return 1 on first match / 0 """ matchf = matcher( matchf ) if isinstance( afile, basestring ): afile = open( afile ) # IOError: [Errno 2] No such file or directory nmatch = 0 for line in afile: if matchf( line ): if out is None: return 1 if header: print >>out, header header = None print "%s%s" % (indent, line) nmatch += 1 if nmatch and out and trailer: print >>out, trailer.rstrip( " " ) return nmatch _kw_re = re.compile( r" \s* ([\w.-]+) \s* : \s* ", re.X ) _kw_rest_re = re.compile( r" \s* ([\w.-]+) \s* : \s* (.*) ", re.X ) # kw: rest of line #............................................................................... def getfields( fields, afile, lower=True ): """ grep lines starting with given keywords / field names ":" e.g. "name version".split() -> lines [ ("name", "name: ...") ... ] """ if isinstance( fields, basestring ): fields = filter( None, re.split( r"[\s:,]+", fields )) matchf = matcher( fields ) if isinstance( afile, basestring ): afile = open( afile ) lines = [] for line in afile: m = _kw_re.match( line ) # kw: ... if not m: continue kw = m.group( 1 ) if lower: kw = kw.lower() if matchf( kw ): # kw in dict list tuple or set or func( kw ) lines.append( (kw, line.rstrip()) ) return lines #............................................................................... 
def kwgrep(grepdict, afile, lower=True):
    """ combined grep + getfields:
            "name: ^mp3  version: ^1.2.3"  -- match these
            "home-page: summary:"          -- and get these too
        or a dict, kw -> matcher() / "" for also-gets
        usage:
            lines, nmatch, nre = kwgrep()
            if nmatch == nre:
                ... all REs matched, here both name: and version:
                ... or 0 == 0, just getfields

    grepdict: a spec string, parsed by str_grepdict(), or a dict
              keyword -> match function, with a false value such as ""
              meaning "always get this field".
    afile:    a filename, which is opened here and closed again,
              or any iterable of lines.
    lower:    lowercase the keyword found in each line before looking it up;
              the keys of grepdict are not changed, so give them in lowercase.
    Returns (lines, nhit, nre):
        lines  [ (keyword, rstripped line) ... ] for also-gets and for matches
        nhit   number of different keywords whose match function matched
        nre    number of keywords in grepdict that have a match function
    """
    if isinstance(grepdict, basestring):
        grepdict = str_grepdict(grepdict)
    opened_here = isinstance(afile, basestring)
    if opened_here:
        afile = open(afile)
    lines = []
    hits = set()                        # different keywords w RE matches
    try:
        for line in afile:
            m = _kw_rest_re.match(line)     # kw: ...
            if not m:
                continue
            kw = m.group(1)
            if lower:
                kw = kw.lower()
            if kw not in grepdict:
                continue
            matchf = grepdict[kw]
            if not matchf:                  # also-gets
                lines.append((kw, line.rstrip()))
                continue
            restofline = m.group(2).rstrip()
            if matchf(restofline):
                lines.append((kw, line.rstrip()))
                hits.add(kw)
    finally:
        if opened_here:
            afile.close()
    nre = sum(1 for matchf in grepdict.values() if matchf)
    if Test:
        sys.stderr.write("test kwgrep: %s %s %s\n" % (lines, len(hits), nre))
    return (lines, len(hits), nre)


def str_grepdict(s):  # for kwgrep
    """ parse a kwgrep spec string -> dict keyword -> matcher() or ""

    No ":" at all, e.g. "a b c": every word is a field to get, value "".
    Otherwise s is split at each "keyword:", and the text up to the next
    keyword becomes that keyword's pattern; an empty pattern gives "".
    NB a pattern containing "word:" is itself split there as a keyword.
    """
    if ":" not in s:    # "a b c" -> get fields a b c
        return dict.fromkeys(s.split(), "")
    pairs = _kw_re.split(s.rstrip())
        # "a: 1 b: c: d: 2" -> ['', 'a', '1', 'b', '', 'c', '', 'd', '2']
        # one capture group, so keywords sit at odd indexes, values at even
    grepdict = {}
    for kw, val in zip(pairs[1::2], pairs[2::2]):
        grepdict[kw] = matcher(val) if val else ""
    if Test:
        sys.stderr.write("test str_grepdict: %s\n" % (grepdict,))
    return grepdict

#...............................................................................
def putlines(lines, header="", indent="", trailer="", out=sys.stdout):
    """ write header, the indented lines, and trailer to out;
        nothing at all if lines is empty.

        kwlines = getfields( "name version".split(), afile )
        putlines( map( itemgetter(1), kwlines ), header=afile )

    Each of header, line and trailer is followed by a newline;
    trailing blanks are stripped from trailer.
    """
    if not lines:
        return
    if header:
        out.write("%s\n" % header)
    for line in lines:
        out.write("%s%s\n" % (indent, line))    # to out, like header and trailer
    if trailer:
        out.write("%s\n" % trailer.rstrip(" "))

#...............................................................................
def compare_func(relopstr):
    """ "< 3" -> the function x -> (x < "3")    NB str "3" not num 3

    relopstr: one of < <= = == > >= then optional blanks, then the operand.
        "=" means "==".  Trailing whitespace and surrounding double quotes
        are stripped from the operand, which is compared as a string.
    Raises AttributeError if relopstr does not start with such an operator.
    """
        # (could lambda x: (x < 3) if isnum(x) else (x < "3")
        # but then version: > 0.1 is num compare,
        # version: > 0.1.0 str
    import operator     # local: only this function needs it

    relop, s = _relop_re.match(relopstr).groups()
    if relop == "=":
        relop = "=="
    s = s.rstrip().strip("\"")
    # a table lookup rather than eval() of generated source, so quotes and
    # backslashes in the operand are plain text and cannot inject code
    compare = {
        "<": operator.lt,
        "<=": operator.le,
        "==": operator.eq,
        ">": operator.gt,
        ">=": operator.ge,
    }[relop]
    if Test:
        sys.stderr.write("compare_func: lambda x: x %s \"%s\"\n" % (relop, s))
    return lambda x: compare(x, s)

#...............................................................................
if __name__ == "__main__":
    # for pat in ( "a", re.compile( "^a" ), matcher( "b" ), dict( a=1 ) ):
    #     matchf = matcher( pat )
    #     for s in "ab":
    #         print "matcher( %s )( %r ) = %s" % ( pat, s, matchf( s ))
    #     print ""
    # grep( "^def", __file__ )

    # tiny interactive console:
    #   "!stmt"              executes stmt as Python, in this module
    #   "name: x; version: y" splits at ";" into lines and runs kwgrep on them
    try:
        import readline     # side effect only: line editing for the prompt
    except ImportError:     # not available on every platform
        pass
    try:
        read_line = raw_input   # Python 2
    except NameError:
        read_line = input       # Python 3

    # for afile in sys.argv[1:]:
    while 1:
        try:
            line = read_line("matcher: ")
        except EOFError:
            break
        if line.startswith("!"):    # startswith: an empty line must not crash
            # NB exec of whatever was typed: a developer console, never feed it
            # untrusted input
            exec(line[1:].strip())
        else:
            result = kwgrep("name: ^mp3 version: ", line.split(";"))
            sys.stdout.write("%s\n" % (result,))

# end matcher.py