Welcome, guest | Sign In | My Account | Store | Cart

I needed to write a sed/awk Python equivalent for walking into a directory tree and renaming certain subdirectories, while also looking into all xml files on the way and replacing/modifying certain strings in those files.

It would be nicer if someone could suggest an enhanced re.sub(regex, replacement, subject) in which the second argument, 'replacement', could itself be a pattern, so that each string in 'subject' that matches 'regex' is replaced by a different string derived from that particular match. For example, 'arthinternational-d' would be replaced by 'arthinternational-r' and 'arthfmt-d' by 'arthfmt-r', but 'a-d' would remain unmodified.

Python, 96 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
from __future__ import generators
import fileinput, glob, string, sys, os, re
from os.path import join


def checkdirname(name):
    """Tell whether a directory name has the form ``arth<non-digits>-d``.

    The name must start with 'arth', continue with zero or more non-digit
    characters and end with '-d'.

    Returns True when *name* fits that shape, False otherwise.
    """
    return re.search(r'^arth(\D*)-d$', name) is not None

def checkfilename(name):
    """Check whether a file name ends with the ``.xml`` extension.

    Returns True when *name* ends in a literal '.xml', False otherwise.
    """
    # The dot must be escaped: left bare it matches any character, so a
    # name such as 'fooxml' would wrongly be treated as an XML file.  A raw
    # string keeps the backslash from being read as a string escape.
    return re.search(r'\.xml$', name) is not None
			
def renamedir(dirname, newname):
    """Rename the directory *dirname* to *newname*.

    Thin wrapper around os.rename; any OSError raised by the rename
    propagates to the caller.  Returns None.
    """
    # Indentation is uniform here: the previous space+tab / tab mix is
    # rejected by Python 3 with a TabError.
    os.rename(dirname, newname)
	

def replacestrs(filename):
    """Replace every '-d0' with '-r0', in place, in the files matching *filename*.

    *filename* is expanded with glob.glob, so it may be a plain path or a
    shell-style wildcard pattern.  Each matching file is rewritten in place
    through fileinput.  When nothing matches, the function does nothing.

    Returns None.
    """
    files = glob.glob(filename)
    if not files:
        # fileinput falls back to reading sys.stdin when handed an empty
        # list, which would make the caller block waiting for input.
        return

    stext = '-d0'
    rtext = '-r0'

    try:
        for line in fileinput.input(files, inplace=1):
            # str.replace is a no-op when stext is absent, so no prior
            # search is needed.  The former "find(...) > 0" guard skipped
            # every line in which stext started at column 0.
            sys.stdout.write(line.replace(stext, rtext))
    finally:
        # Restore sys.stdout and release the file even if the loop is
        # interrupted by an exception.
        fileinput.close()
	
	

def dirwalk(dir):
    """Walk a directory tree lazily, renaming directories and patching XML files.

    For each entry of *dir*, in os.listdir order:

    * a real (non-symlink) directory whose name is accepted by
      checkdirname() is renamed so that its trailing '-d' becomes '-r';
      the directory is then walked recursively under its current path;
    * a regular file is reported on stdout and, when checkfilename()
      accepts its name, rewritten by replacestrs().

    Yields a (name, fullpath) tuple for every entry; the contents of a
    directory are yielded before the directory itself.  For a renamed
    directory, *name* is still the name as it was listed (before the
    rename) while *fullpath* is the path after the rename.

    Being a generator, nothing is renamed or rewritten until the result
    is iterated.
    """
    for f in os.listdir(dir):
        fullpath = os.path.join(dir, f)

        if os.path.isdir(fullpath) and not os.path.islink(fullpath):
            if checkdirname(f):
                # checkdirname guarantees the name ends in '-d'; swap
                # that suffix for '-r' and continue under the new path.
                newname = f[:-2] + '-r'
                newpath = os.path.join(dir, newname)
                renamedir(fullpath, newpath)
                fullpath = newpath

            for x in dirwalk(fullpath):
                yield x
        elif os.path.isfile(fullpath):
            # A parenthesised single-argument print gives the same output
            # on Python 2 and Python 3.
            print('Saw file %s' % fullpath)

            if checkfilename(f):
                replacestrs(fullpath)

        # Every entry is reported, whatever its type: directories,
        # files, symlinks and anything else.
        yield f, fullpath
      
        
        	
def main():
    """Command-line entry point: process the tree named by the first argument.

    Exits with status 1 after printing a usage message when no argument is
    given, or after reporting on stderr when the argument is not a
    directory.  Otherwise the tree is walked with dirwalk().
    """
    if len(sys.argv) < 2:
        print('Usage: Python dirwalkren.py directoryname.')
        sys.exit(1)

    root = sys.argv[1]
    if not os.path.isdir(root):
        # Fail with a readable message rather than an OSError traceback
        # from os.listdir deep inside the walk.
        sys.stderr.write('%s is not a directory\n' % root)
        sys.exit(1)

    # dirwalk is a generator, so the renaming and rewriting only happen
    # while it is being consumed; the yielded entries are not needed here.
    for _entry in dirwalk(root):
        pass
				
			
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
	main()
        	
        	
        	
        	
        	
        

Suggestions welcome!

2 comments

Chris Arndt 18 years, 9 months ago  # | flag

replacement can be a function. From the library reference:

"If repl is a function, it is called for every non-overlapping occurrence of pattern. The function takes a single match object argument, and returns the replacement string."

Applied to your problem (if I understood correctly what you meant):

def repl_func(match):
    """Build the replacement text for one regex match.

    Takes the match object handed over by re.sub and returns 'a', followed
    by the text captured in group 1, followed by '-r'.
    """
    captured = match.group(1)
    return "a%s-r" % (captured,)

# Sample inputs.  The first three match the pattern and get their '-d'
# suffix turned into '-r'; 'a-d' has nothing between 'a' and '-d', so the
# pattern does not match and it is printed unchanged.
test = ['asomething-d', 'aother-d', 'athing-d', 'a-d']

for item in test:
    # A parenthesised single-argument print gives the same output on
    # Python 2 and Python 3.
    print(re.sub(r'a(.+)-d$', repl_func, item))
Bibha Tripathi (author) 18 years, 9 months ago  # | flag

Thanks Chris! This is what I'd add to the code, I only realized later. Thank you!