I needed to write a Python equivalent of a sed/awk script that walks a directory tree and renames certain subdirectories, while also examining every XML file along the way and replacing or modifying certain strings in those files.
It would be nicer if someone could suggest an enhanced re.sub(regex, replacement, subject) in which the second argument, 'replacement', could itself be a pattern, so that each string in 'subject' that matches 'regex' is replaced by a different string derived from that particular match. For example, 'arthinternational-d' would be replaced by 'arthinternational-r' and 'arthfmt-d' by 'arthfmt-r', but 'a-d' would remain unmodified.
from __future__ import generators
import fileinput, glob, string, sys, os, re
from os.path import join
def checkdirname(name):
    """Return True if *name* has the form 'arth<non-digits>-d'.

    The name must start with 'arth', end with '-d', and contain no digit
    in between, e.g. 'arthfmt-d' or 'arth-d'.  Anything else, such as
    'a-d' or 'arth2-d', is rejected.
    """
    # \Z anchors at the true end of the string.  '$' would also accept a
    # name ending in '-d\n', which a caller that rewrites the last two
    # characters of the name would then mangle.
    return re.match(r'arth\D*-d\Z', name) is not None
def checkfilename(name):
    """Return True if *name* ends with the '.xml' extension.

    The comparison is case-sensitive.  A plain suffix test is used rather
    than a regular expression: an unescaped '.' in a pattern matches any
    character, which would also accept names such as 'fooxml'.
    """
    return name.endswith('.xml')
def renamedir(dirname, newname):
    """Move the directory at path *dirname* to path *newname*.

    Delegates directly to os.rename, so any error it raises propagates
    to the caller.
    """
    os.rename(dirname, newname)
def replacestrs(filename):
    """Replace every '-d0' with '-r0', in place, in the named file(s).

    *filename* is either the path of an existing file or a glob pattern
    selecting several files.  Each selected file is rewritten in place,
    line by line.  Nothing happens if no file is selected.
    """
    stext = '-d0'
    rtext = '-r0'
    # Prefer the literal path: a real file whose name contains glob
    # metacharacters (e.g. 'a[1].xml') would not be found by glob.glob.
    if os.path.isfile(filename):
        files = [filename]
    else:
        files = glob.glob(filename)
    # fileinput falls back to reading standard input when given an empty
    # file list, which would block here; bail out instead.
    if not files:
        return
    reader = fileinput.input(files, inplace=True)
    try:
        # With inplace set, fileinput redirects sys.stdout into the file
        # being rewritten, so every line must be written back out.
        for line in reader:
            # str.replace is a no-op when stext is absent, and unlike a
            # "find(...) > 0" guard it also handles a match at column 0.
            sys.stdout.write(line.replace(stext, rtext))
    finally:
        # Restores sys.stdout and removes the backup file even on error.
        reader.close()
def dirwalk(dir):
    """Walk the tree under *dir*, renaming directories and patching XML files.

    For each entry of *dir*:

    * A real (non-symlink) subdirectory whose name satisfies
      checkdirname is renamed so that its trailing '-d' becomes '-r'.
      Every real subdirectory, renamed or not, is then walked recursively.
    * A regular file is reported on standard output and, if checkfilename
      accepts its name, rewritten in place by replacestrs.

    This is a generator, so the renames and rewrites happen only as it is
    iterated.  It yields a (name, fullpath) tuple for each regular file.
    """
    # NOTE(review): the original indentation was lost; the recursion is
    # assumed to apply to every subdirectory, not only renamed ones.
    for f in os.listdir(dir):
        fullpath = os.path.join(dir, f)
        if os.path.isdir(fullpath) and not os.path.islink(fullpath):
            if checkdirname(f):
                # Swap the two-character '-d' suffix for '-r' and continue
                # the walk under the new path.
                newpath = os.path.join(dir, f[:-2] + '-r')
                renamedir(fullpath, newpath)
                fullpath = newpath
            for x in dirwalk(fullpath):
                yield x
        elif os.path.isfile(fullpath):
            # Single-argument form prints identically on Python 2 and 3.
            print('Saw file %s' % fullpath)
            if checkfilename(f):
                replacestrs(fullpath)
            yield f, fullpath
def main():
    """Command-line entry point: process the tree named by sys.argv[1].

    Prints a usage message and exits with status 1 when no directory
    argument is given.
    """
    if len(sys.argv) < 2:
        # Single-argument form prints identically on Python 2 and 3.
        print('Usage: Python dirwalkren.py directoryname.')
        sys.exit(1)
    # dirwalk is a generator: its renames and rewrites only happen while it
    # is consumed, so drain it and discard the yielded entries.
    for _entry in dirwalk(sys.argv[1]):
        pass


if __name__ == '__main__':
    main()
|
Suggestions welcome!
replacement can be a function. From the library reference:
"If repl is a function, it is called for every non-overlapping occurrence of pattern. The function takes a single match object argument, and returns the replacement string."
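Applied to your problem (if I understood correctly what you meant), for example: re.sub(r'\b(arth[a-z]*)-d\b', lambda m: m.group(1) + '-r', subject) turns 'arthfmt-d' into 'arthfmt-r' while leaving 'a-d' untouched.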
Thanks Chris! This is what I'd add to the code, I only realized later. Thank you!