Have you ever wanted to display a filesystem subtree together with, as an extra bonus, the md5sum of each file?
I have.
Here is a tiny Python script that combines the functionality of "tree" and "md5sum", just for your pleasure.
I wrote it with Python 2.4 under Linux.
#!/usr/bin/python
import os
import os.path
import sys
import md5
from stat import *
from optparse import OptionParser
class Stats:
    """Running totals of the entries met while scanning directory trees.

    Every counter starts at zero and is incremented by scan_tree():
      filenb:       regular files and symbolic links
      dirnb:        sub-directories found below a scanned root
      othernb:      devices, fifos, sockets and entries of unknown kind
      unstatablenb: entries on which lstat() failed
    """

    def __init__(self):
        # One chained assignment: all four counters begin at 0.
        self.filenb = self.dirnb = self.othernb = self.unstatablenb = 0
def _special_file_label(mode):
    """Return the bracketed tag appended to an entry that is neither a
    regular file, a directory nor a symlink, given its lstat() mode bits."""
    if S_ISCHR(mode):
        return ' [char device]'
    if S_ISBLK(mode):
        return ' [block device]'
    if S_ISFIFO(mode):
        return ' [fifo]'
    if S_ISSOCK(mode):
        return ' [socket]'
    return ' [unknown]'


def scan_tree(lst, maxlen, dirname, dirpath, prefix, nxt_prefix, options, stats):
    """Recursively append the ascii-art representation of a directory to lst.

    params:
      lst: I/O list of (tree_ascii_art_repr_line, path_if_regular_file_else_None)
           where both are strings and the second one can also be None
      maxlen: integer that contains the rightmost column number of ascii repr
              of the tree known by the caller
      dirname: name of the directory from which a tree repr is wanted
      dirpath: path to the directory from which a tree repr is wanted
      prefix: string to prepend to the dirname to form the first line of the
              ascii repr of the subtree
      nxt_prefix: string to prepend to every line of the repr of the subtree
                  but the first one (which uses prefix)
      options: options as extracted by the optparse module from cmd line
               options; only options.all is read (falsy => entries whose name
               starts with a dot are skipped)
      stats: Stats instance; its counters are incremented for every child
             entry met (the directory passed as dirpath is not counted here)

    returns a new value for maxlen

    A directory that cannot be listed, or a child that cannot be lstat()ed,
    is reported in its own line with a bracketed error tag; no exception is
    propagated for these cases.
    """
    header = prefix + dirname
    maxlen = max(maxlen, len(header))
    try:
        dir_content = os.listdir(dirpath)
    except OSError:
        lst.append((header + ' [error reading dir]', None))
        return maxlen
    dir_content.sort()
    if not options.all:
        dir_content = [child for child in dir_content
                       if not child.startswith('.')]
    lst.append((header, None))

    last = len(dir_content) - 1
    for num, child in enumerate(dir_content):
        # Continuation prefixes are exactly as wide as the 4-column branch
        # prefixes, so that nested levels line up under their parent's name.
        if num == last:
            sub_prefix = nxt_prefix + '`-- '
            sub_nxt_prefix = nxt_prefix + '    '
        else:
            sub_prefix = nxt_prefix + '|-- '
            sub_nxt_prefix = nxt_prefix + '|   '
        joined_path = os.path.join(dirpath, child)
        entry = sub_prefix + child
        maxlen = max(maxlen, len(entry))
        try:
            # lstat (not stat): symlinks are reported, never followed.
            lmode = os.lstat(joined_path)[ST_MODE]
        except OSError:
            # Only filesystem errors are tolerated; KeyboardInterrupt and
            # programming errors must not be silently swallowed.
            stats.unstatablenb += 1
            lst.append((entry + ' [error stating child]', None))
            continue
        if S_ISREG(lmode):
            stats.filenb += 1
            # Only regular files carry a path: the caller checksums those.
            lst.append((entry, joined_path))
        elif S_ISDIR(lmode):
            stats.dirnb += 1
            # The recursive call appends the line for the directory itself.
            maxlen = scan_tree(lst, maxlen, child, joined_path,
                               sub_prefix, sub_nxt_prefix, options, stats)
        elif S_ISLNK(lmode):
            # Symlinks are counted together with files.
            stats.filenb += 1
            try:
                lst.append((entry + ' -> ' + os.readlink(joined_path), None))
            except OSError:
                lst.append((entry + ' [cannot read symlink]', None))
        else:
            stats.othernb += 1
            lst.append((entry + _special_file_label(lmode), None))
    return maxlen
def md5_from_path(path):
    """Returns an hex repr of the md5sum of the file content path points to.

    The file is read in binary mode, in chunks, so the digest is computed on
    the exact bytes stored on disk and large files are never loaded whole.

    On IOError (file missing, unreadable, ...) returns '<unable to read file>'.
    """
    # hashlib superseded the md5 module (Python 2.5+); fall back to the old
    # module so the script keeps working on the interpreter it was written for.
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        from md5 import new as new_md5
    try:
        # 'rb': text mode may translate newlines (or decode) and so would
        # yield a digest that differs from what md5sum reports.
        f = open(path, 'rb')
        # Nested try/finally (rather than try/except/finally or "with") keeps
        # the syntax valid on Python 2.4 while guaranteeing the close.
        try:
            m = new_md5()
            while True:
                b = f.read(262144)
                if not b:
                    break
                m.update(b)
        finally:
            f.close()
        return m.hexdigest()
    except IOError:
        return '<unable to read file>'
def _count_label(count, singular, plural):
    """Format count with the singular template for exactly 1, else the plural."""
    if count == 1:
        return singular % count
    return plural % count


def main():
    """Command-line entry point.

    Parses the options (-a/--all) and the root directories (default: '.'),
    then, for each root, prints its ascii-art tree with the md5sum of every
    regular file aligned in a column to the right of the tree. A one-line
    summary of the counters gathered over all the roots is printed last.
    """
    parser = OptionParser(usage="usage: %prog [options] [dir1 [dir2 [...]]]")
    parser.add_option("-a", "--all", action='store_true', dest='all',
                      default=False, help="All files are listed.")
    options, roots = parser.parse_args()
    # A single Stats instance is shared by every root, so the final summary
    # covers all of them.
    stats = Stats()
    if not roots:
        roots = ['.']
    for root in roots:
        # First pass: build the tree lines and learn how wide they get.
        lst = []
        maxlen = scan_tree(lst, 0, root, root, "", "", options, stats)
        # Second pass: print, checksumming the regular files as we go.
        # print(...) with one parenthesised argument behaves the same under
        # Python 2 (statement) and Python 3 (function).
        for line, path in lst:
            if path is not None:
                # Pad to one column past the widest entry to align the sums.
                print(line.ljust(maxlen + 1) + md5_from_path(path))
            else:
                print(line)
        # NOTE(review): the original indentation was lost; the blank line is
        # assumed to follow each root's tree -- confirm against the author's
        # layout.
        print('')
    print(', '.join((
        _count_label(stats.dirnb, '%d directory', '%d directories'),
        _count_label(stats.filenb, '%d file', '%d files'),
        _count_label(stats.othernb, '%d other', '%d others'),
        _count_label(stats.unstatablenb, '%d unstatable', '%d unstatables'))))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
A main loop iterates over each root passed as a command-line parameter. The body of this loop makes two passes. First, the recursive function scan_tree() is called on the current root: taking the options into account, it populates its first argument lst, updates the stats (last argument), and returns the rightmost character position reached by an entry name in the ASCII-art tree representation. Then the tree is displayed line by line; for each line corresponding to a regular file, the md5sum of its content is calculated with the md5 module and appended (in hexadecimal) to the printed line, after the spaces needed to align the sums.
The only option is -a, and it works the same way as the -a option of tree(1): with -a, hidden files (those whose names begin with a dot) are displayed too, except "." and "..". It would be interesting to extend the script so that more options of tree(1) are supported, and to colorize the output the same way tree(1) does.
Great Utility. This is a great utility. Thank you for making it public. I'm wondering if there is a better place to host this than ASPN Cookbook? Maybe cheeseshop? Also, some example output might be nice. I started to cannibalize this to author the exact same utility until I realized what it did.
On second thought, here is an example from my own stuff (ASPN messed up the formatting, all of the md5 sums should be aligned):