As one accumulates multiple drives, both hard and flash, containing thousands or even millions of files, it becomes useful to have a text file containing an alphabetized catalog list of all files and their locations by drive and directory.
The list can be searched by eye or by an editor to locate particular files.
The list can also be loaded into a script to be filtered programmatically as desired.
"""
Catalog multiple drives.
As one accumulates multiple drives, hard and flash, containing,
thousands even millions of files, it becomes useful to have a text file
containing an alphabetized catalog list of all files and their locations
by drive and directory.
The list can be searched by eye or by an editor to locate particular
files.
The list can also be loaded into a script to be filtered
programmatically as desired.
"""
__author__ = "Jack Trainor"
__date__ = "2016-03-10"
import sys
import os
########################################################################
class FileSpec(object):
    """Break a file path into the pieces used to build a catalog entry.

    Attributes set by the constructor:
        drive            -- drive prefix returned by os.path.splitdrive(path)
        path_minus_drive -- remainder of path after the drive prefix
        dir, name        -- result of os.path.split(path)
        corename, ext    -- result of os.path.splitext(name); ext is
                            lower-cased
        dir_minus_drive  -- dir with the drive prefix removed
        drive_name       -- caller-supplied label for the drive
    """

    def __init__(self, path, drive_name=""):
        self.drive, self.path_minus_drive = os.path.splitdrive(path)
        self.dir, self.name = os.path.split(path)
        self.corename, self.ext = os.path.splitext(self.name)
        self.ext = self.ext.lower()
        # Strip exactly the detected drive prefix instead of a fixed two
        # characters, so paths with no drive (or a longer UNC prefix) keep
        # their directory intact.
        self.dir_minus_drive = self.dir[len(self.drive):]
        self.drive_name = drive_name
class CatalogEntry(object):
    """One catalog row: a file name, the drive label, and its directory."""

    def __init__(self, name, drive_name, dir_minus_drive):
        # Store the three fields exactly as supplied by the caller.
        (self.name,
         self.drive_name,
         self.dir_minus_drive) = (name, drive_name, dir_minus_drive)
########################################################################
def read_file(path):
    """Return the full contents of the file at path, read in binary mode.

    On any failure a message is written to stderr and an empty string is
    returned, so callers never see an exception from this function.
    """
    bytes_ = ""
    try:
        # The context manager closes the handle even if read() raises.
        with open(path, 'rb') as f:
            bytes_ = f.read()
    except Exception as e:
        sys.stderr.write('read_file failed: %s [%s]\n' % (path, e))
    return bytes_
def write_file(path, bytes_, simulated=True):
    """Write bytes_ to the file at path in binary mode.

    Nothing is written while simulated is true (the default); pass
    simulated=False to actually create or overwrite the file.
    Any failure is reported on stderr rather than raised.
    """
    try:
        if not simulated:
            # The context manager closes the handle even if write() raises.
            with open(path, 'wb') as f:
                f.write(bytes_)
    except Exception as e:
        sys.stderr.write('write_file: %s [%s]\n' % (path, e))
def println(line):
    """Write line to standard output followed by a newline."""
    terminated = line + "\n"
    sys.stdout.write(terminated)
########################################################################
def get_files_in_dir(dir_, onelevel=False):
    """Return the paths of all files found by walking dir_.

    When onelevel is true the walk stops after the first directory
    visited, so only files directly inside dir_ are returned.
    """
    found = []
    for root, _subdirs, filenames in os.walk(dir_):
        found.extend(os.path.join(root, name) for name in filenames)
        if onelevel:
            # Stop before descending into any subdirectory.
            break
    return found
########################################################################
def entry_to_line(entry):
    """Format entry as one tab-separated line: name, drive name, directory."""
    fields = (entry.name, entry.drive_name, entry.dir_minus_drive)
    return "%s\t%s\t%s" % fields
def line_to_entry(line):
    """Parse one tab-separated catalog line into a CatalogEntry.

    Returns None when the line does not split into exactly three fields.
    """
    fields = line.split("\t")
    if len(fields) != 3:
        return None
    name, drive_name, dir_minus_drive = fields
    return CatalogEntry(name, drive_name, dir_minus_drive)
def path_to_entry(path, drive_name):
    """Build a CatalogEntry for path, labelled with drive_name."""
    parts = FileSpec(path, drive_name)
    return CatalogEntry(parts.name, parts.drive_name, parts.dir_minus_drive)
def paths_to_entries(paths, drive_name):
    """Return a list with one CatalogEntry per path, in the same order."""
    return [path_to_entry(path, drive_name) for path in paths]
########################################################################
def read_catalog_file_entries(catalog_path):
    """Load a catalog file and return its entries as a list.

    Lines that line_to_entry cannot parse are skipped.
    """
    println("read_catalog_file_entries %s" % catalog_path)
    contents = read_file(catalog_path)
    parsed = (line_to_entry(row) for row in contents.splitlines())
    return [entry for entry in parsed if entry]
def write_catalog_file_entries(catalog_path, entries):
    """Write entries to catalog_path, one line each, sorted ignoring case."""
    println("write_catalog_file_entries %s" % catalog_path)
    rows = [row for row in (entry_to_line(item) for item in entries) if row]
    # Case-insensitive ordering so the catalog reads alphabetically.
    rows.sort(key=lambda row: row.lower())
    write_file(catalog_path, "\n".join(rows), False)
########################################################################
def write_drive_catalog_file(drive_path, drive_name, catalog_path):
    """Catalog every file under drive_path and write the result to catalog_path.

    drive_name is the label recorded with each entry.
    """
    println("write_drive_catalog_file %s -> %s" % (drive_path, catalog_path))
    drive_entries = paths_to_entries(get_files_in_dir(drive_path), drive_name)
    write_catalog_file_entries(catalog_path, drive_entries)
def write_master_catalog_file(catalog_paths, master_catalog_path):
    """Merge the catalogs listed in catalog_paths into one master catalog file."""
    println("write_master_catalog_file %s" % master_catalog_path)
    combined = []
    for source_path in catalog_paths:
        combined.extend(read_catalog_file_entries(source_path))
    write_catalog_file_entries(master_catalog_path, combined)
########################################################################
def sample():
    """ Sample calls for drives located on J and K drives. """
    # (drive path, drive label, per-drive catalog file)
    drives = [
        ("j:\\", "SANSA2_1G", r"c:\SANSA2_1G.txt"),
        ("k:\\", "8GB", r"c:\8GB.txt"),
    ]
    master_path = r"c:\Master_Catalog.txt"
    for drive_path, drive_name, catalog_path in drives:
        write_drive_catalog_file(drive_path, drive_name, catalog_path)
    write_master_catalog_file([catalog for _, _, catalog in drives], master_path)
    # Echo the merged catalog to stdout.
    for entry in read_catalog_file_entries(master_path):
        println(entry_to_line(entry))
if __name__ == "__main__":
    # Announce which script is running. Uses the file's own println helper
    # instead of the Python 2-only print statement, so the module also
    # parses under Python 3.
    println(__file__)
    # sample()
    println("Complete.")
|
A friend stored a box of drives containing terabytes of documentaries with me and encouraged me to sort through them and watch any that I wanted. This was my solution for locating items I wanted to watch and finding them later.
I used to do batch work on files by walking through a directory, checking for matches on the filenames, then working on that file in place.
Now I prefer to assemble a list of paths, then later iterate through the path list to do whatever work I want.
As it stands, the catalog code works with all file paths found on a drive. If desired, one could extend the code to include standard checks for file extensions or excluded directories to shorten the overall list before later processing.