Welcome, guest | Sign In | My Account | Store | Cart

As one accumulates multiple drives, hard and flash, containing thousands or even millions of files, it becomes useful to have a text file containing an alphabetized catalog of all files and their locations by drive and directory.

The list can be searched by eye or by an editor to locate particular files.

The list can also be loaded into a script to be filtered programmatically as desired.

Python, 146 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""
Catalog multiple drives.

As one accumulates multiple drives, hard and flash, containing
thousands or even millions of files, it becomes useful to have a text
file containing an alphabetized catalog of all files and their
locations by drive and directory.

The list can be searched by eye or by an editor to locate particular
files.

The list can also be loaded into a script to be filtered 
programmatically as desired.
"""
__author__ = "Jack Trainor"
__date__ = "2016-03-10"

import sys
import os

########################################################################
class FileSpec(object):
    """Break a file path into the pieces the catalog needs.

    Attributes:
        drive: Drive specifier from os.path.splitdrive (for example "j:"
            on Windows), or "" when the path has none.
        path_minus_drive: The full path with the drive specifier removed.
        dir: Directory portion of the path, drive included.
        name: File name including its extension.
        corename: File name without its extension.
        ext: Lower-cased extension including the leading dot ("" if none).
        dir_minus_drive: Directory portion with the drive specifier removed.
        drive_name: Caller-supplied label for the drive ("" by default).
    """

    def __init__(self, path, drive_name=""):
        self.drive, self.path_minus_drive = os.path.splitdrive(path)
        self.dir, self.name = os.path.split(path)
        self.corename, self.ext = os.path.splitext(self.name)
        self.ext = self.ext.lower()
        # Let splitdrive remove the drive instead of slicing off two
        # characters: the slice is only right for "X:" prefixes and chops
        # real path characters when the path has no drive at all.
        self.dir_minus_drive = os.path.splitdrive(self.dir)[1]
        self.drive_name = drive_name
        
class CatalogEntry(object):
    """One catalog row: a file name, its drive label and its directory."""

    def __init__(self, name, drive_name, dir_minus_drive):
        # Plain record; the three values are stored exactly as given.
        self.name, self.drive_name, self.dir_minus_drive = (
            name, drive_name, dir_minus_drive)
            
########################################################################
def read_file(path):
    """Return the raw contents of the file at *path*.

    The file is opened in binary mode and read in one call. Failures are
    best-effort: any exception is reported on stderr and an empty string
    is returned instead of propagating.
    """
    bytes_ = ""
    try:
        # The with-block closes the handle even when read() raises, which
        # the previous open/read/close sequence did not guarantee.
        with open(path, 'rb') as f:
            bytes_ = f.read()
    except Exception as e:
        sys.stderr.write('read_file failed: %s [%s]\n' % (path, e))
    return bytes_

def write_file(path, bytes_, simulated=True):
    """Write *bytes_* to *path* in binary mode unless *simulated* is true.

    Args:
        path: Destination file path; an existing file is overwritten.
        bytes_: Data to write. Text (unicode) input is encoded as UTF-8;
            byte strings are written unchanged.
        simulated: When true (the default) nothing is written.

    Failures are best-effort: any exception is reported on stderr and
    not propagated.
    """
    if simulated:
        return
    try:
        # A binary handle rejects text on Python 3, so encode first. Doing
        # it before open() means a failed encode cannot leave the target
        # file truncated. type(u"") is unicode on Python 2, str on Python 3.
        if isinstance(bytes_, type(u"")):
            bytes_ = bytes_.encode("utf-8")
        # The with-block closes the handle even when write() raises.
        with open(path, 'wb') as f:
            f.write(bytes_)
    except Exception as e:
        sys.stderr.write('write_file: %s [%s]\n' % (path, e))

def println(line):
    """Write *line* to standard output, followed by a newline."""
    terminated = line + "\n"
    sys.stdout.write(terminated)

########################################################################
def get_files_in_dir(dir_, onelevel=False):
    """Return the paths of the files found under *dir_*.

    The tree is traversed with os.walk. When *onelevel* is true, only the
    files directly inside *dir_* are returned and subdirectories are not
    visited.
    """
    found = []
    for folder, _subdirs, file_names in os.walk(dir_):
        found.extend(os.path.join(folder, file_name) for file_name in file_names)
        if onelevel:
            # os.walk yields the top directory first; stop after it.
            break
    return found

########################################################################
def entry_to_line(entry):
    """Format *entry* as one tab-separated line: name, drive name, directory."""
    fields = (entry.name, entry.drive_name, entry.dir_minus_drive)
    return "%s\t%s\t%s" % fields

def line_to_entry(line):
    """Parse one tab-separated catalog line into a CatalogEntry.

    Returns None when the line does not split into exactly three fields.
    """
    fields = line.split("\t")
    if len(fields) != 3:
        return None
    name, drive_name, dir_minus_drive = fields
    return CatalogEntry(name, drive_name, dir_minus_drive)
    
def path_to_entry(path, drive_name):
    """Build the CatalogEntry for one file *path* on the drive labelled *drive_name*."""
    parsed = FileSpec(path, drive_name)
    return CatalogEntry(parsed.name, parsed.drive_name, parsed.dir_minus_drive)
    
def paths_to_entries(paths, drive_name):
    """Return a list with one CatalogEntry per path in *paths*.

    Every entry is labelled with *drive_name*. The per-path conversion is
    delegated to path_to_entry so the two functions cannot drift apart.
    """
    return [path_to_entry(path, drive_name) for path in paths]

########################################################################  
def read_catalog_file_entries(catalog_path):
    """Load the catalog file at *catalog_path* as a list of CatalogEntry.

    Lines that line_to_entry cannot parse are skipped. If the file cannot
    be read, read_file reports the error and the result is an empty list.

    Raises:
        UnicodeDecodeError: on Python 3, if a line is not valid UTF-8.
    """
    println("read_catalog_file_entries %s" % catalog_path)
    entries = []
    for line in read_file(catalog_path).splitlines():
        # read_file opens in binary mode, so on Python 3 each line is
        # bytes and splitting it on "\t" would raise TypeError. Decode
        # per line, after splitlines(), so line boundaries stay the ones
        # found in the raw data. On Python 2 the line is already str.
        # NOTE(review): assumes catalogs are UTF-8 encoded -- confirm.
        if not isinstance(line, str):
            line = line.decode("utf-8")
        entry = line_to_entry(line)
        if entry:
            entries.append(entry)
    return entries
        
def write_catalog_file_entries(catalog_path, entries):
    """Write *entries* to *catalog_path*, one tab-separated line per entry.

    Lines are sorted case-insensitively and joined with "\\n"; no trailing
    newline is added. The file is really written (simulated=False).
    """
    println("write_catalog_file_entries %s" % catalog_path)
    rendered = [row for row in map(entry_to_line, entries) if row]
    rendered.sort(key=lambda s: s.lower())
    write_file(catalog_path, "\n".join(rendered), False)
        
########################################################################  
def write_drive_catalog_file(drive_path, drive_name, catalog_path):
    """Catalog every file under *drive_path* into the file *catalog_path*.

    Each entry is labelled with *drive_name*.
    """
    println("write_drive_catalog_file %s -> %s" % (drive_path, catalog_path))
    write_catalog_file_entries(
        catalog_path,
        paths_to_entries(get_files_in_dir(drive_path), drive_name))

def write_master_catalog_file(catalog_paths, master_catalog_path):
    """Merge the catalogs in *catalog_paths* into *master_catalog_path*."""
    println("write_master_catalog_file %s" % master_catalog_path)
    combined = []
    for source_path in catalog_paths:
        combined.extend(read_catalog_file_entries(source_path))
    write_catalog_file_entries(master_catalog_path, combined)
    
########################################################################
def sample():
    """Sample run: catalog the J and K drives, merge them, print the result."""
    sansa_catalog = r"c:\SANSA2_1G.txt"
    eight_gb_catalog = r"c:\8GB.txt"
    master_catalog = r"c:\Master_Catalog.txt"

    write_drive_catalog_file("j:\\", "SANSA2_1G", sansa_catalog)
    write_drive_catalog_file("k:\\", "8GB", eight_gb_catalog)
    write_master_catalog_file([sansa_catalog, eight_gb_catalog], master_catalog)
    for entry in read_catalog_file_entries(master_catalog):
        println(entry_to_line(entry))

if __name__ == "__main__":
    # Use println rather than the Python 2-only print statement so the
    # module also parses under Python 3; the output is unchanged.
    println(__file__)
    # Uncomment to run the sample catalog build (adjust the drive letters
    # and output paths first).
#    sample()
    println("Complete.")

A friend left a box of drives with me, containing terabytes of documentaries, and encouraged me to sort through them and watch any that I wanted. This was my solution for locating items I wanted to watch and finding them again later.

I used to do batch work on files by walking through a directory, checking for matches on the filenames, then working on that file in place.

Now I prefer to assemble a list of paths, then later iterate through the path list to do whatever work I want.

As it stands, the catalog code works with all file paths found on a drive. If desired, one could extend the code to include standard checks for file extensions or excluded directories to shorten the overall list before later processing.