Welcome, guest | Sign In | My Account | Store | Cart

The Windows shell (Explorer) limits the length of a full path, but both NTFS and ReFS support full paths longer than that limit. This makes os.walk unreliable on Windows when files sit in deeply nested folders, hence this recipe.

Python, 83 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import sys
import ctypes
from ctypes import Structure
from ctypes import byref
import ctypes.wintypes as wintypes
from ctypes import addressof

# Bit tested against WIN32_FIND_DATAW.dwFileAttributes to tell directories
# apart from files.
FILE_ATTRIBUTE_DIRECTORY = 16
# NOTE(review): not referenced anywhere in the visible code; presumably the
# CreateFile "open existing" creation disposition -- confirm before relying on it.
OPEN_EXISTING = 3
# Number of WCHARs reserved for WIN32_FIND_DATAW.cFileName.
MAX_PATH = 260

# Module-level alias for kernel32's GetLastError. ctypes.windll is looked up
# when this line runs, i.e. at import time.
GetLastError = ctypes.windll.kernel32.GetLastError

class FILETIME(Structure):
    """ctypes layout of a FILETIME value: two DWORD members, low part first."""

    _fields_ = [
        ("dwLowDateTime", wintypes.DWORD),
        ("dwHighDateTime", wintypes.DWORD),
    ]

class WIN32_FIND_DATAW(Structure):
    """Output buffer handed to FindFirstFileW / FindNextFileW.

    Each successful call fills the buffer with one directory entry; the
    walker reads ``dwFileAttributes`` and ``cFileName`` from it.
    """

    _fields_ = [
        ("dwFileAttributes", wintypes.DWORD),
        # Three timestamps, each stored as a FILETIME pair of DWORDs.
        ("ftCreationTime", FILETIME),
        ("ftLastAccessTime", FILETIME),
        ("ftLastWriteTime", FILETIME),
        # File size split into high and low DWORD halves.
        ("nFileSizeHigh", wintypes.DWORD),
        ("nFileSizeLow", wintypes.DWORD),
        ("dwReserved0", wintypes.DWORD),
        ("dwReserved1", wintypes.DWORD),
        # Entry name only (no directory part).
        ("cFileName", wintypes.WCHAR * MAX_PATH),
        # NOTE(review): the Win32 header declares this member as WCHAR[14];
        # the 20 used here over-allocates the output buffer, which looks
        # harmless -- confirm before tightening it.
        ("cAlternateFileName", wintypes.WCHAR * 20),
    ]

# Win32 values used by the directory scan below.
_INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value  # (HANDLE)-1 at pointer width
_ERROR_ACCESS_DENIED = 5
_ERROR_NO_MORE_FILES = 18
_EXTENDED_PREFIX = u'\\\\?\\'

# Holds the lazily created kernel32 binding (at most one element).
_kernel32_cache = []


def _get_kernel32():
    """Return a private kernel32 binding with explicit prototypes.

    The binding is created on first use.  It is separate from
    ``ctypes.windll.kernel32`` so the prototypes set here cannot affect other
    users of the shared instance, and it is opened with ``use_last_error`` so
    ``ctypes.get_last_error()`` reports the code of the call just made.
    """
    if not _kernel32_cache:
        k32 = ctypes.WinDLL('kernel32', use_last_error=True)
        find_data_ptr = ctypes.POINTER(WIN32_FIND_DATAW)
        # Without an explicit HANDLE restype ctypes returns a C int, which
        # truncates find handles on 64-bit Windows.
        k32.FindFirstFileW.argtypes = [wintypes.LPCWSTR, find_data_ptr]
        k32.FindFirstFileW.restype = wintypes.HANDLE
        k32.FindNextFileW.argtypes = [wintypes.HANDLE, find_data_ptr]
        k32.FindNextFileW.restype = wintypes.BOOL
        k32.FindClose.argtypes = [wintypes.HANDLE]
        k32.FindClose.restype = wintypes.BOOL
        _kernel32_cache.append(k32)
    return _kernel32_cache[0]


def _report(message):
    """Write one diagnostic line to stderr, only when stderr is not a terminal."""
    # NOTE(review): logging only when stderr is NOT a tty is kept from the
    # original code -- confirm that this gating is intentional.
    stream = sys.stderr
    if stream.isatty():
        return
    if sys.version_info[0] < 3 and not isinstance(message, str):
        # Python 2: file.write() would encode unicode as ASCII and fail on
        # non-ASCII paths, so encode explicitly and never raise.
        encoding = getattr(stream, 'encoding', None) or 'utf-8'
        message = message.encode(encoding, 'backslashreplace')
    stream.write(message + '\n')


def _to_extended_path(folder):
    """Return *folder* as text carrying the extended-length path prefix."""
    if sys.version_info[0] < 3:
        folder = unicode(folder)  # noqa: F821 -- Python 2 builtin
    else:
        folder = os.fsdecode(folder)
    if folder.startswith(_EXTENDED_PREFIX):
        return folder
    if folder.startswith(u'\\\\'):
        # Network share: \\server\share\... becomes \\?\UNC\server\share\...
        return _EXTENDED_PREFIX + u'UNC' + folder[1:]
    # Local drive path.
    return _EXTENDED_PREFIX + folder


def _list_folder(folder):
    """Return ``(dirs, files)`` for one folder, or None if access is denied."""
    k32 = _get_kernel32()
    pattern = os.path.join(folder, u'*')
    data = WIN32_FIND_DATAW()

    handle = k32.FindFirstFileW(pattern, byref(data))
    if handle == _INVALID_HANDLE_VALUE:
        # Read the error code once, straight after the failing call; there is
        # no handle to close in this case.
        gle = ctypes.get_last_error()
        _report('Failed to find first file %s' % (pattern,))
        if gle != _ERROR_ACCESS_DENIED:
            raise WindowsError('FindFirstFileW %s, Error: %d' % (folder, gle))
        return None

    dirs = []
    files = []
    try:
        while True:
            name = data.cFileName
            if data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY:
                # Skip the self and parent pseudo-entries.
                if name not in (u'.', u'..'):
                    dirs.append(name)
            else:
                files.append(name)
            if not k32.FindNextFileW(handle, byref(data)):
                gle = ctypes.get_last_error()
                if gle != _ERROR_NO_MORE_FILES:
                    # Best effort: keep what was collected, but say so.
                    _report('FindNextFileW %s stopped early, Error: %d' % (pattern, gle))
                break
    except WindowsError:
        # Best effort, as before: report and return the partial listing.
        _report('Failed to find next file %s, handle %d, buff addr: 0x%x'
                % (pattern, handle, addressof(data)))
    finally:
        # Always release the find handle, whatever ended the loop.
        k32.FindClose(handle)
    return dirs, files


def windows_walk(folder):
    """Walk a directory tree top-down using extended-length Windows paths.

    Works like a minimal ``os.walk``: a generator yielding
    ``(base, dirs, files)`` for *folder* and then for every folder below it,
    depth first, in the order the file system reports the entries.

    Args:
        folder: Root folder.  The extended-length prefix is added when it is
            missing (the UNC form for paths starting with two backslashes).
            NOTE(review): the prefix is prepended verbatim, so an absolute,
            backslash-separated path is presumably required -- confirm.

    Yields:
        Tuples ``(base, dirs, files)``: ``base`` is the prefixed path of the
        folder, ``dirs`` the names of its sub-folders (without ``.`` and
        ``..``) and ``files`` the names of its other entries.  Removing names
        from ``dirs`` before resuming the generator prunes those sub-trees.

    Raises:
        WindowsError: a folder cannot be opened for any reason other than
            access denied.  Access-denied folders are skipped.

    NOTE(review): only the directory attribute bit is checked, so directory
    links (junctions / symlinks) are presumably descended into and cycles are
    not detected -- confirm.
    """
    # An explicit stack replaces recursion so that very deep trees cannot hit
    # the interpreter's recursion limit.
    pending = [_to_extended_path(folder)]
    while pending:
        current = pending.pop()
        listing = _list_folder(current)
        if listing is None:
            continue  # access denied: skip this folder and its sub-tree
        dirs, files = listing
        yield current, dirs, files
        # Push in reverse so sub-folders are visited in listing order; `dirs`
        # is read after the yield so that caller-side pruning is honoured.
        pending.extend(os.path.join(current, d) for d in reversed(dirs))

if __name__ == '__main__':
    # Demo: print the full path of every file below the current directory.
    # os.getcwdu() exists only on Python 2; on Python 3 os.getcwd() already
    # returns text.
    getcwd = getattr(os, 'getcwdu', os.getcwd)
    for root, dirs, files in windows_walk(getcwd()):
        for f in files:
            # A single parenthesised argument prints identically whether
            # print is a statement (Python 2) or a function (Python 3).
            print(os.path.join(root, f))