Welcome, guest | Sign In | My Account | Store | Cart

A Python class to extract zip files. It is also written for easy use as a standalone script from the command line.

Python, 140 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
""" unzip.py
    Version: 1.1

    Extract a zipfile to the directory provided
    It first creates the directory structure to house the files
    then it extracts the files to it.

    Sample usage:
    command line
    unzip.py -p 10 -z c:\testfile.zip -o c:\testoutput

    python class
    import unzip
    un = unzip.unzip()
    un.extract(r'c:\testfile.zip', r'c:\testoutput')
    

    By Doug Tolton
"""

import sys
import zipfile
import os
import os.path
import getopt

class unzip:
    """Extract a zip archive into a target directory.

    Attributes:
        verbose: when true, the name of every entry is printed as it is
            processed; this takes precedence over percentage notifications.
        percent: step, in percent, between "N% complete" notifications.
            Values of 0 or less disable the notifications.
    """

    def __init__(self, verbose = False, percent = 10):
        self.verbose = verbose
        self.percent = percent

    def extract(self, file, dir):
        """Extract every entry of the zip archive `file` below `dir`.

        `dir` is created if it does not exist (unless it is a bare drive
        specification ending in ':').  Parent directories of each file are
        created on demand, so archives without explicit directory entries
        are handled as well.

        Raises:
            ValueError: if an entry name would resolve outside `dir`.
            zipfile.BadZipfile, IOError, OSError: propagated unchanged from
                the zipfile module and the file system.
        """
        if not dir.endswith(':') and not os.path.exists(dir):
            os.makedirs(dir)

        # create directory structure to house files
        self._createstructure(file, dir)

        zf = zipfile.ZipFile(file)
        try:
            names = zf.namelist()
            num_files = len(names)
            step = self.percent
            next_mark = step

            # extract files to directory structure
            for i, name in enumerate(names):
                if self.verbose:
                    print("Extracting %s" % name)
                elif step > 0:
                    # i entries are finished when entry i starts; report the
                    # progress rounded down to a multiple of the step so the
                    # figure can never exceed 100.
                    done = i * 100 // num_files
                    if done >= next_mark:
                        complete = done - done % step
                        print("%s%% complete" % complete)
                        next_mark = complete + step

                if name.endswith('/'):
                    # directory entry: already created by _createstructure
                    continue

                target = self._safepath(dir, name)
                parent = os.path.dirname(target)
                if parent and not os.path.isdir(parent):
                    os.makedirs(parent)

                outfile = open(target, 'wb')
                try:
                    outfile.write(zf.read(name))
                finally:
                    outfile.close()
        finally:
            zf.close()

    def _createstructure(self, file, dir):
        """Create below `dir` every directory listed explicitly in `file`."""
        self._makedirs(self._listdirs(file), dir)

    def _makedirs(self, directories, basedir):
        """ Create any directories that don't currently exist """
        for dir in directories:
            curdir = self._safepath(basedir, dir)
            if not os.path.exists(curdir):
                os.makedirs(curdir)

    def _listdirs(self, file):
        """ Grabs all the directories in the zip structure
        This is necessary to create the structure before trying
        to extract the file to it. """
        zf = zipfile.ZipFile(file)
        try:
            dirs = [name for name in zf.namelist() if name.endswith('/')]
        finally:
            zf.close()

        dirs.sort()
        return dirs

    def _safepath(self, basedir, name):
        """Return os.path.join(basedir, name) after checking it stays in basedir.

        Raises ValueError for absolute names or names using '..' to climb
        out of `basedir`, so an archive cannot write to arbitrary locations.
        """
        joined = os.path.join(basedir, name)
        base = os.path.abspath(basedir)
        target = os.path.abspath(joined)
        prefix = base.rstrip(os.sep) + os.sep
        if target != base and not target.startswith(prefix):
            raise ValueError("zip entry %r would be extracted outside %r"
                             % (name, basedir))
        return joined

def usage():
    """Print the command-line help text to standard output."""
    # A parenthesised single-argument print behaves the same on Python 2
    # and is valid syntax on Python 3.
    print("""usage: unzip.py -z <zipfile> -o <targetdir>
    <zipfile> is the source zipfile to extract
    <targetdir> is the target destination

    -z zipfile to extract
    -o target location
    -p sets the percentage notification
    -v sets the extraction to verbose (overrides -p)

    long options also work:
    --verbose
    --percent=10
    --zipfile=<zipfile>
    --outdir=<targetdir>""")
    

def main():
    """Command-line entry point: parse options and extract the archive.

    Exit status:
        0 - extraction finished, or help was requested with -h/--help
        1 - the archive could not be read or a file could not be written
        2 - invalid or missing command-line options
    """
    shortargs = 'vhp:z:o:'
    longargs = ['verbose', 'help', 'percent=', 'zipfile=', 'outdir=']

    try:
        opts, args = getopt.getopt(sys.argv[1:], shortargs, longargs)
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    unzipper = unzip()
    zipsource = ""
    zipdest = ""

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-v", "--verbose"):
            unzipper.verbose = True
        elif o in ("-p", "--percent"):
            # Reject values that are not a usable percentage up front instead
            # of failing later with a traceback during extraction.
            try:
                percent = int(a)
            except ValueError:
                percent = 0
            if not 1 <= percent <= 100:
                sys.stderr.write("unzip.py: invalid percentage: %s\n" % a)
                usage()
                sys.exit(2)
            unzipper.percent = percent
        elif o in ("-z", "--zipfile"):
            zipsource = a
        elif o in ("-o", "--outdir"):
            zipdest = a

    if not zipsource or not zipdest:
        # Both the archive and the destination are mandatory; signal the
        # mistake to the calling shell with a non-zero status.
        usage()
        sys.exit(2)

    try:
        unzipper.extract(zipsource, zipdest)
    except (zipfile.BadZipfile, IOError, OSError) as err:
        sys.stderr.write("unzip.py: %s\n" % err)
        sys.exit(1)

# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()

While there are many ways to add files to a zip archive via Python, I have been unable to locate a good solution for extracting those same files from a zip archive.

I have written this class to make it easy to extract a zip file to a given location.

I have not yet tested this recipe on a Linux / Unix box, but in principle it should work.

Sample usage: unzip.py -p 10 -z c:\testfile.zip -o c:\testoutput

5 comments

Christopher Dunn 16 years, 7 months ago  # | flag

Yes, but... Yes, this works on Linux. I just verified it, out of curiosity. But we already have standard command-line tools called zip and unzip, which work just fine.

The real value of the zipfile module, as I understand it, is that you can go between memory and zip files easily. Extracting between files doesn't seem so useful.

Anyway, you failed to handle exceptions. This happens when I attempt to extract an encrypted file:

Traceback (most recent call last):
  File "unzip.py", line 140, in ?
    if __name__ == '__main__': main()
  File "unzip.py", line 138, in main
    unzipper.extract(zipsource, zipdest)
  File "unzip.py", line 57, in extract
    outfile.write(zf.read(name))
  File "/_TOOLS_/plat/python-/2.3.3/lib/python2.3/zipfile.py", line 368, in read
    raise BadZipfile, "Bad CRC-32 for file %s" % name
zipfile.BadZipfile: Bad CRC-32 for file log.tcl

So even as a simple utility, it needs work. I tested it only b/c I wanted to be sure that zipfile works on my system.

Scott Stafford 16 years, 7 months ago  # | flag

Paths with \ don't exist... I had a problem with this code because my zipfile didn't have new paths defined as line-items. I changed it to just create the directory structure on the fly:

def create_necessary_paths(filename):
    """Make sure the directory that will contain `filename` exists.

    Does nothing when `filename` has no directory part or the directory is
    already present.

    Raises:
        OSError: if the directory is still missing after the attempt to
            create it (for example because of permissions, or because a
            regular file is in the way).
    """
    path = os.path.dirname(filename)
    if not path or os.path.isdir(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        # Another process may have created the directory in the meantime;
        # only a directory that is still missing is a real failure.
        if not os.path.isdir(path):
            raise

And within the extract function:

...
if not name.endswith('/'):
    # Keep `name` untouched: it is the archive member name needed by
    # zf.read() below, and the file must land in its own subdirectory.
    target = os.path.join(dir, name)
    parent = os.path.dirname(target)
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)
    outfile = open(target, 'wb')
    outfile.write(zf.read(name))
...
nicolas pioli 16 years, 5 months ago  # | flag

le script marche bien, une fois pris en compte les commentaires! merci Doug pour ton script, il marche bien, une fois pris en compte les commentaires! je l'ai testé avec succès sous MacOS 9.2.2 avec Python 2.2.3 dans le script, suivant:

#!/usr/bin/env python

import base64, re, urllib, string, sys, zipfile, os, os.path

pattern_in_base64 = r""" merci Doug pour ton script, il marche bien, une fois pris en compte les commentaires! je l'ai testé avec succès sous MacOS 9.2.2 avec Python 2.2.3 dans le script, suivant:

#!/usr/bin/env python

import base64, re, urllib, string, sys, zipfile, os, os.path

pattern_in_base64 = r"""

nicolas pioli 16 years, 5 months ago  # | flag

suite du précédent.

import base64, re, urllib, string, sys, zipfile, os, os.path

pattern_in_base64 = r"""
cf. rx cookbook, recipe 59864 de ken simpson et commentaires
"""

class unzip:
    """Extract a zip archive into a target directory."""

    def extract(self, file, dir):
        """Extract every entry of the zip archive `file` below `dir`.

        `dir` is created if it does not exist (unless it ends in ':').
        Directories listed in the archive are created first; the parent
        directory of each file is additionally created on demand.
        """
        if not dir.endswith(':') and not os.path.exists(dir):
            os.mkdir(dir)

        self._createstructure(file, dir)

        zf = zipfile.ZipFile(file)
        try:
            for name in zf.namelist():
                if name.endswith('/'):
                    continue
                # `name` must stay the archive member name: it is used both
                # to build the full target path and to read the data.
                target = os.path.join(dir, name)
                self.create_necessary_paths(target)
                outfile = open(target, 'wb')
                try:
                    outfile.write(zf.read(name))
                finally:
                    outfile.close()
        finally:
            zf.close()

    def _createstructure(self, file, dir):
        """Create below `dir` every directory listed explicitly in `file`."""
        self._makedirs(self._listdirs(file), dir)

    @staticmethod
    def create_necessary_paths(filename):
        """Make sure the directory that will contain `filename` exists.

        Raises OSError if the directory is still missing after the attempt
        to create it.
        """
        path = os.path.dirname(filename)
        if not path or os.path.isdir(path):
            return
        try:
            os.makedirs(path)
        except OSError:
            if not os.path.isdir(path):
                raise

    def _makedirs(self, directories, basedir):
        """Create each of `directories` below `basedir` if it is missing."""
        for dir in directories:
            curdir = os.path.join(basedir, dir)
            if not os.path.exists(curdir):
                os.makedirs(curdir)

    def _listdirs(self, file):
        """Return the sorted names of the directory entries in `file`."""
        zf = zipfile.ZipFile(file)
        try:
            dirs = [name for name in zf.namelist() if name.endswith('/')]
        finally:
            zf.close()

        dirs.sort()
        return dirs

def main():
    """Download an '_ides.zip' archive linked from the Sophos page and unpack it.

    The page is searched with the regular expression decoded from
    `pattern_in_base64`; the matches are sorted in descending order and the
    first one containing '_ides.zip' (searched from offset 22 of its string
    form) is downloaded to the desktop folder and extracted there.

    Exits with an error message when no such match is found.
    """
    # NOTE(review): base64.decodestring and urllib.urlopen/urlretrieve are
    # the Python 2 spellings this script was written against.
    pattern = base64.decodestring(pattern_in_base64)
    matcher = re.compile(pattern)
    print(pattern)
    the_string = urllib.urlopen('http://www.sophos.fr/downloads/ide').read()
    matches = matcher.findall(the_string)
    matches.sort()
    matches.reverse()

    chosen = None
    pos = -1
    for match in matches:
        pos = str(match).find('_ides.zip', 22)
        if pos >= 0:
            chosen = match
            break

    if chosen is None:
        # Without a usable link there is nothing to download or extract.
        sys.exit("no '_ides.zip' link found on the download page")

    print(pos)
    print(chosen[0])

    os.chdir('Classic:Desktop Folder:')
    urllib.urlretrieve(chosen[0], 'ide.zip')

    pwd = os.getcwd()
    print(pwd)
    print(os.listdir(pwd))

    unzipper = unzip()
    zipsource = "Classic:Desktop Folder:ide.zip"
    zipdest = "Classic:Desktop Folder:ide"
    unzipper.extract(zipsource, zipdest)

# Run the download-and-extract routine only when executed as a script.
if __name__ == '__main__':
    main()
david SHI 12 years, 11 months ago  # | flag

Can I use it to download a .zip file and unzip it into a local folder?

Regards.

David