class MyZipFile(ZipFile):
    """ZipFile that can iterate over one archive member piece by piece.

    The extra method ``lines()`` decompresses a member in bounded chunks and
    yields the pieces between separators, so the decompressed content never
    has to be held in memory as a whole.
    """

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the archive; all arguments are passed on to ZipFile."""
        ZipFile.__init__(self, file, mode, compression)

    def lines(self, name, split="\n", bs=100 * 1024 * 1024):
        """
        Generator function to allow iteration over the content of a member.

        The content of member <name> is decompressed in chunks (maximal size
        = <bs> bytes), split at the separator <split>, and the pieces are
        yielded one by one, without the separator.  The intention is to
        avoid storing the entire decompressed data in memory (which does not
        work for bigger zip files).  Note that the *compressed* data of the
        member is still read into memory in one go.

        Choose <bs> as high as possible before having to fear running out of
        memory, as this gives maximum performance.  The default of 100 MB
        did a good job for the original author.

        Pieces have the string type of the raw archive data (``str`` on
        Python 2, ``bytes`` on Python 3).  A text <split> is converted to
        that type when necessary.  Empty pieces between two adjacent
        separators are yielded, but a trailing empty piece (member ends with
        the separator, or member is empty) is never yielded -- independent
        of the compression method and of where chunk boundaries fall.

        Because this is a generator, the errors below are raised when the
        first piece is requested, not when ``lines()`` is called:

        RuntimeError -- archive not opened for reading, already closed, or
                        zlib is missing for a deflated member.
        KeyError     -- no member called <name> (raised by ``getinfo``).
        BadZipfile   -- broken local file header or unsupported compression
                        method.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        zinfo = self.getinfo(name)
        data = self._read_compressed(zinfo)

        # On Python 3 the archive data is bytes while the default separator
        # is text.  latin-1 maps every character < 256 to the byte with the
        # same value, which is what a Python 2 byte string literal meant.
        if isinstance(split, str) and not isinstance(data, str):
            split = split.encode("latin-1")

        if zinfo.compress_type == ZIP_STORED:
            chunks = [data]
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "De-compression requires the (missing) zlib module")
            chunks = self._inflate_chunks(data, bs)
        else:
            raise BadZipfile(
                "Unsupported compression method %d for file %s"
                % (zinfo.compress_type, name))

        # Split every chunk, hand out all complete pieces and carry the
        # (possibly incomplete) last piece over to the next chunk.
        rest = data[:0]  # empty string of the same type as the data
        for chunk in chunks:
            rest += chunk
            pieces = rest.split(split)
            rest = pieces.pop()
            for piece in pieces:
                yield piece
        if rest:
            yield rest

    def _read_compressed(self, zinfo):
        """Return the raw (still compressed) data of the member <zinfo>.

        The position of the archive's file object is restored afterwards.
        """
        # Imported here because the file's import block is not part of this
        # unit.
        import struct

        fp = self.fp
        filepos = fp.tell()
        try:
            # The member data follows the local file header: 30 fixed bytes
            # (signature first, name length and extra-field length last),
            # then the file name and the extra field.
            fp.seek(zinfo.header_offset, 0)
            header = fp.read(30)
            magic = None
            if len(header) == 30:
                magic, name_len, extra_len = struct.unpack("<L22xHH", header)
            if magic != 0x04034B50:  # "PK\x03\x04"
                raise BadZipfile("Bad magic number for file header")
            fp.seek(name_len + extra_len, 1)
            return fp.read(zinfo.compress_size)
        finally:
            fp.seek(filepos, 0)

    def _inflate_chunks(self, data, bs):
        """Yield the raw-deflate stream <data> decompressed in chunks of at
        most <bs> bytes (the final chunk holds whatever is still pending)."""
        dc = zlib.decompressobj(-15)
        while True:
            yield dc.decompress(data, bs)
            # Input that did not fit into the <bs> output limit.
            data = dc.unconsumed_tail
            if not data:
                break
        # The "decompress('Z')" stunt is taken from the read() method of the
        # original ZipFile class: a dummy pad byte for zlib, followed by a
        # flush to collect output that is still buffered.  The pad byte is
        # built via encode() so it has the data's string type on Python 2
        # and Python 3 alike.
        yield dc.decompress("Z".encode("ascii")) + dc.flush()


def main():
    """Smoke test: write one "+" for every piece of a member to stdout."""
    import sys

    # to test this, change the file names to something you have
    zfn = "results_0067.zip"
    fn = "properties.csv"
    z = MyZipFile(zfn, "r", ZIP_DEFLATED)
    try:
        # Same output as the former ``print "+",`` loop: marks separated by
        # single spaces and one newline at the very end.
        sep = ""
        for _ in z.lines(fn):
            sys.stdout.write(sep + "+")
            sep = " "
        if sep:
            sys.stdout.write("\n")
    finally:
        z.close()


if __name__ == "__main__":
    main()