#!/usr/bin/python
#
# Author  : Raphael Jolivet
# Release : 22-mar-2011

"""Compare two zip archives while ignoring configured files and text lines.

Usage: <script> <zip1> <zip2>

Exit status is 0 when no difference is found, 1 when a difference is found
(its description is printed on stdout) and 2 when arguments are missing.

The code runs on Python 2.7 and Python 3.
"""

import re
import sys
from fnmatch import fnmatch
from io import BytesIO
from zipfile import ZipFile

# ----------------------------------------------------------------------------
# Class IgnoreRules
# ----------------------------------------------------------------------------


class IgnoreRules(object):
    """Rules describing what must be ignored when comparing archives.

    Attributes:
        ignoreFiles: list of 'glob' filename patterns (``?`` and ``*``
            wildcards); entries matching one of them are not compared.
        ignorePatternsPerFile: dictionary of <'glob' filename pattern> =>
            [list of compiled regexps]. Entries matching a key are compared
            line by line, and lines matching one of the regexps are ignored.
    """

    def __init__(self, ignoreFiles=None, ignorePatternsPerFile=None):
        """Store the file patterns and compile the per-file line patterns.

        Args:
            ignoreFiles: list of file patterns to ignore (ala 'glob').
                Defaults to no ignored file.
            ignorePatternsPerFile: map of <file glob pattern> =>
                [list of regexp strings] for lines to ignore in text files.
                Defaults to no text file rule.
        """
        # None defaults avoid sharing one mutable list/dict between instances
        self.ignoreFiles = [] if ignoreFiles is None else ignoreFiles

        # Compile the patterns once, so they can be reused for every line
        self.ignorePatternsPerFile = {}
        if ignorePatternsPerFile is not None:
            for filePattern, linePatterns in ignorePatternsPerFile.items():
                self.ignorePatternsPerFile[filePattern] = [
                    re.compile(linePattern) for linePattern in linePatterns
                ]


# ----------------------------------------------------------------------------
# Config
# ----------------------------------------------------------------------------

RULES = IgnoreRules(

    # Ignored files
    [
        "*/README.txt",
        "*.java",  # Source files
    ],

    # Lines ignored in text files
    {
        # Version information within manifests
        "META-INF/MANIFEST.MF": [
            r'^Implementation-Version\s*:.*$',
            r'^Implementation-Build-Time\s*:.*$',
            r'^Implementation-Revision\s*:.*$',
        ],

        # Comments within INI files : "; Blabla"
        "*.ini": [
            r'^\s*;.*$',
        ],
    }
)

# Entries matching one of these patterns are opened as zip files themselves
NESTED_ARCHIVE_PATTERNS = ("*.zip", "*.war", "*.jar")

# ----------------------------------------------------------------------------
# Main method
# ----------------------------------------------------------------------------


def _matchesAny(filename, patterns):
    """Return True if filename matches at least one of the glob patterns."""
    return any(fnmatch(filename, pattern) for pattern in patterns)


def _findLinePatterns(filename, ignoreRules):
    """Return the line patterns of the first text rule matching filename.

    Returns None when no key of ignoreRules.ignorePatternsPerFile matches,
    i.e. when the entry is not handled as a text file.
    """
    for filePattern, linePatterns in ignoreRules.ignorePatternsPerFile.items():
        if fnmatch(filename, filePattern):
            return linePatterns
    return None


def _diffNestedZips(data1, data2, ignoreRules):
    """Open two raw zip contents held in memory and diff them recursively."""
    # 'with' closes both nested archives even if the comparison raises
    with ZipFile(BytesIO(data1)) as subZip1, ZipFile(BytesIO(data2)) as subZip2:
        return diffZips(subZip1, subZip2, ignoreRules)


def diffZips(zip1, zip2, ignoreRules):
    """Diff two zip files.

    Entries are visited in sorted name order, so the reported difference is
    the same on every run.

    Args:
        zip1: first opened ZipFile.
        zip2: second opened ZipFile.
        ignoreRules: IgnoreRules instance listing ignored files and lines.

    Returns:
        None if the archives are the same (ignore rules applied), a string
        describing the first diff encountered otherwise.
    """
    # Build maps of entries
    zip1Map = dict((entry.filename, entry) for entry in zip1.infolist())
    zip2Map = dict((entry.filename, entry) for entry in zip2.infolist())

    # Check we have the same list of files
    zip1KeySet = set(zip1Map)
    zip2KeySet = set(zip2Map)
    if zip1KeySet != zip2KeySet:
        unmatched = sorted(zip1KeySet.symmetric_difference(zip2KeySet))
        return "Different list of entries : %s" % ", ".join(unmatched)

    # Loop on entries
    for filename in sorted(zip1KeySet):

        # Is it a folder => then no diff, its contents are checked anyway
        if filename.endswith('/'):
            continue

        # Get each entry
        entry1 = zip1Map[filename]
        entry2 = zip2Map[filename]

        # Is it a bundled zip ?
        if _matchesAny(filename, NESTED_ARCHIVE_PATTERNS):

            # Same CRC and size ? They are identical : no need to look into it
            if entry1.file_size == entry2.file_size and entry1.CRC == entry2.CRC:
                continue

            # Recursively diff the nested archives
            diff = _diffNestedZips(zip1.read(entry1), zip2.read(entry2), ignoreRules)

            # Diff found => exit
            if diff is not None:
                return "In %s : %s" % (filename, diff)

            # No diff here : skip to next one
            continue

        # Do we ignore this file ?
        if _matchesAny(filename, ignoreRules.ignoreFiles):
            continue

        # Is it a text file ? Then compare its lines and go to next entry
        linePatterns = _findLinePatterns(filename, ignoreRules)
        if linePatterns is not None:
            with zip1.open(entry1) as file1, zip2.open(entry2) as file2:
                result = diffTextFiles(file1, file2, linePatterns)
            if result is not None:
                return "Text files %s are not the same : %s" % (filename, result)
            continue

        # -- Binary file

        # Check size
        if entry1.file_size != entry2.file_size:
            return "Entry '%s' has different sizes : %d <> %d" % (
                filename, entry1.file_size, entry2.file_size)

        # Check CRC
        if entry1.CRC != entry2.CRC:
            return "Entry '%s' has different CRCs : %s <> %s" % (
                filename, entry1.CRC, entry2.CRC)

    # No diff found here
    return None


def _normalizeLine(rawLine, ignorePatterns):
    """Strip a raw line and replace it by a marker if it must be ignored."""
    line = rawLine.strip()

    # Python 3 zip entries yield bytes. latin-1 maps every byte to exactly one
    # character, so two decoded lines are equal only if their bytes are equal.
    if isinstance(line, bytes) and not isinstance(line, str):
        line = line.decode("latin-1")

    for pattern in ignorePatterns:
        if pattern.match(line) is not None:
            return "#IGNORED"
    return line


def diffTextFiles(file1, file2, ignorePatterns):
    """Diff two text files line by line, ignoring some lines.

    Lines are stripped of surrounding whitespace before comparison, and a
    line matching one of ignorePatterns equals any other ignored line.

    Args:
        file1: first file object, read with readline().
        file2: second file object, read with readline().
        ignorePatterns: list of compiled regexps for lines to ignore.

    Returns:
        None if the files are identical, a string describing the first
        differing line otherwise.
    """
    lineNo = 0
    while True:

        # Get next lines
        raw1 = file1.readline()
        raw2 = file2.readline()
        lineNo += 1

        # readline() returns an empty value only at end of file : a blank line
        # still holds its newline, so blank lines do not stop the comparison
        if not raw1 and not raw2:
            return None

        line1 = _normalizeLine(raw1, ignorePatterns)
        line2 = _normalizeLine(raw2, ignorePatterns)

        if line1 != line2:
            return "Line %d differ : '%s' <> '%s'" % (lineNo, line1, line2)


# --------------------------------------------------------------------
# Main
# --------------------------------------------------------------------

def main(argv):
    """Diff the two zip files named in argv and return the exit status.

    Returns 0 if there is no diff, 1 if a diff was found (its description is
    printed) and 2 if fewer than two file names were given.
    """
    if len(argv) < 3:
        progName = argv[0] if argv else "zipdiff"
        sys.stderr.write("Usage: %s <zip1> <zip2>\n" % progName)
        return 2

    # Open both archives, closing them whatever happens
    with ZipFile(argv[1], 'r') as zip1, ZipFile(argv[2], 'r') as zip2:
        result = diffZips(zip1, zip2, RULES)

    # If diff    : print diff description, status=1
    # If no diff : print nothing, status=0
    if result is None:
        return 0
    print(result)
    return 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))