ActiveState Code

Recipe 576589: Compare MySQL DB


Small module to and check for differences between 2 DB.

It will compare the table list, the tables structure and their content.

When you run the program, you are ask if you want a detailed comparison. As global comparison will list all the different tables, detailed comparison will output every different record (and will take longer to run)

Python
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# -*- coding: iso-8859-1 -*-
""" This module aims to compare 2 DB and check for differences.

    As it uses memory to compare table content, it may not be the right tool for huge tables.
    When you run the program, you are ask if you want a detailed comparison.
    As global comparison will list all the different tables, detailed comparison will output
    every different record (and will take longer to run)
"""
import os, os.path, MySQLdb, pprint, string

class prm(object):
    """ Class used to store global parameters
    """
    # Connexion parameters to the DB we want to compare
    # DB 1
    db1prms={"host":"127.0.0.1", "user":"myuser", "passwd":"mypassword", "db":"theFirstDB"}
    db1=MySQLdb.connect(db1prms["host"], db1prms["user"], db1prms["passwd"], db1prms["db"])

    # DB 2
    db2prms={"host":"168.25.15.10", "user":"myuser", "passwd":"mypassword", "db":"theSecondDB"}
    db2=MySQLdb.connect(db2prms["host"], db2prms["user"], db2prms["passwd"], db2prms["db"])

    # Outfile to store result
    outfile="out.txt"

class dbao(object):
    """ Utilities to interact with database
    """
    def __init__(self, db, name=''):
        self.db=db
        self.name=name
        self.cur=db.cursor()

    def rowmap(self, rows):
        """ returns a dictionary with column names as keys and values
        """
        cols = [column[0] for column in self.cur.description]
        return [dict(zip(cols, row)) for row in rows]

    def getRows(self, tbl):
        """ Returns the content of the table tbl
        """
        statmt="select * from %s" % tbl
        self.cur.execute(statmt)
        rows=list(self.cur.fetchall())
        return rows

    def getTableList(self):
        """ Returns the list of the DB tables
        """
        statmt="show tables"
        self.cur.execute(statmt)
        rows=list(self.cur.fetchall())
        return rows

class dbCompare(object):
    """ Core function to compare the DBs
    """
    def __init__(self):
        self.processDetailedComparison=string.lower(raw_input("Detailed Compare (y/n) ?")[0]) in ['o', 'y']

    def compareLists(self, l1, l2):
        result={'l1notInl2':[],
                'l2notInl1':[]}
        d1=dict(zip(l1, l1))
        d2=dict(zip(l2, l2))
        for row in l1:
            if not d2.has_key(row):
                result['l1notInl2'].append(row)
        for row in l2:
            if not d1.has_key(row):
                result['l2notInl1'].append(row)
        return result

    def process(self):
        # Récupération de la liste des tables de la première base
        of=outfile()
        db1=dbao(prm.db1, prm.db1prms["host"])
        tl1=db1.getTableList()
        # Récupération de la liste des tables de la deuxième base
        db2=dbao(prm.db2, prm.db2prms["host"])
        tl2=db2.getTableList()
        if tl1==tl2:
            of.write("Identical tables")
        else:
            print "Different tables"
            of.write("Different tables")
            cp=self.compareLists(tl1, tl2)
            if cp['l1notInl2'] != []:
                of.write("   --> Tables from %s missing in %s" % (db1.name, db2.name))
                of.write(string.join([t[0] for t in cp['l1notInl2']],', '))
            if cp['l2notInl1'] != []:
                of.write("   --> Tables from %s missing in %s" % (db2.name, db1.name))
                of.write(string.join([t[0] for t in cp['l2notInl1']], ', '))

        for tbl in tl1:
            if tbl in tl2:
                print tbl[0]
                rl1=db1.getRows(tbl)
                rl2=db2.getRows(tbl)
                if rl1==rl2:
                    of.write("         %s identical" % tbl)
                else:
                    print "ERROR : %s different" % tbl
                    of.write("ERROR : %s different" % tbl)
                    cp=self.compareLists(rl1, rl2)
                    if self.processDetailedComparison:
                        if cp['l1notInl2'] != []:
                            of.write("   --> Rows from %s@%s missing in %s" % (tbl, db1.name, db2.name))
                            of.write(db1.rowmap(cp['l1notInl2']))
                        if cp['l2notInl1'] != []:
                            of.write("   --> Rows from %s@%s missing in %s" % (tbl, db2.name, db1.name))
                            of.write(db2.rowmap(cp['l2notInl1']))

class outfile(object):
    """ To write in the outfile
    """

    def __init__(self):
        self.outFile=prms.outfile
        df=open(self.outFile,'w')
        df.close()

    def write(self, *msg):
        df=open(self.outFile,'a')
        for m in msg:
            if type(m) is dict or type(m) is list:
                df.write("%s\n" % pprint.pformat(m))
            else:
                df.write("%s\n" % str(m))
        df.close()

if __name__ == "__main__":
    dc=dbCompare()
    dc.process()

Discussion

As it uses memory to compare table content, it may not be the right tool for huge tables.

I wrote this when I installed a replication solution for my mysql dbs. As the master was on windows (case insensitive for table names) and the slave on linux (case sensitive), I had to correct my scripts generator to feed my DB and, from time to time, verify the content to see if i forgot some.

Observations are welcome :)

Sign in to comment