Welcome, guest | Sign In | My Account | Store | Cart

Diff two dictionaries returning just the differences. If an item is not found, it is represented by the string "<KEYNOTFOUND>". If there is a better way, please share. :)

Python, 21 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
KEYNOTFOUND = '<KEYNOTFOUND>'       # Placeholder used by dict_diff for a missing key

def dict_diff(first, second):
    """ Return a dict of the keys whose values differ between two dicts.

        A key that is missing from one of the dicts is still reported; the
        missing side is represented by KEYNOTFOUND.
        @param first:   First dictionary to diff.
        @param second:  Second dictionary to diff.
        @return diff:   Dict of Key => (first.val, second.val)
    """
    diff = {}
    # Keys of first: either absent from second or mapped to a different value.
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    for key, value in first.items():
        if key not in second:
            diff[key] = (value, KEYNOTFOUND)
        elif value != second[key]:
            diff[key] = (value, second[key])
    # Keys that exist only in second (shared keys were handled above).
    for key, value in second.items():
        if key not in first:
            diff[key] = (KEYNOTFOUND, value)
    return diff

11 comments

Raymond Hettinger 15 years, 2 months ago  # | flag

Try using set.symmetric_difference():

>>> first = dict(a=1, b=2)
>>> second = dict(b=2, c=3)
>>> set(first) ^ set(second)
set(['a', 'c'])
David Lambert 15 years, 2 months ago  # | flag

Since dictionaries also have values you'd also have to prove that these are equal for the intersection of the sets of dictionary keys. If the value associated with the particular key in a "config object" has a known value then sets, not dictionaries, were probably a better data structure in the first place.

Michael Shepanski (author) 15 years, 2 months ago  # | flag

Good comments, thanks guys. :)

Tijmen 15 years, 2 months ago  # | flag

I did not think of this myself but definitely wanted to share:

def not_in_list(list1,list2):
    """Return the items of list2 that do not occur in list1.

    The result keeps list2's order and duplicates. The items of list1
    must be hashable because they are collected into a set for lookup.
    """
    known = set(list1)
    return [item for item in list2 if item not in known]
Radu Brumariu 15 years, 1 month ago  # | flag

# Report keys that exist in only one of `first` / `second`; the missing side
# is shown as None. Keys present in both dicts are not reported, even when
# their values differ.
diff = {}

# Walk the union of both key sets (concatenating .keys() only works on
# Python 2, where keys() returns a list).
for key in set(first) | set(second):
    if key not in first:
        diff[key] = (None, second[key])
    elif key not in second:
        diff[key] = (first[key], None)

print(diff)

Lautaro Pecile 14 years, 9 months ago  # | flag

A little more verbose version.

KEYNOTFOUNDIN1 = '<KEYNOTFOUNDIN1>'       # Marks a key that is absent from the first dict
KEYNOTFOUNDIN2 = '<KEYNOTFOUNDIN2>'       # Marks a key that is absent from the second dict

def dict_diff(first, second):
    """ Return a dict describing how two dictionaries differ.

        A key present in only one dictionary maps to a marker string naming
        the dictionary it is missing from (KEYNOTFOUNDIN1 or KEYNOTFOUNDIN2).
        A key present in both with unequal values maps to a tuple of values.
        @param first:   First dictionary to diff.
        @param second:  Second dictionary to diff.
        @return diff:   Dict of Key => marker string, or
                        Key => (first.val, second.val) for changed values.
    """
    keys_first = set(first)
    keys_second = set(second)
    # Keys only in the first dict are missing from the second one.
    diff = dict((key, KEYNOTFOUNDIN2) for key in keys_first - keys_second)
    # Keys only in the second dict are missing from the first one.
    diff.update((key, KEYNOTFOUNDIN1) for key in keys_second - keys_first)
    # Shared keys are reported only when their values are unequal.
    for key in keys_first & keys_second:
        if first[key] != second[key]:
            diff[key] = (first[key], second[key])
    return diff
Hugh Brown 14 years, 9 months ago  # | flag

Here's my code for this:

class DictDiffer(object):
    """
    Compare a current dictionary with a past one and report, as sets of keys:
    (1) keys added (only in the current dict)
    (2) keys removed (only in the past dict)
    (3) keys in both dicts whose values changed
    (4) keys in both dicts whose values are unchanged
    """

    def __init__(self, current_dict, past_dict):
        self.current_dict = current_dict
        self.past_dict = past_dict
        self.set_current = set(current_dict.keys())
        self.set_past = set(past_dict.keys())
        # Keys common to both dicts; only these need a value comparison.
        self.intersect = self.set_current & self.set_past

    def added(self):
        """Return the set of keys found only in the current dict."""
        return self.set_current.difference(self.intersect)

    def removed(self):
        """Return the set of keys found only in the past dict."""
        return self.set_past.difference(self.intersect)

    def changed(self):
        """Return the set of shared keys whose values compare unequal."""
        return {
            key for key in self.intersect
            if self.past_dict[key] != self.current_dict[key]
        }

    def unchanged(self):
        """Return the set of shared keys whose values compare equal."""
        return {
            key for key in self.intersect
            if self.past_dict[key] == self.current_dict[key]
        }

Not quite the same interface for output, but the algorithm is pretty clean.

Christian Dannie Storgaard 13 years, 1 month ago  # | flag

Here's one that returns any key that's only in one of the dicts and any value that differs in the two dicts (returns the one in the second dict - just change the order of the .get()s to change behaviour).

Short version:

>>> a = {'a': 2, 'b': 1, 'c': 0}
>>> b = {'b': 2, 'c': 0}
>>> dict([ (key, b.get(key, a.get(key))) for key in set(a.keys()+b.keys()) if (key in a and (not key in b or b[key] != a[key])) or (key in b and (not key in a or a[key] != b[key])) ])
{'a': 2, 'b': 2}

Or as a function (same code, but easier to read):

def dict_diff(dict_a, dict_b):
    """Return the entries on which dict_a and dict_b disagree.

    The result holds every key that is present in only one of the dicts,
    plus every shared key whose values are unequal. For a shared key the
    value from dict_b is reported; for a key found in a single dict, that
    dict's value is reported.
    """
    diff = {}
    # Union of both key sets; concatenating .keys() only works on Python 2.
    for key in set(dict_a) | set(dict_b):
        if key not in dict_b:
            diff[key] = dict_a[key]
        elif key not in dict_a or dict_a[key] != dict_b[key]:
            diff[key] = dict_b[key]
    return diff
Weng Leong Ng 12 years, 11 months ago  # | flag

This uses dictionary comprehensions, and allows you to change the KEYNOTFOUND string if desired.

def dict_diff(d1, d2, NO_KEY='<KEYNOTFOUND>'):
    """Return a dict of key => (d1 value, d2 value) for every differing key.

    A key that exists in only one dict is reported with NO_KEY standing in
    for the missing side. Keys whose values compare equal are left out.
    """
    shared = d1.keys() & d2.keys()
    diff = {}
    # Shared keys are reported only when the two values are unequal.
    for key in shared:
        if d1[key] != d2[key]:
            diff[key] = (d1[key], d2[key])
    # Keys that exist on one side only.
    for key in d1.keys() - shared:
        diff[key] = (d1[key], NO_KEY)
    for key in d2.keys() - shared:
        diff[key] = (NO_KEY, d2[key])
    return diff
Alex Zeleznyak 8 years ago  # | flag

Hi, this is working for me:

def dict_diff(d1, d2, NO_KEY='<KEYNOTFOUND>'):
    """Return a dict of key => (d1 value, d2 value) for every differing key.

    A key that exists in only one dict is reported with NO_KEY standing in
    for the missing side. Keys whose values compare equal are left out.
    """
    # Explicit sets, so the set operators below do not depend on keys()
    # returning a set-like view.
    set_d1 = set(d1.keys())
    set_d2 = set(d2.keys())
    both = set_d1 & set_d2
    diff = {k: (d1[k], d2[k]) for k in both if d1[k] != d2[k]}
    diff.update({k: (d1[k], NO_KEY) for k in set_d1 - both})
    diff.update({k: (NO_KEY, d2[k]) for k in set_d2 - both})
    return diff

Alex Zeleznyak 8 years ago  # | flag

def dict_diff(d1, d2, NO_KEY='<KEYNOTFOUND>'):
    """Return a dict of key => (d1 value, d2 value) for every differing key.

    A key that exists in only one dict is reported with NO_KEY standing in
    for the missing side. Keys whose values compare equal are left out.
    """
    # Explicit sets, so the set operators below do not depend on keys()
    # returning a set-like view.
    set_d1 = set(d1.keys())
    set_d2 = set(d2.keys())
    both = set_d1 & set_d2
    diff = {k: (d1[k], d2[k]) for k in both if d1[k] != d2[k]}
    diff.update({k: (d1[k], NO_KEY) for k in set_d1 - both})
    diff.update({k: (NO_KEY, d2[k]) for k in set_d2 - both})
    return diff

Created by Michael Shepanski on Thu, 5 Feb 2009 (MIT)
Python recipes (4591)
Michael Shepanski's recipes (1)

Required Modules

  • (none specified)

Other Information and Tasks