A line-based diff that shows two versions side-by-side based on Google's diff_match_patch implementation.
Deletions are displayed on the left side, while additions are displayed on the right similar to the diff shown on Wikipedia.
import itertools
import re
import diff_match_patch
def side_by_side_diff(old_text, new_text, differ=None):
    """
    Calculates a side-by-side line-based difference view.

    Wraps insertions in <ins></ins> and deletions in <del></del>. The text
    of every change is HTML-escaped (&, <, >) before it is wrapped, so the
    <ins>/<del> tags are the only markup in the yielded strings.

    old_text -- the old version of the text (shown on the left side).
    new_text -- the new version of the text (shown on the right side).
    differ   -- optional callable ``differ(old_text, new_text)`` returning
                an iterable of ``(change_type, text)`` tuples, where
                change_type is -1 (deletion), 0 (unchanged) or 1
                (insertion). When omitted, diff_match_patch is used
                (diff_main followed by diff_cleanupSemantic).

    This is a generator. It yields one tuple per output line:
    ``(changed, left, right)``. ``left`` is None when the line only exists
    on the right side and ``right`` is None when it only exists on the
    left side.

    Raises ValueError (during iteration) if a change type other than
    -1, 0 or 1 is encountered.
    """
    # Python 3 renamed izip_longest to zip_longest; support both.
    zip_longest = (getattr(itertools, 'zip_longest', None)
                   or itertools.izip_longest)

    def yield_open_entry(open_entry):
        """ Yield all open changes. """
        ls, rs = open_entry
        # Get unchanged parts onto the right line
        if ls[0] == rs[0]:
            # Shared first line: everything after it is a change.
            yield (False, ls[0], rs[0])
            for l, r in zip_longest(ls[1:], rs[1:]):
                yield (True, l, r)
        elif ls[-1] == rs[-1]:
            # Shared last line: everything before it is compared pairwise.
            for l, r in zip_longest(ls[:-1], rs[:-1]):
                yield (l != r, l, r)
            yield (False, ls[-1], rs[-1])
        else:
            for l, r in zip_longest(ls, rs):
                yield (True, l, r)

    line_split = re.compile(r'(?:\r?\n)')

    if differ is None:
        dmp = diff_match_patch.diff_match_patch()
        diff = dmp.diff_main(old_text, new_text)
        dmp.diff_cleanupSemantic(diff)
    else:
        diff = differ(old_text, new_text)

    # The open entry stacks the left/right lines of the set of connected
    # lines currently being processed. None marks "no content seen yet".
    open_entry = ([None], [None])
    for change_type, text in diff:
        if change_type not in (-1, 0, 1):
            raise ValueError('unexpected change type: %r' % (change_type,))

        # Escape '&' first so the entities produced for '<' and '>' are
        # not escaped a second time.
        text = (text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;'))

        lines = line_split.split(text)

        # Merge with previous entry if still open
        ls, rs = open_entry

        line = lines[0]
        if line:
            if change_type == 0:
                ls[-1] = (ls[-1] or '') + line
                rs[-1] = (rs[-1] or '') + line
            elif change_type == 1:
                rs[-1] = (rs[-1] or '') + '<ins>%s</ins>' % line
            else:
                ls[-1] = (ls[-1] or '') + '<del>%s</del>' % line

        lines = lines[1:]

        # Anything left means the change contained at least one line break.
        if lines:
            if change_type == 0:
                # Push out open entry
                for row in yield_open_entry(open_entry):
                    yield row

                # Directly push out lines until last
                for line in lines[:-1]:
                    yield (False, line, line)

                # Keep last line open
                open_entry = ([lines[-1]], [lines[-1]])
            elif change_type == 1:
                # Empty lines are kept as '' rather than an empty <ins>.
                for line in lines:
                    rs.append('<ins>%s</ins>' % line if line else '')
            else:
                # Empty lines are kept as '' rather than an empty <del>.
                for line in lines:
                    ls.append('<del>%s</del>' % line if line else '')

    # Push out open entry
    for row in yield_open_entry(open_entry):
        yield row
|
diff_match_patch returns the changes between two versions of a text as a list of (marker, text) tuples, where the marker is -1 for a deletion, 1 for an addition and 0 for unchanged text.
side_by_side_diff() will work on this change list and yield changes line by line, filtering the diff to show deletions from the old version on the left side and additions to the new version on the right side.
It is important to cope with conversions (i.e. splitting and joining) of a single line from/to multiple lines and so consumed changes are stacked internally until a set of connected lines is fully processed.
The generator yields one tuple per output line:
("change status", "left side", "right side")
The change status indicates whether the line was changed; the left and right sides hold the old line and the new line. The left side is None for added lines, and the right side is None for deleted lines.
I've posted a screenshot which is based on the code above to http://jsfiddle.net/hRS9N/1/
Would you mind also sharing a script you wrote to generate html from the example?