Welcome, guest | Sign In | My Account | Store | Cart

Give a string-oriented API to the generic "diff" module.

The "diff" module is very powerful but practically useless on its own. The "search" and "empty_master" functions below resolve this problem.

Python, 164 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#! /usr/bin/env python
"""Give a string-oriented API to the generic "diff" module.

The "diff" module is very powerful but practically useless on its own.
The "search" and "empty_master" functions below resolve this problem."""

################################################################################

__author__ = 'Stephen "Zero" Chappell <Noctis.Skytower@gmail.com>'
__date__ = '11 February 2010'
__version__ = '$Revision: 3 $'

################################################################################

import diff

################################################################################

# Matching Sensitivity - OFF
CASE_AND_PUNCTUATION = False

################################################################################

def connect_tree(tree):
    """Takes the master and finds out what part of the slave matches it.

    The tree from "diff.search" may contain several different routes for
    finding matches. This function takes the best one, gets the master
    match, and fills in the prefix and suffix with the best choices."""
    match = tree.nodes[tree.index.index(tree.value)]
    node = match.a
    if match.prefix.value:
        node.prefix = connect_tree(match.prefix)
    if match.suffix.value:
        node.suffix = connect_tree(match.suffix)
    return node

def flatten_tree(node):
    """Flattens a tree from "connect_tree" for linear iteration.

    The root node created after connecting a tree must be traversed from
    beginning to end in a linear fashion. This function flattens the tree
    to make that possible. Further processing is done by other functions."""
    array = [0]
    _flatten(node, array)
    return array

def _flatten(node, array):
    """Recursively traverse and flatten the given tree.

    This is a helper function that takes "node" and sequentially processes
    its prefix, root, and suffix. The results are appended to the array."""
    if isinstance(node.prefix, diff.Slice):
        _flatten(node.prefix, array)
    else:
        array.append(node.prefix)
    array[0] += 1
    array.append((array[0], node.root))
    if isinstance(node.suffix, diff.Slice):
        _flatten(node.suffix, array)
    else:
        array.append(node.suffix)

default = lambda words: ' '.join('_' * len(word) for word in words)

################################################################################

# Note: search, build_answer, & empty_master documentation is copied!
# ------^^^^^^--^^^^^^^^^^^^----^^^^^^^^^^^^-------------------------

if CASE_AND_PUNCTUATION:

    def search(master, slave):
        """Search for differences in the master and slave strings.

        The strings are translated into key and data, and their difference
        is calculated. An answer is composed after further processing and
        returned with the number of right words and total number of words."""
        key = tuple(master.split())
        data = tuple(slave.split())
        tree = diff.search(key, data)
        if tree.value:
            node = connect_tree(tree)
            array = flatten_tree(node)
            answer = build_answer(array)
        else:
            answer = default(key)
        return tree.value, len(key), answer

    def build_answer(array):
        """Take in flattened / serialized data and generate a hint.

        This implementation returns a string useful for Verse objects.
        Incorrect or missed words get printed up as empty blank lines."""
        cache = []
        for chunk in array:
            if chunk and isinstance(chunk, tuple):
                if isinstance(chunk[0], int):
                    for word in chunk[1]:
                        cache.append(word)
                else:
                    for word in chunk:
                        cache.append('_' * len(word))
        return ' '.join(cache)

    def empty_master(master):
        """Compute the represenation of a master without a slave."""
        return default(master.split())

################################################################################

else:

    def search(master, slave):
        """Search for differences in the master and slave strings.

        The strings are translated into key and data, and their difference
        is calculated. An answer is composed after further processing and
        returned with the number of right words and total number of words."""
        words = master.split()
        key = simplify(words)
        assert len(words) == len(key), 'Cannot Simplify Words'
        data = simplify(slave.split())
        tree = diff.search(key, data)
        if tree.value:
            node = connect_tree(tree)
            array = flatten_tree(node)
            pairs = flatten_list(array)
            answer = build_answer(words, pairs)
        else:
            answer = default(key)
        return tree.value, len(key), answer

    def simplify(words):
        """Remove non-alphabetic characters from an array of words."""
        letter = lambda s: ''.join(filter(lambda s: 'a' <= s <= 'z', s))
        return tuple(filter(bool, map(letter, map(str.lower, words))))

    def flatten_list(array):
        """Build (flag, load) pairs for the "build_answer" function."""
        pairs = []
        for chunk in array:
            if chunk and isinstance(chunk, tuple):
                if isinstance(chunk[0], int):
                    for word in chunk[1]:
                        pairs.append((True, word))
                else:
                    for word in chunk:
                        pairs.append((False, word))
        return pairs

    def build_answer(words, pairs):
        """Take in flattened / serialized data and generate a hint.

        This implementation returns a string useful for Verse objects.
        Incorrect or missed words get tranformed into underscore lines."""
        cache = []
        for word, (flag, load) in zip(words, pairs):
            cache.append(word if flag else '_' * len(load))
        return ' '.join(cache)

    def empty_master(master):
        """Compute the represenation of a master without a slave."""
        return default(simplify(master.split()))

1 comment

Trent Mick 14 years, 1 month ago  # | flag

Stephen, Sounds like these might better fit as a grouped package uploaded to PyPI?