Welcome, guest | Sign In | My Account | Store | Cart

This program is a Sudoku solver that uses 3 simple algorithms.

Python, 295 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
#!/usr/bin/env python

"""Sudoku solver

usage: python sudoku.py <file> [<depth>]

The input file should contain exactly 81 digits, with emtpy fields marked
with 0. Optionally, dots (.) may be used for empty fields if better
readability is required. All other characters are discarded.

The default guessing depth is 2.

"""


import pprint
from itertools import chain


def parseString(text):
    s = text.replace('.', '0')
    s = [int(x) for x in s if x.isdigit()]
    s = [s[i:i+9] for i in range(0, 81, 9)]
    return s


def parse(name):
    f = open(name)
    s = f.read()
    f.close()
    return parseString(s)


class NotSolvable(Exception):

    """Not solvable"""

    pass


class IncorrectSolution(Exception):
    
    """Incorrect solution"""

    pass


class Done(Exception):

    """Done!"""

    pass


# These constants are speed optimizations for use in loops
MASK9 = set(range(1, 10))
NINE = tuple(range(9))
THREE = tuple(range(3))


def try_cross_and_region(matrix, depth=0):
    """Fill cells unambiguously determined by their lines and regions.
    
    For every empty cell, find the values of known digits in its row, column,
    and region. If there is exactly one digit left, it is the value of the
    cell. If there are more and if search depth is non-zero, try solving the
    puzzle for each of the values.
    
    """
    result = False
    for col in NINE:
        yexist = set(m[col] for m in matrix)
        rcol = col // 3
        rcol3 = 3 * rcol
        rcol3p3 = rcol3 + 3
        for row in NINE:
            if not matrix[row][col]:
                xexist = set(matrix[row])
                rrow = row // 3
                rrow3 = 3 * rrow
                region = [matrix[rrow3+i][rcol3:rcol3p3] for i in THREE]
                rexist = set(chain(*region))
                missing = MASK9 - xexist - yexist - rexist
                size = len(missing)
                if size == 1:
                    elem = list(missing)[0]
                    matrix[row][col] = elem
                    yexist.add(elem)  # Speed optimization, see outer loop
                    result = True
                elif size == 0:
                    raise NotSolvable()
                elif size < 4 and depth > 0:
                    # The size limit was established by experimentation.
                    hypothesize(matrix, row, col, missing, depth-1)
    return result


def try_adjacent_lines(matrix):
    """Fill cells unambiguously determined by same-region lines.
    
    For every empty cell, find the values of known digits in the lines passing
    through the same region. For the cell's line, the other two values in the
    region should be known. For the remaining two lines, all digits in the
    other two regions should be known. There should be exactly one value
    happening twice on the other lines and not at all on the current line.
    It is the value of the current cell.

    If the above failed in one direction, try the other.

    If both horizontal and vertical searches failed for a given cell but all
    24 other-region values are known, there should be exactly one value
    happening 4 times (each for one of the other regions checked). It is the
    value of the current cell.
    
    """
    result = False
    for col in NINE:
        rcol = col // 3
        rcol3 = 3 * rcol
        rcol3p3 = rcol3 + 3
        othercolidxs = (rcol3 + (col+1)%3, rcol3 + (col+2)%3)
        for row in NINE:
            if not matrix[row][col]:
                rrow = row // 3
                rrow3 = 3 * rrow
                otherrows = (rrow3 + (row+1)%3, rrow3 + (row+2)%3)
                otherrows = [matrix[orow] for orow in otherrows]
                othercols = [[m[ocol] for m in matrix]
                             for ocol in othercolidxs]
                othercross = []  # Container for other-region known values
                if _check_other_lines2(matrix, rcol, row, col, matrix[row],
                                       otherrows, othercross):
                    result = True
                elif _check_other_lines2(matrix, rcol, row, col,
                                         [m[col] for m in matrix],
                                         othercols, othercross):
                    result = True
                elif len(othercross) == 24:
                    fours = [i for i in MASK9 if othercross.count(i) == 4]
                    if fours:
                        matrix[row][col] = fours[0]
                        result = True
    return result


def _check_other_lines2(matrix, ridx, row, col, line, otherlines, othercross):
    """Helper function for try_adjacent_lines().

    Check the values in one direction. As an artifact, known digit values
    from the lines are collected into othercross.
    
    """
    ridx3 = 3 * ridx
    ridx3p3 = ridx3 + 3
    line = line[ridx3:ridx3p3]
    if line.count(0) != 1:
        return False
    allfields = []
    for other in otherlines:
        other = list(other)
        del other[ridx3:ridx3p3]
        allfields += other
    if allfields.count(0):
        return False
    othercross += allfields
    twos = set(i for i in MASK9 if allfields.count(i) == 2)
    twos -= set(line)
    if len(twos) == 1:
        elem = list(twos)[0]
        if elem not in line:
            matrix[row][col] = elem
            return True
    return False


# Speed optimization in for loop
RLOCATIONS = set((row, col) for row in THREE for col in THREE)


def try_masking(matrix):
    """Fill cells that remain alone after "masking" by other cells.

    For each digit value, "stamp out" the puzzle to see which regions have
    empty cells that could potentially still be filled with that value. For
    regions with single cells left, fill those cells with the value.
    
    Known problems:
    This function causes a 'Bad call' exception in the profile module
    (see http://www.python.org/sf/1117670) in some Python installations.
    
    """
    result = False
    for digit in MASK9:
        locations = set(RLOCATIONS)
        matrix2 = [list(m) for m in matrix]
        for row in NINE:
            try:
                idx = matrix2[row].index(digit)
            except ValueError:
                idx = -1
            else:
                matrix2[row] = [-1 for col in NINE]
                for row2 in NINE:
                    matrix2[row2][idx] = -1
                locations.discard((row//3, idx//3))
        for rrow, rcol in locations:
            rcol3 = 3 * rcol
            rcol3p3 = rcol3 + 3
            rrow3 = 3 * rrow
            region2 = (matrix2[rrow3+i][rcol3:rcol3p3] for i in THREE)
            region2 = [x for x in chain(*region2)]
            if region2.count(0) == 1:
                idx = region2.index(0)
                row = rrow3 + idx // 3
                col = rcol3 + idx % 3
                matrix[row][col] = digit
                result = True
    return result


def hypothesize(matrix, row, col, values, depth):
    """Try further search with the specified cell equal to each of values."""
    for x in values:
        matrix2 = [list(m) for m in matrix]
        matrix2[row][col] = x
        try:
            solve(matrix2, depth)
        except (NotSolvable, IncorrectSolution):
            pass


def check_done(matrix):
    if 0 in chain(*matrix):
        return False
    for row in matrix:
        if len(set(row)) < 9:
            raise IncorrectSolution()
    for col in range(9):
        col = set(m[col] for m in matrix)
        if len(col) < 9:
            raise IncorrectSolution()
    for rrow in range(3):
        for rcol in range(3):
            region = [matrix[3*rrow+i][3*rcol:3*(rcol+1)] for i in range(3)]
            if len(set(chain(*region))) < 9:
                raise IncorrectSolution()
    return True


def solve(matrix, depth=2):
    while try_cross_and_region(matrix):
        pass
    if check_done(matrix):
        pprint.pprint(matrix)
        raise Done()
    
    while try_adjacent_lines(matrix):
        pass
    if check_done(matrix):
        pprint.pprint(matrix)
        raise Done()
    
    while try_masking(matrix):
        pass
    if check_done(matrix):
        pprint.pprint(matrix)
        raise Done()
    
    while try_cross_and_region(matrix, depth=depth):
        pass
    if check_done(matrix):
        pprint.pprint(matrix)
        raise Done()


if __name__ == "__main__":
    import sys
    matrix = parse(sys.argv[1])
    depth = 2
    if len(sys.argv) > 2:
        depth = int(sys.argv[2])
    try:
        solve(matrix, depth)
        print "Failed, this is all I could fill in:"
        pprint.pprint(matrix)
    except Done:
        print "Done!"
    except NotSolvable:
        print "Not solvalbe, this is how I filled it in:"
        pprint.pprint(matrix)
    import os
    utime, stime, cutime, cstime, elapsed = os.times()
    print "cputime=%s" % (utime + stime)

# vim:et:ai:sw=4

This implementation uses 3 simple algorithms for solution search. It has been optimized for speed using the profile module.

It solves typical "contest" puzzles in a fraction of a second even for the very hard category. I also tested it with a small sample of 17-hint puzzles from http://www.csse.uwa.edu.au/~gordon/sudokumin.php - it seems to be able to solve any of them, although some may require setting the depth to 3.

Changes in 1.2: - bugfix (catch more exceptions in hypothesize)

Changes in 1.1: - added descriptions of algorithms in docstrings - added further speed optimizations - minor style corrections