Welcome, guest | Sign In | My Account | Store | Cart

This recipe sorts a list of strings using the numeric order where possible.

Python, 61 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# numsort.py 
# sorting in numeric order 
# for example:
#   ['aaa35', 'aaa6', 'aaa261'] 
# is sorted into:
#   ['aaa6', 'aaa35', 'aaa261']

import sys

def sorted_copy(alist):
    """
    Return a new list holding the items of alist in numeric-aware order.

    Every item is paired with the index produced by _generate_index and
    the pairs are sorted, so items are ordered by their index first and
    by the item itself whenever two indices compare equal.  The input
    list is not modified.
    """
    # inspired by Alex Martelli
    # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52234
    indices = map(_generate_index, alist)
    # sorted() accepts any iterable, so this no longer relies on zip()
    # returning a list (it returns an iterator on Python 3).
    decorated = sorted(zip(indices, alist))
    return [item for index, item in decorated]
    
def _generate_index(str):
    """
    Splits a string into alpha and numeric elements, which
    is used as an index for sorting"
    """
    #
    # the index is built progressively
    # using the _append function
    #
    index = []
    def _append(fragment, alist=index):
        if fragment.isdigit(): fragment = int(fragment)
        alist.append(fragment)

    # initialize loop
    prev_isdigit = str[0].isdigit()
    current_fragment = ''
    # group a string into digit and non-digit parts
    for char in str:
        curr_isdigit = char.isdigit()
        if curr_isdigit == prev_isdigit:
            current_fragment += char
        else:
            _append(current_fragment)
            current_fragment = char
            prev_isdigit = curr_isdigit
    _append(current_fragment)    
    return tuple(index)

    
def _test():
    """Print a sample address list unsorted, numerically sorted and plainly sorted."""
    import pprint
    initial_list = ['35 Fifth Avenue', '5 Fifth Avenue', '261 Fifth Avenue']
    sorted_list = sorted_copy(initial_list)
    # Single-argument print(...) is valid both as the Python 2 statement
    # and as the Python 3 function, and prints the same text in each.
    print("Before sorting...")
    pprint.pprint(initial_list)
    print("After sorting...")
    pprint.pprint(sorted_list)
    print("Normal python sorting produces...")
    # In-place sort of the sample list, shown for comparison.
    initial_list.sort()
    pprint.pprint(initial_list)

# Run the demonstration only when this file is executed as a script.
if __name__ == '__main__':
    _test()

When sorting addresses, plain string ordering gives undesired results. For instance, "35 Fifth Avenue" appears before "5 Fifth Avenue". This recipe sorts the strings by splitting each string into an index composed of its numeric and non-numeric parts, and then sorting on that index.

2 comments

Matt Connolly 10 years, 9 months ago  # | flag

How about this:

import re

q = ['aaa261', 'aaa35', 'aaa6']


def stringSplitByNumbers(x):
    """Return x split into text pieces and int values, for use as a sort key."""
    parts = re.split(r'(\d+)', x)
    # With a single capture group, re.split places the captured digit
    # runs at the odd positions of the result; only those become ints.
    return [int(part) if pos % 2 else part for pos, part in enumerate(parts)]


print(q)

print(sorted(q, key=stringSplitByNumbers))

Matt Connolly 10 years, 9 months ago  # | flag

Try this:

import re

q = ['aaa261', 'aaa35', 'aaa6']


def stringSplitByNumbers(x):
    """Return x split into text pieces and int values, for use as a sort key."""
    # Raw string so that \d reaches the regex engine as written.
    pieces = re.split(r'(\d+)', x)
    # With a single capture group, re.split places the captured digit
    # runs at the odd positions of the result; only those become ints.
    return [int(piece) if pos % 2 else piece for pos, piece in enumerate(pieces)]


print(q)

print(sorted(q, key=stringSplitByNumbers))