Welcome, guest | Sign In | My Account | Store | Cart

The routines rank() and order() outputs the ranks and ordering of the elements of a list. Missing values are indicated by None and ranking have first, min, max, random alternatives just like in R.

Python, 168 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
from random import uniform, sample

def order(x, NoneIsLast = True, decreasing = False):
    """
    Returns the ordering of the elements of x. The list
    [ x[j] for j in order(x) ] is a sorted version of x.

    Missing values in x are indicated by None. If NoneIsLast is true,
    then missing values are ordered to be at the end.
    Otherwise, they are ordered at the beginning.
    """
    omitNone = False
    if NoneIsLast == None:
        NoneIsLast = True
        omitNone = True
        
    n  = len(x)
    ix = range(n)
    if None not in x:
        ix.sort(reverse = decreasing, key = lambda j : x[j])
    else:
        # Handle None values properly.
        def key(i, x = x):
            elem = x[i]
            # Valid values are True or False only.
            if decreasing == NoneIsLast:
                return not(elem is None), elem
            else:
                return elem is None, elem
        ix = range(n)
        ix.sort(key=key, reverse=decreasing)
            
    if omitNone:
        n = len(x)
        for i in range(n-1, -1, -1):
            if x[ix[i]] == None:
                n -= 1
        return ix[:n]
    return ix


def rank(x, NoneIsLast=True, decreasing = False, ties = "first"):
    """
    Returns the ranking of the elements of x. The position of the first
    element in the original vector is rank[0] in the sorted vector.

    Missing values are indicated by None.  Calls the order() function.
    Ties are NOT averaged by default. Choices are:
         "first" "average" "min" "max" "random" "average"
    """
    omitNone = False
    if NoneIsLast == None:
        NoneIsLast = True
        omitNone = True
    O = order(x, NoneIsLast = NoneIsLast, decreasing = decreasing)
    R = O[:]
    n = len(O)
    for i in range(n):
        R[O[i]] = i
    if ties == "first" or ties not in ["first", "average", "min", "max", "random"]:
        return R
        
    blocks     = []
    isnewblock = True
    newblock   = []
    for i in range(1,n) :
        if x[O[i]] == x[O[i-1]]:
            if i-1 not in newblock:
                newblock.append(i-1)
            newblock.append(i)
        else:
            if len(newblock) > 0:
                blocks.append(newblock)
                newblock = []
    if len(newblock) > 0:
        blocks.append(newblock)

    for i, block  in enumerate(blocks):
        # Don't process blocks of None values.
        if x[O[block[0]]] == None:
            continue
        if ties == "average":
            s = 0.0
            for j in block:
                s += j
            s /= float(len(block))
            for j in block:
                R[O[j]] = s                
        elif ties == "min":
            s = min(block)
            for j in block:
                R[O[j]] = s                
        elif ties == "max":
            s =max(block)
            for j in block:
                R[O[j]] = s                
        elif ties == "random":
            s = sample([O[i] for i in block], len(block))
            for i,j in enumerate(block):
                R[O[j]] = s[i]
        else:
            for i,j in enumerate(block):
                R[O[j]] = j
    if omitNone:
        R = [ R[j] for j in range(n) if x[j] != None]
    return R

if __name__ == "__main__":
    """
    Output when run from the command line:

vector  [5, None, None, 6, 2, 3, 2, 0, 5, 5, 1, None]
order(NoneIsLast= True ,decreasing= True ) [3, 0, 8, 9, 5, 4, 6, 10, 7, 1, 2, 11]
rank(x, NoneIsLast= True ,decreasing= True ,ties= first ) [1, 9, 10, 0, 5, 4, 6, 8, 2, 3, 7, 11]
rank(x, NoneIsLast= True ,decreasing= True ,ties= average ) [2.0, 9, 10, 0, 5.5, 4, 5.5, 8, 2.0, 2.0, 7, 11]
rank(x, NoneIsLast= True ,decreasing= True ,ties= min ) [1, 9, 10, 0, 5, 4, 5, 8, 1, 1, 7, 11]
rank(x, NoneIsLast= True ,decreasing= True ,ties= max ) [3, 9, 10, 0, 6, 4, 6, 8, 3, 3, 7, 11]
rank(x, NoneIsLast= True ,decreasing= True ,ties= random ) [0, 9, 10, 0, 6, 4, 4, 8, 9, 8, 7, 11]

order(NoneIsLast= True ,decreasing= False ) [7, 10, 4, 6, 5, 0, 8, 9, 3, 1, 2, 11]
rank(x, NoneIsLast= True ,decreasing= False ,ties= first ) [5, 9, 10, 8, 2, 4, 3, 0, 6, 7, 1, 11]
rank(x, NoneIsLast= True ,decreasing= False ,ties= average ) [6.0, 9, 10, 8, 2.5, 4, 2.5, 0, 6.0, 6.0, 1, 11]
rank(x, NoneIsLast= True ,decreasing= False ,ties= min ) [5, 9, 10, 8, 2, 4, 2, 0, 5, 5, 1, 11]
rank(x, NoneIsLast= True ,decreasing= False ,ties= max ) [7, 9, 10, 8, 3, 4, 3, 0, 7, 7, 1, 11]
rank(x, NoneIsLast= True ,decreasing= False ,ties= random ) [0, 9, 10, 8, 6, 4, 4, 0, 8, 9, 1, 11]


order(NoneIsLast= False ,decreasing= True ) [1, 2, 11, 3, 0, 8, 9, 5, 4, 6, 10, 7]
rank(x, NoneIsLast= False ,decreasing= True ,ties= first ) [4, 0, 1, 3, 8, 7, 9, 11, 5, 6, 10, 2]
rank(x, NoneIsLast= False ,decreasing= True ,ties= average ) [5.0, 0, 1, 3, 8.5, 7, 8.5, 11, 5.0, 5.0, 10, 2]
rank(x, NoneIsLast= False ,decreasing= True ,ties= min ) [4, 0, 1, 3, 8, 7, 8, 11, 4, 4, 10, 2]
rank(x, NoneIsLast= False ,decreasing= True ,ties= max ) [6, 0, 1, 3, 9, 7, 9, 11, 6, 6, 10, 2]
rank(x, NoneIsLast= False ,decreasing= True ,ties= random ) [9, 0, 1, 3, 6, 7, 4, 11, 0, 8, 10, 2]

order(NoneIsLast= False ,decreasing= False ) [1, 2, 11, 7, 10, 4, 6, 5, 0, 8, 9, 3]
rank(x, NoneIsLast= False ,decreasing= False ,ties= first ) [8, 0, 1, 11, 5, 7, 6, 3, 9, 10, 4, 2]
rank(x, NoneIsLast= False ,decreasing= False ,ties= average ) [9.0, 0, 1, 11, 5.5, 7, 5.5, 3, 9.0, 9.0, 4, 2]
rank(x, NoneIsLast= False ,decreasing= False ,ties= min ) [8, 0, 1, 11, 5, 7, 5, 3, 8, 8, 4, 2]
rank(x, NoneIsLast= False ,decreasing= False ,ties= max ) [10, 0, 1, 11, 6, 7, 6, 3, 10, 10, 4, 2]
rank(x, NoneIsLast= False ,decreasing= False ,ties= random ) [9, 0, 1, 11, 4, 7, 6, 3, 0, 8, 4, 2]


order(NoneIsLast= None ,decreasing= True ) [3, 0, 8, 9, 5, 4, 6, 10, 7]
rank(x, NoneIsLast= None ,decreasing= True ,ties= first ) [1, 9, 10, 0, 5, 4, 6, 8, 2, 3, 7, 11]
rank(x, NoneIsLast= None ,decreasing= True ,ties= average ) [2.0, 0, 5.5, 4, 5.5, 8, 2.0, 2.0, 7]
rank(x, NoneIsLast= None ,decreasing= True ,ties= min ) [1, 0, 5, 4, 5, 8, 1, 1, 7]
rank(x, NoneIsLast= None ,decreasing= True ,ties= max ) [3, 0, 6, 4, 6, 8, 3, 3, 7]
rank(x, NoneIsLast= None ,decreasing= True ,ties= random ) [9, 0, 6, 4, 4, 8, 8, 0, 7]

order(NoneIsLast= None ,decreasing= False ) [7, 10, 4, 6, 5, 0, 8, 9, 3]
rank(x, NoneIsLast= None ,decreasing= False ,ties= first ) [5, 9, 10, 8, 2, 4, 3, 0, 6, 7, 1, 11]
rank(x, NoneIsLast= None ,decreasing= False ,ties= average ) [6.0, 8, 2.5, 4, 2.5, 0, 6.0, 6.0, 1]
rank(x, NoneIsLast= None ,decreasing= False ,ties= min ) [5, 8, 2, 4, 2, 0, 5, 5, 1]
rank(x, NoneIsLast= None ,decreasing= False ,ties= max ) [7, 8, 3, 4, 3, 0, 7, 7, 1]
rank(x, NoneIsLast= None ,decreasing= False ,ties= random ) [9, 8, 4, 4, 6, 0, 0, 8, 1]
    """
    x = [5, None, None, 6, 2, 3, 2, 0, 5, 5, 1, None]
    print "vector ", x
    for NoneIsLast in [True, False, None]:
        for decreasing in [True, False]:
            O = order(x, NoneIsLast=NoneIsLast, decreasing=decreasing)
            print "order(NoneIsLast=", NoneIsLast, ",decreasing=", decreasing, ")", O

            for ties in ["first", "average", "min", "max", "random"]:
                R = rank(x, NoneIsLast=NoneIsLast, decreasing=decreasing, ties=ties)
                print "rank(x, NoneIsLast=", NoneIsLast, ",decreasing=", decreasing, ",ties=", ties, ")", R
            print
        print

Ordering and sorting are basic operations useful for indirect sorting of auxiliary lists and for statistical purposes. The routines are inspired from those available in R.

4 comments

Peter Otten 17 years, 11 months ago  # | flag

You can use a key function to move None to the end.

>>> x = [3, 1, 0, None, 2, 4, None]
>>> ix = range(len(x))
>>> def key(i, x=x):
...     elem = x[i]
...     return elem is None, elem
...
>>> ix.sort(key=key)
>>> [x[i] for i in ix]
[0, 1, 2, 3, 4, None, None]
Ernesto Adorio (author) 17 years, 11 months ago  # | flag

Thanks for the tip. It can also be used in processing missing values which may not be coded as None. Have revised the code.

Ordering and Ranking of lists with simplified requirements. Ignoring None values and treatment of ties, we can simplify the code:

import random, itertools


def order(x):
    """
    returns the order of each element in x as a list.
    """
    L = len(x)
    rangeL = range(L)
    z = izip(x, rangeL)
    z = izip(z, rangeL) #avoid problems with duplicates.
    D = sorted(z)
    return [d[1] for d in D]

def rank(x):
    """
    Returns the rankings of elements in x as a list.
    """
    L = len(x)
    ordering = order(x)
    ranks = [0] * len(x)
    for i in range(L):
        ranks[ordering[i]] = i
    return ranks

#example:
L = 10
x = [random.random() for i in range(L)]

ordering = order(x)
ranking  = rank(x)
for i in range(L):
    print i, x[i], ranking[i],x[ordering[i]], ordering[i]

Note that x[ordering[i] column are sorted.

Katja Hofmann 11 years, 9 months ago  # | flag

Hi Ernesto,

could there be a bug in the "random" treatment of ties? I take it that is means ties are broken randomly. I think line 100 should then be R[s[i]] = j so that the rank of the element at index s[i] is picked randomly.

Created by Ernesto Adorio on Tue, 18 Apr 2006 (PSF)
Python recipes (4591)
Ernesto Adorio's recipes (9)

Required Modules

Other Information and Tasks