The routines rank() and order() outputs the ranks and ordering of the elements of a list. Missing values are indicated by None and ranking have first, min, max, random alternatives just like in R.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | from random import uniform, sample
def order(x, NoneIsLast = True, decreasing = False):
"""
Returns the ordering of the elements of x. The list
[ x[j] for j in order(x) ] is a sorted version of x.
Missing values in x are indicated by None. If NoneIsLast is true,
then missing values are ordered to be at the end.
Otherwise, they are ordered at the beginning.
"""
omitNone = False
if NoneIsLast == None:
NoneIsLast = True
omitNone = True
n = len(x)
ix = range(n)
if None not in x:
ix.sort(reverse = decreasing, key = lambda j : x[j])
else:
# Handle None values properly.
def key(i, x = x):
elem = x[i]
# Valid values are True or False only.
if decreasing == NoneIsLast:
return not(elem is None), elem
else:
return elem is None, elem
ix = range(n)
ix.sort(key=key, reverse=decreasing)
if omitNone:
n = len(x)
for i in range(n-1, -1, -1):
if x[ix[i]] == None:
n -= 1
return ix[:n]
return ix
def rank(x, NoneIsLast=True, decreasing = False, ties = "first"):
"""
Returns the ranking of the elements of x. The position of the first
element in the original vector is rank[0] in the sorted vector.
Missing values are indicated by None. Calls the order() function.
Ties are NOT averaged by default. Choices are:
"first" "average" "min" "max" "random" "average"
"""
omitNone = False
if NoneIsLast == None:
NoneIsLast = True
omitNone = True
O = order(x, NoneIsLast = NoneIsLast, decreasing = decreasing)
R = O[:]
n = len(O)
for i in range(n):
R[O[i]] = i
if ties == "first" or ties not in ["first", "average", "min", "max", "random"]:
return R
blocks = []
isnewblock = True
newblock = []
for i in range(1,n) :
if x[O[i]] == x[O[i-1]]:
if i-1 not in newblock:
newblock.append(i-1)
newblock.append(i)
else:
if len(newblock) > 0:
blocks.append(newblock)
newblock = []
if len(newblock) > 0:
blocks.append(newblock)
for i, block in enumerate(blocks):
# Don't process blocks of None values.
if x[O[block[0]]] == None:
continue
if ties == "average":
s = 0.0
for j in block:
s += j
s /= float(len(block))
for j in block:
R[O[j]] = s
elif ties == "min":
s = min(block)
for j in block:
R[O[j]] = s
elif ties == "max":
s =max(block)
for j in block:
R[O[j]] = s
elif ties == "random":
s = sample([O[i] for i in block], len(block))
for i,j in enumerate(block):
R[O[j]] = s[i]
else:
for i,j in enumerate(block):
R[O[j]] = j
if omitNone:
R = [ R[j] for j in range(n) if x[j] != None]
return R
if __name__ == "__main__":
"""
Output when run from the command line:
vector [5, None, None, 6, 2, 3, 2, 0, 5, 5, 1, None]
order(NoneIsLast= True ,decreasing= True ) [3, 0, 8, 9, 5, 4, 6, 10, 7, 1, 2, 11]
rank(x, NoneIsLast= True ,decreasing= True ,ties= first ) [1, 9, 10, 0, 5, 4, 6, 8, 2, 3, 7, 11]
rank(x, NoneIsLast= True ,decreasing= True ,ties= average ) [2.0, 9, 10, 0, 5.5, 4, 5.5, 8, 2.0, 2.0, 7, 11]
rank(x, NoneIsLast= True ,decreasing= True ,ties= min ) [1, 9, 10, 0, 5, 4, 5, 8, 1, 1, 7, 11]
rank(x, NoneIsLast= True ,decreasing= True ,ties= max ) [3, 9, 10, 0, 6, 4, 6, 8, 3, 3, 7, 11]
rank(x, NoneIsLast= True ,decreasing= True ,ties= random ) [0, 9, 10, 0, 6, 4, 4, 8, 9, 8, 7, 11]
order(NoneIsLast= True ,decreasing= False ) [7, 10, 4, 6, 5, 0, 8, 9, 3, 1, 2, 11]
rank(x, NoneIsLast= True ,decreasing= False ,ties= first ) [5, 9, 10, 8, 2, 4, 3, 0, 6, 7, 1, 11]
rank(x, NoneIsLast= True ,decreasing= False ,ties= average ) [6.0, 9, 10, 8, 2.5, 4, 2.5, 0, 6.0, 6.0, 1, 11]
rank(x, NoneIsLast= True ,decreasing= False ,ties= min ) [5, 9, 10, 8, 2, 4, 2, 0, 5, 5, 1, 11]
rank(x, NoneIsLast= True ,decreasing= False ,ties= max ) [7, 9, 10, 8, 3, 4, 3, 0, 7, 7, 1, 11]
rank(x, NoneIsLast= True ,decreasing= False ,ties= random ) [0, 9, 10, 8, 6, 4, 4, 0, 8, 9, 1, 11]
order(NoneIsLast= False ,decreasing= True ) [1, 2, 11, 3, 0, 8, 9, 5, 4, 6, 10, 7]
rank(x, NoneIsLast= False ,decreasing= True ,ties= first ) [4, 0, 1, 3, 8, 7, 9, 11, 5, 6, 10, 2]
rank(x, NoneIsLast= False ,decreasing= True ,ties= average ) [5.0, 0, 1, 3, 8.5, 7, 8.5, 11, 5.0, 5.0, 10, 2]
rank(x, NoneIsLast= False ,decreasing= True ,ties= min ) [4, 0, 1, 3, 8, 7, 8, 11, 4, 4, 10, 2]
rank(x, NoneIsLast= False ,decreasing= True ,ties= max ) [6, 0, 1, 3, 9, 7, 9, 11, 6, 6, 10, 2]
rank(x, NoneIsLast= False ,decreasing= True ,ties= random ) [9, 0, 1, 3, 6, 7, 4, 11, 0, 8, 10, 2]
order(NoneIsLast= False ,decreasing= False ) [1, 2, 11, 7, 10, 4, 6, 5, 0, 8, 9, 3]
rank(x, NoneIsLast= False ,decreasing= False ,ties= first ) [8, 0, 1, 11, 5, 7, 6, 3, 9, 10, 4, 2]
rank(x, NoneIsLast= False ,decreasing= False ,ties= average ) [9.0, 0, 1, 11, 5.5, 7, 5.5, 3, 9.0, 9.0, 4, 2]
rank(x, NoneIsLast= False ,decreasing= False ,ties= min ) [8, 0, 1, 11, 5, 7, 5, 3, 8, 8, 4, 2]
rank(x, NoneIsLast= False ,decreasing= False ,ties= max ) [10, 0, 1, 11, 6, 7, 6, 3, 10, 10, 4, 2]
rank(x, NoneIsLast= False ,decreasing= False ,ties= random ) [9, 0, 1, 11, 4, 7, 6, 3, 0, 8, 4, 2]
order(NoneIsLast= None ,decreasing= True ) [3, 0, 8, 9, 5, 4, 6, 10, 7]
rank(x, NoneIsLast= None ,decreasing= True ,ties= first ) [1, 9, 10, 0, 5, 4, 6, 8, 2, 3, 7, 11]
rank(x, NoneIsLast= None ,decreasing= True ,ties= average ) [2.0, 0, 5.5, 4, 5.5, 8, 2.0, 2.0, 7]
rank(x, NoneIsLast= None ,decreasing= True ,ties= min ) [1, 0, 5, 4, 5, 8, 1, 1, 7]
rank(x, NoneIsLast= None ,decreasing= True ,ties= max ) [3, 0, 6, 4, 6, 8, 3, 3, 7]
rank(x, NoneIsLast= None ,decreasing= True ,ties= random ) [9, 0, 6, 4, 4, 8, 8, 0, 7]
order(NoneIsLast= None ,decreasing= False ) [7, 10, 4, 6, 5, 0, 8, 9, 3]
rank(x, NoneIsLast= None ,decreasing= False ,ties= first ) [5, 9, 10, 8, 2, 4, 3, 0, 6, 7, 1, 11]
rank(x, NoneIsLast= None ,decreasing= False ,ties= average ) [6.0, 8, 2.5, 4, 2.5, 0, 6.0, 6.0, 1]
rank(x, NoneIsLast= None ,decreasing= False ,ties= min ) [5, 8, 2, 4, 2, 0, 5, 5, 1]
rank(x, NoneIsLast= None ,decreasing= False ,ties= max ) [7, 8, 3, 4, 3, 0, 7, 7, 1]
rank(x, NoneIsLast= None ,decreasing= False ,ties= random ) [9, 8, 4, 4, 6, 0, 0, 8, 1]
"""
x = [5, None, None, 6, 2, 3, 2, 0, 5, 5, 1, None]
print "vector ", x
for NoneIsLast in [True, False, None]:
for decreasing in [True, False]:
O = order(x, NoneIsLast=NoneIsLast, decreasing=decreasing)
print "order(NoneIsLast=", NoneIsLast, ",decreasing=", decreasing, ")", O
for ties in ["first", "average", "min", "max", "random"]:
R = rank(x, NoneIsLast=NoneIsLast, decreasing=decreasing, ties=ties)
print "rank(x, NoneIsLast=", NoneIsLast, ",decreasing=", decreasing, ",ties=", ties, ")", R
print
print
|
Ordering and sorting are basic operations useful for indirect sorting of auxiliary lists and for statistical purposes. The routines are inspired from those available in R.
You can use a key function to move None to the end.
Thanks for the tip. It can also be used in processing missing values which may not be coded as None. Have revised the code.
Ordering and Ranking of lists with simplified requirements. Ignoring None values and treatment of ties, we can simplify the code:
Note that x[ordering[i] column are sorted.
Hi Ernesto,
could there be a bug in the "random" treatment of ties? I take it that is means ties are broken randomly. I think line 100 should then be R[s[i]] = j so that the rank of the element at index s[i] is picked randomly.