# (web page navigation residue, not part of the script) Welcome, guest | Sign In | My Account | Store | Cart
# -*- coding: utf-8 -*-
# KIndex (Knowledge Index) by Roberto Bello www.freeopen.org
"""
Once records have been cataloged into groups by a SOM neural network, the question
arises whether the groups actually contain knowledge, namely whether the groups are
distinct from one another and have homogeneous characteristics within each group.
The coefficient of variation (CV) can help answer this question.

KINDEX (Knowledge Index) is an index that measures how much knowledge is
contained in the groups obtained from the SOM neural network: if KINDEX
reached its maximum value of 1, each group would consist of records with constant
values in all the variables / columns, and each group would be quite distinct
from the other groups.
KINDEX is calculated from the weighted-average CV of the variables / columns
of the groups, compared with the CV of the variables / columns of the input file
before cataloging.

*************************************************************************************
Ottenuta la catalogazione in gruppi da una rete neurale di tipo SOM, sorge il dubbio
se nei gruppi esista o meno della conoscenza, ossia se i gruppi sono fra di loro 
distinti e con caratteristiche omogenee all'interno di ogni gruppo.

L'utilizzo del coefficiente di variazione (CV) può essere di aiuto.
KIndex (Knowledge Index) è un indice che misura quanta conoscenza sia
contenuta nei gruppi catalogati: nel caso KIndex raggiunga il valore massimo di 1,
ogni gruppo sarebbe composto da record con valori costanti in tutte le variabili /
colonne e renderebbe ogni gruppo del tutto distinto dagli altri gruppi.
KIndex è calcolato utilizzando i CV medi-ponderati delle variabili / colonne dei
gruppi rapportandoli al CV delle variabili / colonne del file di input prima della
catalogazione.

"""

def mean(x):
  """Return the arithmetic mean of the values in x as a float.

  Every element is passed through float(), so numeric strings are accepted.
  A mean that is exactly zero is replaced by 1e-13, so the result can be used
  as a divisor without raising ZeroDivisionError.

  Raises ZeroDivisionError if x is empty.
  """
  values = [float(item) for item in x]
  average = sum(values) / len(values)
  # never hand back an exact zero: substitute a tiny positive value
  return 0.0000000000001 if average == 0.0 else average

def sd(x):
  """Return the population standard deviation of the values in x.

  Every element is passed through float(), so numeric strings are accepted.
  The squared deviations are divided by n (population form), not n - 1.

  The deviations are taken from the true mean. No epsilon is substituted for
  a zero mean here: nothing in this function divides by the mean, and such a
  substitution would make an all-zero column report a spread of 1e-13
  instead of exactly 0.0.

  Raises ZeroDivisionError if x is empty.
  """
  values = [float(item) for item in x]
  n = len(values)
  centre = sum(values) / n
  return (sum((value - centre) ** 2 for value in values) / n) ** 0.5

# Input table. The first row is the header naming every column; each following
# row is one record: a group label, the animal name, then the numeric values of
# the 15 variables named in the header (0/1 in this data set, except LEGS,
# which holds other small integers).
# NOTE(review): the G_xx_yy labels are presumably the groups produced by the SOM
# network mentioned in the module docstring - confirm against the tool that
# generated this table.
arr0 = [['*Group*','ANIMAL','FUR','FEATHER','EGGS','MILK','FLYING','AQUATIC','PREDATORY',
    'TEETH','VERTEBRATE','POLMONES','POISONOUS','FLIPPERS','LEGS','TAIL','DOMESTIC'],
['G_00_00','ANTELOPE',1,0,0,1,0,0,0,1,1,1,0,0,4,1,0],
['G_00_00','BUFFALO',1,0,0,1,0,0,0,1,1,1,0,0,4,1,0],
['G_00_00','CALF',1,0,0,1,0,0,0,1,1,1,0,0,4,1,1],
['G_00_00','CAT',1,0,0,1,0,0,1,1,1,1,0,0,4,1,1],
['G_00_00','DEER',1,0,0,1,0,0,0,1,1,1,0,0,4,1,0],
['G_00_00','ELEPHANT',1,0,0,1,0,0,0,1,1,1,0,0,4,1,0],
['G_00_00','FIELD_MOUSE',1,0,0,1,0,0,0,1,1,1,0,0,4,1,0],
['G_00_00','GIRAFFE',1,0,0,1,0,0,0,1,1,1,0,0,4,1,0],
['G_00_00','GOAT',1,0,0,1,0,0,0,1,1,1,0,0,4,1,1],
['G_00_00','HAMSTER',1,0,0,1,0,0,0,1,1,1,0,0,4,1,1],
['G_00_00','HARE',1,0,0,1,0,0,0,1,1,1,0,0,4,1,0],
['G_00_00','KANGAROO',1,0,0,1,0,0,0,1,1,1,0,0,2,1,0],
['G_00_00','PONY',1,0,0,1,0,0,0,1,1,1,0,0,4,1,1],
['G_00_00','REINDEER',1,0,0,1,0,0,0,1,1,1,0,0,4,1,1],
['G_00_00','SQUIRREL',1,0,0,1,0,0,0,1,1,1,0,0,2,1,0],
['G_00_00','VAMPIRE',1,0,0,1,1,0,0,1,1,1,0,0,2,1,0],
['G_00_01','CAVY',1,0,0,1,0,0,0,1,1,1,0,0,4,0,1],
['G_00_01','GORILLA',1,0,0,1,0,0,0,1,1,1,0,0,2,0,0],
['G_00_02','BEE',1,0,1,0,1,0,0,0,0,1,1,0,6,0,1],
['G_00_03','CRAB',0,0,1,0,0,1,1,0,0,0,0,0,4,0,0],
['G_00_03','FLY',1,0,1,0,1,0,0,0,0,1,0,0,6,0,0],
['G_00_03','LADYBIRD',0,0,1,0,1,0,1,0,0,1,0,0,6,0,0],
['G_00_03','LOBSTER',0,0,1,0,0,1,1,0,0,0,0,0,6,0,0],
['G_00_03','MIDGE',0,0,1,0,1,0,0,0,0,1,0,0,6,0,0],
['G_00_03','MOLLUSK',0,0,1,0,0,0,1,0,0,0,0,0,0,0,0],
['G_00_03','MOTH',1,0,1,0,1,0,0,0,0,1,0,0,6,0,0],
['G_00_03','POLYP',0,0,1,0,0,1,1,0,0,0,0,0,8,0,0],
['G_00_03','PRAWN',0,0,1,0,0,1,1,0,0,0,0,0,6,0,0],
['G_00_03','STARFISH',0,0,1,0,0,1,1,0,0,0,0,0,5,0,0],
['G_00_03','WASP',1,0,1,0,1,0,0,0,0,1,1,0,6,0,0],
['G_01_00','BEAR',1,0,0,1,0,0,1,1,1,1,0,0,4,0,0],
['G_01_00','BOAR',1,0,0,1,0,0,1,1,1,1,0,0,4,1,0],
['G_01_00','CHEETAH',1,0,0,1,0,0,1,1,1,1,0,0,4,1,0],
['G_01_00','LEOPARD',1,0,0,1,0,0,1,1,1,1,0,0,4,1,0],
['G_01_00','LION',1,0,0,1,0,0,1,1,1,1,0,0,4,1,0],
['G_01_00','LYNX',1,0,0,1,0,0,1,1,1,1,0,0,4,1,0],
['G_01_00','MINK',1,0,0,1,0,1,1,1,1,1,0,0,4,1,0],
['G_01_00','MOLE',1,0,0,1,0,0,1,1,1,1,0,0,4,1,0],
['G_01_00','MONGOOSE',1,0,0,1,0,0,1,1,1,1,0,0,4,1,0],
['G_01_00','OPOSSUM',1,0,0,1,0,0,1,1,1,1,0,0,4,1,0],
['G_01_00','POLECAT',1,0,0,1,0,0,1,1,1,1,0,0,4,1,0],
['G_01_00','PUMA',1,0,0,1,0,0,1,1,1,1,0,0,4,1,0],
['G_01_00','WOLF',1,0,0,1,0,0,1,1,1,1,0,0,4,1,0],
['G_01_02','SCORPION',0,0,0,0,0,0,1,0,0,1,1,0,8,1,0],
['G_01_03','FLEA',0,0,1,0,0,0,0,0,0,1,0,0,6,0,0],
['G_01_03','SNAIL',0,0,1,0,0,0,0,0,0,1,0,0,0,0,0],
['G_01_03','TERMITE',0,0,1,0,0,0,0,0,0,1,0,0,6,0,0],
['G_01_03','WORM',0,0,1,0,0,0,0,0,0,1,0,0,0,0,0],
['G_02_00','DOLPHIN',0,0,0,1,0,1,1,1,1,1,0,1,0,1,0],
['G_02_00','SEAL',1,0,0,1,0,1,1,1,1,1,0,1,0,0,0],
['G_02_00','SEA_LION',1,0,0,1,0,1,1,1,1,1,0,1,2,1,0],
['G_02_01','DUCKBILL',1,0,1,1,0,1,1,0,1,1,0,0,4,1,0],
['G_02_02','TOAD',0,0,1,0,0,1,0,1,1,1,0,0,4,0,0],
['G_02_02','TORTOISE',0,0,1,0,0,0,0,0,1,1,0,0,4,1,0],
['G_03_00','CARP',0,0,1,0,0,1,0,1,1,0,0,1,0,1,1],
['G_03_00','CHUB',0,0,1,0,0,1,1,1,1,0,0,1,0,1,0],
['G_03_00','CODFISH',0,0,1,0,0,1,0,1,1,0,0,1,0,1,0],
['G_03_00','HERRING',0,0,1,0,0,1,1,1,1,0,0,1,0,1,0],
['G_03_00','PERCH',0,0,1,0,0,1,1,1,1,0,0,1,0,1,0],
['G_03_00','PIKE',0,0,1,0,0,1,1,1,1,0,0,1,0,1,0],
['G_03_00','PIRANHA',0,0,1,0,0,1,1,1,1,0,0,1,0,1,0],
['G_03_00','SEAHORSE',0,0,1,0,0,1,0,1,1,0,0,1,0,1,0],
['G_03_00','SEA_SNAKE',0,0,0,0,0,1,1,1,1,0,1,0,0,1,0],
['G_03_00','SHARK',0,0,1,0,0,1,1,1,1,0,0,1,0,1,0],
['G_03_00','SOLE',0,0,1,0,0,1,0,1,1,0,0,1,0,1,0],
['G_03_00','STURGEON',0,0,1,0,0,1,1,1,1,0,0,1,0,1,0],
['G_03_00','TUNA',0,0,1,0,0,1,1,1,1,0,0,1,0,1,0],
['G_03_01','FROG',0,0,1,0,0,1,1,1,1,1,0,0,4,0,0],
['G_03_01','TRITON',0,0,1,0,0,1,1,1,1,1,0,0,4,1,0],
['G_03_02','GULL',0,1,1,0,1,1,1,0,1,1,0,0,2,1,0],
['G_03_02','KIWI',0,1,1,0,0,0,1,0,1,1,0,0,2,1,0],
['G_03_02','PENGUIN',0,1,1,0,0,1,1,0,1,1,0,0,2,1,0],
['G_03_03','CHICKEN',0,1,1,0,1,0,0,0,1,1,0,0,2,1,1],
['G_03_03','CROW',0,1,1,0,1,0,1,0,1,1,0,0,2,1,0],
['G_03_03','DOVE',0,1,1,0,1,0,0,0,1,1,0,0,2,1,1],
['G_03_03','DUCK',0,1,1,0,1,1,0,0,1,1,0,0,2,1,0],
['G_03_03','FALCON',0,1,1,0,1,0,1,0,1,1,0,0,2,1,0],
['G_03_03','FLAMINGO',0,1,1,0,1,0,0,0,1,1,0,0,2,1,0],
['G_03_03','HAWK',0,1,1,0,1,0,1,0,1,1,0,0,2,1,0],
['G_03_03','OSTRICH',0,1,1,0,0,0,0,0,1,1,0,0,2,1,0],
['G_03_03','PHEASANT',0,1,1,0,1,0,0,0,1,1,0,0,2,1,0],
['G_03_03','SKYLARK',0,1,1,0,1,0,0,0,1,1,0,0,2,1,0],
['G_03_03','SPARROW',0,1,1,0,1,0,0,0,1,1,0,0,2,1,0],
['G_03_03','SWAN',0,1,1,0,1,1,0,0,1,1,0,0,2,1,0]]

# print "*********input*********"
# print arr0

# groups are not considered

# Dispersion of the whole file, ignoring the group labels.
num_col = len(arr0[0]) - 2   # value columns: row length minus the group and name fields
arr0 = arr0[1:]              # drop the header row; from here on arr0 holds data records only

# Transpose the value fields (everything after the first two fields) so that
# each tuple holds all the values of one variable / column. Iterating the zip
# result directly works on Python 2 (list) and Python 3 (lazy iterator) alike;
# indexing it or calling len() on it would fail on Python 3.
means_tot0 = 0.0
sd_tot0 = 0.0
for column in zip(*[record[2:] for record in arr0]):
  means_tot0 += mean(column)
  sd_tot0 += sd(column)

# CV of the input file: summed column standard deviations over summed column means.
cv0 = sd_tot0 / means_tot0

# Groups are considered

# Dispersion inside the groups, weighted by group size.
# arr0 had its header row removed earlier in the script, so every remaining row
# is a data record: count them all, and scan them all (including the first)
# when collecting the group labels.
recs = len(arr0)
num_col = len(arr0[0]) - 2

# Distinct group labels (first field of each record), in sorted order.
groups = sorted(set(record[0] for record in arr0))

means_group = 0.0
sd_group = 0.0
for group in groups:
  # value fields of the records that belong to this group
  members = [record[2:] for record in arr0 if record[0] == group]
  # zip(*members) yields one tuple per variable / column; iterating it directly
  # works on both Python 2 and Python 3
  for column in zip(*members):
    # weight each column statistic by the number of records in the group
    means_group += mean(column) * len(column)
    sd_group += sd(column) * len(column)

# Per-record weighted averages, then their ratio: the CV inside the groups.
means_tot = means_group / recs
sd_tot = sd_group / recs
cv = sd_tot / means_tot

# 1.0 when the groups have no internal dispersion; lower as the CV inside the
# groups approaches the CV of the ungrouped file (cv0).
kindex = 1.0 - cv / cv0

# A single parenthesised argument prints the same text under the Python 2
# print statement and the Python 3 print() function.
print("kIndex (groups NOT considered)   " + str(1 - cv0))
print("KIndex (groups considered)       " + str(kindex))

# (web page residue, not part of the script) History