Welcome, guest | Sign In | My Account | Store | Cart
```# -*- coding: utf-8 -*-
###############################################################################
# KB_CAT KNOWLEDGE DISCOVERY IN DATA MINING (CATALOG PROGRAM)                 #
# Language used: PYTHON                               .                       #
###############################################################################
import os
import random
import copy
import datetime

def mean(x):
    """Return the arithmetic mean of the numbers in *x*.

    The total is divided by ``float(len(x))`` so that a sequence of integers
    is not truncated by Python 2 integer division.

    Raises:
        ZeroDivisionError: if *x* is empty.
    """
    return sum(x) / float(len(x))

def sd(x):
    """Return the population standard deviation of *x* (divisor ``len(x)``).

    Float division is forced so that integer inputs are not truncated under
    Python 2.

    Raises:
        ZeroDivisionError: if *x* is empty.
    """
    count = float(len(x))
    centre = sum(x) / count
    return (sum((value - centre) ** 2 for value in x) / count) ** 0.5

def is_number(s):
    """Return True when ``float(s)`` succeeds, False otherwise.

    Values of an unsupported type (for example ``None`` or a list) are
    reported as "not a number" instead of letting ``TypeError`` escape.
    Anything ``float`` accepts counts as a number, including the strings
    ``"nan"`` and ``"inf"``.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True

class ndim:
    """Map between 3-D grid coordinates and positions in a flat list.

    The flat layout is row-major: the last coordinate varies fastest.
    Floor division is used throughout so the results stay integers (and
    therefore usable as list indices) on both Python 2 and Python 3.
    """

    def __init__(self, x, y, z, d):
        """Store the three axis sizes *x*, *y*, *z* and the axis count *d*."""
        self.dimensions = [x, y, z]
        self.numdimensions = d
        self.gridsize = x * y * z

    def getcellindex(self, location):
        """Return the flat index of the cell at *location* (a coordinate sequence)."""
        cindex = 0
        cdrop = self.gridsize
        for axis in range(self.numdimensions):
            # cdrop becomes the stride of the current axis
            cdrop //= self.dimensions[axis]
            cindex += cdrop * location[axis]
        return cindex

    def getlocation(self, cellindex):
        """Return the ``[x, y, z]`` coordinates of the flat index *cellindex*."""
        res = []
        # peel off the fastest-varying axis first, then reverse the result
        for size in reversed(self.dimensions):
            cellindex, remainder = divmod(cellindex, size)
            res.append(remainder)
        return res[::-1]

""" How to use the ndim class:
n=ndim(4,4,5,3)
print n.getcellindex((0,0,0))
print n.getcellindex((0,0,1))
print n.getcellindex((0,1,0))
print n.getcellindex((1,0,0))

print n.getlocation(20)
print n.getlocation(5)
print n.getlocation(1)
print n.getlocation(0)
"""

print("###############################################################################")
print("# KB_CAT KNOWLEDGE DISCOVERY IN DATA MINING (CATALOG PROGRAM)                 #")
print("# Language used: PYTHON                                                       #")
print("###############################################################################")

# input and run parameters
error = 0


def _ask_bounded(prompt, convert, low, high, too_low_msg, too_high_msg):
    """Prompt until the reply converts with *convert* and lies in [low, high]."""
    while True:
        try:
            value = convert(raw_input(prompt))
        except ValueError:
            print("Oops!  That was no valid number.  Try again...")
            continue
        if value < low:
            print(too_low_msg)
        elif value > high:
            print(too_high_msg)
        elif value != value:
            # NaN compares false against both bounds; reject it explicitly
            print("Oops!  That was no valid number.  Try again...")
        else:
            return value


# input file: keep asking until the path names an existing file
while True:
    arch_input = raw_input('InputFile                              : ')
    if os.path.isfile(arch_input):
        break
    print("Oops! File does not exist. Try again... or CTR/C to exit")

num_gruppi = _ask_bounded('Number of Groups (3 - 20)              : ', int, 3, 20,
                          "Oops! Number of Groups too low. Try again...",
                          "Oops! Number of Groups too big. Try again...")

# normalization: only the first letter of the reply matters (M, S or N);
# an empty reply yields '' and is rejected instead of raising IndexError
while True:
    normaliz = raw_input('Normalization(Max, Std, None)          : ')
    normaliz = normaliz.strip().upper()[:1]
    if normaliz in ('M', 'S', 'N'):
        break
    print("Oops! Input M, S or N. Try again...")

max_alpha = _ask_bounded('Start value of alpha (1.8 - 0.9)       : ', float, 0.9, 1.8,
                         "Oops! Start value of alpha too low. Try again...",
                         "Oops! Start value of alpha too big. Try again...")

min_alpha = _ask_bounded('End value of alpha (0.5 - 0.0001)      : ', float, 0.0001, 0.5,
                         "Oops! alpha too low. Try again...",
                         "Oops! alpha too big. Try again...")

step_alpha = _ask_bounded('Decreasing step of alpha (0.1 - 0.001) : ', float, 0.001, 0.1,
                          "Oops! Decreasing step of alpha too low. Try again...",
                          "Oops! Decreasing step of alpha too big. Try again...")

# run parameters under the names used by the rest of the script
file_input = arch_input
gruppi_num = num_gruppi
tipo_norm = normaliz
alpha_min = min_alpha
alpha_max = max_alpha
alpha_step = step_alpha

# output files: <input without extension>_<norm>_g<groups>_<kind>.txt
# os.path.splitext removes only the final extension, so dots elsewhere in the
# path (e.g. "./data/x.txt" or "my.data.txt") no longer truncate the name.
nome_input = [os.path.splitext(file_input)[0]]
_prefisso = nome_input[0] + "_" + tipo_norm + "_g" + str(gruppi_num)
arch_output = _prefisso + "_out.txt"
arch_outsrt = _prefisso + "_outsrt.txt"
arch_sort = _prefisso + "_sort.txt"
arch_catal = _prefisso + "_catal.txt"
arch_medsd = _prefisso + "_medsd.txt"
arch_cv = _prefisso + "_cv.txt"
arch_grid = _prefisso + "_grid.txt"
arch_log = _prefisso + "_log.txt"

# start time
t0 = datetime.datetime.now()

arr_r = []
arr_orig = []
arr_c = []
mtchx = []
mtchy = []
txt_col = []
xnomi = []

# Read the whitespace-separated input file: the first line is the header,
# every following line is a record.  The number of variables / columns in
# all records must be the same as in the header.
n_rows = 0
n_cols = 0
err_cols = 0
index = 0
with open(file_input) as input_file:
    for line in input_file:
        linea = line.split()
        if index == 0:
            xnomi.append(linea)
            n_cols = len(linea)
        else:
            arr_r.append(linea)
            if len(linea) != n_cols:
                err_cols = 1
                print("Different numbers of variables / columns in the record " + str(index)
                      + " cols " + str(len(linea)))
        index += 1
if err_cols == 1:
    print("File " + file_input + " contains errors. Exit ")
    quit()
if not xnomi:
    # an empty file has no header row to work with
    print("File " + file_input + " is empty. Exit ")
    quit()

# Convert numeric fields to float.  Column 0 (the record key) is converted
# only when it is made of digits; the data columns accept anything float()
# parses, so decimals, negatives and exponents are not mistaken for text.
for linea in arr_r:
    for index_c, campo in enumerate(linea):
        if index_c == 0:
            if campo.isdigit():
                linea[index_c] = float(campo)
        else:
            try:
                linea[index_c] = float(campo)
            except ValueError:
                pass  # genuine text value: left as a string

arr_orig = copy.deepcopy(arr_r)        # original input file
testata_cat = copy.deepcopy(xnomi[0])  # original header row

# finding columns containing strings and columns containing numbers
testata = xnomi[0]
testata_orig = copy.deepcopy(xnomi[0])
n_cols = len(testata) - 1      # data columns (column 0 is not counted)
n_rows = len(arr_r)
err_type = 0
for ind_c in range(1, len(testata)):
    tipo_num = 0
    tipo_txt = 0
    # every record is inspected, including the first one (arr_r holds data
    # rows only, the header is kept separately in xnomi)
    for arr_c in arr_r:
        if is_number(arr_c[ind_c]):
            tipo_num = 1
        else:
            tipo_txt = 1
    if tipo_num == 1 and tipo_txt == 1:
        print("The columns / variables " + testata[ind_c] + " contains both strings and numbers.")
        print(arr_c)
        err_type = 1
if err_type == 1:
    print("Oops! The columns / variables contains both strings and numbers. Exit. ")
    quit()

# Replace the text values of each data column with numbers: the distinct
# strings are sorted and spread evenly over the interval 0..1.
for index_c in range(1, n_cols + 1):
    # distinct string values of this column, in sorted order
    txt_col = sorted(set(riga[index_c] for riga in arr_r
                         if isinstance(riga[index_c], str)))

    # from strings to numbers
    if txt_col:
        distinti = len(txt_col)
        passo1 = 1.0 / (distinti - 1) if distinti > 1 else 0.0
        for arr_c in arr_r:
            indice1 = txt_col.index(arr_c[index_c])
            if distinti == 1:
                val_num1 = float(1)   # same value in the whole column
            else:
                val_num1 = float(passo1 * indice1)
            # small offset to avoid zero values in means
            # (to prevent zero divide in CV)
            arr_c[index_c] = val_num1 + 0.00000001

# means, max & std
# One slot per column; slot 0 is allocated but never filled because the data
# columns are numbered 1..n_cols.
xmeans = [0.0] * (n_cols + 1)
xmaxs = [-9999999999999999.9] * (n_cols + 1)
xmins = [9999999999999999.9] * (n_cols + 1)    # added by Roberto 4/03/2012
xsds = [0.0] * (n_cols + 1)
xcv = [0.0] * (n_cols + 1)

# means & max
for index in range(n_rows):
    arr_c = arr_r[index]
    for index_c in range(1, n_cols + 1):
        xmeans[index_c] += arr_c[index_c]
        if arr_c[index_c] > xmaxs[index_c]:
            xmaxs[index_c] = arr_c[index_c]
for index_c in range(1, n_cols + 1):
    xmeans[index_c] = xmeans[index_c] / n_rows

# std
for index in range(n_rows):
    arr_c = arr_r[index]
    for index_c in range(1, n_cols + 1):
        xsds[index_c] += (arr_c[index_c] - xmeans[index_c]) ** 2

# The squared deviations are summed over the records, so the divisor is the
# number of records minus one (not the number of columns); it is floored at
# 1 so that a single-record file does not divide by zero.
divisore_sd = max(n_rows - 1, 1)
for index_c in range(1, n_cols + 1):
    xsds[index_c] = (xsds[index_c] / divisore_sd) ** 0.5

# Means, Max, Std, CV output file


def _write_medsd_row(out_file, label, cells):
    """Write one tab-separated line: *label* followed by every item of *cells*."""
    out_file.write('%s %s ' % (label, "\t"))
    for cell in cells:
        out_file.write('%s %s ' % (cell, "\t"))
    out_file.write('%s' % ('\n'))


# CV of every column = std / |mean| (a zero mean is replaced by 0.000001)
med_cv_gen = 0.0        # cv average of all columns / variables
for index_c in range(1, n_cols + 1):
    if xmeans[index_c] == 0:
        media1 = 0.000001
    else:
        media1 = xmeans[index_c]
    xcv[index_c] = xsds[index_c] / abs(media1)
    med_cv_gen += xcv[index_c]
med_cv_gen = med_cv_gen / n_cols
str_med_cv_gen = str(med_cv_gen)[0:6]

# the with-statement closes the file even if a write fails
with open(arch_medsd, 'w') as medsd_file:
    colonne = range(1, n_cols + 1)
    # columns names
    _write_medsd_row(medsd_file, 'Function', [testata[c] for c in colonne])
    # every value is written as the first six characters of its str() form
    _write_medsd_row(medsd_file, 'Mean', [str(xmeans[c])[0:6] for c in colonne])
    _write_medsd_row(medsd_file, 'Max', [str(xmaxs[c])[0:6] for c in colonne])
    _write_medsd_row(medsd_file, 'Std', [str(xsds[c])[0:6] for c in colonne])
    _write_medsd_row(medsd_file, 'CV', [str(xcv[c])[0:6] for c in colonne])

# input standardization

# standardization on max: every value is divided by its column maximum
if tipo_norm == 'M':
    for index in range(n_rows):
        arr_c = arr_r[index]
        for index_c in range(1, n_cols + 1):    # keep kb_cla.py in step with this loop
            if xmaxs[index_c] == 0.0:
                xmaxs[index_c] = 0.00001        # avoid dividing by zero
            arr_c[index_c] = arr_c[index_c] / xmaxs[index_c]

# standardization on std: (value - mean) / std, then shifted to be >= 0
if tipo_norm == 'S':
    for index in range(n_rows):
        arr_c = arr_r[index]
        for index_c in range(1, n_cols + 1):
            if xsds[index_c] == 0.0:
                xsds[index_c] = 0.00001         # avoid dividing by zero
            arr_c[index_c] = (arr_c[index_c] - xmeans[index_c]) / xsds[index_c]
            # track the smallest standardized value of the column
            # (added by Roberto 4/03/2012)
            if arr_c[index_c] < xmins[index_c]:
                xmins[index_c] = arr_c[index_c]
    # subtract the column minimum to remove negative values
    # (added by Roberto 4/03/2012); the leftover debug print that echoed
    # every shifted value to the console has been removed
    for index in range(n_rows):
        arr_c = arr_r[index]
        for index_c in range(1, n_cols + 1):
            arr_c[index_c] = arr_c[index_c] - xmins[index_c]

# start of kohonen algorithm

# min and max vectors, one slot per column (slot 0 keeps its initial value)
vmaxs = [-10000000000000.0] * (n_cols + 1)
vmins = [10000000000000.0] * (n_cols + 1)

# columns min & max over the (possibly normalized) records
for index in range(n_rows):
    arr_c = arr_r[index]
    for index_c in range(1, n_cols + 1):
        valore_cella = arr_c[index_c]
        if valore_cella > vmaxs[index_c]:
            vmaxs[index_c] = valore_cella
        if valore_cella < vmins[index_c]:
            vmins[index_c] = valore_cella

# run parameters and temp arrays

n = n_rows
m = n_cols
nx = gruppi_num
ny = gruppi_num
ix = 950041                         # integer as random seed
nsteps = int(10000 * nx * ny)       # number of steps
nepoks = int(nsteps / n ** 0.5)     # number of epochs
unit_calc = int(n * m * nx * ny)    # running units
passo = max(int(5000 / n), 1)       # step of visualization on monitor (at least 1)
rmax = nx - 1
rmin = 1.0

# training grid: nx * ny cells with m coefficients each, stored flat
grid = [0.0] * (nx * ny * m)
n = ndim(nx, ny, m, 3)              # from here on n is the 3-D -> flat index mapper
random.seed(ix)                     # fixed seed to obtain the same sequences in new runs
for index in range(nx):
    for index_c in range(ny):
        for index_k in range(m):
            ig = n.getcellindex((index, index_c, index_k))
            grid[ig] = random.random()
gridp = grid[:]                     # initial previous grid = current grid
gridm = grid[:]                     # initial min grid = current grid

# state carried across records and epochs
iter = 0
discrea = 1000000000000.0           # current error
discrep = 0.0                       # previous error
nepoks = max(nepoks, 20)            # min epochs = 20
nepokx = 0
min_epok = 0                        # epoch with min error
min_err = 1000000000.0              # min error
alpha = float(alpha_max)            # initial value of alpha parameter
ir = 0.0                            # initial value of ir parameter
ne = 1

print(" ")
print('Record ' + str(n_rows) + ' Columns ' + str(n_cols))

# main loop: one pass over all records per epoch, until nepoks epochs have
# run, alpha has decayed to alpha_min, or the user presses Ctrl/C
try:
    while ne <= nepoks:
        if ne % passo == 0:
            # running message, printed once every `passo` epochs
            min_err_txt = "%14.5f" % min_err
            alpha_txt = "%12.5f" % alpha
            print('Epoch ' + str(ne) + '   min err ' + min_err_txt + '   min epoch ' +
                  str(min_epok - 1) + "   alpha " + alpha_txt)
        if min_err < 1000000000.0:
            nepokx += 1     # counter only; not read anywhere in this file
        if min_err > discrea and discrep > discrea and discrea > 0.0:
            # discrea is a new minimum and the error has started to rise again
            min_epok = ne               # current epoch (min)
            min_err = discrea
            # copy current grid to min grid
            gridm = copy.deepcopy(grid)
            min_err_txt = "%12.3f" % min_err
            alpha_txt = "%12.5f" % alpha
            print('**** Epoch ' + str(ne - 1) + '       WITH MIN ERROR ' + min_err_txt +
                  "   alpha " + alpha_txt)

        # checking the current value of alpha
        if alpha > alpha_min:
            discrea = discrep
            discrep = 0.0
            # copy current grid to previous grid
            gridp = copy.deepcopy(grid)

            # from the starting row to the ending row
            i = 0
            while i < n_rows:
                iter += 1
                # find the best grid coefficient
                ihit = 0
                jhit = 0
                # An infinite sentinel lets the first cell always win the
                # comparison; the former 100000.0 silently selected cell
                # (0, 0) whenever every distance exceeded that value.
                dhit = float("inf")
                igx = 0
                igy = 0
                while igx < nx:
                    igy = 0
                    while igy < ny:
                        d = 0.0
                        k = 0
                        arr_c = arr_r[i]
                        while k < m:   # update the sum of squared deviation of input
                                       # value from the grid coefficient
                            ig = n.getcellindex((igx, igy, k))
                            d = d + (arr_c[k+1] - grid[ig]) ** 2
                            k += 1
                        d = d / float(m)
                        if d < dhit:
                            dhit = d
                            ihit = int(igx)
                            jhit = int(igy)
                        igy += 1
                    igx += 1
                # update iteration error
                discrep = discrep + dhit
                # now we have the coordinates of the best grid coefficient;
                # the neighbourhood radius shrinks with the record counter
                # and never drops below 1
                ir = max(rmax * float(1001 - iter) / 1000.0 + 0.9999999999, 1)
                ir = int(ir)
                # alpha decreases linearly with the epoch number, floored at alpha_min
                alpha = max(alpha_max * float(1 - ne * alpha_step), alpha_min)
                # update the grid coefficients applying alpha parameter to
                # every cell within radius ir of the best cell
                inn0 = int(ihit) - int(ir)
                inn9 = int(ihit) + int(ir)
                jnn0 = int(jhit) - int(ir)
                jnn9 = int(jhit) + int(ir)
                while inn0 <= inn9:
                    jnn0 = int(jhit) - int(ir)
                    while jnn0 <= jnn9:
                        # skip neighbours that fall outside the grid
                        if not (inn0 < 0 or inn0 >= nx):
                            if not (jnn0 < 0 or jnn0 >= ny):
                                arr_c = arr_r[i]
                                k = 0
                                while k < m:
                                    ig = n.getcellindex((inn0, jnn0, k))
                                    grid[ig] += alpha * (arr_c[k+1] - grid[ig])
                                    k += 1
                        jnn0 += 1
                    inn0 += 1
                i += 1
        else:
            print("")
            print("Min alpha reached ")
            print("")
            break
        ne += 1
except KeyboardInterrupt:
    # Ctrl/C stops the training; the results computed so far are still written
    print("")
    print("KeyboardInterrupt (Ctrl/C) ")
    print("")

# computing results
# grid = grid min
grid = copy.deepcopy(gridm)

# write min grid file: one line per coefficient (x, y, k, value)
with open(arch_grid, 'w') as arch_grid_file:
    for ii in range(nx):
        for j in range(ny):
            for k in range(m):
                ig = n.getcellindex((ii, j, k))
                arch_grid_file.write('%6i %s %.6i %s %.6i %s %14.7f %s' % (ii,' ', j ,' ', k,' ', grid[ig], "\n"))

# assign every record to the grid cell with the smallest mean squared deviation
for ii in range(n_rows):
    arr_c = arr_r[ii]
    ihit = 0
    jhit = 0
    # infinite sentinel: the former 100000.0 left the record in cell (0, 0)
    # whenever every distance exceeded that value
    dhit = float("inf")
    for ir in range(nx):            # grid rows
        for jc in range(ny):        # grid columns
            d = 0.0
            for k in range(n_cols):
                # sum of squared deviation of input value from the grid coefficient
                ig = n.getcellindex((ir, jc, k))
                d = d + (arr_c[k+1] - grid[ig]) ** 2
            d = d / m
            if d < dhit:            # save the coordinates of the best coefficient
                dhit = d
                ihit = ir
                jhit = jc
    mtchx.append(ihit)
    mtchy.append(jhit)

# write arch_catal file: record number and the coordinates of its cell
with open(arch_catal, 'w') as arch_catal_file:
    for ii in range(n_rows):
        arch_catal_file.write("%.6i %s %.6i %s %.6i %s" % (ii, ' ', mtchx[ii], ' ', mtchy[ii], "\n"))

# matrix of statistics
arr_cv = []         # CV array of the Groups and Total
arr_med = []        # means array of the Groups
riga_cv = []        # CV row in arr_cv
arr_col = []        # group temporary array
arr_grsg = []       # input data array (normalized)
arr_grsg_c = []     # copy of arr_grsg (for file out sort)


def _two_digits(valore):
    """Return *valore* as text, with a leading '0' when it is below 10."""
    if valore < 10:
        return '0' + str(valore)
    return str(valore)


# input matrix sort in group sequence: each record becomes the text line
# "G_<x>_<y> <record number> <key and normalized values>"
ix = 0
for ii in range(n_rows):
    ix += 1
    gr1 = _two_digits(mtchx[ii])
    sg1 = _two_digits(mtchy[ii])
    riga_norm = arr_r[ii]
    riga_norm2 = " ".join([str(riga_norm[im]) for im in range(m + 1)])
    arr_grsg.append("G_" + gr1 + "_" + sg1 + " " + str(ix) + " " + riga_norm2)
arr_grsg.sort()
arr_grsg_c.extend([arr_grsg[ii] for ii in range(n_rows)])

# setup of arr_cv matrix: one row per distinct group; each row holds the
# group name followed by m + 2 zeros (m CV values, row mean, number of records)
num_gr = 0
gruppo0 = ""
for ir in range(n_rows):
    grsg_key = arr_grsg_c[ir].split()
    if grsg_key[0] != gruppo0:
        gruppo0 = grsg_key[0]
        num_gr += 1
        arr_cv.append([grsg_key[0]] + [0.0] * (m + 2))
# closing row for the means of the CV columns
arr_cv.append(["*Means*"] + [0.0] * (m + 2))

def found(x):
    """Return the position of the first ``arr_cv`` row whose key (field 0) equals *x*.

    Returns None when no row has that key.
    """
    for posizione, riga in enumerate(arr_cv):
        if riga[0] == x:
            return posizione
    return None

# Coefficient of variation (CV = std / |mean|) of every data column inside
# every group.  Each arr_grsg_c line splits into: group name, record number,
# record key, then the data values, so the data columns start at field 3.
ir  = 0
irx = len(arr_grsg_c)
ic  = 3
linea_cv = arr_cv[0]
icx = len(linea_cv)
val_col = []

while ic < icx:
    ir = 0
    gruppo  = ""
    val_col = []
    # arr_grsg_c is sorted by group: collect the values of the current group
    # and flush them into arr_cv whenever the group name changes
    while ir < irx:
        linea = arr_grsg_c[ir].split()
        if linea[0] == gruppo or gruppo == "":
            gruppo = linea[0]
            val_col.append(float(linea[ic]))
        else:
            i_gruppo = found(gruppo)
            linea_cv = arr_cv[i_gruppo]
            media_v = abs(mean(val_col))
            if media_v == 0.0:
                media_v = 0.0000000001                 # avoid dividing by zero
            std_v = sd(val_col)
            cv_v  = std_v / media_v
            linea_cv[ic-2] = cv_v                      # cv value
            linea_cv[len(linea_cv)-1] = len(val_col)   # number of records
            val_col = []
            val_col.append(float(linea[ic]))
            gruppo = linea[0]
        ir += 1
    # flush the last group of the column
    i_gruppo = found(gruppo)
    linea_cv = arr_cv[i_gruppo]
    media_v = abs(mean(val_col))
    if media_v == 0.0:
        media_v = 0.0000000001                         # avoid dividing by zero
    std_v = sd(val_col)
    cv_v  = std_v / media_v
    linea_cv[ic-2] = cv_v                          # cv value
    linea_cv[len(linea_cv)-1] = len(val_col)       # number of records
    ic += 1
ir  = 0
irx = len(arr_cv)
linea_cv = arr_cv[0]
icx = len(linea_cv) - 2          # index of the row-mean field; icx + 1 holds the record count
ic  = 1
num_rec1 = 0

while ir < irx:                                  # rows mean
    media_riga = 0.0
    ic = 1
    num_col1 = 0
    linea_cv = arr_cv[ir]
    while ic < icx:
        media_riga += float(linea_cv[ic])
        num_col1 += 1
        ic += 1
    linea_cv[icx] = media_riga / num_col1
    num_rec1 += linea_cv[icx + 1]
    ir += 1
ir  = 0
ic  = 1

while ic < icx:                  # weighted mean of columns
    media_col = 0.0
    ir = 0
    num_rec1 = 0
    # all rows except the last one, which receives the result
    while ir < irx - 1:
        linea_cv = arr_cv[ir]
        media_col = media_col + linea_cv[ic] * linea_cv[icx+1]  # linea_cv[icx+1] = number of records
        num_rec1 = num_rec1 + linea_cv[icx+1]
        ir += 1
    linea_cv = arr_cv[irx - 1]
    linea_cv[ic] = media_col / num_rec1
    ic += 1

# updating mean of the row
linea_cv = arr_cv[irx - 1]
linea_means = linea_cv[1:icx]
media_riga  = mean(linea_means)
linea_cv[icx] = media_riga        # Total mean
linea_cv[icx + 1] = num_rec1      # number of records
# text form of the total mean, cut to its first six characters
cv_media_gen_after = str(media_riga)
cv_media_gen_after = cv_media_gen_after[0:6]

# write cv file
# Work on a copy of the header so that the shared `testata` list is not
# modified through an alias.
testata_cv = list(testata)
testata_cv[0] = "*Groups*"
testata_cv.append("*Mean*")
testata_cv.append("N_recs")

media_xcv = mean(xcv[1:icx])    # mean CV of the input columns (before catalogue)

# the with-statement closes the file even if a write fails
with open(arch_cv, 'w') as arch_cv_file:
    # header line, every title padded to 9 characters
    for ic in range(icx + 2):
        arch_cv_file.write('%s %s ' % (testata_cv[ic], " "*(9-len(testata_cv[ic]))))
    arch_cv_file.write('%s' % ('\n'))

    # one line per arr_cv row: name, CV values and row mean, number of records
    for ir in range(irx):
        linea_cv = arr_cv[ir]
        arch_cv_file.write('%s %s ' % (linea_cv[0], "  "))
        for ic in range(1, icx + 1):
            arch_cv_file.write('%7.4f %s ' % (linea_cv[ic], "  "))
        arch_cv_file.write('%6i %s ' % (linea_cv[icx + 1], "  "))
        arch_cv_file.write('%s' % ("\n"))

    # CV input (before catalogue), closed by the record count of the last row
    arch_cv_file.write('%s %s ' % ("*CVinp*", "  "))
    for ic in range(1, icx):
        arch_cv_file.write('%7.4f %s ' % (xcv[ic], "  "))
    arch_cv_file.write('%7.4f %s ' % (media_xcv, "  "))
    arch_cv_file.write('%6i %s ' % (arr_cv[irx - 1][icx + 1], "  "))
    arch_cv_file.write('%s' % ("\n"))

# writing out catalog file: the original records, each preceded by its group
testata_cat1 = ["*Group*"]
for ic in range(icx):
    testata_cat1.append(testata_cat[ic])

# the with-statement closes the file (the original code never closed it)
with open(arch_output, 'w') as arch_output_file:
    for titolo in testata_cat1:
        arch_output_file.write('%s %s ' % (titolo, " "*(15-len(titolo))))
    arch_output_file.write('%s' % ("\n"))
    for index in range(len(arr_orig)):
        riga_orig = arr_orig[index]
        # the fields are turned into text in place; the rows of arr_orig stay
        # converted for the code that follows
        for ic in range(len(riga_orig)):
            if not isinstance(riga_orig[ic], str):
                riga_orig[ic] = str(riga_orig[ic])
        # place before 0 if gr / sg < 10
        gr1 = str(mtchx[index])
        if mtchx[index] < 10:
            gr1 = '0' + gr1
        sg1 = str(mtchy[index])
        if mtchy[index] < 10:
            sg1 = '0' + sg1
        arr_rig0 = "G_" + gr1 + "_" + sg1 + " "*8
        arch_output_file.write('%s ' % (arr_rig0))
        for campo in riga_orig:
            arch_output_file.write('%s %s ' % (campo, " "*(15-len(campo))))
        arch_output_file.write('%s' % ("\n"))

# writing out the sort file: the normalized records in group order
testata_cat1 = ["*Group*", "*RecNum*"]
for ic in range(icx):
    testata_cat1.append(testata_cat[ic])

with open(arch_sort, 'w') as arch_sort_file:
    for titolo in testata_cat1:
        arch_sort_file.write('%s %s ' % (titolo, " "*(15-len(titolo))))
    arch_sort_file.write('%s' % ("\n"))
    for index in range(len(arr_grsg_c)):
        riga_grsg = arr_grsg_c[index].split()
        for val_txt in riga_grsg:
            val_txt = val_txt[0:13]     # at most 13 characters per field
            arch_sort_file.write('%s %s ' % (val_txt, " "*(15-len(val_txt))))
        # no newline after the last record
        if index < len(arr_grsg_c) - 1:
            arch_sort_file.write('%s' % ("\n"))

# writing out catalog and sorted file: the original records in group order
arr_outsrt = []
for index in range(len(arr_orig)):
    # place before 0 if gr / sg < 10
    gr1 = str(mtchx[index])
    if mtchx[index] < 10:
        gr1 = '0' + gr1
    sg1 = str(mtchy[index])
    if mtchy[index] < 10:
        sg1 = '0' + sg1
    riga_sort = ["G_" + gr1 + "_" + sg1]
    # str() guarantees text fields, which the padding below needs
    riga_sort.extend([str(val_riga) for val_riga in arr_orig[index]])
    arr_outsrt.append(riga_sort)

# rows start with the group name, so sorting puts each group together
arr_outsrt.sort()

testata_srt = ["*Group*"]
for ic in range(icx):
    testata_srt.append(testata_orig[ic])

# the with-statement closes the file even if a write fails
with open(arch_outsrt, 'w') as arch_outsrt_file:
    for titolo in testata_srt:
        arch_outsrt_file.write('%s %s' % (titolo, " "*(15-len(titolo))))
    arch_outsrt_file.write('%s' % ("\n"))
    for index, riga_sort in enumerate(arr_outsrt):
        for valore in riga_sort:
            arch_outsrt_file.write('%s %s' % (valore, " "*(15-len(valore))))
        # no newline after the last record
        if index < len(arr_outsrt) - 1:
            arch_outsrt_file.write('%s' % ("\n"))

print("###############################################################################")
print("# KB_CAT KNOWLEDGE DISCOVERY IN DATA MINING (CATALOG PROGRAM)                 #")
print("# Language used: PYTHON                                                       #")
print("###############################################################################")

# Knowledge index = 1 - (CV after catalog / CV before catalog), computed once
# from the six-character text forms of the two values (added by Roberto
# Bello 29/02/2012).  A zero "before" value would divide by zero, so the
# index is reported as "n/a" in that case.
cv_prima = float(str_med_cv_gen)
if cv_prima == 0.0:
    know_index = "n/a"
else:
    know_index = str(1.0 - float(cv_media_gen_after) / cv_prima)
    know_index = know_index[0:6]

# run parameters log; the with-statement closes the file (the original code
# never closed it)
with open(arch_log, 'w') as arch_log_file:
    arch_log_file.write("%s %s" % ("############################################################################", "\n"))
    arch_log_file.write("%s %s" % ("# KB_CAT KNOWLEDGE DISCOVERY IN DATA MINING (CATALOG PROGRAM)              #", "\n"))
    arch_log_file.write("%s %s" % ("# Language used: PYTHON                     .                              #", "\n"))
    arch_log_file.write("%s %s" % ("############################################################################", "\n"))
    arch_log_file.write("%s %s %s" % ("Input File                                        -> ", file_input, "\n"))
    arch_log_file.write("%s %s %s" % ("Numer of Groups (3 - 20)                          -> ", str(gruppi_num), "\n"))
    arch_log_file.write("%s %s %s" % ("Normalization (Max, Std, None)                    -> ", tipo_norm, "\n"))
    arch_log_file.write("%s %s %s" % ("Start Value of alpha (from 1.8 to 0.9)            -> ", str(alpha_max), "\n"))
    arch_log_file.write("%s %s %s" % ("End Value of alpha (from 0.5 to 0.0001)           -> ", str(alpha_min), "\n"))
    arch_log_file.write("%s %s %s" % ("Decreasing step of alpha (from 0.1 to 0.001)      -> ", str(alpha_step), "\n"))
    arch_log_file.write("%s"       % ("=========================OUTPUT=======================================================\n"))
    arch_log_file.write("%s %s %s" % ("Output File Catalog.original     ", arch_output, "\n"))
    arch_log_file.write("%s %s %s" % ("Output File Catalog.sort         ", arch_outsrt, "\n"))
    arch_log_file.write("%s %s %s" % ("Output File Summary sort         ", arch_sort, "\n"))
    arch_log_file.write("%s %s %s" % ("Output File Matrix Catal.        ", arch_catal, "\n"))
    arch_log_file.write("%s %s %s" % ("Output File Means, STD, CV.      ", arch_medsd, "\n"))
    arch_log_file.write("%s %s %s" % ("Output File CV of the Groups     ", arch_cv, "\n"))
    arch_log_file.write("%s %s %s" % ("Output File Training Grid        ", arch_grid, "\n"))
    arch_log_file.write("%s %s %s" % ("Output File Run Parameters       ", arch_log, "\n"))
    arch_log_file.write('%s %s %s' % ('*KIndex*   ', know_index, '\n'))

min_err_txt = "%12.3f" % min_err      # width 12, 3 decimals
alpha_txt = "%12.5f" % alpha          # width 12, 5 decimals
alpha_min_txt = "%12.5f" % alpha_min  # width 12, 5 decimals
alpha_max_txt = "%12.5f" % alpha_max  # width 12, 5 decimals

print("")
if min_err == 1000000000.000:
    # min_err still holds its initial value: no minimum was ever recorded
    print("Oops! No result. Try again with new alpha parameters")
print("")
# training starts from alpha_max, so that is the value reported as the
# starting alpha (the end threshold alpha_min used to be printed here)
print("EPOCH " + str(min_epok - 1) + "   WITH MIN ERROR " + min_err_txt +
      " starting alpha " + alpha_max_txt + "   ending alpha " + alpha_txt +
      " Iterations " + str(iter) + " Total Epochs " + str(ne - 1))
print("")
print('Output File Catalog.original ' + arch_output)
print('Output File Catalog.sort     ' + arch_outsrt)
print('Output File Summary sort     ' + arch_sort)
print('Output File Matrix Catal.    ' + arch_catal)
print('Output File Means, STD, CV.  ' + arch_medsd)
print('Output File CV of the Groups ' + arch_cv)
print('Output File Training Grid    ' + arch_grid)
print('Output File Run Parameters   ' + arch_log)
print('CV before Catalog            ' + str_med_cv_gen)
print('CV after Catalog             ' + cv_media_gen_after)
print('Knowledge Index              ' + know_index)
print("")

# Elapsed time
t1 = datetime.datetime.now()
elapsed_time = t1 - t0
print("Elapsed time (seconds)   :   " + str(elapsed_time.seconds))
print("")
```