""" Artificial Neural Network """ import numpy import copy def sigmoid(x): return 1.0/(1.0 + numpy.exp(-x)) class NeuralNet: def __init__(self, numUnitsPerLayer): """ Constructor @param numUnitsPerLayer numbers of units per layer, excluding bias """ numLayers = len(numUnitsPerLayer) if numLayers < 2: raise RuntimeError, 'ERROR number of layers must be >= 2! (Got %d)' % numLayers # total number of layers incl. input and output self.numLayers = numLayers # activations self.activations = [ numpy.zeros( (numUnitsPerLayer[i] + 1,), numpy.float64 ) for i in range(numLayers)] # set the biases for el in range(numLayers): self.activations[el][0] = 1.0 # weights self.weights = [] self.bigDeltas = [] self.approxGradients = [] for el in range(numLayers - 1): shp = (numUnitsPerLayer[el+1], numUnitsPerLayer[el] + 1) self.weights.append( numpy.zeros(shp, numpy.float64) ) self.bigDeltas.append( numpy.zeros(shp, numpy.float64) ) self.approxGradients.append( numpy.zeros(shp, numpy.float64) ) # back propagating errors, no error for layer 0 self.deltas = [ numpy.zeros( (len(a)-1,), numpy.float64 ) for a in self.activations] self.deltas[0][:] = 0.0 # by definition def randomlyInitializeWeights(self, magnitude = 0.1): """ Randomly initialize the weights to values between -magnitude ... +magnitude @param magnitude """ numpy.random.seed(1234) for w in self.weights: w[:] = magnitude * (numpy.random.rand(w.shape[0], w.shape[1]) - 0.5) def forward(self, inputData): """ Compute activation by propagating input forward @param inputData input (excl. bias) """ self.activations[0][1:] = inputData # copy input data for el in range(1, self.numLayers): z = numpy.dot(self.weights[el-1], self.activations[el-1]) self.activations[el][1:] = sigmoid(z) self.activations[el][0] = 1.0 return self.getOutput() def backward(self, targetOutputData): """ Propagate error backward @param targetOutputData target output data """ weightsTranspose = [ numpy.transpose(w) for w in self.weights ] self.deltas[self.numLayers - 1][:] = self.activations[-1][1:] - targetOutputData for el in range(self.numLayers - 2, 0, -1): a = self.activations[el] gprime = a*(1.0 - a) d = numpy.dot(weightsTranspose[el], self.deltas[el + 1]) * a*(1.0 - a) self.deltas[el][:] = d[1:] def getOutput(self): """ Get the network output (excl. bias) @return array """ return self.activations[-1][1:] def getInput(self): """ Get the network input (excl. bias) @return array """ return self.activations[0][1:] def getCost(self, inputOutputList, lam=0.0): """ Compute cost function associated with input/output training data @param inputOutputList list of [(input, output), ...] values @param lam >= 0 regularization parameter (lam = 0 means no regularization) """ res = 0.0 # standard term for x, y in inputOutputList: # output from inputs and weights out = self.forward(x) # error res -= numpy.sum( y*numpy.log(out) + (1.0-y)*numpy.log(1.0-out) ) # regularization term for w in self.weights: res += (lam/2.0) * numpy.sum(w[:, 1:]**2) res /= float(len(inputOutputList)) return res def train(self, inputOutputList, lam=0.0, alpha=1.0): """ Update the weights using training set @param inputOutputList list of [(input, output), ...] 
        @param lam >= 0 regularization parameter (lam = 0 means no regularization)
        @param alpha > 0 gradient descent step
        @return cost before completion of step, cost after completion of step
        """
        numTraining = len(inputOutputList)

        # reset the gradient accumulators before this step
        for el in range(self.numLayers - 1):
            self.bigDeltas[el][:, :] = 0.0

        # accumulate the error
        for x, y in inputOutputList:
            # compute the activations at each level
            self.forward(x)
            # compute the errors at each level
            self.backward(y)
            for el in range(self.numLayers - 1):
                # d J / d Theta
                self.bigDeltas[el] += numpy.outer(self.deltas[el+1], self.activations[el])

        cost = self.getCost(inputOutputList, lam)

        # update the weights across all layers
        for el in range(self.numLayers - 1):
            self.weights[el][:, :] -= alpha*self.bigDeltas[el][:, :] / numTraining
            # regularization term (bias column excluded), scaled by 1/numTraining
            # to stay consistent with getCost
            self.weights[el][:, 1:] -= alpha*lam*self.weights[el][:, 1:] / numTraining

        newCost = self.getCost(inputOutputList, lam)
        return cost, newCost
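
# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the module's original API): trains the
# network on the XOR truth table. The layer sizes, weight magnitude, learning
# rate, and iteration count below are illustrative assumptions, not values
# prescribed by this module; convergence depends on the (seeded) random
# initialization and the step size.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    # 2 inputs, one hidden layer of 2 units, 1 output (bias units added internally)
    net = NeuralNet([2, 2, 1])
    net.randomlyInitializeWeights(magnitude=0.5)

    # XOR training set: list of (input, target output) pairs
    trainingSet = [
        (numpy.array([0.0, 0.0]), numpy.array([0.0])),
        (numpy.array([0.0, 1.0]), numpy.array([1.0])),
        (numpy.array([1.0, 0.0]), numpy.array([1.0])),
        (numpy.array([1.0, 1.0]), numpy.array([0.0])),
    ]

    # repeated full-batch gradient descent steps, no regularization (lam = 0)
    for i in range(2000):
        oldCost, newCost = net.train(trainingSet, lam=0.0, alpha=1.0)

    print('final cost: %g' % newCost)
    for x, _ in trainingSet:
        print('input %s -> output %s' % (x, net.forward(x)))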