Following is an artificial neural network program that takes any number of inputs and any number of hidden layers, and produces an output. It applies backpropagation with regularization to minimize the cost function; a gradient descent algorithm searches for the minimum of the cost function in the landscape of weights.
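To make that last point concrete, here is a toy one-dimensional illustration of the gradient descent idea. It is purely illustrative and has nothing to do with the class below:

w = 0.0       # starting guess
alpha = 0.1   # step size
for i in range(100):
    grad = 2.0*(w - 3.0)  # dJ/dw for the toy cost J(w) = (w - 3)**2
    w -= alpha*grad       # step downhill in the cost landscape
print w                   # converges towards 3.0, the minimum of J

The full network code does the same thing, except that the "landscape" is spanned by all the weights and the gradient comes from backpropagation.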
"""
Artificial Neural Network
"""
import numpy

def sigmoid(x):
    return 1.0/(1.0 + numpy.exp(-x))

class NeuralNet:

    def __init__(self, numUnitsPerLayer):
        """
        Constructor
        @param numUnitsPerLayer numbers of units per layer, excluding bias
        """
        numLayers = len(numUnitsPerLayer)
        if numLayers < 2:
            raise RuntimeError('ERROR number of layers must be >= 2! (Got %d)' % numLayers)
        # total number of layers incl. input and output
        self.numLayers = numLayers
        # activations, with one extra slot per layer for the bias unit
        self.activations = [ numpy.zeros( (numUnitsPerLayer[i] + 1,), numpy.float64 ) for i in range(numLayers) ]
        # set the biases
        for el in range(numLayers):
            self.activations[el][0] = 1.0
        # weights
        self.weights = []
        self.bigDeltas = []
        self.approxGradients = []
        for el in range(numLayers - 1):
            shp = (numUnitsPerLayer[el+1], numUnitsPerLayer[el] + 1)
            self.weights.append( numpy.zeros(shp, numpy.float64) )
            self.bigDeltas.append( numpy.zeros(shp, numpy.float64) )
            self.approxGradients.append( numpy.zeros(shp, numpy.float64) )
        # back propagating errors, no error for layer 0
        self.deltas = [ numpy.zeros( (len(a)-1,), numpy.float64 ) for a in self.activations ]
        self.deltas[0][:] = 0.0 # by definition

    def randomlyInitializeWeights(self, magnitude = 0.1):
        """
        Randomly initialize the weights to values between -magnitude ... +magnitude
        @param magnitude
        """
        numpy.random.seed(1234)
        for w in self.weights:
            w[:] = 2.0 * magnitude * (numpy.random.rand(w.shape[0], w.shape[1]) - 0.5)

    def forward(self, inputData):
        """
        Compute activations by propagating the input forward
        @param inputData input (excl. bias)
        """
        self.activations[0][1:] = inputData # copy input data
        for el in range(1, self.numLayers):
            z = numpy.dot(self.weights[el-1], self.activations[el-1])
            self.activations[el][1:] = sigmoid(z)
            self.activations[el][0] = 1.0 # restore the bias unit
        return self.getOutput()

    def backward(self, targetOutputData):
        """
        Propagate the error backward
        @param targetOutputData target output data
        """
        weightsTranspose = [ numpy.transpose(w) for w in self.weights ]
        self.deltas[self.numLayers - 1][:] = self.activations[-1][1:] - targetOutputData
        for el in range(self.numLayers - 2, 0, -1):
            a = self.activations[el]
            gprime = a*(1.0 - a) # derivative of the sigmoid
            d = numpy.dot(weightsTranspose[el], self.deltas[el + 1]) * gprime
            self.deltas[el][:] = d[1:] # drop the bias component

    def getOutput(self):
        """
        Get the network output (excl. bias)
        @return array
        """
        return self.activations[-1][1:]

    def getInput(self):
        """
        Get the network input (excl. bias)
        @return array
        """
        return self.activations[0][1:]

    def getCost(self, inputOutputList, lam=0.0):
        """
        Compute the cost function associated with the input/output training data
        @param inputOutputList list of [(input, output), ...] values
        @param lam >= 0 regularization parameter (lam = 0 means no regularization)
        """
        res = 0.0
        # standard (cross-entropy) term
        for x, y in inputOutputList:
            # output from inputs and weights
            out = self.forward(x)
            # error
            res -= numpy.sum( y*numpy.log(out) + (1.0-y)*numpy.log(1.0-out) )
        # regularization term (bias weights are not regularized)
        for w in self.weights:
            res += (lam/2.0) * numpy.sum(w[:, 1:]**2)
        res /= float(len(inputOutputList))
        return res

    def train(self, inputOutputList, lam=0.0, alpha=1.0):
        """
        Update the weights using the training set
        @param inputOutputList list of [(input, output), ...] values
        @param lam >= 0 regularization parameter (lam = 0 means no regularization)
        @param alpha > 0 gradient descent step
        @return cost before completion of step, cost after completion of step
        """
        numTraining = len(inputOutputList)
        # reset the gradients accumulated during any previous call
        for el in range(self.numLayers - 1):
            self.bigDeltas[el][:, :] = 0.0
        # accumulate the error
        for x, y in inputOutputList:
            # compute the activations at each level
            self.forward(x)
            # compute the errors at each level
            self.backward(y)
            for el in range(self.numLayers - 1):
                # d J / d Theta
                self.bigDeltas[el] += numpy.outer(self.deltas[el+1], self.activations[el])
        cost = self.getCost(inputOutputList, lam)
        # update the weights across all layers
        for el in range(self.numLayers - 1):
            self.weights[el][:, :] -= alpha*self.bigDeltas[el][:, :] / numTraining
            # regularization term (bias weights excluded)
            self.weights[el][:, 1:] -= alpha*lam*self.weights[el][:, 1:] / numTraining
        newCost = self.getCost(inputOutputList, lam)
        return cost, newCost
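Incidentally, the approxGradients arrays are allocated in the constructor but never used in the listing; presumably they are intended for finite-difference gradient checking. Below is a sketch of how such a check could be wired up from outside the class. The helper is my own addition, not part of the class; it assumes numpy and NeuralNet from the listing above, and lam = 0 so that getCost and the back-propagated gradient measure the same quantity:

def checkGradients(net, inputOutputList, eps=1.e-4):
    """
    Compare the back propagation gradient against a central finite
    difference of getCost (with lam = 0). Returns the largest absolute
    difference, which should be of the order eps**2 if backward is correct.
    """
    m = float(len(inputOutputList))
    # accumulate the back propagation gradient without changing the weights
    grads = [ numpy.zeros(w.shape, numpy.float64) for w in net.weights ]
    for x, y in inputOutputList:
        net.forward(x)
        net.backward(y)
        for el in range(net.numLayers - 1):
            grads[el] += numpy.outer(net.deltas[el+1], net.activations[el])
    maxDiff = 0.0
    for el in range(net.numLayers - 1):
        w = net.weights[el]
        for i in range(w.shape[0]):
            for j in range(w.shape[1]):
                orig = w[i, j]
                w[i, j] = orig + eps
                costPlus = net.getCost(inputOutputList)
                w[i, j] = orig - eps
                costMinus = net.getCost(inputOutputList)
                w[i, j] = orig
                net.approxGradients[el][i, j] = (costPlus - costMinus)/(2.0*eps)
                maxDiff = max(maxDiff, abs(net.approxGradients[el][i, j] - grads[el][i, j]/m))
    return maxDiff

Calling this on a freshly initialized network, e.g. checkGradients(n, trainingSet), is a quick sanity check before training; it is slow, so it is not something to run every iteration.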
The network topology is set in the constructor by specifying the number of units for each layer, including the input and output layers. For a very simple case such as the logical AND gate, you could use the following:
n = NeuralNet([2, 1]) # 2 inputs, 1 output, no hidden layer
n.randomlyInitializeWeights(0.1)
maxNumIter = 200
tol = 0.001
cost = float('inf')
alpha = 1.0 # step size for gradient descent
lam = 0.0 # regularization term, not needed here
count = 0
trainingSet = [([0.,0.], 0.), ([0.,1.], 0.), ([1.,0.], 0.), ([1.,1.], 1.),]
while cost > tol and count < maxNumIter and alpha > 1.e-4:
    # perform one gradient-descent pass over the training set
    oldCost, newCost = n.train(trainingSet, lam=lam, alpha=alpha)
    print '%d old cost = %f new cost = %f alpha = %f' % (count, oldCost, newCost, alpha)
    if newCost < oldCost:
        alpha *= 1.2 # increase the step
    else:
        alpha /= 2.0 # decrease the step
    cost = newCost
    count += 1
# check
n.forward([0., 0.])
print '0, 0 -> ', n.getOutput()
n.forward([0., 1.])
print '0, 1 -> ', n.getOutput()
n.forward([1., 0.])
print '1, 0 -> ', n.getOutput()
n.forward([1., 1.])
print '1, 1 -> ', n.getOutput()
I've used this code successfully to recognize hand-written digits from 5000 20x20-pixel images with 95% accuracy.
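Roughly speaking, the setup for that kind of task looks like the following. The hidden-layer size, the regularization value and the number of passes below are illustrative guesses rather than the exact values I used, and the random arrays merely stand in for the real images and labels:

# illustrative only: random stand-ins for the real 20x20 images and labels
numpy.random.seed(0)
digitTrainingSet = []
for k in range(50):                        # the real set had 5000 pairs
    pixels = numpy.random.rand(400)        # flattened 20x20 image
    label = numpy.zeros(10, numpy.float64)
    label[k % 10] = 1.0                    # one-hot encoded digit
    digitTrainingSet.append((pixels, label))

digitNet = NeuralNet([400, 25, 10])        # 400 inputs, one hidden layer, 10 outputs
digitNet.randomlyInitializeWeights(0.1)
alpha = 1.0
lam = 1.0 # some regularization helps with this many weights
for i in range(50):
    oldCost, newCost = digitNet.train(digitTrainingSet, lam=lam, alpha=alpha)
    alpha = alpha*1.2 if newCost < oldCost else alpha/2.0

# the predicted digit is the index of the largest output unit
print numpy.argmax(digitNet.forward(digitTrainingSet[0][0]))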
This could do with a couple of less-simple examples.
I don't know much about neural networks, but your training set contains the full range of possible values. Isn't that considered a bad idea? How do you know that the network can generalise from your examples if there are no other data sets possible?
I realise that in a trivial case like the AND gate, there are only four possible data points. That's why I'd like to see a slightly more complex example.
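For what it's worth, XOR would be a natural next step: unlike AND, it cannot be represented without a hidden layer. A sketch of how it might look with the class above (the hidden-layer size, weight magnitude and iteration count are guesses, and convergence can depend on the random initialization):

xorNet = NeuralNet([2, 2, 1])   # XOR needs at least one hidden layer
xorNet.randomlyInitializeWeights(1.0)
xorSet = [([0.,0.], 0.), ([0.,1.], 1.), ([1.,0.], 1.), ([1.,1.], 0.),]
alpha = 1.0
for i in range(5000):
    oldCost, newCost = xorNet.train(xorSet, lam=0.0, alpha=alpha)
    alpha = alpha*1.2 if newCost < oldCost else alpha/2.0
for x, y in xorSet:
    xorNet.forward(x)
    print x, '->', xorNet.getOutput()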