
Following is an artificial neural network program that takes any number of inputs and any number of hidden layers, and spits out an output. It applies back propagation with regularization to minimize the cost function. A gradient descent algorithm searches for the minimum of the cost function in the landscape of weights.
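
Concretely, for a training set of m input/output pairs the cost being minimized is the regularized cross-entropy implemented in getCost below (Theta stands for the weight matrices, h(x) for the network output and lam for the regularization strength):

    J(Theta) = -(1/m) * sum_i [ y_i*log(h(x_i)) + (1 - y_i)*log(1 - h(x_i)) ]
               + (lam/(2*m)) * sum(Theta[:, 1:]**2)

Each call to train() takes one gradient descent step, Theta <- Theta - alpha * dJ/dTheta, with the gradient accumulated layer by layer by back propagation.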

"""
Artificial Neural Network 
"""

import numpy
import copy

def sigmoid(x):
	return 1.0/(1.0 + numpy.exp(-x))

class NeuralNet:

	def __init__(self, numUnitsPerLayer):
		"""
		Constructor
		@param numUnitsPerLayer numbers of units per layer, excluding bias
		"""

		numLayers = len(numUnitsPerLayer)
		if numLayers < 2:
			raise RuntimeError('ERROR number of layers must be >= 2! (Got %d)' % numLayers)

		# total number of layers incl. input and output
		self.numLayers = numLayers 

		# activations
		self.activations = [ numpy.zeros( (numUnitsPerLayer[i] + 1,), numpy.float64 ) for i in range(numLayers)]

		# set the biases
		for el in range(numLayers):
			self.activations[el][0] = 1.0

		# weights
		self.weights = []
		self.bigDeltas = []
		self.approxGradients = []  # not used in the code below
		for el in range(numLayers - 1):
			shp = (numUnitsPerLayer[el+1], numUnitsPerLayer[el] + 1)
			self.weights.append( numpy.zeros(shp, numpy.float64) )
			self.bigDeltas.append( numpy.zeros(shp, numpy.float64) )
			self.approxGradients.append( numpy.zeros(shp, numpy.float64) )

		# back propagating errors, no error for layer 0
		self.deltas = [ numpy.zeros( (len(a)-1,), numpy.float64 ) for a in self.activations]
		self.deltas[0][:] = 0.0 # by definition

	def randomlyInitializeWeights(self, magnitude = 0.1):
		"""
		Randomly initialize the weights to values between -magnitude ... +magnitude
		@param magnitude
		"""
		numpy.random.seed(1234)
		for w in self.weights:
			w[:] = magnitude * (2.0*numpy.random.rand(w.shape[0], w.shape[1]) - 1.0)

	def forward(self, inputData):
		"""
		Compute activation by propagating input forward
		@param inputData input (excl. bias)
		"""
		self.activations[0][1:] = inputData # copy input data
		for el in range(1, self.numLayers):
			z = numpy.dot(self.weights[el-1], self.activations[el-1])
			self.activations[el][1:] = sigmoid(z)
			self.activations[el][0] = 1.0

		return self.getOutput()

	def backward(self, targetOutputData):
		"""
		Propagate error backward
		@param targetOutputData target output data
		"""
		weightsTranspose = [ numpy.transpose(w) for w in self.weights ]
		self.deltas[self.numLayers - 1][:] = self.activations[-1][1:] - targetOutputData
		for el in range(self.numLayers - 2, 0, -1):
			a = self.activations[el]
			gprime = a*(1.0 - a)  # sigmoid derivative expressed via the activation
			d = numpy.dot(weightsTranspose[el], self.deltas[el + 1]) * gprime
			self.deltas[el][:] = d[1:]

	def getOutput(self):
		"""
		Get the network output (excl. bias)
		@return array
		"""
		return self.activations[-1][1:]

	def getInput(self):
		"""
		Get the network input (excl. bias)
		@return array
		"""
		return self.activations[0][1:]

	def getCost(self, inputOutputList, lam=0.0):
		"""
		Compute cost function associated with input/output training data
		@param inputOutputList list of [(input, output), ...] values
		@param lam >= 0 regularization parameter (lam = 0 means no regularization)
		"""
		res = 0.0

		# standard term
		for x, y in inputOutputList:

			# output from inputs and weights
			out = self.forward(x)

			# error
			res -= numpy.sum( y*numpy.log(out) + (1.0-y)*numpy.log(1.0-out) )
		
		# regularization term 
		for w in self.weights:
			res += (lam/2.0) * numpy.sum(w[:, 1:]**2)

		res /= float(len(inputOutputList))

		return res

	def train(self, inputOutputList, lam=0.0, alpha=1.0):
		"""
		Update the weights using training set
		@param inputOutputList list of [(input, output), ...] values
		@param lam >= 0 regularization parameter (lam = 0 means no regularization)
		@param alpha > 0 gradient descent step
		@return cost before completion of step, cost after completion of step
		"""

		numTraining = len(inputOutputList)

		# reset the gradient accumulators (they persist between calls)
		for el in range(self.numLayers - 1):
			self.bigDeltas[el][:, :] = 0.0

		# accumulate the error
		for x, y in inputOutputList:

			# compute the activations at each level
			self.forward(x)

			# compute the errors at each level
			self.backward(y)

			for el in range(self.numLayers-1):
				# d J /d Theta
				self.bigDeltas[el] += numpy.outer(self.deltas[el+1], self.activations[el])

		cost = self.getCost(inputOutputList, lam)

		# update the weights across all layers
		for el in range(self.numLayers-1):
			self.weights[el][:, :] -= alpha*self.bigDeltas[el][:, :] / numTraining
			# regularization term (divided by numTraining to be consistent with getCost)
			self.weights[el][:, 1:] -= alpha*lam*self.weights[el][:, 1:] / numTraining

		newCost = self.getCost(inputOutputList, lam)

		return cost, newCost

The network topology is set in the constructor by specifying the number of units in each layer, including the input and output layers. For a very simple case, such as the logical AND gate, you could use the following:

n = NeuralNet([2, 1]) # 2 inputs, 1 output, no hidden layer
n.randomlyInitializeWeights(0.1)
maxNumIter = 200
tol = 0.001
cost = float('inf')
alpha = 1.0 # step size for gradient descent
lam = 0.0    # regularization term, not needed here
count = 0
trainingSet = [([0.,0.], 0.), ([0.,1.], 0.), ([1.,0.], 0.), ([1.,1.], 1.),]
while cost > tol and count < maxNumIter and alpha > 1.e-4:
    # one gradient-descent step over the whole training set
    oldCost, newCost = n.train(trainingSet, lam=lam, alpha=alpha)
    print('%d old cost = %f new cost = %f alpha = %f' % (count, oldCost, newCost, alpha))
    if newCost < oldCost:
        alpha *= 1.2 # increase the step
    else:
        alpha /= 2.0 # decrease step
    cost = newCost
    count += 1

# check the trained network
n.forward([0., 0.])
print('0, 0 -> ', n.getOutput())
n.forward([0., 1.])
print('0, 1 -> ', n.getOutput())
n.forward([1., 0.])
print('1, 0 -> ', n.getOutput())
n.forward([1., 1.])
print('1, 1 -> ', n.getOutput())
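
The same class handles targets that are not linearly separable once a hidden layer is added. The following sketch trains the network on the XOR gate; the hidden-layer size, weight magnitude, step size and iteration limit are illustrative choices and may need tweaking before the cost drops below the tolerance:

n = NeuralNet([2, 3, 1]) # 2 inputs, 3 hidden units, 1 output (illustrative sizes)
n.randomlyInitializeWeights(0.5)
trainingSet = [([0.,0.], 0.), ([0.,1.], 1.), ([1.,0.], 1.), ([1.,1.], 0.),]
alpha = 1.0
for count in range(5000):
    oldCost, newCost = n.train(trainingSet, lam=0.0, alpha=alpha)
    # same crude step-size adaptation as in the AND example
    alpha = alpha*1.2 if newCost < oldCost else alpha/2.0
    if newCost < 0.01:
        break
for x, y in trainingSet:
    print(x, '->', n.forward(x), '(target %g)' % y)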

I've used this code successfully to classify hand-written digits from 5000 20x20-pixel images with 95% accuracy.
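
For the record, such a classifier can be wired up with 400 inputs (one per pixel), one hidden layer and 10 outputs, using one-hot target vectors. The sketch below assumes the NeuralNet class and the numpy import above; images (a 5000 x 400 array of pixel values scaled to 0..1) and labels (the corresponding digits 0-9) stand in for your own data, and the hidden-layer size, lam, alpha and iteration count are illustrative, untuned choices:

n = NeuralNet([400, 25, 10])   # 400 inputs, 25 hidden units, 10 outputs
n.randomlyInitializeWeights(0.1)

# build one-hot targets: e.g. digit 3 becomes [0,0,0,1,0,0,0,0,0,0]
trainingSet = []
for pixels, digit in zip(images, labels):
    target = numpy.zeros((10,), numpy.float64)
    target[int(digit)] = 1.0
    trainingSet.append((pixels, target))

# batch gradient descent; slow in pure numpy but workable for 5000 examples
for it in range(400):
    oldCost, newCost = n.train(trainingSet, lam=1.0, alpha=0.3)

# the predicted digit is the index of the largest output unit
prediction = numpy.argmax(n.forward(images[0]))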

1 comment

Steven D'Aprano 9 years, 2 months ago

This could do with a couple of less-simple examples.

I don't know much about neural networks, but your training set contains the full range of possible values. Isn't that considered a bad idea? How do you know that the network can generalise from your examples if there are no other data sets possible?

I realise that in a trivial case like the AND gate, there are only four data sets possible at all. That's why I'd like to see a slightly more complex example.