""" Artificial Neural Network """ import numpy import copy def sigmoid(x): return 1.0/(1.0 + numpy.exp(-x)) class NeuralNet: def __init__(self, numUnitsPerLayer): """ Constructor @param numUnitsPerLayer numbers of units per layer, excluding bias """ numLayers = len(numUnitsPerLayer) if numLayers < 2: raise RuntimeError, 'ERROR number of layers must be >= 2! (Got %d)' % numLayers # total number of layers incl. input and output self.numLayers = numLayers # activations self.activations = [ numpy.zeros( (numUnitsPerLayer[i] + 1,), numpy.float64 ) for i in range(numLayers)] # set the biases for el in range(numLayers): self.activations[el][0] = 1.0 # weights self.weights = [] self.bigDeltas = [] self.approxGradients = [] for el in range(numLayers - 1): shp = (numUnitsPerLayer[el+1], numUnitsPerLayer[el] + 1) self.weights.append( numpy.zeros(shp, numpy.float64) ) self.bigDeltas.append( numpy.zeros(shp, numpy.float64) ) self.approxGradients.append( numpy.zeros(shp, numpy.float64) ) # back propagating errors, no error for layer 0 self.deltas = [ numpy.zeros( (len(a)-1,), numpy.float64 ) for a in self.activations] self.deltas[0][:] = 0.0 # by definition def randomlyInitializeWeights(self, magnitude = 0.1): """ Randomly initialize the weights to values between -magnitude ... +magnitude @param magnitude """ numpy.random.seed(1234) for w in self.weights: w[:] = magnitude * (numpy.random.rand(w.shape[0], w.shape[1]) - 0.5) def forward(self, inputData): """ Compute activation by propagating input forward @param inputData input (excl. bias) """ self.activations[0][1:] = inputData # copy input data for el in range(1, self.numLayers): z = numpy.dot(self.weights[el-1], self.activations[el-1]) self.activations[el][1:] = sigmoid(z) self.activations[el][0] = 1.0 return self.getOutput() def backward(self, targetOutputData): """ Propagate error backward @param targetOutputData target output data """ weightsTranspose = [ numpy.transpose(w) for w in self.weights ] self.deltas[self.numLayers - 1][:] = self.activations[-1][1:] - targetOutputData for el in range(self.numLayers - 2, 0, -1): a = self.activations[el] gprime = a*(1.0 - a) d = numpy.dot(weightsTranspose[el], self.deltas[el + 1]) * a*(1.0 - a) self.deltas[el][:] = d[1:] def getOutput(self): """ Get the network output (excl. bias) @return array """ return self.activations[-1][1:] def getInput(self): """ Get the network input (excl. bias) @return array """ return self.activations[0][1:] def getCost(self, inputOutputList, lam=0.0): """ Compute cost function associated with input/output training data @param inputOutputList list of [(input, output), ...] values @param lam >= 0 regularization parameter (lam = 0 means no regularization) """ res = 0.0 # standard term for x, y in inputOutputList: # output from inputs and weights out = self.forward(x) # error res -= numpy.sum( y*numpy.log(out) + (1.0-y)*numpy.log(1.0-out) ) # regularization term for w in self.weights: res += (lam/2.0) * numpy.sum(w[:, 1:]**2) res /= float(len(inputOutputList)) return res def train(self, inputOutputList, lam=0.0, alpha=1.0): """ Update the weights using training set @param inputOutputList list of [(input, output), ...] 
        @param lam >= 0 regularization parameter (lam = 0 means no regularization)
        @param alpha > 0 gradient descent step
        @return cost before completion of step, cost after completion of step
        """
        numTraining = len(inputOutputList)

        # reset the gradient accumulators before this step
        for el in range(self.numLayers - 1):
            self.bigDeltas[el][:, :] = 0.0

        # accumulate the error
        for x, y in inputOutputList:
            # compute the activations at each level
            self.forward(x)
            # compute the errors at each level
            self.backward(y)
            for el in range(self.numLayers - 1):
                # d J / d Theta
                self.bigDeltas[el] += numpy.outer(self.deltas[el+1], self.activations[el])

        cost = self.getCost(inputOutputList, lam)

        # update the weights across all layers
        for el in range(self.numLayers - 1):
            self.weights[el][:, :] -= alpha*self.bigDeltas[el][:, :] / numTraining
            # regularization term (bias column excluded), scaled by 1/numTraining
            # to stay consistent with getCost
            self.weights[el][:, 1:] -= alpha*lam*self.weights[el][:, 1:] / numTraining

        newCost = self.getCost(inputOutputList, lam)
        return cost, newCost
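
# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the module's original API): trains the
# network on the XOR truth table. The layer sizes, weight magnitude, learning
# rate, and iteration count below are illustrative assumptions, not values
# prescribed by this module; convergence depends on the (seeded) random
# initialization and the step size.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    # 2 inputs, one hidden layer of 2 units, 1 output (bias units added internally)
    net = NeuralNet([2, 2, 1])
    net.randomlyInitializeWeights(magnitude=0.5)

    # XOR training set: list of (input, target output) pairs
    trainingSet = [
        (numpy.array([0.0, 0.0]), numpy.array([0.0])),
        (numpy.array([0.0, 1.0]), numpy.array([1.0])),
        (numpy.array([1.0, 0.0]), numpy.array([1.0])),
        (numpy.array([1.0, 1.0]), numpy.array([0.0])),
    ]

    # repeated full-batch gradient descent steps, no regularization (lam = 0)
    for i in range(2000):
        oldCost, newCost = net.train(trainingSet, lam=0.0, alpha=1.0)

    print('final cost: %g' % newCost)
    for x, _ in trainingSet:
        print('input %s -> output %s' % (x, net.forward(x)))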