I'm working on coding up a neural network from scratch into js, and have implemented the following:
class Network{
constructor(layerSizes, activation){
this.activation = activation[0]
this.activationPrime = activation[1]
this.inverseActivation = activation[2]
this.numLayers = layerSizes.length
this.layerSizes = layerSizes
this.layers = []
this.state = []
this.outputs = []
this.train = false
for (let i=0; i<this.numLayers; i++){
var nodes = Array(layerSizes[i])
var weightsAndBiases = [Array(layerSizes[i-1]).fill(random(0, 1))]
weightsAndBiases.push(0)
nodes.fill(weightsAndBiases)
this.layers.push(nodes)
}
}
drawNetwork(){ // visualise the network
for (let i=0; i<this.numLayers; i++){ // iter over layers
for (let j=0; j<this.layers[i].length; j++){ // iter over nodes
let coords = transformCoords([i,j], [this.numLayers, this.layers[i].length], [10, 10])
drawCircle(coords[0], coords[1], 10, colourScale(this.state[i][j], 1, [255,255,255], [22,22,22]), true) // drawing points
if (i != 0){ // drawing lines
for (let k=0; k<this.layers[i][j][0].length; k++){
let startingCoords = coords
let endingCoords = transformCoords([i-1, k], [this.numLayers, this.layers[i-1].length], [10, 10])
drawLine(startingCoords[0], startingCoords[1], endingCoords[0], endingCoords[1], colourScale(this.layers[i][j][0][k], 1, [255,255,255], [22,22,22]))
}
}
}
}
}
parse(inputs){ // determines the state of the network from given inputs
this.state = [inputs]
for (let i=1; i<this.numLayers; i++){
this.state.push([])
for (let j=0; j<this.layerSizes[i]; j++){
var nodeTotal = dot(this.layers[i][j][0], this.state[i-1]) + this.layers[i][j][1]
nodeTotal = this.activation(nodeTotal)
this.state[i].push(nodeTotal)
}
}
this.outputs = this.state[this.numLayers-1]
}
cost(inputs, desiredOutputs){
this.parse(inputs)
let outputs = this.state[this.numLayers-1]
let c = []
for (let i=0; i<outputs.length; i++){
c.push((outputs[i]-desiredOutputs[i])**2)
}
return c
}
run(inputs, desiredOutputs, learningRate){ // changes the weights and biases with respect to the cost function
let rate = learningRate/inputs.length
if (this.train){
let newLayers = JSON.parse(JSON.stringify(this.layers))
for (let p=0; p<inputs.length; p++){
// console.log(p*100/inputs.length);
let originalCost = this.cost(inputs[p], desiredOutputs[p])
let deltas = Array(this.numLayers).fill([])
deltas[this.numLayers-1] = Array(this.layerSizes[this.numLayers-1]).fill(0)
for (let i=0; i<this.layerSizes[this.numLayers-1]; i++){ // iterate over final nodes and find final deltas
let dCostdOut = 2*(this.state[this.numLayers-1][i] - desiredOutputs[p][i])
let dOutdActivation = this.activationPrime(this.inverseActivation(this.state[this.numLayers-1][i]))
deltas[this.numLayers-1][i] = dCostdOut * dOutdActivation
let dCostdBias = deltas[this.numLayers-1][i]
for (let j=0; j<this.layerSizes[this.numLayers-2]; j++){ // iterate over weights of final nodes
let dCostdWeight = this.layers[this.numLayers-1][i][0][j] * this.state[this.numLayers-2][j] * deltas[this.numLayers-1][i]
newLayers[this.numLayers-1][i][0][j] -= dCostdWeight * rate
}
newLayers[this.numLayers-1][i][1] -= dCostdBias * rate
}
for (let i=this.numLayers-2; i>=0; i--){ // inverse layer iter
deltas[i] = Array(this.layerSizes[i]).fill(0)
for (let j=0; j<this.layerSizes[i]; j++){ // iterate over the current layer's nodes
for (let k=0; k<this.layerSizes[i+1]; k++){ // iterate over the next layer's nodes to get this layer's deltas
deltas[i][j] += this.state[i+1][k] * this.layers[i+1][k][0][j]
}
deltas[i][j] *= this.activationPrime(this.inverseActivation(this.state[i][j]))
let dCostdBias = deltas[i][j]
for (let k=0; k<this.layerSizes[i-1]; k++){ // iterate over previous layer's weights
let dCostdWeight = this.layers[i][j][0][k] * this.state[i-1][k] * deltas[i][j]
newLayers[i][j][0][k] -= dCostdWeight * rate
}
newLayers[i][j][1] -= dCostdBias * rate
}
}
}
this.layers = newLayers
this.drawNetwork()
} else {
this.parse(inputs[0])
}
}
}
let inputs = [
[0, 0], // Input 1
[0, 1], // Input 2
[1, 0], // Input 3
[1, 1], // Input 4
];
let desiredOutputs = [
[0], // Output for Input 1
[1], // Output for Input 2
[1], // Output for Input 3
[0], // Output for Input 4
];
let training = false
setInterval(() => {render(); }, 1)
function render(){
if (training){
net.train = true
net.run(inputs, desiredOutputs, 0.35)
net.train = false
}
}
When I set training = true
, and then disable it after some time, I find that all the outputs for the inputs are ~0.5. I know that this is a common error with the XOR problem and neural networks, but I have made some internet-found tweaks to no avail. Is there an issue in my logic for the backpropagation step somewhere? I fear that there may be, I'm not quite sure about it.
Specifically, I have attempted changing the activation function to tanh, changing the network structure and number of layers, letting the network train for longer, and increasing the learning rate. I have also been reviewing my logic for the past hour or so and have got in a bit of a muddle about it all. A fresh and experienced pair of eyes would be most welcome!