
Neural network XOR problem converging to 0.5


I'm working on coding up a neural network from scratch into js, and have implemented the following:

class Network{
    constructor(layerSizes, activation){
        this.activation = activation[0]
        this.activationPrime = activation[1]
        this.inverseActivation = activation[2]
        this.numLayers = layerSizes.length
        this.layerSizes = layerSizes
        this.layers = []
        this.state = []
        this.outputs = []
        this.train = false
        for (let i=0; i<this.numLayers; i++){
            var nodes = Array(layerSizes[i])
            var weightsAndBiases = [Array(layerSizes[i-1]).fill(random(0, 1))]
            weightsAndBiases.push(0)
            nodes.fill(weightsAndBiases)
            this.layers.push(nodes)
        }
    }

    drawNetwork(){ // visualise the network
        for (let i=0; i<this.numLayers; i++){ // iter over layers
            for (let j=0; j<this.layers[i].length; j++){ // iter over nodes
                let coords = transformCoords([i,j], [this.numLayers, this.layers[i].length], [10, 10])
                drawCircle(coords[0], coords[1], 10, colourScale(this.state[i][j], 1, [255,255,255], [22,22,22]), true) // drawing points
                if (i != 0){ // drawing lines
                    for (let k=0; k<this.layers[i][j][0].length; k++){
                        let startingCoords = coords
                        let endingCoords = transformCoords([i-1, k], [this.numLayers, this.layers[i-1].length], [10, 10])
                        drawLine(startingCoords[0], startingCoords[1], endingCoords[0], endingCoords[1], colourScale(this.layers[i][j][0][k], 1, [255,255,255], [22,22,22]))
                    }
                }
            }
        }
    }

    parse(inputs){ // determines the state of the network from given inputs
        this.state = [inputs]
        for (let i=1; i<this.numLayers; i++){
            this.state.push([])
            for (let j=0; j<this.layerSizes[i]; j++){
                var nodeTotal = dot(this.layers[i][j][0], this.state[i-1]) + this.layers[i][j][1]
                nodeTotal = this.activation(nodeTotal)
                this.state[i].push(nodeTotal)
            }
        }
        this.outputs = this.state[this.numLayers-1]
    }

    cost(inputs, desiredOutputs){
        this.parse(inputs)
        let outputs = this.state[this.numLayers-1]
        let c = []
        for (let i=0; i<outputs.length; i++){
            c.push((outputs[i]-desiredOutputs[i])**2)
        }
        return c
    }

    run(inputs, desiredOutputs, learningRate){ // changes the weights and biases with respect to the cost function
        let rate = learningRate/inputs.length
        if (this.train){

            let newLayers = JSON.parse(JSON.stringify(this.layers))

            for (let p=0; p<inputs.length; p++){
                // console.log(p*100/inputs.length);

                let originalCost = this.cost(inputs[p], desiredOutputs[p])
                let deltas = Array(this.numLayers).fill([])

                deltas[this.numLayers-1] = Array(this.layerSizes[this.numLayers-1]).fill(0)

                for (let i=0; i<this.layerSizes[this.numLayers-1]; i++){ // iterate over final nodes and find final deltas
                    let dCostdOut = 2*(this.state[this.numLayers-1][i] - desiredOutputs[p][i])
                    let dOutdActivation = this.activationPrime(this.inverseActivation(this.state[this.numLayers-1][i]))

                    deltas[this.numLayers-1][i] = dCostdOut * dOutdActivation

                    let dCostdBias = deltas[this.numLayers-1][i]

                    for (let j=0; j<this.layerSizes[this.numLayers-2]; j++){ // iterate over weights of final nodes
                        let dCostdWeight = this.layers[this.numLayers-1][i][0][j] * this.state[this.numLayers-2][j] * deltas[this.numLayers-1][i]
                        newLayers[this.numLayers-1][i][0][j] -= dCostdWeight * rate
                    }

                    newLayers[this.numLayers-1][i][1] -= dCostdBias * rate
                }

                for (let i=this.numLayers-2; i>=0; i--){ // inverse layer iter
                    deltas[i] = Array(this.layerSizes[i]).fill(0)

                    for (let j=0; j<this.layerSizes[i]; j++){ // iterate over the current layer's nodes

                        for (let k=0; k<this.layerSizes[i+1]; k++){ // iterate over the next layer's nodes to get this layer's deltas
                            deltas[i][j] += this.state[i+1][k] * this.layers[i+1][k][0][j]
                        }
                        deltas[i][j] *= this.activationPrime(this.inverseActivation(this.state[i][j]))

                        let dCostdBias = deltas[i][j]

                        for (let k=0; k<this.layerSizes[i-1]; k++){ // iterate over previous layer's weights
                            let dCostdWeight = this.layers[i][j][0][k] * this.state[i-1][k] * deltas[i][j]
                            newLayers[i][j][0][k] -= dCostdWeight * rate
                        }

                        newLayers[i][j][1] -= dCostdBias * rate
                    }
                }
            }

            this.layers = newLayers
            this.drawNetwork()
        } else {
            this.parse(inputs[0])
        }
    }

}

let inputs = [
    [0, 0],  // Input 1
    [0, 1],  // Input 2
    [1, 0],  // Input 3
    [1, 1],  // Input 4
];

let desiredOutputs = [
    [0],  // Output for Input 1
    [1],  // Output for Input 2
    [1],  // Output for Input 3
    [0],  // Output for Input 4
];
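
Not shown above, for brevity: the `net` instance and the maths helpers. They look roughly like this — the `[2, 2, 1]` layer sizes and the sigmoid triple are representative of what I'm running rather than exact, and `random` plus the drawing helpers (`drawCircle`, `drawLine`, `transformCoords`, `colourScale`) come from my canvas setup and are omitted here:

const sigmoid = x => 1/(1 + Math.exp(-x))
const sigmoidPrime = x => sigmoid(x) * (1 - sigmoid(x))
const logit = y => Math.log(y/(1 - y)) // inverse of sigmoid, used as inverseActivation

function dot(a, b){ // sum of elementwise products of two equal-length arrays
    let total = 0
    for (let i=0; i<a.length; i++){
        total += a[i] * b[i]
    }
    return total
}

let net = new Network([2, 2, 1], [sigmoid, sigmoidPrime, logit])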

let training = false

setInterval(() => { render() }, 1)
function render(){
    if (training){
        net.train = true
        net.run(inputs, desiredOutputs, 0.35)
        net.train = false
    }
}

When I set training = true and then disable it after some time, I find that the outputs for all of the inputs are ~0.5. I know this is a common failure mode for neural networks on the XOR problem, but the tweaks I've found online haven't helped. Is there an issue in my logic for the backpropagation step somewhere? I fear there may be, but I'm not sure where.

Specifically, I have attempted changing the activation function to tanh, changing the network structure and number of layers, letting the network train for longer, and increasing the learning rate. I have also been reviewing my logic for the past hour or so and have got in a bit of a muddle about it all. A fresh and experienced pair of eyes would be most welcome!
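
For reference, this is my understanding of what each backpropagation step should compute, written as a standalone sketch for a squared-error cost (the names are illustrative and don't match my class exactly; `z` stands for stored pre-activations, which my class instead recovers via inverseActivation, and sigmoidPrime is as above):

// deltas[l][j] = dCost/d(pre-activation of node j in layer l)
// weights[l][j][k] = weight from node k in layer l-1 to node j in layer l
function referenceDeltas(state, z, weights, desired){
    const L = state.length - 1
    let deltas = []
    deltas[L] = state[L].map((a, i) => 2*(a - desired[i]) * sigmoidPrime(z[L][i])) // output layer
    for (let l=L-1; l>=1; l--){ // hidden layers; the input layer needs no deltas
        deltas[l] = state[l].map((_, j) => {
            let sum = 0
            for (let k=0; k<state[l+1].length; k++){
                sum += weights[l+1][k][j] * deltas[l+1][k] // next layer's deltas propagated back through the weights
            }
            return sum * sigmoidPrime(z[l][j])
        })
    }
    return deltas // then dCost/dWeight[l][j][k] = deltas[l][j] * state[l-1][k], and dCost/dBias[l][j] = deltas[l][j]
}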
