I am using the Auto MPG training set from http://archive.ics.uci.edu/ml/datasets/Auto+MPG
My code is:
'use strict';
var brain, fs, normalizeData, trainNetwork, _;
_ = require('lodash');
brain = require('brain');
fs = require('fs');
trainNetwork = function(trainNetworkCb) {
  var net;
  net = new brain.NeuralNetwork();
  return fs.readFile('./data/autodata.csv', function(err, fileData) {
    var fileString, lines, trainingData;
    if (err) {
      return trainNetworkCb(err);
    }
    fileString = fileData.toString();
    lines = fileString.split('\n');
    trainingData = lines.splice(0, lines.length / 2);
    trainingData = _.map(trainingData, function(dataPoint) {
      var normalizedData, obj;
      normalizedData = normalizeData(dataPoint);
      obj = {
        input: normalizedData,
        output: {
          continuous: normalizedData.continuous
        }
      };
      delete obj.input.continuous;
      return obj;
    });
    net.train(trainingData, {
      log: true,
      logPeriod: 100,
      errorThresh: 0.00005
    });
    return trainNetworkCb(null, net);
  });
};
trainNetwork(function(err, net) {
  if (err) {
    throw err;
  }
  return fs.readFile('./data/autodata.csv', function(err, fileData) {
    var fileString, lines, testData;
    if (err) {
      throw err;
    }
    fileString = fileData.toString();
    lines = fileString.split('\n');
    testData = lines.splice(lines.length / 2);
    testData = _.filter(testData, function(point) {
      return point !== '';
    });
    testData = _.map(testData, function(dataPoint) {
      var normalizedData, obj;
      normalizedData = normalizeData(dataPoint);
      obj = {
        output: {
          continuous: normalizedData.continuous
        },
        input: normalizedData
      };
      delete obj.input.continuous;
      return obj;
    });
    return _.each(testData, function(dataPoint) {
      var output;
      output = net.run(dataPoint.input);
      console.log(output);
      console.log(dataPoint);
      return console.log('');
    });
  });
});
normalizeData = function(dataRow) {
  var cylinders, dataSet, model_years, origins, row;
  dataSet = dataRow.split(',');
  dataSet = _.map(dataSet, function(point) {
    return Number(point);
  });
  row = {};
  cylinders = [5, 3, 6, 4, 8];
  _.each(cylinders, function(cylinder) {
    row["cylinder" + cylinder] = cylinder === dataSet[0] ? 1 : 0;
  });
  row.displacement = dataSet[1] / 500;
  row.horsepower = dataSet[2] / 500;
  row.weight = dataSet[3] / 10000;
  row.acceleration = dataSet[4] / 100;
  model_years = [82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70];
  _.each(model_years, function(model_year) {
    row["model_year" + model_year] = model_year === dataSet[5] ? 1 : 0;
  });
  origins = [2, 3, 1];
  _.each(origins, function(origin) {
    row["origin" + origin] = origin === dataSet[6] ? 1 : 0;
  });
  row.continuous = dataSet[7] / 100;
  return row;
};
I believe I am normalizing everything correctly. I am using half the data for training and the other half for testing. The data is not ordered, as far as I can tell, so which half is used for which shouldn't matter.
However, my errors are pretty large when testing: usually off by 10 MPG or so (30% error). What am I doing incorrectly?
Thanks
1 Answer
The dataset you linked is ordered by model year; perhaps drastic changes in technology made later engines more efficient? Neural networks are dependent on correct outputs during training. I would try training the network with all but the last row, and then testing with that row (a sketch of that split is below). Can you link the csv file you're using? The normalizeData function doesn't produce what you want with the linked file (http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data).
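For what it's worth, a minimal sketch of that split, reusing the lines array and lodash _ from your code (trainingLines and testLines are just illustrative names):

// Drop blank rows, then hold out only the last row for testing.
lines = _.filter(lines, function(line) {
  return line !== '';
});
var trainingLines = lines.slice(0, lines.length - 1); // train on everything else
var testLines = lines.slice(lines.length - 1);        // the single held-out row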
edit:
It seems that regardless of what errorThresh you specify, brain won't run more than 20,000 iterations during training. There are several ways to get around this. You can specify the learningRate of your neural network; upping the learningRate to 0.6 (the default is 0.3) helped me get more accurate results:
net.train(trainingData, {
  log: true,
  logPeriod: 100,
  errorThresh: 0.00005,
  learningRate: 0.6
});
A higher learningRate means more aggressive weight adjustment, which helps when you aren't running as many iterations as you want.
Alternatively, you can specify the total number of iterations in the options object (if not specified, it defaults to 20,000 - see here):
net.train(trainingData, {
  log: true,
  logPeriod: 100,
  errorThresh: 0.00005,
  iterations: 100000
});
Brain stops training when i < iterations && error > errorThresh evaluates to false, so feel free to crank up the iterations count to ensure that expression turns false because the error has dropped below your specified errorThresh (source).
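Putting the two together, a sketch of the training call with both options from this answer combined (same values as above):

net.train(trainingData, {
  log: true,
  logPeriod: 100,
  errorThresh: 0.00005,  // stop once the training error falls below this
  iterations: 100000,    // raise the cap so errorThresh can actually be reached
  learningRate: 0.6      // more aggressive weight updates per iteration
});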