I am using the Auto MPG training set from http://archive.ics.uci.edu/ml/datasets/Auto+MPG
My code is:
'use strict';
var brain, fs, normalizeData, trainNetwork, _;
_ = require('lodash');
brain = require('brain');
fs = require('fs');
trainNetwork = function(trainNetworkCb) {
  var net;
  net = new brain.NeuralNetwork();
  return fs.readFile('./data/autodata.csv', function(err, fileData) {
    var fileString, lines, trainingData;
    if (err) {
      return trainNetworkCb(err);
    }
    fileString = fileData.toString();
    lines = fileString.split('\n');
    trainingData = lines.splice(0, lines.length / 2);
    trainingData = _.map(trainingData, function(dataPoint) {
      var normalizedData, obj;
      normalizedData = normalizeData(dataPoint);
      obj = {
        input: normalizedData,
        output: {
          continuous: normalizedData.continuous
        }
      };
      delete obj.input.continuous;
      return obj;
    });
    net.train(trainingData, {
      log: true,
      logPeriod: 100,
      errorThresh: 0.00005
    });
    return trainNetworkCb(null, net);
  });
};
trainNetwork(function(err, net) {
  if (err) {
    throw err;
  }
  return fs.readFile('./data/autodata.csv', function(err, fileData) {
    var fileString, lines, testData;
    if (err) {
      throw err;
    }
    fileString = fileData.toString();
    lines = fileString.split('\n');
    testData = lines.splice(lines.length / 2);
    testData = _.filter(testData, function(point) {
      return point !== '';
    });
    testData = _.map(testData, function(dataPoint) {
      var normalizedData, obj;
      normalizedData = normalizeData(dataPoint);
      obj = {
        output: {
          continuous: normalizedData.continuous
        },
        input: normalizedData
      };
      delete obj.input.continuous;
      return obj;
    });
    return _.each(testData, function(dataPoint) {
      var output;
      output = net.run(dataPoint.input);
      console.log(output);
      console.log(dataPoint);
      return console.log('');
    });
  });
});
normalizeData = function(dataRow) {
  var cylinders, dataSet, model_years, origins, row;
  dataSet = dataRow.split(',');
  dataSet = _.map(dataSet, function(point) {
    return Number(point);
  });
  row = {};
  cylinders = [5, 3, 6, 4, 8];
  _.each(cylinders, function(cylinder) {
    row["cylinder" + cylinder] = cylinder === dataSet[0] ? 1 : 0;
  });
  row.displacement = dataSet[1] / 500;
  row.horsepower = dataSet[2] / 500;
  row.weight = dataSet[3] / 10000;
  row.acceleration = dataSet[4] / 100;
  model_years = [82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70];
  _.each(model_years, function(model_year) {
    row["model_year" + model_year] = model_year === dataSet[5] ? 1 : 0;
  });
  origins = [2, 3, 1];
  _.each(origins, function(origin) {
    row["origin" + origin] = origin === dataSet[6] ? 1 : 0;
  });
  row.continuous = dataSet[7] / 100;
  return row;
};
I believe I am normalizing everything correctly. I am using half the data for training and the other half for testing. The data is not ordered, as far as I can tell, so which half is used for which shouldn't matter.
However, my errors are pretty large when testing: usually off by 10 MPG or so (30% error). What am I doing incorrectly?
Thanks
1 Answer
The dataset you linked is ordered by model year; perhaps drastic changes in technology made later engines more efficient? Neural networks are dependent on correct outputs during training. I would try training the network with all but the last row, and then testing with that row (a sketch of that split is below). Can you link the csv file you're using? The normalizeData function doesn't produce what you want with the linked file (http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data).
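For what it's worth, a minimal sketch of that split, reusing the lines array and lodash _ from your code (trainingLines and testLines are just illustrative names):

// Drop blank rows, then hold out only the last row for testing.
lines = _.filter(lines, function(line) {
  return line !== '';
});
var trainingLines = lines.slice(0, lines.length - 1); // train on everything else
var testLines = lines.slice(lines.length - 1);        // the single held-out row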
edit:
It seems that regardless of what errorThresh you specify, brain won't run more than 20,000 iterations during training. There are several ways to get around this. You can specify the learningRate of your neural network; upping the learningRate to 0.6 (the default is 0.3) helped me get more accurate results:
net.train(trainingData, {
  log: true,
  logPeriod: 100,
  errorThresh: 0.00005,
  learningRate: 0.6
});
A higher learningRate means more aggressive weight adjustment, which helps when you aren't running as many iterations as you want.
Alternatively, you can specify the total number of iterations in the options object (if not specified, it defaults to 20,000 - see here):
net.train(trainingData, {
  log: true,
  logPeriod: 100,
  errorThresh: 0.00005,
  iterations: 100000
});
Brain stops training when i < iterations && error > errorThresh evaluates to false, so feel free to crank up the iterations count to ensure that expression turns false because the error has dropped below your specified errorThresh (source).
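Putting the two together, a sketch of the training call with both options from this answer combined (same values as above):

net.train(trainingData, {
  log: true,
  logPeriod: 100,
  errorThresh: 0.00005,  // stop once the training error falls below this
  iterations: 100000,    // raise the cap so errorThresh can actually be reached
  learningRate: 0.6      // more aggressive weight updates per iteration
});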