I want to save 1 million records to mongodb using javascript like this:
// NOTE(review): this loop fires 10,000,000 db.save() calls without ever
// waiting for one to complete, so every pending write is queued at once —
// the process runs out of memory/connections instead of saving everything.
for (var i = 0; i<10000000; i++) {
model = buildModel(i);                // NOTE(review): implicit global — no var/let/const
db.save(model, function(err, done) {  // err is never checked; failures are silent
console.log('cool');
});
}
I tried it, it saved ~160 records, then hang for 2 minutes, then exited. Why?
I want to save 1 million records to mongodb using javascript like this:
// NOTE(review): duplicate of the snippet above — 10,000,000 async saves are
// started back-to-back with no flow control, so the pending operations pile
// up unboundedly; the process stalls and dies after a few hundred writes.
for (var i = 0; i<10000000; i++) {
model = buildModel(i);                // NOTE(review): implicit global — no var/let/const
db.save(model, function(err, done) {  // err is ignored here
console.log('cool');
});
}
I tried it, it saved ~160 records, then hang for 2 minutes, then exited. Why?
Share Improve this question edited Jan 22, 2015 at 12:56 Neil Lunn 151k36 gold badges355 silver badges325 bronze badges asked Jan 22, 2015 at 11:27 eguneyseguneys 6,4267 gold badges35 silver badges82 bronze badges 3- what do you mean by 'blew up'? – Vsevolod Goloviznin Commented Jan 22, 2015 at 11:34
- You need to expand on blew up. Did nodejs crash, did it error out, did it just exit without error etc. My first thoughts would be node reached its memory limit though – Patrick Evans Commented Jan 22, 2015 at 11:34
- It hang for a while, than flushed some output, and exited. It saved only ~160 records. I abstracted my real problem, I am wondering if this would work or not? @VsevolodGoloviznin – eguneys Commented Jan 22, 2015 at 11:39
2 Answers
Reset to default 5It blew up because you are not waiting for an asynchronous call to complete before moving on to the next iteration. What this means is that you are building a "stack" of unresolved operations until this causes a problem. What is the name of this site again? Get the picture?
So this is not the best way to proceed with "Bulk" insertions. Fortunately the underlying MongoDB driver has already thought about this, aside from the callback issue mentioned earlier. There is in fact a "Bulk API" available to make this a whole lot better. And assuming you already pulled the native driver as the db
object. But I prefer just using the .collection
accessor from the model, and the "async" module to make everything clear:
// Insert 1,000,000 documents via the Bulk Operations API, flushing a batch
// of 1000 queued inserts per server round trip so the number of in-flight
// writes stays bounded instead of stacking up one callback per document.
var bulk = Model.collection.initializeOrderedBulkOp();
var counter = 0;

async.whilst(
    // Iterator condition — fixed: original tested the undefined `count`.
    function() { return counter < 1000000; },
    // Body: queue one insert; every 1000 documents, execute the batch and
    // wait for the server to acknowledge before iterating further.
    function(callback) {
        counter++;
        var model = buildModel(counter);
        bulk.insert(model);

        if ( counter % 1000 === 0 ) {
            bulk.execute(function(err, result) {
                // A bulk op can only be executed once — start a fresh one.
                bulk = Model.collection.initializeOrderedBulkOp();
                callback(err);
            });
        } else {
            callback();
        }
    },
    // Completion: surface any error, then flush the final partial batch
    // (when counter is not an exact multiple of 1000).
    function(err) {
        if (err) {
            return console.error(err);
        }
        if ( counter % 1000 !== 0 ) {
            bulk.execute(function(err, result) {
                console.log( "inserted some more" );
            });
        }
        console.log( "I'm finished now" );
    }
);
The difference there is using both "asynchronous" callback methods on completion rather than just building up a stack, but also employing the "Bulk Operations API" in order to mitigate the asynchronous write calls by submitting everything in batch update statements of 1000 entries.
This not only avoids "building up a stack" of function executions like your own example code, but also performs efficient "wire" transactions by not sending everything in individual statements, but rather breaking them up into manageable "batches" for server commitment.
You should probably use something like Async's eachLimit
:
// Create an array of the numbers 0..999999.
// Fixed: original started at i = models.length, which wrote index 1000000
// and grew the array to 1,000,001 elements.
var models = new Array(1000000);
for (var i = models.length - 1; i >= 0; i--) {
    models[i] = i;
}

// Iterate over the array performing a MongoDB save operation for each item
// while never performing more than 20 parallel saves at the same time.
async.eachLimit(models, 20, function iterator(model, next) {
    // Build a model and save it to the DB, call next when finished.
    db.save(buildModel(model), next);
}, function done(err) {
    if (err) { // An error occurred while trying to save a model to the DB
        console.error(err);
    } else { // All 1,000,000 models have been saved to the DB
        // Fixed: eachLimit's final callback receives only `err` — the
        // original read `results.length`, which is undefined here.
        console.log('Successfully saved ' + models.length + ' models to MongoDB.');
    }
});