I have written a Node.js application that writes lots of records to a PostgreSQL 9.6 database. Unfortunately, it feels quite slow. To be able to test things I have created a short but complete program that reproduces the scenario:
'use strict';

const async = require('async'),
      pg = require('pg'),
      uuid = require('uuidv4');

const pool = new pg.Pool({
  protocol: 'pg',
  user: 'golo',
  host: 'localhost',
  port: 5432,
  database: 'golo'
});

const records = [];

for (let i = 0; i < 10000; i++) {
  records.push({ id: uuid(), revision: i, data: { foo: 'bar', bar: 'baz' }, flag: true });
}

pool.connect((err, database, close) => {
  if (err) {
    /* eslint-disable no-console */
    return console.log(err);
    /* eslint-enable no-console */
  }

  database.query(`
    CREATE TABLE IF NOT EXISTS "foo" (
      "position" bigserial NOT NULL,
      "id" uuid NOT NULL,
      "revision" integer NOT NULL,
      "data" jsonb NOT NULL,
      "flag" boolean NOT NULL,
      CONSTRAINT "foo_pk" PRIMARY KEY("position"),
      CONSTRAINT "foo_index_id_revision" UNIQUE ("id", "revision")
    );
  `, errQuery => {
    if (errQuery) {
      /* eslint-disable no-console */
      return console.log(errQuery);
      /* eslint-enable no-console */
    }

    async.series({
      beginTransaction (done) {
        /* eslint-disable no-console */
        console.time('foo');
        /* eslint-enable no-console */
        database.query('BEGIN', done);
      },
      saveRecords (done) {
        async.eachSeries(records, (record, doneEach) => {
          database.query({
            name: 'save',
            text: `
              INSERT INTO "foo"
                ("id", "revision", "data", "flag")
              VALUES
                ($1, $2, $3, $4) RETURNING position;
            `,
            values: [ record.id, record.revision, record.data, record.flag ]
          }, (errQuery2, result) => {
            if (errQuery2) {
              return doneEach(errQuery2);
            }

            record.position = Number(result.rows[0].position);
            doneEach(null);
          });
        }, done);
      },
      commitTransaction (done) {
        database.query('COMMIT', done);
      }
    }, errSeries => {
      /* eslint-disable no-console */
      console.timeEnd('foo');
      /* eslint-enable no-console */

      if (errSeries) {
        return database.query('ROLLBACK', errRollback => {
          close();

          if (errRollback) {
            /* eslint-disable no-console */
            return console.log(errRollback);
            /* eslint-enable no-console */
          }
          /* eslint-disable no-console */
          console.log(errSeries);
          /* eslint-enable no-console */
        });
      }

      close();

      /* eslint-disable no-console */
      console.log('Done!');
      /* eslint-enable no-console */
    });
  });
});
The performance I get for inserting 10,000 rows is 2.5 seconds. This is not bad, but also not great. What can I do to improve speed?
Some thoughts that I had so far:
- Use prepared statements. As you can see, I have done this; it sped things up by ~30 %.
- Insert multiple rows at once using a single INSERT command. Unfortunately, this is not possible, as in reality the number of records that need to be written varies from call to call, and a varying number of arguments makes it impossible to use prepared statements.
- Use COPY instead of INSERT: I can't use this, since this happens at runtime, not at initialization time.
- Use text instead of jsonb: Didn't change a thing.
- Use json instead of jsonb: Didn't change a thing either.
A few more notes on what the data looks like in reality:
- The revision is not necessarily increasing. It is just a number.
- The flag is not always true; it can be either true or false.
- Of course the data field contains different data, too.
So in the end it comes down to:
- What possibilities are there to significantly speed up multiple single calls to INSERT?
- Are you certain that the bottleneck isn't the communication time between Node and Postgres? Have you benchmarked those 10 inserts running directly on Postgres? – Tim Biegeleisen Commented Dec 10, 2016 at 10:40
- No, I am not. Please note that it's 10k inserts, not 10, so how could I do this in PostgreSQL directly? (And even if I could, the question then is how to reduce the communication time between the two ;-)) – Golo Roden Commented Dec 10, 2016 at 10:42
- 10K inserts in 2 seconds doesn't strike me as too bad. – Tim Biegeleisen Commented Dec 10, 2016 at 10:44
- Anyway, it's not good enough ;-) – Golo Roden Commented Dec 10, 2016 at 10:46
- If I put the SQL to a script file and run this from PGAdmin 4, it tells me that the query returned successfully in 1 second. – Golo Roden Commented Dec 10, 2016 at 11:18
1 Answer
"Insert multiple rows at once using a single INSERT command. Unfortunately, this is not possible, as in reality the number of records that need to be written varies from call to call, and a varying number of arguments makes it impossible to use prepared statements."
This is the right answer, followed by an invalid counter-argument.
You can generate your multi-row inserts in a loop, with some 1,000 to 10,000 records per query, depending on the size of the records.
And you do not need prepared statements for this at all.
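For illustration, here is a minimal sketch of that approach, reusing the database client, the records array and the async flow from the question; the buildInsert and saveRecordsInChunks helpers and the chunk size of 1,000 are illustrative assumptions, not part of the original answer. Note that the statement name is dropped, because the number of placeholders now varies per chunk, so a named prepared statement cannot be reused.

// Sketch only: build one parameterized multi-row INSERT per chunk of records.
// Assumes the `database` client, `records` array and `async` from the question;
// `chunkSize`, `buildInsert` and `saveRecordsInChunks` are illustrative names.
const chunkSize = 1000;

const buildInsert = chunk => {
  const values = [];
  const placeholders = chunk.map((record, i) => {
    const offset = i * 4;

    values.push(record.id, record.revision, record.data, record.flag);
    return `($${offset + 1}, $${offset + 2}, $${offset + 3}, $${offset + 4})`;
  });

  return {
    text: `
      INSERT INTO "foo" ("id", "revision", "data", "flag")
      VALUES ${placeholders.join(', ')}
      RETURNING position;
    `,
    values
  };
};

const saveRecordsInChunks = done => {
  const chunks = [];

  for (let i = 0; i < records.length; i += chunkSize) {
    chunks.push(records.slice(i, i + chunkSize));
  }

  async.eachSeries(chunks, (chunk, doneEach) => {
    database.query(buildInsert(chunk), (errQuery, result) => {
      if (errQuery) {
        return doneEach(errQuery);
      }

      // One RETURNING row per inserted record; for a plain multi-row INSERT,
      // PostgreSQL returns them in VALUES order.
      result.rows.forEach((row, i) => {
        chunk[i].position = Number(row.position);
      });

      doneEach(null);
    });
  }, done);
};

Plugged into the question's async.series in place of saveRecords, this sends ten queries for the 10,000 records instead of ten thousand, which removes most of the per-statement round trips.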
See this article I wrote about the same issues: Performance Boost.
Following the article, my code was able to insert 10,000 records in under 50ms.
A related question: Multi-row insert with pg-promise.
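For completeness, here is a minimal sketch of what the pg-promise approach referenced above looks like; the connection string, the :json modifier on the data column, and the insertRecords helper are assumptions based on the question's schema, not code taken from the linked answer.

// Sketch of a multi-row insert with pg-promise helpers.
const pgp = require('pg-promise')();

// Connection string is an assumption matching the question's pool settings.
const db = pgp('postgres://golo@localhost:5432/golo');

// Static ColumnSet: create it once and reuse it for all inserts into "foo".
const cs = new pgp.helpers.ColumnSet(
  [ 'id', 'revision', { name: 'data', mod: ':json' }, 'flag' ],
  { table: 'foo' }
);

const insertRecords = records =>
  // helpers.insert generates a single multi-row INSERT statement as SQL text.
  db.many(pgp.helpers.insert(records, cs) + ' RETURNING position');

insertRecords(records) then resolves with the generated position values, analogous to the RETURNING handling in the question's code.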