The following code, which inserts 200,000 records into a PostgreSQL server from node.js, is taking about 17 minutes on my laptop PC, which feels horribly slow.
var pg = require('pg');
var Client = pg.Client;
var async = require('async');

var client = new Client(connectionString);
client.connect();

// Roll back the transaction and terminate the process on failure.
var rollback = function(client) {
    client.query('ROLLBACK', function() {
        client.end();
        process.exit(1);
    });
};

client.query('BEGIN', function(err, result) {
    if (err) { console.error(err); rollback(client); return; }
    async.waterfall([
        function(cb) {
            // Drop the index so it is not updated on every insert,
            // and empty the table before reloading it.
            client.query('DROP INDEX idx', function(err, result) {
                client.query('TRUNCATE TABLE tbl', function(err, result) {
                    // Fire one INSERT per value (200,000 queries in total).
                    async.forEach(values, function(value, valueNext) {
                        client.query('INSERT INTO tbl ('
                            + 'col1,'
                            + 'col2) VALUES ($1,$2)', [
                            value,
                            generatedSomething(value)
                        ], function(err) {
                            valueNext(err);
                        });
                    }, function(err) {
                        if (err) { console.error(err); rollback(client); cb(err); return; }
                        // Re-create the index only after all rows are inserted.
                        client.query('CREATE INDEX idx ON tbl (col1)', function(err, result) {
                            cb(null);
                        });
                    });
                });
            });
        },
    ], function(err) {
        if (err) { return; }
        client.query('COMMIT', client.end.bind(client));
    });
});
There are some strategies I've applied to speed this up:
- Drop all indices before insertion, re-create them after all inserts are done ... ok
- Use TRUNCATE TABLE instead of DELETE FROM ... ok
- Use COPY FROM instead of INSERT INTO ... not done
It seems that using COPY FROM instead of INSERT INTO would have an effect, but it is used for importing CSV files, not for script-generated values.
So, does that mean that exporting the script-generated values to a temporary CSV file and importing them with COPY FROM is the most effective way to insert values into PostgreSQL quickly?
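If so, here is roughly what I imagine that would look like (a minimal, untested sketch; the file path, CSV formatting, and privilege details are my assumptions, and values, generatedSomething, client and rollback are the same as in the code above):

// Rough sketch of the "temporary CSV + COPY FROM" approach I'm considering.
// Assumes values and generatedSomething() produce no commas, quotes or
// newlines; real data would need proper CSV quoting.
var fs = require('fs');
var os = require('os');
var path = require('path');

var csvPath = path.join(os.tmpdir(), 'tbl_import.csv');

// Write one "col1,col2" line per value to the temporary file.
var lines = values.map(function(value) {
    return value + ',' + generatedSomething(value);
});
fs.writeFileSync(csvPath, lines.join('\n'));

// Let the server bulk-load the file in a single statement.
// COPY ... FROM '<file>' is read by the server process, so it needs
// server-side file access privileges and a path the server can reach
// (fine here, since the script and PostgreSQL run on the same laptop).
client.query(
    "COPY tbl (col1, col2) FROM '" + csvPath + "' WITH (FORMAT csv)",
    function(err) {
        if (err) { console.error(err); rollback(client); return; }
        client.query('COMMIT', client.end.bind(client));
    }
);

I have also seen that COPY supports FROM STDIN, which might avoid the temporary file entirely, but I haven't figured out how to drive that from node.js yet.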