Compare commits
5 Commits
performanc
...
PushSpring
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
be7af371d8 | ||
|
|
17697e98d7 | ||
|
|
bd4a87d3a0 | ||
|
|
9d197a91e1 | ||
|
|
e1ce9a3948 |
12
README.md
12
README.md
@@ -42,15 +42,10 @@ pg.connect(function(err, client, done) {
|
|||||||
var stream = client.query(copyFrom('COPY my_table FROM STDIN'));
|
var stream = client.query(copyFrom('COPY my_table FROM STDIN'));
|
||||||
var fileStream = fs.createReadStream('some_file.tsv')
|
var fileStream = fs.createReadStream('some_file.tsv')
|
||||||
fileStream.on('error', done);
|
fileStream.on('error', done);
|
||||||
stream.on('error', done);
|
fileStream.pipe(stream).on('finish', done).on('error', done);
|
||||||
stream.on('end', done);
|
|
||||||
fileStream.pipe(stream);
|
|
||||||
});
|
});
|
||||||
```
|
```
|
||||||
|
|
||||||
*Important*: Even if `pg-copy-streams.from` is used as a Writable (via `pipe`), you should not listen for the 'finish' event and expect that the COPY command has already been correctly acknowledged by the database. Internally, a duplex stream is used to pipe the data into the database connection and the COPY command should be considered complete only when the 'end' event is triggered.
|
|
||||||
|
|
||||||
|
|
||||||
## install
|
## install
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
@@ -61,10 +56,7 @@ $ npm install pg-copy-streams
|
|||||||
|
|
||||||
This module __only__ works with the pure JavaScript bindings. If you're using `require('pg').native` please make sure to use normal `require('pg')` or `require('pg.js')` when you're using copy streams.
|
This module __only__ works with the pure JavaScript bindings. If you're using `require('pg').native` please make sure to use normal `require('pg')` or `require('pg.js')` when you're using copy streams.
|
||||||
|
|
||||||
Before you set out on this magical piping journey, you _really_ should read this: http://www.postgresql.org/docs/current/static/sql-copy.html, and you might want to take a look at the [tests](https://github.com/brianc/node-pg-copy-streams/tree/master/test) to get an idea of how things work.
|
Before you set out on this magical piping journey, you _really_ should read this: http://www.postgresql.org/docs/9.3/static/sql-copy.html, and you might want to take a look at the [tests](https://github.com/brianc/node-pg-copy-streams/tree/master/test) to get an idea of how things work.
|
||||||
|
|
||||||
Take note of the following warning in the PostgreSQL documentation:
|
|
||||||
> COPY stops operation at the first error. This should not lead to problems in the event of a COPY TO, but the target table will already have received earlier rows in a COPY FROM. These rows will not be visible or accessible, but they still occupy disk space. This might amount to a considerable amount of wasted disk space if the failure happened well into a large copy operation. You might wish to invoke VACUUM to recover the wasted space.
|
|
||||||
|
|
||||||
## contributing
|
## contributing
|
||||||
|
|
||||||
|
|||||||
105
copy-to.js
105
copy-to.js
@@ -4,12 +4,11 @@ module.exports = function(txt, options) {
|
|||||||
|
|
||||||
var Transform = require('stream').Transform
|
var Transform = require('stream').Transform
|
||||||
var util = require('util')
|
var util = require('util')
|
||||||
var code = require('./message-formats')
|
|
||||||
|
|
||||||
var CopyStreamQuery = function(text, options) {
|
var CopyStreamQuery = function(text, options) {
|
||||||
Transform.call(this, options)
|
Transform.call(this, options)
|
||||||
this.text = text
|
this.text = text
|
||||||
this._gotCopyOutResponse = false
|
this._copyOutResponse = null
|
||||||
this.rowCount = 0
|
this.rowCount = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -24,89 +23,63 @@ CopyStreamQuery.prototype.submit = function(connection) {
|
|||||||
connection.stream.pipe(this)
|
connection.stream.pipe(this)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var code = {
|
||||||
|
E: 69, //Error
|
||||||
|
H: 72, //CopyOutResponse
|
||||||
|
d: 0x64, //CopyData
|
||||||
|
c: 0x63 //CopyDone
|
||||||
|
}
|
||||||
|
|
||||||
CopyStreamQuery.prototype._detach = function() {
|
CopyStreamQuery.prototype._detach = function() {
|
||||||
this.connection.stream.unpipe(this)
|
this.connection.stream.unpipe(this)
|
||||||
// Unpipe can drop us out of flowing mode
|
// Unpipe can drop us out of flowing mode
|
||||||
this.connection.stream.resume()
|
this.connection.stream.resume()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
CopyStreamQuery.prototype._transform = function(chunk, enc, cb) {
|
CopyStreamQuery.prototype._transform = function(chunk, enc, cb) {
|
||||||
var offset = 0
|
var offset = 0
|
||||||
var Byte1Len = 1;
|
|
||||||
var Int32Len = 4;
|
|
||||||
if(this._remainder && chunk) {
|
if(this._remainder && chunk) {
|
||||||
chunk = Buffer.concat([this._remainder, chunk])
|
chunk = Buffer.concat([this._remainder, chunk])
|
||||||
}
|
}
|
||||||
|
if(!this._copyOutResponse) {
|
||||||
var length;
|
this._copyOutResponse = true
|
||||||
var messageCode;
|
if(chunk[0] == code.E) {
|
||||||
var needPush = false;
|
this._detach()
|
||||||
|
this.push(null)
|
||||||
var buffer = Buffer.alloc(chunk.length);
|
return cb();
|
||||||
var buffer_offset = 0;
|
|
||||||
|
|
||||||
this.pushBufferIfneeded = function() {
|
|
||||||
if (needPush && buffer_offset > 0) {
|
|
||||||
this.push(buffer.slice(0, buffer_offset))
|
|
||||||
buffer_offset = 0;
|
|
||||||
}
|
}
|
||||||
|
if(chunk[0] != code.H) {
|
||||||
|
this.emit('error', new Error('Expected copy out response'))
|
||||||
|
}
|
||||||
|
var length = chunk.readUInt32BE(1)
|
||||||
|
offset = 1
|
||||||
|
offset += length
|
||||||
}
|
}
|
||||||
|
while((chunk.length - offset) > 5) {
|
||||||
while((chunk.length - offset) >= (Byte1Len + Int32Len)) {
|
|
||||||
var messageCode = chunk[offset]
|
var messageCode = chunk[offset]
|
||||||
|
//complete or error
|
||||||
//console.log('PostgreSQL message ' + String.fromCharCode(messageCode))
|
if(messageCode == code.c || messageCode == code.E) {
|
||||||
switch(messageCode) {
|
this._detach()
|
||||||
|
this.push(null)
|
||||||
// detect COPY start
|
return cb();
|
||||||
case code.CopyOutResponse:
|
|
||||||
if (!this._gotCopyOutResponse) {
|
|
||||||
this._gotCopyOutResponse = true
|
|
||||||
} else {
|
|
||||||
this.emit('error', new Error('Unexpected CopyOutResponse message (H)'))
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
// meaningful row
|
|
||||||
case code.CopyData:
|
|
||||||
needPush = true;
|
|
||||||
break;
|
|
||||||
|
|
||||||
// standard interspersed messages. discard
|
|
||||||
case code.ParameterStatus:
|
|
||||||
case code.NoticeResponse:
|
|
||||||
case code.NotificationResponse:
|
|
||||||
break;
|
|
||||||
|
|
||||||
case code.ErrorResponse:
|
|
||||||
case code.CopyDone:
|
|
||||||
this.pushBufferIfneeded();
|
|
||||||
this._detach()
|
|
||||||
this.push(null)
|
|
||||||
return cb();
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
this.emit('error', new Error('Unexpected PostgreSQL message ' + String.fromCharCode(messageCode)))
|
|
||||||
}
|
}
|
||||||
|
//something bad happened
|
||||||
length = chunk.readUInt32BE(offset+Byte1Len)
|
if(messageCode != code.d) {
|
||||||
if(chunk.length >= (offset + Byte1Len + length)) {
|
return this.emit('error', new Error('expected "d" (copydata message)'))
|
||||||
offset += Byte1Len + Int32Len
|
}
|
||||||
if (needPush) {
|
var length = chunk.readUInt32BE(offset + 1) - 4 //subtract length of UInt32
|
||||||
var row = chunk.slice(offset, offset + length - Int32Len)
|
//can we read the next row?
|
||||||
this.rowCount++
|
if(chunk.length > (offset + length + 5)) {
|
||||||
row.copy(buffer, buffer_offset);
|
offset += 5
|
||||||
buffer_offset += row.length;
|
var slice = chunk.slice(offset, offset + length)
|
||||||
}
|
offset += length
|
||||||
offset += (length - Int32Len)
|
this.push(slice)
|
||||||
|
this.rowCount++
|
||||||
} else {
|
} else {
|
||||||
// we need more chunks for a complete message
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this.pushBufferIfneeded();
|
|
||||||
|
|
||||||
if(chunk.length - offset) {
|
if(chunk.length - offset) {
|
||||||
var slice = chunk.slice(offset)
|
var slice = chunk.slice(offset)
|
||||||
this._remainder = slice
|
this._remainder = slice
|
||||||
|
|||||||
40
index.js
40
index.js
@@ -10,7 +10,6 @@ module.exports = {
|
|||||||
|
|
||||||
var Transform = require('stream').Transform
|
var Transform = require('stream').Transform
|
||||||
var util = require('util')
|
var util = require('util')
|
||||||
var code = require('./message-formats')
|
|
||||||
|
|
||||||
var CopyStreamQuery = function(text, options) {
|
var CopyStreamQuery = function(text, options) {
|
||||||
Transform.call(this, options)
|
Transform.call(this, options)
|
||||||
@@ -27,23 +26,28 @@ CopyStreamQuery.prototype.submit = function(connection) {
|
|||||||
connection.query(this.text)
|
connection.query(this.text)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var code = {
|
||||||
|
H: 72, //CopyOutResponse
|
||||||
|
d: 0x64, //CopyData
|
||||||
|
c: 0x63 //CopyDone
|
||||||
|
}
|
||||||
|
|
||||||
var copyDataBuffer = Buffer([code.CopyData])
|
var copyDataBuffer = Buffer([code.d])
|
||||||
CopyStreamQuery.prototype._transform = function(chunk, enc, cb) {
|
CopyStreamQuery.prototype._transform = function(chunk, enc, cb) {
|
||||||
var Int32Len = 4;
|
|
||||||
this.push(copyDataBuffer)
|
this.push(copyDataBuffer)
|
||||||
var lenBuffer = Buffer(Int32Len)
|
var lenBuffer = Buffer(4)
|
||||||
lenBuffer.writeUInt32BE(chunk.length + Int32Len, 0)
|
lenBuffer.writeUInt32BE(chunk.length + 4, 0)
|
||||||
this.push(lenBuffer)
|
this.push(lenBuffer)
|
||||||
this.push(chunk)
|
this.push(chunk)
|
||||||
|
this.rowCount++
|
||||||
cb()
|
cb()
|
||||||
}
|
}
|
||||||
|
|
||||||
CopyStreamQuery.prototype._flush = function(cb) {
|
CopyStreamQuery.prototype._flush = function(cb) {
|
||||||
var Int32Len = 4;
|
var finBuffer = Buffer([code.c, 0, 0, 0, 4])
|
||||||
var finBuffer = Buffer([code.CopyDone, 0, 0, 0, Int32Len])
|
|
||||||
this.push(finBuffer)
|
this.push(finBuffer)
|
||||||
this.cb_flush = cb
|
//never call this callback, do not close underlying stream
|
||||||
|
//cb()
|
||||||
}
|
}
|
||||||
|
|
||||||
CopyStreamQuery.prototype.handleError = function(e) {
|
CopyStreamQuery.prototype.handleError = function(e) {
|
||||||
@@ -51,24 +55,12 @@ CopyStreamQuery.prototype.handleError = function(e) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
CopyStreamQuery.prototype.handleCopyInResponse = function(connection) {
|
CopyStreamQuery.prototype.handleCopyInResponse = function(connection) {
|
||||||
this.pipe(connection.stream, { end: false })
|
this.pipe(connection.stream)
|
||||||
}
|
}
|
||||||
|
|
||||||
CopyStreamQuery.prototype.handleCommandComplete = function(msg) {
|
CopyStreamQuery.prototype.handleCommandComplete = function() {
|
||||||
// Parse affected row count as in
|
this.unpipe()
|
||||||
// https://github.com/brianc/node-postgres/blob/35e5567f86774f808c2a8518dd312b8aa3586693/lib/result.js#L37
|
this.emit('end')
|
||||||
var match = /COPY (\d+)/.exec((msg || {}).text)
|
|
||||||
if (match) {
|
|
||||||
this.rowCount = parseInt(match[1], 10)
|
|
||||||
}
|
|
||||||
|
|
||||||
// we delay the _flush cb so that the 'end' event is
|
|
||||||
// triggered after CommandComplete
|
|
||||||
this.cb_flush()
|
|
||||||
|
|
||||||
// unpipe from connection
|
|
||||||
this.unpipe(this.connection)
|
|
||||||
this.connection = null
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CopyStreamQuery.prototype.handleReadyForQuery = function() {
|
CopyStreamQuery.prototype.handleReadyForQuery = function() {
|
||||||
|
|||||||
@@ -1,25 +0,0 @@
|
|||||||
/**
|
|
||||||
* The COPY feature uses the following protocol codes.
|
|
||||||
* The codes for the most recent protocol version are documented on
|
|
||||||
* https://www.postgresql.org/docs/current/static/protocol-message-formats.html
|
|
||||||
*
|
|
||||||
* The protocol flow itself is described on
|
|
||||||
* https://www.postgresql.org/docs/current/static/protocol-flow.html
|
|
||||||
*/
|
|
||||||
module.exports = {
|
|
||||||
ErrorResponse: 0x45, // E
|
|
||||||
CopyInResponse: 0x47, // G
|
|
||||||
CopyOutResponse: 0x48, // H
|
|
||||||
CopyBothResponse: 0x57, // W
|
|
||||||
CopyDone: 0x63, // c
|
|
||||||
CopyData: 0x64, // d
|
|
||||||
CopyFail: 0x66, // f
|
|
||||||
|
|
||||||
// It is possible for NoticeResponse and ParameterStatus messages to be interspersed between CopyData messages;
|
|
||||||
// frontends must handle these cases, and should be prepared for other asynchronous message types as well
|
|
||||||
// (see Section 50.2.6).
|
|
||||||
// Otherwise, any message type other than CopyData or CopyDone may be treated as terminating copy-out mode.
|
|
||||||
NotificationResponse: 0x41, // A
|
|
||||||
NoticeResponse: 0x4E, // N
|
|
||||||
ParameterStatus: 0x53 // S
|
|
||||||
}
|
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "pg-copy-streams",
|
"name": "pg-copy-streams",
|
||||||
"version": "1.2.0",
|
"version": "0.3.0",
|
||||||
"description": "Low-Level COPY TO and COPY FROM streams for PostgreSQL in JavaScript using",
|
"description": "Low-Level COPY TO and COPY FROM streams for PostgreSQL in JavaScript using",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|||||||
@@ -30,6 +30,10 @@ var testRange = function(top) {
|
|||||||
|
|
||||||
var txt = 'COPY numbers FROM STDIN'
|
var txt = 'COPY numbers FROM STDIN'
|
||||||
var stream = fromClient.query(copy(txt))
|
var stream = fromClient.query(copy(txt))
|
||||||
|
var rowEmitCount = 0
|
||||||
|
stream.on('row', function() {
|
||||||
|
rowEmitCount++
|
||||||
|
})
|
||||||
for(var i = 0; i < top; i++) {
|
for(var i = 0; i < top; i++) {
|
||||||
stream.write(Buffer('' + i + '\t' + i*10 + '\n'))
|
stream.write(Buffer('' + i + '\t' + i*10 + '\n'))
|
||||||
}
|
}
|
||||||
@@ -39,7 +43,6 @@ var testRange = function(top) {
|
|||||||
fromClient.query('SELECT COUNT(*) FROM numbers', function(err, res) {
|
fromClient.query('SELECT COUNT(*) FROM numbers', function(err, res) {
|
||||||
assert.ifError(err)
|
assert.ifError(err)
|
||||||
assert.equal(res.rows[0].count, top, 'expected ' + top + ' rows but got ' + res.rows[0].count)
|
assert.equal(res.rows[0].count, top, 'expected ' + top + ' rows but got ' + res.rows[0].count)
|
||||||
assert.equal(stream.rowCount, top, 'expected ' + top + ' rows but db count is ' + stream.rowCount)
|
|
||||||
//console.log('found ', res.rows.length, 'rows')
|
//console.log('found ', res.rows.length, 'rows')
|
||||||
countDone()
|
countDone()
|
||||||
var firstRowDone = gonna('have correct result')
|
var firstRowDone = gonna('have correct result')
|
||||||
@@ -55,19 +58,3 @@ var testRange = function(top) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
testRange(1000)
|
testRange(1000)
|
||||||
|
|
||||||
var testSingleEnd = function() {
|
|
||||||
var fromClient = client()
|
|
||||||
fromClient.query('CREATE TEMP TABLE numbers(num int)')
|
|
||||||
var txt = 'COPY numbers FROM STDIN';
|
|
||||||
var stream = fromClient.query(copy(txt))
|
|
||||||
var count = 0;
|
|
||||||
stream.on('end', function() {
|
|
||||||
count++;
|
|
||||||
assert(count==1, '`end` Event was triggered ' + count + ' times');
|
|
||||||
if (count == 1) fromClient.end();
|
|
||||||
})
|
|
||||||
stream.end(Buffer('1\n'))
|
|
||||||
|
|
||||||
}
|
|
||||||
testSingleEnd()
|
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ var concat = require('concat-stream')
|
|||||||
var pg = require('pg')
|
var pg = require('pg')
|
||||||
|
|
||||||
var copy = require('../').to
|
var copy = require('../').to
|
||||||
var code = require('../message-formats')
|
|
||||||
|
|
||||||
var client = function() {
|
var client = function() {
|
||||||
var client = new pg.Client()
|
var client = new pg.Client()
|
||||||
@@ -23,18 +22,6 @@ var testConstruction = function() {
|
|||||||
|
|
||||||
testConstruction()
|
testConstruction()
|
||||||
|
|
||||||
var testComparators = function() {
|
|
||||||
var copy1 = copy();
|
|
||||||
copy1.pipe(concat(function(buf) {
|
|
||||||
assert(copy1._gotCopyOutResponse, 'should have received CopyOutResponse')
|
|
||||||
assert(!copy1._remainder, 'Message with no additional data (len=Int4Len+0) should not leave a remainder')
|
|
||||||
}))
|
|
||||||
copy1.end(new Buffer([code.CopyOutResponse, 0x00, 0x00, 0x00, 0x04]));
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
testComparators();
|
|
||||||
|
|
||||||
var testRange = function(top) {
|
var testRange = function(top) {
|
||||||
var fromClient = client()
|
var fromClient = client()
|
||||||
var txt = 'COPY (SELECT * from generate_series(0, ' + (top - 1) + ')) TO STDOUT'
|
var txt = 'COPY (SELECT * from generate_series(0, ' + (top - 1) + ')) TO STDOUT'
|
||||||
@@ -86,37 +73,5 @@ var testInternalPostgresError = function() {
|
|||||||
queryClient.end()
|
queryClient.end()
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
testInternalPostgresError()
|
testInternalPostgresError()
|
||||||
|
|
||||||
var testNoticeResponse = function() {
|
|
||||||
// we use a special trick to generate a warning
|
|
||||||
// on the copy stream.
|
|
||||||
var queryClient = client()
|
|
||||||
var set = '';
|
|
||||||
set += 'SET SESSION client_min_messages = WARNING;'
|
|
||||||
set += 'SET SESSION standard_conforming_strings = off;'
|
|
||||||
set += 'SET SESSION escape_string_warning = on;'
|
|
||||||
queryClient.query(set, function(err, res) {
|
|
||||||
assert.equal(err, null, 'testNoticeResponse - could not SET parameters')
|
|
||||||
var runStream = function(callback) {
|
|
||||||
var txt = "COPY (SELECT '\\\n') TO STDOUT"
|
|
||||||
var stream = queryClient.query(copy(txt))
|
|
||||||
stream.on('data', function(data) {
|
|
||||||
})
|
|
||||||
stream.on('error', callback)
|
|
||||||
|
|
||||||
// make sure stream is pulled from
|
|
||||||
stream.pipe(concat(callback.bind(null,null)))
|
|
||||||
}
|
|
||||||
|
|
||||||
runStream(function(err) {
|
|
||||||
assert.equal(err, null, err)
|
|
||||||
queryClient.end()
|
|
||||||
})
|
|
||||||
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
testNoticeResponse();
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user