Compare commits
20 Commits
v1.0.0
...
performanc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fd3cc95573 | ||
|
|
922627daaf | ||
|
|
61bc713e0c | ||
|
|
e15feb199a | ||
|
|
191a4ec16a | ||
|
|
399bff7ed7 | ||
|
|
8174e10fb5 | ||
|
|
f29aef3bba | ||
|
|
b2e108571e | ||
|
|
7003f6070f | ||
|
|
ade7ab95a0 | ||
|
|
9ccda04036 | ||
|
|
a5e532f20b | ||
|
|
2a4db2920e | ||
|
|
f155899570 | ||
|
|
18be125596 | ||
|
|
ae5b344395 | ||
|
|
ee84aba89f | ||
|
|
e0aa7db324 | ||
|
|
0f0ddf7ad4 |
12
README.md
12
README.md
@@ -42,10 +42,15 @@ pg.connect(function(err, client, done) {
|
||||
var stream = client.query(copyFrom('COPY my_table FROM STDIN'));
|
||||
var fileStream = fs.createReadStream('some_file.tsv')
|
||||
fileStream.on('error', done);
|
||||
fileStream.pipe(stream).on('finish', done).on('error', done);
|
||||
stream.on('error', done);
|
||||
stream.on('end', done);
|
||||
fileStream.pipe(stream);
|
||||
});
|
||||
```
|
||||
|
||||
*Important*: Even if `pg-copy-streams.from` is used as a Writable (via `pipe`), you should not listen for the 'finish' event and expect that the COPY command has already been correctly acknowledged by the database. Internally, a duplex stream is used to pipe the data into the database connection and the COPY command should be considered complete only when the 'end' event is triggered.
|
||||
|
||||
|
||||
## install
|
||||
|
||||
```sh
|
||||
@@ -56,7 +61,10 @@ $ npm install pg-copy-streams
|
||||
|
||||
This module __only__ works with the pure JavaScript bindings. If you're using `require('pg').native` please make sure to use normal `require('pg')` or `require('pg.js')` when you're using copy streams.
|
||||
|
||||
Before you set out on this magical piping journey, you _really_ should read this: http://www.postgresql.org/docs/9.3/static/sql-copy.html, and you might want to take a look at the [tests](https://github.com/brianc/node-pg-copy-streams/tree/master/test) to get an idea of how things work.
|
||||
Before you set out on this magical piping journey, you _really_ should read this: http://www.postgresql.org/docs/current/static/sql-copy.html, and you might want to take a look at the [tests](https://github.com/brianc/node-pg-copy-streams/tree/master/test) to get an idea of how things work.
|
||||
|
||||
Take note of the following warning in the PostgreSQL documentation:
|
||||
> COPY stops operation at the first error. This should not lead to problems in the event of a COPY TO, but the target table will already have received earlier rows in a COPY FROM. These rows will not be visible or accessible, but they still occupy disk space. This might amount to a considerable amount of wasted disk space if the failure happened well into a large copy operation. You might wish to invoke VACUUM to recover the wasted space.
|
||||
|
||||
## contributing
|
||||
|
||||
|
||||
105
copy-to.js
105
copy-to.js
@@ -4,11 +4,12 @@ module.exports = function(txt, options) {
|
||||
|
||||
var Transform = require('stream').Transform
|
||||
var util = require('util')
|
||||
var code = require('./message-formats')
|
||||
|
||||
var CopyStreamQuery = function(text, options) {
|
||||
Transform.call(this, options)
|
||||
this.text = text
|
||||
this._copyOutResponse = null
|
||||
this._gotCopyOutResponse = false
|
||||
this.rowCount = 0
|
||||
}
|
||||
|
||||
@@ -23,63 +24,89 @@ CopyStreamQuery.prototype.submit = function(connection) {
|
||||
connection.stream.pipe(this)
|
||||
}
|
||||
|
||||
var code = {
|
||||
E: 69, //Error
|
||||
H: 72, //CopyOutResponse
|
||||
d: 0x64, //CopyData
|
||||
c: 0x63 //CopyDone
|
||||
}
|
||||
|
||||
CopyStreamQuery.prototype._detach = function() {
|
||||
this.connection.stream.unpipe(this)
|
||||
// Unpipe can drop us out of flowing mode
|
||||
this.connection.stream.resume()
|
||||
}
|
||||
|
||||
|
||||
CopyStreamQuery.prototype._transform = function(chunk, enc, cb) {
|
||||
var offset = 0
|
||||
var Byte1Len = 1;
|
||||
var Int32Len = 4;
|
||||
if(this._remainder && chunk) {
|
||||
chunk = Buffer.concat([this._remainder, chunk])
|
||||
}
|
||||
if(!this._copyOutResponse) {
|
||||
this._copyOutResponse = true
|
||||
if(chunk[0] == code.E) {
|
||||
this._detach()
|
||||
this.push(null)
|
||||
return cb();
|
||||
|
||||
var length;
|
||||
var messageCode;
|
||||
var needPush = false;
|
||||
|
||||
var buffer = Buffer.alloc(chunk.length);
|
||||
var buffer_offset = 0;
|
||||
|
||||
this.pushBufferIfneeded = function() {
|
||||
if (needPush && buffer_offset > 0) {
|
||||
this.push(buffer.slice(0, buffer_offset))
|
||||
buffer_offset = 0;
|
||||
}
|
||||
if(chunk[0] != code.H) {
|
||||
this.emit('error', new Error('Expected copy out response'))
|
||||
}
|
||||
var length = chunk.readUInt32BE(1)
|
||||
offset = 1
|
||||
offset += length
|
||||
}
|
||||
while((chunk.length - offset) > 5) {
|
||||
|
||||
while((chunk.length - offset) >= (Byte1Len + Int32Len)) {
|
||||
var messageCode = chunk[offset]
|
||||
//complete or error
|
||||
if(messageCode == code.c || messageCode == code.E) {
|
||||
this._detach()
|
||||
this.push(null)
|
||||
return cb();
|
||||
|
||||
//console.log('PostgreSQL message ' + String.fromCharCode(messageCode))
|
||||
switch(messageCode) {
|
||||
|
||||
// detect COPY start
|
||||
case code.CopyOutResponse:
|
||||
if (!this._gotCopyOutResponse) {
|
||||
this._gotCopyOutResponse = true
|
||||
} else {
|
||||
this.emit('error', new Error('Unexpected CopyOutResponse message (H)'))
|
||||
}
|
||||
break;
|
||||
|
||||
// meaningful row
|
||||
case code.CopyData:
|
||||
needPush = true;
|
||||
break;
|
||||
|
||||
// standard interspersed messages. discard
|
||||
case code.ParameterStatus:
|
||||
case code.NoticeResponse:
|
||||
case code.NotificationResponse:
|
||||
break;
|
||||
|
||||
case code.ErrorResponse:
|
||||
case code.CopyDone:
|
||||
this.pushBufferIfneeded();
|
||||
this._detach()
|
||||
this.push(null)
|
||||
return cb();
|
||||
break;
|
||||
default:
|
||||
this.emit('error', new Error('Unexpected PostgreSQL message ' + String.fromCharCode(messageCode)))
|
||||
}
|
||||
//something bad happened
|
||||
if(messageCode != code.d) {
|
||||
return this.emit('error', new Error('expected "d" (copydata message)'))
|
||||
}
|
||||
var length = chunk.readUInt32BE(offset + 1) - 4 //subtract length of UInt32
|
||||
//can we read the next row?
|
||||
if(chunk.length > (offset + length + 5)) {
|
||||
offset += 5
|
||||
var slice = chunk.slice(offset, offset + length)
|
||||
offset += length
|
||||
this.push(slice)
|
||||
this.rowCount++
|
||||
|
||||
length = chunk.readUInt32BE(offset+Byte1Len)
|
||||
if(chunk.length >= (offset + Byte1Len + length)) {
|
||||
offset += Byte1Len + Int32Len
|
||||
if (needPush) {
|
||||
var row = chunk.slice(offset, offset + length - Int32Len)
|
||||
this.rowCount++
|
||||
row.copy(buffer, buffer_offset);
|
||||
buffer_offset += row.length;
|
||||
}
|
||||
offset += (length - Int32Len)
|
||||
} else {
|
||||
// we need more chunks for a complete message
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
this.pushBufferIfneeded();
|
||||
|
||||
if(chunk.length - offset) {
|
||||
var slice = chunk.slice(offset)
|
||||
this._remainder = slice
|
||||
|
||||
40
index.js
40
index.js
@@ -10,6 +10,7 @@ module.exports = {
|
||||
|
||||
var Transform = require('stream').Transform
|
||||
var util = require('util')
|
||||
var code = require('./message-formats')
|
||||
|
||||
var CopyStreamQuery = function(text, options) {
|
||||
Transform.call(this, options)
|
||||
@@ -26,28 +27,23 @@ CopyStreamQuery.prototype.submit = function(connection) {
|
||||
connection.query(this.text)
|
||||
}
|
||||
|
||||
var code = {
|
||||
H: 72, //CopyOutResponse
|
||||
d: 0x64, //CopyData
|
||||
c: 0x63 //CopyDone
|
||||
}
|
||||
|
||||
var copyDataBuffer = Buffer([code.d])
|
||||
var copyDataBuffer = Buffer([code.CopyData])
|
||||
CopyStreamQuery.prototype._transform = function(chunk, enc, cb) {
|
||||
var Int32Len = 4;
|
||||
this.push(copyDataBuffer)
|
||||
var lenBuffer = Buffer(4)
|
||||
lenBuffer.writeUInt32BE(chunk.length + 4, 0)
|
||||
var lenBuffer = Buffer(Int32Len)
|
||||
lenBuffer.writeUInt32BE(chunk.length + Int32Len, 0)
|
||||
this.push(lenBuffer)
|
||||
this.push(chunk)
|
||||
this.rowCount++
|
||||
cb()
|
||||
}
|
||||
|
||||
CopyStreamQuery.prototype._flush = function(cb) {
|
||||
var finBuffer = Buffer([code.c, 0, 0, 0, 4])
|
||||
var Int32Len = 4;
|
||||
var finBuffer = Buffer([code.CopyDone, 0, 0, 0, Int32Len])
|
||||
this.push(finBuffer)
|
||||
//never call this callback, do not close underlying stream
|
||||
//cb()
|
||||
this.cb_flush = cb
|
||||
}
|
||||
|
||||
CopyStreamQuery.prototype.handleError = function(e) {
|
||||
@@ -55,12 +51,24 @@ CopyStreamQuery.prototype.handleError = function(e) {
|
||||
}
|
||||
|
||||
CopyStreamQuery.prototype.handleCopyInResponse = function(connection) {
|
||||
this.pipe(connection.stream)
|
||||
this.pipe(connection.stream, { end: false })
|
||||
}
|
||||
|
||||
CopyStreamQuery.prototype.handleCommandComplete = function() {
|
||||
this.unpipe()
|
||||
this.emit('end')
|
||||
CopyStreamQuery.prototype.handleCommandComplete = function(msg) {
|
||||
// Parse affected row count as in
|
||||
// https://github.com/brianc/node-postgres/blob/35e5567f86774f808c2a8518dd312b8aa3586693/lib/result.js#L37
|
||||
var match = /COPY (\d+)/.exec((msg || {}).text)
|
||||
if (match) {
|
||||
this.rowCount = parseInt(match[1], 10)
|
||||
}
|
||||
|
||||
// we delay the _flush cb so that the 'end' event is
|
||||
// triggered after CommandComplete
|
||||
this.cb_flush()
|
||||
|
||||
// unpipe from connection
|
||||
this.unpipe(this.connection)
|
||||
this.connection = null
|
||||
}
|
||||
|
||||
CopyStreamQuery.prototype.handleReadyForQuery = function() {
|
||||
|
||||
25
message-formats.js
Normal file
25
message-formats.js
Normal file
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* The COPY feature uses the following protocol codes.
|
||||
* The codes for the most recent protocol version are documented on
|
||||
* https://www.postgresql.org/docs/current/static/protocol-message-formats.html
|
||||
*
|
||||
* The protocol flow itself is described on
|
||||
* https://www.postgresql.org/docs/current/static/protocol-flow.html
|
||||
*/
|
||||
module.exports = {
|
||||
ErrorResponse: 0x45, // E
|
||||
CopyInResponse: 0x47, // G
|
||||
CopyOutResponse: 0x48, // H
|
||||
CopyBothResponse: 0x57, // W
|
||||
CopyDone: 0x63, // c
|
||||
CopyData: 0x64, // d
|
||||
CopyFail: 0x66, // f
|
||||
|
||||
// It is possible for NoticeResponse and ParameterStatus messages to be interspersed between CopyData messages;
|
||||
// frontends must handle these cases, and should be prepared for other asynchronous message types as well
|
||||
// (see Section 50.2.6).
|
||||
// Otherwise, any message type other than CopyData or CopyDone may be treated as terminating copy-out mode.
|
||||
NotificationResponse: 0x41, // A
|
||||
NoticeResponse: 0x4E, // N
|
||||
ParameterStatus: 0x53 // S
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "pg-copy-streams",
|
||||
"version": "1.0.0",
|
||||
"version": "1.2.0",
|
||||
"description": "Low-Level COPY TO and COPY FROM streams for PostgreSQL in JavaScript using",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
|
||||
@@ -30,10 +30,6 @@ var testRange = function(top) {
|
||||
|
||||
var txt = 'COPY numbers FROM STDIN'
|
||||
var stream = fromClient.query(copy(txt))
|
||||
var rowEmitCount = 0
|
||||
stream.on('row', function() {
|
||||
rowEmitCount++
|
||||
})
|
||||
for(var i = 0; i < top; i++) {
|
||||
stream.write(Buffer('' + i + '\t' + i*10 + '\n'))
|
||||
}
|
||||
@@ -43,6 +39,7 @@ var testRange = function(top) {
|
||||
fromClient.query('SELECT COUNT(*) FROM numbers', function(err, res) {
|
||||
assert.ifError(err)
|
||||
assert.equal(res.rows[0].count, top, 'expected ' + top + ' rows but got ' + res.rows[0].count)
|
||||
assert.equal(stream.rowCount, top, 'expected ' + top + ' rows but db count is ' + stream.rowCount)
|
||||
//console.log('found ', res.rows.length, 'rows')
|
||||
countDone()
|
||||
var firstRowDone = gonna('have correct result')
|
||||
@@ -58,3 +55,19 @@ var testRange = function(top) {
|
||||
}
|
||||
|
||||
testRange(1000)
|
||||
|
||||
var testSingleEnd = function() {
|
||||
var fromClient = client()
|
||||
fromClient.query('CREATE TEMP TABLE numbers(num int)')
|
||||
var txt = 'COPY numbers FROM STDIN';
|
||||
var stream = fromClient.query(copy(txt))
|
||||
var count = 0;
|
||||
stream.on('end', function() {
|
||||
count++;
|
||||
assert(count==1, '`end` Event was triggered ' + count + ' times');
|
||||
if (count == 1) fromClient.end();
|
||||
})
|
||||
stream.end(Buffer('1\n'))
|
||||
|
||||
}
|
||||
testSingleEnd()
|
||||
|
||||
@@ -7,6 +7,7 @@ var concat = require('concat-stream')
|
||||
var pg = require('pg')
|
||||
|
||||
var copy = require('../').to
|
||||
var code = require('../message-formats')
|
||||
|
||||
var client = function() {
|
||||
var client = new pg.Client()
|
||||
@@ -22,6 +23,18 @@ var testConstruction = function() {
|
||||
|
||||
testConstruction()
|
||||
|
||||
var testComparators = function() {
|
||||
var copy1 = copy();
|
||||
copy1.pipe(concat(function(buf) {
|
||||
assert(copy1._gotCopyOutResponse, 'should have received CopyOutResponse')
|
||||
assert(!copy1._remainder, 'Message with no additional data (len=Int4Len+0) should not leave a remainder')
|
||||
}))
|
||||
copy1.end(new Buffer([code.CopyOutResponse, 0x00, 0x00, 0x00, 0x04]));
|
||||
|
||||
|
||||
}
|
||||
testComparators();
|
||||
|
||||
var testRange = function(top) {
|
||||
var fromClient = client()
|
||||
var txt = 'COPY (SELECT * from generate_series(0, ' + (top - 1) + ')) TO STDOUT'
|
||||
@@ -73,5 +86,37 @@ var testInternalPostgresError = function() {
|
||||
queryClient.end()
|
||||
})
|
||||
}
|
||||
|
||||
testInternalPostgresError()
|
||||
|
||||
var testNoticeResponse = function() {
|
||||
// we use a special trick to generate a warning
|
||||
// on the copy stream.
|
||||
var queryClient = client()
|
||||
var set = '';
|
||||
set += 'SET SESSION client_min_messages = WARNING;'
|
||||
set += 'SET SESSION standard_conforming_strings = off;'
|
||||
set += 'SET SESSION escape_string_warning = on;'
|
||||
queryClient.query(set, function(err, res) {
|
||||
assert.equal(err, null, 'testNoticeResponse - could not SET parameters')
|
||||
var runStream = function(callback) {
|
||||
var txt = "COPY (SELECT '\\\n') TO STDOUT"
|
||||
var stream = queryClient.query(copy(txt))
|
||||
stream.on('data', function(data) {
|
||||
})
|
||||
stream.on('error', callback)
|
||||
|
||||
// make sure stream is pulled from
|
||||
stream.pipe(concat(callback.bind(null,null)))
|
||||
}
|
||||
|
||||
runStream(function(err) {
|
||||
assert.equal(err, null, err)
|
||||
queryClient.end()
|
||||
})
|
||||
|
||||
})
|
||||
}
|
||||
|
||||
testNoticeResponse();
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user