AWS S3 CreateReadStream in a loop only reads and writes 1 file - javascript

I am trying to retrieve multiple files from S3 using a readstream, and insert them into a single file locally.
Below, the 'output' variable is the single writestream I wish to append to using the downloaded S3 file data.
I am looping through days where the nextDay variable is used for the S3 key. The fileservice.s3Handler.getS3Obj returns an S3 object which allows a readstream for a single file and appending to the output file.
However, no other files are being read and are not showing the console either using the on('data', ()) method.
I tried to wrap the readstream in a promise to try to wait until the read was finished but it is running the same error.
More recently I keep get this error: "ERR_STREAM_WRITE_AFTER_END"
Not sure what is going wrong here.
async fetchCSV(req, res) {
const output = fs.createWriteStream(outputPathWithFile, {
'flags': 'a'});
let nextDay = startDate;
while (nextDay !== endDate) {
const s3path = path.join(`${req.params.stationId}`, `${nextDay}.csv`);
const file = await this.fileService.s3Handler.getS3Obj(s3path);
await this.completePipe(file, output);
nextDay = await getTomorrow(nextDay);
}
}
completePipe(file, output) {
return new Promise((resolve) => {
file.createReadStream().on('finish', () => {
resolve();
}).on('error', (err) => {
resolve();
}).on('data', (data) => {
console.log(data.toString());
}).pipe(output);
})
}
}
getS3Obj(file) {
return new Promise(async (resolve) => {
const getParams = {
Bucket: this.bucket,
Key: file
};
resolve(this.s3.getObject(getParams, (err) => {
if (err) {
console.log('Error in getS3 object')
}
}));
})
}
Please help me?

Solved it.
Did a couple things:
Added a tag to the pipe method.
stream.pipe(output, {end: false})
Instead of creating a new function for the promise I just put this code in instead:
await new Promise((resolve) => {
stream.once('finish', () => {
resolve();
});
});
But the tag was what made it work, the promise was just a tidy up.
Yay.

Related

Promise function doesn't trigger after another promise

I'm working on a microcontroller that would either take docx files or html strings in input and would transform it into a singular pdf file and return its link as an ouput.
My code looks like this so far:
// 'files' is an array of uploaded docx files.
const uploaded = files.map((file) => {
return new Promise((resolve, reject) => {
pump(
file.toBuffer(),
fs.createWriteStream(join(__dirname, 'files', file.filename))
.on('finish', resolve)
)
})
})
Promise.all(uploaded)
// Is triggered
.then(async () => await convertFiles())
// Is not triggered
.then(async () => {
// concatStoreFiles() is an external function because I need it somewhere else too
test = await concatStoreFiles(join(__dirname, 'files'))
console.log({test})
res.send(test)
})
const convertFiles = () => {
return new Promise((resolve, reject) => {
const cmd = `soffice --headless --convert-to pdf --outdir ${join(__dirname, 'files')} ${join(__dirname, 'files', '*.*')}`
exec(cmd, (error, stdout, stderror) => {
if (error) console.warn(error)
resolve(stdout ?? stderror)
})
})
}
concatStoreFile.js
module.exports = async function concatFiles (dirPath, outPath) {
return new Promise ((resolve, reject) => {
const existingFiles = []
fs.readdir(dirPath, (e, files) => {
files.forEach((file) => {
// is added to the files list only if finishing with ".pdf"
if (/[\d\w_-]+.pdf/.matches(file)) {
existingFiles.push(file)
}
});
resolve(existingFiles)
})
})
}
I'm working with Insomnia for my development / test process, and it tells me that I get an empty response. However, I'm supposed to get an array of pdf files existing in a specific directory. I'm not even getting console.log({test}), so I don't think my second then() is triggered.
I'm really rusty with async / await and Promise syntaxes, what should I do in this situation?
Thank you in advance
The #fastify/multipart's toBuffer() API returns a Promise, not a buffer. Checkout this article
So you need to write something like:
const uploaded = files.map(processFile)
async function processFile (file) {
const buffer = await file.toBuffer()
const storedFileName = join(__dirname, 'files', file.filename)
const writeStream = fs.createWriteStream(storedFileName)
return new Promise((resolve, reject) => {
pump(buffer, writeStream, (err) => {
if(err) { return reject(err) }
resolve(storedFileName)
})
}
}
Moreover, to improve the code, I returned the storedFileName instead of recalculating it.
You can convert this:
.then(async () => await convertFiles())
to this:
.then(() => convertFiles())
Mixing async/await and promise then/catch leads to hidden bugs hard to find

Firebase Google Cloud Function: createReadStream results in empty file

I try to process a Video file (stored in Google Firebase storage) through a Google Cloud Function. I have working code that download the entire video files into the NodeJS Google cloud function: await bucket.file(filePath).download({ destination: tempFile }).
But the goal is only to read the framerate, therefore the headers of the videofile would suffice. But createReadStream gives me an empty tempFile. Any advise much appreciated!
exports.checkFramerate = functions.region('europe-west1').storage.object().onFinalize(async (object, context) => {
const bucket = admin.storage().bucket(object.bucket); // Bucket class
const filePath = object.name; // videos/xbEXdMNFb1Blbd9r2E8m/comp_test.mp4
const fileName = filePath.split('/').pop(); // comp_test.mp4
const bucketDir = path.dirname(filePath); // videos/xbEXdMNFb1Blbd9r2E8m
const tempFile = path.join(os.tmpdir(), 'temp.mp4')
fs.closeSync(fs.openSync(tempFile, 'w'))
console.log("tempFile size1", fs.statSync(tempFile).size)
// await bucket.file(filePath).download({ destination: tempFile }); // this works: tempFile size2 = 3180152
await bucket.file(filePath).createReadStream({ // this does not work: tempFile size2 = 0
start: 10000,
end: 20000
})
.on('error', function(err) {console.log(err)})
.pipe(fs.createWriteStream(tempFile));
console.log("tempFile size2", fs.statSync(tempFile).size)
mi(tempFile).then(data => {
console.log("frameRate", data[0].general.frame_rate[0])
return data[0].general.frame_rate[0];
}).catch(e => {console.error(e)});
});
I tried implementing even the example of https://googleapis.dev/nodejs/storage/latest/File.html#createReadStream but to no avail. remoteFile.download works beautifully but remoteFile.createReadStream gives me empty files...
const remoteFile = bucket.file(filePath);
const localFilename = tempFile;
remoteFile.createReadStream()
.on('error', function(err) {})
.on('response', function(response) {})
.on('end', function() {})
.pipe(fs.createWriteStream(localFilename));
fs.stat(localFilename, (err, stats) => {
if (err) {console.log(err)}
return console.log("stats async",stats.size)
})
as mentioned, promise should be used
reading json file example
let buf = '';
const loadData = async () => {
return await new Promise((resolve, reject) => {
storage.bucket('bucket-name').file('test-config.json')
.createReadStream()
.on('error', reject)
.on('data', function(d) {
buf += d;
}).on('end', function() {
resolve(buf)
});
})
}
const data = await loadData()
Your problem is that the stream API isn't promisifed. So, the await does nothing, and your function continues before the stream is piped, and the file is still zero-length when you stat it the second time.
The download method works just fine because it returns a Promise.
This answer outlines the general approach you need to take. In summary though, you basically want the section of your code that does the piping to read like this:
const stream = bucket.file(filePath).createReadStream({
start: 10000,
end: 20000
})
.pipe(fs.createWriteStream(tempFile));
await new Promise((resolve, reject) => {
stream.on('finish', resolve);
stream.on('error', reject);
});
console.log("tempFile size2", fs.statSync(tempFile).size)
Your function will then wait until the finish event occurs when the piping is complete and the stream is closed. Obviously you probably want to do something more clever with the error handler too, but this is the general form of what you need.

Write createWriteStream txt contents to a global variable with Node.js

I am trying to download txt and mp3 files and use the content of them in another node module.
How can I create a global variable with the piped contents from the downloaded txt (and MP3 at a later stage) file to use outside of the FTP function?
async function example() {
var finalData = '';
const client = new ftp.Client()
client.ftp.verbose = true
try {
await client.access({
host: "XXXX",
user: "XXXX",
password: "XXXX",
})
await client.upload(fs.createReadStream("README.txt"), myFileNameWithExtension)
//let writeStream = fs.createWriteStream('/tmp/' + myFileNameWithExtension);
//await client.download(writeStream, myFileNameWithExtension)
finalData = await (() => {
return new Promise((resolve, reject) => {
writeStream
.on('finish', () => {
// Create a global variable to be used outside of the FTP function scope to pipe the txt content into another node mogule
})
.on('error', (err) => {
console.log(err);
reject(err);
})
})
})();
}
catch (err) {
console.log(err)
}
client.close();
return finalData;
}
No, don't create any global variables. Just resolve the promise with the data:
var finalData = await new Promise((resolve, reject) => {
writeStream.on('finish', () => {
resolve(); // pass a value
}).on('error', (err) => {
reject(err);
});
});
The finalData will become whatever value you pass into resolve(…) - I don't know what result you want to pass there. In the end, just return the data from your example function (as you already do), so that the caller will be able to use it after waiting for the returned promise.

ssh2-sftp-client get multiple files - error "write after end"

I am trying to load multiple files from ftp but I am getting error: "write after end". I am not able to find the issue.
connections.js
async function getFtpFileContent(_path) {
const ftpConfig = {
host: config.FTPhost.replace('https://', ''),
username: config.FTPusername,
password: config.FTPpassword
};
let ftpFileContent;
let sftp = new StfpClient();
try {
await sftp.connect(ftpConfig);
const stream = await sftp.get(_path);
ftpFileContent = await readStream(sftp, stream);
stream.close();
await sftp.end();
} catch (error) {
console.log('FTP Error: \n', error);
}
return ftpFileContent;
}
function readStream(sftp, stream) {
return new Promise((resolve, reject) => {
let body;
stream.on('data', chunk => {
body += chunk;
});
stream.on('end', () => {
resolve(body);
});
stream.on('error', err => reject(err));
});
}
calling in the next method:
async function getFiles(req, res) {
// first file
let fileContent = await conn.getFtpFileContent(filePath);
// second file
let fileContent2 = await conn.getFtpFileContent(filePath2);
}
When I call the method getFtpFileContent second time I got the mentioned error ("write after end").
Could you please help me? Where I doing the mistake?
I'm not sure where your problem is but should you be calling stream.close() in the getFtpFileContent function? I don't see anything about a close method for a stream, only a close event.
I found the issue. The issue was in CSV converter which I used later. The converter used writable stream which was not close after convert.

Accessing the Node ReadStream

I have a piece of code which is wrapped in a promise. That piece of code reads an image form http, does various things, and at the end sends it to aws.s3.putObject. It looks like this (simplified):
Please note form is multiparty object.
form.on('part', (part) => {//form is multiparty
fileCount++;
let tmpFile = path.join(os.tmpDir(), `${userId}_${timePrefix}_${path.basename(part.filename)}`);
part.pipe(fs.createWriteStream(tmpFile));
part.on('end', () => {
resolve(fs.createReadStream(tmpFile));
});
part.once('error', (error) => {
handleError(error);
});
});
form.once('error', (error) => {
handleError(error);
});
form.parse(request);
}).then((imageStream) => {
//here is a call to AWS S3.putObject. Which return a promise
}).then(() => {
return new Ok();
});
In essence a stream is made on a created image and sent to AWS. I wanted to do some manipulation on binary level (read file signature, to check if it is an image). I got it to work like this:
form.on('part', (part) => {
fileCount++;
let tmpFile = path.join(os.tmpDir(), `${userId}_${timePrefix}_${path.basename(part.filename)}`);
part.pipe(fs.createWriteStream(tmpFile));
part.on('end', () => {
let chunk = [];
let file = fs.createReadStream(tmpFile);
let isFirstChunkSet = false;
file.on('data', function(chunks) {
if (!isFirstChunkSet){
chunk = chunks;
isFirstChunkSet = true;
}
});
file.on('close', function() {
let magicNumber = chunk.toString('hex', 0, 2);
if (imageProperties.allowed_file_types.includes(magicNumber)) {
resolve(fs.createReadStream(tmpFile));
} else {
error.message = 'wrong file type';
handleError(error);
}
});
});
part.once('error', (error) => {
handleError(error);
});
});
form.once('error', (error) => {
handleError(error);
});
form.parse(request);
}).then((imageStream) => {
//here is a call to AWS S3.putObject. Which return a promise
}).then(() => {
return new Ok();
});
Basically I attached two event listeners to the existing stream to access the data, and so some checking on the header file.
What bothers me is the feeling that I am overdoing things here. I would like to avoid those two listeners (data and close) and read a stream if possible.
To be more precise, this section of code receives the stream, and inside it I would like to access the data before sending it to AWS. This stream is ready, and simply put how do I read it, without using events?
form.parse(request);
}).then((imageStream) => {
//How can I access the imageStream here, without event listeners.
//Assumption is the stream is ready to be accessed.
//here is a call to AWS S3.putObject. Which return a promise
}).then(() => {
return new Ok();
});

Categories