Firebase Google Cloud Function: createReadStream results in empty file - javascript

I try to process a Video file (stored in Google Firebase storage) through a Google Cloud Function. I have working code that download the entire video files into the NodeJS Google cloud function: await bucket.file(filePath).download({ destination: tempFile }).
But the goal is only to read the framerate, therefore the headers of the videofile would suffice. But createReadStream gives me an empty tempFile. Any advise much appreciated!
exports.checkFramerate = functions.region('europe-west1').storage.object().onFinalize(async (object, context) => {
const bucket = admin.storage().bucket(object.bucket); // Bucket class
const filePath = object.name; // videos/xbEXdMNFb1Blbd9r2E8m/comp_test.mp4
const fileName = filePath.split('/').pop(); // comp_test.mp4
const bucketDir = path.dirname(filePath); // videos/xbEXdMNFb1Blbd9r2E8m
const tempFile = path.join(os.tmpdir(), 'temp.mp4')
fs.closeSync(fs.openSync(tempFile, 'w'))
console.log("tempFile size1", fs.statSync(tempFile).size)
// await bucket.file(filePath).download({ destination: tempFile }); // this works: tempFile size2 = 3180152
await bucket.file(filePath).createReadStream({ // this does not work: tempFile size2 = 0
start: 10000,
end: 20000
})
.on('error', function(err) {console.log(err)})
.pipe(fs.createWriteStream(tempFile));
console.log("tempFile size2", fs.statSync(tempFile).size)
mi(tempFile).then(data => {
console.log("frameRate", data[0].general.frame_rate[0])
return data[0].general.frame_rate[0];
}).catch(e => {console.error(e)});
});
I tried implementing even the example of https://googleapis.dev/nodejs/storage/latest/File.html#createReadStream but to no avail. remoteFile.download works beautifully but remoteFile.createReadStream gives me empty files...
const remoteFile = bucket.file(filePath);
const localFilename = tempFile;
remoteFile.createReadStream()
.on('error', function(err) {})
.on('response', function(response) {})
.on('end', function() {})
.pipe(fs.createWriteStream(localFilename));
fs.stat(localFilename, (err, stats) => {
if (err) {console.log(err)}
return console.log("stats async",stats.size)
})

as mentioned, promise should be used
reading json file example
let buf = '';
const loadData = async () => {
return await new Promise((resolve, reject) => {
storage.bucket('bucket-name').file('test-config.json')
.createReadStream()
.on('error', reject)
.on('data', function(d) {
buf += d;
}).on('end', function() {
resolve(buf)
});
})
}
const data = await loadData()

Your problem is that the stream API isn't promisifed. So, the await does nothing, and your function continues before the stream is piped, and the file is still zero-length when you stat it the second time.
The download method works just fine because it returns a Promise.
This answer outlines the general approach you need to take. In summary though, you basically want the section of your code that does the piping to read like this:
const stream = bucket.file(filePath).createReadStream({
start: 10000,
end: 20000
})
.pipe(fs.createWriteStream(tempFile));
await new Promise((resolve, reject) => {
stream.on('finish', resolve);
stream.on('error', reject);
});
console.log("tempFile size2", fs.statSync(tempFile).size)
Your function will then wait until the finish event occurs when the piping is complete and the stream is closed. Obviously you probably want to do something more clever with the error handler too, but this is the general form of what you need.

Related

Promise function doesn't trigger after another promise

I'm working on a microcontroller that would either take docx files or html strings in input and would transform it into a singular pdf file and return its link as an ouput.
My code looks like this so far:
// 'files' is an array of uploaded docx files.
const uploaded = files.map((file) => {
return new Promise((resolve, reject) => {
pump(
file.toBuffer(),
fs.createWriteStream(join(__dirname, 'files', file.filename))
.on('finish', resolve)
)
})
})
Promise.all(uploaded)
// Is triggered
.then(async () => await convertFiles())
// Is not triggered
.then(async () => {
// concatStoreFiles() is an external function because I need it somewhere else too
test = await concatStoreFiles(join(__dirname, 'files'))
console.log({test})
res.send(test)
})
const convertFiles = () => {
return new Promise((resolve, reject) => {
const cmd = `soffice --headless --convert-to pdf --outdir ${join(__dirname, 'files')} ${join(__dirname, 'files', '*.*')}`
exec(cmd, (error, stdout, stderror) => {
if (error) console.warn(error)
resolve(stdout ?? stderror)
})
})
}
concatStoreFile.js
module.exports = async function concatFiles (dirPath, outPath) {
return new Promise ((resolve, reject) => {
const existingFiles = []
fs.readdir(dirPath, (e, files) => {
files.forEach((file) => {
// is added to the files list only if finishing with ".pdf"
if (/[\d\w_-]+.pdf/.matches(file)) {
existingFiles.push(file)
}
});
resolve(existingFiles)
})
})
}
I'm working with Insomnia for my development / test process, and it tells me that I get an empty response. However, I'm supposed to get an array of pdf files existing in a specific directory. I'm not even getting console.log({test}), so I don't think my second then() is triggered.
I'm really rusty with async / await and Promise syntaxes, what should I do in this situation?
Thank you in advance
The #fastify/multipart's toBuffer() API returns a Promise, not a buffer. Checkout this article
So you need to write something like:
const uploaded = files.map(processFile)
async function processFile (file) {
const buffer = await file.toBuffer()
const storedFileName = join(__dirname, 'files', file.filename)
const writeStream = fs.createWriteStream(storedFileName)
return new Promise((resolve, reject) => {
pump(buffer, writeStream, (err) => {
if(err) { return reject(err) }
resolve(storedFileName)
})
}
}
Moreover, to improve the code, I returned the storedFileName instead of recalculating it.
You can convert this:
.then(async () => await convertFiles())
to this:
.then(() => convertFiles())
Mixing async/await and promise then/catch leads to hidden bugs hard to find

AWS S3 CreateReadStream in a loop only reads and writes 1 file

I am trying to retrieve multiple files from S3 using a readstream, and insert them into a single file locally.
Below, the 'output' variable is the single writestream I wish to append to using the downloaded S3 file data.
I am looping through days where the nextDay variable is used for the S3 key. The fileservice.s3Handler.getS3Obj returns an S3 object which allows a readstream for a single file and appending to the output file.
However, no other files are being read and are not showing the console either using the on('data', ()) method.
I tried to wrap the readstream in a promise to try to wait until the read was finished but it is running the same error.
More recently I keep get this error: "ERR_STREAM_WRITE_AFTER_END"
Not sure what is going wrong here.
async fetchCSV(req, res) {
const output = fs.createWriteStream(outputPathWithFile, {
'flags': 'a'});
let nextDay = startDate;
while (nextDay !== endDate) {
const s3path = path.join(`${req.params.stationId}`, `${nextDay}.csv`);
const file = await this.fileService.s3Handler.getS3Obj(s3path);
await this.completePipe(file, output);
nextDay = await getTomorrow(nextDay);
}
}
completePipe(file, output) {
return new Promise((resolve) => {
file.createReadStream().on('finish', () => {
resolve();
}).on('error', (err) => {
resolve();
}).on('data', (data) => {
console.log(data.toString());
}).pipe(output);
})
}
}
getS3Obj(file) {
return new Promise(async (resolve) => {
const getParams = {
Bucket: this.bucket,
Key: file
};
resolve(this.s3.getObject(getParams, (err) => {
if (err) {
console.log('Error in getS3 object')
}
}));
})
}
Please help me?
Solved it.
Did a couple things:
Added a tag to the pipe method.
stream.pipe(output, {end: false})
Instead of creating a new function for the promise I just put this code in instead:
await new Promise((resolve) => {
stream.once('finish', () => {
resolve();
});
});
But the tag was what made it work, the promise was just a tidy up.
Yay.

Read and write to csv file with Node.js fast-csv library

I may be lacking some in depth understanding of streams in general. However, I would like to know how efficiently what I need should work.
I want to implement so that a csv file would be read, then to each row a query to the database (or api) is made and data is attached. After that the row with attached data is written to a new csv file. I am using fast-csv node library for this.
Here is my implementation:
const fs = require("fs");
const csv = require("fast-csv");
const delay = t => new Promise(resolve => setTimeout(resolve, t));
const asyncFunction = async (row, csvStream) => {
// Imitate some stuff with database
await delay(1200);
row.data = "data";
csvStream.write(row);
};
const array = [];
const csvStream = csv.format({ headers: true });
const writeStream = fs.createWriteStream("output.csv");
csvStream.pipe(writeStream).on("finish", () => {
console.log("End of writing");
});
fs.createReadStream("input.csv")
.pipe(csv.parse({ headers: true }))
.transform(async function(row, next) {
array.push(asyncFunction(row, csvStream));
next();
})
.on("finish", function() {
console.log("finished reading file");
//Wait for all database requests and writings to be finished to close write stream
Promise.all(array).then(() => {
csvStream.end();
console.log("finished writing file");
});
});
Particularly I would like to know are there ways to optimize what I am doing here, because I feel that I am missing something important on how this library can be used for these type of cases
Regards,
Rokas
I was able to find a solution in fast-csv issues section. A good person doug-martin, provided this gist, on how you can do efficiently this kind of operation via Transform stream:
const path = require('path');
const fs = require('fs');
const { Transform } = require('stream');
const csv = require('fast-csv');
class PersistStream extends Transform {
constructor(args) {
super({ objectMode: true, ...(args || {}) });
this.batchSize = 100;
this.batch = [];
if (args && args.batchSize) {
this.batchSize = args.batchSize;
}
}
_transform(record, encoding, callback) {
this.batch.push(record);
if (this.shouldSaveBatch) {
// we have hit our batch size to process the records as a batch
this.processRecords()
// we successfully processed the records so callback
.then(() => callback())
// An error occurred!
.catch(err => err(err));
return;
}
// we shouldnt persist so ignore
callback();
}
_flush(callback) {
if (this.batch.length) {
// handle any leftover records that were not persisted because the batch was too small
this.processRecords()
// we successfully processed the records so callback
.then(() => callback())
// An error occurred!
.catch(err => err(err));
return;
}
// no records to persist so just call callback
callback();
}
pushRecords(records) {
// emit each record for down stream processing
records.forEach(r => this.push(r));
}
get shouldSaveBatch() {
// this could be any check, for this example is is record cont
return this.batch.length >= this.batchSize;
}
async processRecords() {
// save the records
const records = await this.saveBatch();
// besure to emit them
this.pushRecords(records);
return records;
}
async saveBatch() {
const records = this.batch;
this.batch = [];
console.log(`Saving batch [noOfRecords=${records.length}]`);
// This is where you should save/update/delete the records
return new Promise(res => {
setTimeout(() => res(records), 100);
});
}
}
const processCsv = ({ file, batchSize }) =>
new Promise((res, rej) => {
let recordCount = 0;
fs.createReadStream(file)
// catch file read errors
.on('error', err => rej(err))
.pipe(csv.parse({ headers: true }))
// catch an parsing errors
.on('error', err => rej(err))
// pipe into our processing stream
.pipe(new PersistStream({ batchSize }))
.on('error', err => rej(err))
.on('data', () => {
recordCount += 1;
})
.on('end', () => res({ event: 'end', recordCount }));
});
const file = path.resolve(__dirname, `batch_write.csv`);
// end early after 30000 records
processCsv({ file, batchSize: 5 })
.then(({ event, recordCount }) => {
console.log(`Done Processing [event=${event}] [recordCount=${recordCount}]`);
})
.catch(e => {
console.error(e.stack);
});
https://gist.github.com/doug-martin/b434a04f164c81da82165f4adcb144ec

ssh2-sftp-client get multiple files - error "write after end"

I am trying to load multiple files from ftp but I am getting error: "write after end". I am not able to find the issue.
connections.js
async function getFtpFileContent(_path) {
const ftpConfig = {
host: config.FTPhost.replace('https://', ''),
username: config.FTPusername,
password: config.FTPpassword
};
let ftpFileContent;
let sftp = new StfpClient();
try {
await sftp.connect(ftpConfig);
const stream = await sftp.get(_path);
ftpFileContent = await readStream(sftp, stream);
stream.close();
await sftp.end();
} catch (error) {
console.log('FTP Error: \n', error);
}
return ftpFileContent;
}
function readStream(sftp, stream) {
return new Promise((resolve, reject) => {
let body;
stream.on('data', chunk => {
body += chunk;
});
stream.on('end', () => {
resolve(body);
});
stream.on('error', err => reject(err));
});
}
calling in the next method:
async function getFiles(req, res) {
// first file
let fileContent = await conn.getFtpFileContent(filePath);
// second file
let fileContent2 = await conn.getFtpFileContent(filePath2);
}
When I call the method getFtpFileContent second time I got the mentioned error ("write after end").
Could you please help me? Where I doing the mistake?
I'm not sure where your problem is but should you be calling stream.close() in the getFtpFileContent function? I don't see anything about a close method for a stream, only a close event.
I found the issue. The issue was in CSV converter which I used later. The converter used writable stream which was not close after convert.

Node.js Script Failing Silently?

I've written a Node.js script that uses the download, axios, and fs modules to extract urls from JSON provided by the Federal Register, and download the associated PDF files. However, the script routinely fails to download all of the PDFs.
For whatever reason, my script "stalls" before downloading all of the PDF files. Meaning, it starts off great (downloads maybe 70, 80 files) but then stalls. It doesn't fire my catch block, or fail in any way. It just stops downloading.
The number of files varies based on what wifi connection I'm on. However, I've never been able to get the code to finish, and fire the .then block in my code. Ideally, I would like to use the .then block to process the files once they are downloaded.
Here is the code:
// The callback function that writes the file...
function writeFile(path, contents, cb){
mkdirp(getDirName(path), function(err){
if (err) return cb(err)
fs.writeFile(path, contents, cb)
})
};
// The function that gets the JSON...
axios.get(`http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`)
.then(downloadPDFS)
.catch((err) => {
console.log("COULD NOT DOWNLOAD FILES: \n", err);
});
// The function that downloads the data and triggers my write callback...
function downloadPDFS(res) {
const downloadPromises = res.data.results.map(item => (
download(item.pdf_url)
.then(data => new Promise((resolve, reject) => {
writeFile(`${__dirname}/${today}/${item.pdf_file_name}`, data, (err) => {
if(err) reject(err);
else resolve(console.log("FILE WRITTEN: ", item.pdf_file_name));
});
}))
))
return Promise.all(downloadPromises).then((res) => console.log("DONE"))
}
My project is on Github here, in case you'd like to install it and try for yourself. Here's a summary of what's going on, in plain English:
The script fetches JSON from a server, which contains the urls to all 126 PDFs. It then passes an array of these urls to the synchronous map function. Each of the urls is transformed into a promise, with the download module. That promise is implicitly returned, and stored in the Promise.all wrapper. When the download promise resolves (the document is done downloading) my custom writeFile function will trigger, writing the PDF file with the downloaded data. When all of the files have downloaded, the Promise.all wrapper should resolve. But that doesn't happen.
What is going wrong?
EDIT --
As you can see below, the script runs for a while, but then it just stalls and doesn't download any more files...
If it really is a rate issue then there's a few ways you can solve it (depending on how the API is rate limited)
Below there are 3 solutions in one
rateLimited ... this fires off requests limited to a given number of requests per second
singleQueue ... one request at a time, no rate limit, just all requests in series
multiQueue ... at most a given number of requests "in flight" at a time
const rateLimited = perSecond => {
perSecond = isNaN(perSecond) || perSecond < 0.0001 ? 0.0001 : perSecond;
const milliSeconds = Math.floor(1000 / perSecond);
let promise = Promise.resolve(Date.now);
const add = fn => promise.then(lastRun => {
const wait = Math.max(0, milliSeconds + lastRun - Date.now);
promise = promise.thenWait(wait).then(() => Date.now);
return promise.then(fn);
});
return add;
};
const singleQueue = () => {
let q = Promise.resolve();
return fn => q = q.then(fn);
};
const multiQueue = length => {
length = isNaN(length) || length < 1 ? 1 : length;
const q = Array.from({ length }, () => Promise.resolve());
let index = 0;
const add = fn => {
index = (index + 1) % length;
return q[index] = q[index].then(fn);
};
return add;
};
// uncomment one, and only one, of the three "fixup" lines below
let fixup = rateLimited(10); // 10 per second for example
//let fixup = singleQueue; // one at a time
//let fixup = multiQueue(6); // at most 6 at a time for example
const writeFile = (path, contents) => new Promise((resolve, reject) => {
mkdirp(getDirName(path), err => {
if (err) return reject(err);
fs.writeFile(path, contents, err => {
if (err) return reject(err);
resolve();
})
})
});
axios.get(`http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`)
.then(downloadPDFS)
.catch((err) => {
console.log("COULD NOT DOWNLOAD FILES: \n", err);
});
function downloadPDFS(res) {
const downloadPromises = res.data.results.map(item => fixup(() =>
download(item.pdf_url)
.then(data => writeFile(`${__dirname}/${today}/${item.pdf_file_name}`, data))
.then(() => console.log("FILE WRITTEN: ", item.pdf_file_name))
));
return Promise.all(downloadPromises).then(() => console.log("DONE"));
}
I've also refactored the code a bit so downloadPDFS uses promises only - all the node-callback style code is put into writeFile
As Jaromanda pointed out, this is likely to do with the API limiting my access, not with an error in the script.
I added a filter to the script, to select less data, and it works. As follows:
axios.get(`http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`)
.then(downloadPDFS)
.then(() => {
console.log("DONE")
})
.catch((err) => {
console.log("COULD NOT DOWNLOAD FILES: \n", err);
});
function downloadPDFS(res) {
const EPA = res.data.results.filter((item) => {
return item.agencies[0].raw_name === "ENVIRONMENTAL PROTECTION AGENCY"; //// THIS FILTER
});
const downloadPromises = EPA.map(item => ( //// ONLY DOWNLOADING SOME OF THE DATA
download(item.pdf_url)
.then(data => new Promise((resolve, reject) => {
writeFile(`${__dirname}/${today}/${item.pdf_file_name}`, data, (err) => {
if(err) reject(err);
else resolve(console.log("FILE WRITTEN: ", item.pdf_file_name));
});
}))
))
return Promise.all(downloadPromises)
}

Categories