I have the following snippet of code. It currently works, but I'm hoping to optimize/refactor it a bit.
Basically, it fetches JSON data, extracts the URLs for a number of PDFs from the response, and then downloads those PDFs into a folder.
I'm hoping to refactor this code so that I can process the PDFs once they are all downloaded. Currently, I'm not sure how to do that, since there are a lot of nested asynchronous functions going on.
How might I refactor this to allow me to tack on another .then call before my error handler, so that I can then process the PDFs that are downloaded?
const axios = require("axios");
const moment = require("moment");
const fs = require("fs");
const download = require("download");
const mkdirp = require("mkdirp"); // Creates nested directories...
const getDirName = require("path").dirname; // Gets a path's directory name...
const today = moment().format("YYYY-MM-DD");

function writeFile(path, contents, cb) {
  mkdirp(getDirName(path), function (err) {
    if (err) return cb(err);
    fs.writeFile(path, contents, cb);
  });
}
axios.get(`http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`)
  .then((res) => {
    res.data.results.forEach((item) => {
      download(item.pdf_url).then((data) => {
        writeFile(`${__dirname}/${today}/${item.pdf_file_name}`, data, (err) => {
          if (err) {
            console.log(err);
          } else {
            console.log("FILE WRITTEN: ", item.pdf_file_name);
          }
        });
      });
    });
  })
  .catch((err) => {
    console.log("COULD NOT DOWNLOAD FILES: \n", err);
  });
Thanks for any help you all can provide.
P.S. When I simply tack on the .then call right now, it fires immediately. Does this mean my forEach loop is non-blocking? I thought forEach loops were blocking.
The forEach loop itself runs synchronously, but it does not wait for the asynchronous operations it starts to complete. Use .map instead of forEach so you can map each item to the Promise returned by download. Then use Promise.all on the resulting array, which resolves once all downloads are complete:
axios.get(`http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`)
  .then(processResults)
  .catch((err) => {
    console.log("COULD NOT DOWNLOAD FILES: \n", err);
  });

function processResults(res) {
  const downloadPromises = res.data.results.map((item) => (
    download(item.pdf_url).then(data => new Promise((resolve, reject) => {
      writeFile(`${__dirname}/${today}/${item.pdf_file_name}`, data, (err) => {
        if (err) reject(err);
        else {
          console.log("FILE WRITTEN: ", item.pdf_file_name);
          resolve();
        }
      });
    }))
  ));

  return Promise.all(downloadPromises)
    .then(() => {
      console.log('all done');
    });
}
If you wanted to essentially block the function on each iteration, you would want to use an async function in combination with await instead.
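For example, here is a minimal sketch of that sequential variant, assuming the same writeFile helper and download/axios setup from the question (processResultsSequentially is just a hypothetical name):

const { promisify } = require("util");

// writeFile(path, contents, cb) is the helper defined in the question
const writeFileAsync = promisify(writeFile);

async function processResultsSequentially(res) {
  // Each iteration waits for the previous download and write to finish.
  for (const item of res.data.results) {
    const data = await download(item.pdf_url);
    await writeFileAsync(`${__dirname}/${today}/${item.pdf_file_name}`, data);
    console.log("FILE WRITTEN: ", item.pdf_file_name);
  }
  console.log("all done");
}

This trades the parallelism of Promise.all for a strict one-at-a-time order: slower, but easier to reason about.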
I have a function that uses Axios to download a zip file and extract the file into a temporary directory. The process itself works as intended, but I'm having difficulty awaiting the final result before proceeding. I'll admit that I don't fully understand how to use promises, but that's what I need help learning.
Here is the complete code:
const axios = require('axios');
const fs = require('fs');
const StreamZip = require('node-stream-zip');
// Pipedream: steps.trigger.raw_event.body.result_set.download_links.json.all_pages
// Testing: https://api.countdownapi.com/download/results/04_NOVEMBER_2021/1900/Collection_Results_F4C0B671_51_All_Pages.zip
const all_pages = 'https://api.countdownapi.com/download/results/04_NOVEMBER_2021/1900/Collection_Results_F4C0B671_51_All_Pages.zip';
let fileName = 'all_pages.zip';
async function asyncFunc() {
  return await axios.get(all_pages, {responseType: "stream"})
    .then(res => {
      console.log("Waiting ...")
      if (res.status === 200) {
        const path = require("path");
        const SUB_FOLDER = "";
        fileName = fileName || all_pages.split("/").pop();
        const dir = path.resolve(__dirname, SUB_FOLDER, fileName);
        res.data.pipe(fs.createWriteStream(dir));
        res.data.on("end", () => {
          console.log("Download Completed");
          const zip = new StreamZip({
            file: dir,
            storeEntries: true
          });
          zip.on('error', function (err) {
            console.error('[ERROR]', err);
          });
          zip.on('ready', function () {
            console.log('All entries read: ' + zip.entriesCount);
            // console.log(zip.entries());
          });
          zip.on('entry', function (entry) {
            const pathname = path.resolve('./tmp', entry.name);
            if (/\.\./.test(path.relative('./tmp', pathname))) {
              console.warn("[zip warn]: ignoring maliciously crafted paths in zip file:", entry.name);
              return;
            }
            if ('/' === entry.name[entry.name.length - 1]) {
              console.log('[DIR]', entry.name);
              return;
            }
            console.log('[FILE]', entry.name);
            zip.stream(entry.name, function (err, stream) {
              if (err) {
                console.error('Error:', err.toString());
                return;
              }
              stream.on('error', function (err) {
                console.log('[ERROR]', err);
              });
              // example: print contents to screen
              // stream.pipe(process.stdout);
              // example: save contents to file
              fs.mkdir(path.dirname(pathname), {recursive: true}, function () {
                stream.pipe(fs.createWriteStream(pathname));
              });
            });
          });
        });
      } else {
        console.log(`ERROR >> ${res.status}`);
      }
    })
    .catch(err => {
      console.log("Error ", err);
    });
}
(async () => {
  try {
    await asyncFunc();
    console.log('Finished')
  } catch (error) {
    console.error(error);
  }
})();
As I said, the code itself works in that it downloads the zip file and extracts the contents; however, my test console.log('Finished') fires just after the Axios get. Here is the output, showing the order of operations:
Waiting ...
Finished
Download Completed
[FILE] Collection_Results_F4C0B671_51_Page_1.json
[FILE] Collection_Results_F4C0B671_51_Page_2.json
[FILE] Collection_Results_F4C0B671_51_Page_3.json
[FILE] Collection_Results_F4C0B671_51_Page_4.json
[FILE] Collection_Results_F4C0B671_51_Page_5.json
[FILE] Collection_Results_F4C0B671_51_Page_6.json
[FILE] Collection_Results_F4C0B671_51_Page_7.json
All entries read: 7
I've tried reading other articles on Promises and similar questions, and I've tried many options without any luck.
A major advantage of using async/await is that you can avoid deeply nested, difficult-to-read code such as yours. It makes much more sense to break this code into functional units. Rather than thinking of all this code as "must be together", think "works better when apart".
So the entry point can call axios, use .then() to fire off the file download, another .then() to fire off unzipping, and another .then() to fire off the stream-writing function.
You have created a dilemma by using the callback version of StreamZip. It would simplify things a lot if you used the Promise version of the API, as sketched below.
The entry point itself then becomes something like the following, which is easier to reason about in terms of order of operations.
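For instance, a minimal sketch using node-stream-zip's promise wrapper (StreamZip.async, available in recent versions of the library), reusing the dir and './tmp' paths from your code:

const StreamZip = require('node-stream-zip');

async function unzip(dir) {
  const zip = new StreamZip.async({ file: dir });
  console.log('All entries read: ' + await zip.entriesCount);
  // extract everything into ./tmp and report how many entries were written
  const count = await zip.extract(null, './tmp');
  console.log('Extracted ' + count + ' entries');
  await zip.close();
}

Note this sketch skips the per-entry path checks from your callback version; add them back if the zip comes from an untrusted source.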
// inside an async function, such as your existing IIFE
try {
  console.log('Starting')
  await axios.get(all_pages, {responseType: "stream"})
    .then(download)
    .then(unzip)
    .then(writeFile)
  console.log('Finished')
} catch (error) {
  console.error(error);
}
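Each unit then owns its own completion. For example, a rough sketch of what the download step could look like, resolving only once the write stream finishes (download here is a hypothetical helper, not the download npm package):

function download(res) {
  return new Promise((resolve, reject) => {
    const out = fs.createWriteStream(dir); // dir computed as in your code
    res.data.pipe(out);
    out.on('finish', () => resolve(dir)); // pass the file path on to unzip
    out.on('error', reject);
  });
}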
If you want the Finished statement to show up after all the entries are read, why not just add it to this section of the code?
zip.on('ready', function () {
  console.log('All entries read: ' + zip.entriesCount);
  // console.log(zip.entries());
  // ADD THE FINISHED STATEMENT HERE
});
Edit
Based on the docs, you can do the following after the end of the stream.
const stm = await zip.stream('path/inside/zip.txt');
stm.on('end', () => {
  zip.close();
  // FINISHED AT THIS POINT ?
})
This is another place where you can say you are done streaming (Finished). Depending on the usage you may not have to close the zip here.
I am refactoring some code that was using the http module in Node to use got instead. I tried the following:
function get(url, filePath) {
  return new Promise((resolve, reject) => {
    got.stream(url)
      .on("response", response => {
        const newFile = fs.createWriteStream(filePath);
        response.pipe(newFile);
        newFile.on("finish", () => {
          newFile.close(resolve());
        });
        newFile.on("error", err => {
          reject(err);
        });
      })
      .on("error", err => {
        reject(err);
      });
  });
}
The finish event never fired. The file (filePath) is created with 0 bytes.
The block of code using newFile was something that worked when I was using the Node http module.
What is the proper way to pipe got.stream to a file?
Per the got() documentation, you want to pipe the stream directly to your file and if you use pipeline() to do it, it will collect errors and report completion.
const got = require('got');
const fs = require('fs');
const stream = require('stream');
const { promisify } = require('util');

const pipeline = promisify(stream.pipeline);
const fsp = require('fs').promises;

function get(url, filePath) {
  return pipeline(
    got.stream(url),
    fs.createWriteStream(filePath)
  );
}

// usage
get(...).then(() => {
  console.log("all done");
}).catch(err => {
  console.log(err);
});
FYI, the point of got.stream() is to return a stream that you can directly use as a stream, and since you want it to go to a file, you can pipe that stream to that file. I use pipeline() instead of .pipe() because pipeline() has much more complete error handling than .pipe(), though in non-error conditions .pipe() would also work.
Here's a version that cleans up the output file if there's an error:
function get(url, filePath) {
  return pipeline(
    got.stream(url),
    fs.createWriteStream(filePath)
  ).catch(err => {
    // try to delete the (partial) output file upon error
    fsp.unlink(filePath).catch(unlinkErr => {
      if (unlinkErr.code !== 'ENOENT') {
        console.log('error trying to delete output file', unlinkErr);
      }
    });
    throw err;
  });
}
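As an aside, on newer Node versions you can skip the promisify step entirely, since a promise-based pipeline ships in the stream/promises module. A short sketch, assuming Node 15+:

// Assumes Node 15+, where 'stream/promises' is available.
const { pipeline } = require('stream/promises');
const fs = require('fs');
const got = require('got');

function get(url, filePath) {
  return pipeline(got.stream(url), fs.createWriteStream(filePath));
}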
I have a series of Python scripts that I want to call via node-cmd in my Node.js application. They rely on each other, so I cannot execute them in parallel. I also cannot use a fixed waiting time, as they always have different execution times. Right now, when my code is called, all the scripts run at the same time and therefore error out. See my code:
pythoncaller: function(req, callback) {
  var cmd = require('node-cmd');
  cmd.get(`python3 first.py`,
    function(err, data, stderr) {
      console.log(err);
      console.log(stderr);
      console.log(data);
    });
  cmd.get(`python3 second.py`,
    function(err, data, stderr) {
      console.log(err);
      console.log(stderr);
      console.log(data);
    });
  cmd.get(`python3 third.py`,
    function(err, data, stderr) {
      console.log(err);
      console.log(stderr);
      console.log(data);
    });
  cmd.get(`python3 last.py`,
    function(err, data, stderr) {
      console.log(err);
      console.log(stderr);
      console.log(data);
      callback(data);
    });
},
Do you know a solution for executing these scripts sequentially rather than in parallel?
You can promisify the callback-style functions and use .then to execute them one after another. Something like this:
const cmd = require('node-cmd');
const Promise = require("bluebird");
const getAsync = Promise.promisify(cmd.get, { multiArgs: true, context: cmd });

getAsync(`python3 first.py`)
  .then(data => console.log(data))
  .then(() => getAsync(`python3 second.py`))
  .then(data => console.log(data))
  .then(() => getAsync(`python3 third.py`))
  .then(data => console.log(data))
  .then(() => getAsync(`python3 last.py`))
  .then(data => console.log(data));
It is also mentioned on node-cmd readme. See here - https://www.npmjs.com/package/node-cmd#with-promises
According to the docs, you can promisify cmd.get.
An alternative to .then, using await, is below:
// copy pasted
// either Promise = require('bluebird') or use (require('util').promisify)
const getAsync = Promise.promisify(cmd.get, { multiArgs: true, context: cmd })

pythoncaller: async function(req, callback) {
  try {
    let data
    data = await getAsync(`python3 first.py`)
    data = await getAsync(`python3 second.py`)
    data = await getAsync(`python3 third.py`)
    // ...
  } catch (e) {
    return callback(e)
  }
  return callback()
}
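If you would rather not pull in bluebird, here is a rough sketch with Node's built-in util.promisify; note that promisify resolves with only the first callback value (data), so stderr is dropped (runAll is a hypothetical name):

const { promisify } = require('util');
const cmd = require('node-cmd');

// bind cmd as the context, mirroring the bluebird example above
const getAsync = promisify(cmd.get).bind(cmd);

async function runAll() {
  // runs the scripts strictly one after another
  for (const script of ['first.py', 'second.py', 'third.py', 'last.py']) {
    const data = await getAsync(`python3 ${script}`);
    console.log(data);
  }
}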
I am trying to compile tex files into PDFs using data from a Firestore database. After completion, the script doesn't terminate. I found one can use process.exit() to make it quit. However, that terminates the child processes still compiling the tex files. So, I need an asynchronous function.
The guides I found on how to write one did not particularly help me. I am still very new to JavaScript, and any bloat is still highly confusing to me.
Prepending the below-mentioned makePDF function with async and its call with await does not work and is, to my understanding, outdated.
I tried implementing a promise, but I don't understand how their syntax works. Does simply appending .then() to the function call in the for loop suffice to make the loop wait for the function's completion?
How do I make this specific function asynchronous?
Does it matter that it already uses asynchronous functions in its body?
I understand that a promise is used to return what ever the producer has produced to a consumer. Now, my producer doesn't produce anything to be returned. Does this matter at all?
My function called from the for loop:
function makePDF(object) {
  let input = fs.readFileSync('main.tex', 'utf8');
  const outNameTex = object.ID + '.tex';
  const outNamePDF = object.ID + '.pdf';
  makeTEX(object, input, outNameTex);
  const infile = fs.createReadStream(outNameTex);
  const outfile = fs.createWriteStream(outNamePDF);
  const pdf = latex(infile);
  pdf.pipe(outfile);
  pdf.on('error', err => console.error(err));
  pdf.on('finish', () => { console.log('PDF generated!') });
}
And my function with the loop:
firebase.auth().onAuthStateChanged((user) => {
  if (user) {
    console.log('user');
    db.collection('objects').where('printed', '==', false).get().then((snapshot) => {
      snapshot.forEach((doc) => {
        console.table(doc.data());
        makePDF(doc.data());
      });
      process.exit();
    })
    .catch((err) => {
      console.log('Error getting documents', err);
    });
  } else {
    console.log('no user');
  }
});
It outputs a table for each document, but no PDF is generated.
async/await can be tricky to use with for loops because async functions return a promise; if you convert the async/await syntax to plain promise syntax, you might figure out what the issue is.
What you want to do is use Array.map to map/convert each doc to a promise that resolves once makePDF is done, then use Promise.all to wait for all the promises to resolve.
The solution should look something like this
function makePDF(object) {
  return new Promise((resolve, reject) => {
    let input = fs.readFileSync('main.tex', 'utf8');
    const outNameTex = object.ID + '.tex';
    const outNamePDF = object.ID + '.pdf';
    makeTEX(object, input, outNameTex);
    const infile = fs.createReadStream(outNameTex);
    const outfile = fs.createWriteStream(outNamePDF);
    const pdf = latex(infile);
    pdf.pipe(outfile);
    pdf.on('error', reject);
    pdf.on('finish', () => { console.log('PDF generated!'); resolve(); });
  });
}
firebase.auth().onAuthStateChanged((user) => {
  if (user) {
    console.log('user');
    db.collection('objects').where('printed', '==', false).get().then((snapshot) => {
      const promiseArr = snapshot.docs.map((doc) => {
        console.table(doc.data());
        return makePDF(doc.data());
      });
      return Promise.all(promiseArr)
        .then(() => {
          process.exit();
        });
    })
    .catch((err) => {
      console.log('Error getting documents', err);
    });
  } else {
    console.log('no user');
  }
});
I am trying to unzip a file first and then wait for the unzipping to complete before I loop through each file and upload it to an S3 bucket. The first function, unzipPromise, is running fine, and everything is getting unzipped in the proper directory, but uploadS3Promise is not running at all. I am not getting errors through this process; it just runs, unzips the file, and never touches the uploadS3Promise function.
function unzipInput(file, client, project_number, oldpath, newpath) {
  path = `zips/${client}/${project_number}/${file}/`;

  function unzipPromise() {
    return new Promise((resolve, reject) => {
      fse.mkdirsSync(path);
      fs.rename(oldpath, newpath, err => {
        if (err) {
          throw err;
        }
      });
      fs.createReadStream(newpath).pipe(unzip.Extract({ path }));
    });
  }

  function uploadS3Promise() {
    console.log("running");
    return new Promise((resolve, reject) => {
      // fs.unlinkSync(newpath);
      fs.readdirSync(newpath).forEach(file => {
        uploadToS3(file, client, project_number, path);
        console.log(file, "test");
      });
      if (err) reject(err);
      else resolve("success");
    });
  }

  // New code with async:
  (async () => {
    try {
      await unzipPromise();
      await uploadS3Promise();
    } catch (e) {
      console.error(e);
    }
  })();
}
uploadS3Promise doesn't run because the code is still awaiting unzipPromise to complete. The code will not execute further unless you resolve or reject that promise.
So in your code:
function unzipPromise() {
  ...
  resolve(...)
  ...
}
On an unrelated note, I think it would be more readable not to have function names end in "Promise". Just call them unzip and uploadS3. We don't usually name our functions by return type; we never say intIndexOf, for instance.
You're supposed to call resolve after the unzipping is done, or reject if an error occurred.
And since streams are EventEmitters, you can listen to their events and interact with them:
const stream = fs.createReadStream(newpath).pipe(unzip.Extract({ path }));

stream
  .on('error', (err) => reject(err))
  .on('finish', () => resolve());
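Putting that together, a sketch of unzipPromise with resolve/reject wired up, assuming the same fse/fs/unzip modules as in the question and the same stream events as above:

function unzipPromise() {
  return new Promise((resolve, reject) => {
    fse.mkdirsSync(path);
    fs.rename(oldpath, newpath, err => {
      if (err) return reject(err); // reject instead of throwing inside the callback
      fs.createReadStream(newpath)
        .pipe(unzip.Extract({ path }))
        .on('error', reject)
        .on('finish', resolve);
    });
  });
}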
Personally, I'd use a .then to break the process down a bit.
unzipPromise().then(res => {
  console.log('resolved promise 1');
  uploadS3Promise();
}, rej => {
  console.log('rejected promise 1, sorry!');
});
Also- "unzipInput" is never resolved or rejected.