I have a function that uses Axios to download a zip file and extract the file into a temporary directory. The process itself works as intended, but I'm having difficulty awaiting the final result before proceeding. I'll admit that I don't fully understand how to use promises, but that's what I need help learning.
Here is the complete code:
const axios = require('axios');
const fs = require('fs');
const StreamZip = require('node-stream-zip');

// Pipedream: steps.trigger.raw_event.body.result_set.download_links.json.all_pages
// Testing: https://api.countdownapi.com/download/results/04_NOVEMBER_2021/1900/Collection_Results_F4C0B671_51_All_Pages.zip
const all_pages = 'https://api.countdownapi.com/download/results/04_NOVEMBER_2021/1900/Collection_Results_F4C0B671_51_All_Pages.zip';
let fileName = 'all_pages.zip';
async function asyncFunc() {
    return await axios.get(all_pages, {responseType: "stream"})
        .then(res => {
            console.log("Waiting ...")
            if (res.status === 200) {
                const path = require("path");
                const SUB_FOLDER = "";
                fileName = fileName || all_pages.split("/").pop();
                const dir = path.resolve(__dirname, SUB_FOLDER, fileName);

                res.data.pipe(fs.createWriteStream(dir));
                res.data.on("end", () => {
                    console.log("Download Completed");

                    const zip = new StreamZip({
                        file: dir,
                        storeEntries: true
                    });

                    zip.on('error', function (err) {
                        console.error('[ERROR]', err);
                    });

                    zip.on('ready', function () {
                        console.log('All entries read: ' + zip.entriesCount);
                        // console.log(zip.entries());
                    });

                    zip.on('entry', function (entry) {
                        const pathname = path.resolve('./tmp', entry.name);
                        if (/\.\./.test(path.relative('./tmp', pathname))) {
                            console.warn("[zip warn]: ignoring maliciously crafted paths in zip file:", entry.name);
                            return;
                        }

                        if ('/' === entry.name[entry.name.length - 1]) {
                            console.log('[DIR]', entry.name);
                            return;
                        }

                        console.log('[FILE]', entry.name);
                        zip.stream(entry.name, function (err, stream) {
                            if (err) {
                                console.error('Error:', err.toString());
                                return;
                            }

                            stream.on('error', function (err) {
                                console.log('[ERROR]', err);
                            });

                            // example: print contents to screen
                            // stream.pipe(process.stdout);

                            // example: save contents to file
                            fs.mkdir(path.dirname(pathname), {recursive: true}, function () {
                                stream.pipe(fs.createWriteStream(pathname));
                            });
                        });
                    });
                });
            } else {
                console.log(`ERROR >> ${res.status}`);
            }
        })
        .catch(err => {
            console.log("Error ", err);
        });
}
(async () => {
    try {
        await asyncFunc();
        console.log('Finished')
    } catch (error) {
        console.error(error);
    }
})();
As I said, the code itself works in that it downloads the zip file and extracts the contents. However, my test console.log('Finished') fires just after the Axios get. Here is the order of operations in the output:
Waiting ...
Finished
Download Completed
[FILE] Collection_Results_F4C0B671_51_Page_1.json
[FILE] Collection_Results_F4C0B671_51_Page_2.json
[FILE] Collection_Results_F4C0B671_51_Page_3.json
[FILE] Collection_Results_F4C0B671_51_Page_4.json
[FILE] Collection_Results_F4C0B671_51_Page_5.json
[FILE] Collection_Results_F4C0B671_51_Page_6.json
[FILE] Collection_Results_F4C0B671_51_Page_7.json
All entries read: 7
I've tried reading other articles on Promises and similar questions, and I've tried many options without any luck.
A major advantage of using async/await is that you can avoid deeply nested, difficult-to-read code such as yours. It makes much more sense to break this code into functional units. Rather than thinking about all this code as "must be together", think "works better when apart".
So the entry point can call axios, use .then() to fire off the file download, another .then() to fire off unzipping, and another .then() to fire off the stream-writing function.
You have created a dilemma by using the callback version of StreamZip. It would simplify things a lot if you used the Promise version of the API.
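For illustration, a minimal sketch of that promise-based API, assuming a recent node-stream-zip that exposes the StreamZip.async wrapper (extractAll and its arguments are names made up for this example):
const fs = require('fs');
const StreamZip = require('node-stream-zip');

async function extractAll(zipPath, outDir) {
    const zip = new StreamZip.async({ file: zipPath }); // promise-based wrapper around the event API
    fs.mkdirSync(outDir, { recursive: true });          // make sure the target directory exists
    const count = await zip.extract(null, outDir);      // null means extract every entry
    console.log(`Extracted ${count} entries`);
    await zip.close();
}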
Something like the following is easier to reason about in terms of order of operations:
try {
    console.log('Starting')
    await axios.get(all_pages, {responseType: "stream"})
        .then(download)   // download, unzip and writeFile are the functional units described above
        .then(unzip)
        .then(writeFile)
    console.log('Finished')
} catch (error) {
    console.error(error);
}
If you want the Finished statement to show up after all the entries are read, why not just add it to this section of the code?
zip.on('ready', function () {
    console.log('All entries read: ' + zip.entriesCount);
    // console.log(zip.entries());
    // ADD THE FINISHED STATEMENT HERE
});
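If you need asyncFunc() itself to resolve only at that point, one option (a sketch, with readEntries as a made-up helper name) is to wrap the zip events in a Promise and resolve it from the 'ready' handler, so the caller can genuinely await it:
function readEntries(dir) {
    return new Promise((resolve, reject) => {
        const zip = new StreamZip({ file: dir, storeEntries: true });
        zip.on('error', reject);
        zip.on('ready', () => {
            console.log('All entries read: ' + zip.entriesCount);
            resolve(zip); // the awaiting caller continues here, i.e. "Finished"
        });
    });
}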
Edit
Based on the docs, you can do the following after the end of the stream:
const stm = await zip.stream('path/inside/zip.txt');
stm.on('end', () => {
    zip.close();
    // FINISHED AT THIS POINT ?
})
This is another place where you can say you are done streaming (Finished). Depending on the usage you may not have to close the zip here.
I am trying to upload a JSON file to my express app, read data from the JSON file and save it as a new document in my MongoDB database.
The documents get saved, but the fields don't get added, and that is because the fields are undefined for some reason. How do I fix this?
When I console.log(obj) I can see the data, but when I console.log(obj['DataSource_Name']), for example, I get undefined. Why?
app.post("/upload-config", uploads.single("txt"), async (req, res) => {
// Read the file and send to the callback
fs.readFile(req.file.path, handleFile);
let obj;
// Write the callback function
function handleFile(err, data) {
try {
obj = JSON.parse(data);
console.log(obj["DataSource_Name"]);
} catch (err) {
console.log(err);
}
}
const config = new ConfigM({
DataSource_Name: obj.DataSource_Name,
DataSource_ID: obj.DataSource_ID,
DataSource_Type: obj.DataSource_Type,
DataSource_ICON: obj.DataSource_ICON,
DoubleQuotes_Text: obj.DoubleQuotes_Text,
Delimeter: obj.Delimeter,
Field_list: obj.Field_list,
});
try {
await config.save();
res.send(obj);
} catch (err) {
console.error(err);
}
});
obj is only going to be defined when fs.readFile is ready, which is NOT as soon as you call it; that happens asynchronously. So after fs.readFile() is called, the code just continues executing, creating the config object while obj is still undefined.
If you would like to use callback functions as you have here, you probably want your config creation, config.save() and res.send(obj) to be part of the readFile callback. As a quick fix before refactoring, you could simply move the callback's closing } to just after the } of your final catch:
app.post("/upload-config", uploads.single("txt"), async (req, res) => {
// Read the file and send to the callback
fs.readFile(req.file.path, handleFile);
let obj;
// Write the callback function
function handleFile(err, data) {
try {
obj = JSON.parse(data);
console.log(obj["DataSource_Name"]);
} catch (err) {
console.log(err);
}
const config = new ConfigM({
DataSource_Name: obj.DataSource_Name,
DataSource_ID: obj.DataSource_ID,
DataSource_Type: obj.DataSource_Type,
DataSource_ICON: obj.DataSource_ICON,
DoubleQuotes_Text: obj.DoubleQuotes_Text,
Delimeter: obj.Delimeter,
Field_list: obj.Field_list,
});
try {
await config.save();
res.send(obj);
} catch (err) {
console.error(err);
}
}
});
I am not certain what exactly you are doing by reading the file locally when you received it from the POST. Be aware that you will want a lot more security guards all over the place, and you should not accept path names from the request to decide the path name on your server. But I guess that's all a topic for another Stack Overflow question :)
I am refactoring some code that was using the http module in Node to use got instead. I tried the following:
function get(url, filePath) {
    return new Promise((resolve, reject) => {
        got.stream(url)
            .on("response", response => {
                const newFile = fs.createWriteStream(filePath);
                response.pipe(newFile);
                newFile.on("finish", () => {
                    newFile.close(resolve());
                });
                newFile.on("error", err => {
                    reject(err);
                });
            })
            .on("error", err => {
                reject(err);
            });
    });
}
The finish event never fired. The file (filePath) is created with 0 bytes.
The block of code using newFile was something that worked when I was using the Node http module.
What is the proper way to pipe got.stream to a file?
Per the got() documentation, you want to pipe the stream directly to your file and if you use pipeline() to do it, it will collect errors and report completion.
const stream = require('stream');
const { promisify } = require('util');
const got = require('got');
const fs = require('fs');
const fsp = require('fs').promises;

const pipeline = promisify(stream.pipeline);

function get(url, filePath) {
    return pipeline(
        got.stream(url),
        fs.createWriteStream(filePath)
    );
}
// usage
get(...).then(() => {
    console.log("all done");
}).catch(err => {
    console.log(err);
});
FYI, the point of got.stream() is to return a stream that you can use directly, and since you want it to go to a file, you can pipe that stream to that file. I use pipeline() instead of .pipe() because pipeline has much more complete error handling than .pipe(), though in non-error conditions .pipe() would also work.
Here's a version that cleans up the output file if there's an error:
function get(url, filePath) {
    return pipeline(
        got.stream(url),
        fs.createWriteStream(filePath)
    ).catch(err => {
        // try to delete the output file upon error, ignoring "file not found"
        fsp.unlink(filePath).catch(err => {
            if (err.code !== 'ENOENT') {
                console.log('error trying to delete output file', err);
            }
        });
        throw err;
    });
}
I have the following snippet of code below. It currently works, but I'm hoping to optimize/refactor it a bit.
Basically, it fetches JSON data, extracts the urls for a number of PDFs from the response, and then downloads those PDFs into a folder.
I'm hoping to refactor this code in order to process the PDFs once they are all downloaded. Currently, I'm not sure how to do that. There are a lot of nested asynchronous functions going on.
How might I refactor this to allow me to tack on another .then call before my error handler, so that I can then process the PDFs that are downloaded?
const axios = require("axios");
const moment = require("moment");
const fs = require("fs");
const download = require("download");
const mkdirp = require("mkdirp"); // Makes nested files...
const getDirName = require("path").dirname; // Current directory name...
const today = moment().format("YYYY-MM-DD");
function writeFile(path, contents, cb){
mkdirp(getDirName(path), function(err){
if (err) return cb(err)
fs.writeFile(path, contents, cb)
})
};
axios.get(`http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`)
    .then((res) => {
        res.data.results.forEach((item) => {
            download(item.pdf_url).then((data) => {
                writeFile(`${__dirname}/${today}/${item.pdf_file_name}`, data, (err) => {
                    if(err){
                        console.log(err);
                    } else {
                        console.log("FILE WRITTEN: ", item.pdf_file_name);
                    }
                });
            });
        });
    })
    .catch((err) => {
        console.log("COULD NOT DOWNLOAD FILES: \n", err);
    });
Thanks for any help you all can provide.
P.S. –– When I simply tack on the .then call right now, it fires immediately. This means that my forEach loop is non-blocking? I thought that forEach loops were blocking.
The forEach callback itself runs synchronously through the items, but the downloads it starts are asynchronous, so nothing waits for them to complete. You should use .map instead of forEach so you can map each item to its Promise from download. Then, you can use Promise.all on the resulting array, which will resolve once all downloads are complete:
axios.get(`http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`)
    .then(processResults)
    .catch((err) => {
        console.log("COULD NOT DOWNLOAD FILES: \n", err);
    });

function processResults(res) {
    const downloadPromises = res.data.results.map((item) => (
        download(item.pdf_url).then(data => new Promise((resolve, reject) => {
            writeFile(`${__dirname}/${today}/${item.pdf_file_name}`, data, (err) => {
                if(err) reject(err);
                else resolve(console.log("FILE WRITTEN: ", item.pdf_file_name));
            });
        }))
    ));

    return Promise.all(downloadPromises)
        .then(() => {
            console.log('all done');
        });
}
If you wanted to essentially block the function on each iteration, you would want to use an async function in combination with await instead.
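For example, a sketch of that sequential variant, reusing download and writeFile from above (each download completes before the next begins):
async function processResultsSequentially(res) {
    for (const item of res.data.results) {
        const data = await download(item.pdf_url); // wait for this download before starting the next
        await new Promise((resolve, reject) => {
            writeFile(`${__dirname}/${today}/${item.pdf_file_name}`, data, (err) => {
                if (err) reject(err);
                else resolve();
            });
        });
        console.log("FILE WRITTEN: ", item.pdf_file_name);
    }
    console.log('all done');
}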
var content;

fs.readFile('./Index.html', function read(err, data) {
    if (err) {
        throw err;
    }
    content = data;
});

console.log(content);
Logs undefined, why?
To elaborate on what @Raynos said, the function you have defined is an asynchronous callback. It doesn't execute right away; rather, it executes when the file loading has completed. When you call readFile, control is returned immediately and the next line of code is executed. So when you call console.log, your callback has not yet been invoked, and thus content has not yet been set. Welcome to asynchronous programming.
Example approaches
const fs = require('fs');

// First I want to read the file
fs.readFile('./Index.html', function read(err, data) {
    if (err) {
        throw err;
    }
    const content = data;

    // Invoke the next step here however you like
    console.log(content);   // Put all of the code here (not the best solution)
    processFile(content);   // Or put the next step in a function and invoke it
});

function processFile(content) {
    console.log(content);
}
Or better yet, as Raynos' example shows, wrap your call in a function and pass in your own callback. (Apparently this is better practice.) I think getting into the habit of wrapping your async calls in a function that takes a callback will save you a lot of trouble and messy code.
function doSomething(callback) {
    // any async callback invokes callback with response
}

doSomething(function doSomethingAfter(err, result) {
    // process the async result
});
There is actually a Synchronous function for this:
http://nodejs.org/api/fs.html#fs_fs_readfilesync_filename_encoding
Asynchronous
fs.readFile(filename, [encoding], [callback])
Asynchronously reads the entire contents of a file. Example:
fs.readFile('/etc/passwd', function (err, data) {
    if (err) throw err;
    console.log(data);
});
The callback is passed two arguments (err, data), where data is the contents of the file.
If no encoding is specified, then the raw buffer is returned.
SYNCHRONOUS
fs.readFileSync(filename, [encoding])
Synchronous version of fs.readFile. Returns the contents of the file named filename.
If encoding is specified then this function returns a string. Otherwise it returns a buffer.
var text = fs.readFileSync('test.md','utf8')
console.log (text)
function readContent(callback) {
fs.readFile("./Index.html", function (err, content) {
if (err) return callback(err)
callback(null, content)
})
}
readContent(function (err, content) {
console.log(content)
})
Using Promises with ES2017 async/await
Asynchronous use with mz/fs
The mz module provides promisified versions of the core node library. Using them is simple. First install the library...
npm install mz
Then...
const fs = require('mz/fs');

fs.readFile('./Index.html')
    .then(contents => console.log(contents))
    .catch(err => console.error(err));
Alternatively you can use them in async functions:
async function myReadfile() {
    try {
        const file = await fs.readFile('./Index.html');
        console.log(file);
    } catch (err) {
        console.error(err);
    }
};
This line will work:
const content = fs.readFileSync('./Index.html', 'utf8');
console.log(content);
var data = fs.readFileSync('tmp/reltioconfig.json', 'utf8');
Use this to read a file synchronously; without an encoding argument, the output is returned as a raw buffer.
As said, fs.readFile is an asynchronous action. It means that when you tell node to read a file, you need to consider that it will take some time, and in the meantime, node continues to run the following code. In your case it's: console.log(content);.
It's like sending some part of your code for a long trip (like reading a big file).
Take a look at the comments that I've written:
var content;

// node, go fetch this file. when you come back, please run this "read" callback function
fs.readFile('./Index.html', function read(err, data) {
    if (err) {
        throw err;
    }
    content = data;
});

// in the meantime, please continue and run this console.log
console.log(content);
That's why content is still empty when you log it. node has not yet retrieved the file's content.
This could be resolved by moving console.log(content) inside the callback function, right after content = data;. This way you will see the log when node is done reading the file and after content gets a value.
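That fix looks like this:
fs.readFile('./Index.html', function read(err, data) {
    if (err) {
        throw err;
    }
    content = data;
    console.log(content); // runs only after the file has been read
});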
From Node v8
Use the built-in util.promisify to make these old callback functions more elegant.
const fs = require('fs');
const util = require('util');

const readFile = util.promisify(fs.readFile);

async function doStuff() {
    try {
        const content = await readFile(filePath, 'utf8');
        console.log(content);
    } catch (e) {
        console.error(e);
    }
}
From Node v10
You can use the promises version of the fs API:
import { promises as fs } from 'fs';

async function doStuff() {
    try {
        const content = await fs.readFile(filePath, 'utf8');
        console.log(content);
    } catch (e) {
        console.error(e);
    }
}
const fs = require('fs')

function readDemo1(file1) {
    return new Promise(function (resolve, reject) {
        fs.readFile(file1, 'utf8', function (err, dataDemo1) {
            if (err)
                reject(err);
            else
                resolve(dataDemo1);
        });
    });
}

async function copyFile() {
    try {
        let dataDemo1 = await readDemo1('url')
        dataDemo1 += '\n' + await readDemo1('url')

        await writeDemo2(dataDemo1)
        console.log(dataDemo1)
    } catch (error) {
        console.error(error);
    }
}

copyFile();

function writeDemo2(dataDemo1) {
    return new Promise(function(resolve, reject) {
        fs.writeFile('text.txt', dataDemo1, 'utf8', function(err) {
            if (err)
                reject(err);
            else
                resolve("Promise Success!");
        });
    });
}
Sync and async ways of reading a file:
// fs module to read a file in sync and async ways
var fs = require('fs'),
    filePath = './sample_files/sample_css.css';

// this is the async way
/*fs.readFile(filePath, 'utf8', function (err, data) {
    if (err) throw err;
    console.log(data);
});*/

// this is the sync way
var css = fs.readFileSync(filePath, 'utf8');
console.log(css);
Node Cheat Available at read_file.
var path = "index.html"
const readFileAsync = fs.readFileSync(path, 'utf8');
// console.log(readFileAsync)
using simple readFileSync works for me.
var fs = require('fs');
var path = (process.cwd() + "\\text.txt");

fs.readFile(path, function(err, data) {
    if (err)
        console.log(err)
    else
        console.log(data.toString());
});
var content;

fs.readFile('./Index.html', function read(err, data) {
    if (err) {
        throw err;
    }
    content = data;
});

console.log(content);
This is just because node is asynchronous and it will not wait for the read function: as soon as the program runs, it logs the value as undefined, which is actually true, because no value has been assigned to the content variable yet.
To handle this we can use promises, generators, etc.
We can use a promise in this way:
new Promise((resolve, reject) => {
    fs.readFile('./index.html', 'utf-8', (err, data) => {
        if (err) {
            reject(err); // on error, control flow goes to the catch block with the error that occurred
        } else {
            resolve(data); // on success, control flow goes to the then block with the content of the file
        }
    });
})
.then((data) => {
    console.log(data); // use the content of the file here (in this then)
})
.catch((err) => {
    throw err; // handle the error here
})
The following function would work for an async wrapper or promise then chains:
const readFileAsync = async (path) => fs.readFileSync(path, 'utf8'); // note: the read itself is still synchronous; the async wrapper just makes it awaitable
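A usage sketch:
readFileAsync('./Index.html').then(content => console.log(content));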
You can read a file with:
var readMyFile = function(path, cb) {
    fs.readFile(path, 'utf8', function(err, content) {
        if (err) return cb(err, null);
        cb(null, content);
    });
};
Adding on, you can write to a file with:
var createMyFile = (path, data, cb) => {
    fs.writeFile(path, data, function(err) {
        if (err) return console.error(err);
        cb();
    });
};
and even chain them together:
var readFileAndConvertToSentence = function(path, callback) {
    readMyFile(path, function(err, content) {
        if (err) {
            callback(err, null);
        } else {
            var sentence = content.split('\n').join(' ');
            callback(null, sentence);
        }
    });
};
To put it roughly, you're dealing with node.js, which is asynchronous in nature.
When we talk about async, we're talking about processing information or data while dealing with something else. It is not synonymous with parallel, please be reminded.
Your code:
var content;

fs.readFile('./Index.html', function read(err, data) {
    if (err) {
        throw err;
    }
    content = data;
});

console.log(content);
With your sample, the console.log part runs first, so the variable content is still undefined.
If you really want the output, do something like this instead:
var content;

fs.readFile('./Index.html', function read(err, data) {
    if (err) {
        throw err;
    }
    content = data;
    console.log(content);
});
This is asynchronous. It will be hard to get used to, but it is what it is.
Again, this is a rough but fast explanation of what async is.
I like using fs-extra because all functions are promisified, right out of the box, so you can use await. So your code could look like this:
const fs = require('fs-extra');

(async () => {
    try {
        const content = await fs.readFile('./Index.html');
        console.log(content);
    } catch (err) {
        console.error(err);
    }
})();