Piping got.stream to a file - javascript

I am refactoring some code that was using http module in Node to use got instead. I tried the following:
function get(url, filePath) {
return new Promise((resolve, reject) => {
// BUG(review): with got.stream() the returned request stream must itself be
// piped/consumed; piping the raw `response` object here leaves the got
// stream unconsumed, so no data flows and the file stays at 0 bytes.
got.stream(url).on
("response", response => {
const newFile = fs.createWriteStream(filePath);
response.pipe(newFile);
newFile.on("finish", () => {
// NOTE(review): resolve() is *invoked* here and its return value
// (undefined) is passed to close() — should be newFile.close(resolve).
newFile.close(resolve());
});
newFile.on("error", err => {
reject(err);
});
}).on
("error", err => {
reject(err);
});
});
}
The finish event never fired. The file (filePath) is created with 0 bytes.
The block of code using newFile was something that worked when I was using the Node http module.
What is the proper way to pipe got.stream to a file?

Per the got() documentation, you want to pipe the stream directly to your file and if you use pipeline() to do it, it will collect errors and report completion.
const pipeline = promisify(stream.pipeline);
const fsp = require('fs').promises;
function get(url, filePath) {
  // Stream the HTTP response straight into the destination file. The
  // promisified pipeline() resolves on completion and rejects on any
  // error in either stream, with proper cleanup of both.
  const source = got.stream(url);
  const sink = fs.createWriteStream(filePath);
  return pipeline(source, sink);
}
// usage (replace ... with the actual url and file path arguments)
get(...).then(() => {
console.log("all done");
}).catch(err => {
console.log(err);
});
FYI, the point of got.stream() is to return a stream that you can directly use as a stream and since you want it to go to a file, you can pipe that stream to that file. I use pipeline() instead of .pipe() because pipeline() has much more complete error handling than .pipe(), though in non-error conditions, .pipe() would also work.
Here's a version that cleans up the output file if there's an error:
function get(url, filePath) {
  // Stream the URL straight to filePath; if the pipeline fails, remove the
  // partially written output file before propagating the original error.
  return pipeline(
    got.stream(url),
    fs.createWriteStream(filePath)
  ).catch(err => {
    // Best-effort cleanup. Use a distinct name for the unlink error so it
    // does not shadow the pipeline error we are about to rethrow.
    fsp.unlink(filePath).catch(unlinkErr => {
      // ENOENT just means the output file was never created — not a problem.
      if (unlinkErr.code !== 'ENOENT') {
        // trying to delete output file upon error
        console.log('error trying to delete output file', unlinkErr);
      }
    });
    throw err; // rethrow the pipeline error so the caller still sees it
  });
}

Related

I'm having issues with awaiting multiple async functions in Nodejs

I have a function that uses Axios to download a zip file and extract the file into a temporary directory. The process itself works as intended, but I'm having difficulty awaiting the final result before proceeding. I'll admit that I don't fully understand how to use promises, but that's what I need help learning.
Here is the complete code:
const axios = require('axios');
const StreamZip = require('node-stream-zip');
// Pipedream: steps.trigger.raw_event.body.result_set.download_links.json.all_pages
// Testing: https://api.countdownapi.com/download/results/04_NOVEMBER_2021/1900/Collection_Results_F4C0B671_51_All_Pages.zip
const all_pages = 'https://api.countdownapi.com/download/results/04_NOVEMBER_2021/1900/Collection_Results_F4C0B671_51_All_Pages.zip';
let fileName = 'all_pages.zip';
async function asyncFunc() {
// NOTE(review): the promise returned here settles as soon as the .then()
// callback returns — the "end" handler below is only *registered*, never
// awaited — which is exactly why "Finished" prints before "Download Completed".
return await axios.get(all_pages, {responseType: "stream"})
.then(res => {
console.log("Waiting ...")
if (res.status === 200) {
const path = require("path");
const SUB_FOLDER = "";
fileName = fileName || all_pages.split("/").pop();
const dir = path.resolve(__dirname, SUB_FOLDER, fileName);
// NOTE(review): `fs` is used throughout but never required in this snippet.
res.data.pipe(fs.createWriteStream(dir));
// "end" fires when the response stream is fully read (not when the file
// write completes); nothing returns or awaits this handler, so the outer
// promise does not wait for the unzip work below.
res.data.on("end", () => {
console.log("Download Completed");
const zip = new StreamZip({
file: dir,
storeEntries: true
});
zip.on('error', function (err) {
console.error('[ERROR]', err);
});
// 'ready' fires once all central-directory entries have been read
zip.on('ready', function () {
console.log('All entries read: ' + zip.entriesCount);
// console.log(zip.entries());
});
zip.on('entry', function (entry) {
const pathname = path.resolve('./tmp', entry.name);
// guard against zip entries that escape ./tmp via ".." path segments
if (/\.\./.test(path.relative('./tmp', pathname))) {
console.warn("[zip warn]: ignoring maliciously crafted paths in zip file:", entry.name);
return;
}
// entries ending in "/" are directories, not files
if ('/' === entry.name[entry.name.length - 1]) {
console.log('[DIR]', entry.name);
return;
}
console.log('[FILE]', entry.name);
zip.stream(entry.name, function (err, stream) {
if (err) {
console.error('Error:', err.toString());
return;
}
stream.on('error', function (err) {
console.log('[ERROR]', err);
});
// example: print contents to screen
// stream.pipe(process.stdout);
// example: save contents to file
fs.mkdir(path.dirname(pathname), {recursive: true}, function () {
stream.pipe(fs.createWriteStream(pathname));
}
);
});
});
});
} else {
console.log(`ERROR >> ${res.status}`);
}
})
.catch(err => {
console.log("Error ", err);
});
}
(async () => {
try {
await asyncFunc();
// NOTE(review): this logs as soon as asyncFunc's promise settles, which is
// before the "end"/unzip handlers registered inside it have run.
console.log('Finished')
} catch (error) {
console.error(error);
}
})();
As I said, the code itself works in that it'll download the zip file and extract the contents—however, my test console.log('Finished') fires just after the Axios get. Here are the results of the order of operations:
Waiting ...
Finished
Download Completed
[FILE] Collection_Results_F4C0B671_51_Page_1.json
[FILE] Collection_Results_F4C0B671_51_Page_2.json
[FILE] Collection_Results_F4C0B671_51_Page_3.json
[FILE] Collection_Results_F4C0B671_51_Page_4.json
[FILE] Collection_Results_F4C0B671_51_Page_5.json
[FILE] Collection_Results_F4C0B671_51_Page_6.json
[FILE] Collection_Results_F4C0B671_51_Page_7.json
All entries read: 7
I've tried reading other articles on Promises and similar questions, and I've tried many options without any luck.
A major advantage of using Async/Await is that you can avoid deeply nested, difficult to read code - such as yours. It makes much more sense to break this code into functional units. Rather than thinking about all this code as "must be together", think "works better when apart".
So the entry point can call axios, use .then() to fire off the data file download, use .then() to fire off unzipping, use then() to fire off stream writing function.
You have created a dilemma by using the callback version of StreamZip. It would simplify things a lot if you used the Promise version of the API.
Something like the following is easier to rationalize about the order of operation.
try {
console.log('Starting')
// NOTE(review): this chain is neither awaited nor returned, so 'Finished'
// still logs immediately — and a try/catch cannot observe rejections from
// an un-awaited promise chain; a .catch() (or await) is needed for that.
axios.get(all_pages, {responseType: "stream"})
.then(download)
.then(unzip)
.then(writeFile)
console.log('Finished')
} catch (error) {
console.error(error);
}
If you want the Finished statement to show up after all the entries are read, why not just add it to this section of the code?
// 'ready' fires once every central-directory entry has been read
zip.on('ready', function () {
console.log('All entries read: ' + zip.entriesCount);
// console.log(zip.entries());
// ADD THE FINISHED STATEMENT HERE
});
Edit
Base on the docs you can do the following after the end of the stream.
const stm = await zip.stream('path/inside/zip.txt');
// 'end' fires after the last chunk of this entry's stream has been consumed
stm.on('end', () => {
zip.close();
// FINISHED AT THIS POINT ?
})
This is another place where you can say you are done streaming (Finished). Depending on the usage you may not have to close the zip here.

How can I write file to mpeg movie file to disk in the correct format?

I'm trying to download an external ts (mpeg) file and write it to disk, but I can't understand how I can get the file content correctly formatted.
I get the file on disk, but it does not play in media player, and when I view attributes of the file it doesn't show any video/sound info like ts files typically do. I've tried many different encodings and I am not even sure if this is the root of the problem or if I need to somehow indicate the contentType, write file attributes or what I am missing here.
Code that writes unplayable file:
getFile = (url) => {
https.get(url, r => {
// BUG(review): starts as an array, but += below coerces it to a string,
// decoding the binary chunks as UTF-8 and corrupting the data.
let rawData = [];
r.on("data", chunk => {
rawData += chunk;
});
r.on("end", () => {
// BUG(review): writing with "utf-8" re-encodes the (already corrupted)
// string; binary data must stay in Buffers end-to-end.
fs.writeFile(`./resources/test.ts`, rawData, "utf-8", err => {
if (err) {
console.log(`error writing file`);
return;
}
console.log(`Wrote successfully`);
});
});
});
};
This is quite new territory to me. I'm used to serving files and setting content types of all kinds, not downloading them. Would much appreciate help!
UTF-8 encoding will pad bytes that have the most significant bit set, which corrupts binary files. You can save this to disk without buffering all the data by calling res.pipe() to a Writable stream created by fs.createWriteStream():
// Stream the response body straight to disk — no buffering, no re-encoding.
getFile = url =>
  https.get(url, res => {
    res.pipe(fs.createWriteStream('./resources/test.ts', 'binary'))
      .on('error', console.error) // write-stream failures
      .on('finish', () => { console.log('wrote successfully') })
  }).on('error', console.error) // request failures (DNS, connection reset) are emitted on the request, not the response stream
If you want the caller to be able to tell when the download has completed and whether there was an error, you can wrap the return value in a Promise and use stream.finished():
const { finished } = require('stream')
// Resolves when the file has been fully written; rejects on any failure.
const getFile = url => new Promise((resolve, reject) => {
  https.get(url, res => {
    const stream = res.pipe(
      fs.createWriteStream('./resources/test.ts', 'binary')
    )
    // finished() watches the write stream for both errors and completion
    finished(stream, err => {
      if (err) reject(err)
      else resolve()
    })
  }).on('error', reject) // request-level errors (e.g. DNS failure) would otherwise leave the promise pending forever
})
// usage — the second argument to .then() handles rejection
// (equivalent to a trailing .catch() here)
getFile('some url').then(() => {
console.log('wrote successfully')
}, error => {
console.error(error)
})
Since you're requesting binary data, you have to set the encoding accordingly on the response and when calling writeFile. The default encoding is utf-8, which is probably causing the problem here. You could try something like:
getFile = (url) => {
https.get(url, r => {
// Accumulate the body as a 'binary' (latin1) string — a lossless
// byte-for-byte round trip, unlike the default utf-8, but note the whole
// file is buffered in memory before being written out.
let rawData = '';
r.setEncoding('binary');
r.on('data', chunk => {
rawData += chunk;
});
r.on('end', () => {
// write back with the same 'binary' encoding so bytes are preserved
fs.writeFile(`./resources/test.ts`, rawData, 'binary', err => {
if (err) {
console.log(`error writing file`);
return;
}
console.log(`Wrote successfully`);
});
});
});
}

Refactoring Complicated Nested Node.js Function

I have the following snippet of code below. It currently works, but I'm hoping to optimize/refactor it a bit.
Basically, it fetches JSON data, extracts the urls for a number of PDFs from the response, and then downloads those PDFs into a folder.
I'm hoping to refactor this code in order to process the PDFs once they are all downloaded. Currently, I'm not sure how to do that. There are a lot of nested asynchronous functions going on.
How might I refactor this to allow me to tack on another .then call before my error handler, so that I can then process the PDFs that are downloaded?
const axios = require("axios");
const moment = require("moment");
const fs = require("fs");
const download = require("download");
const mkdirp = require("mkdirp"); // Makes nested files...
const getDirName = require("path").dirname; // Current directory name...
const today = moment().format("YYYY-MM-DD");
function writeFile(path, contents, cb){
  // Create the parent directory (recursively) if needed, then write the
  // file. Any error — mkdir or write — is reported via the node-style
  // callback `cb(err)`.
  mkdirp(getDirName(path), (err) => {
    if (err) {
      cb(err);
      return;
    }
    fs.writeFile(path, contents, cb);
  });
};
axios.get(`http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`)
.then((res) => {
// NOTE(review): forEach does not wait for the download promises it starts,
// so this .then resolves immediately — a chained .then would run before any
// file is written. writeFile errors are only logged, never propagated.
res.data.results.forEach((item) => {
download(item.pdf_url).then((data) => {
writeFile(`${__dirname}/${today}/${item.pdf_file_name}`, data, (err) => {
if(err){
console.log(err);
} else {
console.log("FILE WRITTEN: ", item.pdf_file_name);
}
})
})
})
})
.catch((err) => {
console.log("COULD NOT DOWNLOAD FILES: \n", err);
})
Thanks for any help you all can provide.
P.S. –– When I simply tack on the .then call right now, it fires immediately. This means that my forEach loop is non-blocking? I thought that forEach loops were blocking.
The current forEach will run synchronously, and will not wait for the asynchronous operations to complete. You should use .map instead of forEach so you can map each item to its Promise from download. Then, you can use Promise.all on the resulting array, which will resolve once all downloads are complete:
axios.get(`http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`)
// processResults returns a promise that settles only after every PDF is written
.then(processResults)
.catch((err) => {
console.log("COULD NOT DOWNLOAD FILES: \n", err);
});
function processResults(res) {
  // Map every result item to a promise that completes when its PDF has been
  // downloaded AND written to disk; Promise.all then waits for all of them.
  const downloadPromises = res.data.results.map((item) =>
    download(item.pdf_url).then((data) =>
      // wrap the callback-style writeFile in a promise
      new Promise((resolve, reject) => {
        writeFile(`${__dirname}/${today}/${item.pdf_file_name}`, data, (err) => {
          if (err) {
            reject(err);
          } else {
            console.log("FILE WRITTEN: ", item.pdf_file_name);
            resolve();
          }
        });
      })
    )
  );
  return Promise.all(downloadPromises).then(() => {
    console.log('all done');
  });
}
If you wanted to essentially block the function on each iteration, you would want to use an async function in combination with await instead.

Second async await function is not calling

I am trying to unzip a file first and then await for that unzipping file to be complete before i loop through each file and upload it to an S3 bucket. The first function unzipPromise is running fine, and everything is getting unzipped in the proper directory, but the uploadS3Promise is not running at all. I am not getting errors through this process, it just runs and unzips the file and never touches the uploadS3Promise function.
function unzipInput(file, client, project_number, oldpath, newpath) {
// NOTE(review): implicit global — `path` is assigned without let/const.
path = `zips/${client}/${project_number}/${file}/`;
function unzipPromise() {
return new Promise((resolve, reject) => {
// BUG(review): neither resolve nor reject is ever called, so this promise
// stays pending forever and the awaited code after it never runs.
fse.mkdirsSync(path);
fs.rename(oldpath, newpath, err => {
if (err) {
// throwing inside an async callback is NOT caught by the Promise executor
throw err;
}
});
// extraction is started, but nothing listens for its completion or errors
fs.createReadStream(newpath).pipe(unzip.Extract({ path }));
});
}
function uploadS3Promise() {
console.log("running");
return new Promise((resolve, reject) => {
// fs.unlinkSync(newpath);
fs.readdirSync(newpath).forEach(file => {
uploadToS3(file, client, project_number, path);
console.log(file, "test");
});
// BUG(review): `err` is not defined in this scope — reaching this line
// would throw a ReferenceError.
if (err) reject(err);
else resolve("success");
});
}
// New code with async:
(async () => {
try {
await unzipPromise();
await uploadS3Promise();
} catch (e) {
console.error(e);
}
})();
}
uploadS3Promise doesn't run because the code is still waiting for unzipPromise to complete. The code will not execute further unless you resolve or reject a promise.
So in you code ...
function unzipPromise(){
...
resolve(...)
...
}
On an unrelated note, I think it would be more readable not to name function names end in promise. Like just call them unzip and uploadS3. We don't name our function usually by return types right, like we never say intIndexOf, and so on.
You are supposed to call resolve after the unzip path is done, or reject if an error occurred.
And since streams are EventEmitter, you can listen to events and interact with it
const stream = fs.createReadStream(newpath).pipe(unzip.Extract({ path }))
// .pipe() returns its destination, so these handlers observe the Extract
// (write) side of the pipeline, not the file read side.
stream
.on('error', (err) => reject(err))
// NOTE(review): depending on the unzip library version the completion event
// may be 'close' rather than 'finish' — verify against the library's docs.
.on('finish', () => resolve())
Personally, I'd use a .then to break the process down a bit.
// Note the comma between the fulfillment and rejection handlers —
// .then() takes them as two separate arguments (the original snippet
// was missing it, which is a syntax error).
unzipPromise().then(res => {
  console.log('resolved promise 1');
  uploadS3Promise();
}, rej => {
  console.log('rejected promise 1, sorry!');
});
Also- "unzipInput" is never resolved or rejected.

Javascript Promises : Can I know which part of promise chain caused error?

(Please excuse my English)
I am learning about javascript promises, now.
Below sample code is a simple javascript code for node.js(my node.js version is v10.0.0), which asynchronously reads and parses a JSON file using promise chain.
const fs = require("fs");
function readFileAsync(filename) {
  // Promise wrapper around the callback-based fs.readFile: resolves with
  // the file contents decoded as UTF-8 text, rejects with the I/O error.
  return new Promise((resolve, reject) => {
    fs.readFile(filename, 'utf8', (error, result) =>
      error ? reject(error) : resolve(result)
    );
  });
}
readFileAsync('test.json')
.then(res => JSON.parse(res))
.then(res => { console.log('JSON=', res); })
// a single catch handles rejections from any earlier step in the chain
.catch(err => { console.log(err.message); });
I found that this sample code generates different formats of error messages.
For example, if it cannot find 'test.json', the error message is...
ENOENT: no such file or directory, open '/home/node/test.json'
If it cannot parse 'test.json', the error message is...
Unexpected token / in JSON at position 31
I want to modify the sample code to generate same format of error message always containing JSON file name.
To do so, firstly I should know which part of promise chain caused error. How can I know?
There are two ways to arrive at what you want.
Promise.then has two arguments, see below code and you can get more information here
readFileAsync('test.json')
.then(res => JSON.parse(res))
// the second argument handles rejections from *earlier* in the chain,
// but not errors thrown by the fulfillment handler beside it
.then(res => { console.log('JSON=', res); }, error => {
// here can catch error of previous then function
});
Another way is modify the function readFileAsync
function readFileAsync(filename) {
  // Variant that never rejects: read failures resolve to null so the
  // downstream chain can test for it instead of using .catch().
  return new Promise(resolve => {
    // keep the 'utf8' encoding from the original version so the result is
    // text, not a Buffer (the modified answer accidentally dropped it)
    fs.readFile(filename, 'utf8', (error, result) => {
      if (error)
        resolve(null); // you can resolve whatever you want
      else
        resolve(result);
    });
  });
}
And .catch() will not catch any error of readFileAsync.
Below sample code is a my solution. Thank you, Bergi and Stephen.
I choose this solution because I want to know exactly where in the chain the error occurred and what is the error.
const fs = require("fs");
function readFileAsync(filename) {
  // Promisified fs.readFile: resolves with the file's UTF-8 text,
  // rejects with the underlying I/O error.
  return new Promise((resolve, reject) => {
    fs.readFile(filename, 'utf8', (error, result) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(result);
    });
  });
}
function readJsonAsync(filename, fnCallBack) {
  // Logs "[step:filename] message" — step is -1 for a read error,
  // -2 for a parse error, so both the failing stage and the file are shown.
  // (The scraped snippet showed "$(unknown)" here; the author's stated goal
  // is a message "always containing JSON file name", i.e. ${filename}.)
  function fnMessage(n, str) {
    console.log(`[${n}:${filename}]`, str);
  }
  readFileAsync(filename)
    .then(
      res => JSON.parse(res),
      err => { fnMessage(-1, err.message); }
    ).then(
      res => {
        // if some errors occurred at the previous step, res === undefined
        if (res !== undefined)
          fnCallBack(filename, res);
      },
      err => { fnMessage(-2, err.message); }
    );
}
function printJSON(filename, json) {
  // Callback for readJsonAsync: print the parsed JSON tagged with its
  // source file name (the scraped snippet's "$(unknown)" was ${filename}).
  console.log(`JSON[${filename}]:`, json);
}
readJsonAsync('test.json', printJSON);
My solution has a prerequisite. The prerequisite is...
There is no simple way to break a promise chain even if some errors
occurred at previous steps of the chain.
Is this prerequisite right?

Categories