Node.js Script Failing Silently? - javascript

I've written a Node.js script that uses the download, axios, and fs modules to extract urls from JSON provided by the Federal Register, and download the associated PDF files. However, the script routinely fails to download all of the PDFs.
For whatever reason, my script "stalls" before downloading all of the PDF files. Meaning, it starts off great (downloads maybe 70, 80 files) but then stalls. It doesn't fire my catch block, or fail in any way. It just stops downloading.
The number of files varies based on what wifi connection I'm on. However, I've never been able to get the code to finish, and fire the .then block in my code. Ideally, I would like to use the .then block to process the files once they are downloaded.
Here is the code:
// The callback function that writes the file...
// Write `contents` to `path`, creating any missing parent directories first.
// `cb` is a node-style callback, invoked with an error or with nothing.
function writeFile(path, contents, cb) {
  mkdirp(getDirName(path), (err) => {
    if (err) {
      return cb(err);
    }
    fs.writeFile(path, contents, cb);
  });
}
// The function that gets the JSON...
// Fetch today's public-inspection JSON, then download every PDF it lists;
// any error anywhere in the chain is reported by the single catch handler.
const apiUrl = `http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`;
axios
  .get(apiUrl)
  .then(downloadPDFS)
  .catch((err) => {
    console.log("COULD NOT DOWNLOAD FILES: \n", err);
  });
// The function that downloads the data and triggers my write callback...
// For every result item, download its PDF and write it under ./<today>/.
// Resolves when every file has been written (logs "DONE" at the end).
function downloadPDFS(res) {
  const downloadPromises = res.data.results.map((item) => {
    const destination = `${__dirname}/${today}/${item.pdf_file_name}`;
    return download(item.pdf_url).then(
      (data) =>
        // writeFile is callback-based, so adapt it to a promise here.
        new Promise((resolve, reject) => {
          writeFile(destination, data, (err) => {
            if (err) {
              reject(err);
            } else {
              resolve(console.log("FILE WRITTEN: ", item.pdf_file_name));
            }
          });
        })
    );
  });
  return Promise.all(downloadPromises).then((res) => console.log("DONE"));
}
My project is on Github here, in case you'd like to install it and try for yourself. Here's a summary of what's going on, in plain English:
The script fetches JSON from a server, which contains the urls to all 126 PDFs. It then passes an array of these urls to the synchronous map function. Each of the urls is transformed into a promise, with the download module. That promise is implicitly returned, and stored in the Promise.all wrapper. When the download promise resolves (the document is done downloading) my custom writeFile function will trigger, writing the PDF file with the downloaded data. When all of the files have downloaded, the Promise.all wrapper should resolve. But that doesn't happen.
What is going wrong?
EDIT --
As you can see below, the script runs for a while, but then it just stalls and doesn't download any more files...

If it really is a rate issue then there's a few ways you can solve it (depending on how the API is rate limited)
Below there are 3 solutions in one
rateLimited ... this fires off requests limited to a given number of requests per second
singleQueue ... one request at a time, no rate limit, just all requests in series
multiQueue ... at most a given number of requests "in flight" at a time
// Returns an `add` function that serializes the supplied callbacks so they
// run at most `perSecond` times per second.
const rateLimited = perSecond => {
  // Clamp nonsense rates to a tiny positive value.
  perSecond = isNaN(perSecond) || perSecond < 0.0001 ? 0.0001 : perSecond;
  const milliSeconds = Math.floor(1000 / perSecond);
  // The chain's resolved value is always "when did the previous task run".
  // BUG FIX: Date.now must be CALLED — the original resolved with the
  // function object itself, making the arithmetic below NaN.
  let promise = Promise.resolve(Date.now());
  const add = fn => promise.then(lastRun => {
    const wait = Math.max(0, milliSeconds + lastRun - Date.now());
    // BUG FIX: promises have no .thenWait() method; delay via setTimeout.
    promise = promise
      .then(() => new Promise(resolve => setTimeout(resolve, wait)))
      .then(() => Date.now());
    return promise.then(fn);
  });
  return add;
};
// Run callbacks strictly one after another: every new task is chained onto
// the tail of a single ever-growing promise.
const singleQueue = () => {
  let tail = Promise.resolve();
  return fn => {
    tail = tail.then(fn);
    return tail;
  };
};
// Round-robin tasks across `length` independent promise chains, so at most
// `length` callbacks can be in flight at any moment.
const multiQueue = length => {
  // Treat a missing/invalid length as a single lane.
  length = isNaN(length) || length < 1 ? 1 : length;
  const lanes = Array.from({ length }, () => Promise.resolve());
  let cursor = 0;
  return fn => {
    cursor = (cursor + 1) % length;
    lanes[cursor] = lanes[cursor].then(fn);
    return lanes[cursor];
  };
};
// uncomment one, and only one, of the three "fixup" lines below
let fixup = rateLimited(10); // 10 per second for example
// BUG FIX: singleQueue is a factory and must be CALLED, like the other two.
//let fixup = singleQueue(); // one at a time
//let fixup = multiQueue(6); // at most 6 at a time for example
// Promise-returning file writer: ensures the parent directory exists,
// then writes `contents` to `path`. Resolves with undefined on success.
const writeFile = (path, contents) =>
  new Promise((resolve, reject) => {
    mkdirp(getDirName(path), (err) => {
      if (err) {
        reject(err);
        return;
      }
      fs.writeFile(path, contents, (writeErr) => {
        if (writeErr) {
          reject(writeErr);
          return;
        }
        resolve();
      });
    });
  });
// Entry point: fetch today's public-inspection JSON, hand the response to
// downloadPDFS, and funnel every failure into the single catch below.
axios.get(`http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`)
.then(downloadPDFS)
.catch((err) => {
console.log("COULD NOT DOWNLOAD FILES: \n", err);
});
// Download every PDF in the result set, throttled through `fixup`, and
// write each one under ./<today>/. Logs "DONE" once all are written.
function downloadPDFS(res) {
  const downloadPromises = res.data.results.map((item) =>
    fixup(() => {
      const target = `${__dirname}/${today}/${item.pdf_file_name}`;
      return download(item.pdf_url)
        .then((data) => writeFile(target, data))
        .then(() => console.log("FILE WRITTEN: ", item.pdf_file_name));
    })
  );
  return Promise.all(downloadPromises).then(() => console.log("DONE"));
}
I've also refactored the code a bit so downloadPDFS uses promises only - all the node-callback style code is put into writeFile

As Jaromanda pointed out, this is likely to do with the API limiting my access, not with an error in the script.
I added a filter to the script, to select less data, and it works. As follows:
// Same entry point as before, but with the EPA filter applied inside
// downloadPDFS the request count is small enough to finish, so the
// "DONE" .then below finally fires.
axios.get(`http://federalregister.gov/api/v1/public-inspection-documents.json?conditions%5Bavailable_on%5D=${today}`)
.then(downloadPDFS)
.then(() => {
console.log("DONE")
})
.catch((err) => {
console.log("COULD NOT DOWNLOAD FILES: \n", err);
});
// Narrow the result set to EPA documents only (far fewer requests against
// the API), then download and write each matching PDF.
function downloadPDFS(res) {
  const EPA = res.data.results.filter(
    (item) => item.agencies[0].raw_name === "ENVIRONMENTAL PROTECTION AGENCY" //// THIS FILTER
  );
  //// ONLY DOWNLOADING SOME OF THE DATA
  const downloadPromises = EPA.map((item) =>
    download(item.pdf_url).then(
      (data) =>
        new Promise((resolve, reject) => {
          writeFile(`${__dirname}/${today}/${item.pdf_file_name}`, data, (err) => {
            if (err) reject(err);
            else resolve(console.log("FILE WRITTEN: ", item.pdf_file_name));
          });
        })
    )
  );
  return Promise.all(downloadPromises);
}

Related

Promise function doesn't trigger after another promise

I'm working on a controller that takes either docx files or HTML strings as input, transforms them into a single PDF file, and returns its link as an output.
My code looks like this so far:
// 'files' is an array of uploaded docx files.
// NOTE(review): per the answer below, #fastify/multipart's toBuffer() returns
// a Promise, not a Buffer — so pump() is handed a pending Promise here.
// Also, `reject` is never wired to an error event, so a failed write
// leaves this Promise pending forever. TODO confirm.
const uploaded = files.map((file) => {
return new Promise((resolve, reject) => {
pump(
file.toBuffer(),
fs.createWriteStream(join(__dirname, 'files', file.filename))
.on('finish', resolve)
)
})
})
Promise.all(uploaded)
// Is triggered
.then(async () => await convertFiles())
// Is not triggered
.then(async () => {
// concatStoreFiles() is an external function because I need it somewhere else too
// NOTE(review): `test` has no declaration — this creates an implicit global.
test = await concatStoreFiles(join(__dirname, 'files'))
console.log({test})
res.send(test)
})
// Promisified shell-out: LibreOffice converts everything in ./files to PDF.
// Resolves with soffice's stdout (or stderr when stdout is nullish).
const convertFiles = () => {
  return new Promise((resolve, reject) => {
    const outDir = join(__dirname, 'files');
    const sources = join(__dirname, 'files', '*.*');
    const cmd = `soffice --headless --convert-to pdf --outdir ${outDir} ${sources}`;
    exec(cmd, (error, stdout, stderror) => {
      // A conversion error is logged but deliberately not fatal.
      if (error) console.warn(error);
      resolve(stdout ?? stderror);
    });
  });
};
concatStoreFile.js
module.exports = async function concatFiles (dirPath, outPath) {
return new Promise ((resolve, reject) => {
const existingFiles = []
fs.readdir(dirPath, (e, files) => {
files.forEach((file) => {
// is added to the files list only if finishing with ".pdf"
if (/[\d\w_-]+.pdf/.matches(file)) {
existingFiles.push(file)
}
});
resolve(existingFiles)
})
})
}
I'm working with Insomnia for my development / test process, and it tells me that I get an empty response. However, I'm supposed to get an array of pdf files existing in a specific directory. I'm not even getting console.log({test}), so I don't think my second then() is triggered.
I'm really rusty with async / await and Promise syntaxes, what should I do in this situation?
Thank you in advance
The #fastify/multipart's toBuffer() API returns a Promise, not a buffer. Checkout this article
So you need to write something like:
const uploaded = files.map(processFile)
// Await the multipart buffer first, then stream it to disk; resolves with
// the stored file's path so callers don't have to recompute it.
async function processFile (file) {
  const buffer = await file.toBuffer()
  const storedFileName = join(__dirname, 'files', file.filename)
  const writeStream = fs.createWriteStream(storedFileName)
  return new Promise((resolve, reject) => {
    pump(buffer, writeStream, (err) => {
      if(err) { return reject(err) }
      resolve(storedFileName)
    })
  }) // BUG FIX: this closing ")" was missing — the snippet did not parse.
}
Moreover, to improve the code, I returned the storedFileName instead of recalculating it.
You can convert this:
.then(async () => await convertFiles())
to this:
.then(() => convertFiles())
Mixing async/await with promise then/catch chains leads to hidden bugs that are hard to find.

How to do promise inside promise

I am trying to do pagination with promise. Here's my workflow.
Step 1 : Total page in pagination is 50. The url will be like url?page=1
Step 2 : In each page i will get 50 products, which i need to call a seperate api.
The only condition is, during pagination, the second page should called only if first page's 50 api call is done. Once all the 50 pages are fetched, it should return the promise.
What i have so far is.
let promises = [];
// NOTE(review): paginate() is called once and pushes a single request,
// despite being passed 50 — the argument is never used as a loop count.
paginate(50);
// NOTE(review): .catch() placed BEFORE .then() means the .then() below
// still runs after a failure (the catch swallows the error first).
Promise.all(promises)
.catch(function(err) {
console.log('err',err);
})
.then(() => {
console.log('All Done!!!!!');
});
// Queue up one page request; the caller gathers `promises` via Promise.all.
function paginate(loop){
  promises.push(
    axios(config)
      .then(function (response) {
        // Got 50 products
      })
      .catch(function (error) {
        // BUG FIX: the caught value is named `error`, but the original
        // logged `err`, which would throw a ReferenceError in the handler.
        console.log('err', error);
      })
  );
} // BUG FIX: the function's closing brace was missing from the snippet.
In the place Got 50 products i need to still iterate the 50 products in axios. I am not sure whether it is possible to do promise inside promise.
As the server can't bear all sudden loads, the only condition is to iterate the second 50 products (or next page's products) only after first (or previous 50 api is called).
Edit :
// Here i have got 50 products
As i told, On each page i will get 50 products, i will call another api for all those 50 products. I have given the code below.
The only constraint is that, for each page's response, the product API should be called for those 50 products (e.g. product?product=1), and the next page's 50 calls should start only after the previous page's 50 calls have completed.
// Fire one product-API call per item in the page response.
for (let index = 0; index < response.length; index++) {
  const element = response[index];
  //Here call api for one item
  axios(config)
    // BUG FIX: the parameter was written as "elemen t" (a syntax error).
    .then(function (element) {
      // Here i have got 50 products
    })
    .catch(function (error) {
      // BUG FIX: log the caught `error`, not the undefined `err`.
      console.log('err', error);
    })
}
You're probably not looking for Promise.all (which is meant for running things in parallel), but for recursion:
// Crawl pages 0 through 49 in sequence, then log the combined results.
fetchPages(0, 49)
.then(console.log);
// Fetch pages `from`..`to` strictly one after another, accumulating every
// page's items into `results`; yields the full list once past `to`.
function fetchPages(from, to, results = []) {
  if (from > to) {
    // Base case: past the last page — hand back everything collected.
    return results;
  }
  return fetchPage(from)
    .then((pageItems) => {
      results.push(...pageItems);
    })
    .then(() => fetchPages(from + 1, to, results)); // calls itself
}
// Demo stand-in for a real page fetch: resolves after 100ms with 50 fake
// product names for page `n`.
function fetchPage(n) {
  console.log(`Fetching page ${n}`);
  // Here, you would return axios()...
  // But just for this demo, I fake that:
  const results = [];
  for (let i = 0; i < 50; i += 1) {
    results.push(`Product ${i} from page ${n}`);
  }
  return new Promise((resolve) => setTimeout(() => resolve(results), 100));
}
Edit
The code above solves the problem of pages needing to be fetched only one after the other. We can keep the fetchPages function as it is. Now, since every page will contain products which will need to be fetched individually, we'll just edit fetchPage a bit.
There are multiple ways this could be done. Here are some:
Solution A: fetch every product in parallel
If the server can handle 50 requests going off at once, you could use Promise.all:
// Solution A: fetch the page, then request all of its products in parallel.
function fetchPage(n) {
  console.log(`Fetching page ${n}`);
  return axios(`/page?page=${n}`).then((productIds) =>
    Promise.all(productIds.map(fetchProduct))
  );
}
// One GET per product id.
function fetchProduct(id) {
  const url = `/product?product=${id}`;
  return axios(url);
}
Solution B: fetch every product in sequence
If the server can't handle multiple requests going off at once, you could use recursion once again:
// Solution B: fetch the page, then hand its product ids to the sequential
// product fetcher.
function fetchPage(n) {
  console.log(`Fetching page ${n}`);
  return axios(`/page?page=${n}`)
    // BUG FIX: the helper is named fetchProducts (capital P); the original
    // `.then(fetchproducts)` would throw a ReferenceError at runtime.
    .then(fetchProducts);
}
// Fetch products strictly one at a time (recursion instead of a loop);
// note this drains the caller's productIds array via shift().
function fetchProducts(productIds, results = []) {
  if (!productIds.length) {
    return results;
  }
  const productId = productIds.shift();
  return fetchProduct(productId)
    .then((product) => {
      results.push(product);
    })
    .then(() => fetchProducts(productIds, results)); // calls itself
}
function fetchProduct(id) {
return axios(`/product?product=${id}`);
}
Solution C: fetch products X requests at a time
If the server can handle X requests going off at once, you could use a module like queue, which can help you with concurrency:
const queue = require('queue'); // Don't forget to $ npm install queue
const MAX_CONCURRENT_CALLS = 4; // 4 calls max at any given time
// Solution C: fetch the page, then hand its product ids to the
// concurrency-limited product fetcher.
function fetchPage(n) {
  console.log(`Fetching page ${n}`);
  return axios(`/page?page=${n}`)
    // BUG FIX: the helper is named fetchProducts (capital P); the original
    // `.then(fetchproducts)` would throw a ReferenceError at runtime.
    .then(fetchProducts);
}
// Fetch products with at most MAX_CONCURRENT_CALLS requests in flight,
// using the `queue` package for concurrency control. Resolves with the
// collected products (in completion order).
function fetchProducts(productIds) {
  const q = queue();
  q.concurrency = MAX_CONCURRENT_CALLS;
  const results = [];
  for (const productId of productIds) {
    q.push(() =>
      fetchProduct(productId).then((product) => results.push(product))
    );
  }
  return new Promise((resolve, reject) => {
    q.start((err) => {
      if (err) return reject(err);
      resolve(results);
    });
  });
}
// Promisified product lookup — one request per id.
function fetchProduct(id) {
  return axios('/product?product=' + id);
}

Firebase Google Cloud Function: createReadStream results in empty file

I try to process a Video file (stored in Google Firebase storage) through a Google Cloud Function. I have working code that download the entire video files into the NodeJS Google cloud function: await bucket.file(filePath).download({ destination: tempFile }).
But the goal is only to read the framerate, therefore the headers of the videofile would suffice. But createReadStream gives me an empty tempFile. Any advise much appreciated!
// Storage-triggered cloud function: tries to read just enough of the video
// to determine its frame rate.
exports.checkFramerate = functions.region('europe-west1').storage.object().onFinalize(async (object, context) => {
const bucket = admin.storage().bucket(object.bucket); // Bucket class
const filePath = object.name; // videos/xbEXdMNFb1Blbd9r2E8m/comp_test.mp4
const fileName = filePath.split('/').pop(); // comp_test.mp4
const bucketDir = path.dirname(filePath); // videos/xbEXdMNFb1Blbd9r2E8m
// Pre-create an empty temp file so the statSync calls below can't throw.
const tempFile = path.join(os.tmpdir(), 'temp.mp4')
fs.closeSync(fs.openSync(tempFile, 'w'))
console.log("tempFile size1", fs.statSync(tempFile).size)
// await bucket.file(filePath).download({ destination: tempFile }); // this works: tempFile size2 = 3180152
// NOTE(review): createReadStream() returns a Stream, not a Promise, so this
// `await` resolves immediately — the pipe has not finished when the
// statSync below runs, hence the zero size (see the answer further down).
await bucket.file(filePath).createReadStream({ // this does not work: tempFile size2 = 0
start: 10000,
end: 20000
})
.on('error', function(err) {console.log(err)})
.pipe(fs.createWriteStream(tempFile));
console.log("tempFile size2", fs.statSync(tempFile).size)
// NOTE(review): this mi() promise is neither returned nor awaited, so the
// function may be torn down before the frame rate is actually read.
mi(tempFile).then(data => {
console.log("frameRate", data[0].general.frame_rate[0])
return data[0].general.frame_rate[0];
}).catch(e => {console.error(e)});
});
I tried implementing even the example of https://googleapis.dev/nodejs/storage/latest/File.html#createReadStream but to no avail. remoteFile.download works beautifully but remoteFile.createReadStream gives me empty files...
const remoteFile = bucket.file(filePath);
const localFilename = tempFile;
// NOTE(review): the pipe below is asynchronous and nothing here waits for
// its 'end'/'finish' events, so the fs.stat call races the download and
// will usually observe a zero-length file.
remoteFile.createReadStream()
.on('error', function(err) {})
.on('response', function(response) {})
.on('end', function() {})
.pipe(fs.createWriteStream(localFilename));
fs.stat(localFilename, (err, stats) => {
if (err) {console.log(err)}
return console.log("stats async",stats.size)
})
As mentioned, a promise should be used here, since the stream is asynchronous.
Here is an example of reading a JSON file:
let buf = '';
// Stream the JSON config from the bucket and resolve with its full contents.
const loadData = async () => {
  // BUG FIX: reset the module-level accumulator on every call; otherwise a
  // second loadData() call would resolve with both reads' data appended.
  buf = '';
  return new Promise((resolve, reject) => {
    storage.bucket('bucket-name').file('test-config.json')
      .createReadStream()
      .on('error', reject)
      .on('data', function(d) {
        buf += d;
      }).on('end', function() {
        resolve(buf)
      });
  })
}
const data = await loadData()
Your problem is that the stream API isn't promisifed. So, the await does nothing, and your function continues before the stream is piped, and the file is still zero-length when you stat it the second time.
The download method works just fine because it returns a Promise.
This answer outlines the general approach you need to take. In summary though, you basically want the section of your code that does the piping to read like this:
// Pipe the requested byte range into tempFile, then wait for the write
// stream to finish before reading the file's size.
const stream = bucket
  .file(filePath)
  .createReadStream({ start: 10000, end: 20000 })
  .pipe(fs.createWriteStream(tempFile));
await new Promise((resolve, reject) => {
  stream.on('error', reject);
  stream.on('finish', resolve);
});
console.log("tempFile size2", fs.statSync(tempFile).size)
Your function will then wait until the finish event occurs when the piping is complete and the stream is closed. Obviously you probably want to do something more clever with the error handler too, but this is the general form of what you need.

Even after pushing element in the array.Array is showing empty outside the loop

I'm trying to upload multiple images using cloudinary in node.js application.
Storing every image URL in an array. But my array is empty outside the loop. Can't understand why.
// Upload every attached image to cloudinary, then create the post.
const postCreate = (req,res,next) => {
req.body.post.images = [];
const file_length = req.files.length;
let arr = [];
//console.log(req.files);
new Promise((resolve, reject) => {
req.files.forEach((file,index) => {
// NOTE(review): `i` has no declaration — this creates an implicit global.
i = index;
// Each upload is asynchronous; its promise is not collected anywhere,
// so nothing below waits for it to settle.
cloudinary.v2.uploader.upload(file.path)
.then(image => {
//console.log(image);
req.body.post.images.push({
url: image.secure_url,
public_id: image.public_id
});
console.log("array", req.body.post.images);//there array is containing the element which is pushed.
});
console.log("arr", req.body.post.images);//but there it is showing empty array .Can't understand why array is empty.
});
// NOTE(review): resolve() fires synchronously here, before any upload above
// has completed — so the .then below runs with an empty images array
// (this is the bug explained in the answer that follows).
resolve();
}).then(() => {
Post.create(req.body.post)
.then(post => {
//console.log(req.body.post.images);
res.redirect(`/posts/${post.id}`);
}).catch(err => {
console.log('Error will saving posts from db ', err);
return next(err);
});
});
Each of the uploads is asynchronous and returns a promise.
You need to have all those promises resolve before moving on to the final then()
You can map an array of those promises and use Promise.all() to return the full array to the final then()
Something like:
// Upload one file to cloudinary and reduce the response to the two fields
// the post document stores.
const doUpload = (file) =>
  cloudinary.v2.uploader.upload(file.path).then((image) => ({
    url: image.secure_url,
    public_id: image.public_id,
  }));
// Start every upload up-front, wait for the whole batch, then create the post.
const postCreate = (req, res, next) => {
  // map array of individual upload promises
  const uploadPromises = req.files.map(doUpload);
  Promise.all(uploadPromises)
    .then((imagesArray) => {
      // assign new array to post body
      req.body.post.images = imagesArray;
      Post.create(req.body.post)
        .then((post) => {
          //console.log(req.body.post.images);
          res.redirect(`/posts/${post.id}`);
        })
        .catch((err) => {
          console.log('Error will saving posts from db ', err);
          return next(err);
        });
    })
    .catch((err) => console.log('One of the uploads failed'));
};
The 2nd log message actually gets called first while the array is empty because the code in the then block is waiting for something asynchronous to complete.
Your problem is that your print statement fires before the loop has completed, so you have to use async/await for a proper solution, and learn more about this topic.
Please refer to https://blog.risingstack.com/mastering-async-await-in-nodejs for your solution;
it describes async/await, which will give you the proper output.

Unable to get promises to work, subsequent promises not being called

I'm trying to extend some existing code with additional promises, but they are a new topic for me at the moment and i'm obviously missing something. This is running as part of a build scrip for npm.
All i am currently trying to make happen is for the final then to be called after the pack operation has happened for each architecture. I have tried wrapping it in a
return new Promise
But at the moment i am not returning anything from that function so i'm not sure what i should include in the resolve call at the end. If i just call the resolve with a true nothing happens, and wrapping it in a promise seems to cause the function to not actually run, and no errors are caught anywhere?
I'm guessing i am going about this completely wrong, all i want to achieve is to run another function once the previous one has completed?
Here's the code as it stands with the additional .then that i can't get to be called.
// Promise wrapper around webpack's node-style callback API: resolves with
// the build stats, rejects with any fatal error.
function build(cfg) {
  return new Promise((resolve, reject) => {
    webpack(cfg, (err, stats) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(stats);
    });
  });
}
// Build both webpack configs, clean the release dir, then package the app.
function startPack() {
console.log('start pack...');
build(electronCfg)
.then(() => build(cfg))
.then(() => del('release'))
.then(paths => {
if (shouldBuildAll) {
// build for all platforms
const archs = ['ia32', 'x64'];
const platforms = ['linux', 'win32', 'darwin'];
platforms.forEach(plat => {
archs.forEach(arch => {
// NOTE(review): log(plat, arch) is invoked immediately and its return
// value is passed as pack's callback — likely meant () => log(plat, arch).
// Also, pack() is callback-based, so nothing here waits for packaging to
// finish before the .then below runs (see the answer that follows).
pack(plat, arch, log(plat, arch));
});
});
} else {
// build for current platform only
pack(os.platform(), os.arch(), log(os.platform(), os.arch()));
}
})
.then(() => {
console.log('then!');
})
.catch(err => {
console.error(err);
});
}
// Build packager options for one platform/arch pair and kick off packaging;
// `cb` is handed straight to packager as its completion callback.
function pack(plat, arch, cb) {
  // there is no darwin ia32 electron
  if (plat === 'darwin' && arch === 'ia32') return;
  // Pick the icon format that matches the target platform.
  let extension = '.png';
  if (plat === 'darwin') {
    extension = '.icns';
  } else if (plat === 'win32') {
    extension = '.ico';
  }
  const iconObj = { icon: DEFAULT_OPTS.icon + extension };
  const opts = Object.assign({}, DEFAULT_OPTS, iconObj, {
    platform: plat,
    arch,
    prune: true,
    'app-version': pkg.version || DEFAULT_OPTS.version,
    out: `release/${plat}-${arch}`,
    'osx-sign': true
  });
  packager(opts, cb);
}
You didn't say what log is, but if it's a plain logging function, then it looks like you're passing in undefined (the result from calling log(...)) as the cb argument to pack. Perhaps you meant:
pack(plat, arch, () => log(plat, arch));
In any case, this won't do anything to wait for packing to finish. I don't know why you're not seeing any console output, but if you're looking for this output to happen after all the packing has finished, then you need to wrap packager in a promise. Something like:
// Promisified variant of pack(): the promise resolves when packager
// invokes its completion callback.
var pack = (plat, arch) => new Promise(resolve => {
// ... same option construction as the original pack() above ...
packager(opts, resolve);
});
And then use Promise.all instead of forEach to do all the packaging (in parallel if that's OK):
.then(paths => {
if (!shouldBuildAll) {
return pack(os.platform(), os.arch());
}
return Promise.all(['linux', 'win32', 'darwin'].map(plat =>
Promise.all(['ia32', 'x64'].map(arch => pack(plat, arch))));
})
.then(() => console.log('then!'))
.catch(err => console.error(err));

Categories