How to wait for all Promises to resolve in ExpressJS? - javascript

Background:
I have a simple REST API in ExpressJS that parses multiple pages together. The page numbers are dynamic.
Issue:
Because of performance constraints, I would like to fetch multiple web pages asynchronously, wait for all of them to finish downloading, parse them into my desired format, and return the output.
After researching promises and async online (I'm still new to this async topic), most sources told me to use Promise.all, but I just can't get it to work somehow.
Current output when navigating to GET /xxx
{
    username: xxx,
    parsedHTML: []
}
Targeted output:
{
    username: xxx,
    parsedHTML: [
        "BUNCH OF ANALYSED HTML",
        "BUNCH OF ANALYSED HTML",
        "BUNCH OF ANALYSED HTML",
        ...
    ]
}
Code
const express = require("express");
const http = require("http");
const fetch = require('node-fetch');

const app = express();

app.get("/:username", (req, res) => {
    const username = req.params.username;
    let page = 3; // Will be obtained dynamically. But for now, I'll put a constant here
    res.json({
        username: username,
        parsedHTML: getParsedHTML(username, page),
    });
    console.log("Page sent");
});
function getParsedHTML(username, page) {
    let promises = [];
    let analyses = [];
    for (var i = 1; i < (page + 1); i++) {
        promises.push(fetch(`https://example.com/profile/${username}/?page=${i}`)
            .then((c) => c.text()));
        // console.log(`Added promise`)
    }
    Promise.all(promises).then(() => {
        for (let i = 0; i < promises.length; i++) {
            let promise = promises[i];
            analyses.push(analyse(promise));
        }
    });
    return analyses;
}
function analyse(html) {
    // Some synchronous analyse stuff here
    // Right now it does nothing
    return html;
}
app.listen(3000, () => console.log('API listening on port ' + 3000 + '!'))
Any help would be appreciated. Thanks a lot.

You're calling Promise.all on the promises correctly, but the getParsedHTML function isn't waiting for that Promise.all call to resolve before returning. So your res.json runs immediately and synchronously, and the analyses array that gets returned is still empty.
Return the Promise.all call instead, and make sure to analyze the responses (from the Promise.all call) rather than the Promises:
return Promise.all(promises).then((responses) => {
    for (let i = 0; i < responses.length; i++) {
        let response = responses[i];
        analyses.push(analyse(response));
    }
}).then(() => analyses);
But you can significantly clean up your code by mapping the resulting array of responses:
function getParsedHTML(username, page) {
    let promises = [];
    for (var i = 1; i < (page + 1); i++) {
        promises.push(fetch(`https://example.com/profile/${username}/?page=${i}`)
            .then((c) => c.text()));
    }
    return Promise.all(promises)
        .then((responses) => responses.map(analyse));
}
Also make sure to wait for the promise returned by getParsedHTML to resolve before sending res.json:
app.get("/:username", (req, res) => {
    const username = req.params.username;
    let page = 3; // Will be obtained dynamically. But for now, I'll put a constant here
    getParsedHTML(username, page)
        .then((parsedHTML) => {
            res.json({
                username,
                parsedHTML
            });
            console.log("Page sent");
        });
});
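For completeness, the same route can also be written with async/await instead of a .then() chain. This is a minimal sketch of the same logic; the try/catch error handling is an addition, not part of the code above:

app.get("/:username", async (req, res) => {
    const username = req.params.username;
    let page = 3; // still a constant, as above
    try {
        const parsedHTML = await getParsedHTML(username, page);
        res.json({ username, parsedHTML });
        console.log("Page sent");
    } catch (err) {
        // a failed fetch would otherwise leave the request hanging
        console.error(err);
        res.status(500).json({ error: "Failed to fetch pages" });
    }
});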

Related

Fetching synchronously from redis in Node through async/await

I have been learning JavaScript and Vue, coming from Java, for a few days and am not able to solve a problem in my Node/Express app with async/await. The code below receives a list of stock symbols in the request and then loops over them, checking whether details for any of the symbols are already cached in redis.
var controllers = {
    getCurrentPrice: function(req, res) {
        var symbolsArray = req.body.symbols;
        var results = [];
        var tmpArray = [];
        _(symbolsArray).each(async function(symbol, iPos) {
            client.hget("realtime", symbol, function(err, reply) {
                if (reply) {
                    await results.push(reply);
                } else {
                    await tmpArray.push(symbol);
                }
                console.log("reply", reply);
            });
        });
        console.log("Results so far ", results);
        if (!tmpArray || tmpArray.length == 0) {
            // will be fetching these now
        }
    }
}
I get output from the inner console statement but not from the outer one. I have tried a few solutions found online, such as using redis-co to promisify the redis calls, but could not solve it exactly.
There are several things wrong here:
Doing await on the result of a .push() doesn't do anything useful. You use await on a promise.
Your .each() loop doesn't wait for each asynchronous operation to finish, therefore you have no way of knowing when all the async operations are done.
I'd suggest using a regular for loop, where async/await will pause the loop:
const util = require('util');
client.hgetP = util.promisify(client.hget);

var controllers = {
    getCurrentPrice: async function(req, res) {
        var symbolsArray = req.body.symbols;
        var results = [];
        var tmpArray = [];
        for (let symbol of symbolsArray) {
            let reply = await client.hgetP("realtime", symbol);
            if (reply) {
                results.push(reply);
            } else {
                tmpArray.push(symbol);
            }
        }
        // do any further processing of tmpArray here
        console.log(results);
        return results; // this will be the resolved value of the returned promise
    }
}
Sample usage:
obj.getCurrentPrice(req, res).then(results => {
    console.log(results);
}).catch(err => {
    console.log(err);
});
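If the lookups are independent, a parallel variant of the same controller is possible. This is a sketch, reusing the promisified client.hgetP from above:

getCurrentPrice: async function(req, res) {
    var symbolsArray = req.body.symbols;
    // start all redis lookups at once; replies keep the input order
    const replies = await Promise.all(
        symbolsArray.map(symbol => client.hgetP("realtime", symbol))
    );
    const results = replies.filter(reply => reply);              // cache hits
    const tmpArray = symbolsArray.filter((s, i) => !replies[i]); // cache misses
    // do any further processing of tmpArray here
    return results;
}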

How to wait for a loop to finish running before sending back a response in NodeJs?

I'm trying to send back an array of objects after a GET request to a URL, but I can't figure out how to structure the logic so that the return array is fully populated before it is sent to the client.
Below is my server-side code for responding to the request. I used passport.js earlier in the code to create a login portal and user object. I'm trying to take the "connections" array from the user who is making the request and send them back an array of objects with each connection's name and picture. I know that the code below is syntactically incorrect, but it's an overview of what I'm trying to accomplish. I've tried doing it the callback way, but that just kept leading me in circles because I couldn't figure out the correct logic for it.
router.get('/data', function(req, res, next) {
    var IdArr = req.user.connections;
    var retArr = [];
    function getUsers() {
        for (i = 0; i < IdArr.length; i++) {
            User.getUserById(IdArr[i], function(err, patient) {
                retArr[i] = { name: patient.name, pic: patient.profileImage };
            });
        }
    }
    function getDataAndSend() {
        function(getUsers(), function sendRes() { // I know this is incorrect syntax
            console.log(retArr);
            res.json(retArr);
        });
    }
    getDataAndSend();
});
First of all, you should declare i, and with block scope (let), so that each nested callback closes over its own copy of i.
You could check how many entries in retArr have been retrieved and call res.json once you know you have them all.
router.get('/data', function(req, res, next) {
    var IdArr = req.user.connections;
    var retArr = [];
    for (let i = 0; i < IdArr.length; i++) {
        User.getUserById(IdArr[i], function(err, patient) {
            retArr[i] = { name: patient.name, pic: patient.profileImage };
            if (Object.keys(retArr).length === IdArr.length) res.json(retArr);
        });
    }
});
I think the easiest way to deal with these things is with Promises. Any async function written in the callback style (e.g. this User.getUserById) can be turned into a function that returns a promise. You just wrap the call in a new Promise and resolve when you're done.
Say, in your case:
function promiseGetById(id) {
    return new Promise((resolve, reject) =>
        User.getUserById(id, (err, pat) => resolve(pat))
    );
}
Then something like:
Promise.all(IdArr.map(id => promiseGetById(id))).then(arr =>
    res.json(
        arr.map(patient => ({ name: patient.name, pic: patient.profileImage }))
    )
);
Alternatively, if you don't like promises, you can keep a counter that each callback increments, and call res.json once the counter reaches the expected value.
Change all your function logic to return promises and use async/await for code clarity.
const getUserById = (id) => {
    return new Promise((resolve, reject) => {
        User.getUserById(id, function(err, patient) {
            resolve({ name: patient.name, pic: patient.profileImage });
        });
    });
}

const getAllUsers = async (idArr) => {
    const retArr = [];
    // for...of loop to await each call before continuing the loop,
    // but this executes only sequentially
    for (let id of idArr) {
        const ret = await getUserById(id);
        retArr.push(ret);
    }
    // for parallel execution, use Promise.all() instead of the loop above:
    // const retArr = await Promise.all(idArr.map(id => getUserById(id)));
    return retArr;
}
router.get('/data', async (req, res, next) => {
    var IdArr = req.user.connections;
    var retArr = await getAllUsers(IdArr);
    console.log(retArr);
    res.json(retArr);
});
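As a usage note, if the order of execution doesn't matter, the whole helper can collapse into a parallel one-liner built on the same getUserById wrapper from above (a sketch):

// parallel variant: all lookups start at once; results keep the input order
const getAllUsersParallel = (idArr) => Promise.all(idArr.map(getUserById));

// in the router: var retArr = await getAllUsersParallel(IdArr);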
You seem to be extracting way too many functions for an algorithm that isn't that complicated. The code below initially extracts the information it needs from the request. Then it populates the array needed for the response and sends it once every callback has reported back.
router.get('/data', (req, res, next) => {
    const idArray = req.user.connections;
    let responseArray = [];
    let completed = 0;
    // loop to populate responseArray
    for (let id of idArray) {
        User.getUserById(id, (err, patient) => {
            // add the patient information to the end of the array
            responseArray.push({
                name: patient.name,
                pic: patient.profileImage
            });
            // send back responseArray once the last callback has fired;
            // calling res.json right after the loop would send an empty
            // array, since the callbacks have not run yet at that point
            if (++completed === idArray.length) res.json(responseArray);
        });
    }
});

Implementing exponential backoff with Node.js and Promises

I'm making about 70 requests to an API in my code. I'm getting an error response telling me that I'm making requests too quickly one after the other, so I have decided to use exponential backoff to get around this problem.
Currently, this is what my code looks like:
let backoffTime = 1;
for (let i = 0; i < fileIds.length; i++) {
    let fileId = fileIds[i];
    getFileName(fileId, auth)
        .then((name) => {
            // do some work
        })
        .catch((err) => {
            // assumes that the error is "request made too soon"
            backoff(backoffTime);
            backoffTime *= 2;
            i--;
            console.log(err);
        });
}
function backoff(time) {
    let milliseconds = time * 1000;
    let start = (new Date()).getTime();
    while (((new Date()).getTime() - start) < milliseconds) {
        // do nothing
    }
}
My getFileName function makes the request to the API and returns a Promise.
Currently this does not work because Promises are async (kinda). My for loop runs really fast and calls getFileName, which fires all those API requests almost at once. Then some of the API calls come back with the error, at which point backoffTime is updated, but by then all the requests have already been sent. This implementation doesn't work.
Any idea how I can implement this correctly?
First of all, blocking the event loop with a busy-wait loop like that is a very, very bad idea; just use promises:
const delay = ms => new Promise(res => setTimeout(res, ms));
Then just await the promise before continuing the loop and use the delay:
(async function() {
    let backoffTime = 1000; // assumed starting delay in milliseconds
    for (let i = 0; i < fileIds.length; i++) {
        let fileId = fileIds[i];
        await getFileName(fileId, auth)
            .then((name) => {
                // do some work
            })
            .catch((err) => {
                // assumes that the error is "request made too soon"
                backoffTime *= 2;
                i--;
                console.log(err);
                return delay(backoffTime);
            });
    }
})();
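The same idea can be packaged as a reusable helper. This is a minimal sketch (the withBackoff name and the retry cap are additions, not from the code above) that retries a promise-returning function with exponentially growing delays, using the delay helper defined earlier:

async function withBackoff(fn, maxRetries = 5, baseDelayMs = 1000) {
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
        try {
            return await fn();
        } catch (err) {
            if (attempt === maxRetries) throw err; // out of retries, give up
            // wait 1s, 2s, 4s, ... before trying again
            await delay(baseDelayMs * 2 ** attempt);
        }
    }
}

// usage with the question's getFileName:
// const name = await withBackoff(() => getFileName(fileId, auth));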
The easiest way is to use async/await and then either await each request or, if that is too slow for you, create chunks of e.g. 15 requests and Promise.all each chunk, as sketched below.
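A minimal sketch of that chunking idea, reusing getFileName from the question (the chunk size of 15 is just an example):

(async function() {
    const chunkSize = 15;
    for (let i = 0; i < fileIds.length; i += chunkSize) {
        const chunk = fileIds.slice(i, i + chunkSize);
        // run one chunk of requests in parallel, then move on to the next
        await Promise.all(chunk.map(fileId => getFileName(fileId, auth)));
    }
})();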
You can also use this: https://caolan.github.io/async/parallelLimit.js.html
It will require some extra work to transition promises into callbacks and vice versa, but it will do the best job.
This is the function: parallelLimit(tasks, limit, callback)
const async = require('async');

const tasks = [];
// This for loop just prepares the tasks into an array of functions
for (let i = 0; i < fileIds.length; i++) {
    tasks.push(callback => doWorkThatReturnsPromise(fileIds[i])
        .then(val => callback(null, val))
        .catch(callback));
}

async.parallelLimit(tasks, 15, (err, values) => {
    // do something after it finishes
});
You can fix that using closures, here is an example:
for (let i = 0; i < fileIds.length; i++) {
    let fileId = fileIds[i];
    doCall(fileId);
}

function doCall(fileId, backoffTime = 1) {
    getFileName(fileId, auth)
        .then((name) => {
            // do some work
        })
        .catch((err) => {
            setTimeout(() => {
                doCall(fileId, (backoffTime * 2));
            }, backoffTime * 1000);
        });
}
I've replaced the backoff function, which paused execution with a busy while loop, with a setTimeout call.
This code can cause infinite recursion; you should probably add some checks to prevent that, as sketched below.
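For example, a retry cap can be added like this (a sketch; the retriesLeft parameter is an addition to the code above):

function doCall(fileId, backoffTime = 1, retriesLeft = 5) {
    getFileName(fileId, auth)
        .then((name) => {
            // do some work
        })
        .catch((err) => {
            if (retriesLeft <= 0) {
                console.log(`Giving up on ${fileId}:`, err);
                return; // stop recursing
            }
            setTimeout(() => {
                doCall(fileId, backoffTime * 2, retriesLeft - 1);
            }, backoffTime * 1000);
        });
}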

How do I do a nested database query within a for loop?

supportChat: function() {
    return functions.https.onRequest((req, res) => {
        var userId = req.query.userId;
        var object = {};
        db.ref("/users/-KwZ2N38Q6a1a87p982/threads").orderByKey().once('value').then(function(snapshot) {
            var snap = snapshot.val();
            var keys = Object.keys(snap);
            for (var i = 0; i < keys.length; i++) {
                k = keys[i];
                db.ref("/threads/" + k + "/lastMessage").orderByKey().once('value').then(function(snapshot) {
                    var snapshot = snapshot.val();
                    if (snapshot[i]["sender"] != "-KwZ2N38Q6a1a87p982") {
                        object[snap[i]["messageID"]] = snapshot;
                    }
                });
            }
            console.log(object);
            return res.status(200).send(object);
        });
    });
},
Each user in my database has a threads child, which shows all the chat threads they have going. Then we have another threads section of the database which has all the data of each thread.
What I'm trying to do is check a particular user's thread ids against the threads section of the database to find all the threads where the last message in the thread was not sent by me (the current user).
I have no idea why I'm struggling with this. What is the right way to send all the snapshot.val() of each thread that meets my condition to the endpoint all in one push? Maybe my approach is way off.
To know when all the promises in your for loop are done, accumulate the promises from the loop into an array and then use Promise.all() on that array.
You also need to protect your for loop index so that each invocation of the for loop maintains its own index so it is still correct when your async .then() handler is called. You can do that by switching your for loop to use let i instead of var i.
supportChat: function() {
    return functions.https.onRequest((req, res) => {
        let userId = req.query.userId;
        let object = {};
        db.ref("/users/-KwZ2N38Q6a1a87p982/threads").orderByKey().once('value').then(function(snapshot) {
            let snap = snapshot.val();
            let keys = Object.keys(snap);
            let promises = [];
            for (let i = 0; i < keys.length; i++) {
                let k = keys[i];
                promises.push(db.ref("/threads/" + k + "/lastMessage").orderByKey().once('value').then(function(snapshot) {
                    // note: `let snapshot = snapshot.val()` would redeclare the
                    // callback parameter, so the value gets its own name here
                    let message = snapshot.val();
                    if (message[i]["sender"] != "-KwZ2N38Q6a1a87p982") {
                        object[snap[i]["messageID"]] = message;
                    }
                }));
            }
            return Promise.all(promises).then(() => {
                console.log(object);
                return res.send(object);
            });
        }).catch(err => {
            console.log(err);
            return res.sendStatus(500);
        });
    });
},
Other comments on the code:
FYI, if you're trying to actually return something from the supportChat() function, please specify what that is. Right now, it is not clear what you expect to return from that function call.
And, you don't need the .status(200) part of this:
res.status(200).send(object);
You can just do:
res.send(object);
all by itself and that will automatically use a status of 200.
And, you need a .catch() handler to catch errors and send a response in the error condition.

Trying to read a directory created by node js after execution of a function

I created a node application that scrapes Google and downloads the top 15 images, compresses them, and stores them on disk in a folder named after the query received. The problem I'm facing is that when I go back to read that folder using readdirSync and store the results in an array, it returns an empty array. What is wrong with the code?
request(url, function (error, response, body) {
    if (!error) {
        var $ = cheerio.load(body);
        var imgNodes = $('#ires td a img');
        // imgNodes is merely an array-like object, sigh.
        // This is purposely old-school JS because newer stuff doesn't work:
        var urls = [];
        for (let i = 0; i <= 14; i++) {
            let imgNode = imgNodes[i];
            urls.push(imgNode.attribs['src']);
        }
        // console.log(urls);
        const processCompress = new Promise(resolve => {
            fs.mkdir(path.join(__dirname, 'Photos', query), function (error) {
                let j = 0;
                if (!error) {
                    for (i in urls) {
                        console.log(i);
                        var source = tinify.fromUrl(urls[i]);
                        source.toFile(path.join(__dirname, 'Photos', query, "optimized_" + ++j + ".jpg"));
                    }
                }
            });
            resolve();
        });
        const getFiles = new Promise(resolve => {
            fs.readdirSync(path.join(__dirname, 'Photos', query)).forEach(function (file) {
                fileName.push(path.join(__dirname, 'Photos', query, file));
            });
            resolve();
        });
        function colourMeBw() {
            for (let k = 0; k < fileName.length; k++) {
                Jimp.read(fileName[k], (err, image) => {
                    if (err) throw err;
                    image.greyscale().write(fileName[k]);
                });
            }
        }
        processCompress.then(() => getFiles);
        colourMeBw();
    } else {
        console.log("We’ve encountered an error: " + error);
    }
});
There are a number of things wrong with your code:
In processCompress(), you are resolving the promise before fs.mkdir() is done and before any of the images have been fetched and written.
In getFiles() you are wrapping a synchronous I/O function in a promise. The first problem is that you shouldn't be using synchronous I/O at all. That is the fastest way to wreck the scalability of your server. Then, once you switch to the async version of fs.readdir(), you have to resolve the promise appropriately.
There's no way to know when colourMeBw() is actually done.
You should never iterate an array with for(i in urls) for a variety of reasons. In ES6, you can use for (url of urls). In ES5, you can use either a traditional for (var i = 0; i < urls.length; i++) {} or urls.forEach().
You have no error propagation. The whole process would choke if you got an error in the middle somewhere because later parts of the process would still continue to try to do their work even though things have already failed. There's no way for the caller to know what errors happened.
There's no way to know when everything is done.
Here's a version of your code that uses promises to properly sequence things, propagate all errors appropriately and tell you when everything is done. I don't myself know the tinify and Jimp libraries so I consulted their documentation to see how to use them with promises (both appear to have promise support built-in). I used the Bluebird promise library to give me promise support for the fs library and to take advantage of Promise.map() which is convenient here.
If you didn't want to use the Bluebird promise library, you could promisify the fs module other ways or even promisify individual fs methods you want to use with promises. But once you get used to doing async programming with promises, you're going to want to use it for all your fs work.
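For example, without Bluebird, the individual fs methods used below can be wrapped with Node's built-in util.promisify (a sketch of that alternative):

const util = require('util');
const fs = require('fs');

// promise-returning versions of the two fs calls used in this answer
const mkdirAsync = util.promisify(fs.mkdir);
const readdirAsync = util.promisify(fs.readdir);

// usage: readdirAsync(dir).then(files => ...).catch(err => ...);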
This is obviously untested (no way to run this here), but hopefully you get the general idea for what we're trying to do.
const Promise = require('bluebird');
const fs = Promise.promisifyAll(require('fs'));
const path = require('path');
const cheerio = require('cheerio');
const tinify = require('tinify');
const Jimp = require('jimp');
const rp = require('request-promise');

rp(url).then(function(body) {
    var $ = cheerio.load(body);
    var imgNodes = $('#ires td a img');
    // imgNodes is merely an array-like object, sigh.
    // This is purposely old-school JS because newer stuff doesn't work:
    var urls = [];
    for (let i = 0; i <= 14; i++) {
        let imgNode = imgNodes[i];
        urls.push(imgNode.attribs['src']);
    }
    // console.log(urls);
    const processCompress = function() {
        return fs.mkdirAsync(path.join(__dirname, 'Photos', query)).then(function() {
            let j = 0;
            return Promise.map(urls, function(url) {
                var source = tinify.fromUrl(url);
                return source.toFile(path.join(__dirname, 'Photos', query, "optimized_" + ++j + ".jpg"));
            });
        });
    };
    const getFiles = function() {
        return fs.readdirAsync(path.join(__dirname, 'Photos', query)).then(function(files) {
            return files.map(function(file) {
                return path.join(__dirname, 'Photos', query, file);
            });
        });
    };
    function colourMeBw(fileList) {
        return Promise.map(fileList, function(file) {
            // Jimp.read() resolves with the image; writeAsync() returns a promise
            return Jimp.read(file).then(function(image) {
                return image.greyscale().writeAsync(file);
            });
        });
    }
    return processCompress().then(getFiles).then(colourMeBw);
}).then(function() {
    // all done here
}).catch(function(err) {
    // error here
});
Your query variable in use here does not appear to be defined anywhere so I am assuming it is defined in a higher scope.
Note that one big advantage of using promises for a multi-stage operation like this is that all errors end up in one single place, no matter where they occurred in the overall multi-level process.
Note: If you are processing a large number of images or a medium number of large images, this could end up using a fair amount of memory because this code processes all the images in parallel. One of the advantages of Bluebird's Promise.map() is that it has an optional concurrency option that specifies how many of the requests should be "in-flight" at once. You can dial that down to a medium number to control memory usage if necessary.
Or, you could change the structure so that rather than compress all, then convert all to greyscale, you could compress one, convert it grey scale, then move on to the next, etc...
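For instance, the greyscale step above could be limited to a handful of images in flight at once (a sketch of Bluebird's concurrency option):

function colourMeBw(fileList) {
    return Promise.map(fileList, function(file) {
        return Jimp.read(file).then(function(image) {
            return image.greyscale().writeAsync(file);
        });
    }, { concurrency: 5 }); // at most 5 images processed at a time
}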
I read the code and I think what you are trying to do is something like this:
const cheerio = require("cheerio");
const fetch = require("node-fetch");
const tinify = require("tinify");
const fs = require("fs");
const path = require("path");

const getImages = url => {
    return fetch(url)
        .then(responseToText)
        .then(bodyToImageUrls)
        .then(makePhotoDirectory)
        .then(downloadFiles)
        .then(processImageData)
        .then(doImageManipulation)
        .catch(error => {
            console.log("We’ve encountered an error: " + error);
        });
};

const responseToText = res => res.text();

const bodyToImageUrls = body => {
    const $ = cheerio.load(body);
    // collect the src attribute of every image into a plain array
    return $("img").map((i, el) => $(el).attr("src")).get();
};

const makePhotoDirectory = urls => {
    const dir = path.join(__dirname, "Photos");
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir);
    }
    return urls;
};

const downloadFiles = urls => {
    /*
    I could not run this since I don't have a Tinify API key,
    but I assume that it returns a promise.
    */
    const promises = urls.map(url => tinify.fromUrl(url));
    return Promise.all(promises);
};

const processImageData = imageData => {
    const promises = imageData.map((data, i) => {
        const fileUrl = path.join(__dirname, "Photos", `optimized_${i}.jpg`);
        return data.toFile(fileUrl);
    });
    return Promise.all(promises);
};

const doImageManipulation = images => {
    // Add your image manipulation here
};
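As a usage sketch for the pipeline above (the search URL below is just an illustration, not from the original code):

// kick off the whole pipeline and log when it finishes
getImages("https://www.google.com/search?q=kittens&tbm=isch")
    .then(() => console.log("All images downloaded and processed"));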
I think the resolve() call directly after fs.mkdir is wrong, because mkdir works asynchronously, so resolve is reached without the whole work of mkdir having been performed.
const processCompress = new Promise(resolve => {
    fs.mkdir(path.join(__dirname, 'Photos', query), function(error) {
        let j = 0;
        if (!error) {
            for (i in urls) {
                console.log(i);
                var source = tinify.fromUrl(urls[i]);
                source.toFile(path.join(__dirname, 'Photos', query, "optimized_" + ++j + ".jpg"));
            }
        }
        resolve(); // <---- inside the callback from mkdir
    });
    // call resolve from inside the mkdir callback function, not out here:
    // resolve();
});
I hope that will fix your problem.
