Node js async module series dependencies

Node js async module series dependencies - javascript

I want to know how to handle dependencies between tasks when using the async library in Node.js. Consider the following example:
// Nested-callback version: each step starts only after the previous one has completed.
db.open(function(error, client) {
    // The open client is used to look up the collection...
    client.collection('my-collection', function(error, collection) {
        // ...and the collection is used to insert a document.
        collection.insert({ "email": "test#gmail.com" }, function(error, docs) {
            // Do stuff.
        });
    });
});
Using the async library:
async.parallel([
function(callback) {
db.open(function(error, client) {
callback(error, client);
});
},
function(callback) {
// How do I access the "client" variable at this point?
}
],
function(results){
// Do stuff.
});

You are using async.parallel, which runs all the functions together, and they can finish in any order.
You can use the async.waterfall function, which passes variables from one callback to the next function, e.g.:
async.waterfall([
function(callback){
callback(null, 'one', 'two');
},
function(arg1, arg2, callback){
callback(null, 'three');
},
function(arg1, callback){
// arg1 now equals 'three'
callback(null, 'done');
}
], function (err, result) {
// result now equals 'done'
});
Or you can use the auto function, which allows you to specify which other functions must finish first, e.g.:
async.auto({
get_data: function(callback){
// async code to get some data
},
make_folder: function(callback){
// async code to create a directory to store a file in
// this is run at the same time as getting the data
},
write_file: ['get_data', 'make_folder', function(callback){
// once there is some data and the directory exists,
// write the data to a file in the directory
callback(null, filename);
}],
email_link: ['write_file', function(callback, results){
// once the file is written let's email a link to it...
// results.write_file contains the filename returned by write_file.
}]
});
so what you could do in your case is this:
// NOTE: tasks here use the (callback, results) argument order of async 1.x;
// async 2.0 and later pass (results, callback) to dependent tasks instead.
async.auto({
    // No dependencies: starts immediately. The opened client becomes results.dbReady.
    dbReady: function(callback) {
        db.open(function(error, client) {
            callback(error, client);
        });
    },
    // Runs only after dbReady has completed.
    connected: ['dbReady', function(callback, results){
        // The "client" variable is available at this point as results.dbReady.
        console.log(results.dbReady);
        // Signal completion; without this call the final callback is never invoked.
        callback(null);
    }]
},
// Final callback: receives the first error (if any) and the results of all tasks.
function(error, results){
    // Do stuff.
});
Have a look at this to see all the available functions and their uses

Related

async waterfall not following order when mysql query

I am trying to do an async waterfall, but I don't get the output I expect.
Basically, my waterfall works as expected if I use an array instead of the query,
so I guess I am doing something wrong in the callback of the query, but I don't know what.
Code when it works with what i expect using array:
/**
 * Build the list of values from `start` up to and including `end`, stepping by 1.
 *
 * @param {number} start - first value of the sequence
 * @param {number} end - inclusive upper bound
 * @returns {Array} the generated values; empty when `start` is greater than `end`
 */
function range(start, end) {
    const values = [];
    let current = start;
    while (current <= end) {
        values.push(current);
        current++;
    }
    return values;
}
users = range(1,2)
obj = [1,2];
async.forEachLimit(users, 1, function(user, userCallback){
async.waterfall(
[
function(callback) { // query the data to get the category and specific number of rows
results = {sku_config:'A',img:'http//blabla',sku_config:'B',img:'http//bloblo'}
callback(null, results);
},
function(obj,callback) {
async.eachSeries(obj, function (sku, callback) {
var url = sku.img;
var sku = sku.sku_config;
console.log("loop");
request.get(url, {encoding: null} , function(error, response, body) {
console.log('request');
});
callback(null);
}, function(responsetoendofloop){
callback(null);
});
},
],
function (err) {
console.log('Finish');
userCallback(null);
}
);
}, function(err){
console.log("User For Loop Completed");
});
output:
loop
request
loop
request
Finish
loop
request
loop
request
Finish
User For Loop Completed
But when i try to query the data with mysql here comes the problem
code:
async.forEachLimit(users, 1, function(user, userCallback){
async.waterfall(
[
function(callback) { // query the data to get the category and specific number of rows
connection.query(query_sku,
['Fashion',1,2],
function(err, results, fields) {
if (err)
throw err;
callback(null, results);
});
},
function(obj,callback) {
async.eachSeries(obj, function (sku, callback) {
var url = sku.img;
var sku = sku.sku_config;
console.log("loop");
request.get(url, {encoding: null} , function(error, response, body) {
console.log('request');
});
callback(null);
}, function(responsetoendofloop){
callback(null);
});
},
],
function (err) {
console.log('Finish');
userCallback(null);
}
);
}, function(err){
console.log("User For Loop Completed");
});
output:
loop
loop
Finish
loop
loop
Finish
User For Loop Completed
request
request
request
request
All the requests get executed at the end :(
Do you have any idea what I could fix?
Thanks

The first problem is that your callbacks all share the exact same name: the `callback` parameter of the eachSeries iterator shadows the `callback` of the enclosing waterfall step. This could cause major problems, because the callbacks you mean to call cannot be told apart, and your program may execute pieces of code that shouldn't run until later.
The second problem is that the callback is called outside of the request.get handler. request.get is asynchronous: it returns immediately, before the response has arrived, so a callback placed right after it is called straight away and the loop moves on. By calling the callback inside the request.get handler, the loop is forced to wait until the response has been received before continuing. A revised version of your code is below.
async.forEachLimit(users, 1, function(user, userCallback){
async.waterfall(
[
function(callback) { // query the data to get the category and specific number of rows
connection.query(query_sku,
['Fashion',1,2],
function(err, results, fields) {
if (err)
throw err;
callback(null, results);
});
},
function(obj,callback) {
async.eachSeries(obj, function (sku, seriesCallback) {
var url = sku.img;
var sku = sku.sku_config;
console.log("loop");
request.get(url, {encoding: null} , function(error, response, body) {
console.log('request');
seriesCallback(null);
});
}, function(responsetoendofloop){
callback(null);
});
},
],
function (err) {
console.log('Finish');
userCallback(null);
});
}, function(err){
console.log("User For Loop Completed");
});

Your callback(null); call inside async.eachSeries is placed after the request.get call, so it runs before the request has completed.
To fix this, just move it inside the request callback, like this:
request.get(url, {encoding: null} , function(error, response, body) {
console.log('request');
callback(null);
});
Also, to make it clear which callback you are actually calling, give the callback functions distinct names. For example, name the callback inside eachSeries `next`:
function(obj,callback) {
async.eachSeries(obj, function (sku, next) {
var url = sku.img;
var sku = sku.sku_config;
console.log("loop");
request.get(url, {encoding: null} , function(error, response, body) {
console.log('request');
next(null);
});
}, function(responsetoendofloop){
callback(null);
});
}
Hope this helps.

Async.js: Is a variable created in a waterfall task available in another task?

I am freshly discovering Async.js and I wondered what was the behavior of async.waterfall when a document is found by a database query, and how to use it through different tasks.
I have this piece of code :
var _arg1;
var _arg2;
async.waterfall([
function (callback) { // First "dummy" callback
callback(null, _arg1, _arg2);
},
function (arg1, arg2, callback) {
foo1(arg1, arg2, callback); // Built-in function, for example a database query returning a document
},
function (arg3, callback) {
foo2(arg3, callback); // arg3 is the document found by the query
},
function (callback) {
foo3(arg3, callback); // Here I would like to use `arg3` again
}],
function (err, result) {
if (err) {
console.log(err);
}
}
);
Is it possible to use arg1 in my second task, without storing the variables every time ?
I think I could do something like this, but I'm sure it is not the best way to do it :
var _arg1;
var _arg2;
var _arg3;
async.waterfall([
function (callback) { // First "dummy" callback
callback(null, _arg1, _arg2);
},
function (arg1, arg2, callback) {
foo1(arg1, arg2, callback); // Built-in function, for example a database query returning a document
},
function (arg3, callback) {
_arg3 = arg3
foo2(arg3, callback); // arg3 is the document found by the query
},
function (callback) {
foo3(_arg3, callback); // Make use gain of `_arg3` ?
}],
function (err, result) {
if (err) {
console.log(err);
}
}
);
What is the best way to manipulate different variables created in tasks?

Each waterfall function has its own scope, so variables cannot be shared between them directly. I can only think of these two options:
Declare outside the waterfall the variables (the one you did)
Pass the variables to the next function using the callbacks
Second one would be something like this:
var _arg1;
var _arg2;
async.waterfall([
    function (callback) { // First "dummy" callback
        callback(null, _arg1, _arg2);
    },
    function (arg1, arg2, callback) {
        foo1(arg1, arg2, callback); // Built-in function, for example a database query returning a document
    },
    function (arg3, callback) {
        foo2(arg3, function(err) {
            if (err) {
                // Return here so the callback is not invoked a second time below.
                return callback(err);
            }
            // Forward arg3 explicitly so the next task receives it again.
            callback(null, arg3);
        });
    },
    function (arg3, callback) {
        foo3(arg3, callback); // You can use `arg3` here again
    }],
    function (err) {
        if (err) {
            console.log(err);
        }
    }
);
I couldn't say which one is better; in my own code I use the first or the second depending on the situation.
As has been pointed out in the comments, if you are starting something new I also recommend using Promises: code written with Async.js can get quite nasty in complex situations, especially with waterfall and similar functions.

Best way to process results of multiple, sequential, dependent mongo queries in NodeJS

High level
I'm new to JS and Node. I'm working on an API endpoint that should return a status value. To compute the status value I need to make two sequential mongo queries where the second set of queries depend on the first query. The second set of queries will give me a status for each value found in the first query, of which I will pick one based on some logic. What is the best way to do it in NodeJS?
Specifics
Here are parts of my first attempt.
function getItemStatus(key, value, callback) {
MongoClient.connect(mongo_url, function(err, db) {
if (err) { return console.dir(err); }
db.collection('status', function(err, coll) {
if (err) { return console.dir(err); }
coll.distinct("_id.metric", function(err, metrics) {
if (err) { return console.dir(err); }
console.log('metrics : ', metrics);
_.foreach(metrics, function(metric) {
var query = {"_id": {
"$gte" : {key: key, value: value, created: new Date("1800-01-01T00:00:00"), metric : metric},
"$lte" : {key: key, value: value, created: new Date("2100-01-01T00:00:00"), metric : metric}}};
coll.find(query, {sort: {"_id.created": -1}, limit: 1})
I make a connection, query for a set of metric values using a distinct query. For each metric I then want to ask for the latest status. Ideally I'd like to have the entire set of statuses so that I could write a function taking this set and deciding on which status will be returned. My problem is passing the statuses back "up the chain" so that I can process the set of statuses.
In a synchronous situation I would simply write something like this
val metrics = getDistinctMetrics(key, value)
val statuses = metrics.map(getStatusForMetric)
val status = filterStatuses(statuses)
How can I accomplish this in JavaScript/NodeJS?
UPDATED to highlight the fact that the first queries will trigger several queries in the second step, i.e. one for each result found by the first query.

As I understand your question, you want to execute queries in parallel or in waterfall mode and then apply some logic to the final result. You should look into a library that allows parallel/waterfall execution, like this:
Waterfall: Waterfall
async.waterfall([
function(callback) {
callback(null, 'one', 'two');
},
function(arg1, arg2, callback) {
// arg1 now equals 'one' and arg2 now equals 'two'
callback(null, 'three');
},
function(arg1, callback) {
// arg1 now equals 'three'
callback(null, 'done');
}
], function (err, result) {
// result now equals 'done'
});
Parallel: Parallel
async.parallel({
collectionOne: function (callback) {
collectionOne.find(query, function (err, result) {
if (err) {
return handleError(res, err);
}
callback(null, result);
})
},
collectionTwo: function (callback) {
collectionTwo.find(query, function (err, result) {
if (err) {
return handleError(res, err);
}
callback(null, result);
})
},
collectionThree: function (callback) {
collectionThree.find(query, function (err, result) {
if (err) {
return handleError(res, err);
}
callback(null, result);
})
},
collectionFour: function (callback) {
collectionFour.find(query, function (err, result) {
if (err) {
return handleError(res, err);
}
callback(null, result);
})
},
}, function (err, results) {
return res.status(200).json(results);
});
And in the final callback you can do some logic or return the response.

In your sample code you are making network calls inside the callbacks of other network calls, which leads to callback hell and makes it easy for the queries to misbehave. To overcome that, you can use promises.
This will help you avoid callback hell, and your queries will still be resolved in order.
Sample code:-
new Promise (function(resolve, reject){
return db.collection.find(function).exec()
.then(function(result_of_first_query){
return db.collection.findOne(function).exec() //in this yopu can user the result of first query
}).then(function(result_of_second_query){
resolve(result_of_second_query);
})
})
You can add more queries with each .then

Results of tasks in async.auto

I am kind of confused with the logic of results which go from one task to the other task in async.auto. For example in the following code logic I added some data to models in task1, which is initially an output from initialtask and in finalTask added data to models from task1 is reflected in results.initialTask1 as well. Similarly added data in task2 is reflected in results.initialTask1 in finalTask.
To sum up all of results.initialTask1, results.task1[0], results.task2[0], results.task3[0] are identical in finalTask. Is this the logic of async.auto? Or is it something like reference by pointer in C++ which causes whatever changes for models in task1, it reflects in models in initialTask as well?
async.auto({
initialTask: function(callback) {
//Do some operations
callback(null, name, initialModels);
},
task1: ['initialTask', function(callback, results) {
var models = results.initialTask[1];
//Add some more data to models
callback(null, models);
}],
task2: ['initialTask', function(callback, results) {
var models = results.initialTask[1];
//Add some more data to models
callback(null, models);
}],
task3: ['initialTask', function(callback, results) {
var models = results.initialTask[1];
//Add some more data to models
callback(null, models);
}],
finalTask: ['task1', 'task2', 'task3', function(callback, results) {
//Here the followings are the same: results.initialTask[1], results.task1[0], results.task2[0], results.task3[0]
}]
});
I'm looking for any answer which helps me make sure that is the logic or not? I'm not necessarily looking for any official documents or ...

This is expected behavior. Basically async.auto will execute all the functions in the order it deems necessary. So in your case initialTask will be called first. Then task1, task2, and task3 will be called in parallel. Finally finalTask will be called with the results. The reason all the values are the same is related to JavaScript's call-by-sharing, meaning if you change a function parameter itself, then it won't affect the item that was fed into the parameter. If you change the internals of the parameter, it will carry up to the item.
More info here.
Example:
async.auto({
// this function will just be passed a callback
readData: async.apply(fs.readFile, 'data.txt', 'utf-8'),
showData: ['readData', function(results, cb) {
// results.readData is the file's contents
// ...
}]
}, callback);
async.auto({
get_data: function(callback) {
console.log('in get_data');
// async code to get some data
callback(null, 'data', 'converted to array');
},
make_folder: function(callback) {
console.log('in make_folder');
// async code to create a directory to store a file in
// this is run at the same time as getting the data
callback(null, 'folder');
},
write_file: ['get_data', 'make_folder', function(results, callback) {
console.log('in write_file', JSON.stringify(results));
// once there is some data and the directory exists,
// write the data to a file in the directory
callback(null, 'filename');
}],
email_link: ['write_file', function(results, callback) {
console.log('in email_link', JSON.stringify(results));
// once the file is written let's email a link to it...
// results.write_file contains the filename returned by write_file.
callback(null, {'file':results.write_file,
'email':'user#example.com'});
}]
}, function(err, results) {
console.log('err = ', err);
console.log('results = ', results);
});

async.auto is a very useful and powerful function provided by the Async library. It takes 3 arguments:
1-task
2- concurrency
3-callback
In async.auto, each task runs as soon as the tasks it lists as dependencies have completed; tasks with no dependencies start immediately. If any task passes an error to its callback during execution, the tasks that depend on it (typically the ones defined below it) will not be executed, and the main callback is called immediately with that error.
1- Task :- an Object
2- concurrency :- An optional integer for determining the maximum number of tasks that can be run in parallel. By default, as many as possible.
3- callback:- return the response
Example:
/**
 * Handles a "forgot password" request for the given email address.
 *
 * Looks the user up by email; when found, runs two dependent async.auto tasks
 * (generate a password token, then send the mail) and reports the outcome.
 *
 * @param res       passed through to the configurationHolder response handlers
 * @param email     address to look up (lower-cased for the query) and to mail
 * @param isMobile  forwarded unchanged to gereratePasswordToken
 * @param callback  invoked only on the failure path of async.auto, with an Error
 */
AnyService.prototype.forgetPassword = function (res, email, isMobile, callback) {
Logger.info("In AnyService service forgetPassword email...", email);
// Query for a user whose email equals the lower-cased input and whose `deleted` flag is false.
db.User.findOne({
email: email.toLowerCase(),
deleted: false
}, function (err, user) {
// NOTE(review): `err` is not inspected here; only `user` is checked.
if (!user) {
// No user found: respond via the handler (called with 403); `callback` is not invoked on this path.
configurationHolder.responseHandler(res, null, configurationHolder.LoginMessage.registerFirst, true, 403)
} else {
async.auto({
// Task with no declared dependencies; delegates to gereratePasswordToken
// (presumably that helper calls `next` when done — confirm).
// NOTE(review): tasks are written callback-first (next, results); confirm this matches the installed async version.
token: function (next, results) {
return gereratePasswordToken(next, email, user, isMobile);
},
// Declared as depending on 'token'; hands result.token to the mail service.
sendMail: ['token', function (next, result) {
return SendMailService.prototype.forgetPasswordMail(next, result.token, email, user.fullName);
}]
}, function (err, result) {
// Final callback of async.auto.
if (err == null && result != null) {
// NOTE(review): this path calls configurationHolder.ResponseUtil.responseHandler, while the
// 403 path above calls configurationHolder.responseHandler — confirm which one is intended.
configurationHolder.ResponseUtil.responseHandler(res, null, configurationHolder.LoginMessage.forgotPassword, false, 200)
} else {
// Failure path: the only place `callback` is invoked, with a generic error.
callback(new Error(configurationHolder.errorMessage.oops))
}
})
}
});
}

Understanding Node.JS async.parallel

I need to request data from two web servers. The tasks are independent; therefore, I am using async.parallel. For now I am only writing 'abc', 'xyz', and 'Done' to the body of my web page.
Since tasks are performed at the same time, can I run into a strange output? E.g.,
xab
cyz
The code.
var async = require('async');
function onRequest(req, res) {
res.writeHead(200, {
"Content-Type" : "text/plain"
});
async.parallel([ function(callback) {
res.write('a');
res.write('b');
res.write('c\n');
callback();
}, function(callback) {
res.write('x');
res.write('y');
res.write('z\n');
callback();
} ], function done(err, results) {
if (err) {
throw err;
}
res.end("\nDone!");
});
}
var server = require('http').createServer(onRequest);
server.listen(9000);

If you want to be absolutely certain in the order in which the results are printed, you should pass your data (abc\n and xyz\n) through the callbacks (first parameter is the error) and handle/write them in the final async.parallel callback's results argument.
async.parallel({
one: function(callback) {
callback(null, 'abc\n');
},
two: function(callback) {
callback(null, 'xyz\n');
}
}, function(err, results) {
// results now equals to: results.one: 'abc\n', results.two: 'xyz\n'
});

We Keep Coding

JavaScript is the programming language of the Web.

Node js async module series dependencies - javascript

Related

async waterfall not following order when mysql query

Async.js: Is a variable created in a waterfall task available in another task?

Best way to process results of multiple, sequential, dependent mongo queries in NodeJS

Results of tasks in async.auto

Understanding Node.JS async.parallel

Categories

Resources