Async.eachSeries executing in parallel - javascript

I'm trying to understand the async library for Node.js and apply it, but it doesn't work as expected. Calls are made in parallel when I was expecting them to be made in series (i.e. one after another).
Here is my code
var users = [{_id:'1',username:'user1'},{_id:'2',username:'user2'}];
async.eachSeries(users,function function1(user,callbackEach){
var username = user.username;
var incomes = [{source:'sourceA',provider:providerA},{source:'sourceB',provider:providerB},{source:'sourceC',provider:providerC}];
async.eachSeries(incomes,function function2(income,callbackSmallEach){
var source = income.source;
income.provider.getEarnings(user._id,username,yesterday,function callbackFromGetEarnings(err,result){
if (err){
// error
} else {
income.earnings = {day : new Number(result)};
income.provider.getMonthEarnings(user._id,username,yesterday,function callbackFromGetMonthEarnings(err,monthTotal){
if (err){
// error
} else {
income.earnings.month = new Number(monthTotal);
callbackSmallEach();
}
});
}
});
},
function sendEmails(err){
if (err) {
// error
} else {
// send email
}
});
console.log("Just before calling callbackEach()");
callbackEach();
});
getEarnings and getMonthEarnings use an external provider, so some time can occur until callback functions callbackFromGetEarnings and callbackFromGetMonthEarnings are called.
My problem is that I dont want both calls to getEarnings to be executed in parallel. I want function getEarnings for user2 to be called only after getEarnings for user1 has returned (and corresponding callbacks have been made).
As you can see, I have tried to make the calls for user1 and user2 in series with async.eachSeries, but the calls are made in parallel. When I run it, the log "Just before calling callbackEach()" is always printed before the callback function callbackFromGetEarnings is called...
I hope this is clear enough.

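The problem is where you are calling your callback: callbackEach() is called right after the inner eachSeries has been started, not after it has finished, so the outer series moves on to the next user immediately. Call the outer callback from the inner eachSeries' final callback instead. Example: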
// Outer series: the next item is only started once this item's callback has been called.
async.eachSeries(something, function(item, callback) {
    // Inner series: handle every sub-item of the current item, one after another.
    async.eachSeries(item.somethingElse, function(subitem, callback2) {
        //do something
        return callback2();
    }, function(err) {
        //when all item.somethingElse is done (or one of them failed), call the upper callback;
        //passing err along stops the outer series instead of silently swallowing the failure
        return callback(err);
    });
}, function(err) {
    if (err) {
        // Reached as soon as any inner or outer step reported an error.
        return console.error(err);
    }
    console.log('done');
});
This way, for each item in something, you will execute in series all the item.somethingElse, before going to the next item.

Related

Append items to an array from an async function

I am having trouble appending items to an array and would like assistance in performing such. I have reviewed this and understood zero of it.
Here is my current code which is being ran in AWS Lambda (Node.js 10.x):
var sesData = ["array0", "array1"];
function onScan(err, data) {
if (err) {
console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
} else {
console.log("Scan succeeded.");
data.Items.forEach(function(itemdata) {
// append new value to the sesData array
sesData.push("Item :" + JSON.stringify(itemdata));
console.log(sesData);
console.log("Item :",JSON.stringify(itemdata));
});
// continue scanning if we have more items in case it is lots of data
if (typeof data.LastEvaluatedKey != "undefined") {
console.log("Scanning for more...");
params2.ExclusiveStartKey = data.LastEvaluatedKey;
dynamoDB.scan(params2, onScan);
}
}
}
function generateEmailParams (body) {
return {
Source: myEmail,
Destination: { ToAddresses: [myEmail] },
ReplyToAddresses: [myEmail],
Message: {
Body: {
Text: {
Charset: 'UTF-8',
Data: `Message sent. \nContent: \n${sesData}`
}
},
Subject: {
Charset: 'UTF-8',
Data: `Subject`
}
}
}
}
//End email Params
exports.handler = function(event, context) {
console.log("Incoming: ", event);
dynamoDB.scan(params2, onScan); // scanning DDB
console.log('===SENDING EMAIL===');
const emailParams = generateEmailParams(event.body)
var email = ses.sendEmail(emailParams, function(err, data){
if(err) console.log(err);
else {
console.log("===EMAIL SENT===");
console.log(data); // log data
console.log("EMAIL CODE END"); //log end of email
console.log('EMAIL: ', email); // log email
context.succeed(event);
}
});
};
All the ses items are just sending the onScan function data via email. That works fine and is not the trouble, its that sesData never appends. The console.log’s print out the data from dynamoDB without issue but the push to array does not work.
After searching all over the internet, I do not really understand what’s going on as there are no errors so I am missing some sort of logic.
Async means the code doesn't necessarily get executed in the order you wrote it.
dynamoDB.scan is an async function. You are talking to your DynamoDB and that takes time; it might be only a few ms, but Node.js will continue on to the next line of code while the scan is still in progress.
Lets take the following example
let count = 1
console.log(count)
setTimeout(function() {
count = 2
console.log(count)
}, 1000)
count = 3
console.log(count)
setTimeout is an async function: its callback executes after x ms, in this case 1000 ms = 1 sec. So it's similar to your dynamoDB.scan function: it starts right away, but it takes some time to finish, and meanwhile Node.js will continue running your code line by line.
So the order in the code is 1, 2, 3, but when you run the snippet the output is 1, 3, 2. Even if you set the timeout to 0 ms, it will still be 1, 3, 2:
let count = 1
console.log(count)
setTimeout(function() {
count = 2
console.log(count)
}, 0)
count = 3
console.log(count)
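This is because setTimeout is asynchronous: its callback is put in a queue and only runs once everything currently on the call stack has finished. ("Call stack" is the JavaScript term for the list of functions that are currently executing.)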
To understand this checkout: https://www.youtube.com/watch?v=8aGhZQkoFbQ
It's a really good video that explains how JavaScript works and is not too difficult to understand.
Note that the onScan function is a callback function and is executed when the dynamoDB.scan method has finished. So it's like saying "Hey DynamoDB, run the scan and then do this onScan stuff I created".
You programmed it so that when dynamoDB.scan has finished it calls onScan, which adds to the sesData array, but the generateEmailParams call is outside the callback, so it runs right away after you called dynamoDB.scan, not after the scan has finished.
So what is happening in your code is this:
You create array sesData
You call dynamoDB.scan and pass it the onScan callback function.
The function starts, but it is async and it takes time to connect to DynamoDB, etc.
You call generateEmailParams, but the onScan function hasn't finished.
Your code generated the email before the onScan function added items to
sesData.
To fix this, you need to do one of the following:
Call generateEmailParams (and send the email) inside the onScan callback.
Use promise chains with .then
Use async/await
I haven't used AWS but quick googling shows that scan can return a promise by doing
dynamoDb.scan(params).promise()
Notice that there is no callback function here, because it returns a promise.
You can then do something like
exports.handler = async function(event, context) {
...
await dynamoDb.scan(params).promise()
...
console.log(sesData)
}

Manipulating data in a callback function

I have a nested function that does some I/O and calls a callback once it has finished with the resulting data. Something like this:
function getStatus(returnCallback, errorCallback) {
sendRequest('someData', returnCallback, errorCallback)
}
whereby sendRequest() is a function that interacts with hardware and calls the returnCallback with the data that it got from the hardware, or the errorCallback in case something went wrong.
My problem now is that the data the hardware returns is a really long string that consists of different numbers that represent different parameters. What I want to do is manipulate the data that is given to the returnCallback and create an object with a property for each parameter. Is there a way to do that? I already tried using async.waterfall
function getStatus(returnCallback, errorCallback) {
let returnArray = {};
async.waterfall([
function (callback) {
sendRequest('someData', callback, errorCallback);
},
function (data, callback) {
returnArray.statusBits = data.slice(0, 6);
returnArray.faultBits = data.slice(7, 13);
returnArray.alertBits = data.slice(14, 20);
returnArray.pumpRotationSpeed = parseInt(data.slice(21, 26));
returnArray.motorPower = parseInt(data.slice(27, 31));
returnArray.frequencyConverterTemperature = parseInt(data.slice(36, 39));
returnArray.pumpOperationTime = parseInt(data.slice(44, 48));
callback(null, returnArray)
}
], returnCallback(returnArray));
but that does nothing. As it looks the second function in the waterfall is never called. This might be because the callback from the first function is not structured as expected in the waterfall and it returns with callback(data) instead of callback(null, data)
In async.waterfall callback, the first argument is error, also, you are supposed to wait for the end of the waterfall before exiting the function. The correct code would be:
/**
 * Requests the raw status string via sendRequest, splits it into named fields
 * and reports the outcome through exactly one of the two callbacks.
 *
 * @param {function(Object)} returnCallback - called with the parsed status object on success
 * @param {function(*)} errorCallback - called with the error reported by sendRequest
 */
function getStatus(returnCallback, errorCallback) {
    let returnArray = {};
    async.waterfall([
        function (callback) {
            //First step
            sendRequest('someData', function (data) {
                //Everything is fine, continue
                //(waterfall expects the error first, so the data goes in second position)
                callback(null, data);
            }, function (error) {
                //Error, skip all remaining step, and handle the error
                callback(error);
            });
        },
        function (data, callback) {
            //Second step
            returnArray.statusBits = data.slice(0, 6);
            returnArray.faultBits = data.slice(7, 13);
            returnArray.alertBits = data.slice(14, 20);
            //Explicit radix 10: without it a field starting with "0x" would be read as hexadecimal
            returnArray.pumpRotationSpeed = parseInt(data.slice(21, 26), 10);
            returnArray.motorPower = parseInt(data.slice(27, 31), 10);
            returnArray.frequencyConverterTemperature = parseInt(data.slice(36, 39), 10);
            returnArray.pumpOperationTime = parseInt(data.slice(44, 48), 10);
            callback(null, returnArray)
        }
    //In normal case, error will be null, and the param will be the last passed to the callback of the last step
    ], function (error, result) {
        //If there is a error (like error in step 1)
        if(error) {
            //Handle the error
            errorCallback(error);
        } else {
            //No error, continue with the normal callback
            returnCallback(result);
        }
    });
}
What you want to do is
manipulate the data that is given to the returnCallback and create and
object with a property for each parameter.
You have
function getStatus(returnCallback, errorCallback) {
sendRequest('someData', returnCallback, errorCallback)
}
If I understood what you are trying to do,
function getStatus(function(err, status) {
if (err) return new Error('Something went wrong');
else sendRequest(status);
}
//get what you need here
var status = ... )
Here, the getStatus function start by executing the callback function in a parallel process. The status and err parameters will be place as placeholders in the memory. At the same time, the getStatus is doing what he needs to do to retrieve the status that you want and store it as a variable. When the reading is done, the result is place inside the placeholders of the parallel process and then the execution is finished.
This asynchronous method is coming from the fact that you are reading data inside hardware and it takes some time to retrieve it. The synchronous way would have block the tasks and wait for every steps to be completed, while asynchronously, it allows to not block at every step, but start the other tasks while it is finishing the previous ones.

node.js for loop iterating before the function call returns hence creating object issue

I have node code that fetches the list of websites to crawl, and on iterating through the result, it calls a crawling function written using phantom. But before the crawling function returns the result, the loop is iterating number of times and hence number of calls to crawling function which is not able to handle it. I need immediate answer for my issue.
Please somebody take me out of this well.
my main page code
db.fetch_serviceEntity(function(serviceEntityData){
if(serviceEntityData!=""){
serviceEntityData.forEach(function(item){
console.log(item.website_url);
db.fetch_entityId(item.id,function(entityId){
crawler.getCount(item.website_url, item.name, function(rCount){
console.log("number of fresh reviews to crawl : ", parseInt(rCount) - parseInt(item.review_count));
if(rCount > item.review_count){
fetchReviews(item.website_url, entityId.id, parseInt(rCount) - parseInt(item.review_count), function(){
db.updateReviewCount(item.id, rCount, function(){
process.exit(0);
});
});
}
});
});
};
});
}
else {
console.log("No websites to crawl/database error");
}
process.exit(0);
});
my crawl function is here
crawler.prototype.crawl = function(webUrl, callback){
console.log(webUrl);
this.driver.create({ path: require('phantomjs').path }, function (err, browser) {
return browser.createPage(function (err,page) {
return page.open(webUrl, function (err,status) {
console.log("opened site? ", status);
page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js', function (err) {
// Wait for a bit for AJAX content to load on the page. Here, we are waiting 5 seconds.
setTimeout(function () {
return page.evaluate(function () {
//Get what you want from the page using jQuery.
var reviews = [];
$('li div.review').each(function () {
if($(this).find('div.ypassport').text()){
var d = new Date($(this).find('span.rating-qualifier').text().trim());
var temp = {
id : $(this).attr('data-review-id'),
entity_id : "",
user_id : $(this).attr('data-signup-object').split(":")[1],
}
reviews.push(temp);
}
});
return {
reviews: reviews
};
}, function (err,result) {
browser.exit();
callback(result);
});
}, 5000);
});
});
});
});
};
I am using node-phantom-simple for writing crawling function
my problem here is -> because for loop makes number of calls to it, crawl function giving me error that some or other object not created.
for example in the code it says "createpage is not a function of undefined" hence the meaning is browser object not created.
sometimes it says "open is not a function of undefined " hence the "page" object not creating.
You have async functions, but you call process.exit(0) as soon as your first function returns, so all database connections are dropped and db.updateReviewCount is never called. So you get more or less arbitrary results, depending on which operation happens to finish first.
(Beside that, the code is a callback hell. Maybe you want to create smaller functions and then chain them with a library like async or co or even by hand.)

NodeJS async callback. How to return the list, from a async callback?

So basically I am making a database query, to get all posts with a certain id, then add them to a list, so I can return. But the list is returned, before the callback has finished.
How do I prevent it from being returned before callback has finished?
exports.getBlogEntries = function(opid) {
var list12 =[];
Entry.find({'opid' : opid}, function(err, entries) {
if(!err) {
console.log("adding");
entries.forEach( function(currentEntry){
list12.push(currentEntry);
});
}
else {
console.log("EEEERROOR");
}
//else {console.log("err");}
});
console.log(list12);
return list12;
};
All of these callbacks are asynchronous, so we don't have any guarantee that they will run in exactly the order we wrote them.
To fix it and make the process behave "synchronously" with a guaranteed order of execution, you have two solutions:
First: make all process in nested list:
instead of this:
MyModel1.find({}, function(err, docsModel1) {
callback(err, docsModel1);
});
MyModel2.find({}, function(err, docsModel2) {
callback(err, docsModel2);
});
use this:
MyModel1.find({}, function(err, docsModel1) {
MyModel2.find({}, function(err, docsModel2) {
callback(err, docsModel1, docsModel2);
});
});
The last snippet above guarantees that the MyModel2 query is only started AFTER the MyModel1 query has finished.
Second: Use some framework as Async. This framework is awesome and have several helper functions to execute code in series, parallels, whatever way we want.
Example:
async.series(
{
function1 : function(callback) {
//your first code here
//...
callback(null, 'some result here');
},
function2 : function(callback) {
//your second code here (called only after the first one)
callback(null, 'another result here');
}
},
function(err, results) {
//capture the results from function1 and function2
//if function1 raise some error, function2 will not be called.
results.function1; // 'some result here'
results.function2; // 'another result here'
//do something else...
}
);
You could use sync database calls but that would work around the concept of node.js.
The proper way is to pass a callback to the function that queries the database and then call the provided callback inside the database query callback.
How do I prevent it from being returned before callback has finished?
The callback is asynchronous, and you cannot avoid that. Hence, you must not return a list.
Instead, offer a callback for when it's filled. Or return a Promise for the list. Example:
exports.getBlogEntries = function(opid, callback) {
Entry.find({'opid': opid}, callback); // yes, that's it.
// Everything else was boilerplate code
};
There is an alternate way to handle this scenario. You can use the async module and when the forEach has finished then make the return call. Please find the code snippet below for the same:
var async = requires('async');
exports.getBlogEntries = function(opid) {
var list12 =[];
Entry.find({'opid' : opid}, function(err, entries) {
if(!err) {
console.log("adding");
async.forEachSeries(entries,function(entry,returnFunction){
list12.push(entry);
},function(){
console.log(list12);
return list12;
});
}
else{
console.log("EEEERROOR");
}
});
};

How could I optimize this code with too many nest functions?

I'm writing a Chrome extension with the socket api(though this doc is out of date, the latest version of the api is here), and I found that the code is really hard to organize:
All the methods are under the namespace chrome.experimental.socket, I would just use socket below for simplicity.
socket.create("tcp", {}, function(socketInfo){
var socketId = socketInfo.socketId;
socket.connect(socketId, IP, PORT, function(result){
if(!result) throw "Connect Error";
socket.write(socketId, data, function(writeInfo){
if(writeInfo.bytesWritten < 0) throw "Send Data Error";
socket.read(socketId, function(readInfo){
if(readInfo.resultCode < 0) throw "Read Error";
var data = readInfo.data; // play with the data
// then send the next request
socket.write(socketId, data, function(writeInfo){
socket.read(socketId, function(readInfo){
// ............
});
});
});
})
});
})
Because both socket.write and socket.read are asynchronous, I have to nest the callbacks to make sure that the next request is sent only after the previous request got the correct response.
it's really hard to manage these nested functions, how could I improve it?
UPDATE
I'd like to have a method send which I can use as:
send(socketId, data, function(response){
// play with response
});
// block here until the previous send get the response
send(socketId, data, function(response){
// play with response
});
How about (something like) this?
var MySocket = {
obj: null,
data: null,
start: function() { ... some code initializing obj data, ending with this.create() call },
create: function() { ... some code initializing obj data, ending with this.connect() call },
connect: function() { ... some connection code, ending with this.write() call },
write: function() { ... some writing code that updates this.data, ending with this.read() call },
read: function() { ... you probably get the idea at this point )) ... },
};
This object could be used with MySocket.start() or something. The idea is to encapsulate all data (and nested calls) within the single (yet more-o-less globally usable) object.
Or even more, one can create two objects: one purely for writing purposes, and another for purely reading, each operating with its own data, then wrap them (and their inter-calls, so to speak) into a single SocketManager object.
Consider using an asynchronous continuation passing style, where functions end with a SetInterval call with the function they were passed. Then we construct a function that entwines two functions to call each other using this mechanism. The guts of it would be like this:
// Timer handle of the most recently scheduled step; pass it to clearTimeout to stop the loop.
var handle;
// pairs two functions
/**
 * Makes two functions call each other in turn, each step running on its own timer tick.
 *
 * Both functions are called as fn(value, next); calling next(newValue) schedules
 * the other function with newValue. Not calling next ends the ping-pong.
 *
 * @param {function(*, function(*))} firstfunc - runs first, then after every secondfunc step
 * @param {function(*, function(*))} secondfunc - runs after every firstfunc step
 * @param {*} startarg - value handed to the very first firstfunc call
 */
function pair(firstfunc, secondfunc, startarg) {
    var callbackToFirst = function(valuetofill) {
        // Wrap the call in a function: passing firstfunc(...) directly would run it
        // immediately and hand its return value to the timer. setTimeout is used
        // because each step must fire once, not repeatedly.
        handle = setTimeout(function() {
            firstfunc(valuetofill, callbackToSecond);
        }, 0);
    };
    var callbackToSecond = function(valuetofill) {
        handle = setTimeout(function() {
            secondfunc(valuetofill, callbackToFirst);
        }, 0);
    };
    callbackToFirst(startarg);
}
What we are doing here is constructing a pair of mutually-calling callbacks which take a single argument, which each contain references to the two inter-calling functions. We then kick off the process by calling the first callback.
Construct the pair for an example pair of read and write functions (assuming you've set the socketId in the enclosing object definition):
// starts read/write pair; the timer handle for control is maintained by pair()
/**
 * Starts the alternating read/write loop.
 *
 * @param {*} initialarg - start value handed to the first read step (the read step ignores it)
 * @param {function(*, function(*))} myDataFunc - called as myDataFunc(data, next) with the
 *        data that was read; it must call next(valueToWrite) to trigger the following write
 */
function startReadWrite(initialarg, myDataFunc) {
    var readcall = function(value, func) {
        // read -> validate/extract data -> let the caller use it; each stage is
        // passed as a callback rather than invoked up front
        readSocket(function(readInfo) {
            getData(readInfo, function(data) {
                myDataFunc(data, func);
            });
        });
    };
    var writecall = function(value, func) {
        // value is whatever myDataFunc passed on; write it, then check the result
        writeSocket(value, function(writeInfo) {
            checkBytesWritten(writeInfo, func);
        });
    };
    pair(readcall, writecall, initialarg);
}
The rest of the object is like this:
/**
 * Wraps one socket and exposes its operations in continuation-passing style:
 * every method takes a function as its last argument and calls it, on a later
 * timer tick, with the result of the operation.
 *
 * Use with `new myIO()`; the socket id is captured by create() and reused by
 * the other methods.
 */
function myIO() {
    var socketInfo, socketId, handle;
    var self = this;

    // Calls func(value) on a later tick so the current call stack unwinds first.
    function defer(func, value) {
        setTimeout(function() {
            func(value);
        }, 0);
    }

    // Creates the socket; func receives this myIO object once the socket exists.
    function create(func) {
        socket.create('tcp', {}, function(thisSocketInfo) {
            socketInfo = thisSocketInfo;
            socketId = thisSocketInfo.socketId;
            // Continue only from inside the callback: before it fires there is no socket id yet.
            defer(func, self);
        });
    }
    // Connects the socket; func receives the connect result.
    function connect(IP, PORT, func) {
        socket.connect(socketId, IP, PORT, function(result) {
            // NOTE(review): a falsy result is treated as failure here; confirm against
            // the socket API which value actually signals an error.
            if(!result) throw "Connect Error";
            defer(func, result);
        });
    }
    // Reads from the socket; func receives the raw readInfo.
    function readSocket(func) {
        socket.read(socketId, function(readInfo){
            defer(func, readInfo);
        });
    }
    // Writes data to the socket; func receives the raw writeInfo.
    function writeSocket(data, func) {
        socket.write(socketId, data, function(writeInfo){
            defer(func, writeInfo);
        });
    }
    // Throws if the write failed, otherwise passes writeInfo on to func.
    function checkBytesWritten(writeInfo, func) {
        if(writeInfo.bytesWritten < 0) throw "Send Data Error";
        defer(func, writeInfo);
    }
    // Throws if the read failed, otherwise passes the received data on to func.
    function getData(readInfo, func) {
        if(readInfo.resultCode < 0) throw "Read Error";
        var data = readInfo.data;
        defer(func, data);
    }
    //** pair and startReadWrite go here **//

    // Expose the operations so they can be called on the constructed object.
    this.create = create;
    this.connect = connect;
    this.readSocket = readSocket;
    this.writeSocket = writeSocket;
    this.checkBytesWritten = checkBytesWritten;
    this.getData = getData;
}
Finally the call to set the whole thing going:
var myIOobj = new myIO();
// create() takes a function to run afterwards, so the read/write loop is wrapped
// in one; calling startReadWrite(...) inline would run it immediately and pass
// its (undefined) return value to create(). The first argument is the start
// value for the loop, which the read step does not use.
myIOobj.create(function() {
    startReadWrite(null, myDataFunc);
});
Notes:
This is meant to demonstrate a style, not be ready code! Don't just copy and paste it.
No, I haven't tested this; I do javascript but not Chrome API stuff yet. I'm focussing on the callback mechanisms etc.
Be careful with the different classes of callback; single argument callbacks (like the read and write callbacks) which take a single value (as presumably defined by the API), and 2 argument callbacks (like most of the methods) which take an argument and a function to call at the end.
The getData method takes a callback and passes data to it; this callback (myDataFunc) is the function that actually gets to use the data. It needs to take a callback as a second argument and call it synchronously or asynchronously.
TLDR: Consider using asynchronous calls to avoid the nesting. I've given a vague example of a mechanism to have two functions call each other continuously using this style as seems to be needed.
Although I call it asynchronous, the setInterval calls will execute serially, but the key is that the stack is cleared after the parent call is done, rather than adding endless layers with nesting.

Categories