I'm setting up a scraper using NodeJS, and I'm having a hard time figuring out the right way to pass data around when using async.parallel.
Here's the batch function, which receives the list of zip codes in an array inside of the zip_results object. I'm trying to set up the array asyncTasks as an array of functions to be run by async. The function I want called for each zip code is Scraper.batchOne, and I want to pass it a zip code and the job version. Right now, the function is called immediately. I tried wrapping the call to Scraper.batchOne in an anonymous function, but that lost the scope of the index variable i and always sent in undefined values.
How can I make it so that the function is passed to the array, along with some parameters?
// zip_results: {job_version: int, zip_codes: []}
Scraper.batch = function (zip_results) {
    // tasks - An array or object containing functions to run, each function
    // is passed a callback(err, result) it must call on completion with an
    // error err (which can be null) and an optional result value.
    var asyncTasks = [], job_version = zip_results.job_version;

    for (var i = 0; i < zip_results['zip_codes'].length; i++) {
        asyncTasks.push(Scraper.batchOne(zip_results['zip_codes'][i], job_version));
    }

    // Call async to run these tasks in parallel, with a max of 2 at a time
    async.parallelLimit(asyncTasks, 2, function(err, data) { console.log(data); });
};
Why don't you use async.eachLimit instead? (With async.parallel you would need to use bind / apply techniques)
async.eachLimit(zip_results['zip_codes'], 2, function(zip, next) {
    Scraper.batchOne(zip, zip_results.job_version);
    return next();
}, function(err) {
    // executed when all zips are done
});
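If you do want to stick with async.parallelLimit, the bind technique mentioned above would look roughly like this (a sketch on my part, assuming Scraper.batchOne accepts a completion callback as its last argument):
for (var i = 0; i < zip_results['zip_codes'].length; i++) {
    // bind pre-fills the zip code and job version; async later supplies the callback
    asyncTasks.push(Scraper.batchOne.bind(Scraper, zip_results['zip_codes'][i], job_version));
}

async.parallelLimit(asyncTasks, 2, function (err, data) { console.log(data); });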
You can do a self invoking anonymous function and pass the parameters that you want to retain after the method is called like this:
(function(asyncTasksArr, index, zipResults, jobVersion){
    // push a task function that async invokes later with its own callback
    // (this assumes Scraper.batchOne accepts a completion callback as its last argument)
    asyncTasksArr.push(function(callback){
        Scraper.batchOne(zipResults['zip_codes'][index], jobVersion, callback);
    });
}(asyncTasks, i, zip_results, job_version));
Hope this helps.
I have the following test code:
var async = require('async');
var GROUP = 'testGroup';
var opts = {
    someKey: 'hi',
};
test(opts);
function test(options) {
    async.series([
        doThis.bind(null, options),
        doThat.bind(null, options),
    ], function(err, results) {
        debugger;
    });
}

function doThis(options, cb) {
    options.someKey = [GROUP, options.someKey].join('.');
    return cb();
}

function doThat(options, cb) {
    debugger;
    options.someKey = [GROUP, options.someKey].join('.');
    return cb();
}
When we hit the debugger in doThat(), options.someKey already has the value 'testGroup.hi', so when the function finishes we end up with options.someKey === 'testGroup.testGroup.hi'.
How do we bind such that the original object does not change? The bind is necessary because we need to pass in options to the functions that run within async.series. Otherwise, we could just invoke the functions and pass in the object as a parameter.
I don't think your partial application of the options parameter to your doThis() and doThat() functions is especially pertinent.
You're passing the same JavaScript object/literal as a parameter to two functions and then mutating that parameter.
If you don't want to mutate that object, then don't. Find some other way of returning the results of your operations: doThis() and doThat() could return values instead of modifying the parameter, and you could gather them up in the final callback after the series runs.
If you just want to preserve the initial value of opts, use lodash or something to make a deep clone of opts before you pass it into test.
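A minimal sketch of both options (the lodash dependency is my assumption; swap in whatever clone utility you prefer):
var _ = require('lodash');

// Option 1: hand test() a deep copy, so the original opts is never touched.
test(_.cloneDeep(opts));
console.log(opts.someKey); // still 'hi'

// Option 2: return derived values instead of mutating the shared object.
function doThis(options, cb) {
    return cb(null, [GROUP, options.someKey].join('.'));
}
// async.series gathers each callback's value into the results array,
// so the final callback can read results[0] and results[1] with opts untouched.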
I am in the process of relearning JavaScript, and when writing this code for a university assignment last week I thought there was probably a much better way of executing it:
app.get('/member/all', function(req, res) {
    connection.query('CALL GetAllMembers()', function(err, rows) {
        connection.query('CALL CountMembers()', function(err, allMembers) {
            console.log(err);
            connection.query('CALL CountAllIndMembers()', function(err, indMembers) {
                console.log(err);
                connection.query('CALL CountInactiveMembers()', function(err, inactiveMembers) {
                    console.log(err);
                    connection.query('CALL CountAllMembersInGroups()', function(err, groupMembers) {
                        console.log(err);
                        res.render('members', {
                            members: rows[0],
                            title: "All Members",
                            groupMembers: groupMembers[0][0].AllGrpMembers,
                            inactiveMembers: inactiveMembers[0][0].AllInactiveMembers,
                            indMembers: indMembers[0][0].AllIndMembers,
                            allMembers: allMembers[0][0].AllMembers,
                            statistics: true
                        });
                    });
                });
            });
        });
    });
});
When I was trying to declare variables under the app.get, such as var allMembers..., I was unable to set allMembers = rowsFromTheCallback when the callback executed. It seemed to be a variable local to that callback. I'm sure this is something to do with variable scope and/or hoisting. Just wanted to ask you guys if there would be a better way to do this, because even though this function works, it is very ugly to look at haha!
Thanks in advance
Jack
As far as scope goes, all the inner functions should be able to read and write to the outer variable unless it is shadowed by an inner variable declaration or function parameter.
The problem you are having might be related to the async-ness of the code. See this code:
function delay(n, cb){
    setTimeout(function(){ cb(); }, n);
}
function main(){
    var allMembers = 17;
    delay(500, function(){
        console.log(allMembers); // This looks at the outer "allMembers"
        allMembers = 18;
        delay(200, function(allMembers){ // <-- SHADOW
            console.log(allMembers); // This looks at the allMembers from "delay 200"'s callback
            allMembers = 42;
        });
        delay(300, function(){
            console.log(allMembers); // This is the outside "allMembers" again
        });
    });
    return allMembers; // Still 17!
}
main();
main will return before the setTimeouts have even fired, so it's going to return the original value of that variable. In order to wait for the inner callbacks to run, the only way is to make main take a callback to signal when it's done, instead of just returning.
function main(onResult){
    delay(500, function(){
        //...
        onResult(allMembers);
    });
    // <-- no return value
}
main(function(allM){
console.log(allM);
});
See async library: https://github.com/caolan/async
async.series([
    getAllMembers,
    countMembers,
    ...
], function(err, results) {
    // err contains an error if any of the functions fails. No more functions will be run.
    // results is an array containing the results of each function if all the functions executed without errors
});

function getAllMembers(callback) {
    connection.query('CALL GetAllMembers()', callback);
}

function countMembers(callback) {
    ...
}
If the execution order of the functions does not matter, async.parallel can be used instead of async.series.
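A rough sketch of the parallel version (my addition, reusing the question's connection object): named tasks let the final callback look up each result by key instead of by array index.
async.parallel({
    members: function(cb) {
        connection.query('CALL GetAllMembers()', function(err, rows) { cb(err, rows); });
    },
    allMembers: function(cb) {
        connection.query('CALL CountMembers()', function(err, rows) { cb(err, rows); });
    },
    indMembers: function(cb) {
        connection.query('CALL CountAllIndMembers()', function(err, rows) { cb(err, rows); });
    },
    inactiveMembers: function(cb) {
        connection.query('CALL CountInactiveMembers()', function(err, rows) { cb(err, rows); });
    },
    groupMembers: function(cb) {
        connection.query('CALL CountAllMembersInGroups()', function(err, rows) { cb(err, rows); });
    }
}, function(err, results) {
    if (err) { return console.log(err); }
    // results.members, results.allMembers, etc. each hold that query's rows
});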
There is power in using a library to handle and encapsulate "Continuation Passing Style" (CPS) interactions with your asynchronous calls. The following code isn't from a library, but I'm going to walk through it and use it as an example of one way to implement CPS.
Setting up a scope appropriate queue is the first step. This example uses about the most simple method for doing so:
var nextList = [];
After that we need a method to handle our first case, the need to queue tasks to be performed in the future. In this case I was focused on performing them in order so I named it next.
function next() {
    var todo,
        current,
        task,
        args = {};

    if (arguments.length > 0) { // if called with parameters process them
        // if parameters aren't in an array wrap them
        if (!Array.isArray(arguments['0'])) {
            todo = [arguments];
        } else { // we were passed an array
            todo = [];
            arguments['0'].forEach(function (item) {
                // for each item we were passed add it to todo
                todo.push(item);
            });
        }
        nextList = todo.concat(nextList);
        // prepend the new items to the front of our list
    }

    if (nextList.length > 0) { // if there are still things to do
        current = Array.prototype.slice.apply(nextList.shift());
        task = current[0];
        args = current.slice(1);
        task.apply(null, args); // execute the next item in the list
    }
}
This allows us to make calls like:
.map(function (filepath) {
tasks.push(
[
handleAsset,
{
'path': filepath,
}
]
);
});
tasks.push([done]);
next(tasks);
This will call handleAsset, which is async, once for each file, in order. This allows you to take your code and change each of the nested calls into a separate function of the form:
function memberAll() {
    app.get('/member/all', function(req, res) {
        if (err) {
            handleError(err, 'memberAll');
        } else {
            next(getAllMembers, 'parameters to that call if needed');
        }
    });
}
where handleError is a common error handler, and the next call allows you to pass on relevant parameters to the next function that is needed. Importantly in the success side of the if statement you could either:
conditionally call one of several functions
call next with an array of calls to make, for instance if you had functions for processFolder and processFile you could expect that processing a folder might involve processing other folders and files and that the number would vary
do nothing except call next() with no parameters and end the current branch
Embellishments can include writing a clean function for emptying the nextList, adding items to nextList without calling an item from the list, etc. The alternative at this point is to either use an existing library for this or to continue writing your own.
My issue is that an array I've bind'ed to an async function doesn't seem to get updated on subsequent calls of that function, even though the bind'ed array is updated inside that function.
In the function below I call queryForData several times asynchronously, passing in history (declared near the top of callQuery). LOG1 always prints out an empty array and LOG2 always prints out an array retrieved with the correct data for that iteration. However, it doesn't seem to concat with the arrays retrieved by the other calls.
Please help
exports.callQuery = function(req, res) {
    var http = require('http');
    var history = [];

    // loop over all entries in "Stocks" collection
    // and call queryForData
    Stocks.find(function (err, stocks){
        stocks.forEach(
            function callback(entry){
                queryForData(entry, this.history);
            }.bind({history : history})
        );
    });

    // perform an HTTP request for data and call the callback
    // function which concats the data arrays together.
    var queryForData = function(stockData, history) {
        var options = {
            host: 'query.blah.com',
            path: '/blah' + stockData
        };

        var callback = function(response) {
            var str = '';

            // another chunk of data has been received, so append it to `str`
            response.on('data', function (chunk) {
                str += chunk;
            });

            // the whole response has been received, so we just print it out here
            response.on('end',
                function () {
                    var data = JSON.parse(str);
                    console.log("LOG1: ", this.stocksHistoryData);
                    this.stocksHistoryData = this.stocksHistoryData.concat(data);
                    console.log("LOG2: ", this.stocksHistoryData);
                }.bind({stocksHistoryData : history})
            );
        };

        http.request(options, callback).end();
    };
};
concat() returns a new array. So, you're overwriting the reference to the array with a new array that's never reachable outside that function's scope. It happens here:
this.stocksHistoryData = this.stocksHistoryData.concat(data);
Try replacing the above line with:
data.forEach(function(item){
    this.stocksHistoryData.push(item);
}, this);
That way you always build up the state of the existing array.
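A tiny illustration of the difference (my own, not from the answer above):
var history = [];
var local = history;          // both names point at the same array
local = local.concat([1, 2]); // concat builds a brand-new array
console.log(history.length);  // 0 -- the original shared array never changed

local = history;
local.push(1, 2);             // push mutates the array in place
console.log(history.length);  // 2 -- the change is visible through every reference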
I can't help you debug your code, but I can answer the question you pose. In JavaScript, object values are not copied when passed around; what gets passed is a reference to the same underlying object. So, in the terms of your question, JS bind keeps a reference to obj.
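A small sketch (my addition) showing that bind captures a reference, not a copy:
var obj = { n: 1 };
var bound = function () { console.log(this.n); }.bind(obj);
obj.n = 2;  // mutate the object after binding
bound();    // prints 2 -- the bound `this` is the very same object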
Your issue is that your queryForData function never even uses this, so it is irrelevant whether or not you bind anything to the function. Also, if you are going to use this.history, it would be inside the queryForData function body, not in the arguments that are being passed to it. I believe this code should be changed:
// loop over all entries in "Stocks" collection
// and call queryForData
Stocks.find(function (err, stocks){
    stocks.forEach(
        function callback(entry){
            queryForData(entry, this.history);
        }.bind({history : history})
    );
});
to this:
// loop over all entries in "Stocks" collection
// and call queryForData
Stocks.find(function (err, stocks){
    stocks.forEach(function callback(entry){
        queryForData(entry, history);
    });
});
This should work fine: arrays are objects, and object arguments in JavaScript are shared by reference rather than copied, so every invocation of queryForData will reference the same history array that is passed to it.
I've been developing in JavaScript for quite some time but I'm not yet a cowboy developer, as one of the many things that always haunts me is syncing JavaScript's callbacks.
I will describe a generic scenario when this concern will be raised: I have a bunch of operations to perform multiple times by a for loop, and each of the operations has a callback. After the for loop, I need to perform another operation but this operation can only execute successfully if all the callbacks from the for loop are done.
Code Example:
for ... in ... {
myFunc1(callback); // callbacks are executed asynchly
}
myFunc2(); // can only execute properly if all the myFunc1 callbacks are done
Suggested Solution:
Initialize a counter at the beginning of the loop with the length of the loop, and have each callback decrement that counter. When the counter hits 0, execute myFunc2. This essentially lets each callback know whether it's the last one in the sequence and, if it is, to call myFunc2 when it's done.
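For illustration, a quick sketch of that counter approach (the names are mine, and it assumes the items sit in an array):
var items = [/* ... */];
var pending = items.length;   // counter holds the loop length
function onOneDone() {
    if (--pending === 0) {
        myFunc2();            // every myFunc1 callback has finished
    }
}
for (var i = 0; i < items.length; i++) {
    myFunc1(onOneDone);       // each callback decrements the counter
}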
Problems:
A counter is needed for every such sequence in your code, and having meaningless counters everywhere is not a good practice.
If you recall how thread conflicts arise in classical synchronization problems, when multiple threads are all calling var-- on the same variable, undesirable outcomes can occur. Does the same happen in JavaScript?
Ultimate Question:
Is there a better solution?
The good news is that JavaScript is single threaded; this means that solutions will generally work well with "shared" variables, i.e. no mutex locks are required.
If you want to serialize asynch tasks, followed by a completion callback you could use this helper function:
function serializeTasks(arr, fn, done) {
    var current = 0;
    fn(function iterate() {
        if (++current < arr.length) {
            fn(iterate, arr[current]);
        } else {
            done();
        }
    }, arr[current]);
}
The first argument is the array of values that needs to be passed in each pass, the second argument is a loop callback (explained below) and the last argument is the completion callback function.
This is the loop callback function:
function loopFn(nextTask, value) {
myFunc1(value, nextTask);
}
The first argument that's passed is a function that will execute the next task, it's meant to be passed to your asynch function. The second argument is the current entry of your array of values.
Let's assume the asynch task looks like this:
function myFunc1(value, callback)
{
console.log(value);
callback();
}
It prints the value and afterwards it invokes the callback; simple.
Then, to set the whole thing in motion:
serializeTasks([1,2, 3], loopFn, function() {
console.log('done');
});
To parallelize them, you need a different function:
function parallelizeTasks(arr, fn, done) {
    var total = arr.length,
        doneTask = function() {
            if (--total === 0) {
                done();
            }
        };
    arr.forEach(function(value) {
        fn(doneTask, value);
    });
}
And your loop function will be this (only parameter name changes):
function loopFn(doneTask, value) {
myFunc1(value, doneTask);
}
The second problem is not really a problem as long as every one of those is in a separate function and the variable is declared correctly (with var); local variables in functions do not interfere with each other.
The first problem is a bit more of a problem. Other people have gotten annoyed, too, and ended up making libraries to wrap that sort of pattern for you. I like async. With it, your code might look like this:
async.each(someArray, myFunc1, myFunc2);
It offers a lot of other asynchronous building blocks, too. I'd recommend taking a look at it if you're doing lots of asynchronous stuff.
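For that one-liner to work, myFunc1 has to follow async's iterator signature, i.e. accept the item plus a completion callback. A hedged sketch (someAsyncOperation is a stand-in for your real work):
function myFunc1(item, done) {
    someAsyncOperation(item, function (err) {
        done(err);              // tell async this item is finished (or failed)
    });
}

async.each(someArray, myFunc1, function (err) {
    if (err) return console.error(err);
    myFunc2();                  // runs only after every item has completed
});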
You can achieve this by using a jQuery deferred object.
var deferred = $.Deferred();

var success = function () {
    // resolve the deferred with your object as the data
    deferred.resolve({
        result: ...
    });
};
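The snippet above only shows a single deferred; to wait for a whole loop's worth of callbacks you would collect one deferred per call and hand them all to $.when. A rough sketch along those lines (my addition; items stands in for whatever you are looping over):
var deferreds = [];
for (var key in items) {
    (function () {
        var d = $.Deferred();
        myFunc1(function () { d.resolve(); }); // resolve when this callback fires
        deferreds.push(d);
    }());
}

$.when.apply($, deferreds).then(function () {
    myFunc2(); // runs once every deferred has resolved
});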
With this helper function:
function afterAll(callback, what) {
    what.counter = (what.counter || 0) + 1;
    return function() {
        callback();
        if (--what.counter == 0)
            what();
    };
}
your loop will look like this:
function whenAllDone() { ... }
for (... in ...) {
myFunc1(afterAll(callback,whenAllDone));
}
Here afterAll creates a proxy function for the callback; the proxy also decrements the counter and calls the whenAllDone function when all callbacks are complete.
A single thread is not always guaranteed; do not take it for granted.
Case 1:
For example, if we have 2 functions as follows.
var count = 0;

function function1(){
    alert("this thread will be suspended, count:" + count);
}

function function2(){
    // anything
    count++;
    dump(count + "\n");
}
Then, before function1 returns, function2 can also be called. If a single thread were guaranteed, function2 would not be called before function1 returns. You can try this, and you will find that count keeps going up while you are being alerted.
Case 2: with Firefox chrome code, before one function returns (with no alert inside), another function can also be called.
So a mutex lock is indeed needed.
There are many, many ways to achieve this, I hope these suggestions help!
First, I would transform the callback into a promise! Here is one way to do that:
function aPromise(arg) {
    return new Promise((resolve, reject) => {
        aCallback(arg, (err, result) => {
            if (err) reject(err);
            else resolve(result);
        });
    });
}
Next, use reduce to process the elements of an array one by one!
const arrayOfArg = ["one", "two", "three"];
const promise = arrayOfArg.reduce(
(promise, arg) => promise.then(() => aPromise(arg)), // after the previous promise, return the result of the aPromise function as the next promise
Promise.resolve(null) // initial resolved promise
);
promise.then(() => {
// carry on
});
If you want to process all elements of an array at the same time, use map and Promise.all!
const arrayOfArg = ["one", "two", "three"];
const promise = Promise.all(arrayOfArg.map(
arg => aPromise(arg)
));
promise.then(() => {
// carry on
});
If you are able to use async / await then you could just simply do this:
const arrayOfArg = ["one", "two", "three"];
for(let arg of arrayOfArg) {
await aPromise(arg); // wow
}
// carry on
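One caveat worth noting (my addition): await has to run inside an async function in most environments, so in practice the loop gets wrapped, for example:
async function processAll() {
    for (const arg of arrayOfArg) {
        await aPromise(arg);
    }
    // carry on
}

processAll().catch(err => console.error(err));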
You might even use my very cool synchronize-async library like this:
const arrayOfArg = ["one", "two", "three"];
const context = {}; // can be any kind of object, this is the threadish context
for(let arg of arrayOfArg) {
synchronizeCall(aPromise, arg); // synchronize the calls in the given context
}
join(context).then(() => { // join will resolve when all calls in the context are finished
// carry on
});
And last but not least, use the fine async library if you really don't want to use promises.
const arrayOfArg = ["one", "two", "three"];
async.each(arrayOfArg, aCallback, err => {
if(err) throw err; // handle the error!
// carry on
});
This question, in summary, is about figuring out how to pass variables between JavaScript functions without returning variables, passing parameters between primary functions, using global variables, or forcing function 1 to wait for function 2 to finish. I figured out a jQuery solution and posted it below (in the answers section).
Old Post: I initialize a set of four functions, each calling on each other in a different way. At the end of it, I need the final modified product (an array) returned to the initializing function.
Global variables don't force the initial function to wait. And returning it backwards four times doesn't work either. How do you pass a modified variable back to its initializing function, if you can't return it? Or why isn't it returning?
(the maze starts at initFunctionA, ends at functionD)
classOne = {
    initFunctionA : function() {
        classTwo.functionB(functionD, array);
        // I NEED ACCESS TO ARRAY2 HERE
    },
    functionD : function(data, array) {
        var array2 = // modifications to array
    }
}
{...}
classTwo = {
    functionB : function(callback, array) {
        $.ajax({
            success: function(ret){
                classTwo.functionC(ret, callback, array)
            }
        });
    },
    functionC : function(ret, callback, array) {
        callback(ret.data.x, array);
    }
}
Change your callback (at the call site) such that you capture the return value of functionD. Then, change functionD so that it returns array2. I've added this access to the example below as a convenience. (Also, be sure to include semicolons where "required" if you want to make JSLint happy.)
classOne = {
    initFunctionA : function() {
        var self = this;
        classTwo.functionB(function() {
            var array2 = self.functionD.apply(self, arguments);
            // ACCESS ARRAY2 HERE
        }, array);
    },
    functionD : function(data, array) {
        var array2 = // modifications to array
        return array2;
    }
};
{...}
classTwo = {
    functionB : function(callback, array) {
        $.ajax({
            success: function(ret){
                classTwo.functionC(ret, callback, array)
            }
        });
    },
    functionC : function(ret, callback, array) {
        callback(ret.data.x, array);
    }
};
You can't make it work with a pattern like you've written there; it's simply not possible in Javascript because there's no such thing as "waiting". Your ajax code has to take a callback parameter (which you've got, though it's not clear where it comes from or what it does), and that initial function should pass in code to do what you need with the array after the ajax call finishes.
I would use an object constructor:
function ClassOne() {
    this.array2 = [];
}

ClassOne.prototype.initFunctionA = function() {
    // ...
};

ClassOne.prototype.functionD = function(data, array) {
    // Can use array2, e.g. this.array2
};
var classOne = new ClassOne();
This is how I understand your problem:
classTwo handles an AJAX call and may modify the result. classOne makes use of classTwo to get some data and needs the resulting data.
If so, how's this:
classOne = {
    initFunctionA : function() {
        var array = ['a','b','c'];
        classTwo.functionB(this.functionD, array);
    },
    functionD : function(data, array) {
        // This function is called when the AJAX returns.
        var array2 = // modifications to array
    }
}

{...}

classTwo = {
    functionB : function(callback, array) {
        $.ajax({
            success: function(ret){
                classTwo.functionC(ret, callback, array)
            }
        });
    },
    functionC : function(ret, callback, array) {
        callback(ret.data.x, array);
    }
}
So classOne.initFunctionA calls classTwo.functionB which sets up an ajax call. When the ajax call completes successfully, classTwo.functionC is called with the result and the initial array. From here, classOne.functionD is called with ret.data.x and the array.
Okay! I found a way to pass variables between functions without:
making global variables
making object properties (Chaos's solution)
passing parameters
These three were suggested here as the only ways.
Accessing variables from other functions without using global variables
But, if you can't pass parameters directly, and you need one function to wait for the other (i.e., you can't rely on references), and you're using asynchronous calls to the server in an intermediate function, then your only solution is:
Using jQuery...
Create this object in the DOM (dynamically if you don't want to muddy your markup):
<div id='#domJSHandler" style="display: none;"></div>
Then in the function that must wait:
//Function & Class Set 2
$('#domJSHandler').bind('proceedWithAction', function(event, param1, param2) {
// action set 2
});
And in the function to be waited on:
//Function & Class Set 1
// action set 1
$('#domJSHandler').triggerHandler('proceedWithAction', [param1, param2]);
Essentially encase the last actions you need to perform in a jQuery bind custom event on an invisible DOM object. Trigger that event from JS with jQuery's triggerHandler. Pass your parameters and voila!
I'm sure SO will give me crap for this (and for narcissistically accepting my own answer) but I think it's pretty brilliant for an uber-newbie and it worked for me.
So :p Stack Overflow
(jk You've all saved my ass many times and I love you all :)