Using async with multiple functions javascript - javascript

How to use async correctly with multiple dependent functions...
Here was my attempt which is not working out, it is within an async.waterfall function:
// Waterfall step (original attempt): for each URL, fetch its page list, then scrape every page.
// urlsCreated: array of URLs; cb: the waterfall continuation, which this snippet never invokes.
function (urlsCreated, cb) {
var z, artist, title, added_on;
z = [];
async.mapSeries(urlsCreated, function (url, next) {
scrape_music.total_pages(50, url, function (array, total, extra) {
// artist, title and added_on are declared above but never assigned, so they are undefined here.
scrape_music.each(artist, title, added_on, array, url, function (result) {
// `result` is dropped and `next` is never called, so mapSeries is never told this iteration finished.
});
});
// The completion callback's only parameter is named `z`, shadowing the outer `z` array.
}, function (z) {
console.log(z);
});
}
Everything is working fine up to this part...
basically urlsCreated is an array of 2 urls...
Then I called a mapSeries assuming that it will iterate between them...
The way it should be working is, it iterates through each url in the array, then for each url it should get a total page count for the url, and then for each page count which is added to the array parameter/callback of the total_pages, should be iterated within...
so basically the arrays are: urlsCreated (containing 2 links) --> array (containing total pages in total_pages method) --> result (.each method should grab off each page, number of pages included in the array beforehand) and then repeat for amount of urls in urlsCreated...
Any help would be wonderful, currently nothing gets printed for z, and essentially I just want an array filled with objects that are returned in result from scrape_music.each.
EDIT----
Here is the code for those functions.
// Loop through each page and collect the table rows that match.
// artist, title and added_on are accepted but not read; `array` and `urls` are
// forwarded to this.page, and cb receives the shared, growing list each time
// this.page hands back a loaded page.
Scrape.prototype.each = function (artist, title, added_on, array, urls, cb) {
  var collected = [];
  console.log('entered each');
  console.log(array);
  this.page(array, urls, function ($page) {
    // Text of the n-th <td> inside the given row.
    var cellText = function (row, column) {
      return $page(row).find('td').eq(column).text();
    };
    // Same row window as before: skip the first 11 rows and the last 3.
    var rows = $page('tr').slice(11, -3);
    rows.map(function (index, row) {
      collected.push({
        artist: cellText(row, 1),
        title: cellText(row, 2),
        added_on: cellText(row, 3)
      });
    });
    cb(collected);
  });
};
/**
 * Requests page 0 of `url` and derives the list of page indexes from a count
 * read out of the returned HTML.
 *
 * @param {number} divide - entries per page; the count is divided by this.
 * @param {string} url - base URL; config.url.pageQ and the page number are appended.
 * @param {function(Array<number>, number, number)} cb - receives
 *   (array of page indexes 0..total-1, total, remainder of count % divide).
 * @returns whatever request() returns.
 * @throws rethrows the request error from inside the response callback.
 */
Scrape.prototype.total_pages = function (divide, url, cb) {
  return request("" + url + config.url.pageQ + 0, function (err, res, body) {
    if (err) { throw err; }
    // `x` is declared here; it was previously assigned without a declaration,
    // which leaks a global in sloppy mode and throws in strict mode.
    var page, select, match, total, matches, array, extra, x;
    array = [];
    page = cheerio.load(body);
    select = page('tr').slice(9, 10);
    match = page(select).find('td').eq(1).text();
    // presumably the entry count sits 18..14 characters from the end of that cell — TODO confirm
    matches = match.slice(-18, -14).trim();
    total = Math.round(matches / divide);
    extra = matches % divide;
    for (x = 0; x < total; x++) {
      array.push(x);
    }
    cb(array, total, extra);
  });
};
// Used to loop through all pages: issues one request per entry of `array`
// and hands every loaded document to cb. Always returns undefined.
Scrape.prototype.page = function (array, urls, cb) {
  // Shared response handler: rethrow request errors, otherwise load the body.
  var onResponse = function (err, res, body) {
    if (err) { throw err; }
    cb(cheerio.load(body));
  };
  array.forEach(function (pageIndex) {
    request("" + urls + config.url.pageQ + pageIndex, onResponse);
  });
};

function (urlsCreated, cb) {
var artist, title, added_on;
async.mapSeries(urlsCreated, function (url, next) {
scrape_music.total_pages(50, url, function (array, total, extra) {
// 1:
scrape_music.each(artist, title, added_on, array, url, function (result) {
// 2:
next(null, result);
});
});
}, function (err, z) {
// 3:
console.log(z);
});
}
1: each() here can't be an iterator (not sure what it does), as you can only call next() once per mapSeries iteration. If its callback is only called once, when the iterating is done, then it's fine.
2: Tell async this iteration is done. The first argument is any error.
3: The second argument is the new array.

Related

Joining the result of two different url requests together

I have a Node js program, however I am struggling to understand how to join the response details together in one variable.
Let me explain what I mean:
I have different URL's which I do a request.get on all of them at the same time (it needs to be the same time).
The response I get are different, depending on the URL ending - the response can either be a Dog-List or a Cat-List
I then need to get information from the Dog-list, therefore I have an if statement saying that if the response contains a Dog-List, print out 'in dog list' and run the required code.
If it is a Cat-List, print out 'in cat list' and run the required code.
FINALLY, get the dog and cat info and join it together to form one variable.
This is the code:
// URLs to request; the responses are checked below for a Cat-List or a Dog-List.
var urls = ['http://111.1:1111/cats', 'http://111.1:1111/dogs'];
// Requests every URL through async.map and logs an age for each response separately.
function test() {
// async.map is given no completion callback here, and cb is never invoked below.
async.map(urls, function(url, cb) {
request.get(url, function(error, response, body) {
if (error) {
console.log(error);
} else {
parseString(body, function(result) {
var age1 = 0;
var age2 = 0;
// A response takes at most one of these branches, so one of age1/age2 stays 0.
if (result.Return['Cat-List']) {
console.log('in cat list');
age1 = result.Return['Cat-List'].age;
} else if (result.Return['Dog-List']) {
console.log('in dog list');
age2 = result.Return['Dog-List'].age;
}
// `data` is assigned without a declaration.
data = age1 + age2;
console.log(data);
});
}
});
});
}
However, as you would expect, because I am using an 'if' statement after getting a response from each URL, I will only go either into the dog or cat list, therefore my results can never be combined.
This is what I get printed when I run the program:
in cat list
2
in dog list
3
That is because the cat age is 2 and the dog age is 3.
This is what I would want the cmd to look like:
in cat list
in dog list
5
Is there a different way to do this without using the if statements? Or just doing something else to join the results together? I am just struggling to get to this result. Any help would be appreciated!
If you want the data value to be used anywhere then you need to run a callback with it with something like:
cb(null, data);
Where null means that there's no error.
You also need another callback as an argument to async.map to call when everything is done with something like:
// Skeleton: async.map called with both an iterator and a completion callback.
function test() {
async.map(urls, function (url, cb) {
// code to run for every value
// (hand each value back with cb(null, value))
},
function (err, results) {
// code to run after everything is finished
// here you can sum the results
});
}
For more info see:
https://caolan.github.io/async/docs.html#map
Full example - not tested:
// URLs to request; the responses are checked below for a Cat-List or a Dog-List.
var urls = ['http://111.1:1111/cats', 'http://111.1:1111/dogs'];

/**
 * Requests every URL in `urls`, extracts one age per response and logs the sum
 * of all ages once every request has reported back.
 */
function test() {
  async.map(urls, function (url, cb) {
    request.get(url, function (error, response, body) {
      if (error) {
        console.log(error);
        // Report the failure so async.map can finish; if cb is never called
        // the completion callback below never runs.
        return cb(error);
      }
      // NOTE(review): xml2js's parseString passes (err, result) to its callback;
      // confirm which parseString is in use here.
      parseString(body, function (result) {
        var age = 0;
        if (result.Return['Cat-List']) {
          console.log('in cat list');
          age = result.Return['Cat-List'].age;
        } else if (result.Return['Dog-List']) {
          console.log('in dog list');
          age = result.Return['Dog-List'].age;
        }
        cb(null, age);
      });
    });
  }, function (err, results) {
    if (err) {
      // Already logged where it happened; there is nothing to sum.
      return;
    }
    var sum = results.reduce((a, b) => a + b, 0);
    console.log(sum);
  });
}

Callback until for loop with query is done

I posted a question before and realized my problem actually was async functions. I managed to work out most of it, but I got one little problem left. Using async I used waterfall to create an order for the some queries...
exports.getMenu = function(id_restaurant, callback){
async.waterfall([
async.apply(firstQuery, id_restaurant),
secondQuery,
thirdQuery,
fourthQuery,
formMenu
], function(err, result){
if(err){
console.log(err);
}
callback(result);
});
};
Everything works until fourthQuery, where I have to loop to get all dishes of a menu.
// Waterfall step: issues one menu_product query per section and passes the dishes on.
// Note that the final `callback` call sits outside the getConnection callback,
// so it does not wait for any of the queries.
function fourthQuery(array_totalP, array_nombresSecc, array_secciones, callback){
var size = array_nombresSecc.length;
// Re-declaring a parameter with `var` keeps the parameter's value; these two lines change nothing.
var array_secciones = array_secciones;
var array_nombresSecc = array_nombresSecc;
var dishes = [];
pool.getConnection(function(err, connection) {
if(err) {
console.log(err);
callback(true);
return;
}
for (var i = 0; i < size; i++) {
connection.query("SELECT name, price FROM menu_product WHERE id_seccion = ? AND active = 1", [array_secciones[i]],
function(err, results2) {
if(err) {
console.log(err);
callback(true);
return;
}
console.log("Result query 4 " + JSON.stringify(results2));
// `i` is function-scoped (var), so every query callback reads the same variable.
dishes[i] = results2;
console.log("VALOR PLATILLOS EN i : " + JSON.stringify(dishes[i]));
// this prints the result but only if it has a value over 2
});
};
}); // pool
console.log("I'm sending " + dishes); // this logs an empty array
callback(null, dishes, array_nombresSecc);
};
So what I can see from printing the value of 'i' in each loop is that it always has the value 2, because that is the value of 'size'. Also, even though it's saving the results at index 2, I believe the callback is being called before the for loop is done, because my fifth function is receiving an empty array.
How can i make my code wait to callback until my for loop is done?
NOTE: Sorry, part of my code is in spanish, tried to translate the important parts of it.
There are a few ways to handle this, one is to look into promise architecture. Promise.all will let you supply one callback to handle the values from each child promise.
To use what you've already got, however, I'd push the values into your dishes array, rather than assigning them specifically to i indexes, then check the size of that array at the end of each connection. When the array length matches the size, fire the callback. (as seen below)
If you need a way to tie each result to that specific i value, I'd recommend pushing them as an object
dishes.push({'index': i, 'dish': results2})
Afterward, if you need the array of just dishes, you can sort the array by that index value and run a map function.
dishes.sort(function(a,b){ return a.index - b.index; })
dishes = dishes.map(function(a){ return a.dish })
Here's the code adjusted:
/**
 * Waterfall step: queries the dishes of every section and passes them on once
 * all queries have answered.
 *
 * @param array_totalP - not read by this step.
 * @param {Array} array_nombresSecc - section names; its length is the number of queries issued.
 * @param {Array} array_secciones - section ids, one per query.
 * @param {function} callback - called exactly once: (true) on any failure,
 *   otherwise (null, dishes, array_nombresSecc). dishes is in completion order.
 */
function fourthQuery(array_totalP, array_nombresSecc, array_secciones, callback) {
  var size = array_nombresSecc.length;
  var dishes = [];
  var finished = false;

  pool.getConnection(function(err, connection) {
    if (err) {
      console.log(err);
      callback(true);
      return;
    }

    // Single exit point: gives the connection back and guarantees that the
    // waterfall callback fires only once, even if several queries fail.
    // assumes a mysql-style pool whose connections expose release() — TODO confirm
    function finish(failed) {
      if (finished) {
        return;
      }
      finished = true;
      connection.release();
      if (failed) {
        callback(true);
        return;
      }
      console.log("I'm sending " + dishes);
      callback(null, dishes, array_nombresSecc);
    }

    // With no sections no query callback would ever run, so finish right away.
    if (size === 0) {
      finish(false);
      return;
    }

    for (var i = 0; i < size; i++) {
      connection.query("SELECT name, price FROM menu_product WHERE id_seccion = ? AND active = 1", [array_secciones[i]],
        function(err, results2) {
          if (err) {
            console.log(err);
            finish(true);
            return;
          }
          if (finished) {
            // An earlier query already failed; ignore late results.
            return;
          }
          console.log("Result query 4 " + JSON.stringify(results2));
          dishes.push(results2);
          // Log the value just stored; `i` already equals `size` by the time
          // this runs, so dishes[i] would always be undefined.
          console.log("VALOR PLATILLOS EN i : " + JSON.stringify(results2));
          if (dishes.length === size) {
            finish(false);
          }
        });
    }
  }); // pool
}
Since you're already using the async, I would suggest replacing the for() loop in fourthQuery with async.each().
The updated fourthQuery would look like this:
/**
 * Waterfall step: queries the dishes of every section with async.each and
 * passes them on when all queries are done.
 *
 * @param array_totalP - not read by this step.
 * @param {Array} array_nombresSecc - section names, forwarded unchanged.
 * @param {Array} array_secciones - section ids, one query each.
 * @param {function} callback - (true) on any failure, otherwise
 *   (null, dishes, array_nombresSecc). dishes is in completion order.
 */
function fourthQuery(array_totalP, array_nombresSecc, array_secciones, callback){
  var dishes = [];
  pool.getConnection(function(err, connection) {
    if(err) {
      console.log(err);
      callback(true);
      return;
    }
    async.each(array_secciones,
      function(item, itemCallback) {
        // Function run for each item in array_secciones
        connection.query("SELECT name, price FROM menu_product WHERE id_seccion = ? AND active = 1", [item],
          function(err, results2) {
            if(err) {
              console.log(err);
              return itemCallback(true);
            }
            console.log("Result query 4 " + JSON.stringify(results2));
            dishes.push(results2);
            console.log("VALOR PLATILLOS EN i : " + JSON.stringify(dishes[dishes.length-1]));
            return itemCallback();
          });
      },
      function(err) {
        // Function run after all items in array are processed or an error occurs
        // assumes a mysql-style pool whose connections expose release() — TODO confirm
        connection.release();
        if (err) {
          // Forward the failure instead of reporting success with partial data.
          callback(true);
          return;
        }
        console.log("I'm sending " + dishes);
        callback(null, dishes, array_nombresSecc);
      });
  }); // pool
}
Alternatively, you can use async.map(), which handles gathering the results in the final callback so doesn't rely on the dishes variable.

node.js compare two arrays with objects

I need to remove all documents from my mongo db which don't exist in a new array of objects.
So I have array with objects like :
// New product list; DB products whose product_id is not in here are to be removed.
var items = [
  {product_id:15, pr_name: 'a', description : 'desc'},
  {product_id:44, pr_name: 'b', description : 'desc2'},
  {product_id:32, pr_name: 'c', description : 'desc3'}];
and I have array with db values which I get by calling Model.find({}).
So now I do it in a 'straight' way:
// For every DB product, scan all new items; remove the DB product when no item matches.
async.each(products, function (dbProduct, callback) { //cycle for products removing
var equals = false;
// The inner iterator's `callback` parameter shadows the outer one.
async.each(items, function(product, callback){
if (dbProduct.product_id === product.product_id){
product.description = dbProduct.description;// I need to save desc from db product to new product
equals = true;
}
callback();
});
if (!equals) {
log.warn("REMOVE PRODUCT " + dbProduct.product_id);
// One remove() call is issued per unmatched product.
Product.remove({ _id: dbProduct._id }, function (err) {
if (err) return updateDBCallback(err);
callback();
});
}
// When a match was found, the outer callback is not called in this snippet.
});
But it blocks the whole app and it's very slow, because I have around 5000 values in my items array and in the database too. So it's a huge number of iterations.
Maybe there can be a faster way?
UPDATE1
Using code below, from TbWill4321 answer:
// Ids of DB products that have no counterpart in `items`; removed in one query at the end.
var removeIds = [];
// cycle for products removing
async.each(products, function (dbProduct, callback) {
  for ( var i = 0; i < items.length; i++ ) {
    // Bind the item being compared; `product` was otherwise never defined in this loop.
    var product = items[i];
    if (dbProduct.product_id === product.product_id) {
      // I need to save desc from db product to new product
      product.description = dbProduct.description;
      // Return early for performance
      return callback();
    }
  }
  // Mark product to remove.
  removeIds.push( dbProduct._id );
  log.warn("REMOVE PRODUCT " + dbProduct.product_id);
  return callback();
}, function() {
  Product.remove({ _id: { $in: removeIds } }, function (err) {
    if (err) return updateDBCallback(err);
    // Continue Here.
    // TODO
  });
});
It takes around 11 sec (blocking the whole web app) and 12 362 878 cycles for me.
So maybe somebody can advise me something?
The Async library does not execute synchronous code in an asynchronous fashion.
5000 items is not a huge number for JavaScript; I've worked on big data sets with 5 million+ points and it doesn't take long. You can get better performance by structuring it like this:
// Ids of DB products that have no counterpart in `items`; removed in one query at the end.
var removeIds = [];
// Index the new items by product_id once, so each DB product needs a single
// lookup instead of a scan over all items. The first item wins on duplicate ids,
// matching the early return of a linear scan.
var itemsById = new Map();
items.forEach(function (item) {
  if (!itemsById.has(item.product_id)) {
    itemsById.set(item.product_id, item);
  }
});
// cycle for products removing
async.each(products, function (dbProduct, callback) {
  // `product` was previously never bound inside the loop; look it up explicitly.
  var product = itemsById.get(dbProduct.product_id);
  if (product) {
    // I need to save desc from db product to new product
    product.description = dbProduct.description;
    return callback();
  }
  // Mark product to remove.
  removeIds.push( dbProduct._id );
  log.warn("REMOVE PRODUCT " + dbProduct.product_id);
  return callback();
}, function() {
  Product.remove({ _id: { $in: removeIds } }, function (err) {
    if (err) return updateDBCallback(err);
    // Continue Here.
    // TODO
  });
});
Among the many problems you may have, off the top of my head you may want to start off by changing this bit:
// Removes exactly one document, matched by this DB product's _id.
Product.remove({ _id: dbProduct._id }, function (err) {
if (err) return updateDBCallback(err);
callback();
});
Being within a .each() call, you'll make one call to the database for each element you want to delete. It's better to store all the ids in one array and then make a single query to delete all elements that have an _id that is in that array. Like this
// Removes every document whose _id is listed in myArrayWithIds, in a single query.
Product.remove({ _id: {$in: myArrayWithIds} }, function (err) {
if (err) return updateDBCallback(err);
callback();
});
On another note, since async will run this synchronous code synchronously, node.js does offer setImmediate() (docs here), which defers a function to a later turn of the event loop. So basically you can "pause" between elements and let any incoming requests be served, simulating "non-blocking" processing.

recursive in callback functions

I have a function with a callback in which I'm using the "listTables" method of DynamoDB. It returns only 100 table names, and if there are any more tables it also returns a field called "LastEvaluatedTableName", which can be used in a new listTables query to fetch the next 100 tables after that name. How can I do this kind of recursion with callbacks in JavaScript?
I have tried the following which is not correct:
// Lists DynamoDB tables, optionally starting after `start`; intended to follow
// LastEvaluatedTableName recursively.
module.exports.ListTables = function (start, callback) {
var params;
if (start) {
params = {
"ExclusiveStartTableName": start
};
}
dynamodb.listTables(params, function (err, data) {
// totalData is created anew on every call, so nothing accumulates across pages.
var totalData = [];
totalData.push(data);
if (data.LastEvaluatedTableName) {
// The recursive call passes no callback, and ListTables has no return
// statement, so `data` is assigned undefined here.
data = module.exports.ListTables(data.LastEvaluatedTableName);
}
callback(err, totalData);
});
}
Please let me know if you need more clarifications!
Thanks!
You need to concat your data, not replace it each time:
// NOTE(review): the value returned by concat() is not stored, and ListTables as
// written in the question has no return statement, so there is nothing to append here.
dynamodb.listTables(params, function (err, data) {
if (data.LastEvaluatedTableName) {
data.concat(module.exports.ListTables(data.LastEvaluatedTableName));
}
callback(err, data);
});
UPDATE
Based on the info from the comment, sounds like you need something like this:
/**
 * Collects all table names by calling dynamodb.listTables repeatedly, following
 * LastEvaluatedTableName until no further page is reported.
 *
 * @param {string} [start] - table name to start after (ExclusiveStartTableName).
 * @param {function(*, Array)} callback - called once with (err, table names collected so far).
 * @param {Array} [totalData] - accumulator used by the recursive calls; callers omit it.
 */
module.exports.ListTables = function (start, callback, totalData) {
  var params;
  if (start) {
    params = {
      "ExclusiveStartTableName": start
    };
  }
  if (!totalData) {
    totalData = [];
  }
  dynamodb.listTables(params, function (err, data) {
    if (err) {
      // Stop paging on failure; reading data.TableNames without this check
      // would throw when no data accompanies the error.
      callback(err, totalData);
      return;
    }
    totalData = totalData.concat(data.TableNames);
    if (data.LastEvaluatedTableName) {
      module.exports.ListTables(data.LastEvaluatedTableName, callback, totalData);
    }
    else {
      callback(err, totalData);
    }
  });
};

Node/Express - How to wait until For Loop is over to respond with JSON

I have a function in my express app that makes multiple queries within a For Loop and I need to design a callback that responds with JSON when the loop is finished. But, I'm not sure how to do this in Node yet. Here is what I have so far, but it's not yet working...
// Express handler: saves each numerically-keyed entry of req.body as a Contact, then responds.
exports.contacts_create = function(req, res) {
var contacts = req.body;
// The IIFE declares (res, contacts) parameters but is invoked with no arguments.
(function(res, contacts) {
for (var property in contacts) { // for each contact, save to db
if( !isNaN(property) ) {
// `contact` is assigned without a declaration.
contact = contacts[property];
var newContact = new Contact(contact);
newContact.user = req.user.id
newContact.save(function(err) {
if (err) { console.log(err) };
}); // .save
}; // if !isNAN
}; // for
// Called right after the loop, without waiting for the save callbacks;
// `self` is not defined anywhere in this snippet.
self.response();
})(); // function
}; // contacts_create
// Replies with the fixed JSON value 'finished'; req and success are not read.
exports.response = function(req, res, success) {
res.json('finished');
};
There are a few problems with your code besides just the callback structure.
var contacts = req.body;
(function(res, contacts) {
...
})(); // function
^ you are redefining contacts and res in the parameter list, but not passing in any arguments, so inside your function res and contacts will be undefined.
Also, not sure where your self variable is coming from, but maybe you defined that elsewhere.
As to the callback structure, you're looking for something like this (assuming contacts is an Array):
exports.contacts_create = function(req, res) {
var contacts = req.body;
var iterator = function (i) {
if (i >= contacts.length) {
res.json('finished'); // or call self.response() or whatever
return;
}
contact = contacts[i];
var newContact = new Contact(contact);
newContact.user = req.user.id
newContact.save(function(err) {
if (err)
console.log(err); //if this is really a failure, you should call response here and return
iterator(i + 1); //re-call this function with the next index
});
};
iterator(0); //start the async "for" loop
};
However, you may want to consider performing your database saves in parallel. Something like this:
// Parallel variant: start every save right away and respond once all have reported back.
var savesPending = contacts.length;
// `i` is pre-bound per contact through bind() below; `err` is whatever save passes to its callback.
var saveCallback = function (i, err) {
if (err)
console.log('Saving contact ' + i + ' failed.');
// Respond only when the last outstanding save has finished.
if (--savesPending === 0)
res.json('finished');
};
for (var i in contacts) {
...
newContact.save(saveCallback.bind(null, i));
}
This way you don't have to wait for each save to complete before starting the next round-trip to the database.
If you're unfamiliar with why I used saveCallback.bind(null, i), it's basically so the callback can know which contact failed in the event of an error. See Function.prototype.bind if you need a reference.

Categories