Javascript async in nested for loop MongoDB - javascript

I have an asynchronous function inside a for loop nested in another for loop.
// recipesArray is an array of arrays of objects
// recipeObject is an array of objects
// currentRecipe is an object
connectToDb(function(){
// LOOP 1
for (var i=0, l=recipesArray.length; i < l; i++) {
// recipeObject is an
var recipeObject = recipesArray[i];
// LOOP 2
for (var x=0, y=recipeObject.length; x < y; x++) {
var currentRecipe = recipeObject[x];
// this is an asynchronous function
checkRecipe(currentRecipe, function (theRecipe) {
if (theRecipe === undefined) {
console.log('RECIPE NOT FOUND');
} else {
console.log('RECIPE FOUND', theRecipe);
}
});
}
}
});
I need to add data to the recipesArray based on the results of the checkRecipe function.
I've been trying different things...
- do i try to keep track of i and x...
- do i try to have multiple callbacks...
- do i even need to do all of that, or is there some other way....
I also tried using the async library for node(which actually has been very helpful with other situations), but the forEach doesn't take objects(only an array).
Stuck.
Any suggestions would be greatly appreciated.

Assuming checkRecipe() can be run in parallel with no limits, here's how you might use async.each():
connectToDb(function() {
  // Checks one recipe; a missing recipe is reported to async as an error.
  var verifyRecipe = function(currentRecipe, recipeDone) {
    checkRecipe(currentRecipe, function(theRecipe) {
      var missing = theRecipe === undefined;
      if (missing) {
        return recipeDone(new Error('Recipe not found'));
      }
      recipeDone();
    });
  };

  // Checks every recipe of one inner array, then signals the outer iteration.
  var verifyGroup = function(subArray, groupDone) {
    async.each(subArray, verifyRecipe, groupDone);
  };

  // Final callback of the outer iteration: receives the first error, if any.
  var reportOutcome = function(err) {
    if (err) {
      return console.error('Error: ' + err);
    }
    // success, all recipes found
  };

  async.each(recipesArray, verifyGroup, reportOutcome);
});

Related

Log only shows one of four rows of data

I am writing a small Node js application for automatic vehicle location system.
Here is the code for where I am getting trouble.
markerData contains 4 rows but only in the log I can see the last row.
for (var i = 0, len = markerData.length; i < len; i++) {
var thisMarker = markerData[i];
sql.connect(config, function (err) {
var request = new sql.Request();
request.input('myval', sql.Int, thisMarker.id);
request.query('SELECT d.id, d.name, d.lastupdate, p.latitude, p.longitude, p.speed, p.course FROM dbo.devices AS d INNER JOIN dbo.positions AS p ON d.positionid = p.id AND d.id = p.deviceid WHERE (d.id = #myval)', function (err, recordset2) {
if (typeof recordset2 != 'undefined') {
thisMarker.position.lat = recordset2[0].latitude;
thisMarker.position.long = recordset2[0].longitude;
console.log(recordset2[0].id);
}
});
});
}
Please help me to solve the issue.
Because `var` is function-scoped rather than block-scoped, every callback created in the loop shares the same `thisMarker` variable. The `sql` module connects to the database asynchronously, so by the time the connection callbacks run, the synchronous loop has already finished and `thisMarker` refers to the last object. That is why only the last row is printed.
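Instead of a synchronous loop (a plain `for` or `_.each`), I would recommend using the async module's async.each, since you have several asynchronous operations to run: each item is handed to its own callback invocation, so the loop variable is no longer shared.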
You can check for samples here,
http://justinklemm.com/node-js-async-tutorial/
Here is your updated code with async.each
-> Install async module with npm install async --save
-> Then add the below reference in the required place,
// Reference
var async = require('async');
-> Modified code:
// Connect once, then look up the current position of every marker.
// async.each hands each marker to its own callback invocation, so the
// query callbacks no longer share a single loop variable.
sql.connect(config, function (err) {
  if (err) {
    console.log('Connection error: ');
    console.log(err);
    return;
  }

  // A value registered with request.input('myval', ...) is referenced in the
  // SQL text as @myval ("#myval" would be read by T-SQL as a temp-table name).
  var positionQuery = 'SELECT d.id, d.name, d.lastupdate, p.latitude, p.longitude, p.speed, p.course FROM dbo.devices AS d INNER JOIN dbo.positions AS p ON d.positionid = p.id AND d.id = p.deviceid WHERE (d.id = @myval)';

  async.each(markerData, function (thisMarker, callback) {
    var request = new sql.Request();
    request.input('myval', sql.Int, thisMarker.id);
    request.query(positionQuery, function (err, recordset2) {
      if (err) {
        // Best effort: log the failure and keep processing the other markers.
        console.log(err);
        return callback();
      }
      // Assumes recordset2 is the array of rows (as the [0] indexing implies).
      // Guard against both a missing recordset and one with zero rows, so
      // recordset2[0] is never dereferenced when the device has no position.
      if (recordset2 && recordset2.length > 0) {
        thisMarker.position.lat = recordset2[0].latitude;
        thisMarker.position.long = recordset2[0].longitude;
        console.log(recordset2[0].id);
      } else {
        console.log('Recordset empty for id: ' + thisMarker.id);
      }
      callback();
    });
  }, function (err) {
    // Called once every marker has been processed.
    if (err) {
      console.log(err);
    }
  });
});
I'm not entirely sure how your library works, but presumably recordset2 is an array of records. recordset2[0] is therefore the first record. If you want the next one you should probably try recordset2[1] and so on and so forth.
Arrays: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array
You'll probably need to loop through all the elements in the array at some point. use a for loop for that:
// Print every row returned by the query, not only the first one.
for (var i = 0; i < recordset2.length; i++) {
  console.log(recordset2[i]);
}
That will print out everything your query returns.

How can I throttle stack of api requests?

I have an array of ids, and I want to make an api request for each id, but I want to control how many requests are made per second, or better still, have only 5 open connections at any time, and when a connection is complete, fetch the next one.
Currently I have this, which just fires off all the requests at the same time:
_.each([1,2,3,4,5,6,7,8,9,10], function(issueId) {
github.fetchIssue(repo.namespace, repo.id, issueId, filters)
.then(function(response) {
console.log('Writing: ' + issueId);
writeIssueToDisk(fetchIssueCallback(response));
});
});
Personally, I'd use Bluebird's .map() with the concurrency option since I'm already using promises and Bluebird for anything async. But, if you want to see what a hand-coded counter scheme that restricts how many concurrent requests can run at once looks like, here's one:
/**
 * Runs `fn` for every element of `collection`, keeping at most `max`
 * invocations in flight at any time.
 *
 * @param {Array} collection - items to process, taken in index order.
 * @param {number} max - maximum number of concurrent `fn` invocations.
 * @param {function(*, function(*=, *=))} fn - called as fn(item, cb); it must
 *   call cb(err) exactly once, either synchronously or later.
 * @param {function(*=)} done - called exactly once: with the first error
 *   reported by `fn`, or with no arguments after every item has completed.
 */
function limitEach(collection, max, fn, done) {
  var cntr = 0, index = 0, errFlag = false, finished = false;

  // Single exit point. When fn calls back synchronously, runMore() re-enters
  // itself and every nested frame reaches the completion check below, so
  // without this latch `done` would fire once per frame.
  function finish(err) {
    if (finished) return;
    finished = true;
    if (err) {
      done(err);
    } else {
      done();
    }
  }

  function runMore() {
    // Start new work until the limit is reached or the items run out.
    while (!errFlag && cntr < max && index < collection.length) {
      ++cntr;
      fn(collection[index++], function(err, data) {
        --cntr;
        // After the first error, late completions are ignored.
        if (errFlag) return;
        if (err) {
          errFlag = true;
          finish(err);
        } else {
          runMore();
        }
      });
    }
    // Nothing in flight and nothing left to start: everything is done.
    // This also covers an empty collection.
    if (!errFlag && cntr === 0 && index === collection.length) {
      finish();
    }
  }

  runMore();
}
With Bluebird:
// Demo of Bluebird's concurrency-limited map. `Promise` here is Bluebird's
// (the snippet loads bluebird.min.js), not the native Promise, which has
// neither delay() nor map().
// fetch(id) simulates a request: it logs the start, waits, then logs completion.
function fetch(id) {
console.log("Fetching " + id);
// Simulated 2000 ms request; the delayed promise carries `id` through to the
// next handler (Bluebird 3.x argument order: delay(ms, value)).
return Promise.delay(2000, id)
.then(function(id) {
console.log(" Fetched " + id);
});
}
var ids = [1,2,3,4,5,6,7,8,9,10];
// The concurrency option caps how many fetch() calls are pending at once (3 here).
Promise.map(ids, fetch, { concurrency: 3 });
<script src="https://cdnjs.cloudflare.com/ajax/libs/bluebird/3.3.1/bluebird.min.js"></script>
<!-- results pane console output; see http://meta.stackexchange.com/a/242491 -->
<script src="http://gh-canon.github.io/stack-snippet-console/console.min.js"></script>
Divide your data into as many arrays as you want concurrent connections. Schedule with setTimeout, and have the completion callback handle the rest of the sub-array.
Wrap the setTimeout in a function of its own so that the variable values are frozen to their values at the time of delayed_fetch() invocation.
/**
 * Fetches the issues listed in `issueIds` one after another: after `delay`
 * milliseconds the first id is removed from the array and requested, and once
 * that issue has been written the next id is requested with no extra delay.
 *
 * @param {number} delay - milliseconds to wait before the first request.
 * @param {*} namespace - passed through to github.fetchIssue.
 * @param {*} id - repository id, passed through to github.fetchIssue.
 * @param {Array} issueIds - ids still to fetch; consumed (mutated) as it goes.
 * @param {*} filters - passed through to github.fetchIssue.
 */
function delayed_fetch(delay, namespace, id, issueIds, filters) {
  setTimeout(function() {
    // Stop once the sub-array is exhausted. Without this guard shift()
    // returns undefined and the chain keeps requesting issue `undefined`
    // forever.
    if (issueIds.length === 0) {
      return;
    }
    var issueId = issueIds.shift();
    github.fetchIssue(namespace, id, issueId, filters).then(function(response) {
      console.log('Writing: ' + issueId);
      writeIssueToDisk(fetchIssueCallback(response));
      // This connection is free again: continue with the next id right away.
      delayed_fetch(0, namespace, id, issueIds, filters);
    });
  }, delay);
}
var i=0;
_.each([ [1,2] , [3,4], [5,6], [7,8], [9,10] ], function(issueIds) {
var delay=++i*200; // millisecond
delayed_fetch(delay, repo.namespace, repo.id, issueIds, filters);
});
i'd recommend using throat just for this: https://github.com/ForbesLindesay/throat
Using Bluebird
function getUserFunc(user) {
//Get a collection of user
}
function getImageFunc(id) {
//get a collection of image profile based on id of the user
}
/**
 * Loads the users for `response`, then fetches the profile image of each
 * user with at most 5 image requests pending at a time.
 *
 * Assumes getUserFunc() returns a promise for an array of users with an `id`
 * and getImageFunc() returns a promise for an object with an `image`
 * property. `Promise` must be Bluebird's, since Promise.map is not part of
 * the native Promise.
 *
 * @param {*} response - passed through to getUserFunc.
 * @returns {Promise<Array>} the `image` of every user, in user order.
 */
function search(response) {
  return getUserFunc(response).then(users => {
    // Plain ids, not promises: Promise.map starts the work itself.
    const ids = users.map(user => user.id);
    const fetchImage = id => getImageFunc(id).then(profile => profile.image);
    return Promise.map(ids, fetchImage, { concurrency: 5 });
  });
}
Previously I used the ES6 function Promise.all(), but it didn't behave the way I expected. I then switched to the third-party library bluebird.js, and it works like a charm.

Giving a provided Node JS callback my own custom Callback

First off I thought I'd get this problem solved after this great thread: nodeJs callbacks simple example
However, I am still unsure of how to proceed. Like the title hints at: I need a callback given to a callback who already has node arguments being passed to it
Code:
(function()
var reqs = {
http: require('http'),
path: require('path'),
fs: require('fs')
};
reqs.http.createServer(function (request, response) {
response.writeHead(200, {
'Content-Type': 'text/plain'
});
response.end('Hello HTTP!');
}).listen(8080);
var printCount = function(count) {
console.log(count);
};
var callCount = function(err, list, callback) {
var count = 0;
if(err) throw err;
// console.log(err);
for (var i = 0; i < list.length; i++) {
// console.log(reqs.path.extname(list[i]));
if(reqs.path.extname(list[i]) === ".png" || reqs.path.extname(list[i]) === ".jpg")
{
count++;
console.log(count);
}
}
callback(count);
};
//count images from executing directory
var countImages = function(dirName) {
var imageCount = reqs.fs.readdir(dirName, callCount(null, null, printCount));
};
countImages(__dirname);
})();
I think the key line here is
var imageCount = reqs.fs.readdir(dirName, callCount(null, null, printCount));
I'm passing the printCount function to the same function that is called back after fs.readdir asynchronously executes but it seems that me passing null to its first two arguments is overriding Node functionality that passes the callback err and list automatically. How can I get around this? I simply want to count the images in the executing directory and be able to store that value in my main function.
Pretty new to event style programming. Any extra reading suggestions are welcome. There is tons of content out there but I really want to get this up and running for a meeting this weekend. Thanks guys!
You can't quite do what you are doing: callCount(null, null, printCount) executes the function immediately and hands its return value to readdir, but readdir needs a function as its callback. What you want is something like the following, which captures the callback you want and returns a function that you can pass as the callback to your API call:
// Builds a readdir-style callback (err, list) that counts the .png/.jpg
// entries in `list`, logging the running count for each match, and then
// passes the total to `callback`.
var callCount = function(callback) {
  // True when the file name has one of the counted image extensions.
  var isImage = function(fileName) {
    var ext = reqs.path.extname(fileName);
    return ext === ".png" || ext === ".jpg";
  };

  return function(err, list) {
    if (err) throw err;
    var count = 0;
    list.forEach(function(entry) {
      if (!isImage(entry)) return;
      count += 1;
      console.log(count);
    });
    callback(count);
  };
};
and then
reqs.fs.readdir(dirName, callCount(printCount));

node.js compare two arrays with objects

I need to remove all documents from my Mongo DB that don't exist in a new array of objects.
So I have array with objects like :
// Incoming products; DB documents whose product_id is not listed here are the
// ones to remove.
var items = [
  {product_id:15, pr_name: 'a', description : 'desc'},
  {product_id:44, pr_name: 'b', description : 'desc2'},
  {product_id:32, pr_name: 'c', description : 'desc3'}
];
and I have array with db values which I get by calling Model.find({}).
So now I do it in a 'straight' way:
async.each(products, function (dbProduct, callback) { //cycle for products removing
var equals = false;
async.each(items, function(product, callback){
if (dbProduct.product_id === product.product_id){
product.description = dbProduct.description;// I need to save desc from db product to new product
equals = true;
}
callback();
});
if (!equals) {
log.warn("REMOVE PRODUCT " + dbProduct.product_id);
Product.remove({ _id: dbProduct._id }, function (err) {
if (err) return updateDBCallback(err);
callback();
});
}
});
But it blocks the whole app and is very slow, because I have around 5000 values in my items array and in the database too, so the number of iterations is huge.
Maybe there can be a faster way?
UPDATE1
Using code below, from TbWill4321 answer:
// Collects the _id of every DB product that has no counterpart in `items`,
// then removes them all with a single query.
var removeIds = [];
// cycle for products removing
async.each(products, function (dbProduct, callback) {
  for ( var i = 0; i < items.length; i++ ) {
    // Bind the incoming item being compared; `product` was otherwise
    // undeclared here and the comparison threw a ReferenceError.
    var product = items[i];
    if (dbProduct.product_id === product.product_id) {
      // I need to save desc from db product to new product
      product.description = dbProduct.description;
      // Return early for performance
      return callback();
    }
  }
  // Mark product to remove.
  removeIds.push( dbProduct._id );
  log.warn("REMOVE PRODUCT " + dbProduct.product_id);
  return callback();
}, function() {
  Product.remove({ _id: { $in: removeIds } }, function (err) {
    if (err) return updateDBCallback(err);
    // Continue Here.
    // TODO
  });
});
It takes around 11 seconds (blocking the whole web app) and 12 362 878 iterations for me.
So maybe somebody can advise me something?
The Async library does not execute synchronous code in an asynchronous fashion.
5000 items is not a huge number for JavaScript; I've worked on big data sets with 5 million+ points and it doesn't take long. You can get better performance by structuring it like this:
// Collects the _id of every DB product that has no counterpart in `items`,
// then removes them all with a single query.
var removeIds = [];

// Index the incoming items by product_id once, so each DB product is matched
// with one lookup instead of a scan over all items (products x items
// comparisons). Map keys compare without type coercion, like ===.
var itemsById = new Map();
items.forEach(function (item) {
  // Keep the first item per id, which is what a front-to-back scan would find.
  if (!itemsById.has(item.product_id)) {
    itemsById.set(item.product_id, item);
  }
});

// cycle for products removing
async.each(products, function (dbProduct, callback) {
  var product = itemsById.get(dbProduct.product_id);
  if (product) {
    // I need to save desc from db product to new product
    product.description = dbProduct.description;
    return callback();
  }
  // Mark product to remove.
  removeIds.push( dbProduct._id );
  log.warn("REMOVE PRODUCT " + dbProduct.product_id);
  return callback();
}, function() {
  // One query removes every marked document.
  Product.remove({ _id: { $in: removeIds } }, function (err) {
    if (err) return updateDBCallback(err);
    // Continue Here.
    // TODO
  });
});
Among the many problems you may have, off the top of my head you may want to start off by changing this bit:
Product.remove({ _id: dbProduct._id }, function (err) {
if (err) return updateDBCallback(err);
callback();
});
Being within a .each() call, you'll make one call to the database for each element you want to delete. It's better to store all the ids in one array and then make a single query to delete all elements that have an _id that is in that array. Like this
Product.remove({ _id: {$in: myArrayWithIds} }, function (err) {
if (err) return updateDBCallback(err);
callback();
});
On another note, since async will execute synchronously, node.js does offer setImmediate() (docs here), that will execute the function from within the event loop. So basically you can "pause" execution of new elements and serve any incoming requests to simulate "non-blocking" processing.

Integrating asynchronous mongo call within an inner forEach loop

I have two loops: the outer one loops over the users and the inner one loops over the venue IDs of each user. Within the inner loop I want to look up the venue and attach it to an array defined in the outer loop (userItem). However, because forEach is synchronous and the Mongo database lookup is asynchronous, the result always remains empty. I've tried to integrate this answer, but to no avail. How can I do this?
ret = [];
users.forEach(function(user) {
var userItem = user.getSanitised('ADM');
userItem.venues = [];
var tmp = [];
userItem.adminVenueIds.forEach(function(adminVenueId){
tmp.push(function(callback) {
Venue.findOne({_id:adminVenueId}, function(error, venue) {
callback(null, venue.toObject());
});
});
});
async.parallel(userItem.venues, function(err, result) {
/* this code will run after all calls finished the job or
when any of the calls passes an error */
if (err)
return console.log(err);
userItem.venues.push(result);
});
ret.push(userItem);
});
Tried the following as well but doesn't work also
users.forEach(function(user) {
var userItem = [];
async.series({
setUserItem : function(callback)
{
userItem = user.getSanitised('ADM');
callback(null, 'OK');
},
setUserVenues : function(callback)
{
userItem.venues = [];
user.adminVenueIds.forEach(function(adminVenueId,index) {
Venue.findOne({_id:adminVenueId}, function(error, venue) {
userItem.venues.push(venue.toObject());
if((index+1) == user.adminVenueIds.length)
callback(null, 'OK');
});
});
}
},
function(error, results) {
if(error)
winston.error(error);
ret.push(userItem);
}
);
});
You could simply put an if statement (in your case put the conditional as the array length) then when the loop is done you could then make it continue doing its thing by calling a function to continue (or put your code in there, but it will start to look messy)
// Demo: start OUTER x INNER asynchronous tasks and continue (finishIt) only
// after the last one has completed.
var ret = [];
var test = [];
var OUTER = 20;
var INNER = 20;
// Number of results expected once every timer has fired.
var expected = OUTER * INNER;
for (var i = 0; i < OUTER; i++) {
  // `let` gives each timer callback its own x. With `var`, every callback
  // would see the final value (20) by the time it runs.
  for (let x = 0; x < INNER; x++) {
    setTimeout(function() {
      test.push("Test" + x);
      // Completion is judged by how many results have arrived, not by the
      // loop index, so finishIt() runs exactly once, after the last task.
      if (test.length === expected) {
        finishIt();
      }
    }, 300);
  }
}
// Continuation: runs once all results have been collected.
function finishIt() {
  console.log(test);
  ret.push(test);
}
I think you might want to look into using Mongoose. It is a NodeJS application layer on top of MongoDB that provides a more SQL like experience.
http://mongoosejs.com
I ended up with the following solution. It's dirty but I guess that's just nodejs being nodejs.
// For every user: start one Venue lookup per admin venue id, then poll until
// as many venues have been collected as there are ids, and only then append
// the finished userItem to `ret`.
users.forEach(function(user) {
var userItem = user.getSanitised('ADM');
userItem.venues = [];
user.adminVenueIds.forEach(function(adminVenueId) {
// Lookup is asynchronous; venues are pushed in completion order.
// NOTE(review): `error` is never checked. If a lookup yields no venue,
// venue.toObject() would presumably throw and the counts below would never
// match - confirm how Venue.findOne reports a miss.
Venue.findOne({_id:adminVenueId}, function(error, venue) {
userItem.venues.push(venue.toObject());
});
});
// Self-rescheduling check: if the collected count does not yet equal the id
// count, run this same function again in 30 ms.
// NOTE(review): arguments.callee is not allowed in strict mode.
(function(){
if(userItem.venues.length == user.adminVenueIds.length) {
ret.push(userItem);
} else {
setTimeout(arguments.callee, 30);
}
})();
});

Categories