Populating async array with a function called right before. - javascript

var request = require('request'),
requests = [],
values = [],
request("url1", function());
function() {
.....
for (x in list){
requests.push(requestFunction(x));
}
}
requestFunction(x){
request("url2", function (e,r,b) {
....
return function(callback) {
values[i] = b
}
});
}
async.parallel(requests, function (allResults) {
// values array is ready at this point
// the data should also be available in the allResults array
console.log(values);
});
I new to node. Issue is that the request needs to be called to populate the requests callback array. But the issue is the async.parallel will run before the requests array is full and need run all the callbacks. Where do I move this async so it runs after the requests array is full?

Asynchronous programming is all about chaining blocks. This allows node to efficiently run its event queue, while ensuring that your steps are done in order. For example, here's a query from a web app I wrote:
app.get("/admin", isLoggedIn, isVerified, isAdmin, function (req, res) {
User.count({}, function (err, users) {
if (err) throw err;
User.count({"verified.isVerified" : true}, function (err2, verifiedUsers) {
if (err2) throw err2;
Course.count({}, function (err3, courses) {
// and this continues on and on — the admin page
// has a lot of information on all the documents in the database
})
})
})
})
Notice how I chained function calls inside of one another. Course.count({}, ...) could only be called once User.count({"verified.isVerified" : true}, ...) was called. This means the i/o is never blocked and the /admin page is never rendered without the required information.
You didn't really give enough information regarding your problem (so there might be a better way to fix it), but I think you could, for now, do this:
var request = require('request'),
requests = [],
values = [],
length; // a counter to store the number of times to run the loop
request("url1", function() {
length = Object.keys(list).length;
// referring to the list below;
// make sure list is available at this level of scope
for (var x in list){
requests.push(requestFunction(x));
length--;
if (length == 0) {
async.parallel(requests, function (allResults) {
console.log(values); // prints the values array
});
}
}
}
function requestFunction(x) {
request("url2", function (e,r,b) {
values[i] = b;
return b;
}
}
I am assuming that requestFunction() takes a while to load, which is why async.parallel is running before the for (var x in list) loop finishes. To force async.parallel to run after the loop finishes, you'll need a counter.
var length = Object.keys(list).length;
This returns the number of keys in the list associative array (aka object). Now, every time you run through the for loop, you decrement length. When length == 0, you then run your async.parallel process.
edit: You could also write the requests.push() part as:
requests.push(
(function () {
request("url2", function (e,r,b) {
values[i] = b;
return b;
}
})()
);
I think it's redundant to store b in both values and requests, but I have kept it as you had it.

Related

How can I insert objects in SQL Server running a function in a loop? ConnectionError: .connect can not be called on a Connection in `Connecting` state

I'm working in a NodeJS project, this project I decided to change the way I'm doing it because this way wasn't working, let me try to explain it.
I need to insert data into a SQL Server DB, so I did a function insertOffice() this function opens a connection using Tedious, then fetchs data to an url with data from an array data2 to load coords, and then with this coords creates an object, then inserts this object into a DB. When inserting only one part of my data2 array, it works, by only sendind data[0] it adds:
{
latjson: 1,
lonjson: 1,
idoficina: "1",
}
But I want to insert both of the parts of my array, changing data2[0] to data2[index], to be able to insert all my array, so I tried creating another function functionLooper()that loops insertOffice() to insert my data from my array data2.
I builded this little code to learn how to loop a function, this prints index that is the value I use for bringing idoficina.
As you can see functionLooper() runs the code twice, so it can read fully data2 array, I have this little code that works with the same logic, I builded my full code using this:
function insertOffice(index) {
console.log(index);
}
function functionLooper() {
for (let i = 0; i < 5; i++) {
let response = insertOffice(i);
}
}
functionLooper();
This prints:
0
1
2
3
4
So my code it's supposed to send index
I'm expecting my code to loop my insertOffice() and being able to insert my objects, the issue is that this doesn't seems to work as I am getting this error:
C:\...\node_modules\tedious\lib\connection.js:993
throw new _errors.ConnectionError('`.connect` can not be called on a Connection in `' + this.state.name + '` state.');
^
ConnectionError: `.connect` can not be called on a Connection in `Connecting` state.
this is my code:
var config = {
....
};
const data2 = [
...
];
var connection = new Connection(config);
function insertOffice(index) {
console.log(index)
connection.on("connect", function (err) {
console.log("Successful connection");
});
connection.connect();
const request = new Request(
"EXEC SPInsert #Data1, ... ",
function (err) {
if (err) {
console.log("Couldn't insert, " + err);
} else {
console.log("Inserted")
}
}
);
console.log(myObject.Id_Oficina)
request.addParameter("Data1", TYPES.SmallInt, myObject.Id_Oficina);
request.on("row", function (columns) {
columns.forEach(function (column) {
if (column.value === null) {
console.log("NULL");
} else {
console.log("Product id of inserted item is " + column.value);
}
});
});
request.on("requestCompleted", function () {
connection.close();
});
connection.execSql(request);
}
function functionLooper() {
for (let i = 0; i < 2; i++) {
let response = insertOffice(i);
}
}
functionLooper();
I do not know if this is the right way to do it (looping the inserting function insertOffice()twice), if you know a better way to do it and if you could show me how in an example using a similar code to mine, would really appreciate it.
You're approaching an asynchronous problem as if it's a synchronous one. You're also making your life a bit harder by mixing event based async tasks with promise based ones.
For example, connection.connect() is asynchronous (meaning that it doesn't finish all its work before the next lines of code is executed), it is only done when connection emits the connect event. So the trigger for starting the processing of your data should not be started until this event is fired.
For each of the events in your loop they are not running one at a time but all at the same time because the fetch() is a promise (asynchronous) it doesn't complete before the next iteration of the loop. In some cases it may have even finished before the database connection is ready, meaning the code execution has moved on to DB requests before the connection to the database is established.
To allow your code to be as manageable as possible you should aim to "promisify" the connection / requests so that you can then write an entirely promise based program, rather than mixing promises and events (which will be pretty tricky to manage - but is possible).
For example:
const connection = new Connection(config);
// turn the connection event into a promise
function connect() {
return new Promise((resolve, reject) => {
connection.once('connect', (err) => err ? reject(err) : resolve(connection));
connection.connect()
});
}
// insert your data once the connection is ready and then close it when all the work is done
function insertOffices() {
connect().then((conn) => {
// connection is ready I can do what I want
// NB: Make sure you return a promise here otherwise the connection.close() call will fire before it's done
}).then(() => {
connection.close();
});
}
The same approach can be taken to "promisify" the inserts.
// turn a DB request into a promise
function request(conn) {
return new Promise((resolve, reject) => {
const request = new Request(...);
request.once('error', reject);
request.once('requestCompleted', resolve);
conn.execSql(request);
});
}
This can then be combined to perform a loop where it's executed one at a time:
function doInserts() {
return connect().then((conn) => {
// create a "chain" of promises that execute one after the other
let inserts = Promise.resolve();
for (let i = 0; i < limit; i++) {
inserts = inserts.then(() => request(conn));
}
return inserts;
}).then(() => connection.close())
}
or in parallel:
function doInserts() {
return connect().then((conn) => {
// create an array of promises that all execute independently
// NB - this probably won't work currently because it would need
// multiple connections to work (rather than one)
let inserts = [];
for (let i = 0; i < limit; i++) {
inserts.push(request(conn));
}
return Promise.all(inserts);
}).then(() => connection.close())
}
Finally I could fix it, I'm sharing my code for everyone to could use it and do multiple inserts, thanks to Dan Hensby, I didn't do it his way but used part of what he said, thanks to RbarryYoung and MichaelSun90 who told me how, just what I did was changing my
var connection = new Connection(config);
to run inside my
function insertOffice(index) { ... }
Looking like this:
function insertOffice(index) {
var connection = new Connection(config);
....
}

Node.JS How to set a variable outside the current scope

I have some code that I cant get my head around, I am trying to return an array of object using a callback, I have a function that is returning the values and then pushing them into an array but I cant access this outside of the function, I am doing something stupid here but can't tell what ( I am very new to Node.JS )
for (var index in res.response.result) {
var marketArray = [];
(function () {
var market = res.response.result[index];
createOrUpdateMarket(market, eventObj , function (err, marketObj) {
marketArray.push(marketObj)
console.log('The Array is %s',marketArray.length) //Returns The Array is 1.2.3..etc
});
console.log('The Array is %s',marketArray.length) // Returns The Array is 0
})();
}
You have multiple issues going on here. A core issue is to gain an understanding of how asynchronous responses work and which code executes when. But, in addition to that you also have to learn how to manage multiple async responses in a loop and how to know when all the responses are done and how to get the results in order and what tools can best be used in node.js to do that.
Your core issue is a matter of timing. The createOrUpdateMarket() function is probably asynchronous. That means that it starts its operation when the function is called, then calls its callback sometime in the future. Meanwhile the rest of your code continues to run. Thus, you are trying to access the array BEFORE the callback has been called.
Because you cannot know exactly when that callback will be called, the only place you can reliably use the callback data is inside the callback or in something that is called from within the callback.
You can read more about the details of the async/callback issue here: Why is my variable unaltered after I modify it inside of a function? - Asynchronous code reference
To know when a whole series of these createOrUpdateMarket() operations are all done, you will have to code especially to know when all of them are done and you cannot rely on a simple for loop. The modern way to do that is to use promises which offer tools for helping you manage the timing of one or more asynchronous operations.
In addition, if you want to accumulate results from your for loop in marketArray, you have to declare and initialize that before your for loop, not inside your for loop. Here are several solutions:
Manually Coded Solution
var len = res.response.result.length;
var marketArray = new Array(len), cntr = 0;
for (var index = 0, index < len; index++) {
(function(i) {
createOrUpdateMarket(res.response.result[i], eventObj , function (err, marketObj) {
++cntr;
if (err) {
// need error handling here
}
marketArray[i] = marketObj;
// if last response has just finished
if (cntr === len) {
// here the marketArray is fully populated and all responses are done
// put your code to process the marketArray here
}
});
})(index);
}
Standard Promises Built Into Node.js
// make a version of createOrUpdateMarket that returns a promise
function createOrUpdateMarketAsync(a, b) {
return new Promise(function(resolve, reject) {
createOrUpdateMarket(a, b, function(err, marketObj) {
if (err) {
reject(err);
return;
}
resolve(marketObj);
});
});
}
var promises = [];
for (var i = 0; i < res.response.result.length; i++) {
promises.push(createorUpdateMarketAsync(res.response.result[i], eventObj));
}
Promise.all(promises).then(function(marketArray) {
// all results done here, results in marketArray
}, function(err) {
// an error occurred
});
Enhanced Promises with the Bluebird Promise library
The bluebird promise library offers Promise.map() which will iterate over your array of data and produce an array of asynchronously obtained results.
// make a version of createOrUpdateMarket that returns a promise
var Promise = require('bluebird');
var createOrUpdateMarketAsync = Promise.promisify(createOrUpdateMarket);
// iterate the res.response.result array and run an operation on each item
Promise.map(res.response.result, function(item) {
return createOrUpdateMarketAsync(item, eventObj);
}).then(function(marketArray) {
// all results done here, results in marketArray
}, function(err) {
// an error occurred
});
Async Library
You can also use the async library to help manage multiple async operations. In this case, you can use async.map() which will create an array of results.
var async = require('async');
async.map(res.response.result, function(item, done) {
createOrUpdateMarker(item, eventObj, function(err, marketObj) {
if (err) {
done(err);
} else {
done(marketObj);
}
});
}, function(err, results) {
if (err) {
// an error occurred
} else {
// results array contains all the async results
}
});

Getting values from a loop of asynchronous requests

I am trying to write a nodejs program that queries github for a list of repos (via a Node wrapper for the github API: https://www.npmjs.com/package/github) and retrieves the git clone url in an array, which I then wish to sort alphabetically.
Due to the asynchronous nature of the calls, I am not sure how to wait until all of the async requests are returned?
Here is the loop in question. repoArrayis an array of repos in [username/reponame] format
var urls = [];
for (var i=0; i < repoArray.length; i++) {
var components = repoArray[i].split('/');
github.repos.get({
user: components[0],
repo: components[1]
}, function(err, res) {
urls.push(res.ssh_url);
});
}
// do a case-insensitive sort
urls.sort(function(a,b) {
return a.localeCompare(b, 'en', {'sensitivity': 'base'});
});
console.log("urls: " + urls);
Basically, since github.repos.get() calls in the loop are all asynchronous/callback-based, when the code reaches urls.sort() and then the console.log(), none or some of the github.repos.get() calls are done yet.
I am not that familiar with promises or deferreds, but is that the way to go? I'm not sure how I could refactor that loop so that urls.sort() is called only after all the requests from the loop are complete?
The Async library is meant for exactly these scenarios, and is usually what people tend to use for these problems. It can help you execute asynchronous tasks in parallel and execute a callback when they all finish, using async.each.
var async = require('async');
var urls = [];
//make each HTTP request
function process(repo,callback){
var components = repo.split('/');
github.repos.get({
user: components[0],
repo: components[1]
}, function(err, res) {
if(err){
// call callback(err) if there is an error
return callback(err);
}
else{
urls.push(res.ssh_url);
// call callback(null) if it was a success,
return callback(null);
}
});
}
// this will iterate over repoArray and pass each repo to the 'process' function.
// if any of the calls to 'process' result in an error,
// the final callback will be immediately called with an error object
async.each(repoArray,process,function(error){
if(error){
console.error('uh-oh: '+error)
return;
}
else{
// do a case-insensitive sort
urls.sort(function(a,b) {
return a.localeCompare(b, 'en', {'sensitivity': 'base'});
});
console.log("urls: " + urls);
}
});
edit: since you are sorting them at the end, the urls will be in order.

How to write an asynchronous for-each loop in Express.js and mongoose?

I have a function that returns an array of items from MongoDB:
var getBooks = function(callback){
Comment.distinct("doc", function(err, docs){
callback(docs);
}
});
};
Now, for each of the items returned in docs, I'd like to execute another mongoose query, gather the count for specific fields, gather them all in a counts object, and finally pass that on to res.render:
getBooks(function(docs){
var counts = {};
docs.forEach(function(entry){
getAllCount(entry, ...){};
});
});
If I put res.render after the forEach loop, it will execute before the count queries have finished. However, if I include it in the loop, it will execute for each entry. What is the proper way of doing this?
I'd recommend using the popular NodeJS package, async. It's far easier than doing the work/counting, and eventual error handling would be needed by another answer.
In particular, I'd suggest considering each (reference):
getBooks(function(docs){
var counts = {};
async.each(docs, function(doc, callback){
getAllCount(entry, ...);
// call the `callback` with a error if one occured, or
// empty params if everything was OK.
// store the value for each doc in counts
}, function(err) {
// all are complete (or an error occurred)
// you can access counts here
res.render(...);
});
});
or you could use map (reference):
getBooks(function(docs){
async.map(docs, function(doc, transformed){
getAllCount(entry, ...);
// call transformed(null, theCount);
// for each document (or transformed(err); if there was an error);
}, function(err, results) {
// all are complete (or an error occurred)
// you can access results here, which contains the count value
// returned by calling: transformed(null, ###) in the map function
res.render(...);
});
});
If there are too many simultaneous requests, you could use the mapLimit or eachLimit function to limit the amount of simultaneous asynchronous mongoose requests.
forEach probably isn't your best bet here, unless you want all of your calls to getAllCount happening in parallel (maybe you do, I don't know — or for that matter, Node is still single-threaded by default, isn't it?). Instead, just keeping an index and repeating the call for each entry in docs until you're done seems better. E.g.:
getBooks(function(docs){
var counts = {},
index = 0,
entry;
loop();
function loop() {
if (index < docs.length) {
entry = docs[index++];
getAllCount(entry, gotCount);
}
else {
// Done, call `res.render` with the result
}
}
function gotCount(count) {
// ...store the count, it relates to `entry`...
// And loop
loop();
}
});
If you want the calls to happen in parallel (or if you can rely on this working in the single thread), just remember how many are outstanding so you know when you're done:
// Assumes `docs` is not sparse
getBooks(function(docs){
var counts = {},
received = 0,
outstanding;
outstanding = docs.length;
docs.forEach(function(entry){
getAllCount(entry, function(count) {
// ...store the count, note that it *doesn't* relate to `entry` as we
// have overlapping calls...
// Done?
--outstanding;
if (outstanding === 0) {
// Yup, call `res.render` with the result
}
});
});
});
In fact, getAllCount on first item must callback getAllCount on second item, ...
Two way: you can use a framework, like async : https://github.com/caolan/async
Or create yourself the callback chain. It's fun to write the first time.
edit
The goal is to have a mechanism that proceed like we write.
getAllCountFor(1, function(err1, result1) {
getAllCountFor(2, function(err2, result2) {
...
getAllCountFor(N, function(errN, resultN) {
res.sender tout ca tout ca
});
});
});
And that's what you will construct with async, using the sequence format.

MongoDB and Node js Asynchronous programming

I am trying to solve an exam problem, so I cannot post my exam code as it is. So I have simplified such that it addresses the core concept that I do not understand. Basically, I do not know how to slow down node's asynchronous execution so that my mongo code can catch up with it. Here is the code:
MongoClient.connect('mongodb://localhost:27017/somedb', function(err, db) {
if (err) throw err;
var orphans = [];
for (var i; i < 100000; i++) {
var query = { 'images' : i };
db.collection('albums').findOne(query, function(err, doc_album) {
if(err) throw err;
if (doc_album === null) {
orphans.push(i);
}
});
}
console.dir(orphans.length);
return db.close();
});
So I am trying to create an array of those images who do not match my query criteria. I end up with a orphans.length value of 0 since Node does not wait for the callbacks to finish. How can I modify the code such that the callbacks finish executing before I count the number of images in the array that did not meet my query criteria?
Thanks in advance for your time.
Bharat
I assume you want to do 100000 parallel DB calls. To "wait" 10000 calls completion in each call callback we increase finished calls counter and invoke main callback when last one finished. Note that very common mistake here is to use for loop variable as a closure inside callback. This does not work as expected as all 10000 handlers scheduled first and by the time first is executed loop variable is of the same, maximum value.
function getOrphans(cb) {
MongoClient.connect('mongodb://localhost:27017/somedb', function(err, db) {
if (err) cb(err);
var orphans = [];
var numResponses = 0;
var maxIndex = 100000
for (var i = 0; i < maxIndex; i++) {
// problem: by the time you get reply "i" would be 100000.
// closure variable changed to function argument:
(function(index) {
var query = { 'images' : index };
db.collection('albums').findOne(query, function(err, doc_album) {
numResponses++;
if(err) cb(err);
if (doc_album === null) {
orphans.push(index);
}
if (numResponses == maxIndex) {
db.close();
cb(null, orphans);
}
});
})(i); // this is "immediately executed function
}
});
}
getOrphans(function(err, o) {
if (err)
return console.log('error:', err);
console.log(o.length);
});
Im not suggesting this is the best way to handle this specific problem in Mongo, but if you need to wait to the DB to reply before continuing then just use the callback to start next request.
This is not obvious at first, but you can refer to the result processing function inside the function itself:
var i = 0;
var mycback = function(err, doc_album) {
// ... process i-th result ...
if (++i < 100000) {
db.collections("album").findOne({'images': i}, mycback);
} else {
// request is complete, "return" result
result_cback(null, res);
}
};
db.collections('album').findOne({'images': 0}, mycback);
This also means that your function itself will be async (i.e. will want a result_cback parameter to call with the result instead of using return).
Writing a sync function that calls an async one is just not possible.
You cannot "wait" for an event in Javascript... you must set up an handler for the result and then terminate.
Waiting for an event is done in event-based processing by writing a "nested event loop" and this is for example how message boxes are handled in most GUI frameworks. This is a capability that Javascript designers didn't want to give to programmers (not really sure why, though).
Since you know it does not wait for the call to come back. You can do the console.dir inside your callback function, this should work (although I haven't tested it)
db.collection('albums').findOne(query, function(err, doc_album) {
if(err) throw err;
if (doc_album === null) {
orphans.push(i);
}
console.dir(orphans.length);
});
You don't need to slow anything down. If you are simply trying to load 100,000 images from the albums collection, you could consider using the async framework. This will let you assign tasks until the job is complete.
Also, you probably don't want request 100,000 records one-by-one. Instead, you probably want to page them.

Categories