I am trying to load data from the twitter api, getting user information and save that in a temporary array. That array will then be loaded on the page for viewing. The array is getting loaded by the API call, but it doesn't display.
I think I need to use an asynchronous thing like React or Angular, not sure. Would love some input!
/**
 * Requests 'statuses/retweeters/ids' with `userId` as the `id` parameter,
 * appends every returned id to the shared `ids` array and then calls
 * getUserInfo().
 *
 * NOTE(review): `err` is not inspected, so a failed request would fail on
 * `data.ids`.
 *
 * @param {*} userId - value sent as the `id` request parameter.
 */
function getUserIds (userId) {
  var endpoint = 'statuses/retweeters/ids';
  T.get(endpoint, { id: userId }, function onRetweeters(err, data, response) {
    for (var retweeterId of data.ids) {
      ids.push(retweeterId);
    }
    getUserInfo();
  });
}
/**
 * Issues one 'users/lookup' request per entry of the shared `ids` array and
 * records each returned screen name / profile image URL in the shared
 * `names` / `pics` arrays.
 *
 * The page is rendered right after the loop has issued the requests; the
 * render call does not wait for the lookup callbacks.
 */
function getUserInfo() {
  var onLookup = function (err, data, response) {
    names.push(data[0].screen_name);
    pics.push(data[0].profile_image_url_https);
    console.log(names);
  };

  for (var idx = 0; idx < ids.length; idx += 1) {
    T.get('users/lookup', { user_id: ids[idx] }, onLookup);
  }

  res.render('display', { names: names, pics: pics });
}
The issue is that you are running ids.length async calls and those will finish some time in the future. You have to render your page only when they are all done. But, your for loop is synchronous so you are calling res.render() before any of them have finished. In addition, your T.get() calls may finish in any order (if that matters).
I would normally use promises for coordinating multiple asynchronous operations since it is a very, very good tool for that. But, if you aren't using promises, here's a simple technique to test when you have all your results back:
/**
 * Looks up every id in the shared `ids` array and renders the 'display' view
 * exactly once, after all lookups have reported back. Results are collected
 * in completion order, not in `ids` order.
 *
 * Compared with a plain "names.length === ids.length" check this version:
 *  - renders immediately when `ids` is empty (otherwise no callback would
 *    ever run and the response would never be sent);
 *  - pushes a placeholder into BOTH arrays on failure so names[n] and pics[n]
 *    always describe the same user;
 *  - treats a lookup that returns no user as a failure instead of throwing.
 */
function getUserInfo() {
  var names = [];
  var pics = [];
  var total = ids.length;
  var finished = 0;

  if (total === 0) {
    res.render('display', {names: names, pics: pics});
    return;
  }

  ids.forEach(function (id) {
    T.get('users/lookup', { user_id: id }, function (err, data, response) {
      var user = !err && data ? data[0] : undefined;
      if (user) {
        names.push(user.screen_name);
        pics.push(user.profile_image_url_https);
        console.log(names);
      } else {
        // decide what to display if you get an API error
        names.push("unknown due to API error");
        pics.push(null); // keeps pics aligned with names
      }
      finished += 1;
      if (finished === total) {
        res.render('display', {names: names, pics: pics});
      }
    });
  });
}
As I said above, this does not necessarily collect the results in order. If you need them in order, then you could do something like this:
/**
 * Looks up every id in the shared `ids` array and renders the 'display' view
 * exactly once, after all lookups have reported back. Each result is stored
 * at the index of its id, so names[n] / pics[n] correspond to ids[n]
 * regardless of the order in which the requests complete.
 *
 * Renders immediately when `ids` is empty (no callback would ever fire), and
 * treats a lookup that returns no user the same as an API error.
 */
function getUserInfo() {
  var total = ids.length;
  var names = new Array(total);
  var pics = new Array(total);
  var doneCntr = 0;

  if (total === 0) {
    res.render('display', {names: names, pics: pics});
    return;
  }

  ids.forEach(function(id, i) {
    T.get('users/lookup', { user_id: id }, function (err, data, response) {
      var user = !err && data ? data[0] : undefined;
      if (user) {
        names[i] = user.screen_name;
        pics[i] = user.profile_image_url_https;
      } else {
        // decide what to display if you get an API error
        names[i] = "unknown due to API error";
        pics[i] = null; // explicit placeholder instead of an array hole
      }
      ++doneCntr;
      if (doneCntr === total) {
        res.render('display', {names: names, pics: pics});
      }
    });
  });
}
My preferred solution would be to use Promise.all() with a promisified version of T.get().
Related
I'm trying to make a web scraper (for educational purposes), and I got really far, but this little issue is bugging me.
I made a request callback function, and I'm trying to get lines 75-78 to work. However, to get this to work, I need PDF_LIST and PDF_LINKS to be initialized to the right values.
I've already tried making them global variables and so on, but for some reason that doesn't work. So my question is: how do I make a callback function that will call that for loop (75-78) and successfully initialize PDF_LIST and PDF_LINKS to the correct values?
(Dont worry I use this on educational content, with the prof's permission). First time posting here!
// PDF_LINKS holds the href of each PDF link found on the page
PDF_LINKS = [];
// PDF_LIST holds the link text of each PDF link (used further down as the output file name)
PDF_LIST = [];
// Requests `url`, parses the response body with cheerio and records every
// anchor whose href ends in "pdf" into PDF_LINKS / PDF_LIST.
// NOTE(review): `callback` is accepted but never invoked in this version, so
// callers get no signal that the arrays have been filled.
function fillPDF(callback) {
request(url, function(err, res, body) {
$ = cheerio.load(body);
links = $('a'); // select every <a> element on the page
$(links).each(function(i, link) {
var value = $(link).attr('href');
// keep only hrefs whose last three characters are "pdf"
if (value.substring(value.length - 3, value.length) == "pdf") {
// NOTE(review): entries are stored at the anchor's position `i`, so anchors
// that are not PDFs leave unassigned slots in both arrays
PDF_LINKS[i] = $(link).attr('href');
PDF_LIST[i] = $(link).text();
}
})
});
}
// Intended call site: fillPDF must run before the variables are initialized.
// NOTE(review): a call followed directly by a `{ ... }` block is not valid
// JavaScript as written.
fillPDF() {
// Desired state here: PDF_LINKS and PDF_LIST are initialized (to 33 entries)
}
// Streams each recorded link into a local file named after its link text.
// NOTE(review): this loop is outside the request callback, so presumably it
// runs before the arrays above have been filled -- confirm.
for (j = 0; j < PDF_LIST.length; j++) {
request(PDF_LINKS[j]).pipe(fs.createWriteStream(PDF_LIST[j]));
}
You may push your values into arrays using array's push method, avoiding array's element to be undefined.
You can put your final for loop into a function, and then use fillPDF();
You also need to call fillPDF's callback once the request is over.
// Hrefs of the PDF links found on the page, in document order.
const PDF_LINKS = [];
// Link text of each PDF link; PDF_LIST[n] belongs to PDF_LINKS[n].
const PDF_LIST = [];

/**
 * Downloads `url`, collects every anchor whose href ends in "pdf" into
 * PDF_LINKS / PDF_LIST, then invokes `callback`.
 *
 * @param {function(Error=)} callback - called exactly once: with the request
 *   error when the download failed (nothing is collected in that case),
 *   otherwise with no arguments after all anchors have been inspected.
 */
function fillPDF(callback) {
  request(url, function (err, res, body) {
    if (err) {
      // Without a body there is nothing to parse; report instead of crashing.
      callback(err);
      return;
    }
    const $ = cheerio.load(body);
    $('a').each(function (i, link) {
      const anchor = $(link);
      const href = anchor.attr('href');
      // Anchors without an href attribute yield undefined; skip them.
      if (typeof href === 'string' && href.slice(-3) === 'pdf') {
        PDF_LINKS.push(href);
        PDF_LIST.push(anchor.text());
      }
    });
    callback();
  });
}
/**
 * Downloads every collected PDF: the response for PDF_LINKS[n] is piped into
 * a local file named PDF_LIST[n].
 *
 * The loop index is declared locally; assigning to an undeclared `j` creates
 * an implicit global in sloppy mode and throws in strict mode / ES modules.
 *
 * NOTE(review): the link text is used verbatim as the file name; if the page
 * is not trusted, sanitize it before writing.
 */
function writePDF() {
  for (let j = 0; j < PDF_LIST.length; j++) {
    request(PDF_LINKS[j]).pipe(fs.createWriteStream(PDF_LIST[j]));
  }
}
// Start the scrape; fillPDF invokes writePDF once it has gone through the page's links.
fillPDF(writePDF);
I have an array of ids, and I want to make an api request for each id, but I want to control how many requests are made per second, or better still, have only 5 open connections at any time, and when a connection is complete, fetch the next one.
Currently I have this, which just fires off all the requests at the same time:
// Starts one fetchIssue call per issue id inside a single synchronous _.each
// pass (no limit on how many are outstanding) and writes each response to
// disk when its promise resolves.
_.each([1,2,3,4,5,6,7,8,9,10], (issueId) => {
  const pendingIssue = github.fetchIssue(repo.namespace, repo.id, issueId, filters);
  pendingIssue.then((response) => {
    console.log('Writing: ' + issueId);
    const issue = fetchIssueCallback(response);
    writeIssueToDisk(issue);
  });
});
Personally, I'd use Bluebird's .map() with the concurrency option since I'm already using promises and Bluebird for anything async. But, if you want to see what a hand-coded counter scheme that restricts how many concurrent requests can run at once looks like, here's one:
/**
 * Runs `fn` over every element of `collection` with at most `max` calls in
 * flight at any time.
 *
 * @param {Array} collection - items to process, started in index order.
 * @param {number} max - maximum number of concurrent `fn` calls. With a value
 *   below 1 and a non-empty collection nothing is ever started and `done`
 *   is never called.
 * @param {function(*, function(*=, *=))} fn - worker; receives an item and a
 *   node-style completion callback `(err, data)`. `data` is ignored.
 * @param {function(*=)} done - called exactly once: with the first error, or
 *   with no arguments after every item has completed.
 *
 * Two guards make this safe for workers that complete synchronously:
 *  - `pumping` stops a completion callback from re-entering the launch loop
 *    while that loop is still running (no unbounded recursion);
 *  - `finished` guarantees `done` is invoked only once.
 */
function limitEach(collection, max, fn, done) {
  let inFlight = 0;
  let nextIndex = 0;
  let finished = false;
  let pumping = false;

  function finish(err) {
    if (finished) return;
    finished = true;
    if (err) {
      done(err);
    } else {
      done();
    }
  }

  function runMore() {
    // A synchronous completion lands here while the loop below is active;
    // the loop itself will pick up the freed slot.
    if (pumping || finished) return;
    pumping = true;
    while (!finished && inFlight < max && nextIndex < collection.length) {
      ++inFlight;
      fn(collection[nextIndex++], function (err, data) {
        --inFlight;
        if (finished) return;
        if (err) {
          finish(err);
        } else {
          runMore();
        }
      });
    }
    pumping = false;
    if (!finished && inFlight === 0 && nextIndex === collection.length) {
      finish();
    }
  }

  runMore();
}
With Bluebird:
// Demo: `Promise` here is Bluebird (delay/map are Bluebird helpers).
// fetch logs the id, waits on Promise.delay(2000, id), then logs the value
// the delay resolved with; the returned promise carries no value.
function fetch(id) {
  console.log("Fetching " + id);
  var simulatedRequest = Promise.delay(2000, id);
  return simulatedRequest.then(function (fetchedId) {
    console.log(" Fetched " + fetchedId);
  });
}

var ids = [1,2,3,4,5,6,7,8,9,10];
// Map over the ids with the concurrency option set to 3.
Promise.map(ids, fetch, { concurrency: 3 });
<script src="https://cdnjs.cloudflare.com/ajax/libs/bluebird/3.3.1/bluebird.min.js"></script>
<!-- results pane console output; see http://meta.stackexchange.com/a/242491 -->
<script src="http://gh-canon.github.io/stack-snippet-console/console.min.js"></script>
Divide your data into as many arrays as you want concurrent connections. Schedule with setTimeout, and have the completion callback handle the rest of the sub-array.
Wrap the setTimeout in a function of its own so that the variable values are frozen to their values at the time of delayed_fetch() invocation.
/**
 * Works through `issueIds` one id at a time: after `delay` milliseconds the
 * first id is removed from the array and fetched; once that fetch settles the
 * next id is scheduled with no delay.
 *
 * @param {number} delay - milliseconds to wait before the first fetch.
 * @param {*} namespace - passed through to github.fetchIssue.
 * @param {*} id - passed through to github.fetchIssue.
 * @param {Array} issueIds - queue of issue ids; consumed (mutated) in place.
 * @param {*} filters - passed through to github.fetchIssue.
 *
 * The chain stops when the queue is empty. Without that check the function
 * would shift `undefined` off the empty array and keep fetching forever.
 * A failed fetch or write is logged and the remaining ids are still processed.
 */
function delayed_fetch(delay, namespace, id, issueIds, filters) {
  setTimeout(function () {
    if (issueIds.length === 0) {
      return;
    }
    var issueId = issueIds.shift();
    github.fetchIssue(namespace, id, issueId, filters)
      .then(function (response) {
        console.log('Writing: ' + issueId);
        writeIssueToDisk(fetchIssueCallback(response));
      })
      .catch(function (err) {
        console.error('Failed to fetch issue ' + issueId + ':', err);
      })
      .then(function () {
        delayed_fetch(0, namespace, id, issueIds, filters);
      });
  }, delay);
}
// One delayed_fetch chain per sub-array; the n-th chain (1-based) starts
// after n * 200 ms.
var i = 0;
_.each([ [1,2] , [3,4], [5,6], [7,8], [9,10] ], function (issueIds) {
  i += 1;
  var delay = i * 200; // millisecond
  delayed_fetch(delay, repo.namespace, repo.id, issueIds, filters);
});
I'd recommend using throat for exactly this: https://github.com/ForbesLindesay/throat
Using Bluebird
// Placeholder with an empty body: meant to get a collection of users.
// NOTE(review): search() below chains .then() on the lookup result, so this
// presumably has to return a promise -- confirm.
function getUserFunc(user) {
// Get a collection of users
}
// Placeholder with an empty body: meant to get the profile images for a user id.
// NOTE(review): presumably has to return a promise as well -- confirm.
function getImageFunc(id) {
// Get a collection of profile images based on the id of the user
}
/**
 * Loads the users for `response`, then resolves the profile image of each
 * user with at most five image lookups running at the same time.
 *
 * `Promise` is expected to be Bluebird here; `Promise.map` with a
 * `concurrency` option is a Bluebird helper, not part of native promises.
 *
 * Calls the helpers under the names they are defined with (getUserFunc /
 * getImageFunc) and uses an expression-bodied arrow for the id mapping
 * (`x => return y` is a syntax error).
 *
 * @param {*} response - passed through to getUserFunc.
 * @returns {Promise<Array>} resolves to the `image` of each user, in user order.
 */
function search(response) {
  return getUserFunc(response).then((users) => {
    const userIds = users.map((user) => user.id);
    const fetchImage = (id) => getImageFunc(id).then((items) => items.image);
    return Promise.map(userIds, fetchImage, { concurrency: 5 });
  });
}
Previously I used the ES6 function Promise.all(), but it didn't work the way I expected. I then switched to the third-party library bluebird.js, and it works like a charm.
I have two loops: the outer one loops over the users and the inner one loops over the venue IDs of each user. Within the inner loop I want to look up the venue and attach it to an array defined in the outer loop (userItem). However, because forEach is synchronous and the MongoDB lookup is asynchronous, the result always remains empty. I've tried to integrate this answer, but to no avail. How can I do this?
// Collects one sanitised user item per user.
ret = [];
users.forEach(function(user) {
var userItem = user.getSanitised('ADM');
userItem.venues = [];
// One task function per venue id; each task reports its venue through `callback`.
var tmp = [];
userItem.adminVenueIds.forEach(function(adminVenueId){
tmp.push(function(callback) {
Venue.findOne({_id:adminVenueId}, function(error, venue) {
// NOTE(review): `error` is not checked before `venue` is used.
callback(null, venue.toObject());
});
});
});
// NOTE(review): the task list passed here is userItem.venues (assigned [] above),
// not `tmp`, so the lookup tasks built above are never handed to async.parallel.
async.parallel(userItem.venues, function(err, result) {
/* this code will run after all calls finished the job or
when any of the calls passes an error */
if (err)
return console.log(err);
// `result` is pushed as a single element rather than merged into venues.
userItem.venues.push(result);
});
// This push is outside the completion callback above, so presumably it
// happens before any lookup result has arrived -- confirm.
ret.push(userItem);
});
Tried the following as well but doesn't work also
// Second attempt: per user, run two steps in series and push the user item
// into `ret` from the final callback.
users.forEach(function(user) {
var userItem = [];
async.series({
// Step 1: replace userItem with the sanitised user.
setUserItem : function(callback)
{
userItem = user.getSanitised('ADM');
callback(null, 'OK');
},
// Step 2: look up every admin venue and append it to userItem.venues.
setUserVenues : function(callback)
{
userItem.venues = [];
user.adminVenueIds.forEach(function(adminVenueId,index) {
Venue.findOne({_id:adminVenueId}, function(error, venue) {
// NOTE(review): `error` is not checked before `venue` is used.
userItem.venues.push(venue.toObject());
// The step is completed when the lookup for the LAST index answers, which
// is not necessarily when all lookups have answered. With an empty
// adminVenueIds array this callback is never invoked.
if((index+1) == user.adminVenueIds.length)
callback(null, 'OK');
});
});
}
},
// Final callback: logs an error if one was reported, then stores the item.
function(error, results) {
if(error)
winston.error(error);
ret.push(userItem);
}
);
});
You could simply put an if statement inside the callback (in your case, use the array length as the condition); when the loop is done, you can then continue by calling a function (or put your code directly in there, but that will start to look messy).
// Demonstrates running a continuation exactly once after a batch of
// asynchronous callbacks (simulated with timers) has completed.
const ret = [];
const test = [];
const OUTER = 20;
const INNER = 20;
// Timers that have not fired yet; the continuation runs when this reaches 0.
let pending = OUTER * INNER;

for (let i = 0; i < OUTER; i++) {
  // `let` gives every timer callback its own x. With `var`, all callbacks
  // share one x that is already 20 when the first timer fires, so each one
  // would push "Test20" and each one would trigger the continuation.
  for (let x = 0; x < INNER; x++) {
    setTimeout(function () {
      test.push("Test" + x);
      pending -= 1;
      if (pending === 0) {
        finishIt();
      }
    }, 300);
  }
}

/** Continuation: logs the collected values and stores them in `ret`. */
function finishIt() {
  console.log(test);
  ret.push(test);
}
I think you might want to look into using Mongoose. It is a NodeJS application layer on top of MongoDB that provides a more SQL like experience.
http://mongoosejs.com
I ended up with the following solution. It's dirty but I guess that's just nodejs being nodejs.
// For every user: look up all admin venues, attach them to the sanitised
// user item and push the item into `ret` once every lookup has answered.
//
// Completion is tracked with a counter instead of re-checking on a 30 ms
// timer through arguments.callee: arguments.callee throws in strict mode,
// and a length-based poll never terminates when a lookup fails.
users.forEach(function (user) {
  var userItem = user.getSanitised('ADM');
  var venueIds = user.adminVenueIds;
  var pending = venueIds.length;
  userItem.venues = [];

  // Nothing to look up: the item is complete right away.
  if (pending === 0) {
    ret.push(userItem);
    return;
  }

  venueIds.forEach(function (adminVenueId) {
    Venue.findOne({_id: adminVenueId}, function (error, venue) {
      if (error) {
        console.error(error);
      } else if (venue) {
        // A missing venue yields no entry rather than a crash on toObject().
        userItem.venues.push(venue.toObject());
      }
      pending -= 1;
      if (pending === 0) {
        ret.push(userItem);
      }
    });
  });
});
I am trying to return data from this function. Console.log(documents) successfully shows the data in console. But this works only in body of the function. I can't return this data to the template. What should I do? Should I use some async package for node.js, or can be accomplished somehow like this?
Thank you.
// Projects of the current user; one `find` is issued per project below.
var projects = req.user.projects;
// Handed to the template at the end. NOTE(review): nothing in this snippet
// ever adds to it (the only candidate line is commented out).
var docs = [];
db.collection('documents', function(err, collection) {
for (i = 0; i < projects.length; i++) {
collection.find({'_projectDn': projects[i].dn},function(err, cursor) {
// Invoked once per document; `documents` is a single item here.
cursor.each(function(err, documents) {
if(documents != null){
console.log(documents);
//or docs += documents;
}
});
});
}
});
// NOTE(review): `documents` is only a parameter of the cursor.each callback
// above; it is not declared in this scope.
console.log(documents); // undefined
// Rendering is outside the db callbacks, so presumably it runs before any
// document has been read -- confirm.
res.render('projects.handlebars', {
user : req.user,
documents: docs
});
Those db functions are async, which means that when you try to log it, the function hasn't finished yet. You can log it using a callback, for example:
/**
 * Streams every document of every project (shared `projects` array) to
 * `callback`, one document per call.
 *
 * @param {function(Object)} callback - invoked once for each document found;
 *   it is not told when the last document has been delivered.
 *
 * Database errors are logged and the affected lookup is skipped, instead of
 * dereferencing an undefined collection/cursor. The project loop uses
 * forEach so no loop counter leaks into the global scope.
 */
function getDocuments(callback) {
  db.collection('documents', function (err, collection) {
    if (err) {
      console.error(err);
      return;
    }
    projects.forEach(function (project) {
      collection.find({
        '_projectDn': project.dn
      }, function (err, cursor) {
        if (err) {
          console.error(err);
          return;
        }
        cursor.each(function (err, documents) {
          if (err) {
            console.error(err);
            return;
          }
          // A null/undefined item carries no document; skip it.
          if (documents != null) {
            console.log(documents);
            callback(documents); // run the function given in the callback argument
          }
        });
      });
    });
  });
}
// Usage: hand getDocuments the function it should run for each document.
getDocuments((documents) => {
  console.log('Documents: ' + documents);
});
In my Express route, I am trying to return a list of elements that I am grabbing from MongoDB using Mongoose. I'm basically iterating through an array of items, and making MongoDB calls to get the parameter objects that each item has. However, I'm having trouble making sure that I get all the parameters before I send the response. I've tried using promises, other async library functions, etc, but none of them have seemed to work.
The current iteration of the code looks like this (I have tried a lot of different things):
// Route handler: loads all flows, then tries to load every parameter
// referenced by each flow before responding.
// NOTE(review): the snippet stops before the closing braces of the for loop,
// the Flow.find callback and the handler itself.
exports.findAll = function(req, res){
Flow.find({}, function(err, items) {
console.log(items);
// NOTE(review): this outer payload is never used; the completion callback
// below declares its own `payload`.
var payload = {}
var params = [];
for (var i=0; i < items.length; i++) {
// `var` is function-scoped, so count2 and i are shared by every iteration
// and by all callbacks created inside the loop.
var count2 = 0;
async.whilst(
// Test: keep going while the current item has parameters left.
function() {
return ((items[i]) && (count2 < items[i].params.length));
},
// Body: load one parameter by id and append it to params.
function(callback) {
Parameter.findById(items[i].params[count2], function(err, out) {
params.push(out);
count2++;
callback();
});
},
// Completion: sends the response. NOTE(review): this sits inside the for
// loop, so it can run once per item rather than once overall.
function(err) {
console.log(params);
var payload = {
"flows": items,
"params": params
};
res.send(payload);
console.log('success: flows found');
}
);
}
This code sends a payload with params not being completely full.
What would be a good way to deal with this? Honestly I just want these database calls to be synchronous, but I just can't figure out how to make this work.
This doesn't really seem necessary as you can actually use the $in operator with all the results from your first query:
// Loads all flows, gathers every parameter id they reference and fetches
// those parameters with a single $in query before responding.
// A database error in either query is answered with HTTP 500 instead of
// being ignored (which would crash on `items.length` or leave the request
// without a response).
Flow.find({}, function (err, items) {
  if (err) {
    res.status(500).send({ "error": "failed to load flows" });
    return;
  }

  var ids = [];
  // blocking? yes, but should be minor - do better if there are problems
  items.forEach(function (item) {
    item.params.forEach(function (paramId) {
      ids.push(paramId);
    });
  });

  Parameter.find({ "_id": { "$in": ids } }, function (err, params) {
    if (err) {
      res.status(500).send({ "error": "failed to load parameters" });
      return;
    }
    res.send({ "flows": items, "params": params });
  });
});
So there should be no reason to execute multiple queries inside an async loop (or loops, which your code seems to be missing and which looks like the direct cause of the problem there).