I have the same question as earlier, but I'm having trouble relating the answers to my code. Here's my error: RangeError: Maximum call stack size exceeded.
The highly simplified pseudo version of my code is this:
// Pseudo-code: requests `url`, derives `some_var` from the response, inserts it
// into my_table and, when the inserted value is empty, issues another request.
// `other_params` is only forwarded to the follow-up call.
function make_request(url, other_params){
// NOTE(review): if `request` is the npm "request" module, the first callback
// argument is the error, not the response - confirm.
request(url, function(response){
if(something) var some_var = 'some value';
else var some_var = '';
//do something with response to generate, some_var, and insert into DB
var my_arr = [some_var];
// `err` from the INSERT is not inspected before the follow-up decision is made.
connection.query('INSERT my_table SET name = ?', my_arr, function(err, rows, fields) {
// Loose equality coerces the one-element array to a string, so [''] == '' is
// true: this branch runs whenever some_var is the empty string.
if(my_arr==''){
// generate new url to make new request
make_request(url, other_params);
}
});
});
}
// Reads every row of my_table and starts one make_request per row.
// All requests are started inside the same loop, without waiting for any of
// them to finish; `err` is not checked before `rows` is used.
connection.query('SELECT * from my_table', function(err, rows, fields){
var len =rows.length;
for(var i = 0; i < len; i++){
var url = rows[i].url;
make_request(url, other_params);
}
});
I've tried wrapping the internal make_request in setImmediate or setTimeout amongst a few other hacks, but nothing seems to prevent the call stack error. I'm able to add any library that would make this work. Any thoughts would be appreciated.
If I'm reading the code correctly, the block
if(my_arr==''){
// generate new url to make new request
function make_request(url, other_params);
}
Redefines make_request to be a null function. Don't you want to call it at this point, instead? I think that the null redefinition would leave you with an indirect recursion that has no way to terminate.
Try changing your code to this:
/**
 * Requests `url`, derives a value from the response and stores it in my_table.
 *
 * @param {string|Object} url - Target handed unchanged to `request`.
 * @param {*} other_params - Extra caller data; not read by this function.
 *   It may be omitted: `make_request(url, callback)` is also accepted, so the
 *   function can be used directly as an `async` iteratee, which is invoked as
 *   (item, callback).
 * @param {function(?Error, *=)} callback - Node-style completion callback:
 *   receives the request/query error, or `null` and the rows reported by the
 *   INSERT.
 * @throws {TypeError} When no callback function is supplied.
 */
function make_request(url, other_params, callback) {
  // Every async function must have a callback; accept it in 2nd or 3rd position.
  var done = (callback === undefined && typeof other_params === 'function')
    ? other_params
    : callback;
  if (typeof done !== 'function') {
    throw new TypeError('make_request requires a callback function');
  }

  // request's callback is (error, response, body) - https://github.com/request/request
  request(url, function (requestErr, response) {
    // A failed request has no usable response, so report it instead of inserting.
    if (requestErr) {
      return done(requestErr);
    }

    // `smth` is the placeholder condition from the question.
    var some_var = smth ? 'some-value' : '';
    var my_arr = [some_var];

    connection.query('INSERT my_table SET name = ?', my_arr, function (queryErr, rows, fields) {
      // Always check the error.
      if (queryErr) {
        return done(queryErr);
      }
      // Deciding whether to retry with a new URL (the `my_arr == ''` check in
      // the question, which is true when some_var is '') belongs to the caller:
      // it can inspect the result and call make_request again.
      done(null, rows); // return data
    });
  });
}
var async = require('async');
// Loads every stored URL and processes them strictly one after another.
connection.query('SELECT url from my_table', function (err, rows, fields) { // if you need one field then don't request all
  // Always check the error. The SQL can be correct while the db is busy.
  if (err) {
    return console.log(err.message);
  }

  // mapSeries invokes its iteratee as (item, callback), and each item here is
  // a row object. Adapt that to make_request(url, other_params, callback)
  // instead of passing make_request itself, otherwise the iteratee callback
  // would land in the `other_params` slot.
  async.mapSeries(rows, function (row, next) {
    // Replace `undefined` with your other_params if make_request needs them.
    make_request(row.url, undefined, next);
  }, function (err, results) {
    if (err) {
      return console.log(err);
    }
    // do smth with results
    console.log(results);
  });
});
P.S. heinob gave a very good answer to your previous question.
Related
I am trying to build a result_arr of location objects to send as a response, but I am not sure how to send the response only when the entire array has been built. The response contains an empty array, but result_arr array is filled after the response has already been sent.
// Collects the locations assigned to every active employee of the contractor
// given in req.body["contractor_id"] into result_arr.
// Three levels of asynchronous lookups are nested: employees -> assignments
// per employee -> location record per assignment.
function handle_getLocations(req, res, done){
var con_id = req.body["contractor_id"];
console.log("Contractor ID :" + con_id.toString());
var result_arr = new Array();
employee.getActiveByContractor(con_id, function(err, employees){
if (err) {
console.log("Logging error in json:\n");
res.json({"code" : 100, "status" : "Error in connection database"});
return;
};
// No `return` here: execution continues to the loop below (which then runs
// zero times).
if(employees.length === 0) done(null);
// Starts one lookup per employee; the loop itself finishes before any of the
// callbacks below has run.
for(var i=0;i<employees.length;i++){
assignment.getLocationsByEmployeeID(employees[i].employee_id, function(err, locations){
if (err) {
console.log("Logging error in json:\n");
res.json({"code" : 100, "status" : "Error in connection database"});
return;
};
console.log("Number of locations: " + locations.length.toString());
for(var j=0;j<locations.length;j++){
console.log("Assignment is: " + locations[j].assignment_id.toString());
location.getAllByID(locations[j].location_id, function(err, loc){
if (err) {
console.log("Logging error in json:\n");
res.json({"code" : 100, "status" : "Error in connection database"});
return;
};
// Only the first returned row is used.
var loc_obj = {};
loc_obj.display_name = loc[0].display_name;
loc_obj.location_id = loc[0].location_id;
console.log("Location is: " + loc_obj.display_name);
console.log("Location ID is: " + loc_obj.location_id.toString());
result_arr.push(loc_obj);
console.log(result_arr);
// done() is invoked once per location, each time with the array as filled so
// far, and with the array in the first argument position.
done(result_arr);
});
};
});
};
});
};
I know that in nodejs the idea is to not make blocking calls, but I am not sure how to make sure all of the information is sent in the response.
You are calling many asynchronous functions in the loop, but you have no logic to detect when all of them have completed so that the response can be sent back to the client.
I modified your code a bit to add that logic in the vanilla JS way; the code below is very messy, but it works.
Anyways I would suggest you to use promise based/asynchronous modules
like async, bluebird etc to handle this nicely. Using them, you
can improve readability and easy maintainability in your code to get
rid of callback hells and other disadvantages.
async http://caolan.github.io/async/
bluebird https://github.com/petkaantonov/bluebird
You can read more about this on the below link,
https://strongloop.com/strongblog/node-js-callback-hell-promises-generators/
/**
 * Responds with the locations assigned to every active employee of a
 * contractor, sending the JSON response exactly once after every nested
 * lookup has reported back.
 *
 * @param {Object} req - Request; `req.body["contractor_id"]` selects the contractor.
 * @param {Object} res - Response; `res.json` receives the array of
 *   `{display_name, location_id}` objects, or the error object when the
 *   employee lookup itself fails.
 * @param {Function} done - Called with `null` when the contractor has no
 *   active employees (no JSON response is sent in that case, as before).
 */
function handle_getLocations(req, res, done) {
  var con_id = req.body["contractor_id"];
  console.log("Contractor ID :" + con_id.toString());
  var result_arr = [];
  var errors = [];

  employee.getActiveByContractor(con_id, function (err, employees) {
    if (err) {
      console.log("Logging error in json:\n");
      res.json({"code" : 100, "status" : "Error in connection database"});
      return;
    }
    if (employees.length === 0) {
      done(null);
      return;
    }

    // Counts employees whose lookups (including all of their locations) are
    // still outstanding; the response goes out when it reaches zero.
    var employeesPending = employees.length;

    function employeeFinished() {
      employeesPending -= 1;
      if (employeesPending === 0) {
        if (errors.length > 0) {
          console.log(errors.length + " lookup(s) failed");
        }
        res.json(result_arr);
        //done(result_arr); // If required, uncomment this line and comment the above line
      }
    }

    employees.forEach(function (emp) {
      assignment.getLocationsByEmployeeID(emp.employee_id, function (err, locations) {
        if (err) {
          console.log(err);
          errors.push(err);
          employeeFinished();
          return;
        }

        console.log("Number of locations: " + locations.length.toString());
        var locationsPending = locations.length;
        // An employee without assignments starts no inner lookups, so it must
        // be counted as finished here or the response would never be sent.
        if (locationsPending === 0) {
          employeeFinished();
          return;
        }

        locations.forEach(function (assigned) {
          console.log("Assignment is: " + assigned.assignment_id.toString());
          location.getAllByID(assigned.location_id, function (err, loc) {
            if (err) {
              console.log(err);
              errors.push(err);
            } else if (!loc || loc.length === 0) {
              // No matching location row: nothing to add for this assignment.
              console.log("No location found for ID: " + assigned.location_id);
            } else {
              var loc_obj = {};
              loc_obj.display_name = loc[0].display_name;
              loc_obj.location_id = loc[0].location_id;
              console.log("Location is: " + loc_obj.display_name);
              console.log("Location ID is: " + loc_obj.location_id.toString());
              result_arr.push(loc_obj);
              console.log(result_arr);
            }
            locationsPending -= 1;
            if (locationsPending === 0) {
              employeeFinished();
            }
          });
        });
      });
    });
  });
}
In order not to consume too much time during the request-response lifetime, you should put each piece of logic in its own endpoint. Sometimes, as in your case, you need to hit the database more than once to fetch data that depends on other data. So, assuming that employee.getActiveByContractor returns a promise (it is an async method), you need to chain it with .then like this:
employee.getActiveByContractor(con_id)
.then(function(employees) {
Also, you may need to read about Promises.
As Basim says, this is a good time to use Promises.
getLocationsByEmployeeID and getAllByID are async so they won't be done by the time the loop is finished and you send your response.
Promises are built into the latest Node.js version.
Learn here: https://www.udacity.com/course/javascript-promises--ud898
Suggestion:
Create promise wrappers for getLocationsByEmployeeID and getAllByID
Use Promise.all to make sure every getLocationsByEmployeeID and getAllByID are complete
return your http response within Promise.all's "success" callback
I'm trying to use async.each function to get an array with my results from two queries. After that, I need to render this results in a web page.
The async.each function calculates the results variable properly, but I am not able to export this variable outside the function and render it, and I don't understand why.
Here I attached the code, where I tested it. I realized that when I call "callback1" the function(error) is not working and I don't get the variable list in the console (so I won't be able to render it later on). Please I would be grateful if someone could help me with that. Thanks a lot.
// Builds `list` with one {Name_Gene, classgene} object per "reference" gene
// in `data`, looking the class up through geneModel.
var list = [];
async.each(data,
function(elem, callback1){
var classgene = '';
var custom_gene = {};
custom_gene = {Name_Gene: elem['Name_Gene']};
// When Type_Gene is not "reference" nothing below runs, so callback1 is never
// called for that element.
if (elem['Type_Gene'] == "reference") {
async.waterfall([
function(callback2){
var id = elem['Id_Genes'];
// `error` from the lookup is not checked before data2 is read.
geneModel.getGenesRefClass(id, function(error, data2){
classgene = data2[0]['Class_Name'];
custom_gene['classgene'] = classgene;
// The result is passed in the first argument position of callback2.
callback2(custom_gene);
});
},
], function(custom_gene, err){
list.push(custom_gene);
console.log(list);
callback1();
});
}
}, function(err){
// if any of the saves produced an error, err would equal that error
if(err){
console.log(list);
}else{
console.log(list);
}
});
Your code has a few problems:
It's not calling callback2() properly. It should be callback2(null, custom_gene) (the first argument is reserved for errors, or null if there aren't any). Preferably, you should also check for error being returned by geneModel.getGenesRefClass();
The previous issue also means that you need to swap the argument of function(custom_gene, err) (it should become function(err, custom_gene));
When elem['Type_Gene'] does not equal "reference", you should still call callback1(), otherwise async.each() doesn't know that the code is done;
So the code would become something like this:
// Builds `list` with one {Name_Gene, classgene} object per "reference" gene
// in `data`; the final callback runs once every element has been handled or
// as soon as one lookup fails.
var list = [];
async.each(data, function (elem, callback1) {
  var custom_gene = { Name_Gene : elem['Name_Gene'] };

  // Non-reference genes need no lookup, but async.each must still be told
  // that this element is finished.
  if (elem['Type_Gene'] != "reference") {
    return callback1();
  }

  async.waterfall([
    function (callback2) {
      var id = elem['Id_Genes'];
      geneModel.getGenesRefClass(id, function (error, data2) {
        // First callback argument is reserved for errors.
        if (error) return callback2(error);
        // An empty result would otherwise crash on data2[0].
        if (!data2 || data2.length === 0) {
          return callback2(new Error('No class found for gene ' + id));
        }
        custom_gene['classgene'] = data2[0]['Class_Name'];
        callback2(null, custom_gene);
      });
    },
  ], function (err, gene) {
    // Propagate the failure instead of pushing `undefined` into the list.
    if (err) return callback1(err);
    list.push(gene);
    console.log(list);
    callback1();
  });
}, function (err) {
  // if any of the lookups produced an error, err would equal that error
  if (err) {
    console.log('An error occurred!', err);
  }
  console.log(list);
});
I am trying to scrape thousands of pages, so I used async.timesSeries and async.waterfall. Each of the functions works very well on its own, but they don't work together. What can I do?
The logic is simple.
Because I want to scrape pages are "http://udb.kr/local/category/390101?page="1~1167, async.timesSeries loop 1 to 1167
async.waterfall scraps components of pages
but messages that console shows me looks like this
info.NM values // just for explain, It shows me each attires of obj because I insert console.log(info.NM) for verifying.
info.NM values
info.NM values
info.NM values and randomly ----- page number -----
...
['done',
'done',
'done',
'done',
'done',
...
'done']
info.NM values again
.../Users/Snark/Dev/job_apply/cheerio_job_app_list.js:29
if (tObj[m+1].children != 0) {info.nAddr = tObj[m+1].firstChild.data}else{info.nAddr = null};
^
TypeError: Cannot read property 'children' of undefined
at /Users/Snark/Dev/job_apply/cheerio_job_app_list.js:29:17
at fn (/Users/Snark/node_modules/async/lib/async.js:746:34)
at /Users/Snark/node_modules/async/lib/async.js:1212:16
at /Users/Snark/node_modules/async/lib/async.js:166:37
at /Users/Snark/node_modules/async/lib/async.js:706:43
at /Users/Snark/node_modules/async/lib/async.js:167:37
at /Users/Snark/node_modules/async/lib/async.js:1208:30
at Request._callback (/Users/Snark/Dev/job_apply/cheerio_job_app_list.js:21:6)
at Request.self.callback (/Users/Snark/node_modules/request/request.js:198:22)
at emitTwo (events.js:87:13)
And this is js code.
var request = require("request"),
cheerio = require("cheerio"),
jsonfile = require("jsonfile"),
fs = require("fs"),
async = require("async");
// Scrapes pages 1..1166 of the listing, extracts two fields from every group
// of five table cells and appends the collected array to ./jobDB_l.json.
// `info` is a single object created once here; the loop below overwrites its
// fields and pushes the same reference into dbArray on every iteration.
var info = {},
dbArray = [];
var url = "http://udb.kr/local/category/390101?page=";
async.timesSeries(1166, function(n, next) {
var page = n + 1
async.waterfall([
// Step 1: download the page and select all table cells.
function(callback) {
request(url + page, function(error, response, html) {
if (error) {
throw error
};
var $ = cheerio.load(html),
tObj = $('tbody tr td');
callback(null, tObj);
});
},
// Step 2: walk the cells in strides of five, up to a fixed index of 150.
function(tObj, callback) {
for (var m = 0; m < 150; m = m + 5) {
if (tObj[m]) {
info.NM = tObj[m].firstChild.children[0].data
} else {
info.NM = null
};
// Unlike tObj[m] above, tObj[m + 1] is dereferenced without an existence
// check; this is the line reported in the TypeError stack trace.
if (tObj[m + 1].children != 0) {
info.nAddr = tObj[m + 1].firstChild.data
} else {
info.nAddr = null
};
console.log(info.NM);
dbArray.push(info);
}
// dbArray is passed in the first (error) argument position here.
callback(dbArray, callback);
},
// Step 3: append to the output file.
function(dbArray, callback) {
fs.appendFile('./jobDB_l.json', JSON.stringify(dbArray), function (err) {
if (err)
throw err;
});
// Called immediately, without waiting for appendFile's callback.
callback(null, 'done');
}
], function(err, result) {
console.log('----- ' +page+ '-----');
});
// next() runs right after the waterfall is started, not when it completes,
// so timesSeries moves on to the following page straight away.
next(null, 'done');
}, function(err, result) {
console.log(result)
});
To get these to work together where you are using waterfall inside of each timesSeries iteration, you need to call the timesSeries done callback from the completion callback for the waterfall call. Right now, you are calling it long before that which means that timesSeries won't wait for the waterfall to be done.
You can do that by changing this:
], function(err, result) {
console.log('----- ' +page+ '-----');
});
next(null, 'done');
to this:
], function(err, result) {
console.log('----- ' +page+ '-----');
next(null, 'done');
});
It also seems odd that you have a hard-coded for loop limit of m < 150 rather than using the actual length of the content. You can easily run off the end of the content and potentially cause problems.
And, your error handling probably won't work well either. If you throw inside of the async request() callback, that's not going to go anywhere. You need much better error handling such as calling callback(error) to pass the error on to async.waterfall().
You also may want to surround all your DOM walking in a try/catch so if you throw any exceptions there, you can catch them yourself, analyze them and then fix the code.
if (tObj[m+1] && tObj[m+1].children != 0)
I finally figured out how callbacks work in node.js, but I'm trying now to get my code to execute in order.
The goal is to (in order):
Load the URL into cheerio
Parse through each <td> in the <tbody> on the page.
Once text elements are loaded into the data array, callback.
Call loopThroughData on the full Data array.
Loop through the data array and call the lookForPlayer function on each
one, which:
Runs a SELECT in my db that matches the player name passed from the
text element, and if there is no match in my db, INSERT them (I have
it just printing to the console for now for testing purposes).
The end goal is to go through every page (there is a separate URL for each date, so I am looping through the dates) and INSERT players that aren't in my database ONCE. The problem is that it goes through each SELECT before the INSERT queries are executed, so it's inserting them multiple times.
Here is the page I'm parsing, if it helps: http://www.basketball-reference.com/friv/dailyleaders.cgi?month=12&day=29&year=2014
Here is my code:
// Downloads `url`, appends the text of every <td> inside <tbody> to the
// outer `data` array and calls callback(data) once rowsRemaining reaches 0.
// `rowsRemaining` and `data` are declared outside this function.
function loadPage (url, callback){
request(url, function(err, response, body){
if(!err && response.statusCode ==200){
var $ = cheerio.load(body);
// NOTE(review): `$` is the function returned by cheerio.load, so `$.length`
// is presumably not the number of matched <td> cells - confirm.
rowsRemaining = $.length;
$('td', 'tbody').each(function(){
var text = $(this).text();
data.push(text);
rowsRemaining -= 1;
console.log('rows left: ',rowsRemaining);
});
}
// The callback only fires when the counter lands exactly on 0.
if (rowsRemaining == 0){
console.log('$ length: ',$.length);
callback(data);
}
});
}
// Visits every 26th entry of `data` (starting at index 1), treats it as a
// player name and asks lookForPlayer about it.
function loopThroughData (data, callback){
// `i` is assigned without var/let, so it is not local to this function.
for(i=1;i<data.length;i+=26){
// replace() with a string pattern removes only the first apostrophe.
lookForPlayer(data[i].replace("'",""),function(name){
/* var insertPlayer = connection.query(
'INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = "'+data[i+1]+'"),1,"'+name+'");',function(err,result,fields){
}); */
// This runs inside the lookForPlayer callback, so it reads whatever value the
// shared `i` holds at that later time.
console.log('i is currently = ',i);
});
}
// Invoked right after the loop has started the lookups, without waiting for
// any of the callbacks above.
callback();
}
/**
 * Looks a player up by exact name and reports names that are NOT stored yet.
 *
 * @param {string} name - Player name to search for.
 * @param {function(string)} callback - Called with `name` only when no row
 *   matches; it is not called at all when the player already exists.
 * @throws Rethrows the query error from inside the query callback.
 */
function lookForPlayer(name, callback) {
  console.log('Looking for Player...');
  // Use a `?` placeholder rather than concatenating `name` into the SQL, so
  // names containing quotes (e.g. O'Neal) cannot break or alter the
  // statement. Assumes `connection` is a node-mysql style connection that
  // supports placeholder values.
  connection.query('SELECT * FROM player WHERE name = ?', [name], function (err, rows, fields) {
    if (err) throw err;
    if (rows.length === 0) {
      callback(name);
    }
  });
}
//loop through every day since the season started
// `d` is assigned without var and starts as seasonStart itself, so setDate()
// advances that same object. Every loadPage call is started inside this loop
// without waiting for the previous page to finish.
for (d = seasonStart; d <= Date.now(); d.setDate(d.getDate() + 1)){
console.log('d = ',d);
// getMonth() is zero-based, hence the +1.
loadPage(baseURL+(d.getMonth()+1)+'&day='+d.getDate()+'&year='+d.getFullYear(),function(data){
console.log('Page loaded...');
loopThroughData(data,function(){
});
});
}
As you can see, I tried adding a rowsRemaining variable that is meant to make sure I've parsed the whole file before calling the callback in the loadPage function, but it never gets to that point. Note that I initialize a lot of these variables before these functions (rowsRemaining, data, etc).
It also seems to loop through every date before fully loading, parsing, and INSERTing the first page, which it should not be doing.
Here is the updated code based off of @Brant's answer
// Downloads `url`, appends the text of every <td> inside <tbody> to the
// outer `data` array, then always calls callback(data).
function loadPage (url, callback){
request(url, function(err, response, body){
if(!err && response.statusCode ==200){
var $ = cheerio.load(body);
console.log(url);
$('td', 'tbody').each(function(){
var text = $(this).text();
data.push(text);
});
}
// Runs on failure as well (nothing is appended in that case).
// NOTE(review): `data` is declared outside this function and is not cleared
// here, so it presumably still holds the cells of earlier pages - confirm.
callback(data);
});
}
// Visits every 26th entry of `data` (starting at index 1), and inserts the
// player when lookForPlayer reports the name as missing.
function loopThroughData (data, callback){
// `i` is assigned without var/let, so it is not local to this function.
for(i=1;i<data.length;i+=26){
// replace() with a string pattern removes only the first apostrophe.
lookForPlayer(data[i].replace("'",""),function(name){
// data[i+1] is evaluated when this callback runs, using the value the shared
// `i` holds at that time. The statement is assembled by string concatenation
// and the INSERT's own callback is empty, so a failing INSERT is not reported.
var insertPlayer = connection.query(
'INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = "'+data[i+1]+'"),1,"'+name+'");',function(err,result,fields){
});
});
}
// Invoked right after the loop has started the lookups, without waiting for
// the SELECT or INSERT callbacks.
callback(data);
}
/**
 * Looks a player up by exact name and reports names that are NOT stored yet.
 *
 * @param {string} name - Player name to search for.
 * @param {function(string)} callback - Called with `name` only when no row
 *   matches; it is not called at all when the player already exists.
 * @throws Rethrows the query error from inside the query callback.
 */
function lookForPlayer(name, callback) {
  // Use a `?` placeholder rather than concatenating `name` into the SQL, so
  // names containing quotes (e.g. O'Neal) cannot break or alter the
  // statement. Assumes `connection` is a node-mysql style connection that
  // supports placeholder values.
  connection.query('SELECT * FROM player WHERE name = ?', [name], function (err, rows, fields) {
    if (err) throw err;
    if (rows.length === 0) {
      console.log(name,' was not found in DB!');
      callback(name);
    }
  });
}
//loop through every day since the season started
// `d` starts as seasonStart itself and the same Date object is pushed on
// every iteration while setDate() keeps mutating it, so all array entries
// refer to one object.
for (d = seasonStart; d <= Date.now(); d.setDate(d.getDate() + 1)){
validDatesArr.push(d);
}
// Processes the dates one at a time: the next date starts only after
// callback() is called for the current one.
async.eachSeries(validDatesArr,
// The iteratee's first parameter shadows the outer array name; inside this
// function `validDatesArr` is a single Date.
function(validDatesArr, callback){
// `+` is left-associative: getMonth()+1 is appended to the string as two
// separate pieces (the month number followed by the character "1") rather
// than added numerically.
loadPage(baseURL+'/month='+validDatesArr.getMonth()+1+'&day='+validDatesArr.getDate()+'&year='+validDatesArr.getFullYear(),function(data){
loopThroughData(data, function(){
callback();
});
});
}, function(err){
if(!err){
console.log('We processed each date requests one by one');
}
}
);
So now it's loading the pages one by one, but it isn't executing the INSERT function in the loopThroughData function on that data. I would think I would just add another function to the async list, but this particular one is calling a function as opposed to using an anonymous one.
Modify your for loop to be as follows:
//loop through every day since the season started
var validDatesArr = [];
// Walk a copy of seasonStart so the original is not advanced, and push a
// fresh Date per day: setDate() mutates in place, so pushing `d` itself would
// fill the array with references to one object that ends up past today.
for (var d = new Date(seasonStart); d <= Date.now(); d.setDate(d.getDate() + 1)) {
  validDatesArr.push(new Date(d));
}

// Process the dates strictly one after another: the next page is requested
// only after the current one has been handed to loopThroughData.
async.eachSeries(validDatesArr,
  function (day, callback) {
    // getMonth() is zero-based, hence the +1 (parenthesised so it is added
    // numerically before being concatenated).
    var pageUrl = baseURL + (day.getMonth() + 1) + '&day=' + day.getDate() + '&year=' + day.getFullYear();
    loadPage(pageUrl, function (data) {
      console.log('Page loaded...');
      loopThroughData(data, function () {
        callback();
      });
    });
  }, function (err) {
    if (err) {
      console.log(err);
      return;
    }
    console.log('We processed each date request one by one');
  }
);
And require async which can be found here: https://github.com/caolan/async
npm install async
You can nest the async functions to control the execution flow as in sequential programming, but be careful of the pyramid of doom. The other solution is to use the sync versions of the async functions you used (if they exist). You are not forced to write async functions if you do NOT need them; Node.js uses a lot of async functions because it is a non-blocking platform that is very powerful for web development. So do NOT use the async style and callbacks in your own functions if you do not need them!
I have a function in my express app that makes multiple queries within a For Loop and I need to design a callback that responds with JSON when the loop is finished. But, I'm not sure how to do this in Node yet. Here is what I have so far, but it's not yet working...
// Saves every numerically-keyed entry of the request body as a Contact owned
// by the current user, then tries to send the response.
exports.contacts_create = function(req, res) {
var contacts = req.body;
// The function below declares `res` and `contacts` as parameters but is
// invoked with no arguments (see the trailing `()`), so both are undefined
// inside it.
(function(res, contacts) {
for (var property in contacts) { // for each contact, save to db
if( !isNaN(property) ) {
// `contact` is assigned without var/let.
contact = contacts[property];
var newContact = new Contact(contact);
newContact.user = req.user.id
// Save errors are only logged; nothing waits for the save to finish.
newContact.save(function(err) {
if (err) { console.log(err) };
}); // .save
}; // if !isNAN
}; // for
// `self` is not defined in this snippet, and response() is called here
// without the req/res arguments it declares.
self.response();
})(); // function
}; // contacts_create
exports.response = function(req, res, success) {
res.json('finished');
};
There are a few problems with your code besides just the callback structure.
var contacts = req.body;
(function(res, contacts) {
...
})(); // function
^ you are redefining contacts and res in the parameter list, but not passing in any arguments, so inside your function res and contacts will be undefined.
Also, not sure where your self variable is coming from, but maybe you defined that elsewhere.
As to the callback structure, you're looking for something like this (assuming contacts is an Array):
exports.contacts_create = function(req, res) {
var contacts = req.body;
var iterator = function (i) {
if (i >= contacts.length) {
res.json('finished'); // or call self.response() or whatever
return;
}
contact = contacts[i];
var newContact = new Contact(contact);
newContact.user = req.user.id
newContact.save(function(err) {
if (err)
console.log(err); //if this is really a failure, you should call response here and return
iterator(i + 1); //re-call this function with the next index
});
};
iterator(0); //start the async "for" loop
};
However, you may want to consider performing your database saves in parallel. Something like this:
// Number of saves that have not reported back yet; the response is sent when
// it reaches 0.
var savesPending = contacts.length;
// Shared completion handler. `i` is pre-bound per contact through bind()
// below, so a failure can be attributed to its contact.
var saveCallback = function (i, err) {
if (err)
console.log('Saving contact ' + i + ' failed.');
if (--savesPending === 0)
res.json('finished');
};
// NOTE(review): when contacts is empty no save is started, so res.json is
// never reached - confirm callers never send an empty list.
for (var i in contacts) {
...
newContact.save(saveCallback.bind(null, i));
}
This way you don't have to wait for each save to complete before starting the next round-trip to the database.
If you're unfamiliar with why I used saveCallback.bind(null, i), it's basically so the callback can know which contact failed in the event of an error. See Function.prototype.bind if you need a reference.