Forcing Sequential Code in node.js - javascript

I finally figured out how callbacks work in node.js, but I'm trying now to get my code to execute in order.
The goal is to (in order):
Load the URL into cheerio
Parse through each <td> in the <tbody> on the page.
Once text elements are loaded into the data array, callback.
Call loopThroughData on the full Data array.
Loop through the data array and call the lookForPlayer array on each
one, which:
Runs a SELECT in my db that matches the player name passed from the
text element, and if there is no match in my db, INSERT them (I have
it just printing to the console for now for testing purposes).
The end goal is to go through every page (there is a separate URL for each date, so I am looping through the dates) and INSERT players that aren't in my database ONCE. The problem is that it goes through each SELECT before the INSERT queries are executed, so it's inserting them multiple times.
Here is the page I'm parsing, if it helps: http://www.basketball-reference.com/friv/dailyleaders.cgi?month=12&day=29&year=2014
Here is my code:
function loadPage (url, callback){
request(url, function(err, response, body){
if(!err && response.statusCode ==200){
var $ = cheerio.load(body);
rowsRemaining = $.length;
$('td', 'tbody').each(function(){
var text = $(this).text();
data.push(text);
rowsRemaining -= 1;
console.log('rows left: ',rowsRemaining);
});
}
if (rowsRemaining == 0){
console.log('$ length: ',$.length);
callback(data);
}
});
}
function loopThroughData (data, callback){
for(i=1;i<data.length;i+=26){
lookForPlayer(data[i].replace("'",""),function(name){
/* var insertPlayer = connection.query(
'INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = "'+data[i+1]+'"),1,"'+name+'");',function(err,result,fields){
}); */
console.log('i is currently = ',i);
});
}
callback();
}
function lookForPlayer(name, callback){
console.log('Looking for Player...');
var selectPlayer = connection.query(
"SELECT * FROM player WHERE name = '"+name+"'", function(err, rows, fields){
if(err) throw err;
if(rows.length==0){
callback(name);
}
});
}
//loop through every day since the season started
for (d = seasonStart; d <= Date.now(); d.setDate(d.getDate() + 1)){
console.log('d = ',d);
loadPage(baseURL+(d.getMonth()+1)+'&day='+d.getDate()+'&year='+d.getFullYear(),function(data){
console.log('Page loaded...');
loopThroughData(data,function(){
});
});
}
As you can see, I tried adding a rowsRemaining variable that is meant to make sure I've parsed the whole file before calling the callback in the loadPage function, but it never gets to that point. Note that I initialize a lot of these variables before these functions (rowsRemaining, data, etc).
It also seems to loop through every date before fully loading, parsing, and INSERTing the first page, which it should not be doing.
Here is the updated code based off of #Brant's answer
function loadPage (url, callback){
request(url, function(err, response, body){
if(!err && response.statusCode ==200){
var $ = cheerio.load(body);
console.log(url);
$('td', 'tbody').each(function(){
var text = $(this).text();
data.push(text);
});
}
callback(data);
});
}
function loopThroughData (data, callback){
for(i=1;i<data.length;i+=26){
lookForPlayer(data[i].replace("'",""),function(name){
var insertPlayer = connection.query(
'INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = "'+data[i+1]+'"),1,"'+name+'");',function(err,result,fields){
});
});
}
callback(data);
}
function lookForPlayer(name, callback){
var selectPlayer = connection.query(
"SELECT * FROM player WHERE name = '"+name+"'", function(err, rows, fields){
if(err) throw err;
if(rows.length==0){
console.log(name,' was not found in DB!');
callback(name);
}
});
}
//loop through every day since the season started
for (d = seasonStart; d <= Date.now(); d.setDate(d.getDate() + 1)){
validDatesArr.push(d);
}
async.eachSeries(validDatesArr,
function(validDatesArr, callback){
loadPage(baseURL+'/month='+validDatesArr.getMonth()+1+'&day='+validDatesArr.getDate()+'&year='+validDatesArr.getFullYear(),function(data){
loopThroughData(data, function(){
callback();
});
});
}, function(err){
if(!err){
console.log('We processed each date requests one by one');
}
}
);
So now it's loading the pages one by one, but it isn't executing the INSERT function in the loopThroughData function on that data. I would think I would just add another function to the async list, but this particular one is calling a function as opposed to using an anonymous one.

Modify your for loop to be as follows:
//loop through every day since the season started
var validDatesArr = [];
for (var d = seasonStart; d <= Date.now(); d.setDate(d.getDate() + 1)){
validDatesArr.push(d);
}
async.eachSeries(validDatesArr,
function(d, callback) {
loadPage(baseURL+(d.getMonth()+1)+'&day='+d.getDate()+'&year='+d.getFullYear(),function(data){
console.log('Page loaded...');
loopThroughData(data,function(){
callback();
});
});
}, function(err) {
if(!err) {
console.log('We processed each date request one by one')
}
}
);
And require async which can be found here: https://github.com/caolan/async
npm install async

You can nested the Async function to control the execute flow like in a sequence programming, be careful at the Pyramid of doom, the other solution is to use the Sync version of the async functions you used (if exist). You are not forced to write Async function if you do NOT need them, Node.js use a lot of Async function because is a Non-bloking language very powerful for web development. So do NOT use the asyn style and the callback in your functions !

Related

Log only shows one of four rows of data

I am writing a small Node js application for automatic vehicle location system.
Here is the code for where I am getting trouble.
markerData contains 4 rows but only in the log I can see the last row.
for (var i = 0, len = markerData.length; i < len; i++) {
var thisMarker = markerData[i];
sql.connect(config, function (err) {
var request = new sql.Request();
request.input('myval', sql.Int, thisMarker.id);
request.query('SELECT d.id, d.name, d.lastupdate, p.latitude, p.longitude, p.speed, p.course FROM dbo.devices AS d INNER JOIN dbo.positions AS p ON d.positionid = p.id AND d.id = p.deviceid WHERE (d.id = #myval)', function (err, recordset2) {
if (typeof recordset2 != 'undefined') {
thisMarker.position.lat = recordset2[0].latitude;
thisMarker.position.long = recordset2[0].longitude;
console.log(recordset2[0].id);
}
});
});
}
Please help me to solve the issue.
As var is not a block level variable in terms of scope, when `sql' module takes time to connect to the database asynchronously, the synchronous loop may change the value of the variable that's why you have the last row printed since the variable holds the reference to the last object at the time of successful connection.
Instead of _.each, I would recommend to use async module with async.each since you have few asynchronous operation to get rid of a synchronous loop.
You can check for samples here,
http://justinklemm.com/node-js-async-tutorial/
Here is your updated code with async.each
-> Install async module with npm install async --save
-> Then add the below reference in the required place,
// Reference
var async = require('async');
-> Modified code:
sql.connect(config, function (err) {
if(err) {
console.log('Connection error: ');
console.log(err);
} else {
async.each(markerData, function(thisMarker, callback) {
var request = new sql.Request();
request.input('myval', sql.Int, thisMarker.id);
request.query('SELECT d.id, d.name, d.lastupdate, p.latitude, p.longitude, p.speed, p.course FROM dbo.devices AS d INNER JOIN dbo.positions AS p ON d.positionid = p.id AND d.id = p.deviceid WHERE (d.id = #myval)', function (err, recordset2) {
if(err) {
console.log(err);
callback();
} else {
if (typeof recordset2 != 'undefined') {
thisMarker.position.lat = recordset2[0].latitude;
thisMarker.position.long = recordset2[0].longitude;
console.log(recordset2[0].id);
} else {
console.log('Recordset empty for id: ' + thisMarker.id);
}
callback();
}
});
}, function(err){
if(err) {
console.log(err);
}
});
}
});
I'm not entirely sure how your library works, but presumably recordset2 is an array of records. recordset2[0] is therefore the first record. If you want the next one you should probably try recordset2[1] and so on and so forth.
Arrays: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array
You'll probably need to loop through all the elements in the array at some point. use a for loop for that:
for (var i = 0; i < recordset2.length; i++ {
console.log(recordset2[i])
}
That will print out everything your query returns.

Node.js maximum call stack exceeded

I've have the same question as earlier but having trouble relating the answers to my code. Here's my error: RangeError: Maximum call stack size exceeded.
The highly simplified pseudo version of my code is this:
function make_request(url, other_params){
request(url, function(response){
if(something) var some_var = 'some value';
else var some_var = '';
//do something with response to generate, some_var, and insert into DB
var my_arr = [some_var];
connection.query('INSERT my_table SET name = ?', my_arr, function(err, rows, fields) {
if(my_arr==''){
// generate new url to make new request
make_request(url, other_params);
}
});
});
}
connection.query('SELECT * from my_table', function(err, rows, fields){
var len =rows.length;
for(var i = 0; i < len; i++){
var url = rows[i].url;
make_request(url, other_params);
}
});
I've tried wrapping the internal make_request in setImmediate or setTimeout amongst a few other hacks, but nothing seems to prevent the call stack error. I'm able to add any library that would make this work. Any thoughts would be appreciated.
If I'm reading the code correctly, the block
if(my_arr==''){
// generate new url to make new request
function make_request(url, other_params);
}
Redefines make_request to be a null function. Don't you want to call it an this point, instead? I think that the null redefinition would leave you with an indirect recursion that has no way to terminate.
Try change your code
function make_request(url, other_params, callback){ // all async function mast have callback
// Hmmm...
// request(url, function(response){
request(url, function(err, response){ // according by doc - https://github.com/request/request
// Use short notation
var some_var = (smth) ? 'some-value' : '';
// do-smth. Mayby problem is here?
var my_arr = [some_var];
connection.query('INSERT my_table SET name = ?', my_arr, function(err, rows, fields) {
// Always check error
if(err)
return callback(err);
// I don't understand what is it. If some_var == '' then my_arr == [], not ''
// In any case this check must do on upper level
/*
if(my_arr=='')
make_request(url, other_params);
*/
callback(null, rows); // return data
});
});
}
var async = require('async');
connection.query('SELECT url from my_table', function(err, rows, fields){ // if you need one field then don't request all
// I repeat: Always check error. SQL can be correct, but db is busy.
if (err)
return console.log(err.message);
async.mapSeries(rows, make_request, function(err, results) {
if (err)
return console.log(err);
// do smth with results
console.log(results);
});
});
P.S. heinob in prev your question make very good answer.

Callback until for loop with query is done

I posted a question before and realized my problem actually was async functions. I managed to work out most of it, but I got one little problem left. Using async I used waterfall to create an order for the some queries...
exports.getMenu = function(id_restaurant, callback){
async.waterfall([
async.apply(firstQuery, id_restaurant),
secondQuery,
thirdQuery,
fourthQuery,
formMenu
], function(err, result){
if(err){
console.log(err);
}
callback(result);
});
};
Everything works until fourthQuery, where I have to loop to get all dishes of a menu.
function fourthQuery(array_totalP, array_nombresSecc, array_secciones, callback){
var size = array_nombresSecc.length;
var array_secciones = array_secciones;
var array_nombresSecc = array_nombresSecc;
var dishes = [];
pool.getConnection(function(err, connection) {
if(err) {
console.log(err);
callback(true);
return;
}
for (var i = 0; i < size; i++) {
connection.query("SELECT name, price FROM menu_product WHERE id_seccion = ? AND active = 1", [array_secciones[i]],
function(err, results2) {
if(err) {
console.log(err);
callback(true);
return;
}
console.log("Result query 4 " + JSON.stringify(results2));
dishes[i] = results2;
console.log("VALOR PLATILLOS EN i : " + JSON.stringify(dishes[i]));
// this prints the result but only if it has a value over 2
});
};
}); // pool
console.log("I'm sending " + dishes); // this logs an empty array
callback(null, dishes, array_nombresSecc);
};
So what i can see that happens from printing the value of 'i' each loop is that it always has the value of 2. Because that's 'size' value. Also, even though it's saving results of index '2' I believe the callback is being done even before the for loop is done, because my fifth function is recieving an empty array.
How can i make my code wait to callback until my for loop is done?
NOTE: Sorry, part of my code is in spanish, tried to translate the important parts of it.
There are a few ways to handle this, one is to look into promise architecture. Promise.all will let you supply one callback to handle the values from each child promise.
To use what you've already got, however, I'd push the values into your dishes array, rather than assigning them specifically to i indexes, then check the size of that array at the end of each connection. When the array length matches the size, fire the callback. (as seen below)
If you need a way to tie each result to that specific i value, I'd recommend pushing them as an object
dishes.push({'index': i, 'dish': results2})
Afterward, if you need the array of just dishes, you can sort the array by that index value and run a map function.
dishes.sort(function(a,b){ return a.index - b.index; })
dishes = dishes.map(function(a){ return a.dish })
Here's the code adjusted:
function fourthQuery(array_totalP, array_nombresSecc, array_secciones, callback) {
var size = array_nombresSecc.length;
var array_secciones = array_secciones;
var array_nombresSecc = array_nombresSecc;
var dishes = [];
pool.getConnection(function(err, connection) {
if (err) {
console.log(err);
callback(true);
return;
}
for (var i = 0; i < size; i++) {
connection.query("SELECT name, price FROM menu_product WHERE id_seccion = ? AND active = 1", [array_secciones[i]],
function(err, results2) {
if (err) {
console.log(err);
callback(true);
return;
}
console.log("Result query 4 " + JSON.stringify(results2));
dishes.push(results2)
if(dishes.length == size){
console.log("I'm sending " + dishes);
callback(null, dishes, array_nombresSecc)
}
console.log("VALOR PLATILLOS EN i : " + JSON.stringify(dishes[i]));
// this prints the result but only if it has a value over 2
});
};
}); // pool
;
};
Since you're already using the async, I would suggest replacing the for() loop in fourthQuery with async.each().
The updated fourthQuery would look like this:
function fourthQuery(array_totalP, array_nombresSecc, array_secciones, callback){
var size = array_nombresSecc.length;
var array_secciones = array_secciones;
var array_nombresSecc = array_nombresSecc;
var dishes = [];
pool.getConnection(function(err, connection) {
if(err) {
console.log(err);
callback(true);
return;
}
async.each(array_secciones,
function(item, itemCallback) {
// Function fun for each item in array_secciones
connection.query("SELECT name, price FROM menu_product WHERE id_seccion = ? AND active = 1", [item],
function(err, results2) {
if(err) {
console.log(err);
return itemCallback(true);
}
console.log("Result query 4 " + JSON.stringify(results2));
dishes.push(results2);
console.log("VALOR PLATILLOS EN i : " + JSON.stringify(dishes[dishes.length-1]));
// this prints the result but only if it has a value over 2
return itemCallback();
});
},
function(err) {
// Function run after all items in array are processed or an error occurs
console.log("I'm sending " + dishes); // this logs an empty array
callback(null, dishes, array_nombresSecc);
});
}); // pool
};
Alternatively, you can use async.map(), which handles gathering the results in the final callback so doesn't rely on the dishes variable.

synchronic node js function

i am quiet new to java script and node js.
i have a problem with a simple function that i call, and it gets done more than one time.
this is my code
app.post('/checkGetSensorIds', function (req, res) {
var tables=['temperature', 'pressure', 'linear_acceleration'];
var ids= [1];
DButils.checkAllSensorsForId(connection, 1 , tables , function(idHasSensorsInfo){
console.log("idHasSensorsInfo is: \n" , idHasSensorsInfo);
});
res.end();
});
/*this function gets a user Id, and the table of all sensors the customer wants, and return true if this
user id has information in all the sesnsor tables that were requested, otherwise returns false*/
exports.checkAllSensorsForId= function(dbConnection, id , sensorsTables, callback){
var sensorsTablesLength= sensorsTables.length;
for (var i = 0; i < sensorsTables.length; i++) {
var tableName= sensorsTables[i];
DButils.checkSingleSensorForId(dbConnection, id, tableName, function(idHasSensorInfo){
if(idHasSensorInfo == false){
callback(false);
return;
}
//in case user have all info in db, we get here and need to return false
if(i == sensorsTablesLength){
callback(true);
return;
}
});
}
};
/*this function gets a user Id, and a single sensor table, and returns true if the user has information
in the requested sensor table, otherwise returns false*/
exports.checkSingleSensorForId= function(dbConnection , id , sensorTable, callback){
var myQuery = 'SELECT count(*) as IdCount FROM ' + sensorTable + ' WHERE id= ' + id;
var query = dbConnection.query(myQuery, function (err, row, result) {
console.log(query.sql);
if (err) {
console.log("checkSingleSensorForId error");
console.error(err);
return;
}
var count= row[0].IdCount;
var idHasSensorInfo = (count > 0);
callback(idHasSensorInfo);
});
};
console.log("idHasSensorsInfo is: \n" , idHasSensorsInfo); is a line that invoked 3 times, while should be only once.
someone has any idea why, and what i need to do to fix it?
You have this line:
DButils.checkAllSensorsForId(connection, 1 , tables , function(idHasSensorsInfo){
console.log("idHasSensorsInfo is: \n" , idHasSensorsInfo);
});
Then you have this:
exports.checkAllSensorsForId= function(dbConnection, id , sensorsTables, callback){
...
for (var i = 0; i < sensorsTables.length; i++) {
...
callback();
...
}
};
So the callback line will be invoked as many times as you call it, which in your case is probably 3 - all it does is call the function from above, so thats why you see it invoked 3 times.
I'm not sure exactly what you are trying to do, but if the callback should be only called once, make sure its ran only once - if it should 'cancel' the for - add a condition to the for or use a promise to resolve whenever you are ready.

Node/Express - How to wait until For Loop is over to respond with JSON

I have a function in my express app that makes multiple queries within a For Loop and I need to design a callback that responds with JSON when the loop is finished. But, I'm not sure how to do this in Node yet. Here is what I have so far, but it's not yet working...
exports.contacts_create = function(req, res) {
var contacts = req.body;
(function(res, contacts) {
for (var property in contacts) { // for each contact, save to db
if( !isNaN(property) ) {
contact = contacts[property];
var newContact = new Contact(contact);
newContact.user = req.user.id
newContact.save(function(err) {
if (err) { console.log(err) };
}); // .save
}; // if !isNAN
}; // for
self.response();
})(); // function
}; // contacts_create
exports.response = function(req, res, success) {
res.json('finished');
};
There are a few problems with your code besides just the callback structure.
var contacts = req.body;
(function(res, contacts) {
...
})(); // function
^ you are redefining contacts and res in the parameter list, but not passing in any arguments, so inside your function res and contacts will be undefined.
Also, not sure where your self variable is coming from, but maybe you defined that elsewhere.
As to the callback structure, you're looking for something like this (assuming contacts is an Array):
exports.contacts_create = function(req, res) {
var contacts = req.body;
var iterator = function (i) {
if (i >= contacts.length) {
res.json('finished'); // or call self.response() or whatever
return;
}
contact = contacts[i];
var newContact = new Contact(contact);
newContact.user = req.user.id
newContact.save(function(err) {
if (err)
console.log(err); //if this is really a failure, you should call response here and return
iterator(i + 1); //re-call this function with the next index
});
};
iterator(0); //start the async "for" loop
};
However, you may want to consider performing your database saves in parallel. Something like this:
var savesPending = contacts.length;
var saveCallback = function (i, err) {
if (err)
console.log('Saving contact ' + i + ' failed.');
if (--savesPending === 0)
res.json('finished');
};
for (var i in contacts) {
...
newContact.save(saveCallback.bind(null, i));
}
This way you don't have to wait for each save to complete before starting the next round-trip to the database.
If you're unfamiliar with why I used saveCallback.bind(null, i), it's basically so the callback can know which contact failed in the event of an error. See Function.prototype.bind if you need a reference.

Categories