I'm trying to generate a hashchain using the following code:
var async = require('async');
var _ = require('lodash');
// NOTE(review): `offset` is not read anywhere in the visible snippet.
var offset = 1e7;
// Total number of hashes to generate (1e7 = 10,000,000).
var games = 1e7;
// Countdown of hashes still to be generated; decremented once per insert task.
var game = games;
// Starting seed; every insert task replaces it with genGameHash(serverSeed).
var serverSeed = '238asd1231hdsad123nds7a182312nbds1';
/**
 * Generates the hash chain in batches of at most 1000 insert tasks and
 * re-invokes itself until `game` is no longer greater than 0.
 *
 * @param {Function} cb - invoked with no arguments after the last batch
 */
function loop(cb) {
// Size of this batch: the remaining count, capped at 1000.
var parallel = Math.min(game, 1000);
// One task function per hash in this batch.
var inserts = _.range(parallel).map(function() {
return function(cb) {
// Advance the chain: the next hash is derived from the current seed.
serverSeed = genGameHash(serverSeed);
game--;
// NOTE(review): query() is given no completion callback and this task never
// calls its own `cb`, so async.parallel is never told that the task finished.
query('INSERT INTO `hash` SET `hash` = ' + pool.escape(serverSeed));
};
});
// The final callback below only runs once every task has called its `cb`.
async.parallel(inserts, function(err) {
if (err) throw err;
// Clear the current line and move to the beginning.
var pct = 100 * (games - game) / games;
console.log('PROGRESS: ' + pct.toFixed(2) + '%')
// Start the next batch while hashes remain; otherwise report completion.
if (game > 0){
loop(cb);
}else {
console.log('Done');
cb();
}
});
}
// Kick off the first batch; the callback logs the last seed that was generated.
loop(function() {
console.log('Finished with SEED: ', serverSeed);
});
When I run this code it generates a hash chain of 1k hashes, while I'm trying to generate a chain of 1m hashes. It seems like async isn't working properly, but I have no idea why: there are no errors in the console and nothing that points to a flaw.
Any ideas?
Can you run it with a smaller number of games (about 3000)?
Your parallel function never sends the done signal because the callback of each inserts item is never triggered. I think the query function takes two parameters: query(sql: string, callback?: (err, result) => void) (TypeScript style).
I suggest you change your logic and flow as in the code block below:
// Build one insert task per hash in this batch. Every task MUST call `cb`
// exactly once; otherwise async.parallel never reaches its final callback.
var inserts = _.range(parallel).map(function() {
    return function(cb) {
        // Advance the chain before inserting: the next hash derives from the current seed.
        serverSeed = genGameHash(serverSeed);
        query('INSERT INTO `hash` SET `hash` = ' + pool.escape(serverSeed), function(err, result) {
            if (err) {
                // Propagate the failure instead of swallowing it: the seed has
                // already advanced, so continuing would leave a silent gap in
                // the stored chain.
                return cb(err);
            }
            // Only count hashes that were actually stored.
            game--;
            cb(); // remember to call the callback
        });
    };
});
In your code you have used async.parallel; I don't think that is a good idea, because too many connections would be opened (1m). For this case, parallelLimit is recommended.
Related
I am writing a small Node js application for automatic vehicle location system.
Here is the code for where I am getting trouble.
markerData contains 4 rows but only in the log I can see the last row.
// For every marker, connect to the database and copy the first returned
// position (latitude/longitude) onto the marker.
for (var i = 0, len = markerData.length; i < len; i++) {
// `var` is function-scoped, so every callback created below shares this one
// `thisMarker` binding rather than getting a per-iteration copy.
var thisMarker = markerData[i];
sql.connect(config, function (err) {
var request = new sql.Request();
request.input('myval', sql.Int, thisMarker.id);
// NOTE(review): `#myval` is presumably meant to be the `@myval` parameter
// registered with request.input above — confirm.
request.query('SELECT d.id, d.name, d.lastupdate, p.latitude, p.longitude, p.speed, p.course FROM dbo.devices AS d INNER JOIN dbo.positions AS p ON d.positionid = p.id AND d.id = p.deviceid WHERE (d.id = #myval)', function (err, recordset2) {
// Only the first row of the result is used.
if (typeof recordset2 != 'undefined') {
thisMarker.position.lat = recordset2[0].latitude;
thisMarker.position.long = recordset2[0].longitude;
console.log(recordset2[0].id);
}
});
});
}
Please help me to solve the issue.
Since var is not block-scoped, and the `sql` module connects to the database asynchronously, the synchronous loop has already changed the value of the variable by the time the connection callbacks run. That is why only the last row is printed: the variable holds a reference to the last object at the time of successful connection.
Instead of _.each, I would recommend using the async module's async.each: since you have a few asynchronous operations, it lets you get rid of the synchronous loop.
You can check for samples here,
http://justinklemm.com/node-js-async-tutorial/
Here is your updated code with async.each
-> Install async module with npm install async --save
-> Then add the below reference in the required place,
// Reference
var async = require('async');
-> Modified code:
// Open the connection once, then look up the latest position of every marker.
sql.connect(config, function (err) {
    if (err) {
        console.log('Connection error: ');
        console.log(err);
        return;
    }
    // async.each calls the iteratee once per marker, so `thisMarker` is a
    // separate binding for each lookup instead of one shared loop variable.
    async.each(markerData, function (thisMarker, callback) {
        var request = new sql.Request();
        request.input('myval', sql.Int, thisMarker.id);
        // The placeholder must be `@myval` to match the name registered with
        // request.input above.
        request.query('SELECT d.id, d.name, d.lastupdate, p.latitude, p.longitude, p.speed, p.course FROM dbo.devices AS d INNER JOIN dbo.positions AS p ON d.positionid = p.id AND d.id = p.deviceid WHERE (d.id = @myval)', function (err, recordset2) {
            if (err) {
                console.log(err);
            } else if (recordset2 && recordset2.length > 0) {
                // Only the first matching row is used.
                thisMarker.position.lat = recordset2[0].latitude;
                thisMarker.position.long = recordset2[0].longitude;
                console.log(recordset2[0].id);
            } else {
                // Covers both a missing recordset and a query that matched no rows.
                console.log('Recordset empty for id: ' + thisMarker.id);
            }
            // Always signal completion. A failed lookup is logged but does not
            // abort the lookups for the remaining markers.
            callback();
        });
    }, function (err) {
        if (err) {
            console.log(err);
        }
    });
});
I'm not entirely sure how your library works, but presumably recordset2 is an array of records. recordset2[0] is therefore the first record. If you want the next one you should probably try recordset2[1] and so on and so forth.
Arrays: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array
You'll probably need to loop through all the elements in the array at some point. Use a for loop for that:
// Print every record returned by the query, one per line.
for (var i = 0; i < recordset2.length; i++) {
    console.log(recordset2[i]);
}
That will print out everything your query returns.
I'm wanting to return a value in the main of my AWS function. I'm having trouble getting the data to pass from the first callback so I can send it to the final one.
/** module used for outbound http requests */
var request = require('request');
/** module used for parsing XML easily. https://www.npmjs.com/package/xml2js*/
var parseString = require('xml2js').parseString;
// Lambda entry point: builds a fixed test list of coordinates and tries to
// return the NOAA result through `callback`.
exports.handler = (event, context, callback) => {
// testing array of coordinates
var arrayOfPoints = [39.7683800, -86.1580400, 41.881832, -87.623177];
// getXMLFromNOAA has no function-level return statement, so `results` is
// undefined here; no callback is passed as its second argument either.
var results = getXMLFromNOAA(arrayOfPoints);
callback(null, results); // <- returns 'undefined' in the AWS console. I'm assuming race condition.
};
/**
 * getXMLFromNOAA
 *
 * Builds a NOAA NDFD query URL from a flat list of coordinates, requests it,
 * and parses the XML response body with xml2js' parseString.
 *
 * As written, the parsed result is only logged: the function has no
 * function-level return value and never invokes `callback`.
 *
 * @param arrayOfPoints {Array[Double]} - An evenly numbered index array of latitudes and longitudes
 * @param callback {Function} - accepted but not called (the call below is commented out)
 */
function getXMLFromNOAA(arrayOfPoints, callback) {
var baseURL = "http://graphical.weather.gov/xml/sample_products/browser_interface/ndfdXMLclient.php?whichClient=NDFDgenLatLonList&lat=&lon=&listLatLon=";
// for-loop getting all points and dynamically adding them to the query url string
// iterate 2 at a time since they are coupled coordinates (e.g. [lat1, lng1, lat2, lng2, ... latN, lngN])
for(var i = 0; i < arrayOfPoints.length; i = i + 2)
{
// if we're at the end of the arrayOfPoints, finish up the chain of query coordinates
if( (i+2) == arrayOfPoints.length)
{
// Last pair: "lat%2Clng" with no trailing separator.
baseURL = baseURL.concat(arrayOfPoints[i]);
baseURL = baseURL.concat("%2C");
baseURL = baseURL.concat(arrayOfPoints[i+1]);
}
else
{
// Any other pair: "lat%2Clng" followed by "+" to separate it from the next pair.
baseURL = baseURL.concat(arrayOfPoints[i]);
baseURL = baseURL.concat("%2C");
baseURL = baseURL.concat(arrayOfPoints[i+1]);
baseURL = baseURL.concat("+");
}
}
// TIME
// NOTE(review): `¢erPointLat`, `¢erPointLon` and `§or` in the literal below look like
// mis-decoded `&centerPointLat`, `&centerPointLon` and `&sector` — confirm against the NOAA parameter list.
baseURL = baseURL.concat("&lat1=&lon1=&lat2=&lon2=&resolutionSub=&listLat1=&listLon1=&listLat2=&listLon2=&resolutionList=&endPoint1Lat=&endPoint1Lon=&endPoint2Lat=&endPoint2Lon=&listEndPoint1Lat=&listEndPoint1Lon=&listEndPoint2Lat=&listEndPoint2Lon=&zipCodeList=&listZipCodeList=¢erPointLat=¢erPointLon=&distanceLat=&distanceLon=&resolutionSquare=&listCenterPointLat=&listCenterPointLon=&listDistanceLat=&listDistanceLon=&listResolutionSquare=&citiesLevel=&listCitiesLevel=§or=&gmlListLatLon=&featureType=&requestedTime=&startTime=&endTime=&compType=&propertyName=&product=time-series&begin=2016-09-04T00:00:00&end=2016-09-11T00:00:00");
// CHARACTERISTICS REQUESTED
// http://graphical.weather.gov/xml/docs/elementInputNames.php
baseURL = baseURL.concat("&Unit=e&maxt=maxt&mint=mint&temp=temp&appt=appt&rh=rh&sky=sky&wwa=wwa&iceaccum=iceaccum&ptornado=ptornado&phail=phail&ptstmwinds=ptstmwinds&pxtornado=pxtornado&pxhail=pxhail&ptotsvrtstm=ptotsvrtstm&wgust=wgust");
// Used for testing and seeing the final result
console.log(baseURL);
// Nothing is reported to the caller when the request errors or the status is not 200.
request(baseURL, function (error, response, body)
{
if (!error && response.statusCode == 200)
{
parseString(body, function (err, result) {
console.log('inside parseString: ' + result); // <- this prints but it won't show up in the main
// callback(null, result); <- doesnt work
// This return only exits the parseString callback; it is not the return
// value of getXMLFromNOAA.
return result; // doesnt work either
});
}
})
}
I want to be able to make my code more modular for scalability. I know there's a way to take the async process of getXMLFromNOAA and perform it iteratively. I'm just not as familiar with JavaScript as I should be. Any help would really be appreciated.
You can use async module to make it more readable and flexible and also free from the asynchronous issue.
Write your stuff something like this
/** module used for outbound http requests */
var request = require('request');
var async = require('async');
/** module used for parsing XML easily. https://www.npmjs.com/package/xml2js*/
var parseString = require('xml2js').parseString;
exports.handler = (event, context, callback) => {
async.waterfall([
function(next) {
// testing array of coordinates
var arrayOfPoints = [39.7683800, -86.1580400, 41.881832, -87.623177];
var results = getXMLFromNOAA(arrayOfPoints, next);
},
function(baseURL, next) {
request(baseURL, function(error, response, body) {
if (!error && response.statusCode == 200) {
parseString(body, function(err, result) {
console.log('inside parseString: ' + result); // <- this prints but it won't show up in the main
if (!err)
next(null, result);
});
}
})
}
], function(err, result) {
if (!err) {
callback(null, results); // <- returns 'undefined' in the AWS console. I'm assuming race condition.
}
})
};
/**
 * getXMLFromNOAA
 *
 * Builds the NOAA NDFD query URL for a list of coordinates and passes it to
 * `next`. Despite its name, this version performs no HTTP request and no XML
 * parsing; it only builds the URL.
 *
 * @param {number[]} arrayOfPoints - Flat, even-length list of coordinates:
 *     [lat1, lng1, lat2, lng2, ... latN, lngN]
 * @param {Function} next - Node-style callback: next(null, url) on success,
 *     next(error) when arrayOfPoints does not contain complete lat/lng pairs
 */
function getXMLFromNOAA(arrayOfPoints, next) {
    // An odd number of values would put the text "undefined" into the URL.
    if (arrayOfPoints.length % 2 !== 0) {
        next(new Error('arrayOfPoints must contain an even number of values (lat/lng pairs)'));
        return;
    }

    // Each pair becomes "lat%2Clng" (%2C is an encoded comma); pairs are separated by "+".
    var pairs = [];
    for (var i = 0; i < arrayOfPoints.length; i += 2) {
        pairs.push(arrayOfPoints[i] + "%2C" + arrayOfPoints[i + 1]);
    }

    var baseURL = "http://graphical.weather.gov/xml/sample_products/browser_interface/ndfdXMLclient.php?whichClient=NDFDgenLatLonList&lat=&lon=&listLatLon=";
    baseURL += pairs.join("+");

    // TIME
    // The parameters are spelled out as `&centerPointLat`, `&centerPointLon` and
    // `&sector`; the earlier literal had them corrupted into `¢er...` and `§or`.
    baseURL += "&lat1=&lon1=&lat2=&lon2=&resolutionSub=&listLat1=&listLon1=&listLat2=&listLon2=&resolutionList=&endPoint1Lat=&endPoint1Lon=&endPoint2Lat=&endPoint2Lon=&listEndPoint1Lat=&listEndPoint1Lon=&listEndPoint2Lat=&listEndPoint2Lon=&zipCodeList=&listZipCodeList=&centerPointLat=&centerPointLon=&distanceLat=&distanceLon=&resolutionSquare=&listCenterPointLat=&listCenterPointLon=&listDistanceLat=&listDistanceLon=&listResolutionSquare=&citiesLevel=&listCitiesLevel=&sector=&gmlListLatLon=&featureType=&requestedTime=&startTime=&endTime=&compType=&propertyName=&product=time-series&begin=2016-09-04T00:00:00&end=2016-09-11T00:00:00";

    // CHARACTERISTICS REQUESTED
    // http://graphical.weather.gov/xml/docs/elementInputNames.php
    baseURL += "&Unit=e&maxt=maxt&mint=mint&temp=temp&appt=appt&rh=rh&sky=sky&wwa=wwa&iceaccum=iceaccum&ptornado=ptornado&phail=phail&ptstmwinds=ptstmwinds&pxtornado=pxtornado&pxhail=pxhail&ptotsvrtstm=ptotsvrtstm&wgust=wgust";

    // Used for testing and seeing the final result
    console.log(baseURL);

    // Hand the finished URL to the callback.
    next(null, baseURL);
}
That's not how callbacks work. Change the handler to something like this:
exports.handler = (event, context, callback) => {
var arrayOfPoints = [39.7683800, -86.1580400, 41.881832, -87.623177];
getXMLFromNOAA(arrayOfPoints, function(result) {
console.log('returned');
console.log(result);
callback(null, result);
});
};
Also uncomment (go back to) the callback call in getXMLFromNOAA, making sure its arguments match the parameters of the function the handler passes in.
Also for NOAA there is a module. Always search node-modules/npmjs.com/npms.io first before reinventing the wheel. The example here https://github.com/thataustin/noaa-forecasts is very similar to what you are doing so I would start with that.
If you want to simplify async stuff in the future you can use babel with async and await keywords. But you will need to make sure you really learn callbacks and promises first.
I tried to scrape thousands of pages, so I used async.timesSeries and async.waterfall. Each of the functions works very well on its own, but they don't work together. What can I do?
The logic is simple.
Because the pages I want to scrape are "http://udb.kr/local/category/390101?page=" 1 to 1167, async.timesSeries loops from 1 to 1167
async.waterfall scrapes the components of each page
but the messages the console shows me look like this
info.NM values // just to explain: it shows me each attribute of obj because I inserted console.log(info.NM) for verification.
info.NM values
info.NM values
info.NM values and randomly ----- page number -----
...
['done',
'done',
'done',
'done',
'done',
...
'done']
info.NM values again
.../Users/Snark/Dev/job_apply/cheerio_job_app_list.js:29
if (tObj[m+1].children != 0) {info.nAddr = tObj[m+1].firstChild.data}else{info.nAddr = null};
^
TypeError: Cannot read property 'children' of undefined
at /Users/Snark/Dev/job_apply/cheerio_job_app_list.js:29:17
at fn (/Users/Snark/node_modules/async/lib/async.js:746:34)
at /Users/Snark/node_modules/async/lib/async.js:1212:16
at /Users/Snark/node_modules/async/lib/async.js:166:37
at /Users/Snark/node_modules/async/lib/async.js:706:43
at /Users/Snark/node_modules/async/lib/async.js:167:37
at /Users/Snark/node_modules/async/lib/async.js:1208:30
at Request._callback (/Users/Snark/Dev/job_apply/cheerio_job_app_list.js:21:6)
at Request.self.callback (/Users/Snark/node_modules/request/request.js:198:22)
at emitTwo (events.js:87:13)
And this is js code.
var request = require("request"),
cheerio = require("cheerio"),
jsonfile = require("jsonfile"),
fs = require("fs"),
async = require("async");
// `info` is a single object shared by every row and every page; `dbArray`
// accumulates across all pages.
var info = {},
dbArray = [];
var url = "http://udb.kr/local/category/390101?page=";
// Runs the iteratee 1166 times in series; `n` starts at 0, so pages are 1..1166.
async.timesSeries(1166, function(n, next) {
var page = n + 1
async.waterfall([
// Step 1: download the page and select all table cells.
function(callback) {
request(url + page, function(error, response, html) {
// NOTE(review): this throw happens inside the request callback and is not
// passed to `callback`, so the waterfall never sees the error.
if (error) {
throw error
};
var $ = cheerio.load(html),
tObj = $('tbody tr td');
callback(null, tObj);
});
},
// Step 2: walk the cells in steps of 5 and read name (m) and address (m + 1).
function(tObj, callback) {
// The bound 150 is hard-coded rather than derived from tObj.length.
for (var m = 0; m < 150; m = m + 5) {
if (tObj[m]) {
info.NM = tObj[m].firstChild.children[0].data
} else {
info.NM = null
};
// tObj[m + 1] is not checked for existence before `.children` is read.
if (tObj[m + 1].children != 0) {
info.nAddr = tObj[m + 1].firstChild.data
} else {
info.nAddr = null
};
console.log(info.NM);
// The same `info` object reference is pushed on every iteration.
dbArray.push(info);
}
// NOTE(review): dbArray is passed in the first (error) position and the
// callback itself as the second argument; async.waterfall treats a truthy
// first argument as an error — confirm intent.
callback(dbArray, callback);
},
// Step 3: append the accumulated array to the JSON file.
function(dbArray, callback) {
fs.appendFile('./jobDB_l.json', JSON.stringify(dbArray), function (err) {
if (err)
throw err;
});
// Called immediately, without waiting for appendFile to finish.
callback(null, 'done');
}
], function(err, result) {
console.log('----- ' +page+ '-----');
});
// Called right after the waterfall is started, not from its completion
// callback, so timesSeries moves on before the page has been processed.
next(null, 'done');
}, function(err, result) {
console.log(result)
});
To get these to work together where you are using waterfall inside of each timesSeries iteration, you need to call the timesSeries done callback from the completion callback for the waterfall call. Right now, you are calling it long before that which means that timesSeries won't wait for the waterfall to be done.
You can do that by changing this:
], function(err, result) {
console.log('----- ' +page+ '-----');
});
next(null, 'done');
to this:
], function(err, result) {
console.log('----- ' +page+ '-----');
next(null, 'done');
});
It also seems odd that you have a hard-coded for loop limit of m < 150 rather than using the actual length of the content. You can easily run off the end of the content and potentially cause problems.
And, your error handling probably won't work well either. If you throw inside of the async request() callback, that's not going to go anywhere. You need much better error handling such as calling callback(error) to pass the error on to async.waterfall().
You also may want to surround all your DOM walking in a try/catch so if you throw any exceptions there, you can catch them yourself, analyze them and then fix the code.
if (tObj[m+1] && tObj[m+1].children != 0)
How can I refactor my code to get rid of this error from JSLinter?
I tried moving the entire function out to a var but the code wasn't able to run after that.
// For each second in the range, load that timestamp's energy readings,
// aggregate them per room, and insert the per-room rows.
for (i = 0; i < timeDifference; i++) {
// `timestamp` is one variable shared by all iterations and by every callback
// created below.
timestamp ++;
console.log(timestamp);
energyDatum.find({timestamp: timestamp}).toArray(function(err, result) {
// Index the readings by device id for lookup while summing.
var data = {};
result.forEach(function(element) {
data[element.deviceId] = element;
});
// Build one aggregated row per room in `mappings`.
var roomRawData = [];
mappings.forEach(function(room) {
var hash = {};
hash.floor = room.floor;
hash.name = room.name;
hash.room_type = room.room_type;
hash.energy_ac = sumApplianceEnergy('energy_ac', room, data);
hash.energy_light = sumApplianceEnergy('energy_light', room, data);
hash.energy_socket_1 = sumApplianceEnergy('energy_socket_1', room, data);
hash.energy_socket_2 = sumApplianceEnergy('energy_socket_2', room, data);
hash.energy_socket_3 = sumApplianceEnergy('energy_socket_3', room, data);
hash.energy_total = hash.energy_ac + hash.energy_light + hash.energy_socket_1 + hash.energy_socket_2 + hash.energy_socket_3;
// Reads the shared `timestamp` at the time this callback runs, not the value
// it had when the find() was issued.
hash.timestamp = timestamp;
roomRawData.push(hash);
});
// `err` is not inspected in either callback.
roomRaw.insert(roomRawData, {w:1}, function(err, result) { console.log('done'); });
});
// Issued on every iteration, with no completion callback.
lastTimestamp.update({_id: timestampId}, {timestamp: timestamp});
}
JSLinter shows this message because your code has potential errors.
Take a look at this line:
energyDatum.find({timestamp: timestamp}).toArray(...);
This method is async, right? It means that the callback of toArray method
is called after the for loop finishes its iterations, and therefore timestamp
variable (when you use it inside this callback) doesn't have a value of current iteration,
but instead it has value incremented for timeDifference times.
To solve this problem you could move this callback to another function:
// Factory for the toArray callback: each call captures its own `timestamp`
// argument, so the returned handler sees the value from the moment it was
// created rather than a shared loop variable.
var getIterationFunc = function(timestamp) {
    var onRows = function(err, result) {
        var data = {};
        // rest of function ...
    };
    return onRows;
};
and then use it:
energyDatum.find({timestamp: timestamp}).toArray(getIterationFunc(timestamp));
I believe this error should be fixed now. Hope this helps.
P.S. sorry for my English
I'm trying to create a NodeJS application to pull SQL records and insert them into MongoDB. The tables I'm interested in are somewhat large (1million+ records). For small datasets (< 200,000) my app works great, but running against the full table starts to eat up RAM and bring the server to a crawl.
It looks like Node is running through my "for" loop, branching off processes for each SQL sub select, and then running the MongoDB updates.
I never see "Mongo Connected!" until the last "Getting Responses for Activity #" is written to the screen.
#!/var/node/bin/node
var odbc = require("odbc");
var db = new odbc.Database();
var MongoClient = require('mongodb').MongoClient;
var format = require('util').format;
// Connect to SQL Server, read the activity rows, and start one
// InsertActivity call per row.
db.open("DSN=<DSN>;SERVER=<SERVER>;DATABASE=<DB>;UID=<UID>;PWD=<PWD>", function (err) {
if(err) throw err;
console.log("SQL Connected!");
var sqlstr = "SELECT TOP 1000 * FROM tbl_A NOLOCK";
console.log("Executing '" + sqlstr + "' against SQL Server");
// The whole result set is delivered at once as `rows`.
db.query(sqlstr, function (sql1err, rows, moreResults) {
if (sql1err) throw sql1err;
// The loop starts InsertActivity for every row without waiting for the
// previous one to complete; the completion callback passed in is empty.
for (var i = 0; i < rows.length; i++) {
InsertActivity(db, rows[i], i, rows.length, function () {});
}
});
});
/**
 * Loads the responses for one activity from SQL, attaches them to the
 * activity, and inserts the activity into the Mongo "activity" collection.
 *
 * @param sql - open connection object exposing query() and close()
 * @param activity - row object; its ActivityID is used to look up responses
 * @param cur - index of this activity within the batch
 * @param total - number of activities in the batch
 * @param callback - called with no arguments after the Mongo insert returns
 */
function InsertActivity(sql, activity, cur, total, callback) {
console.log("Getting Responses for Activity #" + activity.ActivityID);
var rsql = "SELECT * FROM tbl_Responses NOLOCK WHERE ActivityID = " + activity.ActivityID;
sql.query(rsql, function (sqlerr, rows, moreResults) {
// A query error is logged, but execution continues with whatever `rows` is.
if (sqlerr) console.log(sqlerr);
// Note the property name is spelled `resonses`.
activity.resonses = rows;
// A new Mongo connection is opened for every activity.
MongoClient.connect('mongodb://localhost:27017/m', function (merr, mdb) {
console.log("Mongo Connected!");
mdb.collection("activity").insert(activity, function () {
console.log("Inserted Activity #" + activity.ActivityID + " inserted into Mongo");
// callback() runs without waiting for the close to finish.
mdb.close(function () { console.log("Mongo Disconnected!"); });
callback();
});
});
// The SQL connection is closed from the response-query callback of the
// activity with the last index.
if (cur == total - 1) sql.close(function () { console.log("SQL Disconnected!"); });
});
// Runs right after sql.query is called, before the query callback above.
console.log(rsql);
}
What you need is unfortunately an undocumented function (I'll fix that). The function is db.queryResult which returns the result object that allows you to fetch rows individually. That will avoid buffering the entire result set into memory.
https://github.com/wankdanker/node-odbc/blob/master/test/test-query-select-fetch.js
var db = require('odbc')();

// Stream the table one row at a time instead of buffering the whole result
// set: the next row is fetched only after the previous one has been handled.
db.open(connectionString, function (err) {
    if (err) {
        console.error(err);
        return;
    }
    db.queryResult('select * from tbl_A NOLOCK', function (err, result) {
        if (err) {
            console.error(err);
            return;
        }
        fetchMore();

        // Fetches one row, processes it, then schedules itself for the next row.
        function fetchMore() {
            result.fetch(function (err, data) {
                if (err) {
                    console.error(err);
                    return;
                }
                if (!data) {
                    //we're all done, clean up
                    // Stop here: without this return the empty row would be
                    // processed and fetching would continue indefinitely.
                    return;
                }
                doStuffWithData(data, function (err) {
                    if (err) {
                        console.error(err);
                        return;
                    }
                    fetchMore();
                });
            });
        }
    });
});

/**
 * Placeholder for the per-row work (e.g. an insert into another store).
 *
 * @param data - one fetched row
 * @param {Function} cb - Node-style callback; call it once the row is handled
 */
function doStuffWithData(data, cb) {
    //do stuff
    cb(null);
}