Fill Azure Mobile Services from Scheduler - javascript

I created code like this for getting news from xml export from another website and I am trying to fill with it my database.
function UpdateLunchTime() {
var httpRequest = require('request');
var xml2js = require('xml2js');
var parser = new xml2js.Parser();
var url = 'http://www...com/export/xml/actualities';
httpRequest.get({
url: url
}, function(err, response, body) {
if (err) {
console.warn(statusCodes.INTERNAL_SERVER_ERROR,
'Some problem.');
} else if (response.statusCode !== 200) {
console.warn(statusCodes.BAD_REQUEST,
'Another problem');
} else {
//console.log(body);
parser.parseString(body, function (err2, result) {
//console.log(result.Root.event);
var count = 0;
for (var i=0;i<result.Root.event.length;i++)
{
//console.log(result.Root.event[i]);
InsertActionToDatabase(result.Root.event[i]);
}
/*
result.Root.event.forEach(function(entry) {
InsertActionToDatabase(entry);
});
*/
});
}
});
}
function InsertActionToDatabase(action)
{
var queryString = "INSERT INTO Action (title, description, ...) VALUES (?, ?, ...)";
mssql.query(queryString, [action.akce[0], action.description[0],...], {
success: function(insertResults) {
},
error: function(err) {
console.log("Problem: " + err);
}
});
}
For individual actualities it's working fine but when I run it over whole xml I get this error:
Error: [Microsoft][SQL Server Native Client 10.0][SQL Server]Resource ID : 1. The request limit for the database is 180 and has been reached. See 'http://go.microsoft.com/fwlink/?LinkId=267637' for assistance.
And for a few last objects I get this error:
Error: [Microsoft][SQL Server Native Client 10.0]TCP Provider: Only one usage of each socket address (protocol/network address/port) is normally permitted.
Thanks for help

The problem is that you're trying to make too many concurrent (insert) operations in your database. Remember that in node.js (almost) everything is asynchronous, so when you call InsertActionToDatabase for one of the items, this operation will start right away and not wait before it finishes to return. So you're basically trying to insert all of the events at once, and as the error message said there's a limit on the number of concurrent connections which can be made to the SQL server.
What you need to do is to change your loop to run asynchronously, by waiting for one of the operations to complete before starting the next one (you can also "batch" send a smaller number of operations at once, continuing after each batch is complete, but the code is a little more complicated) as shown below.
var count = result.Root.event.length;
var insertAction = function(index) {
if (index >= count) return;
InsertActionToDatabase(result.Root.event[i], function() {
insertAction(index + 1);
});
}
insertAction(0);
And the InsertActionToDatabase function would take a callback parameter to be called when it's done.
function InsertActionToDatabase(item, done) {
var table = tables.getTable('event');
table.insert(item, {
success: function() {
console.log('Inserted event: ', item);
done();
}
});
}

Related

Handle async DB calls

I did a couple of projects with node.js and I'm aware of the async behaviour and that one should usually use callback functions, etc. But one thing that bothers me ist the following.
I'm developing an Alexa skill and I have a function that handles the User intent:
'MyFunction': function() {
var toSay = ""; // Holds info what Alexa says
// Lot of checks and calculations what needs to be said by Alexa (nothing special)
if(xyz) {
toSay = "XYZ";
}else if(abc) {
toSay = "ABC";
}else{
toSay = "Something";
}
// Here is the "tricky" party
if(someSpecialEvent) {
toSay += " "+askDatabaseForInput(); // Add some information from database to string
}
this.emit(':ask', toSay, this.t('REPROMT_SPEECH')); // Gives the Info to Alexa (code execution stops here)
}
As mentioned in the code, there is some code which is usually used to find out what the output to Alexa should be.
Only on rare events, "someSpecialEvent", I need to query the database and add information to the String "toSay".
Querying the DB would look something like:
function askDatabaseForInput() { // The function to query the DB
var params = {
TableName: "MyTable",
OtherValues: "..."
};
// Do the Query
docClient.query(params, function(err, data) {
// Of course here are some checks if everything worked, etc.
var item = data.Items[0];
return item; // Item SHOULD be returned
});
return infoFromDocClient; // Which is, of course not possible
}
Now I know, that in the first function "'MyFunction'" I could just pass the variable "toSay" down to the DB Function and then to the DB Query and if everything is fine, I would do the "this.emit()" in the DB Query function. But for me, this looks very dirty and not much reusable.
So is there a way I can use "askDatabaseForInput()" to return DB information and just add it to a String? This means making the asynchronous call synchronous.
Making a synchronous call wouldn't affect the user experience, as the code isn't doing anything else anyway and it just creates the String and is (maybe) waiting for DB input.
Thanks for any help.
So you could do 2 things:
Like the person who commented says you could use a callback:
function askDatabaseForInput(callback) {
var params = {
TableName: "MyTable",
OtherValues: "..."
};
docClient.query(params, function(err, data) {
if (err) {
callback(err, null)
} else {
var item = data.Items[0];
callback(null, item);
}
});
}
or you could use promises:
function askDatabaseForInput() {
var params = {
TableName: "MyTable",
OtherValues: "..."
};
return new Promise(function (resolve, reject) {
docClient.query(params, function(err, data) {
if (err) {
reject(err)
} else {
var item = data.Items[0];
resolve(item);
}
});
});
}
you can then either put a function in where you call askDatabaseForInput or do askDatabaseForInput.then(....).
In the function or the .then you would add what you retrieved from the database to the variable toSay
hope this helps

JavaScript: How can I make sure that this function only returns the fully populated array?

I'm using the Request library. I have an array of URLs (var resources) that I want to populate with the text content of those URLs.
My code looks like this:
var resources = [<URL_1>, <URL_2>, <URL_3>, ...];
var resourcesText = resourceToText(resources);
function resourcesToText(resources) {
var text = [];
for (var i in resources) {
request(resources[i], fetch);
}
function fetch(error, response, body) {
if (!error) {
text.push(body);
} else {
console.log('Sorry. I couldn\'t parse that resource. ' + error);
}
}
return text;
};
The problem is that resourceToText() returns the array before fetch() has had time to populate it. Thus, resourcesText ends up being empty. I assume higher order functions are meant to deal with this kind of problem but I can't fathom a way to work the callback.
What's the best approach to this problem?
Make an asynchronous function out of resourcesToText by:
Using an additional callback:
function resourcesToText(resources, callback) {
var text = [],
requestCount = resources.length;
for (var i in resources) {
request(resources[i], fetch);
}
function fetch(error, response, body) {
if (!error) {
text.push(body);
} else {
console.log('Sorry. I couldn\'t parse that resource. ' + error);
}
requestCount--;
if(requestCount <= 0){
callback(text);
}
}
};
Call the callback with your result text after you you got all the results of your requests using a counter (requestCount).
Now the call to resourcesToTextlooks like:
resourcesToText(resources, function(text){
...
});
Using deferred/promises
Instead of using a callback, you could also implement the concept of deferred/promises. There are many libraries out there, such as q (https://github.com/kriskowal/q)

Parse Javascript Cloud Code .save() works only on a single user

I use Parse in iOS to run a cloud code method that gets an ID in it's request and receives a number in the response.
The purpose of the cloud code function is to take the request ID and add it to a field of 3 different users.
Here is the cloud code method in Javascript:
amount = 3;
// Use Parse.Cloud.define to define as many cloud functions as you want.
// For example:
Parse.Cloud.define("addToIDs", function(request, response) {
var value = request.params.itemId;
var query = new Parse.Query(Parse.User);
query.ascending("createdAt");
query.limit(100);
query.find({
success: function(results) {
var sent = 0;
for (var i = 0; i < results.length; i++) {
var idlst = results[i].get("idString");
if (idlst != null && idlst.indexOf(value) <= -1) {
idlst += value+"|";
results[i].set("idString", idlst);
results[i].save();
sent = sent+1;
}
if (sent >= amount) {
break;
}
}
response.success(sent);
},
error: function() {
response.error("Test failed");
}
});
});
When running this cloud code method I get a response of '3' meaning it called .save for 3 users. The problem is that when i go back to look in the Database viewer in the parse website it actually only updated a single user (Its always the same user). No matter how many times i run this code, it will only actually update the first user..
Anyone know why this is happening?
Both save and saveAll are asynchronous, so you should make sure the saving process is done.
Also note that, the user object can only be updated by the owner or request with masterkey.
The following code should work:
var amount = 3;
Parse.Cloud.define("addToIDs", function(request, response) {
var value = request.params.itemId;
var query = new Parse.Query(Parse.User);
query.ascending("createdAt");
query.limit(100);
return query.find()
.then(function(results) { // success
var toSave = [];
var promise = new Parse.Promise();
for (var i = 0; i < results.length; i++) {
var idlst = results[i].get("idString");
if (idlst != null && idlst.indexOf(value) <= -1) {
idlst += value+"|";
results[i].set("idString", idlst);
toSave.push(results[i]);
}
if (toSave.length >= amount) {
break;
}
}
// use saveAll to save multiple object without bursting multiple request
Parse.Object.saveAll(toSave, {
useMasterKey: true,
success: function(list) {
promise.resolve(list.length);
},
error: function() {
promise.reject();
}
});
return promise;
}).then(function(length) { // success
response.success(length);
}, function() { // error
response.error("Test failed");
});
});
The reason this is happening is two-fold:
save() is an asynchronous method, and
response.success() will immediately kill your running code as soon as it's called.
So what's happening is that inside your for loop you're running save() several times, but since it's asynchronous, they're simply thrown into the processing queue and your for loop continues on through. So it's quickly throwing all of your save()'s into the processing queue, and then it reaches your response.success() call but, by the time it's reached, only one of the save()'s has had a chance to successfully process.

Insert document loop - RangeError: Maximum call stack size exceeded

I am literally giving my first steps with node and mongodb and I have recently hit this RangeError wall.
Here's what I am trying to do, I have a file that contains a list of countries that I would like to add to my mongo db. This would be part of my "seed" mechanism to get the app running.
I load the json and then I iterate through the collection of objects and add them one by one to the 'Countries' collection.
However, everytime I run the code, I get a "RangeError: Maximum call stack size exceeded".
I have googled around but none of the suggested solutions seem to apply for me.
My guess is there is something wrong with my insertCountry function...
Anyways, here's my code:
var mongoose = require('mongoose');
var countries = require('./seed/countries.json');
// mongodb
var Country = mongoose.Schema({
name: String,
code: String,
extra: [Extra]
});
var Extra = mongoose.Schema({
exampleField: Boolean,
anotherField: Boolean
});
var mCountry = mongoose.model('Countries', Country);
var mExtra = mongoose.model('Extras', Extra);
// do connection
mongoose.connect('...');
var db = mongoose.connection;
db.on('error', console.error.bind(console, 'connection error'));
db.once('open', function callback() {
});
// async function
var insertCountry = function(document, callback) {
db.model('Countries').count({code: document.code}, function (err, count) {
if (count < 1) {
db.collection('Countries').insert(document, function (err, result) {
if (!err) {
console.log('country ' + document.name + ' added');
}
else {
console.log('- [' + document.name + '] ' + err);
}
});
}
callback(null,document);
});
};
// doing countries
var Country = mongoose.model('Countries');
var Extras = mongoose.model('Extras');
for(i = 0; i < countries.length; i++)
{
nCountry = new Country();
nCountry.name = countries[i].name;
nCountry.code = countries[i].code;
nCountry.benefits = new Extras();
nCountry.benefits.exampleField = false;
nCountry.benefits.anotherField = false;
insertCountry(nCountry, function (err, value) {
console.log(value.name + ' added to collection (callback)');
});
}
I have been using some guides I have found to build this so this might not be optimal code. Any best pratices, standards, guides or tutorials you can share are most welcome!
Your callback is in the wrong place. It is not waiting for the insert operation to complete before you return from it's own callback. Altering your code:
var insertCountry = function(document, callback) {
db.model('Countries').count({code: document.code}, function (err, count) {
if (count < 1) {
db.collection('Countries').insert(document, function (err, result) {
if (!err) {
console.log('country ' + document.name + ' added');
}
else {
console.log('- [' + document.name + '] ' + err);
}
callback(null,document);
});
}
});
};
That is part of your problem, but it does not completely solve it. The other part is the loop which also does not wait for the wrapping function to complete before moving on. You want something like asyc.eachSeries in order to wait for inserts to complete before performing the next iteration. This is mostly why you are exceeding the call stack:
async.eachSeries(
countries,
function(current,callback) {
// make your nCountry object
insertCountry(nCountry,function(err,value) {
// do something, then
callback(err);
})
},
function(err) {
// called where done, err contains err where set
console.log( "done" );
}
);
There is really still and issue with the array, which must be reasonably large if you are exceeding the call stack limit. You probably should look at using event streams to process that rather that load everything in memory to the array.
Personally, if you were just trying not to insert duplicates for a field and had MongoDB 2.6 available I would just use the Bulk Operations API with "unordered operations" and allow non fatal failures on the duplicate keys. Coupled with the fact that bulk operations are sent in "batches" and not one at a time, this is much more efficient than checking for the presence on every request:
var Country = mongoose.Schema({
name: String,
code: { type: String, unique: true }, // define a unique index
extra: [Extra]
});
var insertCountries = function(countries,callback) {
var bulk = Country.collection.initializeUnorderedBulkOp();
var counter = 0;
async.eachSeries(
countries,
function(current,callback) {
// same object construction
bulk.insert(nCountry);
counter++;
// only send once every 1000
if ( counter % 1000 == 0 ) {
bulk.execute(function(err,result) {
// err should generally not be set
// but result would contain any duplicate errors
// along with other insert responses
// clear to result and callback
bulk = Country.collection.initializeUnorderedBulkOp();
callback();
});
} else {
callback();
}
},
function(err) {
// send anything still queued
if ( counter % 1000 != 0 )
bulk.execute(function(err,result) {
// same as before but no need to reset
callback(err);
});
}
);
};
mongoose.on("open",function(err,conn) {
insertCountries(countries,function(err) {
console.log("done");
});
});
Keeping in mind that unlike the methods implemented directly on the mongoose models, the native driver methods require that a connection is actually established before they can be called. Mongoose "queues" these up for you, but otherwise you need something to be sure the connection is actually open. The example of the "open" event is used here.
Take a look at event streams as well. If you are constructing an array large enough to cause a problem by missing callback execution then you probably should not be loading it all in memory from whatever your source is. Stream processing that source combined with an approach as shown above should provide efficient loading.

Batch requests in Node.js

My program is communicating with a web service that only accepts ~10 requests per second. From time to time, my program sends 100+ concurrent requests to the web service, causing my program to crash.
How do I limit concurrent requests in Node.js to 5 per second? Im using the request library.
// IF EVENT AND SENDER
if(data.sender[0].events && data.sender[0].events.length > 0) {
// FIND ALL EVENTS
for(var i = 0; i < data.sender[0].events.length; i++) {
// IF TYPE IS "ADDED"
if(data.sender[0].events[i].type == "added") {
switch (data.sender[0].events[i].link.rel) {
case "contact" :
batch("added", data.sender[0].events[i].link.href);
//_initContacts(data.sender[0].events[i].link.href);
break;
}
// IF TYPE IS "UPDATED"
} else if(data.sender[0].events[i].type == "updated") {
switch (data.sender[0].events[i].link.rel){
case "contactPresence" :
batch("updated", data.sender[0].events[i].link.href);
//_getContactPresence(data.sender[0].events[i].link.href);
break;
case "contactNote" :
batch("updated", data.sender[0].events[i].link.href);
// _getContactNote(data.sender[0].events[i].link.href);
break;
case "contactLocation" :
batch("updated", data.sender[0].events[i].link.href);
// _getContactLocation(data.sender[0].events[i].link.href);
break;
case "presenceSubscription" :
batch("updated", data.sender[0].events[i].link.href);
// _extendPresenceSubscription(data.sender[0].events[i].link.href);
break;
}
}
};
And then the homegrown batch method:
var updated = [];
var added = [];
var batch = function(type, url){
console.log("batch called");
if (type === "added"){
console.log("Added batched");
added.push(url);
if (added.length > 5) {
setTimeout(added.forEach(function(req){
_initContacts(req);
}), 2000);
added = [];
}
}
else if (type === "updated"){
console.log("Updated batched");
updated.push(url);
console.log("Updated length is : ", updated.length);
if (updated.length > 5){
console.log("Over 5 updated events");
updated.forEach(function(req){
setTimeout(_getContactLocation(req), 2000);
});
updated = [];
}
}
};
And an example of the actual request:
var _getContactLocation = function(url){
r.get(baseUrl + url,
{ "strictSSL" : false, "headers" : { "Authorization" : "Bearer " + accessToken }},
function(err, res, body){
if(err)
console.log(err);
else {
var data = JSON.parse(body);
self.emit("data.contact", data);
}
}
);
};
Using the async library, the mapLimit function does exactly what you want. I can't provide an example for your specific use case as you did not provide any code.
From the readme:
mapLimit(arr, limit, iterator, callback)
The same as map only no more than "limit" iterators will be simultaneously
running at any time.
Note that the items are not processed in batches, so there is no guarantee that
the first "limit" iterator functions will complete before any others are
started.
Arguments
arr - An array to iterate over.
limit - The maximum number of iterators to run at any time.
iterator(item, callback) - A function to apply to each item in the array.
The iterator is passed a callback(err, transformed) which must be called once
it has completed with an error (which can be null) and a transformed item.
callback(err, results) - A callback which is called after all the iterator
functions have finished, or an error has occurred. Results is an array of the
transformed items from the original array.
Example
async.mapLimit(['file1','file2','file3'], 1, fs.stat, function(err, results){
// results is now an array of stats for each file
});
EDIT: Now that you provided code, I see that your use is a bit different from what I assumed. The async library is more useful when you know all the tasks to run up front. I don't know of a library off hand that will easily solve this for you. The above note is likely still relevant to people searching this topic so I'll leave it in.
Sorry, I don't have time to restructure your code, but this is an (un-tested) example of a function that makes an asynchronous request while self-throttling itself to 5 requests per second. I would highly recommend working off of this to come up with a more general solution that fits your code base.
var throttledRequest = (function () {
var queue = [], running = 0;
function sendPossibleRequests() {
var url;
while (queue.length > 0 && running < 5) {
url = queue.shift();
running++;
r.get(url, { /* YOUR OPTIONS HERE*/ }, function (err, res, body) {
running--;
sendPossibleRequests();
if(err)
console.log(err);
else {
var data = JSON.parse(body);
self.emit("data.contact", data);
}
});
}
}
return function (url) {
queue.push(url);
sendPossibleRequests();
};
})();
Basically, you keep a queue of all the data to be asynchronously processed (such as urls to be requested) and then after each callback (from a request) you try to launch off as many remaining requests as possible.
This is precisely what node's Agent class is designed to address. Have you done something silly like require('http').globalAgent.maxSockets = Number.MAX_VALUE or passed agent: false as a request option?
With Node's default behavior, your program will not send more than 5 concurrent requests at a time. Additionally, the Agent provides optimizations that a simple queue cannot (namely HTTP keepalives).
If you try to make many requests (for example, issue 100 requests from a loop), the first 5 will begin and the Agent will queue the remaining 95. As requests complete, it starts the next.
What you probably want to do is create an Agent for your web service requests, and pass it in to every call to request (rather than mixing requests in with the global agent).
var http=require('http'), svcAgent = http.Agent();
request({ ... , agent: svcAgent });

Categories