Sails.js: Nested MongoDB queries - javascript

I am using Sails v0.11 and am developing an standalone importer script in order to import data to mongoDB and - that is now the not-working part - build the associations between the models.
For this process I introduced temporary helper properties in the models in order to find the associated records and replace them by in real MongoDB _ids.
The script starts Sails in order to be able use its features (waterline, etc.):
var app = Sails();
app.load({
hooks: { grunt: false },
log: { level: 'warn' }
}, function sailsReady(err){
processUsers() finds all users and their _ids and iterates over them to invoke a second function addOrgsToOneUser()
var processUsers = function() {
// Iterate through all users in order to retrieve their _ids and
app.models['user'].native(function(err, collection) {
collection.find({}, projectionOrgInUser).toArray(function (err, users) {
Async.eachSeries(users, function (user, next){
// prepare userInOrgs
whereUserInOrg = { orgId: { $in: userInOrgs } };
//This is invoking
addOrgsToOneUser(user, whereUserInOrg);
next();
}, function afterwards (err) {
if (err) {
console.error('Import failed, error details:\n',err);
return process.exit(1);
}
console.log("done");
return process.exit(0); // This returns too early, not executing the addOrgsToOneUser
});
});
});
};
addOrgsToOneUser() finds all orgs belonging to THIS user and updates then the orgs array property of THIS user
var addOrgsToOneUser = function(user, whereUserInOrg) {
var projectionUserInOrg = "...";
// Find all orgs that this user is associated to and store it in inOrgs
app.models['org'].native(function(err, collection) {
collection.find(whereUserInOrg, projectionUserInOrg).toArray(function (err, orgs) {
// prepare inOrgs which is needed for updating
//update user to have an updated orgs array based on inOrgs.
app.models['user'].update({'id' : user._id.toString()}, {'orgs': inOrgs}).exec(function afterwards(err, updated){
console.log('Updated user ' + user._id.toString() + ' to be in their orgs');
});
});
});
}
Problem:
Process.exit(0) is called before the query/update of saddOrgsToOneUser() has completed. It behaves as expected if saddOrgsToOneUser() contains just a console.log for instance, but queries are triggered ansynchronously of course.
In case I comment out Process.exit(0), the script never stops, but the queries are executed as intented.
As the script will have further nested queries, I need a better approach to this as manually kill this script ...
How is nesting queries and iterating over their results done properly?
Thank you very much,
Manuel

addOrgsToOneUser is asynchronous. next() needs to be called after everything is done inside addOrgsToOneUser. The way I would do it is to pass in a callback (next) and call it when everything is done. So the call is
addOrgsToOneUser(user, whereUserInOrg, next);
and the addOrgsToOneUser will have an extra argument:
var addOrgsToOneUser = function(user, whereUserInOrg, callback) {
var projectionUserInOrg = "...";
// Find all orgs that this user is associated to and store it in inOrgs
app.models['org'].native(function(err, collection) {
collection.find(whereUserInOrg, projectionUserInOrg).toArray(function (err, orgs) {
// prepare inOrgs which is needed for updating
//update user to have an updated orgs array based on inOrgs.
app.models['user'].update({'id' : user._id.toString()}, {'orgs': inOrgs}).exec(function afterwards(err, updated){
console.log('Updated user ' + user._id.toString() + ' to be in their orgs');
callback(); // your original next() is called here
});
});
});
}

Related

Handle async DB calls

I did a couple of projects with node.js and I'm aware of the async behaviour and that one should usually use callback functions, etc. But one thing that bothers me ist the following.
I'm developing an Alexa skill and I have a function that handles the User intent:
'MyFunction': function() {
var toSay = ""; // Holds info what Alexa says
// Lot of checks and calculations what needs to be said by Alexa (nothing special)
if(xyz) {
toSay = "XYZ";
}else if(abc) {
toSay = "ABC";
}else{
toSay = "Something";
}
// Here is the "tricky" party
if(someSpecialEvent) {
toSay += " "+askDatabaseForInput(); // Add some information from database to string
}
this.emit(':ask', toSay, this.t('REPROMT_SPEECH')); // Gives the Info to Alexa (code execution stops here)
}
As mentioned in the code, there is some code which is usually used to find out what the output to Alexa should be.
Only on rare events, "someSpecialEvent", I need to query the database and add information to the String "toSay".
Querying the DB would look something like:
function askDatabaseForInput() { // The function to query the DB
var params = {
TableName: "MyTable",
OtherValues: "..."
};
// Do the Query
docClient.query(params, function(err, data) {
// Of course here are some checks if everything worked, etc.
var item = data.Items[0];
return item; // Item SHOULD be returned
});
return infoFromDocClient; // Which is, of course not possible
}
Now I know, that in the first function "'MyFunction'" I could just pass the variable "toSay" down to the DB Function and then to the DB Query and if everything is fine, I would do the "this.emit()" in the DB Query function. But for me, this looks very dirty and not much reusable.
So is there a way I can use "askDatabaseForInput()" to return DB information and just add it to a String? This means making the asynchronous call synchronous.
Making a synchronous call wouldn't affect the user experience, as the code isn't doing anything else anyway and it just creates the String and is (maybe) waiting for DB input.
Thanks for any help.
So you could do 2 things:
Like the person who commented says you could use a callback:
function askDatabaseForInput(callback) {
var params = {
TableName: "MyTable",
OtherValues: "..."
};
docClient.query(params, function(err, data) {
if (err) {
callback(err, null)
} else {
var item = data.Items[0];
callback(null, item);
}
});
}
or you could use promises:
function askDatabaseForInput() {
var params = {
TableName: "MyTable",
OtherValues: "..."
};
return new Promise(function (resolve, reject) {
docClient.query(params, function(err, data) {
if (err) {
reject(err)
} else {
var item = data.Items[0];
resolve(item);
}
});
});
}
you can then either put a function in where you call askDatabaseForInput or do askDatabaseForInput.then(....).
In the function or the .then you would add what you retrieved from the database to the variable toSay
hope this helps

Bulk insert in MongoDB using mongoose

I currently have a collection in Mongodb say "Collection1".
I have the following array of objects that need to be into inserted into MongoDB. I am using Mongoose API. For now, I am iterating through the array and inserting each of them into mongo.
This is ok for now, but will be a problem when the data is too big.
I need a way of inserting the data in bulk into MongoDB without repetition.
I am not sure how to do this. I could not find a bulk option in Mongoose.
My code below
myData = [Obj1,Obj2,Obj3.......]
myData.forEach(function(ele){
//console.log(ele)
saveToMongo(ele);
});
function saveToMongo(obj){
(new Collection1(obj)).save(function (err, response) {
if (err) {
// console.log('Error while inserting: ' + obj.name + " " +err);
} else {
// console.log('Data successfully inserted');
}
});
return Collection1(obj);
}
You might want to use the insertMany() method here if you're using the latest Mongoose version 4.4.X and greater, which essentially uses Model.collection.insertMany() under the hood and the driver might handle parallelizing >= 1000 docs for you.
myData = [Obj1, Obj2, Obj3.......];
Collection1.insertMany(myData, function(error, docs) {});
or using Promises for better error handling
Collection1.insertMany(myData)
.then(function(docs) {
// do something with docs
})
.catch(function(err) {
// error handling here
});
It works by creating a bunch of documents, calls .validate() on them in parallel, and then calls the underlying driver's insertMany() on the result of toObject({ virtuals: false }); of each doc.
Although insertMany() doesn't trigger pre-save hooks, it has better performance because it only makes 1 round-trip to the server rather than 1 for each document.
For Mongoose versions ~3.8.8, ~3.8.22, 4.x which support MongoDB Server >=2.6.x, you could use the Bulk API as follows
var bulk = Collection1.collection.initializeOrderedBulkOp(),
counter = 0;
myData.forEach(function(doc) {
bulk.insert(doc);
counter++;
if (counter % 500 == 0) {
bulk.execute(function(err, r) {
// do something with the result
bulk = Collection1.collection.initializeOrderedBulkOp();
counter = 0;
});
}
});
// Catch any docs in the queue under or over the 500's
if (counter > 0) {
bulk.execute(function(err,result) {
// do something with the result here
});
}
you can pass an array of objects to mongoose model create function
var Collection1 = mongoose.model('Collection1');
Collection1.create(myData,function(err){
if(err) ...
});

Insert document loop - RangeError: Maximum call stack size exceeded

I am literally giving my first steps with node and mongodb and I have recently hit this RangeError wall.
Here's what I am trying to do, I have a file that contains a list of countries that I would like to add to my mongo db. This would be part of my "seed" mechanism to get the app running.
I load the json and then I iterate through the collection of objects and add them one by one to the 'Countries' collection.
However, everytime I run the code, I get a "RangeError: Maximum call stack size exceeded".
I have googled around but none of the suggested solutions seem to apply for me.
My guess is there is something wrong with my insertCountry function...
Anyways, here's my code:
var mongoose = require('mongoose');
var countries = require('./seed/countries.json');
// mongodb
var Country = mongoose.Schema({
name: String,
code: String,
extra: [Extra]
});
var Extra = mongoose.Schema({
exampleField: Boolean,
anotherField: Boolean
});
var mCountry = mongoose.model('Countries', Country);
var mExtra = mongoose.model('Extras', Extra);
// do connection
mongoose.connect('...');
var db = mongoose.connection;
db.on('error', console.error.bind(console, 'connection error'));
db.once('open', function callback() {
});
// async function
var insertCountry = function(document, callback) {
db.model('Countries').count({code: document.code}, function (err, count) {
if (count < 1) {
db.collection('Countries').insert(document, function (err, result) {
if (!err) {
console.log('country ' + document.name + ' added');
}
else {
console.log('- [' + document.name + '] ' + err);
}
});
}
callback(null,document);
});
};
// doing countries
var Country = mongoose.model('Countries');
var Extras = mongoose.model('Extras');
for(i = 0; i < countries.length; i++)
{
nCountry = new Country();
nCountry.name = countries[i].name;
nCountry.code = countries[i].code;
nCountry.benefits = new Extras();
nCountry.benefits.exampleField = false;
nCountry.benefits.anotherField = false;
insertCountry(nCountry, function (err, value) {
console.log(value.name + ' added to collection (callback)');
});
}
I have been using some guides I have found to build this so this might not be optimal code. Any best pratices, standards, guides or tutorials you can share are most welcome!
Your callback is in the wrong place. It is not waiting for the insert operation to complete before you return from it's own callback. Altering your code:
var insertCountry = function(document, callback) {
db.model('Countries').count({code: document.code}, function (err, count) {
if (count < 1) {
db.collection('Countries').insert(document, function (err, result) {
if (!err) {
console.log('country ' + document.name + ' added');
}
else {
console.log('- [' + document.name + '] ' + err);
}
callback(null,document);
});
}
});
};
That is part of your problem, but it does not completely solve it. The other part is the loop which also does not wait for the wrapping function to complete before moving on. You want something like asyc.eachSeries in order to wait for inserts to complete before performing the next iteration. This is mostly why you are exceeding the call stack:
async.eachSeries(
countries,
function(current,callback) {
// make your nCountry object
insertCountry(nCountry,function(err,value) {
// do something, then
callback(err);
})
},
function(err) {
// called where done, err contains err where set
console.log( "done" );
}
);
There is really still and issue with the array, which must be reasonably large if you are exceeding the call stack limit. You probably should look at using event streams to process that rather that load everything in memory to the array.
Personally, if you were just trying not to insert duplicates for a field and had MongoDB 2.6 available I would just use the Bulk Operations API with "unordered operations" and allow non fatal failures on the duplicate keys. Coupled with the fact that bulk operations are sent in "batches" and not one at a time, this is much more efficient than checking for the presence on every request:
var Country = mongoose.Schema({
name: String,
code: { type: String, unique: true }, // define a unique index
extra: [Extra]
});
var insertCountries = function(countries,callback) {
var bulk = Country.collection.initializeUnorderedBulkOp();
var counter = 0;
async.eachSeries(
countries,
function(current,callback) {
// same object construction
bulk.insert(nCountry);
counter++;
// only send once every 1000
if ( counter % 1000 == 0 ) {
bulk.execute(function(err,result) {
// err should generally not be set
// but result would contain any duplicate errors
// along with other insert responses
// clear to result and callback
bulk = Country.collection.initializeUnorderedBulkOp();
callback();
});
} else {
callback();
}
},
function(err) {
// send anything still queued
if ( counter % 1000 != 0 )
bulk.execute(function(err,result) {
// same as before but no need to reset
callback(err);
});
}
);
};
mongoose.on("open",function(err,conn) {
insertCountries(countries,function(err) {
console.log("done");
});
});
Keeping in mind that unlike the methods implemented directly on the mongoose models, the native driver methods require that a connection is actually established before they can be called. Mongoose "queues" these up for you, but otherwise you need something to be sure the connection is actually open. The example of the "open" event is used here.
Take a look at event streams as well. If you are constructing an array large enough to cause a problem by missing callback execution then you probably should not be loading it all in memory from whatever your source is. Stream processing that source combined with an approach as shown above should provide efficient loading.

Why does creating and deleting an Azure Table fail?

I am wondering why trying to run the following test suite fails when I try to delete the table I have stored entities in. The error I get is the following
1) Azure Storage cloud storage operations "after all" hook:
Error: The specified resource does not exist. RequestId:3745d709-fa5e-4a2b-b517-89edad3efdd2
Time:2013-12-03T22:26:39.5532356Z
If I comment out the actual insertion of data it fails every other time, and if I try to do the insertion of data it fails every time with an additional "The table specified does not exist.".
For the first case this seems to indicate that there is some kind of delay in the table creation, so in every other test it is successful, and for the second case it seems to indicate that even though my callbacks are being called after table creation, the table(s) still aren't ready for data insertion.
The test suite and associated code looks like this:
describe('cloud storage operations', function () {
var storage;
before(function (done) {
this.timeout(5000);
storage = AzureStorage.usingTable('TEST', done);
});
after(function (done) {
storage.deleteTable(done);
});
it('should store without trouble', function (done) {
storage.save(factory.createChangeSet()).then(done, done);
});
});
... // snipped from azure.js
var AzureStorage = function (storageClient, tableName, callback) {
assert(storageClient && tableName && partitionKey, "Missing parameters");
this.storageClient = storageClient;
this.tableName = tableName;
var defaultCallback = function (err) { if (err) { throw error; } };
this.storageClient.createTableIfNotExists(this.tableName, function () {
callback();
} || defaultCallback);
};
AzureStorage.usingTable = function (tableName, callback) {
return new AzureStorage(
azure.createTableService(accountName, accountKey)
, tableName
, callback
);
};
AzureStorage.prototype.deleteTable = function (callback) {
this.storageClient.deleteTable(this.tableName, callback);
};
I've hit this using the c# library as well but I'm pretty sure the error message indicated the table could not be created because an operation was still in process for a table of the same name. Thinking of the backend supporting storage, it makes sense that it would not be instant. The table needs to be removed from the 3 local replicas as well as the replicas in the paired data center.
With that kind of async operation, it is going to be challenging to build up an tear them down fast enough for tests.
A workaround might be to increment a value appended to the "TEST" table name that would be unique to that test run.

How to use "q" module for refactoring mongoose code?

I'm using mongoose to insert some data into mongodb. The code looks like:
var mongoose = require('mongoose');
mongoose.connect('mongo://localhost/test');
var conn = mongoose.connection;
// insert users
conn.collection('users').insert([{/*user1*/},{/*user2*/}], function(err, docs) {
var user1 = docs[0], user2 = docs[1];
// insert channels
conn.collection('channels').insert([{userId:user1._id},{userId:user2._id}], function(err, docs) {
var channel1 = docs[0], channel2 = docs[1];
// insert articles
conn.collection('articles').insert([{userId:user1._id,channelId:channel1._id},{}], function(err, docs) {
var article1 = docs[0], article2 = docs[1];
}
});
};
You can see there are a lot of nested callbacks there, so I'm trying to use q to refactor it.
I hope the code will look like:
Q.fcall(step1)
.then(step2)
.then(step3)
.then(step4)
.then(function (value4) {
// Do something with value4
}, function (error) {
// Handle any error from step1 through step4
})
.end();
But I don't know how to do it.
You'll want to use Q.nfcall, documented in the README and the Wiki. All Mongoose methods are Node-style. I'll also use .spread instead of manually destructuring .then.
var mongoose = require('mongoose');
mongoose.connect('mongo://localhost/test');
var conn = mongoose.connection;
var users = conn.collection('users');
var channels = conn.collection('channels');
var articles = conn.collection('articles');
function getInsertedArticles() {
return Q.nfcall(users.insert.bind(users), [{/*user1*/},{/*user2*/}]).spread(function (user1, user2) {
return Q.nfcall(channels.insert.bind(channels), [{userId:user1._id},{userId:user2._id}]).spread(function (channel1, channel2) {
return Q.nfcall(articles.insert.bind(articles), [{userId:user1._id,channelId:channel1._id},{}]);
});
})
}
getInsertedArticles()
.spread(function (article1, article2) {
// you only get here if all three of the above steps succeeded
})
.fail(function (error) {
// you get here if any of the above three steps failed
}
);
In practice, you will rarely want to use .spread, since you usually are inserting an array that you don't know the size of. In that case the code can look more like this (here I also illustrate Q.nbind).
To compare with the original one is not quite fair, because your original has no error handling. A corrected Node-style version of the original would be like so:
var mongoose = require('mongoose');
mongoose.connect('mongo://localhost/test');
var conn = mongoose.connection;
function getInsertedArticles(cb) {
// insert users
conn.collection('users').insert([{/*user1*/},{/*user2*/}], function(err, docs) {
if (err) {
cb(err);
return;
}
var user1 = docs[0], user2 = docs[1];
// insert channels
conn.collection('channels').insert([{userId:user1._id},{userId:user2._id}], function(err, docs) {
if (err) {
cb(err);
return;
}
var channel1 = docs[0], channel2 = docs[1];
// insert articles
conn.collection('articles').insert([{userId:user1._id,channelId:channel1._id},{}], function(err, docs) {
if (err) {
cb(err);
return;
}
var article1 = docs[0], article2 = docs[1];
cb(null, [article1, article2]);
}
});
};
}
getInsertedArticles(function (err, articles) {
if (err) {
// you get here if any of the three steps failed.
// `articles` is `undefined`.
} else {
// you get here if all three succeeded.
// `err` is null.
}
});
With alternative deferred promise implementation, you may do it as following:
var mongoose = require('mongoose');
mongoose.connect('mongo://localhost/test');
var conn = mongoose.connection;
// Setup 'pinsert', promise version of 'insert' method
var promisify = require('deferred').promisify
mongoose.Collection.prototype.pinsert = promisify(mongoose.Collection.prototype.insert);
var user1, user2;
// insert users
conn.collection('users').pinsert([{/*user1*/},{/*user2*/}])
// insert channels
.then(function (users) {
user1 = users[0]; user2 = users[1];
return conn.collection('channels').pinsert([{userId:user1._id},{userId:user2._id}]);
})
// insert articles
.match(function (channel1, channel2) {
return conn.collection('articles').pinsert([{userId:user1._id,channelId:channel1._id},{}]);
})
.done(function (articles) {
// Do something with articles
}, function (err) {
// Handle any error that might have occurred on the way
});
Considering Model.save instead of Collection.insert (quite the same in our case).
You don't need to use Q, you can wrap yourself the save method and return directly a Mongoose Promise.
First create an utility method to wrap the save function, that's not very clean but something like:
//Utility function (put it in a better place)
var saveInPromise = function (model) {
var promise = new mongoose.Promise();
model.save(function (err, result) {
promise.resolve(err, result);
});
return promise;
}
Then you can use it instead of save to chain your promises
var User = mongoose.model('User');
var Channel = mongoose.model('Channel');
var Article = mongoose.model('Article');
//Step 1
var user = new User({data: 'value'});
saveInPromise(user).then(function () {
//Step 2
var channel = new Channel({user: user.id})
return saveInPromise(channel);
}).then(function (channel) {
//Step 3
var article = new Article({channel: channel.id})
return saveInPromise(article);
}, function (err) {
//A single place to handle your errors
});
I guess that's the kind of simplicity we are looking for.. right? Of course the utility function can be implemented with better integration with Mongoose.
Let me know what you think about that.
By the way there is an issue about that exact problem in the Mongoose Github:
Add 'promise' return value to model save operation
I hope it's gonna be solved soon. I think it takes some times because they are thinking of switching from mpromise to Q: See here and then here.
Two years later, this question just popped up in my RSS client ...
Things have moved on somewhat since May 2012 and we might choose to solve this one in a different way now. More specifically, the Javascript community has become "reduce-aware" since the decision to include Array.prototype.reduce (and other Array methods) in ECMAScript5. Array.prototype.reduce was always (and still is) available as a polyfill but was little appreciated by many of us at that time. Those who were running ahead of the curve may demur on this point, of course.
The problem posed in the question appears to be formulaic, with rules as follows :
The objects in the array passed as the first param to conn.collection(table).insert() build as follows (where N corresponds to the object's index in an array):
[ {}, ... ]
[ {userId:userN._id}, ... ]
[ {userId:userN._id, channelId:channelN._id}, ... ]
table names (in order) are : users, channels, articles.
the corresopnding object properties are : user, channel, article (ie the table names without the pluralizing 's').
A general pattern from this article by Taoofcode) for making asynchronous call in series is :
function workMyCollection(arr) {
return arr.reduce(function(promise, item) {
return promise.then(function(result) {
return doSomethingAsyncWithResult(item, result);
});
}, q());
}
With quite light adaptation, this pattern can be made to orchestrate the required sequencing :
function cascadeInsert(tables, n) {
/*
/* tables: array of unpluralisd table names
/* n: number of users to insert.
/* returns promise of completion|error
*/
var ids = []; // this outer array is available to the inner functions (to be read and written to).
for(var i=0; i<n; i++) { ids.push({}); } //initialize the ids array with n plain objects.
return tables.reduce(function (promise, t) {
return promise.then(function (docs) {
for(var i=0; i<ids.length; i++) {
if(!docs[i]) throw (new Error(t + ": returned documents list does not match the request"));//or simply `continue;` to be error tolerant (if acceptable server-side).
ids[i][t+'Id'] = docs[i]._id; //progressively add properties to the `ids` objects
}
return insert(ids, t + 's');
});
}, Q());
}
Lastly, here's the promise-returning worker function, insert() :
function insert(ids, t) {
/*
/* ids: array of plain objects with properties as defined by the rules
/* t: table name.
/* returns promise of docs
*/
var dfrd = Q.defer();
conn.collection(t).insert(ids, function(err, docs) {
(err) ? dfrd.reject(err) : dfrd.resolve(docs);
});
return dfrd.promise;
}
Thus, you can specify as parameters passed to cascadeInsert, the actual table/property names and the number of users to insert.
cascadeInsert( ['user', 'channel', 'article'], 2 ).then(function () {
// you get here if everything was successful
}).catch(function (err) {
// you get here if anything failed
});
This works nicely because the tables in the question all have regular plurals (user => users, channel => channels). If any of them was irregular (eg stimulus => stimuli, child => children), then we would need to rethink - (and probably implement a lookup hash). In any case, the adaptation would be fairly trivial.
Today we have mongoose-q as well. A plugin to mongoose that gives you stuff like execQ and saveQ which return Q promises.

Categories