How to use "q" module for refactoring mongoose code? - javascript

I'm using mongoose to insert some data into mongodb. The code looks like:
var mongoose = require('mongoose');
mongoose.connect('mongo://localhost/test');
var conn = mongoose.connection;
// insert users
conn.collection('users').insert([{/*user1*/},{/*user2*/}], function(err, docs) {
var user1 = docs[0], user2 = docs[1];
// insert channels
conn.collection('channels').insert([{userId:user1._id},{userId:user2._id}], function(err, docs) {
var channel1 = docs[0], channel2 = docs[1];
// insert articles
conn.collection('articles').insert([{userId:user1._id,channelId:channel1._id},{}], function(err, docs) {
var article1 = docs[0], article2 = docs[1];
}
});
};
You can see there are a lot of nested callbacks there, so I'm trying to use q to refactor it.
I hope the code will look like:
Q.fcall(step1)
.then(step2)
.then(step3)
.then(step4)
.then(function (value4) {
// Do something with value4
}, function (error) {
// Handle any error from step1 through step4
})
.end();
But I don't know how to do it.

You'll want to use Q.nfcall, documented in the README and the Wiki. All Mongoose methods are Node-style. I'll also use .spread instead of manually destructuring .then.
var mongoose = require('mongoose');
mongoose.connect('mongo://localhost/test');
var conn = mongoose.connection;
var users = conn.collection('users');
var channels = conn.collection('channels');
var articles = conn.collection('articles');
function getInsertedArticles() {
return Q.nfcall(users.insert.bind(users), [{/*user1*/},{/*user2*/}]).spread(function (user1, user2) {
return Q.nfcall(channels.insert.bind(channels), [{userId:user1._id},{userId:user2._id}]).spread(function (channel1, channel2) {
return Q.nfcall(articles.insert.bind(articles), [{userId:user1._id,channelId:channel1._id},{}]);
});
})
}
getInsertedArticles()
.spread(function (article1, article2) {
// you only get here if all three of the above steps succeeded
})
.fail(function (error) {
// you get here if any of the above three steps failed
}
);
In practice, you will rarely want to use .spread, since you usually are inserting an array that you don't know the size of. In that case the code can look more like this (here I also illustrate Q.nbind).
To compare with the original one is not quite fair, because your original has no error handling. A corrected Node-style version of the original would be like so:
var mongoose = require('mongoose');
mongoose.connect('mongo://localhost/test');
var conn = mongoose.connection;
function getInsertedArticles(cb) {
// insert users
conn.collection('users').insert([{/*user1*/},{/*user2*/}], function(err, docs) {
if (err) {
cb(err);
return;
}
var user1 = docs[0], user2 = docs[1];
// insert channels
conn.collection('channels').insert([{userId:user1._id},{userId:user2._id}], function(err, docs) {
if (err) {
cb(err);
return;
}
var channel1 = docs[0], channel2 = docs[1];
// insert articles
conn.collection('articles').insert([{userId:user1._id,channelId:channel1._id},{}], function(err, docs) {
if (err) {
cb(err);
return;
}
var article1 = docs[0], article2 = docs[1];
cb(null, [article1, article2]);
}
});
};
}
getInsertedArticles(function (err, articles) {
if (err) {
// you get here if any of the three steps failed.
// `articles` is `undefined`.
} else {
// you get here if all three succeeded.
// `err` is null.
}
});

With alternative deferred promise implementation, you may do it as following:
var mongoose = require('mongoose');
mongoose.connect('mongo://localhost/test');
var conn = mongoose.connection;
// Setup 'pinsert', promise version of 'insert' method
var promisify = require('deferred').promisify
mongoose.Collection.prototype.pinsert = promisify(mongoose.Collection.prototype.insert);
var user1, user2;
// insert users
conn.collection('users').pinsert([{/*user1*/},{/*user2*/}])
// insert channels
.then(function (users) {
user1 = users[0]; user2 = users[1];
return conn.collection('channels').pinsert([{userId:user1._id},{userId:user2._id}]);
})
// insert articles
.match(function (channel1, channel2) {
return conn.collection('articles').pinsert([{userId:user1._id,channelId:channel1._id},{}]);
})
.done(function (articles) {
// Do something with articles
}, function (err) {
// Handle any error that might have occurred on the way
});

Considering Model.save instead of Collection.insert (quite the same in our case).
You don't need to use Q, you can wrap yourself the save method and return directly a Mongoose Promise.
First create an utility method to wrap the save function, that's not very clean but something like:
//Utility function (put it in a better place)
var saveInPromise = function (model) {
var promise = new mongoose.Promise();
model.save(function (err, result) {
promise.resolve(err, result);
});
return promise;
}
Then you can use it instead of save to chain your promises
var User = mongoose.model('User');
var Channel = mongoose.model('Channel');
var Article = mongoose.model('Article');
//Step 1
var user = new User({data: 'value'});
saveInPromise(user).then(function () {
//Step 2
var channel = new Channel({user: user.id})
return saveInPromise(channel);
}).then(function (channel) {
//Step 3
var article = new Article({channel: channel.id})
return saveInPromise(article);
}, function (err) {
//A single place to handle your errors
});
I guess that's the kind of simplicity we are looking for.. right? Of course the utility function can be implemented with better integration with Mongoose.
Let me know what you think about that.
By the way there is an issue about that exact problem in the Mongoose Github:
Add 'promise' return value to model save operation
I hope it's gonna be solved soon. I think it takes some times because they are thinking of switching from mpromise to Q: See here and then here.

Two years later, this question just popped up in my RSS client ...
Things have moved on somewhat since May 2012 and we might choose to solve this one in a different way now. More specifically, the Javascript community has become "reduce-aware" since the decision to include Array.prototype.reduce (and other Array methods) in ECMAScript5. Array.prototype.reduce was always (and still is) available as a polyfill but was little appreciated by many of us at that time. Those who were running ahead of the curve may demur on this point, of course.
The problem posed in the question appears to be formulaic, with rules as follows :
The objects in the array passed as the first param to conn.collection(table).insert() build as follows (where N corresponds to the object's index in an array):
[ {}, ... ]
[ {userId:userN._id}, ... ]
[ {userId:userN._id, channelId:channelN._id}, ... ]
table names (in order) are : users, channels, articles.
the corresopnding object properties are : user, channel, article (ie the table names without the pluralizing 's').
A general pattern from this article by Taoofcode) for making asynchronous call in series is :
function workMyCollection(arr) {
return arr.reduce(function(promise, item) {
return promise.then(function(result) {
return doSomethingAsyncWithResult(item, result);
});
}, q());
}
With quite light adaptation, this pattern can be made to orchestrate the required sequencing :
function cascadeInsert(tables, n) {
/*
/* tables: array of unpluralisd table names
/* n: number of users to insert.
/* returns promise of completion|error
*/
var ids = []; // this outer array is available to the inner functions (to be read and written to).
for(var i=0; i<n; i++) { ids.push({}); } //initialize the ids array with n plain objects.
return tables.reduce(function (promise, t) {
return promise.then(function (docs) {
for(var i=0; i<ids.length; i++) {
if(!docs[i]) throw (new Error(t + ": returned documents list does not match the request"));//or simply `continue;` to be error tolerant (if acceptable server-side).
ids[i][t+'Id'] = docs[i]._id; //progressively add properties to the `ids` objects
}
return insert(ids, t + 's');
});
}, Q());
}
Lastly, here's the promise-returning worker function, insert() :
function insert(ids, t) {
/*
/* ids: array of plain objects with properties as defined by the rules
/* t: table name.
/* returns promise of docs
*/
var dfrd = Q.defer();
conn.collection(t).insert(ids, function(err, docs) {
(err) ? dfrd.reject(err) : dfrd.resolve(docs);
});
return dfrd.promise;
}
Thus, you can specify as parameters passed to cascadeInsert, the actual table/property names and the number of users to insert.
cascadeInsert( ['user', 'channel', 'article'], 2 ).then(function () {
// you get here if everything was successful
}).catch(function (err) {
// you get here if anything failed
});
This works nicely because the tables in the question all have regular plurals (user => users, channel => channels). If any of them was irregular (eg stimulus => stimuli, child => children), then we would need to rethink - (and probably implement a lookup hash). In any case, the adaptation would be fairly trivial.

Today we have mongoose-q as well. A plugin to mongoose that gives you stuff like execQ and saveQ which return Q promises.

Related

Handle async DB calls

I did a couple of projects with node.js and I'm aware of the async behaviour and that one should usually use callback functions, etc. But one thing that bothers me ist the following.
I'm developing an Alexa skill and I have a function that handles the User intent:
'MyFunction': function() {
var toSay = ""; // Holds info what Alexa says
// Lot of checks and calculations what needs to be said by Alexa (nothing special)
if(xyz) {
toSay = "XYZ";
}else if(abc) {
toSay = "ABC";
}else{
toSay = "Something";
}
// Here is the "tricky" party
if(someSpecialEvent) {
toSay += " "+askDatabaseForInput(); // Add some information from database to string
}
this.emit(':ask', toSay, this.t('REPROMT_SPEECH')); // Gives the Info to Alexa (code execution stops here)
}
As mentioned in the code, there is some code which is usually used to find out what the output to Alexa should be.
Only on rare events, "someSpecialEvent", I need to query the database and add information to the String "toSay".
Querying the DB would look something like:
function askDatabaseForInput() { // The function to query the DB
var params = {
TableName: "MyTable",
OtherValues: "..."
};
// Do the Query
docClient.query(params, function(err, data) {
// Of course here are some checks if everything worked, etc.
var item = data.Items[0];
return item; // Item SHOULD be returned
});
return infoFromDocClient; // Which is, of course not possible
}
Now I know, that in the first function "'MyFunction'" I could just pass the variable "toSay" down to the DB Function and then to the DB Query and if everything is fine, I would do the "this.emit()" in the DB Query function. But for me, this looks very dirty and not much reusable.
So is there a way I can use "askDatabaseForInput()" to return DB information and just add it to a String? This means making the asynchronous call synchronous.
Making a synchronous call wouldn't affect the user experience, as the code isn't doing anything else anyway and it just creates the String and is (maybe) waiting for DB input.
Thanks for any help.
So you could do 2 things:
Like the person who commented says you could use a callback:
function askDatabaseForInput(callback) {
var params = {
TableName: "MyTable",
OtherValues: "..."
};
docClient.query(params, function(err, data) {
if (err) {
callback(err, null)
} else {
var item = data.Items[0];
callback(null, item);
}
});
}
or you could use promises:
function askDatabaseForInput() {
var params = {
TableName: "MyTable",
OtherValues: "..."
};
return new Promise(function (resolve, reject) {
docClient.query(params, function(err, data) {
if (err) {
reject(err)
} else {
var item = data.Items[0];
resolve(item);
}
});
});
}
you can then either put a function in where you call askDatabaseForInput or do askDatabaseForInput.then(....).
In the function or the .then you would add what you retrieved from the database to the variable toSay
hope this helps

How can I run a callback after a loop of Promises?

Specifically, given a list of data, I want to loop over that list and do a fetch for each element of that data before I combine it all afterward. The thing is, as written, the code iterates through the entire list immediately, starting all the operations at once. Then, even though the fetch operations are still running, the then call I have after all that runs, before the data could've been processed.
I read something about putting all the Promises in an array, then passing that array to a Promise.all() call, followed by a then that will have access to all that processed data as intended, but I'm not sure how exactly to go about doing it in this case, since I have nested Promises in this for loop.
for(var i in repoData) {
var repoName = repoData[i].name;
var repoUrl = repoData[i].url;
(function(name, url) {
Promise.all([fetch(`https://api.github.com/repos/${username}/${repoData[i].name}/commits`),
fetch(`https://api.github.com/repos/${username}/${repoData[i].name}/pulls`)])
.then(function(results) {
Promise.all([results[0].json(), results[1].json()])
.then(function(json) {
//console.log(json[0]);
var commits = json[0];
var pulls = json[1];
var repo = {};
repo.name = name;
repo.url = url;
repo.commitCount = commits.length;
repo.pullRequestCount = pulls.length;
console.log(repo);
user.repositories.push(repo);
});
});
})(repoName, repoUrl);
}
}).then(function() {
var payload = new Object();
payload.user = user;
//console.log(payload);
//console.log(repoData[0]);
res.send(payload);
});
Generally when you need to run asynchronous operations for all of the items in an array, the answer is to use Promise.all(arr.map(...)) and this case appears to be no exception.
Also remember that you need to return values in your then callbacks in order to pass values on to the next then (or to the Promise.all aggregating everything).
When faced with a complex situation, it helps to break it down into smaller pieces. In this case, you can isolate the code to query data for a single repo into its own function. Once you've done that, the code to query data for all of them boils down to:
Promise.all(repoData.map(function (repoItem) {
return getDataForRepo(username, repoItem);
}))
Please try the following:
// function to query details for a single repo
function getDataForRepo(username, repoInfo) {
return Promise
.all([
fetch(`https://api.github.com/repos/${username}/${repoInfo.name}/commits`),
fetch(`https://api.github.com/repos/${username}/${repoInfo.name}/pulls`)
])
.then(function (results) {
return Promise.all([results[0].json(), results[1].json()])
})
.then(function (json) {
var commits = json[0];
var pulls = json[1];
var repo = {
name: repoInfo.name,
url: repoInfo.url,
commitCount: commits.length,
pullRequestCount: pulls.length
};
console.log(repo);
return repo;
});
}
Promise.all(repoData.map(function (repoItem) {
return getDataForRepo(username, repoItem);
})).then(function (retrievedRepoData) {
console.log(retrievedRepoData);
var payload = new Object();
payload.user = user;
//console.log(payload);
//console.log(repoData[0]);
res.send(payload);
});

Sails.js: Nested MongoDB queries

I am using Sails v0.11 and am developing an standalone importer script in order to import data to mongoDB and - that is now the not-working part - build the associations between the models.
For this process I introduced temporary helper properties in the models in order to find the associated records and replace them by in real MongoDB _ids.
The script starts Sails in order to be able use its features (waterline, etc.):
var app = Sails();
app.load({
hooks: { grunt: false },
log: { level: 'warn' }
}, function sailsReady(err){
processUsers() finds all users and their _ids and iterates over them to invoke a second function addOrgsToOneUser()
var processUsers = function() {
// Iterate through all users in order to retrieve their _ids and
app.models['user'].native(function(err, collection) {
collection.find({}, projectionOrgInUser).toArray(function (err, users) {
Async.eachSeries(users, function (user, next){
// prepare userInOrgs
whereUserInOrg = { orgId: { $in: userInOrgs } };
//This is invoking
addOrgsToOneUser(user, whereUserInOrg);
next();
}, function afterwards (err) {
if (err) {
console.error('Import failed, error details:\n',err);
return process.exit(1);
}
console.log("done");
return process.exit(0); // This returns too early, not executing the addOrgsToOneUser
});
});
});
};
addOrgsToOneUser() finds all orgs belonging to THIS user and updates then the orgs array property of THIS user
var addOrgsToOneUser = function(user, whereUserInOrg) {
var projectionUserInOrg = "...";
// Find all orgs that this user is associated to and store it in inOrgs
app.models['org'].native(function(err, collection) {
collection.find(whereUserInOrg, projectionUserInOrg).toArray(function (err, orgs) {
// prepare inOrgs which is needed for updating
//update user to have an updated orgs array based on inOrgs.
app.models['user'].update({'id' : user._id.toString()}, {'orgs': inOrgs}).exec(function afterwards(err, updated){
console.log('Updated user ' + user._id.toString() + ' to be in their orgs');
});
});
});
}
Problem:
Process.exit(0) is called before the query/update of saddOrgsToOneUser() has completed. It behaves as expected if saddOrgsToOneUser() contains just a console.log for instance, but queries are triggered ansynchronously of course.
In case I comment out Process.exit(0), the script never stops, but the queries are executed as intented.
As the script will have further nested queries, I need a better approach to this as manually kill this script ...
How is nesting queries and iterating over their results done properly?
Thank you very much,
Manuel
addOrgsToOneUser is asynchronous. next() needs to be called after everything is done inside addOrgsToOneUser. The way I would do it is to pass in a callback (next) and call it when everything is done. So the call is
addOrgsToOneUser(user, whereUserInOrg, next);
and the addOrgsToOneUser will have an extra argument:
var addOrgsToOneUser = function(user, whereUserInOrg, callback) {
var projectionUserInOrg = "...";
// Find all orgs that this user is associated to and store it in inOrgs
app.models['org'].native(function(err, collection) {
collection.find(whereUserInOrg, projectionUserInOrg).toArray(function (err, orgs) {
// prepare inOrgs which is needed for updating
//update user to have an updated orgs array based on inOrgs.
app.models['user'].update({'id' : user._id.toString()}, {'orgs': inOrgs}).exec(function afterwards(err, updated){
console.log('Updated user ' + user._id.toString() + ' to be in their orgs');
callback(); // your original next() is called here
});
});
});
}

Get data from 2nd level pointer in Parse

I have a setup with Three relevant classes: _User, Article, and Profile. In Article I have a pointer named author for _User, and in Profile, I have the same; a pointer, but named user, for _User.
Now, I want to retrieve data from Article, with the cols firstname and lastname in Profile, where the pointer in Article matches the objectId in _User, and the pointer in Profile.
Basically what I would solve with an inner join in SQL.
How do I go about this with just one parse call?
This is what I have so far:
var Article = Parse.Object.extend("Article");
var query = new Parse.Query(Article);
query.include("category");
query.find({
success: function(results) {
console.log("Successfully retrieved " + results.length + " article(s):");
// Do something with the returned Parse.Object values
for (var i = 0; i < results.length; i++) {
var object = results[i];
console.log(object.get('title'));
console.log(object.get('content'));
console.log(object.get('category').get("categoryName"));
}
},
error: function(error) {
console.log("Error: " + error.code + " " + error.message);
}
});
Its a pleasure answering questions where the OP took the trouble to include a complete (and minimal) description of the data and the desired result.
I think I understand that you want to get Articles, and for each you want to get Profiles, and that the Profiles and articles are (logically) joined via a common pointer to User.
This can be done using an additional query per article. For clarity and maintainability, I like to break these things up into short, logical promise-returning functions, so...
// for handy array functions, like _.map, and turning var args into simple arrays
var _ = require('underscore');
// note: include underscore this way in cloud code (nodejs)
// for browser, see underscorejs.org to add to your project
// this will answer a promise that is fulfilled with an array of the form:
// [ { article:article_object, profile:profile_object }, {...}, ...]
function articlesWithProfiles() {
var query = new Parse.Query("Article");
query.include("category");
query.include("author");
return query.find().then(function(articles) {
var promises = _.map(articles, function(article) {
return profileForArticle(article);
});
return Parse.Promise.when(promises);
});
}
// return a promise that's fulfilled by associating the given article with it's profile
function profileForArticle(article) {
var author = article.get("author");
var query = new Parse.Query("Profile");
query.equalTo("user", author);
return query.first().then(function(profile) {
return { article:article, profile:profile };
});
}
// call it like this
articlesWithProfiles().then(function() {
// see edit below
var result = _.toArray(arguments);
console.log(JSON.stringify(result));
}, function(error) {
// handle error
console.log(JSON.stringify(error));
});

Insert document loop - RangeError: Maximum call stack size exceeded

I am literally giving my first steps with node and mongodb and I have recently hit this RangeError wall.
Here's what I am trying to do, I have a file that contains a list of countries that I would like to add to my mongo db. This would be part of my "seed" mechanism to get the app running.
I load the json and then I iterate through the collection of objects and add them one by one to the 'Countries' collection.
However, everytime I run the code, I get a "RangeError: Maximum call stack size exceeded".
I have googled around but none of the suggested solutions seem to apply for me.
My guess is there is something wrong with my insertCountry function...
Anyways, here's my code:
var mongoose = require('mongoose');
var countries = require('./seed/countries.json');
// mongodb
var Country = mongoose.Schema({
name: String,
code: String,
extra: [Extra]
});
var Extra = mongoose.Schema({
exampleField: Boolean,
anotherField: Boolean
});
var mCountry = mongoose.model('Countries', Country);
var mExtra = mongoose.model('Extras', Extra);
// do connection
mongoose.connect('...');
var db = mongoose.connection;
db.on('error', console.error.bind(console, 'connection error'));
db.once('open', function callback() {
});
// async function
var insertCountry = function(document, callback) {
db.model('Countries').count({code: document.code}, function (err, count) {
if (count < 1) {
db.collection('Countries').insert(document, function (err, result) {
if (!err) {
console.log('country ' + document.name + ' added');
}
else {
console.log('- [' + document.name + '] ' + err);
}
});
}
callback(null,document);
});
};
// doing countries
var Country = mongoose.model('Countries');
var Extras = mongoose.model('Extras');
for(i = 0; i < countries.length; i++)
{
nCountry = new Country();
nCountry.name = countries[i].name;
nCountry.code = countries[i].code;
nCountry.benefits = new Extras();
nCountry.benefits.exampleField = false;
nCountry.benefits.anotherField = false;
insertCountry(nCountry, function (err, value) {
console.log(value.name + ' added to collection (callback)');
});
}
I have been using some guides I have found to build this so this might not be optimal code. Any best pratices, standards, guides or tutorials you can share are most welcome!
Your callback is in the wrong place. It is not waiting for the insert operation to complete before you return from it's own callback. Altering your code:
var insertCountry = function(document, callback) {
db.model('Countries').count({code: document.code}, function (err, count) {
if (count < 1) {
db.collection('Countries').insert(document, function (err, result) {
if (!err) {
console.log('country ' + document.name + ' added');
}
else {
console.log('- [' + document.name + '] ' + err);
}
callback(null,document);
});
}
});
};
That is part of your problem, but it does not completely solve it. The other part is the loop which also does not wait for the wrapping function to complete before moving on. You want something like asyc.eachSeries in order to wait for inserts to complete before performing the next iteration. This is mostly why you are exceeding the call stack:
async.eachSeries(
countries,
function(current,callback) {
// make your nCountry object
insertCountry(nCountry,function(err,value) {
// do something, then
callback(err);
})
},
function(err) {
// called where done, err contains err where set
console.log( "done" );
}
);
There is really still and issue with the array, which must be reasonably large if you are exceeding the call stack limit. You probably should look at using event streams to process that rather that load everything in memory to the array.
Personally, if you were just trying not to insert duplicates for a field and had MongoDB 2.6 available I would just use the Bulk Operations API with "unordered operations" and allow non fatal failures on the duplicate keys. Coupled with the fact that bulk operations are sent in "batches" and not one at a time, this is much more efficient than checking for the presence on every request:
var Country = mongoose.Schema({
name: String,
code: { type: String, unique: true }, // define a unique index
extra: [Extra]
});
var insertCountries = function(countries,callback) {
var bulk = Country.collection.initializeUnorderedBulkOp();
var counter = 0;
async.eachSeries(
countries,
function(current,callback) {
// same object construction
bulk.insert(nCountry);
counter++;
// only send once every 1000
if ( counter % 1000 == 0 ) {
bulk.execute(function(err,result) {
// err should generally not be set
// but result would contain any duplicate errors
// along with other insert responses
// clear to result and callback
bulk = Country.collection.initializeUnorderedBulkOp();
callback();
});
} else {
callback();
}
},
function(err) {
// send anything still queued
if ( counter % 1000 != 0 )
bulk.execute(function(err,result) {
// same as before but no need to reset
callback(err);
});
}
);
};
mongoose.on("open",function(err,conn) {
insertCountries(countries,function(err) {
console.log("done");
});
});
Keeping in mind that unlike the methods implemented directly on the mongoose models, the native driver methods require that a connection is actually established before they can be called. Mongoose "queues" these up for you, but otherwise you need something to be sure the connection is actually open. The example of the "open" event is used here.
Take a look at event streams as well. If you are constructing an array large enough to cause a problem by missing callback execution then you probably should not be loading it all in memory from whatever your source is. Stream processing that source combined with an approach as shown above should provide efficient loading.

Categories