Singleton MongoDB connection in Node - javascript

What is the best way to set up a singleton in Node for MongoDB? I tried the code below, but it does not work when many calls are made in rapid succession:
the singleton is not set up before subsequent calls arrive, so each of them tries to open a new connection and the app eventually fails. The code works well for infrequent calls.
Anyone have suggestions on the best practice here?
var db_singleton;

var getConnection = function getConnection(callback) {
    if (db_singleton) {
        callback(null, db_singleton);
    } else {
        var connURL = mongoURI; // set in env variables
        mongodb.connect(connURL, function (err, db) {
            if (err) {
                console.error("Error creating new connection " + err);
            } else {
                db_singleton = db;
                console.error("created new connection");
            }
            callback(err, db_singleton);
            return;
        });
    }
};

Node modules are singletons by themselves; just create a db module somewhere:
var mongo = require('mongojs');
var config = require('path/to/config');
var connection = mongo.connect(config.connection, config.collections);
module.exports = connection;
and then require('path/to/db') it in your models, etc.
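If you need to guard against the rapid-call race described in the question, another common approach is to cache the connection promise rather than the connection object itself, so concurrent first callers all share the single in-flight connect. A minimal sketch (the MONGO_URI variable and module layout are illustrative, not from the original post):
// db.js - caches the in-flight connection promise; concurrent callers
// all wait on the same connect attempt instead of opening new sockets
var mongodb = require('mongodb');

var connectionPromise = null;

module.exports = function getConnection() {
    if (!connectionPromise) {
        connectionPromise = new Promise(function (resolve, reject) {
            mongodb.MongoClient.connect(process.env.MONGO_URI, function (err, db) {
                if (err) {
                    connectionPromise = null; // allow a retry on failure
                    return reject(err);
                }
                resolve(db);
            });
        });
    }
    return connectionPromise;
};
Callers then do getConnection().then(function (db) { ... }), and only the very first call actually opens a connection.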

Related

How to Reuse Mongo Db Connection Object in different module

I want to use a MongoDB connection object across different modules. The problem is that whenever my app.js runs, the connection should be instantiated once, and then I can pass it to the functions of other modules.
Basically, the Mongo connection should be created only one time.
Currently I am using the code below whenever I want to call MongoDB. How can I reuse the MongoDB connection object outside of the callback function?
var mongodb = require("mongodb");
mongodb.MongoClient.connect(url, function (err, db) {
    if (err) {
        console.log(err);
        process.exit(1);
    }
    // Save database object from the callback for reuse.
    console.log("Database connection ready");
});
// Let's say I want to use the connection here...
db.collection("cname").find({}) // ...but I am getting db as undefined.
The "false good idea" would be to use an other variable in the upper scope to store your db instance:
var mongodb = require("mongodb");
var dbInstance;
mongodb.MongoClient.connect(url, function (err, db) {
    if (err) {
        console.log(err);
        process.exit(1);
    }
    // Save database object from the callback for reuse.
    console.log("Database connection ready");
    dbInstance = db;
});
// Here I don't know whether dbInstance is defined yet -> big problem!
Even though this may look like a good idea, it's not: it will give you an error about dbInstance being undefined. mongodb.MongoClient.connect is async, so you need to wait for the callback before using dbInstance.
It's better to use functions to which you pass the db instance as an argument:
var mongodb = require("mongodb");
var myModule = require("myModule");

var useDbInstance = function useDbInstance(dbInstance) {
    // Let's say I want to use the connection here:
    dbInstance.collection("cname").find({});
    myModule(dbInstance)
        .doSomething();
};

mongodb.MongoClient.connect(url, function (err, db) {
    if (err) {
        console.log(err);
        process.exit(1);
    }
    // Save database object from the callback for reuse.
    console.log("Database connection ready");
    useDbInstance(db);
});
Then you could wrap your code in Promises to get better control over the async flow and avoid "callback hell".
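For example, a minimal sketch of that promise wrapping might look like this (using native Promises; url is assumed to be defined as above):
var mongodb = require("mongodb");

// Wrap the callback-style connect in a Promise
function connectAsync(url) {
    return new Promise(function (resolve, reject) {
        mongodb.MongoClient.connect(url, function (err, db) {
            if (err) return reject(err);
            resolve(db);
        });
    });
}

connectAsync(url)
    .then(function (db) {
        return useDbInstance(db);
    })
    .catch(function (err) {
        console.log(err);
        process.exit(1);
    });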
I hope this helps :)

Asynchronous initialization of express.js (or similar) apps

Consider an example: I have the following express.js app (see the code snippet below). I want one persistent connection to the DB, and one persistent connection to my own service (which requires an async call to start), for the entire app lifetime. And there are a few entry points, i.e. my app can be accessed not only via the HTTP protocol. Of course, I want to avoid duplicating the service initialization code, and there could be several such async-initializing services.
/* app.js */
var app = require('express')();
// set views, use routes, etc.
var db = require('monk/mongoose/etc')(...); // happily, usually it's a sync operation
var myService = require('./myService'); // however, it's possible to have several such services
myService.init(function(err, result) {
    // only here is the initialization process finished!
});
module.exports.app = app;

/* http_server.js (www entry point) */
var app = require('app');
// create an HTTP server with this app and start listening

/* telnet_server.js (other entry point) */
var app = require('app');
// create a Telnet server with this app and start listening
In the code snippet above, by the time the HTTP (or Telnet, or any other) server starts, there is no guarantee that myService has finished initializing.
So I have to somehow reorganize my app creation code. For now I've settled on the following solution:
/* app.js */
var app = require('express')();
module.exports.app = app;
module.exports.init = function(callback) {
    var myService = require('./myService');
    myService.init(callback);
};

/* entry_point.js */
var app = require('app');
app.init(function(err) {
    if (!err) {
        // create an HTTP/Telnet/etc server and start listening
    }
});
So, my question is: what is the common way to initialize services that require an asynchronous call to start?
I would recommend promisifying the initialization function of your service(s) and then using them in the following manner:
const app = require('express')();
const util = require('util');
const myService = require('./myService');

const myServiceInit = util.promisify(myService.init);

Promise.all([myServiceInit()]).then(() => {
    // delayed listening of your app
    app.listen(2000);
}).catch(err => {
    // handle error here
});
I've used Promise.all so that you can add the initialization of multiple internal services to the array.
The prerequisite for promisifying your init function is that it must use an error-first callback mechanism. You can read more about it here: Node Official Doc
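For reference, an error-first init function for a hypothetical service could look like this (purely illustrative):
// myService.js - a hypothetical service whose init() takes an
// error-first callback, which is what util.promisify expects
module.exports.init = function (callback) {
    setTimeout(function () {
        var err = null; // pass an Error here on failure
        callback(err, { status: 'ready' });
    }, 100);
};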
Hope this helps your cause.
I've created a gist here with a sample of the code I normally use for this task. (It uses the Q promise library, but could easily be modified to use any other promises lib).
The basic idea is to describe the app backbone as a sequence of asynchronous initialization steps. Each step calls one or more async functions and binds the result to a name; the startup process only progresses to the next initialization step once all values are resolved for the current step, and subsequent steps can then access all values resolved by previous steps. This allows the dependency order of services and components within the app to be easily described.
For example, a backbone can be defined as follows:
var app = [
{ s1: startService1 },
{ s2: startService2, s3: startService3 },
{ s4: startService4 }
]
(Note that each step definition contains just references to the functions; the start() function - shown in the gist - invokes each function in the correct order).
Each of the startXxx vars is a function which takes a single argument and returns a promise, e.g.:
function startService4(app) {
    var s1 = app.s1;
    var s2 = app.s2;
    var deferred = Q.defer();
    // ... start the service, do async stuff ...
    return deferred.promise;
}
The function's app argument represents the configured app backbone, and results from previous initialization steps are available as its named properties.
I've used this pattern in fairly complicated systems, and find it a simple, flexible and effective way to define a system's high level structure.
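The gist itself isn't reproduced here, but a minimal sketch of such a start() driver, assuming each startXxx function returns a Q promise, might look like:
// start() - runs each step's functions in parallel, steps in sequence;
// resolved values are attached to `app` under their step names so later
// steps can read them
function start(steps) {
    var app = {};
    return steps.reduce(function (prev, step) {
        return prev.then(function () {
            return Q.all(Object.keys(step).map(function (name) {
                return Q(step[name](app)).then(function (value) {
                    app[name] = value;
                });
            }));
        });
    }, Q()).then(function () {
        return app;
    });
}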

How to prevent nested promise from hanging?

The following code works (the user object is written to the console), however the process doesn't exit. I believe one of the promises must not be resolved?
var Promise = require("bluebird");
var mongodb = require('mongodb');
Promise.promisifyAll(mongodb);

mongodb.MongoClient.connectAsync("mongodb://localhost/test")
    .then(function(db) {
        var users = db.collection('users');
        return users.findOneAsync({userName: "someuser"});
    })
    .then(function (result) {
        console.log(result);
    })
    .catch(function(e) {
        // handle error
    });
What is wrong with this code?
MongoDB creates a persistent connection which you're supposed to use for the whole lifecycle of your application. That open connection is what keeps the event loop alive and prevents the process from exiting.
When you're done with it, close it - that is, call db.close().
If you want to write saner code, use Promise.using and a disposer to build a connectAsync that does the resource management for you.
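A minimal sketch of that pattern, applied to the question's code (assuming bluebird's promisifyAll as above):
var Promise = require("bluebird");
var mongodb = require('mongodb');
Promise.promisifyAll(mongodb);

// The disposer guarantees db.close() runs once the using-block settles
function getConnectionAsync(url) {
    return mongodb.MongoClient.connectAsync(url)
        .disposer(function (db) {
            db.close();
        });
}

Promise.using(getConnectionAsync("mongodb://localhost/test"), function (db) {
    return db.collection('users').findOneAsync({userName: "someuser"});
}).then(function (result) {
    console.log(result); // the connection is closed; the process can now exit
}).catch(function (e) {
    // handle error
});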

How can I promisify the MongoDB native Javascript driver using bluebird?

I'd like to use the MongoDB native JS driver with bluebird promises. How can I use Promise.promisifyAll() on this library?
The bluebird 2.0 branch documentation contains a better promisification guide: https://github.com/petkaantonov/bluebird/blob/master/API.md#promisification
It actually has a MongoDB example, which is much simpler:
var Promise = require("bluebird");
var MongoDB = require("mongodb");
Promise.promisifyAll(MongoDB);
When using Promise.promisifyAll(), it helps to identify a target prototype if your target object must be instantiated. In the case of the MongoDB JS driver, the standard pattern is:
1. Get a Db object, using either a MongoClient static method or the Db constructor.
2. Call Db#collection() to get a Collection object.
So, borrowing from https://stackoverflow.com/a/21733446/741970, you can:
var Promise = require('bluebird');
var mongodb = require('mongodb');
var MongoClient = mongodb.MongoClient;
var Collection = mongodb.Collection;

Promise.promisifyAll(Collection.prototype);
Promise.promisifyAll(MongoClient);
Now you can:
var client = MongoClient.connectAsync('mongodb://localhost:27017/test')
    .then(function(db) {
        return db.collection("myCollection").findOneAsync({ id: 'someId' });
    })
    .then(function(item) {
        // Use `item`
    })
    .catch(function(err) {
        // An error occurred
    });
This gets you pretty far, except it'll also help to make sure the Cursor objects returned by Collection#find() are also promisified. In the MongoDB JS driver, the cursor returned by Collection#find() is not built from a prototype. So, you can wrap the method and promisify the cursor each time. This isn't necessary if you don't use cursors, or don't want to incur the overhead. Here's one approach:
// Wrap find() so every cursor it returns gets promisified toArray()
// and count() methods. (Promise.promisify(fn, receiver) is the bluebird
// 2.x signature; in bluebird 3.x you would pass { context: cursor }.)
Collection.prototype._find = Collection.prototype.find;
Collection.prototype.find = function() {
    var cursor = this._find.apply(this, arguments);
    cursor.toArrayAsync = Promise.promisify(cursor.toArray, cursor);
    cursor.countAsync = Promise.promisify(cursor.count, cursor);
    return cursor;
};
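With that wrapper in place, cursor results can be consumed like any other promise. A short usage sketch (db is a connected Db object as in the earlier snippet; the query is illustrative):
db.collection("myCollection").find({ active: true })
    .toArrayAsync()
    .then(function (docs) {
        // docs is a plain array of documents
    });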
I know this has been answered several times, but I wanted to add a little more information on this topic. Per bluebird's own documentation, you should use 'using' for cleaning up connections and preventing memory leaks.
Resource Management in Bluebird
I looked all over the place for how to do this correctly, and information was scarce, so I thought I'd share what I found after much trial and error. The data I used below (restaurants) came from the MongoDB sample data. You can get it here: MongoDB Import Data
// Using dotenv for environment / connection information
require('dotenv').load();

var Promise = require('bluebird'),
    mongodb = Promise.promisifyAll(require('mongodb')),
    using = Promise.using;

function getConnectionAsync() {
    // process.env.MongoDbUrl is stored in my .env file, loaded by the require above
    return mongodb.MongoClient.connectAsync(process.env.MongoDbUrl)
        // .disposer is what handles cleaning up the connection
        .disposer(function(connection) {
            connection.close();
        });
}
// The two methods below retrieve and output the same data, but the
// difference is that the first one does as much as it can through the
// promisified (Async) methods, while the 2nd one uses the driver's
// plain versions of each.
// NOTE: using limitAsync seems to go away to never-never land and never come back!

// Everything is done asynchronously here with promises
using(
    getConnectionAsync(),
    function(connection) {
        // Because we used promisifyAll(), most (if not all) of the
        // methods in what was promisified now have an Async sibling:
        //   collection : collectionAsync
        //   find : findAsync
        //   etc.
        return connection.collectionAsync('restaurants')
            .then(function(collection) {
                return collection.findAsync();
            })
            .then(function(data) {
                return data.limit(10).toArrayAsync();
            });
    }
    // Before this ".then" is called, the using statement will call the
    // .disposer() that was set up in the getConnectionAsync method
).then(
    function(data) {
        console.log("end data", data);
    }
);
// Here, only the connection is promisified - the rest of the chain
// uses the driver's plain (non-promisified) methods
using(
    getConnectionAsync(),
    function(connection) {
        // Because none of the Async functions are used here, these
        // calls go through the driver's own (non-promisified) versions,
        // unlike the promisified chain above
        return connection.collection('restaurants').find().limit(10).toArray();
    }
).then(
    function(data) {
        console.log("end data", data);
    }
);
I hope this helps someone else out who wanted to do things by the Bluebird book.
Version 1.4.9 of mongodb should now be easily promisifiable, like so:
Promise.promisifyAll(mongo.Cursor.prototype);
See https://github.com/mongodb/node-mongodb-native/pull/1201 for more details.
We have been using the following driver in production for a while now. It's essentially a promise wrapper over the native Node.js driver, and it also adds some additional helper functions.
poseidon-mongo - https://github.com/playlyfe/poseidon-mongo

JavaScript leaking memory (Node.js/Restify/MongoDB)

Update 4: By instantiating the restify client (see controllers/messages.js) outside of the function and calling global.gc() after every request, the memory growth rate seems to have been reduced a lot (~500KB per 10 secs). Yet the memory usage is still constantly growing.
Update 3: Came across this post: https://journal.paul.querna.org/articles/2011/04/05/openssl-memory-use/
It might be worth noting that I'm using HTTPS with Restify.
Update 2: Updated the code below to its current state. I've tried swapping out Restify for Express; sadly this didn't make any difference. It seems that the API call at the end of the chain (restify -> mongodb -> external api) causes everything to be retained in memory.
Update 1: I have replaced Mongoose with the standard MongoDB driver. Memory usage seems to grow more slowly, yet the leak remains.
I've been working on trying to locate this leak for a couple of days now.
I'm running an API using Restify and Mongoose and for every API call I do at least one MongoDB lookup. I've got about 1-2k users that hit the API multiple times in a day.
What I have tried
I've isolated my code to just using Restify and used ApacheBench to fire a huge amount of requests (100k+). The memory usage stays around 60MB during the test.
I've isolated my code to just using Restify and Mongoose and tested it the same way as above. Memory usage stays around 80MB.
I've tested the full production code locally using ApacheBench. Memory usage stays around 80MB.
I've automatically dumped the heap on intervals. The biggest heap dump I had was 400MB. All I can see that there are tons of Strings and Arrays but I cannot clearly see a pattern in it.
So, what could be wrong?
I've done the above tests using just one API user, which means Mongoose only grabs the same document over and over. The difference in production is that many different users hit the API, so Mongoose fetches many different documents.
When I start the Node.js server, the memory quickly grows to 100MB-200MB and eventually stabilizes around 500MB. Could this mean that it leaks memory for every user, and that once every user has visited the API it will stabilize?
I've included my code below which outlines the general structure of my API. I would love to know if there's a critical mistake in my code or any other approach to finding out what is causing the high memory usage.
Code
app.js
var restify = require('restify');
var MongoClient = require('mongodb').MongoClient;
// ... setup restify server and mongodb
require('./api/message')(server, db);
api/message.js
module.exports = function(server, db) {
    // Controllers used for retrieving accounts via MongoDB and communication with an external api
    var accountController = require('../controllers/accounts')(db);
    var messageController = require('../controllers/messages')();

    // Restify bind to put
    server.put('/api/message', function(req, res, next) {
        // Token from body
        var token = req.body.token;
        // Get account by token
        accountController.getAccount(token, function(error, account) {
            // Send a message using external API
            messageController.sendMessage(token, account.email, function() {
                res.send(201, {});
                return next();
            });
        });
    });
};
controllers/accounts.js
module.exports = function(db) {
    // Gets account by a token
    function getAccount(token, callback) {
        var ObjectID = require('mongodb').ObjectID;
        var collection = db.collection('accounts');
        collection.findOne({
            token: token
        }, function(error, account) {
            if (error) {
                return callback(error);
            }
            if (account) {
                return callback('', account);
            }
            return callback('Account not found');
        });
    }
};
controllers/messages.js
module.exports = function() {
    function sendMessage(token, email, callback) {
        // Get a token used for the external API
        getAccessToken(function() {
            // ... Setup client
            // Do POST
            client.post('/external_api', values, function(err, req, res, obj) {
                return callback();
            });
        });
    }
    return {
        sendMessage: sendMessage
    };
};
Heap snapshot of suspected leak
This might be a bug in getters; I ran into it when using virtuals or getters in a Mongoose schema: https://github.com/LearnBoost/mongoose/issues/1565
It's actually normal to only see strings and arrays, as most programs are largely built on them. Profilers that allow sorting by total object count are therefore not much use, as they often give the same results for many different programs.
A better way to use Chrome's memory profiler is to take one heap snapshot, for example after one user calls the API, and then a second snapshot after a second user calls the API.
The profiler lets you compare two snapshots and see the difference between them (see this tutorial), which will help you understand why the memory grew in an unexpected way.
Objects are retained in memory because there is still a reference to them that prevents the object from being garbage collected.
So another way to use the profiler to find memory leaks is to look for an object that you believe should not be there, and examine its retaining paths to see if there are any unexpected ones.
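If you want to capture those snapshots from the running process itself, as the question's author did on an interval, one option is the heapdump module (my suggestion, not something from the original post):
// Periodically write heap snapshots that can be loaded and diffed
// in Chrome DevTools (requires `npm install heapdump`)
var heapdump = require('heapdump');

setInterval(function () {
    heapdump.writeSnapshot(function (err, filename) {
        if (err) return console.error(err);
        console.log('Heap snapshot written to', filename);
    });
}, 60 * 1000);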
Not sure whether this helps, but could you try to remove unnecessary returns?
api/message.js
// Send a message using external API
messageController.sendMessage(token, account.email, function() {
    res.send(201, {});
    next(); // remove 'return'
});
controllers/accounts.js
module.exports = function(db) {
    // Gets account by a token
    function getAccount(token, callback) {
        var ObjectID = require('mongodb').ObjectID;
        var collection = db.collection('accounts');
        collection.findOne({
            token: token
        }, function(error, account) {
            if (error) {
                callback(error); // remove 'return'
            } else if (account) {
                callback('', account); // remove 'return'
            } else {
                callback('Account not found'); // remove 'return'
            }
        });
    }
    return { // I guess you missed copying this into the question.
        getAccount: getAccount
    };
};
controllers/messages.js
// Do POST
client.post('/external_api', values, function(err, req, res, obj) {
    callback(); // remove 'return'
});
Your issue is in getAccount, combined with how the GC works.
When you chain lots of functions, the GC only clears one level at a time, and the older something is in memory, the lower its chances of being collected. In your getAccount there are, by my count, at least 6 nesting levels that must unwind (or calls to global.gc(), or automatic GC runs) before anything can be collected; by that time the GC assumes it's something it probably won't collect, so it doesn't check it anyway.
collection {
    findOne {
        function(error, account) {
            callback('', account)
            sendMessage(...) {
                getAccessToken() {
                    Post
                }
            }
        }
    }
}
As suggested by Gene, remove this chaining.
P.S. This is just a representation of how the GC works; the details depend on the implementation, but you get the point.
