I've written a basic Node app (my first) to insert many CSV rows into Mongo (the items array in the code below). Once all items have been inserted, the db connection should be closed and the program exited.
The issue I've been working on is figuring out when to close the db connection once all inserts have returned a result. I've gotten it working by counting all of the insert result callbacks, but to me this feels clunky. I know one improvement I could make is to batch the inserts by passing an array to the insert function, but I'll still need my code to be aware of when all inserts have completed (assuming it would be bad to insert 100k items in one query). Is there a better way (my code feels hacky) to do this?
The hacky part:
function (err, result) {
    queryCompletedCount++;
    if (err) console.log(err);
    // Not sure about doing it this way
    // Close db once all queries have returned a result
    if (queryCompletedCount === items.length) {
        db.close();
        console.log("Finish inserting data: " + new Date());
    }
}
Full insert code
MongoClient.connect(dbConnectionURL, function (err, db) {
    if (err) {
        console.log("Error connecting to DB: " + err);
    } else {
        var productCollection = db.collection('products');
        console.log("Connected to DB");
        console.log("Start inserting data: " + new Date());
        var queryCompletedCount = 0;
        for (var i = 0; i < items.length; i++) {
            productCollection.insert([{
                manufacturerCode: null,
                name: items[i].name,
                description: null
            }], function (err, result) {
                queryCompletedCount++;
                if (err) console.log(err);
                // Not sure about doing it this way
                // Close db once all queries have returned a result
                if (queryCompletedCount === items.length) {
                    db.close();
                    console.log("Finish inserting data: " + new Date());
                }
            });
        }
    }
});
What do you think about solving this with the async module, like this:
var async = require('async');

async.eachSeries(items, function (item, next) {
    productCollection.insert([{
        manufacturerCode: null,
        name: item.name,
        description: null
    }], function (err, result) {
        if (err) {
            return next(err);
        }
        next();
    });
}, function (err) {
    // this will be called after all insertions have completed (or on the first error)
    if (err) console.log(err);
    db.close();
    console.log("Finish inserting data: " + new Date());
});
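If strictly serial inserts turn out to be too slow for 100k items, async.eachLimit is a drop-in variant that keeps a bounded number of inserts in flight (a sketch; the limit of 10 is an arbitrary choice):

async.eachLimit(items, 10, function (item, next) {
    // at most 10 inserts are pending at any moment
    productCollection.insert([{
        manufacturerCode: null,
        name: item.name,
        description: null
    }], next);
}, function (err) {
    if (err) console.log(err);
    db.close();
    console.log("Finish inserting data: " + new Date());
});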
What you need here is MongoDB's Write Concern, configured in the strictest way.
There are two levels of Write Concern. The first is the write mode, where the query returns only once the result has been written to the configured number of mongo instances. In your case I suppose there is a single instance, but for the future you may configure it as "w": "majority". The second level is the journal concern: by setting "j": 1, your query will return only when the data has been written to the journal.
So in your case the best Write Concern configuration might be {"w": "majority", "j": 1}. Just pass it as the options argument of your insert call, before the callback.
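A minimal sketch of what that looks like with the insert from the question:

productCollection.insert([{
    manufacturerCode: null,
    name: items[i].name,
    description: null
}], { w: "majority", j: 1 }, function (err, result) {
    // fires only once the write has been acknowledged by a majority
    // of replica set members and committed to the journal
});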
Related
I'm writing a simple online videogame with Node.js and I want to manage the score of each player, saving it in a database (MySQL).
Now on the server side I have a piece of code like this:
socket.on('game_over', function(data){
    for (var i = 0; i < players.length; i++) {
        if (players[i].id == data.id) {
            var sql;
            sql = 'UPDATE login SET email=? WHERE username=?'
            connection.query(sql, [data.score, "d"], function(error, results, fields) {
                console.log(sql);
                console.log(error);
                if (error) throw error;
                console.log(result);
            });
            players.splice(i, 1);
            break;
        }
    }
    socket.broadcast.emit('p_disconnect', data.id);
});
When I start my server and a game_over signal is received, my server disconnects.
The print of the SQL query is correct and I don't see any error, since it returns 'null'.
Why does my server disconnect after that, and more importantly, what can I do to keep the server up?
Without the connection.query part it works like it should.
Based on the comment on the question:
// assuming
const players = [
    { id: 1, otherInfo: 'foobar' },
    { id: 2, otherInfo: 'foobar' },
]
const connection = mysql.connect() // something like this

// When the game ends we assume that the game_over event is fired
socket.on('game_over', function (data) {
    // "I need to find the correct player"
    const correctPlayer = players.find(player => player.id === data.id)
    // "and delete it from the list of the active players"
    const position = players.indexOf(correctPlayer)
    players.splice(position, 1)
    // "and update the database with its score" => depends on your DB structure
    const query = `UPDATE youTable SET score = ${data.score} WHERE playerId = ${correctPlayer.id}`
    // here it depends on how you want to manage the query result (two examples follow)

    // Option 1: run the query (it is async, because of how js works) and just log the result
    connection.query(query, function (error, results, fields) {
        // this code is executed when the query ends
        console.log(error, results, fields)
    })
    // this code is executed right after starting the query
    socket.broadcast.emit('p_disconnect', data.id);

    // Option 2: run the query and emit the event only after the query ends
    connection.query(query, function (error, results, fields) {
        socket.broadcast.emit('p_disconnect', data.id);
        console.log(error, results, fields)
    })
});
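As a side note, the mysql package's query method also accepts an array of values for ? placeholders, which avoids interpolating user data directly into the SQL string (a sketch, keeping the table and column names from above):

const query = 'UPDATE youTable SET score = ? WHERE playerId = ?'
connection.query(query, [data.score, correctPlayer.id], function (error, results, fields) {
    // the driver escapes the values before sending the query
    console.log(error, results, fields)
})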
I did a couple of projects with Node.js and I'm aware of the async behaviour and that one should usually use callback functions, etc. But one thing that bothers me is the following.
I'm developing an Alexa skill and I have a function that handles the user intent:
'MyFunction': function() {
    var toSay = ""; // Holds what Alexa says

    // Lots of checks and calculations on what needs to be said by Alexa (nothing special)
    if (xyz) {
        toSay = "XYZ";
    } else if (abc) {
        toSay = "ABC";
    } else {
        toSay = "Something";
    }

    // Here is the "tricky" part
    if (someSpecialEvent) {
        toSay += " " + askDatabaseForInput(); // Add some information from database to string
    }

    this.emit(':ask', toSay, this.t('REPROMT_SPEECH')); // Gives the info to Alexa (code execution stops here)
}
As mentioned in the code, there is some logic that works out what the output to Alexa should be.
Only on rare events, "someSpecialEvent", do I need to query the database and add information to the string "toSay".
Querying the DB would look something like:
function askDatabaseForInput() { // The function to query the DB
    var params = {
        TableName: "MyTable",
        OtherValues: "..."
    };

    // Do the query
    docClient.query(params, function(err, data) {
        // Of course here are some checks if everything worked, etc.
        var item = data.Items[0];
        return item; // Item SHOULD be returned
    });

    return infoFromDocClient; // Which is, of course, not possible
}
Now I know that in the first function, 'MyFunction', I could just pass the variable "toSay" down to the DB function and then to the DB query, and if everything is fine, do the "this.emit()" inside the DB query's callback. But to me this looks very dirty and not very reusable.
So is there a way I can use "askDatabaseForInput()" to return DB information and just add it to a string? This would mean making the asynchronous call synchronous.
Making a synchronous call wouldn't affect the user experience, as the code isn't doing anything else anyway; it just builds the string and is (maybe) waiting for DB input.
Thanks for any help.
So you could do two things:
Like the person who commented says, you could use a callback:
function askDatabaseForInput(callback) {
    var params = {
        TableName: "MyTable",
        OtherValues: "..."
    };

    docClient.query(params, function(err, data) {
        if (err) {
            callback(err, null)
        } else {
            var item = data.Items[0];
            callback(null, item);
        }
    });
}
or you could use promises:
function askDatabaseForInput() {
    var params = {
        TableName: "MyTable",
        OtherValues: "..."
    };

    return new Promise(function (resolve, reject) {
        docClient.query(params, function(err, data) {
            if (err) {
                reject(err)
            } else {
                var item = data.Items[0];
                resolve(item);
            }
        });
    });
}
You can then either pass a callback function in where you call askDatabaseForInput, or do askDatabaseForInput().then(...).
In the callback or the .then you would add what you retrieved from the database to the variable toSay.
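For example, wiring the promise version into the handler from the question might look like this (a sketch; note that this.emit moves inside the .then, since toSay is only complete once the query resolves):

'MyFunction': function() {
    var toSay = "..."; // built by the usual checks
    var self = this;   // keep a reference for use inside the .then

    if (someSpecialEvent) {
        askDatabaseForInput().then(function (item) {
            toSay += " " + item;
            self.emit(':ask', toSay, self.t('REPROMT_SPEECH'));
        }).catch(function (err) {
            console.log(err);
        });
    } else {
        this.emit(':ask', toSay, this.t('REPROMT_SPEECH'));
    }
}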
hope this helps
I'm using Node.js with MongoDB, and I'm also using Monk for db access. I have the code below:
console.time("start");
collection.findOne({name: "jason"},
function(err, document) {
for(var i = 0; i < document.friends.length; i++) // "friends is an array contains ids of the user's friends"
{
collection.findOne({id: document.friends[i]}, function(err, doc)
{
console.log(doc.name);
});
}
});
console.log("The file was saved!");
console.timeEnd("start");
I have two questions regarding this code:
I see the execution time and the "The file was saved!" string first, then I see the names of the friends coming in the console. Why is that? Shouldn't I see the names first and then the execution time? Is it because of the async nature of Node.js?
Names are printing very slowly in the console, at a rate of about one name every two seconds. Why is it so slow? Is there a way to make the process faster?
EDIT:
Is it a good idea to break the friends list into smaller pieces and fetch the friends asynchronously? Would it make the process faster?
EDIT 2:
I changed my code to this:
collection.find({ id: { "$in": document.friends}}).then(function(err, doc)
{
    console.log(doc.name);
    if (err) {
        return console.log(err);
    }
}
This doesn't give an error, but this doesn't print anything either.
Thanks in advance.
Answer for question 1:
Yes, you are right, it is because of the async nature of Node.js.
To deal with that, Node.js provides mechanisms (such as callbacks and promises) that you can use; otherwise you can handle it on your own manually by setting a flag.
Answer for question 2:
You can use $in instead of one findOne per friend; it will be easier and faster.
e.g. .find({ "fieldx": { "$in": arr } })
arr: here you need to provide the whole array.
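Applied to the code from the question, that might look like this (a sketch, assuming Monk's callback style):

collection.findOne({name: "jason"}, function (err, document) {
    collection.find({ id: { "$in": document.friends } }, function (err, friends) {
        // one round trip to the database instead of one query per friend
        friends.forEach(function (friend) {
            console.log(friend.name);
        });
    });
});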
Yes, it's because of JavaScript's async nature.
Since you call the db from inside a for loop, JavaScript will not wait for the response and will continue execution, so it prints "The file was saved!" first.
About your question 2:
It's making a db call for every friend, so it's no surprise it takes some time; that's why it's taking 1 or 2 seconds per friend.
console.time("start");
collection.findOne({name: "jason"},
function(err, document) {
for(var i = 0; i < document.friends.length; i++) // "friends is an array contains ids of the user's friends"
{
console.log("InsideforLoop Calling " + i + " friend");
collection.findOne({id: document.friends[i]}, function(err, doc)
{
console.log(doc.name);
});
console.log("Terminating " + i + "-----");
}
});
console.log("The file was saved!");
console.timeEnd("start");
This will make the async and db behaviour clearer.
As you will see, it will print all the loop's console output in order:
InsideforLoop Calling 0 friend
Terminating 0 -----
and so on, while each
console.log(doc.name);
will be printed asynchronously, whenever its query returns.
Added
collection.findOne({name: "jason"}, function(err, document) {
    // you can do this
    collection.find({id: {$in: document.friends}}, function(err, docs) {
        console.log(docs);
    });
});
Find All Details in one call
collection.aggregate([
    {
        $match: {
            id: { "$in": document.friends },
        }
    }
]).exec(function (e, d) {
    console.log(d)
    if (!e) {
        // your code when the data was retrieved successfully
    } else {
        // your code when you got an error
    }
});
collection.findOne({name: "jason"},
function(err, document) {
if(document != undefined){
collection.find({ id: { "$in": document.friends}}).then(function(err, doc)
{
console.log(doc.name);
if(err) {
return console.log(err);
}
}
}
});
Answer to 1: Yes, it is because node is async. The part where it logs names is executed only when the first findOne returns, whereas "The file was saved!" is executed straight away.
I am using Sails v0.11 and am developing a standalone importer script in order to import data to MongoDB and, that is now the not-working part, build the associations between the models.
For this process I introduced temporary helper properties in the models in order to find the associated records and replace them with real MongoDB _ids.
The script starts Sails in order to be able to use its features (Waterline, etc.):
var app = Sails();

app.load({
    hooks: { grunt: false },
    log: { level: 'warn' }
}, function sailsReady(err){
    // ... processUsers() (below) is kicked off here once Sails is ready
});
processUsers() finds all users and their _ids and iterates over them to invoke a second function, addOrgsToOneUser():
var processUsers = function() {
    // Iterate through all users in order to retrieve their _ids
    app.models['user'].native(function(err, collection) {
        collection.find({}, projectionOrgInUser).toArray(function (err, users) {
            Async.eachSeries(users, function (user, next){
                // prepare userInOrgs
                whereUserInOrg = { orgId: { $in: userInOrgs } };
                // This is invoking
                addOrgsToOneUser(user, whereUserInOrg);
                next();
            }, function afterwards (err) {
                if (err) {
                    console.error('Import failed, error details:\n', err);
                    return process.exit(1);
                }
                console.log("done");
                return process.exit(0); // This returns too early, not executing addOrgsToOneUser
            });
        });
    });
};
addOrgsToOneUser() finds all orgs belonging to THIS user and then updates the orgs array property of THIS user:
var addOrgsToOneUser = function(user, whereUserInOrg) {
    var projectionUserInOrg = "...";

    // Find all orgs that this user is associated to and store them in inOrgs
    app.models['org'].native(function(err, collection) {
        collection.find(whereUserInOrg, projectionUserInOrg).toArray(function (err, orgs) {
            // prepare inOrgs, which is needed for updating
            // update user to have an updated orgs array based on inOrgs
            app.models['user'].update({'id': user._id.toString()}, {'orgs': inOrgs}).exec(function afterwards(err, updated){
                console.log('Updated user ' + user._id.toString() + ' to be in their orgs');
            });
        });
    });
}
Problem:
process.exit(0) is called before the query/update in addOrgsToOneUser() has completed. It behaves as expected if addOrgsToOneUser() contains just a console.log, for instance, but the queries are triggered asynchronously, of course.
If I comment out process.exit(0), the script never stops, but the queries are executed as intended.
As the script will have further nested queries, I need a better approach than manually killing the script...
How is nesting queries and iterating over their results done properly?
Thank you very much,
Manuel
addOrgsToOneUser is asynchronous. next() needs to be called after everything is done inside addOrgsToOneUser. The way I would do it is to pass in a callback (next) and call it when everything is done. So the call is
addOrgsToOneUser(user, whereUserInOrg, next);
and the addOrgsToOneUser will have an extra argument:
var addOrgsToOneUser = function(user, whereUserInOrg, callback) {
    var projectionUserInOrg = "...";

    // Find all orgs that this user is associated to and store them in inOrgs
    app.models['org'].native(function(err, collection) {
        collection.find(whereUserInOrg, projectionUserInOrg).toArray(function (err, orgs) {
            // prepare inOrgs, which is needed for updating
            // update user to have an updated orgs array based on inOrgs
            app.models['user'].update({'id': user._id.toString()}, {'orgs': inOrgs}).exec(function afterwards(err, updated){
                console.log('Updated user ' + user._id.toString() + ' to be in their orgs');
                callback(); // your original next() is called here
            });
        });
    });
}
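The iterator inside processUsers then just passes next straight through (a sketch of only the changed part):

Async.eachSeries(users, function (user, next) {
    // prepare userInOrgs
    var whereUserInOrg = { orgId: { $in: userInOrgs } };
    addOrgsToOneUser(user, whereUserInOrg, next); // next() now fires only after the update completes
}, function afterwards (err) {
    if (err) {
        console.error('Import failed, error details:\n', err);
        return process.exit(1);
    }
    console.log("done");
    return process.exit(0); // now runs only after every user has been processed
});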
I am literally taking my first steps with Node and MongoDB and I have recently hit this RangeError wall.
Here's what I am trying to do: I have a file that contains a list of countries that I would like to add to my Mongo db. This would be part of my "seed" mechanism to get the app running.
I load the JSON and then I iterate through the collection of objects and add them one by one to the 'Countries' collection.
However, every time I run the code, I get a "RangeError: Maximum call stack size exceeded".
I have googled around but none of the suggested solutions seem to apply to me.
My guess is there is something wrong with my insertCountry function...
Anyway, here's my code:
var mongoose = require('mongoose');
var countries = require('./seed/countries.json');

// mongodb
var Country = mongoose.Schema({
    name: String,
    code: String,
    extra: [Extra]
});

var Extra = mongoose.Schema({
    exampleField: Boolean,
    anotherField: Boolean
});

var mCountry = mongoose.model('Countries', Country);
var mExtra = mongoose.model('Extras', Extra);

// do connection
mongoose.connect('...');
var db = mongoose.connection;
db.on('error', console.error.bind(console, 'connection error'));
db.once('open', function callback() {
});

// async function
var insertCountry = function(document, callback) {
    db.model('Countries').count({code: document.code}, function (err, count) {
        if (count < 1) {
            db.collection('Countries').insert(document, function (err, result) {
                if (!err) {
                    console.log('country ' + document.name + ' added');
                }
                else {
                    console.log('- [' + document.name + '] ' + err);
                }
            });
        }
        callback(null, document);
    });
};

// doing countries
var Country = mongoose.model('Countries');
var Extras = mongoose.model('Extras');

for (i = 0; i < countries.length; i++) {
    nCountry = new Country();
    nCountry.name = countries[i].name;
    nCountry.code = countries[i].code;
    nCountry.benefits = new Extras();
    nCountry.benefits.exampleField = false;
    nCountry.benefits.anotherField = false;
    insertCountry(nCountry, function (err, value) {
        console.log(value.name + ' added to collection (callback)');
    });
}
I have been using some guides I found to build this, so this might not be optimal code. Any best practices, standards, guides or tutorials you can share are most welcome!
Your callback is in the wrong place. It is not waiting for the insert operation to complete before you return from its own callback. Altering your code:
var insertCountry = function(document, callback) {
    db.model('Countries').count({code: document.code}, function (err, count) {
        if (count < 1) {
            db.collection('Countries').insert(document, function (err, result) {
                if (!err) {
                    console.log('country ' + document.name + ' added');
                }
                else {
                    console.log('- [' + document.name + '] ' + err);
                }
                callback(null, document);
            });
        } else {
            callback(null, document); // still signal completion when the country already exists
        }
    });
};
That is part of your problem, but it does not completely solve it. The other part is the loop, which also does not wait for the wrapping function to complete before moving on. You want something like async.eachSeries in order to wait for inserts to complete before performing the next iteration. This is mostly why you are exceeding the call stack:
async.eachSeries(
    countries,
    function(current, callback) {
        // make your nCountry object
        insertCountry(nCountry, function(err, value) {
            // do something, then
            callback(err);
        })
    },
    function(err) {
        // called when done; err contains the error where set
        console.log( "done" );
    }
);
There is really still an issue with the array, which must be reasonably large if you are exceeding the call stack limit. You probably should look at using event streams to process the source rather than loading everything in memory into the array.
Personally, if you were just trying not to insert duplicates for a field and had MongoDB 2.6 available, I would just use the Bulk Operations API with "unordered operations" and allow non-fatal failures on the duplicate keys. Coupled with the fact that bulk operations are sent in "batches" and not one at a time, this is much more efficient than checking for presence on every request:
var Country = mongoose.Schema({
    name: String,
    code: { type: String, unique: true }, // define a unique index
    extra: [Extra]
});

var insertCountries = function(countries, callback) {
    var bulk = Country.collection.initializeUnorderedBulkOp();
    var counter = 0;

    async.eachSeries(
        countries,
        function(current, callback) {
            // same object construction
            bulk.insert(nCountry);
            counter++;

            // only send once every 1000
            if ( counter % 1000 == 0 ) {
                bulk.execute(function(err, result) {
                    // err should generally not be set
                    // but result would contain any duplicate errors
                    // along with other insert responses

                    // reset the bulk operator and continue
                    bulk = Country.collection.initializeUnorderedBulkOp();
                    callback();
                });
            } else {
                callback();
            }
        },
        function(err) {
            // send anything still queued
            if ( counter % 1000 != 0 ) {
                bulk.execute(function(err, result) {
                    // same as before but no need to reset
                    callback(err);
                });
            } else {
                callback(err); // nothing queued; signal completion directly
            }
        }
    );
};

mongoose.connection.once("open", function() {
    insertCountries(countries, function(err) {
        console.log("done");
    });
});
Keep in mind that, unlike the methods implemented directly on the mongoose models, the native driver methods require that a connection is actually established before they can be called. Mongoose "queues" these up for you, but otherwise you need something to be sure the connection is actually open. The "open" event is used in the example here.
Take a look at event streams as well. If you are constructing an array large enough to cause a problem by missing callback execution, then you probably should not be loading it all in memory from whatever your source is. Stream processing of that source, combined with an approach as shown above, should provide efficient loading.
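A rough sketch of that idea, assuming the JSONStream package (an assumed dependency; any streaming JSON parser would do) instead of require()-ing the whole file:

var fs = require('fs');
var JSONStream = require('JSONStream'); // assumed dependency, not in the original code

var stream = fs.createReadStream('./seed/countries.json')
    .pipe(JSONStream.parse('*')); // emits one country object at a time

stream.on('data', function (country) {
    // build the document and queue it on the bulk operator here;
    // pause the stream while a batch is being executed
    stream.pause();
    processCountry(country, function () { // hypothetical per-document handler
        stream.resume();
    });
});

stream.on('end', function () {
    // flush any remaining queued operations, then close the connection
    console.log('all countries processed');
});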