Documents "disappearing" from mongodb collection after node loading routine is finished - javascript

Being new to mongodb and node, this particular issue is just driving me crazy:
I have written a small module which reads a .csv file, converts it into a JSON array, and loads it into a mongodb collection one record at a time in a loop.
When I run this in debug and set a breakpoint at the "var v = i;" line, I can query the mongo collection and see fully populated records appear one after another.
However, as soon as the loop is finished - ALL records' data is gone!
The actual records are still all there, but they are all empty. The data which I just saw in each record is no longer in those records.
It may be some weird scoping issue, but, being new, I just cannot tell what that is.
Here is my code:
/**
 * Streams a CSV file through csvtojson and inserts every parsed row into the
 * 'DC_Facilities' MongoDB collection.
 *
 * @param {string} fileName - Overwritten below by the hard-coded 'clean_file.csv'
 *     before it is ever read, so the caller's value is not used.
 * @param {*} tabName - Not referenced anywhere in this function.
 * @param {*} metadataFields - Only logged to the console.
 */
exports.excelFileParser = function(fileName, tabName, metadataFields){
var assert = require('assert');
console.log(metadataFields);
// NOTE(review): XLSX is assigned without var/let/const in this function; unless
// it is declared elsewhere this creates an implicit global.
if(typeof require !== 'undefined') XLSX = require('xlsx');
var mongodb = require('mongodb');
var _ = require('underscore');
var fs = require('fs');
var Converter=require("csvtojson").core.Converter;
var distinctDataFields;
var MongoClient = mongodb.MongoClient;
var url = 'mongodb://localhost:27017/datamanager-03-test';
// Redeclares the `fileName` parameter and replaces whatever the caller passed.
var fileName = 'clean_file.csv';
//fs.writeFileSync(fileName, newCsvLines);
var csvFileName=fileName;
var fileStream=fs.createReadStream(csvFileName);
// Create a new converter instance.
var csvConverter=new Converter({constructResult:true});
// "end_parsed" is emitted once parsing has finished; the parsed rows are
// handed to makeRecords.
csvConverter.on("end_parsed",function(jsonObj){
//console.log(jsonObj); //here is your result json object
makeRecords(jsonObj);
});
// Connects to MongoDB and inserts each element of `result` as one document.
function makeRecords(result){
console.log(result.length);
MongoClient.connect(url, function (err, db) {
if (err) {
console.log('Unable to connect to the mongoDB server. Error:', err);
} else {
console.log('Connection established to', url);
var collectionName = 'DC_Facilities';
db.open(function(err, client){
// The createCollection callback is empty: its error and result are ignored.
client.createCollection(collectionName, function(err, col) {
});
var collection = db.collection(collectionName);
for(var i =0;i < result.length; i++){
var dataRecord = result[i];
// No callback is passed, so nothing in this loop waits for the insert to
// complete or observes its error.
collection.insert(dataRecord);
// Has no effect on the program; it serves only as a breakpoint anchor.
var v = i;
}
console.log("finished");
// Runs immediately after the loop, without waiting for the inserts above.
db.close();
});
}
});
}
// Start the pipeline: file contents flow into the converter.
fileStream.pipe(csvConverter);
};

collection.insert is an asynchronous function, so you're calling db.close() before any of them have a chance to complete. You also don't need to call createCollection as the collection will be created for you if it doesn't already exist.
So your code should look something like this instead, so that db.close() isn't called until all the insert operations have completed:
// Open the connection, insert every record, and close the connection only
// after the callback of the last outstanding insert has fired.
db.open(function(err, client){
    if (err) {
        // Without an open connection there is nothing to insert into or close.
        console.log('Unable to open the database. Error:', err);
        return;
    }
    var collection = db.collection(collectionName);
    var inserted = 0;
    // With no records, no insert callback would ever fire and the connection
    // would stay open forever, so finish immediately.
    if (result.length === 0) {
        console.log("finished");
        db.close();
        return;
    }
    for(var i = 0; i < result.length; i++){
        collection.insert(result[i], function(insertErr) {
            if (insertErr) {
                // Report the failure but keep counting, so the connection is
                // still closed once every insert has reported back.
                console.log('Insert failed. Error:', insertErr);
            }
            if (++inserted === result.length) {
                console.log("finished");
                db.close();
            }
        });
    }
});

Related

Creating a function in NodeJS that returns a query result as a call back?

I'm trying to verify proper database operation using mocha/chai in NodeJS. I'm trying to create a way to run a sql query and then verify that it properly executed. Right now I am doing all the mysql execution in order but when I get to the assertion the result hasn't been generated yet, so I was wondering the best solution to this problem?
Here is the way I have it currently set up but when the expect() statement runs the array doesn't have data in it yet.
const assert = require('chai').assert;
const expect = require('chai').expect;
const mysql = require('mysql');
// Mocha suite exercising a MySQL connection.
// NOTE(review): the closing `});` of this outer describe is not part of the
// snippet as pasted.
describe('Test mysql credentials, connection and CRUD operation', function() {
var host = "localhost";
var user = "user";
var pass = "pass";
describe('Test CREATE DATABASE', function() {
var connection = mysql.createConnection({
host: host,
user: user,
password: pass
});
// Issues CREATE DATABASE before the tests in this describe run.
before(function(done) {
connection.query("CREATE DATABASE test_db;", function(err, result) {
if (err) throw err;
});
// done() is called outside the query callback, so the hook is reported as
// finished without waiting for the CREATE DATABASE query.
done();
});
it('Database Sucessfully Created', function() {
var databases = new Array();
connection.query("SHOW DATABASES;", function(err, result) {
if (err) throw err;
// `i` is not declared with var/let here.
for (i = 0; i < result.length; i++) {
databases.push(result[i].Database);
}
});
// Executes right after the query is issued, outside its callback, so
// `databases` has not been filled by the loop above yet.
expect(databases).to.include('test_db');
});
});
Since your query is asynchronous, you need to set the expect in the callback function. You also have to call the done function once the assertion is executed.
// Asynchronous test: the assertion runs inside the query callback, and `done`
// tells mocha when the test has finished (or failed).
it('Database Sucessfully Created', function(done) {
    connection.query("SHOW DATABASES;", function(err, result) {
        // Hand the query error to mocha instead of throwing from inside the
        // driver callback, so it is reported as a failure of this test.
        if (err) {
            return done(err);
        }
        // Collect the database names from the result rows.
        var databases = result.map(function(row) {
            return row.Database;
        });
        expect(databases).to.include('test_db');
        done();
    });
});

How to save a mongodb collection as array of documents

So, the problem is that I want to obtain data from a mongoDB collection to use it later in another piece of code. Are there any ways to obtain data from all documents in collection to work with them in javascript, without mongoDB library?
For now, I have some code:
var MongoClient = require('mongodb').MongoClient;
var assert = require('assert');
var ObjectId = require('mongodb').ObjectID;
var url = 'mongodb://localhost:27017/website';
// Connect, run findGoods, and close the connection from its completion callback.
MongoClient.connect(url, function(err, db) {
assert.equal(null, err);
console.log("Connected correctly to server.");
findGoods(db, function() {
db.close();
});
});
// Creates a cursor over the whole 'goodsList' collection. The cursor is never
// read and `callback` is never invoked, so the db.close() passed in above is
// never reached from here.
var findGoods = function(db, callback) {
var cursor = db.collection('goodsList').find( );
};
So, I found the answer to my question. I asked a friend about it, so if anybody needs the code, here it is:
/**
 * Loads every document of the 'goodsList' collection into an array.
 *
 * The documents (or the error) are logged and then handed to `callback`, so
 * the caller can both use the data and release the connection afterwards.
 *
 * @param {object} db - Object exposing `collection(name)`, as passed by the caller.
 * @param {function(?Error, Array=)} [callback] - Invoked once with `(err)` on
 *     failure or `(null, docs)` on success.
 */
var findGoods = function(db, callback) {
    var collection = db.collection('goodsList');
    collection.find({}).toArray(function(err, docs) {
        if (err) {
            console.error(err);
        } else {
            console.log(docs);
        }
        // Always report back: the caller closes the connection in this callback.
        if (typeof callback === 'function') {
            if (err) {
                callback(err);
            } else {
                callback(null, docs);
            }
        }
    });
};

Node Js: Exporting available Mongo database names doesn't work

I am struggling to export available Mongo databases to ./routes/index.js.
Related part of app.js:
// Starts out as an empty array; reassigned inside the listDatabases callback.
var ACCESSIBLE_DATABASES = [];
var Db = require('mongodb').Db,
MongoClient = require('mongodb').MongoClient,
Server = require('mongodb').Server,
assert = require('assert');
var db = new Db('test', new Server('localhost', 27017));
db.open(function(err, db) {
var existing_databases = [];
var adminDb = db.admin();
// List all the available databases
adminDb.listDatabases(function(err, dbs) {
assert.equal(null, err);
assert.ok(dbs.databases.length > 0);
// Rebinds the variable to a different array; the array object exported below
// is not modified by this assignment.
ACCESSIBLE_DATABASES = dbs.databases;
db.close();
});
});
// The export below runs before the callback above has fired, so it exports the
// initial empty array.
module.exports.accessible_databases = ACCESSIBLE_DATABASES;
// After some milliseconds the variable already contains the databases.
setTimeout(function() {
console.log(ACCESSIBLE_DATABASES);
}, 100);
I'd like to avoid code repetition in my index.js file, but I couldn't achieve it. The import doesn't work either inside the 'adminDb.listDatabases' function or later in the 'setTimeout' function. (I'd like to use the result later in the app.js file, so migrating the code into the index.js file is not an option.)
I suspect the reason is the asynchronous code execution.
Since this involves async operations, the way you're trying to export wouldn't work. You'll have to pass a callback to get the databases once the operation has finished.
For example:
/**
 * Lists all available databases and reports them through `callback`.
 *
 * The connection is closed on every path before `callback` runs. Failures are
 * passed to `callback` rather than thrown, because an exception thrown inside
 * the asynchronous listDatabases callback cannot be caught by the caller.
 *
 * @param {object} db - Open database handle; closed by this function.
 * @param {object} adminDb - Admin handle exposing `listDatabases(cb)`.
 * @param {function(?Error, object=)} callback - Receives `(err)` or `(null, dbs)`.
 */
function getDBs(db, adminDb, callback) {
    // List all the available databases
    adminDb.listDatabases(function(err, dbs) {
        db.close();
        if (err) {
            return callback(err);
        }
        if (!dbs || !dbs.databases || dbs.databases.length === 0) {
            return callback(new Error('listDatabases returned no databases'));
        }
        callback(null, dbs);
    });
}
/**
 * Builds the handler passed to `db.open`: on success it lists the databases,
 * on failure it forwards the open error to `callback`.
 *
 * @param {function(?Error, object=)} callback - Receives `(err)` or `(null, dbs)`.
 * @returns {function(?Error, object)} Handler with the `(err, db)` signature.
 */
function openHandler(callback) {
    return function(err, db) {
        // `db` is not usable when the open failed, so stop here.
        if (err) {
            return callback(err);
        }
        getDBs(db, db.admin(), callback);
    };
}
// Exported as a function taking a callback: the database list only becomes
// available after db.open and listDatabases have completed.
module.exports.databases = function(callback) {
db.open(openHandler(callback));
}
// usage in routes/index.js
const dbs = require('./app').databases;
// The callback receives (err, availableDBs) once the lookup has finished.
dbs(function(err, availableDBs) {
console.log(availableDBs);
});
You are exporting the accessible_databases object before it is initialized.
Try something like this:
// Container object that is exported right away; its `accessible_databases`
// property is only added later, when the listDatabases callback fires.
var DB_OBJ = {};
adminDb.listDatabases(function(err, dbs) {
assert.equal(null, err);
assert.ok(dbs.databases.length > 0);
// Attach the array to the exported object here.
DB_OBJ.accessible_databases = ACCESSIBLE_DATABASES = dbs.databases;
db.close();
});
// Exports the object itself; the property set in the callback above is added
// to this same object afterwards.
module.exports = DB_OBJ;
// After some milliseconds the variable already contains the databases.
setTimeout(function() {
console.log(ACCESSIBLE_DATABASES);
}, 100);

Node script doesn't ever end

I have the node script below to basically copy the contents of some files and insert them to mongo.
The script never seems to end and even though all the data gets inserted successfully, I always have to do Ctrl+C to kill it.
Is there something I'm supposed to use in node.js to end a script?
// Seeding script: for every file in ./seeds, inserts each element of the file's
// parsed JSON into a collection named after the file.
// Nothing in this script disconnects mongoose or closes the connection.
var mongoose = require('mongoose');
mongoose.connect('mongodb://localhost/testdb');
var dir = './seeds';
var db = mongoose.connection;
// Show connection error if there is one
db.on('error', console.error.bind(console, 'Database Connection Error:'));
// If we successfully connected to mongo
db.once('open', function callback() {
var fs = require('fs'); // Used to get all the files in a directory
// Read all the files in the folder
fs.readdir(dir, function(err, list) {
// Log the error if something went wrong.
// There is no return here, so the forEach below still runs after an error.
if(err) {
console.log('Error: '+err);
}
// For every file in the list
list.forEach(function(file) {
// Set the filename without the extension to the variable collection_name
var collection_name = file.split(".")[0];
// require() loads and parses the file synchronously.
var parsedJSON = require(dir + '/' + file);
for(var i = 0; i < parsedJSON.length; i++) {
// Counts the records in the 'cohort' collection; `count` itself is never
// used, only a possible error is logged.
db.collection('cohort').count(function(err, count) {
if(err) {
console.log(err);
}
});
db.collection(collection_name).insert(parsedJSON[i], function(err, records) {
if(err) {
console.log(err);
}
// Runs even when `err` is set, because the branch above does not return.
console.log(records[0]);
console.log("Record added as "+records[0]);
});
}
});
});
});
When everything is done, call mongoose.disconnect(). As @AaronDufour correctly points out, node will not exit while event handler callbacks are registered, because it doesn't know that no more events are expected (the connection emitting a 'close' or 'error' event, for example).
You can call process.exit() to exit.

node.js never exits after insert to couchbase, opposite of most node questions

My problem seems to be the opposite of every node.js question :-) I have a simple forEach loop to read a list of files and insert them into a Couchbase database. This works great, but it never exits after reading all the lines. So I added a counter to shutdown the couchbase connection after all inserts are complete. This works.
This process is intended to load hundreds of thousands of files, so I brought the async module into the mix to batch the inserts into groups of 100. The async.eachLimit is used to iterate over the array and insert documents in batches. Now the orig problem is back. Whatever magic async.eachLimit uses to recognize the process is complete is not happening.
I've been going through javascript scoping, callbacks, async, etc. Google searches are hitting keywords but not this issue. I've reduced the code down to the following testcase. To test, create three files and add their names to testlist.txt.
The async.eachLimit in place works up until it hits the limit, then hangs. Comment this out and uncomment array.forEach line and it works. Thanks in advance!
// Test case: reads a list of file names from testlist.txt and stores the
// contents of each file in Couchbase under its file name.
var fs = require('fs');
var couchbase = require('couchbase');
var async = require('async');
var filelist = 'testlist.txt';
// Number of keys still to be stored; set after the list is read and
// decremented in the db.set callback.
var key_count = 0;
var cb_config = { host: 'localhost:8091', bucket: 'default'};
var db = new couchbase.Connection(cb_config, function(err) {
if (err) {
console.log('ERRR connect to couchbase at config['+cb_config+']');
throw err;
}
});
// Reads one file and stores it under its own name.
// The function declares only `line`: when used as the async.eachLimit iterator
// it never invokes the per-item completion callback async passes to it.
var insertFile=function(line) {
console.log('LOAD ['+line+']');
fs.readFile(line, function(file_err, f_doc) {
if(file_err) throw file_err;
db.set(line, f_doc, function(db_err, db_res){
if (db_err) {
console.log('FAIL ['+line+'] err['+db_err+']');
} else {
console.log('PASS ['+line+']');
}
// Shut the client down once the last outstanding key has been handled.
key_count--;
if (key_count == 0) {
console.log('DONE Shutting down client, no more keys');
db.shutdown();
}
});
});
}
// read list of files into data array from file filelist
fs.readFile(filelist, function(filelist_err, lines) {
if(filelist_err) throw filelist_err;
// HACK split adds empty line to array, use replace to fix
var array = lines.toString().replace(/\n$/, '').split('\n');
key_count = array.length;
console.log('INIT lines['+key_count+']');
// At most 2 items are processed at a time. The final callback logs 'FAIL'
// whenever it is called, whether or not `err` is set.
async.eachLimit(array, 2, insertFile, function(err) { console.log('FAIL async err['+err+']');} );
//array.forEach(function(data){insertFile(data);return;});
});
Testcase output using array.forEach:
INIT lines[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
LOAD [files.txt]
PASS [files.little.txt]
PASS [files.big.txt]
PASS [files.txt]
DONE Shutting down client, no more keys
Testcase output using async.eachLimit:
INIT lines[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
PASS [files.little.txt]
PASS [files.big.txt]
... hang, never gets to 3...
After review with a coworker, they spotted my mistake. I missed the async callback in my insertFile function. Adding that in works and allows me to remove the key counter! Solution code below:
var fs = require('fs');
var couchbase = require('couchbase');
var async = require('async');
var filelist = 'testlist.txt';
var cb_config = { host: 'localhost:8091', bucket: 'default'};
var db = new couchbase.Connection(cb_config, function(err) {
    if (err) {
        // Serialize the config: concatenating the object would print "[object Object]".
        console.log('ERRR connect to couchbase at config['+JSON.stringify(cb_config)+']');
        throw err;
    }
});
/**
 * Reads one file and stores its contents in Couchbase under the file name.
 *
 * Used as the iterator for async.eachLimit, so `callback` must be invoked
 * exactly once per call: with an error on failure, with no argument on success.
 *
 * @param {string} line - Path of the file to load; also used as the key.
 * @param {function(?Error=)} callback - Per-item completion callback from async.
 */
var insertFile=function(line, callback) {
    console.log('LOAD ['+line+']');
    fs.readFile(line, function(file_err, f_doc) {
        // Report read failures through the callback; throwing here would escape
        // async's control and the run would never be completed properly.
        if (file_err) {
            console.log('FAIL ['+line+'] err['+file_err+']');
            return callback(file_err);
        }
        db.set(line, f_doc, function(db_err, db_res){
            if (db_err) {
                console.log('FAIL ['+line+'] err['+db_err+']');
                callback(db_err);
            } else {
                console.log('PASS ['+line+']');
                callback();
            }
        });
    });
};
// read list of files into data array from file filelist
fs.readFile(filelist, function(filelist_err, data) {
    if(filelist_err) throw filelist_err;
    // HACK: split adds an empty trailing entry to the array, use replace to fix
    var array = data.toString().replace(/\n$/, '').split('\n');
    console.log('READ files['+array.length+']');
    // Process at most 2 files at a time; the final callback runs once all items
    // have completed or as soon as one of them reports an error.
    async.eachLimit(array, 2, insertFile, function(err) {
        if (err) console.log('LAST with async err['+err+']');
        console.log('DONE Shutting down client, no more keys');
        db.shutdown();
    });
});
And successful output:
$ node testcase.js
READ files[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
PASS [files.little.txt]
LOAD [files.txt]
PASS [files.big.txt]
PASS [files.txt]
DONE Shutting down client, no more keys

Categories