Node: Traversing directories in a recursion - javascript

I'm pretty new to Node... I'm still trying to hammer Node's async behavior and callback structure into my mind. Here is where I'm struggling right now:
// REQUIRE --------------------------------------------------------------------
var fs = require('fs');
var path = require('path');

// FUNCTION readAllDirs -------------------------------------------------------
function readAllDirs(dir, result) {
  if (!result) {
    result = function() {};
  }

  fs.readdir(dir, function(err, list) {
    if (err) { return result(err); }

    list.forEach(function(file) {
      var fullpath = path.resolve(dir, file);

      fs.stat(fullpath, function(err, stat) {
        if (err) { return result(err); }

        if (stat && stat.isDirectory()) {
          readAllDirs(fullpath);
          //console.log('In: ' + fullpath);
          result(null, fullpath);
        }
      });
    });
  });
}

// MAIN -----------------------------------------------------------------------
readAllDirs('/somedir', function(err, dirs) {
  console.log(dirs);
});
I'm trying to traverse a tree of directories. In principle the function works nicely, as long as I don't use the callback and just print the directory names to the console. All folders and sub-folders come up as expected. BUT when I use callbacks, the callback is not called for any recursion deeper than the first level.
Please help! Thanks, guys!

Your problem is here, inside the if (stat ...) branch:
readAllDirs(fullpath);
You need to pass the supplied callback back into the recursion:
readAllDirs(fullpath, result);
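Assembled, the corrected function looks like this (the same code as above, with the callback threaded through the recursion):

function readAllDirs(dir, result) {
  if (!result) {
    result = function() {};
  }

  fs.readdir(dir, function(err, list) {
    if (err) { return result(err); }

    list.forEach(function(file) {
      var fullpath = path.resolve(dir, file);

      fs.stat(fullpath, function(err, stat) {
        if (err) { return result(err); }

        if (stat && stat.isDirectory()) {
          // Pass the callback down, so deeper levels can report back too.
          readAllDirs(fullpath, result);
          result(null, fullpath);
        }
      });
    });
  });
}

Note that result then fires once per directory found rather than once with a complete list; collecting all paths into a single array and detecting when the whole tree has been walked would need extra bookkeeping.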

Related

Javascript - Moving Files between Directories

I have a folder called 'Received' and two more folders called 'Successful' and 'Error'.
All new files are stored in the 'Received' folder, and upon arriving there they are processed by my system. Successfully parsed files are moved to the 'Successful' folder, while all files that had issues are moved to the 'Error' folder.
My main concern is basically moving files between directories.
I have tried this:
// oldPath  = Received Folder
// sucsPath = Successful Folder
// failPath = Error Folder

// Checks if Successful or fail. 1 = Success; 0 = Fail
if (STATUS == '1') { // 1 = Success
  fs.rename(oldPath, sucsPath, function (err) {
    if (err) {
      if (err.code === 'EXDEV') {
        var readStream = fs.createReadStream(oldPath);
        var writeStream = fs.createWriteStream(sucsPath);
        readStream.on('error', callback);
        writeStream.on('error', callback);
        readStream.on('close', function () {
          fs.unlink(oldPath, callback);
        });
        readStream.pipe(writeStream);
      } else {
        callback(err);
      }
      return;
    }
    callback();
  });
} else { // 0 = Fail
  fs.rename(oldPath, failPath, function (err) {
    if (err) {
      if (err.code === 'EXDEV') {
        var readStream = fs.createReadStream(oldPath);
        var writeStream = fs.createWriteStream(failPath);
        readStream.on('error', callback);
        writeStream.on('error', callback);
        readStream.on('close', function () {
          fs.unlink(oldPath, callback);
        });
        readStream.pipe(writeStream);
      } else {
        callback(err);
      }
      return;
    }
    callback();
  });
}
But my concern here is that it deletes the original file from the source folder and, in effect, renames it into the destination path. I believe the logic in the code is that it literally renames the file, directory path included. I also came across 'await moveFile', but it basically does the same thing.
I just want to move files between directories by simply specifying the file name, the origin of the file, and its destination.
As mentioned by rufus1530, I used this:
fs.createReadStream(origin).pipe(fs.createWriteStream(destination));
Deleting the file would be the next step.
I used this:
fs.unlinkSync(file);
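One thing to watch out for with that two-step approach: the stream copy is asynchronous, so the unlink should only run after the write stream has finished, otherwise the source file can be deleted mid-copy. A minimal sketch (copyThenDelete is just an illustrative name):

var fs = require('fs');

function copyThenDelete(origin, destination, callback) {
  var readStream = fs.createReadStream(origin);
  var writeStream = fs.createWriteStream(destination);

  readStream.on('error', callback);
  writeStream.on('error', callback);

  // Only delete the original once the destination has been fully written.
  writeStream.on('close', function () {
    fs.unlink(origin, callback);
  });

  readStream.pipe(writeStream);
}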
Starting with Node 8.5 you have fs.copyFile, which is the easiest way to copy files.
So you could create your own move function that first tries a rename and then falls back to a copy:
const fs = require('fs')
const path = require('path')
const util = require('util')

const copyFile = util.promisify(fs.copyFile)
const rename = util.promisify(fs.rename)
const unlink = util.promisify(fs.unlink)

async function moveFileTo(file, dest) {
  // get the file name from the path
  const fileName = path.basename(file)
  // combine the path of the destination directory and the file name
  const destPath = path.join(dest, fileName)

  try {
    await rename(file, destPath)
  } catch (err) {
    if (err.code === 'EXDEV') {
      // we need to copy if the destination is on another partition
      await copyFile(file, destPath)
      // delete the old file if copying was successful
      await unlink(file)
    } else {
      // re-throw any other error
      throw err
    }
  }
}
Then you could use it like await moveFileTo('/path/to/file.txt', '/new/path'), which will move /path/to/file.txt to /new/path/file.txt.
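As a side note, on Node 10 and later the fs.promises API exposes promise-returning versions of these functions directly, so the util.promisify boilerplate can be dropped. A sketch of the same helper:

const fsp = require('fs').promises
const path = require('path')

async function moveFileTo(file, dest) {
  const destPath = path.join(dest, path.basename(file))
  try {
    await fsp.rename(file, destPath)
  } catch (err) {
    if (err.code !== 'EXDEV') throw err
    // cross-device move: copy first, then remove the original
    await fsp.copyFile(file, destPath)
    await fsp.unlink(file)
  }
}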

Is it okay to use synchronous code to load data into the memory at startup?

In my routes.js file, I have this:
var pages = require('./pages')();
...
app.get('/:page', function(req, res, next) {
  var p = req.params.page;
  if (p in pages) {
    res.render('page', pages[p]);
  } else {
    next();
  }
});
pages.js:
module.exports = function() {
  var fs = require('fs'),
      ret = [],
      dir = './pages',
      files = fs.readdirSync(dir);

  files.forEach(function(file) {
    var text = fs.readFileSync(dir + '/' + file, 'utf-8'),
        fileName = file.substr(0, file.lastIndexOf('.'));
    ret[fileName] = {content: text};
  });

  return ret;
};
This code runs only once, when I start node. This is how I could make it async:
require('./pages')(function(pages) {
  app.get('/:page', function(req, res, next) {
    var p = req.params.page;
    if (p in pages) {
      res.render('page', pages[p]);
    } else {
      next();
    }
  });
});
pages.js:
module.exports = function(callback) {
  var fs = require('fs'),
      ret = [],
      dir = './pages';

  fs.readdir(dir, function(err, files) {
    if (err) throw err;
    files.forEach(function(file, i) {
      fs.readFile(dir + '/' + file, 'utf-8', function(err, text) {
        if (err) throw err;
        var fileName = file.substr(0, file.lastIndexOf('.'));
        ret[fileName] = {content: text};
        if (i === (files.length - 1)) callback(ret);
      });
    });
  });
};
Assuming the pages will not exceed 1 MB in total size, I can cache the text in memory indefinitely without node crashing from running out of memory.
Should I be using the async code?
According to what I've learnt, the async version will make node start listening on localhost faster, but /:page URLs will only work when the files have been loaded into memory.
Is the async code in the right pattern?
What if I need to reuse the pages object in another file? Right now it is only accessible in routes.js.
Can I rewrite pages.js to execute only once like this:
var ret = [];
module.exports = function(callback) {
  var fs = require('fs'),
      dir = './pages';

  if (ret.length < 1) {
    fs.readdir(dir, function(err, files) {
      if (err) throw err;
      files.forEach(function(file, i) {
        fs.readFile(dir + '/' + file, 'utf-8', function(err, text) {
          if (err) throw err;
          var fileName = file.substr(0, file.lastIndexOf('.'));
          ret[fileName] = {content: text};
          if (i === (files.length - 1)) callback(ret);
        });
      });
    });
  } else {
    callback(ret);
  }
};
What if require('./pages')(function(pages) {}) is called multiple times together? Is there a chance of the if condition failing? I can't wrap my mind around this.
Should I be using the async code?
If you want, why not. But there's no real need for it, synchronous IO on startup is fine. require does it as well.
Is the async code in the right pattern?
No. The callback fires as soon as the file at the last index has finished reading, but the other reads may still be pending at that point, so pages can be incomplete when the route is registered. And if the callback ever fired more than once, you would register app.get('/:page', …) repeatedly, which is not what you want either.
What if I need to reuse the pages object in another file? Right now it is only accessible in routes.js.
You could pass it from routes.js to the other modules. Or just rewrite pages.js to store it statically and execute the async things only once, so that you can require it multiple times.
What if require('./pages')(function(pages) {}) is called multiple times together? Is there a chance of the if condition failing?
Yes, it will most certainly fail, because you populate ret only asynchronously: two near-simultaneous calls will both see an empty ret and both kick off the directory read.
I can't wrap my mind around this.
Use promises. They act as asynchronous, immutable values, which is just what you need here. They will guarantee that callbacks are only invoked once, that every callback is invoked with the same ret value, and they provide many more useful things (like managing the parallel file reads for you).
You'll want to export a promise from pages.js.
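A minimal sketch of what that could look like, assuming Node 8+ for util.promisify (the structure follows the question's code, names are illustrative):

// pages.js
var fs = require('fs');
var path = require('path');
var util = require('util');

var readdir = util.promisify(fs.readdir);
var readFile = util.promisify(fs.readFile);

var dir = './pages';

// The promise is created once, when the module is first required;
// later require() calls get the same cached promise.
module.exports = readdir(dir).then(function(files) {
  return Promise.all(files.map(function(file) {
    return readFile(path.join(dir, file), 'utf-8').then(function(text) {
      return { name: file.substr(0, file.lastIndexOf('.')), content: text };
    });
  }));
}).then(function(entries) {
  var pages = {};
  entries.forEach(function(e) { pages[e.name] = { content: e.content }; });
  return pages;
});

routes.js would then do require('./pages').then(function(pages) { … }) and register the route inside the then callback; every module that requires pages.js gets the same cached promise, so the files are read exactly once.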

Node.js possible callback endless call?

I'm currently trying to learn a bit about Node.js.
At the moment I'm trying to understand the principles of callbacks.
I've written a module that should filter files from a given directory by a specified file extension, but it won't work. I've tested a bit and noticed that the function getFiles is called more than once. But why? I can't find the mistake. Can someone help me understand the problem? If someone thinks my code is ugly, please show me a better example. Thanks.
So here's my code:
// Returns a list of filtered files from a specified directory and extension
function getFilteredFiles(dir, ext, callback) {
  getFiles(dir, function(error, data) {
    if (error) {
      callback(error);
    } else {
      var result = getFilteredFiles(data, ext);
      callback(null, result);
    }
  });
}

// Reading files from a given directory
function getFiles(dir, callback) {
  var fs = require('fs');
  console.log(typeof dir);
  fs.readdir(dir, function(err, data) {
    if (err) {
      callback(err);
    } else {
      callback(null, data);
    }
  });
}

// Filters files by a specified extension
function filterFiles(data, extension) {
  var path = require('path');
  return data.filter(function(file) {
    return path.extname(file) == '.' + extension;
  });
}
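The repeated calls most likely come from getFilteredFiles itself: in the success branch it recursively calls getFilteredFiles(data, ext) again instead of the filter helper defined below, so every directory listing triggers another round of getFiles. The intended lines are presumably:

var result = filterFiles(data, ext);
callback(null, result);

With that change, getFiles runs exactly once per getFilteredFiles call and the filtered list is passed to the callback.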

Using Async queue to control DB connection request

I'm building an app with Node and MongoDB Native. I'm working on a db module which I can require and call in other modules, so that I end up using just one connection. The module db.js started out with this code:
var _db = null;

var getDb = module.exports.getDb = function(callback) {
  if (_db) {
    console.log('_db returned');
    return callback(null, _db);
  }
  MongoClient.connect('mongodb://localhost:' + config.db.port + '/' + config.db.name, {native_parser: true}, function (err, db) {
    if (err) return callback(err);
    console.log('_db created');
    _db = db;
    callback(err, _db);
  });
};
In my other modules that need a db connection, I do this:
db.getDb(function (err, connection) {
  // Do something with connection
});
It works fine. But an unpleasant problem is that if my code calls getDb multiple times in a very short time span, I end up with several copies of a connection, for example if I do my db.js require and getDb calls at the very beginning of every module that needs a db connection.
I'm now thinking about controlling the calls to getDb by queuing them, so that only the absolute first call creates a connection and saves it in _db. All later calls get the already-created connection _db in return. I believe async.queue will help me with this...
The problem is that I don't understand how to write this with async.queue. The documentation is a little bit vague, and I can't find any better examples online. Maybe you can give me some hints. This is what I've got so far...
var dbCalls = async.queue(function (task, callback) {
  if (_db) {
    console.log('_db returned');
    return callback(null, _db);
  }
  MongoClient.connect('mongodb://localhost:' + config.db.port + '/' + config.db.name, {native_parser: true}, function (err, db) {
    if (err) return callback(err);
    console.log('Connected to mongodb://localhost:' + config.db.port + '/' + config.db.name);
    _db = db;
    callback(null, _db);
  });
}, 1);
// I guess this .push() must be the exposed (exported) API for other modules
// to get a connection, but how do I return it to them?
dbCalls.push(null, function (err) {
  console.log('finished processing foo');
});

dbCalls.push(null, function (err) {
  console.log('finished processing bar');
});
I don't understand the object passed as the first argument to .push(). What should I use it for? Right now it's null. How do I pass the connection and any possible error all the way out to the module that made the call?
A quick and dirty solution without async.queue:
var _db = null;
var _err = null;
var _queue = [];
var _pending = false;

var getDb = module.exports.getDb = function(callback) {
  if (_err || _db) {
    console.log('_db returned');
    return callback(_err, _db);
  } else if (_pending) { // already a connect() request pending
    _queue.push(callback);
  } else {
    _pending = true;
    _queue.push(callback);
    MongoClient.connect(..., function (err, db) {
      _err = err;
      _db = db;
      _queue.forEach(function(queuedCallback) {
        queuedCallback(err, db);
      });
    });
  }
};
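Used from other modules, every caller then receives the same connection regardless of how the calls interleave. A minimal usage sketch (the users collection is just an illustrative name):

var db = require('./db');

// Both calls can fire back to back; only the first one triggers
// MongoClient.connect, the second is queued until it finishes.
db.getDb(function (err, connection) {
  if (err) throw err;
  connection.collection('users').findOne({}, function (err, doc) {
    console.log(doc);
  });
});

db.getDb(function (err, connection) {
  // same connection object as the first call received
});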

Add additional parameters to callback function

I'm building a system in Node.js that is supposed to find all files in an array of folders, stat them, and then do some additional work using that information.
I'm using fs.readdir() to get all the files from each of the folders. My code looks like this:
for (i = 0, max = paths.length; i < max; i++) {
  var path = paths.pop();
  console.log("READING PATH: " + path);
  fs.readdir(path, function(err, files) { handleDir(err, files, path); });
}
The problem is that, depending on how fast readdir() executes, handleDir() gets the wrong path. This happens because by the time the callback runs, the next loop iteration has already started, meaning that the path variable has changed.
So what I need to do is somehow lock that path variable to its specific callback function. I can't think of any good way to do this. Anyone have some ideas?
There is no block scope, so use a function for scope.
for (var i = 0, path, max = paths.length; i < max; i++) {
  path = paths.pop();
  console.log("READING PATH: " + path);
  handlePath(path);
}

function handlePath(path) {
  fs.readdir(path, onPathRead);
  function onPathRead(err, files) {
    handleDir(err, files, path);
  }
}
This is one of the more annoying parts of JS for me. An alternative to creating a separate named function (as @generalhenry demonstrated) is to wrap the code in an anonymous function that's executed before the path variable changes.
for (i = 0, max = paths.length; i < max; i++) {
  var path = paths.pop();
  console.log("READING PATH: " + path);
  fs.readdir(path, (function(p) {
    return function(err, files) {
      handleDir(err, files, p);
    };
  })(path));
}
Either way, the important point is that the function's context (anonymous or not) is created before the value of the path variable is reassigned.
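As a side note, in modern JavaScript (ES2015 and later) declaring the loop variable with let, and the per-iteration value with const, gives each iteration its own binding, which avoids the wrapper entirely:

for (let i = 0, max = paths.length; i < max; i++) {
  const path = paths.pop();
  console.log("READING PATH: " + path);
  // `path` is block-scoped, so each callback closes over its own copy
  fs.readdir(path, function(err, files) { handleDir(err, files, path); });
}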
This is indeed an annoying feature of JavaScript, so much so that CoffeeScript (a language that compiles down to JavaScript) has a specific way of dealing with it: the do operator on for. In CoffeeScript your original function would be:
for path in paths
  fs.readdir path, (err, files) -> handleDir(err, files, path)
and the solution to your problem would be:
for path in paths
  do (path) ->
    fs.readdir path, (err, files) -> handleDir(err, files, path)
I was looking for the same thing and ended up with this solution; here's a simple example if anybody wants to go through it.

var FA = function(data) {
  console.log("IN A:" + data);
  FC(data, "LastName");
};

var FC = function(data, d2) {
  console.log("IN C:" + data, d2);
};

var FB = function(data) {
  console.log("IN B:" + data);
  FA(data);
};

FB('FirstName');
Great solution from generalhenry, but be careful if you're going to use a try/catch structure inside the callback function:

function handlePath(path) {
  fs.readdir(path, onPathRead);
  function onPathRead(err, files) {
    try {
      handleDir(err, files, path);
    } catch (error) {
      var path = 'something_else'; // <<--- Never do this !!!
    }
  }
}
Never redeclare the same var inside the callback: var declarations are hoisted to the top of the function, so even if the catch block is never entered, the inner var path shadows the outer variable for the whole callback and you will find it as undefined when the callback executes.
Try this simple example:
function wrapper(id) {
  console.log('wrapper id:' + id);
  setTimeout(callback, 1000);
  function callback() {
    try {
      console.log('callback id:' + id);
    } catch (error) {
      var id = 'something_else';
      console.log('err:' + error);
    }
  }
}

wrapper(42);
This will output:
wrapper id:42
callback id:undefined
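The fix is simply to avoid reusing the outer name inside the catch block, since it is the hoisted var that shadows the parameter:

function wrapper(id) {
  console.log('wrapper id:' + id);
  setTimeout(callback, 1000);
  function callback() {
    try {
      console.log('callback id:' + id);
    } catch (error) {
      var fallbackId = 'something_else'; // no longer shadows `id`
      console.log('err:' + error);
    }
  }
}

wrapper(42); // now logs "callback id:42" after one second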