I'm building a system in Node.js that is supposed to find all files in an array of folders, stat them, and then do some additional work using that information.
I'm using fs.readdir() to get all the files synchronously from each of the folders. My code looks like this:
for(i=0,max=paths.length; i<max; i++) {
var path = paths.pop();
console.log("READING PATH: " + path);
fs.readdir(path, function(err, files) { handleDir(err, files, path); });
}
The problem is that, depending on how fast the readdir() executes, handleDir() is getting the wrong path. This happens because by the time the callback runs, the next loop has already started - meaning that the path variable has changed.
So, what I need to do is somehow lock that path variable to it's specific callback function. I can't think of any good way to do this - anyone have some ideas?
There is no block scope, so use a function for scope.
for(var i=0, path, max=paths.length; i<max; i++) {
path = paths.pop();
console.log("READING PATH: " + path);
handlePath( path );
}
function handlePath ( path ) {
fs.readdir(path, onPathRead);
function onPathRead (err, files) {
handleDir(err, files, path);
}
}
This is one of the more annoying parts of JS for me. An alternative to creating a separate function (as #generalhenry demonstrated) is to wrap the code in an anonymous function that's executed before the path variable changes.
for(i=0,max=paths.length; i<max; i++) {
var path = paths.pop();
console.log("READING PATH: " + path);
fs.readdir(path,
(function(p){
return function(err, files) {
handleDir(err, files, p);
};
})(path);
);
}
Either way, the important point is that the function's context (anonymous or not) is initiated before the value of the path variable is reassigned.
This is indeed an annoying feature of Javascript, so much so that Coffeescript (which is a language that compiles down to Javascript) has a specific way of dealing with it, the do operator on for. In Coffeescript your original function would be:
for path in paths
fs.readdir path, (err, files) -> handleDir(err, files, path)
and the solution to your problem would be:
for path in paths
do (path) ->
fs.readdir path, (err, files) -> handleDir(err, files, path)
I was looking for the same thing and end up with the solution and here it's a simple example if anybody wants to go through this.
var FA = function(data){
console.log("IN A:"+data)
FC(data,"LastName");
};
var FC = function(data,d2){
console.log("IN C:"+data,d2)
};
var FB = function(data){
console.log("IN B:"+data);
FA(data)
};
FB('FirstName')
Great solution from generalhenry, but be careful if you're going to use a try/catch structure inside the callback function
function handlePath ( path ) {
fs.readdir(path, onPathRead);
function onPathRead (err, files) {
try {
handleDir(err, files, path);
} catch (error) {
var path = 'something_else'; // <<--- Never do this !!!
}
}
}
Never try to redeclare the same var in a catch block, even if the catch block is never invoked, the path var is reset and you will find it as 'undefined' when the callback is executed.
Try this simple example:
function wrapper(id) {
console.log('wrapper id:' + id);
setTimeout(callback, 1000);
function callback() {
try {
console.log('callback id:' + id);
} catch (error) {
var id = 'something_else';
console.log('err:' + error);
}
}
}
wrapper(42);
This will output:
wrapper id:42
callback id:undefined
Related
In my routes.js file, I've this:
var pages = require('./pages')();
...
app.get('/:page', function(req, res, next) {
var p = req.params.page;
if (p in pages) {
res.render('page', pages[p]);
} else {
next();
}
});
pages.js:
module.exports = function() {
var fs = require('fs'),
ret = [],
dir = './pages',
files = fs.readdirSync(dir);
files.forEach(function(file) {
var text = fs.readFileSync(dir + '/' + file, 'utf-8'),
fileName = file.substr(0, file.lastIndexOf('.'));
ret[fileName] = {content: text};
});
return ret;
};
This code runs only one, when I run node. This is how I can make it async:
require('./pages')(function(pages) {
app.get('/:page', function(req, res, next) {
var p = req.params.page;
if (p in pages) {
res.render('page', pages[p]);
} else {
next();
}
});
});
pages.js:
module.exports = function(callback) {
var fs = require('fs'),
ret = [],
dir = './pages';
fs.readdir(dir, function(err, files) {
if (err) throw err;
files.forEach(function(file, i) {
fs.readFile(dir + '/' + file, 'utf-8', function(err, text) {
if (err) throw err;
var fileName = file.substr(0, file.lastIndexOf('.'));
ret[fileName] = {content: text};
if ( i === (files.length - 1) ) callback(ret);
});
});
});
};
Assuming the total pages will not exceed more than 1 MB in size, I can cache the text into memory indefinitely without getting node crashed due to out of memory.
Should I be using the async code?
According to what I've learnt, the async version will make node start listening on localhost faster, but /:page URLs will only work when the files have been loaded into memory.
Is the async code in the right pattern?
What if I need to reuse the pages object in another file? Right now it is only accessible in routes.js.
Can I rewrite pages.js to execute only once like this:
var ret = [];
module.exports = function(callback) {
var fs = require('fs'),
dir = './pages';
if (ret.length < 1) {
fs.readdir(dir, function(err, files) {
if (err) throw err;
files.forEach(function(file) {
fs.readFile(dir + '/' + file, 'utf-8', function(err, text) {
if (err) throw err;
var fileName = file.substr(0, file.lastIndexOf('.'));
ret[fileName] = {content: text};
if ( i === (files.length - 1) ) callback(ret);
});
});
});
} else {
callback(ret);
}
};
What if require('./pages')(function(pages) {}) is called multiple times together? Is there a chance of the if condition failing? I can't wrap my mind around this.
Should I be using the async code?
If you want, why not. But there's no real need for it, synchronous IO on startup is fine. require does it as well.
Is the async code in the right pattern?
No. It does invoke callback once for each directory. Calling app.get('/:page', …) multiple times is not what you want.
What if I need to reuse the pages object in another file? Right now it is only accessible in routes.js.
You could pass it from routes.js to the other modules. Or just rewrite pages.js to store it statically and execute the async things only once, so that you can require it multiple times.
What if require('./pages')(function(pages) {}) is called multiple times together? Is there a chance of the if condition failing?
Yes, it will most certainly fail, because you are populating ret only asynchronously.
I can't wrap my mind around this.
Use promises. The act as asynchronous, unmutable values, just what you need here. They will guarantee that callbacks are only invoked once, that every callback is invoked with the same ret value, and provide many more useful things (like managing the parallel file reads for you).
You'll want to export a promise from pages.js.
I have the following JavaScript:
var async = require('async');
var MyOutputModel = function(persistenceModel, callback) {
async.each(persistenceModel.helpOffers, function(helpOffer, asyncCallback) {
console.log('Original source (loop)');
//Do something ...
asyncCallback();
}, function(err) {
console.log('Original source (done)');
console.log(err);
if(err) return callback(err);
return callback(null, _this);
});
};
module.exports = MyOutputModel;
I would like to test the path containing if(err) return callback(err);...
For doing so I see the only possibility in dependency injection with rewire (https://www.npmjs.com/package/rewire).
In my unit test:
var rewire = require('rewire');
var MyOutputModel = rewire('...');
var AsyncMock = function() {};
AsyncMock.prototype.each = function(array, successCallback, errorCallback) {
console.log('Inside mock!');
callback(new Error());
};
var asyncMock = new AsyncMock();
MyOutputModel.__set__('async', asyncMock); //Rewire for require-dependency-injection
//Act
new CdObjectOutputModel(cdObjectPersistenceModel, function(err, cdObjectOutputModel) {
//Assert
assert(err);
});
However the "mock" doesn't seem to be used. In my unit test I never see 'Inside mock!' but 'Original source (loop)' and 'Original source (done)' on the console. And there is no error.
Anything I do wrong? I've already used rewire for a similar use case and it worked perfectly. Or is there another approach?
This is what rewire does under the hood:
For each rewired path it'll actually read the contents of the file pointed by path which we will call the body
it'll also generate two strings, the first one is called the prelude and it has some helpful vars to avoid overwriting the global ones with __set__, e.g. if we wanted to override the global console as if it were local on our module the contents of the prelude var would be: var console = global.console, this awesome detection of globals which can be overridden locally is done here
The second one is called the appendix and this string contains actually the definitions of the __set__, __get__ and there's also __with__ source, each one is a function defined over module.exports but remember that we're still dealing with strings
Imagine that there's a way to create a fake/test module out of strings! That's what wired does, it concatenates the prelude, the body and the appendix in one big string and it then creates a module with require('module')
e.g. (remember that this is a big string)
// prelude start
var console = global.console
// a lot of globals that can be overriden are defined here :)
// prelude end
// body start
// I'll paste the contents of the rewire's README
var fs = require("fs");
var path = "/somewhere/on/the/disk";
function readSomethingFromFileSystem(cb) {
console.log("Reading from file system ...");
fs.readFile(path, "utf8", cb);
}
exports.readSomethingFromFileSystem = readSomethingFromFileSystem;
// body end
// appendix start
// generation of three functions to set the variables defined above
Object.defineProperty(module.exports, '__set__', {
enumerable: false,
value: function () {
// set contents, we will see this later
}
});
Object.defineProperty(module.exports, '__get__', { ... });
Object.defineProperty(module.exports, '__with__', { ... });
// appendix end
So what rewire does is kind of create a module with this string and return it, and voila you have a magic module with three additional properties, the next step is actually find out what __set__ does, the __set__ source boils down to the following operation:
function (key, value) {
eval(key + ' = ' + value);
}
And that's it so now we know that executing __set__ will just eval whatever key (which for us is any local var) you want, to be set as value but ONLY in this test/fake module.
Now going back to your problem I see that this might be the source of it:
new CdObjectOutputModel(...);
I'm assuming that CdObjectOutputModel is defined perhaps as a subclass of MyOutputModel but remember that in your test var MyOutputModel = rewire('...'); is actually a test/fake module so you're not really overriding the original code of MyOutputModel but overriding kind of a copy of it which exists only locally, I guess that you have to rewire CdObjectOutputModel or perhaps locally make CdObjectOutputModel inherit from the rewired version of MyOutputModel
Just replace callback with errorCallback and it should work fine.
I'm actually trying to learn a bit about node.js
At the moment I try to understand the principles about callbacks.
I've written a module that should filter me files from a given directory by a specified file extension. But it won't work. I've tested a bit and I noticed that the function 'getFiles' will be called more the ones. But why? I can't find the mistake, can someone help me, to understood my problem? If someone thinks my code is ugly, please give me a better example, thanks.
So here's my code:
//Returns a list of filtered files from a specified directory and extension
function getFilteredFiles(dir, ext, callback)
{
getFiles(dir, function(error, data){
if(error)
{
callback(error);
}
else
{
var result = getFilteredFiles(data, ext);
callback(null, result);
}
});
}
//Reading files from a given directory
function getFiles(dir, callback)
{
var fs = require('fs');
console.log(typeof dir);
fs.readdir(dir, function(err, data){
if(err)
{
callback(err);
}
else
{
callback(null, data);
}
});
}
//Filters a file by a specified extension
function filterFiles(data, extension)
{
var path = require('path');
return data.filter(function(file){
return path.extname(file) == '.' + extension;
});
}
I'm pretty new to Node...I need to hammer Node's async behavior and callback structure into my mind. Here where I struggle right now:
// REQUIRE --------------------------------------------------------------------
fs = require('fs');
path = require('path');
// FUNCTION readAllDirs -------------------------------------------------------
function readAllDirs(dir, result) {
if (!result) {
result = function() {};
};
fs.readdir(dir, function(err, list) {
if(err) { return result(err) };
list.forEach(function(file) {
var fullpath = path.resolve(dir, file);
fs.stat(fullpath, function(err, stat) {
if(err) { return result(err) };
if(stat && stat.isDirectory()) {
readAllDirs(fullpath);
//console.log('In: ' + fullpath);
result(null, fullpath);
}
});
});
});
}
// MAIN -----------------------------------------------------------------------
readAllDirs('/somedir', function(err, dirs) {
console.log(dirs);
});
I try to traverse a tree of directories. In principle the function is working nice...as long I do not callback but print the directory names on the console. All folders and sub-folders come up as expected. BUT when I do callbacks the callback is not called for recursion deeper than first level.
Pleeeaaassee help! Thanks guys!
Your problem is here, inside the if (stat ...) branch:
readAllDirs(fullpath);
You need to pass the supplied callback again back into the recursion:
readAllDirs(fullpath, result);
I'm trying to use Node.js to create a zip file from an existing folder, and preserve the structure.
I was hoping there would be a simple module to allow this kind of thing:
archiver.create("../folder", function(zipFile){
console.log('et viola');
});
but I can't find anything of the sort!
I've been googling around, and the best I've found so far is zipstream, but as far as I can tell there's no way to do what I want. I don't really want to call into commandline utilities, as the the app has to be cross platform.
Any help would be greatly appreciated.
Thanks.
It's not entirely code free, but you can use node-native-zip in conjunction with folder.js. Usage:
function zipUpAFolder (dir, callback) {
var archive = new zip();
// map all files in the approot thru this function
folder.mapAllFiles(dir, function (path, stats, callback) {
// prepare for the .addFiles function
callback({
name: path.replace(dir, "").substr(1),
path: path
});
}, function (err, data) {
if (err) return callback(err);
// add the files to the zip
archive.addFiles(data, function (err) {
if (err) return callback(err);
// write the zip file
fs.writeFile(dir + ".zip", archive.toBuffer(), function (err) {
if (err) return callback(err);
callback(null, dir + ".zip");
});
});
});
}
This can be done even simpler using node's built-in execfile function. It spawns a process and executes the zip command through the os, natively. Everything just works.
var execFile = require('child_process').execFile;
execFile('zip', ['-r', '-j', zipName, pathToFolder], function(err, stdout) {
console.log(err);
logZipFile(localPath);
});
The -j flag 'junks' the file path, if you are zipping a sibdirectory, and don't want excessive nesting within the zip file.
Here's some documentation on execfile.
Here's a man page for zip.
Using Easy-zip, npm install easy-zip, you can do:
var zip5 = new EasyZip();
zip5.zipFolder('../easy-zip',function(){
zip5.writeToFile('folderall.zip');
});