Iterating file directory with promises and recursion - javascript

I know I'm returning early in the following function, how can I chain the recursive promises to my result?
My goal is to get an array listing the files in the directory and all of its subdirectories. The array should be one-dimensional, which is why I'm using concat in this example.
/**
 * Attempts to collect every file under body.path, recursing into subdirectories.
 *
 * NOTE(review): as written, resolve() is reached before any fs.stat callback
 * has run, so results gathered by those callbacks never reach the caller.
 *
 * @param {{path: string}} body - object whose `path` is the directory to read
 * @returns {Promise<Array>} resolves with the array passed to resolve() below
 */
function iterate(body) {
return new Promise(function(resolve, reject){
// Shadowed by the `list` callback parameter on the next line, so this array is never used.
var list = [];
// `error` is not checked; inside this callback `list` is the readdir listing.
fs.readdir(body.path, function(error, list){
list.forEach(function(file){
file = path.resolve(body.path, file);
// `error` is not checked here either.
fs.stat(file, function(error, stat){
console.log(file, stat.isDirectory());
if(stat.isDirectory()) {
// This return only leaves the fs.stat callback; nothing waits for the recursive promise.
return iterate({path: file})
.then(function(result){
// concat() returns a new array and that return value is discarded, so `list` is unchanged.
list.concat(result);
})
.catch(reject);
} else {
// Appends the resolved path to the same array that holds the raw readdir names.
list.push(file);
}
})
});
// Runs right after forEach has started the stat calls, not after they complete.
resolve(list);
});
});
};

There are numerous mistakes in your code. A partial list:
.concat() returns a new array, so list.concat(result) by itself doesn't actually do anything.
You're calling resolve() synchronously and not waiting for all async operations to be completed.
You're trying to recursively return from deep inside several nested async callbacks. You can't do that. That won't get the results back anywhere.
I find this a ton easier to use by using a promisified version of the fs module. I use Bluebird to create that and then you can do this:
const path = require('path');
var Promise = require('bluebird');
const fs = Promise.promisifyAll(require('fs'));
/**
 * Recursively collects the resolved paths of every file beneath `dir`.
 *
 * Uses the promisified `fs` methods (`readdirAsync` / `statAsync`) and the
 * `.map()` method of the promise returned by `readdirAsync`.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>} Flat list of file paths.
 */
function iterate(dir) {
  // Turns one directory entry into either its path (file) or a promise for
  // the list of paths below it (directory).
  const expandEntry = (name) => {
    const fullPath = path.resolve(dir, name);
    return fs.statAsync(fullPath).then((info) =>
      info.isDirectory() ? iterate(fullPath) : fullPath
    );
  };

  return fs
    .readdirAsync(dir)
    .map(expandEntry)
    // flatten one level: sub-directory results arrive as nested arrays
    .then((nested) => [].concat(...nested));
}
Note: I changed iterate() to just take the initial path so it's more generically useful. You can just pass body.path to it initially to adapt.
Here's a version using generic ES6 promises:
const path = require('path');
const fs = require('fs');
/**
 * Promise-returning wrapper around `fs.readdir`, attached to the `fs` object.
 *
 * @param {string} dir - Directory to list.
 * @returns {Promise<string[]>} Fulfils with the entry names; rejects with the
 *   error reported by `fs.readdir`.
 */
fs.readdirAsync = function (dir) {
  return new Promise((resolve, reject) => {
    fs.readdir(dir, (err, entries) => {
      if (!err) {
        resolve(entries);
        return;
      }
      reject(err);
    });
  });
};
/**
 * Promise-returning wrapper around `fs.stat`, attached to the `fs` object.
 *
 * @param {string} file - Path to inspect.
 * @returns {Promise<fs.Stats>} Fulfils with the stat result; rejects with the
 *   error reported by `fs.stat`.
 */
fs.statAsync = function (file) {
  return new Promise((resolve, reject) => {
    fs.stat(file, (err, info) => {
      if (!err) {
        resolve(info);
        return;
      }
      reject(err);
    });
  });
};
/**
 * Recursively collects the resolved paths of every file beneath `dir`, using
 * plain ES6 promises on top of `fs.readdirAsync` / `fs.statAsync`.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>} Flat list of file paths.
 */
function iterate2(dir) {
  // A file yields its own path; a directory yields a promise for its contents.
  const resolveEntry = (name) => {
    const fullPath = path.resolve(dir, name);
    return fs.statAsync(fullPath).then((info) => {
      if (!info.isDirectory()) {
        return fullPath;
      }
      return iterate2(fullPath);
    });
  };

  return fs
    .readdirAsync(dir)
    .then((names) => Promise.all(names.map((name) => resolveEntry(name))))
    // flatten one level: sub-directory results arrive as nested arrays
    .then((nested) => [].concat(...nested));
}
// Example: print every file below the current working directory.
iterate2(".")
  .then(function (results) {
    console.log(results);
  })
  .catch(function (err) {
    // A failing readdir/stat rejects the chain; report it instead of leaving
    // an unhandled rejection.
    console.error(err);
  });
Here's a version that adds a customizable filter function:
/**
 * Recursively collects the resolved paths of the files beneath `dir` that are
 * accepted by `filterFn`.
 *
 * The filter is applied to files only; directories are always descended into.
 *
 * @param {string} dir - Directory to walk.
 * @param {function(string): *} [filterFn] - Called with each file path; a
 *   truthy return keeps the file. Defaults to accepting everything.
 * @returns {Promise<string[]>} Flat list of accepted file paths.
 */
function iterate2(dir, filterFn) {
  // Fall back to an accept-everything predicate when no filter is supplied.
  const accept = filterFn || (() => true);

  const visit = (name) => {
    const fullPath = path.resolve(dir, name);
    return fs.statAsync(fullPath).then((info) => {
      if (info.isDirectory()) {
        return iterate2(fullPath, accept);
      }
      // Rejected files become "" so they can be dropped in the next step.
      return accept(fullPath) ? fullPath : "";
    });
  };

  return fs
    .readdirAsync(dir)
    .then((names) => Promise.all(names.map((name) => visit(name))))
    .then((entries) => entries.filter((entry) => Boolean(entry)))
    // flatten one level: sub-directory results arrive as nested arrays
    .then((kept) => [].concat(...kept));
}
// example usage
// example usage
iterate2(".", function (f) {
  // filter out any path containing a "/"-separated segment that starts with a
  // dot followed by a character other than "." or "/".
  // No `g` flag: test() only needs a yes/no answer, and a global regex carries
  // lastIndex state between calls.
  return !/(^|\/)\.[^\/\.]/.test(f);
})
  .then(function (results) {
    console.log(results);
  })
  .catch(function (err) {
    // Report traversal failures instead of leaving an unhandled rejection.
    console.error(err);
  });

Related

Asynchronously check folder contents and return file paths

I want to write a Node script that checks which entries in a given array of urls are files and then returns them. I would like to make it asynchronous.
As far as I was able to develop a proper function:
const fs = require('fs-extra');
/**
 * Intended to resolve with the entries of pathArr that are files.
 *
 * @param {string[]} [pathArr=[]] - entries to check
 * @returns {Promise<Array>} resolves with the array produced by filter()
 */
function getFiles(pathArr = []) {
return new Promise((resolve, reject) => {
let filesArr = pathArr.filter(obj => {
// filter() keeps or drops each entry based on what lstatSync itself returns,
// not on what the callback below returns.
// NOTE(review): Node documents lstatSync's second argument as an options
// object, so this callback is presumably never invoked — confirm for the
// fs-extra version in use.
return fs.lstatSync(obj, (err, stat) => {
if (err) {
return false;
}
return stat.isFile();
});
});
// Reached synchronously; `reject` is never called in this function.
resolve(filesArr);
});
}
But it uses fs.lstatSync and I want to use fs.lstat to be able to use it as a async method (with .then() usage). pathArr argument is an array of urls (strings) which I want to check if they're a file or folder.
Please help!
How about this
const fs = require('fs-extra');
/**
 * Checks, in parallel, whether each entry of `pathArr` is a regular file.
 *
 * @param {string[]} [pathArr=[]] - Paths to inspect.
 * @returns {Promise<boolean[]>} One boolean per input path, in input order.
 *   Rejects with the first `fileStat` failure.
 */
function getFiles(pathArr = []) {
  // Promise.all already returns a promise, so no wrapping `new Promise` is
  // needed. Returning it directly also lets a rejection reach the caller;
  // previously a failed lstat left the returned promise pending forever.
  return Promise.all(pathArr.map((entry) => fileStat(entry)));
}
/**
 * Promise wrapper around `fs.lstat` that reports whether `obj` is a regular file.
 *
 * @param {string} obj - Path to inspect.
 * @returns {Promise<boolean>} Fulfils with `stat.isFile()`; rejects with the
 *   lstat error (for example when the path does not exist).
 */
function fileStat(obj) {
  return new Promise((resolve, reject) => {
    fs.lstat(obj, (err, stat) => {
      if (err) {
        // Stop here: `stat` is undefined on failure, so falling through to
        // stat.isFile() would throw inside the callback.
        reject(err);
        return;
      }
      resolve(stat.isFile());
    });
  });
}
You can use any of async.series, async.map, or async.each, each of which takes an array as a parameter. Refer to the following example, which calculates the total size of the files asynchronously.
let fs = require("fs");
let async = require('async');
let paths = ['./demo1.txt', './demo2.txt', './demo3.txt'];
let totalSize = 0;
/**
 * Stats every entry of `paths` through async.each, adds each file size to
 * `totalSize`, and logs the total from the final callback. A failed stat is
 * forwarded to async.each and reported with console.error instead.
 */
let calcSize = function () {
  async.each(
    paths,
    function iterator(path, next) {
      fs.stat(path, function (err, stat) {
        if (err) {
          // `stat` is undefined on failure; forward the error instead of
          // crashing on `stat.size`.
          next(err);
          return;
        }
        totalSize += stat.size;
        next(null);
      });
    },
    function (err) {
      if (err) {
        console.error("calcSize failed : " + err.message);
        return;
      }
      console.log("totalSize : " + totalSize);
    }
  );
};
calcSize();

How to return a promise when out of scope?

I want to retrieve multiple files asynchronously from AWS S3 so I am using Promises.
I am using AWS S3 to retrieve the files. However, fulfill is out of scope. I am getting the following error;
ReferenceError: reject is not defined
What would be the correct way to return a Promise in this circumstance and how would it be best to provide a key to files with the data returned from S3?
/**
 * Intended to fetch every entry of `files` from S3 in parallel.
 *
 * @param {Array<{key: string}>} files - descriptors whose `key` is used as the S3 Key
 * @returns {Promise<Array>} result of Promise.all over the mapped values
 */
var getFiles = function getFiles(files) {
// The map callback below has no return statement, so Promise.all receives an
// array of undefined values.
return Promise.all(files.map(function(file) {
var params = {
Bucket: 'my-bucket',
Key: file.key
}
s3.getObject(params, function(err, data) {
// `reject` and `fulfill` are not declared in this function (there is no
// enclosing `new Promise` executor), hence the ReferenceError.
if (err) reject(err);
else {
// set key.data = data
fulfill(data);
}
});
}));
}
// Wrap each file name in a { key } descriptor and start the downloads.
var fileNames = ['file1.jpg', 'file2.jpg'];
var files = fileNames.map((name) => ({ key: name }));
getFiles(files);
Promise.all expects an array of promises. Right now your callback inside the map function is not returning a promise. You can fix this by updating the code to the following:
/**
 * Fetches every entry of `files` from S3 in parallel.
 *
 * @param {Array<{key: string}>} files - Descriptors whose `key` is the S3 Key.
 * @returns {Promise<Array>} Fulfils with the getObject results in input order;
 *   rejects with the first getObject error.
 */
var getFiles = function getFiles(files) {
  // Adapts one callback-style s3.getObject call to a promise.
  var fetchOne = function (file) {
    return new Promise(function (resolve, reject) {
      var params = { Bucket: 'app-design.aiir.net', Key: file.key };
      s3.getObject(params, function (err, data) {
        if (!err) {
          // set key.data = data
          resolve(data);
          return;
        }
        reject(err);
      });
    });
  };

  return Promise.all(
    files.map(function (file) {
      return fetchOne(file);
    })
  );
};
// Wrap each file name in the { key } descriptor shape that getFiles expects.
var fileNames = ['file1.jpg', 'file2.jpg'];
var files = fileNames.map((name) => ({ key: name }));

Array .push is not working inside a promise function

I have a nodejs Mongodb code like this . I'm trying to push into a global array and return all my results but the array contains only one value . Any idea why ?
// Shared accumulator filled by the toArray callbacks below.
var s = [];
/**
 * Connects to `url` and, for every collection, pushes the result of a
 * "sort by duration_in_seconds, limit 1" query onto `s`.
 *
 * @returns {Promise<undefined>} resolved from inside the per-collection callback
 */
var p = function(){
return new Promise(function(resolve, reject) {
// `err` from connect is not checked, and `reject` is never called.
MongoClient.connect(url, function (err, db) {
db.listCollections().forEach(function(collname) {
db.collection(collname.name).find().sort( { duration_in_seconds: 1 }).limit(1).toArray(
function(err, docs) {
assert.equal(null, err);
s.push(docs);
// Called once per collection, but a promise settles only on the first call,
// so later pushes land after p() has already resolved.
resolve();
});
});
});
});
}
p().then(function() {
console.log(s);
});
You are resolving the promise when the first collections returns its document. You'd need to wait for all of them. Instead of wrapping everything in a large new Promise, promisify every asynchronous function on its own, and make the returned promise fulfill with the result value instead of putting it in a global variable:
/**
 * Promise wrapper around the callback-style `MongoClient.connect`.
 *
 * @param {string} url - Connection string handed to MongoClient.connect.
 * @returns {Promise<*>} Fulfils with the `db` handle; rejects with the
 *   connection error.
 */
function connect(url) {
  return new Promise((resolve, reject) => {
    MongoClient.connect(url, (err, db) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(db);
    });
  });
}
/**
 * Promise wrapper around a cursor's callback-style `toArray`.
 *
 * @param {{toArray: function(function(*, Array))}} cursor - Object exposing toArray(callback).
 * @returns {Promise<Array>} Fulfils with the documents; rejects with the
 *   error passed to the callback.
 */
function getArray(cursor) {
  return new Promise((resolve, reject) => {
    cursor.toArray((err, docs) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(docs);
    });
  });
}
Now you can write your p using these helpers, put the promises for each collection in an array, and await them with Promise.all, which yields a promise for the array of all results:
/**
 * Connects to `url`, lists the collections, and queries each one for a single
 * document sorted by `duration_in_seconds`.
 *
 * @returns {Promise<Array<Array>>} One result array per collection, in the
 *   order the collections were listed.
 */
function p() {
  return connect(url).then((db) => {
    // Builds the "sort by duration_in_seconds, limit 1" query for one collection.
    const firstByDuration = (info) => {
      const cursor = db
        .collection(info.name)
        .find()
        .sort({ duration_in_seconds: 1 })
        .limit(1);
      return getArray(cursor);
    };

    return getArray(db.listCollections()).then((infos) =>
      Promise.all(infos.map((info) => firstByDuration(info)))
    );
  });
}
// Print the per-collection results, or the failure if connecting or querying failed.
p()
  .then(function (s) {
    console.log(s);
  })
  .catch(function (err) {
    // p() rejects on connection or query errors; report them instead of
    // leaving an unhandled rejection.
    console.error(err);
  });

Javascript - Promises and forEach

I am trying to fill the array with avatarIcon node from parsed xmls but my final array in res.view is empty plus the execution doesn't seem to reach return res.view function. How do I do this correctly?
/**
 * Parses `xml` with `parseString` and returns the parsed object.
 *
 * Relies on `parseString` invoking its callback before it returns.
 * NOTE(review): presumably xml2js in its default (non-async) mode — confirm.
 *
 * @param {string} xml - XML source text.
 * @returns {*} The parse result handed to the callback.
 * @throws {Error} The error reported by `parseString`, instead of silently
 *   returning undefined.
 */
function parseXML(xml) {
  var parsed;
  var failure = null;
  parseString(xml, { explicitArray: false }, function (err, result) {
    failure = err || null;
    parsed = result;
  });
  if (failure) {
    // Surface the real parse error rather than letting callers trip over an
    // undefined result later.
    throw failure;
  }
  return parsed;
}
/**
 * Promise wrapper around `User.find().exec(callback)`.
 *
 * @returns {Promise<*>} Fulfils with the records passed to the callback;
 *   rejects with the query error.
 */
function findUsers() {
  return new Promise(function (resolve, reject) {
    User.find().exec(function (err, sids) {
      if (err) {
        // Previously ignored, which made a failed query look like "no result".
        reject(err);
        return;
      }
      resolve(sids);
    });
  });
}
// Controller action: renders the 'users' view with the avatarIcon of every user.
// NOTE(review): res.view() below runs before any getProfileXML promise has
// settled, so `arr` is still empty at that point.
avatars: function(req, res) {
var arr = []
findUsers().then(function(result) {
result.forEach(function(el) {
// This promise is neither returned nor collected, so nothing waits for it.
getProfileXML(el.sid).then(function(result) {
arr.push(parseXML(result).profile.avatarIcon)
})
})
return res.view('users', {
users: arr
})
})
}
You can use Promise.all to wrap a collection of promises. Here untested code to demonstrate the use, where the result of the expression is a promise too:
// Fetch all users, request every profile XML, and render the view only after
// all of those requests have fulfilled.
return findUsers().then(function(result) {
// One promise per user, in the same order as `result`.
var promises = result.map(function(el) {
return getProfileXML(el.sid);
});
return Promise.all(promises);
}).then(function(values) {
// `values` holds the getProfileXML results, in the same order as `promises`.
var arr = values.map(function(v) {
return parseXML(v).profile.avatarIcon;
});
return res.view('users', {
users: arr
})
})
// NOTE(review): there is no rejection handler in this chain; whoever receives
// the returned promise has to handle failures.

How to delay all the calls to any method of a javascript object until the object is initialized?

Suppose this simple piece of code:
/*jslint node: true */
"use strict";
/**
 * Holds data that `db` loads asynchronously, starting from the constructor.
 * Until the load succeeds every getter reports `_initError`.
 *
 * @param {{loadDataAsyncronously: function(function(*, *))}} db - Data source.
 */
function MyClass(db) {
  // Placeholder error reported by the getters until loading has finished.
  this._initError = new Error("MyClass not initialized");
  db.loadDataAsyncronously((err, data) => {
    if (!err) {
      this._initError = null;
      this.data = data;
      return;
    }
    this._initError = err;
  });
}

/**
 * Passes `data.a` (or the initialization error) to `cb`.
 * @param {function(*, *=)} cb - Node-style callback.
 * @returns {*} Whatever `cb` returns.
 */
MyClass.prototype.getA = function (cb) {
  var failure = this._initError;
  return failure ? cb(failure) : cb(null, this.data.a);
};

/**
 * Passes `data.b` (or the initialization error) to `cb`.
 * @param {function(*, *=)} cb - Node-style callback.
 * @returns {*} Whatever `cb` returns.
 */
MyClass.prototype.getB = function (cb) {
  var failure = this._initError;
  return failure ? cb(failure) : cb(null, this.data.b);
};
// Stand-in data source: hands over its payload one second after being asked.
var db = {
  loadDataAsyncronously: function (cb) {
    // Emulate the load process from disk.
    // Data will be available later
    setTimeout(function () {
      cb(null, {
        a: "Hello",
        b: "World"
      });
    }, 1000);
  }
};

// Both getters are called right after construction, before the timer fires.
var obj = new MyClass(db);
obj.getA(function (err, a) {
  if (err) {
    console.log(err);
  } else {
    console.log("a: " + a);
  }
});
obj.getB(function (err, b) {
  if (err) {
    console.log(err);
  } else {
    // Label fixed: this prints the value of b (it used to say "a: ").
    console.log("b: " + b);
  }
});
This gives an error because obj is not initialized when the getA and getB methods are called. I would like any method called before the object is initialized to be delayed automatically until the class finishes its initialization.
One way to solve it is this way:
/*jslint node: true */
"use strict";
/**
 * Holds data that `db` loads asynchronously. Getter calls made before the load
 * finishes are queued and run once it has completed.
 *
 * @param {{loadDataAsyncronously: function(function(*, *))}} db - Data source.
 */
function MyClass(db) {
  // Callbacks waiting for initialization; removed once it has finished.
  this._pendingAfterInitCalls = [];
  db.loadDataAsyncronously((err, data) => {
    if (!err) {
      this.data = data;
    }
    this._finishInitialization(err);
  });
}

/**
 * Passes `data.a` (or the initialization error) to `cb` once initialized.
 * @param {function(*, *=)} cb - Node-style callback.
 */
MyClass.prototype.getA = function (cb) {
  // `this` inside the callback is the instance: _waitUntiliInitialized binds it.
  this._waitUntiliInitialized(function (err) {
    return err ? cb(err) : cb(null, this.data.a);
  });
};

/**
 * Passes `data.b` (or the initialization error) to `cb` once initialized.
 * @param {function(*, *=)} cb - Node-style callback.
 */
MyClass.prototype.getB = function (cb) {
  this._waitUntiliInitialized(function (err) {
    return err ? cb(err) : cb(null, this.data.b);
  });
};

/**
 * Marks the instance as initialized, records `err` if any, and runs every
 * queued callback with `err`.
 * @param {*} err - Load error, or a falsy value on success.
 */
MyClass.prototype._finishInitialization = function (err) {
  this._initialized = true;
  if (err) {
    this._initError = err;
  }
  var queued = this._pendingAfterInitCalls;
  for (var i = 0, count = queued.length; i < count; i += 1) {
    var call = queued[i];
    call(err);
  }
  delete this._pendingAfterInitCalls;
};

/**
 * Runs `cb` bound to the instance: immediately with `_initError` when already
 * initialized, otherwise after initialization has finished.
 * @param {function(*)} cb - Receives the initialization error, if any.
 * @returns {*} The result of `cb` when it ran immediately, else undefined.
 */
MyClass.prototype._waitUntiliInitialized = function (cb) {
  var bound = cb.bind(this);
  if (!this._initialized) {
    this._pendingAfterInitCalls.push(bound);
    return undefined;
  }
  return bound(this._initError);
};
// Stand-in data source: hands over its payload one second after being asked.
var db = {
  loadDataAsyncronously: function (cb) {
    // Emulate the load process from disk.
    // Data will be available later
    setTimeout(function () {
      cb(null, {
        a: "Hello",
        b: "World"
      });
    }, 1000);
  }
};

// Both getters are called right after construction, before the timer fires.
var obj = new MyClass(db);
obj.getA(function (err, a) {
  if (err) {
    console.log(err);
  } else {
    console.log("a: " + a);
  }
});
obj.getB(function (err, b) {
  if (err) {
    console.log(err);
  } else {
    // Label fixed: this prints the value of b (it used to say "a: ").
    console.log("b: " + b);
  }
});
But it seems to me a lot of overhead to be written for each class following this pattern.
Is there a more elegant way to handle this functionality?
Does any library exist to simplify this functionality?
While preparing this question, it occurred to me what would probably be a good answer. The idea is to use the concept of a factory function. The code above would be rewritten this way.
/*jslint node: true */
"use strict";
/**
 * Factory: builds a MyClass instance and hands it to `cb` only after its
 * asynchronous `_init` has completed.
 *
 * @param {*} db - Data source forwarded to `_init`.
 * @param {function(*, MyClass=)} cb - Receives `(err)` on failure or
 *   `(null, instance)` on success.
 */
function createMyClass(db, cb) {
  const instance = new MyClass();
  instance._init(db, (err) => {
    if (!err) {
      cb(null, instance);
      return undefined;
    }
    return cb(err);
  });
}
/**
 * Data holder whose asynchronous setup lives in `_init` rather than in the
 * constructor.
 */
function MyClass() {
}

/**
 * Loads the data from `db` and stores it on the instance.
 *
 * @param {{loadDataAsyncronously: function(function(*, *))}} db - Data source.
 * @param {function(*=)} cb - Called with the load error, or with no
 *   arguments on success.
 */
MyClass.prototype._init = function (db, cb) {
  var self = this;
  db.loadDataAsyncronously(function (err, data) {
    if (err) {
      // Remember the failure so the getters' _initError guard reports it.
      self._initError = err;
      return cb(err);
    }
    self._initError = null;
    self.data = data;
    cb();
  });
};

/**
 * Passes `data.a` to `cb`, or an error when the instance has no data yet.
 * @param {function(*, *=)} cb - Node-style callback.
 */
MyClass.prototype.getA = function (cb) {
  // An instance whose _init never ran or failed has no `data`; report that
  // through the callback instead of throwing on `this.data.a`.
  var failure = this._initError || (this.data ? null : new Error("MyClass not initialized"));
  if (failure) return cb(failure);
  cb(null, this.data.a);
};

/**
 * Passes `data.b` to `cb`, or an error when the instance has no data yet.
 * @param {function(*, *=)} cb - Node-style callback.
 */
MyClass.prototype.getB = function (cb) {
  var failure = this._initError || (this.data ? null : new Error("MyClass not initialized"));
  if (failure) return cb(failure);
  cb(null, this.data.b);
};
// Stand-in data source: hands over its payload one second after being asked.
var db = {
  loadDataAsyncronously: function (cb) {
    // Emulate the load process from disk.
    // Data will be available later
    setTimeout(function () {
      cb(null, {
        a: "Hello",
        b: "World"
      });
    }, 1000);
  }
};

var obj;
// The getters are only called from inside the factory callback.
createMyClass(db, function (err, aObj) {
  if (err) {
    console.log(err);
    return;
  }
  obj = aObj;
  obj.getA(function (err, a) {
    if (err) {
      console.log(err);
    } else {
      console.log("a: " + a);
    }
  });
  obj.getB(function (err, b) {
    if (err) {
      console.log(err);
    } else {
      // Label fixed: this prints the value of b (it used to say "a: ").
      console.log("b: " + b);
    }
  });
});
I share this Q/A because I think it can be interesting to somebody else. If you think there are better solutions or libraries to handle these situations, or have any other idea, I will appreciate it.
The usual strategy here is to NOT use any async operation from a constructor. If an object needs an async operation in order to initialize it, then you use one of two options:
The async portion of the initialization is done in an .init(cb) method that must be called by the creator of the object.
You use a factory function that takes a callback that is called when the async portion of the operation has completed (like your proposed answer).
If you create a lot of these, perhaps the factory function makes sense because it saves you a little repeated code. If you don't create a lot of them, I prefer the first option because I think it makes it a lot clearer in the code exactly what is happening (you create an object and then you initialize it asynchronously and then the code continues only when the async operation has completed).
For the first option, it could look like this:
function MyClass(...) {
// initialize instance variables here
}
/**
 * Asynchronously initializes the instance from `db`.
 *
 * Supports both calling styles: pass a Node-style `callback`, or omit it and
 * use the returned promise.
 *
 * @param {{loadDataAsyncronously: function(function(*, *))}} db - Data source.
 * @param {function(*)} [callback] - Called with the load error (falsy on success).
 * @returns {Promise<undefined>|undefined} A promise when no callback is
 *   given (rejects with the load error); otherwise undefined.
 */
MyClass.prototype.init = function (db, callback) {
  var self = this;

  // Performs the load and records the outcome on the instance before
  // reporting it through `done`.
  function load(done) {
    db.loadDataAsyncronously(function (err, data) {
      if (err) {
        self._initError = err;
      } else {
        self._initError = null;
        self.data = data;
      }
      done(err);
    });
  }

  if (typeof callback === "function") {
    load(callback);
    return undefined;
  }

  // No callback supplied: expose the same outcome as a promise instead of
  // failing on `callback(err)`.
  return new Promise(function (resolve, reject) {
    load(function (err) {
      if (err) {
        reject(err);
      } else {
        resolve();
      }
    });
  });
};
// usage
var obj = new MyClass(...);
obj.init(db, function(err) {
if (!err) {
// continue with rest of the code that uses this object
// in here
} else {
// deal with initialization error here
}
});
Personally, I would probably use a design using promises so the calling code could look like this:
var obj = new MyClass(...);
obj.init(db).then(function() {
// object successfully initialized, use it here
}, function(err) {
// deal with initialization error here
});
You can use a Promise internally in MyClass and have all functions wait for the promise to complete before proceeding.
http://api.jquery.com/promise/
You could inject the precondition into the functions:
/**
 * Wraps `fn` so that it only runs after `this._waitUntiliInitialized` calls
 * back. If that callback receives an error, the error goes to the last
 * argument of the call (expected to be a callback) and `fn` is skipped.
 *
 * @param {Function} fn - Method to guard.
 * @returns {Function} Wrapper to install in place of `fn`; it returns nothing.
 */
function injectInitWait(fn) {
  return function (...args) {
    // A regular function on purpose: its `this` is whatever
    // _waitUntiliInitialized invokes it with.
    this._waitUntiliInitialized(function (err) {
      if (!err) {
        fn.apply(this, args);
        return undefined;
      }
      return args[args.length - 1](err);
    });
  };
}
// injectInitWait returns a new wrapped function and does not modify its
// argument, so the result must be assigned back onto the prototype; otherwise
// the unguarded methods stay in place.
MyClass.prototype.getA = injectInitWait(MyClass.prototype.getA);
MyClass.prototype.getB = injectInitWait(MyClass.prototype.getB);
That being said, your factory approach would be superior.

Categories