Recursive reading on all files in a directory - FileSystem API

Recursive reading on all files in a directory - FileSystem API - javascript

I am getting confuse how to read a list of files recursively.
Assume I have 3 text files in my filesytem api root directory
text1.txt
text2.txt
text3.txt
My goal is to read each text files one by one and then concatenate all the entries in each file into one string but I am currently at lost
how to do this in Javascript FileSystem API.
window.requestFileSystem(window.TEMPORARY, 1024*1024, onInitFs, errorHandler);
function onInitFs(fs) {
var dirReader = fs.root.createReader();
var entries = [];
// Call the reader.readEntries() until no more results are returned.
var readEntries = function() {
dirReader.readEntries (function(results) {
if (!results.length) {
readAllFiles(results);
} else {
entries = entries.concat(toArray(results));
readEntries();
}
}, errorHandler);
};
readEntries(); // Start reading dirs.
}
function readAllFiles(entries){
//Loop thru all the files and read each entries
}
I have seen how to read one text file but I dont know how to implement the reading of all files and concatenate the value.
They all implement callback functions so I am getting confused on how to handle it. Any points please?
I actually have been basing all my works in this http://www.html5rocks.com/en/tutorials/file/filesystem
UPDATE 2
As per #Johan
I actually changed my code to make use of callback
window.requestFileSystem(window.TEMPORARY, 1024*1024, onInitFs, errorHandler);
function onInitFs(fs) {
var dirReader = fs.root.createReader();
var entries = [];
// Call the reader.readEntries() until no more results are returned.
var readEntries = function() {
dirReader.readEntries (function(results) {
if (!results.length) {
readAllFiles(results, concatMessages);
} else {
entries = entries.concat(toArray(results));
readEntries();
}
}, errorHandler);
};
readEntries(); // Start reading dirs.
}
var concatMessage = '';
function concatMessages(message){
concatMessage += message;
}
function readAllFiles(logs, callBack) {
logs.forEach(function(entry, iCtr) {
var message;
entry.file(function(file) {
var reader = new FileReader();
reader.onloadend = function(e) {
//message += this.result;
if(callBack)
callBack('==================' + iCtr + '==========================');
callBack(this.result);
};
reader.readAsText(file); // Read the file as plaintext.
}, function(err) {
console.log(err);
});
});
}
My only problem is this, the callback function is not sequential.
It reads text2.txt first then text3.txt then text1.txt so the end result is not sequential which is not what I want to do. Any more hints?

Highly recommend you consider using something like caolan's async library to accomplish this.
You can do something like this:
async.each(openFiles, function(file, callback) {
// Perform operation on file here.
console.log('Processing file ' + file);
callback();
}, function(err) {
// if any of the file processing produced an error, err would equal that error
if (err) {
// One of the iterations produced an error.
// All processing will now stop.
console.log('A file failed to process');
} else {
// do your concatenation here
}
});

Related

Watson Assistant context is not updated

I use watson assistant v1
My problem is that every time I make a call to the code in Nodejs, where I return the context, to have a coordinated conversation, the context is only updated once and I get stuck in a node of the conversation
this is my code
client.on('message', message => {
//general variables
var carpetaIndividual = <../../../>
var cuerpoMensaje = <....>
var emisorMensaje = <....>
//detect if context exists
if(fs.existsSync(carpetaIndividual+'/contexto.json')) {
var watsonContexto = require(carpetaIndividual+'/contexto.json');
var variableContexto = watsonContexto;
} else {
var variableContexto = {}
}
//conection with Watson Assistant
assistant.message(
{
input: { text: cuerpoMensaje },
workspaceId: '<>',
context: variableContexto,
})
.then(response => {
let messageWatson = response.result.output.text[0];
let contextoWatson = response.result.context;
console.log('Chatbot: ' + messageWatson);
//Save and create JSON file for context
fs.writeFile(carpetaIndividual+'/contexto.json', JSON.stringify(contextoWatson), 'utf8', function (err) {
if (err) {
console.error(err);
}
});
//Send messages to my application
client.sendMessage(emisorMensaje, messageWatson)
})
.catch(err => {
console.log(err);
});
}
client.initialize();
the context.json file is updated, but when it is read the code only reads the first update of the context.json file and not the other updates

This will be because you are using require to read the .json file. For all subsequent requires of an already-required file, the data is cached and reused.
You will need to use fs.readfile and JSON.parse
// detect if context exists
if (fs.existsSync(carpetaIndividual+'/contexto.json')) {
var watsonContexto = fs.readFileSync(carpetaIndividual+'/contexto.json');
// Converting to JSON
var variableContexto = JSON.parse(watsonContexto);
} else {
var variableContexto = {}
}
There is another subtle problem with your code, in that you are relying on
your async call to fs.writeFile completing before you read the file. This will be the case most of the time, but as you don't wait for the fs.writeFile to complete there is the chance that you may try to read the file, before it is written.

Mocha test is failing but implementation works

I am trying to write a newer watch module that uses the fs.watch method instead of the watchFile approach.
So far, it works beautifully, but only when I run it outside of mocha. I can't figure out why my unit test is throwing a tantrum, maybe someone here can?
Here is my class code:
/**
* requirements
*/
var fs, path, events;
fs = require('fs');
path = require('path');
events = require('events');
/**
* private
*/
var Monitor = function(directory, options) {
this.directory = directory;
this.options = options || {};
(this.options.lazy && this.empty()) || this.walk(this.directory);
this.watch(this.directory);
};
Monitor.prototype = new events.EventEmitter();
Monitor.prototype.watch = function(directory, stats) {
var stats = stats || {};
if (!this.directories[directory]) {
var w = fs.watch(directory, this.options, this.justlookatit.bind(this));
}
this.directories[directory] = { 'stats': stats, 'w': w };
};
Monitor.prototype.directories = function() {
if (!Object.keys(this.directories).length) {
this.walk(this.directory);
}
return this.directories;
};
Monitor.prototype.files = function() {
if (!Object.keys(this.files).length) {
this.walk(this.directory);
}
return this.files;
};
Monitor.prototype.unwatch = function() {
if (!Object.keys(this.directories).length) {
for (var dir in this.directories) {
dir.w.close();
}
}
};
Monitor.prototype.empty = function() {
this.unwatch();
this.files = {};
this.directories = {};
};
Monitor.prototype.walk = function(directory) {
var monitor = this;
this.empty();
fs.readdir(directory, function(err, files) {
if (err) return;
for (var file in files) {
var fullname = path.resolve(files[file]);
if (!monitor.options.filter || monitor.options.filter(fullname)) {
fs.stat(fullname, function(err, stats) {
if (err) return;
if (stats.isDirectory()) {
monitor.walk(fullname);
monitor.watch(fullname, stats);
} else {
monitor.files[fullname] = stats;
}
});
}
}
});
};
Monitor.prototype.justlookatit = function(action, file) {
var monitor = this;
var fullname = path.resolve(file);
if (this.options.filter && !this.options.filer(fullname)) return;
fs.exists(fullname, function(exists) {
if (exists) {
fs.stat(fullname, function(err, stats) {
if (stats.isDirectory()) {
monitor.watch(fullname, stats);
} else {
if (monitor.files[fullname]) {
if (stats.mtime.getTime() > monitor.files[fullname].mtime.getTime()) {
monitor.emit('modified', fullname, stats);
}
} else {
monitor.emit('added', fullname, stats);
}
monitor.files[fullname] = stats;
}
});
} else {
if (monitor.files[fullname]) {
delete monitor.files[fullname];
monitor.emit('deleted', fullname);
} else if (monitor.directories[fullname]) {
monitor.directories[fullname].w.close();
delete monitor.directories[fullname];
}
}
});
};
/**
* exports
*/
exports.start = function(directory, options) {
return new Monitor(directory, options);
};
Here is my Working external test code:
var watch = require("./watch.js");
var fs = require('fs');
monitor = watch.start(__dirname);
monitor.on('added', function(file, stats) {
console.log("Caught Added: " + file);
});
monitor.on('modified', function(file, stats) {
console.log("Caught Modified: " + file);
});
monitor.on('deleted', function(file) {
console.log("Caught deleted: " + file);
});
// try creating a file immediately
fs.openSync('v.md', 'w');
The first test file runs perfectly fine, and I've tried both openSync and open. Finally, here is a version of the same test code, wrapped in a mocha unit test which is timing out:
/**
* requirements
*/
var watch, Q, fs, path, mocha, chai, assert;
watch = require('../lib/watch.js');
Q = require('q');
fs = require('fs');
path = require('path');
mocha = require('mocha');
chai = require('chai');
assert = chai.assert;
/**
* variables
*/
var watch_directory = path.join(__dirname, './watch');
/**
* tests
*/
describe('test watch', function() {
it('should create a monitor and run callbacks after fs changes', function(done) {
// I had planned to implement promises that chained the three callbacks
// but couldn't get one of them working in general
var added = function(file, stats) {
console.log("added");
done();
};
var modified = function(file, stats) {
console.log("modified");
};
var deleted = function(file, stats) {
console.log("deleted");
};
// create our service
var monitor = watch.start(watch_directory);
// assert it is defined
assert.isDefined(monitor);
// establish a listener
monitor.on('added', added);
monitor.on('modified', modified);
monitor.on('deleted', deleted);
// here is a file name using the current date to prevent duplication during tests
var file = path.join(watch_directory, (new Date()).getTime() + '.md');
// let's create the file, then delete it
fs.open(file, 'w+', function(err, fileDescriptor) {
// this prints before console output from the watch.js's `justlookatit` method
console.log(err);
console.log("writing to file");
// we probably don't want to try closing the fileDescriptor if the open failed
if (err) return;
// close the file descriptor
fs.close(fileDescriptor, function() {
// delete the file we just created
// fs.unlink(file, function() { /* not a big deal */ });
});
});
// modify a known-existing test file
fs.open('test.md', 'w+', function() {/* we don't care about this */});
})
});
I checked with console.log(fullname) inside the justlookatit method on the watch code, and it spits out the correct file name, matching the one generated by the unit test.
However, it then proceeds to return false when I run fs.exists. As I undestand it, that means the file system is notifying me that a file exists before it exists, which doesn't make sense really. So I tried adding an additional delay by wrapping my fs.exists method in a setTimeout, and that didn't change the results. I have also tried using both openSync and existsSync, and that made no difference.
I'm stumped, does anyone have any ideas why the mocha code isn't working?

So, the solution was to go for a walk. I came back, looked at the code again and figured out the cause of the problem with mocha, and also identified many other bugs.
The problem was the lack of context. The justlookatit method does not have a context, and in the test.js scenario it is watching the current directory, while the mocha test is watching a sub-directory.
The path.resolve was receiving only the file name, not the directory, and therefore merged it with the default (executables) directory, so the level of test.js, or watch_test.js for mocha. It proceeded to fail to locate any of the files in the mocha test case because they were all one level below the executable.
I won't go into detail about all the other bugs, but I may come back and post the repository link when I get to a point that I want to push it online.

You're missing the callback return(done); at the end of your test. Unless you call that callback, Mocha will time out every time.

node.js never exits after insert to couchbase, opposite of most node questions

My problem seems to be the opposite of every node.js question :-) I have a simple forEach loop to read a list of files and insert them into a Couchbase database. This works great, but it never exits after reading all the lines. So I added a counter to shutdown the couchbase connection after all inserts are complete. This works.
This process is intended to load hundreds of thousands of files, so I brought the async module into the mix to batch the inserts into groups of 100. The async.eachLimit is used to iterate over the array and insert documents in batches. Now the orig problem is back. Whatever magic async.eachLimit uses to recognize the process is complete is not happening.
I've been going through javascript scoping, callbacks, async, etc. Google searches are hitting keywords but not this issue. I've reduced the code down to the following testcase. To test, create three files and add their names to testlist.txt.
The async.eachLimit in place works up until it hits the limit, then hangs. Comment this out and uncomment array.forEach line and it works. Thanks in advance!
var fs = require('fs');
var couchbase = require('couchbase');
var async = require('async');
var filelist = 'testlist.txt';
var key_count = 0;
var cb_config = { host: 'localhost:8091', bucket: 'default'};
var db = new couchbase.Connection(cb_config, function(err) {
if (err) {
console.log('ERRR connect to couchbase at config['+cb_config+']');
throw err;
}
});
var insertFile=function(line) {
console.log('LOAD ['+line+']');
fs.readFile(line, function(file_err, f_doc) {
if(file_err) throw file_err;
db.set(line, f_doc, function(db_err, db_res){
if (db_err) {
console.log('FAIL ['+line+'] err['+db_err+']');
} else {
console.log('PASS ['+line+']');
}
key_count--;
if (key_count == 0) {
console.log('DONE Shutting down client, no more keys');
db.shutdown();
}
});
});
}
// read list of files into data array from file filelist
fs.readFile(filelist, function(filelist_err, lines) {
if(filelist_err) throw filelist_err;
// HACK split adds empty line to array, use replace to fix
var array = lines.toString().replace(/\n$/, '').split('\n');
key_count = array.length;
console.log('INIT lines['+key_count+']');
async.eachLimit(array, 2, insertFile, function(err) { console.log('FAIL async err['+err+']');} );
//array.forEach(function(data){insertFile(data);return;});
});
Testcase output using array.forEach:
INIT lines[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
LOAD [files.txt]
PASS [files.little.txt]
PASS [files.big.txt]
PASS [files.txt]
DONE Shutting down client, no more keys
Testcase output using async.eachLimit:
INIT lines[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
PASS [files.little.txt]
PASS [files.big.txt]
... hang, never gets to 3...

After review with a coworker, they spotted my mistake. I missed the async callback in my insertFile function. Adding that in works and allows me to remove the key counter! Solution code below:
var fs = require('fs');
var couchbase = require('couchbase');
var async = require('async');
var filelist = 'testlist.txt';
var key_count = 0;
var cb_config = { host: 'localhost:8091', bucket: 'default'};
var db = new couchbase.Connection(cb_config, function(err) {
if (err) {
console.log('ERRR connect to couchbase at config['+cb_config+']');
throw err;
}
});
var insertFile=function(line, callback) {
console.log('LOAD ['+line+']');
fs.readFile(line, function(file_err, f_doc) {
if(file_err) throw file_err;
db.set(line, f_doc, function(db_err, db_res){
if (db_err) {
console.log('FAIL ['+line+'] err['+db_err+']');
callback(db_err);
} else {
console.log('PASS ['+line+']');
callback();
}
});
});
}
// read list of files into data array from file filelist
fs.readFile(filelist, function(filelist_err, data) {
if(filelist_err) throw filelist_err;
// HACK stoopid bug split adds empty line to array, use replace to fix
var array = data.toString().replace(/\n$/, '').split('\n');
key_count = array.length;
console.log('READ files['+key_count+']');
async.eachLimit(array, 2, insertFile, function(err) {
if (err) console.log('LAST with async err['+err+']');
console.log('DONE Shutting down client, no more keys');
db.shutdown();
});
});
And successful output:
$ node testcase.js
READ files[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
PASS [files.little.txt]
LOAD [files.txt]
PASS [files.big.txt]
PASS [files.txt]
DONE Shutting down client, no more keys

Crypto module - Node.js

Which is the simplest way to compare a hash of a file without storing it in a database?
For example:
var filename = __dirname + '/../public/index.html';
var shasum = crypto.createHash('sha1');
var s = fs.ReadStream(filename);
s.on('data', function(d) {
shasum.update(d);
});
s.on('end', function() {
var d = shasum.digest('hex');
console.log(d + ' ' + filename);
fs.writeFile(__dirname + "/../public/log.txt", d.toString() + '\n', function(err) {
if(err) {
console.log(err);
} else {
console.log("The file was saved!");
}
});
});
The above code returns the hash of the HTML file. If I edit the file how can I know if it has been changed? In other words, how can I know if the hash has been changed?
Any suggestions?
Edited
Now the hash is being saved in the log file. How can I retrieve the hash from the file and match it with the new generated one? A code example would be awesome to give me a better understanding.
There is no difference with this question, but it isn't clear for me yet how to implement it.

If you're looking for changes on a file, then you can use one of Node's filesystem functions, fs.watch. This is how it's used:
fs.watch(filename, function (event, filename) {
//event is either 'rename' or 'change'
//filename is the name of the file which triggered the event
});
The watch function is however not very consistent, so you can use fs.watchFile as an alternative. fs.watchFile uses stat polling, so it's quite a bit slower than fs.watch, which detects file changes instantly.
Watching a file will return an instance of fs.FSWatcher, which has the events change and error. Calling .close will stop watching for changes on the file.
Here's an example relating to your code:
var filename = __dirname + '/../public/index.html';
var shasum = crypto.createHash('sha1');
var oldhash = null;
var s = fs.ReadStream(filename);
s.on('data', function(d) {
shasum.update(d);
});
s.on('end', function() {
var d = shasum.digest('hex');
console.log(d + ' ' + filename);
oldhash = d.toString();
fs.writeFile(__dirname + "/../public/log.txt", d.toString() + '\n', function(err) {
if(err) {
console.log(err);
}
else {
console.log("The file was saved!");
}
});
});
//watch the log for changes
fs.watch(__dirname + "/../public/log.txt", function (event, filename) {
//read the log contents
fs.readFile(__dirname + "/../public/log.txt", function (err, data) {
//match variable data with the old hash
if (data == oldhash) {
//do something
}
});
});

What's the difference between this question and the previous one you asked? If you're not wanting to store it in a database, then store it as a file. If you want to save the hash for multiple files, then maybe put them in a JSON object and write them out as a .json file so they're easy to read/write.
EDIT
Given what you added to your question, it should be pretty simple. You might write a function to do check and re-write:
function updateHash (name, html, callback) {
var sha = crypto.createHash('sha1');
sha.update(html);
var newHash = sha.digest('hex');
var hashFileName = name + '.sha';
fs.readFile(hashFileName, 'utf8', function (err, oldHash) {
var changed = true;
if (err)
console.log(err); // probably indicates the file doesn't exist, but you should consider doing better error handling
if (oldHash === newHash)
changed = false;
fs.writeFile(hashFileName, newHash, { encoding: 'utf8' }, function (err) {
callback(err, changed);
});
});
}
updateHash('index.html', "<html><head><title>...", function (err, isChanged) {
// do something with this information ?
console.log(isChanged);
});

Node.js - Organising code and closures - SFTP/Inotify

I was hoping I could get some advice on why my nodejs program is behaving in the way it is.
I am using two modules, node-sftp and node-inotify. I have setup node-inotify to watch a directory and call a function when something is written there, the function being an sftp upload.
Now the problem I have is that processing one file at a time is fine but when I drop 4 files in one go there, the function is called four times but only one sftp upload goes through.
Do I need to order my code in a particular way to ensure that the sftp upload occurs x times, is this something to do with closures perhaps?
This is a basic version of my code...
"event_handler" is called when something happens on a "watched" directory
"check_event" figures out if this type of event is one we want, in this case it's a "write"
"ftp_to_server" prepare connection details
"do_ftp" basically uses the node-sftp module to perform the sftp upload
event_handler = function(event){
var supplier;
check_event(event, supplier, type, ftp_to_server);
};
=================
function check_event(event, handler)
{
if (event.type === 'xxxxxx') {
var file_to_process_name = 'abc';
var file_to_process_dir = 'abc';
var remote_dir = 'abc';
handler(file_to_process_name, file_to_process_dir, remote_dir);
}
}
function ftp_to_server(file_to_process_name, file_to_process_dir, remote_dir) {
var connection_details = conf.ftp.connections
do_ftp(connection_details, file_to_process_name, file_to_process_dir, remote_dir);
}
function do_ftp(connection_details, file_to_process_name, file_to_process_dir, remote_dir) {
var credentials = {
// FTP settings here
};
var local_file = file_to_process_dir + file_to_process_name;
var remote_file = remote_dir + file_to_process_name;
connection = new sftp(credentials, function(err) {
if (err){
throw err;
}
connection.writeFile(remote_file, fs.readFileSync(local_file, "utf8"), null, function(err) {
if (err) {
throw err;
}
console.info('FTP PUT DONE');
});
});
};

Your "connection = new sftp(credentials, function(err) {"
should be
var connection = new sftp(credentials, function(err) {
The way you currently have it coded, "connection" is a global and you are writing over it.

We Keep Coding

JavaScript is the programming language of the Web.

Recursive reading on all files in a directory - FileSystem API - javascript

Related

Watson Assistant context is not updated

Mocha test is failing but implementation works

node.js never exits after insert to couchbase, opposite of most node questions

Crypto module - Node.js

Node.js - Organising code and closures - SFTP/Inotify

Categories

Resources