Synchronously and recursively finding files in the filesystem - javascript

I just have a quick question here:
I am using Node.js to write a command-line tool that validates JSON files against JSON Schemas. The problem is that when I try to collect all the schemas, I always get "undefined", because I'm using an async function where only sync behavior will do.
For this command-line tool, async is NOT needed.
Could someone help me out and show me how to make this work?
var getJSONSchemaFiles = function (dir) {
    var results2 = [];
    var recursive = require('recursive-readdir');

    recursive(dir, function (err, files) {
        // 'files' is an array of file names
        // console.log(files);
        files.forEach(function (entry) {
            if (entry.indexOf(".schema.json") > -1) {
                results2.push(entry);
            }
        });
        console.log(results2);
    });

    // this runs before the async callback above has filled 'results2'
    return results2;
};
I am using the npm package "recursive-readdir", but I suspect I don't even need a package for this kind of thing?

OK, this enumerates all files under the given path synchronously:
var fs = require('fs');

function recursiveReaddir(path) {
    var stat = fs.lstatSync(path);
    if (stat.isFile())
        return [path];
    if (!stat.isDirectory())
        return []; // ignore sockets, FIFOs, etc.
    // recurse into each entry and flatten the resulting arrays
    return [].concat.apply([], fs.readdirSync(path).map(function (fname) {
        return recursiveReaddir(path + '/' + fname);
    }));
}
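For the original question, you can then filter the result for schema files. A minimal usage sketch (the directory name is just an example):
var getJSONSchemaFiles = function (dir) {
    return recursiveReaddir(dir).filter(function (entry) {
        return entry.indexOf(".schema.json") > -1;
    });
};
console.log(getJSONSchemaFiles('./schemas'));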

Use the glob module: https://github.com/isaacs/node-glob. There are async and sync methods, like glob.sync(pattern, [options]) and glob(pattern, [options], cb).
Example from their docs:
var glob = require("glob")

// options is optional
glob("**/*.js", options, function (er, files) {
    // files is an array of filenames.
    // If the `nonull` option is set, and nothing
    // was found, then files is ["**/*.js"]
    // er is an error object or null.
})
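Since you don't need async, the sync method fits your case. A sketch, assuming your schemas live somewhere under the directory you pass in:
var glob = require("glob");

var getJSONSchemaFiles = function (dir) {
    // '**/*.schema.json' matches schema files in 'dir' and any subdirectory
    return glob.sync(dir + "/**/*.schema.json");
};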


How to loop through a folder of files?

How can I loop through a folder that has subfolders and retrieve all files with the extension '.element.ts'?
const fs = require('fs');
const filesDir = fs.readdirSync('packages/web-components/src');
// the json result that will be generated
let content = [];

filesDir.forEach(file => {
    if (fileName === '???')
        content.push(file);
});
const fs = require('fs');

function getAllFiles(dir, allFilesList = []) {
    const files = fs.readdirSync(dir);
    files.map(file => {
        const name = dir + '/' + file;
        if (fs.statSync(name).isDirectory()) { // check if subdirectory is present
            getAllFiles(name, allFilesList);   // recurse into the subdirectory
        } else {
            allFilesList.push(name);           // push the file name into the array
        }
    });
    return allFilesList;
}

const allFiles = getAllFiles('./testfolder');
const fileEndsWith = allFiles.filter(file => file.endsWith('.element.ts'));
console.log(fileEndsWith);
Easy way: use the glob package.
const glob = require("glob");

const pattern = "packages/web-components/src/*.element.ts";
const elementTsFilenames = glob.sync(pattern);

Manual way:
const dir = "packages/web-components/src";
const extension = ".element.ts";
const elementTsFilenames = fs.readdirSync(dir).filter(fn => fn.endsWith(extension));
This case is simple enough that the manual way is just as easy, if not easier; for more complex requirements (e.g. recursively searching subdirectories), a library approach is nice.
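As a side note (my addition, requires a recent Node.js, v18.17+): fs.readdirSync now accepts a recursive option, which covers the subdirectory case without a library:
const fs = require("fs");

const dir = "packages/web-components/src";
// 'recursive: true' walks subdirectories; paths are returned relative to 'dir'
const elementTsFilenames = fs
    .readdirSync(dir, { recursive: true })
    .filter(fn => fn.endsWith(".element.ts"));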
I'll show you how to recursively get all the files in a directory (even those located in subdirectories).
To do this, we need a recursive function that can call itself when it encounters a sub-directory, walking each sub-directory and adding any new files it finds. We also need to check whether the file name ends with a specific string. When the function is finished, it should return an array with all the files it encountered.
Here's what the recursive function looks like:
const fs = require("fs");
const path = require("path");

const getAllFiles = function (dirPath, extension, arrayOfFiles) {
    const files = fs.readdirSync(dirPath);
    arrayOfFiles = arrayOfFiles || [];

    files.forEach(function (file) {
        if (fs.statSync(dirPath + "/" + file).isDirectory()) {
            // note: 'extension' must be passed along on the recursive call
            arrayOfFiles = getAllFiles(dirPath + "/" + file, extension, arrayOfFiles);
        } else if (file.endsWith(extension)) {
            arrayOfFiles.push(path.join(__dirname, dirPath, "/", file));
        }
    });

    return arrayOfFiles;
};
First, we require() the Node.js path module. Since this is included with Node.js, you don't need to install anything for it to work. This module helps us easily build full file paths.
The getAllFiles variable holds the recursive function that will go through each subdirectory and return an array of file names. It takes a directory path, a file extension, and an optional arrayOfFiles as arguments.
Inside the getAllFiles function, we first use the readdirSync() function to get all of the files and directories inside the given dirPath supplied to the function.
Then, we create an arrayOfFiles that will hold all the file names to be returned when the function is done running.
Next, we loop over each item (file or directory) found by the readdirSync() function. If the item is a directory, we have the function recursively call itself to get all of the files and sub-directories inside it.
And if the item is a file, we simply append its path to the arrayOfFiles array (provided the file name ends with the given extension).
When the forEach loop has finished, we return the arrayOfFiles array.
Here is how you use the function in your code:
const result = getAllFiles("packages/web-components/src", ".element.ts");
I don't think you need an npm package for this: It's not too hard to walk the file system using an async iterator and filter the results based on something like a regular expression.
Another bonus of an async technique is that it doesn't block your thread while it iterates the files (other work can be done in between each result while it's searching), especially if you have a lot of sub-directories/files to search through.
If you want to reduce your project's dependencies, you can do something like this:
example.mjs:
import * as path from 'node:path';
import {readdir} from 'node:fs/promises';

/** Search all subdirectories, yielding matching file entries */
export async function* findFiles (dir, regexpFilter) {
    for (const entry of await readdir(dir, {withFileTypes: true})) {
        const fPath = path.resolve(dir, entry.name);
        if (entry.isDirectory()) {
            yield* findFiles(fPath, regexpFilter);
            continue;
        }
        if (regexpFilter && !regexpFilter.test(entry.name)) continue;
        yield Object.assign(entry, {path: fPath});
    }
}

async function main () {
    const dir = 'packages/web-components/src';

    // Regular expression which means: ends with '.element.ts'
    const filter = /\.element\.ts$/;

    for await (const entry of findFiles(dir, filter)) {
        // If you don't include a filter argument, then all files will be iterated
        console.log(entry.name); // just the file name
        console.log(entry.path); // the full file path
    }
}

main();
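If you'd rather collect the matches into an array instead of handling them one at a time, a small helper on top of findFiles does it (a sketch; collectFiles is my own name, not part of the above):
async function collectFiles (dir, regexpFilter) {
    const results = [];
    for await (const entry of findFiles(dir, regexpFilter)) results.push(entry);
    return results;
}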

Why is merge-stream requiring async completion signal?

I'm in the process of migrating from gulp#3.9.1 to gulp#4.0.2 and upgrading my gulp dependencies in the process. I have the following task in my gulpfile, where you can assume directories is just an array of directories I want to perform this operation on:
var gulp = require('gulp');
var ngAnnotate = require('gulp-ng-annotate'); // annotates dependencies in Angular components
var rev = require('gulp-rev'); // appends a hash to file names to eliminate stale cached files
var revReplace = require('gulp-rev-replace');
var uglify = require('gulp-uglify'); // minifies javascript files
var compressCss = require('gulp-minify-css');
var useref = require('gulp-useref'); // replaces style and script blocks in HTML files
var filter = require('gulp-filter');
var merge = require('merge-stream');
var sourcemaps = require('gulp-sourcemaps');

function minify() {
    var tasks = directories.map(function (directory) {
        var cssFilter = filter("**/all.min.css", {restore: true});
        var jsAppFilter = filter("**/app.min.js", {restore: true});
        var jsFilter = filter("**/*.js", {restore: true});

        return gulp.src(dstBasePath + directory + "index.html", {allowEmpty: true})
            .pipe(useref())
            .pipe(cssFilter)
            .pipe(compressCss({keepSpecialComments: false}))
            .pipe(rev())
            .pipe(cssFilter.restore)
            .pipe(jsAppFilter)
            .pipe(sourcemaps.init())
            .pipe(ngAnnotate({add: true, single_quotes: true}))
            .pipe(jsAppFilter.restore)
            .pipe(jsFilter)
            .pipe(uglify())
            .pipe(rev())
            .pipe(jsFilter.restore)
            .pipe(revReplace())
            .pipe(sourcemaps.write('.')) // sourcemaps need to be written to the same folder for Datadog upload to work
            .pipe(gulp.dest(dstBasePath + directory));
    });
    return merge(tasks);
}
Why would this result in the error "Did you forget to signal async completion?" from Gulp when running the task? Note that I'm using Gulp 4. I've tried passing a callback done to this task, and adding .addListener('end', done) to the final pipe, but this causes my merged stream to end prematurely (presumably when the first one ends). So perhaps one of these plugins is not signaling when it's completed, but how would you even get this to work otherwise? Thanks for any insight you can provide.
return merge(folders.map(function (folder) { // this has worked for me in the past

as has this form without merge:

gulp.task('init', function (done) {
    var zips = getZips(zipsPath);
    var tasks = zips.map(function (zip) {
        return gulp.src(zipsPath + "/" + zip)
            .pipe(unzip({ keepEmpty: true }))
            .pipe(gulp.dest(path.join("src", path.basename(zip, ".zip"))))
            .on('end', function () { // this bit is necessary
                done();
            });
    });
    return tasks;
});
Gulp 4 requires that you signal async completion. There's some good information about it in this answer to a similar question:
Gulp error: The following tasks did not complete: Did you forget to signal async completion?
I had a similar case where I was returning a merged set of tasks, and I was able to resolve the error by making the function async and awaiting the merge. My case looked something like this:
gulp.task("build", async function () {
...
return await merge(tasks);
});
so I think you should be able to do something like
async function minify(){
...
return await merge(tasks);
}
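Another way to signal completion explicitly, if you're on Node 15+ where the stream/promises module exists, is to return a promise that resolves when the merged stream actually ends. A sketch, not tested against this exact gulpfile:
const { finished } = require('stream/promises');

async function minify() {
    const tasks = directories.map(function (directory) {
        // ...same pipeline as above...
    });
    // resolves (and lets gulp mark the task done) once the merged stream has ended
    return finished(merge(tasks));
}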

JavaScript, Node.js: search for a specific word/string in files

I'm trying to make an app that searches all files under the current directory/subdirectories for a specified string.
As I understand it, that means I need to create a read stream, loop over it, and load the read data into an array; if the word is found, print __filename and __dirname, and if not, print a "not found" message.
Unfortunately, I could not make it work...
Any clue?
var path = require('path'),
    fs = require('fs');

function fromDir(startPath, filter, ext) {
    if (!fs.existsSync(startPath)) {
        console.log("no dir ", startPath);
        return;
    }
    var files = fs.readdirSync(startPath);
    let found = files.find((file) => {
        let thisFilename = path.join(startPath, file);
        let stat = fs.lstatSync(thisFilename);
        var readStream = fs.createReadStream(fs);
        var readline = require('readline');
        if (stat.isDirectory()) {
            fromDir(thisFilename, filename, readline, ext);
        } else {
            if (path.extname(createReadStream) === ext && path.basename(thisFilename, ext) === filename) {
                return true;
            }
        }
    });
    console.log('-- your word has found on : ', filename, __dirname);
    if (!found) {
        console.log("Sorry, we didn't find your term");
    }
}

fromDir('./', process.argv[3], process.argv[2]);
Because not everything was included in the question, I made an assumption:
We are looking for full words (if that's not the case, replace the regex with a simple indexOf()).
Now, I've split the code into two functions - to make it both more readable and easier to recursively find the files.
Synchronous version:
const path = require('path');
const fs = require('fs');

function searchFilesInDirectory(dir, filter, ext) {
    if (!fs.existsSync(dir)) {
        console.log(`Specified directory: ${dir} does not exist`);
        return;
    }

    const files = getFilesInDirectory(dir, ext);

    files.forEach(file => {
        const fileContent = fs.readFileSync(file);

        // We want full words, so we use a word boundary in the regex.
        const regex = new RegExp('\\b' + filter + '\\b');
        if (regex.test(fileContent)) {
            console.log(`Your word was found in file: ${file}`);
        }
    });
}

// Using recursion, we find every file with the desired extension, even if it's deeply nested in subfolders.
function getFilesInDirectory(dir, ext) {
    if (!fs.existsSync(dir)) {
        console.log(`Specified directory: ${dir} does not exist`);
        return;
    }

    let files = [];
    fs.readdirSync(dir).forEach(file => {
        const filePath = path.join(dir, file);
        const stat = fs.lstatSync(filePath);

        // If we hit a directory, apply our function to that dir. If we hit a file, add it to the array of files.
        if (stat.isDirectory()) {
            const nestedFiles = getFilesInDirectory(filePath, ext);
            files = files.concat(nestedFiles);
        } else {
            if (path.extname(file) === ext) {
                files.push(filePath);
            }
        }
    });

    return files;
}
Async version - because async is cool:
const path = require('path');
const fs = require('fs');
const util = require('util');

const fsReaddir = util.promisify(fs.readdir);
const fsReadFile = util.promisify(fs.readFile);
const fsLstat = util.promisify(fs.lstat);

async function searchFilesInDirectoryAsync(dir, filter, ext) {
    const found = await getFilesInDirectoryAsync(dir, ext);

    for (const file of found) {
        const fileContent = await fsReadFile(file);

        // We want full words, so we use a word boundary in the regex.
        const regex = new RegExp('\\b' + filter + '\\b');
        if (regex.test(fileContent)) {
            console.log(`Your word was found in file: ${file}`);
        }
    }
}

// Using recursion, we find every file with the desired extension, even if it's deeply nested in subfolders.
async function getFilesInDirectoryAsync(dir, ext) {
    let files = [];
    const filesFromDirectory = await fsReaddir(dir).catch(err => {
        throw new Error(err.message);
    });

    for (const file of filesFromDirectory) {
        const filePath = path.join(dir, file);
        const stat = await fsLstat(filePath);

        // If we hit a directory, apply our function to that dir. If we hit a file, add it to the array of files.
        if (stat.isDirectory()) {
            const nestedFiles = await getFilesInDirectoryAsync(filePath, ext);
            files = files.concat(nestedFiles);
        } else {
            if (path.extname(file) === ext) {
                files.push(filePath);
            }
        }
    }

    return files;
}
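To kick off the async version from the command line, you'd handle the returned promise. A sketch using the same argument convention as the update below:
searchFilesInDirectoryAsync(process.argv[2], process.argv[3], process.argv[4])
    .catch(err => console.error(err));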
If you have not worked with or don't yet understand async/await, it is a great thing to learn as soon as possible. Trust me, you will love not seeing those ugly callbacks again!
UPDATE:
As you pointed out in the comments, you want the function to run when invoking node on the file, and you want to pass the function parameters as node's arguments.
To do that, at the end of your file, you need to add:
searchFilesInDirectory(process.argv[2], process.argv[3], process.argv[4]);
This extracts our arguments and passes them to the function.
With that, you can call our process like so (example arguments):
node yourscriptname.js ./ james .txt
Personally, if I were to write this, I would leverage the beauty of asynchronous code, and Node.js's async / await.
As a side note:
You can easily improve the readability of your code by adding proper formatting. Don't get me wrong, it's not terrible - but it can be improved:
Use spaces OR newlines after commas.
Use spaces around equality and arithmetic operators.
As long as you are consistent with your formatting, everything looks much better.

Getting a list of filenames using Glob

I'm trying to get a list of filenames in node and I'm getting what I think is a scoping issue.
var files = [];
glob(options.JSX_DEST + "/*.js", function (er, files) {
    files = files.map(function (match) {
        return path.relative(options.JSX_DEST, match);
    });
});
Since I declare files outside the call to glob, should it get populated with the results of the map function?
Just rename the variable. The callback parameter files shadows the outer files, so your assignment only ever updates the parameter, never the outer array.
var globalFiles = [];
glob(options.JSX_DEST + "/*.js", function (er, files) {
    globalFiles = files.map(function (match) {
        return path.relative(options.JSX_DEST, match);
    });
});
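Also note that glob is asynchronous, so globalFiles is only populated once the callback fires; code that reads it immediately after the glob() call will still see an empty array. If you need the list right away, the sync method avoids the timing issue entirely (a sketch using the same pattern):
var globalFiles = glob.sync(options.JSX_DEST + "/*.js").map(function (match) {
    return path.relative(options.JSX_DEST, match);
});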

Create plugin gulp with stream

I created a plugin that writes JSON data to a JSON file.
But I don't understand why I should send my JSON object down the pipe instead of writing the file directly in my plugin.
I want to use my plugin with this syntax:
gulp.task('js-hash', function () {
    // Get all js in redis
    gulp.src('./build/js/**/*.js')
        .pipe(getHashFile('/build/js/'))
        .pipe(gulp.dest('./build/js/hash.json'));
});
And not this:
gulp.task('js-hash', function () {
    // Get all js in redis
    gulp.src('./build/js/**/*.js')
        .pipe(getHashFile('./build/js/hash.json', '/build/js/'));
});
This is my plugin:
var through = require('through2');
var gutil = require('gulp-util');
var crypto = require('crypto');
var fs = require('fs');

var PluginError = gutil.PluginError;

// Consts
const PLUGIN_NAME = 'get-hash-file';

var json = {};

function getHashFile(filename, basename) {
    if (!filename) {
        throw new PluginError(PLUGIN_NAME, "Missing filename!");
    }

    // Creating a stream through which each file will pass
    var stream = through.obj(function (file, enc, callback) {
        if (file.isNull()) {
            this.push(file); // Do nothing if no contents
            return callback();
        }
        if (file.isBuffer()) {
            var hash = crypto.createHash('sha256').update(String(file.contents)).digest('hex');
            json[file.path.replace(file.cwd + basename, '')] = hash;
            return callback();
        }
        if (file.isStream()) {
            this.emit('error', new PluginError(PLUGIN_NAME, 'Stream not supported!'));
            return callback();
        }
    }).on('finish', function () {
        fs.writeFile(filename, JSON.stringify(json), function (err) {
            if (err) {
                throw err;
            }
        });
    });

    // returning the file stream
    return stream;
}

// Exporting the plugin main function
module.exports = getHashFile;
Nothing prevents you from doing this... besides not respecting the plugin guidelines!
Users expect a plugin to stream files so that they can pipe them to other plugins.
If I understand your code correctly, you're trying to generate a file that contains the SHA hashes of all inbound files. Why not let users take this file and pipe it to other plugins? You'd be surprised what people could do with it.
While this question looks a bit opinion-based, the real focus is how to deal with files that may not belong to the main stream. Issues like this come up in many plugins; for example, the gulp-uglify authors wonder how they can add source maps without mixing js and source-map output downstream.
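For reference, a minimal sketch of the stream-friendly variant the guidelines suggest: instead of calling fs.writeFile yourself, push a new hash.json file downstream in through2's flush callback so callers can route it with gulp.dest(). This assumes the vinyl package for the file object; the details here are illustrative, not from the original plugin.
var through = require('through2');
var Vinyl = require('vinyl'); // gulp's file object
var crypto = require('crypto');
var path = require('path');

function getHashFile(basename) {
    var json = {}; // local, so repeated runs don't share state

    return through.obj(function (file, enc, callback) {
        if (file.isBuffer()) {
            var hash = crypto.createHash('sha256').update(String(file.contents)).digest('hex');
            json[file.path.replace(file.cwd + basename, '')] = hash;
        }
        callback(); // swallow the inbound files; we only emit the summary
    }, function (callback) {
        // flush callback: runs once, after all files have been seen
        this.push(new Vinyl({
            cwd: process.cwd(),
            base: process.cwd(),
            path: path.join(process.cwd(), 'hash.json'),
            contents: Buffer.from(JSON.stringify(json))
        }));
        callback();
    });
}

// usage: note gulp.dest() takes a directory; the file name comes from the plugin
// gulp.src('./build/js/**/*.js')
//     .pipe(getHashFile('/build/js/'))
//     .pipe(gulp.dest('./build/js/'));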
