I am running a script which looks into a directory and lists files, then checks for the file type to process, if the extension matches then the file is read and each line of the file (.col which is just a txt file renamed) is inserted into an array.
Now after the file is read and the array is populated I would like to use the array and do some further processing, e.g create a db record. I am missing something really basic here because on each console log I do as below I always get the full items (in my array) of the contents of all files.
So to make it a bit simpler:
array is empty.
Then file is read and processed and array now has
array[0]=line 0 of file
array[0]=line 1 of file etc
const fs = require('fs');
const readline =require('readline');
var files = fs.readdirSync('/home/proj/data');
var path = require('path');
var model=[];
var lineReader=[];
for(var i=0; i<files.length; i++) {
if(path.extname(files[i]) === ".col") {
lineReader[i] = readline.createInterface({
input: require('fs').createReadStream(files[i])
});
lineReader[i].on('line', function (line) {
model.push(line);
}).on('close', async function() {
console.log(model);
});
}
}
Instead the script is run and array[] holds all lines of all files that match the extension.
Your help is greatly appreciated and anyone is allowed to scorch my JS as I am pretty sure I am missing something basic here.
So, you want to read the files in parallel (because that's what your program does) and put it in an array of arrays?
You can make the reading file mechanism a promise and use it using Promise.all. Here is an example to get you started.
const fs = require('fs');
const readline = require('readline');
var files = fs.readdirSync('./');
var path = require('path');
function readFile(fileName) {
return new Promise(resolve => {
const array = [];
const lineReader = readline.createInterface({
input: fs.createReadStream(files[i])
});
lineReader.on('line', function (line) {
array.push(line);
}).on('close', async function () {
//do some proc
console.log(array);
resolve(array);
});
});
}
const readFilePromises = [];
for (var i = 0; i < files.length; i++) {
if (path.extname(files[i]) === ".js") {
readFilePromises.push(readFile(files[i]));
}
}
Promise.all(readFilePromises) //or await Promise.all([..])
.then(data => {
console.log(data);//will be array of arrays
})
If you want a single Array you can always flatten the result using data.flat()
If your files are not very big and sync methods are OK, you can simplify the code this way:
'use strict';
const fs = require('fs');
const path = require('path');
const model = [];
fs.readdirSync('/home/proj/data')
.filter(name => path.extname(name) === '.col')
.forEach((name) => {
model.push(...fs.readFileSync(name, 'utf8').split('\n'));
});
console.log(model);
Related
I have an gif file that is stored in a directory call assets on my computer. I would like to create X amount of duplicates and they should be stored in the same directory and each of them should have a different file name.
Example:
I in the assets directory is the gif file call 0.gif I would like to duplicate this gif file 10 times and The duplicates should be called 1.gif, 2.gif, 3.R and so on.
The simplest option is to use fs and using copyFile function available
const fs = require("fs");
const path = require("path");
let copyMultiple = (src, count) => {
let initCount = 0;
while (initCount < count) {
initCount++;// you can put this at bottom too acc to your needs
const newFileName = `${initCount}_${initCount}${path.extname(src)}`;
console.log(newFileName, "is new file name");
fs.copyFile(src, newFileName, (error) => {
// if errors comes
if (error) {
console.log(error);
}
});
}
};
copyMultiple("1.gif", 3);
Another elegant way of doing this is
const util = require("util");
const fs = require("fs");
const path = require("path");
const copyFilePromise = util.promisify(fs.copyFile);
function copyFiles(srcFile, destDir, destFileNames) {
return Promise.all(
destFileNames.map((file) => {
return copyFilePromise(srcFile, path.join(destDir, file));
})
);
}
const myDestinationFileNames = ["second.gif", "third.gif"];
const sourceFileName = "1.gif";
copyFiles(sourceFileName, "", myDestinationFileNames)
.then(() => {
console.log("Copying is Done");
})
.catch((err) => {
console.log("Got and Error", error);
});
Using this will also give upperhand of knowing when it is done.
You can read docs here
const fs = require("fs")
const filename = "index.js".split(".") //filename like 0.gif to gif
const times = 10 // number of times to duplicate
for(var int = 1; int < times; int++){
const newFilename = `${(parseInt(filename[0]) + init)}.${filename[1]}` //new filename like 0.gif to 1.gif
fs.copyFileSync(filename, newfilename)
}
use the write file and read file from the fs module and a simple for loop
not sure which framework you're on but fs.copyFile() is the standard way for node.js https://nodejs.org/api/fs.html#fscopyfilesrc-dest-mode-callback
I am attempting to do a file search based off array of file names and root directory. I found some file search snips online that seem to work when I am finding just 1 single file, but it will not work when there is more than 1 file specified. Below is the snippet:
const fs = require('fs');
const path = require('path');
var dir = '<SOME ROOT DIRECTORY>';
var file = 'Hello_World.txt'
var fileArr = ['Hello_World.txt', 'Hello_World2.txt', 'Hello_World3.txt'];
const findFile = function (dir, pattern) {
var results = [];
fs.readdirSync(dir).forEach(function (dirInner) {
dirInner = path.resolve(dir, dirInner);
var stat = fs.statSync(dirInner);
if (stat.isDirectory()) {
results = results.concat(findFile(dirInner, pattern));
}
if (stat.isFile() && dirInner.endsWith(pattern)) {
results.push(dirInner);
}
});
return results;
};
console.log(findFile(dir, file));
Any thoughts on how I can get this working with an array rather than just a single file string?
Doing the following seems to be working, but didn't know if there were other ways to do this which may be simpler:
newFileArr = [];
fileArr.forEach((file => {
//findFile(dir, file).toString();
console.log(findFile(dir, file).toString());
}));
The only thing that needs to change is the condition to determine if an individual filepath meets the search criteria. In the code you've posted, it looks like dirInner.endsWith(pattern), which says "does the filepath end with the given pattern?"
We want to change that to say "does the filepath end with any of the given patterns?" And closer to how the code will look, we can rephrase that as "Can we find a given pattern such that the filepath ends with that pattern?"
Let's rename pattern to patterns. Then we can simple replace dirInner.endsWith(patterns) with patterns.some(pattern => dirInner.endsWith(pattern))
I have a simple txt file with data in this format with millions of lines:
{"a":9876312,"b":1572568981512}
{"a":9876312,"b":1572568981542}
I want to convert this into a file with "dot" json extension file using reduce function in NodeJs and return statement, probably looking like this:
[{"a":9876312,"b":1572568981512},
{"a":9876312,"b":1572568981542}]
Any help will be really really appreciated. Thanks :)
SO far I tried this:
const fs = require('fs');
const FILE_NAME = 'abc.txt';
const x = mapEvents(getJSONFileData(FILE_NAME));
function getJSONFileData(filename) {
return fs.readFileSync(filename, 'utf-8')
.split('\n')
.map(JSON.parse)
}
function mapEvents(events) {
events.reduce((acc, data) => {
return [{data.a, data.b}]
});
}
console.log(x)
I am getting an 'undefined' value constantly
I have found some issues, in your code.
You haven't returned anything from mapEvents function, that makes your varaible x value undefined.
getJSONFileData needs some fixing.
You can use below code:-
const fs = require('fs');
const FILE_NAME = 'abc.txt';
const x = mapEvents(getJSONFileData(FILE_NAME));
function getJSONFileData(filename) {
return fs
.readFileSync(filename, 'utf-8')
.split('\n')
.filter(Boolean)
.map(JSON.parse);
}
function mapEvents(events) {
return JSON.stringify(events);
}
console.log(x);
i'm trying to make an app that searches for all files
contains a specified string under the current directory/subdirectory.
as i understand it means i need to create a read stream, loop it, load the read data to an array, if the word found give __filename, dirname and if ! not found message.
unfortunately, i could not make it work...
any clue?
var path = require('path'),
fs=require('fs');
function fromDir(startPath,filter,ext){
if (!fs.existsSync(startPath)){
console.log("no dir ",startPath);
return;
};
var files=fs.readdirSync(startPath);
let found = files.find((file) => {
let thisFilename = path.join(startPath, file);
let stat = fs.lstatSync(thisFilename);
var readStream = fs.createReadStream(fs);
var readline = require('readline');
if (stat.isDirectory()) {
fromDir(thisFilename, filename,readline, ext);
} else {
if (path.extname(createReadStream) === ext && path.basename(thisFilename, ext) === filename) {
return true;
}
}
});
console.log('-- your word has found on : ',filename,__dirname);
}
if (!found) {
console.log("Sorry, we didn't find your term");
}
}
fromDir('./', process.argv[3], process.argv[2]);
Because not everything was included in the question, I made an assumption:
We are looking for full words (if that's not the case, replace the regex with a simple indexOf()).
Now, I've split the code into two functions - to make it both more readable and easier to recursively find the files.
Synchronous version:
const path = require('path');
const fs = require('fs');
function searchFilesInDirectory(dir, filter, ext) {
if (!fs.existsSync(dir)) {
console.log(`Specified directory: ${dir} does not exist`);
return;
}
const files = getFilesInDirectory(dir, ext);
files.forEach(file => {
const fileContent = fs.readFileSync(file);
// We want full words, so we use full word boundary in regex.
const regex = new RegExp('\\b' + filter + '\\b');
if (regex.test(fileContent)) {
console.log(`Your word was found in file: ${file}`);
}
});
}
// Using recursion, we find every file with the desired extention, even if its deeply nested in subfolders.
function getFilesInDirectory(dir, ext) {
if (!fs.existsSync(dir)) {
console.log(`Specified directory: ${dir} does not exist`);
return;
}
let files = [];
fs.readdirSync(dir).forEach(file => {
const filePath = path.join(dir, file);
const stat = fs.lstatSync(filePath);
// If we hit a directory, apply our function to that dir. If we hit a file, add it to the array of files.
if (stat.isDirectory()) {
const nestedFiles = getFilesInDirectory(filePath, ext);
files = files.concat(nestedFiles);
} else {
if (path.extname(file) === ext) {
files.push(filePath);
}
}
});
return files;
}
Async version - because async is cool:
const path = require('path');
const fs = require('fs');
const util = require('util');
const fsReaddir = util.promisify(fs.readdir);
const fsReadFile = util.promisify(fs.readFile);
const fsLstat = util.promisify(fs.lstat);
async function searchFilesInDirectoryAsync(dir, filter, ext) {
const found = await getFilesInDirectoryAsync(dir, ext);
for (file of found) {
const fileContent = await fsReadFile(file);
// We want full words, so we use full word boundary in regex.
const regex = new RegExp('\\b' + filter + '\\b');
if (regex.test(fileContent)) {
console.log(`Your word was found in file: ${file}`);
}
};
}
// Using recursion, we find every file with the desired extention, even if its deeply nested in subfolders.
async function getFilesInDirectoryAsync(dir, ext) {
let files = [];
const filesFromDirectory = await fsReaddir(dir).catch(err => {
throw new Error(err.message);
});
for (let file of filesFromDirectory) {
const filePath = path.join(dir, file);
const stat = await fsLstat(filePath);
// If we hit a directory, apply our function to that dir. If we hit a file, add it to the array of files.
if (stat.isDirectory()) {
const nestedFiles = await getFilesInDirectoryAsync(filePath, ext);
files = files.concat(nestedFiles);
} else {
if (path.extname(file) === ext) {
files.push(filePath);
}
}
};
return files;
}
If you have not worked with / understand async/await yet, it is a great step to take and learn it as soon as possible. Trust me, you will love not seeing those ugly callbacks again!
UPDATE:
As you pointed in comments, you want it to execute the function after running node process on the file. You also want to pass the function parameters as node's arguments.
To do that, at the end of your file, you need to add:
searchFilesInDirectory(process.argv[2], process.argv[3], process.argv[4]);
This extracts our arguments and passes them to the function.
With that, you can call our process like so (example arguments):
node yourscriptname.js ./ james .txt
Personally, if I were to write this, I would leverage the beauty of asynchronous code, and Node.js's async / await.
As a very side note:
You can easily improve readability of your code, if you add proper formatting to it. Don't get me wrong, it's not terrible - but it can be improved:
Use spaces OR newlines after commas.
Use spaces around equality operators and arithmetic operators.
As long as you are consistent with formatting, everything looks much better.
Imagine you have many long text files, and you need to only extract data from the first line of each one (without reading any further content). What is the best way in Node JS to do it?
Thanks!
I ended up adopting this solution, which seems the most performant I've seen so far:
var fs = require('fs');
var Q = require('q');
function readFirstLine (path) {
return Q.promise(function (resolve, reject) {
var rs = fs.createReadStream(path, {encoding: 'utf8'});
var acc = '';
var pos = 0;
var index;
rs
.on('data', function (chunk) {
index = chunk.indexOf('\n');
acc += chunk;
index !== -1 ? rs.close() : pos += chunk.length;
})
.on('close', function () {
resolve(acc.slice(0, pos + index));
})
.on('error', function (err) {
reject(err);
})
});
}
I created a npm module for convenience, named "firstline".
Thanks to #dandavis for the suggestion to use String.prototype.slice()!
There's a built-in module almost for this case - readline. It avoids messing with chunks and so forth. The code would look like the following:
const fs = require('fs');
const readline = require('readline');
async function getFirstLine(pathToFile) {
const readable = fs.createReadStream(pathToFile);
const reader = readline.createInterface({ input: readable });
const line = await new Promise((resolve) => {
reader.on('line', (line) => {
reader.close();
resolve(line);
});
});
readable.close();
return line;
}
I know this doesn't exactly answer the question but for those who are looking for a READABLE and simple way to do so:
const fs = require('fs').promises;
async function getFirstLine(filePath) {
const fileContent = await fs.readFile(filePath, 'utf-8');
return (fileContent.match(/(^.*)/) || [])[1] || '';
}
NOTE:
naturaly, this will only work with text files, which I assumed you used from your description
this will work with empty files and will return an empty string
this regexp is very performant since it is simple (no OR conditions`or complex matches) and only reads the first line
Please try this:
https://github.com/yinrong/node-line-stream-util#get-head-lines
It unpipe the upstream once got the head lines.
Node.js >= 16
In all current versions of Node.js, readline.createInterface can be used as an async iterable, to read a file line by line - or just for the first line. This is also safe to use with empty files.
Unfortunately, the error handling logic is broken in versions of Node.js before 16, where certain file system errors may go uncaught even if the code is wrapped in a try-catch block because of the way asynchronous errors are propagated in streams. So I recommend using this method only in Node.js >= 16.
import { createReadStream } from "fs";
import { createInterface } from "readline";
async function readFirstLine(path) {
const inputStream = createReadStream(path);
try {
for await (const line of createInterface(inputStream)) return line;
return ''; // If the file is empty.
}
finally {
inputStream.destroy(); // Destroy file stream.
}
}
const firstLine = await readFirstLine("path/to/file");
//Here you go;
var lineReader = require('line-reader');
var async = require('async');
exports.readManyFiles = function(files) {
async.map(files,
function(file, callback))
lineReader.open(file, function(reader) {
if (reader.hasNextLine()) {
reader.nextLine(function(line) {
callback(null,line);
});
}
});
},
function(err, allLines) {
//do whatever you want to with the lines
})
}