Regex-based line deletion script (JS) not working - javascript

I have an script that reads the file and compares the string by a pattern, if it returns false it will delete the line on the .txt file.
This is my code
const readline = require('readline');
const lineReplace = require('line-replace')
const fs = require('fs');
const inputFileName = './outputfinal.txt';
const readInterface = readline.createInterface({
input: fs.createReadStream(inputFileName),
});
let testResults = [];
readInterface.on('line', line => {
testResult = test(line);
console.log(`Test result (line #${testResults.length+1}): `, testResult);
testResults.push({ input: line, testResult } );
if (testResult == false){
console.log(`Line #${testResults.length} will get deleted from this list`);
lineReplace({
file: './outputfinal.txt',
line: testResults.length,
text: '',
addNewLine: false,
callback: onReplace
});
function onReplace({file, line, text, replacedText}) {
};
};
});
// You can do whatever with the test results here.
//readInterface.on('close', () => {
// console.log("Test results:", testResults);
//});
function test(str){
let regex = /^\w+([\.-]?\w+)*#\w+([\.-]?\w+)*(\.\w{2,3})+$/; // email regex
str = str.split(",");
// string should be of length 3 with str[1] number of length 7
if(str && str.length === 3 && Number(str[1]) && str[1] ) {
let temp = str[0].split("-");
// check for 85aecb80-ac00-40e3-813c-5ad62ee93f42 separately.
if(temp && temp.length === 5 && /[a-zA-Z\d]{8}/.test(temp[0]) && /[a-zA-Z\d]{4}/.test(temp[1]) && /[a-zA-Z\d]{4}/.test(temp[2]) && /[a-zA-Z\d]{4}/.test(temp[3]) && /[a-zA-Z\d]{12}/.test(temp[4])){
// email regex
if(regex.test(str[2])) {
return true;
} else {
return false;
}
} else {
return false
}
} else {
return false;
}
}
But isn't working, returns error no such file or directory, I dont think that is the correct way to do a line remover script

First, if the error is "no such file or directory" is because the file doesn't exist. Please check the file exists at first in the same root of your project.
Second, don't use the library "line-replace", if you check the code this create a tmp file and rewrite all the file in a tmp with the replacement. When finish the process the tmp file is renamed to the original.
Third, if you analyze the code the "lineReplace" is async. So there will sometimes to try to open multiple times the file at the same time and consequently write it at same time again. This will create an unsuspected result.
The best recommendation is you must see how File works and Promises (async) in nodejs:
https://nodejs.org/api/fs.html
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
https://itnext.io/javascript-promises-with-node-js-e8ca827e0ea3
If you see the next code, you will see the next steps:
Create the tmp Route
Create the tmp File
Create a promise:
Create a readline interface
Process each line with a try catch to reject in case of error
When finish the process replace the tmp file to original file with a try-catch to reject in case of error
Wait to finish the promise, in case of error delete the tmp file
const fs = require('fs');
const readline = require('readline');
async function replaceLineWithConditional(pathFile, conditional) {
// tmpFile name
const tmpFilePath = `${pathFile}.tmp`;
// Create write stream
const tmpStream = fs.createWriteStream(tmpFilePath);
// Process it
const processFile = new Promise((resolve, reject) => {
const rl = readline.createInterface({
input: fs.createReadStream(pathFile),
});
// Process line
rl.on("line", (input) => {
try {
if (conditional(input)) {
tmpStream.write(input); // input
tmpStream.write("\n"); // linejump
}
} catch (err) {
// Reject error
reject(err);
}
});
// Finish
rl.on("close", () => {
try {
// Move the tmpFile
tmpStream.close();
fs.renameSync(tmpFilePath, pathFile);
// Resolve it
resolve(true);
} catch (err) {
// Reject error
reject(err);
}
});
});
try {
// Await the promise
return await processFile;
} catch (err) {
// Delete the tmp file and throw the error
tmpStream.close();
fs.unlinkSync(tmpFilePath);
throw err;
}
}
So you could call the function with your conditional function process as a callback. For example, I want to keep it all the lines who have a length more than 3 and not start with "a":
// async/await:
await replaceLineWithConditional("./test.txt", (line) => {
return line.length > 3 && /^[^a]/.test(line);
});
// then/catch:
replaceLineWithConditional("./test.txt", (line) => {
return line.length > 3 && /^[^a]/.test(line);
}).then(...).catch(...);
input:
Hi
Hello
abcdef
a
lalalal
output:
Hello
lalalal
If you want the file don't finish with the end line. (take a note: Why should text files end with a newline?) this can be a Quiz problem to test knowledge in the fs library :)

Related

Try statement never passes without throwing error

In the following block of code, I am trying to get user input and save them in a json file. If the the inserted item is already exists in the file it gets rejected. The issue here is that my programme always executes the catch clause and generate an empty array, although in some situations json file exists and it is ready to be read.
const fs = require('fs');
const loadFile = () => {
try{
const stringData = fs.readFileSync('note-data.json', 'utf8');
return [JSON.parse(stringData)];
} catch (e) {
return [];
}
};
const writeFile = (notes) => {
fs.writeFileSync('note-data.json', JSON.stringify(notes));
};
const addNote = (title, body) => {
let notes = loadFile();
const note = {
title,
body
}
console.log(notes);
const duplicateArray = notes.filter((note) => note.title === title);
console.log(duplicateArray);
if(duplicateArray.length === 0){
notes.push(note);
writeFile(notes);
}
};
The command line input is as follow:
node app.js add --title=Greeting --body="hello"
node app.js add --title=Greeting2 --body="hello2"
The output is:
[{"title":"Greeting","body":"hello"}]
The output should be:
[{"title":"Greeting","body":"hello"}, {"title":"Greeting2","body":"hello2"}]
My question is where this error occurs?
Typo?
fs.writeFileSync('note-date.json', JSON.stringify(notes));
Isn't the file 'note-data.json'?

Javascript,Nodejs: search for a specific word string in files

i'm trying to make an app that searches for all files
contains a specified string under the current directory/subdirectory.
as i understand it means i need to create a read stream, loop it, load the read data to an array, if the word found give __filename, dirname and if ! not found message.
unfortunately, i could not make it work...
any clue?
var path = require('path'),
fs=require('fs');
function fromDir(startPath,filter,ext){
if (!fs.existsSync(startPath)){
console.log("no dir ",startPath);
return;
};
var files=fs.readdirSync(startPath);
let found = files.find((file) => {
let thisFilename = path.join(startPath, file);
let stat = fs.lstatSync(thisFilename);
var readStream = fs.createReadStream(fs);
var readline = require('readline');
if (stat.isDirectory()) {
fromDir(thisFilename, filename,readline, ext);
} else {
if (path.extname(createReadStream) === ext && path.basename(thisFilename, ext) === filename) {
return true;
}
}
});
console.log('-- your word has found on : ',filename,__dirname);
}
if (!found) {
console.log("Sorry, we didn't find your term");
}
}
fromDir('./', process.argv[3], process.argv[2]);
Because not everything was included in the question, I made an assumption:
We are looking for full words (if that's not the case, replace the regex with a simple indexOf()).
Now, I've split the code into two functions - to make it both more readable and easier to recursively find the files.
Synchronous version:
const path = require('path');
const fs = require('fs');
function searchFilesInDirectory(dir, filter, ext) {
if (!fs.existsSync(dir)) {
console.log(`Specified directory: ${dir} does not exist`);
return;
}
const files = getFilesInDirectory(dir, ext);
files.forEach(file => {
const fileContent = fs.readFileSync(file);
// We want full words, so we use full word boundary in regex.
const regex = new RegExp('\\b' + filter + '\\b');
if (regex.test(fileContent)) {
console.log(`Your word was found in file: ${file}`);
}
});
}
// Using recursion, we find every file with the desired extention, even if its deeply nested in subfolders.
function getFilesInDirectory(dir, ext) {
if (!fs.existsSync(dir)) {
console.log(`Specified directory: ${dir} does not exist`);
return;
}
let files = [];
fs.readdirSync(dir).forEach(file => {
const filePath = path.join(dir, file);
const stat = fs.lstatSync(filePath);
// If we hit a directory, apply our function to that dir. If we hit a file, add it to the array of files.
if (stat.isDirectory()) {
const nestedFiles = getFilesInDirectory(filePath, ext);
files = files.concat(nestedFiles);
} else {
if (path.extname(file) === ext) {
files.push(filePath);
}
}
});
return files;
}
Async version - because async is cool:
const path = require('path');
const fs = require('fs');
const util = require('util');
const fsReaddir = util.promisify(fs.readdir);
const fsReadFile = util.promisify(fs.readFile);
const fsLstat = util.promisify(fs.lstat);
async function searchFilesInDirectoryAsync(dir, filter, ext) {
const found = await getFilesInDirectoryAsync(dir, ext);
for (file of found) {
const fileContent = await fsReadFile(file);
// We want full words, so we use full word boundary in regex.
const regex = new RegExp('\\b' + filter + '\\b');
if (regex.test(fileContent)) {
console.log(`Your word was found in file: ${file}`);
}
};
}
// Using recursion, we find every file with the desired extention, even if its deeply nested in subfolders.
async function getFilesInDirectoryAsync(dir, ext) {
let files = [];
const filesFromDirectory = await fsReaddir(dir).catch(err => {
throw new Error(err.message);
});
for (let file of filesFromDirectory) {
const filePath = path.join(dir, file);
const stat = await fsLstat(filePath);
// If we hit a directory, apply our function to that dir. If we hit a file, add it to the array of files.
if (stat.isDirectory()) {
const nestedFiles = await getFilesInDirectoryAsync(filePath, ext);
files = files.concat(nestedFiles);
} else {
if (path.extname(file) === ext) {
files.push(filePath);
}
}
};
return files;
}
If you have not worked with / understand async/await yet, it is a great step to take and learn it as soon as possible. Trust me, you will love not seeing those ugly callbacks again!
UPDATE:
As you pointed in comments, you want it to execute the function after running node process on the file. You also want to pass the function parameters as node's arguments.
To do that, at the end of your file, you need to add:
searchFilesInDirectory(process.argv[2], process.argv[3], process.argv[4]);
This extracts our arguments and passes them to the function.
With that, you can call our process like so (example arguments):
node yourscriptname.js ./ james .txt
Personally, if I were to write this, I would leverage the beauty of asynchronous code, and Node.js's async / await.
As a very side note:
You can easily improve readability of your code, if you add proper formatting to it. Don't get me wrong, it's not terrible - but it can be improved:
Use spaces OR newlines after commas.
Use spaces around equality operators and arithmetic operators.
As long as you are consistent with formatting, everything looks much better.

Node.js - Array undefined but logging on console

What I'm trying to do is to download a csv file, read it line by line and to add the splitted line (on ',') to tmparray.
This code works and prints all the element in the array.
var request = require('request');
var fs = require('fs');
readline = require('readline');
try {
request('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.csv').pipe(fs.createWriteStream("MyCsv.txt"));
} catch (e) {
console.error(e);
}
var inputFile = 'MyCsv.csv';
var tmparray;
//read the file
var rd = readline.createInterface({
input: fs.createReadStream('/home/nome/Node/MyCsv.csv')
});
try {
//read line by line
rd.on('line', (line) => {
tmparray += line.split(",");
//print the elements
tmparray.forEach((element) => {
console.log(element);
}, this);
});
} catch (e) {
console.error(e);
}
What I want to do is to print the array after I assigned it.
I've tried this:
var request = require('request');
var fs = require('fs');
readline = require('readline');
try {
request('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.csv').pipe(fs.createWriteStream("MyCsv.txt"));
} catch (e) {
console.error(e);
}
var inputFile = 'MyCsv.csv';
var tmparray;
//read the file
var rd = readline.createInterface({
input: fs.createReadStream('/home/nome/Node/MyCsv.csv')
});
try {
//read line by line
rd.on('line', (line) => {
tmparray += line.split(",");
});
} catch (e) {
console.error(e);
} finally {
console.log(tmparray); // undefined
// or this: console.log(tmparray[0]) can't read the property '0' of undefined
}
but the array is printed as if it is undefined
The problem is that rd.on(...) is asynchronous.
That means that you are telling rd that when it reads a line, it should add it to tmparray — but that doesn't actually happen yet. It happens moments later, after you console.log(tmparray).
You should say rd.on('close', () => console.log(tmparray)) to tell Node "when you have finished reading rd, then log the data".
There are a couple of other issues in the code but they should be easier to find once this is fixed. Looking at it, I think line isn't an event on readable streams so you should say rd.on('data', ...) instead; and you're trying to build up an array using the + operator which doesn't work. It will probably convert everything to strings though, so it should log something fairly reasonable for now.
Why not use the csv package it will give you the same result, Here is an example of transforming csv file into array:
const csv = require('csv')
, request = require('request');
var url = 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.csv';
request(url, function (err, response, data) {
if (err) throw err;
csv.parse(data, function (err, data) {
if (err) throw err;
// here you get your array
console.log(data);
});
});

How to read specific data from an uploaded txt file on node js

I need a client to upload a text file. Then I want to parse the text file such that only lines with the word "object" in it is the only thing left in the text file. I have successfully coded the uploading part. But need help coding how to parse out the lines with "object" not in it. My node js code is below.
You can use the ReadLine API that's part of Node Core to iterate through the file line-by-line. You can use string.includes() to determine if your line contains the phrase you're looking for.
var readline = require('readline');
var fs = require('fs');
function filterFile(phrase, input) {
return Promise((resolve, reject) => {
var lines = [];
let rl = readline.createInterface({
input: input
});
rl.on('line', (line) => {
if (line.includes(phrase, 0))
lines.push(line);
});
rl.on('close', () => {
let filteredLines = Buffer.from(lines);
return resolve(fs.createReadStream(filteredLines));
});
rl.on('error', (err) => {
return reject(err);
});
});
}
Edit for Filtered Output Write Stream Example
We can take the resulting stream returned by filterFile() and pipe its contents into a new file like so
var saveDest = './filteredLines.txt');
filterFile('object', inputStream)
.then((filteredStream) => {
let ws = fs.createWriteStream(saveDest);
filteredStream.once('error', (err) => {
return Promise.reject(err);
});
filteredStream.once('end', () => {
console.log(`Filtered File has been created at ${saveDest}`);
return Promise.resolve();
});
filteredStream.pipe(ws);
});
Step : 1
Divide the line using --
var x='i am object\ni m object';
var arr = x.split('\n');
Step : 2
For each line, test with object regexp
var reg = /object/g
if(reg.test(<eachline>)){
// write new line
}else{
// do nothing
}

What is the most efficient way to read only the first line of a file in Node JS?

Imagine you have many long text files, and you need to only extract data from the first line of each one (without reading any further content). What is the best way in Node JS to do it?
Thanks!
I ended up adopting this solution, which seems the most performant I've seen so far:
var fs = require('fs');
var Q = require('q');
function readFirstLine (path) {
return Q.promise(function (resolve, reject) {
var rs = fs.createReadStream(path, {encoding: 'utf8'});
var acc = '';
var pos = 0;
var index;
rs
.on('data', function (chunk) {
index = chunk.indexOf('\n');
acc += chunk;
index !== -1 ? rs.close() : pos += chunk.length;
})
.on('close', function () {
resolve(acc.slice(0, pos + index));
})
.on('error', function (err) {
reject(err);
})
});
}
I created a npm module for convenience, named "firstline".
Thanks to #dandavis for the suggestion to use String.prototype.slice()!
There's a built-in module almost for this case - readline. It avoids messing with chunks and so forth. The code would look like the following:
const fs = require('fs');
const readline = require('readline');
async function getFirstLine(pathToFile) {
const readable = fs.createReadStream(pathToFile);
const reader = readline.createInterface({ input: readable });
const line = await new Promise((resolve) => {
reader.on('line', (line) => {
reader.close();
resolve(line);
});
});
readable.close();
return line;
}
I know this doesn't exactly answer the question but for those who are looking for a READABLE and simple way to do so:
const fs = require('fs').promises;
async function getFirstLine(filePath) {
const fileContent = await fs.readFile(filePath, 'utf-8');
return (fileContent.match(/(^.*)/) || [])[1] || '';
}
NOTE:
naturaly, this will only work with text files, which I assumed you used from your description
this will work with empty files and will return an empty string
this regexp is very performant since it is simple (no OR conditions`or complex matches) and only reads the first line
Please try this:
https://github.com/yinrong/node-line-stream-util#get-head-lines
It unpipe the upstream once got the head lines.
Node.js >= 16
In all current versions of Node.js, readline.createInterface can be used as an async iterable, to read a file line by line - or just for the first line. This is also safe to use with empty files.
Unfortunately, the error handling logic is broken in versions of Node.js before 16, where certain file system errors may go uncaught even if the code is wrapped in a try-catch block because of the way asynchronous errors are propagated in streams. So I recommend using this method only in Node.js >= 16.
import { createReadStream } from "fs";
import { createInterface } from "readline";
async function readFirstLine(path) {
const inputStream = createReadStream(path);
try {
for await (const line of createInterface(inputStream)) return line;
return ''; // If the file is empty.
}
finally {
inputStream.destroy(); // Destroy file stream.
}
}
const firstLine = await readFirstLine("path/to/file");
//Here you go;
var lineReader = require('line-reader');
var async = require('async');
exports.readManyFiles = function(files) {
async.map(files,
function(file, callback))
lineReader.open(file, function(reader) {
if (reader.hasNextLine()) {
reader.nextLine(function(line) {
callback(null,line);
});
}
});
},
function(err, allLines) {
//do whatever you want to with the lines
})
}

Categories