I have a simple txt file with data in this format with millions of lines:
{"a":9876312,"b":1572568981512}
{"a":9876312,"b":1572568981542}
I want to convert this into a file with a ".json" extension, using the reduce function and a return statement in Node.js, so that the result looks like this:
[{"a":9876312,"b":1572568981512},
{"a":9876312,"b":1572568981542}]
Any help will be really really appreciated. Thanks :)
So far I have tried this:
const fs = require('fs');
const FILE_NAME = 'abc.txt';
const x = mapEvents(getJSONFileData(FILE_NAME));
/**
 * Reads `filename` synchronously as UTF-8 text, splits it on '\n' and runs
 * JSON.parse on every resulting piece.
 *
 * @param {string} filename - Path of the file to read.
 * @returns {Array} One parsed value per '\n'-separated piece.
 */
function getJSONFileData(filename) {
return fs.readFileSync(filename, 'utf-8')
.split('\n')
// NOTE: empty pieces are not filtered out, so a trailing newline leaves an empty last piece and JSON.parse('') throws a SyntaxError.
.map(JSON.parse)
}
/**
 * Calls reduce over `events`. The function has no `return` statement of its
 * own, so a call evaluates to undefined regardless of what the reduce
 * callback produces.
 *
 * @param {Array} events - Parsed event objects.
 */
function mapEvents(events) {
// The value produced by reduce is discarded, and no initial accumulator is supplied.
events.reduce((acc, data) => {
// NOTE: `{data.a, data.b}` is not valid object-literal syntax.
return [{data.a, data.b}]
});
}
console.log(x)
I am getting an 'undefined' value constantly
I have found some issues, in your code.
You haven't returned anything from the mapEvents function, which makes the value of your variable x undefined.
getJSONFileData needs some fixing.
You can use below code:-
const fs = require('fs');
const FILE_NAME = 'abc.txt';
const x = mapEvents(getJSONFileData(FILE_NAME));
/**
 * Reads a file that holds one JSON document per line and parses every
 * non-blank line.
 *
 * Lines are split on both LF and CRLF, and whitespace-only lines are skipped,
 * so a trailing newline or a Windows-style file does not make JSON.parse throw.
 *
 * @param {string} filename - Path of the newline-delimited JSON file.
 * @returns {Array} The parsed value of each non-blank line, in file order.
 * @throws {SyntaxError} If a non-blank line is not valid JSON.
 */
function getJSONFileData(filename) {
  return fs
    .readFileSync(filename, 'utf-8')
    .split(/\r?\n/)
    .filter((line) => line.trim() !== '')
    // Wrap JSON.parse so map's index argument is not passed as the reviver.
    .map((line) => JSON.parse(line));
}
/**
 * Serializes the parsed events into a single JSON string.
 *
 * @param {*} events - Value to serialize (an array of event objects in this script).
 * @returns {string} The JSON text produced by JSON.stringify.
 */
function mapEvents(events) {
  const serialized = JSON.stringify(events);
  return serialized;
}
console.log(x);
Related
I am reading a json file from within a zip file using jszip. I can open it and get the information that I want into the console from my function. I can't however get that information out of my javascript function. Maybe I am doing it wrong. Don't usually code using js.
const JSZip = require("jszip");
const fs = require("fs");
var myReturn;
/**
 * Reads the zip at `bookPath + bookName`, loads it with JSZip, reads the entry
 * `filePath + fileName` as a string, parses it as JSON and stores the
 * stringified `mydata[0][keyOption]` in the outer `myReturn` variable.
 *
 * The function body itself contains no `return`; the `return myReturn` below
 * belongs to the innermost `.then` callback, so a call to readJsons evaluates
 * to undefined.
 *
 * @param {string} bookPath - Prefix of the zip path.
 * @param {string} bookName - Suffix of the zip path.
 * @param {string} fileName - Suffix of the entry path inside the zip.
 * @param {string} filePath - Prefix of the entry path inside the zip.
 * @param {string} keyOption - Key looked up on the first element of the parsed JSON.
 */
function readJsons(bookPath,bookName,fileName,filePath,keyOption){
fs.readFile(bookPath + bookName, function(err, data) {
if (err) throw err;
JSZip.loadAsync(data).then(function (zip) {
// Read the contents of the '.txt' file
zip.file(filePath + fileName).async("string").then(function (data) {
var mydata = JSON.parse(data);
//gets the value of the key entered
myReturn = JSON.stringify(mydata[0][keyOption]); //value here should be "test book"
console.log(myReturn); //printed in console is "test book" works to here
// This returns from the promise callback only, not from readJsons.
return myReturn;
});
});
});
}
console.log(readJsons('simplelbook.zip','','frontMatter.json','','bookName'));
The problem is that you are returning inside the callback, so you aren't returning in the actual function. The solution would be using async/await instead:
const JSZip = require("jszip");
const fs = require("fs");
const util = require("util"); // require the util module
const readFile = util.promisify(fs.readFile); // transform fs.readFile into a more compatible form
/**
 * Loads a zip archive, reads one JSON entry from it and returns the
 * stringified value of `keyOption` on the first element of the parsed JSON.
 *
 * Any failure along the way is logged with console.error and the returned
 * promise then resolves to undefined.
 *
 * @param {string} bookPath - Prefix of the archive path.
 * @param {string} bookName - Suffix of the archive path.
 * @param {string} fileName - Suffix of the entry path inside the archive.
 * @param {string} filePath - Prefix of the entry path inside the archive.
 * @param {string} keyOption - Key to look up on the first parsed element.
 * @returns {Promise<string|undefined>} The JSON-stringified value, or undefined after a logged error.
 */
async function readJsons(bookPath, bookName, fileName, filePath, keyOption) {
  try {
    const archiveBytes = await readFile(bookPath + bookName);
    const archive = await JSZip.loadAsync(archiveBytes);
    const entryText = await archive.file(filePath + fileName).async("string");
    const parsed = JSON.parse(entryText);
    return JSON.stringify(parsed[0][keyOption]);
  } catch (e) {
    // Failures are reported here rather than propagated to the caller.
    console.error(e);
  }
}
(async () => { // self executing async function so we can use await
console.log(
await readJsons("simplelbook.zip", "", "frontMatter.json", "", "bookName")
);
})()
Notice I have imported the util module to turn fs.readFile into a function that is more suited for async/await :)
I am running a script which looks into a directory and lists files, then checks for the file type to process, if the extension matches then the file is read and each line of the file (.col which is just a txt file renamed) is inserted into an array.
Now after the file is read and the array is populated I would like to use the array and do some further processing, e.g create a db record. I am missing something really basic here because on each console log I do as below I always get the full items (in my array) of the contents of all files.
So to make it a bit simpler:
array is empty.
Then file is read and processed and array now has
array[0]=line 0 of file
array[1]=line 1 of file etc
const fs = require('fs');
const readline =require('readline');
var files = fs.readdirSync('/home/proj/data');
var path = require('path');
// `model` is one array shared by every reader created in the loop below, so
// lines from all matching files accumulate in it.
var model=[];
// One readline interface per matching file, stored at the file's index.
var lineReader=[];
for(var i=0; i<files.length; i++) {
if(path.extname(files[i]) === ".col") {
lineReader[i] = readline.createInterface({
// NOTE(review): files[i] is the bare name returned by readdirSync, opened here without the directory prefix - confirm the working directory.
input: require('fs').createReadStream(files[i])
});
lineReader[i].on('line', function (line) {
model.push(line);
}).on('close', async function() {
// Runs once per file, but prints the shared array, i.e. everything pushed so far by any reader.
console.log(model);
});
}
}
Instead the script is run and array[] holds all lines of all files that match the extension.
Your help is greatly appreciated and anyone is allowed to scorch my JS as I am pretty sure I am missing something basic here.
So, you want to read the files in parallel (because that's what your program does) and put it in an array of arrays?
You can make the reading file mechanism a promise and use it using Promise.all. Here is an example to get you started.
const fs = require('fs');
const readline = require('readline');
var files = fs.readdirSync('./');
var path = require('path');
function readFile(fileName) {
return new Promise(resolve => {
const array = [];
const lineReader = readline.createInterface({
input: fs.createReadStream(files[i])
});
lineReader.on('line', function (line) {
array.push(line);
}).on('close', async function () {
//do some proc
console.log(array);
resolve(array);
});
});
}
// Collects one promise per matching file; readFile is invoked synchronously
// inside the loop for every entry whose extension is ".js".
const readFilePromises = [];
for (var i = 0; i < files.length; i++) {
if (path.extname(files[i]) === ".js") {
readFilePromises.push(readFile(files[i]));
}
}
// Waits for every collected promise; `data` holds the resolved values in the
// same order as readFilePromises.
Promise.all(readFilePromises) //or await Promise.all([..])
.then(data => {
console.log(data);//will be array of arrays
})
If you want a single Array you can always flatten the result using data.flat()
If your files are not very big and sync methods are OK, you can simplify the code this way:
'use strict';
const fs = require('fs');
const path = require('path');
// Directory that is scanned for `.col` files.
const DATA_DIR = '/home/proj/data';

// readdirSync returns bare file names, so each name is joined with DATA_DIR
// before reading; otherwise the files would be looked up relative to the
// current working directory. flatMap is used instead of push(...lines) so a
// very large file cannot exceed the engine's argument-count limit.
const model = fs
  .readdirSync(DATA_DIR)
  .filter((name) => path.extname(name) === '.col')
  .flatMap((name) => fs.readFileSync(path.join(DATA_DIR, name), 'utf8').split('\n'));
console.log(model);
For some reason I'm having such a hard time converting this txt file to an actual javascript array.
myJson.txt
{"action": "key press", "timestamp": 1523783621, "user": "neovim"}
{"action": "unlike", "timestamp": 1523784584, "user": "r00k"}
{"action": "touch", "timestamp": 1523784963, "user": "eevee"}
{"action": "report as spam", "timestamp": 1523786005, "user": "moxie"}
Currently what I have that doesn't work
const fs = require('fs');
/**
 * Reads `input_file_path` as UTF-8, removes line breaks, inserts a comma
 * between adjacent `}{` pairs and logs the result of JSON.parse wrapped in an
 * array literal.
 *
 * @param {string} input_file_path - Path of the file holding one JSON object per line.
 */
function convert(input_file_path) {
const file = fs.readFileSync(input_file_path, 'utf8');
const newFormat = file
.replace(/(\r\n\t|\n|\r\t)/gm,'')
.replace(/}{/g, '},{');
// NOTE: newFormat has the shape `{...},{...}` with no enclosing brackets, which is not a single JSON document, so JSON.parse throws here.
console.log([JSON.parse(newFormat)]);
}
convert('myJson.txt');
Since your file contains a JSON object per line, you could read that file line by line, using readline.
Each line is then parsed, and push into an array, which is then returned (resolved) after the file is fully read.
'use strict';
const fs = require('fs');
const readline = require('readline');
function convert(file) {
return new Promise((resolve, reject) => {
const stream = fs.createReadStream(file);
// Handle stream error (IE: file not found)
stream.on('error', reject);
const reader = readline.createInterface({
input: stream
});
const array = [];
reader.on('line', line => {
array.push(JSON.parse(line));
});
reader.on('close', () => resolve(array));
});
}
convert('myJson.txt')
.then(res => {
console.log(res);
})
.catch(err => console.error(err));
I would have done this in this way
var fs = require('fs');
var readline = require('readline');
// Parsed value of every non-blank line of demo.txt, in file order.
const array = [];

// Stream demo.txt (located next to this script) one line at a time.
const rd = readline.createInterface({
  input: fs.createReadStream(`${__dirname}/demo.txt`)
});

rd.on('line', (line) => {
  // Skip blank lines so JSON.parse is only handed real documents.
  if (line.trim() === '') {
    return;
  }
  array.push(JSON.parse(line));
});

// Once the whole file has been read, print the `action` of each parsed object.
rd.on('close', () => {
  array.forEach((e) => console.log(e.action));
});
What's happening here is that I am reading the lines of the file one by one using readline, which is one of the core modules of Node.js, listening for its events and doing what is needed in each handler.
And yeah, you'll have to parse the line to JSON for sure ;)
Thanks
The problem with your code is that you're trying to parse a JS array as if it were a JSON array, whereas a JSON array must be a single string that includes the square brackets.
Here what you're trying to do:
jsArray = ['{"foo": "bar"}, {"foo":"baz"}']
This is a valid JS array of a single string value '{"foo": "bar"}, {"foo":"baz"}'.
while
// The square brackets are inside the string itself, so JSON.parse(jsonArrayStr) yields a two-element array.
const jsonArrayStr = '[{"foo": "bar"}, {"foo":"baz"}]';
This is a valid JSON array string (as the square brackets are part of the string).
So as to get your code running, you need to add the square brackets to your string before parsing it.
/**
 * Converts a file holding one JSON object per line into a JavaScript array.
 *
 * The non-blank lines are joined with commas and wrapped in a single pair of
 * square brackets, which turns the content into one valid JSON array string
 * that JSON.parse can handle.
 *
 * @param {string} input_file_path - Path of the file with one JSON object per line.
 * @returns {Array} The parsed array (also logged to the console).
 * @throws {SyntaxError} If a line is not valid JSON.
 */
function convert(input_file_path) {
  const file = fs.readFileSync(input_file_path, 'utf8');
  const newFormat = file
    .split(/\r?\n/)
    .filter((line) => line.trim() !== '')
    .join(',');
  const parsed = JSON.parse(`[${newFormat}]`);
  console.log(parsed);
  return parsed;
}
What I'm doing in the script is reading the content of text file line by line and storing it to array along with converting it to JSON object. When we reach last line and our JSON array/object has all the data. Now you can write this object to a new file fs.writeFileSync() after converting JSON object to string with JSON.stringify().
Note :- You've to install Line reader package i.e. npm install line-reader
var lineReader = require('line-reader');
var fs = require('fs')
var jsonObj = {};
var obj = [];
var file = "fileName.json"
var num= 0;
lineRead();
/**
 * Walks input.txt line by line, converting each line via convertJson, and
 * writes the collected `obj` array as JSON to `file` once the last line has
 * been handled.
 */
async function lineRead() {
  const handleLine = (line, last) => {
    // to check on which line we're.
    console.log(num);
    num++;
    convertJson(line);
    if (!last) {
      return;
    }
    //when it's last line we convert json obj to string and save it to new file.
    fs.writeFileSync(file, JSON.stringify(obj));
  };
  lineReader.eachLine('input.txt', handleLine);
}
/**
 * Parses one line of JSON text, remembers it in the module-level `jsonObj`
 * and appends it to the module-level `obj` array.
 *
 * @param {string} data - JSON text of a single line.
 * @throws {SyntaxError} If `data` is not valid JSON.
 */
function convertJson(data) {
  const value = JSON.parse(data);
  //storing the value in json object
  jsonObj = value;
  obj.push(value);
}
I'm trying to make an app that searches for all files
that contain a specified string under the current directory and its subdirectories.
As I understand it, this means I need to create a read stream, loop over it and load the read data into an array; if the word is found, print __filename and __dirname, and if it is not found, print a "not found" message.
unfortunately, i could not make it work...
any clue?
var path = require('path'),
fs=require('fs');
/**
 * Attempt at a recursive search below `startPath`.
 *
 * NOTE: as posted, this snippet does not run: it refers to `filename` and
 * `createReadStream`, neither of which is declared, and the braces at the end
 * do not balance (the function is closed before the `if (!found)` check).
 *
 * @param {string} startPath - Directory to start from.
 * @param {string} filter - Never read inside the body.
 * @param {string} ext - Extension compared against path.extname / path.basename.
 */
function fromDir(startPath,filter,ext){
if (!fs.existsSync(startPath)){
console.log("no dir ",startPath);
return;
};
var files=fs.readdirSync(startPath);
let found = files.find((file) => {
let thisFilename = path.join(startPath, file);
let stat = fs.lstatSync(thisFilename);
// NOTE: the `fs` module object is passed here instead of a file path.
var readStream = fs.createReadStream(fs);
var readline = require('readline');
if (stat.isDirectory()) {
// NOTE: called with four arguments, although fromDir declares three parameters.
fromDir(thisFilename, filename,readline, ext);
} else {
if (path.extname(createReadStream) === ext && path.basename(thisFilename, ext) === filename) {
return true;
}
}
});
console.log('-- your word has found on : ',filename,__dirname);
}
// From here on the code sits outside the function body, where `found` is not in scope.
if (!found) {
console.log("Sorry, we didn't find your term");
}
}
fromDir('./', process.argv[3], process.argv[2]);
Because not everything was included in the question, I made an assumption:
We are looking for full words (if that's not the case, replace the regex with a simple indexOf()).
Now, I've split the code into two functions - to make it both more readable and easier to recursively find the files.
Synchronous version:
const path = require('path');
const fs = require('fs');
/**
 * Logs every file below `dir` with extension `ext` whose content contains
 * `filter` as a whole word.
 *
 * `filter` is treated as literal text: regular-expression metacharacters in
 * it are escaped before the pattern is built, so input such as "a.c" or
 * "(x+)+" cannot change the meaning of the search. The pattern is compiled
 * once instead of once per file.
 *
 * @param {string} dir - Directory to search recursively.
 * @param {string} filter - Word to look for.
 * @param {string} ext - File extension to consider, including the dot (e.g. ".txt").
 * @returns {void}
 */
function searchFilesInDirectory(dir, filter, ext) {
  if (!fs.existsSync(dir)) {
    console.log(`Specified directory: ${dir} does not exist`);
    return;
  }

  // We want full words, so we use full word boundary in regex.
  const escapedFilter = String(filter).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(`\\b${escapedFilter}\\b`);

  const files = getFilesInDirectory(dir, ext) || [];
  files.forEach((file) => {
    const fileContent = fs.readFileSync(file, 'utf8');
    if (regex.test(fileContent)) {
      console.log(`Your word was found in file: ${file}`);
    }
  });
}
/**
 * Using recursion, finds every file with the desired extension, even if it is
 * deeply nested in subfolders.
 *
 * Always returns an array: a missing directory yields an empty list (after
 * logging a message) rather than undefined, so callers can iterate the result
 * unconditionally.
 *
 * @param {string} dir - Directory to walk.
 * @param {string} ext - File extension to collect, including the dot (e.g. ".txt").
 * @returns {string[]} Paths of all matching files below `dir`.
 */
function getFilesInDirectory(dir, ext) {
  if (!fs.existsSync(dir)) {
    console.log(`Specified directory: ${dir} does not exist`);
    return [];
  }

  const files = [];
  for (const file of fs.readdirSync(dir)) {
    const filePath = path.join(dir, file);
    // lstat does not follow symbolic links, so a link to a directory is not descended into.
    const stat = fs.lstatSync(filePath);

    // If we hit a directory, apply our function to that dir. If we hit a file, add it to the array of files.
    if (stat.isDirectory()) {
      for (const nestedFile of getFilesInDirectory(filePath, ext)) {
        files.push(nestedFile);
      }
    } else if (path.extname(file) === ext) {
      files.push(filePath);
    }
  }

  return files;
}
Async version - because async is cool:
const path = require('path');
const fs = require('fs');
const util = require('util');
const fsReaddir = util.promisify(fs.readdir);
const fsReadFile = util.promisify(fs.readFile);
const fsLstat = util.promisify(fs.lstat);
/**
 * Asynchronously logs every file below `dir` with extension `ext` whose
 * content contains `filter` as a whole word.
 *
 * The loop variable is declared with `const`; the undeclared form creates an
 * implicit global and throws a ReferenceError in strict mode and ES modules.
 * `filter` is treated as literal text (regex metacharacters are escaped) and
 * the pattern is compiled once.
 *
 * @param {string} dir - Directory to search recursively.
 * @param {string} filter - Word to look for.
 * @param {string} ext - File extension to consider, including the dot (e.g. ".txt").
 * @returns {Promise<void>} Settles when every candidate file has been checked.
 */
async function searchFilesInDirectoryAsync(dir, filter, ext) {
  const found = await getFilesInDirectoryAsync(dir, ext);

  // We want full words, so we use full word boundary in regex.
  const escapedFilter = String(filter).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(`\\b${escapedFilter}\\b`);

  // Files are read one after another so matches are logged in a stable order.
  for (const file of found) {
    const fileContent = await fsReadFile(file, 'utf8');
    if (regex.test(fileContent)) {
      console.log(`Your word was found in file: ${file}`);
    }
  }
}
/**
 * Using recursion, finds every file with the desired extension, even if it is
 * deeply nested in subfolders.
 *
 * File-system errors propagate unchanged, so callers can still inspect
 * `err.code` (for example "ENOENT") and the original stack trace.
 *
 * @param {string} dir - Directory to walk.
 * @param {string} ext - File extension to collect, including the dot (e.g. ".txt").
 * @returns {Promise<string[]>} Paths of all matching files below `dir`.
 */
async function getFilesInDirectoryAsync(dir, ext) {
  const files = [];
  const filesFromDirectory = await fsReaddir(dir);

  for (const file of filesFromDirectory) {
    const filePath = path.join(dir, file);
    const stat = await fsLstat(filePath);

    // If we hit a directory, apply our function to that dir. If we hit a file, add it to the array of files.
    if (stat.isDirectory()) {
      const nestedFiles = await getFilesInDirectoryAsync(filePath, ext);
      for (const nestedFile of nestedFiles) {
        files.push(nestedFile);
      }
    } else if (path.extname(file) === ext) {
      files.push(filePath);
    }
  }

  return files;
}
If you have not worked with / understand async/await yet, it is a great step to take and learn it as soon as possible. Trust me, you will love not seeing those ugly callbacks again!
UPDATE:
As you pointed in comments, you want it to execute the function after running node process on the file. You also want to pass the function parameters as node's arguments.
To do that, at the end of your file, you need to add:
searchFilesInDirectory(process.argv[2], process.argv[3], process.argv[4]);
This extracts our arguments and passes them to the function.
With that, you can call our process like so (example arguments):
node yourscriptname.js ./ james .txt
Personally, if I were to write this, I would leverage the beauty of asynchronous code, and Node.js's async / await.
As a very side note:
You can easily improve readability of your code, if you add proper formatting to it. Don't get me wrong, it's not terrible - but it can be improved:
Use spaces OR newlines after commas.
Use spaces around equality operators and arithmetic operators.
As long as you are consistent with formatting, everything looks much better.
Imagine you have many long text files, and you need to only extract data from the first line of each one (without reading any further content). What is the best way in Node JS to do it?
Thanks!
I ended up adopting this solution, which seems the most performant I've seen so far:
var fs = require('fs');
var Q = require('q');
/**
 * Resolves with the first line of a text file, reading no more chunks than
 * necessary.
 *
 * The end of the line is tracked as an absolute offset into the accumulated
 * text. When the file contains no newline at all, the whole content is
 * returned, so the last character is no longer cut off.
 *
 * @param {string} path - Path of the file to read.
 * @returns {Promise<string>} The first line without its "\n" ("" for an empty file).
 */
function readFirstLine (path) {
  return Q.promise(function (resolve, reject) {
    const rs = fs.createReadStream(path, {encoding: 'utf8'});
    let acc = '';
    // Offset of the first "\n" within acc, or -1 while none has been seen.
    let lineEnd = -1;
    rs
      .on('data', function (chunk) {
        if (lineEnd !== -1) {
          // A chunk delivered after the newline was found; nothing more is needed.
          return;
        }
        const index = chunk.indexOf('\n');
        if (index !== -1) {
          lineEnd = acc.length + index;
        }
        acc += chunk;
        if (lineEnd !== -1) {
          rs.close();
        }
      })
      .on('close', function () {
        resolve(lineEnd === -1 ? acc : acc.slice(0, lineEnd));
      })
      .on('error', function (err) {
        reject(err);
      });
  });
}
I created a npm module for convenience, named "firstline".
Thanks to #dandavis for the suggestion to use String.prototype.slice()!
There's a built-in module almost for this case - readline. It avoids messing with chunks and so forth. The code would look like the following:
const fs = require('fs');
const readline = require('readline');
async function getFirstLine(pathToFile) {
const readable = fs.createReadStream(pathToFile);
const reader = readline.createInterface({ input: readable });
const line = await new Promise((resolve) => {
reader.on('line', (line) => {
reader.close();
resolve(line);
});
});
readable.close();
return line;
}
I know this doesn't exactly answer the question but for those who are looking for a READABLE and simple way to do so:
const fs = require('fs').promises;
/**
 * Loads the whole file as UTF-8 text and returns everything up to the first
 * line terminator.
 *
 * @param {string} filePath - Path of the text file.
 * @returns {Promise<string>} The first line, or "" when it is empty.
 */
async function getFirstLine(filePath) {
  const text = await fs.readFile(filePath, 'utf-8');
  const firstLineMatch = /(^.*)/.exec(text);
  if (firstLineMatch === null) {
    return '';
  }
  return firstLineMatch[1] || '';
}
NOTE:
naturally, this will only work with text files, which I assumed you used from your description
this will work with empty files and will return an empty string
this regexp is very performant since it is simple (no OR conditions or complex matches) and only matches the first line; note, however, that fs.readFile still loads the whole file into memory before the regexp runs
Please try this:
https://github.com/yinrong/node-line-stream-util#get-head-lines
It unpipes the upstream once it has got the head lines.
Node.js >= 16
In all current versions of Node.js, readline.createInterface can be used as an async iterable, to read a file line by line - or just for the first line. This is also safe to use with empty files.
Unfortunately, the error handling logic is broken in versions of Node.js before 16, where certain file system errors may go uncaught even if the code is wrapped in a try-catch block because of the way asynchronous errors are propagated in streams. So I recommend using this method only in Node.js >= 16.
import { createReadStream } from "fs";
import { createInterface } from "readline";
/**
 * Returns the first line of a file by iterating the readline interface and
 * stopping after one item.
 *
 * @param {string} path - Path of the file to read.
 * @returns {Promise<string>} The first line, or "" if the file is empty.
 */
async function readFirstLine(path) {
  const inputStream = createReadStream(path);
  let firstLine = ''; // Stays empty if the file has no lines.
  try {
    for await (const line of createInterface(inputStream)) {
      firstLine = line;
      break;
    }
  } finally {
    inputStream.destroy(); // Destroy file stream.
  }
  return firstLine;
}
const firstLine = await readFirstLine("path/to/file");
//Here you go;
var lineReader = require('line-reader');
var async = require('async');
exports.readManyFiles = function(files) {
async.map(files,
function(file, callback))
lineReader.open(file, function(reader) {
if (reader.hasNextLine()) {
reader.nextLine(function(line) {
callback(null,line);
});
}
});
},
function(err, allLines) {
//do whatever you want to with the lines
})
}