I have an application that parses a CSV file. I am using the csv module and it basically works fine. However, if there is a bad row in the CSV file, the whole process fails.
Is there any way to skip bad rows and resume streaming after catching an error?
This is a simple example:
var csv = require('csv');
var stream = require('stream');
var parser = csv.parse({ delimiter: "," });
parser.on("data", (chunk) => {
    console.log("one chunk");
    chunk.forEach((datum) => {
        console.log("data: ", datum);
    });
});
parser.on("error", (err) => {
    // Skip the error and resume stream here
    console.log("one error: ", err.message);
});
var test = "00,01,02,03\n10,11,12,23\n21,22,\n30,31,32,33";
var rs = new stream.Readable();
rs._read = () => {};
rs.push(test);
rs.pipe(parser);
Here the third row has only three columns while the other rows have four. I want to catch the error and write out all the other rows. Is there any good strategy for doing this? Using some function or option in the csv module would be perfect.
Well, there are two things here.
The first is that you can pass relax_column_count: true in the csv.parse options, and that should do it.
But if you test it you will see that the last line is missing. In fact, the way you set up your stream, it would fail even with a proper CSV string, although it does work with a proper CSV file, so I suspected something was wrong with the stream as well.
So to sum up, this is the code.
var csv = require('csv');
var parser = csv.parse({ relax_column_count: true, delimiter: "," });
parser.on("data", (chunk) => {
    console.log("one chunk");
    chunk.forEach((datum) => {
        console.log("data: ", datum);
    });
});
parser.on("error", (err) => {
    // Skip the error and resume stream here
    console.log("one error: ", err.message);
});
parser.on('close', function () {
    console.log(parser);
});
require('fs').createReadStream('test.csv').pipe(parser);
And in test.csv
00,01,02,03
10,11,12,23
21,22,23,24
30,31,
34,35,36,37
As requested, here is the code working with a stream:
var csv = require('csv');
var stream = require('stream');
var parser = csv.parse({ relax_column_count: true, delimiter: "," });
parser.on("data", (chunk) => {
    console.log("one chunk");
    chunk.forEach((datum) => {
        console.log("data: ", datum);
    });
});
parser.on("error", (err) => {
    // Skip the error and resume stream here
    console.log("one error: ", err.message);
});
parser.on('close', function () {
    console.log(parser);
});
var test = "00,01,02,03\n10,11,12,23\n21,22,\n30,31,32,33";
const myReadable = new stream.Readable({
    read(size) {
        this.push(test);
        test = null;
    }
});
myReadable.pipe(parser);
I believe the problem with your stream was that you never pushed null at the end, so it was never ended properly.
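For completeness, a minimal sketch of the original snippet with both changes applied (relax_column_count plus an explicit push(null) to end the Readable):
var csv = require('csv');
var stream = require('stream');
var parser = csv.parse({ relax_column_count: true, delimiter: "," });
parser.on("data", (chunk) => {
    console.log("row: ", chunk);
});
parser.on("error", (err) => {
    console.log("one error: ", err.message);
});
var test = "00,01,02,03\n10,11,12,23\n21,22,\n30,31,32,33";
var rs = new stream.Readable();
rs._read = () => {};
rs.push(test);
rs.push(null); // signal end-of-stream so the parser can flush its last row
rs.pipe(parser);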
Suppose I have code as below:
const fs = require('node:fs')
const csvParser = require('csv-parser')
const rs = fs.createReadStream(dir)
const ws = fs.createWriteStream(outDir)
ws.write('[' + '\n')
rs.pipe(
    csvParser({
        mapHeaders: mapper,
    }),
)
    .on('data', (data) => {
        ws.write('\t' + JSON.stringify({ ...data, provider }) + ',\n')
    })
    .on('end', () => {
        ws.write(']')
        console.log('writing to a file has been completed >> ', OUT_DIR)
    })
Basically, what this code does is read from a large CSV file and stream the rows into a JSON file. As seen in the code, I append ',\n' as a separator after each row so that the JSON is properly formatted once streaming has completed.
The issue with this method is that it leaves a comma after the last entry, so you have to manually format the file (i.e. remove the trailing comma) each time the write completes.
For instance, the result comes out like this:
[
{"key1":"value1", "key2":"value2"},
{"key1":"value1", "key2":"value2"}, // <- note the trailing comma here.
]
Any suggestions for handling the trailing comma?
Also, if there is a better way of handling the read and write streams, please let me know. Thanks in advance.
I'd reverse the problem and include the comma before every item except the first.
let first = true;
ws.write('[' + '\n')
// ...
.on('data', (data) => {
    if (!first) {
        ws.write(',\n');
    }
    first = false;
    ws.write('\t' + JSON.stringify({ ...data, provider }))
})
[EDIT] As mentioned in the comments, this defeats the purpose of using a stream. Don't use it on really large files.
I would not handle the trailing comma; here is how I would do it instead.
The idea is to push each row into an array and then JSON.stringify() the whole array.
Something like this:
const rows = []
rs.pipe(
    csvParser({
        mapHeaders: mapper,
    }),
)
    .on('data', (data) => {
        rows.push({ ...data, provider })
    })
    .on('end', () => {
        ws.write(JSON.stringify(rows))
        console.log('writing to a file has been completed >> ', OUT_DIR)
    })
There is probably a better way to do it but I would do something like this:
const fs = require('node:fs')
const csvParser = require('csv-parser')
const rs = fs.createReadStream(dir)
const ws = fs.createWriteStream(outDir)
// Hold the previous row so the comma is written before the *next* row,
// which means the last row is never followed by a comma.
let buffer = null;
ws.write('[' + '\n')
rs.pipe(
    csvParser({
        mapHeaders: mapper,
    }),
)
    .on('data', (data) => {
        if (buffer) {
            ws.write(buffer + ',\n')
        }
        buffer = '\t' + JSON.stringify({ ...data, provider })
    })
    .on('end', () => {
        ws.write(buffer + '\n')
        ws.write(']')
        console.log('writing to a file has been completed >> ', OUT_DIR)
    })
ISSUE: I am trying to use Node.js streams to read a small CSV file (1 row) using the fast-csv module.
The CSV 'rows' are pushed to an array (rows[]) when the 'data' event is emitted. When 'end' is emitted, the data is updated in a DB. However, the 'end' event is triggered before the rows[] array can be populated. This happens intermittently, and sometimes the code works as intended.
My guess after reading the Node.js docs is that this is due to the small size of the CSV file. The data is being read in 'flowing' mode, and as soon as the first row is read, the 'end' event is triggered, which seems to happen before the record is pushed to the required array.
I tried using 'paused' mode, but it didn't work.
I am new to Node.js and not able to figure out how to make this function work. Any help or guidance would be appreciated.
CODE:
function updateToDb(filename, tempLocation) {
    const rows = [];
    const readStream = fs.createReadStream(tempLocation + '\\' + filename).pipe(csv.parse());
    return new Promise((resolve, reject) => {
        readStream.on('data', row => {
            console.log('Reading');
            rows.push(row);
        })
            .on('end', () => {
                console.log('Completed');
                let query = `UPDATE ${tables.earnings} SET result_date = CASE `;
                rows.forEach(element => {
                    query += `WHEN isin = '${element[0]}' AND announcement_date = '${element[1]}' THEN '${element[2]}' ELSE result_date`;
                });
                query += ' END';
                connection.query(query, (error, results) => {
                    if (error)
                        reject(error);
                    else
                        resolve(results.changedRows);
                });
            })
            .on('error', error => {
                reject(error);
            });
    });
}
What I'm trying to do is download a CSV file, read it line by line, and add each split line (split on ',') to tmparray.
This code works and prints all the elements in the array.
var request = require('request');
var fs = require('fs');
readline = require('readline');
try {
    request('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.csv').pipe(fs.createWriteStream("MyCsv.txt"));
} catch (e) {
    console.error(e);
}
var inputFile = 'MyCsv.csv';
var tmparray;
//read the file
var rd = readline.createInterface({
    input: fs.createReadStream('/home/nome/Node/MyCsv.csv')
});
try {
    //read line by line
    rd.on('line', (line) => {
        tmparray += line.split(",");
        //print the elements
        tmparray.forEach((element) => {
            console.log(element);
        }, this);
    });
} catch (e) {
    console.error(e);
}
What I want to do is print the array after I have assigned it.
I've tried this:
var request = require('request');
var fs = require('fs');
readline = require('readline');
try {
    request('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.csv').pipe(fs.createWriteStream("MyCsv.txt"));
} catch (e) {
    console.error(e);
}
var inputFile = 'MyCsv.csv';
var tmparray;
//read the file
var rd = readline.createInterface({
    input: fs.createReadStream('/home/nome/Node/MyCsv.csv')
});
try {
    //read line by line
    rd.on('line', (line) => {
        tmparray += line.split(",");
    });
} catch (e) {
    console.error(e);
} finally {
    console.log(tmparray); // undefined
    // or this: console.log(tmparray[0]) can't read the property '0' of undefined
}
but the array is printed as undefined.
The problem is that rd.on(...) is asynchronous.
That means that you are telling rd that when it reads a line, it should add it to tmparray — but that doesn't actually happen yet. It happens moments later, after you console.log(tmparray).
You should say rd.on('close', () => console.log(tmparray)) to tell Node "when you have finished reading rd, then log the data".
There are a couple of other issues in the code, but they should be easier to find once this is fixed. One thing worth noting: rd is a readline.Interface, so listening for 'line' is correct. The real problem with the array is that you're building it up with the += operator, which doesn't work; tmparray starts out undefined, and += coerces everything into one long string. Initialise it as an actual array and push (or concat) the split fields instead, as in the sketch below.
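Putting those fixes together, a minimal sketch (reusing the file path from your snippet) could look like this:
var fs = require('fs');
var readline = require('readline');
var rd = readline.createInterface({
    input: fs.createReadStream('/home/nome/Node/MyCsv.csv')
});
var tmparray = [];
rd.on('line', (line) => {
    // collect the fields of each line instead of string-concatenating them
    tmparray.push(...line.split(','));
});
rd.on('close', () => {
    // runs only after the whole file has been read
    console.log(tmparray);
});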
Why not use the csv package? It will give you the same result. Here is an example of transforming a CSV file into an array:
const csv = require('csv')
    , request = require('request');
var url = 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.csv';
request(url, function (err, response, data) {
    if (err) throw err;
    csv.parse(data, function (err, data) {
        if (err) throw err;
        // here you get your array
        console.log(data);
    });
});
I am new to Node.js and JavaScript. I have a results.json file that I want to keep a running log of results from a script that pulls images from the web. However, my current script only overwrites the existing result. How do I build upon or add to the results.json so each subsequent result is logged in the results.json file? I would like it to be valid json.
Here is a general example:
var currentSearchResult = someWebSearchResult
var fs = require('fs');
var json = JSON.stringify(['search result: ' + currentSearchResult], null, "\t");
fs.writeFile("results.json", json);
And the results.json:
[
"search result: currentSearchResult"
]
If you want the file to be valid JSON, you have to open your file, parse the JSON, append your new result to the array, transform it back into a string and save it again.
var fs = require('fs')
var currentSearchResult = 'example'
fs.readFile('results.json', function (err, data) {
    if (err) throw err
    var json = JSON.parse(data)
    json.push('search result: ' + currentSearchResult)
    fs.writeFile("results.json", JSON.stringify(json), function (err) {
        if (err) throw err
    })
})
In general, if you want to append to a file you should use:
fs.appendFile("results.json", json , function (err) {
if (err) throw err;
console.log('The "data to append" was appended to file!');
});
appendFile creates the file if it does not exist.
But if you want to append to JSON data, you first read the existing data and then overwrite it with the updated result:
fs.readFile('results.json', function (err, data) {
    var json = JSON.parse(data);
    json.push('search result: ' + currentSearchResult);
    fs.writeFile("results.json", JSON.stringify(json), function (err) {
        if (err) throw err;
        console.log('The "data to append" was appended to file!');
    });
})
Promise-based solution [JavaScript (ES6) + Node.js (v10 or above)]
const fsPromises = require('fs').promises;
fsPromises.readFile('myFile.json', 'utf8')
    .then(data => {
        let json = JSON.parse(data);
        json.myArr.push({ name: "Krishnan", salary: 5678 });
        fsPromises.writeFile('myFile.json', JSON.stringify(json))
            .then(() => { console.log('Append Success'); })
            .catch(err => { console.log("Append Failed: " + err); });
    })
    .catch(err => { console.log("Read Error: " + err); });
If your project supports JavaScript ES8 or above, you could use async/await instead of raw promise chains.
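For example, a sketch of the same read-modify-write using async/await (same myFile.json shape as above; the appendToMyArr helper name is just for illustration):
const fsPromises = require('fs').promises;
async function appendToMyArr() {
    try {
        const data = await fsPromises.readFile('myFile.json', 'utf8');
        const json = JSON.parse(data);
        json.myArr.push({ name: "Krishnan", salary: 5678 });
        await fsPromises.writeFile('myFile.json', JSON.stringify(json));
        console.log('Append Success');
    } catch (err) {
        console.log('Append Failed: ' + err);
    }
}
appendToMyArr();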
I need to pass a text file in on the terminal and then read the data from it. How can I do this?
node server.js file.txt
How do I pass in the path from the terminal, and how do I read it on the other side?
You'll want to use the process.argv array to access the command-line arguments to get the filename and the FileSystem module (fs) to read the file. For example:
// Make sure we got a filename on the command line.
if (process.argv.length < 3) {
    console.log('Usage: node ' + process.argv[1] + ' FILENAME');
    process.exit(1);
}
// Read the file and print its contents.
var fs = require('fs')
    , filename = process.argv[2];
fs.readFile(filename, 'utf8', function (err, data) {
    if (err) throw err;
    console.log('OK: ' + filename);
    console.log(data);
});
To break that down a little for you: process.argv will usually have length two, the zeroth item being the "node" interpreter and the first being the script that node is currently running; items after that were passed on the command line. Once you've pulled a filename from argv, you can use the filesystem functions to read the file and do whatever you want with its contents. Sample usage would look like this:
$ node ./cat.js file.txt
OK: file.txt
This is file.txt!
[Edit] As #wtfcoder mentions, using the "fs.readFile()" method might not be the best idea because it will buffer the entire contents of the file before yielding it to the callback function. This buffering could potentially use lots of memory but, more importantly, it does not take advantage of one of the core features of node.js - asynchronous, evented I/O.
The "node" way to process a large file (or any file, really) would be to use fs.read() and process each available chunk as it is available from the operating system. However, reading the file as such requires you to do your own (possibly) incremental parsing/processing of the file and some amount of buffering might be inevitable.
Using fs with Node.
var fs = require('fs');
try {
    var data = fs.readFileSync('file.txt', 'utf8');
    console.log(data.toString());
} catch (e) {
    console.log('Error:', e.stack);
}
IMHO, fs.readFile() should be avoided because it loads the whole file into memory and won't call the callback until the entire file has been read.
The easiest way to read a text file is to read it line by line. I recommend a BufferedReader:
new BufferedReader ("file", { encoding: "utf8" })
.on ("error", function (error){
console.log ("error: " + error);
})
.on ("line", function (line){
console.log ("line: " + line);
})
.on ("end", function (){
console.log ("EOF");
})
.read ();
For complex data structures like .properties or JSON files you need to use a parser (internally it should also use a buffered reader).
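If you would rather stick to Node's built-in modules, the readline module gives a similar line-by-line API; a rough sketch:
var fs = require('fs');
var readline = require('readline');
var rl = readline.createInterface({
    input: fs.createReadStream('file', { encoding: 'utf8' })
});
rl.on('line', function (line) {
    console.log('line: ' + line);
});
rl.on('close', function () {
    console.log('EOF');
});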
You can use a read stream and pipe to read the file line by line without reading the whole file into memory at once.
var fs = require('fs'),
    es = require('event-stream'),
    os = require('os');
var s = fs.createReadStream(path)
    .pipe(es.split())
    .pipe(es.mapSync(function (line) {
            //pause the readstream
            s.pause();
            console.log("line:", line);
            s.resume();
        })
        .on('error', function (err) {
            console.log('Error:', err);
        })
        .on('end', function () {
            console.log('Finish reading.');
        })
    );
I am posting a complete example which I finally got working. Here I am reading in a file rooms/rooms.txt from a script rooms/rooms.js
var fs = require('fs');
var path = require('path');
var readStream = fs.createReadStream(path.join(__dirname, '../rooms') + '/rooms.txt', 'utf8');
let data = '';
readStream.on('data', function (chunk) {
    data += chunk;
}).on('end', function () {
    console.log(data);
});
The async way of life:
#! /usr/bin/node
const fs = require('fs');
function readall (stream)
{
    return new Promise ((resolve, reject) => {
        const chunks = [];
        stream.on ('error', (error) => reject (error));
        stream.on ('data', (chunk) => chunk && chunks.push (chunk));
        stream.on ('end', () => resolve (Buffer.concat (chunks)));
    });
}
function readfile (filename)
{
    return readall (fs.createReadStream (filename));
}
(async () => {
    let content = await readfile ('/etc/ssh/moduli').catch ((e) => {})
    if (content)
        console.log ("size:", content.length,
            "head:", content.slice (0, 46).toString ());
})();