Get first column of CSV file - JavaScript

I have tried PapaParse without success. How would one get the first column value of a CSV file?
const csv = require('csv-parser');
const fs = require('fs');

(async () => {
  try {
    fs.createReadStream('test.csv')
      .pipe(csv())
      .on('data', (row) => {
        console.log(row);
      })
      .on('end', () => {
        console.log('CSV file successfully processed');
      });
  } catch (err) {
    console.error(err);
  }
})();

For anyone in the future: define a helper function and assign its result to a const holding your list of URLs from the CSV. The index in split(',')[1] selects the column (here the second column; use [0] for the first).
const fs = require('fs');

function readURLFile(path) {
  return fs.readFileSync(path, 'utf-8')
    .split('\n')
    .map((elt) => {
      const url = elt.split(',')[1].replace('\r', '');
      return `http://${url.toLowerCase()}`;
    });
}
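Alternatively, if you want to stay with csv-parser from the original snippet, here is a minimal sketch (assuming the file has a header row; the test.csv path is just a placeholder). csv-parser emits each row as an object keyed by header name, so the first value of each row object is the first column:
const csv = require('csv-parser');
const fs = require('fs');

const firstColumn = [];

fs.createReadStream('test.csv') // placeholder path
  .pipe(csv())
  .on('data', (row) => {
    // csv-parser keys each row by its headers, so the first value
    // is the value of the first column for that row
    firstColumn.push(Object.values(row)[0]);
  })
  .on('end', () => {
    console.log(firstColumn);
  });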

Related

How to run a cron job when the code is in another file - Node.js

I am trying to execute a cron job every hour.
I have initiated the cron job in my index.js file as below:
const cron = require('node-cron');
const readDocuments = require("./cronJobs/readDocuments");
var task = cron.schedule('0 0 */1 * * *', readDocuments);
task.start();
The actual code to be executed is written in ./cronJobs/readDocuments; the code inside this file is below, where I am trying to read a CSV file.
readDocuments.js
const ResponseManager = require("../common/ResponseManager");
const fs = require('fs');
const csv = require('csv-parser');

console.log('Read Document....')
try {
  const filepath = "../files/abc.csv";
  fs.createReadStream(filepath)
    .on('error', () => {
      // handle error
    })
    .pipe(csv())
    .on('data', (row) => {
      console.log('rowrowrow', row)
    })
    .on('end', () => {
      // handle end of CSV
    })
} catch (error) {
  console.log('errorerror', error)
  res.status(500).json(ResponseManager(false, error.message));
}
When I run it with node index.js, the console prints Read Document...., but the rest of the code is not executed.
Instead it throws the error 'execution must be a function'.
How do I resolve this, and what is the correct way to execute the code inside another file with a cron job from index.js?
Please help. Thanks!!
You need to export the function and consume it in your index.js file.
The second argument to cron.schedule() must be a function, so wrap the code in a function and export it from the module:
const ResponseManager = require("../common/ResponseManager");
const fs = require('fs');
const csv = require('csv-parser');

module.exports = function () {
  console.log('Read Document....')
  try {
    const filepath = "../files/abc.csv";
    fs.createReadStream(filepath)
      .on('error', () => {
        // handle error
      })
      .pipe(csv())
      .on('data', (row) => {
        console.log('rowrowrow', row)
      })
      .on('end', () => {
        // handle end of CSV
      });
  } catch (error) {
    console.log('errorerror', error)
    // Note: there is no `res` object in a cron job context, so report the
    // error here (e.g. log it) rather than trying to send an HTTP response.
    // res.status(500).json(ResponseManager(false, error.message));
  }
};
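With that export in place, the index.js from the question should work unchanged; a minimal sketch of the consuming side, assuming the same file layout:
const cron = require('node-cron');
const readDocuments = require('./cronJobs/readDocuments');

// readDocuments now resolves to a function, which is what
// cron.schedule() expects as its second argument
const task = cron.schedule('0 0 */1 * * *', readDocuments);
task.start();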

Read CSV file with Node.js with a max number of rows

I'm trying to read a CSV file with node.js using the csv-parser library.
Since it's a big file, I need to check the header and the first 100 rows, then stop and return true if everything is OK, or false if the data doesn't satisfy the condition.
How can I achieve this?
This is what I have so far:
const csv = require('csv-parser');
const fs = require('fs');
const moment = require('moment');

exports.checkFileFormat = (file) => {
  let stream = fs.createReadStream(file.tempFilePath)
    .pipe(csv())
    .on('headers', (headers) => {
      /*...some logic...*/
    })
    .on('data', (row) => {
      // csv-parser yields string values, so convert before the numeric check
      if (isNaN(Number(row["USAGE"]))
        || !moment(row["START_DATE"], 'YYYYMMDD', true).isValid()
        || !moment(row["END_DATE"], 'YYYYMMDD', true).isValid()) {
        stream.unpipe(csv());
        return false;
      }
    })
    .on('end', () => {
      console.log('CSV file successfully processed');
    });
  return true;
};
In a previous version I had also declared: var num = 100 and tested it inside .on('data', (row) => {...} but it didn't work.
Following up from my comment: make the function checkFileFormat return a promise. Inside the promise, resolve(false) instead of return false, and resolve(true) in the .on('end') callback. I'm not completely sure this will work, but that's how I would approach it:
const csv = require('csv-parser');
const fs = require('fs');
const moment = require('moment');

exports.checkFileFormat = (file) => {
  return new Promise((resolve, reject) => {
    let stream = fs.createReadStream(file.tempFilePath)
      .pipe(csv())
      .on('headers', (headers) => {
        /*...some logic...*/
      })
      .on('data', (row) => {
        // csv-parser yields string values, so convert before the numeric check
        if (isNaN(Number(row["USAGE"]))
          || !moment(row["START_DATE"], 'YYYYMMDD', true).isValid()
          || !moment(row["END_DATE"], 'YYYYMMDD', true).isValid()) {
          stream.end(); // stream.unpipe(csv());
          resolve(false);
        }
      })
      .on('end', () => {
        console.log('CSV file successfully processed');
        resolve(true);
      });
  });
};
If you want to read a certain number of lines and then stop, you can try the following:
const csv = require('csv-parser');
const fs = require('fs');

let count = 0;
let maxLines = 3;

let fsStream = fs.createReadStream('./data.csv');
let csvStream = csv();

fsStream.pipe(csvStream)
  .on('headers', (headers) => {
    console.log(headers)
  })
  .on('data', (data) => {
    if (count >= maxLines) {
      fsStream.unpipe(csvStream);
      csvStream.end();
      fsStream.destroy();
    } else {
      console.log(data);
      count++;
    }
  });
Basically you count each row that is read, and when the max is reached you unpipe the csv stream from the fs stream, then end the csv stream and finally destroy the fs stream.
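Putting the two answers together, here is a sketch of what the original checkFileFormat could look like: validate only the first 100 rows, stop reading early, and resolve the promise with true or false (it assumes the same USAGE/START_DATE/END_DATE columns and the moment library from the question):
const csv = require('csv-parser');
const fs = require('fs');
const moment = require('moment');

const MAX_ROWS = 100;

exports.checkFileFormat = (file) =>
  new Promise((resolve, reject) => {
    let count = 0;
    const fsStream = fs.createReadStream(file.tempFilePath);
    const csvStream = csv();

    fsStream.pipe(csvStream)
      .on('error', (err) => reject(err))
      .on('data', (row) => {
        // stop once the first MAX_ROWS rows have been checked
        if (count >= MAX_ROWS) {
          fsStream.unpipe(csvStream);
          fsStream.destroy();
          csvStream.end();
          return resolve(true);
        }
        count++;
        // csv-parser yields strings, so convert before the numeric check
        if (isNaN(Number(row['USAGE']))
          || !moment(row['START_DATE'], 'YYYYMMDD', true).isValid()
          || !moment(row['END_DATE'], 'YYYYMMDD', true).isValid()) {
          fsStream.unpipe(csvStream);
          fsStream.destroy();
          csvStream.end();
          return resolve(false);
        }
      })
      // file shorter than MAX_ROWS and all rows valid
      .on('end', () => resolve(true));
  });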

How to return my CSV data from my service - async/await issue?

I am attempting to load some CSV data in my API so that I can manipulate it and pass it through to my front end; however, I am having a few issues returning the data.
I am using fast-csv to do the parsing here.
service.js
const fs = require('fs');
const csv = require('fast-csv');
module.exports.getFileContents = (filepath) => {
  let data = [];
  fs.createReadStream(filepath)
    .pipe(csv.parse({ headers: true }))
    .on('error', error => console.error(error))
    .on('data', row => data.push(row))
    .on('end', () => {
      console.log(data) // This will print the full CSV file fine
      return data;
    });
};
routes.js
router.get('/data/:filename', (req, res) => {
  const file = FS.getFileContents(testUrl + '/' + req.params.filename + '.csv');
  console.log(file); // This prints 'undefined'
  res.send(file);
});
I can print out the CSV contents fine from the service, but I just get 'undefined' from the actual routes. Can somebody please point out what I'm missing?
This is a common problem with asynchronous JavaScript code; look at the following:
.on('end', () => {
  console.log(data);
  return data;
});
Your on-end handler is an anonymous callback function (because of () =>), so when you return data, you are returning data out of your on-end handler callback function. You are not returning data out of your enclosing getFileContents() function.
Here's a typical way to write this kind of code:
const getFileContents = async (filepath) => {
  const data = [];
  return new Promise(function(resolve, reject) {
    fs.createReadStream(filepath)
      .pipe(csv.parse({ headers: true }))
      .on('error', error => reject(error))
      .on('data', row => data.push(row))
      .on('end', () => {
        console.log(data);
        resolve(data);
      });
  });
};
And then, call it as follows, though this must be within an async function:
const data = await getFileContents('games.csv');
What's happened here is as follows:
your getFileContents is now async and returns a promise
the CSV data will be available when resolve(data) is executed
the caller can await the fulfillment/resolution of this promise to get the data
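For example, the routes.js from the question could then look something like this (a sketch; it assumes getFileContents is exported from the service module as before, and shows why a try/catch is useful now that the promise can reject):
router.get('/data/:filename', async (req, res) => {
  try {
    const file = await FS.getFileContents(testUrl + '/' + req.params.filename + '.csv');
    res.send(file);
  } catch (err) {
    // reject(error) in the stream's error handler surfaces here
    res.status(500).send(err.message);
  }
});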
You could just create a Promise in the service and return it. Once the job is done, resolve it. The returned Promise will wait until it is resolved.
service.js
const fs = require('fs');
const csv = require('fast-csv');
module.exports.getFileContents = (filepath) => {
  let data = [];
  return new Promise((resolve) => {
    fs.createReadStream(filepath)
      .pipe(csv.parse({ headers: true }))
      .on('error', error => console.error(error))
      .on('data', row => data.push(row))
      .on('end', () => {
        resolve(data);
      });
  });
};
routes.js
router.get('/data/:filename', async (req, res) => {
  const file = await FS.getFileContents(testUrl + '/' + req.params.filename + '.csv');
  console.log(file); // This prints only after it is resolved
  res.send(file);
});

Read and write to csv file with Node.js fast-csv library

I may be lacking some in-depth understanding of streams in general, but I would like to know how efficient my approach is.
I want to read a CSV file, make a query to a database (or API) for each row and attach the result to it, and then write the row with the attached data to a new CSV file. I am using the fast-csv Node library for this.
Here is my implementation:
const fs = require("fs");
const csv = require("fast-csv");

const delay = t => new Promise(resolve => setTimeout(resolve, t));

const asyncFunction = async (row, csvStream) => {
  // Imitate some stuff with database
  await delay(1200);
  row.data = "data";
  csvStream.write(row);
};

const array = [];

const csvStream = csv.format({ headers: true });
const writeStream = fs.createWriteStream("output.csv");

csvStream.pipe(writeStream).on("finish", () => {
  console.log("End of writing");
});

fs.createReadStream("input.csv")
  .pipe(csv.parse({ headers: true }))
  .transform(async function(row, next) {
    array.push(asyncFunction(row, csvStream));
    next();
  })
  .on("finish", function() {
    console.log("finished reading file");
    // Wait for all database requests and writes to finish before closing the write stream
    Promise.all(array).then(() => {
      csvStream.end();
      console.log("finished writing file");
    });
  });
In particular, I would like to know whether there are ways to optimize what I am doing here, because I feel I am missing something important about how this library can be used for this type of case.
Regards,
Rokas
I was able to find a solution in the fast-csv issues section. A good person, doug-martin, provided this gist on how you can do this kind of operation efficiently via a Transform stream:
const path = require('path');
const fs = require('fs');
const { Transform } = require('stream');
const csv = require('fast-csv');

class PersistStream extends Transform {
  constructor(args) {
    super({ objectMode: true, ...(args || {}) });
    this.batchSize = 100;
    this.batch = [];
    if (args && args.batchSize) {
      this.batchSize = args.batchSize;
    }
  }

  _transform(record, encoding, callback) {
    this.batch.push(record);
    if (this.shouldSaveBatch) {
      // we have hit our batch size so process the records as a batch
      this.processRecords()
        // we successfully processed the records so callback
        .then(() => callback())
        // An error occurred!
        .catch(err => callback(err));
      return;
    }
    // we shouldn't persist yet so ignore
    callback();
  }

  _flush(callback) {
    if (this.batch.length) {
      // handle any leftover records that were not persisted because the batch was too small
      this.processRecords()
        // we successfully processed the records so callback
        .then(() => callback())
        // An error occurred!
        .catch(err => callback(err));
      return;
    }
    // no records to persist so just call callback
    callback();
  }

  pushRecords(records) {
    // emit each record for downstream processing
    records.forEach(r => this.push(r));
  }

  get shouldSaveBatch() {
    // this could be any check, for this example it is the record count
    return this.batch.length >= this.batchSize;
  }

  async processRecords() {
    // save the records
    const records = await this.saveBatch();
    // be sure to emit them
    this.pushRecords(records);
    return records;
  }

  async saveBatch() {
    const records = this.batch;
    this.batch = [];
    console.log(`Saving batch [noOfRecords=${records.length}]`);
    // This is where you should save/update/delete the records
    return new Promise(res => {
      setTimeout(() => res(records), 100);
    });
  }
}
const processCsv = ({ file, batchSize }) =>
  new Promise((res, rej) => {
    let recordCount = 0;
    fs.createReadStream(file)
      // catch file read errors
      .on('error', err => rej(err))
      .pipe(csv.parse({ headers: true }))
      // catch any parsing errors
      .on('error', err => rej(err))
      // pipe into our processing stream
      .pipe(new PersistStream({ batchSize }))
      .on('error', err => rej(err))
      .on('data', () => {
        recordCount += 1;
      })
      .on('end', () => res({ event: 'end', recordCount }));
  });

const file = path.resolve(__dirname, `batch_write.csv`);

// process the file in batches of 5 records
processCsv({ file, batchSize: 5 })
  .then(({ event, recordCount }) => {
    console.log(`Done Processing [event=${event}] [recordCount=${recordCount}]`);
  })
  .catch(e => {
    console.error(e.stack);
  });
https://gist.github.com/doug-martin/b434a04f164c81da82165f4adcb144ec
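To get back to the original goal of writing an enriched output CSV, the records emitted by PersistStream can be piped straight into fast-csv's formatter. A rough sketch (the saveBatch body above would contain your real database calls, and output.csv is just a placeholder path):
const out = fs.createWriteStream(path.resolve(__dirname, 'output.csv')); // placeholder path

fs.createReadStream(file)
  .on('error', err => console.error(err))
  .pipe(csv.parse({ headers: true }))
  .pipe(new PersistStream({ batchSize: 100 })) // enrich records in batches
  .pipe(csv.format({ headers: true }))         // turn the enriched objects back into CSV rows
  .pipe(out)
  .on('finish', () => console.log('finished writing output.csv'));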

'pipe' function in Javascript not populating from CSV as expected

I had this code working earlier, but made some changes and I'm not sure what I did to break it. The path to the .csv file is correct, and the code seems correct, but the array raw_data is empty after the function call.
require('./trip.js');
const parser = require('csv-parser');
const fs = require('fs');

let raw_data = [];

function readFile() {
  fs.createReadStream('./trips.csv')
    .pipe(parser())
    .on('data', (data) => raw_data.push(data))
    .on('end', () => console.log('CSV has been piped into an array'));
}

const trips = async () => {
  await readFile();
  console.log(raw_data.length)
};
I expect the raw_data array to contain 9999 items. It contains zero. I am also not getting the console.log statement to execute on 'end'.
readFile must return a Promise, like this:
require('./trip.js');
const parser = require('csv-parser');
const fs = require('fs');

let raw_data = [];

function readFile() {
  return new Promise(resolve =>
    fs.createReadStream('./trips.csv')
      .pipe(parser())
      .on('data', (data) => raw_data.push(data))
      .on('end', resolve)
  );
}

const trips = async () => {
  await readFile();
  console.log(raw_data.length)
};
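As a small design tweak (not required by the fix above), the promise could resolve with the parsed rows instead of relying on the module-level raw_data array, which keeps readFile self-contained; a sketch:
function readFile(path) {
  return new Promise((resolve, reject) => {
    const rows = [];
    fs.createReadStream(path)
      .pipe(parser())
      .on('error', reject) // reject so the caller's await throws on stream errors
      .on('data', (data) => rows.push(data))
      .on('end', () => resolve(rows));
  });
}

const trips = async () => {
  const raw_data = await readFile('./trips.csv');
  console.log(raw_data.length);
};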
