Pushing an object onto an array from inside an .on event - javascript

I have some CSV data I need to parse into an array of objects for an API I am building.
I am able to see that the parser I am using is working as expected this way:
const csv = require('fast-csv');
const fs = require('fs');
const path = require('path');

let results = [];

async function parseCSVFromCSV(_sourceCSVFilePath) {
  return new Promise((resolve, reject) => {
    fs.createReadStream(_sourceCSVFilePath)
      .pipe(csv.parse({ headers: true }))
      .on('error', error => reject(error))
      .on('data', row => {
        console.log(row);
        results.push(row);
      });
    resolve(results);
  });
}
so the console.log(row) prints out the parsed object as expected, but the results array is empty. I need this parsed data, but no matter what I try I can't get results to populate with the parsed CSV.
Any guidance on what I am missing here?

So I figured out what I need to do:
const fs = require('fs');
const path = require('path');
const csv = require('fast-csv');

async function getData() {
  let arrOfObj = [];
  fs.createReadStream(path.resolve(__dirname, 'downloads', 'sampleData.csv'))
    .pipe(csv.parse({ headers: true }))
    .on('error', error => console.error(error))
    .on('data', row => {
      arrOfObj.push(row);
      //console.log(row);
    })
    .on('end', rowCount => {
      printData(arrOfObj);
    });
}

async function printData(_arrOfObj) {
  console.log(_arrOfObj);
}

getData();
Basically, the finished array is not accessible except inside the end event. Calling another function from the end handler is what lets the data be used outside the event. Hope this helps others that were needing to do this same thing.
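If you would rather return the rows to a caller instead of handing them off from inside the end handler, one option (a minimal sketch, assuming the same fast-csv setup; parseCSV is a hypothetical helper name) is to wrap the stream in a Promise and resolve it from the end event:

const fs = require('fs');
const path = require('path');
const csv = require('fast-csv');

// Hypothetical helper: resolves with the parsed rows once the stream ends.
function parseCSV(sourcePath) {
  return new Promise((resolve, reject) => {
    const rows = [];
    fs.createReadStream(sourcePath)
      .pipe(csv.parse({ headers: true }))
      .on('error', error => reject(error))
      .on('data', row => rows.push(row))
      .on('end', () => resolve(rows)); // resolve only after every row has been pushed
  });
}

// Usage inside an async function:
// const data = await parseCSV(path.resolve(__dirname, 'downloads', 'sampleData.csv'));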

Related

How to return my CSV data from my service - async/await issue?

I am attempting to load some CSV data in my API so that I can manipulate it and pass it through to my front end; however, I am having a few issues returning the data.
I am using fast-csv to do the parsing here.
service.js
const fs = require('fs');
const csv = require('fast-csv');

module.exports.getFileContents = (filepath) => {
  let data = [];
  fs.createReadStream(filepath)
    .pipe(csv.parse({ headers: true }))
    .on('error', error => console.error(error))
    .on('data', row => data.push(row))
    .on('end', () => {
      console.log(data); // This will print the full CSV file fine
      return data;
    });
};
routes.js
router.get('/data/:filename', (req, res) => {
  const file = FS.getFileContents(testUrl + '/' + req.params.filename + '.csv');
  console.log(file); // This prints 'undefined'
  res.send(file);
});
I can print out the CSV contents fine from the service, but I just get 'undefined' from the actual routes. Can somebody please point out what I'm missing?
This is a common problem with asynchronous JavaScript code. The issue is in the following:
.on('end', () => {
  console.log(data);
  return data;
});
Your on-end handler is an anonymous callback function (because of () =>), so when you return data, you are returning data out of your on-end handler callback function. You are not returning data out of your enclosing getFileContents() function.
Here's a typical way to write this kind of code:
const getFileContents = async (filepath) => {
  const data = [];
  return new Promise(function(resolve, reject) {
    fs.createReadStream(filepath)
      .pipe(csv.parse({ headers: true }))
      .on('error', error => reject(error))
      .on('data', row => data.push(row))
      .on('end', () => {
        console.log(data);
        resolve(data);
      });
  });
};
And then, call it as follows, though this must be within an async function:
const data = await getFileContents('games.csv');
What's happened here is as follows:
your getFileContents is now async and returns a promise
the CSV data will be available when resolve(data) is executed
the caller can await the fulfillment/resolution of this promise to get the data
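A route could then consume it like this (a sketch only, assuming an Express-style async handler, with getFileContents exported from the service and testUrl defined as in the question):

router.get('/data/:filename', async (req, res) => {
  try {
    const data = await FS.getFileContents(testUrl + '/' + req.params.filename + '.csv');
    res.send(data);
  } catch (err) {
    // A reject() from the stream's error handler lands here.
    res.status(500).send({ error: err.message });
  }
});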
Alternatively, you could just create a Promise in the service and return it, resolving it once the job is done. The caller can then await the returned Promise.
service.js
const fs = require('fs');
const csv = require('fast-csv');
module.exports.getFileContents = (filepath) => {
  let data = [];
  return new Promise((resolve) => {
    fs.createReadStream(filepath)
      .pipe(csv.parse({ headers: true }))
      .on('error', error => console.error(error))
      .on('data', row => data.push(row))
      .on('end', () => {
        resolve(data);
      });
  });
};
routes.js
router.get('/data/:filename', async (req, res) => {
  const file = await FS.getFileContents(testUrl + '/' + req.params.filename + '.csv');
  console.log(file); // This prints only after it is resolved
  res.send(file);
});

How to push into an array using nodejs

I am trying to push the file into the array named FileName; however, when I try to console.log(FileName), all I get is an empty array! Any input would be greatly appreciated!
const fs = require("fs");
const mm = require("music-metadata");
const dataFolder = "../../Songs";
const util = require("util");
//requiring path and fs modules
const path = require("path");
let FileName = [];
fs.readdir(dataFolder, (err, files) => {
files.forEach(e => {
return mm.parseFile(`../../Songs/${e}`).then(metadata => {
// console.log(`../..Songs/${e}`);
FileName.push(e);
// console.log(
// util.inspect(metadata.common.title, {
// showHidden: false,
// depth: null
// })
// );
});
});
});
console.log(FileName);
// mm.parseFile(
// "/Users/nathangriffith/Desktop/dashboard/Songs/04 Smoke and Mirrors.m4a"
// )
// .then(metadata => {
// console.log(
// metadata.native.iTunes[0].value
// );
// })
// .catch(err => {
// console.error(err.message);
// });
Assuming that you need the metadata in your FileName array:
const files = fs.readdirSync(dataFolder); // Now files is an array of file names

Promise
  .all(files.map(file => mm.parseFile(`../../Songs/${file}`)))
  .then(metadataArr => {
    console.log(metadataArr); // This should be your parsed metadata
  });
You are running an async function and expecting it to complete before the next line (console.log) runs. The log runs before the readdir callback has fired, and therefore prints the value of FileName as it is before readdir changes it.
You can either change the program to use async/await correctly, or use fs.readdirSync instead.
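As a rough sketch of the async/await version (assuming Node's fs.promises API and the same music-metadata package), both the directory read and the metadata parsing can be awaited before logging:

const fs = require("fs");
const mm = require("music-metadata");

const dataFolder = "../../Songs";

async function listSongs() {
  // Wait for the directory listing before doing anything else.
  const files = await fs.promises.readdir(dataFolder);

  // Parse every file's metadata in parallel and wait for all of it.
  const metadata = await Promise.all(
    files.map(file => mm.parseFile(`${dataFolder}/${file}`))
  );

  console.log(files);    // the populated array of file names
  console.log(metadata); // one metadata object per file
}

listSongs().catch(console.error);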

'pipe' function in Javascript not populating from CSV as expected

I had this code working earlier, but made some changes and I'm not sure what I did to break it. The path to the .csv file is correct, and the code seems correct, but the array raw_data is empty after the function call.
require('./trip.js');
const parser = require('csv-parser');
const fs = require('fs');

let raw_data = [];

function readFile() {
  fs.createReadStream('./trips.csv')
    .pipe(parser())
    .on('data', (data) => raw_data.push(data))
    .on('end', () => console.log('CSV has been piped into an array'));
}

const trips = async () => {
  await readFile();
  console.log(raw_data.length);
};
I expect the raw_data array to contain 9999 items. It contains zero. I am also not getting the console.log statement to execute on 'end'.
readFile must return a Promise, like this:
require('./trip.js');
const parser = require('csv-parser');
const fs = require('fs');

let raw_data = [];

function readFile() {
  return new Promise(resolve =>
    fs.createReadStream('./trips.csv')
      .pipe(parser())
      .on('data', (data) => raw_data.push(data))
      .on('end', resolve)
  );
}

const trips = async () => {
  await readFile();
  console.log(raw_data.length);
};
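If you also want the await to surface stream failures, the Promise can reject on the error event (a sketch under the same csv-parser setup):

function readFile() {
  return new Promise((resolve, reject) =>
    fs.createReadStream('./trips.csv')
      .pipe(parser())
      .on('error', reject)                       // surface read/parse errors to the caller
      .on('data', (data) => raw_data.push(data))
      .on('end', () => {
        console.log('CSV has been piped into an array');
        resolve();
      })
  );
}

// trips() can then catch the rejection:
// trips().catch(err => console.error('failed to read trips.csv', err));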

Can I use data crawled with Node.js in browser JavaScript?

I'm new to JavaScript and trying to crawl a website with Node.js. I can see the data in the console log, but I want to use the data in another JavaScript file. How can I fetch the data?
The problem is I've never used Node.js. I do JavaScript, so I know how to write the code, but I don't know how the back end or server works.
I've tried to open it on my localhost, but the Node methods (e.g., require()) didn't work. I found out it's because Node doesn't run in the browser. (See? Very new to JS.)
Should I use a bundler or something?
The steps I thought of were:
somehow send the data as JSON
somehow fetch the JSON data and render it
Here is the crawling code file.
const axios = require("axios");
const cheerio = require("cheerio");
const log = console.log;
const getHtml = async () => {
try {
return await axios.get(URL);
} catch (error) {
console.error(error);
}
};
getHtml()
.then(html => {
let ulList = [];
const $ = cheerio.load(html.data);
const $bodyList = $("div.info-timetable ul").children("li");
$bodyList.each(function(i, elem) {
ulList[i] = {
screen: $(this).find('a').attr('data-screenname'),
time: $(this).find('a').attr('data-playstarttime')
};
});
const data = ulList.filter(n => n.time);
return data;
})
.then(res => log(res));
Could you please explain what steps I should take?
Also, it would be great if I could understand WHY the steps are needed.
Thanks a lot!
You can try writing your data to a JSON file; that's one way. Then you can use the data as an object in any JS file.
const fs = require('fs');

const appendFile = (file, contents) =>
  new Promise((resolve, reject) => {
    fs.appendFile(
      file,
      contents,
      'utf8',
      err => (err ? reject(err) : resolve()),
    );
  });

getHtml()
  .then(html => {
    let ulList = [];
    const $ = cheerio.load(html.data);
    const $bodyList = $("div.info-timetable ul").children("li");
    $bodyList.each(function(i, elem) {
      ulList[i] = {
        screen: $(this).find('a').attr('data-screenname'),
        time: $(this).find('a').attr('data-playstarttime')
      };
    });
    const data = ulList.filter(n => n.time);
    return data;
  })
  .then(res => {
    return appendFile('./data.json', JSON.stringify(res));
  })
  .then(done => { log('updated data json'); });
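To get that JSON into your front-end code, one possible route (a sketch only, assuming you add an Express server and that data.json holds a single JSON array as written above) is to serve the file over HTTP and fetch it from the browser:

// server.js (Node side; assumes Express is installed)
const express = require('express');
const app = express();

app.get('/data', (req, res) => {
  // Serve the crawled data that was written to data.json.
  res.sendFile(`${__dirname}/data.json`);
});

app.listen(3000, () => console.log('listening on http://localhost:3000'));

// In the browser script:
// fetch('http://localhost:3000/data')
//   .then(response => response.json())
//   .then(data => {
//     console.log(data); // render the screenings however you like
//   });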

How do you push data to a readable stream from within a function?

I'm trying to achieve the following:
Function getPaths reads directory paths and pushes them into a readable stream as it finds them.
The readable stream keeps piping (streaming) incoming paths into the write stream as it receives the paths.
Code
const fs = require('fs')
const zlib = require('zlib')
const zip = zlib.createGzip()
const Stream = require('stream')

let wstream = fs.createWriteStream('C:/test/file.txt.gz')
let readable = new Stream.Readable({
  objectMode: true,
  read(item) {
    this.push(item)
  }
})

readable.pipe(zip).pipe(wstream)
  .on('finish', (err) => {
    console.log('done');
  })

let walkdir = require('walkdir')

function getPaths(dir) {
  let walker = walkdir.sync(dir, {"max_depth": 0, "track_inodes": true}, (path, stat) => {
    readable.push(path)
    console.log('pushing a path to readable')
  })
}

getPaths("C:/")
console.log('getPaths() ran')
readable.push(null) // indicates the end of the stream
Problem
The paths are not being compressed and written to the file as the getPaths function finds them and pushes them into the stream; nothing happens until it has found all of them. I know it's probably due to the process being synchronous, but I cannot figure out how to make it work.
I see the following output from the logs:
> // .gz file gets created with size of 0
> // Nothing happens for about 1 minute
> x(184206803) "pushing a path to readable"
> "getPaths() ran"
> // I see the data started being written into the file
> "Done"
UPDATE:
And if I do this asynchronously like this (or use the code from the answer below):
let walker = walkdir(dir, {"max_depth": 0, "track_inodes": true})
walker.on('path', (path, stat) => {
  readable.push(path)
})
walker.on('end', (path, stat) => {
  readable.push(null)
})
...
// readable.push(null)
I get an error (I think it throws that particular error when it doesn't receive the expected data chunk after you're done pushing data into it; if you remove that last line, readable.push(null), from the code and run it again, it throws the same error):
TypeError [ERR_INVALID_ARG_TYPE]: The "chunk" argument must be one of type
string or Buffer. Received type number
Your code is very good and works fine. You just need to remove this.push(item) and give the read function an empty body.
Here is a working snippet:
const fs = require('fs')
const zlib = require('zlib')
const zip = zlib.createGzip()
const Stream = require('stream')

let wstream = fs.createWriteStream('C:/test/file.txt.gz')
let readable = new Stream.Readable({
  objectMode: true,
  read() { }
})

readable.pipe(zip).pipe(wstream)
  .on('finish', (err) => {
    console.log('done');
  })

let walkdir = require('walkdir')

function getPaths(dir) {
  let walker = walkdir(dir, {"max_depth": 0, "track_inodes": true})
  walker.on('path', (path, stat) => {
    readable.push(path)
  })
  walker.on('end', (path, stat) => {
    readable.push(null)
  })
}

getPaths("C:/")
console.log('getPaths() ran')
BTW, the correct argument name is read(size). It represents the number of bytes to read.
EDIT
There is no need for the readable stream. You can write directly to zip.
const fs = require('fs');
const zlib = require('zlib');
const zip = zlib.createGzip();

const wstream = fs.createWriteStream('C:/test/file.txt.gz');
zip.pipe(wstream)
  .on('finish', (err) => {
    console.log('done');
  })

let walkdir = require('walkdir')

function getPaths(dir) {
  let walker = walkdir(dir, {"max_depth": 0, "track_inodes": true})
  walker.on('path', (path, stat) => {
    zip.write(path);
  })
  walker.on('end', (path, stat) => {
    zip.end();
  })
}

getPaths("C:/")
console.log('getPaths() ran')
