I'm having some problems getting the asynchronous nature of Node to cooperate with me, and after hours of callbacks and googling, I finally turn to you guys.
I have a program that needs to read in lines from a file using Node's readline module. This file contains data that is passed to some asynchronous functions defined within my node program. Once all the data is successfully read and processed, it needs to be parsed into JSON format and then output.
My problem is that the handler I register with lineReader.on('close', function() { ... }) runs before the asynchronous functions have finished, so I end up with no output, even though the program keeps running the asynchronous functions.
I've created a simple skeleton of functions that should explain my situation more clearly:
function firstAsyncFunction(dataFromFile) {
//do something asynchronously
return processedData;
}
function secondAsyncFunction(dataFromFile) {
//do something else asynchronously
return processedData;
}
//create readline
var lineReader = require('readline').createInterface({
input: require('fs').createReadStream('data.txt')
});
//array to hold all the data processed
var totalDataStorage = [];
//read file
lineReader.on('line', function(line) {
var processedData = firstAsyncFunction(line);
var moreProcessedData = secondAsyncFunction(line);
//store processed data and concatenate into one array
var tempDataStorage = [{ 'first': processedData, 'second': moreProcessedData }]
totalDataStorage = totalDataStorage.concat(tempDataStorage);
}).on('close', function() {
var JSONString = JSON.stringify(... //create JSON for totalDataStorage ...);
console.log(JSONString); //DOESN'T OUTPUT ANYTHING!
});
I have tried adding a callback to firstAsyncFunction/secondAsyncFunction, and I have tried splitting the reading and parsing parts of the program into separate functions with callbacks so that parsing is only called once reading has finished, but none of those solutions seemed to work and I'm really struggling, so any help would be appreciated.
Thanks!
EDIT: The data.txt file is of the form
IPData1 DataCenter1
IPData2 DataCenter2
...
IPDataN DataCenterN
I use str.split(" ") to get the respective values, and then pass them appropriately. IPData is a number, and DataCenter is a string.
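For illustration, parsing a single line might look like this (a small sketch, not from the original program; the sample values are made up):
var line = "42 DataCenter1";        // stand-in for one real line of data.txt
var parts = line.split(" ");
var ipData = Number(parts[0]);      // the first field is a number per the question
var dataCenter = parts[1];          // the second field is a string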
Asynchronous functions do not return a value directly; you must pass a callback function to them instead. Your line
var processedData = firstAsyncFunction(line);
doesn't make sense at all. If your data.txt file looks like this
IPData1 DataCenter1
IPData2 DataCenter2
IPData3 DataCenter3
you can read the data as follows:
var fs = require('fs');
var rl = require('readline').createInterface({
input: fs.createReadStream('data.txt')
});
var arr = [];
rl.on('line', a => {
a = a.split(' ');
arr.push({
first: a[0],
second: a[1]
});
}).on('close', () => {
console.log(JSON.stringify(arr, null, 2));
});
It will log
[
{
"first": "IPData1",
"second": "DataCenter1"
},
{
"first": "IPData2",
"second": "DataCenter2"
},
{
"first": "IPData3",
"second": "DataCenter3"
}
]
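Since the question notes that IPData is a number, you could optionally convert the first field while pushing (a small tweak to the snippet above, assuming the real file has numeric values in that column):
arr.push({
    first: Number(a[0]),   // convert the numeric field
    second: a[1]
});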
I changed the following and it's working locally:
Use promises to make your life easier.
Remove .close since you don't have an output defined in the interface.
The 'close' event is emitted when one of the following occur:
The rl.close() method is called and the readline.Interface instance has relinquished control over the input and output streams;
The input stream receives its 'end' event;
The input stream receives Ctrl+D to signal end-of-transmission (EOT);
The input stream receives Ctrl+C to signal SIGINT and there is no SIGINT event listener registered on the readline.Interface instance.
function firstAsyncFunc(dataFromFile) {
return new Promise(function(resolve, reject) {
//do something asynchronously
resolve(result);
})
}
function secondAsyncFunc(dataFromFile) {
return new Promise(function(resolve, reject) {
//do something asynchronously
resolve(result);
})
}
//create readline
var lineReader = require('readline').createInterface({
input: require('fs').createReadStream('data.txt')
});
//array to hold all the data processed
var totalDataStorage = [];
//read file
lineReader.on('line', function(line) {
Promise.all([
firstAsyncFunc(line),
secondAsyncFunc(line)
])
.then(function(results) {
var tempDataStorage = [{
'first': results[0],
'second': results[1]
}];
// i'd use push instead of concat
totalDataStorage = totalDataStorage.concat(tempDataStorage);
});
})
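If you still want to print the final JSON once the whole file has been processed (the original goal), one option is to collect the per-line promises and wait for them in the 'close' handler. This is a sketch building on the code above, not part of the original answer:
var totalDataStorage = [];
var pending = []; // one promise per line
lineReader.on('line', function(line) {
    pending.push(
        Promise.all([firstAsyncFunc(line), secondAsyncFunc(line)])
            .then(function(results) {
                totalDataStorage.push({ first: results[0], second: results[1] });
            })
    );
}).on('close', function() {
    // 'close' only means the file has been read; the promises may still be pending
    Promise.all(pending).then(function() {
        console.log(JSON.stringify(totalDataStorage, null, 2));
    });
});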
Related
I'm a newbie in JS and I need to create a simple CSV parser for test data, and I have a very strange problem.
This is code of my test:
'use strict';
const assert = require('assert');
const HomePage = require('../pages/HomePage');
const csv = require('../tools/CsvReader');
describe('Check if values are correct', () => {
let dataProvider = csv.readFromCsv("test/resources/places.csv");
function valuesTest(city, expectedLat, expectedLong) {
it('Data must match', () => {
let searchPlace = HomePage
.open()
.findByPlaceName(city)
.getSearchPlace();
assert.strictEqual(searchPlace.getLatitude(), expectedLat);
assert.strictEqual(searchPlace.getLongtitude(), expectedLong);
console.log(dataProvider[0].CITY);
});
}
for (let i = 0; i < dataProvider.length; i++) {
valuesTest(dataProvider[i].CITY, dataProvider[i].LAT, dataProvider[i].LONG)
}
});
And code of my CSV-reader:
'use strict';
const csv = require('csv-parser');
const fs = require('fs');
class CsvReader {
readFromCsv(path) {
let results = [];
fs.createReadStream(path)
.pipe(csv())
.on('data', (data) => results.push(data));
return results;
}
}
module.exports = new CsvReader();
And this is my CSV:
CITY,LAT,LONG
Kyiv,50.447731,30.542721
Lviv,49.839684,24.029716
Ivano-Frankivsk,48.922634,24.711117
The problem is as follows: why does the variable "dataProvider" work correctly inside the "valuesTest" block and return the value of the CITY field, while in the "for" loop I can't use it ("dataProvider", "CITY", etc. are unavailable there), even though both are located in the same "describe" block?
Your CSV reader is an asynchronous operation. I suspect your for loop gets executed even before the CSV is parsed and the value is returned. Try putting your for loop in a function and passing it to the readFromCsv function as a callback. Call this function on the data event, where you will be sure to have the data.
You should pass a callback to readFromCsv that will be executed when the createReadStream is complete. You can determine when createReadStream is complete by listening to its end event. (See csv-parser example code for instance).
In your case, you could do something like:
readFromCsv(path, onEnd) {
let results = [];
fs.createReadStream(path)
.pipe(csv())
.on('data', (data) => results.push(data))
.on('end', () => onEnd(results));
}
csv.readFromCsv("test/resources/places.csv", function onData(data) {
for (let i = 0; i < data.length; i++) {
valuesTest(data[i].CITY, data[i].LAT, data[i].LONG);
}
});
You should use the end event instead of the data event to determine when the stream is complete. If you returned after the first data event, you might not get all of the data.
The data event is fired once per data chunk. If your data has more than one "chunk", you'll truncate it if you return in the data callback. See nodeJS docs for details on the different event types. You might not notice a difference with small test files, but make a larger file and you'll see the difference.
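To see the chunking for yourself, a tiny illustrative sketch (the file name is made up):
const fs = require('fs');
let chunks = 0;
fs.createReadStream('large-file.csv')
    .on('data', () => chunks++)
    .on('end', () => console.log(`'data' fired ${chunks} times before 'end'`));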
Specifically, given a list of data, I want to loop over that list and do a fetch for each element of that data before I combine it all afterward. The thing is, as written, the code iterates through the entire list immediately, starting all the operations at once. Then, even though the fetch operations are still running, the then call I have after all of that runs before the data has been processed.
I read something about putting all the Promises in an array, then passing that array to a Promise.all() call, followed by a then that will have access to all that processed data as intended, but I'm not sure how exactly to go about doing it in this case, since I have nested Promises in this for loop.
for(var i in repoData) {
var repoName = repoData[i].name;
var repoUrl = repoData[i].url;
(function(name, url) {
Promise.all([fetch(`https://api.github.com/repos/${username}/${repoData[i].name}/commits`),
fetch(`https://api.github.com/repos/${username}/${repoData[i].name}/pulls`)])
.then(function(results) {
Promise.all([results[0].json(), results[1].json()])
.then(function(json) {
//console.log(json[0]);
var commits = json[0];
var pulls = json[1];
var repo = {};
repo.name = name;
repo.url = url;
repo.commitCount = commits.length;
repo.pullRequestCount = pulls.length;
console.log(repo);
user.repositories.push(repo);
});
});
})(repoName, repoUrl);
}
}).then(function() {
var payload = new Object();
payload.user = user;
//console.log(payload);
//console.log(repoData[0]);
res.send(payload);
});
Generally when you need to run asynchronous operations for all of the items in an array, the answer is to use Promise.all(arr.map(...)) and this case appears to be no exception.
Also remember that you need to return values in your then callbacks in order to pass values on to the next then (or to the Promise.all aggregating everything).
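A minimal illustration of that point (not from the original code):
Promise.all([1, 2, 3].map(function (n) {
    return Promise.resolve(n).then(function (value) {
        return value * 10; // without this return, Promise.all would see undefined
    });
})).then(function (results) {
    console.log(results); // [10, 20, 30]
});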
When faced with a complex situation, it helps to break it down into smaller pieces. In this case, you can isolate the code to query data for a single repo into its own function. Once you've done that, the code to query data for all of them boils down to:
Promise.all(repoData.map(function (repoItem) {
return getDataForRepo(username, repoItem);
}))
Please try the following:
// function to query details for a single repo
function getDataForRepo(username, repoInfo) {
return Promise
.all([
fetch(`https://api.github.com/repos/${username}/${repoInfo.name}/commits`),
fetch(`https://api.github.com/repos/${username}/${repoInfo.name}/pulls`)
])
.then(function (results) {
return Promise.all([results[0].json(), results[1].json()])
})
.then(function (json) {
var commits = json[0];
var pulls = json[1];
var repo = {
name: repoInfo.name,
url: repoInfo.url,
commitCount: commits.length,
pullRequestCount: pulls.length
};
console.log(repo);
return repo;
});
}
Promise.all(repoData.map(function (repoItem) {
return getDataForRepo(username, repoItem);
})).then(function (retrievedRepoData) {
console.log(retrievedRepoData);
var payload = new Object();
payload.user = user;
//console.log(payload);
//console.log(repoData[0]);
res.send(payload);
});
I'm currently fiddling around with Node.js and I'm stuck on this issue.
I'm using the csvtojson converter (https://github.com/Keyang/node-csvtojson) as a separate module that I can call in my other JS files as many times as I want.
Here is my tools.js:
module.exports = {
csvToJson: function (csvPath) {
var Converter = require('csvtojson').Converter;
var converter = new Converter({});
var transfer = "DEFAULT";
converter.fromFile(csvPath, function(err, result){
if (err) {
return console.log(err);
}
else {
transfer = result;
}
});
return transfer;
}
};
And here is how I call it:
var countriesCsvFile = path.join(__dirname, '..', 'testDataFiles', 'countries.csv');
//GRAB TOOLS
var tools = require('../app/tools');
console.log(tools.csvToJson(countriesCsvFile));
The result is always the "DEFAULT" value, which indicates that the converter is not touching it.
I want to pass the result as the return value of the function so I can process the data on the fly, without creating a file and reading it back.
It is surely some scope issue, but after scratching my scalp for a few hours and browsing the existing questions, I couldn't find anything remotely useful.
Also, another note: If I call console.log(result) instead of transfer = result, it shows me my precious and desired data.
You have to pass in a callback function because the csvToJson function is returning 'transfer' before any value is assigned to it. Like Sirko said, it's asynchronous. You can also use promises instead of callbacks but that's another topic in itself.
module.exports = {
csvToJson: function (csvPath, callback) {
var Converter = require('csvtojson').Converter;
var converter = new Converter({});
converter.fromFile(csvPath, function(err, result){
if (err) {
callback(err);
}
else {
callback(null, result);
}
});
}
};
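With the callback version above, the call site from the question changes accordingly (a sketch reusing the question's paths):
var path = require('path');
var countriesCsvFile = path.join(__dirname, '..', 'testDataFiles', 'countries.csv');
var tools = require('../app/tools');

tools.csvToJson(countriesCsvFile, function (err, result) {
    if (err) {
        return console.log(err);
    }
    console.log(result); // the parsed data is only available inside the callback
});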
I'm using fast-csv's fromPath() method to read data from a file. I would like to write this data into an array (which I will subsequently sort). I would expect the code below to work for this purpose, but it does not:
var csv = require('fast-csv');
var dataArr = [];
csv.fromPath("datas.csv", {headers: true})
.on("data", data => {
console.log(data);
// > { num: '4319', year: '1997', month: '4', day: '20', ...
dataArr.push(data);
});
console.log(dataArr);
// > []
I am able to read the data in the file with this code, but the array is not populated.
What is a good way to accomplish this, and why does the code above not work?
Well, I know this question was asked a long time back, but I just got to work with a CSV file while creating an API with Node.js. Being a typical programmer, I googled something like "Reading from a file with fast-csv and writing into an array", but to date there isn't any proper response to the question, hence I decided to answer it.
The on handlers are asynchronous, so execution continues in the main flow and the collected data only becomes available once the stream's 'end' event fires; that is why the result is wrapped in a promise below.
var queryParameter = ()=> new Promise( resolve =>{
let returnLit = []
csv.fromPath("<fileName>", {headers : true})
.on('data',(data)=>{
returnLit.push(data[<header name>].trim())
})
.on('end',()=>{
resolve(returnLit)
})
})
var mainList = [];
queryParameter().then((res)=>mainList = res)
If you want to validate something, pass an argument into queryParameter() and use that argument in your validation method.
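For example, the column name could be passed in as that argument (a hypothetical variant of the snippet above; the file name remains a placeholder):
var queryParameter = (headerName) => new Promise(resolve => {
    let rows = [];
    csv.fromPath("<fileName>", {headers: true})
        .on('data', (data) => rows.push(data[headerName].trim()))
        .on('end', () => resolve(rows));
});

queryParameter('CITY').then((cities) => {
    // validate the cities here
    console.log(cities);
});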
The "on data" callback is asynchronous, and the commands that follow the callback will run before the callback finishes. This is why the code does not work, and this reasoning has been pointed out by others who have posted answers and comments.
As for a good way to accomplish the task, I have found that using the "on end" callback is a good fit; since the intention here is to "do something" with the whole data, after the file has been read completely.
var dataArr = [];
csv.fromPath("datas.csv", {headers: true})
.on("data", data => {
dataArr.push(data);
})
.on("end", () => {
console.log(dataArr.length);
// > 4187
});
As of "fast-csv": "^4.1.3" the approach by @ChandraKumar no longer works.
The fromPath function has been removed in favor of parseFile:
var queryParameter = ()=> new Promise( resolve =>{
let returnLit = []
csv.parseFile("<fileName>", {headers : true})
.on('data',(data)=>{
returnLit.push(data[<header name>].trim())
})
.on('end',()=>{
resolve(returnLit)
})
})
var mainList = [];
queryParameter().then((res)=>mainList = res)
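If you prefer, the same promise can be consumed with async/await (a usage sketch, not from the original answer):
(async () => {
    const rows = await queryParameter();
    console.log(rows.length);
})();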
The "on data" callback of the module is asynchronous. Therefore, this line
console.log(dataArr);
will always print an empty array because it runs before the callback fires.
To fix this you need to process the array and sort it within the callback. For example:
var dataArr = [];
csv.fromPath("datas.csv", {headers: true})
.on("data", data => {
dataArr.push(data);
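// '_' is assumed to be lodash (or underscore), required elsewhere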
var sorted = _.sortBy(dataArr, 'propertyX');
// do something with 'sorted'
});
I looked at a lot of examples but couldn't achieve it, so I need help.
Problem:
The content from the loop should be passed on and executed one by one.
Each loop iteration contains a file read and a database save operation, along with a few other object properties that need to be assigned.
I have created an example here:
http://runnable.com/VI1efZDJvlQ75mlW/api-promise-loop-for-node-js-and-hello-world
how to run:
Api: http://web-91b5a8f5-67af-4ffd-9a32-54a50b10fce3.runnable.com/api/upload
method : POST
content-type : multipart/form-data
upload more than one file with name.
..
The final expected result is:
files.name = "name of file"
files.content
files.content-type
files.size
- saved to db.
Currently I am getting content from one file, but the other files' content is not filled and is undefined.
Regards
Moyeen
The technique you're looking for is thenable chaining
var p= Q();
Object.keys(files).forEach(function(key){
p = p.then(function(){ // chain the next one
return Q.nfcall(fs.readFile, files[key].path, "binary"). // readfile
then(function (content) { // process content and save
files.filename = files[key].name;
files.path = files[key].path;
files.content_type = files[key].type;
files.size = files[key].size;
console.log(files.filename);
files.content = binaryToBase64(content);
return Q.npost(art.save, art); // wait for save, update as needed
});
});
});
Basically, we tell each operation to happen after the previous one has finished by chaining them; returning the promise causes the chain to wait on the asynchronous value.
As a byproduct you can later use
p.then(function(last){
// all done, access last here
});
The handler will run when all the promises are done.
I have updated the code to use Q.all, since the p.then mentioned above will execute only once.
http://runnable.com/VI1efZDJvlQ75mlW/api-promise-loop-for-node-js-and-hello-world
form.parse(req, function(err, fields, files) {
    var promises = [];
    var filesarr = [];
    var p = Q();
    Object.keys(files).forEach(function (key) {
        promises.push(p.then(function () { // chain the next one
            return Q.nfcall(fs.readFile, files[key].path, "binary"). // readfile
            then(function (content) { // process content and save
                var file = {};
                file.filename = files[key].name;
                file.path = files[key].path;
                file.content_type = files[key].type;
                file.size = files[key].size;
                console.log(files[key].name);
                file.content = binaryToBase64(content);
                filesarr.push(file);
                // Q.npost(art.save, art); // wait for save, update as needed
            })
        }));
    });
    Q.all(promises); // resolves once every file has been read and processed
});
The question is: how do I use Q.npost if I have a mongoose model for the files and want to save them...?
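For reference, Q.npost(object, methodName, args) calls a Node-style (callback-taking) method on an object and returns a promise, so with a mongoose document whose save() takes a callback, a sketch might look like this (FileModel is a hypothetical model name, not from the thread):
var fileDoc = new FileModel(file);   // hypothetical mongoose model instance
Q.npost(fileDoc, 'save').then(function () {
    console.log('saved', fileDoc.filename); // runs once save(callback) completes
});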