My ultimate goal is the following: my gardener has several devices that can send data to my Node.js server via TCP. This data is in JSON format, and looks something like the following:
Device A:
{"pagename": "spacekittens", "count": 11};
Device B:
{"pagename": "norwegiansultans", "count": 22};
As each of these is streamed to my server via TCP, I have added a ; to separate each message. In addition, the count in each device stream is randomly generated.
Now, I want to add dynamic routes for each TCP packet that comes my way, and display content from that stream to that route.
So my route myserver:4000/spacekittens should show the following:
{"pagename": "spacekittens", "count": [random number every second]};
And my route myserver:4000/norwegiansultans should show:
{"pagename": "norwegiansultans", "count": [random number every second]};
In order to accomplish this I have set up the following code:
server.on("connection", function(socket) {
let chunk = "";
socket.on('data', function(data) {
chunk += data.toString(); // Add string on the end of the variable 'chunk'
let d_index = chunk.indexOf(';'); // Find the delimiter
// While loop to keep going until no delimiter can be found
while (d_index > -1) {
try {
let string = chunk.substring(0, d_index); // Create string up until the delimiter
// define local variables that can be used in a closure
let json = JSON.parse(string); // Parse the current string
let localData = data;
console.log(json.pagename); // Function that does something with the current chunk of valid json.
app.get("/" + json.pagename, function(req, res) {
res.writeHead(200, {
'Content-Type': 'text/plain'
});
res.write(JSON.stringify(json));
res.end();
});
} catch (e) {
console.log(e);
}
chunk = chunk.substring(d_index + 1); // Cuts off the processed chunk
d_index = chunk.indexOf(';'); // Find the new delimiter
}
});
socket.on("close", function() {
console.log("connection closed");
});
});
I appreciate any thoughts and comments on methodology with regards to what I am trying to accomplish. However, I did not post only for this reason. I have a problem.
Currently, my res.write() line only populates the data in my routes one time. Adding new data via sockets does not replace the content on my route.
So under the myserver:4000/spacekittens route my count shows 11, and even though I stream an updated number ("count": 12) my route myserver:4000/spacekittens still only shows 11. Console logging gives me the correct response each time data is sent. So I am not using the res.write() correctly since it does not override old data.
Unsure how to rectify.
I would separate the page data and route setup from the connection handling. You can't set up the same route every time you receive a JSON blob, so this registers each route once and has it return whatever data is currently stored for that page; the data then changes with each blob of JSON.
class Pages {
  constructor(app){
    this._store = {}   // pagename -> latest data received for that page
    this.app = app
  }
  get( name ){
    return this._store[name]
  }
  set( name, data ){
    // only register the express route the first time this pagename is seen
    if ( !this.exists(name) ) this.setupRoute(name)
    return this._store[name] = data
  }
  exists( name ){
    return this._store.hasOwnProperty(name)
  }
  addJSON( json_string ){
    let data = JSON.parse(json_string)
    if ( !data.pagename ) throw new Error(`No pagename in data: "${json_string}"`)
    if ( !data.count ) throw new Error(`No count in data: "${json_string}"`)
    return this.set(data.pagename, data)
  }
  setupRoute( name ){
    let route = `/${name}`
    // the handler looks the data up on every request, so it always
    // serves the most recently stored blob for this page
    this.app.get(route, (req, res)=>{
      res.json(this.get(name))
    })
    console.log('setup route: %s', route)
    return this.app
  }
}
Then the connection handling just deals with pulling out the JSON strings for Pages.
const pages = new Pages(app)
server.on("connection", function(socket) {
let chunk = "";
socket.on('data', function(data) {
chunk += data.toString(); // Add string on the end of the variable 'chunk'
let d_index = chunk.indexOf(';'); // Find the delimiter
// While loop to keep going until no delimiter can be found
while (d_index > -1) {
try {
let string = chunk.substring(0, d_index); // Create string up until the delimiter
pages.addJSON(string)
} catch (e) {
console.error(e);
}
chunk = chunk.substring(d_index + 1); // Cuts off the processed chunk
d_index = chunk.indexOf(';'); // Find the new delimiter
}
});
socket.on("close", function() {
console.log("connection closed");
});
});
You could also use one of the line-delimited JSON parsing libraries, which will take a binary stream and emit the JSON as plain objects:
ndjson or ld-jsonstream
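For illustration, here is a minimal sketch of the ndjson approach, assuming the devices are changed to terminate each JSON blob with a newline ("\n") instead of ";" so the parser can split the stream; it reuses the Pages store from above:

const ndjson = require('ndjson');

server.on('connection', function (socket) {
  socket
    .pipe(ndjson.parse())                             // emits one plain object per JSON line
    .on('data', obj => pages.set(obj.pagename, obj))  // store latest data, route is set up once
    .on('error', err => console.error('bad JSON from device:', err));
});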
Related
I am periodically reading the temperature and humidity values from a DHT22 sensor in a greenhouse tunnel.
The sensor is attached to an Arduino Pro Mini. The Pro Mini also has an nRF24L01 transceiver attached to it, and the readings are transmitted to another nRF24L01/Arduino Pro Mini in my office.
The Arduino is connected to a desktop PC via a USB serial cable.
The intention is to write the received temperature and humidity readings to a file in CSV format.
I am receiving all the data over the radio link, which in turn is fed to my PC via my USB port. I am running Node with a file called index.js.
Below is the code from the Arduino connected to the PC. It is the receiver side of the radio link.
/*
See documentation at https://nRF24.github.io/RF24
See License information at root directory of this library
Author: Brendan Doherty (2bndy5)
*/
/**
A simple example of sending data from 1 nRF24L01 transceiver to another.
String message = "";
This example was written to be used on 2 devices acting as "nodes".
Use the Serial Monitor to change each node's behavior.
*/
#include <SPI.h>
#include <printf.h>
#include <nRF24L01.h>
#include <RF24.h>
struct dataStruct {
float HumH;
float TempC;
} myData;
bool newData = false;
RF24 radio(9, 10); // using pin 9 for the CE pin and pin 10 for the CSN pin
uint8_t address[][6] = {"1Node", "2Node"};
bool radioNumber = 1; // 0 uses address[0] to transmit, 1 uses address[1] to transmit
bool role = false; // true = TX role, false = RX role
void setup() {
Serial.begin(115200);
if (!radio.begin()) {
Serial.println(F("radio hardware is not responding!!"));
while (1) {} // hold in infinite loop
}
radio.setPALevel(RF24_PA_HIGH); // RF24_PA_MAX is default.
radio.setPayloadSize(sizeof(dataStruct)); // payload is two floats (8 bytes)
radio.openWritingPipe(address[radioNumber]); // always uses pipe 0
radio.openReadingPipe(1, address[!radioNumber]); // using pipe 1
radio.startListening(); // put radio in RX mode
// For debugging info
printf_begin(); // needed only once for printing details
radio.printDetails(); // (smaller) function that prints raw register values
radio.printPrettyDetails(); // (larger) function that prints human readable data
} // end of setup
void getData() {
if (radio.available()) {
//Serial.println("Radio is available******");
radio.read(&myData, sizeof(dataStruct));
newData = true;
}
//Serial.println("Radio is NOT available******");
}
void showData() {
if (newData == true) {
String message = "";
message = message + "{\"humidity\": ";
message = message + myData.HumH;
message = message + ", \"temperature\": ";
message = message + myData.TempC;
message = message + "}";
Serial.println(message);
newData = false;
}
}
void loop() {
getData();
showData();
}
Below is a screenshot of the serial output of the Arduino Pro Mini connected to my PC, showing what is being received from the greenhouse and what is being sent to the PC.
Arduino Serial port screen shot
The index2.js code is listed below
const SerialPort = require('serialport');
//const Readline = new SerialPort.parsers.Readline('\n');
const port = new SerialPort('/dev/ttyUSB0', {
baudRate: 115200
});
const fs = require('fs');
const { endianness } = require('os');
const { exit } = require('process');
const { Console } = require('console');
//const logIntervalMinutes = 1;
let lastMoment = new Date();
function tryParseJson(str) {
try {
JSON.parse(str);
} catch (e) {
console.log("JSON error")
return false;
}
return JSON.parse(str);
}
console.log('Initialising...');
port.on('open', function () {
console.log('Opened port...');
port.on('data', function (data) {
const sensorData = tryParseJson(data);
console.log('Data: ' + data);
const moment = new Date();
fs.appendFile('log.txt', `\n${sensorData.temperature} , ${sensorData.humidity} , ${moment}`, function (err) {
if (err) {
console.log('Your Jason has failed to get a complete string...');
} else {
console.log('Logged data: ', moment);
};
});
});
});
When I run node index2.js and look at the log.txt file, I see that sometimes the temperature/humidity values are listed as undefined, as shown in the screenshot below.
log.txt
After a bit of debugging I saw the following in the console.log() output, as shown in the screenshot below.
Console.log() screen shot with program running.
So my problem is that every now and again, fs.appendFile can't determine the value of sensorData.temperature and sensorData.humidity. It still appends a record to the log.txt file, but the first two fields contain undefined.
fs.appendFile('log.txt', `\n${sensorData.temperature} , ${sensorData.humidity} , ${moment}`, function (err) {
if (err) {
console.log('Your Jason has failed to get a complete string...');
} else {
console.log('Logged data: ', moment);
};
});
It appears that tryParseJson(str) sometimes only gets some of the data and not the full JSON object. See the code below:
function tryParseJson(str) {
try {
JSON.parse(str);
} catch (e) {
console.log("JSON error")
return false;
}
return JSON.parse(str);
}
I see that catch (e) gets called and my console.log("JSON error") gets printed.
I need some help to work out how to resolve this.
I made some changes to my code to check the contents of const sensorData = tryParseJson(data), as shown below.
const sensorData = tryParseJson(data);
if (sensorData.temperature == undefined || sensorData.humidity == undefined){
//console.log('Temperature or Humidity is undefined');
} else {
and then used an IF statement to append or not append to the log.txt file.
Everything is now working, however there is still one minor issue.
I noticed something in the log.txt file. If the value of the temp or humidity is, say, 25.00, then the file will have 25 with no trailing zeros. This does not happen if the temp is, say, 25.4.
If I check the values of sensorData at the line const sensorData = tryParseJson(data); the values are correct. They only seem to change with fs.appendFile.
Any idea why?
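For the partial-JSON problem described above, one option is to only hand complete lines to tryParseJson. Since the Arduino sends each message with Serial.println, every JSON blob already ends in a newline, so the serialport Readline parser (the one commented out in index2.js) can do the buffering. A rough sketch, assuming the serialport version in use exposes SerialPort.parsers.Readline:

const SerialPort = require('serialport');
const Readline = SerialPort.parsers.Readline;
const fs = require('fs');

const port = new SerialPort('/dev/ttyUSB0', { baudRate: 115200 });
const parser = port.pipe(new Readline({ delimiter: '\n' })); // emits one complete line at a time

parser.on('data', function (line) {
  const sensorData = tryParseJson(line); // line is now always a full JSON message
  if (!sensorData) return;
  const moment = new Date();
  // JSON.parse turns "25.00" into the number 25, so reapply the formatting when logging
  const row = `\n${sensorData.temperature.toFixed(2)} , ${sensorData.humidity.toFixed(2)} , ${moment}`;
  fs.appendFile('log.txt', row, err => {
    if (err) console.error(err);
  });
});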
I have a file which stores many JavaScript objects in JSON form and I need to read the file, create each of the objects, and do something with them (insert them into a db in my case). The JavaScript objects can be represented in a format:
Format A:
[{name: 'thing1'},
....
{name: 'thing999999999'}]
or Format B:
{name: 'thing1'} // <== My choice.
...
{name: 'thing999999999'}
Note that the ... indicates a lot of JSON objects. I am aware I could read the entire file into memory and then use JSON.parse() like this:
fs.readFile(filePath, 'utf-8', function (err, fileContents) {
if (err) throw err;
console.log(JSON.parse(fileContents));
});
However, the file could be really large, I would prefer to use a stream to accomplish this. The problem I see with a stream is that the file contents could be broken into data chunks at any point, so how can I use JSON.parse() on such objects?
Ideally, each object would be read as a separate data chunk, but I am not sure on how to do that.
var importStream = fs.createReadStream(filePath, {flags: 'r', encoding: 'utf-8'});
importStream.on('data', function(chunk) {
var pleaseBeAJSObject = JSON.parse(chunk);
// insert pleaseBeAJSObject in a database
});
importStream.on('end', function(item) {
console.log("Woot, imported objects into the database!");
});
Note, I wish to prevent reading the entire file into memory. Time efficiency does not matter to me. Yes, I could try to read a number of objects at once and insert them all at once, but that's a performance tweak - I need a way that is guaranteed not to cause a memory overload, no matter how many objects are contained in the file.
I can choose to use Format A or Format B or maybe something else, just please specify in your answer. Thanks!
To process a file line-by-line, you simply need to decouple the reading of the file and the code that acts upon that input. You can accomplish this by buffering your input until you hit a newline. Assuming we have one JSON object per line (basically, format B):
var stream = fs.createReadStream(filePath, {flags: 'r', encoding: 'utf-8'});
var buf = '';
stream.on('data', function(d) {
buf += d.toString(); // when data is read, stash it in a string buffer
pump(); // then process the buffer
});
function pump() {
var pos;
while ((pos = buf.indexOf('\n')) >= 0) { // keep going while there's a newline somewhere in the buffer
if (pos == 0) { // if there's more than one newline in a row, the buffer will now start with a newline
buf = buf.slice(1); // discard it
continue; // so that the next iteration will start with data
}
processLine(buf.slice(0,pos)); // hand off the line
buf = buf.slice(pos+1); // and slice the processed data off the buffer
}
}
function processLine(line) { // here's where we do something with a line
if (line[line.length-1] == '\r') line=line.substr(0,line.length-1); // discard CR (0x0D)
if (line.length > 0) { // ignore empty lines
var obj = JSON.parse(line); // parse the JSON
console.log(obj); // do something with the data here!
}
}
Each time the file stream receives data from the file system, it's stashed in a buffer, and then pump is called.
If there's no newline in the buffer, pump simply returns without doing anything. More data (and potentially a newline) will be added to the buffer the next time the stream gets data, and then we'll have a complete object.
If there is a newline, pump slices off the buffer from the beginning to the newline and hands it off to processLine. It then checks again if there's another newline in the buffer (the while loop). In this way, we can process all of the lines that were read in the current chunk.
Finally, processLine is called once per input line. If present, it strips off the carriage return character (to avoid issues with line endings – LF vs CRLF), and then calls JSON.parse on the line. At this point, you can do whatever you need to with your object.
Note that JSON.parse is strict about what it accepts as input; you must quote your identifiers and string values with double quotes. In other words, {name:'thing1'} will throw an error; you must use {"name":"thing1"}.
Because no more than a chunk of data will ever be in memory at a time, this will be extremely memory efficient. It will also be extremely fast. A quick test showed I processed 10,000 rows in under 15ms.
Just as I was thinking that it would be fun to write a streaming JSON parser, I also thought that maybe I should do a quick search to see if there's one already available.
Turns out there is.
JSONStream "streaming JSON.parse and stringify"
Since I just found it, I've obviously not used it, so I can't comment on its quality, but I'll be interested to hear if it works.
It does work. Consider the following JavaScript and _.isString:
stream.pipe(JSONStream.parse('*'))
.on('data', (d) => {
console.log(typeof d);
console.log("isString: " + _.isString(d))
});
This will log objects as they come in if the stream is an array of objects. Therefore the only thing being buffered is one object at a time.
As of October 2014, you can just do something like the following (using JSONStream) - https://www.npmjs.org/package/JSONStream
var fs = require('fs'),
    JSONStream = require('JSONStream');

var getStream = function () {
    var jsonData = 'myData.json',
        stream = fs.createReadStream(jsonData, { encoding: 'utf8' }),
        parser = JSONStream.parse('*');
    return stream.pipe(parser);
};
getStream().pipe(MyTransformToDoWhateverProcessingAsNeeded).on('error', function (err) {
// handle any errors
});
To demonstrate with a working example:
npm install JSONStream event-stream
data.json:
{
"greeting": "hello world"
}
hello.js:
var fs = require('fs'),
JSONStream = require('JSONStream'),
es = require('event-stream');
var getStream = function () {
var jsonData = 'data.json',
stream = fs.createReadStream(jsonData, { encoding: 'utf8' }),
parser = JSONStream.parse('*');
return stream.pipe(parser);
};
getStream()
.pipe(es.mapSync(function (data) {
console.log(data);
}));
$ node hello.js
// hello world
I had a similar requirement: I needed to read a large JSON file in Node.js, process the data in chunks, call an API, and save the results in MongoDB.
inputFile.json is like:
{
"customers":[
{ /*customer data*/},
{ /*customer data*/},
{ /*customer data*/}....
]
}
I used JSONStream and event-stream to achieve this, processing one record at a time.
var JSONStream = require("JSONStream");
var es = require("event-stream");
fileStream = fs.createReadStream(filePath, { encoding: "utf8" });
fileStream.pipe(JSONStream.parse("customers.*")).pipe(
es.through(function(data) {
console.log("printing one customer object read from file ::");
console.log(data);
this.pause();
processOneCustomer(data, this);
return data;
},
function end() {
console.log("stream reading ended");
this.emit("end");
})
);
function processOneCustomer(data, es) {
DataModel.save(function(err, dataModel) {
es.resume();
});
}
I realize that you want to avoid reading the whole JSON file into memory if possible, however if you have the memory available it may not be a bad idea performance-wise. Using node.js's require() on a json file loads the data into memory really fast.
I ran two tests to see what the performance looked like on printing out an attribute from each feature from a 81MB geojson file.
In the first test, I read the entire geojson file into memory using var data = require('./geo.json'). That took 3330 milliseconds, and then printing out an attribute from each feature took 804 milliseconds, for a grand total of 4134 milliseconds. However, it appeared that node.js was using 411MB of memory.
In the second test, I used @arcseldon's answer with JSONStream + event-stream. I modified the JSONPath query to select only what I needed. This time the memory never went higher than 82MB; however, the whole thing now took 70 seconds to complete!
I wrote a module that can do this, called BFJ. Specifically, the method bfj.match can be used to break up a large stream into discrete chunks of JSON:
const bfj = require('bfj');
const fs = require('fs');
const stream = fs.createReadStream(filePath);
bfj.match(stream, (key, value, depth) => depth === 0, { ndjson: true })
.on('data', object => {
// do whatever you need to do with object
})
.on('dataError', error => {
// a syntax error was found in the JSON
})
.on('error', error => {
// some kind of operational error occurred
})
.on('end', error => {
// finished processing the stream
});
Here, bfj.match returns a readable, object-mode stream that will receive the parsed data items. It is passed 3 arguments:
A readable stream containing the input JSON.
A predicate that indicates which items from the parsed JSON will be pushed to the result stream.
An options object indicating that the input is newline-delimited JSON (this is to process format B from the question, it's not required for format A).
Upon being called, bfj.match will parse JSON from the input stream depth-first, calling the predicate with each value to determine whether or not to push that item to the result stream. The predicate is passed three arguments:
The property key or array index (this will be undefined for top-level items).
The value itself.
The depth of the item in the JSON structure (zero for top-level items).
Of course a more complex predicate can also be used as necessary according to requirements. You can also pass a string or a regular expression instead of a predicate function, if you want to perform simple matches against property keys.
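For instance, a short sketch of the string form, assuming format A (an array of objects that each carry a name property); matching on the key 'name' emits just those property values as they are parsed:

bfj.match(stream, 'name')
  .on('data', name => {
    // receives 'thing1', 'thing2', ... one at a time
  })
  .on('error', error => {
    // handle stream errors
  });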
If you have control over the input file, and it's an array of objects, you can solve this more easily. Arrange to output the file with each record on one line, like this:
[
{"key": value},
{"key": value},
...
This is still valid JSON.
Then, use the node.js readline module to process them one line at a time.
var fs = require("fs");
var lineReader = require('readline').createInterface({
input: fs.createReadStream("input.txt")
});
lineReader.on('line', function (line) {
line = line.trim();
if (line.charAt(line.length-1) === ',') {
line = line.substr(0, line.length-1);
}
if (line.charAt(0) === '{') {
processRecord(JSON.parse(line));
}
});
function processRecord(record) {
// Process the records one at a time here!
}
I solved this problem using the split npm module. Pipe your stream into split, and it will "Break up a stream and reassemble it so that each line is a chunk".
Sample code:
var fs = require('fs')
, split = require('split')
;
var stream = fs.createReadStream(filePath, {flags: 'r', encoding: 'utf-8'});
var lineStream = stream.pipe(split());
lineStream.on('data', function(chunk) {
var json = JSON.parse(chunk);
// ...
});
Using @josh3736's answer, but for ES2021 and Node.js 16+ with async/await + AirBnb rules:
import fs from 'node:fs';
const file = 'file.json';
/**
* @callback itemProcessorCb
* @param {object} item The current item
*/
/**
* Process each data chunk in a stream.
*
* @param {import('fs').ReadStream} readable The readable stream
* @param {itemProcessorCb} itemProcessor A function to process each item
*/
async function processChunk(readable, itemProcessor) {
let data = '';
let total = 0;
// eslint-disable-next-line no-restricted-syntax
for await (const chunk of readable) {
// join with last result, remove CRs and get lines
const lines = (data + chunk).replace(/\r/g, '').split('\n');
// clear last result
data = '';
// process lines
let line = lines.shift();
const items = [];
while (line) {
// check that it isn't an empty line or an array bracket
if (line !== '' && !/[\[\]]+/.test(line)) {
try {
// remove the last comma and parse json
const json = JSON.parse(line.replace(/\s?(,)+\s?$/, ''));
items.push(json);
} catch (error) {
// last line gets only a partial line from chunk
// so we add this to join at next loop
data += line;
}
}
// continue
line = lines.shift();
}
total += items.length;
// Process items in parallel
await Promise.all(items.map(itemProcessor));
}
console.log(`${total} items processed.`);
}
// Process each item
async function processItem(item) {
console.log(item);
}
// Init
try {
const readable = fs.createReadStream(file, {
flags: 'r',
encoding: 'utf-8',
});
processChunk(readable, processItem);
} catch (error) {
console.error(error.message);
}
For a JSON like:
[
{ "name": "A", "active": true },
{ "name": "B", "active": false },
...
]
https.get(url1 , function(response) {
var data = "";
response.on('data', function(chunk) {
data += chunk.toString();
})
.on('end', function() {
console.log(data)
});
});
I think you need to use a database. MongoDB is a good choice in this case because it is JSON compatible.
UPDATE:
You can use the mongoimport tool to import JSON data into MongoDB.
mongoimport --collection collection --file collection.json
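Note that if the file is a single JSON array (format A from the question), mongoimport needs the --jsonArray flag; for example, assuming a database named mydb:

mongoimport --db mydb --collection collection --jsonArray --file collection.json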
I have a file called testing.js (code below).
I can do whatever I like with the data in saveWeatherData, but I cannot call this function and return the data without getting 'undefined'.
For example, if I try the code below in saveWeatherData it will print out the summary as expected:
console.log(`The summary of the weather today is: ${dataArray[0]}`);
However I want to use these values within another file such as a server file that when connected to will display weather summary temperature etc.
So I need to return an array with these values in it so that I can call this function and get my data stored in an array for further use.
I know that the reason the array dataArray is returning undefined is because of asynchronous code.
The array is returned before we have gotten the data using the callback.
My question: is there any way to do what I am trying to do?
I tried my best to explain the problem and what I want to do, hopefully it's understandable.
Would I have to use a callback inside of a callback, to call back here and return the data when it's been fetched?
I just can't get my head around it and have tried multiple things to try and get the result I am looking for.
My last idea, and something I would prefer not to do, is to use the 'fs' module to save the data to a text or JSON file for use in my other files by reading the data from the saved file...
I feel I'm close but can't get over the last hurdle, so I've decided to ask for a little help; even just point me in the right direction and I'll continue to try and figure it out.
Phew...
Thank you for your time!
const request = require("request");
let dataArray = [];
let saveWeatherData = function(weatherData) {
dataArray = weatherData;
return dataArray;
};
let getWeatherData = function(callback) {
request({
url: `https://api.forecast.io/forecast/someexamplekey/1,-1`,
json: true
}, (error, response, body) => {
//Creating array to hold weather data until we can save it using callback...
let array = [];
if (error) {
console.log("Unable to connect with Dark Sky API servers.")
}
else {
console.log(`Successfully connected to Dark Sky API servers!\n`);
array.push(body.currently.summary, body.currently.temperature, body.currently.apparentTemperature, body.currently.windSpeed, body.currently.windBearing);
callback(array);
}
});
};
getWeatherData(saveWeatherData);
module.exports = {
saveWeatherData
};
My other file is called server.js:
const http = require("http");
const testing = require("./testing");
function onRequest(request, response){
let data = testing.saveWeatherData();
console.log(`A user made a request: ${request.url}`);
response.writeHead(200, {"context-type": "text/plain"});
response.write("<!DOCTYPE html>");
response.write("<html>");
response.write("<head>");
response.write("<title>Weather</title>");
response.write("</head>");
response.write("<body>");
response.write("Weather summary for today: " + data[0]);
response.write("</body>");
response.write("</html>");
response.end();
}
http.createServer(onRequest).listen(8888);
console.log("Server is now running on port 8888...");
I'm still not sure what you are trying to do. However, I think you're not exporting what you're supposed to be exporting. To avoid the use of so many callbacks you can use async/await.
Change this part of your server.js
async function onRequest(request, response) {
let data = await testing.getWeatherData();
console.log(`A user made a request: ${request.url}`);
response.writeHead(200, { 'Content-Type': 'text/html' });
response.write('<!DOCTYPE html>');
response.write('<html>');
response.write('<head>');
response.write('<title>Weather</title>');
response.write('</head>');
response.write('<body>');
response.write('Weather summary for today: ' + data[0]);
response.write('</body>');
response.write('</html>');
response.end();
}
And this part of your testing.js:
let getWeatherData = function() {
return new Promise(resolve =>
request(
{
url: `https://api.darksky.net/forecast/someexamplekey/1,-1`,
json: true
},
(error, response, body) => {
//Creating array to hold weather data until we can save it using callback...
let array = [];
if (error) {
console.log('Unable to connect with Dark Sky API servers.');
} else {
console.log(`Successfully connected to Dark Sky API servers!\n`);
array.push(
body.currently.summary,
body.currently.temperature,
body.currently.apparentTemperature,
body.currently.windSpeed,
body.currently.windBearing
);
resolve(array);
}
}
)
);
};
module.exports = {
getWeatherData
};
It will check for new weather data on each request. If you want to save the result to avoid checking every single time, you will need to do something else. But I think for a weather app the important thing is to keep it updated.
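If you do want to avoid hitting the API on every request, a rough sketch of one way to cache the result is shown below; the names CACHE_TTL_MS and getWeatherDataCached are made up for illustration:

const CACHE_TTL_MS = 10 * 60 * 1000; // refresh at most every 10 minutes (arbitrary choice)
let cachedData = null;
let cachedAt = 0;

async function getWeatherDataCached() {
  if (!cachedData || Date.now() - cachedAt > CACHE_TTL_MS) {
    cachedData = await getWeatherData(); // reuse the promise-based function above
    cachedAt = Date.now();
  }
  return cachedData;
}

module.exports = {
  getWeatherData,
  getWeatherDataCached
};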
I have a route in my MEEN app that accepts a file upload and then passes the data to a helper module that parses the file and stores it in an array. Once this helper module finishes, I want to pass the array off to another module that will process it. For some reason, even though I have a return statement at the end of the helper, the result is undefined and calling .then on it fails.
route:
router.route('/tools/sku/reactivate').post(upload.single('csvdata'),function(req,res){
console.log('handing request over to helper.csv');
csv.reader(req,res).then(sku.reactivate(data));
});
csv.reader:
var csv = require('csv-parse');
var multer = require('multer');
var fs = require('fs');
module.exports.reader = function(req,res){
//define array for holding csv data in this case skus
const csvArray = [];
//max number of columns in csv
const maxColumns = parseInt(req.body.maxColumns);
//create an array of column headers to check chunks against so we dont parse headers
let columnHeader = req.body.moduleTarget.split(',');
//loopThrough Array to create arrays inside container array for each column
for(var i = 0; i < maxColumns; i++){
csvArray.push([]);
}
//define filesystem readstream from uploaded file
let readStream = fs.createReadStream(req.file.path).pipe(csv());
//push csv data to array ignoring headers to csvArray
readStream.on('data', function(chunk){
//get number of keys in the dataChunk
let chunkLength = Object.keys(chunk).length;
//check column count on csv if greater than expected throw error
if(chunk[maxColumns]){
throw '[ERROR] More columns than expected in CSV, please fix and try again';
}else{
//loop through chunk keys and store each one in csvArray by index
for(var i = 0; i < chunkLength; i++){
//if chunk at this index doesnt equal column header and chunk at this index exists push to array
if(chunk[i] !== columnHeader[i] && chunk[i]){
csvArray[i].push(chunk[i]);
}
}
}
});
//error handling
readStream.on('error',function(err){
console.log('Error while reading file stream '+ err);
res.json({message:err,errorType:'1'});
});
readStream.on('end',function(){
console.log('finished reading csv returning array back to router to process next middleware');
return csvArray;
});
}
I get an error right after the console.log message in the readStream 'end' listener.
From the Express website. https://expressjs.com/en/advanced/best-practice-performance.html
TL;DR
Try wrapping the contents of your reader module inside a promise and return the promise.
...However, there are two caveats:
All your asynchronous code must return promises (except emitters). If a particular library does not return promises, convert the base object by using a helper function like Bluebird.promisifyAll().
Event emitters (like streams) can still cause uncaught exceptions. So make sure you are handling the error event properly; for example:
app.get('/', wrap(async (req, res, next) => {
let company = await getCompanyById(req.query.id)
let stream = getLogoStreamById(company.id)
stream.on('error', next).pipe(res)
}))
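Note that wrap in the snippet above is not part of Express itself; a minimal sketch of such a helper simply forwards a rejected promise to next() so Express's error handling kicks in:

const wrap = fn => (req, res, next) => fn(req, res, next).catch(next);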
For example:
module.exports.reader = function(req,res){
return new Promise((resolve, reject)=>{
//define array for holding csv data in this case skus
const csvArray = [];
//max number of columns in csv
const maxColumns = parseInt(req.body.maxColumns);
//create an array of column headers to check chunks against so we dont parse headers
let columnHeader = req.body.moduleTarget.split(',');
...
//error handling
readStream.on('error',function(err){
console.log('Error while reading file stream '+ err);
res.json({message:err,errorType:'1'});
reject(err); // make sure the caller's promise settles on failure
});
readStream.on('end',function(){
console.log('finished reading csv returning array back to router to process next middleware');
resolve(csvArray); // resolve the promise with the parsed data instead of returning it
});
})
}
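With the end handler resolving the array (resolve(csvArray) rather than return), the route from the question could consume the promise roughly like this; a sketch assuming sku.reactivate accepts the parsed array:

router.route('/tools/sku/reactivate').post(upload.single('csvdata'), function (req, res) {
  console.log('handing request over to helper.csv');
  csv.reader(req, res)
    .then(csvArray => sku.reactivate(csvArray)) // pass the resolved array on
    .catch(err => console.error(err));
});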
I have got a Node.js server that requests data from two web servers: bbc.co.uk and sky.com. Then the RSS feeds are parsed, and a user sees two lists: one from the BBC and one from Sky.
Here is the code.
var feed = require('feed-read');
var http = require('http');
var async = require('async');
var request = require('request');
var LIMIT = 10;
var UNABLE_TO_CONNECT = "Unable to connect.";
var BBC_URL = 'http://feeds.bbci.co.uk/news/rss.xml';
var SKY_URL = 'http://news.sky.com/feeds/rss/home.xml';
var server = http.createServer(onRequest);
server.listen(9000);
function onRequest(req, res) {
res.writeHead(200, {
'Content-Type' : 'text/html; charset=utf-8'
});
async.parallel([ function(callback) {
feed(BBC_URL, onRssFetched);
// TODO: where to call callback()?
}, function(callback) {
feed(SKY_URL, onRssFetched);
// TODO: where to call callback()?
} ], function done(err, results) {
console.log("Done");
if (err) {
throw err;
}
});
}
function onRssFetched(err, articles) {
console.log("RSS fetched");
var html = [];
if (err) {
html.push("<p>", UNABLE_TO_CONNECT = "</p>");
} else {
html.push("<ol>");
var i = 0;
articles.forEach(function(entry) {
if (i == LIMIT) {
return;
}
html.push("<li><a href='" + entry.link + "'>" + entry.title
+ "</a></li>");
i++;
});
}
console.log(html.join(""));
}
Now I don't know how to add the result to the web page. If I call callback() right after calling the feed method, callback() will be executed without waiting until feed has completed its job. On the other hand, I can't pass callback to feed. Maybe the approach is wrong, and I need some other module for RSS parsing.
@Maksim I know your original question included the async module, but I propose an alternative:
why not stream each article to the client as it comes in rather than waiting for all RSS feeds to return before sending a response...?
By using async.parallel you are telling node:
"wait until we have a response from all these news services
and only then (combine the articles into) a single response to the client ..."
This uses up memory for each connected client while you wait for all responses (from the RSS news services) ... wasteful.
So I've written my answer without resorting to async.
And, instead of waiting for ages (while async combines all the feeds into one),
the client sees news as soon as the first rss feed returns!
var feed = require('feed-read'), // require the feed-read module
http = require("http"),
urls = [
"http://feeds.bbci.co.uk/news/rss.xml",
"http://news.sky.com/feeds/rss/home.xml",
"http://www.techmeme.com/feed.xml"
]; // Example RSS Feeds
http.createServer(function (req, res) {
// send basic http headers to client
res.writeHead(200, {
"Content-Type": "text/html",
"Transfer-Encoding": "chunked"
});
// setup simple html page:
res.write("<html>\n<head>\n<title>RSS Feeds</title>\n</head>\n<body>");
// loop through our list of RSS feed urls
for (let j = 0; j < urls.length; j++) { // use let so each feed callback sees its own j
// fetch rss feed for the url:
feed(urls[j], function(err, articles) {
// loop through the list of articles returned
for (var i = 0; i < articles.length; i++) {
// stream article title (and what ever else you want) to client
res.write("<h3>"+articles[i].title +"</h3>");
// check we have reached the end of our list of articles & urls
if( i === articles.length-1 && j === urls.length-1) {
res.end("</body>\n</html>"); // end http response
} // else still have rss urls to check
} // end inner for loop
}); // end call to feed (feed-read) method
} // end urls for loop
}).listen(9000);
Key Advantages:
The people connecting to your app will see news/results Much faster (almost instantly!)
Your app uses much less memory
You don't have to edit/update any code when you add new RSS news feeds!
For even more detail/notes on this solution
see: https://github.com/nelsonic/node-parse-rss
No, you don't need another library. But what you need to do is to hand over callback to your feed function instead of onRssFetched. This way the single RSS feeds are handed over to the final callback in your async.parallel call, using the result variable.
In this variable you then have access to both RSS feeds at the same time, and you can do whatever you want to do with them.
So, basically your logic needs to be:
async.parallel({
bbc: function (callback) {
feed(BBC_URL, callback);
},
sky: function (callback) {
feed(SKY_URL, callback);
}
}, function (err, result) {
if (err) {
// Somewhere, something went wrong…
}
var rssBbc = result.bbc,
rssSky = result.sky;
// Merge the two feeds or deliver them to the client or do
// whatever you want to do with them.
});
And that's it :-).
To amplify @nelsonic's answer (enough that I feel this warrants its own answer), feed-read already processes asynchronously. At its heart, it's still running on http.request. If you look at the code, you see that you can even pass in an array of URLs directly and it will loop through them, but it uses more of an "async.eachSeries" approach, where the next call only occurs after the previous one completes, which appears not to be what you're looking for.
If you truly want to wait for calls to complete first before handling them, you're better off asynchronously buffering the data, then using underscore's _.after() to run after all URLs have finished.
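For reference, _.after(count, fn) returns a wrapper that only invokes fn once it has been called count times; a rough sketch of buffering every feed that way, assuming the urls array from the previous answer:

var _ = require('underscore');

var results = [];
var allDone = _.after(urls.length, function () {
  // every feed has responded; results now holds each article list
  console.log('fetched %d feeds', results.length);
});

urls.forEach(function (url) {
  feed(url, function (err, articles) {
    results.push({ url: url, articles: err ? [] : articles });
    allDone(); // only runs the callback above on the final call
  });
});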
But odds are, what you really want to do (unless you're just looking for an example to try out async) is @nelsonic's answer.
I would ideally stream the RSS data instead of aggregating it in memory. @nelsonic has explained the correct approach to solve this problem.
Still, if we were to get your code running, consider the following code:
var util = require('util');
var http = require('http');
var async = require('async');
var feed = require('feed-read');
var request = require('request');
var LIMIT = 10;
var UNABLE_TO_CONNECT = 'Unable to connect.';
var BBC_URL = 'http://feeds.bbci.co.uk/news/rss.xml';
var SKY_URL = 'http://news.sky.com/feeds/rss/home.xml';
var server = http.createServer(onRequest);
server.listen(9000);
function onRequest(req, res) {
util.log('Request recieved!');
res.writeHead(200, {
'Content-Type': 'text/html; charset=utf-8'
});
async.parallel({
bbc: function (callback) {
feed(BBC_URL, function (err, articles) {
var html = onRssFetched(err, articles);
callback(err, html);
});
},
sky: function (callback) {
feed(SKY_URL, function (err, articles) {
var html = onRssFetched(err, articles);
callback(err, html);
});
}
}, done);
function done(err, results) {
util.log('Received results: ' + Object.keys(results).join(','));
if (!err && results) {
var entry, html;
for (entry in results) {
html = results[entry];
res.write(html.join(''));
}
util.log('Send complete!');
res.end();
} else {
console.log(err || 'no data in results');
res.end('Unable to process your request');
}
}
}
function onRssFetched(err, articles) {
// limit number of articles (skip when the feed errored and articles is undefined)
if (!err) articles = articles.slice(0, LIMIT);
var html = [];
if (err) {
html.push('<p>', UNABLE_TO_CONNECT, '</p>');
} else {
html.push('<ol>');
articles.forEach(function (entry) {
html.push('<li>' + entry.title + '</li>');
});
html.push('</ol>');
}
return html;
}
// -- Test Code ---------------------------------------------------------
if (require.main === module) {
(function () {
var req, res = {
writeHead: console.log,
write: console.log,
end: console.log
};
// onRequest(req, res);
})();
}
Let me know if you face any issues.