I have got a Node.JS server that requests data from two web servers: bbc.co.uk and sky.com. Then the RSS feeds are parsed, and a user sees two lists: from BBC and from sky.
Here is the code.
var feed = require('feed-read');
var http = require('http');
var async = require('async');
var request = require('request');
var LIMIT = 10;
var UNABLE_TO_CONNECT = "Unable to connect.";
var BBC_URL = 'http://feeds.bbci.co.uk/news/rss.xml';
var SKY_URL = 'http://news.sky.com/feeds/rss/home.xml';
var server = http.createServer(onRequest);
server.listen(9000);
function onRequest(req, res) {
res.writeHead(200, {
'Content-Type' : 'text/html; charset=utf-8'
});
async.parallel([ function(callback) {
feed(BBC_URL, onRssFetched);
// TODO: where to call callback()?
}, function(callback) {
feed(SKY_URL, onRssFetched);
// TODO: where to call callback()?
} ], function done(err, results) {
console.log("Done");
if (err) {
throw err;
}
});
}
function onRssFetched(err, articles) {
console.log("RSS fetched");
var html = [];
if (err) {
html.push("<p>", UNABLE_TO_CONNECT = "</p>");
} else {
html.push("<ol>");
var i = 0;
articles.forEach(function(entry) {
if (i == LIMIT) {
return;
}
html.push("<li><a href='" + entry.link + "'>" + entry.title
+ "</a></li>");
i++;
});
}
console.log(html.join(""));
}
Now I don't know how to add the result to the web page. If I call callback() right after calling the feed method, callback() will be executed without waiting until feed has completed its job. On the other hand, I can't pass callback to feed. Maybe the approach is wrong, and I need some other module for RSS parsing.
#Maksim I know your original question included the async module, but propose an alternative:
why not stream each article to the client as it comes in rather than waiting for all RSS feeds to return before sending a response...?
By using async.parallel you are telling node:
"wait until we have a response from all these news services
and only then (combine the articles into) a single response to the client ..."
This uses up memory for each connected client while you wait for all responses (from the RSS news services) ... wasteful.
So I've written my answer without resorting to async.
And, instead of waiting for ages (while async combines all the feeds into one),
the client sees news as soon as the first rss feed returns!
var feed = require('feed-read'), // require the feed-read module
http = require("http"),
urls = [
"http://feeds.bbci.co.uk/news/rss.xml",
"http://news.sky.com/feeds/rss/home.xml",
"http://www.techmeme.com/feed.xml"
]; // Example RSS Feeds
http.createServer(function (req, res) {
// send basic http headers to client
res.writeHead(200, {
"Content-Type": "text/html",
"Transfer-Encoding": "chunked"
});
// setup simple html page:
res.write("<html>\n<head>\n<title>RSS Feeds</title>\n</head>\n<body>");
// loop through our list of RSS feed urls
for (var j = 0; j < urls.length; j++) {
// fetch rss feed for the url:
feed(urls[j], function(err, articles) {
// loop through the list of articles returned
for (var i = 0; i < articles.length; i++) {
// stream article title (and what ever else you want) to client
res.write("<h3>"+articles[i].title +"</h3>");
// check we have reached the end of our list of articles & urls
if( i === articles.length-1 && j === urls.length-1) {
res.end("</body>\n</html>"); // end http response
} // else still have rss urls to check
} // end inner for loop
}); // end call to feed (feed-read) method
} // end urls for loop
}).listen(9000);
Key Advantages:
The people connecting to your app will see news/results Much faster (almost instantly!)
Your app uses much less memory
You don't have to edit/update any code when you add new RSS news feeds!
For even more detail/notes on this solution
see: https://github.com/nelsonic/node-parse-rss
No, you don't need another library. But what you need to do is to hand over callback to your feed function instead of onRssFetched. This way the single RSS feeds are handed over to the final callback in your async.parallel call, using the result variable.
In this variable you then have access to both RSS feeds at the same time, and you can do whatever you want to do with them.
So, basically your logic needs to be:
async.parallel({
bbc: function (callback) {
feed(BBC_URL, callback);
},
sky: function (callback) {
feed(SKY_URL, callback);
}
}, function (err, result) {
if (err) {
// Somewhere, something went wrong…
}
var rssBbc = result.bbc,
rssSky = result.sky;
// Merge the two feeds or deliver them to the client or do
// whatever you want to do with them.
});
And that's it :-).
To amplify #nelsonic's answer (enough that I feel this warrants its own answer), feed-parse already processes asynchronously. At its heart, it's still running on http.request. If you look at the code, you see that you can even pass in an array of URLs directly and it will loop through them, but it uses more of an "async.eachSeries" approach, where the next call only occurs after the previous one completes, which appears not to be what you're looking for.
If you truly want to wait for calls to complete first before handling them, you're better off asynchronously buffering the data, then using underscore's _.after() to run after all URLs have finished.
But odds are, what you really want to do (unless you're just looking for an example to try out async) is #nelsonic's answer.
I would ideally stream the rss data, instead of aggregating in memory. #nelsonic has explained the correct approach to solve this problem.
Still, if we were to make your code running, consider following code:
var util = require('util');
var http = require('http');
var async = require('async');
var feed = require('feed-read');
var request = require('request');
var LIMIT = 10;
var UNABLE_TO_CONNECT = 'Unable to connect.';
var BBC_URL = 'http://feeds.bbci.co.uk/news/rss.xml';
var SKY_URL = 'http://news.sky.com/feeds/rss/home.xml';
var server = http.createServer(onRequest);
server.listen(9000);
function onRequest(req, res) {
util.log('Request recieved!');
res.writeHead(200, {
'Content-Type': 'text/html; charset=utf-8'
});
async.parallel({
bbc: function (callback) {
feed(BBC_URL, function (err, articles) {
var html = onRssFetched(err, articles);
callback(err, html);
});
},
sky: function (callback) {
feed(SKY_URL, function (err, articles) {
var html = onRssFetched(err, articles);
callback(err, html);
});
}
}, done);
function done(err, results) {
util.log('Received results: ' + Object.keys(results).join(','));
if (!err && results) {
var entry, html;
for (entry in results) {
html = results[entry];
res.write(html.join(''));
}
util.log('Send complete!');
res.end();
} else {
console.log(err || 'no data in results');
res.end('Unable to process your request');
}
}
}
function onRssFetched(err, articles) {
// limit number of articles;
articles = articles.slice(0, LIMIT);
var html = [];
if (err) {
html.push('<p>', UNABLE_TO_CONNECT = '</p>');
} else {
html.push('<ol>');
articles.forEach(function (entry) {
html.push('<li>' + entry.title + '</li>');
});
html.push('</ol>');
}
return html;
}
// -- Test Code ---------------------------------------------------------
if (require.main === module) {
(function () {
var req, res = {
writeHead: console.log,
write: console.log,
end: console.log
};
// onRequest(req, res);
})();
}
Let me know if you face any issues.
Related
I use watson assistant v1
My problem is that every time I make a call to the code in Nodejs, where I return the context, to have a coordinated conversation, the context is only updated once and I get stuck in a node of the conversation
this is my code
client.on('message', message => {
//general variables
var carpetaIndividual = <../../../>
var cuerpoMensaje = <....>
var emisorMensaje = <....>
//detect if context exists
if(fs.existsSync(carpetaIndividual+'/contexto.json')) {
var watsonContexto = require(carpetaIndividual+'/contexto.json');
var variableContexto = watsonContexto;
} else {
var variableContexto = {}
}
//conection with Watson Assistant
assistant.message(
{
input: { text: cuerpoMensaje },
workspaceId: '<>',
context: variableContexto,
})
.then(response => {
let messageWatson = response.result.output.text[0];
let contextoWatson = response.result.context;
console.log('Chatbot: ' + messageWatson);
//Save and create JSON file for context
fs.writeFile(carpetaIndividual+'/contexto.json', JSON.stringify(contextoWatson), 'utf8', function (err) {
if (err) {
console.error(err);
}
});
//Send messages to my application
client.sendMessage(emisorMensaje, messageWatson)
})
.catch(err => {
console.log(err);
});
}
client.initialize();
the context.json file is updated, but when it is read the code only reads the first update of the context.json file and not the other updates
This will be because you are using require to read the .json file. For all subsequent requires of an already-required file, the data is cached and reused.
You will need to use fs.readfile and JSON.parse
// detect if context exists
if (fs.existsSync(carpetaIndividual+'/contexto.json')) {
var watsonContexto = fs.readFileSync(carpetaIndividual+'/contexto.json');
// Converting to JSON
var variableContexto = JSON.parse(watsonContexto);
} else {
var variableContexto = {}
}
There is another subtle problem with your code, in that you are relying on
your async call to fs.writeFile completing before you read the file. This will be the case most of the time, but as you don't wait for the fs.writeFile to complete there is the chance that you may try to read the file, before it is written.
File called testing.js
I can do whatever I like with the data in saveWeatherData but cannot call this function and return the data without getting 'undefined'
For example if i tried the below code in saveWeatherData it will print out the summary as expected...
console.log(The summary of the weather today is: ${dataArray[0]});
However I want to use these values within another file such as a server file that when connected to will display weather summary temperature etc.
So I need to return an array with these values in it so that I can call this function and get my data stored in an array for further use.
I know that the reason the array --dataArray is returning undefined is because asynchronous code.
The array is returned before we have gotten the data using the callback.
My question, is there anyway to do what I am trying to do?
I tried my best to explain the problem and what I want to do, hopefully its understandable.
Would I have to use a callback inside of a callback? To callback here to return the data when its been fetched?
I just cant get my head about it and have tried multiple things to try and get the result I am looking for.
My last idea and something i would prefer not to do is the use the 'fs' module to save the data to a text or json file for use in my other files through reading the data from the saved file...
I feel im close but cant get over the last hurdle, so ive decided to ask for a little help, even just point me on the right track and Ill continue to try and figure it out.
Phew...
Thank you for your time!
const request = require("request");
let dataArray = [];
let saveWeatherData = function(weatherData) {
dataArray = weatherData;
return dataArray;
};
let getWeatherData = function(callback) {
request({
url: `https://api.forecast.io/forecast/someexamplekey/1,-1`,
json: true
}, (error, response, body) => {
//Creating array to hold weather data until we can save it using callback...
let array = [];
if (error) {
console.log("Unable to connect with Dark Sky API servers.")
}
else {
console.log(`Successfully connected to Dark Sky API servers!\n`);
array.push(body.currently.summary, body.currently.temperature, body.currently.apparentTemperature, body.currently.windSpeed, body.currently.windBearing);
callback(array);
}
});
};
getWeatherData(saveWeatherData);
module.exports = {
saveWeatherData
};
My Other File...
File called server.js
const http = require("http");
const testing = require("./testing");
function onRequest(request, response){
let data = testing.saveWeatherData();
console.log(`A user made a request: ${request.url}`);
response.writeHead(200, {"context-type": "text/plain"});
response.write("<!DOCTYPE html>");
response.write("<html>");
response.write("<head>");
response.write("<title>Weather</title>");
response.write("</head>");
response.write("<body>");
response.write("Weather summary for today: " + data[0]);
response.write("</body>");
response.write("</html>");
response.end();
}
http.createServer(onRequest).listen(8888);
console.log("Server is now running on port 8888...");
I'm still not sure about what are you trying to do. However, I think you're not exporting what you suppose to be exporting. To avoid the use of so many callbacks you may use async/await.
Change this part of your server.js
async function onRequest(request, response) {
let data = await testing.getWeatherData();
console.log(`A user made a request: ${request.url}`);
response.writeHead(200, { 'context-type': 'text/plain' });
response.write('<!DOCTYPE html>');
response.write('<html>');
response.write('<head>');
response.write('<title>Weather</title>');
response.write('</head>');
response.write('<body>');
response.write('Weather summary for today: ' + data[0]);
response.write('</body>');
response.write('</html>');
response.end();
}
And this of your testing.
let getWeatherData = function() {
return new Promise(resolve =>
request(
{
url: `https://api.darksky.net/forecast/someexamplekey/1,-1`,
json: true
},
(error, response, body) => {
//Creating array to hold weather data until we can save it using callback...
let array = [];
if (error) {
console.log('Unable to connect with Dark Sky API servers.');
} else {
console.log(`Successfully connected to Dark Sky API servers!\n`);
array.push(
body.currently.summary,
body.currently.temperature,
body.currently.apparentTemperature,
body.currently.windSpeed,
body.currently.windBearing
);
resolve(array);
}
}
)
);
};
module.exports = {
getWeatherData
};
It will check for new Weather in each request. If you want to save the result to avoid checking every single time you might need to do something else. But I think for a weather app the important is to keep it updated.
Recently I started learning a little bit about Node.js and it's capabilities and tried to use it for some web services.
I wanted to create a web service which will serve as a proxy for web requests.
I wanted my service to work that way:
User will access my service -> http://myproxyservice.com/api/getuserinfo/tom
My service will perform request to -> http://targetsite.com/user?name=tom
Responded data would get reflected to the user.
To implement it I used the following code:
app.js:
var express = require('express');
var bodyParser = require('body-parser');
var app = express();
app.use(bodyParser.urlencoded({ extended: false }));
app.use(bodyParser.json());
var proxy = require('./proxy_query.js')
function makeProxyApiRequest(name) {
return proxy.getUserData(name, parseProxyApiRequest);
}
function parseProxyApiRequest(data) {
returned_data = JSON.parse(data);
if (returned_data.error) {
console.log('An eror has occoured. details: ' + JSON.stringify(returned_data));
returned_data = '';
}
return JSON.stringify(returned_data);
}
app.post('/api/getuserinfo/tom', function(request, response) {
makeProxyApiRequest('tom', response);
//response.end(result);
});
var port = 7331;
proxy_query.js:
var https = require('https');
var callback = undefined;
var options = {
host: 'targetsite.com',
port: 443,
method: 'GET',
};
function resultHandlerCallback(result) {
var buffer = '';
result.setEncoding('utf8');
result.on('data', function(chunk){
buffer += chunk;
});
result.on('end', function(){
if (callback) {
callback(buffer);
}
});
}
exports.getUserData = function(name, user_callback) {
callback = user_callback
options['path'] = user + '?name=' + name;
var request = https.get(options, resultHandlerCallback);
request.on('error', function(e){
console.log('error from proxy_query:getUserData: ' + e.message)
});
request.end();
}
app.listen(port);
I wish I didn't screwed this code because I replaced some stuff to fit my example.
Anyway, the problem is that I want to post the response to the user when the HTTP request is done and I cant find how to do so because I use express and express uses asynchronous calls and so do the http request.
I know that if I want to do so, I should pass the makeProxyApiRequest the response object so he would be able to pass it to the callback but it is not possible because of asyn problems.
any suggestions?
help will be appreciated.
As you're using your functions to process requests inside your route handling, it's better to write them as express middleware functions, taking the specific request/response pair, and making use of express's next cascade model:
function makeProxyApiRequest(req, res, next) {
var name = parseProxyApiRequest(req.name);
res.locals.userdata = proxy.getUserData(name);
next();
}
function parseProxyApiRequest(req, res, next) {
try {
// remember that JSON.parse will throw if it fails!
data = JSON.parse(res.locals.userdata);
if (data .error) {
next('An eror has occoured. details: ' + JSON.stringify(data));
}
res.locals.proxyData = data;
next();
}
catch (e) { next("could not parse user data JSON."); }
}
app.post('/api/getuserinfo/tom',
makeProxyApiRequest,
parseProxyApiRequest,
function(req, res) {
// res.write or res.json or res.render or
// something, with this specific request's
// data that we stored in res.locals.proxyData
}
);
Even better would be to move those middleware functions into their own file now, so you can simply do:
var middleware = require("./lib/proxy_middleware");
app.post('/api/getuserinfo/tom',
middleware.makeProxyApiRequest,
middleware.parseProxyApiRequest,
function(req, res) {
// res.write or res.json or res.render or
// something, with this specific request's
// data that we stored in res.locals.proxyData
}
);
And keep your app.js as small as possible. Note that the client's browser will simply wait for a response by express, which happens once res.write, res.json or res.render etc is used. Until then the connection is simply kept open between the browser and the server, so if your middleware calls take a long time, that's fine - the browser will happily wait a long time for a response to get sent back, and will be doing other things in the mean time.
Now, in order to get the name, we can use express's parameter construct:
app.param("name", function(req, res, next, value) {
req.params.name = value;
// do something if we need to here, like verify it's a legal name, etc.
// for instance:
var isvalidname = validator.checkValidName(name);
if(!isvalidname) { return next("Username not valid"); }
next();
});
...
app.post("/api/getuserinfo/:name", ..., ..., ...);
Using this system, the :name part of any route will be treated based on the name parameter we defined using app.param. Note that we don't need to define this more than once: we can do the following and it'll all just work:
app.post("/api/getuserinfo/:name", ..., ..., ...);
app.post("/register/:name", ..., ..., ... );
app.get("/api/account/:name", ..., ..., ... );
and for every route with :name, the code for the "name" parameter handler will kick in.
As for the proxy_query.js file, rewriting this to a proper module is probably safer than using individual exports:
// let's not do more work than we need: http://npmjs.org/package/request
// is way easier than rolling our own URL fetcher. In Node.js the idea is
// to write as little as possible, relying on npmjs.org to find you all
// the components that you need to glue together. If you're writing more
// than just the glue, you're *probably* doing more than you need to.
var request = require("request");
module.exports = {
getURL: function(name, url, callback) {
request.get(url, function(err, result) {
if(err) return callback(err);
// do whatever processing you need to do to result:
var processedResult = ....
callback(false, processedResult);
});
}
};
and then we can use that as proxy = require("./lib/proxy_query"); in the middleware we need to actually do the URL data fetching.
I am currently trying to return a request of all the file names (in each existing folder) on a particular website. My web application is using NodeJS, Express, Cheerio, and Request to web scrape. My code is first getting a list of all the folder names. After retrieving a list of folder names, it then goes inside each folder name to get a list of file names and store them in the 'files' array. Finally, the 'files' array is what will be sent to the client-side.
Right now I am having a big issue with asynchronous stuff because my request would always return an empty list of 'files'. I have the Q node module installed and have tried using promises, but have had no luck getting the results I want. I am still new to nodeJS and would love it if someone can help me out.. :)
exports.getAllImages = function(req, res) {
var folders = [];
var files = [];
//Step 1: Get folder names and store all of them in the 'folders' array
var foldersUrl = 'http://students.washington.edu/jmzhwng/Images/';
request(foldersUrl, function(error, response, html){
if(!error){
var $ = cheerio.load(html);
$("a:contains('-')").filter(function(){
var data = $(this)[0].attribs.href;
folders.push(data);
})
//Step 2: Using the 'folders' array, get file names in each folder and store all of them in the 'files' array
for (var i=0; i < folders.length; i++) {
var imagesUrl = 'http://students.washington.edu/jmzhwng/Images/' + folders[i];
request(imagesUrl, function(error, response, html){
if(!error){
var $ = cheerio.load(html);
$("a:contains('.')").filter(function(){
var data = $(this)[0].attribs.href;
files.push(data);
})
}
})
}
//Step 3: Return all file names to client-side
res.json({
images: files
}, 200);
console.log('GET ALL IMAGES - ' + JSON.stringify(files));
}
})
For better readability or support, you can view the JSFiddle I created here: http://jsfiddle.net/fKGrm/
You don’t necessarily need promises for this—you’re 95% of the way there already without them. The main issue, as I think you’re aware, is that your response is being sent before the image requests come back. You just need to wait for those to finish before you send the response.
The most basic way is to count the number of callbacks you receive in your Step 2. When it equals the folders.length, then send your response.
Here’s a simplified version of that:
var request = require('request'),
cheerio = require('cheerio');
var baseUrl = 'http://students.washington.edu/jmzhwng/Images/';
var files = [];
request(baseUrl, function (error, res, body) {
var folders = folderLinks(cheerio.load(body));
count = 0;
folders.forEach(function (folder) {
request(baseUrl + folder, function (error, res, body) {
files.push.apply(files, fileLinks(cheerio.load(body)));
if (++count == folders.length) {
console.log(files);
}
});
});
});
function folderLinks ($) {
return $('a:contains(-)').get().map(function (a) {
return a.attribs.href;
});
}
function fileLinks ($) {
return $('a:contains(.)').get().map(function (a) {
return a.attribs.href;
});
}
My problem seems to be the opposite of every node.js question :-) I have a simple forEach loop to read a list of files and insert them into a Couchbase database. This works great, but it never exits after reading all the lines. So I added a counter to shutdown the couchbase connection after all inserts are complete. This works.
This process is intended to load hundreds of thousands of files, so I brought the async module into the mix to batch the inserts into groups of 100. The async.eachLimit is used to iterate over the array and insert documents in batches. Now the orig problem is back. Whatever magic async.eachLimit uses to recognize the process is complete is not happening.
I've been going through javascript scoping, callbacks, async, etc. Google searches are hitting keywords but not this issue. I've reduced the code down to the following testcase. To test, create three files and add their names to testlist.txt.
The async.eachLimit in place works up until it hits the limit, then hangs. Comment this out and uncomment array.forEach line and it works. Thanks in advance!
var fs = require('fs');
var couchbase = require('couchbase');
var async = require('async');
var filelist = 'testlist.txt';
var key_count = 0;
var cb_config = { host: 'localhost:8091', bucket: 'default'};
var db = new couchbase.Connection(cb_config, function(err) {
if (err) {
console.log('ERRR connect to couchbase at config['+cb_config+']');
throw err;
}
});
var insertFile=function(line) {
console.log('LOAD ['+line+']');
fs.readFile(line, function(file_err, f_doc) {
if(file_err) throw file_err;
db.set(line, f_doc, function(db_err, db_res){
if (db_err) {
console.log('FAIL ['+line+'] err['+db_err+']');
} else {
console.log('PASS ['+line+']');
}
key_count--;
if (key_count == 0) {
console.log('DONE Shutting down client, no more keys');
db.shutdown();
}
});
});
}
// read list of files into data array from file filelist
fs.readFile(filelist, function(filelist_err, lines) {
if(filelist_err) throw filelist_err;
// HACK split adds empty line to array, use replace to fix
var array = lines.toString().replace(/\n$/, '').split('\n');
key_count = array.length;
console.log('INIT lines['+key_count+']');
async.eachLimit(array, 2, insertFile, function(err) { console.log('FAIL async err['+err+']');} );
//array.forEach(function(data){insertFile(data);return;});
});
Testcase output using array.forEach:
INIT lines[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
LOAD [files.txt]
PASS [files.little.txt]
PASS [files.big.txt]
PASS [files.txt]
DONE Shutting down client, no more keys
Testcase output using async.eachLimit:
INIT lines[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
PASS [files.little.txt]
PASS [files.big.txt]
... hang, never gets to 3...
After review with a coworker, they spotted my mistake. I missed the async callback in my insertFile function. Adding that in works and allows me to remove the key counter! Solution code below:
var fs = require('fs');
var couchbase = require('couchbase');
var async = require('async');
var filelist = 'testlist.txt';
var key_count = 0;
var cb_config = { host: 'localhost:8091', bucket: 'default'};
var db = new couchbase.Connection(cb_config, function(err) {
if (err) {
console.log('ERRR connect to couchbase at config['+cb_config+']');
throw err;
}
});
var insertFile=function(line, callback) {
console.log('LOAD ['+line+']');
fs.readFile(line, function(file_err, f_doc) {
if(file_err) throw file_err;
db.set(line, f_doc, function(db_err, db_res){
if (db_err) {
console.log('FAIL ['+line+'] err['+db_err+']');
callback(db_err);
} else {
console.log('PASS ['+line+']');
callback();
}
});
});
}
// read list of files into data array from file filelist
fs.readFile(filelist, function(filelist_err, data) {
if(filelist_err) throw filelist_err;
// HACK stoopid bug split adds empty line to array, use replace to fix
var array = data.toString().replace(/\n$/, '').split('\n');
key_count = array.length;
console.log('READ files['+key_count+']');
async.eachLimit(array, 2, insertFile, function(err) {
if (err) console.log('LAST with async err['+err+']');
console.log('DONE Shutting down client, no more keys');
db.shutdown();
});
});
And successful output:
$ node testcase.js
READ files[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
PASS [files.little.txt]
LOAD [files.txt]
PASS [files.big.txt]
PASS [files.txt]
DONE Shutting down client, no more keys