I have an issue where, if I try to use request more than once, it doesn't work. Here's an example:
request('https://www.google.com', function (error, response, vBody1) {
    request('https://www.purple.com', function (error, response, vBody2) {
        request('https://www.stackoverflow.com', function (error, response, vBody3) {
            console.log(vBody3);
        });
        console.log(vBody2);
    });
    console.log(vBody1);
});
It reads and outputs the body of the first site, but not the next two.
Here is another example:
vSites = ['https://www.google.com','https://www.purple.com','https://www.stackoverflow.com'];
for (i = 0; i < vSites.length; i++){
    request(vSites[i], function (error, response, vBody[i]) {
        console.log(vBody[i]);
    });
}
This one looks cleaner and I would prefer this version, but it doesn't work at all. What am I doing wrong?
In your loop version you don't need the index on the vBody variable.
var request = require('request');

vSites = ['https://www.google.com','https://www.purple.com','https://www.stackoverflow.com'];
for (i = 0; i < vSites.length; i++){
    request(vSites[i], function (error, response, vBody) {
        console.log(vBody);
    });
}
This code works fine for me on node v4.3.2.
In addition to the correct solution in Michael's reply, be aware that the for loop sends the requests in parallel. Data and error handling can become challenging if you want to do more than console.log.
To better coordinate the requests, I would suggest an asynchronous control-flow library such as async or bluebird. Roughly speaking, async is easier to pick up, while bluebird's promise style gives more readable code.
Below is an example async implementation:
var async = require('async');
var request = require('request');

var vSites = ['https://www.google.com', 'http://www.purple.com', 'https://www.stackoverflow.com'];

async.map(
    vSites,
    // `async` will call this function for each `vSite`
    function(vSite, next) {
        // Reuse the code inside your for loop, but call `next` to pass the error and result to the final callback
        request(vSite, function(err, response, vBody) {
            next(err, vBody);
        });
    },
    // The final callback is executed when all requests have completed or any of them has failed
    function(err, results) {
        console.log(err);
        console.log(results);
    }
);
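For comparison, the same flow with bluebird might look like the sketch below (assuming bluebird 3.x, whose Promise.promisify takes a multiArgs option for callbacks like request's (error, response, body)):

var Promise = require('bluebird');
var request = Promise.promisify(require('request'), { multiArgs: true });

var vSites = ['https://www.google.com', 'http://www.purple.com', 'https://www.stackoverflow.com'];

Promise.map(vSites, function (vSite) {
    // each mapped call resolves with [response, vBody]
    return request(vSite).spread(function (response, vBody) {
        return vBody;
    });
}).then(function (results) {
    console.log(results); // bodies, in the same order as vSites
}).catch(function (err) {
    console.log(err);
});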
Hope this helps :)
I'm trying to iterate through a list of items and do some actions on them by calling an API, like in this example:
for (i = 0; i < arr.length; i++) {
    if (arr[i].id == 42) {
        api.requestAction(arr[i].id, function(error, response){ });
    }
}
The problem is that the loop obviously ends before all the requests are done and the program exits. What should I do to manage this? I've seen the "Promise" approach but don't really know how I can use it in this case, or maybe there's another solution.
Thank you in advance!
With node-fetch (a promise-based HTTP API) together with async/await, you can halt the for loop until each request is done, but this requires node v7 with the --harmony-async-await flag (or node v7.6+, where it's enabled by default).
const fetch = require('node-fetch')

async function foo() {
    for (let item of arr) {
        if (item.id == 42) {
            let res = await fetch(url) // build the url from `item` as needed
            let body = await res.text()
            console.log(body)
        }
    }
    console.log('done (after request)')
}
Every time you add the async keyword in front of a function, it returns a promise that resolves/rejects when everything inside is done:
foo().then(done, fail)
Alternatively, you can just wrap your api function in a promise if you don't want to install node-fetch:
await new Promise((rs, rj) => {
    api.requestAction(arr[i].id, function(error, response){
        error ? rj(error) : rs(response)
    })
})
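Putting it together with the original loop (a sketch, using arr and api from the question):

async function processAll() {
    for (let item of arr) {
        if (item.id == 42) {
            let response = await new Promise((rs, rj) => {
                api.requestAction(item.id, function (error, response) {
                    error ? rj(error) : rs(response)
                })
            })
            console.log(response)
        }
    }
}

processAll().then(() => console.log('done'), err => console.error(err))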
Install bluebird
npm install bluebird --save
Code
//require npm
var Promise = require("bluebird");

//code
//"promisify" converts a traditional callback function into a Promise-based function
//(`context` keeps `this` bound to `api` inside requestAction)
var _requestAction = Promise.promisify(api.requestAction, { context: api });

//loop over array
Promise.map(arr, function (value) {
    if (value.id == 42) {
        //async request
        return _requestAction(value.id).then(function (_result) {
            //success
            console.log(_result);
        }).catch(function (e) {
            //error
            console.error(e);
        });
    }
});
You could use async.js. It's an asynchronous control-flow library which provides flows for things like sequential loops, looping in parallel, and many other common flow-control mechanisms; check it out.
See the code below; it assumes that your variable 'arr' is defined somewhere in scope.
npm install async
var async = require("async");
//Loop through each item, waiting for your
//asyncronous function to finish before continuing
//to move onto the next item in the array
//NOTE: This does not loop sequentially, if you want that function with asyncjs then user eachSeries
async.each(arr,
//Item is the current item being iterated over,
//callback is the callback you call to finish the current iteration, it accepts an error and result parameter callback(error, result);
function (item, callback) {
api.requestAction(item.id, function(error, response){
//Check for any errors...
if (error) return callback(error);
callback(null);
});
},
function (err, result) {
//You've now finished the loop
if (err) {
//Do something, you passed an error object to
//in one of the loop's iterations
}
//No errors, move on with your code..
});
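If you also need the responses, async.map collects them (a sketch, filtering the matching items first):

async.map(
    arr.filter(function (item) { return item.id == 42; }),
    function (item, callback) {
        api.requestAction(item.id, callback); // passes (error, response) straight through
    },
    function (err, responses) {
        //responses holds each request's response, in the same order as the input
    });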
Use Bluebird Promises:
var Promise = require('bluebird');
Promise.map(arrayOfIds, function(item){
    return api.requestAction(item);
})
.then(function(response){
    // all the requests are resolved here
})
If you want sequential execution over the ids, use Promise.mapSeries (slower, since it waits for each task to finish before starting the next).
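Note that Promise.map expects the mapper to return a promise, and api.requestAction as shown in the question takes a callback, so you would promisify it first (a sketch):

var Promise = require('bluebird');
var requestAction = Promise.promisify(api.requestAction, { context: api });

Promise.map(arrayOfIds, function (id) {
    return requestAction(id);
})
.then(function (responses) {
    // all the requests are resolved here, responses in input order
});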
Please look at the code below. The request module is this one (https://www.npmjs.com/package/request).
var urlArray = []; // URLs in this level

crawl("xyz.com", false);

function crawl (url, finished) {
    request(url, function (error, response, body) {
        if (finished == true) { return; }
        // do some work on body (including getting n number of new URLs from
        // the body) and set finished = true if we find what we are looking for.
        // for each new url: urlArray.push(newURL);
        // for each new url: call crawl(newurl, finished);
        // Now how can I know when ALL these requests have finished?
        // so that I can have a urlArray corresponding to this level of the crawling tree and
        // do some work before starting the next level of the crawl.
    });
}
Use Promises.
Check out the Q library (I've pointed specifically to the methods you need):
Promise creation:
https://github.com/kriskowal/q/wiki/API-Reference#qdefer
var deferred = Q.defer();
doAsyncStuff(callbackOfAsync);
return deferred.promise;

function callbackOfAsync(isSuccess){
    if(isSuccess){
        deferred.resolve();
    }
    else{
        deferred.reject();
    }
}
Wait for multiple promises:
https://github.com/kriskowal/q/wiki/API-Reference#promise-for-array-methods
Q.all([getFromDisk(), getFromCloud()]).done(function (values) {
    assert(values[0] === values[1]); // values[0] is fromDisk and values[1] is fromCloud
});
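Applied to the crawl in the question, a sketch (assuming crawl is rewritten so that each call returns a promise for the page body):

var Q = require('q');

function crawl (url) {
    var deferred = Q.defer();
    request(url, function (error, response, body) {
        if (error) { deferred.reject(error); }
        else { deferred.resolve(body); }
    });
    return deferred.promise;
}

// Wait for a whole level of the crawling tree before starting the next one
Q.all(urlArray.map(crawl)).done(function (bodies) {
    // every request in this level has finished; collect the new URLs here
});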
I don't really understand your question, but I guess you will need Promises. I assume you are using NodeJS.
function makeRequest (url) {
    return new Promise(function (resolve, reject) {
        request(url, function (err, response, body) {
            if (err || response.statusCode !== 200)
                reject(err || body);
            else
                resolve(body);
        });
    });
}
This function returns a promise. You can use it this way:
var promisedBody = makeRequest('url.com'); // don't name this `request`, or it would shadow the request module
promisedBody.then(function (body) {
    // this is called if no error occurred
    console.log(body);
}, function (error) {
    // this is called on error
    console.log(error);
});
If you want to wait for multiple requests to be answered to perform an action, use Promise.all:
var requests = [makeRequest(url1), makeRequest(url2), makeRequest(url3)];

Promise.all(requests).then(function (data) {
    // everything is done
    console.log(data);
});
I didn't test the code, but I hope you get the idea.
To answer your question specifically, the following flow of logic should work for you; I have added comments to help it make sense:
var urlArray = []; // URLs in this level
var finished = false;

urlArray.push("xyz.com");
start();

function start() {
    while (urlArray.length) {
        crawl(urlArray.pop());
    }
}

function crawl(url) {
    request(url, function (error, response, body) {
        if (finished) {
            return;
        }
        // 1. at this point, a given batch of crawls have all started
        //    AND urlArray is empty.
        // 2. do some work on body (including getting n number of new URLs from
        //    the body) and set finished = true if we find what we are looking for.
        // 3. for each new url: urlArray.push(newURL);
        // 4. start crawling on the new batch of URLs
        start();
    });
}
All the request callbacks will be executed after start() completes, which guarantees that urlArray will be empty by then.
If processing of one crawl response indicates (by setting finished = true;) that what you're looking for has been found, all other response processing will terminate as soon as it begins.
Otherwise, the response is processed and a new batch of urls is set up for crawling; you call start() to begin crawling each.
It would help you also (as suggested in the other answer) if you acquainted yourself with the concept of Promises.
var Q = require('q');

Q.nfcall(client.get("time_clock", function (err, reply) {
    var time = reply.toString();
    return time;
})).then(function(time) {
    client.get("time_predicted", function (err, replier) {
        mom = replier.toString();
        res.render('time', {watch: time, moment: mom});
    })
}).fail(function(err){
    console.log('Error.')
}).done();
};
This code fails. The code below, which doesn't use promises, works:
client.get("time_clock", function (err, reply) {
time=reply.toString();
console.log("in here"+time); // Will print `OK`
client.get("time_predicted", function (err, replier) {
mom=replier.toString();
res.render('time', {watch: time, moment: mom});
});
});
What do I need to change in the first code example to make it work? (Note: it would be even better if I could call res.render at the very end, in the last or another 'then'.)
I won't directly answer your question, but a simple solution is to use redis-then, a redis library for NodeJS that uses promises.
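For reference, a minimal sketch of how the first snippet could be restructured (assuming client is a node-redis client): Q.nfcall must be passed the node-style function and its arguments, not the result of already calling it:

var Q = require('q');

Q.nfcall(client.get.bind(client), "time_clock")
    .then(function (reply) {
        var time = reply.toString();
        return Q.nfcall(client.get.bind(client), "time_predicted")
            .then(function (replier) {
                res.render('time', {watch: time, moment: replier.toString()});
            });
    })
    .fail(function (err) {
        console.log('Error.');
    })
    .done();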
So right now I'm trying to use Node.js to access files in order to write them to a server and process them.
I've split it into the following steps:
Traverse directories to generate an array of all of the file paths
Put the raw text data from each of file paths in another array
Process the raw data
The first two steps are working fine, using these functions:
var walk = function(dir, done) {
    var results = [];
    fs.readdir(dir, function(err, list) {
        if (err) return done(err);
        var pending = list.length;
        if (!pending) return done(null, results);
        list.forEach(function(file) {
            file = path.resolve(dir, file);
            fs.stat(file, function(err, stat) {
                if (stat && stat.isDirectory()) {
                    walk(file, function(err, res) {
                        results = results.concat(res);
                        if (!--pending) done(null, results);
                    });
                } else {
                    results.push(file);
                    if (!--pending) done(null, results);
                }
            });
        });
    });
};
function processfilepaths(callback) {
    // reading each file
    for (var k in filepaths) { if (arrayHasOwnIndex(filepaths, k)) {
        fs.readFile(filepaths[k], function (err, data) {
            if (err) throw err;
            rawdata[k] = data.toString().split(/ *[\t\r\n\v\f]+/g);
            for (var j in rawdata[k]) { if (arrayHasOwnIndex(rawdata[k], j)) {
                rawdata[k][j] = rawdata[k][j].split(/: *|: +/);
            }}
        });
    }}
    if (callback) callback();
}
Obviously, I want to call the function processrawdata() after all of the data has been loaded. However, using callbacks doesn't seem to work.
walk(rootdirectory, function(err, results) {
    if (err) throw err;
    filepaths = results.slice();
    processfilepaths(processrawdata);
});
This never causes an error. Everything seems to run perfectly except that processrawdata() is always finished before processfilepaths(). What am I doing wrong?
You are having a problem with callback invocation and asynchronously called functions. I'd recommend using a library such as after-all to execute a callback once all your functions have executed.
Here's an example: the function done will be called once all the functions wrapped with next have been called.
var afterAll = require('after-all');

// Call `done` once all the functions
// wrapped with next() get called
next = afterAll(done);

// fires second (500 ms)
setTimeout(next(function() {
    console.log('Step two.');
}), 500);

// fires first (100 ms)
setTimeout(next(function() {
    console.log('Step one.');
}), 100);

function done() {
    console.log("Yay we're done!");
}
I think for your problem, you can use the async module for Node.js:
async.series([
    function(){ ... },
    function(){ ... }
]);
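Applied to the question, a sketch (assuming walk and a processfilepaths that only invokes its callback after every read has finished):

var async = require('async');

async.series([
    function (next) {
        walk(rootdirectory, function (err, results) {
            if (err) return next(err);
            filepaths = results.slice();
            next();
        });
    },
    function (next) {
        processfilepaths(next);
    }
], function (err) {
    if (err) throw err;
    processrawdata(); // runs only after both steps above have completed
});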
To answer your actual question, I need to explain how Node.js works:
Say you call an async operation (a MySQL db query, for example). Node.js sends "execute this query" to MySQL. Since the query will take some time (maybe a few milliseconds), Node.js performs it using the MySQL async library, returning to the event loop and doing something else there while waiting for MySQL to get back to us, like handling another HTTP request.
So, in your case, both functions are independent and execute almost in parallel.
For more information:
Async.js for use with Node.js
function processfilepaths(callback) {
    // reading each file
    for (var k in filepaths) { if (arrayHasOwnIndex(filepaths, k)) {
        fs.readFile(filepaths[k], function (err, data) {
            if (err) throw err;
            rawdata[k] = data.toString().split(/ *[\t\r\n\v\f]+/g);
            for (var j in rawdata[k]) { if (arrayHasOwnIndex(rawdata[k], j)) {
                rawdata[k][j] = rawdata[k][j].split(/: *|: +/);
            }}
        });
    }}
    if (callback) callback();
}
Realize that you have:
for
    readfile (err, callback) { ... }
if ...
Node will call each readFile asynchronously: the loop only sets up the events and callbacks, and when it has finished calling each readFile, it runs the if, before any callback has probably even had a chance to be invoked.
You need to use either Promises or a control-flow module like async to sequence it. What you would then do looks like:
async.XXXX(filepaths, processRawData,
    function (err, ...) {
        // function for when all are done
        if (callback) callback();
    }
);
Where XXXX is one of the functions from the library, like series, parallel, each, etc. The only other thing you need to know is that in your process-raw-data function, async gives you a callback to call when done. Unless you really need sequential access (I don't think you do), use parallel so that you can queue up as many I/O events as possible; it should execute faster, maybe only marginally, but it'll better leverage the hardware.
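As a concrete illustration, a minimal sketch using async.map (assuming filepaths and rawdata are in scope as in the question):

var async = require('async');
var fs = require('fs');

function processfilepaths(callback) {
    async.map(filepaths, function (filepath, done) {
        fs.readFile(filepath, function (err, data) {
            if (err) return done(err);
            // same parsing as in the question: split into lines, then split each line on ':'
            var lines = data.toString().split(/ *[\t\r\n\v\f]+/g);
            done(null, lines.map(function (line) { return line.split(/: *|: +/); }));
        });
    }, function (err, results) {
        if (err) throw err;
        rawdata = results; // results are in the same order as filepaths
        if (callback) callback(); // safe: every readFile has finished
    });
}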
I'm using the Node.JS driver for MongoDB, and I'd like to perform a synchronous query, like such:
function getAThing()
{
    var db = new mongo.Db("mydatabase", server, {});
    db.open(function(err, db)
    {
        db.authenticate("myuser", "mypassword", function(err, success)
        {
            if (success)
            {
                db.collection("Things", function(err, collection)
                {
                    collection.findOne({ name : "bob"}, function(err, thing)
                    {
                        return thing;
                    });
                });
            }
        });
    });
}
The problem is that db.open is an asynchronous call (it doesn't block), so getAThing returns "undefined" when I want it to return the results of the query. I'm sure I could use some sort of blocking mechanism, but I'd like to know the right way to do something like this.
ES 6 (Node 8+)
You can utilize async/await
The await operator pauses the execution of an async function until the Promise is resolved, and returns its value.
This way your code will work in a synchronous way:
// inside an async function:
const query = MySchema.findOne({ name: /tester/gi });
const userData = await query.exec();
console.log(userData);
Older Solution - June 2013 ;)
Now that Mongo Sync is available, this is the right way to make a synchronous MongoDB query in Node.js.
I am using this for the same purpose. You can just write a sync method like below:
var Server = require("mongo-sync").Server;
var server = new Server('127.0.0.1');
var result = server.db("testdb").getCollection("testCollection").find().toArray();
console.log(result);
Note: it depends on node-fibers, and there are some issues with it on Windows 8.
Happy coding :)
There's no way to make this synchronous w/o some sort of terrible hack. The right way is to have getAThing accept a callback function as a parameter and then call that function once thing is available.
function getAThing(callback)
{
    var db = new mongo.Db("mydatabase", server, {});
    db.open(function(err, db)
    {
        db.authenticate("myuser", "mypassword", function(err, success)
        {
            if (success)
            {
                db.collection("Things", function(err, collection)
                {
                    collection.findOne({ name : "bob"}, function(err, thing)
                    {
                        db.close();
                        callback(err, thing);
                    });
                });
            }
        });
    });
}
Node 7.6+ Update
async/await now provides a way of coding in a synchronous style when using asynchronous APIs that return promises (like the native MongoDB driver does).
Using this approach, the above method can be written as:
async function getAThing() {
    let db = await mongodb.MongoClient.connect('mongodb://server/mydatabase');
    if (await db.authenticate("myuser", "mypassword")) {
        let thing = await db.collection("Things").findOne({ name: "bob" });
        await db.close();
        return thing;
    }
}
Which you can then call from another async function as let thing = await getAThing();.
However, it's worth noting that MongoClient provides a connection pool, so you shouldn't be opening and closing it within this method. Instead, call MongoClient.connect during your app startup and then simplify your method to:
async function getAThing() {
    return db.collection("Things").findOne({ name: "bob" });
}
Note that we don't call await within the method, instead directly returning the promise that's returned by findOne.
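For completeness, the startup connection could look like the sketch below (matching the driver style used above, where connect resolves with a db object):

let db;

async function init() {
    // connect once at app startup; `db` is then reused by getAThing()
    db = await mongodb.MongoClient.connect('mongodb://server/mydatabase');
}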
While it's not strictly synchronous, a pattern I've repeatedly adopted and found very useful is to use co together with a promisify helper and yield on asynchronous functions. For mongo, you could rewrite the above:
var query = co( function* () {
    var db = new mongo.Db("mydatabase", server, {});
    db = promisify.object( db );
    db = yield db.open();
    yield db.authenticate("myuser", "mypassword");
    var collection = yield db.collection("Things");
    return yield collection.findOne( { name : "bob"} );
});

query.then( result => {
} ).catch( err => {
} );
This means:
You can write "synchronous"-like code with any asynchronous library
Errors are thrown from the callbacks, meaning you don't need the success check
You can pass the result as a promise to any other piece of code
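For example, errors from any yielded step can be caught in one place inside the generator, just like a synchronous throw (a sketch, reusing the promisified db from above):

var query = co( function* () {
    try {
        var db = promisify.object( new mongo.Db("mydatabase", server, {}) );
        db = yield db.open();
        yield db.authenticate("myuser", "mypassword");
        var collection = yield db.collection("Things");
        return yield collection.findOne( { name : "bob"} );
    } catch (err) {
        // any rejected yield lands here; no per-callback error checks needed
        console.error( err );
    }
});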