I am using cheerio and node to do web scraping. I thought it would be good idea to use promise for making it easier to deal with the asynchronous code. So, tried to chain the promises but could not make it working. I am pasting my code over here such that somebody could help me figure out what exactly I have been doing wrong.
http.createServer(function(req, res){
res.writeHead(200, {"Content-Type": "application/json"})
loadPage().then(parseLoadedData);
}).listen(3000, function(error){
console.log(error);
});
function fetchMainPage(){
var deferred = q.defer();
http.get('http://www.google.com?q=node', function(response){
var responseString = '';
response.on('data', function(data){
responseString += data.toString('utf8');
});
response.on('error', function(error){
deferred.reject(error);
});
response.on('end', function(){
deferred.resolve(responseString);
});
});
return deferred.promise;
}
function parseMainContent(responseString){
var deferred = q.defer();
var $ = cheerio.load(responseString);
var rightCol = $('#right-col');
var children = rightCol.children();
var keys = Object.keys(children);
var results = [];
keys.forEach(function(key){
var div = children[key];
div.children.forEach(function(aChild){
if(aChild.name == 'h3' && aChild.children[0].data == "Some title"){
lis = aChild.next.children;
var results = lis.map(function(li){
var anchor = $(li).find('a');
if(anchor != undefined && anchor.attr('href') != undefined)
return [anchor.text(), anchor.attr('href')]
});
results = results.filter(function(result){
return result != undefined;
});
deferred.resolve(results);
}
});
});
return deferred.promise;
}
var loadPage = function(){
return fetchMainPage().then(function(data){
return data;
})
},
parseLoadedData = function(data){
return parseMainContent(data).then(function(results){
console.log(results);
});
}
The problem here is I can't get my parseLoadedData being called. The response is fetched from the server but the second chaining does not seem to be working. I would like to thank you all in advance for helping me out.
Note: The url I am using is different and so the parsing function deal with that specific url only.
You don't really need the loadPage function since fetchMainPage already returns a promise so this should work:
var loadPage = function(){
return fetchMainPage();
}
To chain promises every then callback should return another promise and you were returning the data.
Eg.:
var loadPage = function(){
var deferred = q.defer();
fetchMainPage().then(function(data){
return someOtherPromise(data);
}).then(function(someOtherData) {
return myThirdPromise(someOtherData);
}).then(function(myThirdData) {
return deferred.resolve(myThirdData);
});
}
// IS THE SAME AS
var loadPage2 = function(){
return fetchMainPage().then(function(data){
return someOtherPromise(data);
}).then(function(someOtherData) {
return myThirdPromise(someOtherData);
});
}
Related
I'm creating a Node.js module with an asynchronous method - a simple HTTP GET request. Here is the code:
//mymodule.js
var https = require('https');
function getSomething(url_str)
{
var callback_fn = function(response){
var body = '';
response.on('data', function (data) {
body += data;
});
response.on('end', function () {
//console.log(body);
return body;
});
};
return https.request(url_str, callback_fn).end();
}
var module_obj = {
getSome: getSomething
};
module.exports = module_obj;
This module is called by my app.js - a web server - like so:
//app.js
var myModule = require('./mymodule');
var http = require('http');
var qs = require('querystring');
var server_fn = function(request, response){
response.setHeader('Access-Control-Allow-Origin', '*');
response.setHeader('Access-Control-Request-Method', '*');
response.setHeader('Access-Control-Allow-Methods', 'OPTIONS, GET');
response.setHeader('Access-Control-Allow-Headers', '*');
if ( request.method === 'OPTIONS' ) {
response.writeHead(200);
response.end();
return;
}
if (request.method == 'POST') {
var body = '';
request.on('data', function (data) {
body += data;
// Too much POST data, kill the connection!
// 1e6 === 1 * Math.pow(10, 6) === 1 * 1000000 ~~~ 1MB
if (body.length > 1e6)
request.connection.destroy();
});
request.on('end', function () {
var post = qs.parse(body),
post_url = post.url,
post_method = post.method;
var promise_flow = new Promise(function(resolve, reject){
if(post_method === 'get_me_something')
{
response_str = myModule.getSome(post_url);
resolve(response_str);
}
else
{
resolve('nothing');
}
});
promise_flow
.then(function(response){
response.write(response);
response.end();
return;
}).catch(function(error){
response.write(error);
response.end();
return;
})
});
}
};
var http_server = http.createServer(server_fn);
http_server.listen(2270);
console.log("server listening on 2270");
So basically, I start things up via node app.js, and then I post the URL, and then the module should fetch the Web page and then return the content.
Unfortunately, I'm getting the following error:
UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 1): TypeError: First argument must be a string or Buffer
I believe this is because the response I'm getting from my modules getSomething method is false, as opposed to the content of the requested Web page.
I know I can fix this by moving the https.get operation from mymodule.js and putting it inline with app.js, and then calling resolve on end, but I'd like to keep the current module setup.
Is there a workaround to get the asynchronous method in the imported module to work with the existing promise chain as setup?
UPDATE
After further review, I noticed that I wasn't quite running things the right way. I updated the code as follows:
//...
var promise_flow = new Promise(function(resolve, reject){
if(post_method === 'get_me_something')
{
myModule.getSome(post_url)
.then(function(data){
resolve(data);
})
.catch(function(err){
reject(err);
});
}
else
{
resolve('nothing');
}
});
//...
This way, I think it fits with the true spirit of Promises.
Your getSomething function doesn't return a promise. Make it returns a promise, and fulfill the promise in response.on('end').
function getSomething(url_str)
{
return new Promise(function(resolve, reject) {
var callback_fn = function(response){
var body = '';
response.on('data', function (data) {
body += data;
});
response.on('end', function () {
//console.log(body);
resolve(body);
});
};
https.request(url_str, callback_fn).end();
});
}
Then in your main file, call it like this : myModule.getSomething(post_url).then(resolve);.
In my app i'm using restangular, and i have such method (which i converted from the plain $http request).
And i don't know how to use correctly restangular with promises. How is it possible?
Here is my code:
var test = function(){
var data = '{"Office":"' + office + '"}';
var deferred = $q.defer();
var person = Restangular.one('persons', id)
$scope.person.patch(data).then(function (response) {
deferred.resolve(response);
},function (err, status) {
deferred.reject(status);
});
return deferred.promise;
}
var runIt = function(){
test.then(...)...
}
You could simply use promise returned by patch method of Restangular.one instead of creating a new custom promise.
Code
var test = function() {
var data = '{"Office":"' + office + '"}';
var person = Restangular.one('persons', id);
//returned promise
return person.patch(data).then(function(response) {
return response.data;
}, function(err, status) {
return response.status;
});
}
test().then(...)
I have a simple service method that gather several .get() and I'm having some troubles on the "printing" part as by that time I only have one part of the result.
what I'm doing is:
var service = function() {
var players = []; // will hold 100 objects
var getMembers = function(id) {
$.get(url, function(data) {
for(i=0; i<data.length; i++) {
var p = data[i];
// get more info for this member
getMemberDetails(p.member_id);
// put the current data into the players
players.push(p);
}
});
calculateAndPrint();
};
var getMemberDetails = function(id) {
$.get(url, function(data) {
var result = $.grep(players, function(e){ return e.member_id == id; });
if (result.length == 0) { /* not found */ }
else if (result.length == 1) {
// push new data to player object
result[0].details = data;
}
});
};
var calculateAndPrint = function() {
for(i=0; i<players.length; i++) {
var p = players[i];
console.log(p);
}
};
})();
and this does not work, as when I reach calculateAndPrint, the details is not even designed yet...
so I tried $.Deferred() and the only issue I'm having is that if I defer getMemberDetails method, that call includes already a deffer call (the .get()) and I'm back to the same issue ...
what is the best option to only run calculateAndPrint after all 100 calls were made?
It seems easy enough but I'm just blank :/
This should be pretty easy if you use promises:
var service = function() {
var getMembers = function(id) {
return Promise.when($.get("some service url"))
.then(function (data) {
return Promise.all(data.map(getMemberDetails));
});
};
var getMemberDetails = function(player) {
return Promise.when($.get("some service Url?id=" + player.member_id));
};
var calculateAndPrint = function(players) {
players.forEach(function (player) {
console.log(player);
});
};
return {
getMembers: getMembers,
calculateAndPrint: calculateAndPrint
};
})();
service.getMembers().then(function (players) {
service.calculateAndPrint(players);
});
you could just create a deferred object $.deferred for every ajax call your making & then wait ($.when) for all those deferred jobs to complete before you run the calculateAndPrint() method.
How It Works:
Create a deferred object for every ajax call $.deferred & return the promise object .promise().
depending on whether ajax call is successful or not , either resolve with response data .resolve(responseData) or reject with error data .reject(errorData).
Monitor all the ajax calls by there promise objects returned from step1 and on completion , call the calculateAndPrint() method.
For arbitrary ajax calls most of the above logic remains same,except that those are called in for loop and each of those deferred calls are pushed into a deferredCalls array.
Note:I would suggest to show some loader/spinner image when your making ajax calls, since you would not get the response immediately & keeping user informed about the background operation is always good User experience sign.
JS CODE:
/* utils */
var $ul = $('ul');
function msg(text) {
$ul.append('<li>' + text + '</li>');
}
/* functions */
function asyncThing1() {
var dfd = $.Deferred();
setTimeout(function() {
msg('asyncThing1 seems to be done...');
dfd.resolve('banana');
}, 1000);
return dfd.promise();
}
function asyncThing2() {
var dfd = $.Deferred();
setTimeout(function() {
msg('asyncThing2 seems to be done...');
dfd.resolve('apple');
}, 500);
return dfd.promise();
}
function asyncThing3() {
var dfd = $.Deferred();
setTimeout(function() {
msg('asyncThing3 seems to be done...');
dfd.resolve('orange');
}, 1500);
return dfd.promise();
}
/* do it */
$.when(asyncThing1(), asyncThing2(), asyncThing3()).done(function(res1, res2, res3) {
msg('all done!');
msg(res1 + ', ' + res2 + ', ' + res3);
});
Live Demo # JSFiddle
Arbitrary Deferred calls Original SO Post :
//Push all arbitrary ajax calls to deferred array
var deferreds = [];
function getSomeDeferredStuff() {
var i = 1;
for (i = 1; i <= 10; i++) {
var count = i;
deferreds.push(
$.post('/echo/html/', {
html: "<p>Task #" + count + " complete.",
delay: count
}).success(function(data) {
$("div").append(data);
}));
}
}
// define a extension method for $.when for creating/managing deferred
// objects for every ajax call
if (jQuery.when.all===undefined) {
jQuery.when.all = function(deferreds) {
var deferred = new jQuery.Deferred();
$.when.apply(jQuery, deferreds).then(
function() {
var deferredObjs= function (arguments) { return deferreds.length > 1 ? $.makeArray(arguments) : [arguments]; }
deferred.resolve(deferredObjs);
},
function() {
deferred.fail(deferredObjs);
});
return deferred;
}
}
//passing the deferred calls array to $.when
$.when.all(deferreds).then(function(objects) {
//process when all deferred objects compelted
console.log("Resolved/rejected objects:", objects);
});
Working example for arbitrary ajax calls #JSFiddle
I have this class:
(function(){
"use strict";
var FileRead = function() {
this.init();
};
p.read = function(file) {
var fileReader = new FileReader();
var deferred = $.Deferred();
fileReader.onload = function(event) {
deferred.resolve(event.target.result);
};
fileReader.onerror = function() {
deferred.reject(this);
};
fileReader.readAsDataURL(file);
return deferred.promise();
};
lx.FileRead = FileRead;
}(window));
The class is called in a loop:
var self = this;
$.each(files, function(index, file){
self.fileRead.read(file).done(function(fileB64){self.fileShow(file, fileB64, fileTemplate);});
});
My question is, is there a way to call a method once the loop has completed and self.fileRead has returned it's deferred for everything in the loop?
I want it to call the method even if one or more of the deferred fails.
$.when lets you wrap up multiple promises into one. Other promise libraries have something similar. Build up an array of promises returned by fileRead.read and then pass that array to $.when and hook up then/done/fail/always methods to the promise returned by .when
// use map instead of each and put that inside a $.when call
$.when.apply(null, $.map(files, function(index, file){
// return the resulting promise
return self.fileRead.read(file).done(function(fileB64){self.fileShow(file, fileB64, fileTemplate);});
}).done(function() {
//now everything is done
})
var self = this;
var processFiles = function (data) {
var promises = [];
$.each(files, function (index, file) {
var def = data.fileRead.read(file);
promises.push(def);
});
return $.when.apply(undefined, promises).promise();
}
self.processFiles(self).done(function(results){
//do stuff
});
$.when says "when all these promises are resolved... do something". It takes an infinite (variable) number of parameters. In this case, you have an array of promises;
I know this is closed but as the doc states for $.when: In the multiple-Deferreds case where one of the Deferreds is rejected, jQuery.when immediately fires the failCallbacks for its master Deferred. (emphasis on immediately is mine)
If you want to complete all Deferreds even when one fails, I believe you need to come up with your own plugin along those lines below. The $.whenComplete function expects an array of functions that return a JQueryPromise.
var whenComplete = function (promiseFns) {
var me = this;
return $.Deferred(function (dfd) {
if (promiseFns.length === 0) {
dfd.resolve([]);
} else {
var numPromises = promiseFns.length;
var failed = false;
var args;
var resolves = [];
promiseFns.forEach(function (promiseFn) {
try {
promiseFn().fail(function () {
failed = true;
args = arguments;
}).done(function () {
resolves.push(arguments);
}).always(function () {
if (--numPromises === 0) {
if (failed) {
//Reject with the last error
dfd.reject.apply(me, args);
} else {
dfd.resolve(resolves);
}
}
});
} catch (e) {
var msg = 'Unexpected error processing promise. ' + e.message;
console.error('APP> ' + msg, promiseFn);
dfd.reject.call(me, msg, promiseFn);
}
});
}
}).promise();
};
To address the requirement, "to call the method even if one or more of the deferred fails" you ideally want an .allSettled() method but jQuery doesn't have that particular grain of sugar, so you have to do a DIY job :
You could find/write a $.allSettled() utility or achieve the same effect with a combination of .when() and .then() as follows :
var self = this;
$.when.apply(null, $.map(files, function(index, file) {
return self.fileRead.read(file).then(function(fileB64) {
self.fileShow(file, fileB64, fileTemplate);
return fileB64;//or similar
}, function() {
return $.when();//or similar
});
})).done(myMethod);
If it existed, $.allSettled() would do something similar internally.
Next, "in myMethod, how to distinguish the good responses from the errors?", but that's another question :)
I am following this Angular tutorial on promises. I have a service that checks if an array is empty and if so, hits a REST service, returns a promise and updates the array. Here is the relative code:
requestingProviders: [],
getRequestingProviders: function() {
var that = this;
var deferred = $q.defer();
if(this.requestingProviders.length === 0) {
this.getResource().search({
role: 'REQUESTING_PROVIDER'
}, function(data) {
that.requestingProviders = data.providers;
deferred.resolve(data.providers);
});
return deferred.promise;
} else {
return that.requestingProviders;
}
}
The service is being called from a controller. Here is the code where it is being called:
$scope.providers = providerService.getRequestingProviders();
The REST call is made and returns fine, but the view is never updated. This is not working like the tutorial explained. Here is a plunker that shows what I am expecting. What am I doing wrong?
You need to resolve your promise:
var prom = providerService.getRequestingProviders();
prom.then(function (data) {
$scope.providers = data;
});
Also, change your code to always return the promise:
getRequestingProviders: function() {
var that = this;
var deferred = $q.defer();
if(this.requestingProviders.length === 0) {
this.getResource().search({
role: 'REQUESTING_PROVIDER'
}, function(data) {
that.requestingProviders = data.providers;
deferred.resolve(data.providers);
});
} else {
deferred.resolve(that.requestingProviders);
}
return deferred.promise;
}