I'm facing an issue: I need to receive an array from an HTTP request before using async.map to launch queries on its items.
My server-side controller code is below (Express 4):
'use strict';
var _ = require('lodash');
var request = require('request');
var asynce = require('async');

exports.index = function (req, res) {
    function cleanip(str) {
        return str.replace("/", "%2F");
    }
    var myUrls = [];
    var IpBlockedForSpam = [];
    var list = ["127.0.0.1/32", "192.168.0.1/32"];
    for (var i in list) {
        myUrls.push("http://localhost:9000/myapi/ip/blockedForSpam/" + cleanip(list[i]));
    }
    asynce.map(myUrls, function (url, callback) {
        request(url, function (error, response, html) {
            var r = JSON.parse(html);
            for (var i in r) {
                IpBlockedForSpam.push(r[i]);
            }
            callback(error, html);
        });
    }, function (err, results) {
        res.jsonp(IpBlockedForSpam);
    });
};
This code works when var list is static. What I want to achieve is to fill this variable using an HTTP request like this:
request("http://localhost:9000/myapi/ip", function(error, response, body) {
//console.log(body);
remotelist.push(JSON.parse(body));
});
Calling http://localhost:9000/myapi/ip returns:
[
    "127.0.0.1/32",
    "192.168.0.1/32"
]
I tried many things without results, because most of the time my async.map runs before the HTTP request that retrieves the list has completed.
One more thing: is it possible to avoid full URLs like http://localhost:9000/myapi/* and use only /myapi/*?
Thank you in advance for suggestions; maybe I am going about this the wrong way.
See you.
You can just put the code inside the request() callback so that the list is obtained first, and the rest of the code runs only once it has been retrieved:
'use strict';
var _ = require('lodash');
var request = require('request');
var asynce = require('async');

exports.index = function (req, res) {
    function cleanip(str) {
        return str.replace("/", "%2F");
    }
    var myUrls = [];
    var IpBlockedForSpam = [];
    var list = ["127.0.0.1/32", "192.168.0.1/32"];
    request("http://localhost:9000/myapi/ip", function (error, response, body) {
        // add the IP address array retrieved from this request
        list = list.concat(JSON.parse(body));
        list.forEach(function (item, i) {
            myUrls.push("http://localhost:9000/myapi/ip/blockedForSpam/" + cleanip(item));
        });
        asynce.map(myUrls, function (url, callback) {
            request(url, function (error, response, html) {
                var r = JSON.parse(html);
                for (var i in r) {
                    IpBlockedForSpam.push(r[i]);
                }
                callback(error, html);
            });
        }, function (err, results) {
            res.jsonp(IpBlockedForSpam);
        });
    });
};
P.S. It is not considered good practice to iterate arrays with for (var i in r), because that is a property iteration which will accidentally include any enumerable properties of the array, not just the array items.
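A quick illustration of that pitfall (a minimal sketch; the extra property is contrived for the example):

var arr = ["127.0.0.1/32", "192.168.0.1/32"];
arr.note = "not an array item"; // an enumerable property, not an element

for (var i in arr) {
    console.log(arr[i]); // logs both IPs AND "not an array item"
}

arr.forEach(function (item) {
    console.log(item); // logs only the two real array items
});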
I got it now; working code below:
'use strict';
var _ = require('lodash');
var request = require('request');
var asynce = require('async');

exports.index = function (req, res) {
    function cleanip(str) {
        return str.replace("/", "%2F");
    }
    var myUrls = [];
    var IpBlockedForSpam = [];
    //var list = ["127.0.0.1/32", "192.168.0.1/32"];
    var list = [];
    request("http://localhost:9000/myapi/ip", function (error, response, body) {
        list = list.concat(JSON.parse(body));
        list.forEach(function (item, i) {
            myUrls.push("http://localhost:9000/myapi/ip/blockedForSpam/" + cleanip(item));
        });
        asynce.map(myUrls, function (url, callback) {
            request(url, function (error, response, html) {
                var r = JSON.parse(html);
                r.forEach(function (item, i) {
                    IpBlockedForSpam.push(item);
                });
                callback(error, html);
            });
        }, function (err, results) {
            res.jsonp(IpBlockedForSpam);
        });
    });
};
Some brackets were not closed, and I had typed concact instead of concat (fixing that really helped me understand this approach) :)
One last thing: is it possible to avoid full URLs like http://localhost:9000/myapi/* and use only /myapi/*?
Thank you @jfriend00
Related
I have three files: user.js, influencer.js, and validate.js.
In user.js, I require ./influencer (as the variable influencer) and ./validate (as the variable validate).
My function in user.js:
addAccount: function () {
    return functions.database.ref('/acct/{accountID}/name/').onCreate(event => {
        var accountID = event.params.accountID;
        var name = JSON.stringify(event.data.val()).replace(/['"]+/g, '');
        console.log("New Account Added (" + accountID + ")");
        console.log("Nickname: " + name);
        influencer.getIG(name);
        var data = influencer.data;
        validate.validateThis(data);
    });
}
With influencer.getIG(name), I am passing the name we defined above to the getIG function (inside influencer.js). This works like a charm; the result is a JSON body.
What I want to do now is take this JSON body and pass it to the validate function (in validate.js). In influencer.js, I also added "exports.data = data;".
That said, I can't seem to figure out how to pass "data" to validate.js. When I log it, it is undefined. I added a timeout before running validateThis(data) and it is still undefined. The validate function works great on its own; I've tested it. But clearly I am not doing this the correct way.
This is my influencer.getIG function:
module.exports = {
    getIG: function (name) {
        var url = "https://www.instagram.com/" + name + "/?__a=1";
        console.log(url);
        request({
            url: url
        }, function (error, response, body) {
            var data = JSON.parse(body);
            console.log(data);
            exports.data = data;
        });
    }
};
How can I pass the result of the second module to the third module in my function? What am I doing wrong?
You can try passing a callback function as another parameter to getIG.
Your influencer file will look like this:
module.exports = {
    getIG: function (name, callback) {
        var url = "https://www.instagram.com/" + name + "/?__a=1";
        request({
            url: url
        }, callback);
    }
};
And your user file will look like this:
addAccount: function () {
    return functions.database.ref('/acct/{accountID}/name/').onCreate(event => {
        var accountID = event.params.accountID;
        var name = JSON.stringify(event.data.val()).replace(/['"]+/g, '');
        influencer.getIG(name, function (error, response, body) {
            var data = JSON.parse(body);
            validate.validateThis(data);
        });
    });
}
Using a callback ensures that the data has been retrieved before you use it.
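One caveat: request can call back with an error and an undefined body, in which case JSON.parse would throw. A minimal guard might look like this (the error check is an addition for illustration, not part of the original answer):

influencer.getIG(name, function (error, response, body) {
    if (error || !body) {
        console.error("getIG failed:", error);
        return;
    }
    var data = JSON.parse(body);
    validate.validateThis(data);
});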
As the two other commenters noted, you have an asynchronous function with a callback. One way around this is to define the callback inside the user.js file and pass it to the getIG function. So you would have:
user.js
addAccount: function () {
    return functions.database.ref('/acct/{accountID}/name/').onCreate(event => {
        var accountID = event.params.accountID;
        var name = JSON.stringify(event.data.val()).replace(/['"]+/g, '');
        console.log("New Account Added (" + accountID + ")");
        console.log("Nickname: " + name);
        function callback(err, res, body) {
            var data = JSON.parse(body);
            console.log(data);
            validate.validateThis(data);
        }
        influencer.getIG(name, callback);
    });
}
Then, in the other file:
influencer.js
module.exports = {
    getIG: function (name, callback) {
        var url = "https://www.instagram.com/" + name + "/?__a=1";
        request({
            url: url
        }, callback);
    }
};
This way the asynchronous function runs inside influencer and then calls back to the user when the result is ready. The data is then in scope for the user file to use.
The alternative (and better) way is to use promises. In that case the user code would be along the lines of:
influencer.getIG(name).then(data => { /* use data here in user.js */ });
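For completeness, a minimal sketch of a promise-returning getIG (one possible implementation built on the same request module; a sketch, not code from the original post):

module.exports = {
    getIG: function (name) {
        var url = "https://www.instagram.com/" + name + "/?__a=1";
        return new Promise(function (resolve, reject) {
            request({ url: url }, function (error, response, body) {
                if (error) { return reject(error); } // network failure
                resolve(JSON.parse(body)); // hand the parsed body to .then()
            });
        });
    }
};

The caller can then chain validation and error handling:

influencer.getIG(name)
    .then(data => validate.validateThis(data))
    .catch(err => console.error(err));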
I'm trying to parse Reddit's RSS feed to grab the titles of front-page articles, and I'm having some trouble. Source code below:
//var util = require('util');
//var cheerio = require('cheerio');
var fs = require('fs');
var request = require('request');
var parseString = require('xml2js').parseString;

var url = 'http://www.reddit.com/.xml';
request(url, function (error, response, xml) {
    parseString(xml, function (err, result) {
        result = result.rss.channel[0];
        console.log(result.item[0]['title']); // works fine, gets first title
        for (var key in result) {
            console.log(result[key]['title']); // returns a bunch of 'undefined'
        }
        //console.log(util.inspect(result, false, null));
        fs.writeFile("index.html", result, function (err) {
            if (err) { return console.log(err); }
            return console.log("File saved.");
        });
    });
});
You get undefined because you should be iterating over result.item instead of just result. For example:
for (var key in result.item) {
    console.log(result.item[key]['title']);
}
Additionally, you should just use a regular for-loop instead of using for..in, since it seems like result.item is just a plain array. For example:
var items = result.item;
for (var i = 0; i < items.length; ++i) {
    console.log(items[i].title);
}
I know this question has been asked many times, but I can't make it work.
Here is my situation: I have a string called data, and I want to unshorten all the links inside that string.
Code:
var Bypasser = require('node-bypasser');
var URI = require('urijs');
var data = 'multiple urls : http://example.com/foo http://example.com/bar';
var result = URI.withinString(data, function(url) {
var unshortenedUrl = null;
var w = new Bypasser(url);
w.decrypt(function(err, res) {
// How can I return res ?
unshortenedUrl = res;
});
// I know the w.descrypt function is a asynchronous function
// so unshortenedUrl = null
return unshortenedUrl;
});
Let me walk you through the code.
URI.withinString will match all the URLs in data, manipulate them, and return the result.
You can view an example in the URI.js docs.
What I want to do with these URLs is unshorten all of them using node-bypasser.
This is from the node-bypasser documentation:
var Bypasser = require('node-bypasser');

var w = new Bypasser('http://example.com/shortlink');
w.decrypt(function (err, result) {
    console.log('Decrypted: ' + result);
});
This is the result that I want: multiple urls : http://example.com/foo_processed http://example.com/bar_processed
I created a notebook at tonicdev.com.
Solution
var async = require('async');
var Bypasser = require('node-bypasser');

var getUrlRegEx = new RegExp(
    "(^|[ \t\r\n])((ftp|http|https|gopher|mailto|news|nntp|telnet|wais|file|prospero|aim|webcal):(([A-Za-z0-9$_.+!*(),;/?:#&~=-])|%[A-Fa-f0-9]{2}){2,}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*(),;/?:#&~=%-]*))?([A-Za-z0-9$_+!*();/?:~-]))",
    "g"
);
var urls = data.match(getUrlRegEx);

async.forEachLimit(urls, 5, function (url, callback) {
    let w = new Bypasser(url);
    w.decrypt(function (err, res) {
        if (err == null && res != undefined) {
            data = data.replace(url, res);
        }
        callback(); // always call back, even on error, so the loop can finish
    });
}, function (err) {
    res.send(data);
});
You don't really understand what a callback is. A callback allows asynchronous code to run without JavaScript waiting for it. If you added some debug output to your code:
console.log("Started parsing");
var result = URI.withinString(data, function(url) {
console.log("URL parsed (or whatever)");
var unshortenedUrl = null;
var w = new Bypasser(url);
w.decrypt(function(err, res) {
// How can I return res ?
unshortenedUrl = res;
});
// I know the w.descrypt function is a asynchronous function
// so unshortenedUrl = null
return unshortenedUrl;
});
console.log("Call to library over");
You would (most likely) see messages in this order:
Started parsing
Call to library over
URL parsed (or whatever)
The answer: the callback is not guaranteed to run before any code you execute after registering it. You can't put the data in your result variable because the data might not have been fetched yet.
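If you need the unshortened values before producing the final string, one option (a sketch, assuming only the Bypasser API shown above) is to wrap each decrypt call in a Promise and wait for all of them:

var Bypasser = require('node-bypasser');

function unshorten(url) {
    return new Promise(function (resolve, reject) {
        new Bypasser(url).decrypt(function (err, res) {
            if (err || res == undefined) { return reject(err); }
            resolve(res);
        });
    });
}

var urls = ['http://example.com/foo', 'http://example.com/bar'];
Promise.all(urls.map(unshorten)).then(function (results) {
    // results[i] is the unshortened form of urls[i];
    // replace each one in the original string here
    console.log(results);
});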
I want to programmatically fetch meta tags from multiple URLs and use them for further processing.
I'm using the code snippet below; it always prints only the first URL's meta tag, and res in the async callback is undefined. Am I missing anything here with async?
var http = require('http'),
    cheerio = require('cheerio'),
    async = require('async'),
    urls = [
        "http://theatlantic.com",
        "http://nytimes.com"
    ];

function test() {
    var $, data = '';
    getMetaData = function (uri, callback) {
        http.get(uri, function (resp) {
            console.log('Fetching Url:' + uri);
            resp.on('data', function (chunk) {
                data += chunk;
            });
            resp.on('end', function () {
                $ = cheerio.load(data);
                console.log('Meta Tag:' + $('meta[property="og:description"]').attr('content') + '\n'); // use for further processing
                callback(null, $('meta[name="description"]').attr('content'));
            });
        });
    };
    async.each(urls, getMetaData, function (err, res) {
        console.log(res);
    });
}
test();
First, here is my code so far:
var http = require("http");
// Utility function that downloads a URL and invokes
// callback with the data.
function download(url, callback) {
http.get(url, function(res) {
var data = "";
res.on('data', function (chunk) {
data += chunk;
});
res.on("end", function() {
callback(data);
});
}).on("error", function() {
callback(null);
});
}
var cheerio = require("cheerio");
var url = "http://www.bloglovin.com/en/blogs/1/2/all";
var myArray = [];
var a = 0;
var getLinks = function(){download(url, function(data) {
if (data) {
// console.log(data);
var $ = cheerio.load(data);
$(".content").each(function(i, e) {
var blogName = $(e).find(".blog-name").text();
var followLink = $(e).find("a").attr("href");
var blogSite = $(e).find(".description").text();
myArray[a] = [a];
myArray[a]["blogName"] = blogName;
myArray[a]["followLink"] = "http://www.bloglovin.com"+followLink;
myArray[a]["blogSite"] = blogSite;
a++;
console.log(myArray);
});
}
});
}
getLinks();
As you can see, followLink is concatenated onto the base URL; I'd like to pass each resulting URL through the download function, effectively scraping each of those pages with the same CSS rules and adding the results to the multidimensional array for the corresponding blogger.
How can I go about this?
I do something similar in one of my scraping jobs, but I use the async.js library to accomplish it. Note that I'm also using the request module and cheerio.js in my scraping. I fetch and scrape rows of data from a single web page, but I suspect you could do something similar to fetch URLs and request/scrape them in the same manner.
I also admit this is quite basic coding that could certainly be optimized with a bit of refactoring. Hope it gives you some ideas at least...
First, I use request to fetch the page and call my parse function:
var url = 'http://www.target-website.com';

function (lastCallback) {
    request(url, function (err, resp, body) {
        if (!err) { parsePage(err, resp, body, lastCallback); }
        else { console.log('web request error:' + resp.statusCode); }
    });
}
Next, in my parsePage function, I load the website into Cheerio, fetch the HTML of each data row into an array, push my parseRow function and each HTML segment into another array, and use async.parallel to process each iteration:
var rows = [];

function parsePage(err, resp, body, callback1) {
    var $ = cheerio.load(body);
    $('div#targetTable tr').each(function (i, elem) {
        rows.push($(this).html());
    });
    var scrRows = [];
    rows.forEach(function (row) {
        scrRows.push(function (callback2) {
            parseRow(err, resp, row);
            callback2();
        });
    });
    async.parallel(scrRows, function () {
        callback1();
    });
}
Inside your loop, just create an object with the properties you scrape, then push that object onto your array.
var blogInfo = {
    blogName: blogName,
    followLink: "http://www.bloglovin.com" + followLink,
    blogSite: blogSite
};
myArray.push(blogInfo);
You have defined a = 0, so
myArray[a] = [a]; // => myArray[0] = [0]; myArray[0] becomes an array whose only element is 0
The statements below then don't add real array elements: they set named properties on that inner array, which won't count as items (they don't affect its length and are dropped by JSON.stringify):
myArray[a]["blogName"] = blogName;
myArray[a]["followLink"] = "http://www.bloglovin.com" + followLink;
myArray[a]["blogSite"] = blogSite;
Instead try this:
var obj = {
    index: a,
    blogName: blogName,
    followLink: "http://www.bloglovin.com" + followLink,
    blogSite: blogSite
};
myArray.push(obj);
console.log(myArray);
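With this structure each scraped entry is a plain object, so its fields read back directly (a small usage note; index 0 is just an example):

console.log(myArray[0].blogName);
console.log(myArray[0].followLink);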