variable scope in module asynchronous function - javascript

This is my first week in Node, so I'm sorry if this is a no-brainer.
The code works and does what it should, but I can't figure out how to match the name (url) that starts http.get with the result it gets back from the website.
I found this, which is almost like my problem, except it involves a premade function, so I can't edit the function and add a callback:
variable scope in asynchronous function
If I could run this code synchronously, or add a callback to the http.get function, it would all be good. But I don't have the skills and don't know if you even can do it.
Thanks
- Robin.
var http = require('http');

function download(name) {
    // name is an array with csgo item names.
    for (var i = 0; i < name.length; i++) {
        var marketHashName = getGoodName(name[i]);
        var url = 'http://steamcommunity.com/market/priceoverview/?currency=1&appid=730&market_hash_name=' + marketHashName;
        http.get(url, function (res) {
            var data = "";
            res.on('data', function (chunk) {
                data += chunk;
            });
            res.on("end", function () {
                data = JSON.parse(data);
                var value = 0;
                // get the value in the json array
                if (data.median_price) {
                    value = data.median_price;
                } else {
                    value = data.lowest_price;
                }
                value = value.substr(5);
                console.log("WEAPON", value);
                // callback with name/link and value?
                // callback(name, value);
            });
        }).on("error", function () {
        });
    }
}

You can just add a callback argument and then call it with the final data. And, if you want to pass to the callback the particular marketHashName that was being processed, then you can create a closure to capture that uniquely for each time through the for loop:
var http = require('http');

function download(name, callback) {
    // name is an array with csgo item names.
    for (var i = 0; i < name.length; i++) {
        var marketHashName = getGoodName(name[i]);
        // create a closure to capture marketHashName uniquely for each
        // iteration of the for loop
        (function (theName) {
            var url = 'http://steamcommunity.com/market/priceoverview/?currency=1&appid=730&market_hash_name=' + theName;
            http.get(url, function (res) {
                var data = "";
                res.on('data', function (chunk) {
                    data += chunk;
                });
                res.on("end", function () {
                    data = JSON.parse(data);
                    var value = 0;
                    // get the value in the json array
                    if (data.median_price) {
                        value = data.median_price;
                    } else {
                        value = data.lowest_price;
                    }
                    value = value.substr(5);
                    console.log("WEAPON", value);
                    // now that the async work is done, call the callback
                    // and pass it our results
                    callback(theName, value, data);
                });
            }).on("error", function () {
            });
        })(marketHashName);
    }
}

// sample usage:
download("whatever", function (name, value, data) {
    // put your code here to use the results
});
FYI, you may find that the request module, which is a higher-level set of functionality on top of the http module, will save you some work.
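For illustration, here is a minimal sketch of the same fetch done with request; this is only a sketch, downloadOne is an illustrative name, the (error, response, body) callback signature is request's standard one, and the URL is the same Steam endpoint as above:
var request = require('request');

function downloadOne(marketHashName, callback) {
    var url = 'http://steamcommunity.com/market/priceoverview/?currency=1&appid=730&market_hash_name=' + marketHashName;
    request(url, function (error, response, body) {
        if (error) return callback(error);
        // request hands us the whole body at once, so there is no 'data'/'end' bookkeeping
        var data = JSON.parse(body);
        callback(null, marketHashName, data.median_price || data.lowest_price);
    });
}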

Related

How do I get all the values of an array from an async function

I would like to download an XML file, convert it to JSON, fill an array, and send the values to a different API.
The things that work:
Download multiple XML files
Convert them to JSON
Fill an array with all the values
I can't get my head around the callback function. The code below will output:
Loading..
34
done
34
done
The expected output is: Loading.. 34 done
function fillArray(callback) {
    console.log("Loading..");
    for (let i = 0; i < urlArray.length; i++) {
        xml2json(urlArray[i], (json) => {
            let myArray = findProp(json, "value");
            for (let i = 0; i < myArray.length; i++) {
                myArrlabel.push(myArray[i]);
            }
            callback();
        });
    }
    //return myArrlabel;
}

function useArray() {
    console.log(myArrlabel[15]);
    console.log("done");
}

fillArray(useArray);
The callback is inside the for loop; that's why I get the above output. It could be a stupid question, but I can't understand how to code this correctly.
Edit: added xml2json()
function xml2json(url, callback) {
    http.get(url, (resp) => {
        let data = '';
        // A chunk of data has been received.
        resp.on("data", (chunk) => {
            data += chunk;
        });
        // The whole response has been received. Print out the result.
        resp.on("end", () => {
            let json = parser.toJson(data, options);
            callback(json);
        });
    }).on("error", (err) => {
        console.log("Error: " + err.message);
    });
}
Try adding a counter variable that increments every time a request's data has loaded, and only invoke your callback once that counter reaches the number of URLs.
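A minimal sketch of that idea applied to the fillArray above (urlArray, myArrlabel, findProp, and xml2json are the names from the question; the only change is the completed counter):
function fillArray(callback) {
    console.log("Loading..");
    let completed = 0; // how many requests have finished
    for (let i = 0; i < urlArray.length; i++) {
        xml2json(urlArray[i], (json) => {
            let myArray = findProp(json, "value");
            for (let j = 0; j < myArray.length; j++) {
                myArrlabel.push(myArray[j]);
            }
            completed++;
            if (completed === urlArray.length) {
                callback(); // only fire once the last response has arrived
            }
        });
    }
}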

Can I stop an event handler while it's executing?

Is it possible to stop the execution of a previous event when the event is called again?
To clarify, I have a button <button onclick='load()'>load</button> that calls a load() function which gets an array, processes each element and displays it in a list <ul id='main'></ul>
function load(event) {
    $("#main").empty(); // empty old elements
    $.get("load.php", '', function (data) {
        var arr = JSON.parse(data);
        for (i = 0; i < arr.length; ++i) {
            process(arr[i]); // process and append to #main
        }
    });
}
The problem is that if I click the button again while it's still putting the elements into the list, I get the new list plus the rest of the old list.
Is there a way to stop the first event while it's still executing, but still execute the second event?
You should try this:
var xhr;

function load(ev) {
    if (ev.eventPhase === 2) {
        if (xhr) xhr.abort();
        $('#main').empty();
        xhr = $.get('load.php', function (data) {
            var a = JSON.parse(data);
            for (var i = 0, l = a.length; i < l; i++) {
                process(a[i]);
            }
        });
    }
}
I can be wrong, but the idea is to keep a reference to the in-flight request and abort it when the element is clicked again:
var req;

function abortReq() {
    req.abort();
}

req = $.ajax(/* ... */);
$("#main").on("click", abortReq);
// ...later, when the request is done:
$("#main").off("click", abortReq);
As noted, you can stop the event by setting a flag and checking it, but a better approach is simply to assign the new value directly. If your code works, it means JSON.parse is already returning an array.
That means:
"use strict";
(function () {
function load(event) {
$("#main").empty();
$.get("load.php", '', function (data) {
process = JSON.parse(data);
$("#main").whateverMethodFillsTheElement(process);
});
}());
Also, when writing asynchronous JavaScript code that makes HTTP requests, promises are preferred to callbacks. Since $.get returns a Promise you can write
"use strict";
(function () {
function load(event) {
$("#main").empty();
$.get("load.php")
.then(function (data) {
var items = JSON.parse(data);
$("#main").whateverMethodFillsTheElement(items);
});
}
}());
As discussed in comments, the aim is to use each item in another request which provides the actual value to add to 'main'. So loading data triggers an asynchronous call for each loaded item.
To accommodate this, we need to determine a key field that we can use to track each item so we do not append existing items to the list. We will call this field id for the sake of exposition.
"use strict";
(function () {
var allItems = [];
function load(event) {
$("#main").empty();
$.get("load.php")
.then(function (data) {
return JSON.parse(data);
})
.then(function (items) {
items.forEach(item => {
processItem(item)
.then(function (processed) {
var existingItem = allItems.filter(i => i.id === item.id)[0];
if(existingItem) {
var existingIndex = allItems.indexOf(existingItem);
allItems[existingIndex] = processed;
}
else {
allItems.push(processed);
}
});
});
});
}
}());
OK, it seems it's not possible to stop an Ajax success function after it has begun executing, or to stop a past event without aborting the current one.
But the following solution worked for me, so I figured I'd post it here:
var num = 0;

function load() {
    var curNum = ++num;
    $("#main").empty();
    $.get("load.php", '', function (data) {
        var arr = JSON.parse(data);
        for (i = 0; i < arr.length; ++i) {
            process(arr[i], curNum);
        }
    });
}

function process(item, curNum) {
    if (curNum === num) { // don't process if a new request has been made
        // get 'data' based on 'item'...
        if (curNum === num) { // check again in case a new request was made in the meantime
            $("#main").append(data);
        }
    }
}
I appreciate everyone's help.

Parse.Cloud.job promise not working

What I am trying to do here is:
Remove all contents in a class first, because the events.json file will be updated every day. I have my first question here: is there a better way to remove all contents from a database class on Parse?
Then I will send a request to get events.json and store the "name" and "id" of each result in a 2D array.
Then I will send multiple requests to get the JSON files for each "name"/"id" pair.
Finally, I will store the event details in the database (one event per row). But right now my code terminates before it has downloaded those JSON files.
Code:
function newLst(results) {
    var event = Parse.Object.extend("event");
    for (var i = 0; i < results.length; i++) {
        Parse.Cloud.httpRequest({
            url: 'https://api.example.com/events/' + results[i].name + '/' + results[i].id + '.json',
            success: function (newLst) {
                var newJson = JSON.parse(newLst.text);
                var newEvent = new event();
                newEvent.set("eventId", newJson.data.id);
                newEvent.set("eventName", newJson.data.title);
                newEvent.save(null, {
                    success: function (newEvent) {
                        alert('New object created with objectId: ' + newEvent.id);
                    },
                    error: function (newEvent, error) {
                        alert('Failed to create new object, with error code: ' + error.message);
                    }
                });
            },
            error: function (newLst) {
            }
        });
    }
}
Parse.Cloud.job("getevent", function(request, status) {
var event = Parse.Object.extend("event");
var query = new Parse.Query(event);
query.notEqualTo("objectId", "lol");
query.limit(1000);
query.find({
success: function(results) {
for (var i = 0; i < results.length; i++) {
var myObject = results[i];
myObject.destroy({
success: function(myObject) {
},
error: function(myObject, error) {
}
});
}
},
error: function(error) {
alert("Error: " + error.code + " " + error.message);
}
});
var params = { url: 'https://api.example.com/events.json'};
Parse.Cloud.httpRequest(params).then(function(httpResponse) {
var results = [];
var jsonobj = JSON.parse(httpResponse.text);
for (var i = 0; i < jsonobj.data.length; i++) {
var tmp2D = {"name":"id"}
tmp2D.name = [jsonobj.data[i].name];
tmp2D.id = [jsonobj.data[i].id];
results.push(tmp2D);
}
newLst(results);
}).then(function() {
status.success("run job");
}, function(error) {
status.error(error);
});
});
I think my original answer is correct as a standalone. Rather than make it unreadable with the additional code, here it is made very specific to your edit.
The key is to eliminate passed callback functions. Everything below uses promises. Another key idea is decompose the activities into logical chunks.
A couple of caveats: (1) There's a lot of code here, and the chances that either your code or mine contains a mistake are still high, but this should communicate the gist of a better design. (2) We're doing enough work in these functions that we might bump into a Parse-imposed timeout, so start out by testing all of this with small counts.
Start with your question about destroying all instances of the class...
// return a promise to destroy all instances of the "event" class
function destroyEvents() {
    // is your event class really named with lowercase? uppercase is conventional
    var query = new Parse.Query("event");
    query.notEqualTo("objectId", "lol"); // doing this because the OP code did it. not sure why
    query.limit(1000);
    return query.find().then(function (results) {
        return Parse.Object.destroyAll(results);
    });
}
Next, get remote events and format them as simple JSON. See the comment. I'm pretty sure your idea of a "2D array" was ill-advised, but I may be misunderstanding your data...
// return a promise to fetch remote events and format them as an array of objects
//
// note - this differs from the OP data. this will evaluate to:
// [ { "name":"someName0", id:"someId0" }, { "name":"someName1", id:"someId1" }, ...]
//
// original code was producing:
// [ { "name":["someName0"], id:["someId0"] }, { "name":["someName1"], id:["someId1"] }, ...]
//
function fetchRemoteEvents() {
    var params = { url: 'https://api.example.com/events.json' };
    return Parse.Cloud.httpRequest(params).then(function (httpResponse) {
        var results = [];
        var remoteEvents = JSON.parse(httpResponse.text).data;
        for (var i = 0; i < remoteEvents.length; i++) {
            var remoteEvent = { "name": remoteEvents[i].name, "id": remoteEvents[i].id };
            results.push(remoteEvent);
        }
        return results;
    });
}
Please double-check all of my work above regarding the format (e.g. response.text, JSON.parse().data, etc.).
It's too easy to get confused when you mix callbacks and promises, and even worse when you're generating promises in a loop. Here again, we break out a simple operation: create a single parse.com object based on one of the remote events we got in the function above...
// return a promise to create a new native event based on a remoteEvent
function nativeEventFromRemoteEvent(remoteEvent) {
    var url = 'https://api.example.com/events/' + remoteEvent.name + '/' + remoteEvent.id + '.json';
    return Parse.Cloud.httpRequest({ url: url }).then(function (response) {
        var eventDetail = JSON.parse(response.text).data;
        var Event = Parse.Object.extend("event");
        var event = new Event();
        event.set("eventId", eventDetail.id);
        event.set("eventName", eventDetail.title);
        return event.save();
    });
}
Finally, we can bring it together in a job that is simple to read, certain to do things in the desired order, and certain to call success() when (and only when) it finishes successfully...
// the parse job removes all events, fetches remote data that describe events
// then builds events from those descriptions
Parse.Cloud.job("getevent", function(request, status) {
destroyEvents().then(function() {
return fetchRemoteEvents();
}).then(function(remoteEvents) {
var newEventPromises = [];
for (var i = 0; i < remoteEvents.length; i++) {
var remoteEvent = remoteEvents[i];
newEventPromises.push(nativeEventFromRemoteEvent(remoteEvent));
}
return Parse.Promise.when(newEventPromises);
}).then(function() {
status.success("run job");
}, function(error) {
status.error(error);
});
});
The posted code does just one HTTP request, so there's no need for an array of promises or the invocation of Promise.when(). Whatever else might be happening is obscured by mixing the callback parameters to httpRequest with the promises, and by the assignment inside the push.
Here's a clarified rewrite:
Parse.Cloud.job("getevent", function(request, status) {
var promises = [];
var params = { url: 'https://api.example.com'};
Parse.Cloud.httpRequest(params).then(function(httpResponse) {
var results = [];
var jsonobj = JSON.parse(httpResponse.text);
for (var i = 0; i < jsonobj.data.length; i++) {
// some code
}
}).then(function() {
status.success("run job");
}, function(error) {
status.error(error);
});
});
But there's a very strong caveat here: this works only if the ("// some code") that appears in your original post doesn't itself try to do any async work, database or otherwise.
Let's say you do need to do async work in that loop. Move that work to a promise-returning function, collect those promises in an array, and then use Promise.when(). E.g....
// return a promise to look up some object, change it and save it...
function findChangeSave(someJSON) {
    var query = new Parse.Query("SomeClass");
    query.equalTo("someAttribute", someJSON.lookupAttribute);
    return query.first().then(function (object) {
        object.set("someOtherAttribute", someJSON.otherAttribute);
        return object.save();
    });
}
Then, in your loop...
var jsonobj = JSON.parse(httpResponse.text);
var promises = [];
for (var i = 0; i < jsonobj.data.length; i++) {
    // some code, which is really:
    var someJSON = jsonobj.data[i];
    promises.push(findChangeSave(someJSON));
}
return Parse.Promise.when(promises);

node.js + cheerio scrape: Passing an array of urls to download?

Firstly, here is my code as I've progressed so far:
var http = require("http");
// Utility function that downloads a URL and invokes
// callback with the data.
function download(url, callback) {
http.get(url, function(res) {
var data = "";
res.on('data', function (chunk) {
data += chunk;
});
res.on("end", function() {
callback(data);
});
}).on("error", function() {
callback(null);
});
}
var cheerio = require("cheerio");
var url = "http://www.bloglovin.com/en/blogs/1/2/all";
var myArray = [];
var a = 0;
var getLinks = function(){download(url, function(data) {
if (data) {
// console.log(data);
var $ = cheerio.load(data);
$(".content").each(function(i, e) {
var blogName = $(e).find(".blog-name").text();
var followLink = $(e).find("a").attr("href");
var blogSite = $(e).find(".description").text();
myArray[a] = [a];
myArray[a]["blogName"] = blogName;
myArray[a]["followLink"] = "http://www.bloglovin.com"+followLink;
myArray[a]["blogSite"] = blogSite;
a++;
console.log(myArray);
});
}
});
}
getLinks();
As you can see, followLink is concatenated onto the base URL; I'd like to pass each of those resulting URLs through download(), so that I'm effectively scraping each of those pages with the same CSS rules and adding the results to the multidimensional array for the corresponding blogger.
How can I go about this?
I do something similar in one of my scraping jobs, but I use the async.js library to accomplish it. Note that I'm also using the request module and cheerio.js in my scraping. I fetch and scrape rows of data from a single webpage, but I suspect you could do something similar to fetch URLs and request/scrape them in the same manner.
I also admit this is quite basic coding; it could certainly be optimized with a bit of refactoring. Hope it gives you some ideas at least...
First, I use request to fetch the page and call my parse function -
var url = 'http://www.target-website.com';

function (lastCallback) {
    request(url, function (err, resp, body) {
        if (!err) { parsePage(err, resp, body, lastCallback); }
        else { console.log('web request error: ' + resp.statusCode); }
    });
}
Next, in my parsePage function, I load the website into Cheerio, fetch the HTML of each data row into an array, push my parseRow function and each HTML segment into another array, and use async.parallel to process each iteration -
var rows = [];

function parsePage(err, resp, body, callback1) {
    var $ = cheerio.load(body);
    $('div#targetTable tr').each(function (i, elem) {
        rows.push($(this).html());
    });
    var scrRows = [];
    rows.forEach(function (row) {
        scrRows.push(function (callback2) {
            parseRow(err, resp, row);
            callback2();
        });
    });
    async.parallel(scrRows, function () {
        callback1();
    });
}
Inside your loop, just create an object with the properties you scrape then push that object onto your array.
var blogInfo = {
    blogName: blogName,
    followLink: "http://www.bloglovin.com" + followLink,
    blogSite: blogSite
};

myArray.push(blogInfo);
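If you then want to scrape each follower page, one rough sketch is to reuse the download() helper from the question on every followLink once the listing page has been processed; the .some-detail selector below is only a placeholder for whatever CSS rule you actually need:
myArray.forEach(function (blogInfo) {
    download(blogInfo.followLink, function (page) {
        if (!page) return; // the request failed
        var $ = cheerio.load(page);
        blogInfo.detail = $(".some-detail").text(); // placeholder selector
        console.log(blogInfo);
    });
});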
You have defined a = 0. So:
myArray[a] = [a]; // => myArray[0] = [0]; myArray[0] becomes an array whose only member is 0
The statements below then attach named properties to that array. They won't throw, but arrays are meant to be keyed by integers, so this structure is awkward to work with (JSON.stringify, for example, drops such properties):
myArray[a]["blogName"] = blogName;
myArray[a]["followLink"] = "http://www.bloglovin.com"+followLink;
myArray[a]["blogSite"] = blogSite;
Instead try this:
var obj = {
    index: a,
    blogName: blogName,
    followLink: "http://www.bloglovin.com" + followLink,
    blogSite: blogSite
};

myArray.push(obj);
console.log(myArray);

nodejs multiple http requests in loop

I'm trying to make a simple feed reader in Node and I'm facing a problem with multiple requests in node.js.
For example, I have a table with URLs, something like:
urls = [
    "http://url1.com/rss.xml",
    "http://url2.com",
    "http://url3.com"
];
Now I want to get the contents of each URL. My first idea was to use for (var i in urls), but that's not a good idea. The best option would be to do it asynchronously, but I don't know how to do that.
Any ideas?
EDIT:
I got this code:
var data = [];

for (var i = 0; i < urls.length; i++) {
    http.get(urls[i], function (response) {
        console.log('Response: ', response.statusCode, ' from url: ', urls[i]);
        var body = '';
        response.on('data', function (chunk) {
            body += chunk;
        });
        response.on('end', function () {
            data.push(body);
        });
    }).on('error', function (e) {
        console.log('Error: ', e.message);
    });
}
The problem is that the "http.get..." line is first called for each element in the loop, and only after that do the response.on('data') and response.on('end') events fire. It makes a mess and I don't know how to handle it.
I know this is an old question, but I think a better solution would be to use JavaScript's Promise.all():
const request = require('request-promise');

const urls = ["http://www.google.com", "http://www.example.com"];
const promises = urls.map(url => request(url));

Promise.all(promises).then((data) => {
    // data = [body1, body2], the resolved response bodies in the same order as urls
});
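If you'd rather not depend on request-promise (the underlying request package has since been deprecated), the same pattern works with the built-in http module wrapped in a Promise. A minimal sketch:
const http = require('http');

function get(url) {
    return new Promise((resolve, reject) => {
        http.get(url, (res) => {
            let body = '';
            res.on('data', (chunk) => { body += chunk; });
            res.on('end', () => resolve(body));
        }).on('error', reject);
    });
}

Promise.all(urls.map(get)).then((bodies) => {
    // bodies[i] is the response body for urls[i]
});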
By default, Node HTTP requests are asynchronous. You can start them sequentially in your code and call a function that runs when all requests are done. You can either do it by hand (count finished vs. started requests) or use async.js; a sketch of the async.js approach follows the example below.
This is the no-dependency way (error checking omitted):
var http = require('http');
var urls = ["http://www.google.com", "http://www.example.com"];
var responses = [];
var completed_requests = 0;

for (i in urls) {
    http.get(urls[i], function (res) {
        responses.push(res);
        completed_requests++;
        if (completed_requests == urls.length) {
            // All downloads done, process responses array
            console.log(responses);
        }
    });
}
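For reference, here is roughly what the async.js route mentioned above could look like, using async.map with a download(url, callback) helper like the one from the cheerio question; this is a sketch and assumes the async package is installed:
var async = require('async');
var http = require('http');

function download(url, callback) {
    http.get(url, function (res) {
        var body = '';
        res.on('data', function (chunk) { body += chunk; });
        res.on('end', function () { callback(null, body); });
    }).on('error', callback);
}

async.map(urls, download, function (err, bodies) {
    if (err) return console.error(err);
    console.log(bodies); // bodies are in the same order as urls
});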
You need to check that 'end' (the data-complete event) has been called exactly as many times as there are requests. Here's a working example:
var http = require('http');
var urls = ['http://adrianmejia.com/atom.xml', 'http://twitrss.me/twitter_user_to_rss/?user=amejiarosario'];
var completed_requests = 0;

urls.forEach(function (url) {
    var responses = [];
    http.get(url, function (res) {
        res.on('data', function (chunk) {
            responses.push(chunk);
        });
        res.on('end', function () {
            if (completed_requests++ == urls.length - 1) {
                // All downloads are completed
                console.log('body:', responses.join());
            }
        });
    });
});
You can use any promise library with an ".all" implementation. I use the RSVP library; it's simple enough.
var downloadFileList = [{ url: 'http://stuff', dataname: 'filename to download' }];

var ddownload = downloadFileList.map(function (id) {
    var dataname = id.dataname;
    var url = id.url;
    return new RSVP.Promise(function (fulfill, reject) {
        var stream = fs.createWriteStream(dataname);
        stream.on('close', function () {
            console.log(dataname + ' downloaded');
            fulfill();
        });
        request(url).on('error', function (err) {
            console.log(err);
            reject();
        }).pipe(stream);
    });
});

return RSVP.allSettled(ddownload);
Promise.allSettled will not stop at an error; it makes sure you process all responses, even if some have failed (each settled entry has the form { status: 'fulfilled', value } or { status: 'rejected', reason }).
Promise.allSettled(promises)
    .then((data) => {
        // do your stuff here
    })
    .catch((err) => {
        console.log(JSON.stringify(err, null, 4));
    });
The problem can be easily solved using a closure. Make a function to handle the request and call that function in the loop. Every time the function is called, it has its own lexical scope and, using closure, it retains the URL even after the loop ends. And even if the response arrives in streams, the closure handles that too.
const request = require("request");
function getTheUrl(data) {
var options = {
url: "https://jsonplaceholder.typicode.com/posts/" + data
}
return options
}
function consoleTheResult(url) {
request(url, function (err, res, body) {
console.log(url);
});
}
for (var i = 0; i < 10; i++) {
consoleTheResult(getTheUrl(i))
}
