learnyounode - Juggling Async - different order - javascript

This is from the learnyounode tutorial exercise 9 on node.js. I'm having trouble understanding why my code doesn't print out the data in order.
let http = require('http'),
    bl = require('bl'),
    urlArray = [process.argv[2], process.argv[3], process.argv[4]],
    results = [];
//counter = 0;

function collectData(i) {
    http.get(urlArray[i], (res) => {
        res.pipe(bl((err, data) => {
            if (err) {
                return console.log(err);
            }
            data = data.toString();
            results[i] = data;
            //counter++;
            //if (counter === 3) {
            if (results.length === 3) {
                results.forEach((result) => {
                    console.log(result);
                });
            }
        }));
    });
}

for (let i = 0; i < urlArray.length; i++) {
    collectData(i);
}
The for loop should start from the first URL and go through to the last in order. From my understanding, whatever happens in the current iteration of the loop must resolve before the loop moves to the next iteration. However, the results seem to be random. If I run my solution on the command line, sometimes the results are in order and sometimes they're not.
Edit: This is my current solution which works. I added the counter variable and put the http request into a function.

The reason you're getting different results on each run is that http.get is asynchronous. You issue the requests in the right order, but the web server at each URL does not respond instantly.
So basically, if you have two URLs to call:
http://stackoverflow.com
http://google.com
you call them in this order, but if Google has a good response time on this run, say 10 ms, and Stack Overflow needs a little longer, say 20 ms, then your callback for Google is called first and the callback for Stack Overflow second.
The response times can differ on each run; that's why you experience different results each run.
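You can see the same effect without any network at all. Here is a minimal sketch (my own illustration, with made-up names and delays, not part of the exercise) that simulates two requests whose callbacks fire after different response times:

// Simulate two "requests" whose callbacks fire after different delays.
// The one started first finishes last because its delay is longer.
function fakeRequest(name, delayMs, callback) {
    setTimeout(() => callback(name), delayMs);
}

fakeRequest('stackoverflow.com', 20, (name) => console.log(name)); // logs second
fakeRequest('google.com', 10, (name) => console.log(name));        // logs first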
This is your callback-function:
res.pipe(bl((err, data) => {
    if (err) {
        return console.log(err);
    }
    data = data.toString();
    console.log(data);
}))

The entire problem is with the variable "i" and the asynchronous calls. With this particular logic, you don't have control over the value of i, because the calls complete asynchronously.
To understand the problem with your code, add a console.log after this line:
results[i] = data;
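For instance (a hedged sketch of the suggested logging, not the original answer's exact code), this will show that results.length can already be 3 before all three responses have arrived, because assigning to index 2 of an empty array makes its length 3:

// Assigning to a high index of a sparse array bumps its length immediately:
const results = [];
results[2] = 'third response arrived first';
console.log(results.length); // 3, even though results[0] and results[1] are unset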
This is my solution to the problem:
var http = require('http');
var count = 3;
var contentResults = [];

function hitRequests(i) {
    http.get(process.argv[i + 1], function (response) {
        response.setEncoding('utf8');
        var entireContent = '';
        response.on('data', function (chunk) {
            entireContent += chunk;
        });
        response.on('end', function () {
            contentResults[i] = entireContent;
            count--;
            if (count <= 0) {
                printAll();
            }
        });
    }).on('error', function (e) {
        console.log('error' + e);
    });
}

for (let i = 1; i <= 3; i++) {
    hitRequests(i);
}

function printAll() {
    contentResults.forEach(function (result) {
        console.log(result);
    });
}

Related

Not able to push data in an empty array, I'm getting that data from an external api, using https module in Node js

var https = require("https");
const arr = [];
for (let i = 1; i < 26; i++) {
    https.get(
        `https://jsonmock.hackerrank.com/api/countries?page=${i}`,
        (res) => {
            res.on("data", (data) => {
                JSON.parse(data).data.map((info, i) => {
                    let { name } = info;
                    arr.push(name);
                    console.log(name);
                });
            });
        }
    );
}
console.log(arr);
When I'm just logging JSON.parse(data), I'm getting the required data on my console, but when I try to push it into an array, nothing happens; instead it logs an empty array to the console.
I really need to know the reason, as I've been stuck on this for 3 days now.
Your issue is that the callback to https.get, i.e. (res) =>, is called asynchronously; therefore console.log(arr); is executed before the 25 https.get requests are even made. You'll see that arr.push does actually work if you console.log(arr); where you currently console.log(name);, so your assumption that you are not able to push is incorrect.
You are pushing to the array; you just never console.log the array at a point when it has data in it.
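As a minimal, self-contained illustration of this (my own sketch, fetching one page only and assuming, as the question's code does, that the response arrives in a single chunk):

var https = require("https");
const arr = [];
https.get("https://jsonmock.hackerrank.com/api/countries?page=1", (res) => {
    res.on("data", (data) => {
        JSON.parse(data).data.map(({ name }) => arr.push(name));
        console.log(arr); // populated here, after the response has arrived
    });
});
console.log(arr); // still [] here: this line runs before the response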
One way I can suggest doing this with the rather old version of Node.js you are using (14.16.0) is as follows:
var https = require("https");

function fn(callback) {
    const result = [];
    let done = 0;
    for (let i = 1; i < 26; i++) {
        https.get(`https://jsonmock.hackerrank.com/api/countries?page=${i}`, (res) => {
            res.on("data", (data) => {
                JSON.parse(data).data.map((info) => {
                    let { name } = info;
                    result.push(name);
                    console.log(name);
                });
                // keep track of how many requests have "completed"
                done = done + 1;
                // when all are done, call the callback
                if (done === 25) {
                    callback(result);
                }
            });
        });
    }
}
fn(arr => { // here is where arr is populated, nowhere else
console.log(arr);
});
Note, arr will only be accessible inside the callback; you will NOT be able to access arr at the top level like you want. The only possible way you could is to use a version of Node.js that supports top-level await and convert the code to use a Promise (which is a trivial task).
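For instance (a hedged sketch of that conversion, not part of the original answer), wrapping fn in a Promise looks like this:

// Wrap the callback-style fn above in a Promise so it can be awaited.
// (In Node 14 with CommonJS you would still consume it inside an async
// function, since there is no top-level await there.)
function fnAsPromise() {
    return new Promise((resolve) => fn(resolve));
}

fnAsPromise().then((arr) => console.log(arr));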
Not sure it's important, but there's no guarantee that the names in arr will be in the correct order; the results from iteration 3 may be pushed after those from iteration 4, for example. That's the nature of network requests: there is no guarantee when they will finish.
As an aside: if you were to use the latest (18.1 at the time of writing) version of Node.js, the above can be written like
const promises = Array.from({ length: 25 }, async (_, i) => {
    const res = await fetch(`https://jsonmock.hackerrank.com/api/countries?page=${i + 1}`);
    const { data } = await res.json(); // the API wraps each page's records in a "data" property
    return data.map(({ name }) => name);
});
const arr = (await Promise.all(promises)).flat(2);
console.log(arr);
Things of note are:
native fetch, which makes network requests simple compared to the older Node.js methods
top-level await, so you can use arr at the top level of the code
6 lines of code vs over 20

Run HTTP requests in chunks

I want to run one hundred HTTP requests in configurable chunks, with a configurable timeout between chunk requests. The requests are based on the data provided in a .csv file.
It doesn't work because I am getting a TypeError, and when I remove the () after f, it doesn't work either.
I would be very grateful for a little help. Probably the biggest problem is that I don't really understand exactly how promises work; I have tried multiple solutions and wasn't able to achieve what I want.
The timeout feature will probably give me even more headaches, so I would appreciate any tips on that too.
Can you please help me understand why it doesn't work?
Here is the snippet:
const rp = require('request-promise');
const fs = require('fs');
const { chunk } = require('lodash');

const BATCH_SIZE = 2;
const QUERY_PARAMS = ['clientId', 'time', 'changeTime', 'newValue'];

async function update(id, time, query) {
    const options = {
        method: 'POST',
        uri: `https://requesturl/${id}?query=${query}`,
        body: {
            "prop": {
                "time": time
            }
        },
        headers: {
            "Content-Type": "application/json"
        },
        json: true
    }
    return async () => { return await rp(options) };
}

async function batchRequestRunner(data) {
    const promises = [];
    for (row of data) {
        row = row.split(',');
        promises.push(update(row[0], row[1], QUERY_PARAMS.join(',')));
    }
    const batches = chunk(promises, BATCH_SIZE);
    for (let batch of batches) {
        try {
            Promise.all(
                batch.map(async f => { return await f(); })
            ).then((resp) => console.log(resp));
        } catch (e) {
            console.log(e);
        }
    }
}

async function main() {
    const input = fs.readFileSync('./input.test.csv').toString().split("\n");
    const requestData = input.slice(1);
    await batchRequestRunner(requestData);
}

main();
Clarification for the first comment:
I have a csv file which looks like below:
clientId,startTime
123,13:40:00
321,13:50:00
the file size is ~100k rows
the file contains information on how to update the time for a particular clientId in the database. I don't have access to the database, but I have access to an API which allows updating entries in the database.
I cannot make 100k calls at once because: my network is limited (I work remotely because of coronavirus), it consumes a lot of memory, and the API can also be limited and could crash if I make all the requests at once.
What I want to achieve:
Load csv into memory, convert it to an Array
Handle API requests in chunks: for example, take the first two rows from the array, make API calls based on them, wait 1000 ms, take the next two rows, and continue processing until the end of the array (csv file)
Well, it seems like this is a somewhat classic case where you want to process an array of values with some asynchronous operation and, to avoid consuming too many resources or overwhelming the target server, you want to have no more than N requests in flight at the same time. This is a common problem for which there are pre-built solutions. My go-to solution is a small piece of code called mapConcurrent(). It's analogous to array.map(), but it assumes a promise-returning asynchronous callback, and you pass it the max number of items that should ever be in flight at the same time. It then returns a promise that resolves to an array of results.
Here's mapConcurrent():
// takes an array of items and a function that returns a promise
// returns a promise that resolves to an array of results
function mapConcurrent(items, maxConcurrent, fn) {
    let index = 0;
    let inFlightCntr = 0;
    let doneCntr = 0;
    let results = new Array(items.length);
    let stop = false;

    return new Promise(function(resolve, reject) {
        function runNext() {
            let i = index;
            ++inFlightCntr;
            fn(items[index], index++).then(function(val) {
                ++doneCntr;
                --inFlightCntr;
                results[i] = val;
                run();
            }, function(err) {
                // set flag so we don't launch any more requests
                stop = true;
                reject(err);
            });
        }

        function run() {
            // launch as many as we're allowed to
            while (!stop && inFlightCntr < maxConcurrent && index < items.length) {
                runNext();
            }
            // if all are done, then resolve parent promise with results
            if (doneCntr === items.length) {
                resolve(results);
            }
        }

        run();
    });
}
Your code can then be structured to use it like this:
function update(id, time, query) {
    const options = {
        method: 'POST',
        uri: `https://requesturl/${id}?query=${query}`,
        body: {
            "prop": {
                "time": time
            }
        },
        headers: {
            "Content-Type": "application/json"
        },
        json: true
    }
    return rp(options);
}

function processRow(row) {
    let rowData = row.split(",");
    return update(rowData[0], rowData[1], rowData[2]);
}

function main() {
    const input = fs.readFileSync('./input.test.csv').toString().split("\n");
    const requestData = input.slice(1);
    // process this entire array with up to 5 requests "in-flight" at the same time
    mapConcurrent(requestData, 5, processRow).then(results => {
        console.log(results);
    }).catch(err => {
        console.log(err);
    });
}
You can obviously adjust the number of concurrent requests to whatever number you want. I set it to 5 here in this example.
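The question also asked for a configurable pause between chunks. mapConcurrent() doesn't pause, it just caps concurrency, but if you specifically want the chunk-then-wait behavior, here is a hedged sketch (reusing the processRow() helper above; the chunk size of 2 and the 1000 ms pause are the figures from the question):

// Process rows in fixed-size chunks, waiting pauseMs between chunks.
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function runInChunks(rows, chunkSize, pauseMs) {
    for (let i = 0; i < rows.length; i += chunkSize) {
        const batch = rows.slice(i, i + chunkSize);
        // fire the whole chunk and wait for every request in it to finish
        await Promise.all(batch.map(processRow));
        await delay(pauseMs); // configurable timeout between chunks
    }
}

// e.g. runInChunks(requestData, 2, 1000);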

Loops and Callback hell

Suppose you have an Array/Object that contains a list of values. Let's say those are MySQL commands, URLs, or file paths. Now you want to iterate over all of them and execute some code for every entry.
for (let i = 0; i < urls.length; i++) {
    doSthWith(urls[i]);
}
No problem so far. But now let's say each function has a callback and needs the result of the last execution. E.g. you request something from one website and you want to use the result of this request for one of your following requests.
for (let i = 0; i < urls.length; i++) {
    if (resultOfLastIteration.successful) { //or some other result besides the last one
        doSthWith(urls[i]);
    }
}
Now let's say the length of urls (or something similar) is over 100. That's why you normally use a loop, so you don't need to write the same function 100 times. That also means that promises won't do the trick either (unless I'm unaware of a trick), because you have the same problem:
doSthWith(urls[0]).then(...
    doSthWith(urls[1]).then(... //either put them inside each other
).then(...
    doSthWith(urls[i]) //or in sequence
    ...
).catch(err){...}
Either way, I don't see a way to use a loop.
A way that I found, but isn't really "good", is to use the package "wait.for" (https://www.npmjs.com/package/wait.for). But what makes this package tricky is that you have to launch a fiber each time you want to use wait.for:
//somewhere you use the function in a fiber Context
wait.for(loopedExecutionOfUrls, urls);

//function declaration
function loopedExecutionOfUrls(urls, cb) {
    //variables:
    for (let i = 0; i < urls.length; i++) {
        if (someTempResultVar[i - 1] === true) {
            someTempResultVar = wait.for(doSthWith, urls[i]);
        } else if (...) {...}
    }
}
But I'm not sure if this approach is really good; besides, you always have to check whether you have wrapped the whole thing in a fiber, for each function that has loops with functions that have callbacks. Thus you have 3 levels: the launchFiber level, the wait.for(loopedFunction) level, and the wait.for of the callback function level. (I hope that was understandable.)
So my question is: do you have a good approach where you can loop through callback functions and use their results whenever you like?
good = easy to use, read, performant, not recursive, ...
(I'm sorry if this question is stupid, but I really have problems getting along with asynchronous programming.)
If you want to wait for doSthWith to finish before doing the same with the next url, you have to chain your promises, and you can use Array.prototype.reduce to do that:
urls = ["aaa", "bbb", "ccc", "ddd"];
urls.reduce((lastPromise, url) => lastPromise.then((resultOfPreviousPromise) => {
console.log("Result of previous request: ", resultOfPreviousPromise); // <-- Result of the previous request that you can use for the next request
return doSthWith(url);
}), Promise.resolve());
function doSthWith(arg) { // Simulate the doSthWith promise
console.log("do something with: ", arg);
return new Promise(resolve => {
setTimeout(() => resolve("result of " + arg), 2000);
});
}
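As a small follow-up (my own addition, not part of the original answer): the reduce chain is itself a promise, so the result of the final request can be consumed by appending one more .then, reusing urls and doSthWith from the snippet above:

urls.reduce((lastPromise, url) => lastPromise.then(() => doSthWith(url)),
            Promise.resolve())
    .then((lastResult) => console.log("Result of the final request: ", lastResult));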
Use async, specifically async.each:
const async = require('async');

function doSthWith(url, cb) {
    console.log('doing something with ' + url);
    setTimeout(() => cb(), 2000);
}

const urls = ['https://stackoverflow.com/', 'https://phihag.de/'];
async.each(urls, doSthWith, (err) => {
    if (err) {
        // In practice, likely a callback or throw here
        console.error(err);
    } else {
        console.log('done!');
    }
});
Use async.map if you are interested in the result.
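For example (a hedged sketch of my own, reusing urls from above; the iteratee here passes a result to its callback, as async.map expects):

// Each iteratee calls cb(err, result); async.map collects the results
// in the same order as the input array.
function fetchSth(url, cb) {
    setTimeout(() => cb(null, 'result of ' + url), 2000);
}

async.map(urls, fetchSth, (err, results) => {
    if (err) return console.error(err);
    console.log(results); // ['result of https://stackoverflow.com/', ...]
});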
When I need to loop over promises I use my handy dandy ploop function. Here is an example:
// Function that returns a promise
var searchForNumber = function(number) {
    return new Promise(function(resolve, reject) {
        setTimeout(function() {
            var min = 1;
            var max = 10;
            var val = Math.floor(Math.random() * (max - min + 1) + min);
            console.log('Value is: ' + val.toString());
            return resolve(val);
        }, 1000);
    });
};

// fn : function that should return a promise.
// args : the arguments that should be passed to fn.
// donefn : function that should check the result of the promise
//          and return true to indicate whether ploop should stop or not.
var ploop = function(fn, args, donefn) {
    return Promise.resolve(true)
        .then(function() {
            return (fn.apply(null, args));
        })
        .then(function(result) {
            var finished = donefn(result);
            if (finished === true) {
                return result;
            } else {
                return ploop(fn, args, donefn);
            }
        });
};

var searchFor = 4;
var donefn = function(result) {
    return result === searchFor;
};

console.log('Searching for: ' + searchFor);
ploop(searchForNumber, [searchFor], donefn)
    .then(function(val) {
        console.log('Finally found! ' + val.toString());
        process.exit(0);
    })
    .catch(function(err) {
        process.exit(1);
    });

Save results of multiple requests into array [closed]

I was trying to make multiple API requests and store responses into the array.
Array stayed empty because I didn't know about async behavior of for loop.
const results = [];
for (let i = 0; i < apiUrls.length; i++) {
    apiCall(apiUrls[i], res => {
        results.push(res);
    });
}
console.log(results) // []
So I rewrite it to:
const results = []

async function makeApiCalls () {
    for (const apiUrl of apiUrls) {
        const res = await apiCall(apiUrl)
        results.push(res)
    }
    console.log(results) // [data, someData, otherData...]
}

makeApiCalls()
It works! But runs in sequence. We can improve it to run in parallel like this:
let results = []

async function makeApiCalls () {
    const promises = []
    // fire all requests
    for (const apiUrl of apiUrls) {
        promises.push(apiCall(apiUrl))
    }
    // wait until all promises are resolved and copy responses to array
    results = [...await Promise.all(promises)];
    console.log(results) // [data, someData, otherData...]
}

makeApiCalls()
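A note worth adding (an illustration of my own, not from the original post): Promise.all preserves input order regardless of which request finishes first, which is why the parallel version above still yields results in the same order as apiUrls:

// The slower promise comes first in the input, and first in the output.
const slow = new Promise(resolve => setTimeout(() => resolve('slow'), 20));
const fast = new Promise(resolve => setTimeout(() => resolve('fast'), 10));
Promise.all([slow, fast]).then(console.log); // ['slow', 'fast']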
Looping over asynchronous calls is not going to work very well, because you'll have no idea when you're finished and the stats array is populated. You need some additional machinery to handle this. Without using async or promises, the basic idea is:
var stats = [];
var finishedCount = 0; // keep track of how many callbacks have completed

for (let i = 0; i < tempBackends.length; i++) {
    http.get(tempBackends[i], function(res) {
        console.log("Received response: " + res.statusCode);
        if (res.statusCode == 200) {
            stats.push('OK');
            console.log('pushed OK\n');
        } else {
            stats.push('Not OK');
            console.log('pushed Not OK\n');
        }
        // ADD LOGIC HERE TO HANDLE ALL CALLBACKS HAVING FINISHED
        finishedCount++;
        if (finishedCount === tempBackends.length) {
            console.log("ALL DONE!! stats is", stats);
        }
    });
}
However, with this approach, the stats array is not going to be aligned with tempBackends. The order of the values in stats will be based on the order in which the asynchronous calls finished. In addition, this style of code is going to be hard to maintain. Instead, you should use async or promises. The async approach would be:
async.map(
    tempBackends,
    function(backend, callback) {
        http.get(backend, function(res) {
            callback(null, res.statusCode === 200 ? "OK" : "Not OK");
        });
    },
    function(err, data) {
        console.log("ALL DONE!! stats is", data);
    }
);
The promise approach is more readable and writable. First, make a promisified version of http.get:
function getPromise(backend) {
    return new Promise(function(resolve) {
        http.get(backend, function(res) {
            resolve(res.statusCode === 200 ? "OK" : "Not OK");
        });
    });
}
Now you can just write
Promise.all(tempBackends.map(getPromise))
    .then(function(stats) {
        console.log("ALL DONE!! stats is", stats);
    });
var http = require('http');

var list = ['http://rest-service.guides.spring.io/greeting',
    'http://rest-service.guides.spring.io/greeting',
    'http://rest-service.guides.spring.io/greeting',
    'http://rest-service.guides.spring.io/greeting',
    'http://rest-service.guides.spring.io/greeting'];

var responseArray = [];

var calls = function() {
    var index = 0;
    var callWithIndex = function(i) {
        http.get(list[i], function(apiResponse) {
            var statusCode = apiResponse.statusCode;
            apiResponse.on('data', function(chunk) {
            });
            apiResponse.on('end', function() {
                responseArray.push(statusCode);
                i += 1;
                if (i < list.length) {
                    callWithIndex(i);
                } else {
                    console.log(responseArray.toString());
                }
            });
        });
    };
    callWithIndex(index);
};

calls();
If you work with Node.js, you should know in which callback to execute your next iteration and where to process your data. Notice how I call the next iteration in the response.on('end') handler, and how I push the statusCode to the array there as well.
Try passing your array variable stats inside the function, because the scope of a function is local.
http.get(tempBackends[i], function(res, stats) {
    // access stats here now
});

learnyounode - juggling async - why doesn't my solution work

Can someone explain why the following does not work as a solution to the "juggling async" lesson in the learnyounode workshop?
FYI I have left my log lines in if that helps. I am struggling to see any fundamental differences between my solution and an answer I found online here:
https://github.com/olizilla/nodeschooling/blob/master/learnyounode-answers/09-juggling-async.js
Thanks in advance!
var urlList = process.argv.slice(2, process.argv.length);
//console.log(urlList);
var urlResponseList = {};
for (var i = 0; i < urlList.length; i++) {
    urlResponseList[urlList[i]] = '';
}

var http = require('http');
console.log(urlList);

for (var i = 0; i < urlList.length; i++) {
    //console.log(i);
    var url = urlList[i];
    console.log("1 " + url);
    http.get(url, function (response) {
        console.log("2 " + url);
        console.log("3 " + i);
        response.setEncoding('utf8');
        response.on('data', function (data) {
            urlResponseList[url] = urlResponseList[url] + data;
        });
        response.on('end', function () {
            //console.log(stringResponse);
            //console.log(url);
        });
    });
}

console.log(urlResponseList);

for (var i = 0; i < urlList.length; i++) {
    console.log(urlResponseList[urlList[i]]);
}
I also have a question about a solution I found posted online here:
https://github.com/olizilla/nodeschooling/blob/master/learnyounode-answers/09-juggling-async.js
urls.forEach(function (item, index) {
    http.get(item, function (req) {
        req.setEncoding('utf8')
        req.pipe(concat(function (res) {
            data[index] = res;
            responseCount++
            if (responseCount === urls.length) {
                console.log(data.join('\n'));
            }
        }))
    })
})
If http.get is async, can the "index" variable be trusted in the http.get callback, even though it is being set outside of the callback (in the forEach loop)?
I just wanted to post the updated solution below. Thanks for all the help. My issue was I didn't fully understand how closures worked.
var urlList = process.argv.slice(2, process.argv.length);
//console.log(urlList);
var urlResponseList = [];
for (var i = 0; i < urlList.length; i++) {
    urlResponseList.push('');
}

var http = require('http');
var responseCount = 0;
//console.log(urlList);

urlList.forEach(function (item, index) {
    //console.log(i);
    var url = urlList[index];
    //console.log("1 " + url);
    http.get(item, function (response) {
        //console.log("2 " + url);
        //console.log("3 " + i);
        response.setEncoding('utf8');
        response.on('data', function (data) {
            urlResponseList[index] = urlResponseList[index] + data;
        });
        response.on('end', function () {
            responseCount++;
            if (responseCount == urlList.length) {
                //console.log("help");
                console.log(urlResponseList.join('\n'));
            }
        });
    });
});
//console.log(urlResponseList);
http.get() returns its result asynchronously (i.e. some time later) in the callback. Your code execution does not wait for that result; the rest of your code keeps running even though the response is not here yet.
Thus, all requests will be sent at once from your first for loop, then your second for loop will run and THEN sometime later, the responses will arrive and the callbacks will be called. Your responses are going to arrive async and thus will NOT be available at the end of the first for loop. So, your first console.log(urlResponseList); will be empty and the second for loop will have nothing to do.
Async responses MUST be processed inside the callback in which they are delivered or they must be stored somewhere until the last response is done and then all responses can be processed at that time.
If you don't understand the issues behind asynchronous responses, then read these two references:
How do I return the response from an asynchronous call?
Why is my variable unaltered after I modify it inside of a function? - Asynchronous code reference
You also have issues with local variables declared before the callback that will have changed before the callback is called.
Here's an example of how you solve the for loop issue: JavaScript closure inside loops – simple practical example
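To make the closure issue concrete, here is a minimal illustration (my own, not from the linked answer) of why the var-based loop logged unexpected values for url and i, and why let (or forEach's index parameter) fixes it:

// With var, every callback shares the same i; by the time they run, the
// loop has already finished, so they all see the final value.
for (var i = 0; i < 3; i++) {
    setTimeout(function () { console.log('var i:', i); }, 0); // 3, 3, 3
}

// With let, each iteration gets its own binding.
for (let j = 0; j < 3; j++) {
    setTimeout(function () { console.log('let j:', j); }, 0); // 0, 1, 2
}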
