Execute a forEach like a waterfall in async - javascript

I'm trying to retrieve longitude and latitude from a list of addresses with the Google API via a Node.js script. The call itself works fine but since I have around 100 addresses to submit. I use a async.forEach on an array, but the calls are made too fast and I get the error "You have exceeded your rate-limit for this API."
I found that the number of calls is limited to 2500 every 24h and maximum 10 a second. While I'm OK for the 2500 a day, I make my calls way too fast for the rate limit.
I now have to write a function who will delay the calls enough not to reach the limit. Here is a sample of my code :
async.forEach(final_json, function(item, callback) {
var path = '/maps/api/geocode/json?address='+encodeURIComponent(item.main_address)+'&sensor=false';
console.log(path);
var options = {
host: 'maps.googleapis.com',
port: 80,
path: path,
method: 'GET',
headers: {
'Content-Type': 'application/json'
}
}
// a function I have who makes the http GET
rest.getJSON(options, function(statusCode, res) {
console.log(res);
callback();
});
}, function() {
// do something once all the calls have been made
});
How would you proceed to achieve this? I tried putting my rest.getJSON inside a 100ms setTimeout but the forEach iterates through all the rows so fast that it starts all the setTimeout almost at the same time and therefore it doesn't change anything...
The async.waterfall looks like it would do the trick, but the thing is I don't know exactly how many rows I will have, so I can't hardcode all the function calls. And to be honest, it would make my code really ugly

The idea is that you can create a rateLimited function that acts much like a throttled or debounced function, except any calls that don't execute immediately get queued and run in order as the rate limit time period expires.
Basically, it creates parallel 1 second intervals that self-manage via timer rescheduling, but only up to perSecondLimit intervals are allowed.
function rateLimit(perSecondLimit, fn) {
var callsInLastSecond = 0;
var queue = [];
return function limited() {
if(callsInLastSecond >= perSecondLimit) {
queue.push([this,arguments]);
return;
}
callsInLastSecond++;
setTimeout(function() {
callsInLastSecond--;
var parms;
if(parms = queue.shift()) {
limited.apply(parms[0], parms[1]);
}
}, 1010);
fn.apply(this, arguments);
};
}
Usage:
function thisFunctionWillBeCalledTooFast() {}
var limitedVersion = rateLimit(10, thisFunctionWillBeCalledTooFast);
// 10 calls will be launched immediately, then as the timer expires
// for each of those calls a new call will be launched in it's place.
for(var i = 0; i < 100; i++) {
limitedVersion();
}

Here's how I would hack it (Note: arr is your array of locations):
function populate(arr, callback, pos) {
if(typeof pos == "undefined")
pos=0;
var path = '/maps/api/geocode/json?address='+encodeURIComponent(arr[pos].main_address)+'&sensor=false';
console.log(path);
var options = {
host: 'maps.googleapis.com',
port: 80,
path: path,
method: 'GET',
headers: {
'Content-Type': 'application/json'
}
}
// a function I have who makes the http GET
rest.getJSON(options, function(statusCode, res) {
console.log(res);
});
pos++;
if(pos<arr.length)
setTimeout(function(){
populate(arr,callback,pos);
},110); //a little wiggle room since setTimeout isn't exact
else
callback();
}
You could add a rate limiting function, but, IMHO, it introduces unnecessary complexity. All you really want to do is call the function every tenth of a second or so until you're done with your list, so do that.
It's certainly not as extensible as the alternative, but I'm a fan of simplicity.

Related

Add delay to on('data') execution in Node.js

I have this function. Basically it searches data from database and does something with it. In this demo it just increases counter.
exports.fullThreads = function(){
return new Promise((resolve, reject) => {
MongoClient.connect(mongoUrl, (err, db) => {
var fullThreads = db.collection('tmp_fullThreads'),
threadPages = db.collection('tmp_threadPages').find();
var counter = 0;
threadPages.on('data', (doc) => {
setTimeout(function(){
counter++;
}, 200)
});
threadPages.on('end', () => {
console.log('end');
console.log(counter);
});
});//connect
});//promise
}//fullthreads
In this example I expected that it would call for data, wait 200ms, and then increase counter. Once it reaches the end, it would end. However it is different. on('end') is called before on('data') actually finished. I guess that is because it didn't return value immediately. So this setTimeout function is not a way to go. However I have a real problem here. This is simply way too fast and in real life code, this wouldn't increase counter, it would call foreign API which doesn't accept so many calls in so little time, so I wanted to make a delay between each call, how is this done properly?
Then you would need some kind of queu
eg:
queuTasks = [];
queuTasks.push("Task1");
queuTasks.push("Task2");
function doTasks(){
// Do your task
if(queuTasks.length > 0){
console.log(queuTasks[0]);
queuTasks.splice(0,1); // Remove it from the queu
}
setTimeout(function(){ doTasks();}, 1000);
}
This is just some quick code i made, may not work out of the box. But think you get the idea.
I know this is not entirely what you were asking about
This is my workaround:
var threadPages = db.collection('tmp_threadPages').find();
var delay = 0;
function test(doc, delay){
setTimeout(function(){
console.log(delay);
}, delay)
}
threadPages.on('data', (doc) => {
test(doc, delay);
delay += 100;
});
It works well if you do not need to bind proper events to on('end method. Basically it increases delay per request, otherwise it would fire all of them at once. This makes code go slower and it wouldn't overkill API with too many requests per second.

AJAX - How to make Asynchronous request one by one without freeze browser?

i have a textarea like below:
<textarea name="mailist" id="mailist" placeholder="email#domain.com"></textarea>
I want to make a request from the mailist, and send the data (email-per-line) to some URL using "split" and "foreach" . And it success.
But the problem is, when i click the submit button, the all request sent at the same time. How can i make it to:
Send the request after the end of the other
Make a request Asynchronously
Not freeze the browser
What you are looking for is called a promise. Every $.ajax function in jQuery returns a promise. You can read the documentation here to get more details. The gist is, the returned promise exposes a function called then, which can be used to chain requests one after the other, in the manner you desire.
Consider an ajax request of this sort
var request = $.ajax({
method: 'get',
url: 'www.google.com'
});
Since it runs asynchronously, you need to register handlers to let you know when the request is completed successfully, or failed, like so
request.then(
function successHandler(response) {
/* The first function will be called if the request is successful */
},
function failureHandler(response) {
/* The second function will be called if the request fails */
}
);
Using this pattern, you could wait for a request to either succeed or fail, before proceeding to the next one.
Leaving the earlier code section intact
You need to implement a queue of sorts. Most browsers have a sane limit of about 2-5 simultaneous requests. An example would be
var emailAddresses = [/* a list email addresses */];
var MAX_SIMULTANEOUS_REQUESTS = 4;
var CURRENT_REQUESTS = [];
function runRequests(emailAddresses, max, running) {
while(running.length < max) {
running.push(createRequest(emailAddress.pop(), running));
}
return emailAddresses.length;
}
function createRequest(emailAddress, running) {
var request = /* Creates requests in some manner */;
request
.then(
function() {
/* Remove this request from the list of running requests */
var indexOfRequest = running.indexOf(request);
running.splice(indexOfRequest, 1);
},
function() {
/* For extra points, add this to a retry queue */
}
);
return request;
}
/* Usage */
var interval = setInterval(function() {
var remaining = runRequests(emailAddresses, MAX_SIMULTANEOUS_REQUESTS, CURRENT_REQUESTS);
if (remaining === 0) {
clearInterval(interval);
}
}, 500);

Timing a set of functions with asyncronous subroutines

I have two functions periodically called via setInterval. The goal is to defer Function B until Function A is done (and vis versa). Currently, Function A will start, complete some of its subroutines, but not reach the end before Function B begins.
I've tried passing Function B as an argument of Function A. I am not sure if that was sufficient to create a callback. I also tried jQuery's $.when(setInterval(functionA, 10000)).then(setInterval(functionB, 5000)).
How do I ask JavaScript to wait for functions/blocks of code to finish? Thank you in advance.
Edit: Below is code very similar to my original. Sorry for not being concise.
Function A, getFruits(): There is a remote JSON that changes on its own (fruits.json). getFruits() does two things: 1) It empties an array, [allFruits] (just in case); 2) It adds all the names of fruit currently in the remote JSON to [allFruits]. Now, [allFruits] is an instanced copy of the remote JSON. Before this question, I only called getFruits() once, at startup; in other words, I did not use setInterval for getFruits().
Function B, checkFruits(): Now checkFruits() periodically (setInterval(checkFruits, 5000)) compares [allFruits] to the remote version. If any fruit was added to the remote version, checkFruits appends [allFruits] with those fruits' names; it also runs useful code (i.e. pushes the new names to an array [queue]).
For this implementation, it is important to create an initial list so only new (post-startup) fruit trigger the useful code of checkFruits(). Moreover, it is important only to add (never subtract) names from [allFruits] within a session. This is to prevent a new fruit from triggering the useful code more than once per session.
Problem: Now I want to make getFruits() (Function A) periodic. Because getFruits() empties [allFruits], it will allow the names that built up to again trigger useful code (but only once in between invocations of getFruits()). However, when I use setInterval(getFruits, 10000), there are times (in this example, always) when getFruits() overlaps with checkFruits(). When that happens, I notice only part of getFruits() finishes before checkFruits() starts. The console.log() messages appear in this order: 'getFruits() start:', 'checkFruits():', 'getFruits() end:'. Furthermore, my useful code is ran before getFruits() finishes (this is what is really undesired), and [allFruits] gets duplicates. This would not occur if getFruits() completely finished before checkFruits() jumped in.
debugging = true;
var debug = function() {
if (debugging){
console.log.apply(console, arguments)
};
}
var allFruits = [];
var queue = [];
var getFruits = function() {
allFruits = []; // Empty the list
debug('getFruits() start:', 'allFruits =', allFruits, 'queue =', queue);
$.ajax({
url: 'fruits.json',
dataType: 'json',
success: function(data) {
data.fruits.forEach(function(element) {
allFruits.push(element.name);
});
debug('getFruits() end:', 'data =', data, 'allFruits =', allFruits, 'queue =', queue);
},
});
}
var checkFruits = function() {
$.ajax({
url: 'fruits.json',
dataType: 'json',
success: function(data) {
data.fruits.forEach(function(element) {
if (allFruits.indexOf(element.name) === -1) {
queue.push(['fruit', element.name]);
allFruits.push(element.name);
}
});
debug('checkFruits():', 'data =', data, 'allFruits =', allFruits, 'queue =', queue);
}
});
}
getFruits();
setInterval(checkFruits, 5000);
// setInterval(getFruits, 10000); // When I try this, checkFruits() does not wait for getFruits() to finish.
The analogy of my actual remote resource is fruits.json. fruits.json can simply be the following:
{"fruits":[{"name":"apple","color":"red"},{"name":"banana","color":"yellow"},{"name":"tangerine","color":"orange"}]}
Again, the actual, remote JSON changes independently.
What you have here are two methods that each do asynchronouse stuff. Here are some good stack overflow posts on what that means.
Easy to understand definition of "asynchronous event"?
Does async programming mean multi-threading?
Are JavaScript functions asynchronous?
We have no idea how long it will take for an asynchronous call to finish. In your case, the AJAX request could take up to a few seconds depending on network speeds so regardless of when each of these methods are executed you CANNOT know which one will finish first. So what to do? Well, generally when you write/use an asynchronous method (like $.ajax) you give it a callback that will be executed when the asynchronous work is finished. And you have done this in the form of the success callback. And here is the good news. The success callbacks are SYNCHRONOUS (note the missing a). This means that the "useful code" in the success callback that needs to be run when a request finishes will complete (so long as none of it is async) before the "other useful code" in the other success callback is executed at all. And this works no matter which request finishes first. Each success callback will always wait for the other. So I think what was confusing you was your debug statements. If you add the following statements to your code the execution flow may make more sense:
debugging = true;
var debug = function() {
if (debugging) {
console.log.apply(console, arguments)
};
}
var allFruits = [];
var queue = [];
var getFruits = function() {
debug("getFruits: make request");
$.ajax({
url: 'fruits.json',
dataType: 'json',
success: function(data) {
debug("getFruits: start processing");
allFruits = []; // Empty the list
data.fruits.forEach(function(element) {
allFruits.push(element.name);
});
debug('getFruits: finished processing');
},
});
debug("getFruits: request sent, now we wait for a response.");
}
var checkFruits = function() {
debug("checkFruits: make request");
$.ajax({
url: 'fruits.json',
dataType: 'json',
success: function(data) {
debug("checkFruits: start processing");
data.fruits.forEach(function(element) {
if (allFruits.indexOf(element.name) === -1) {
queue.push(['fruit', element.name]);
allFruits.push(element.name);
}
});
debug("checkFruits: finished processing");
}
});
debug("checkFruits: request sent, now we wait for a response.");
}
getFruits();
setInterval(checkFruits, 5000);
// setInterval(getFruits, 10000); // When I try this, checkFruits() does not wait for getFruits() to finish.
After thinking about it I believe the only reason things may not have been behaving as expected is because you're emptying the allFruits array outside of the callback. If you move it as I have done I would think everything should work fine.
Now, I don't know why you need to re-initialize the data since each time you make the request your getting the latest information but lets roll with it. Since both methods make the same request lets consolidate that into a single method. No need to duplicate code ;). And since all of your examples have the getFruits running twice as slow as the checkFruits we could easily add a counter to accomplish the same sequence of events like so:
debugging = true;
var debug = function() {
if (debugging) {
console.log.apply(console, arguments)
};
}
var allFruits = [];
var queue = [];
var count = 0;
var doOneThing = function(data) {
//do stuff
}
var doAnotherThing= function(data) {
//do other stuff
}
var requestFruits = function() {
$.ajax({
url: 'fruits.json',
dataType: 'json',
success: function(data) {
// if count is even...or, do this every other time.
if (count % 2 === 0) {
count++;
doOneThing(data);
}
// do this everytime
doAnotherThing(data);
},
});
}
setInterval(requestFruits, 5000);
Hope this helps. Cheers.
your last code example first executes setInterval(functionA), and when the deferred execution of functionA is setup, executes setInterval(functionB), meaning that B will called +- 5 seconds after that line is executed, while functionA is called +- 10 seconds.
edit to reflect your additional information:
setInterval(function(){
functionA();
functionB();
}, 10000)
setTimeout(function(){
setInterval(functionB, 10000)
}, 5000)
This is a crude answer. I sense that callbacks can achieve this, but I am not sure how to code them, especially involving setInterval.
I create two global variables, getFruitsIsBusy = false and checkFruitsIsBusy = false. I create an IF for both getFruits() and checkFruits(). Here is getFruits():
var getFruits = function() {
if (checkFruitsIsBusy) { // New
setTimeout(getFruits, 100); // New
return; // New
} else { // New
getFruitsIsBusy = true // New
allFruits = []; // Empty the list
debug('getFruits() start:', 'allFruits =', allFruits, 'queue =', queue);
$.ajax({
url: 'fruits.json',
dataType: 'json',
success: function(data) {
data.fruits.forEach(function(element) {
allFruits.push(element.name);
});
getFruitsIsBusy = false // New; in the success function
debug('getFruits() end:', 'data =', data, 'allFruits =', allFruits, 'queue =', queue)
},
});
}
}
If also using this paradigm for checkFruits(), it seems both functions will wait for each other to finish.
Based on an analysis of the timing of two functions (A and B), consider the following solution (Chionglo, 2016):
Keep state information for each of function A and function B. The state of each function should be set within each of the respective functions.
Create a wrapper function for each of function A and function B. The wrapper function calls on the respective function, and then checks for the state of the respective function.
a. The check in wrapper function A: if function A has reached is final state, clear the interval associated with wrapper function A and schedule an interval for wrapper function B.
b. The check in wrapper function B: if function B has reached its final state, clear the interval associated with wrapper function B.
To begin the process, schedule an interval for wrapper function A.
Sample code:
var ac = Math.round(4*Math.random())+4;
var bc = Math.round(6*Math.random())+6;
var ai;
var Astate = false;
var Bstate = false;
function A() {
// Do your thing for A here.
// The following changes the “state of A” and then determines if the final state has been reached.
ac -= 1;
if (ac<1) Astate = true;
else Astate = false;
}
function B() {
// Do your thing for B here.
// The following changes the “state of B” and then determines if the final state has been reached.
bc -= 1;
if (bc<1) Bstate = true;
else Bstate = false;
}
ai = setInterval("processA()", 1000);
function processA() {
A();
if (Astate) {
clearInterval(ai);
ai = setInterval("processB()", 500);
}
}
function processB() {
B();
if (Bstate) {
clearInterval(ai);
ai = undefined;
}
}
Reference
Chionglo, J. F. (2016). An analysis for timing a set of functions. Available at http://www.aespen.ca/AEnswers/1458200332.pdf.

Prevent multiple AJAX calls to the same url within timeframe

I have an app which wants to get info about every marker on a map.
Each marker has a class, such as "car" or "pedestrian".
The app makes (via jQuery) a getJSON call to "http://myserver/info/".
However, since multiple markers may have the same class, the server could end up getting hit with many requests.
Accordingly, I'd like to pool requests which occur within a specified time frame (maybe 5 seconds or so) so that only one request is made, but each calling instance of getJSON is unaware of it.
My thought is to wrap getJSON in another function which stores the URLS in a hashmap/dictionary and stores up promises for each requester. When data is returned, the promises are fulfilled.
I ask, is there a standard way of doing this (debouncing an AJAX request, as it were)?
I created something (in 25 minutes ^^) that might help you; it's a Timeout manager:
var requestsPool = {
requests: {}, //list of urls
timeout: 5000, //In milliseconds
add: function(url) {
if(requestsPool.exists(url)) return false; //check if url is already present in the pool
requestsPool.requests[url] = setTimeout(function(u) {
requestsPool.remove(u);
}.bind(this, url), requestsPool.timeout); //Defining the timeout
return true;
},
exists: function(url) {
return requestsPool.requests[url]; //Return the Timeout ID if present or undefined
},
remove: function(url) {
return delete requestsPool.requests[url]; //return true almost always #link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/delete
},
cancel: function(url) {
clearTimeout(requestsPool.requests[url]); //cancel the timeout
return requestsPool.remove(url); //remove the url form the pool
}
}
$(anchor).click(function() {
if(requestsPool.exists(anchor.href)) {
// If cooldown is present
} else {
$.getJSON(anchor.href, function(data) {
requestsPool.add(anchor.href);
});
}
})
My thought is to wrap getJSON in another function which stores the URLS in a hashmap/dictionary and stores up promises for each requester
Yes, that's a good idea. It might look like this:
var debouncedGet = (function() {
var pool = {};
return function get(url) {
if (!pool[url]) {
pool[url] = $.getJSON(url);
setTimeout(function() {
pool[url] = null;
}, 5000); // you might want to move this into a `pool[url].always(…)` callback
// so the timer starts when the request returned
}
return pool[url];
};
}());
Here's my bid:
(function(window,$,undefined){
'use strict';
var cache = {},
timeout = 5e3;
// Use like traditional $.getJSON
$.getJSON = function(url,data,callback){
if ($.isFunction(data)){
callback = data;
data = undefined;
}
// Establish a cache key so we can re-reference existing
// requests to subsequent ones (within the timeout window).
var cacheKey = url;
if (cache[cacheKey]){
// This is an existing request; Simple add the callback
// onto the promise and return it.
return cache[cacheKey].done(callback);
} else {
// This is a new request. Build up a new request,
// attach the callback to the promise, and also add
// a couple cleanup methods for disposing the cache
// when appropriate.
cache[cacheKey] = $.ajax($.extend({
url: url,
type: 'get',
dataType: 'json',
data: data,
}, $.isPlainObject(url) && url))
.done(callback)
.always(function(){
delete cache[cacheKey];
});
setTimeout(function(){
// TODO: Probbaly want to store a reference to
// this timeout and clear it in the .always (to
// avoid race condition between .always firing
// and new request coming in but not returning yet)
cache[cacheKey] && delete cache[cacheKey];
}, timeout);
return cache[cacheKey];
}
};
})(window,jQuery);
And, FWIW, a jsFiddle: http://jsfiddle.net/ajtbdxt7/

Call function multiple times in the same moment but execute different calls with delay in nodejs

I need to call a function multiple times from different contexts, but i need that each call fires not before that one second has passed after the previous call started.
i'll make an example:
var i = 0;
while(i<50) {
do_something(i)
i++
}
function do_something(a) {
console.log(a)
}
I want that this log:
'1', then after a second '2', then after a second '3', then after a second '4'...
I can't use simple setInterval or setTimeout because this function 'do_something(param)' can be called in the same moment from different sources cause i am working with async function in nodejs.
I want that the order of calls is kept, but that they fires with minimum delay of one second.
I think i should add these calls to a queue, and then each second a call is dequeued and the function fires, but i really don't know how to do it in nodejs. Thank you in advance
i had to do something like this:
var tasks = [] //global var
var processor = setInterval(function() {
process_task()}, 1000)
function add_task() {
tasks.push('my task') //add task to the end of queue
}
process_task() {
var task_to_use = tasks[0];
tasks.shift() //remove first task in the queue (tasks[0])
//do what i need to with the task 'task_to_use'
}
in this way i can add tasks to the queue from wherever i want (tasks is a variable of the global context) just calling tasks.push('mytask') and the tasks will be processed one each second following the order they were put in the queue.
However, i didn't really need to do it. I needed because i am using Twilio's apis, and in their doc i read each phone number can send up to an sms for second and no more, but then the support told me they queue requests and send one message each second, so that sending more than a request for second is really not a problem and no sms sending will fail. Hope this will help, byee
Coming late to a party
I know I am late, but I had this exact same problem with this exact same technologies.
Your post was very helpful, but it lacked good practices and used Global variables.
My solution
If you are reading this today, I want you to know that after a week of bashing my head I ended up creating a question that lead to two different answers, both capable of helping you:
How to delay execution of functions, JavaScript
The queue approach, pioneered by #Arg0n and revamped by me is the closest one to your example, but with none of you drawbacks:
let asyncFunc = function(url) {
return new Promise((resolve, reject) => {
setTimeout(function() {
resolve({
url: url,
data: "banana"
});
}, 5000);
});
};
let delayFactory = function(args) {
let {
delayMs
} = args;
let queuedCalls = [];
let executing = false;
let queueCall = function(url) {
return new Promise((resolve, reject) => {
queuedCalls.push({
url,
resolve,
reject
});
if (executing === false) {
executing = true;
nextCall();
}
});
};
let execute = function(call) {
console.log(`sending request ${call.url}`);
asyncFunc(call.url)
.then(call.resolve)
.catch(call.reject);
setTimeout(nextCall, delayMs);
};
let nextCall = function() {
if (queuedCalls.length > 0)
execute(queuedCalls.shift());
else
executing = false;
};
return Object.freeze({
queueCall
});
};
let myFactory = delayFactory({
delayMs: 1000
});
myFactory.queueCall("http://test1")
.then(console.log)
.catch(console.log);
myFactory.queueCall("http://test2")
.then(console.log)
.catch(console.log);
myFactory.queueCall("http://test3")
.then(console.log)
.catch(console.log);
Give it a try and have fun!

Categories