Add delay to on('data') execution in Node.js - javascript

I have this function. Basically it searches for data in the database and does something with it. In this demo it just increases a counter.
exports.fullThreads = function() {
    return new Promise((resolve, reject) => {
        MongoClient.connect(mongoUrl, (err, db) => {
            var fullThreads = db.collection('tmp_fullThreads'),
                threadPages = db.collection('tmp_threadPages').find();
            var counter = 0;
            threadPages.on('data', (doc) => {
                setTimeout(function() {
                    counter++;
                }, 200)
            });
            threadPages.on('end', () => {
                console.log('end');
                console.log(counter);
            });
        }); //connect
    }); //promise
} //fullthreads
In this example I expected that it would fetch a document, wait 200ms, then increase the counter, and end once the cursor is exhausted. However, it behaves differently: on('end') is called before the on('data') handlers have actually finished. I guess that is because setTimeout returns immediately and only schedules the work, so this setTimeout approach is not the way to go. And I have a real problem here: this is simply way too fast, and in the real code the handler wouldn't increase a counter, it would call a foreign API which doesn't accept so many calls in so little time. So I want to add a delay between each call. How is this done properly?
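One way to get the expected behavior, sketched here under the assumption that the cursor returned by find() is a readable stream with pause()/resume() (as in the 2.x driver), is to pause the stream while the timer runs and resume it afterwards:

var threadPages = db.collection('tmp_threadPages').find();
var counter = 0;
threadPages.on('data', (doc) => {
    threadPages.pause();            // stop 'data' events while we wait
    setTimeout(function() {
        counter++;                  // in real code: call the foreign API here
        threadPages.resume();       // ask the cursor for the next document
    }, 200);
});
threadPages.on('end', () => {
    console.log('end');
    console.log(counter);           // 'end' now fires after the last resume()
});

Because the stream is paused during each wait, 'end' cannot fire until the last document has been handled.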

Then you would need some kind of queue, e.g.:
var queueTasks = [];
queueTasks.push("Task1");
queueTasks.push("Task2");

function doTasks() {
    // Do your task
    if (queueTasks.length > 0) {
        console.log(queueTasks[0]);
        queueTasks.splice(0, 1); // Remove it from the queue
    }
    setTimeout(function() { doTasks(); }, 1000); // keep polling for new tasks
}
doTasks(); // start the loop
This is just some quick code I made; it may not work out of the box, but I think you get the idea. I know this is not entirely what you were asking about.
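Applied to the question, a hedged sketch of the same idea (variable names are illustrative): buffer documents from the 'data' events into the queue and drain one per tick with a timer:

var queue = [];
var done = false;

threadPages.on('data', (doc) => queue.push(doc));
threadPages.on('end', () => { done = true; });

var drain = setInterval(function() {
    if (queue.length > 0) {
        var doc = queue.shift();
        counter++;                  // or: call the foreign API with doc
    } else if (done) {
        clearInterval(drain);       // queue drained and cursor finished
        console.log('end');
        console.log(counter);
    }
}, 200);

Note that this buffers the whole result set in memory if the API is slower than the cursor.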

This is my workaround:
var threadPages = db.collection('tmp_threadPages').find();
var delay = 0;

function test(doc, delay) {
    setTimeout(function() {
        console.log(delay);
    }, delay)
}

threadPages.on('data', (doc) => {
    test(doc, delay);
    delay += 100;
});
It works well if you do not need to bind proper events to the on('end') method. Basically it increases the delay per request; otherwise they would all fire at once. This slows the code down and keeps it from overwhelming the API with too many requests per second.
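If you do need the 'end' event, one hedged extension of the same idea is to schedule the 'end' work with the accumulated delay too, so it runs after the last scheduled call:

threadPages.on('end', () => {
    setTimeout(function() {
        console.log('end');         // fires after the last delayed test() call
    }, delay);                      // delay now equals (number of docs) * 100
});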

Related

How do you run a setTimeout once a second inside a forEach loop?

My code, in order to create multiple placements in an online service that has a constraint of 60 writes per minute:
placementsToAdd.forEach((placement, index) => {
    setTimeout(() => {
        options.url = `https://api.company.com/placement?publisher_id=${existingPub ? existingPub : placementsJson[0].PublisherId}&site_id=${placement.siteId}`
        options.body = `{"placement":{"name":"${placement.placement}"}}`
        request(options, callback);
    }, 1000 * (index + 1))
})
It works this way, but I am concerned about the wait time if there is a list of 2000 or 3000 placements at one time; the wait time might be excessively long.
Is there a better way to refactor this code in order to get my requests built one per second no matter what? Without that "* (index + 1)", it seems to try to build them all at once, hitting the wall after 60.
I've tried to use promises and async/await (which is new to me) but it doesn't seem to change the behavior.
Thanks!
As requested, here is how I've tried to use promises with this code:
async function createThePlacements() {
    let promise = new Promise((resolve, reject) => {
        for (let i = 0; i < placementsToAdd.length; i++) {
            setTimeout(() => {
                options.url = `https://api.company.com/placement?publisher_id=${existingPub ? existingPub : placementsJson[0].PublisherId}&site_id=${placementsToAdd[i].siteId}`
                options.body = `{"placement":{"name":"${placementsToAdd[i].placement}"}}`
                request(options, callback);
            }, 1000) // note: every iteration uses the same 1000ms delay, so all timers fire together
        }
    });
    let result = await promise; // pause till the promise resolves
    console.log('result - ', result);
}
createThePlacements();
So, a bit of a disclaimer: as mentioned, I've never used async/await before, so I'm reading up to try to understand how it works as well. This seems to be the syntax, but my result doesn't seem to be anything at the moment, and the code continues to do what it did before, trying to make all the calls in my test of 300 at once.
Also, of note, I have a resolve inside the callback of the request call. It resolves, so even the next parts of my app finish up all the way to the end. That's why I don't have a reject or resolve here.
The most straightforward approach would be:
const wait = ms => new Promise(resolve => setTimeout(resolve, ms));

// inside an async function:
for (const placement of placementsToAdd) {
    const options = {...};
    request(options, callback);
    await wait(1000);
}
await works predictably inside plain for-loops, not inside forEach.
I haven't touched your callback but it would need to handle errors. More refactoring is possible.
The most significant improvement here, I think, is that we're not pushing requests ahead of time. This way we retain control, and should needs change or anything go haywire, we can break out of the loop without spamming the server for another minute.
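For contrast, here is a sketch of why the forEach version cannot work this way: forEach ignores the promise returned by an async callback, so every iteration starts immediately:

// Anti-pattern: all waits start at the same time, nothing is spaced out.
placementsToAdd.forEach(async (placement) => {
    await wait(1000);               // each callback gets its own timer, in parallel
    request(options, callback);
});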
The best option would be to have a request method that returns a Promise.
Then you could rewrite your code like this.
function sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
}

function requestPlacement(placement) {
    const options = {...};
    return request(options);
}

async function requestAllPlacements(placements) {
    for (let i = 0; i < placements.length; i += 60) {
        if (i > 0) {
            // wait 1 minute
            await sleep(60000);
        }
        await Promise.all(
            placements
                .slice(i, i + 60) // the next batch of up to 60
                .map(requestPlacement)
        );
    }
}
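A hypothetical call site, assuming request() has been adapted to return a promise:

requestAllPlacements(placementsToAdd)
    .then(() => console.log('all placements created'))
    .catch(err => console.error('a request failed:', err));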

JS: check if setTimeout has finished or been canceled

I am creating a script that will execute a task after a certain amount of time has passed. I want the user to be able to cancel execution of these tasks. I can do this by saving the timeouts to a list, which I then use to cancel all of them. However, if a timeout completes normally, it is still stored in that list.
How do I check whether a timeout has already completed or been canceled before attempting to clear it?
var timeouts = [];

$(document).on('click', '.doTask', function() {
    var timeout = setTimeout(() => {
        doTaskAfter();
    }, 10000);
    timeouts.push(timeout);
});

$(document).on('click', '.cancelTasks', function() {
    var i = timeouts.length;
    while (i--) {
        if (timeouts[i].finished || timeouts[i].timeoutCleared) { // How do I check this?
            clearTimeout(timeouts[i]);
        }
        timeouts.splice(i, 1);
    }
});
I think you can do it using promises.
Every time you create a timer, put it inside of a promise, and push that promise into the array. The promise will resolve once the timer has run.
Literally write your timer like:
setTimeout(() => {
    resolve("True");
}, 10000);
From there, what you can do is this. When you walk through your code, use Promise.race, along with a second promise that resolves with "False".
let race = [timeouts[i], Promise.resolve("False")]
Use Promise.race to find out whether the promise has resolved. This works because if both have resolved, Promise.race settles with the first promise in the array, which resolves with "True". If your timer is still outstanding, it settles with "False".
Promise.race(race).then((res) => {
    if (res === "True") { // the timer has already fired
        clearTimeout(timeouts[i]);
    }
})
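Putting the pieces together, a hedged end-to-end sketch might look like this (doTaskAfter and the 10-second delay come from the question; note that the useful case for clearTimeout is when the race reports "False", i.e. the timer has not fired yet):

var timeouts = [];

$(document).on('click', '.doTask', function() {
    var entry = {};
    entry.promise = new Promise((resolve) => {
        entry.id = setTimeout(() => {
            doTaskAfter();
            resolve("True");        // mark this timer as finished
        }, 10000);
    });
    timeouts.push(entry);
});

$(document).on('click', '.cancelTasks', function() {
    timeouts.forEach((entry) => {
        Promise.race([entry.promise, Promise.resolve("False")])
            .then((res) => {
                if (res === "False") {      // still pending: safe to cancel
                    clearTimeout(entry.id);
                }
            });
    });
    timeouts.length = 0;
});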
I don't know if I understood correctly, but why would you remove already finished timeouts?
The timeout can remove itself from the array once it completes:
timeouts.splice(timeouts.indexOf(timeout), 1);
Otherwise you can use clearTimeout and splice to kill all registered/running timeouts from your timeouts list.
var timeouts = [];

$(document).on('click', '.doTask', function() {
    var timeout = setTimeout(() => {
        doTaskAfter();
        timeouts.splice(timeouts.indexOf(timeout), 1); // <=== HERE
    }, 10000);
    timeouts.push(timeout);
});

$(document).on('click', '.cancelTasks', function() {
    var i = timeouts.length;
    while (i--) {
        clearTimeout(timeouts[i]);
        timeouts.splice(i, 1);
    }
});
One thing that may or may not matter is that calling clearTimeout on an already cleared or completed timeout doesn't do anything, so you might not need to keep track of it.
Otherwise, you can just keep track of the state, something like:
var timeouts = [];

$(document).on('click', '.doTask', function() {
    var timeout = { state: 'waiting' };
    timeout.id = setTimeout(() => {
        timeout.state = 'finished';
        doTaskAfter();
    }, 10000);
    timeouts.push(timeout);
});

$(document).on('click', '.cancelTasks', function() {
    var i = timeouts.length;
    while (i--) {
        if (timeouts[i].state !== 'finished' && timeouts[i].state !== 'canceled') {
            // i.e. timeouts[i].state is 'waiting'
            clearTimeout(timeouts[i].id);
            timeouts[i].state = 'canceled';
        }
        timeouts.splice(i, 1);
    }
});
Just put the function that contains the code you need to execute inside the setTimeout, like this 👇:

setTimeout(() => { yourFunction() }, 2000)

function yourFunction() {
    // put your code here
}

yourFunction will execute after 2 seconds.

Make several requests to an API that can only handle 20 requests a minute

I've got a method that returns a promise and internally that method makes a call to an API which can only have 20 requests every minute. The problem is that I have a large array of objects (around 300) and I would like to make a call to the API for each one of them.
At the moment I have the following code:
const bigArray = [.....];

Promise.all(bigArray.map(apiFetch)).then((data) => {
    ...
});
But it doesn't handle the timing constraint. I was hoping I could use something like _.chunk and _.debounce from lodash, but I can't wrap my mind around it. Could anyone help me out?
If you can use the Bluebird promise library, it has a concurrency feature built in that lets you manage a group of async operations to at most N in flight at a time.
var Promise = require('bluebird');

const bigArray = [....];

Promise.map(bigArray, apiFetch, {concurrency: 20}).then(function(data) {
    // all done here
});
The nice thing about this interface is that it will keep 20 requests in flight. It will start up 20, then each time one finishes, it will start another. So this is potentially more efficient than sending 20, waiting for all to finish, sending 20 more, etc.
This also provides the results in the exact same order as bigArray so you can identify which result goes with which request.
You could, of course, code this yourself with generic promises using a counter, but since it is already built into the Bluebird library, I thought I'd recommend that way.
The Async library also has a similar concurrency control though it is obviously not promise based.
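For illustration, a hedged sketch using the async library's mapLimit (its callback-style equivalent of Bluebird's concurrency option), adapting the promise-returning apiFetch to a node-style callback:

var async = require('async');

async.mapLimit(bigArray, 20, function(item, done) {
    apiFetch(item).then(
        (data) => done(null, data),     // success -> node-style callback
        (err) => done(err)              // failure aborts the whole map
    );
}, function(err, results) {
    if (err) return console.error(err);
    // results arrive in the same order as bigArray
});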
Here's a hand-coded version using only ES6 promises that maintains result order and keeps 20 requests in flight at all times (until there aren't 20 left) for maximum throughput:
function pMap(array, fn, limit) {
    return new Promise(function(resolve, reject) {
        var index = 0, cnt = 0, stop = false, results = new Array(array.length);
        function run() {
            while (!stop && index < array.length && cnt < limit) {
                (function(i) {
                    ++cnt;
                    ++index;
                    fn(array[i]).then(function(data) {
                        results[i] = data;
                        --cnt;
                        // see if we are done or should run more requests
                        if (cnt === 0 && index === array.length) {
                            resolve(results);
                        } else {
                            run();
                        }
                    }, function(err) {
                        // set stop flag so no more requests will be sent
                        stop = true;
                        --cnt;
                        reject(err);
                    });
                })(index);
            }
        }
        run();
    });
}

pMap(bigArray, apiFetch, 20).then(function(data) {
    // all done here
}, function(err) {
    // error here
});
Working demo here: http://jsfiddle.net/jfriend00/v98735uu/
You could send 1 block of 20 requests every minute or space them out to 1 request every 3 seconds (the latter is probably preferred by the API owners).
function rateLimitedRequests(array, chunkSize) {
    var delay = 3000 * chunkSize;
    var remaining = array.length;
    var promises = [];
    var addPromises = function(newPromises) {
        Array.prototype.push.apply(promises, newPromises);
        if ((remaining -= newPromises.length) == 0) { // parentheses needed: == binds tighter than -=
            Promise.all(promises).then((data) => {
                ... // do your thing
            });
        }
    };
    (function request() {
        addPromises(array.splice(0, chunkSize).map(apiFetch));
        if (array.length) {
            setTimeout(request, delay);
        }
    })();
}
To call 1 every 3 seconds:
rateLimitedRequests(bigArray, 1);
Or 20 every minute:
rateLimitedRequests(bigArray, 20);
If you prefer to use _.chunk and _.throttle¹:
function rateLimitedRequests(array, chunkSize) {
    var delay = 3000 * chunkSize;
    var remaining = array.length;
    var promises = [];
    var addPromises = function(newPromises) {
        Array.prototype.push.apply(promises, newPromises);
        if ((remaining -= newPromises.length) == 0) { // same precedence fix as above
            Promise.all(promises).then((data) => {
                ... // do your thing
            });
        }
    };
    var chunks = _.chunk(array, chunkSize);
    var throttledFn = _.throttle(function() {
        addPromises(chunks.pop().map(apiFetch));
    }, delay, {leading: true});
    for (var i = 0; i < chunks.length; i++) {
        throttledFn();
    }
}
¹ You probably want _.throttle since it executes each function call after a delay, whereas _.debounce groups multiple calls into one call. See this article linked from the docs:
Debounce: Think of it as "grouping multiple events in one". Imagine that you go home, enter in the elevator, doors are closing... and suddenly your neighbor appears in the hall and tries to jump on the elevator. Be polite! and open the doors for him: you are debouncing the elevator departure. Consider that the same situation can happen again with a third person, and so on... probably delaying the departure several minutes.
Throttle: Think of it as a valve, it regulates the flow of the executions. We can determine the maximum number of times a function can be called in certain time. So in the elevator analogy.. you are polite enough to let people in for 10 secs, but once that delay passes, you must go!
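To make the analogy concrete, here is a minimal hedged sketch of both patterns (simplified; lodash's real implementations take more options):

function debounce(fn, ms) {
    var timer;
    return function() {
        var args = arguments;
        clearTimeout(timer);            // every call resets the clock
        timer = setTimeout(() => fn.apply(this, args), ms);
    };
}

function throttle(fn, ms) {
    var last = 0;
    return function() {
        var now = Date.now();
        if (now - last >= ms) {         // at most one call per window
            last = now;
            fn.apply(this, arguments);
        }
    };
}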

Call function multiple times in the same moment but execute different calls with delay in nodejs

I need to call a function multiple times from different contexts, but I need each call to fire no sooner than one second after the previous call started.
I'll give an example:
var i = 0;
while (i < 50) {
    do_something(i);
    i++;
}

function do_something(a) {
    console.log(a);
}
I want it to log:
'1', then after a second '2', then after a second '3', then after a second '4'...
I can't use a simple setInterval or setTimeout because this function do_something(param) can be called at the same moment from different sources, since I am working with async functions in Node.js.
I want the order of the calls to be kept, but each should fire with a minimum delay of one second after the previous one.
I think I should add these calls to a queue, and then each second a call is dequeued and the function fires, but I really don't know how to do that in Node.js. Thank you in advance.
I had to do something like this:
var tasks = [] // global var

var processor = setInterval(function() {
    process_task()
}, 1000)

function add_task() {
    tasks.push('my task') // add the task to the end of the queue
}

function process_task() {
    var task_to_use = tasks[0];
    tasks.shift() // remove the first task in the queue (tasks[0])
    // do what I need to with the task 'task_to_use'
}
In this way I can add tasks to the queue from wherever I want (tasks is a variable in the global context) just by calling tasks.push('my task'), and the tasks will be processed one per second, in the order they were put in the queue.
However, I didn't really need to do it. I needed it because I am using Twilio's APIs, and their docs say each phone number can send at most one SMS per second. But then their support told me they queue requests and send one message per second anyway, so sending more than one request per second is really not a problem and no SMS send will fail. Hope this helps, bye!
Coming late to the party
I know I am late, but I had this exact same problem with these exact same technologies.
Your post was very helpful, but it lacked good practices and used global variables.
My solution
If you are reading this today, I want you to know that after a week of bashing my head I ended up creating a question that led to two different answers, both capable of helping you:
How to delay execution of functions, JavaScript
The queue approach, pioneered by @Arg0n and revamped by me, is the closest one to your example, but with none of its drawbacks:
let asyncFunc = function(url) {
    return new Promise((resolve, reject) => {
        setTimeout(function() {
            resolve({
                url: url,
                data: "banana"
            });
        }, 5000);
    });
};

let delayFactory = function(args) {
    let { delayMs } = args;
    let queuedCalls = [];
    let executing = false;

    let queueCall = function(url) {
        return new Promise((resolve, reject) => {
            queuedCalls.push({
                url,
                resolve,
                reject
            });
            if (executing === false) {
                executing = true;
                nextCall();
            }
        });
    };

    let execute = function(call) {
        console.log(`sending request ${call.url}`);
        asyncFunc(call.url)
            .then(call.resolve)
            .catch(call.reject);
        setTimeout(nextCall, delayMs);
    };

    let nextCall = function() {
        if (queuedCalls.length > 0)
            execute(queuedCalls.shift());
        else
            executing = false;
    };

    return Object.freeze({
        queueCall
    });
};

let myFactory = delayFactory({
    delayMs: 1000
});

myFactory.queueCall("http://test1")
    .then(console.log)
    .catch(console.log);
myFactory.queueCall("http://test2")
    .then(console.log)
    .catch(console.log);
myFactory.queueCall("http://test3")
    .then(console.log)
    .catch(console.log);
Give it a try and have fun!

Execute a forEach like a waterfall in async

I'm trying to retrieve longitude and latitude for a list of addresses with the Google API via a Node.js script. The call itself works fine, but since I have around 100 addresses to submit, I use async.forEach on the array. The calls are made too fast and I get the error "You have exceeded your rate-limit for this API."
I found that the number of calls is limited to 2500 every 24h and at most 10 a second. While I'm OK on the 2500 a day, my calls are made way too fast for the rate limit.
I now have to write a function that will delay the calls enough not to reach the limit. Here is a sample of my code:
async.forEach(final_json, function(item, callback) {
    var path = '/maps/api/geocode/json?address=' + encodeURIComponent(item.main_address) + '&sensor=false';
    console.log(path);
    var options = {
        host: 'maps.googleapis.com',
        port: 80,
        path: path,
        method: 'GET',
        headers: {
            'Content-Type': 'application/json'
        }
    }
    // a function I have that makes the http GET
    rest.getJSON(options, function(statusCode, res) {
        console.log(res);
        callback();
    });
}, function() {
    // do something once all the calls have been made
});
How would you proceed to achieve this? I tried putting my rest.getJSON inside a 100ms setTimeout, but the forEach iterates through all the rows so fast that it starts all the setTimeouts almost at the same time, and therefore it doesn't change anything...
async.waterfall looks like it would do the trick, but the thing is I don't know exactly how many rows I will have, so I can't hardcode all the function calls. And to be honest, it would make my code really ugly.
The idea is that you can create a rateLimited function that acts much like a throttled or debounced function, except any calls that don't execute immediately get queued and run in order as the rate limit time period expires.
Basically, it creates parallel 1-second intervals that self-manage via timer rescheduling, but only up to perSecondLimit intervals are allowed.
function rateLimit(perSecondLimit, fn) {
    var callsInLastSecond = 0;
    var queue = [];
    return function limited() {
        if (callsInLastSecond >= perSecondLimit) {
            queue.push([this, arguments]);
            return;
        }
        callsInLastSecond++;
        setTimeout(function() {
            callsInLastSecond--;
            var parms;
            if (parms = queue.shift()) {
                limited.apply(parms[0], parms[1]);
            }
        }, 1010);
        fn.apply(this, arguments);
    };
}
Usage:
function thisFunctionWillBeCalledTooFast() {}
var limitedVersion = rateLimit(10, thisFunctionWillBeCalledTooFast);

// 10 calls will be launched immediately, then as the timer expires
// for each of those calls a new call will be launched in its place.
for (var i = 0; i < 100; i++) {
    limitedVersion();
}
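Applied to the question's geocoding loop, a hedged sketch (rest.getJSON, final_json and the options object come from the question; the bind is a precaution in case getJSON relies on this):

var limitedGetJSON = rateLimit(10, rest.getJSON.bind(rest));

async.forEach(final_json, function(item, callback) {
    var options = { /* same options as in the question */ };
    limitedGetJSON(options, function(statusCode, res) {
        console.log(res);
        callback();     // async.forEach still sees every completion
    });
}, function() {
    // all rate-limited calls have been issued and answered
});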
Here's how I would hack it (Note: arr is your array of locations):
function populate(arr, callback, pos) {
    if (typeof pos == "undefined")
        pos = 0;
    var path = '/maps/api/geocode/json?address=' + encodeURIComponent(arr[pos].main_address) + '&sensor=false';
    console.log(path);
    var options = {
        host: 'maps.googleapis.com',
        port: 80,
        path: path,
        method: 'GET',
        headers: {
            'Content-Type': 'application/json'
        }
    }
    // a function I have that makes the http GET
    rest.getJSON(options, function(statusCode, res) {
        console.log(res);
    });
    pos++;
    if (pos < arr.length)
        setTimeout(function() {
            populate(arr, callback, pos);
        }, 110); // a little wiggle room since setTimeout isn't exact
    else
        callback();
}
You could add a rate limiting function, but, IMHO, it introduces unnecessary complexity. All you really want to do is call the function every tenth of a second or so until you're done with your list, so do that.
It's certainly not as extensible as the alternative, but I'm a fan of simplicity.
