nodejs/javascript limit how many async calls [duplicate]

I am building an application that will make about a million calls to a remote API server. Will I be able to limit the number of connections to, for example, 10? Will setting maxSockets to 10 do it?
I am trying to understand what these parameters do:
keepAlive: false,
maxSockets: 999,
maxFreeSockets: 1
They are passed to Node's http request in the following code:
var inputData = [];
for (var i = 1; i <= 5000; i++) {
    inputData.push('number' + i);
}
var options = {
    host: "localhost",
    port: 80,
    path: "/text.txt",
    keepAlive: false,
    maxSockets: 999,
    maxFreeSockets: 1
}
var limit = inputData.length;
var counter = 0;

function fetchData(number) {
    return new Promise(function(resolve, reject) {
        var http = require('http');
        var fetch = function(resp) {
            var body = '';
            resp.on('data', function(chunk) {
                body += chunk;
            });
            resp.on('end', function() {
                console.log(resp);
                resolve();
            });
            resp.on('error', function(err) {
                console.log('error');
            });
        };
        var req = http.request(options, fetch);
        req.end();
    });
}

Promise.all(inputData.map(number => fetchData(number))).then(function(results) {
    console.log('finished');
}).catch(function(error) {
    console.log('there was an error');
    console.log(error);
});
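(For reference: keepAlive, maxSockets and maxFreeSockets are options of Node's http.Agent, not of an individual request, so putting them in the request options object as above does not by itself throttle anything. A minimal sketch of setting them explicitly, with purely illustrative values:
var http = require('http');

// keepAlive: reuse sockets between requests instead of opening a new one each time
// maxSockets: maximum number of concurrent sockets the agent will open per host
// maxFreeSockets: maximum number of idle sockets kept open when keepAlive is true
var agent = new http.Agent({
    keepAlive: true,
    maxSockets: 10,
    maxFreeSockets: 5
});

var options = {
    host: "localhost",
    port: 80,
    path: "/text.txt",
    agent: agent
};
Even so, as the answer below explains, an agent only queues excess sockets; it is not a good way to manage a million pending requests.)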

You really don't want to fire off 1,000,000 requests and somehow hope that maxSockets throttles them to 100 at a time. There are a whole bunch of reasons why that is not a great way to do things. Instead, you should write your own code that limits the number of live connections to 100 at a time.
There are a number of ways to do that:
Write your own code that fires up 100 and then each time one finishes, it fires up the next one.
Use Bluebird's Promise.map(), which has a built-in concurrency option that will manage how many are in flight at the same time (a sketch of this is shown after the hand-rolled example below).
Use Async's async.mapLimit(), which has a built-in concurrency feature that will manage how many are in flight at the same time.
As for writing the code yourself, you could do something like this:
function fetchAll() {
    var start = 1;
    var end = 1000000;
    var concurrentMax = 100;
    var concurrentCnt = 0;
    var cntr = start;
    return new Promise(function(resolve, reject) {
        // start up requests until the max concurrent requests are going
        function run() {
            while (cntr < end && concurrentCnt < concurrentMax) {
                ++concurrentCnt;
                fetchData(cntr++).then(function() {
                    --concurrentCnt;
                    run();
                }, function(err) {
                    --concurrentCnt;
                    // decide what to do with error here
                    // to continue processing more requests, call run() here
                    // to stop processing more requests, call reject(err) here
                });
            }
            if (cntr >= end && concurrentCnt === 0) {
                // all requests are done here
                resolve();
            }
        }
        run();
    });
}
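For comparison, the Bluebird option from the list above is much shorter. A rough sketch, assuming fetchData(number) returns a promise as in the question:
var Promise = require('bluebird');

// Run at most 100 fetchData() calls in flight at any time.
Promise.map(inputData, function(number) {
    return fetchData(number);
}, { concurrency: 100 }).then(function(results) {
    console.log('finished');
}).catch(function(err) {
    console.log(err);
});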

I decided to use the async library.
Here is my complete solution to this:
var async = require('async')
var http = require('http');
var inputData = [];
for (var i = 1; i <= 2000; i++) {
    inputData.push('number' + i);
}
var options = {
    host: "o2.pl",
    path: "/static/desktop.css?v=0.0.417",
    port: 80
}
function fetchData(number, callback) {
    var fetch = function(resp) {
        var body = '';
        resp.on('data', function(chunk) {
            body += chunk;
        });
        resp.on('end', function() {
            process.stdout.write('.');
            callback(null, body);   // tell async this item is done
        });
        resp.on('error', function(err) {
            console.log('error');
            console.log(err);
            callback(err);
        });
    };
    var req = http.request(options, fetch);
    req.end();
}
async.mapLimit(inputData, 100, fetchData, function(err, result) {
    console.log('finished');
});
Thank you for your help.

Related

HTTPs Request Image really slow?

I have a function for getting images from a given https URL. Profiling its performance, I found that it takes around 20 seconds to finish executing... This is insane...
This is the code of the function I have implemented:
function getImageFromUrl(imageUrl) {
    // Profiling time
    const NS_PER_SEC = 1e9;
    const MS_PER_NS = 1e-6;
    const time = process.hrtime();
    const options = url.parse(imageUrl); // Automatically converted to an ordinary options object by https.request
    return new Promise((resolve, reject) => {
        const request = https.request(options, (response) => {
            // Reject on bad status
            const { statusCode } = response;
            if (statusCode < 200 || statusCode >= 300) {
                return reject(new Error("Status Code: " + statusCode));
            }
            // Cumulate data
            let chunks = [];
            response.on("data", (chunk) => {
                chunks.push(chunk);
            });
            // Resolve on end
            response.on("end", () => {
                // End time of execution
                const diff = process.hrtime(time);
                console.log(
                    `Get image from url took ${
                        (diff[0] * NS_PER_SEC + diff[1]) * MS_PER_NS
                    } milliseconds`
                );
                resolve(Buffer.concat(chunks));
            });
        });
        // Reject on request error
        request.on("error", (err) => {
            // This is not a "second reject", just a different sort of failure
            reject(err.message);
        });
        // Close HTTP connection.
        request.end();
    });
}
What am I doing wrong? Note that the requested URL is https but doesn't have "www.". I don't think that's the problem; 20 seconds is too much time...
Any ideas? Is there any other better and easier way to handle this?

Problems with asynchronous in loop

I need to:
1) Make two requests
2) Then take the data from the requests
3) And send the response
I am using the Firebase database and read the data from it with forEach.
I need to process the data and respond with JSON.
Can anyone help me with the asynchronous part, or help me write a callback function?
I have read a lot of information about this, but I still don't understand it.
My code below does not work correctly.
I have problems with the asynchronous calls.
So how can I improve it?
router.get('/places/all', function(req, res, next) {
var lat = req.param('lat');
lon = req.param('lon');
result = [],
resData = [];
var query = firebase.database().ref('Places');
var i = 0;
var promise1 = new Promise(function(resolve, reject) {
query.on("value", function(snapshot) {
console.log(snapshot.numChildren())
snapshot.forEach(function(childSnapshot) {
childData = childSnapshot.val();
var gmapApiKey = 'API_KEY';
placeID = childData.placeID;
language = req.param('lang');
url = 'https://maps.googleapis.com/maps/api/place/details/json?placeid=' + placeID + '&key=' + gmapApiKey + '&language=' + language;
placeLat = childData.lat;
placeLon = childData.lon;
distanceMatrixApiUrl = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins=' + lat + ',' + lon + '&destinations=' + placeLat + ',' + placeLon + '&key=' + gmapApiKey;
i++;
var requestDistance = new Promise(function(resolve, reject) {
https.get(distanceMatrixApiUrl, res => {
res.setEncoding("utf8");
let body = "";
res.on("data", data => {
body += data;
});
res.on("end", () => {
body = JSON.parse(body);
resolve(body);
});
});
console.log(requestDistanceApi)
requestDistance = Promise.resolve(requestDistanceApi)
});
var requestPlaces = new Promise(function(resolve, reject) {
https.get(url, res => {
res.setEncoding("utf8");
let body = "";
res.on("data", data => {
body += data;
});
res.on("end", () => {
i++;
result = JSON.parse(body);
resolve(result);
});
});
console.log(requestPlaceApi)
requestPlaces = Promise.resolve(requestPlacesApi)
i++;
});
requestDistance.then(function(valueDistance) {
requestPlaces.then(function(valuePlace) {
resData.push({
name: valuePlace.result.name,
address: valuePlace.result.vicinity,
open_now: valuePlace.result.opening_hours.open_now,
weekday_text: valuePlace.result.opening_hours.weekday_text,
latitude: valuePlace.result.geometry.location.lat,
longitude: valuePlace.result.geometry.location.lng,
distance: valueDistance.rows[0].elements[0].distance.text,
});
}).catch((error) => {
assert.isNotOk(error,'Promise Places error');
done();
});
}).catch((error) => {
assert.isNotOk(error,'Promise Distance error');
done();
});
});
});
});
promise1.then(function(value) {
res.send(value);
}).catch((error) => {
assert.isNotOk(error,'Promise error');
done();
});
});
I'm not going to rewrite all of this, but rather give you an outline.
First of all, the Firebase SDK has promises available in lieu of callbacks. Use those instead of wrapping everything in new Promise. This will streamline the code structure and make the whole promise chain simpler.
I'm not sure which package https.get() comes from, but it too likely has promises. Convert to using those as well.
The other trick is to combine the two nested requests into one promise, which can be done with Promise.all().
So here's the basic code outline. Note that I have not paid much attention to exactly how you want this data processed before sending it to the client; you will need to adjust as needed.
You probably also want to break this whole code block down into smaller functions to make the logic easier to follow.
Also note I have not added any error-handling catch()es; that is up to you.
// use firebase promise instead of callback
var mainPromise = query.once("value").then(function(snapshot) {
    var subPromises = [];
    snapshot.forEach(function(childSnapshot) {
        //childData = ...
        // look into https promises instead of wrapping in `new Promise`
        var placesPromise = new Promise(function(resolve, reject) {
            https.get(url, res => {
                // .....
                res.on("end", () => {
                    body = JSON.parse(body);
                    resolve(body);
                });
            });
        });
        var distancePromise = new Promise(...); // similar to above, using distanceMatrixApiUrl
        // combine these promises into one
        var combinedPromise = Promise.all([placesPromise, distancePromise]).then(function(resultsArray) {
            var places = resultsArray[0],
                distances = resultsArray[1];
            // assign the above to childData or whatever you want to do with them
            return childData; // or return processed results, or skip this `then()` and do all processing in the next then() commented below
        });
        // store combined promises in array
        subPromises.push(combinedPromise);
    });
    // return promise to resolve mainPromise
    return Promise.all(subPromises); // might need a `then()` here to return processed data depending on structure you want returned
});
mainPromise.then(function(results) {
    // process and send results, which is an array of all the childData from snapshot.forEach
    res.send(processedData);
});

Using imported asynchronous method with promises in main entry point

I'm creating a Node.js module with an asynchronous method - a simple HTTP GET request. Here is the code:
//mymodule.js
var https = require('https');
function getSomething(url_str)
{
var callback_fn = function(response){
var body = '';
response.on('data', function (data) {
body += data;
});
response.on('end', function () {
//console.log(body);
return body;
});
};
return https.request(url_str, callback_fn).end();
}
var module_obj = {
getSome: getSomething
};
module.exports = module_obj;
This module is called by my app.js - a web server - like so:
//app.js
var myModule = require('./mymodule');
var http = require('http');
var qs = require('querystring');
var server_fn = function(request, response){
response.setHeader('Access-Control-Allow-Origin', '*');
response.setHeader('Access-Control-Request-Method', '*');
response.setHeader('Access-Control-Allow-Methods', 'OPTIONS, GET');
response.setHeader('Access-Control-Allow-Headers', '*');
if ( request.method === 'OPTIONS' ) {
response.writeHead(200);
response.end();
return;
}
if (request.method == 'POST') {
var body = '';
request.on('data', function (data) {
body += data;
// Too much POST data, kill the connection!
// 1e6 === 1 * Math.pow(10, 6) === 1 * 1000000 ~~~ 1MB
if (body.length > 1e6)
request.connection.destroy();
});
request.on('end', function () {
var post = qs.parse(body),
post_url = post.url,
post_method = post.method;
var promise_flow = new Promise(function(resolve, reject){
if(post_method === 'get_me_something')
{
response_str = myModule.getSome(post_url);
resolve(response_str);
}
else
{
resolve('nothing');
}
});
promise_flow
.then(function(response){
response.write(response);
response.end();
return;
}).catch(function(error){
response.write(error);
response.end();
return;
})
});
}
};
var http_server = http.createServer(server_fn);
http_server.listen(2270);
console.log("server listening on 2270");
So basically, I start things up via node app.js, then POST the URL, and the module should fetch the Web page and return its content.
Unfortunately, I'm getting the following error:
UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 1): TypeError: First argument must be a string or Buffer
I believe this is because the response I'm getting from my module's getSomething method is false, as opposed to the content of the requested Web page.
I know I can fix this by moving the https.get operation from mymodule.js and putting it inline with app.js, and then calling resolve on end, but I'd like to keep the current module setup.
Is there a workaround to get the asynchronous method in the imported module to work with the existing promise chain as setup?
UPDATE
After further review, I noticed that I wasn't quite running things the right way. I updated the code as follows:
//...
var promise_flow = new Promise(function(resolve, reject){
if(post_method === 'get_me_something')
{
myModule.getSome(post_url)
.then(function(data){
resolve(data);
})
.catch(function(err){
reject(err);
});
}
else
{
resolve('nothing');
}
});
//...
This way, I think it fits with the true spirit of Promises.
Your getSomething function doesn't return a promise. Make it return a promise, and fulfill the promise in response.on('end').
function getSomething(url_str)
{
    return new Promise(function(resolve, reject) {
        var callback_fn = function(response){
            var body = '';
            response.on('data', function (data) {
                body += data;
            });
            response.on('end', function () {
                //console.log(body);
                resolve(body);
            });
        };
        https.request(url_str, callback_fn).end();
    });
}
Then in your main file, call it like this: myModule.getSome(post_url).then(resolve);
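With getSomething returning a promise, the extra new Promise wrapper from the update above is no longer needed. A rough sketch of the simplified flow:
//...
var promise_flow = (post_method === 'get_me_something')
    ? myModule.getSome(post_url)
    : Promise.resolve('nothing');

promise_flow
    .then(function(body) {
        // body is the page content resolved by the module
        response.write(body);
        response.end();
    })
    .catch(function(error) {
        response.write(String(error));
        response.end();
    });
//...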

How to run two functions synchronously with NodeJS?

I am pretty new to NodeJS and got lost with the asynchronous mechanism.
I have code that should send an HTTP POST request to a first URL (for example https://example.com/first), and then, when it is answered with status code 200, send another request to the same server that checks whether it is done processing the first request (for example https://example.com/statusCheck).
The server returns a text/plain response containing "true" if it's busy and "false" if it's ready to use.
I wrote it with a while loop that queries the server every 2 seconds, up to a maximum of 10 iterations.
var request = require('request');
var firstURL = "https://example.com/first";
var serverCheck = "https://example.com/statusCheck";
// Sends up to 10 requests to the server
function checkServerStatus(){
var serverReady = false;
var count = 0;
while (!serverReady && count < 10) {
count++;
setTimeout(function(){
request.get(serverCheck, function(err, resp, body){
if (err){
console.log(err);
} else if (body == "false") {
generatorReady = true;
}
})
}, 2000);
}
return generatorReady;
}
// Sends the first request and return True if the response equals to 200
function sendFirstRequest(){
var req = request.post(firstURL, function (err, resp, body) {
if (err) {
console.log(err);
return false;
} else if (resp.statusCode === 200){
return true;
} else {
return false;
}
});
};
module.exports = function (){
// Sends the first request
var firstRequestStatus = sendFirstRequest();
if (firstRequestStatus) {
return checkServerStatus();
}
};
In other words, I want to run sendFirstRequest first, wait for the response, and, in case the response is true, run checkServerStatus and get its return value. It would also be fine to sleep between while iterations (setTimeout does not work for me either).
Edit: I've heard that I can use function* with yield, or async/await, to avoid callback hell. How can I implement them in this case?
You should use a Promise to do this. Below is some code using bluebird which will do what you want. The Promise.any method will return the first successful call from the Array out of 10 tries.
const Promise = require('bluebird');
var request = Promise.promisifyAll(require('request'));
var firstURL = "https://example.com/";
var serverCheck = "https://example.com/statusCheck";
request.postAsync(firstURL).then(res => {
if (res.statusCode === 200) return true;
throw new Error('server not ready');
}).then(() =>
Promise.any(new Array(10).fill(request.getAsync(serverCheck)))
).then(res => {
console.log(res);
}).catch(err => console.log(err));
You have to understand that asynchronous operations cannot return a result right after they are called. They trigger a handler when they have finished executing. You can/should use that entry point to initiate or continue your logic flow.
request.post(params, function (err, resp, body) {
    if (err) {
        failFlow(err);
    } else if (resp.statusCode === 200) {
        successFlow(resp);
    }
});
You can chain as many such asynchronous calls as you need, but you cannot return a response in this manner.
Also, you might be interested in the concept of a Promise.
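As a rough illustration of that idea, the two calls from the question can be wrapped in promises by hand (this is a sketch under the question's setup, not a drop-in replacement):
function postAsPromise(url) {
    return new Promise(function(resolve, reject) {
        request.post(url, function(err, resp, body) {
            if (err) return reject(err);
            resolve(resp);
        });
    });
}

function getAsPromise(url) {
    return new Promise(function(resolve, reject) {
        request.get(url, function(err, resp, body) {
            if (err) return reject(err);
            resolve(body);
        });
    });
}

postAsPromise(firstURL).then(function(resp) {
    if (resp.statusCode !== 200) throw new Error('unexpected status ' + resp.statusCode);
    return getAsPromise(serverCheck);
}).then(function(body) {
    // per the question, body is "true" while the server is busy and "false" when ready
    console.log('server busy: ' + body);
}).catch(function(err) {
    console.log(err);
});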
var request = require('request');
var firstURL = "https://example.com/first";
var serverCheck = "https://example.com/statusCheck";
var count = 0;
// Sends up to 10 requests to the server
function checkServerStatus() {
if (count++ > 10) return;
request.get(serverCheck, function(err, resp, body) {
if (err) {
console.log(err);
checkServerStatus();
} else if (body == "false") {
// go further
}
});
}
// Sends the first request and return True if the response equals to 200
function sendFirstRequest(cb) {
var req = request.post(firstURL, function(err, resp, body) {
if (err) {
console.log(err);
return false;
} else if (resp.statusCode === 200) {
cb();
} else {
return false;
}
});
};
module.exports = function() {
// Sends the first request
sendFirstRequest(checkServerStatus);
};
You can use the async library.
You don't need setInterval or any timer for that matter; just wait for the response.
Specifically, you can use async.waterfall for this, something like:
var async = require('async')
var request = require('request')
async.waterfall([
function(cb) {
// send the first request
request.post("https://example.com/first", function (err, resp) {
// send the response to the next function or break in case there was an error
cb(err, resp)
})
},
function(resp, cb) {
// check for the response
if (resp.statusCode === 200) {
// in case the response code is 200 continue to the next function
return cb()
}
// if its not 200 break with the response code as an error
return cb(resp.statusCode)
},
function(cb) {
// send the verify
request.get("https://example.com/statusCheck", function (err, resp, body) {
// send the body of the response to the next function or break in case of an error
cb(err, body)
})
}
], function (err, result) {
// check if there was an error along the way
if (err) {
console.log("there was an error", err)
} else {
// all is good print the result
console.log("result:", result)
}
})
async function main() {
console.log('First call started');
let response1 = await $.ajax({url: "https://api.stackexchange.com/2.2/questions/269754/answers/?order=desc&site=meta.stackoverflow&client_id=3519&callback=?"})
console.log('First call finished', response1);
console.log('Second call started');
let response2 = await $.ajax({url: "https://api.stackexchange.com/2.2/questions/269754/answers/?order=desc&site=meta.stackoverflow&client_id=3519&callback=?"})
console.log('Second call finished',response2);
}
main();
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
In newer versions of Node.js you can use async/await like the example above.
Notice that $.ajax is not a Node function; it is just for demonstration.
You can use await on any function that returns a promise.
For the next example you need to install the request package and use Node >= 8 for promisify:
const {promisify} = require('util');
const request = require('request')
async function main() {
let get = promisify(request);
let response1 = await get('https://www.random.org/integers/?num=1&min=1&max=100&col=1&base=10&format=plain&rnd=new');
console.log('first random: ',response1.body)
let response2 = await get('https://www.random.org/integers/?num=1&min=1&max=100&col=1&base=10&format=plain&rnd=new');
console.log('second random: ',response2.body)
}
main();
http://2ality.com/2017/05/util-promisify.html
https://github.com/request/request

Node.js synchronously loop or iterate over asynchronous statements

I want to do a forEach loop but have it run synchronously. Each iteration of the loop will do an http.get call that returns JSON, whose values are then inserted into a database. The problem is that the loop runs asynchronously, which causes all of the http.gets to run at once, and my database doesn't end up inserting all of the data. I am using async-foreach to try to do what I want, but I don't have to use it if I can do it the right way.
mCardImport = require('m_cardImport.js');
var http = require('http');
app.get('/path/hi', function(req, res) {
mCardImport.getList(function(sets) {
forEach(sets, function(item, index, arr) {
theUrl = 'http://' + sets.set_code + '.json';
http.get(theUrl, function(res) {
var jsonData = '';
res.on('data', function(chunk) {
jsonData += chunk;
});
res.on('end', function() {
var theResponse = JSON.parse(jsonData);
mCardImport.importResponse(theResponse.list, theResponse.code, function(theSet) {
console.log("SET: " + theSet);
});
});
});
});
});
});
and my model
exports.importResponse = function(cardList, setCode, callback) {
mysqlLib.getConnection(function(err, connection) {
forEach(cardList, function(item, index, arr) {
var theSql = "INSERT INTO table (name, code, multid, collector_set_num) VALUES "
+ "(?, ?, ?, ?) ON DUPLICATE KEY UPDATE id=id";
connection.query(theSql, [item.name, setCode, item.multid, item.number], function(err, results) {
if (err) {
console.log(err);
};
});
});
});
callback(setCode);
};
With recursion the code is pretty clean. Wait for the http response to come back then fire off next attempt. This will work in all versions of node.
var urls = ['http://stackoverflow.com/', 'http://security.stackexchange.com/', 'http://unix.stackexchange.com/'];
var processItems = function(x){
if( x < urls.length ) {
http.get(urls[x], function(res) {
// add some code here to process the response
processItems(x+1);
});
}
};
processItems(0);
A solution using promises would also work well, and is more terse. For example, if you have a version of get that returns a promise and Node v7.6+, you could write an async/await function like this example, which uses some new JS features.
const urls = ['http://stackoverflow.com/', 'http://security.stackexchange.com/', 'http://unix.stackexchange.com/'];
async function processItems(urls){
for(const url of urls) {
const response = await promisifiedHttpGet(url);
// add some code here to process the response.
}
};
processItems(urls);
Note: both of these examples skip over error handling, but you should probably have that in a production app.
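The promisifiedHttpGet used above isn't a built-in; a minimal sketch of one, using the core http module:
const http = require('http');

// Resolve with the full response body once the response has ended.
function promisifiedHttpGet(url) {
    return new Promise((resolve, reject) => {
        http.get(url, (res) => {
            let body = '';
            res.on('data', (chunk) => { body += chunk; });
            res.on('end', () => resolve(body));
        }).on('error', reject);
    });
}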
To loop and synchronously chain asynchronous actions, the cleanest solution is probably to use a promise library (promises are being introduced in ES6; this is the way to go).
Using Bluebird, this could be:
var p = Promise.resolve();
forEach(sets, function(item, index, arr) {
    // reassign p so each request only starts after the previous one has finished
    p = p.then(function() {
        return new Promise(function(resolve, reject) {
            http.get(theUrl, function(res) {
                // ....
                res.on('end', function() {
                    // ...
                    resolve();
                });
            });
        });
    });
});
p.then(function() {
    // all tasks launched in the loop are finished
});
I found out that I wasn't releasing my MySQL connections after I was done with each call, and this tied up the connections, causing failures that looked like a synchronization issue.
After explicitly calling connection.release(), my code works 100% correctly, even in an asynchronous fashion.
Thanks to those who posted to this question.
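For reference, with the pooled connection that mysqlLib.getConnection() hands out, the release could sit in the model above roughly like this (a sketch; counting the pending queries is just one way to know when all inserts have finished):
exports.importResponse = function(cardList, setCode, callback) {
    mysqlLib.getConnection(function(err, connection) {
        var remaining = cardList.length;
        cardList.forEach(function(item) {
            var theSql = "INSERT INTO table (name, code, multid, collector_set_num) VALUES "
                + "(?, ?, ?, ?) ON DUPLICATE KEY UPDATE id=id";
            connection.query(theSql, [item.name, setCode, item.multid, item.number], function(err) {
                if (err) console.log(err);
                if (--remaining === 0) {
                    connection.release();   // give the connection back to the pool
                    callback(setCode);
                }
            });
        });
    });
};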
"use strict";
var Promise = require("bluebird");
var some = require('promise-sequence/lib/some');
var pinger = function(wht) {
return new Promise(function(resolve, reject) {
setTimeout(function () {
console.log('I`ll Be Waiting: ' + wht);
resolve(wht);
}, Math.random() * (2000 - 1500) + 1500);
});
}
var result = [];
for (var i = 0; i <= 12; i++) {
result.push(i);
}
some(result, pinger).then(function(result){
console.log(result);
});
Just wrap the loop in an async function. This example illustrates what I mean:
const oneSecond = async () =>
new Promise((res, _) => setTimeout(res, 1000));
This function completes after just 1 second:
const syncFun = () => {
for (let i = 0; i < 5; i++) {
oneSecond().then(() => console.log(`${i}`));
}
}
syncFun(); // Completes after 1 second ❌
This one works as expected, finishing after 5 seconds:
const asyncFun = async () => {
for (let i = 0; i < 5; i++) {
await oneSecond();
console.log(`${i}`);
}
}
asyncFun(); // Completes after 5 seconds ✅
var urls = ['http://stackoverflow.com/', 'http://security.stackexchange.com/', 'http://unix.stackexchange.com/'];
// Note: this fires all the requests at once rather than one at a time.
for (var i = 0; i < urls.length; i++) {
    http.get(urls[i], function(res) {
        // add some code here to process the response
    });
}
