I want to request a website 40 times.
I want this to run in controlled batches — 10 requests at a time, 4 times.
This is My code for 1 request - 40 times:
'use strict';
var request = require('request');
var co = require('co');
// Wrap a single GET request in a Promise that resolves with a short
// confirmation string, or rejects with the transport error.
function callUrl(url) {
    return new Promise((resolve, reject) => {
        request.get(url, (err, response, body) => {
            if (err) {
                reject(err);
            } else {
                resolve(`Response from ${url}`);
            }
        });
    });
}
// Issue the 40 requests one at a time: each `yield` suspends the
// generator until the current request has settled.
co(function* () {
    for (let i = 1; i <= 40; i++) {
        const rs = yield callUrl('https://www.google.com/?q=' + i);
        console.log(rs);
    }
});
I can make an array of promises, but I can't figure out how to make the value of q different for each request.
You don't want to run them synchronously - you want to synchronize them - those are different.
You'd use an array of promises together with Promise#all. When you create a promise the action is already being executed - only yield synchronizes things.
You can make 10 requests at once like so:
co(function* () {
    // Process the 40 URLs in batches of (up to) 10 concurrent requests.
    for (var i = 1; i < 41;) {
        var promises = [];
        // BUG FIX: the batch bound must be Math.min(lim, 41) — with
        // Math.max the first batch would fire all 40 requests at once.
        for (var lim = i + 10; i < Math.min(lim, 41); i++) {
            promises.push(callUrl('https://www.google.com/?q=' + i));
        }
        let rs = yield Promise.all(promises); // wait for the whole batch
        console.log(rs); // an array of up to 10 results
    } // BUG FIX: this closing brace for the outer loop was missing
});
Note that in addition to that, your code is still not very efficient - what happens if 9 out of 10 requests are really fast and one takes a minute? You'll only have one outgoing request. You can use a library like bluebird which has a more efficient Promise.map method with a concurrency parameter.
This might work without using generators:
const urls = [/*array of urls*/];
// BUG FIX: `request[urls[0]]` indexed into the request object (yielding
// undefined) instead of invoking it — it must be called as a function.
const initialPromise = request(urls[0]);
let promise = initialPromise;
for (let i = 1; i < 40; i++) {
    // Chain each request onto the previous one so they run sequentially;
    // `let i` gives each thenFunction its own binding of the index.
    const thenFunction = response => {
        //do something with the response
        return request(urls[i]);
    };
    promise = promise.then(thenFunction);
}
// NOTE(review): add a final promise.catch(...) so a failure anywhere in
// the chain does not become an unhandled rejection.
The idea behind this is to build a chain of promises, so that each one waits for the previous one to finish.
Related
I have the following web data collector:
// NOTE(review): this snippet has several defects, kept as-is and annotated:
//  - Promise.map is not standard JS (presumably Bluebird — verify the import).
//  - fs.writeFileSync runs before any promise resolves, so 1.json is
//    written before the scraped data has arrived.
//  - mytxt / myarray / mainarray are undeclared globals shared across calls.
function start(urls) {
Promise.map(urls, requestPromise)
.map((htmlPage, index) => {
const $ = cheerio.load(htmlPage);
$(".fixedttitle2").each(function () {
mytxt = $(this).text();
myarray.push(mytxt);
});
mainarray[urls[index]] = myarray;
});
// NOTE(review): executes immediately, before any page has been fetched.
fs.writeFileSync("1.json", JSON.stringify(mainarray));
}
var urls = [];
for (i = 1; i <= 100; i++) {
// NOTE(review): this replaces the array with a string on every pass;
// urls.push(...) was probably intended, with start(urls) called once after.
urls = "https://thisurl.com/" + i.toString();
start(urls);
}
Now I want to check the response of each request first. How can I check the response code in order to discard URLs that return a 500 error? How can I handle this?
You might be looking for something like this.
scrape (née start) processes a single URL and returns a promise of [url, content], or if there's an error, [url, null].
main generates the list of URLs to scrape, then starts scrape for all of them.
Note that all 100 requests start at once; this may or may not be a problem for you.
Finally, when all of the scrape promises complete, their return values are gathered into response, and that's written into the JSON file.
This differs from the original in that the original kept re-writing the file as new content was scraped.
// Fetch one URL and extract the text of every .fixedttitle2 element.
// Resolves to [url, texts] on success, or [url, null] on any failure.
async function scrape(url) {
    try {
        const body = await requestPromise(url);
        const $ = cheerio.load(body);
        const texts = [];
        // cheerio's .each exposes the element via `this`, so a plain
        // function (not an arrow) is required here.
        $('.fixedttitle2').each(function () {
            texts.push($(this).text());
        });
        return [url, texts];
    } catch (err) {
        console.error(`Error processing url: ${url}: ${err}`);
        return [url, null];
    }
}
// Build the 100 URLs, scrape them all concurrently, then persist every
// [url, result] pair to 1.json in a single write at the end.
async function main() {
    const urls = Array.from({ length: 100 }, (_, k) => `https://thisurl.com/${k + 1}`);
    const response = await Promise.all(urls.map(scrape));
    fs.writeFileSync('1.json', JSON.stringify(response));
}
If you'd like the requests to be done sequentially, you can await scrape() in the loop:
// Sequential variant: one scrape at a time, in URL order.
async function main() {
    const response = [];
    for (let i = 1; i <= 100; i++) {
        response.push(await scrape(`https://thisurl.com/${i}`));
    }
    fs.writeFileSync('1.json', JSON.stringify(response));
}
You could also move the write file call into the loop if you wanted the same incremental behavior your original code had.
EDIT
You can also add a delay to the sequential loop:
// Basic promisified delay helper: resolves (with no value) after `ms` milliseconds.
function delay(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
// Sequential scraping with a fixed pause between consecutive requests.
async function main() {
    const response = [];
    for (let i = 1; i <= 100; i++) {
        response.push(await scrape(`https://thisurl.com/${i}`));
        await delay(1000); // Wait for 1000 ms between scrapes
    }
    fs.writeFileSync('1.json', JSON.stringify(response));
}
I have few functions which does the same for different objects. For example:
// Builds `misc` from the DB rows whose uniqid matches the current order.
// NOTE(review): relies entirely on outer-scope globals (misc, order,
// orderid, connection, _util) rather than arguments — hard to reuse or test.
const returnMiscArray = () => {
var i = 0;
var id = 0;
misc = _util.toNull(misc);
const promise = new Promise((resolve, reject) => {
// Linear scan for the index of the current order id.
// NOTE(review): loops forever if orderid is not present in order.id.
while (Number(order.id[id]) != Number(orderid)) id++;
connection.query(`SELECT * FROM misc WHERE uniqid = ?`, [order.uniqid[id]], function(error, results) {
// NOTE(review): `error` is never checked — on a query failure `results`
// is undefined and this throws instead of rejecting the promise.
while (i < results.length) {
misc.uniqid[i] = results[i].uniqid;
misc.id[i] = results[i].id;
misc.count[i] = results[i].count;
misc.cost[i] = results[i].cost;
misc.product[i] = results[i].product;
misc.fcost[i] = results[i].fcost;
i++;
}
resolve(misc);
});
});
return (promise);
}
and
// Same shape as returnMiscArray, but for the `positions` table.
// Note the column rename: results[i].len is stored as positions.length.
const returnPositionsArray = () => {
var i = 0;
var id = 0;
positions = _util.toNull(positions);
const promise = new Promise((resolve, reject) => {
// NOTE(review): as above, loops forever if orderid never matches.
while (Number(order.id[id]) != Number(orderid)) id++;
connection.query(`SELECT * FROM positions WHERE uniqid = ?`, [order.uniqid[id]], function(error, results) {
// NOTE(review): `error` is ignored here too — a failed query throws on
// results.length instead of rejecting the promise.
while (i < results.length) {
positions.uniqid[i] = results[i].uniqid;
positions.id[i] = results[i].id;
positions.length[i] = results[i].len;
positions.sqr[i] = results[i].sqr;
positions.cost[i] = results[i].cost;
positions.count[i] = results[i].count;
i++;
}
resolve(positions);
});
});
return (promise);
}
I want to make one function which takes object as argument, changes it and returns promise.
thinking of something like that:
// Draft of a generic version of the two functions above — annotated, unchanged.
const returnArray = (object, cur_order, id) => {
var i = 0;
var j = 0;
object = _util.toNull(object);
// NOTE(review): a value does not carry its variable's name — pass the
// table name in as an explicit string argument instead.
var objstr = somevalue; /* CONVERT PASSED OBJECT VARIABLE NAME TO STRING */
const promise = new Promise((resolve, reject) => {
// NOTE(review): misplaced parenthesis — this evaluates
// Number(cur_order.id[j] != Number(id)), i.e. Number() of a boolean,
// rather than comparing Number(cur_order.id[j]) != Number(id).
while (Number(cur_order.id[j] != Number(id))) j++;
// NOTE(review): objstr is concatenated into the SQL text — only safe if
// it comes from a fixed whitelist of table names, never from user input.
connection.query(`SELECT * FROM ` + objstr + ` WHERE uniqid = ?`, [cur_order.uniqid[j]], function(error, results) {
while (i < results.length) {
/*
SOMETHING HERE
*/
}
resolve(object);
});
// BUG (noted, code unchanged): this `return` sits inside the Promise
// executor, so returnArray itself returns undefined, not the promise.
return (promise);
});
}
but I have no idea how I can make it work. I know we can go through object parameters using keys, but how can I do the same for SQL results?
Is there a way to make it work?
Ok so you have two functions which handle specific tables. You're already seeing that this is an opportunity to avoid repetition by using abstraction. The question is: what do we abstract?
Look at the two functions and see what is different between the two. Based on what you've written here, what you need to pass in is a table name and a callback that takes a result and its index i.
But I don't love the code that you've written for a bunch of reasons.
It would probably be better to fetch multiple ids in one SQL call using IN with an array.
Your query callback needs to reject the Promise in the case of an error.
You’re resolving a promise from a loop?
I genuinely don't understand what you are doing with misc.uniqid[i]... Why not just return the results, or a mapped version of them?
Where are these variables order.id and orderid in the first two functions coming from? Can you write the function such that it gets everything it needs from its arguments?
I am lost in the promised land and could really use some guidance. I have exhausted searching numerous SO questions (2-3 hours of reading solutions + docs) related to this seemingly common issue and feel I just am not getting it.
Overview
Below I have code that takes in an Object type (resources), grabs a few values from this Object and then calculates distance and duration from the GoogleMaps Distance Matrix. The results of the function googleRequest() are a promise containing two values (distance and duration).
I would like to get these two values back within the for loop, execute pushToRows(), and then return an array called final_rows.
Problem
final_rows shows UNDEFINED for the duration and distance keys within each row. I speculate this is occurring because I am attempting to access the values in dist_dur inappropriately. I would appreciate any help on resolving this issue. Thanks.
Code
final_rows = []
// Question code: annotated, unchanged.
function getDistTime(resources){
for (var i = 0; i < resources.data.length; i++) {
var origin1 = $("#citystate").val();
var destinationA = resources.data[i]['DEMOBILIZATION CITY'] + ',' + resources.data[i]['DEMOBILIZATION STATE'];
// NOTE(review): .then() returns a NEW pending Promise — dist_time_data
// is never the resolved [distance, duration] pair here.
var dist_time_data = googleRequest(origin1, destinationA).then((values) => {
return values
})
// NOTE(review): called synchronously with the pending Promise above;
// indexing a Promise yields undefined, which is why DISTANCE_MI and
// ACTUAL_DUR_HR come out undefined.
pushToRows(resources.data[i], dist_time_data)
}
// console.log(final_rows)
}
// Copies distance/duration onto the row object and collects it globally.
function pushToRows(resources, dist_dur){
resources["DISTANCE_MI"] = dist_dur[0];
resources["ACTUAL_DUR_HR"] = dist_dur[1];
resources["FINANCE_DUR_HR"] = (dist_dur[0] / 45.0).toFixed(2)
final_rows.push(resources)
}
So, what you would need to do is store the promises in an array inside the for loop and then wait for them to resolve using Promise.all. Note that this parallelizes your requests to the Google distance API.
// Kick off every distance request up front, then wait for all of them
// with Promise.all before copying the results onto the rows.
function getDistTime(resources){
    const promiseArr = resources.data.map((row) => {
        const origin = $("#citystate").val();
        const destination = row['DEMOBILIZATION CITY'] + ',' + row['DEMOBILIZATION STATE'];
        return googleRequest(origin, destination);
    });
    // Promise.all preserves creation order, so index i lines each result
    // back up with its source row.
    return Promise.all(promiseArr)
        .then((resultsArr) => {
            resultsArr.forEach((result, i) => pushToRows(resources.data[i], result));
        });
}
// Copy the [distance, duration] pair onto the row, derive the finance
// duration at 45 mph, and append the row to the global final_rows.
function pushToRows(resources, dist_dur){
    const [distanceMi, durationHr] = dist_dur;
    resources["DISTANCE_MI"] = distanceMi;
    resources["ACTUAL_DUR_HR"] = durationHr;
    resources["FINANCE_DUR_HR"] = (distanceMi / 45.0).toFixed(2);
    final_rows.push(resources);
}
I would recommend to use async-await which are syntactic sugar to promises but make your code easy to understand and remove the complications that come with promise chaining.
If you move your pushToRows() inside where you return values, you will have access to that data.
// Consume the resolved values inside .then — that is the only place
// they exist.
googleRequest(origin1, destinationA).then((values) => {
pushToRows(resources.data[i], values);
});
Until that promise resolves, dist_time_data would be undefined
You could also convert to Promise.all() which takes an array of promises and resolves when all of the promises are complete:
// Same idea with Promise.all, but resolves to brand-new row objects
// instead of mutating resources.data in place.
function getDistTime(resources){
    const promises = resources.data.map((row) => {
        const origin1 = $("#citystate").val();
        const destinationA = row['DEMOBILIZATION CITY'] + ',' + row['DEMOBILIZATION STATE'];
        return googleRequest(origin1, destinationA);
    });
    return Promise.all(promises).then((results) =>
        results.map((result, i) => ({
            ...resources.data[i],
            DISTANCE_MI: result[0],
            ACTUAL_DUR_HR: result[1],
            FINANCE_DUR_HR: (result[0] / 45.0).toFixed(2)
        }))
    );
}
// The caller receives the fully-built rows once every request is done.
getDistTime(resources).then(result => {
//result is now "final_rows"
});
I need to read a grid and take that data and call a $getJSON url. The grid could have over 100 lines of data. The getJSON returns a list of comma separated values that I add to an array. Once the loop is finished I take the array and process it for the duplicates. I need to use the duplicates in another process. I know that I can't determine the order of the data that is coming back, but I need to know that all of the calls have been made.
// Question code: annotated, unchanged.
for (let i = 0; i < rowscount; i++){
$.getJSON(
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&retmax=500&term=" +
terms,
function (data) {
var pmids = data.esearchresult.idlist;
var pmidlist = pmids.join();
pmid_List.push(pmidlist);
// NOTE(review): this fires when the LAST-STARTED request completes,
// not when ALL requests have completed — responses can arrive in any
// order, which is why the follow-up sometimes runs too early.
if (i == rowscount - 1) {
// call the related function
}
});
}
I can't figure out how to be sure that the process has finished. The call to the related function has been done early at times.
Well if we keep track of how many have completed we can fire off the code when the last one is done.
// Count completions instead of relying on loop order: the follow-up runs
// only after every callback has fired, regardless of arrival order.
let complete = 0;
for (let i = 0; i < rowscount; i++) {
    $.getJSON(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&retmax=500&term=" + terms,
        function (data) {
            pmid_List.push(data.esearchresult.idlist.join());
            complete += 1;
            if (complete == rowscount) {
                // call the related function
            }
        }
    );
}
I'd use fetch and Promise.all
const link = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&retmax=500&term=";
// Start three fetches immediately; Promise.all resolves once every
// response body has been parsed as JSON.
const requests = Array.from({ length: 3 }, () => fetch(link + 'foo').then((res) => res.json()));
Promise.all(requests).then((results) => {
    //called when all requests are done
    console.log(results);
});
Try this
// Wraps one $.getJSON call; the returned jqXHR is thenable, so
// Promise.all can wait on it directly.
function getJson(url, i) {
    const onSuccess = function (data) {
        console.log('completed', i);
        return data;
    };
    return $.getJSON(url, onSuccess);
}
// Builds the full batch of request promises; nothing is awaited here.
function run() {
    const base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&retmax=500&term=";
    const promises = [];
    for (let i = 0; i < rowscount; i++) {
        promises.push(getJson(base + 'foot', i));
    }
    return promises;
}
// Resolves only after every getJson promise has settled successfully.
Promise.all(run()).then(() => console.log('All are completed'));
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
I've been working with Node.js for 5 years, and for the last 2 years on big projects with this framework. For two years I've been confronted with a problem: how do you work asynchronously, and fast, with non-async third-party services that queue requests, like MySQL, MongoDB or Apache SolR?
I'm used to work with promises and to prepared several promises requests, like this:
// Fire off all 1000 requests immediately and collect their promises.
const promises = []
for (let i = 0; i < 1000; i += 1) {
    promises.push(mySqlRequest())
}
// Settles once every request resolves (or fail-fast on the first rejection).
Promise.all(promises)
    .then()
    .catch()
This example will work, but it will send 1000 requests to the MySQL server at the same time; the server will queue these requests, become very slow, and consume a very large quantity of RAM.
The best solution is to do only one big request, but in some cases that's impossible and I'm forced to write a recursive function,
which amounts to being synchronous and slower.
So, what is the best way to work fast and asynchronously with Node.js against a third party that queues requests?
If sending all requests at once doesn't work and sending them one by one doesn't work either, you'd need something similar to a thread-pool where some arbitrary number of tasks execute simultaneously. This is easily implementable using promises, for example like this:
Promise.pooled = function(arr, num = 5) {
return new Promise(function(resolve, reject) {
var i = -1;
var error = false;
var end = function() {
num--;
if(num === 0) resolve();
}
var next = function() {
if(error) return;
i++;
if(i >= arr.length)
end();
else
arr[i]().then(next).catch(onerr);
}
var onerr = function() {
if(error) return
error = true
reject.call(arguments)
}
for(var j = 0; j < num; j++)
next()
});
}
What this allows you to do is pass an array of functions as the first argument; those functions should take no parameters and return a promise. It will then execute exactly `num` of them simultaneously. If one of the promises fails, the pooled promise fails as well and stops executing (this is easily changeable).
Example:
// Promise-based timer: resolves (with no value) after `ms` milliseconds.
Promise.after = function(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
};
// Promise pool (repeated for the runnable example): at most `num` of the
// promise-returning functions in `arr` run at once.
Promise.pooled = function(arr, num = 5) {
    return new Promise(function(resolve, reject) {
        var i = -1;          // index of the last task handed out
        var error = false;   // latched once any task fails
        // When the last of the `num` workers runs out of tasks, resolve.
        var end = function() {
            num--;
            if(num === 0) resolve();
        }
        var next = function() {
            if(error) return;
            i++;
            if(i >= arr.length)
                end();
            else
                arr[i]().then(next).catch(onerr);
        }
        var onerr = function(err) {
            if(error) return
            error = true
            // BUG FIX: was `reject.call(arguments)` — that calls reject
            // with no argument, rejecting with undefined; forward the
            // actual error instead.
            reject(err)
        }
        // Start `num` workers pulling tasks.
        for(var j = 0; j < num; j++)
            next()
    });
}
// Thirteen pre-built jobs of varying duration. afterH is a hoisted
// function declaration (defined below), so calling it here is fine.
var test = [
afterH(1000),
afterH(500),
afterH(800),
afterH(600),
afterH(3000),
afterH(300),
afterH(900),
afterH(2000),
afterH(1500),
afterH(900),
afterH(700),
afterH(600),
afterH(700)
];
// helper function, returns a function which when invoked returns a promise
// (the delay only starts when the pool actually invokes the inner function)
function afterH(ms) {
return function() {
console.log("Starting one job")
return Promise.after(ms);
}
}
// Run the 13 jobs with at most 3 in flight at any moment.
Promise.pooled(test, 3).then(function() {console.log("All jobs finished") }).catch(function() {console.log("Job failed")})