Node.js: how to run axios.get every 2 seconds? - javascript

I am kind of a newbie to Node.js. Here is what I am trying to do: I am looping through a JSON file full of links to our website via the map function (around 3000 links), and inside the loop I am doing an axios get for each link and getting the response status code (I will do other things in the future). But I want to run the axios get only every 2 or 5 seconds, otherwise I am overwhelming the web server. I tried adding async/await, but it's still too fast and the server is taking a hit (I am technically DDoS-ing my own website). I put a setTimeout around the axios call, but that doesn't seem to have worked, since the results are printing in the console way too fast. So the question is: how do I make each axios.get request wait 2 seconds before running in the map loop?
var axios = require('axios');
const fs = require('fs');

var statusCheck = 0;
var main = [];
let rawdata = fs.readFileSync('C:/Users/jay/Documents/crawl/filtered2.json');
let jsonParsed = JSON.parse(rawdata);

jsonParsed.map(async (line) => {
    var encodeLink = encodeURI(line.link);
    const response = await axios.get(encodeLink).catch((err) => {
        var Status_ErrorsCatchaxios = {
            "status Code": err.response.status ? err.response.status : "No status code available",
            "Page title:": $('title').text() ? $('title').text() : 'No title avaialble',
            "Original Link": encodeLink ? encodeLink : "No Original Link Available",
            "errorCode": err
        }
        main.push(Status_ErrorsCatchaxios)
    })
    try {
        console.log(response.status)
        statusCheck = statusCheck + 1;
        console.log("Link: ", statusCheck)
    } catch (error) {
        console.log(error)
    }
})

The [].map function doesn't wait for your items to resolve, so your code is currently dispatching all the requests (as you said, around 3000) in parallel.
You can use for...of instead to only run one request at a time. For example:
async function makeRequests (lines) {
    for (const line of lines) {
        const encodedLink = encodeURI(line.link)
        const response = await axios.get(encodedLink)
        // ...your response handling code here...
    }
}
makeRequests(jsonParsed)
If you want to wait for 2s between each request, you can add this line of code inside your for...of loop:
await new Promise(resolve => setTimeout(resolve, 2000))
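Putting the two together, a minimal sketch using the jsonParsed array and file path from the question might look like this:

const axios = require('axios');
const fs = require('fs');

const jsonParsed = JSON.parse(fs.readFileSync('C:/Users/jay/Documents/crawl/filtered2.json'));

async function makeRequests (lines) {
    for (const line of lines) {
        // wait 2s before each request so the server isn't flooded
        await new Promise(resolve => setTimeout(resolve, 2000));
        const encodedLink = encodeURI(line.link);
        try {
            const response = await axios.get(encodedLink);
            console.log(response.status, encodedLink);
        } catch (err) {
            console.log('Request failed for', encodedLink);
        }
    }
}

makeRequests(jsonParsed);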
Better solution
The solution above works, but I assume your webserver can probably take more than one request at a time, so maybe the ideal scenario would be to limit your code to make only up to N requests in parallel at a given time. This way you don't flood your server but you're able to get your results faster than just doing one request at a time.
The bluebird NPM module allows you to do that with their Promise.map function.
This function receives your list of items as the first argument, a function that executes something and returns a promise for each item as the second argument, and an object with a concurrency key describing how many items you want to allow to be handled in parallel as the third argument.
Here's how it could work:
const bluebird = require('bluebird')

async function makeRequests (lines) {
    await bluebird.map(
        lines,
        async (line) => {
            const encodedLink = encodeURI(line.link)
            const response = await axios.get(encodedLink)
            // ...your response handling code here...
        },
        { concurrency: 3 }
    )
}
makeRequests(jsonParsed)

Ditch the map, replace it with a for...of loop, await a promise that takes 2s to resolve, and wrap everything inside an async IIFE so the await is legal.
// dummy data
const fakeJson = new Array(5).fill();
const fakeRequest = () => console.log(`request at ${new Date().toUTCString()}`);
// iteration with 2s in between
(async () => {
    for (let line of fakeJson) {
        await new Promise(r => setTimeout(r, 2000));
        fakeRequest();
    }
})()
You could also, more classically, use setInterval, but HTTP requests are asynchronous, so you might as well start with a structure that handles async code and loops well.
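For completeness, a setInterval version of the same dummy example might look like this; note that you have to clear the interval yourself once the list is exhausted:

// dummy data, same as above
const fakeJson = new Array(5).fill();
const fakeRequest = () => console.log(`request at ${new Date().toUTCString()}`);

let i = 0;
const timer = setInterval(() => {
    if (i >= fakeJson.length) {
        clearInterval(timer); // stop once every entry has been handled
        return;
    }
    fakeRequest();
    i++;
}, 2000);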

You are hitting them all at once because .map, .forEach, .reduce, etc. don't wait for the Promise to resolve. Use a simple for loop; with await inside it, it will wait for each promise to resolve or reject.
for (let i = 0; i < jsonParsed.length; i++) {
    var encodeLink = encodeURI(jsonParsed[i].link);
    const response = await axios.get(encodeLink).catch(...)
    try {
        // ...
    } catch (error) {
        // ...
    }
}
Why doesn't it work?
If we imitate the forEach loop, it will be something like:
function forEach(arr, cb) {
    for (let i = 0; i < arr.length; i++) {
        cb(arr[i], i, arr);
    }
}
So you see it doesn't await the cb.
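By contrast, a helper that does await each callback would look roughly like this (a sketch, not a drop-in replacement for the native forEach):

async function forEachAwait(arr, cb) {
    for (let i = 0; i < arr.length; i++) {
        // unlike the native forEach, this waits for each callback's promise
        await cb(arr[i], i, arr);
    }
}

// usage: await forEachAwait(jsonParsed, async (line) => { /* axios.get here */ });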

The reason a setTimeout won't work inside a plain loop is that all the timers are scheduled at once, so all the requests/functions fire together after the timeout delay.
The idea is to put a delay in each iteration and only start the next iteration after that delay.
You can run a self-invoking function that calls itself again after the delay. So, to run the function every 2 seconds, you can try this:
let jsonParsed = JSON.parse(rawdata);
let len = jsonParsed.length;
(function requestLoop (i) {
    // note: the callback must be async for the await inside to be legal
    setTimeout(async function () {
        let line = jsonParsed[len - i]
        var encodeLink = encodeURI(line.link);
        const response = await axios.get(encodeLink).catch((err) => {
            var Status_ErrorsCatchaxios = {
                "status Code": err.response.status ? err.response.status : "No status code available",
                "Page title:": $('title').text() ? $('title').text() : 'No title avaialble',
                "Original Link": encodeLink ? encodeLink : "No Original Link Available",
                "errorCode": err
            }
            main.push(Status_ErrorsCatchaxios)
        })
        try {
            console.log(response.status)
            statusCheck = statusCheck + 1;
            console.log("Link: ", statusCheck)
        } catch (error) {
            console.log(error)
        }
        if (--i) requestLoop(i);
    }, 2000)
})(len);

You can use the index to stagger each request:
var i = 0;
jsonParsed.map(async (line) => {
    i++
    setTimeout(async () => {
        // your axios.get call for line.link goes here
    }, i * 2000)
})
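Filled in with the request from the question, that staggering approach might look like this (a sketch; all the timers are created immediately and simply fire 2 seconds apart):

var i = 0;
jsonParsed.map((line) => {
    i++;
    setTimeout(async () => {
        const encodeLink = encodeURI(line.link);
        try {
            const response = await axios.get(encodeLink);
            console.log(response.status, encodeLink);
        } catch (err) {
            console.log('Request failed for', encodeLink);
        }
    }, i * 2000);
});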

You can use the setTimeout function to delay the code by 2 seconds:
setTimeout(async () => {
    // await postRequest()
}, 2000)

Related

Array of filtered axios results from paginated API is empty

In my code below I get an empty array on my console.log(response) but the console.log(filterdIds) inside the getIds function is showing my desired data. I think my resolve is not right.
Note that I run do..while once for testing. The API is paged. If the records are from yesterday it will keep going, if not then the do..while is stopped.
Can somebody point me in the right direction?
const axios = require("axios");

function getToken() {
    // Get the token
}

function getIds(jwt) {
    return new Promise((resolve) => {
        let pageNumber = 1;
        const filterdIds = [];
        const config = {
            // Config stuff
        };
        do {
            axios(config)
                .then((response) => {
                    response.forEach(element => {
                        // Some logic, if true then:
                        filterdIds.push(element.id);
                        console.log(filterdIds);
                    });
                })
                .catch(error => {
                    console.log(error);
                });
        } while (pageNumber != 1)
        resolve(filterdIds);
    });
}
getToken()
    .then(token => {
        return token;
    })
    .then(jwt => {
        return getIds(jwt);
    })
    .then(response => {
        console.log(response);
    })
    .catch(error => {
        console.log(error);
    });
I'm also not sure where to put the reject inside the getIds function because of the do..while.
The fundamental problem is that resolve(filterdIds); runs synchronously before the requests fire, so it's guaranteed to be empty.
Promise.all or Promise.allSettled can help if you know how many pages you want up front (or if you're using a chunk size to make multiple requests--more on that later). These methods run in parallel. Here's a runnable proof-of-concept example:
const pages = 10; // some page value you're using to run your loop

axios
    .get("https://httpbin.org") // some initial request like getToken
    .then(response => // response has the token, ignored for simplicity
        Promise.all(
            Array(pages).fill().map((_, i) => // make an array of request promises
                axios.get(`https://jsonplaceholder.typicode.com/comments?postId=${i + 1}`)
            )
        )
    )
    .then(responses => {
        // perform your filter/reduce on the response data
        const results = responses.flatMap(response =>
            response.data
                .filter(e => e.id % 2 === 0) // some silly filter
                .map(({id, name}) => ({id, name}))
        );
        // use the results
        console.log(results);
    })
    .catch(err => console.error(err))
;
<script src="https://unpkg.com/axios/dist/axios.min.js"></script>
(The browser's network tab shows the requests happening in parallel.)
If the number of pages is unknown and you intend to fire requests one at a time until your API informs you of the end of the pages, a sequential loop is slow but can be used. Async/await is cleaner for this strategy:
(async () => {
    // like getToken; should handle err
    const tokenStub = await axios.get("https://httpbin.org");
    const results = [];
    // page += 10 to make the snippet run faster; you'd probably use page++
    for (let page = 1;; page += 10) {
        try {
            const url = `https://jsonplaceholder.typicode.com/comments?postId=${page}`;
            const response = await axios.get(url);
            // check whatever condition your API sends to tell you no more pages
            if (response.data.length === 0) {
                break;
            }
            for (const comment of response.data) {
                if (comment.id % 2 === 0) { // some silly filter
                    const {name, id} = comment;
                    results.push({name, id});
                }
            }
        }
        catch (err) { // hit the end of the pages or some other error
            break;
        }
    }
    // use the results
    console.log(results);
})();
<script src="https://unpkg.com/axios/dist/axios.min.js"></script>
(The network tab shows the sequential request waterfall.)
A task queue or chunked loop can be used if you want to increase parallelization. A chunked loop would combine the two techniques to request n records at a time and check each result in the chunk for the termination condition. Here's a simple example that strips out the filtering operation, which is sort of incidental to the asynchronous request issue and can be done synchronously after the responses arrive:
(async () => {
    const results = [];
    const chunk = 5;
    for (let page = 1;; page += chunk) {
        try {
            const responses = await Promise.all(
                Array(chunk).fill().map((_, i) =>
                    axios.get(`https://jsonplaceholder.typicode.com/comments?postId=${page + i}`)
                )
            );
            for (const response of responses) {
                for (const comment of response.data) {
                    const {name, id} = comment;
                    results.push({name, id});
                }
            }
            // check end condition
            if (responses.some(e => e.data.length === 0)) {
                break;
            }
        }
        catch (err) {
            break;
        }
    }
    // use the results
    console.log(results);
})();
<script src="https://unpkg.com/axios/dist/axios.min.js"></script>
(The network tab shows only an excerpt of the 100 requests, but the chunk size of 5 at a time is visible.)
Note that these snippets are proofs-of-concept and could stand to be less indiscriminate with catching errors, ensure all throws are caught, etc. When breaking it into sub-functions, make sure to .then and await all promises in the caller--don't try to turn it into synchronous code.
See also
How do I return the response from an asynchronous call? and Why is my variable unaltered after I modify it inside of a function? - Asynchronous code reference which explain why the array is empty.
What is the explicit promise construction antipattern and how do I avoid it?, which warns against adding a new Promise to help resolve code that already returns promises.
To take a step back and think about why you ran into this issue, we have to think about how synchronous and asynchronous JavaScript code work together. Your synchronous getIds function is going to run to completion, stepping through each line until it gets to the end.
The axios function invocation is returning a Promise, which is an object that represents some future fulfillment or rejection value. That Promise isn't going to resolve until the next cycle of the event loop (at the earliest), and your code is telling it to do some stuff when that pending value is returned (which is the callback in the .then() method).
But your main getIds function isn't going to wait around... it invokes the axios function, gives the Promise that is returned something to do in the future, and keeps going, moving past the do/while loop and onto the resolve method which returns a value from the Promise you created at the beginning of the function... but the axios Promise hasn't resolved by that point and therefore filterIds hasn't been populated.
When you moved the resolve method for the promise you're creating into the callback that the axios resolved Promise will invoke, it started working because now your Promise waits for axios to resolve before resolving itself.
Hopefully that sheds some light on what you can do to get your multi-page goal to work.
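As a rough sketch of that fix for a single page (dropping the do..while), with the resolve moved into the axios callback; it still uses the explicit promise construction warned about above, and it assumes the list of records is on response.data:

function getIds(jwt) {
    return new Promise((resolve, reject) => {
        const filterdIds = [];
        const config = {
            // Config stuff, presumably built from jwt
        };
        axios(config)
            .then((response) => {
                response.data.forEach(element => {
                    // Some logic, if true then:
                    filterdIds.push(element.id);
                });
                // resolve only once the response has been processed
                resolve(filterdIds);
            })
            .catch(reject);
    });
}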
I couldn't help thinking there was a cleaner way to allow you to fetch multiple pages at once, and then recursively keep fetching if the last page indicated there were additional pages to fetch. You may still need to add some additional logic to filter out any pages that you batch fetch that don't meet whatever criteria you're looking for, but this should get you most of the way:
async function getIds(startingPage, pages) {
    const pagePromises = Array(pages).fill(null).map((_, index) => {
        const page = startingPage + index;
        // set the page however you do it with axios query params
        config.page = page;
        return axios(config);
    });
    // get the last page you attempted, and if it doesn't meet whatever
    // criteria you have to finish the query, submit another batch query
    const lastPage = await pagePromises[pagePromises.length - 1];
    // the result from getIds is an array of ids, so we recursively get the rest of the pages here
    // and have a single level array of ids (or an empty array if there were no more pages to fetch)
    const additionalIds = lastPage.done ? [] : await getIds(startingPage + pages, pages);
    // now we wait for all page queries to resolve and extract the ids
    const resolvedPages = await Promise.all(pagePromises);
    const resolvedIds = [].concat(...resolvedPages).map(elem => elem.id);
    // and finally merge the ids fetched in this method's invocation with any fetched recursively
    return [...resolvedIds, ...additionalIds];
}
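A possible call site, assuming config is in scope and the last page exposes the done flag used above:

getIds(1, 5)
    .then(ids => console.log('collected ids:', ids))
    .catch(err => console.error(err));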

Make multiple async updates in a database

I have an array of elements to insert in a database. For each of them, I have to check their integrity (I send "Bad request" if I don't find an element):
let ret = []
const { idElement, type, description, name } = req.body
let promises = []

req.body.pjs.forEach((pj) => {
    promises.push(new Promise (async function(resolve, reject) {
        const { rows } = await db.query(`SELECT * FROM files WHERE uuid = '${pj.uuid}' AND name = '${pj.name}'`)
        if (rows.length == 0) { res.status(400).send("Bad request!") }
        const idFile = rows[0].id
        await db.query(`UPDATE elements
            SET base = base || '{"type":"file","valeur":"${idFile}","description":"${description}","name":"${pj.name}"}'::json
            WHERE id = ${idElement}; `)
        resolve({id: idElement, name: pj.name, val: idFile, description: description})
    }))
});

(async function() {
    const asyncFunctions = promises
    await asyncFunctions.reduce(async (previousPromise, nextAsyncFunction) => {
        await previousPromise;
        const r = await nextAsyncFunction();
        ret.push(r)
    }, Promise.resolve());
})();
res.send(ret)
I took the example from the paragraph "3) one-by-one" here: https://dev.to/afifsohaili/dealing-with-promises-in-an-array-with-async-await-5d7g
This trick works for a lot of use cases in other parts of my code, but not for this particular case. I have this error:
const r = await nextAsyncFunction();
TypeError: nextAsyncFunction is not a function
And I don't know why. If anybody could give me a hand, it would be very kind :)
The error message is correct: the second parameter of the reduce callback is the next entry of the array being reduced, which in this case is an entry of the promises array, i.e. a promise, not a function.
So the immediate solution is to await the promise without trying to call it:
const r = await nextAsyncFunction; // no () on the end
Why the nextAsyncFunction name was used instead of nextPromise or variation thereof is not self evident - it's certainly confusing and led to errors.
Aside from that, there seem to be some bugs waiting to happen:
If the "Bad request" message is sent, the code continues to execute, tries to update the database, and resolves the promise pushed by the forEach function. Subsequently res.send(ret) will (or is likely to) error as an attempt to send a second set of response headers. Try throwing a Bad Request error and catching it in a promise catch handler to send the 400 response (see the sketch below).
There is no attempt to wait for the asynchronous processing to finish before executing res.send(ret), which would send an empty array even if everything succeeded.
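As a sketch of the first point: push the promise returned by an async IIFE instead of wrapping it in new Promise, so that a throw rejects it, and let one handler at the end decide which response to send (Promise.all is used here purely for illustration; the names come from the question):

req.body.pjs.forEach((pj) => {
    promises.push((async function () {
        const { rows } = await db.query(`SELECT * FROM files WHERE uuid = '${pj.uuid}' AND name = '${pj.name}'`)
        if (rows.length === 0) {
            // abort this element's work instead of sending a response here
            throw new Error('Bad request!');
        }
        // ...the UPDATE query as before...
        return { id: idElement, name: pj.name };
    })());
});

// one place decides which response to send, after everything has settled
Promise.all(promises)
    .then(ret => res.send(ret))
    .catch(() => res.status(400).send('Bad request!'));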
The reduce(async (previousPromise, nextPromise) => ...) construct is a rather complicated way of waiting for the promises to resolve in turn; the same can be done with for ... of:
(async function() {
    for (const promise of promises) {
        ret.push(await promise);
    }
})()
.then(() => res.send(ret))
.catch(() => {
    // server error response?
});
Handling requests that are a mixture of valid and invalid pj request values may require further attention.
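If some pj entries may legitimately fail while the others should still be written, Promise.allSettled (Node 12.9+) is one way to separate the two outcomes. A sketch, assuming the per-element promises reject on failure (as in the sketch above) and that this runs inside an async handler:

const settled = await Promise.allSettled(promises);
const succeeded = settled
    .filter(s => s.status === 'fulfilled')
    .map(s => s.value);
const failed = settled
    .filter(s => s.status === 'rejected')
    .map(s => s.reason.message);
// send one response describing both outcomes
res.send({ succeeded, failed });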

Parallel HTTP requests in batches with async for loop for each request

I am trying to run parallel requests in batches to an API using a bunch of keywords in an array, following an article by Denis Fatkhudinov.
The problem I am having is that for each keyword, I need to run the request again with a different page argument for as many times as the number in the pages variable.
I keep getting Cannot read property 'then' of undefined for the return of the chainNext function.
The parallel requests in batches on their own, without the for loop, work great; I am struggling to incorporate the for loop into the process.
// Parallel requests in batches
async function runBatches() {
    // The keywords to request with
    const keywords = ['many keyword strings here...'];
    // Set max concurrent requests
    const concurrent = 5;
    // Clone keywords array
    const keywordsClone = keywords.slice()
    // Array for future resolved promises for each batch
    const promises = new Array(concurrent).fill(Promise.resolve());
    // Async for loop
    const asyncForEach = async (pages, callback) => {
        for (let page = 1; page <= pages; page++) {
            await callback(page);
        }
    };
    // Number of pages to loop for
    const pages = 2;
    // Recursively run batches
    const chainNext = (pro) => {
        // Runs itself as long as there are entries left on the array
        if (keywordsClone.length) {
            // Store the first entry and conveniently also remove it from the array
            const keyword = keywordsClone.shift();
            // Run 'the promise to be' request
            return pro.then(async () => {
                // ---> Here was my problem, I am declaring the constant before running the for loop
                const promiseOperation = await asyncForEach(pages, async (page) => {
                    await request(keyword, page)
                });
                // ---> The recursive invocation should also be inside the for loop
                return chainNext(promiseOperation);
            });
        }
        return pro;
    }
    return await Promise.all(promises.map(chainNext));
}

// HTTP request
async function request(keyword, page) {
    try {
        // request API
        const res = await apiservice(keyword, page);
        // Send data to an outer async function to process the data
        await append(res.data);
    } catch (error) {
        throw new Error(error)
    }
}

runBatches()
The problem is simply that pro is undefined, because you haven't initialized it.
You basically execute this code:
Promise.all(new Array(concurrent).fill(Promise.resolve()).map(pro => {
    // pro is undefined here because the Promise.resolve had no parameter
    return pro.then(async () => {})
}));
I'm not completely sure about your idea behind that, but this is your problem in a more condensed version.
I got it working by moving the actual request (promiseOperation) inside the for loop and returning the recursive function there too:
// Recursively run batches
const chainNext = async (pro) => {
    if (keywordsClone.length) {
        const keyword = keywordsClone.shift()
        return pro.then(async () => {
            await asyncForEach(pages, (page) => {
                const promiseOperation = request(keyword, page)
                return chainNext(promiseOperation)
            })
        })
    }
    return pro
}
Credit for the parallel requests in batches goes to https://itnext.io/node-js-handling-asynchronous-operations-in-parallel-69679dfae3fc

Perform asynchronous actions semi-synchronously in Axios

I have the following code:
/**
 * Fetch stats from api
 */
fetchStats() {
    this._isFetching = true;
    // fetch stats after building url and replacing invalid characters
    return new Promise(async (resolve, reject) => {
        await API.fetchStats(this.rsn)
            .then(jres => {
                this.skills = jres.main.skills;
                this._isFetching = false;
                resolve('success');
            })
            .catch(err => {
                console.log(err);
                console.log('error retreiving stats');
                this._isFetching = false;
                reject('Failed to retreive stats');
            })
            .finally(() => {
                this._isFetching = false;
            });
    });
}
I thought making it async with await would make it wait until it got the response before continuing. Returning the promise is something I added in testing to see if I could make it synchronous.
Then my code that consumes this method:
memberCollection.forEach(async el => {
    await el.player.fetchStats()
        .then(() => {
            console.log(`Refreshed ${el.player.rsn}'s account`);
        })
        .catch(console.log(`Failed to refresh ${el.player.rsn}'s account`));
});
My thinking was that it would wait till it got a response, then console.log either a successful refresh or a failed refresh. What I am instead seeing is a whole bunch of "success" messages followed by a string of failed messages, indicating that it is running both the then and the catch callbacks in the forEach. Does anyone know how I can make this work?
My issue is that Axios keeps timing out (my speculation is that it is due to the number of requests being sent off and the fact that there is a 5-10 sec delay as it pulls from the db). If I navigate to the API URL manually it works, and if I just do one member (as opposed to forEach) it works fine. So I'm trying to limit the number of requests fired off at once. I have tried setting my axios timeout to 10, 20, and 60 seconds, but it made no improvement.
Solution code:
const asyncForEach = async (arr, cb) => {
    for (let i = 0; i < arr.length; i++) {
        let el = arr[i];
        try {
            let res = await cb(el);
        } catch (err) { console.log(err) };
        if (el.player && el.player.rsn) console.log(`Processed ${el.player.rsn}`);
    }
    console.log('done processing in asyncForEach');
}
This is not linked to axios but to async/await. Consider:
function slow(i){
    return new Promise((ok, ko) => {
        return setTimeout(_ => ok(i), 1000)
    })
}

async function asyncForEach(arr, cb){
    for (var i = 0; i < arr.length; ++i){
        let el = arr[i];
        let res = await cb(el);
        console.log('async', res, new Date)
    }
}
/*
#foreach does not wait, but async and reduce are spaced by one second
foreach 4 2019-10-14T13:43:47.059Z
foreach 5 2019-10-14T13:43:47.071Z
foreach 6 2019-10-14T13:43:47.071Z
async 1 2019-10-14T13:43:47.071Z
async 2 2019-10-14T13:43:48.073Z
async 3 2019-10-14T13:43:49.074Z
reduce 7 2019-10-14T13:43:50.076Z
reduce 8 2019-10-14T13:43:51.078Z
reduce 9 2019-10-14T13:43:52.080Z
*/
async function main(){
    await [4,5,6].forEach(async el => {
        let res = await slow(el);
        console.log('foreach', res, new Date)
    })
    await asyncForEach([1,2,3], slow);
    await [7,8,9].reduce((acc, el) => acc.then(async _ => {
        let res = await slow(el);
        console.log('reduce', res, new Date);
        return;
    }), Promise.resolve())
}
main();
As you can see from the timestamps, forEach does not wait for slow to finish; however, asyncForEach in its iteration does wait.
What you may want to do is either
write a for loop as done with asyncForEach
use standard promises (stacking them):
[1,2,3].reduce((acc, el) => acc.then(_ => {
    return slow(el);
}), Promise.resolve())

Await for file to load, then run a function on each 'line' but wait for the return on each line

I have been trying to understand Promises and I'm hitting a brick wall.
==Order I want the code to run==
I need a .txt file to load each line into an array.
WAIT for this to happen.
Run a function on each entry that returns an array.
WAIT for each index of the array to be processed before doing the next.
==My Functions==
Call this function to start the program.
async function start(){
    var data = await getData();
    console.log(data);
    for (var i = 0; i < data.length; i++){
        console.log(await searchGoogle(data[i]));
    }
}
'await' for the data from getData
async function getData(){
    return new Promise(function(resolve, reject){
        fs.readFile('./thingsToGoogle.txt', function(err, data) {
            if (err) throw err;
            var array = data.toString().split("\n");
            resolve(array);
        });
    });
}
Then call searchGoogle on each index in the array.
async function searchGoogle(toSearch) {
    (async () => {
        const browser = await puppeteer.launch();
        const page = await browser.newPage();
        await page.goto('https://www.google.com/');
        await page.type('input[name=q]', toSearch);
        try {
            console.log('Setting Search' + toSearch);
            await page.evaluate(() => {
                let elements = document.getElementsByClassName('gNO89b');
                for (let element of elements)
                    element.click();
            });
            await page.waitForNavigation();
        } catch (err) {
            console.log(err)
        }
        try {
            console.log("Collecting Data");
            const [response] = await Promise.all([
                page.waitForNavigation(),
                await page.click('.rINcab'),
            ]);
        } catch (err) {
            console.log("Error2: " + err)
        }
        let test = await page.$$('.LC20lb');
        // console.log(test);
        allresults = [];
        for (const t of test) {
            const label = await page.evaluate(el => el.innerText, t);
            if (label != "") {
                allresults.push(label);
            }
        }
        await browser.close();
        resolve(allresults);
    })();
}
The problem is that this does not work; it does not wait for the file to load. In the Node.js output you can see it stacking the searchGoogle function console.logs:
console.log('Setting..')
console.log('Setting..')
console.log('Collecting..')
console.log('Collecting..')
When it should be
console.log('Setting..')
console.log('Collecting..')
console.log('Setting..')
console.log('Collecting..')
This is sort of the first time I'm dealing with promises. I have done a lot of reading up on them and written bits of code to understand them; however, when I have tried to apply this knowledge I am struggling. Hope someone can help.
-Peachman-
Queue with concurrency limit (using p-queue)
You need a queue with a concurrency limit. You will read every single line and add it to the queue. We will be using the readline and p-queue modules for this.
First, create a queue with concurrency of 1.
const {default: PQueue} = require('p-queue');
const queue = new PQueue({concurrency: 1});
Then, create our reader instance.
const fs = require('fs');
const readline = require('readline');
const rl = readline.createInterface({
    input: fs.createReadStream('your-input-file.txt')
});
For every line of the file, add an entry to the queue.
rl.on('line', (line) => {
    console.log(`Line from file: ${line}`);
    queue.add(() => searchGoogle(line));
});
That's it! If you want to process 10 lines at once, just change the concurrency line. It will still read one line at a time, but the queue will limit how many searchGoogle calls run at once.
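If you also need to know when every queued searchGoogle call has finished (for example, to log a summary or close shared resources), p-queue exposes onIdle(); a sketch:

rl.on('close', async () => {
    // the whole file has been read; wait for the queue to drain
    await queue.onIdle();
    console.log('All searches finished');
});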
Optional Fixes: Async Await
Your code has the following structure,
async yourFunction(){
    (async () => {
        const browser = await puppeteer.launch();
        // ... rest of the code
    })()
}
While this might run as intended, you will have a hard time debugging because you will be creating an anonymous function every time you run yourFunction.
The following is enough.
async yourFunction(){
    const browser = await puppeteer.launch();
    // ... rest of the code
}
Here's a way to process them that lets you process N URLs at a time where you can adjust the value of N. My guess is that you want it set to a value of between 5 and 20 in order to keep your CPU busy, but not use too many server resources.
Here's an outline of how it works:
It uses the line-by-line module to read a file line by line and, unlike the built-in readline interface, this module pauses line events when you call .pause(), which is important in this implementation.
It maintains a numInFlight counter that tells you how many lines are in the midst of processing.
You set a maxInFlight constant to the maximum number of lines you want to be processed in parallel.
It maintains a resultCntr that helps you keep results in the proper order.
It creates the readline interface and establishes a listener for the line event. This will start the stream flowing with line events.
On each line event, we increment our numInFlight counter. If we have reached the maximum number allowed in flight, we pause the readline stream so it won't produce any more line events. If we haven't reached the max in flight yet, then more line events will flow until we do reach the max.
We pass that line off to your existing searchGoogle() function.
When that line is done processing, we save the result in the appropriate spot in the array, decrement the numInFlight counter and resume the stream (in case it was previously paused).
We check if we're all done (by checking if numInFlight is 0 and if we've reached the end of our file). If we are done, resolve the master promise with the results.
If we're not all done, then there will either be more line events coming or more searchGoogle() functions in flight that will finish, both of which will check again to see if we're done.
Note that the way this is designed to work is that errors on any given URL are just put into the result array (the error object is in the array) and processing continues on the rest of the URLs with an eventual resolved promise. Errors while reading the input file will terminate processing and reject the return promise.
Here's the code:
const fs = require('fs');
const Readline = require('line-by-line');

function searchAll(file) {
    return new Promise(function(resolve, reject) {
        const rl = new Readline(file);
        // set maxInFlight to something between 5 and 20 to optimize performance by
        // running multiple requests in flight at the same time without
        // overusing memory and other system resources.
        const maxInFlight = 1;
        let numInFlight = 0;
        let resultCntr = 0;
        let results = [];
        let doneReading = false;

        function checkDone(e) {
            if (e) {
                reject(e);
            } else if (doneReading && numInFlight === 0) {
                resolve(results);
            }
        }

        rl.on('line', async (url) => {
            if (url) {
                let resultIndex = resultCntr++;
                try {
                    ++numInFlight;
                    if (numInFlight >= maxInFlight) {
                        // stop flowing line events when we hit maxInFlight
                        rl.pause();
                    }
                    let result = await searchGoogle(url);
                    // store results in order
                    results[resultIndex] = result;
                } catch(e) {
                    // store error object as result
                    results[resultIndex] = e;
                } finally {
                    --numInFlight;
                    rl.resume();
                    checkDone();
                }
            }
        }).on('end', () => {
            // all done reading here, may still be some processing in flight
            doneReading = true;
            checkDone();
        }).on('error', (e) => {
            doneReading = true;
            checkDone(e);
        });
    });
}
FYI, you can set maxInFlight to a value of 1 and it will read and process the URLs one at a time, but the whole point of writing this type of function is so that you can likely get better performance by setting it to a value higher than 1 (I'm guessing 5-20).
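A possible way to call it, assuming searchGoogle is the function from the question:

searchAll('./thingsToGoogle.txt')
    .then(results => {
        // results are in the same order as the lines in the file;
        // a failed line holds its error object instead of a result
        console.log(results);
    })
    .catch(err => console.error('Failed to read the input file:', err));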
