I am trying to scrape the list of results from Google Maps.
Example: Visit https://www.google.com/maps/search/gym+in+nyc
Get all results in an array, loop Starts and click element#1, extract data, back to results page and continue loop.
const finalData = async function () {
const arr = [];
const resultList = [...[...document.querySelectorAll("[aria-
label^='Results for']")][0].children].filter((even, i) => !(i % 2));
for (const eachElement of resultList) {
let response = await scrapePage(eachElement);
arr.push(response);
}
return arr;
};
async function scrapePage(elem) {
// Clicks each element
let click = await elem.click();
// Grabs Just the Title
const titleText = await setTimeout(function () {
let title = document.querySelector(".section-hero-header-title
span").innerText;
return title;
}, 3000);
// setTimeout to cause delay before click the back button
setTimeout(function () {
document.querySelector(".section-back-to-list-button").click();
}, 5000);
return titleText;
}
const final = finalData().then((value) => {
return value;
});
I have no idea why when I try the above code in devtools, only the last result is clicked and why my const variable "final" is filled with array of random numbers.
The problem is that you had assumed that the await operator worked on setTimeout; setTimout, setInterval and related functions do not use promises at all.
When working with time orientated code, generally I would set up a helper function that uses a promise:
const delay = seconds => new Promise(
resolve => {
setTimeout(resolve, seconds);
}
);
Updating your code to wait for the timeouts from here is simple:
const finalData = async () => {
const arr = [];
const resultList = [...document.querySelector("[aria-label^='Results for']").children].filter((even, i) => !(i % 2));
for (const eachElement of resultList) {
const response = await scrapePage(eachElement);
arr.push(response);
}
return arr;
};
async function scrapePage(elem) {
// Clicks each element
const click = await elem.click(); // this is not asynchronous! click also returns undefined
// Grabs Just the Title
await delay(3);
const titleText = document.querySelector(".section-hero-header-title span").innerText;
// delay before click[ing] the back button
await delay(5);
document.querySelector(".section-back-to-list-button").click();
return titleText;
}
const final = finalData();
And, on a related note, this piece of code waits for every single result in sequence, this takes at least 8 seconds per iteration:
const finalData = async () => {
const arr = [];
const resultList = [...document.querySelector("[aria-label^='Results for']").children].filter((even, i) => !(i % 2));
for (const eachElement of resultList) {
let response = await scrapePage(eachElement);
arr.push(response);
}
return arr;
};
If you wanted to concurrently execute every iteration, you may want to consider mapping the function across the array and using Promise.all, like so:
const finalData = async () => {
const resultList = [...document.querySelector("[aria-label^='Results for']").children].filter((_, i) => !(i % 2));
return Promise.all(resultList.map(scrapePage));
};
You can try this:
async function scrapePage(elem) {
// Clicks each element
let click = await elem.click();
// Grabs Just the Title
const titleText = await new Promise((resolve) => {
setTimeout(function () {
let title = document.querySelector(".section-hero-header-title span").innerText;
resolve(title);
}, 3000);
})
// setTimeout to cause delay before click the back button
setTimeout(function () {
document.querySelector(".section-back-to-list-button").click();
}, 5000);
return titleText;
}
Action in fuction (action for data imported from firstore)
Here you get index data.
export async function getUserIndex (data){
let db = loadFB().firestore();
console.log(data)
let date = moment(data).utc().format()
let query = db.collection('users').where("create_date", ">", date)
console.log(query)
return await query.get().then(docs=>{
let result = docs.size
console.log("result!!", result)
return result
})
}
components in function
async getuserIndex_component(date){
let number =""
number = await userAction.getUserIndex(date)
console.log("number",number)
return await number
}
const {user_list} = this.props; //user_list = [{id:aa, pw:bb},{id:cc, pw:dd}...} data
let data = user_list
let groups = {}
let number =0
for (let index_data of data) {
let date = moment(index_data.create_date).format("YYYY-MM-DD").slice(0, 10)
let index = this.getuserIndex_component(date) //<==here log Promise{<pendding>}
console.log(index)
if (groups[date]) {
let group = groups[date];
group.push({ index: index-number, ...index_data });
} else {
number =0;
groups[date] = [{ index: index-number, ...index_data }]
}
number++;
}
const dates = Object.keys(groups)
const user_list_result = []
for (let date of dates) {
user_list_result.push(...(groups[date]))
}
return(...)
I am using redux. in action
Made a "getUserIndex()."
The component will then access the "getUserIndex" and retrieve data. But there is only pending in the console.log, and the data I want does not appear.
In this situation, I would like to know how to deal with async in for of.
I want to know what part I'm making a mistake.
In summary, functions in components are executed later than the render. So the index in the render does not contain the data. What is the solution to this?
As getuserIndex_component is asynchronous, it will return a promise. You will need to wait for that promise to resolve before the index result is available.
Firstly, you should be able to simplify getuserIndex_component to
async getuserIndex_component(date) {
let number = await userAction.getUserIndex(date)
console.log("number", number)
return number
}
And when you remove the console.log it could simply be
async getuserIndex_component(date) {
return userAction.getUserIndex(date)
}
As I mentioned in the comments. You can send off all of the async calls, then just wait for all of the promises to resolve via Promise.all.
Something like this. (I couldn't test any of this, but it should demonstrate the general idea.)
const { user_list } = this.props; //user_list = [{id:aa, pw:bb},{id:cc, pw:dd}...} data
let data = user_list
let groups = {}
let number = 0
const dates = []
const promises = data.map((index_data) => {
const date = moment(index_data.create_date).format("YYYY-MM-DD").slice(0, 10)
dates.push(date) // To use after all promises resolve.
return this.getuserIndex_component(date) // The index promise.
})
const indexes = await Promise.all(promises)
for (let i = 0; i < dates.length; i++) {
const index = indexes[i]
const date = dates[i]
console.log(index)
if (groups[date]) {
let group = groups[date];
group.push({
index: index - number,
...index_data
});
} else {
number = 0;
groups[date] = [{
index: index - number,
...index_data
}]
}
number++;
}
const dates = Object.keys(groups)
const user_list_result = []
for (let date of dates) {
user_list_result.push(...(groups[date]))
}
return (...)
I'd like to scan items and avoid to use duplicates code.
so, I am trying to use for-of asynchronously for it.
async function checkDupl(){
const arr = new Array(10).fill(0);
let code = '';
for(const i of arr){
//generate RANDOM CODE
//for example, it would be '000001' to '000010'
code = (Math.floor(Math.random() * 10) + 1).toString().padStart(6,"0");
const params = { ... }; // it has filterExpression the code I generated randomly
await DYNAMO_DB.scan(params, (err, res) => {
if(res.Items.length === 0) {
/* no duplicate! */
return code;
}
});
}
return code;
}
console.log(checkDupl());
// it always return '';
What I have missed or misunderstood?
await just waits a Promise (or thenable object), but you are using await with a "void" function (You use DYNAMO_DB.scan as a callback styte function).
My suggestion, Use DYNAMO_DB.scan with Promise style (The way)
async function checkDupl() {
const arr = new Array(10).fill(0);
let code = '';
for (const i of arr) {
//generate RANDOM CODE
//for example, it would be '000001' to '000010'
code = (Math.floor(Math.random() * 10) + 1).toString().padStart(6, "0");
const params = { ... }; // it has filterExpression the code I generated randomly
const res = await DYNAMO_DB.scan(params).promise(); // convert to promise
if (res.Items.length === 0) {
/* no duplicate! */
return code;
}
return code;
}
}
(async () => {
console.log(await checkDupl());
})();
I'm stumped why loopCafes runs before buildCafeList in this example. I need to build an array and pass it along for additional modification, but the execution order is reversed.
The output returns:
loopCafes: 0
getCafeList 8
getCafeList 16
const fs = require("fs");
const JSON_FOLDER = "./reports/";
let cafes = [];
const buildCafeList = async () => {
fs.readdir(JSON_FOLDER, function(err, list) {
if (err) throw err;
list.forEach(function(file) {
let thisJSON = JSON_FOLDER + file;
fs.readFile(thisJSON, function(err2, data) {
if (err2) throw err2;
let thisJSON = JSON.parse(data);
for (let i = 0; i < thisJSON.businesses.length; i++) {
let thisCafe = thisJSON.businesses[i];
cafes.push({
alias: thisCafe.alias,
name: thisCafe.name,
url: thisCafe.url,
address1: thisCafe.location.address1,
city: thisCafe.location.city
});
}
console.log("getCafeList", cafes.length); // 8, 16
});
});
});
};
const loopCafes = async () => {
console.log("loopCafes:", cafes.length); // 0
for (let k = 0; k < cafes.length; k++) {
console.log(k, cafes[k].name);
}
};
const buildReport = async () => {
const getCafeList = await buildCafeList();
const showCafeList = await loopCafes();
};
buildReport();
fs.readdir is an async function that accepts a callback.
Consequently, buildCafeList returns immediately (since you marked it async and didn't include an explicit return statement it returns a promise that resolves immediately).
Later on, the callback for fs.readdir is triggered and the value you are logging is reported.
You need to wrap fx.readdir in a promise and resolve it when you get the data you want in the callback. buildCafeList needs to return that promise.
Sometimes I need to wait for a .forEach() method to finish, mostly on 'loader' functions. This is the way I do that:
$q.when(array.forEach(function(item){
//iterate on something
})).then(function(){
//continue with processing
});
I can't help but feel that this isn't the best way to wait for a .forEach() to finish. What is the best way to do this?
If there is no asynchronous code inside the forEach, forEach is not asynchronous, for example in this code:
array.forEach(function(item){
//iterate on something
});
alert("Foreach DONE !");
you will see the alert after forEach finished.
Otherwise (You have something asynchronous inside), you can wrap the forEach loop in a Promise:
var bar = new Promise((resolve, reject) => {
foo.forEach((value, index, array) => {
console.log(value);
if (index === array.length -1) resolve();
});
});
bar.then(() => {
console.log('All done!');
});
Credit: #rolando-benjamin-vaz-ferreira
The quickest way to make this work using ES6 would be just to use a for..of loop.
const myAsyncLoopFunction = async (array) => {
const allAsyncResults = []
for (const item of array) {
const asyncResult = await asyncFunction(item)
allAsyncResults.push(asyncResult)
}
return allAsyncResults
}
Or you could loop over all these async requests in parallel using Promise.all() like this:
const myAsyncLoopFunction = async (array) => {
const promises = array.map(asyncFunction)
await Promise.all(promises)
console.log(`All async tasks complete!`)
}
var foo = [1,2,3,4,5,6,7,8,9,10];
If you're actually doing async stuff inside the loop, you can wrap it in a promise ...
var bar = new Promise((resolve, reject) => {
foo.forEach((value, index, array) => {
console.log(value);
if (index === array.length -1) resolve();
});
});
bar.then(() => {
console.log('All done!');
});
If you have an async task inside a loop and you want to wait. you can use for await
for await (const i of images) {
let img = await uploadDoc(i);
};
let x = 10; //this executes after
Use for of instead of forEach. Like this:
for (const item of array) {
//do something
}
console.log("finished");
"finished" will be logged after finishing the loop.
forEach() doesn't return anything, so a better practice would be map() + Promise.all()
var arr = [1, 2, 3, 4, 5, 6]
var doublify = (ele) => {
return new Promise((res, rej) => {
setTimeout(() => {
res(ele * 2)
}, Math.random() ); // Math.random returns a random number from 0~1
})
}
var promises = arr.map(async (ele) => {
// do some operation on ele
// ex: var result = await some_async_function_that_return_a_promise(ele)
// In the below I use doublify() to be such an async function
var result = await doublify(ele)
return new Promise((res, rej) => {res(result)})
})
Promise.all(promises)
.then((results) => {
// do what you want on the results
console.log(results)
})
A universal solution for making sure that all forEach() elements finished execution.
const testArray = [1,2,3,4]
let count = 0
await new Promise( (resolve) => {
testArray.forEach( (num) => {
try {
//some real logic
num = num * 2
} catch (e) {
// error handling
console.log(e)
} fanally {
// most important is here
count += 1
if (count == testArray.length) {
resolve()
}
}
})
})
The idea is same with the answer using index to count. But in real case, if error happened, the index way cannot count correctly. So the solution is more robust.
Thx
const array = [1, 2, 3];
const results = [];
let done = 0;
const asyncFunction = (item, callback) =>
setTimeout(() => callback(item * 10), 100 - item * 10);
new Promise((resolve, reject) => {
array.forEach((item) => {
asyncFunction(item, (result) => {
results.push(result);
done++;
if (done === array.length) resolve();
});
});
}).then(() => {
console.log(results); // [30, 20, 10]
});
// or
// promise = new Promise(...);
// ...
// promise.then(...);
The order of results in the "results" array can be different than the order of items in the original array, depending on the time when the asyncFunction() finishes for each of the items.
Alter and check a counter at the end of every possible unique branch of code, including callbacks. Example:
const fs = require('fs');
/**
* #description Delete files older than 1 day
* #param {String} directory - The directory to purge
* #return {Promise}
*/
async function purgeFiles(directory) {
const maxAge = 24*3600000;
const now = Date.now();
const cutoff = now-maxAge;
let filesPurged = 0;
let filesProcessed = 0;
let purgedSize = 0;
await new Promise( (resolve, reject) => {
fs.readdir(directory, (err, files) => {
if (err) {
return reject(err);
}
if (!files.length) {
return resolve();
}
files.forEach( file => {
const path = `${directory}/${file}`;
fs.stat(path, (err, stats)=> {
if (err) {
console.log(err);
if (++filesProcessed === files.length) resolve();
}
else if (stats.isFile() && stats.birthtimeMs < cutoff) {
const ageSeconds = parseInt((now-stats.birthtimeMs)/1000);
fs.unlink(path, error => {
if (error) {
console.log(`Deleting file failed: ${path} ${error}`);
}
else {
++filesPurged;
purgedSize += stats.size;
console.log(`Deleted file with age ${ageSeconds} seconds: ${path}`);
}
if (++filesProcessed === files.length) resolve();
});
}
else if (++filesProcessed === files.length) resolve();
});
});
});
});
console.log(JSON.stringify({
directory,
filesProcessed,
filesPurged,
purgedSize,
}));
}
// !!DANGER!! Change this line! (intentional syntax error in ,')
const directory = ,'/tmp'; // !!DANGER!! Changeme
purgeFiles(directory).catch(error=>console.log(error));
I'm not sure of the efficiency of this version compared to others, but I used this recently when I had an asynchronous function inside of my forEach(). It does not use promises, mapping, or for-of loops:
// n'th triangular number recursion (aka factorial addition)
function triangularNumber(n) {
if (n <= 1) {
return n
} else {
return n + triangularNumber(n-1)
}
}
// Example function that waits for each forEach() iteraction to complete
function testFunction() {
// Example array with values 0 to USER_INPUT
var USER_INPUT = 100;
var EXAMPLE_ARRAY = Array.apply(null, {length: USER_INPUT}).map(Number.call, Number) // [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, n_final... ] where n_final = USER_INPUT-1
// Actual function used with whatever actual array you have
var arrayLength = EXAMPLE_ARRAY.length
var countMax = triangularNumber(arrayLength);
var counter = 0;
EXAMPLE_ARRAY.forEach(function(entry, index) {
console.log(index+1); // show index for example (which can sometimes return asynchrounous results)
counter += 1;
if (triangularNumber(counter) == countMax) {
// function called after forEach() is complete here
completionFunction();
} else {
// example just to print counting values when max not reached
// else would typically be excluded
console.log("Counter index: "+counter);
console.log("Count value: "+triangularNumber(counter));
console.log("Count max: "+countMax);
}
});
}
testFunction();
function completionFunction() {
console.log("COUNT MAX REACHED");
}
I had to deal with the same problem (forEach using multiple promises inside) and none of the solutions presented at the current date were helpful for me. So I implemented a check array, were each promise updates its complete status. We have a general promise that wraps the process. We only resolve the general promise when each promise completed. Snippet code:
function WaitForEachToResolve(fields){
var checked_fields = new Array(fields.length).fill(0);
const reducer = (accumulator, currentValue) => accumulator + currentValue;
return new Promise((resolve, reject) => {
Object.keys(fields).forEach((key, index, array) => {
SomeAsyncFunc(key)
.then((result) => {
// some result post process
checked_fields[index] = 1;
if (checked_fields.reduce(reducer) === checked_fields.length)
resolve();
})
.catch((err) => {
reject(err);
});
}
)}
}
I like to use async-await instead of .then() syntax so for asynchronous processing of data, modified the answer of #Ronaldo this way -
let finalData = [];
var bar = new Promise(resolve => {
foo.forEach((value, index) => {
const dataToGet = await abcService.getXyzData(value);
finalData[index].someKey = dataToGet.thatOtherKey;
// any other processing here
if (finalData[dataToGet.length - 1].someKey) resolve();
});
});
await Promise.all([bar]);
console.log(`finalData: ${finalData}`);
NOTE: I've modified the if condition where it resolves the promise to meet my conditions. You can do the same in your case.
You can use this, because we are using async/await inside the forEach loop. You can use your own logic inside the loop.
let bar = new Promise((resolve, reject) => {
snapshot.forEach(async (doc) => {
"""Write your own custom logic and can use async/await
"""
const result = await something()
resolve(result);
});
});
let test = []
test.push(bar)
let concepts = await Promise.all(test);
console.log(concepts);
For simple compare code i like use for statement.
doit();
function doit() {
for (var i = 0; i < $('span').length; i++) {
console.log(i,$('span').eq(i).text() );
if ( $('span').eq(i).text() == "Share a link to this question" ) { // span number 59
return;
}
}
alert('never execute');
}
If there are async (observable) method calls inside the for loop, you could use the following method:
await players.reduce(async (a, player) => {
// Wait for the previous item to finish processing
await a;
// Process this item
await givePrizeToPlayer(player);
}, Promise.resolve());
Check here: https://gist.github.com/joeytwiddle/37d2085425c049629b80956d3c618971
I've been using this and it works best .forEach()
//count
var expecting = myArray.length;
myArray.forEach(function(item){
//do logic here
var item = item
//when iteration done
if (--expecting === 0) {
console.log('all done!');
}
})