I'm trying to use Recursive Loop and Promises to Scrape a website.
But it fails.. It make the request only for the first page and at the second the program stops giving to me unhandled promise rejection warning
I have this three JS files:
scrapeAll.js (is the recursive loop that calls scrapePage.js)
scrapePage.js
scrapeComponents.js
scrapeAll.js:
var indexPage = 0;
scrapePage(indexPage).then((json)=>{
console.log(JSON.stringify(json, null, 4));
if(indexPage === Number.MAX_SAFE_INTEGER){
console.log("MAX SAFE INTEGER");
return;
}
save(json);
indexpage++;
scrapePage(indexPage);
}).catch((data)=>{
console.log(data);
if(indexPage === Number.MAX_SAFE_INTEGER){
console.log("MAX SAFE INTEGER");
return;
}
indexPage++;
scrapePage(indexPage);
});
ScrapePage.JS
let makeRequestCounter = 0;
function scrapePage(number) {
return new Promise((resolve, reject) => {
let url = URL + number;
let options = {
url: url,
headers: {
Host: SITE,
Connection: "keep-alive",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
"Accept-Language": "it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7",
"Cache-Control": "max-age=0",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36",
"Cookie": restoreCookieToString()
}
};
makeRequest(options).then((jsonData) => {
resolve(jsonData);
}).catch((error) => {
//REQUEST_LIMIT_EXCEEDED
if (error === CONSTANTS.REQUEST_LIMIT_EXCEEDED) {
reject(CONSTANTS.REQUEST_LIMIT_EXCEEDED);
}
//ALREADY_EXIST
else if (error === CONSTANTS.ALREADY_EXIST) {
reject(CONSTANTS.ALREADY_EXIST);
}
else if (error === 404) {
reject("no data found at this page");
}
//error can beeconnrefused or econnreset
else if (error.code !== undefined) {
//econnrefused
if (error.code === CONSTANTS.ECONNREFUSED) {
reject("WRONG_URL", url);
}
//econnreset
else if (error.code === CONSTANTS.ECONNRESET) {
console.log("\neconnreset error\n");
makeRequest(options);
}
}
}
);
});
}
function makeRequest(options) {
return new Promise((resolve, reject) => {
let json = {
category: [],
imgs: [],
title: "",
description: "",
url: ""
};
if (makeRequestCounter === CONSTANTS.REQUEST_LIMIT) {
reject(CONSTANTS.REQUEST_LIMIT_EXCEEDED);
}
makeRequestCounter++;
console.log("request to: ", options.url);
request(options, function (error, response, html) {
if (error) {
//error: possible econnreset econnrefused
reject(error);
} else {
if (response.statusCode === 200) {
cookieSave(response.headers);
//---------- check if in db the url is already saved -------------//
check(response.request.uri.href, (err) => {
if (!err) {
reject(CONSTANTS.ALREADY_EXIST);
}
});
//----------finish checking, is new -------------------//
//GETTING TITLE
title(html, json_recipe).then((json) => {
//GETTING category
category(html, json).then((json) => {
//GETTING images
imgs(html, json).then((json) => {
description(html, json).then((json) => {
json.url = response.request.uri.href;
resolve(json);
//description error
}).catch((error) => {
console.log(error);
});
//images error
}).catch((error) => {
console.log(error);
});
//category error
}).catch((error) => {
console.log(error);
});
//title error
}
).catch((error) => {
console.log(error);
});
}
//no data in this page
if (response.statusCode === 404) {
reject(response.statusCode);
}
}
});
});
}
scrapeComponents.js
...
function description(html, json) {
return new Promise((resolve, reject) => {
const $ = cheerio.load(html);
let description = $('.submitter__description').text().trim();
json.description = JSON.parse(description);
resolve(json);
});
}
...
error:
UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 1): no data found at this page
The program make the first request and return correctly at the scrapeAll.js that correctly the scrapePage(indexPage = 1).
The second time my program do exactly same as first time but when is time to return to the scrapeAll.js ( reject("no data found at this page"); in ScrapePage.js ) the program ends with the error.
Both the pages are without data but program fails also with good pages saving only the first.
I think that I made a big mistake with promises.
Thank you very much guys.
Your call for scrapPage function is running only once and you are not calling it iteratively. You might have to call it in iteration using a function. Update your scrapeAll.js:
function callScrapPage() {
var indexPage = 0;
while (indexPage < Number.MAX_SAFE_INTEGER) {
scrapePage(indexPage).then((json) => {
console.log(JSON.stringify(json, null, 4));
save(json);
indexpage++;
}
}
}
The problem is that one or more of your calls to scrapePage(indexPage) in scrapeAll.js are failing. You cannot recursively call a promise the way you might with other code, so you need a .then and .catch on the additional calls as well. Adding a .catch to the other calls will enable you to see the true source of failure.
scrapePage(indexPage)
.then((json)=>{
console.log(JSON.stringify(json, null, 4));
if(indexPage === Number.MAX_SAFE_INTEGER){
console.log("MAX SAFE INTEGER");
return;
}
save(json);
indexpage++;
scrapePage(indexPage).catch(e => console.log(e));
})
.catch((data)=>{
console.log(data);
if(indexPage === Number.MAX_SAFE_INTEGER){
console.log("MAX SAFE INTEGER");
return;
}
indexPage++;
scrapePage(indexPage).catch(e => console.log(e));
});
Related
I am using axios for api calls.
Incase some failure happens or response status != 200 I need to retry the api call.
By default retry axios works for status with 5XX . But as per documentation we can override retryCondition as per our requirements.
Here is my code snippet
export const doApiFetchCall = (apiEndPoint, dataPayLoad, config, axiosObject, callType,caller,timeoutParam,retryCount) => {
let instance = undefined;
if(axiosObject === 'axios') {
instance = localAxios;
} else if(axiosObject === 'axiosProxy') {
instance = localAxiosProxy;
} else if (axiosObject === 'axiosProxyJira') {
instance = localAxiosProxyJira;
}
let restOptions = {
url: apiEndPoint,
method: callType,
timeout: timeoutParam || 20000, // timeout in ms
headers:config.headers||null,
raxConfig: {
retry: retryCount || 0, // number of retry when facing 4xx or 5xx
instance: instance,
retryCondition: () => true,
onRetryAttempt: err => {
let tempError = Object.assign({}, err)//{...err}
const cfg = rax.getConfig(err);
delete tempError.config;
delete tempError.request;
},
noResponseRetries: 3, // number of retry when facing connection error
httpMethodsToRetry: ['GET', 'HEAD', 'OPTIONS', 'DELETE', 'PUT', 'POST', 'PATCH'],
retryDelay: 3000,
backoffType: 'static'
}
};
var caller_id = caller||'';
if (dataPayLoad) restOptions = {...restOptions, data: dataPayLoad};
return new Promise((resolve, reject) => {
instance(restOptions)
.then(response => {
logger.info(caller_id, '[API_CALL_SUCCESS] API call has succeeded');
if (response) {
const {status, data} = response;
logger.info(caller_id, '[API_CALL_SUCCESS] API call Status Code: [' + status + ']');
try {
if (status === 200 || status === 201) {
resolve(response);
} else {
reject(null);
}
} catch (jsonParseError) {
reject(jsonParseError);
}
} else {
resolve(null);
}
})
.catch(error => {
const {response, request, message, config} = error;
reject(error);
});
I have overridden retryCondition I am not sure if its done in correct way.
Can someone please let me know what wrong I am doing ?
Got the fix .
I over ride statusCodesToRetry property.
I don't know what I could be doing wrong to get this error:
I've tried different promises, await, async combinations and nothing.
I've tried Promise.resolve()) and also .then(function().
Nothing stopped that error, what can I change to fix it?
#Controller()
export class AppController {
constructor(private httpSoap: HttpClient,
#InjectModel('product') private readonly productModel: Model<any>,
private xmlUtils: XmlUtils) { }
#EventPattern("next")
async handleMessagePrinted(data: Record<any, any>) {
let result = data;
this.createproduct(result);
this.insertproduct(result);
}
insertproduct(data: any) {
stringify(data);
this.productModel.insertMany(data);
}
async createproduct(job: any): Promise<any> {
return new Promise((async (resolve, reject) => {
// if (this.soapService.productCreate) {
const payload = job;
const xmlPayload = this.xmlUtils.parseJSONtoXML(payload);
this.insertproduct(stringify(xmlPayload)); //gravar na mongo
console.log("xmlPayload "+xmlPayload);
const headerRequest = {
'Content-Type': ContentTypeEnum.XML,
SOAPAction: SoapActionEnum.CREATE_SERVICE_product
};
const soap: ResponseInterface = await this.request("localhost:8080", xmlPayload, headerRequest, SoapType.product);
if (soap.error) {
reject(soap.error);
}
if (soap.status) {
if (soap.status.firewall.code === '000-000' || soap.status.firewall.code === '000-001') {
resolve(`product ${soap.body.Number} created successfully`);
} else if (soap.status.firewall.code === '000-998' && soap.status.fireWall.code === '623') {
reject({ error: soap.status.fireWall.description });
} else if (soap.status.firewall.code === '000-500' && soap.status.fireWall.code === 'BWENGINE-100029') {
const payloadSearch: productSearchDocument = new productSearchDocument();
payloadSearch.IsOperational = undefined;
payloadSearch.IsHistory = undefined;
payloadSearch.Qualification = `id='${job.data.id_ID}'`;
const search = await this.searchproduct(payloadSearch);
if (search.status) {
if (search.status.firewall.code === '000-000' || search.status.firewall.code === '000-001') {
resolve(`product ${soap.body.Number} created successfully`);
}
} else {
reject({ error: search.status.firewall.description, fireWallError: soap.status.fireWall.description });
}
} else {
reject({ error: soap.status.firewall.description, fireWallError: soap.status.fireWall.description });
}
}
}));
}
public async searchproduct(data: any): Promise<any> {
return new Promise((async (resolve, reject) => {
// if (this.soapService.productSearch) {
const payload = data;
const xmlPayload = this.xmlUtils.parseJSONtoXML(payload);
const headerRequest = {
'Content-Type': ContentTypeEnum.XML,
SOAPAction: SoapActionEnum.SEARCH_SERVICE_product
};
const soap: ResponseInterface = await this.request("localhost:8080", xmlPayload, headerRequest, SoapType.product);
if (soap.error) {
reject(soap.error);
}
if (soap.status) {
if (soap.status.firewall.code === '000-000' || soap.status.firewall.code === '000-001') {
resolve(soap);
} else {
reject({ error: soap.status.fireWall.description });
}
} else {
reject({ error: soap });
}
}));
}
public request(uri: string, data: any, headers: IHeaders, type: SoapType): Promise<any> {
return new Promise(((resolve) => {
this.httpSoap.request(uri, data, (async (err, res) => {
if (err) {
resolve({ error: err });
} else {
try {
console.log("fireWall response: "+data);
const bodyJson = await this.xmlUtils.formatXmlToJson(res.body);
const status: StatusInterface = await this.xmlUtils.formatStatusXML(bodyJson);
let body;
if (type === SoapType.product) {
body = await this.xmlUtils.formatproductServiceBodyXml(bodyJson);
this.insertproduct(stringify(bodyJson)); //gravar na mongo
} else if (type === SoapType.UNAVAILABILITY) {
body = await this.xmlUtils.formatImpactServiceBodyToXML(bodyJson);
} else if (type === SoapType.TASK) {
body = await this.xmlUtils.formatTaskServiceBodyXML(bodyJson);
} else {
body = '';
}
const response: ResponseInterface = {
status,
body,
};
resolve(response);
} catch (e) {
resolve(e);
}
}
}), headers);
}));
}
public simpleRequest(connection, payload): Promise<any> {
return new Promise<any>((resolve, reject) => {
const headers = {
};
this.httpSoap.request("localhost:8080", payload, (async (err, res) => {
if (err) {
resolve({ error: err });
} else {
try {
if (res.statusCode === 500) {
// const bodyJson = await this.xmlUtils.formatXmlToJson(res.body);
resolve(res.body);
} else {
const bodyJson = await this.xmlUtils.formatXmlToJson(res.body);
resolve(bodyJson);
}
} catch (e) {
reject(e);
}
}
}), headers);
});
}
}
My goal is to be able to save to mongo and also be able to make the http call to the SOAP api
This warning is shown when you don't add a rejection handler to a Promise, and it's rejected. It can be rejected when an error is occurred inside a promise, or reject() is called.
reject called:
const aa = new Promise((resolve, reject) => {
reject(new Error('whoops'));
});
aa.then(v => {
console.log(v);
});
// Running this script gives unhandled rejection warning
an error is occurred:
const aa = new Promise((resolve, reject) => {
const a = {};
// "cannot read property 'unexistingProperty' of undefined" error is thrown here
const b = a.b.unexistingProperty;
// alternatively, when an is thrown with throw
// throw new Error('oops')
});
aa.then(v => {
console.log(v);
});
// Running this script also gives unhandled rejection warning
You can add a rejection handler via then (the second argument to then() is rejection handler) or catch. For async/await you can add a try/catch block to catch error.
In node.js you can also add rejection handler to all unhandled rejected promises with process.on('unhandledRejection') like this:
process.on('unhandledRejection', error => {
console.log(error);
});
You can also see where the error is thrown with unhandledRejection event handler shown above, or you can run node.js with --trace-warnings like this.
node --trace-warnings index.js
References:
https://thecodebarbarian.com/unhandled-promise-rejections-in-node.js.html
https://nodejs.org/dist/latest-v14.x/docs/api/process.html
I'm writing a test for some code that will use Promise.race to bring back a result from a graphql service that is on (could be on) multiple servers. I've used Nock to mock the request, which works fine when I'm hitting a single service. When I mock up multiple services, Nock throws an error saying
AssertionError: expected [Function] to not throw an error but 'Error: Error: Nock: No match for request {\n "method": "POST",\n "url": "http://94.82.155.133:35204",\n "headers": {\n "content-type": "application/json",\n "accept": "application/json"\n },\n "body": "{...}"\n}' was thrown
my test looks like this:
it('should make two POST requests to the service for data from graphQL', async () => {
const spy = sinon.spy(releases, '_queryGraphQL');
const releaseID = 403615894;
nock.cleanAll();
const services = serviceDetails(NUMBER_OF_SERVICES); // NUMBER_OF_SERVICES = 3
nock(serviceDiscoveryHost)
.get('/v1/catalog/service/state51')
.reply(HTTP_CODES.OK, services);
for (const service of services) {
const currentNodeHealth = nodeHealth(service.Node);
nock(serviceDiscoveryHost)
.get('/v1/health/node/'+service.Node)
.reply(HTTP_CODES.OK, currentNodeHealth);
const delayTime = Math.floor(Math.random()*1000);
nock('http://'+service.Address+':'+service.ServicePort, serviceHeaders)
.post('/')
.delay(delayTime)
.replyWithError({code: 'ETIMEDOUT', connect: false})
.post('/')
.delay(delayTime)
.reply(HTTP_CODES.OK, getReply(releaseID))
}
const actual = await releases.getRelease(releaseID)
.catch((err) => {
console.log(releases._retries);
(() => { throw err; }).should.not.throw();
});
expect(releases._retries[releaseID]).to.be.equal(1);
expect(spy.callCount).to.be.equal(2);
expect(actual).to.be.an('object')
expect(actual.data.ReleaseFormatById.id).to.be.equal(releaseID);
});
and the offending bit of code looks like
async _queryGraphQL(releaseID, services) {
if (! this._retries[releaseID]) {
this._retries[releaseID] = 0;
}
const postData = this._getReleaseQuery(releaseID);
return Promise.race(services.map( (service) => {
const options = this._getHTTPRequestOptions(service);
return new Promise((resolve, reject) => {
let post = this.http.request(options, (res) => {
let data = '';
if (res.statusCode < 200 || res.statusCode > 299) {
const msg = this.SERVICE_NAME + ' returned a status code outside of acceptable range: ' + res.statusCode;
reject(new QueryError(msg, postData));
} else {
res.setEncoding('utf8');
res.on('data', (chunk) => {
data += chunk;
});
res.on('error', (err) => {
reject(new QueryError(err.message, postData, err));
});
res.on('end', () => {
resolve(JSON.parse(data));
});
}
});
post.on('error', async (err) => {
if (err.code === 'ETIMEDOUT') {
if (this._retries[releaseID] &&
this._retries[releaseID] === 3) {
reject(err);
} else {
this._retries[releaseID] += 1;
resolve(this._queryGraphQL(releaseID, services));
}
} else {
reject(new QueryError(err.message, postData, err));
}
});
post.write(JSON.stringify(postData));
post.end();
});
}));
}
this.http is just require('http');. and the options will be {hostname: service.hostname} \\ example.com etc.
What I'm expecting, is that if the first service to respond, responds with an error relating to: 'ETIMEDOUT', it'll recall the function (upto 2 more times) and try all the services again until the first service to respond is something that isn't a 'ETIMEDOUT'.
I'm web scraping a website and I have an array of links:
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Abercorn',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Longueuil',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Sainte-Anne-De-Bellevue',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Shawinigan',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Chateauguay',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Mont-Laurier',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Saint-Georges',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Sherbrooke',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Chicoutimi',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Montreal',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Saint-Henri-De-Levis',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Stukely-Sud',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Drummondville',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Montreal-Est',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Saint-Hubert',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Trois-Rivieres',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Gatineau',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Montreal-Nord',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Saint-Jerome',
"http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Val-D'or",
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Granby',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Montreal-Ouest',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Saint-Lambert',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Verdun',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Lachine',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Quebec',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Saint-Laurent',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Warwick',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Lasalle',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Rigaud',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Saint-Leonard',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Westmount',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Laval',
'http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Roxboro'
But when I do the request some of those links return error 403 - Forbidden.
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Verdun
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Granby
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Saint-Lambert
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Val-D'or
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Lasalle
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Laval
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Warwick
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Quebec
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Westmount
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Roxboro
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Saint-Laurent
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Rigaud
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Saint-Leonard
Error null Forbidden http://www.adventistdirectory.org/SearchResults.aspx?CtryCode=CA&StateProv=QC&City=Lachine
When I use a list with fewer links it works perfectly.
Here is my code:
const request = require('request');
const cheerio = require('cheerio');
function readChurches(cities){
const churches = []
for (let index = 0; index < cities[0].length; index++){
const city = cities[0][index];
churches.push(new Promise((resolve, reject) => {
const church = []
let options = {
url: city,
headers: {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}
};
request(options, (error, response, html) => {
if(!error && response.statusCode == 200) {
const $ = cheerio.load(html);
const $$ = cheerio.load($('table').find('tbody').eq(1).find('tr').eq(1).find('td').eq(1).html())
$$('a').each((i, el) => {
const item = $(el).attr('href')
if(item != undefined){
if(item.includes('ViewEntity')) {
church.push(`http://www.adventistdirectory.org${item}`);
}
}
});
resolve(church);
} else {
console.log('Error',error,response.statusMessage,city)
reject(error)
}
});
}))
}
return Promise.all(churches);
}
What can be done to bypass error 403?. Because when I try to open the link on my browser it works, when I use the javascript function doesn't work though.
--- NEW UPDATES ---
I've changed to code. I added a try catch block
function readChurches(cities){
const churches = []
for (let index = 0; index < cities[0].length; index++){
const city = cities[0][index];
churches.push(new Promise((resolve, reject) => {
const church = []
let options = {
url: city,
headers: {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}
};
try {
request(options, (error, response, html) => {
if(!error && response.statusCode == 200) {
const $ = cheerio.load(html);
const $$ = cheerio.load($('table').find('tbody').eq(1).find('tr').eq(1).find('td').eq(1).html())
$$('a').each((i, el) => {
const item = $(el).attr('href')
if(item != undefined){
if(item.includes('ViewEntity')) {
church.push(`http://www.adventistdirectory.org${item}`);
}
}
});
resolve(church);
}
});
} catch (error) {
console.log('Error',error,city)
reject(error)
}
}))
}
return churches
}
and also, created this function, provided by #chrispytoes
async function doStuff(churches) {
const results = [];
for(let i in churches) {
try {
console.log(churches[i])
results.push(await churches[i]);
sleep(5000);
} catch (error) {
console.log(error)
}
}
return results
}
and I'm running it:
async function run(){
let provinces = []
provinces.push(`http://www.adventistdirectory.org/BrowseStateProv.aspx?CtryCode=CA&StateProv=QC`)
let cities = await readCities(provinces);
const churches = await readChurches(cities);
const stuff = await doStuff(churches)
console.log('Churches: ', stuff);
console.log('End')
} catch (error) {
console.log('Error', error)
}
}
And I'm getting this on my console:
Promise { <pending> }
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=19653'
]
}
Promise { <pending> }
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=19637'
]
}
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=54633'
]
}
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=31155'
]
}
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=15271'
]
}
Promise { <pending> }
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=30783'
]
}
Promise { <pending> }
Promise { <pending> }
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=15265'
]
}
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=15255'
]
}
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=15251'
]
}
Promise { <pending> }
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=15247'
]
}
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=19645'
]
}
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=32838'
]
}
Promise {
[
'http://www.adventistdirectory.org/ViewEntity.aspx?EntityID=29973'
]
}
Promise { <pending> }
it is not getting to console.log('Churches: ', stuff);
The error being sent back is the choice of the server you are making the request to, so there's no universal way to "avoid" it. You are probably making the requests too fast and they are blocking you for using too much bandwidth.
Using Promise.all is making all the requests at once. You need to make a loop of sorts to make the requests go one at a time.
So something like this may work:
const wait = async (time) =>
new Promise((res, rej) => setTimeout(() => res(), time));
async function doStuff() {
const results = [];
for(let i in churches) {
await wait(1000);
const result = await churches[i];
console.log(result);
results.push(result);
}
}
I have a function in an AWS Lambda function that runs fine by the node command in AWS Cloud9, but fails when run locally on Docker-Lambda. The function in question is below, console.logs with a "!" prefix are successfully logged. Those without are not.
async function getCoworkerById(id) {
console.log('!*several enter getCoworkerById');
return new Promise((resolve, reject) => {
console.log('!*several enter getCoworkerById Promise');
request({
url: 'https://spaces.nexudus.com/api/spaces/coworkers',
qs: {
Coworker_Id: id
},
headers: {
'Content-Type': 'application/json',
'Accept': 'application/json',
'Authorization': 'Basic ' + new Buffer(process.env.nexudusEmail + ':' + process.env.nexudusPassword, 'utf8').toString('base64')
}
}, (error, response, body) => {
console.log("enter getCoworkerById 2");
if (error) {
console.log("enter getCoworkerById error");
reject(error);
} else {
console.log("enter getCoworkerById response");
if (response.statusCode === 200) {
console.log('recieved 200 from getcoworkerbyid')
resolve(JSON.parse(body).Records[0]);
} else if (response.statusCode === 409) {
console.log('recieved 409 from getcoworkerbyid')
setTimeout(() => {
resolve(getCoworkerById(id));
}, response.headers['Retry-After'] * 1000);
} else {
reject(response);
}
}
});
});
}
The function is called in this block, notice that I'm using the async library:
const coworkerPromises = [];
async.eachLimit(coworkerIds, 3, (id, callback) => {
console.log('!*several enter each limit');
try {
console.log('!*several enter each limit try');
coworkerPromises.push(getCoworkerById(id));
callback();
} catch (e) {
console.log('Error getting coworker by ID: ', e);
}
});
try {
Promise.all(coworkerPromises).then(coworkers => {
console.log("Enter promise all");
console.log(coworkers[coworkers.length - 1]);
});
} catch (e) {
console.log(e);
}
This one has me really stumped. Why would we never get to the second half of the function without an error or anything?