Nightmare itself is working (I'm only testing the tool), but anyway, the main problem is: why isn't my function stuck in an infinite loop, since I didn't put any stop condition on the page? Maybe I'm doing this wrong?
What I wanted was: whenever a page has loaded, I get its title along with the page number, then call the function again for the next page, until the last page.
I also tried setTimeout, without success.
My console log just prints page 1 and then finishes.
The code snippet is here:
// Question snippet: visits url_base page by page and logs each page's title.
// `nightmare` and `url_base` are defined outside this snippet.
var pagn = 1;
// Loads page number `page`, logs "<title>:<page>", then calls itself with the
// next value of the global counter `pagn`.
function ab(page){
nightmare.goto(url_base+"&page="+page)
.evaluate(() => {
return document.title;
})
// NOTE(review): .end() is called on every pass; per the answer that follows this
// snippet, ending the session here is why only the first page is ever logged.
.end()
.then((title) => {
console.log(title + ":" + page);
ab(++pagn);
//setTimeout("page(" + page + ")", 5000);
// The caught error itself is discarded; only the fixed text "Error" is logged.
}).catch(()=>{console.log("Error");});
}
ab(pagn);
The problem is that you are ending your nightmare session with the .end() statement, which stops the nightmare engine, and so node exits after running through the remaining .then statements.
To test your code, I rewrote your function a bit, so that it scrapes a particular website, and exits when it finds the same page multiple times (which is kinda my test scenario, so you might have to adapt it for your code)
const Nightmare = require('nightmare')
const nightmare = Nightmare({ show: true })
/**
 * Visits consecutive pages of a paged URL and collects each page's title,
 * stopping as soon as a title repeats or any step of the chain fails.
 *
 * @param {string} targetUrl - Base URL handed to `transform`.
 * @param {number} [curPage=0] - Page index to visit on this call.
 * @param {Function} [transform] - Builds the URL to load from (url, page).
 * @param {Set} [pageSet] - Titles collected so far; shared across the recursion.
 * @returns {Promise} Resolves with `{ maxPages, pages }`, where `maxPages` is the
 *   page index at which the run stopped and `pages` is the set of titles.
 */
function scrapePages( targetUrl, curPage = 0, transform = (url, page) => `${url}?page=${page}`, pageSet = new Set() ) {
  console.info( `Trying to scrape page ${transform( targetUrl, curPage )}` );

  // Stores a new title and moves on to the next page; a repeated title aborts the run.
  const recordTitleAndContinue = ( title ) => {
    if ( pageSet.has( title ) ) {
      throw 'page already exists';
    }
    pageSet.add( title );
    console.info( `${title}:${curPage}` );
    return scrapePages( targetUrl, curPage + 1, transform, pageSet );
  };

  // Any failure (including the repeated-title signal) ends the run with a summary.
  const summarize = ( err ) => {
    console.error( err );
    return { maxPages: curPage, pages: pageSet };
  };

  const pageLoad = nightmare.goto( transform( targetUrl, curPage ) );
  return pageLoad
    .evaluate( () => document.title )
    .then( recordTitleAndContinue )
    .catch( summarize );
}
// Kick off the scrape, then shut Nightmare down once every page has been visited.
scrapePages( 'some-paged-url', 0, (url, page) => url + '/' + (page + 1) )
  .then( ({ maxPages }) => {
    // End the nightmare process only after scraping has finished. The promise is
    // returned so that a failure inside end() reaches the catch() below.
    return nightmare.end().then( () => {
      console.info(`Found ${maxPages} pages`);
    });
  } )
  .catch( err => console.error('Error occured', err ) );
The biggest difference, as you can see, is that the nightmare process is only ended once the scraping has run through. At that point, you have the total number of pages available, as well as all the pages that were loaded successfully.
You should not pass page as a parameter when it is also defined as a global variable; otherwise it will be overwritten every time.
// Answer snippet: same loop as the question, but reading the page number from the
// global `page` instead of a parameter.
var page = 1;
function ab(){
nightmare.goto(url_base+"&page="+page)
.evaluate(() => {
return document.title;
})
.end()
.then((title) => {
console.log(title + ":" + page);
// ab() declares no parameters, so the argument value is ignored; the
// post-increment still bumps the global `page` before the next call reads it.
ab(page++);
//setTimeout("page(" + page + ")", 5000);
// NOTE(review): there is no .catch() on this chain, so a rejection goes unhandled.
});
}
ab();
What if nightmare.goto() rejects? You should implement catch().
page++ doesn't pass the incremented value, because it is the post-increment operator. page + 1 or ++page should do the trick.
// Answer snippet: passes the page number explicitly and retries on failure.
var page = 1;
// The parameter `page` shadows the global of the same name inside ab().
function ab(page){
nightmare.goto(url_base+"&page="+page)
.evaluate(() => {
return document.title;
})
// NOTE(review): .end() is still called on every pass here, which the earlier
// answer identifies as the cause of the original problem — confirm before reuse.
.end()
.then((title) => {
console.log(title + ":" + page);
ab(page+1);
//setTimeout("page(" + page + ")", 5000);
}).catch(error => {
console.error('Search failed:', error)
// NOTE(review): retries the same page with no attempt limit or delay, so a
// failure that keeps happening would repeat indefinitely.
ab(page);
});
}
ab(page);
Related
I have done several tests and read a lot about Garbage Collection in Javascript applications but there must be something I'm not getting right.
I have a react app that polls data from a JSON file using fetch every 25 seconds. The function I'm calling is mapped to the props using redux in the App.js. I'm calling the function like this on componentDidMount
this.fetchDisplayContent(1000);
This is the function's definition which is also located in the App.js
// Schedules one content fetch after `fetchInterval` ms, replacing any fetch that
// is still pending. When `repeat` is truthy the callback re-arms itself by calling
// this method with no arguments, so every later round uses the default 500 ms.
fetchDisplayContent = (fetchInterval = 500, repeat = true) => {
  const pollOnce = () => {
    switch (this._mode) {
      case "preview":
        this.props.fetchContent(config.WEB_SERVER + '/display/fetchDisplayContents?mode=prvw&localLastUpdate=0&display=' + this._displayId, 0, this._webkey);
        break;
      case "web": // live web version
        this.props.fetchContent(config.WEB_SERVER + '/display/fetchDisplayContents?mode=web&localLastUpdate=0&display=' + this._displayId, 0, this._webkey);
        break;
      case "module":
        this.props.fetchModuleContent(config.WEB_SERVER + '/display/fetchModuleContents?mode=prvw&module=' + this._module + '&project=' + this._project + '&module_id=' + this._module_id, this._webkey);
        break;
      default: {
        // Any other mode reads from the offline file; the mode is passed as the third argument.
        const offlineUrl = typeof config.DISPLAY === "number"
          ? this._baseURL + '/data/displayContents.json'
          : this._baseURL + '/data/display_' + this._displayId + '_contents/displayContents.json';
        this.props.fetchContent(offlineUrl, 0, this._mode);
      }
    }
    if (repeat) {
      this.fetchDisplayContent();
    }
  };

  // Only one timer is kept alive at a time.
  clearTimeout(this._contentInterval);
  this._contentInterval = setTimeout(pollOnce, fetchInterval);
}
And this is the action:
/**
 * Builds a thunk that fetches display content as JSON and dispatches the result.
 *
 * A `rev` query parameter (current time in seconds) is appended to the URL,
 * followed by `webkey` when one is given. `contentFetchSuccess` is dispatched only
 * when the payload's `last_update` is numerically greater than `lastUpdated`;
 * any failure in the chain dispatches `contentHasErrored(true)`.
 *
 * @param {string} url - Address to fetch; may already contain a query string.
 * @param {number|string} lastUpdated - Compared against `content.last_update`.
 * @param {string} [webkey] - Appended as `&webkey=` when truthy.
 * @returns {Function} Thunk taking `dispatch`. It returns the request promise,
 *   which settles once the response (or the failure) has been handled.
 */
export function fetchContent(url, lastUpdated, webkey) {
  return (dispatch) => {
    // Built into a local so that invoking the same thunk again does not stack
    // another rev/webkey pair onto the original url.
    const rev = Date.now() / 1000;
    const separator = url.includes('?') ? '&' : '?';
    let requestUrl = `${url}${separator}rev=${rev}`;
    if (webkey) {
      requestUrl += `&webkey=${webkey}`;
    }

    // Returned so callers can wait for, or chain on, the request.
    return fetch(requestUrl)
      .then((response) => {
        if (!response.ok) {
          throw new Error(response.statusText);
        }
        return response.json();
      })
      .then((content) => {
        // Content that is not newer than what the caller already has is ignored.
        if (Number(content.last_update) > Number(lastUpdated)) {
          dispatch(contentFetchSuccess(content));
        }
      })
      .catch(() => {
        dispatch(contentHasErrored(true));
      });
  };
}
fetchInterval is originally 25000 but I changed it to 500 for testing purposes and reproduce the issue faster.
Here you can see that references to the handler passed to setTimeout are not being cleaned up by the GC, and the heap keeps growing over time. Eventually the app freezes after a few days, and I suspect this is the reason. I confirmed in the profile that neither the number of listeners nor the heap size increases when the setTimeout is not running.
Performance Profile
What would be the proper way to call this function in a setTimeout without leaking memory?
// Question snippet (reported as not working). `time`, `date`, `guild` and
// `Discord` come from code that is not shown here.
var dateTime = time+' '+date;
// The embed is built once, so the description text computed above is reused
// unchanged on every interval tick.
const boosterembed = new Discord.RichEmbed()
.setTitle("Boosters")
.setColor('#ff0000')
.setDescription("Last Time updated: " + dateTime)
.setTimestamp()
// Runs every 1000 ms. NOTE(review): per the answer that follows, a channel has no
// `message` property and fetchMessages is meant for fetching multiple messages, so
// this call chain throws.
setInterval(function(){guild.channels.get('740327839177375765').message.channel.fetchMessages('740327893103673466').edit(boosterembed)}, 1000)
Why is this not working? Can't add more (looks like your post is mostly code)
I'm assuming you're using Discord.js v11.
First, fetchMessages is for fetching multiple messages from a channel. You'll want to use fetchMessage instead. Second, you use edit to edit a message. Third, a channel does not have a message property, so guild.channels.get(...).message.channel will throw an error.
// Re-sends the embed to the same message once per second.
const refreshBoosterMessage = () => {
  const channel = guild.channels.get('740327839177375765')
  return channel.fetchMessage('740327893103673466')
    .then(message => message.edit(boosterembed))
    // Log every failure so no promise rejection is left unhandled
    .catch(console.error)
}
setInterval(refreshBoosterMessage, 1000)
// Or with async/await:
// Same once-per-second refresh, written as a named async function.
async function refreshBoosterMessageAsync() {
  try {
    const channel = guild.channels.get('740327839177375765')
    const message = await channel.fetchMessage('740327893103673466')
    await message.edit(boosterembed)
  } catch (error) {
    // Log instead of letting the rejection go unhandled
    console.error(error)
  }
}
setInterval(refreshBoosterMessageAsync, 1000)
This answer is adapted from cherryblossom's answer, make sure you upvote it.
/**
 * Formats the current local time as "H:M:S D/M/YYYY" (no zero padding).
 * @returns {string} Time first, then the date, separated by one space.
 */
function getDate() {
  const now = new Date();
  const clock = [now.getHours(), now.getMinutes(), now.getSeconds()].join(':');
  // getMonth() is zero-based, hence the + 1.
  const calendar = [now.getDate(), now.getMonth() + 1, now.getFullYear()].join('/');
  return `${clock} ${calendar}`;
}
// Re-render the embed with a fresh timestamp once per second.
setInterval(
  () =>
    bot.channels.get('740327839177375765').fetchMessage('740327893103673466')
      // getDate() is the helper defined above that returns the formatted current
      // time; no function named getTime() exists in this snippet.
      .then(message => message.edit(new Discord.RichEmbed(boosterembed).setDescription('Last Time updated: ' + getDate())))
      // This catches all errors and logs them to stop unhandled promise rejections
      .catch(console.error),
  1000
)
// Or with async/await:
// Re-render the embed with a fresh timestamp once per second (async/await form).
setInterval(
  async () => {
    try {
      const message = await bot.channels.get('740327839177375765').fetchMessage('740327893103673466')
      // getDate() is the helper defined above that returns the formatted current
      // time; no function named getTime() exists in this snippet.
      await message.edit(new Discord.RichEmbed(boosterembed).setDescription('Last Time updated: ' + getDate()))
    } catch (error) {
      // Logged so the rejection does not go unhandled.
      console.error(error)
    }
  },
  1000
)
I guess this is more of a question regarding how to use Promises correctly, which i don't grok:
According to this site (https://ourcodeworld.com/articles/read/405/how-to-convert-pdf-to-text-extract-text-from-pdf-with-javascript), we extract text from a page this way:
// assume pdf file has been loaded
/**
 * Extracts the text of one page of an already-loaded PDF document.
 *
 * @param {number} pageNum - Page number passed to `getPage`.
 * @param {Object} PDFDocumentInstance - Object exposing `getPage(pageNum)`, which
 *   yields a page exposing `getTextContent()`.
 * @returns {Promise<string>} Resolves with every text item's `str` followed by a
 *   single space, concatenated in order. Rejects if loading the page or reading
 *   its text content fails.
 */
function getPageText(pageNum, PDFDocumentInstance) {
  // Starting from a resolved promise turns a synchronous throw in getPage() into
  // a rejection, and chaining (rather than wrapping in `new Promise`) lets every
  // failure propagate to the caller instead of leaving the promise pending.
  return Promise.resolve()
    .then(() => PDFDocumentInstance.getPage(pageNum))
    // The main trick to obtain the text of the PDF page: use getTextContent
    .then((pdfPage) => pdfPage.getTextContent())
    // Concatenate the string of each item, each followed by a space
    .then((textContent) => textContent.items.map((item) => item.str + " ").join(""));
}
I want to search for a certain string through all the pages till i find the page with that string. i tried the obviously wrong way of calling the above function in a for loop, but didn't know how to end when the string was found.
Thanks for the assist!
here's a lame-ish attempt. it's recursive (wish it wasn't), and although it finds the text and gets to the resolve() call, i have no idea where execution goes from there, because it doesn't log to console as i had hoped:
/**
 * Reads the search term from the #textToFind input and searches for it starting
 * at page 1, logging either the page it was found on or the rejection reason.
 */
function findText() {
  const needle = document.getElementById('textToFind').value;

  const reportHit = ( pageIndex ) => {
    // The question's author reports that this handler is never reached, even
    // though the resolve() call inside findIt is.
    console.log( 'Found ' + needle + ' on page ' + pageIndex );
  };

  const reportMiss = ( reason ) => {
    console.log(reason);
  };

  findIt( 1, needle ).then( reportHit, reportMiss );
}
// Searches pages from pageIndex onwards for textToFind. Written to resolve with
// the index of the first page containing the text, or reject once the pages run
// out. `pdfObject` and `getPageText` come from outside this function.
function findIt( pageIndex, textToFind ) {
return new Promise( function( resolve, reject ) {
if ( pageIndex > pdfObject.numPages-1 ) {
// There is no return after reject(), so getPageText() below is still called.
reject("Couldn't find " + textToFind);
}
// Only pageIndex is passed here; no second argument is supplied.
getPageText( pageIndex ).then( function( pageText ) {
if ( pageText.indexOf(textToFind) === -1 ) {
// NOTE(review): the promise returned by this recursive call is discarded, and
// neither resolve nor reject of THIS promise is invoked on this path. The promise
// handed back to the caller therefore stays pending even when a later page
// matches; only the inner, unobserved promise gets resolved.
findIt( pageIndex+1, textToFind );
}
else {
resolve(pageIndex); // in the debugger, i get here
}
});
});
}
I need to change the text and style of the "Get next" button to "Loading...",
synchronously retrieve a random number of record IDs from a "server", and asynchronously retrieve the corresponding records from the "server", only proceeding when all records have been received.
Then sort the records in date order, oldest first, and at the end reset the button to its original state.
The code is as follows
// Module state: flipped on every click and passed to toggleButton().
let loading = true;
// Click handler: flips `loading`, updates the button, then starts fetching records.
// NOTE(review): `loading` starts as true, so the first click passes false to
// toggleButton(), which selects the 'Get next' label rather than 'Loading...' —
// confirm this is intended.
const buttonHandler = function () {
loading = !loading;
toggleButton(loading);
getRecords();
};
// The button is looked up once, when this script runs, and wired to the handler.
const btn = document.getElementById('get-records');
btn.addEventListener('click', buttonHandler);
/**
 * Sets the button label and flips both loading-state CSS classes.
 * @param {boolean} loaded - Truthy shows 'Loading...', falsy shows 'Get next'.
 */
function toggleButton(loaded) {
  if (loaded) {
    btn.innerHTML = 'Loading...';
  } else {
    btn.innerHTML = 'Get next';
  }
  // Each call flips both classes, whatever the value of `loaded`.
  ['button-not-loading', 'button-loading'].forEach(function (stateClass) {
    btn.classList.toggle(stateClass);
  });
}
/**
 * Fetches every record whose ID the server hands out and passes the successfully
 * retrieved ones to processRecords() exactly once, after every request has
 * called back (with either a record or an error).
 */
function getRecords() {
  // getting the IDs of the records to fetch is a synchronous operation
  const ids = Server.getIds();
  const allTheRecords = [];
  // Number of requests that have not called back yet.
  let outstanding = ids.length;

  // Nothing to wait for: hand over the empty list straight away.
  if (outstanding === 0) {
    processRecords(allTheRecords);
    return;
  }

  // getting each corresponding record is an async operation
  ids.forEach(function (recordId) {
    // Server.getRecord(recordId, callback) fetches a SINGLE record; the callback
    // receives only the error on failure, and (null, data) on success.
    Server.getRecord(recordId, function (error, data) {
      if (error) {
        // A failed fetch (e.g. a record that does not exist) is logged and skipped,
        // but still counts as finished so the app cannot hang waiting for it.
        console.log(error);
      } else {
        allTheRecords.push(data);
      }

      outstanding -= 1;
      if (outstanding === 0) {
        // Every request has completed, so the list can be rendered now.
        processRecords(allTheRecords);
      }
    });
  });
}
/**
 * Renders the given records, sorted by date, as table rows inside #results-body
 * and then updates the totals row.
 *
 * @param {Array<Object>} records - Records with date, name, natInsNumber,
 *   hoursWorked and hourlyRate properties.
 */
function processRecords(records) {
  toggleButton(true);
  const sortedRecords = sortRecords(records);

  // Array callbacks receive (element, index, array): the record is the FIRST
  // argument. Reading the fields off the second argument (the index) rendered
  // every cell as "undefined".
  // NOTE(review): values are inserted as raw HTML; escape them if the records
  // can ever contain untrusted text.
  const html = sortedRecords.map(function (record) {
    return '<tr>' +
      '<td>' + record.date + '</td>' +
      '<td>' + record.name + '</td>' +
      '<td>' + record.natInsNumber + '</td>' +
      '<td>' + record.hoursWorked + '</td>' +
      '<td>' + record.hourlyRate + '</td>' +
      '<td>' + (record.hoursWorked * record.hourlyRate) + '</td>' +
      '</tr>';
  }).join('');

  document.getElementById('results-body').innerHTML = html;
  addTotals(sortedRecords);
}
/**
 * Orders records by their `date` field, oldest first (most recent last).
 * The array is sorted in place and the same array is returned.
 *
 * @param {Array<Object>} records - Records whose `date` is parseable by `new Date`.
 * @returns {Array<Object>} The `records` array itself, now sorted.
 */
function sortRecords(records) {
  const byDateAscending = function (left, right) {
    return new Date(left.date) - new Date(right.date);
  };
  records.sort(byDateAscending);
  return records;
}
/**
 * Sums hours worked and amount paid over all records and writes the results
 * into the totals cells (#totals-annot, #totals-hours, #totals-paid).
 *
 * @param {Array<Object>} records - Records with hoursWorked and hourlyRate.
 */
function addTotals(records) {
  const totals = records.reduce(function (sum, record) {
    return {
      hours: sum.hours + record.hoursWorked,
      paid: sum.paid + (record.hoursWorked * record.hourlyRate),
    };
  }, { hours: 0, paid: 0 });

  const cellContents = [
    ['totals-annot', 'TOTALS'],
    ['totals-hours', totals.hours],
    ['totals-paid', totals.paid],
  ];
  cellContents.forEach(function (entry) {
    document.getElementById(entry[0]).innerHTML = entry[1];
  });
}
There is no question there, but I'll give a vague pseudo-code answer, which should be enough to point you in the right direction.
Keyword = Promise.
/**
 * Requests the list of record IDs from the server.
 * @returns {Promise} Resolves with the response data; rejects if the request fails.
 */
const loadRecordIds = () => {
  return new Promise((resolve, reject) => {
    jQuery.get('http://localhost/recordIds').then((data) => {
      // do something with the data ... e.g parse/validate
      resolve(data);
    }, reject); // forward a failed request instead of leaving this promise pending
  });
};
/**
 * Requests the records for the given IDs from the server.
 * @param {*} recordIds - Appended to the `recordIds` query parameter as-is.
 * @returns {Promise} Resolves with the response data; rejects if the request fails.
 */
const loadRecords = (recordIds) => {
  return new Promise((resolve, reject) => {
    jQuery.get('http://localhost/records?recordIds='+recordIds).then((data) => {
      // check the data for errors etc
      resolve(data);
    }, reject); // forward a failed request instead of leaving this promise pending
  });
};
// Placeholder with an empty body: the button-styling logic is meant to go here.
const toggleButton = () => {
// toggle your button styles
};
// and you use the functions in sequence using .then() or async keyword(if you have a preprocessor or dont care about old browsers)
loadRecordIds()
  .then((recordIds) => {
    // now you have your recordIds loaded
    toggleButton();
    // Returned so the next then() waits for the records, and so a failure in
    // either request reaches the catch() at the end of the chain.
    return loadRecords(recordIds);
  })
  .then((records) => {
    // now you have your records available for further processing
  })
  .catch((error) => {
    // handle errors
    console.error(error);
  });
// with async await keywords you could do the same like this.
// Same load-IDs, toggle, load-records sequence using await.
// NOTE(review): `await` outside a function requires an ES module or an enclosing
// async function — confirm where this sample is meant to run.
try {
const recordIds = await loadRecordIds();
toggleButton();
// `records` is not used any further in this sample.
const records = await loadRecords(recordIds);
} catch (error) {
// handle errors
}
If you dont know what promises are, google them.
// ok, ill throw in a quick sample of an async code that runs in "sync" using promises.
/**
 * First step of the demo: resolves after a 3000 ms timer fires.
 * Declared with `const` so it is not an implicit global (which throws in strict
 * mode and in ES modules).
 * @returns {Promise<string>} Resolves with the step's message.
 */
const step1 = () => {
  return new Promise((resolve, reject) => {
    setTimeout(() => {
      // time has run out now, and its time for the second step
      // calling "resolve" will call the "then" function and allows the code to continue
      // whatever you pass in as the argument for resolve() will be a parameter in the "then()" function callback.
      // NOTE: the delay is 3000 milliseconds; the message text is left unchanged
      // because the expected-output comment further down quotes it verbatim.
      resolve('3000 seconds has passed, time to continue.');
    }, 3000);
  });
};
/**
 * Second step of the demo: resolves after a 2000 ms timer fires.
 * Declared with `const` so it is not an implicit global (which throws in strict
 * mode and in ES modules).
 * @returns {Promise<string>} Resolves with the step's message.
 */
const step2 = () => {
  return new Promise((resolve, reject) => {
    setTimeout(() => {
      // NOTE: the delay is 2000 milliseconds; the message text is left unchanged
      // because the expected-output comment further down quotes it verbatim.
      resolve('2000 seconds has passed, time to continue.');
    }, 2000);
  });
};
// Runs step1, then step2, then a final 2000 ms timer, logging after each stage.
step1()
  .then((firstMessage) => {
    console.log(firstMessage);
    // Returned so the next then() only runs once step2 has finished.
    return step2();
  })
  .then((secondMessage) => {
    console.log(secondMessage);
    setTimeout(() => {
      console.log('and now the script is done...all in sequence');
    }, 2000);
  })
  .catch((error) => {
    // Neither step rejects as written; this keeps any future failure from going unhandled.
    console.error(error);
  });
/*
this will output
3000 seconds has passed, time to continue.
2000 seconds has passed, time to continue.
and now the script is done...all in sequence
*/
I have the below js function in my vue.js single file component, I want to try catch the exception when the URL requested by axios.post is not reachable. I have put the whole code in try block but I still don't get the alert I have put in the catch block.
Updated using .catch
deploySelected: function(){
this.showStatus = true ;
// animate open the status window.
$("#status_update").animate({height: '500'})
var url = "http://test-web-machine01.localsite.com:5060/scripts/request_deploy";
axios.post(url)
.then(response => {
if (typeof response.data.reason != "undefined"){
alert("Recieved Status: " + response.data.status + ",\nReason: " + response.data.reason);
}
var req_id = response.data.result.request_id;
this.statusMessage = "Initiating deployment of Scripts for Request ID: " + req_id ;
})
.catch((err) => alert(err))
console.log(url);
}
I got it working like this: .catch((err) => alert(err)) thanks #Jaromanda