phantomjs not waiting for "full" page load

phantomjs not waiting for "full" page load - javascript

I'm using PhantomJS v1.4.1 to load some web pages. I don't have access to their server-side, I just getting links pointing to them. I'm using obsolete version of Phantom because I need to support Adobe Flash on that web pages.
The problem is many web-sites are loading their minor content async and that's why Phantom's onLoadFinished callback (analogue for onLoad in HTML) fired too early when not everything still has loaded. Can anyone suggest how can I wait for full load of a webpage to make, for example, a screenshot with all dynamic content like ads?

Another approach is to just ask PhantomJS to wait for a bit after the page has loaded before doing the render, as per the regular rasterize.js example, but with a longer timeout to allow the JavaScript to finish loading additional resources:
page.open(address, function (status) {
if (status !== 'success') {
console.log('Unable to load the address!');
phantom.exit();
} else {
window.setTimeout(function () {
page.render(output);
phantom.exit();
}, 1000); // Change timeout as required to allow sufficient time
}
});

I would rather periodically check for document.readyState status (https://developer.mozilla.org/en-US/docs/Web/API/document.readyState). Although this approach is a bit clunky, you can be sure that inside onPageReady function you are using fully loaded document.
var page = require("webpage").create(),
url = "http://example.com/index.html";
function onPageReady() {
var htmlContent = page.evaluate(function () {
return document.documentElement.outerHTML;
});
console.log(htmlContent);
phantom.exit();
}
page.open(url, function (status) {
function checkReadyState() {
setTimeout(function () {
var readyState = page.evaluate(function () {
return document.readyState;
});
if ("complete" === readyState) {
onPageReady();
} else {
checkReadyState();
}
});
}
checkReadyState();
});
Additional explanation:
Using nested setTimeout instead of setInterval prevents checkReadyState from "overlapping" and race conditions when its execution is prolonged for some random reasons. setTimeout has a default delay of 4ms (https://stackoverflow.com/a/3580085/1011156) so active polling will not drastically affect program performance.
document.readyState === "complete" means that document is completely loaded with all resources (https://html.spec.whatwg.org/multipage/dom.html#current-document-readiness).
EDIT 2022:
I created this response 8 years ago and I did not use PhantomJS since then. It is very probable it won't work now in some cases. Also now I think it is not possible to create a one-size-fits-all solution to be absolutely sure the page is loaded. This is because some pages may load additional resources after document is ready. For example, there might be some JS code on the website that waits for the document to be ready an then loads some additional assets (after document state changes to ready) - in this case the onPageReady will trigger and after that the page will start loading some more resources again.
I still think the above snipped is a good starting point and may work in most cases, but may also necessary to create a specific solutions to handle specific websites.

You could try a combination of the waitfor and rasterize examples:
/**
* See https://github.com/ariya/phantomjs/blob/master/examples/waitfor.js
*
* Wait until the test condition is true or a timeout occurs. Useful for waiting
* on a server response or for a ui change (fadeIn, etc.) to occur.
*
* #param testFx javascript condition that evaluates to a boolean,
* it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or
* as a callback function.
* #param onReady what to do when testFx condition is fulfilled,
* it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')" or
* as a callback function.
* #param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used.
*/
function waitFor(testFx, onReady, timeOutMillis) {
var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timout is 3s
start = new Date().getTime(),
condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()), //< defensive code
interval = setInterval(function() {
if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
// If not time-out yet and condition not yet fulfilled
condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
} else {
if(!condition) {
// If condition still not fulfilled (timeout but condition is 'false')
console.log("'waitFor()' timeout");
phantom.exit(1);
} else {
// Condition fulfilled (timeout and/or condition is 'true')
console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condition is fulfilled
clearInterval(interval); //< Stop this interval
}
}
}, 250); //< repeat check every 250ms
};
var page = require('webpage').create(), system = require('system'), address, output, size;
if (system.args.length < 3 || system.args.length > 5) {
console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]');
console.log(' paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
phantom.exit(1);
} else {
address = system.args[1];
output = system.args[2];
if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") {
size = system.args[3].split('*');
page.paperSize = size.length === 2 ? {
width : size[0],
height : size[1],
margin : '0px'
} : {
format : system.args[3],
orientation : 'portrait',
margin : {
left : "5mm",
top : "8mm",
right : "5mm",
bottom : "9mm"
}
};
}
if (system.args.length > 4) {
page.zoomFactor = system.args[4];
}
var resources = [];
page.onResourceRequested = function(request) {
resources[request.id] = request.stage;
};
page.onResourceReceived = function(response) {
resources[response.id] = response.stage;
};
page.open(address, function(status) {
if (status !== 'success') {
console.log('Unable to load the address!');
phantom.exit();
} else {
waitFor(function() {
// Check in the page if a specific element is now visible
for ( var i = 1; i < resources.length; ++i) {
if (resources[i] != 'end') {
return false;
}
}
return true;
}, function() {
page.render(output);
phantom.exit();
}, 10000);
}
});
}

Here is a solution that waits for all resource requests to complete. Once complete it will log the page content to the console and generate a screenshot of the rendered page.
Although this solution can serve as a good starting point, I have observed it fail so it's definitely not a complete solution!
I didn't have much luck using document.readyState.
I was influenced by the waitfor.js example found on the phantomjs examples page.
var system = require('system');
var webPage = require('webpage');
var page = webPage.create();
var url = system.args[1];
page.viewportSize = {
width: 1280,
height: 720
};
var requestsArray = [];
page.onResourceRequested = function(requestData, networkRequest) {
requestsArray.push(requestData.id);
};
page.onResourceReceived = function(response) {
var index = requestsArray.indexOf(response.id);
if (index > -1 && response.stage === 'end') {
requestsArray.splice(index, 1);
}
};
page.open(url, function(status) {
var interval = setInterval(function () {
if (requestsArray.length === 0) {
clearInterval(interval);
var content = page.content;
console.log(content);
page.render('yourLoadedPage.png');
phantom.exit();
}
}, 500);
});

Maybe you can use the onResourceRequested and onResourceReceived callbacks to detect asynchronous loading. Here's an example of using those callbacks from their documentation:
var page = require('webpage').create();
page.onResourceRequested = function (request) {
console.log('Request ' + JSON.stringify(request, undefined, 4));
};
page.onResourceReceived = function (response) {
console.log('Receive ' + JSON.stringify(response, undefined, 4));
};
page.open(url);
Also, you can look at examples/netsniff.js for a working example.

In my program, I use some logic to judge if it was onload: watching it's network request, if there was no new request on past 200ms, I treat it onload.
Use this, after onLoadFinish().
function onLoadComplete(page, callback){
var waiting = []; // request id
var interval = 200; //ms time waiting new request
var timer = setTimeout( timeout, interval);
var max_retry = 3; //
var counter_retry = 0;
function timeout(){
if(waiting.length && counter_retry < max_retry){
timer = setTimeout( timeout, interval);
counter_retry++;
return;
}else{
try{
callback(null, page);
}catch(e){}
}
}
//for debug, log time cost
var tlogger = {};
bindEvent(page, 'request', function(req){
waiting.push(req.id);
});
bindEvent(page, 'receive', function (res) {
var cT = res.contentType;
if(!cT){
console.log('[contentType] ', cT, ' [url] ', res.url);
}
if(!cT) return remove(res.id);
if(cT.indexOf('application') * cT.indexOf('text') != 0) return remove(res.id);
if (res.stage === 'start') {
console.log('!!received start: ', res.id);
//console.log( JSON.stringify(res) );
tlogger[res.id] = new Date();
}else if (res.stage === 'end') {
console.log('!!received end: ', res.id, (new Date() - tlogger[res.id]) );
//console.log( JSON.stringify(res) );
remove(res.id);
clearTimeout(timer);
timer = setTimeout(timeout, interval);
}
});
bindEvent(page, 'error', function(err){
remove(err.id);
if(waiting.length === 0){
counter_retry = 0;
}
});
function remove(id){
var i = waiting.indexOf( id );
if(i < 0){
return;
}else{
waiting.splice(i,1);
}
}
function bindEvent(page, evt, cb){
switch(evt){
case 'request':
page.onResourceRequested = cb;
break;
case 'receive':
page.onResourceReceived = cb;
break;
case 'error':
page.onResourceError = cb;
break;
case 'timeout':
page.onResourceTimeout = cb;
break;
}
}
}

I found this approach useful in some cases:
page.onConsoleMessage(function(msg) {
// do something e.g. page.render
});
Than if you own the page put some script inside:
<script>
window.onload = function(){
console.log('page loaded');
}
</script>

I found this solution useful in a NodeJS app.
I use it just in desperate cases because it launches a timeout in order to wait for the full page load.
The second argument is the callback function which is going to be called once the response is ready.
phantom = require('phantom');
var fullLoad = function(anUrl, callbackDone) {
phantom.create(function (ph) {
ph.createPage(function (page) {
page.open(anUrl, function (status) {
if (status !== 'success') {
console.error("pahtom: error opening " + anUrl, status);
ph.exit();
} else {
// timeOut
global.setTimeout(function () {
page.evaluate(function () {
return document.documentElement.innerHTML;
}, function (result) {
ph.exit(); // EXTREMLY IMPORTANT
callbackDone(result); // callback
});
}, 5000);
}
});
});
});
}
var callback = function(htmlBody) {
// do smth with the htmlBody
}
fullLoad('your/url/', callback);

This is an implementation of Supr's answer. Also it uses setTimeout instead of setInterval as Mateusz Charytoniuk suggested.
Phantomjs will exit in 1000ms when there isn't any request or response.
// load the module
var webpage = require('webpage');
// get timestamp
function getTimestamp(){
// or use Date.now()
return new Date().getTime();
}
var lastTimestamp = getTimestamp();
var page = webpage.create();
page.onResourceRequested = function(request) {
// update the timestamp when there is a request
lastTimestamp = getTimestamp();
};
page.onResourceReceived = function(response) {
// update the timestamp when there is a response
lastTimestamp = getTimestamp();
};
page.open(html, function(status) {
if (status !== 'success') {
// exit if it fails to load the page
phantom.exit(1);
}
else{
// do something here
}
});
function checkReadyState() {
setTimeout(function () {
var curentTimestamp = getTimestamp();
if(curentTimestamp-lastTimestamp>1000){
// exit if there isn't request or response in 1000ms
phantom.exit();
}
else{
checkReadyState();
}
}, 100);
}
checkReadyState();

This the code I use:
var system = require('system');
var page = require('webpage').create();
page.open('http://....', function(){
console.log(page.content);
var k = 0;
var loop = setInterval(function(){
var qrcode = page.evaluate(function(s) {
return document.querySelector(s).src;
}, '.qrcode img');
k++;
if (qrcode){
console.log('dataURI:', qrcode);
clearInterval(loop);
phantom.exit();
}
if (k === 50) phantom.exit(); // 10 sec timeout
}, 200);
});
Basically given the fact you're supposed to know that the page is full downloaded when a given element appears on the DOM. So the script is going to wait until this happens.

I use a personnal blend of the phantomjs waitfor.js example.
This is my main.js file:
'use strict';
var wasSuccessful = phantom.injectJs('./lib/waitFor.js');
var page = require('webpage').create();
page.open('http://foo.com', function(status) {
if (status === 'success') {
page.includeJs('https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min.js', function() {
waitFor(function() {
return page.evaluate(function() {
if ('complete' === document.readyState) {
return true;
}
return false;
});
}, function() {
var fooText = page.evaluate(function() {
return $('#foo').text();
});
phantom.exit();
});
});
} else {
console.log('error');
phantom.exit(1);
}
});
And the lib/waitFor.js file (which is just a copy and paste of the waifFor() function from the phantomjs waitfor.js example):
function waitFor(testFx, onReady, timeOutMillis) {
var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timout is 3s
start = new Date().getTime(),
condition = false,
interval = setInterval(function() {
if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
// If not time-out yet and condition not yet fulfilled
condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
} else {
if(!condition) {
// If condition still not fulfilled (timeout but condition is 'false')
console.log("'waitFor()' timeout");
phantom.exit(1);
} else {
// Condition fulfilled (timeout and/or condition is 'true')
// console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condi>
clearInterval(interval); //< Stop this interval
}
}
}, 250); //< repeat check every 250ms
}
This method is not asynchronous but at least am I assured that all the resources were loaded before I try using them.

This is an old question, but since I was looking for full page load but for Spookyjs (that uses casperjs and phantomjs) and didn't find my solution, I made my own script for that, with the same approach as the user deemstone .
What this approach does is, for a given quantity of time, if the page did not receive or started any request it will end the execution.
On casper.js file (if you installed it globally, the path would be something like /usr/local/lib/node_modules/casperjs/modules/casper.js) add the following lines:
At the top of the file with all the global vars:
var waitResponseInterval = 500
var reqResInterval = null
var reqResFinished = false
var resetTimeout = function() {}
Then inside function "createPage(casper)" just after "var page = require('webpage').create();" add the following code:
resetTimeout = function() {
if(reqResInterval)
clearTimeout(reqResInterval)
reqResInterval = setTimeout(function(){
reqResFinished = true
page.onLoadFinished("success")
},waitResponseInterval)
}
resetTimeout()
Then inside "page.onResourceReceived = function onResourceReceived(resource) {" on the first line add:
resetTimeout()
Do the same for "page.onResourceRequested = function onResourceRequested(requestData, request) {"
Finally, on "page.onLoadFinished = function onLoadFinished(status) {" on the first line add:
if(!reqResFinished)
{
return
}
reqResFinished = false
And that's it, hope this one helps someone in trouble like I was. This solution is for casperjs but works directly for Spooky.
Good luck !

this is my solution its worked for me .
page.onConsoleMessage = function(msg, lineNum, sourceId) {
if(msg=='hey lets take screenshot')
{
window.setInterval(function(){
try
{
var sta= page.evaluateJavaScript("function(){ return jQuery.active;}");
if(sta == 0)
{
window.setTimeout(function(){
page.render('test.png');
clearInterval();
phantom.exit();
},1000);
}
}
catch(error)
{
console.log(error);
phantom.exit(1);
}
},1000);
}
};
page.open(address, function (status) {
if (status !== "success") {
console.log('Unable to load url');
phantom.exit();
} else {
page.setContent(page.content.replace('</body>','<script>window.onload = function(){console.log(\'hey lets take screenshot\');}</script></body>'), address);
}
});

Do Mouse move while page is loading should work.
page.sendEvent('click',200, 660);
do { phantom.page.sendEvent('mousemove'); } while (page.loading);
UPDATE
When submitting the form, nothing was returned, so the program stopped. The program did not wait for the page to load as it took a few seconds for the redirect to begin.
telling it to move the mouse until the URL changes to the home page gave the browser as much time as it needed to change. then telling it to wait for the page to finish loading allowed the page to full load before the content was grabbed.
page.evaluate(function () {
document.getElementsByClassName('btn btn-primary btn-block')[0].click();
});
do { phantom.page.sendEvent('mousemove'); } while (page.evaluate(function()
{
return document.location != "https://www.bestwaywholesale.co.uk/";
}));
do { phantom.page.sendEvent('mousemove'); } while (page.loading);

Related

Checking if file exists on server without using XMLHttpRequest

I am trying to use a promise with a setInterval function to keep checking to see if a file exists(as it could be still generating on the backend) and then once it is available it runs the renderpage function as shown in my setupFunction(). Currently this program just keeps running until the setInterval maxes out and then it resolves even if the file is there.
async checkFileExist(path, timeout = 20000) {
let totalTime = 0;
let checkTime = timeout / 10;
var self = this;
return await new Promise((resolve, reject) => {
const timer = setInterval(function () {
totalTime += checkTime;
let fileExists = self.fileExist(path);
if (fileExists || totalTime >= timeout) {
alert(totalTime);
clearInterval(timer);
resolve(fileExists);
}
}, checkTime);
})
},
fileExist(urlToFile) {
var xhr = new XMLHttpRequest();
xhr.open('HEAD', urlToFile, false);
xhr.send();
if (xhr.status == "404") {
alert('false 404 ' + urlToFile)
return false;
} else {
alert('true file found ' + urlToFile)
return true;
}
},
setupFunction() {
var self = this
if (this.filedir == null || this.filedir == "") {
self.loadingFile = false
} else {
this.checkFileExist(this.filedir).then(function (response) {
self.loadingFile = false;
self.renderPage(this.pageNum);
}, function (error) {
console.error("Failed!", error);
})
}
}
},
mounted() {
this.setupFunction()
},
watch: {
filedir: function () { // watch it
this.setupFunction()
},
}

That won't tell you if a file exists. It will tell you if a URL finds a resource. It's an important distinction since a URL could be handled by server-side code and not a simple static file, while a file might exist and not have a URL.
Making an HTTP request is the simplest way to find out if a URL finds a resource. There are other ways to make HTTP requests (such as the fetch API), but they aren't faster, just different.
A potentially faster (but much more complicated) way would be to use a Websocket. You need to have it open before you wanted to know if the URL exists (otherwise any time savings are lost to establishing the Websocket connection) and you'd need to write the server side code which reacted to the Websocket message by working out if the desired URL existed and telling the client.

jQuery bug using PhantomJS

I work at the moment on an automated tests scripts which needs a headless browser (PhantomJS) and all the DOM navigation and actions possible (except downloads).
So PhantomJS should fit.
However I can't use PhantomJS with javascript jQuery as the following errors :
var page = require('webpage').create();
page.open('http://mywebsite.com/', function() {
function dothat() {
console.log('0 started');
$('#id').children().eq(0).click();
console.log('0 finished');
return main(1, 0);
}
}
dothat()
Here the scripts stop a '0 started'. The page I load uses jQuery so I though I wouldn't need to download it back. Even so, let's try with loading it.
var page = require('webpage').create();
page.open('http://mywebsite.com/', function() {
page.injectJs('jquery-2.2.1.min.js');
function dothat() {
console.log('0 started');
$('#id').children().eq(0).click();
console.log('0 finished');
return main(1, 0);
}
}
dothat()
Yes I have the file locally in the same directory...
And it shows that error (very fastly) :
TypeError: undefined is not a constructor (evaluating 'jQuery.easing[jQuery.easing.def](t,e,n,i,a)')
What do you suggest doing to fix that?
Is this an internal bug in PhantomJS and so I should try a Firefox and Xvfb on Ubuntu or is it my error somewhere?
I though first about changing code to a total non-jQuery version, but I need great selectors with classes and to click on some objects...
Thank you very much for pointing me the solution!
EDIT 1: I changed this according to what I have understand from a first answer. Now the console does not print anything
var page = require('webpage').create();
page.onConsoleMessage = function(msg, lineNum, sourceId) {
console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
};
page.open('http://mywebsite.com/', function() {
page.injectJs('jquery-2.2.1.min.js');
var result = page.evaluate(function() {
function dothat() {
console.log('0 started');
$('#id').children().eq(0).click();
console.log('0 finished');
return main(1, 0);
}
function main(x, y) {
if (x === 0) {
return setTimeout(dothat(), y);
} else if (x === 1) {
return setTimeout(dothis(), y);
}
}
return main(0, 5000); //first launch
}
});
I'm not sure that was what you was pointed out though.
EDIT 2: This is a real script that wants to go on google.com and login as me. However the script does not work as intended, see below.
var page = require('webpage').create();
page.onConsoleMessage = function(msg) {
console.log('CONSOLE: ' + msg);
};
page.open('https://google.com/', function() {
page.injectJs('jquery-2.2.1.min.js');
page.evaluate(function() {
function dofirst() {
$('#gb_70').click();
return main(1, 0);
}
function dosecond() {
document.getElementById('Email').value = 'myemail#gmail.com';
$('#next').click();
return main(2, 0);
}
function dothird() {
document.getElementById('Passwd').value = 'mypwd';
$('#signIn').click();
}
function main(i, j) {
if (i === 0) {
console.log('launching 0');
return setTimeout(dofirst(), j); // connections
}
else if (i === 1) {
console.log('launching 1');
return setTimeout(dosecond(), 5000);
}
else if (i === 2) {
console.log('launching 2');
return setTimeout(dothird(), 5000);
}
}
return main(0, 5000);
});
});
As you can see, main is a function I'm using for delaying steps.
However the code does not work and returns me this error (or those) :
TypeError: null is not an object (evaluating 'document.getElementById('Email').value = 'myemail#gmail.com'')
undefined:7 in dosecond
:22 in main
:4 in dofirst
:18 in main
:29
:30
Thank you for keeping trying to help me!

In PhantomJS there's always two javascirpt contexts where functions and variables exist:
PhantomJS context
Opened page context
They are not aware of each other and do not intersect!
PhantomJS cannot access any of the target page objects or DOM, that is why $('#id').children().eq(0).click(); won't work - there is no jQuery in PhantomJS script, there are no DOM elements. They are in the second context, or, in other words, sandbox.
To do anything with the page you have to sort of teleport code there. It is done with the help of page.evaluate() method.
var page = require('webpage').create();
page.open('http://mywebsite.com/', function() {
page.injectJs('jquery-2.2.1.min.js');
var result = page.evaluate(function(){
// You can certainly use functions
// Be sure to declare them also
// inside of page.evaluate
function dothat() {
console.log('0 started');
$('#id').children().eq(0).click();
console.log('0 finished');
return main(1, 0);
}
// Important: don't forget to return result
// back to PhantomJS main context
return dothat();
});
});
Note that to receieve console messages from sandboxed page context you must subscribe to them via page.onConsoleMessage callback:
page.onConsoleMessage = function(msg, lineNum, sourceId) {
console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
};
After EDIT 1:
My bad, I've lost a bracket editing your example (fixed above) which led to a syntax error, a fact that PhantomJS v2.x won't complain about, it'll just hang there silently. That's what you mistook for no console messages. Could be useful to test script for syntax errors in v1.9.8 first or in some IDE with syntax highlighting.
Your current code should look this (the missing bracket also fixed):
var page = require('webpage').create();
page.onConsoleMessage = function(msg, lineNum, sourceId) {
console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
};
page.open('http://mywebsite.com/', function() {
page.injectJs('jquery-2.2.1.min.js');
var result = page.evaluate(function() {
function dothat() {
console.log('0 started');
$('#id').children().eq(0).click();
console.log('0 finished');
return main(1, 0);
}
function main(x, y) {
if (x === 0) {
return setTimeout(dothat(), y);
} else if (x === 1) {
return setTimeout(dothis(), y);
}
}
return main(0, 5000); //first launch
}); //
});
Note: will complain about inextistent dothis() function.

Don't you want to do it the other way around?
function dothat() {
console.log('0 started');
$('#id').children().eq(0).click();
console.log('0 finished');
return main(1, 0);
}
var page = require('webpage').create();
page.open('http://mywebsite.com/', dothat)

PhantomJS bug in basic script

This is a script pretending to log in your google account (I've made). But obviously, that doesn't work. There is no particular objective here, but to make it work.
var page = require('webpage').create();
page.onConsoleMessage = function(msg) {
console.log('CONSOLE: ' + msg);
};
page.open('https://google.com/', function() {
page.injectJs('jquery-2.2.1.min.js');
page.evaluate(function() {
function include(arr,obj) { // those functions are not part of scraping
return (arr.indexOf(obj) != -1);
}
function add(a, b) {
return a + b;
}
Array.min = function( array ){
return Math.min.apply( Math, array );
};
function dofirst() {
$('#gb_70').click();
main(1, 0);
}
function dosecond() {
document.getElementById('Email').value = 'myemail#gmail.com';
$('#next').click();
main(2, 0);
}
function dothird() {
document.getElementById('Passwd').value = 'P4SSW0RD';
$('#signIn').click();
main(3, 0);
}
function dofourth() {
L1 = ['test', 'test2', 'google'];
for (var i = 0; i < 1; i++) {
if (L1, 'google') {
console.log('SUCCESS!');
}
}
main(4, 0);
}
function dofifth() {
$('.gb_b.gb_8a.gb_R').click()
setTimeout(function(){$('#gb_71').click()}, 500);
main(0, 5000);
}
function main(i, j) {
if (i === 0) {
console.log('launching 0');
setTimeout(dofirst(), j); // connections
}
else if (i === 1) {
console.log('launching 1');
setTimeout(dosecond(), 5000);
}
else if (i === 2) {
console.log('launching 2');
setTimeout(dothird(), 5000);
}
else if (i === 3) {
console.log('launching 3');
setTimeout(dofourth(), 5000);
} else if (i === 4) {
console.log('launching 4');
setTimeout(dofifth(), 5000);
}
}
main(0, 5000);
});
console('super end');
page.render('google.png');
});
At the end I get those errors :
CONSOLE: launching 0
CONSOLE: launching 1
TypeError: null is not an object (evaluating 'document.getElementById('Email').value = 'myemail#gmail.com'')
undefined:7 in dosecond
:22 in main
:4 in dofirst
:18 in main
:29
:30
I tried many ways and no one worked. I could make it work though with Python and selenium web driver (which was real love). But now the time has passed, and it has to be in javascript (to be completely DOM/jQuery... so Web compatible).
Can you help me please to make it work!
EDIT 1: by trying to capture a screenshot, it does save an empty PNG.
EDIT 2: I think that may be a hint, when I do phantomjs test.js, it takes a very long time to finally load and logs quickly everything...
EDIT 3: I changed document.get(...).value = 'blabla' to $('#id').val('blabla'); And now it prints
CONSOLE: launching 0
CONSOLE: launching 1
CONSOLE: launching 2
CONSOLE: launching 3
CONSOLE: SUCCESS!
CONSOLE: SUCCESS!
CONSOLE: SUCCESS!
CONSOLE: SUCCESS!
CONSOLE: SUCCESS!
However it should prints only one SUCCESS, and evidently capture still does not work.

For EDIT 1: trying to capture the screen
Check the status to make sure the page is loaded.
page.open(url, function(status) {
if (status !== 'success') {
// exit if it fails to load the page
console.log(status);
phantom.exit(1);
}
else{
// your code here
}
});

Retry when no response from website

I use the recursive function below, in order to reopen website if httpstatus != 200:
retryOpen = function(){
this.thenOpen("http://www.mywebsite.com", function(response){
utils.dump(response.status);
var httpstatus = response.status;
if(httpstatus != 200){
this.echo("FAILED GET WEBSITE, RETRY");
this.then(retryOpen);
} else{
var thisnow = hello[variable];
this.evaluate(function(valueOptionSelect){
$('select#the_id').val(valueOptionSelect);
$('select#the_id').trigger('change');
},thisnow);
}
});
}
The problem is that sometimes the retryOpen function does not even go as far as to callback function(response){}. Then, my script freezes.
I wonder how one could change the function to be able to recursively try to open website again if there is no response from website (not even some error code as 404 or something)? In other words, how to rewrite the retryOpen function so it reruns when the function does not reach callback after a certain amount of time?

I would try something like this. Please note this is untested code, but should get you on the correct path
retryOpen = function(maxretry){
var count = 0;
function makeCall(url)
{
this.thenOpen(url, function(response){
utils.dump(response.status);
});
}
function openIt(){
makeCall.call(this,"http://www.mywebsite.com");
this.waitFor(function check() {
var res = this.status(false);
return res.currentHTTPStatus === 200;
}, function then() {
var thisnow = hello[variable];
this.evaluate(function(valueOptionSelect){
$('select#the_id').val(valueOptionSelect);
$('select#the_id').trigger('change');
},thisnow);
}, function timeout() { // step to execute if check has failed
if(count < maxretry)
{
openIt.call(this);
}
count++
},
1000 //wait 1 sec
);
}
openIt();
}

AngularJS page causes Chrome to use up huge tab memory in background

I have a, fairly simple, AngularJS page that simply uses a service to pull data from the backend and show it on screen using an ng-repeat.
During normal usage after 10mins the memory for the page stabilises at around 100mb. If I switch tabs so that this page is no longer focused - it is in the background it will balloon in memory up until 1gb and then itll crash.
I am using $interval to refresh the data and I read recently that Chrome throttles intervals and timeouts when a page is in the background. Shouldn't this prevent this from happening? Anything I can do? As a last resort I have considered switching to requestAnimationFrame as I know this won't get called when the page is in the background.
$interval($scope.update, $scope.refreshInterval);
//
//
//
$scope.update = function () {
$scope.inError = false;
scheduleAppAPIservice.getSchedules().then(
function (response) {
$scope.schedules = null;
$scope.schedules = response.data;
angular.forEach($scope.schedules, $scope.createNextExecutionLabel);
angular.forEach($scope.schedules, $scope.createTrclass);
angular.forEach($scope.tabs, $scope.assignSchedulesToTab);
$scope.loading = false;
},
function () {
$scope.inError = true;
//console.log("failed getSchedules");
});
};
//
//
//
$scope.createNextExecutionLabel = function (schedule) {
schedule.nextExecutionDate = moment(schedule.NextExecution);
schedule.endExecutionDate = moment(schedule.EndExecution);
if (schedule.nextExecutionDate.year() > $scope.finishedCutoffYear) {
schedule.nextExecutionLabel = "Never";
}
else if (schedule.IsCurrentlyExecuting) {
schedule.nextExecutionLabel = "Running";
if (!isNaN(schedule.CurrentProgress) && schedule.CurrentProgress != 0) {
schedule.nextExecutionLabel += " (" + schedule.CurrentProgress + "%)";
}
else {
schedule.nextExecutionLabel += "...";
}
}
else if (schedule.nextExecutionDate < moment()) {
schedule.nextExecutionLabel = "Overdue";
}
else {
schedule.nextExecutionLabel = $filter('emRelativeTime')(schedule.nextExecutionDate);
}
}
//
//
//
$scope.createTrclass = function (schedule) {
var trclass = "system";
if (schedule.IsDisabled) {
trclass = "disabled";
}
else if (schedule.IsUserSchedule) {
if (schedule.nextExecutionDate.year() > $scope.finishedCutoffYear && schedule.ExecuteOnEvent == 0)
trclass = "finished";
else
trclass = "active";
}
schedule.trclass = trclass;
};
//
//
//
$scope.assignSchedulesToTab = function (tab) {
tab.scheduleCount = $scope.schedules.filter(function (x) {
return x.trclass == tab.label.toLowerCase();
}).length;
};

I don't have any information about background tab throttling so, the theory in this answer may not be true.
$interval works, say, every 5 seconds.
But the successful promise inside the $interval may not be resolved in 5 seconds.
So if scheduleAppAPIservice.getSchedules() doesn't resolve in 5 seconds for some reason, an increased numbers of promises will consume memory on each $interval cycle.
You may cancel previous request when starting a new one on $interval, or you can use $timeout + manual invocation of the next request when promise completed to make sure there is nothing that consumes memory in an uncontrolled way.

We Keep Coding

JavaScript is the programming language of the Web.

phantomjs not waiting for "full" page load - javascript

I found this approach useful in some cases: page.onConsoleMessage(function(msg) { // do something e.g. page.render }); Than if you own the page put some script inside: <script> window.onload = function(){ console.log('page loaded'); } </script>

Related

Checking if file exists on server without using XMLHttpRequest

jQuery bug using PhantomJS

PhantomJS bug in basic script

Retry when no response from website

AngularJS page causes Chrome to use up huge tab memory in background

Categories

Resources