I am trying to scrape a webpage which has a form with many dropdowns and values in the form are interdependent. At many point I need the code to wait till the refresh of the page complete. Eg after selecting an option from the list, the code should wait till the next list is populated based on this selection. It would be really helpful if someone could give pointers because strangely my code is working only after I gave so much unnecessary logging statements which in-turn created some delay. Any suggestions to improve the code would be very helpful.
var casper = require('casper').create({
verbose: true,
logLevel: 'debug',
userAgent: 'Mozilla/5.0 poi poi poi (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.172 Safari/537.22',
pageSettings: {}
});
casper.start('http://www.abc.com', function () {
console.log("casper started");
this.fill('form[action="http://www.abc.com/forum/member.php"]', {
quick_username: "qwe",
quick_password: "qwe"
}, true);
this.capture('screen.png');
});
casper.thenOpen("http://www.abc.com/search/index.php").then(function () {
this.click('input[type="checkbox"][name="firstparam"]');
this.click('a#poi');
casper.evaluate(function () {
document.getElementsByName("status")[0].value = 1;
document.getElementsByName("state")[0].value = 1078;
changeState(); //This function is associated with the dropdown ie state
and the page reloads at this point. Only after complete refresh the code shoud execute! How can this be achieved?
return true;
});
this.echo('Inside the first thenOpen' + this.evaluate(function () {
return document.search.action;
}));
});
casper.then(function () {
this.capture("poi.png");
console.log('just before injecting jquery');
casper.page.injectJs('./jquery.js');
this.click('input[type="checkbox"][name="or"]');
this.evaluate(function () {
$('.boxline .filelist input:checkbox[value=18127]').attr("checked", true);
});
this.echo('Just before pressing the add college button' + this.evaluate(function () {
return document.search.action;
}));
this.capture('collegeticked.png');
if (this.exists('input[type="button"][name="niv"]')) {
this.echo('button is there');
} else {
this.echo('button is not there');
}
this.echo("Going to print return value");
this.click('input[type="button"][name="poi"]'); // This click again causes a page refresh. Code should wait at this point for completion.
this.echo('Immediately after pressing the add college btn getPresentState()' + this.evaluate(function () {
return getPresentState();
}));
this.echo('Immediately after pressing add colleg button' + this.evaluate(function () {
return document.search.action;
}));
this.capture('iu.png');
});
casper.then(function () {
console.log('just before form submit');
this.click('form[name="search"] input[type="submit"]'); //Again page refresh. Wait.
this.echo('Immediately after search btn getPresentState()' + this.evaluate(function () {
return getPresentState();
}));
this.echo('Immediately after search button-action' + this.evaluate(function () {
return document.search.action;
}));
this.capture("mnf.png");
});
casper.then(function () {
casper.page.injectJs('./jquery.js');
this.capture("resultspage.png");
this.echo('Page title is: ' + this.evaluate(function () {
return document.title;
}), 'INFO');
var a = casper.evaluate(function () {
return $('tbody tr td.tdbottom:contains("tye") ').siblings().filter($('td>a').parent());
});
console.log("ARBABU before" + a.length);
});
casper.run();
I've been using the waitForSelector 'workaround' mentioned by Arun here:
https://stackoverflow.com/a/22217657/1842033
It's the best solution I've found; the 'drawback' as it were is that you need to be aware of what element you're expecting to load. I say drawback, personally I don't think I've encountered a situation where I've not had some kind of feedback saying that whatever I'm waiting for has happened
this.waitForSelector("{myElement}",
function pass () {
test.pass("Found {myElement}");
},
function fail () {
test.fail("Did not load element {myElement}");
},
20000 // timeout limit in milliseconds
);
Although I'd guess you could use waitForResource() or something like that if you didn't have visual feedback.
What I've taken to doing to get around this issue, when there isn't anything specific to target and wait for in the reloaded page, is to use the following:
var classname = 'reload-' + (new Date().getTime()),
callback = function(){},
timeout = function(){};
/// It happens when they change something...
casper.evaluate(function(classname){
document.body.className += ' ' + classname;
}, classname);
casper.thenClick('#submit'); /// <-- will trigger a reload of the page
casper.waitWhileSelector('body.' + classname, callback, timeout);
This way I don't have to rely on a specific expected element in the next page, I've basically done the inverse. I've created a specific selector to watch out for, and execution moves on once that selector fails to match.
For my intents and purposes it was enough to know the page had begun reloading, I didn't need to wait until the next page had fully reloaded. This is so that I could then trigger certain waitForSelector calls on elements that may have existed both before and after the reload. Waiting until the temporary class has been removed lets me know that anything that existed before has since been destroyed, so no fear of selecting elements prior to the reload.
Seems there are no real solutions.
https://casperjs.readthedocs.io/en/latest/modules/casper.html#waitforselector is an available workaround which may not work always.
I have the same experience doing the same thing as you. script these way in user perspective never gone well. it crash in middle of nowhere and very unreliable. I was doing search from salesforce that also require login.
You need to keep your step as minimum as possible. script in a cron job way. don't do form fill/button click unless you are doing UI testing. I would advice you to break the process into two parts
// this part do search and find out the exact url of your screen capture.
// save it in a db/csv file
1 - start by POST to http://www.abc.com/forum/member.php with username password in body.
2 - POST/GET to http://www.abc.com/search/index.php with your search criteria, you look at what the website require. if they do POST, then POST.
// second part read your input
1 - login same as first part.
2 - casper forEach your input save your capture. (save the capture result in db/csv)
my script now is pure phantomjs, casper script just keep crashing for no reason. even phantomjs is unreliable. I save the result/status on each successful search/download, whenever there is error I exit the script if not the rest of result is unpredictable(good result in chrome turn out bad in phantomjs).
I found this question when searching for solution to a problem where click() or fill() action reloads exactly the same data in a child iframe. Here is my improvement to Pebbl answer:
casper.clickAndUnload = function (click_selector, unload_selector, callback, timeout) {
var classname = 'reload-' + (new Date().getTime());
this.evaluate(function (unload_selector, classname) {
$(unload_selector).addClass(classname);
}, unload_selector, classname);
this.thenClick(click_selector);
this.waitWhileSelector(unload_selector + '.' + classname, callback, timeout);
};
casper.fillAndUnload = function (form_selector, data, unload_selector, callback, timeout) {
var classname = 'reload-' + (new Date().getTime());
this.evaluate(function (unload_selector, classname) {
$(unload_selector).addClass(classname);
}, unload_selector, classname);
this.fill(form_selector, data, true);
this.waitWhileSelector(unload_selector + '.' + classname, callback, timeout);
};
This solution assumes that page uses jQuery. It should not be hard to modify it for pages that don't. unload_selector is an element that is expected to be reloaded after click or form submission.
Since Casperjs is written for developers, it's expected one knows what state the page loaded should be in, and what elements should be available to define a page-loaded state.
One option is to check for the presence of, for example, a javascript resource that is loaded at the end of the page.
When running any type of test, results must be reproducable each time and therefore idempotency is essential. For this to happen, the tester must be able to control the environment enough to make this happen.
Just evaluate document.readyState to be complete or interactive. Then it's loaded.
This is an implementation with a while, but maybe can be done with interval...
this.then(function () {
while(this.evaluate(function () { return document.readyState != 'complete' && document.readyState != 'interactive'; })) {}
});
Related
I'm on my third day working with Protractor and I'm constantly hitting bric walls in regards to waiting around for pages to load and elements to appear. This test case in particular has grown ugly and I would like to solve the issues without having to rely on sleeps.
I am currently "outside of the land of AngularJS"
it('it should reflect in both the field and the title when the personnel name is changed', function() {
var inputField, personnelHeader, personnelName;
personnelName = element(By.css(".overlay.editnameoverlay")).click();
personnelHeader = element(By.id("personnel_name_header"));
inputField = element(By.css("input[name='newvalue']"));
inputField.clear();
inputField.sendKeys("Test 123");
element(By.css("input[name='ok_button']")).click();
// browser.driver.sleep(2000); This test only works with this sleep added
browser.wait(function() {
console.log("Waiting for header to change...");
return personnelHeader.getText().then(function(text) {
return text === "Test 123";
});
}, 5000);
return expect(personnelHeader.getText()).toBe(personnelName.getText());
});
So the test here changes the name in an input field. submits it and waits for the changes to become reflected in the header of the modal. The problem is that without the browser.driver.sleep(2000) I get an error saying
Stacktrace:
StaleElementReferenceError: stale element reference: element is not attached to the page document
How do I go about solving this in this particular case?
From the documentation for Expect Conditions:
var EC = protractor.ExpectedConditions;
// Waits for the element with id 'abc' to contain the text 'foo'.
browser.wait(EC.textToBePresentInElement($('#abc'), 'foo'), 5000);
When you use Protractor to test for non-angular pages you're on your own regarding waiting for elements to be ready for interaction.
StaleElementReferenceError is probably the most useless selenium error, it happens when the element got removed from the DOM but is still cached somehow, I also suffered this problem when started with Protractor and even tried to convince it should be automatically retried Protractor-side.
The solution for me is to always explicitly wait for an element to appear on the page using a custom function waitReady() that browser.wait for elements ready, i.e: waits for the element to be ready for interaction:
expect($('#login_field').waitReady()).toBeTruthy();
First integrate this snippet in your code: https://gist.github.com/elgalu/2939aad2b2e31418c1bb
Not only the custom waitReady() waits for the element but it also swallows any unrelated useless webdriver error like StaleElementReferenceError and will simply retry up until finding the element or it will timeout.
So waitReady() each element before interacting, i.e. before clear() or sendKeys() or click() ...
// TODO: Move to Page Objects module
var personnelNameElm = $(".overlay.editnameoverlay");
var personnelHeaderElm = $("#personnel_name_header");
var inputFieldElm = $("input[name='newvalue']");
var okBtnElm = $("input[name='ok_button']");
it('it should reflect in both the field and the title when the ' +
'personnel name is changed', function() {
expect(personnelNameElm.waitReady()).toBeTruthy();
personnelNameElm.click();
expect(inputFieldElm.waitReady()).toBeTruthy();
inputFieldElm.clear().sendKeys("Test 123");
expect(okBtnElm.waitReady()).toBeTruthy();
okBtnElm.click();
browser.wait(function() {
console.log("Waiting for header to change...");
// Using waitReady() before getText() avoids Stale element errors
return personnelHeaderElm.waitReady().then(function() {
return personnelHeaderElm.getText().then(function(text) {
return text === "Test 123";
});
});
}, 5000);
expect(personnelHeaderElm.getText()).toEqual(personnelNameElm.getText());
});
I am new to Protractor. I think I have this down when dealing with an Angular page, but can't figure it out for a non-Angular page. Any help would be appreciated.
describe('Search', function() {
it('should click Search button and wait for results', function() {
browser.driver.findElement(by.id('search')).click();
});
});
Testing non-angular pages with Protractor can be tricky regarding waiting for stuff.
I suggest you upgrade Protractor to latest (1.5.0 as of now), use a custom function waitReady() that browser.wait for elements ready and rewrite your test like below. Note you can put everything within 1 spec if you like so.
// TODO: use page objects
var searchBtnElm = $('#search'); // use element(by.id('search')) if you prefer
it('waits for the elements present and visible (non-angular)', function() {
expect(searchBtnElm.waitReady()).toBeTruthy();
});
it('should click Search button', function() {
searchBtnElm.click();
});
it('wait for more results', function() {
// keep using waitReady() before interacting with the elements
// and before performing expectations on them
});
More details of why waitReady here.
Note: remember to set ignore synchronization for testing a non-angular page:
browser.ignoreSynchronization = true;
You can set it before browser.get the non-angular page.
I've suggested setting a high implicit wait in the past, e.g.
browser.manage().timeouts().implicitlyWait(5000);
That hack allows to you avoid waitReady and keep using the standard
expect(searchBtnElm.isPresent()).toBeTruthy();
But has an ugly disadvantage when testing for elements NOT present, i.e. when testing for absent or non visible elements in which case it will wait 5 seconds (5000ms) in vane, e.g. when doing
expect(someNonExistingElm.isPresent()).toBeFalsy();
Figured this out. I simply added the code below, after the click method:
describe('Search', function() {
it('should click Search button and wait for results', function() {
browser.driver.findElement(by.id('search')).click();
dvr.wait(function() {
return dvr.isElementPresent(by.xpath(
'/html/body/div/div[4]/div/div[2]/div/div/div/span'));
}, 20000);
});
});
Another Neat approach is to use "Expected Conditions" inside browser.wait - something like this:
var EC = protractor.ExpectedConditions;
var search = element(by.id('search'))
browser.wait(EC.visibilityOf(search), 2000).then(function(){
search.click()
})
You can get more details here: https://angular.github.io/protractor/#/api?view=ExpectedConditions
In protractor there are two types terms for on the page. isPresent ask if the element is exists on the page. isDisplayed asks if the element is visible. If you are waiting for a page to load you need to wait for isDisplayed, but that will error if it is not present, so wait for isPresent first. I use a function to wait for an element.
function waitForElement(el, waitTimeoutMilliseconds){
return browser.wait(function() { return el.isPresent(); }, waitTimeoutMilliseconds)
.then(function(){
return browser.wait(function() { return el.isDisplayed(); }, waitTimeoutMilliseconds);
});
}
Then just call that function in your test.
describe('Search', function() {
it('should click Search button and wait for results', function() {
var el = element(by.id('search'));
waitForElement(el, 5000);
el.click();
});
});
I need to set up a custom script for tracking a users click through on a form submission field. This is what I've got so far. As the user navigates down through the form fields the counter variable (base) totals up how far along the path the user has reached. I want to send the results off when the user leaves the page by sending out the base variable. I'm thinking of using the .unload function in jQuery. However for some reason unload isn't responding the way I think it should. Any ideas?
var base = 0; //declares a variable of 0. This should refresh when a new user lands on the form page.
function checkPath(fieldNo, path) { //this function should check the current base value of the base variable vs the current fieldNo
if (fieldNo >= path) { //checks to see if base if lower than fieldNo
base = fieldNo; //if true, base is set to current fieldNo
return base;
} else {
return base; //if false, simply returns base.
}
};
$('#order_customer_fields_forename').focus(function () { //when the form box is selected should run checkPath then alert result.
checkPath(1, base);
});
$('#order_customer_fields_surname').focus(function () {
checkPath(2, base);
});
$('#order_customer_fields_postcode').focus(function () {
checkPath(3, base);
});
$('#order_customer_fields_address1').focus(function () {
checkPath(4, base);
});
$('#order_customer_fields_address2').focus(function () {
checkPath(5, base);
});
$(window).unload(function () {
alert(base);
});
The unload event fires too late for the effect you need. You should try using the onbeforeunload event using either vanilla Javascript:
window.onbeforeunload = function (e) {
// Your code here
};
Or jQuery:
$(window).bind('beforeunload', function (e) {
// Your code here
});
Either way, you should be aware that this is not an ideal solution for what you are trying to achieve. This event is implemented unevenly across browsers. Chrome seems to be the most restrictive, and IE the most permissive, in its implementation.
A different direction you may want to take is sending the data to the server by XHR whenever the user completes a field.
In the end, I have decided that this isn't a problem that I particularly need to fix, however it bothers me that I don't understand why it is happening.
Basically, I have some checkboxes, and I only want the users to be able to select a certain number of them. I'm using the code below to achieve that effect.
$j( function () {
$j('input[type=checkbox].vote_item').click( function() {
var numLeft = (+$j('#vote_num').text());
console.log(numLeft);
if ( numLeft == 0 && this.checked ) {
alert('I\'m sorry, you have already voted for the number of items that you are allowed to vote for.');
return false;
} else {
if ( this.checked == true ) {
$j('#vote_num').html(numLeft-1);
} else {
$j('#vote_num').html(numLeft+1);
}
}
});
});
And when I was testing it, I noticed that if I used:
$j('input[type=checkbox]').each( function () {
this.click()
});
The JavaScript reacted as I would expect, however when used with:
$j('input[type=checkbox]').each( function () {
$j(this).click()
});
It would actually make the counter count UP.
I do realize that it isn't the most secure way to keep count using the counter, however I do have server side error-checking that prevents more than the requisite amount from being entered in the database, that being the reason that I have decided that it doesn't actually need fixing.
Edit: The $j is due to the fact that I have to use jQuery in noConflict mode...
$(this) contains a jQuery wrapper (with lots of functions) whereas this is solely the DOM object.
The fact that counter is going up gave me the clue that there is a link between checked attribute, which you are using, and firing the click event manually.
I searched Google for 'jquery checkbox click event raise' and found this link, where author faces the exact same problem and the workaround he used.
http://www.bennadel.com/blog/1525-jQuery-s-Event-Triggering-Order-Of-Default-Behavior-And-triggerHandler-.htm
On a side note, I think you can simplify your code further:
$j('input[type=checkbox].vote_item').click(
function()
{
var maxNumberOfChoices = 5;
//get number of checked checkboxes.
var currentCheckedCount = $j('input[type=checkbox].vote_item :checked');
if(currentCheckedCount > maxNumberOfChoices)
{
//It's useful if you show how many choices user can make. :)
alert('You can only select maximum ' + maxNumberOfChoices + ' checkboxes.');
return false;
}
return true;
});
this.click() calls the browser DOM method click().
$(this).click() calls the jQuery method click(), which does more than just call the browser method: see the implementation of the function trigger for details.
How to prevent a webpage from navigating away using JavaScript?
Using onunload allows you to display messages, but will not interrupt the navigation (because it is too late). However, using onbeforeunload will interrupt navigation:
window.onbeforeunload = function() {
return "";
}
Note: An empty string is returned because newer browsers provide a message such as "Any unsaved changes will be lost" that cannot be overridden.
In older browsers you could specify the message to display in the prompt:
window.onbeforeunload = function() {
return "Are you sure you want to navigate away?";
}
Unlike other methods presented here, this bit of code will not cause the browser to display a warning asking the user if he wants to leave; instead, it exploits the evented nature of the DOM to redirect back to the current page (and thus cancel navigation) before the browser has a chance to unload it from memory.
Since it works by short-circuiting navigation directly, it cannot be used to prevent the page from being closed; however, it can be used to disable frame-busting.
(function () {
var location = window.document.location;
var preventNavigation = function () {
var originalHashValue = location.hash;
window.setTimeout(function () {
location.hash = 'preventNavigation' + ~~ (9999 * Math.random());
location.hash = originalHashValue;
}, 0);
};
window.addEventListener('beforeunload', preventNavigation, false);
window.addEventListener('unload', preventNavigation, false);
})();
Disclaimer: You should never do this. If a page has frame-busting code on it, please respect the wishes of the author.
The equivalent in a more modern and browser compatible way, using modern addEventListener APIs.
window.addEventListener('beforeunload', (event) => {
// Cancel the event as stated by the standard.
event.preventDefault();
// Chrome requires returnValue to be set.
event.returnValue = '';
});
Source: https://developer.mozilla.org/en-US/docs/Web/Events/beforeunload
I ended up with this slightly different version:
var dirty = false;
window.onbeforeunload = function() {
return dirty ? "If you leave this page you will lose your unsaved changes." : null;
}
Elsewhere I set the dirty flag to true when the form gets dirtied (or I otherwise want to prevent navigating away). This allows me to easily control whether or not the user gets the Confirm Navigation prompt.
With the text in the selected answer you see redundant prompts:
In Ayman's example by returning false you prevent the browser window/tab from closing.
window.onunload = function () {
alert('You are trying to leave.');
return false;
}
The equivalent to the accepted answer in jQuery 1.11:
$(window).on("beforeunload", function () {
return "Please don't leave me!";
});
JSFiddle example
altCognito's answer used the unload event, which happens too late for JavaScript to abort the navigation.
That suggested error message may duplicate the error message the browser already displays. In chrome, the 2 similar error messages are displayed one after another in the same window.
In chrome, the text displayed after the custom message is: "Are you sure you want to leave this page?". In firefox, it does not display our custom error message at all (but still displays the dialog).
A more appropriate error message might be:
window.onbeforeunload = function() {
return "If you leave this page, you will lose any unsaved changes.";
}
Or stackoverflow style: "You have started writing or editing a post."
If you are catching a browser back/forward button and don't want to navigate away, you can use:
window.addEventListener('popstate', function() {
if (window.location.origin !== 'http://example.com') {
// Do something if not your domain
} else if (window.location.href === 'http://example.com/sign-in/step-1') {
window.history.go(2); // Skip the already-signed-in pages if the forward button was clicked
} else if (window.location.href === 'http://example.com/sign-in/step-2') {
window.history.go(-2); // Skip the already-signed-in pages if the back button was clicked
} else {
// Let it do its thing
}
});
Otherwise, you can use the beforeunload event, but the message may or may not work cross-browser, and requires returning something that forces a built-in prompt.
Use onunload.
For jQuery, I think this works like so:
$(window).unload(function() {
alert("Unloading");
return falseIfYouWantToButBeCareful();
});
If you need to toggle the state back to no notification on exit, use the following line:
window.onbeforeunload = null;