Im trying to use PhantomJS to scrape the trophy data from http://my.playstation.com/logged-in/trophies/public-trophies/
The page requires you enter a valid username and then click 'go' and the page will load the data. Ive gotten this to work somewhat, but it never loads the trophy data into the div. Im hoping im missing something ajax related thats causing this?
var fullpagehtml = page.evaluate(function()
{
document.getElementById("trophiesId").value = "<<valid user id>>";
//checkPTrophies(); btn click calls this function
$('#btn_publictrophy').click().delay( 6000 );
console.log("\nWaiting for trophy list to load...");
var trophylist = document.getElementById("trophyTrophyList").innerHtml; // all the data i want ends up inside this div
var counter = 0; //delay andset timeout wont work here so this is the best i coukld think of
while (trophylist == null)
{
//presumably the ajax query should kick in on the page and populate this div, but it doesnt.
trophylist = document.getElementById("trophyTrophyList").innerHtml;
counter ++;
if(counter == 1000000)
{
console.log($('#trophyTrophyList').html());
counter = 0;
}
}
return document.all[0].outerHTML;
});
The delay( 6000 ) does absolutely nothing as the documentation says:
The .delay() method is best for delaying between queued jQuery effects. Because it is limited—it doesn't, for example, offer a way to cancel the delay—.delay() is not a replacement for JavaScript's native setTimeout function, which may be more appropriate for certain use cases.
To wait you have to do this outside of the page context (busy waiting doesn't work in JavaScript because it is single threaded):
page.evaluate(function() {
document.getElementById("trophiesId").value = "<<valid user id>>";
//checkPTrophies(); btn click calls this function
$('#btn_publictrophy').click();
});
console.log("\nWaiting for trophy list to load...");
setTimeout(function(){
var fullpagehtml = page.evaluate(function() {
var trophylist = document.getElementById("trophyTrophyList").innerHTML;
return trophylist;
});
}, 20000);
You also might want to use waitFor to wait until #trophyTrophyList is populated instead of using setTimeout:
waitFor(function(){
return page.evaluate(function(){
var e = document.getElementById("trophyTrophyList");
return e && e.innerHTML;
});
}, function(){
// TODO: get trophies
});
This won't get you far, because just because #trophyTrophyList is loaded, doesn't mean that the descendent elements are already in the DOM. You have to find some selector which signalizes that the page is sufficiently loaded for example by waiting until a .trophy-image exists in the page. It works for me with a 20 second timeout of the waitFor function.
waitFor(function(){
return page.evaluate(function(){
var e = document.querySelector("#trophyTrophyList .trophy-image");
return e;
});
}, function(){
setTimeout(function(){
var trophiesDiv = page.evaluate(function(){
return document.getElementById("trophyTrophyList").innerHTML;
});
console.log(trophiesDiv);
}, 1000); // wait a little longer
}, 20000);
Don't forget that you need page.evaluate to actually access the DOM. Btw, it is innerHTML not innerHtml.
Related
I want to automate clicking the agree button to Google's cookie policies.
(I clean cookies after closing a tab, and I don't want to create a google account, so I get asked every time I use google)
There is a div element with the ID "introAgreeButton" that I'm trying to access with my script:
<div role="button" id="introAgreeButton" [...]></div>
However, document.getElementById('introAgreeButton') always returns null.
My first thought was that the element wasn't loaded by the time my function was executed. But it doesn't work if I execute it on window.onload, or even if I run it in a loop until the element is definitely there:
window.onload = function() {
var x = document.getElementById('introAgreeButton')
console.log(x)
}
Output:
null
function loop() {
var x = document.getElementById('introAgreeButton')
if (x) {
console.log('success')
} else {
loop()
}
}
Output:
null
null
null
...
Can be tested on https://www.google.com/search?hl=en&q=test
Anyone have an idea why this is and how to solve it?
Edit: I execute the script via the browser extension TamperMonkey
You can use setInterval to check if element is rendered in DOM like this :
document.addEventListener('DOMContentLoaded', function () {
var intervalID = null;
function checkElementInDOM () {
var element = document.getElementById('introAgreeButton');
if (element) {
clearInterval(intervalID);
// DO YOUR STUFF HERE ...
}
}
intervalID = setInterval(checkElementInDOM, 100);
});
To be used intelligently, however, so as not to have a setInterval which works continuously. Maybe think about adding a maximum number of attempts.
Here's the problem. I'm making a callback to the server that receives an MVC partial page. It's been working great, it calls the success function and all that. However, I'm calling a function after which iterates through specific elements:
$(".tool-fields.in div.collapse, .common-fields div.collapse").each(...)
Inside this, I'm checking for a specific attribute (custom one using data-) which is also working great; however; the iterator never finishes. No error messages are given, the program doesn't hold up. It just quits.
Here's the function with the iterator
function HideShow() {
$(".tool-fields.in div.collapse, .common-fields div.collapse").each(function () {
if (IsDataYesNoHide(this)) {
$(this).collapse("show");
}
else
$(this).collapse("hide");
});
alert("test");
}
Here's the function called in that, "IsDataYesNoHide":
function IsDataYesNoHide(element) {
var $element = $(element);
var datayesnohide = $element.attr("data-yes-no-hide");
if (datayesnohide !== undefined) {
var array = datayesnohide.split(";");
var returnAnswer = true;
for (var i in array) {
var answer = array[i].split("=")[1];
returnAnswer = returnAnswer && (answer.toLowerCase() === "true");
}
return returnAnswer;
}
else {
return false;
}
}
This is the way the attribute appears
data-yes-no-hide="pKanban_Val=true;pTwoBoxSystem_Val=true;"
EDIT: Per request, here is the jquery $.post
$.post(path + conPath + '/GrabDetails', $.param({ data: dataArr }, true), function (data) {
ToggleLoader(false); //Page load finished so the spinner should stop
if (data !== "") { //if we got anything back of if there wasn't a ghost record
$container.find(".container").first().append(data); //add the content
var $changes = $("#Changes"); //grab the changes
var $details = $("#details"); //grab the current
SplitPage($container, $details, $changes); //Just CSS changes
MoveApproveReject($changes); //Moves buttons to the left of the screen
MarkAsDifferent($changes, $details) //Adds the data- attribute and colors differences
}
else {
$(".Details .modal-content").removeClass("extra-wide"); //Normal page
$(".Details input[type=radio]").each(function () {
CheckOptionalFields(this);
});
}
HideShow(); //Hide or show fields by business logic
});
For a while, I thought the jquery collapse was breaking, but putting the simple alert('test') showed me what was happening. It just was never finishing.
Are there specific lengths of time a callback function can be called from a jquery postback? I'm loading everything in modal views which would indicate "oh maybe jquery is included twice", but I've already had that problem for other things and have made sure that it only ever includes once. As in the include is only once in the entire app and the layout is only applied to the main page.
I'm open to any possibilities.
Thanks!
~Brandon
Found the problem. I had a variable that was sometimes being set as undefined cause it to silently crash. I have no idea why there was no error message.
I am stuck on getting a timeout working. I already have a working code but it seems to me the wrong way to do it.
Working code but probably not the best:
/* Autosave */
// On load we hide all autosave messages.
$('.jform_params_autosave-cg').hide();
// Below is the function that handles the autosave.
$.fn.autoSave = function(){
// We remove the autosave message from it's place defined by the xml and add it to the system message container.
var autosavemessage = $('.jform_params_autosave-cg');
autosavemessage.detach();
autosavemessage.appendTo('#system-message-container');
// Now we show the message.
$('.jform_params_autosave-cg').show();
// Here we save the extension.
Joomla.submitbutton('module.apply');
}
// On change of the below elements we run the autosave.
//------------------------------------------//
// DUPLICATE AUTOSAVE FUNCTION BELOW
//------------------------------------------//
// Autosave: Theme Selection
$("#jform_params_theme_selection").change(function() {
$.fn.autoSave();
});
// Autosave: Add Content
$("a.group-add.btn.btn-mini.button.btn-success").click(function() {
setTimeout(
function()
{
$.fn.autoSave();
}, 5000);
});
The Function:
$.fn.autoSave = function(){
// We remove the autosave message from it's place defined by the xml and add it to the system message container.
var autosavemessage = $('.jform_params_autosave-cg');
autosavemessage.detach();
autosavemessage.appendTo('#system-message-container');
// Now we show the message.
$('.jform_params_autosave-cg').show();
// Here we save the extension.
Joomla.submitbutton('module.apply');
}
The Function Call
$("#jform_params_theme_selection").change(function() {
$.fn.autoSave();
});
The Function Call with Timeout
$("a.group-add.btn.btn-mini.button.btn-success").click(function() {
setTimeout(
function()
{
$.fn.autoSave();
}, 5000);
});
What do I want to achieve
Make the Timeout inside the function.
Define the timeout when calling the function.
With defining I mean calling it something like $.fn.autoSave(5000); or $.fn.autoSave().timeout(500);
I have been trying to get a working code but so far no luck. Will keep updating this post whenever I get more success or details to add.
Thanks everyone for helping.
Any link to existing SO questions will also be appreciated as I might be googling for the wrong key words.
Here it is the modified version of your function. Now it has optional timeout parameter. You can use it like
$('selector').autoSave(5000) or $('selector').autoSave()
$.fn.autoSave = function(timeout) {
function doIt() {
// We remove the autosave message from it's place defined by the xml and add it to the system message container.
var autosavemessage = $('.jform_params_autosave-cg');
autosavemessage.detach();
autosavemessage.appendTo('#system-message-container');
// Now we show the message.
$('.jform_params_autosave-cg').show();
// Here we save the extension.
Joomla.submitbutton('module.apply');
return this;
}
timeout = Number(timeout) || 0;
var f = doIt.bind(this);
if(timeout < 0) return f();
setTimeout(f, timeout);
return this;
}
I have this sample of my code:
function clickOldShares() {
console.log("Waiting for all shares");
element = document.querySelector("#pagelet_scrolling_pager > div > div > a");
return element;
}
casper.thenOpen("https://www.facebook.com/shares/view?id=" + fb_objectID,function(){
console.log("Open post with object-id");
});
casper.then(function(){
element = this.evaluate(clickOldShares);
});
casper.wait(2000,function() {
console.log('ELEMENT1: ' + element);
element = this.evaluate(clickOldShares);
});
casper.wait(2000,function() {
newelement = this.evaluate(clickOldShares);
console.log('ELEMENT2: ' + newelement);
});
casper.wait(2000,function() {
newelement = this.evaluate(clickOldShares);
console.log('ELEMENT3: ' + newelement);
});
I´m not understanding how can I transform this calls to clickOldShares in a loop using CasperJS because casper.wait is asynchronous. May I have some example of how to do this, please?
The page doesn't load all data in one time. It's necessary to click on the 'Older Shares' button until the data appears. And this can happen many times, depending the amount of data. So, I need to click as often as needed before capturing data.
First thing's first, you can't use clickOldShares for anything as it is now. casper.evaluate() provides access to the DOM, but the passed in function is sandboxed and executed in the page context. All data must be explicitly passed in and out, and this has to be primitive. DOM elements are not primitive and cannot be passed out of the page context (this.evaluate(clickOldShares) will always return null). You will either have to call the click code inside of the page context.
You can wait for an element to appear with waitForSelector. You really don't need to iterate to wait for it.
var selector = "#pagelet_scrolling_pager > div > div > a";
casper.start()
.thenOpen(url)
.waitForSelector(selector, null, null, 15000); // max 15 seconds
.then(function(){
this.capture("screen1.png");
this.click(selector);
})
.then(function(){
this.capture("screen2.png");
})
.run();
The third argument for waitForSelector is the callback for when the timeout is reached, but the element is not found. The fourth argument is a custom timeout. The default timeout is set to 10 seconds.
It seems you need to click on a certain selector until it disappears. You can't use a loop for this, because the functions are asynchronous. You will have to use recursion like this:
var selector = "#pagelet_scrolling_pager > div > div > a";
var i = 0;
function step() {
if (this.exists(selector)) {
this.capture("screen"+(i++)+".png");
this.click(selector);
this.wait(2000, step);
} else {
this.capture("screen_final.png");
}
}
casper.start()
.thenOpen(url)
.then(step)
.then(function(){
// TODO: do something else
})
.run()
i have a simple question, there is a function with parameter emp_id that opens up a form for a chat with different attributes, i want it to be refreshed automatically each 10 sec, now it works a bit wrongly, since there is a parameter emp_id that is can be changed, and once i change it, the chat with messages and form are refreshed double time or triple times :) depend on how many times u change the emp_id, i hope i was clear )) anyway here is the javascript function:
function load_chat(emp_id) {
var url = "#request.self#?fuseaction=objects2.popup_list_chatform"
url = url + "&employee_id=" + emp_id;
document.getElementById('form_div').style.display = 'block'; AjaxPageLoad(url,'form_div',1,'Yükleniyor');
setInterval( function() {
load_chat(emp_id);
},10000);
}
there a list of names, once i click on one of them, this form is opened by this function, but if i click another user, i mean if i change the emp_id, it refreshes, the previous and present form. how do i change it so that it will refresh only the last emp_id, but not all of id's which i've changed
thank you all for the help, i really appreciate it!
This would nicely encapsulate what you're doing. The timer id (tid) is kept inside the closure, so when you call load_chat it will stop the interval if there was one running.
Once the new url is set up, it will start the interval timer again.
var ChatModule = (function() {
var tid,
url;
function refresh()
{
AjaxPageLoad(url, 'form_div', 1, 'Yükleniyor');
}
return {
load_chat: function(emp_id) {
if (tid) {
clearInterval(tid);
}
// setup url
url = "#request.self#?fuseaction=objects2.popup_list_chatform"
url = url + "&employee_id=" + emp_id;
document.getElementById('form_div').style.display = 'block';
// load ajax
refresh();
// set timer
tid = setInterval(refresh, 10000);
}
}
}());
ChatModule.load_chat(123);
Use setTimeout instead. Each time your function is executed, it will set up the next execution (you could also make it conditional):
function load_chat(emp_id) {
... // do something
if (condition_still_met)
setTimeout(function() {
load_chat(emp_id); // with same id
}, 10000);
}
load_chat("x"); // to start
Or you will have to use setInterval outside the load_chat function. You can clear the interval when necessary.
function get_chat_loader(emp_id) {
return function() {
... // do something
};
}
var id = setInterval(get_chat_loader("x"), 10000); // start
// then, somewhen later:
clearInterval(id);