run userscript on content after $.get - javascript

I am doing a GET request on a page (e.g. url.com), then checking whether something exists on it. If the info I am looking for exists, I open a blank page and put the contents of the GET request onto it via document.write.
Tampermonkey doesn't recognize that a script needs to run when I update the URL via win.history.pushState, so nothing gets executed. Is there another way to accomplish this?
$.get(url, function(data) {
    var win = window.open('', "_blank");
    // This line doesn't work: Tampermonkey never sees the new URL
    win.history.pushState({ 'page_id': 1 }, "newpage", "https://www.url.com/stuff/");
    with (win.document) {
        open();
        write(data);
        close();
    }
});
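One alternative (a sketch of my own, not an answer from the thread): if the goal is just to check the fetched page and act on its contents, skip the new window entirely and parse the response with DOMParser in the current page:

$.get(url, function(data) {
    // Parse the response string into a detached document;
    // no new window and no pushState needed.
    var doc = new DOMParser().parseFromString(data, 'text/html');
    // '#the-info' is a hypothetical selector for whatever is being checked
    var info = doc.querySelector('#the-info');
    if (info) {
        // run the rest of the userscript logic here
        console.log(info.textContent);
    }
});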


Changing a link dynamically in PhantomJS and clicking it to scrape the page

I've been trying to figure this out for a couple of days now but haven't been able to achieve it.
There's a web page where I need to scrape all available records. I've noticed that if I modify the pagination link with Firebug or the browser's inspector I can get all the records I need. For example, this is the original link:
<a href="javascript:gReport.navigate.paginate('paginator_min_row=16max_rows=15rows_fetched=15')">
If I modify that link like this
<a href="javascript:gReport.navigate.paginate('paginator_min_row=1max_rows=5000rows_fetched=5000')">
and then click the pagination button in the browser (the very same one that contains the link I've just changed), I'm able to get all the records I need from the site (most of the time "rows" doesn't get any bigger than 4000; I use 5000 just in case).
Since I have to process that file by hand every single day, I thought I could automate the process with PhantomJS and get the whole page in a single run, without looking for that link and changing it. So, in order to modify the pagination link and get all records, I'm using the following code:
var page = require('webpage').create();
var fs = require('fs');

page.open('http://testingsite1.local', function () {
    page.evaluate(function() {
        $('a[href="javascript:gReport.navigate.paginate(\'paginator_min_row=16max_rows=15rows_fetched=15\')"]')
            .first()
            .attr('href', 'javascript:gReport.navigate.paginate(\'paginator_min_row=1max_rows=5000rows_fetched=5000\')')
            .attr('id', 'clickit');
        $('#clickit')[0].click();
    });
    page.render('test.png');
    fs.write('test.html', page.content, 'w');
    phantom.exit();
});
Notice that there are TWO pagination links on that website; because of that I'm using jQuery's .first() to choose only the first one.
Also, since the required link doesn't have any identifier, I select it by its current href, change it to what I need, and lastly add the "clickit" ID to it for later use.
Now, these are my questions:
I'm not exactly sure why it isn't working. If I run the code it fetches the first page only; after examining the requested page's source I do see that the href has been changed to what I want, but it just doesn't get called. I have two theories on what might be wrong:
The modified href isn't getting "clicked", so the page isn't getting updated.
The href does get clicked, but since the page takes a few seconds to load all results dynamically, I only get to dump the first page PhantomJS gets to see.
What do you guys think?
[UPDATE NOV 6 2015]
Ok, so the answers provided by @Artjomb and @pguardiario pointed me in a new direction:
I needed more debugging info on what was going on
I needed to call the gReport.navigate.paginate function directly
Sadly, I simply lack the experience to use PhantomJS properly. Several other samples indicated that I could achieve what I wanted with CasperJS, so I tried it. This is what I produced after a couple of hours:
var utils = require('utils');
var fs = require('fs');
var url = 'http://testingsite1.local';

var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug'
});

casper.on('error', function(msg, backtrace) {
    this.echo("=========================");
    this.echo("ERROR:");
    this.echo(msg);
    this.echo(backtrace);
    this.echo("=========================");
});

casper.on("page.error", function(msg, backtrace) {
    this.echo("=========================");
    this.echo("PAGE.ERROR:");
    this.echo(msg);
    this.echo(backtrace);
    this.echo("=========================");
});

casper.start(url, function() {
    var url = this.evaluate(function() {
        $('a[href="javascript:gReport.navigate.paginate(\'paginator_min_row=16max_rows=15rows_fetched=15\')"]')
            .attr('href', 'javascript:gReport.navigate.paginate(\'paginator_min_row=1max_rows=5000rows_fetched=5000\')')
            .attr('id', 'clicklink');
        return gReport.navigate.paginate('paginator_min_row=1max_rows=5000rows_fetched=5000');
    });
});

casper.then(function() {
    this.waitForSelector('.nonexistant', function() {
        // Nothing here: this selector is never expected to appear
    }, function() {
        // Timeout handler: fires after 50 seconds, then dumps the page
        this.capture('screen.png');
        var html = this.getPageContent();
        var f = fs.open('test.html', 'w');
        f.write(html);
        f.close();
    }, 50000);
});

casper.run(function() {
    this.exit();
});
Please be gentle, as I know this code sucks; I'm no JavaScript expert and in fact know very little of it. I know I should have waited for an element to appear, but that simply didn't work in my tests, as I was still getting the page without the update from the AJAX request.
In the end I waited a long time (50 seconds) for the AJAX request to show on the page and then dumped the HTML.
Oh, and calling the function directly did work great!
The href does get clicked, but since the page takes a few seconds to load all results dynamically I only get to dump the first page Phantomjs gets to see
It's easy to check whether that's the case by wrapping the render, write and exit calls in setTimeout and trying different timeouts:
page.open('http://testingsite1.local', function () {
    page.evaluate(function() {
        $('a[href="javascript:gReport.navigate.paginate(\'paginator_min_row=16max_rows=15rows_fetched=15\')"]')
            .first()
            .attr('href', 'javascript:gReport.navigate.paginate(\'paginator_min_row=1max_rows=5000rows_fetched=5000\')')
            .attr('id', 'clickit');
        $('#clickit')[0].click();
    });
    setTimeout(function() {
        page.render('test.png');
        fs.write('test.html', page.content, 'w');
        phantom.exit();
    }, 5000);
});
If it's really just a timeout issue, then you should use the waitFor() function to wait for a specific condition like "all elements loaded" or "x elements of that type are loaded".
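For reference, here's a sketch of the waitFor() helper from the PhantomJS examples (it is not part of the core API); the '#report tr' selector and the row threshold below are hypothetical and would need to match the real markup:

function waitFor(testFx, onReady, timeOutMillis) {
    var maxtimeOutMillis = timeOutMillis || 5000,
        start = new Date().getTime(),
        interval = setInterval(function() {
            if (new Date().getTime() - start < maxtimeOutMillis) {
                if (testFx()) {
                    // Condition met: stop polling and continue.
                    clearInterval(interval);
                    onReady();
                }
            } else {
                // Condition never became true within the timeout.
                console.log("waitFor() timed out");
                clearInterval(interval);
                phantom.exit(1);
            }
        }, 250); // poll every 250 ms
}

// Example: wait until enough result rows are in the DOM, then dump the page.
waitFor(function() {
    return page.evaluate(function() {
        return document.querySelectorAll('#report tr').length >= 100;
    });
}, function() {
    page.render('test.png');
    fs.write('test.html', page.content, 'w');
    phantom.exit();
}, 30000);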
The modified href isn't getting "clicked" so the page isn't getting updated
This is a little trickier. You can listen to the onConsoleMessage, onError, onResourceError and onResourceTimeout events and see if there are errors on the page. Some of those errors can be fixed from PhantomJS itself, e.g. Function.prototype.bind not being available, or HTTPS sites/resources failing to load.
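A minimal sketch of hooking those events; the handlers just log whatever the page reports:

page.onConsoleMessage = function(msg) {
    console.log('CONSOLE: ' + msg);
};
page.onError = function(msg, trace) {
    console.log('PAGE ERROR: ' + msg);
};
page.onResourceError = function(resourceError) {
    console.log('RESOURCE ERROR: ' + resourceError.url + ' -> ' + resourceError.errorString);
};
page.onResourceTimeout = function(request) {
    console.log('RESOURCE TIMEOUT: ' + request.url);
};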
There are other ways to click something that are more reliable such as this one.

How to (ajax) post and execute response?

We have the following situation:
In default.aspx we have a link:
test.
and the JS code:
function doPost() {
    $.post('AnHttpHandlerPage.aspx', "{some_data:...}", function(data) {
        if (data.indexOf("http://") == 0)
            window.open(data);
        else {
            var win = window.open();
            with (win.document) {
                open();
                write(data); // -> how to execute this HTML code? The code also includes references to other js files.
                close();
            }
        }
    }).error(function(msg) { document.write(msg.responseText); });
}
The response can be either (1) a URL or (2) HTML code that must be executed.
Option 1 works fine, but with option 2 a new window is opened where the code has been written but not executed.
That makes sense: since the code is just written into the document stream, it isn't executed. So the question is, how can you fix that? Maybe a refresh() or similar?
Because of the customer's requirements the workflow cannot be changed, so it must be solved within doPost().
EDIT
The response in case 2 is HTML like this; this part should be executed:
<HTML><HEAD>
<SCRIPT type=text/javascript src="http://code.jquery.com/jquery-latest.js">
</SCRIPT>
<SCRIPT type=text/javascript>
$(document).ready(function() {
do_something...
});
</SCRIPT>
</HEAD>
<BODY>
<FORM>...</FORM>
</BODY>
</HTML>
Please help. Thanks.
In your JS code it should be something like this:
function doPost() {
    $.post('AnHttpHandlerPage.aspx', "{some_data:...}", function(data) {
        // Add a type field to the returned JSON so you can tell whether
        // the response is a URL or HTML to show on the page.
        if (data.type == "url") {
            // data.url should point at the whole page you want in the new
            // window. If it's external that's fine; if it's internal, an
            // action on one of your controllers can serve it complete with
            // head, body, JS, CSS, etc.
            var win = window.open(data.url);
        } else {
            // Not a URL: do your usual stuff on this page. There is no need
            // to write data into a new window when it's all HTML that the
            // browser will render anyway.
        }
    }).error(function(msg) { document.write(msg.responseText); });
}
The overall logic is this:
You do your AJAX call in doPost.
Find out if the returned data is of type url, or anything else that needs to open in a new window.
If it is the url type, it should carry a URL (check that it's not null, empty, or invalid), then open a new window with that URL. Have a read of the W3C documentation on window.open for the parameters.
If you want to open and close the window for some reason, do that by keeping the window handle, but do it on the DOM-ready event of the new window, otherwise you might end up closing it before its DOM has completely loaded (someone else might have a better way).
If it's not the url type, do your usual stuff on the current page.
If this doesn't make sense, let's discuss.
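If the HTML really must execute in a new window, one more option (my addition, not part of the answer above, assuming a reasonably modern browser): hand the response to the window as a Blob URL, so it's parsed and executed like a normally loaded page.

// 'data' is the HTML string returned by the AJAX call.
var blob = new Blob([data], { type: 'text/html' });
var blobUrl = URL.createObjectURL(blob);
var win = window.open(blobUrl, '_blank');
// Release the object URL once the new window has loaded.
win.onload = function() { URL.revokeObjectURL(blobUrl); };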

Is it possible to trigger behavior in the content script from the popup?

Currently I am using message passing to send a request from my content script for data in localStorage, and I am not having any issues with that; the content script works as expected.
Can you do this in the other direction?
I have an object in the content script with a method called ".apply()", and I want to run it when the user clicks the option to do so.
I tried to make a listener in the content script like this:
var myLinker = new Linker();
chrome.extension.onRequest.addListener(function(request) {
    if (request.method == "apply") {
        myLinker.apply("nothing");
        alert("applied");
    }
    // else: do nothing
});
And send requests to it like this:
chrome.extension.sendRequest({ method: "apply" }, function() {
    alert("Tried to request");
});
I get that it is a bit of a hack, but it is the only thing I could think of, and it doesn't even work =/
Is there a way to do this?
I am pretty sure I could just inject new code into the page from the popup (I think I saw an api function for that), and then run stuff, but that would take more memory and just feels like a bad way to do it, because you would basically have the exact same code twice.
To send a message from the extension to a content script, use chrome.tabs.sendMessage instead of chrome.extension.sendRequest.
Because the sendRequest family has been superseded by the onMessage family since Chrome 20, there's no official documentation for sendRequest any more. The documentation for chrome.tabs.sendMessage can be found here. Please note that these events cannot be mixed: use either *Request or *Message on both sides.
Yes, you would use this: http://developer.chrome.com/extensions/tabs.html#method-sendMessage
Content scripts live within the DOM of the page. And each page that is open within Chrome has a tab ID associated with it -- http://developer.chrome.com/extensions/tabs.html#type-tabs.Tab
Let's say you want to send the {method: "apply"} to a page that was just opened in a new tab:
chrome.tabs.onCreated.addListener(function(tab) {
    chrome.tabs.sendMessage(tab.id, { method: "apply" });
});
There are other events/methods to get the specific Tab you want to send the message to. I think there's one called getCurrent to send to the currently selected tab, check out the docs for that.
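With the newer *Message APIs, a minimal sketch of the whole round trip (popup to content script) could look like this; myLinker is the object from the question:

// popup.js: find the active tab and ask its content script to apply.
chrome.tabs.query({ active: true, currentWindow: true }, function(tabs) {
    chrome.tabs.sendMessage(tabs[0].id, { method: "apply" });
});

// content script: listen with the matching *Message event.
chrome.runtime.onMessage.addListener(function(request) {
    if (request.method == "apply") {
        myLinker.apply("nothing");
    }
});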

How to make a script run on pages that update with AJAX requests?

I have a script that runs on some pages. The problem is that I attach the script on page load, but all the links on the page make requests and replace the contents with the response (the URL's hash changes).
I need a way to tell when the "page" has finished loading so I can add my scripts.
The problem:
somesite.com/home (the load event is fired)
*user clicks a link*
somesite.com/home#profile (the load event isn't fired; the page is already loaded, but the contents are being changed through AJAX calls)
How can I do that? I'm thinking of adding a watcher that will check the page all the time and call the corresponding script.
How can I check whether the page is sending a request, and execute something when the request ends? (I guess it's possible, since Firebug does that.)
You could use setInterval, and check every 500 ms for changes in the location hash:
var oldHash = '';
setInterval(function() {
    var newHash = location.hash;
    if (oldHash != newHash) {
        oldHash = newHash;
        // do something
    }
}, 500);
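As a side note (my addition, not part of the original answer): browsers that support it fire a hashchange event on window, which avoids polling altogether:

window.addEventListener('hashchange', function() {
    // re-attach your scripts here; location.hash holds the new value
}, false);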

How to use javascript to get information from the content of another page (same domain)?

Let's say I have a web page (/index.html) that contains the following
<li>
<div>item1</div>
details
</li>
and I would like to have some javascript on /index.html to load that
/details/item1.html page and extract some information from that page.
The page /details/item1.html might contain things like
<div id="some_id">
picture
map
</div>
My task is to write a Greasemonkey script, so changing anything server-side is not an option.
To summarize: the JavaScript runs on /index.html, and I would like that code to add some information to /index.html extracted from both /index.html and /details/item1.html.
My question is how to fetch the information from /details/item1.html.
I have already written code to extract the link (e.g. /details/item1.html) and pass it to a method that should extract the wanted information (at first just the .innerHTML of the some_id div is fine; I can process it further later).
The following is my current attempt, but it does not work. Any suggestions?
function get_information(link)
{
    var obj = document.createElement('object');
    obj.data = link;
    document.getElementsByTagName('body')[0].appendChild(obj);
    var some_id = document.getElementById('some_id');
    if (!some_id) {
        alert("some_id == NULL");
        return "";
    }
    return some_id.innerHTML;
}
First:
function get_information(link, callback) {
    var xhr = new XMLHttpRequest();
    xhr.open("GET", link, true);
    xhr.onreadystatechange = function() {
        if (xhr.readyState === 4) {
            callback(xhr.responseText);
        }
    };
    xhr.send(null);
}
then
get_information("/details/item1.html", function(text) {
    var div = document.createElement("div");
    div.innerHTML = text;
    // Do something with the div here, like inserting it into the page
});
I have not tested any of this - off the top of my head. YMMV
As only one page exists in the client (browser) at a time, and all other (virtual/possible) pages are on the server, how will you get information from another page using JavaScript? You will have to interact with the server at some point to retrieve the second page.
If you can, integrate an AJAX request to load the second page (and parse it). If that's not an option, I'd say you'll have to load all the pages you want to extract information from at the same time, hide the bits you don't want to show (in hidden DIVs?), and then get your index (or whatever controls the view) to retrieve the needed information from there... even though that sounds pretty creepy ;)
You can load the page in a hidden iframe and use normal DOM manipulation to extract the results, or get the text of the page via AJAX, grab the part between <body...> and </body>, and temporarily inject it into a div. (The second might fail for some exotic elements like ins.) I would expect Greasemonkey to have more powerful functions than normal JavaScript for stuff like that, though; it might be worth thumbing through the documentation.
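A minimal sketch of the hidden-iframe variant, assuming the same-domain setup from the question (the some_id element lives in /details/item1.html):

function get_information_via_iframe(link, callback) {
    var iframe = document.createElement('iframe');
    iframe.style.display = 'none';
    iframe.src = link;
    iframe.onload = function() {
        // Same domain, so the iframe's document is accessible.
        var el = iframe.contentDocument.getElementById('some_id');
        callback(el ? el.innerHTML : '');
        document.body.removeChild(iframe);
    };
    document.body.appendChild(iframe);
}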
