I am trying to collect alert from website by using overwrite method. I searched on google and found wappalyzer, a Chrome/Firefox extension to detect software on website. It injects a script inject.js when page load and collect information. My method is similar. I make a local website and test it. When I inject overwrite_alert.js manually then it works.
But I want to do it dynamically and apply to other website. So I use headless browser like PhantomJS. The following code which I tried in PhantomJS but it does not work.
I am trying to inject a JavaScript on phantom JS. Like this code:
<!DOCTYPE html>
<html>
<head>
<title>Test alert</title>
<!-- !!! Inject here !!! <script src="overwrite_alert.js"></script> -->
<script type="text/javascript" src="other_script.js"> </script>
<script type="text/javascript">
alert("Alert content");
</script>
</head>
<body>
<h1>website content</h1>
</body>
</html>
overwrite_alert.js file from this question:
(function() {
var _alert = window.alert; // <-- Reference
window.alert = function(str) {
// do something additional
if(console) console.log(str);
//return _alert.apply(this, arguments); // <-- The universal method
_alert(str); // Suits for this case
};
})();
I tried with onLoadStarted event and My PhantomJS code:
var webPage = require('webpage');
var page = webPage.create();
var url = "https://localhost:5000/alert.html";
page.onConsoleMessage = function(msg, lineNum, sourceId) {
console.log('CONSOLE> ' + msg);
};
page.onLoadStarted = function() {
if (page.injectJs('do.js')) {
var title = page.evaluate(function() {
// returnTitle is a function loaded from our do.js file - see below
console.log("evaluate completed");
});
console.log(title);
}
}
page.open(url, function(status) {
if (status === "success") {
if (page.injectJs('do.js')) {
var title = page.evaluate(function() {
// returnTitle is a function loaded from our do.js file - see below
console.log("evaluate completed");
});
console.log(title);
phantom.exit();
}
page.render("onOpen.png");
}
});
Result:
$ phantomjs test_inject.js
CONSOLE> from onLoadStarted completed
null
CONSOLE> from page.open
null
Since the page.open callback is called after a page is loaded, it would be simply to late to change the implementation of window.alert. You would need to use earlier events such as page.onInitialized, page.onLoadStarted, etc.
Since you're interested in alerts, you don't need to do that at all, because PhantomJS provides an event for that: page.onAlert
Related
I am trying to utilize PhantomJS to get html generated by dynamic page. I supposed that this would be easy, but after few hours of trying, I am still not lucky.
The page itself has this source code and what gets saved in 1.html eventually:
<!doctype html>
<html lang="cs" ng-app="appId">
<head ng-controller="MainCtrl">
(ommited some lines)
<script src="/js/conf/config.js?pars"></script>
<script src="/js/all.js?pars"></script>
</head>
<body>
<!--<![endif]-->
<div site-loader></div>
<div page-layout>
<div ng-view></div>
</div>
</body>
</html>
All content of web gets loaded inside site-loader div, but I have no luck to get it, even though I am using timeout before scraping html by PhantomJS. Here goes code I am using:
var url = 'http:...';
var page = require('webpage').create();
var fs = require('fs');
page.open(url, function (status) {
if (status !== 'success') {
console.log('Fail');
phantom.exit();
} else {
window.setTimeout(function () {
fs.write('1.html', page.content, 'w');
phantom.exit();
}, 2000); // Change timeout as required to allow sufficient time
}
});
Please what am I doing wrong?
EDIT:
I have decided to try PJscrapper framework and configured it to scrappe all contents of div block. All I got was lousy:
["","\n\t\tif (window.DOT) {\n\t\t\tDOT.cfg({service: 'sreality', impress: false});\n\t\t}\n\t","","Loader.load()","",""]
Seems that I seriously do not get it and always get code before Loader.load() acts. And obviously, timeout does not solve it.
This will do the trick
page.open(url, function (status) {
if (status !== 'success') {
console.log('Unable to load the url!');
phantom.exit();
} else {
window.setTimeout(function () {
var results = page.evaluate(function() {
return document.documentElement.innerHTML;
});
console.log(results)
phantom.exit();
}, 200);
}
});
From below code I am trying to load javascript file TestJScript.js dynamically and after loading want to call javascript function LoadData() exist in that file. But I am getting error please check image.
Note: Error get only on IE-8.0.6001 update 0.
Please suggest me correction such that It will work from 6 to all version of IE.
Or any another solution.
if it require any windows updates. Please let me know.
Please don't suggest with JQUERY code
Javascript file code :
function LoadData() {
alert('ok');
}
Code:
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<script>
function LoadJSFile() {
var js = document.createElement("script")
js.setAttribute("type", "text/javascript")
js.setAttribute("src", "C:\\TestJScript.js")
document.getElementsByTagName("head")[0].appendChild(js)
//call below function exist in TestJScript.js file
LoadData();
}
</script>
</head>
<body onload="LoadJSFile();">
</body>
</html>
Error Image:
Try this http://dustindiaz.com/scriptjs.
Like this:
$script('yui-base.js', function() {
// do stuff with base...
$script(['yui-anim.js', 'yui-connect.js'], function() {
// do stuff with anim and connect...
});
$script('yui-drag.js', function() {
// do stuff with drag...
});
});
The error reports a problem in the javascript file that you're loading. So the problem lies not in how you dynamically load the javascript file, but in the javascript file itself.
It looks like there is a problem with the file once it has loaded. Are you sure there is no syntax error in the file itself.
Also, I would recommend you use a relative path to the javascript file instead of the absolute path.
EDIT:
Try this:
function LoadJSFile() {
var script = document.createElement('script');
script.src = "C:\\TestJScript.js";
script.onload = function () {
LoadData();
};
document.getElementsByTagName("head")[0].appendChild(script)
}
You could try the following:
<script>
function LoadJSFile(src, callback) {
var js = document.createElement('script');
js.src = src;
js.async = true;
js.onreadystatechange = js.onload = function() {
var state = js.readyState;
if (!callback.done && (!state || /loaded|complete/.test(state))) {
callback.done = true;
callback();
}
};
document.getElementsByTagName('head')[0].appendChild(js);
}
LoadJSFile('C:\\TestJScript.js', function() {
LoadData();
});
</script>
If you are using c# code then another solution to solve this script error is, invoke script through c# code.
Code:
/
/Assiging html value to control
webBrowser.DocumentText = "HTML content";
//Calling document load completed event
webBrowser.DocumentCompleted += webBrowser_DocumentCompleted;
void webBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
HtmlDocument htmlDocument = webBrowser.Document;
HtmlElement htmlElementHead = htmlDocument.GetElementsByTagName("head")[0];
HtmlElement HtmlElementScript = htmlDocument.CreateElement("script");
HtmlElementScript.SetAttribute("text", "C:\\TestJScript.js");
htmlElementHead.AppendChild(HtmlElementScript);
htmlDocument.InvokeScript("LoadData");
webBrowser.DocumentCompleted -= webBrowser_DocumentCompleted;
}
I'm opening a tab and trying to call a function defined in it but i always get a "reference error". I send a message to the script after the script is loaded asking to run "test1" function located on the tab code.
Here is my code:
Extention code
chrome.tabs.executeScript(tab.id, {file: "script.js", runAt: "document_end"}, function(array){
//send message executeTest to the created tab
chrome.tabs.sendMessage(tab.id, {msg: "executeTest"});
});
script.js
url = document.URL;
window.addEventListener("load", doStuff, true);
function doStuff(){
//listen for messages coming from extension
chrome.runtime.onMessage.addListener(
function(message, sender) {
var msg = message.msg;
var url = message.url;
switch(msg){
case "executeTest":
test1();
break;
}
}
);
}
Tab HTML
<head>
<title>Test</title>
</head>
<body>
<script src="app.js"></script>
</body>
</html>
Tab Javascript
function test1(){
console.log("test1 is running!");
}
I receive the message on the script but it's not possible to execute "test1()".
What am i doing wrong ?
A couple of unrelated issues here.
chrome.tabs.sendMessage(tab.id, {msg: executeTest});
executeTest is not a string, it's an (undefined) identifier. That's what throwing the error.
Correct way (ninja'd by user2570380) is to write
chrome.tabs.sendMessage(tab.id, {msg: "executeTest"});
The other problem is that you will not be able to call test1(), because content scripts live in isolated context.
To bypass it, you either need to employ messaging between the webpage and your extension, use externally_connectable, or inject some of your code into the page to trigger what you need.
Considering that you do not control the webpage, you need to inject code in the page's context. Example:
control.js
window.addEventListener("message", function(event) {
// We only accept messages from ourselves
if (event.source != window)
return;
if (event.data.source && (event.data.source == "MyExtension")) {
switch(event.data.command){
case "test1":
test1();
break;
}
}
}, false);
script.js
// Initialization
var s = document.createElement('script');
// TODO: add "control.js" to web_accessible_resources in manifest.json
s.src = chrome.extension.getURL('control.js');
s.onload = function() {
this.parentNode.removeChild(this);
};
(document.head||document.documentElement).appendChild(s);
// Sending a command (make sure script initialized listener first)
window.postMessage({ source: "MyExtension", command: "test1" }, "*");
chrome.tabs.sendMessage(tab.id, {msg: "executeTest"});
Quotes.
I'm new to PhantomJS and javascript, so forgive me if there is a simple solution.
I'm using PhantomJS to do screenshots of websites, and I want to replace certain DOM nodes with dynamic content from a remote javascript, something like this:
<script language="JavaScript" src="http://server/dynamic.js" type="text/javascript"></script>
My first try:
var page = require('webpage').create();
page.open('http://stackoverflow.com', function (status) {
if (status !== 'success') {
phantom.exit();
}
page.evaluate(function() {
//// Case #1: what I really want
// var s = '<script language="JavaScript" src="http://server/dynamic.js" type="text/javascript"></script>';
//// Case #2: simple js test case
// var s = '<script language="JavaScript" type="text/javascript">document.write("<p>THIS IS A TEST</p>");</script>';
//// Case #3: very simple case
var s = '<b>THIS IS A TEST</b>';
var node = document.getElementById('mainbar');
var pnode = node.parentNode;
var newele = document.createElement('div');
newele.innerHTML = s;
pnode.replaceChild(newele, node);
});
page.render('test.jpg');
phantom.exit();
});
For the simple case (case #3) where I'm just replacing it with some text, it works fine, but not for either of the javascript cases. It seems the javascript isn't being evaluated after being inserted via replaceChild(). I tried page.reload() just before page.render() but it didn't help.
Next, I tried creating the new element using iframe:
var newele = document.createElement('iframe');
newele.setAttribute('src', 'javascript:document.write("<p>THIS IS A TEST</p>");');
This works for the simple js test case #2, but I can't figure out how to make iframe work with what I really want (Case #1).
Any suggestions?
I came up with a workable solution:
var newele = document.createElement('iframe');
newele.setAttribute('src', 'javascript:document.write("<script src=\'http://server/dynamic.js\'></script>");');
I also had to put in a bit of delay:
window.setTimeout(function () {
page.render('test.jpg');
phantom.exit();
}, 500);
This works, but feels a little klugy to me. I would love to hear a better solution.
Nope, you can add your javascript reference to the page like this, right before calling evaluate the first time (excerpt):
...
page.open(url, function (status) {
if (status !== 'success') {
console.log('Error: Cannot load url. Status: {0}.'.format(status));
phantom.exit(1);
} else {
page.injectJs('../../../code/common/jquery.min.js');
// call evaluate once to change the dropdown:
page.evaluate(function (myobj) {
// manipulate DOM here with jquery, or whatever you want.
}, myobj);
...
Please note that you can call evaluate several times, so you can extract stuff from the page, call javascript functions from the page, modify DOM, etc.
I am testing with pure JavaScript if browser seems to support HTML5 and if so, I want to load jQuery and then process the rest of page. If not, some redirection will occur.
<script type="text/javascript">
var canvas = document.createElement('canvas');
if (canvas && canvas.getContext && canvas.getContext('2d')) {
var s = document.getElementsByTagName('script')[0];
var jq = document.createElement('script');
jq.type = 'text/javascript';
jq.src = 'js/jquery.js';
s.parentNode.insertBefore(jq, s);
}
else {
// ... redirection ...
}
</script>
<script type="text/javascript">
$(function () {
//...
}
</script>
But the code above is not working properly, because I got error
Uncaught ReferenceError: $ is not defined
which is clearly saying that jQuery library has not been loaded.
Why? What is wrong with conditional script loading in my code above?
This is a case where it may make sense to use document.write(). You'd need to put this code in the <body> instead of the <head>:
<script type="text/javascript">
var canvas = document.createElement('canvas');
if (canvas && canvas.getContext && canvas.getContext('2d')) {
document.write( '<script src="js/jquery.js"><\/script>' );
}
else {
// ... redirection ...
}
</script>
<script type="text/javascript">
$(function () {
//...
}
</script>
Or, you may be able to use an ordinary <script> tag to load jQuery, but put it after your conditional redirection:
<script>
var canvas = document.createElement('canvas');
if( !( canvas && canvas.getContext && canvas.getContext('2d') ) ) {
// ... redirection ...
}
</script>
<script src="js/jquery.js"></script>
<script>
$(function () {
//...
}
</script>
With either of these approaches, the order of execution is:
The first <script>.
The loading of jquery.js, whether done with document.write() or a simple <script> tag.
The final script.
When you insert a script tag like you are, it will be loaded in the background, not immediately and thus your next script will run before jQuery is loaded. You will need to attach a listener such that you know when jQuery is successfully loaded and you can then run your scripts that use jQuery.
Here's an article that describes how to know when a dynamically loaded script is loaded: http://software.intel.com/en-us/blogs/2010/05/22/dynamically-load-javascript-with-load-completion-notification.
FYI, in your specific case, you also could just have a static script tag that loads jQuery, but place your script that detects whether to redirect or not BEFORE the jQuery script tag. That would be the simplest option.
<script type="text/javascript">
var canvas = document.createElement('canvas');
if (!canvas || !canvas.getContext || !canvas.getContext('2d')) {
// redirect here or whatever
}
</script>
<script type="text/javascript" src="jquery.js">
</script>
<script type="text/javascript">
$(function () {
//...
}
</script>
finally working like a charm, I'm relieved myself !
<!DOCTYPE html>
<html>
<head>
<meta charset=utf-8 />
<title>JS Bin</title>
<script type="text/javascript">
window.onload = function(){
var jqu = "$(console.log('worked'));";
var canvas = document.createElement('canvas');
if (canvas && canvas.getContext && canvas.getContext('2d')) {
var s = document.getElementsByTagName('head')[0];
var jq = document.createElement('script');
jq.setAttribute('type','text/javascript');
jq.innerHTML = jqu;
var jqLoad = document.createElement('script');
jqLoad.setAttribute('type','text/javascript');
jqLoad.setAttribute('src','jquery-1.10.0.js');
jqLoad.setAttribute('id','jqloader');
s.appendChild(jqLoad);
document.getElementById('jqloader').onload = function(){
console.log('loaded');
s.appendChild(jq);
}
}
else {
// ... redirection ...
}
console.log(document);
}
</script>
</head>
<body>
</body>
</html>
jsbin Demo
explanation :
1- using dom functions to append or insert elements are always the best (dynamic and safer more than anything else), and document.write is not recommended over that.
2- at parse-time, whatever functions you have in your script will be evaluated thus you will get an error if you have the script and not loaded the library yet.
3- loading the library and executing the relevant script in the same tag is not recommended. better do the script in another tag (after loading is done completely) to ensure it will work.
4- events for document.onload ensures that the document is loaded and the doms exist so you can append children to them. as for the document.getElementById('jqloader').onload it was just to insure that the jquery library is loaded completely and added to the document, and only then the script will be added after and evaluated.
As others have said, the reason you're getting an error is because you've loaded jQuery asynchronously and it hasn't loaded yet.
There are two ways to accomplish what you want.
You can poll for window.jQuery, or you can use an asynchronous loader callback.
Since you only load jQuery only when you detect canvas support, you won't have to worry about supporting old browsers.
var async_script_load = function (s, callback) {
var script;
script = document.createElement("script");
script.async = "async";
if (s.scriptCharset) {
script.charset = s.scriptCharset;
}
script.src = s.url;
// Attach handlers for all browsers
script.onload = script.onreadystatechange = function () {
if (!script.readyState || /loaded|complete/.test(script.readyState)) {
// Handle memory leak in IE
script.onload = script.onreadystatechange = null;
// Remove the script
if (head && script.parentNode) {
head.removeChild(script);
}
// Dereference the script
script = undefined;
callback(200, "success");
}
};
// Use insertBefore instead of appendChild to circumvent an IE6 bug.
// This arises when a base node is used (#2709 and #4378).
head.insertBefore(script, head.firstChild);
};
async_loader({url:'http://tempuri.org/jquery.min.js'},function() {
//call jquery here.
});
For a polling method, it's as simple as:
var checkJq = function() {
if(window.jQuery) {
//do jQuery
} else {
setTimeout(checkJq,100);
}
}
setTimeout(checkJq,100);