Is there a way to debug the page.open method of PhantomJS? My application loads some files saved locally, but unfortunately the only information one can get when opening the page is whether it was loaded successfully or not. What's more interesting is that the very same page loads properly when opened in a browser.
Here's my code :
// PhantomJS script: opens one page and renders it to a timestamped PNG file.
var system = require('system'),
page = require('webpage').create();
// Opens the hard-coded URL; on success renders the page and then exits.
var openPage = function () {
// NOTE(review): this literal has backslashes after the scheme (the string value
// is http:\localhost:...); a URL needs forward slashes here.
var url = 'http:\\localhost:53794/file.html';
page.open(url, function (status) {
if (status !== 'success') {
console.log("FAIL:" + url);
// NOTE(review): there is no return after this exit call - confirm the render
// code below is not meant to run on failure.
phantom.exit(2);
}
var date = new Date().getTime();
// NOTE(review): outputPath and outputArray are not defined anywhere in this snippet.
var outputFilename = outputPath + 'print-' + date + '.png';
// Rendering and the final exit are each deferred by a 1 ms timer.
setTimeout(function () {
page.render(outputFilename);
outputArray.push(outputFilename);
setTimeout(function () {
// The script exits with code 1 after a render.
phantom.exit(1);
}, 1);
}, 1);
});
}
openPage();
via: http://newspaint.wordpress.com/2013/04/25/getting-to-the-bottom-of-why-a-phantomjs-page-load-fails/
After creating the page variable, but before calling page.open() add the following code:
// Remember the most recent resource failure on the page object itself so the
// page.open() callback can report which URL failed and why.
page.onResourceError = function (failure) {
    page.reason_url = failure.url;
    page.reason = failure.errorString;
};
Now you can print out the reason for a problem in your page.open() callback, e.g.:
var page = require('webpage').create();

// Keep the details of the last failed resource on the page object.
page.onResourceError = function (failure) {
    page.reason_url = failure.url;
    page.reason = failure.errorString;
};

// Try to open a host that does not exist and report the outcome.
page.open("http://www.nosuchdomain/", function (status) {
    if (status === 'success') {
        console.log("Successful page open!");
        phantom.exit(0);
        return;
    }

    // The failure details were recorded by onResourceError above.
    var message = "Error opening url \"" + page.reason_url + "\": " + page.reason;
    console.log(message);
    phantom.exit(1);
});
Debugging Function
If you read further down the blog, he has some more suggested event handlers to add. I adapted them into a function that you can use to inject the event handlers into your page object (instead of having them defined in your main code)
// Injects debugging event handlers into a PhantomJS page object.
//
// Every handler writes a short trace to stderr. The resource-error handler
// additionally records the last failure on the page object (page.reason and
// page.reason_url) so a page.open() callback can report why the load failed.
//
// usage:
// var page = require('webpage').create();
// var system = require('system');
// addDebugEvents(page,system);
//
// page   - PhantomJS webpage object that receives the handlers
// system - PhantomJS system module; only system.stderr.writeLine is used
function addDebugEvents(page, system) {
    // All trace output goes through this one helper.
    function trace(line) {
        system.stderr.writeLine(line);
    }

    page.onResourceRequested = function (request) {
        trace('= onResourceRequested()');
        trace(' request: ' + JSON.stringify(request, undefined, 4));
    };

    page.onResourceReceived = function (response) {
        trace('= onResourceReceived()');
        trace(' id: ' + response.id + ', stage: "' + response.stage + '", response: ' + JSON.stringify(response));
    };

    page.onLoadStarted = function () {
        trace('= onLoadStarted()');
        // Ask the page itself which URL is being left.
        var currentUrl = page.evaluate(function () {
            return window.location.href;
        });
        trace(' leaving url: ' + currentUrl);
    };

    page.onLoadFinished = function (status) {
        trace('= onLoadFinished()');
        trace(' status: ' + status);
    };

    page.onNavigationRequested = function (url, type, willNavigate, main) {
        trace('= onNavigationRequested');
        trace(' destination_url: ' + url);
        trace(' type (cause): ' + type);
        trace(' will navigate: ' + willNavigate);
        trace(' from page\'s main frame: ' + main);
    };

    // A single handler does both jobs: a property holds only one function, so
    // recording the failure and logging it must happen in the same handler.
    page.onResourceError = function (resourceError) {
        page.reason = resourceError.errorString;
        page.reason_url = resourceError.url;
        trace('= onResourceError()');
        trace(' - unable to load url: "' + resourceError.url + '"');
        trace(' - error code: ' + resourceError.errorCode + ', description: ' + resourceError.errorString);
    };

    page.onError = function (msg, stack) {
        trace('= onError()');
        var msgStack = [' ERROR: ' + msg];
        if (stack) {
            msgStack.push(' TRACE:');
            stack.forEach(function (t) {
                msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
            });
        }
        trace(msgStack.join('\n'));
    };
}
You should change the URL
from
http:\\localhost:53794/file.html
to
http://localhost:53794/file.html
Related
A day ago I started experimenting with PhantomJS and its ability to read JavaScript-generated data from websites (web scraping).
I'm trying to get an element's text content by ID, but sometimes the page I'm crawling doesn't contain that element, and then I get this error:
ERROR: TypeError: null is not an object (evaluating 'document.getElementById('resInfo-0').textContent')
TRACE:
-> undefined: 2
-> : 5
Screenshot from the Command Prompt:
My code so far:
1 step: reading the data from the file.
// Step 1: read the input file into the urls array, one entry per line.
var file = "path to text file";
var fs = require('fs');
var stream = fs.open(file, 'r');
var urls = [];
// Position of the entry currently being crawled; the crawl starts at 0.
var index = 0;
console.log("READING A FILE...");
try {
    while (!stream.atEnd()) {
        urls.push(stream.readLine());
    }
} finally {
    // Release the file handle even if reading fails part-way through.
    stream.close();
}
console.log("FINISHED READING THE FILE");
2 step: Reading the data from the websites.
// Crawls one search-result page for urls[index] and appends the extracted
// number to output.txt, then schedules next().
function web_page()
{
// NOTE(review): webPage and page are assigned without var, so they are globals;
// next() uses the global page when it calls page.close().
webPage = require('webpage');
page = webPage.create();
// Page-level script errors: print message and trace plus the current entry, then exit.
page.onError = function(msg, trace)
{
var msgStack = ['ERROR: ' + msg];
if (trace && trace.length) {
msgStack.push('TRACE:');
trace.forEach(function(t) {
// NOTE(review): the string literal in the next statement is split across two
// lines (looks like a line-wrap artifact of the paste); it has to be rejoined
// before the script can parse.
msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in
function "' + t.function +'")' : ''));
});
}
console.log(msgStack.join('\n') + " URL: " + urls[index]);
phantom.exit(0);
};
// Errors raised in the PhantomJS script context: same report with a different prefix.
phantom.onError = function(msg, trace)
{
var msgStack = ['PHANTOM ERROR: ' + msg];
if (trace && trace.length) {
msgStack.push('TRACE:');
trace.forEach(function(t) {
msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line +
(t.function ? ' (in function ' + t.function +')' : ''));
});
}
console.log(msgStack.join('\n')+ " URL: " + urls[index]);
phantom.exit(0);
};
// The search term is appended to the query string as-is.
page.open('http://www.delfi.lt/paieska/?q='+urls[index], function(status)
{
if (status !== 'success')
{
console.log('Unable to access network');
}
else
{
var fs = require('fs');
var path = 'output.txt';
var content = page.content;
// NOTE(review): getElementById returns null when the page has no element with
// this id; reading textContent from null throws the TypeError quoted in the
// question. The null check further down runs too late to prevent that.
var ua = page.evaluate(function()
{
var x = document.getElementById('resInfo-0').textContent;
return x;
});
if ( ua != null && ua != "" )
{
// Keep the text before the first opening parenthesis and strip every non-digit.
var indexas = ua.indexOf("(");
ua = ua.substr(0,indexas);
ua = ua.replace(/\D/g,'');
fs.write(path,urls[index] + " - "+ ua + "\r\n", 'a+');
}
}
});
// This timer is set outside the page.open callback, so next() is scheduled one
// second after the open call is issued, not after the callback has run.
setTimeout(next,1000);
}
console.log("STARTING TO CRAW WEBSITES...");
web_page();
// Moves the crawl on to the next entry of urls, or ends the script once every
// entry has been visited.
function next()
{
    // Bound the crawl by the number of entries actually read from the input
    // file, so urls[index] is always defined when web_page() builds its query.
    if (index + 1 < urls.length) {
        page.close();
        index++;
        web_page();
        return;
    }
    console.log("FINISHED CRAWLING PROCESS");
    phantom.exit(0);
}
var ua = page.evaluate(function()
{
var x = document.getElementById('resInfo-0').textContent;
return x;
});
The error comes from here probably:
var ua = page.evaluate(function()
{
var x = document.getElementById('resInfo-0').textContent;
return x;
});
What I've tried:
if ( document.getElementById('resInfo-0').textContent != null )
if ( document.getElementById('resInfo-0').textContent != "" )
So why can't it become null without triggering this error?
PhantomJS version is 2.1.1 binary windows package.
Sometimes document.getElementById('resInfo-0') is null, but you're still trying to get .textContent of it, hence the error. Try
var elem = document.getElementById('resInfo-0');
if(elem !== null) {
return elem.textContent;
}
return false;
I've been trying to do this since a long time.
[PHANTOMJS VERSION] => 2.1.1
My goal is to log in to a portal, navigate to a different menu once logged in, enter parameters to search for available files, and download each file that has not been downloaded yet.
The problem is that I can log in to the portal and reach the index page, but when I click the "a" tag that acts as the button leading to the search form, I am sent back to the login page. I don't know why that happens. If I run the same script in the Chrome dev tools, it works fine.
LOGIN SUCCESFUL
REDIRECTED TO LOGIN PAGE
This is my code:
<pre><code>var page = require('webpage').create();
var system= require('system');
var loadInProgress = false;
var testindex = 0;
var address;
var pos;
var stringa;
// Forward console output produced inside the page to the PhantomJS console.
page.onConsoleMessage = function (text) {
    console.log(text);
};

// Report script errors raised inside the page, with a stack trace when one is supplied.
page.onError = function (msg, trace) {
    var report = ['ERROR: ' + msg];
    var hasTrace = trace && trace.length;
    if (hasTrace) {
        var frames = trace.map(function (frame) {
            var where = frame.function ? ' (in function "' + frame.function + '")' : '';
            return ' -> ' + frame.file + ': ' + frame.line + where;
        });
        report = report.concat(['TRACE:'], frames);
    }
    console.error(report.join('\n'));
};

// Surface alert() dialogs opened by the page.
page.onAlert = function (text) {
    console.log('alert!!> ' + text);
};

// Mark a load as running so the step runner waits for it.
page.onLoadStarted = function () {
    loadInProgress = true;
    console.log("CARICAMENTO PAGINA INIZIATO"); //*Page started loading*
};

// Clear the loading flag; on success take a screenshot and print the current URL.
page.onLoadFinished = function (status) {
    loadInProgress = false;

    if (status !== 'success') {
        console.log('Caricamento pagina fallito !'); //*Page failed to load*
        phantom.exit();
        return;
    }

    console.log("CARICAMENTO PAGINA COMPLETATO"); //*Page loading complete*
    page.render("foto" + testindex + ".png"); //*Screen Capture*
    console.log("Foto effettuata");
    var currentLocation = page.evaluate(function () {
        return window.location.href;
    });
    console.log(currentLocation);
};
// Ordered list of actions; the runner below executes one of them on each tick
// during which no page load is in progress.
var steps = [
    function () {
        // Open the portal address given as the first command-line argument.
        page.open(system.args[1]);
    },
    function () {
        page.evaluate(function () {
            //*insert username and password and click the button to submit the form*
            document.getElementById("twsTemplate_Content1_twsModule_txtUser").value = "MYUSERNAME";
            document.getElementById("twsTemplate_Content1_twsModule_txtPSW").value = "MYPASSWORD";
            document.getElementById("twsTemplate_Content1_twsModule_btnLogin").click();
        });
        //*Capture inserted credentials*
        page.render("credenziali.png");
    },
    function () {
        page.evaluate(function () {
            var menu = document.getElementById("twsTemplate_Header1_Mainmenu2_aspMainMenun16Items");
            var links = menu.getElementsByTagName("a");
            links[0].click();
        });
        //*Capture click of the button that brings me to the form where I ve to insert parameters for the search*
        page.render("Cliccato_ricerca.png");
    },
    function () {
        //*I capture the screen as I should be to the form page but it caputre the login page unfilled*
        page.render("ramomisura.png");
        console.log("stouscendo");
    }
];

// Step runner: polls every 100 ms. Declared with var so the timer handle is an
// explicit variable that can be cleared before the script exits.
var interval = setInterval(function () {
    if (!loadInProgress && typeof steps[testindex] == "function") {
        // Progress message per step; any step past the third gets the final label.
        var labels = [
            " : Avvio portale",
            " : Loing in corso ...",
            " : Tentativo di navigazione al ramo misure ..."
        ];
        var label = testindex < labels.length ? labels[testindex] : " : Termine Programma";
        console.log("Fase " + (testindex + 1) + label);
        steps[testindex]();
        testindex++;
    }
    if (typeof steps[testindex] != "function") {
        console.log("Programma Terminato");
        // Stop polling so no further tick runs while PhantomJS shuts down.
        clearInterval(interval);
        phantom.exit();
    }
}, 100);
</code></pre>
I am developing a way to get callbacks in the browser page after an emit to the socket.io server.
server code:
/**
 * exec php file and retrieve the stdout
 *
 * func - key into php_ selecting the script to run (sent by the client)
 * uid  - id chosen by the client; echoed back so it can match the reply
 * data - not used by this handler
 */
socket.on('php', function(func, uid, data) {
    console.log('php'.green + ' act:' + func + ' uid:' + uid);
    // func arrives from the client: refuse keys that are not registered in php_
    // instead of throwing on php_[func].exec.
    if (!Object.prototype.hasOwnProperty.call(php_, func)) {
        console.log('err '.red + 'unknown php action: ' + func);
        return;
    }
    runner.exec("php " + php_[func].exec,
        function(err, stdout, stderr) {
            if (err == null) {
                socket.emit('php', {uid: uid, o: stdout});
                console.log('emitted');
            } else {
                console.log('err '.red + stdout + ' ' + stderr);
            }
        });
});
this code executes a php page and retrieves the output to display or parse in the browser
it receives an id to echo back to the page, so I can know what function to execute
browser code to execute callbacks:
function log(text) {
$('.out').append(text + '<br />');
}
window.callbacks = [];
// Registers callback c in window.callbacks and returns the slot it was stored in.
// The slot is one past the highest existing index, and never lower than 1.
function setcb(c) {
    var slot = Math.max(window.callbacks.length, 1);
    window.callbacks[slot] = c;
    return slot;
}
// Returns the value of cookie k from document.cookie, or undefined when no
// cookie with that name is present.
function C(k) {
    var found = document.cookie.match('(^|; )' + k + '=([^;]*)');
    if (!found) {
        return undefined;
    }
    return found[2];
}
var s = io.connect("http://"+ window.location.host +":8088");
//s.emit('debug', 'here');
s.on('getid', function(){
console.log('getid cookieis: ' + C('igr') );
s.emit('setid', C('igr'));
});
s.emit('php', 'test',
setcb(
function () {
var data = JSON.parse(this);
log('callback passed' + this);
}
), null
);
// Handles a 'php' reply from the server: looks up the callback stored under
// data.uid, invokes it with data.o as its this value, then removes it.
s.on('php', function(data) {
//window.callbacks[j.uid].call(j.o);
log('rec' + JSON.stringify(data));
//var jsn = JSON.parse(data);
console.log(data);
console.log(window.callbacks[data.uid]);
window.callbacks[data.uid].call(data.o);
// NOTE(review): delete empties the slot, and the splice that follows removes
// it, which shifts every later entry down by one index. A callback registered
// under a higher uid then no longer sits at its own uid - this looks like the
// reason a second concurrent request cannot find its callback. Confirm.
delete window.callbacks[data.uid];
window.callbacks.splice(data.uid, 1);
console.log(window.callbacks);
});
this is working, but when I try to make two requests at the same time, it doesn't run like expected, leaving one callback to execute and in the callbacks array.
test code:
s.emit('php', 'test',
setcb(
function (data) {log('callback passed' + this);}
), null
);
s.emit('php', 'test',
setcb(
function (data) {log('callback passed' + this);}
), null
);
I want to eliminate this error, and for each event received, execute the callback I define.
This is way more simple than I've imagined
You can pass by reference the callback.
server side code:
/**
 * exec php file and retrieve the stdout
 *
 * func     - key into php_ selecting the script to run (sent by the client)
 * data     - not used by this handler
 * callback - acknowledgement function supplied by the client; receives stdout
 */
socket.on('php', function(func, data, callback) {
    console.log('php'.green + ' act:' + func);
    // func arrives from the client: refuse keys that are not registered in php_
    // instead of throwing on php_[func].exec.
    if (!Object.prototype.hasOwnProperty.call(php_, func)) {
        console.log('err '.red + 'unknown php action: ' + func);
        return;
    }
    runner.exec("php " + php_[func].exec,
        function(err, stdout, stderr) {
            if (err == null) {
                // A client may emit without an acknowledgement function.
                if (typeof callback === 'function') {
                    callback(stdout);
                }
                console.log('emitted');
            } else {
                console.log('err '.red + stdout + ' ' + stderr);
            }
        });
});
client side code:
function log(text) {
$('.out').append(text + '<br />');
}
// Returns the value of cookie k from document.cookie, or undefined when no
// cookie with that name is present.
function C(k) {
    var found = document.cookie.match('(^|; )' + k + '=([^;]*)');
    if (!found) {
        return undefined;
    }
    return found[2];
}
var s = io.connect("http://"+ window.location.host +":8088");
//s.emit('debug', 'here');
s.on('getid', function(){
console.log('getid cookieis: ' + C('igr') );
s.emit('setid', C('igr'));
});
s.emit('php', 'test', null,
function(data) { log('d: ' + JSON.stringify(data)); }
);
I am developing an app which consumes a WebService like this
// Builds a SOAP envelope for the uploadFileService operation from the three
// arguments, posts it with WinJS.xhr and updates the result element with the outcome.
// filebytes, fpath, filename - values inserted verbatim into the XML body
// Returns the local variable response (see the note at the return statement).
function callWS(filebytes, fpath, filename) { //consumes the webservice
// Assigned only inside the promise handlers below.
var response;
var data = '<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:ws="http://ws.myCompany.com">\n' +
' <soapenv:Header/>\n' +
' <soapenv:Body>\n' +
' <ws:uploadFileService>\n' +
' <ws:filebytes>' + filebytes + '</ws:filebytes>\n' +
' <ws:fpath>' + fpath + '</ws:fpath>\n' +
' <ws:filename>' + filename + '</ws:filename>\n' +
' </ws:uploadFileService>\n' +
' </soapenv:Body>\n' +
'</soapenv:Envelope>\n';
console.log("XML SOAP: " + data + "\r\n");
// Request description handed to WinJS.xhr.
var options = {
url: "http://XXX.XXX.XX.XXX:XXXX/FILESERVERWS/services/FILESERVERWS?wsdl",
type: "post",
headers: {
"Content-Type": "text/xml; charset=utf-8",
"SOAPAction": "uploadFileService"
},
data: data
};
// The request is wrapped in WinJS.Promise.timeout with a value of 8000
// (presumably milliseconds - confirm). The three handlers are: completed,
// error and progress.
// NOTE(review): result is not declared in this function; presumably an element
// defined elsewhere - verify.
WinJS.Promise.timeout(8000, WinJS.xhr(options)).then(
function (request) {
var doc = request.responseXML.documentElement;
var output = doc.getElementsByTagName("uploadFileServiceReturn");
//Windows.UI.Popups.MessageDialog(output[0].textContent, "the XML message").showAsync();
console.log("the XML message: " + output[0].textContent + "\r\n");
result.style.backgroundColor = "#00A000";
response = true;
},
function (error) {
Windows.UI.Popups.MessageDialog(error.status + " : " + error.statusText, "Status").showAsync();
result.style.backgroundColor = "#FF0000";
response = false;
},
function (progress) {
result.innerText = "Ready state is " + progress.readyState;
result.style.backgroundColor = "#0000A0";
}
);
// NOTE(review): this return executes as soon as the promise chain has been set
// up; the handlers that assign response have not run yet, so the value handed
// back here is still undefined, which the caller treats as false.
return response;
}
the purpose is to consume the webService and returns a value
on success response = true
on error response = false
because I want to take an action depending if the webService returned a value by doing this
if (callWS(document.getElementById("formfield" + i).value, UID_KEY[7], arrayCaptures[i - 1].name)) {
console.log("take action a");
}
else {
console.log("take action b");
}
But it always takes action B, even when the web service is consumed and I get an answer from it. What am I doing wrong?
You'll need to return a Promise object from your function, allowing the calling script to use a then() or done() call on it to get the result. You can read more about asynchronous programming in WinJS on the msdn site, but generally it looks like this:
function callWS(filebytes, fpath, filename) {
return new WinJS.Promise(function (complete, error) {
// put your functionality here...
WinJS.Promise.timeout(8000, WinJS.xhr(options)).then(
function (request) {
// more processing...
complete(true); // or false or a variable...
},
function (e) {
// error handling unique to this function
complete(false); // OR you could just call error(e);
},
...
);
});
}
And you would use it like this:
callWS( ... ).then(
function(response) {
// handle response...
// will be either true or false
},
function(err) {
// handle errors
}
);
I want my context menu item to be visible only if the clicked node is a link whose href is either a magnet link or a torrent link. But the item is visible for all links because the context function is not executing. Can anybody explain why the context function is not executing?
Here is the code:
// Add-on entry point: registers an Add to Furk context-menu item for links.
// The content script is assembled from string pieces, so it reaches the page
// as a single line of code.
exports.main = function() {
var cm = require("sdk/context-menu");
// Context listener: climbs from the clicked node to the enclosing A element and
// returns true only when its href matches the magnet or torrent pattern.
var contextCode = ' self.on("context", function (node) { '+
' while(node.nodeName!="A") { node = node.parentNode; } '+
' var pat_magnet = /^magnet:/i; ' +
' var pat_torrent = /.torrent$/i; ' +
' if(pat_torrent.test(node.href) || pat_magnet.test(node.href)) { return true; } '+
' else { return false; } '+
' }); ';
// Click listener: extracts a 32 to 40 character hex hash from the href when one
// is present and calls the Furk API with either the hash or the encoded URL.
// NOTE(review): the piece that declares hash is not followed by a + operator,
// and the statement inside it has no semicolon, so the concatenation of
// clickCode stops at that piece.
// NOTE(review): inside the onreadystatechange piece, readyState and
// response.status are written with a single = (assignment) where a comparison
// looks intended - confirm.
var clickCode = ' self.on("click", function(node,data){ '+
' while(node.nodeName!="A") { node = node.parentNode; } '+
' var pat_hash = /[0-9abcdef]{32,40}/i; ' +
' var result = node.href.match(pat_hash); '+
' var hash = "" '
' if(result != null) { hash=result[0]; } '+
' var xhr = new XMLHttpRequest(); '+
' if(hash != "") { '+
' var apiCall = "https://www.furk.net/api/dl/add?api_key=*************&info_hash="+hash; '+
' } '+
' else{ '+
' var apiCall = "https://www.furk.net/api/dl/add?api_key=*************&url="+encodeURI(node.href); '+
' } '+
' xhr.open("GET",apiCall,true); '+
' xhr.onreadystatechange = function(){ if(xhr.readyState = 4) { if (xhr.response.status = "ok") { alert("Torrent added to Furk."); } else { alert("Torrent could not be added to Furk."); } } } '+
' xhr.send(null); '+
' });';
// The item is offered for every anchor that has an href; both listeners travel
// in one content script.
cm.Item({
label: "Add to Furk",
context: cm.SelectorContext("a[href]"),
contentScript: contextCode + clickCode
});
};
In the future, please always post self-contained examples that can be tried directly.
Now back to your problem: The content script actually has a syntax error.
The following line:
' var pat_torrent = /.torrent$/i ' +
lacks a semicolon, and should be:
' var pat_torrent = /.torrent$/i; ' +
The reason automatic semicolon insertion (ASI) does not work here is: The "code" is actually a string that has no newlines in it whatsoever. If there were newlines, then ASI would have worked.
Anyway, this is another reason not to put a complex content script inline. Have a look at contentScriptFile.
This error is actually logged, but the presentation sucks. In the Browser Console:
[20:57:51.707] [object Error] (expandable)
In terminal:
console.error: context-magnet:
Message: SyntaxError: missing ; before statement
Here is a fixed, reproducible sample:
var cm = require("sdk/context-menu");

// Content script, kept as a list of pieces and joined into one string. The
// listener climbs to the enclosing A element and shows the item only for
// hrefs that match the magnet or torrent pattern.
var contextCode = [
    ' self.on("context", function (node) { ',
    ' while(node.nodeName!="A") { node = node.parentNode; } ',
    ' var pat_magnet = /^magnet:/i; ',
    ' var pat_torrent = /.torrent$/i; ',
    ' if(pat_torrent.test(node.href) || pat_magnet.test(node.href)) { return true; } ',
    ' else { return false; } ',
    ' }); '
].join('');

// Offer the item on every anchor that carries an href.
var itemOptions = {
    label: "magnet test",
    context: cm.SelectorContext("a[href]"),
    contentScript: contextCode
};
cm.Item(itemOptions);
Edit: ' var hash = "" ' has the same problem, and there might be other such errors that I missed while skimming this new code.
As I already said, please use contentScriptFile and not contentScript for long-ish scripts.
Another edit
Here is a builder using contentScriptFile, where I also fixed a couple of other errors, the most important of which are:
Use permissions so that the XHR will work.
Correctly set up the XHR to use responseType and overrideMimeType().
Use onload/onerror instead of onreadystatechange.