how to ignore errors in phantomjs - javascript

I have a web crawler that uses PhantomJS to parse pages.
I want to get the HTML, but I always get errors of this type in the output before the HTML code:
ReferenceError: Can't find variable: collapse_content_selector
http://staticloads.com/js/toggle.js?v=2013.10.04:135
TypeError: 'undefined' is not a function (evaluating '$('[placeholder]').placeholderLabel()')
How can I stop that?

The easiest way is to add an error handler to phantom.onError or webpage.onError.
These callbacks are invoked when there is a JavaScript execution error (in the page or in your script).
// Handler for JavaScript errors raised while the page runs. It assembles the
// message and stack trace but prints nothing unless the console.error line at
// the bottom is uncommented.
page.onError = function(msg, trace) {
    var report = ['ERROR: ' + msg];

    // One trace entry -> one line: " -> file: line", plus the function name when there is one.
    function describeFrame(frame) {
        var text = ' -> ' + frame.file + ': ' + frame.line;
        if (frame.function) {
            text += ' (in function "' + frame.function + '")';
        }
        return text;
    }

    if (trace && trace.length) {
        report.push('TRACE:');
        trace.forEach(function(frame) {
            report.push(describeFrame(frame));
        });
    }
    // uncomment to log into the console
    // console.error(report.join('\n'));
};

Related

Async functions within typeaheadjs

This function works within typeahead.js context,
I use lodash, request-promise and cheerio
to grab/parse/organize my data.
console.log shows every variable as it should be,
but with this line :
return '<p>' + img + data.name + ' - ' + getProviders(data) + '</p>'
i get the following error:
Uncaught TypeError: Failed to execute 'appendChild' on 'Node': parameter 1 is not of type 'Node'.
I'm not sure what's happening; this might have something to do with typeahead.js trying to append an invalid (because not yet complete) DOM element.
PS: let img = $(selector).attr('src'); is intended (it stores the src attribute as a string)
PS2: if I omit img it works:
return '<p>' + data.name + ' - ' + getProviders(data) + '</p>'
// Callback from the question: requests `url` and reads the `src` attribute of
// the element matched by `selector`, then builds a '<p>…</p>' string from it.
// NOTE(review): the outer function has no `return` of its own; the only
// `return` of a string below belongs to the request.get callback.
function(data) {
// One single-key object per key of data.url: { <key>: data.url[<key>] }.
let imgProviders = _.keys(data.url).map((r) => {
return {[r]: data.url[r]};
});
// Value stored under the first key, with one trailing "/" removed.
let url = imgProviders[0][_.keys(imgProviders[0])[0]].replace(/\/$/, '');
// coverIMG of the `sites` entry whose providerName equals that first key.
let selector = _.find(sites, {
providerName: _.keys(imgProviders[0])[0]
}).coverIMG;
// `body` receives whatever request.get itself returns, not the string built
// inside the callback. `err` is never inspected.
let body = request.get(url, (err, res, html) => {
// Assigns to `$` without a declaration anywhere in this snippet.
$ = cheerio.load(html);
// the variable that is messing with me
let img = $(selector).attr('src');
// works fine when console.log()
console.log(img, data.name, getProviders(data));
// Returned to whoever invokes this callback, not to the caller of the outer function.
return '<p>' + img + data.name + ' - ' + getProviders(data) + '</p>';
});
}

How to retrieve and alert JSON.stringify

I'm trying to alert the user that the file size is not allowed, and show the size of the image.
in my code I'm doing:
alert('Error' + [JSON.stringify(err)]);
The alert message returns:
Error[{"message":"File size not allowed","error":"FILE_SIZE","param":10722753}]
I would like the alert to say:
File size not allowed 10722753
How can I clean up this error message and get the details out of the err object?
var err = {"message":"File size not allowed","error":"FILE_SIZE","param":10722753};
document.write('Error ' + err.message + " " + err.param);
You can directly access the JSON properties
alert('Error ' + err.message + " " + err.param);
You need to use the fields of your JSON object:
alert(err.message + " " + err.param );
May be this will help you.
var err=[{"message":"File size not allowed","error":"FILE_SIZE","param":10722753}];
alert("Error Message: "+err[0].message);
You will get:
Error Message: File size not allowed

Retrieved anchors list gets corrupted?

I am trying to analyze anchor links ( their text property ) in PhantomJS.
The retrieval happens here:
var list = page.evaluate(function() {
return document.getElementsByTagName('a');
});
This returns an object with a length property, which is good (it is the same length I get when running document.getElementsByTagName('a'); in the console). But the vast majority of the elements in the object have the value null, which is not good. I have no idea why this is happening.
I have been playing with converting to a real array thru slice which did no good. I have tried different sites, no difference. I have dumped the .png file to verify proper loading and the site is properly loaded.
This is obviously not the full script, but a minimal script that shows the problem on a well known public site ;)
How can I retrieve the full list of anchors from the loaded page ?
var page = require('webpage').create();
// Page error hook: writes the error message and, when a trace is supplied,
// one line per stack frame through console.error.
page.onError = function(msg, trace)
{
    var report = ['PAGE ERROR: ' + msg];

    // One trace entry -> " -> file: line", plus the function name when there is one.
    function describeFrame(frame) {
        var text = ' -> ' + frame.file + ': ' + frame.line;
        if (frame.function) {
            text += ' (in function "' + frame.function + '")';
        }
        return text;
    }

    if (trace && trace.length) {
        report.push('TRACE:');
        trace.forEach(function(frame) {
            report.push(describeFrame(frame));
        });
    }
    console.error(report.join('\n'));
};
// Script-level error hook: writes the message and trace through console.error,
// then calls phantom.exit(1).
phantom.onError = function(msg, trace)
{
    var report = ['PHANTOM ERROR: ' + msg];

    // One trace entry -> " -> source: line"; the source is frame.file, or
    // frame.sourceURL when file is falsy.
    function describeFrame(frame) {
        var text = ' -> ' + (frame.file || frame.sourceURL) + ': ' + frame.line;
        if (frame.function) {
            text += ' (in function ' + frame.function + ')';
        }
        return text;
    }

    if (trace && trace.length) {
        report.push('TRACE:');
        trace.forEach(function(frame) {
            report.push(describeFrame(frame));
        });
    }
    console.error(report.join('\n'));
    phantom.exit(1);
};
// Opens `url` in `page`; once loading finishes it logs the status, renders a
// screenshot to login.png, fetches the page's <a> elements through
// page.evaluate and logs each entry before exiting.
// url: address handed to page.open.
function start( url )
{
page.open( url , function (status)
{
console.log( 'Loaded' , url , ': ' , status );
// NOTE(review): there is no `return` after this phantom.exit(0), so the
// statements below still follow it in source order when loading failed —
// confirm whether phantom.exit stops execution immediately.
if( status != 'success' )
phantom.exit( 0 );
page.render( 'login.png');
// The callback runs via page.evaluate and returns the element collection itself.
var list = page.evaluate(function() {
return document.getElementsByTagName('a');
});
console.log( 'List length: ' , list.length );
for( var i = 0 ; i < list.length ; i++ )
{
// Falsy entries are only reported (index, typeof, null/undefined checks) and skipped.
if( !list[i] )
{
console.log( i , typeof list[i] , list[i] === null , list[i] === undefined );
//list[i] === null -> true for the problematic anchors
continue;
}
console.log( i, list[i].innerText , ',' , list[i].text /*, JSON.stringify( list[i] ) */ );
}
//Exit with grace
phantom.exit( 0 );
});
}
start( 'http://data.stackexchange.com/' );
//start( 'http://data.stackexchange.com/account/login?returnurl=/' );
The current version of PhantomJS permits only primitive types (boolean, string, number, [] and {}) to pass to and from the page context. Anything else — functions, and DOM elements along with them — is stripped, which is why the anchors come back as null. t.niese found the quote from the docs:
Note: The arguments and the return value to the evaluate function must be a simple primitive object. The rule of thumb: if it can be serialized via JSON, then it is fine.
Closures, functions, DOM nodes, etc. will not work!
You need to do a part of the work inside of the page context. If you want the innerText property of every node, then you need to map it to a primitive type first:
// Collect the innerText of every <a> inside the page and hand back only those
// values, rather than the elements themselves.
var list = page.evaluate(function() {
    var anchors = document.getElementsByTagName('a');
    var toText = function(a) {
        return a.innerText;
    };
    return Array.prototype.map.call(anchors, toText);
});
console.log(list[0]); // innerText
You can of course map multiple properties at the same time:
return Array.prototype.map.call(document.getElementsByTagName('a'), function(a){
return { text: a.innerText, href: a.href };
});

js exception position

For the JavaScript code like this:
try {
MyJob.process();
} catch(e) {
console.log("Exception occur!");
}
I run the code in Chrome or Firefox. When the exception happens, the line number of "Exception occur!" is shown in the console, but the original exception in MyJob is not. Is there any solution that shows the original position where the exception happened while keeping the try-catch that I wrote here?
// Global error hook: shows the message, script URL and line number in an
// alert, then returns true.
window.onerror = function ( msg, url, num ) {
    var report = "Error: " + msg;
    report += "\nURL: " + url;
    report += "\nLine: " + num;
    alert ( report );
    return true;
};
This will show most of the errors.
In the catch block add:
catch(e) {
console.log( e.name + ": " + e.message );
}
More about the error handling at JavaScriptkit
If the try/catch block is inside a function, you could take advantage of arguments.callee (documented on MDN and MSDN):
// Demonstrates reporting the name of the enclosing function from inside its
// own catch block. someFunction is not defined in this snippet, so the call
// below is what throws.
// NOTE(review): arguments.callee is not accessible in strict-mode code (it
// throws a TypeError there) — confirm the target code is non-strict before reuse.
function foo() {
try {
someFunction();
}
catch (e) {
// Source text of this function with the leading "function " removed.
var f = arguments.callee.toString().substr ( "function ".length);
// Keep only what precedes the first "(" — the function name as written in the source.
f = f.substr(0, f.indexOf('('));
alert ( "Error type : " + e.name + "\nError : " + e.message + "\nIn function : " + f );
}
}
Result will be:
Error type : ReferenceError
Error : someFunction is not defined
In function : foo
Temporarily comment out the try and catch, then step through with the Chrome (or Firefox) JavaScript debugging tools.
//try {
MyJob.process();
/*} catch(e) {
console.log("Exception occur!");
}*/
After identifying the issue, remove the comments to restore the original error handling.

Regular Expression not working in JavaScript

This works fine if I write jpg|png|jpeg|gif directly in the regular expression:
if (!(ext && /^(jpg|png|jpeg|gif)$/.test(ext))) {
alert('Error: extension is not allowed!' + Extensions + ' file ext: ' + ext);
return false;
}
If I use a variable instead of the static pattern, it does not work:
var Extensions = "jpg|png|jpeg|gif";
if (!(ext && /^(Extensions)$/.test(ext))) {
alert('Error: extension is not allowed!' + Extensions + ' file ext: ' + ext);
return false;
}
Thanks in advance
Imdadhusen
You should do it like this:
(new RegExp("jpg|png|jpeg|gif")).test(ext)
You are using invalid syntax for the regular expression. If you are going to store it in a variable, you must still use your regular expression from your first example.
So:
var Extensions = /^(jpg|png|jpeg|gif)$/;
if (!(ext && Extensions.test(ext)))
will work. Your second example is trying to match the word 'Extensions'.
it wont get error
var Extensions = "/^(jpg|png|jpeg|gif)$/";
if (!(ext && Extensions.test(ext))) {
alert('Error: extension is not allowed!' + Extensions + ' file ext: ' + ext);
return false;
}
To use a variable, you need to use the RegExp object:
new RegExp('^(' + Extensions + ')$').test(ext)
Or assign the entire regex into your variable:
var Extensions = /^(jpg|png|jpeg|gif)$/;
Extensions.test(ext)
Perhaps call it allowedExtensions or something though.
Try this:
var Extensions = /^(jpg|png|jpeg|gif)$/;
if (!(ext && Extensions.test(ext))) {
alert('Error: extension is not allowed!' + Extensions + ' file ext: ' + ext);
return false;
}

Categories