I modified the basic phantomjs example here http://phantomjs.org/screen-capture.html to accept command line args.
When I pass http://google.com as argument console.log outputs are correct
0: index.js
1: http://google.com
but I don't get any thumbnail.png in my folder why ?
var page = require('webpage').create();
var system = require('system');
var args = system.args;
var url;
if (args.length === 1) {
url = 'http://github.com/';
} else {
args.forEach(function(arg, i) {
console.log(i + ': ' + arg);
if (i > 0) {
page.open(arg, function() {
page.render('thumbnail' + '.png');
});
}
});
}
phantom.exit();
page.open is an asynchronous function, therefore phantom.exit is being called before your callback to render the thumbnail.
move phantom.exit inside your callback as specified in the docs
var page = require('webpage').create();
page.open('http://github.com/', function() {
page.render('github.png');
phantom.exit();
});
Related
I have been having some issues with opening multiple webpages in phantomjs, I am first opening a website which contains a few links, which I want to open as well, and save a piece of text from each URL to my jobs_list which has many objects inside of it. And after all the URL's have been run, I want to exit phantomjs. But as it is right now it never exits, and I have trouble recieving data from second function.
var webPage = require('webpage');
var page = webPage.create();
var jobs_list = [];
page.open('url', function (status) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
page.onConsoleMessage = function(msg) {
console.log(msg);
};
var list = page.evaluate(function() {
var jobs = [];
var job;
$('.test').each(function(){
$(this).find('span').each(function(){
var job_link = $(this).find('a');
var url = job_link.attr("href");
job = {title : job_link.text(), url : url, location : ""};
jobs.push(job);
})
});
return jobs;
});
var i = 0;
jobs_list = list;
next_page(i);
});
});
function next_page(i){
if (i <= (jobs_list.length-1)) {
var current_job = jobs_list[i];
var url = current_job.url;
page.open(url, function (status) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
var location = page.evaluate(function() {
var job_location;
$('.job-location').each(function(){
$(this).find('li').each(function(){
job_location = $(this).text();
})
})
console.log(job_location);
return job_location;
});
jobs_list[i].location = location;
if(i == (jobs_list.length-1)) {
phantom.exit(0);
}
});
});
console.log(i, current_job.title);
next_page(++i);
}
}
The problem is that the page.open call is asynchronous. If you look closely to your next_page function it can be shortened to this:
function next_page(i){
if (i <= (jobs_list.length-1)) {
var current_job = jobs_list[i];
var url = current_job.url;
page.open(url, function (status) {
...
});
console.log(i, current_job.title);
next_page(++i);
}
}
It means that next_page(++i); is executed before page.open(url, ...) even managed to load the first HTML content. This call leads to the next page.open(url, ...) being executed immediately, thus overwriting the previous request. And you're never going to get any data this way.
You have to do two things:
move the next_page(++i); call where the execution of one page is finished
reduce the number of condition checking
I propose:
function next_page(i){
if (i <= (jobs_list.length-1)) {
var current_job = jobs_list[i];
var url = current_job.url;
page.open(url, function (status) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
var location = page.evaluate(function() {
var job_location;
$('.job-location').each(function(){
$(this).find('li').each(function(){
job_location = $(this).text();
})
})
console.log(job_location);
return job_location;
});
jobs_list[i].location = location;
console.log(i, current_job.title);
next_page(++i);
});
});
} else {
phantom.exit(0);
}
}
That's quite an old version of jQuery. Perhaps you want to load a newer version. If the page already has jQuery included, you will likely break the page by loading another jQuery into it. Don't load an additional jQuery version at all in this case.
I am tring to scrape a few sites. Here is my code:
for (var i = 0; i < urls.length; i++) {
url = urls[i];
console.log("Start scraping: " + url);
page.open(url, function () {
waitFor(function() {
return page.evaluate(function() {
return document.getElementById("progressWrapper").childNodes.length == 1;
});
}, function() {
var price = page.evaluate(function() {
// do something
return price;
});
console.log(price);
result = url + " ; " + price;
output = output + "\r\n" + result;
});
});
}
fs.write('test.txt', output);
phantom.exit();
I want to scrape all sites in the array urls, extract some information and then write this information to a text file.
But there seems to be a problem with the for loop. When scraping only one site without using a loop, all works as I want. But with the loop, first nothing happens, then the line
console.log("Start scraping: " + url);
is shown, but one time too much.
If url = {a,b,c}, then phantomjs does:
Start scraping: a
Start scraping: b
Start scraping: c
Start scraping:
It seems that page.open isn't called at all.
I am newbie to JS so I am sorry for this stupid question.
PhantomJS is asynchronous. By calling page.open() multiple times using a loop, you essentially rush the execution of the callback. You're overwriting the current request before it is finished with a new request which is then again overwritten. You need to execute them one after the other, for example like this:
page.open(url, function () {
waitFor(function() {
// something
}, function() {
page.open(url, function () {
waitFor(function() {
// something
}, function() {
// and so on
});
});
});
});
But this is tedious. There are utilities that can help you with writing nicer code like async.js. You can install it in the directory of the phantomjs script through npm.
var async = require("async"); // install async through npm
var tests = urls.map(function(url){
return function(callback){
page.open(url, function () {
waitFor(function() {
// something
}, function() {
callback();
});
});
};
});
async.series(tests, function finish(){
fs.write('test.txt', output);
phantom.exit();
});
If you don't want any dependencies, then it is also easy to define your own recursive function (from here):
var urls = [/*....*/];
function handle_page(url){
page.open(url, function(){
waitFor(function() {
// something
}, function() {
next_page();
});
});
}
function next_page(){
var url = urls.shift();
if(!urls){
phantom.exit(0);
}
handle_page(url);
}
next_page();
I'm trying to automate the navigation of some web pages with phantomJS.
What i'm trying to create is a pattern for testing and navigation, so far i got this.
For a moment ignore all the potential null pointers due to empty arrays and such :)
testSuite.js
var webPage = require('webpage');
// Test suite definition
function testSuite(name){
this.name=name;
this.startDate=new Date();
this.tests=[];
this.add=function(test){
this.tests.push(test);
};
this.start=function(){
console.log("Test Suite ["+this.name+"] - Start");
this.next();
},
this.next=function(){
console.log("neeext");
console.log(this.tests.length);
var test=this.tests[0];
this.tests.splice(0,1);
console.log("Test ["+ test.name+"]");
test.execute();
};
}
//Test definition
function test(name,testFunction){
this.name=name;
this.execute=testFunction;
}
module.exports.testSuite=testSuite;
module.exports.test=test;
FirstPageModule.js
var currentPage;
function onPageLoadFinished(status) {
var url = currentPage.url;
var filename='snapshot.png';
console.log("---------------------------------------------------------------");
console.log("Status: " + status);
console.log("Loaded: " + url);
console.log("Render filename:" + filename);
console.log("---------------------------------------------------------------");
if(status == 'success'){
currentPage.render(filename);
}
if(status=='fail'){
console.log("Status: " + status);
}
}
function open(){
currentPage.open("http://localhost:8080");
}
function login(){
var username="topSecretUsername";
var password="topSecretPassord";
currentPage.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js");
currentPage.evaluate(function(user,pass) {
$("#user").val(user);
$("#pass").val(pass);
},username,password);
currentPage.render("page.png");
currentPage.evaluate(function(){
$('#loginButton').click();
});
}
function FirstPage(){
var page = webPage.create();
currentPage=page;
this.testSuite = new testSuite("FirstPageModule");
this.testSuite.add(new test("Open First Page",open));
this.testSuite.add(new test("Login",login));
var onLoadFinished=onPageLoadFinished;
var callNextTest=this.testSuite.next;
currentPage.onLoadFinished=function(status){
onLoadFinished.apply(this,arguments);
callNextTest();
};
page.onConsoleMessage = function(msg) {
console.log(msg);
}
}
module.exports=new FirstPage();
PageTests.js
var firstPage=require('./FirstPageModule.js');
firstPage.testSuite.start();
What i want to do is to have a sequential execution of isolated functions, after each function gets executed, i take a screenshot and call the next function.
But, for some reason, the next method on the testSuite isn't getting called, or the method on the second test isn't getting executed.
What am i doing wrong?
Just make available the logName variable in the "global" scope :
var logName;
function onPageLoadComplete(status){
console.log(status);
// Call the logName function
if(typeof(logName) == "function"){
logName();
}
}
function test(){
var page = webPage.create();
this.name="TestName";
// Update logName value as a function.
logName = function(){
console.log(this.name);
}
page.onLoadFinished = onPageLoadComplete;
}
Primary, it doesn't seems to be related to phantomjs but only plain javascript, i hope that's what you need, otherwise please be more specific with your question.
You can create your own page.get implementation with a callback when a page is fully loaded.
ex: create a file module pageSupport.js
// attach listeners
Object.prototype.listeners = {};
// store and fire listeners
Object.prototype.addEventListener = function(event, callback) {
if (!this.listeners[event]) this.listeners[event] = [];
this.listeners[event].push(callback);
this[event] = function(e) {
if (listeners[event]) {
listeners[event].forEach(function(listener) {
listener.call(null, e);
});
}
}
}
// create a new reference to webpage.open method
Object.prototype._open = Object.open;
// receive an url and
// return a function success that will be called when page is loaded.
Object.prototype.get = function(url) {
return {
success : function(callback) {
this.open(url);
this.addEventListener('onLoadFinished', function(status) {
if (status == 'success') {
return callback(status);
}
});
}.bind(this)
}
}
// export as phantomjs module.
exports.Object = Object;
So you can call this module in your script and uses it as follows:
var page = require('webpage').create();
require('./pageSupport');
page.get('http://stackoverflow.com').success(function(status) {
// Now this callback will be called only when the page is fully loaded \o/
console.log(status); // logs success
});
i want to use this phantom code but with node.js, but i don't find the equivalent of system.args[1] in node.js
var phantom = require('phantom');
// var page = new WebPage();
// var system = require('system');
// var sBlob = system.args[1];
var sUrl = 'file:///C:/Users/editor.html?Blob='+sBlob;
phantom.create(function(ph) {
ph.createPage(function(page) {
page.open(sUrl, function(status) {
console.log("opened diagram? ", status);
page.evaluate(function() {
return document.getElementById("GraphImage").src;
}, function(result) {
console.log(result)
ph.exit();
});
});
});
}, {
dnodeOpts : {
weak : false
}
});
If you are running your script as $ node script.js theargument
you should be able to get it using
// the first argument is node and
//the second is the script name, none of them should be relevant
var args = process.argv.slice(2);
var sBlob = args[0];
I have two angularJS $scope functions inside a main one. When calling the play() function there is an error stating that getPhoneGapPath() is undefined. What is the solution to this
My Code:
function DontAsk($scope){
$scope.getPhoneGapPath = function(){
var path = window.location.pathname;
path = path.substr( 0, path.length - 10 );
return 'file://' + path;
}
$scope.play= function(){
var os = navigator.platform;
if (os=='iPhone'){
var url = "sounds/DontEventAsk.mp3";
}
else{
var url = getPhoneGapPath() + "sounds/DontEventAsk.mp3";
}
var my_media = new Media(url,
// success callback
function() {
console.log("playAudio():Audio Success");
},
// error callback
function(err) {
console.log("playAudio():Audio Error: "+JSON.stringify(err));
});
// Play audio
my_media.play();
}}
Ideally I would want the getPhoneGapPath() to be defined and also outside the main function because I have multiple functions like the DontAsk() one.
Thanks a lot.
var url = $scope.getPhoneGapPath() + "sounds/DontEventAsk.mp3";
There is no getPhoneGapPath function in the scope. You are defining this function as a property of the $scope object, so you should use it accordingly:
$scope.getPhoneGapPath()