i want to use this phantom code but with node.js, but i don't find the equivalent of system.args[1] in node.js
var phantom = require('phantom');
// var page = new WebPage();
// var system = require('system');
// var sBlob = system.args[1];
var sUrl = 'file:///C:/Users/editor.html?Blob='+sBlob;
phantom.create(function(ph) {
ph.createPage(function(page) {
page.open(sUrl, function(status) {
console.log("opened diagram? ", status);
page.evaluate(function() {
return document.getElementById("GraphImage").src;
}, function(result) {
console.log(result)
ph.exit();
});
});
});
}, {
dnodeOpts : {
weak : false
}
});
If you are running your script as $ node script.js theargument
you should be able to get it using
// the first argument is node and
//the second is the script name, none of them should be relevant
var args = process.argv.slice(2);
var sBlob = args[0];
Related
In order to avoid regressions with our analytics tagging, I want to use PhantomJS to automate testing for Adobe Analytics tags.
To do so I want to be able to test 2 things:
Presence of certain js variables declarations in the HTML source
Compare the variables in HTML source with Ajax calls made to Adobe Analytics and ensure they have the same values
Exemple of variable in the HTML source I want to spy on:
<script>s.events="event27";</script>
Here is the js test script I have so far:
"use strict";
if (!String.prototype.contains) {
String.prototype.contains = function (arg) {
return !!~this.indexOf(arg);
};
}
var page = require('webpage').create();
page.onResourceRequested = function(request) {
var obj = JSON.stringify(request, undefined, 4);
var needle = '2o7';
var url = request.url;
if (url.contains(needle)) {
var calledUrl = decodeURI(decodeURIComponent(url));
console.log(calledUrl);
}
};
page.onResourceReceived = function(response) {
var jsonResponse = JSON.stringify(response, undefined, 4);
};
page.open('http://www.domain.com/page.html', function() {
var s_account = page.evaluate(function () {
return window.s_account;
});
var s_events = page.evaluate(function () {
return window.s.events;
});
phantom.exit();
});
I would like to be able to pass s_account and s_events variables to the onResourceRequested function so that I can assert equality between these 2 variables and _GET params in calledUrl var.
But I can not figure out how to do so. Any help would be appreciated!
I found the way to do it.
I actually get the content of window.s_account from within the onResourceRequested callback:
page.onResourceRequested = function(request) {
var obj = JSON.stringify(request, undefined, 4);
var needle = '2o7';
var url = request.url;
var result = false;
if (url.contains(needle)) {
var s_account = page.evaluate(function () {
return window.s_account;
});
result = true;
var calledUrl = decodeURI(decodeURIComponent(url));
console.log(s_account);
}
};
It might not be the most elegant way to do it however but it works.
I modified the basic phantomjs example here http://phantomjs.org/screen-capture.html to accept command line args.
When I pass http://google.com as argument console.log outputs are correct
0: index.js
1: http://google.com
but I don't get any thumbnail.png in my folder why ?
var page = require('webpage').create();
var system = require('system');
var args = system.args;
var url;
if (args.length === 1) {
url = 'http://github.com/';
} else {
args.forEach(function(arg, i) {
console.log(i + ': ' + arg);
if (i > 0) {
page.open(arg, function() {
page.render('thumbnail' + '.png');
});
}
});
}
phantom.exit();
page.open is an asynchronous function, therefore phantom.exit is being called before your callback to render the thumbnail.
move phantom.exit inside your callback as specified in the docs
var page = require('webpage').create();
page.open('http://github.com/', function() {
page.render('github.png');
phantom.exit();
});
I have been having some issues with opening multiple webpages in phantomjs, I am first opening a website which contains a few links, which I want to open as well, and save a piece of text from each URL to my jobs_list which has many objects inside of it. And after all the URL's have been run, I want to exit phantomjs. But as it is right now it never exits, and I have trouble recieving data from second function.
var webPage = require('webpage');
var page = webPage.create();
var jobs_list = [];
page.open('url', function (status) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
page.onConsoleMessage = function(msg) {
console.log(msg);
};
var list = page.evaluate(function() {
var jobs = [];
var job;
$('.test').each(function(){
$(this).find('span').each(function(){
var job_link = $(this).find('a');
var url = job_link.attr("href");
job = {title : job_link.text(), url : url, location : ""};
jobs.push(job);
})
});
return jobs;
});
var i = 0;
jobs_list = list;
next_page(i);
});
});
function next_page(i){
if (i <= (jobs_list.length-1)) {
var current_job = jobs_list[i];
var url = current_job.url;
page.open(url, function (status) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
var location = page.evaluate(function() {
var job_location;
$('.job-location').each(function(){
$(this).find('li').each(function(){
job_location = $(this).text();
})
})
console.log(job_location);
return job_location;
});
jobs_list[i].location = location;
if(i == (jobs_list.length-1)) {
phantom.exit(0);
}
});
});
console.log(i, current_job.title);
next_page(++i);
}
}
The problem is that the page.open call is asynchronous. If you look closely to your next_page function it can be shortened to this:
function next_page(i){
if (i <= (jobs_list.length-1)) {
var current_job = jobs_list[i];
var url = current_job.url;
page.open(url, function (status) {
...
});
console.log(i, current_job.title);
next_page(++i);
}
}
It means that next_page(++i); is executed before page.open(url, ...) even managed to load the first HTML content. This call leads to the next page.open(url, ...) being executed immediately, thus overwriting the previous request. And you're never going to get any data this way.
You have to do two things:
move the next_page(++i); call where the execution of one page is finished
reduce the number of condition checking
I propose:
function next_page(i){
if (i <= (jobs_list.length-1)) {
var current_job = jobs_list[i];
var url = current_job.url;
page.open(url, function (status) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
var location = page.evaluate(function() {
var job_location;
$('.job-location').each(function(){
$(this).find('li').each(function(){
job_location = $(this).text();
})
})
console.log(job_location);
return job_location;
});
jobs_list[i].location = location;
console.log(i, current_job.title);
next_page(++i);
});
});
} else {
phantom.exit(0);
}
}
That's quite an old version of jQuery. Perhaps you want to load a newer version. If the page already has jQuery included, you will likely break the page by loading another jQuery into it. Don't load an additional jQuery version at all in this case.
I know this question have been asked many times, but I can't make it work.
Here is my situation. I had a string called data, and I want to unshorten all the link inside that string.
Code:
var Bypasser = require('node-bypasser');
var URI = require('urijs');
var data = 'multiple urls : http://example.com/foo http://example.com/bar';
var result = URI.withinString(data, function(url) {
var unshortenedUrl = null;
var w = new Bypasser(url);
w.decrypt(function(err, res) {
// How can I return res ?
unshortenedUrl = res;
});
// I know the w.descrypt function is a asynchronous function
// so unshortenedUrl = null
return unshortenedUrl;
});
Let's me walk you through the code.
URI.withinString will match all the URLs in data, manipulate it and return the result.
You can view an example from URI.js docs
What I want to with these URLs is to unshorten all of them using node-passer.
This is from node-bypasser document:
var Bypasser = require('node-bypasser');
var w = new Bypasser('http://example.com/shortlink');
w.decrypt(function(err, result) {
console.log('Decrypted: ' + result);
});
This is the result that I want multiple urls : http://example.com/foo_processed http://example.com/bar_processed
I created a notebook at tonicdev.com
Solution
var getUrlRegEx = new RegExp(
"(^|[ \t\r\n])((ftp|http|https|gopher|mailto|news|nntp|telnet|wais|file|prospero|aim|webcal):(([A-Za-z0-9$_.+!*(),;/?:#&~=-])|%[A-Fa-f0-9]{2}){2,}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*(),;/?:#&~=%-]*))?([A-Za-z0-9$_+!*();/?:~-]))"
, "g"
);
var urls = data.match(getUrlRegEx);
async.forEachLimit(urls, 5, function (url, callback) {
let w = new Bypasser(url);
w.decrypt(function (err, res) {
if (err == null && res != undefined) {
data = data.replace(url, res);
callback();
}
});
}, function(err) {
res.send(data);
});
You don't really understand what callback is. The callback serves to allow asynchronous code to run without Javascript waiting for it. If you were less lazy and added some debug in your code:
console.log("Started parsing");
var result = URI.withinString(data, function(url) {
console.log("URL parsed (or whatever)");
var unshortenedUrl = null;
var w = new Bypasser(url);
w.decrypt(function(err, res) {
// How can I return res ?
unshortenedUrl = res;
});
// I know the w.descrypt function is a asynchronous function
// so unshortenedUrl = null
return unshortenedUrl;
});
console.log("Call to library over");
You would (most likely) see messages in this order:
Started parsing
Call to library over
URL parsed (or whatever)
The answer: Callback is not guaranteed to run before any code you execute after assigning it. You can't put data in your result variable because the data might not be fetched yet.
I am able to fork a child process, but I am having issues seeing what errors are taking place in that file, and I know the file has errors, because I created an object without a closing } so an error should occur. Using the following code, I get nothing (in the console and/or the browser):
var child = require('child_process').fork(full_path, [], {
silent: true,
});
// Tried both of these and nothing gets displayed
child.stdout.on('error', function(data){
res.write(data);
console.log(data);
});
child.stderr.on('error', function(data){
res.write(data);
console.log(data);
});
Here is the child process:
var sys = require('sys');
var mustache = require('mustache');
var template = require('./templates/mypage.html');
sys.puts(template);
var view = {
"musketeers": ["Athos", "Aramis", "Porthos", "D'Artagnan"]
; // Forced error here with no closing "}"
var html = mustache.to_html(template, view);
sys.puts(html);
What do I need to do to display the errors? What am I doing incorrectly?
Edit
Full script:
var sys = require('sys');
var config = require('./server.json');
var url = require('url');
var http = require('http');
function handleRequest(req, res){
var full_path = "";
for(var i in config){
var domain = config[i].server;
if(req.headers.host === domain){
var info = url.parse(req.url);
full_path = config[i].root + info.pathname;
}
}
console.log("Page: " + full_path);
if(full_path != ""){
var child = require('child_process').fork(full_path, [], {
silent: true,
});
child.stdout.on('data', function(data){
res.write(data);
});
child.stdout.on('error', function(data){
res.write(data);
console.log(data);
});
child.stderr.on('error', function(data){
res.write(data);
console.log(data);
});
child.stdout.on('end', function(){
res.end();
});
}else{
res.end();
}
}
var server = http.createServer(handleRequest);
server.listen(3000, function(err){
console.log(err || 'Server listening on 3000');
});
By default all input/output goes back to the parent.
Given parent.js
var fork = require('child_process').fork;
var child = fork('./child');
output and errors from child.js go back to parent
setInterval(function() {
console.log('I am here')
}, 1000);
setTimeout(function() {
throw new Error('oops')
}, 5000);
Like #Plato says, { silent: true } is gonna shut things up