Pass custom variables to onResourceRequested with PhantomJS - javascript

In order to avoid regressions with our analytics tagging, I want to use PhantomJS to automate testing for Adobe Analytics tags.
To do so I want to be able to test 2 things:
Presence of certain js variables declarations in the HTML source
Compare the variables in HTML source with Ajax calls made to Adobe Analytics and ensure they have the same values
Exemple of variable in the HTML source I want to spy on:
<script>s.events="event27";</script>
Here is the js test script I have so far:
"use strict";
if (!String.prototype.contains) {
String.prototype.contains = function (arg) {
return !!~this.indexOf(arg);
};
}
var page = require('webpage').create();
page.onResourceRequested = function(request) {
var obj = JSON.stringify(request, undefined, 4);
var needle = '2o7';
var url = request.url;
if (url.contains(needle)) {
var calledUrl = decodeURI(decodeURIComponent(url));
console.log(calledUrl);
}
};
page.onResourceReceived = function(response) {
var jsonResponse = JSON.stringify(response, undefined, 4);
};
page.open('http://www.domain.com/page.html', function() {
var s_account = page.evaluate(function () {
return window.s_account;
});
var s_events = page.evaluate(function () {
return window.s.events;
});
phantom.exit();
});
I would like to be able to pass s_account and s_events variables to the onResourceRequested function so that I can assert equality between these 2 variables and _GET params in calledUrl var.
But I can not figure out how to do so. Any help would be appreciated!

I found the way to do it.
I actually get the content of window.s_account from within the onResourceRequested callback:
page.onResourceRequested = function(request) {
var obj = JSON.stringify(request, undefined, 4);
var needle = '2o7';
var url = request.url;
var result = false;
if (url.contains(needle)) {
var s_account = page.evaluate(function () {
return window.s_account;
});
result = true;
var calledUrl = decodeURI(decodeURIComponent(url));
console.log(s_account);
}
};
It might not be the most elegant way to do it however but it works.

Related

javascript revealing module pattern with multiple files/modules and global vars

I apologize if this has been asked elsewhere, but all the info I've seen doesn't really help me understand both the answer and best practice.
I'm attempting to use the revealing module pattern for javascript. I have a base js file that contains methods and properties that are needed on every page of the application.
Then each page has its own js file that has it's own unique functions and variables, but also needs to reference the variables in the base js file.
I seem to be really misunderstanding variable scope, or how I'm setting this up is somehow very bad, because I'm getting confusing results.
Questions:
If i'm structuring this wrong, what is the right way?
How can I access variables correctly across modules?
Am I correct to initialize the modules on doc ready?
What I'm doing right now is this:
var base = (function() {
init = function() {
url = $(location).attr('href');
url = url.substr(-1) === '/' ? url.slice(0, -1) : url;
segments = url.split('/');
baseUrl = this.segments[0] + '//' + this.segments[2] + '/';
resource = this.segments[3];
this.segments = segments; //need access to this across all pages
this.resource = resource; //need access to this across all pages
this.baseUrl = baseUrl; //need access to this across all pages
}
}
doStuffGlobally = function() {
//do stuff for all pages
}
return {
init: init,
baseUrl: this.baseUrl,
segments: this.segments,
resource: this.resource
};
}());
$(document).ready(function() {
base.init();
}
And on another Page I'm doing this:
var otherPage = (function() {
init = function() {
//do otherPage stuff;
}
}
doStuff = function() {
base.doStuffGlobally(); //need to access this.
console.log(base.segments); //need to access this.
}
return {
init: init
};
}());
$(document).ready(function() {
otherPage.init();
}
You don't need an init method for your base module, it doesn't need to wait for domready but can (and should) run immediately. So write
var base = (function() {
var url = $(location).attr('href');
if (url.substr(-1) === '/') url = url.slice(0, -1);
var segments = url.split('/');
var baseUrl = this.segments[0] + '//' + this.segments[2] + '/';
var resource = this.segments[3];
function doStuffGlobally() {
//do stuff for all pages
}
return {
baseUrl,
segments,
resource,
doStuffGlobal,
};
}());

Phantomjs opening many pages

I have been having some issues with opening multiple webpages in phantomjs, I am first opening a website which contains a few links, which I want to open as well, and save a piece of text from each URL to my jobs_list which has many objects inside of it. And after all the URL's have been run, I want to exit phantomjs. But as it is right now it never exits, and I have trouble recieving data from second function.
var webPage = require('webpage');
var page = webPage.create();
var jobs_list = [];
page.open('url', function (status) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
page.onConsoleMessage = function(msg) {
console.log(msg);
};
var list = page.evaluate(function() {
var jobs = [];
var job;
$('.test').each(function(){
$(this).find('span').each(function(){
var job_link = $(this).find('a');
var url = job_link.attr("href");
job = {title : job_link.text(), url : url, location : ""};
jobs.push(job);
})
});
return jobs;
});
var i = 0;
jobs_list = list;
next_page(i);
});
});
function next_page(i){
if (i <= (jobs_list.length-1)) {
var current_job = jobs_list[i];
var url = current_job.url;
page.open(url, function (status) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
var location = page.evaluate(function() {
var job_location;
$('.job-location').each(function(){
$(this).find('li').each(function(){
job_location = $(this).text();
})
})
console.log(job_location);
return job_location;
});
jobs_list[i].location = location;
if(i == (jobs_list.length-1)) {
phantom.exit(0);
}
});
});
console.log(i, current_job.title);
next_page(++i);
}
}
The problem is that the page.open call is asynchronous. If you look closely to your next_page function it can be shortened to this:
function next_page(i){
if (i <= (jobs_list.length-1)) {
var current_job = jobs_list[i];
var url = current_job.url;
page.open(url, function (status) {
...
});
console.log(i, current_job.title);
next_page(++i);
}
}
It means that next_page(++i); is executed before page.open(url, ...) even managed to load the first HTML content. This call leads to the next page.open(url, ...) being executed immediately, thus overwriting the previous request. And you're never going to get any data this way.
You have to do two things:
move the next_page(++i); call where the execution of one page is finished
reduce the number of condition checking
I propose:
function next_page(i){
if (i <= (jobs_list.length-1)) {
var current_job = jobs_list[i];
var url = current_job.url;
page.open(url, function (status) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
var location = page.evaluate(function() {
var job_location;
$('.job-location').each(function(){
$(this).find('li').each(function(){
job_location = $(this).text();
})
})
console.log(job_location);
return job_location;
});
jobs_list[i].location = location;
console.log(i, current_job.title);
next_page(++i);
});
});
} else {
phantom.exit(0);
}
}
That's quite an old version of jQuery. Perhaps you want to load a newer version. If the page already has jQuery included, you will likely break the page by loading another jQuery into it. Don't load an additional jQuery version at all in this case.

Node.js : how add variable as input in url

i want to use this phantom code but with node.js, but i don't find the equivalent of system.args[1] in node.js
var phantom = require('phantom');
// var page = new WebPage();
// var system = require('system');
// var sBlob = system.args[1];
var sUrl = 'file:///C:/Users/editor.html?Blob='+sBlob;
phantom.create(function(ph) {
ph.createPage(function(page) {
page.open(sUrl, function(status) {
console.log("opened diagram? ", status);
page.evaluate(function() {
return document.getElementById("GraphImage").src;
}, function(result) {
console.log(result)
ph.exit();
});
});
});
}, {
dnodeOpts : {
weak : false
}
});
If you are running your script as $ node script.js theargument
you should be able to get it using
// the first argument is node and
//the second is the script name, none of them should be relevant
var args = process.argv.slice(2);
var sBlob = args[0];

Return result from a javascript callback (node.js)

I know this question have been asked many times, but I can't make it work.
Here is my situation. I had a string called data, and I want to unshorten all the link inside that string.
Code:
var Bypasser = require('node-bypasser');
var URI = require('urijs');
var data = 'multiple urls : http://example.com/foo http://example.com/bar';
var result = URI.withinString(data, function(url) {
var unshortenedUrl = null;
var w = new Bypasser(url);
w.decrypt(function(err, res) {
// How can I return res ?
unshortenedUrl = res;
});
// I know the w.descrypt function is a asynchronous function
// so unshortenedUrl = null
return unshortenedUrl;
});
Let's me walk you through the code.
URI.withinString will match all the URLs in data, manipulate it and return the result.
You can view an example from URI.js docs
What I want to with these URLs is to unshorten all of them using node-passer.
This is from node-bypasser document:
var Bypasser = require('node-bypasser');
var w = new Bypasser('http://example.com/shortlink');
w.decrypt(function(err, result) {
console.log('Decrypted: ' + result);
});
This is the result that I want multiple urls : http://example.com/foo_processed http://example.com/bar_processed
I created a notebook at tonicdev.com
Solution
var getUrlRegEx = new RegExp(
"(^|[ \t\r\n])((ftp|http|https|gopher|mailto|news|nntp|telnet|wais|file|prospero|aim|webcal):(([A-Za-z0-9$_.+!*(),;/?:#&~=-])|%[A-Fa-f0-9]{2}){2,}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*(),;/?:#&~=%-]*))?([A-Za-z0-9$_+!*();/?:~-]))"
, "g"
);
var urls = data.match(getUrlRegEx);
async.forEachLimit(urls, 5, function (url, callback) {
let w = new Bypasser(url);
w.decrypt(function (err, res) {
if (err == null && res != undefined) {
data = data.replace(url, res);
callback();
}
});
}, function(err) {
res.send(data);
});
You don't really understand what callback is. The callback serves to allow asynchronous code to run without Javascript waiting for it. If you were less lazy and added some debug in your code:
console.log("Started parsing");
var result = URI.withinString(data, function(url) {
console.log("URL parsed (or whatever)");
var unshortenedUrl = null;
var w = new Bypasser(url);
w.decrypt(function(err, res) {
// How can I return res ?
unshortenedUrl = res;
});
// I know the w.descrypt function is a asynchronous function
// so unshortenedUrl = null
return unshortenedUrl;
});
console.log("Call to library over");
You would (most likely) see messages in this order:
Started parsing
Call to library over
URL parsed (or whatever)
The answer: Callback is not guaranteed to run before any code you execute after assigning it. You can't put data in your result variable because the data might not be fetched yet.

How to pass parameters in url callback function in javascript

I have a javascript file from a wordpress plugin that I'm trying to modify. It seems to be getting away with cross-domain requests with some sort of loophole. It's this function:
function urlCallback(url){
var req = url;
var head = document.getElementsByTagName("head").item(0);
var script = document.createElement("script");
script.setAttribute("type", "text/javascript");
script.setAttribute("src", req);
head.appendChild(script);
}
The url passed is something like 'http://url.com/page?callback=namespace.myFunction' where myFunction is a function defined elsewhere in the script.
From what I understand, this is inserting a source url at the top of my page, causing the browser to load that page. The callback attached to that url is then called, passing the result as a single parameter to the example function, myFunction.
My problem is that I need to call myFunction for a few different unique urls, but the results are tied to the calling url. myFunction seems to be called whenever the page finishes loading, so I can't simply keep count of which data set it's operating on. I need some way for myFunction to either be passed an additional variable encoded in the callback url, or for myFunction to know the url it was attached to.
Can anyone help me?
EDIT:
To elaborate further, here is a simplified version of the code I have:
var parseUrls = (function(){
function urlCallback(url){
var req = url;
// Here is where I need to save the url
var head = document.getElementsByTagName("head").item(0);
var script = document.createElement("script");
script.setAttribute("type", "text/javascript");
script.setAttribute("src", req);
head.appendChild(script);
}
return {
options: {},
parseNextUrl: function(result) {
if (!result || !result.data) { return; }
var data = result.data;
// Here is where I need the url
},
parseUrl: function(result) {
if (!result || !result.data) { return; }
var data = result.data;
for (var i = 0; i < data.length; i++) {
urlCallback( data[i].url + "/new_url/page?callback=parseUrls.parseNextUrl" );
}
},
showResult: function(options){
urlCallback( "http://start.url.com/page?callback=parseUrls.parseUrl" );
this.options = options;
}
};
})();
Just to be clear, parseNextUrl is called whenever the source request is finished. Which means all the urlCallback calls have already finished by then.
Here's the updated code based on the newly provided code.
var parseUrls = (function(){
function urlCallback(url){
// Create request
var head = document.getElementsByTagName("head").item(0);
var script = document.createElement("script");
script.setAttribute("type", "text/javascript");
script.setAttribute("src", req);
head.appendChild(script);
}
return {
parsers: [], //response handler array
options: {},
parseUrl: function(result) {
//parseUrls.parseUrl.url = request URL
if (!result || !result.data) { return; }
var data = result.data;
// Create requests
for (var i = 0; i < data.length; i++) {
// Create new response handler
var parseNextUrl = function(result) {
// parseNextUrl.url = request URL
if (!result || !result.data) { return; }
var data = result.data;
// Check the URL
console.log('Result URL = ' + parseNextUrl.url);
};
// Make callback names and URLs for each handler
var cbName = "parseUrls.parsers[" + this.parsers.length + "]";
var req = data[i].url + "/new_url/page?callback=" + encodeURI(cbName);
// Save the URL in the handler
parseNextUrl.url = req;
// Put handler into storage.
// Note: Don't delete/insert any of parsers array element
// until no more new requests and all responses are received.
this.parsers.push(parseNextUrl);
urlCallback(req);
}
},
showResult: function(options){
this.parseUrl.url = "http://start.url.com/page?callback=parseUrls.parseUrl";
urlCallback(this.parseUrl.url);
this.options = options;
}
};
})();

Categories