How can I exit PhantomJS when a specific request is not found?

How can I exit PhantomJS when a specific request is not found? - javascript

The problem is, some sites contain the request to test.com/test.aspx and some don't.
If the request exists, it should print the JSON and exit.
If the request does not exist, it should exit too - at the moment, it stays open in this case.
Also, how could I make the code better? Maybe even faster if that's possible?
My JS code:
var Url = "http://www.test.de";
var params = new Array();
var webPage = require('webpage');
var page = webPage.create();
var targetJSON = {};
page.open(Url);
page.onResourceRequested = function(requestData, networkRequest) {
var match = requestData.url.match(/test.com\/test.aspx/g);
if (match != null) {
var targetString = decodeURI(JSON.stringify(requestData.url));
var klammerauf = targetString.indexOf("{");
var jsonobjekt = targetString.substr(klammerauf, (targetString.indexOf("}") - klammerauf) + 1);
targetJSON = (decodeURIComponent(jsonobjekt));
console.log(targetJSON);
phantom.exit();
}
};
I tried to add
} else {
phantom.exit();
}
and
} if (match == null) {
phantom.exit();
}
but nothing solves my problem.

If you want to check whether something doesn't exist, then you need to check all things to see if they are not it or as first-order logic: .
You first need to see all requests to see whether your intended request was there. For example like this:
var found = false;
page.onResourceRequested = function(requestData, networkRequest) {
var match = requestData.url.match(/test.com\/test.aspx/g);
if (match != null) {
var targetString = decodeURI(JSON.stringify(requestData.url));
var klammerauf = targetString.indexOf("{");
var jsonobjekt = targetString.substr(klammerauf, (targetString.indexOf("}") - klammerauf) + 1);
targetJSON = (decodeURIComponent(jsonobjekt));
console.log(targetJSON);
found = true;
phantom.exit();
}
};
page.open(Url, function(){
setTimeout(function(){
console.log("found: " + found); // will always print "false"
phantom.exit();
}, 1000);
});
I solved this with a global variable which denotes whether the request was found. If it wasn't, then you can exit PhantomJS. If wait until the page is loaded and an additional waiting time in case there are Ajax requests.

Related

AJAX: questions not rendering to the browser

I have been tasked with getting a small quiz application working. I saved my questions as a json file and conducting an http request. When I initialed ran a JSON.parse() of the questions, it rendered to the browser.
var output = document.getElementById('output');
// this allows us to keep it as a global value
var myObj = '';
loadQuestions();
console.log(myObj);
// var myQuestions = '[{"question":"What is your nearest star?","answers":{"a":"Alpha Centauri","b":"Barnard\'s Star","c":"Sirius","d":"Sol"},"correctAnswer":"d"},{"question":"What color is grass?","answers":{"a":"Blue","b":"Red","c":"Green","d":"Purple"},"correctAnswer":"c"}]';
// var myObj = JSON.parse(myQuestions);
// for (var i in myObj) {
// output.innerHTML += myObj[i].question + '? <br>';
// }
However, when I refactored it to do an AJAX call, it is iterating through the console, but not in the browser window:
var output = document.getElementById('output');
// this allows us to keep it as a global value
var myObj = '';
loadQuestions();
console.log(myObj);
function loadQuestions() {
// http request
// this will allow you to pull in
// the questions from the api that
// your questions or data is stored in
var a = new XMLHttpRequest();
a.open("GET", "https://api.myjson.com/bins/8xmud", true); // opened request with address with
a.onreadystatechange = function(){
if (a.readyState == 1) {
var myQuestions = JSON.parse(a.responseText);
console.log(myQuestions);
for (var i in myQuestions) {
output.innerHTML = myQuestions[i].question + '? <br>';
}
}
console.log(a);
}
a.send();
}
I thought it could be my for...in loop, but that's written correctly. I am unclear as to why, despite getting a Status: OK or 200, it is not iterating through the browser window as before.

var output = document.getElementById('output');
// this allows us to keep it as a global value
var myObj = '';
loadQuestions();
console.log(myObj);
function loadQuestions() {
// http request
// this will allow you to pull in
// the questions from the api that
// your questions or data is stored in
var a = new XMLHttpRequest();
a.open("GET", "https://api.myjson.com/bins/8xmud", true); // opened request with address with
a.onreadystatechange = function(){
if (a.readyState == 4) {
var myQuestions = JSON.parse(a.responseText);
console.log(myQuestions);
for (var i in myQuestions) {
output.innerHTML += myQuestions[i].question + '? <br>';
}
}
console.log(a);
}
a.send();
}
<div id ="output"></div>

JavaScript EventSource won't open connection after Apache basic auth

I wasn't in charge of the Apache configuration, so I'm not sure what I can provide in terms of useful conf text, but I'm fairly certain I have narrowed the problem down to the login. EventSource works flawlessly both locally on XAMPP without any login and once you refresh the page after authenticating on the production server, but that first load on the server just will not open a connection. Has anyone seen this problem before? I couldn't find anything on the internet about this after searching for the past few days.
Edit: Some code
Some of the server-side code (which mostly shouldn't be relevant):
header('Content-Type: text/event-stream');
header('Cache-Control: no-cache');
$client_stream = new RedisStream();
$client_stream->poll(1); //The loop, with sleep time as a parameter
The JavaScript:
var xhttpViewSet;
var xhttpSearch;
var view = 'tile';
var search = '';
var seed_url = '/core/seed_view.php';
var stream_url = '/core/stream.php';
var default_class = 'panel-default';
var success_class = 'panel-success';
var warning_class = 'panel-warning';
var danger_class = 'panel-danger';
function UpdateClient(c_name, c_obj) {
if ((c_element = document.getElementById(c_name)) !== null) {
c_element.classList.remove('text-muted');
c_element.classList.remove(default_class);
c_element.classList.remove(success_class);
c_element.classList.remove(warning_class);
c_element.classList.remove(danger_class);
switch (c_obj['status']) {
case 0:
c_obj['status'] = 'OK';
c_element.classList.add(success_class)
break;
case 1:
c_obj['status'] = 'Warning';
c_element.classList.add(warning_class)
break;
case 2:
c_obj['status'] = 'Critical';
c_element.classList.add(danger_class)
break;
default:
c_obj['status'] = 'Unknown';
c_element.classList.add(danger_class)
break;
}
for (i in c_obj) {
var var_nodes = c_element.getElementsByClassName(i);
if (var_nodes.length > 0) {
for (var j = var_nodes.length - 1; j >= 0; j--) {
var_nodes[j].innerHTML = c_obj[i];
}
}
}
}
}
function SetView() {
var view_url = seed_url + '?search=' + search + '&view=' + view;
xhttpViewSet.open('GET', view_url, true);
xhttpViewSet.send();
}
var main = function() {
container = document.getElementById('content');
if (new XMLHttpRequest()) {
xhttpViewSet = new XMLHttpRequest();
xhttpSearch = new XMLHttpRequest();
} else {
xhttpViewSet = new ActiveXObject('Microsoft.XMLHTTP');
xhttpSearch = new ActiveXObject('Microsoft.XMLHTTP');
}
var stream = new EventSource(stream_url);
stream.onopen = function() {
console.log('Connection opened.'); //This doesn't fire
}
stream.onmessage = function(e) {
var c_obj = JSON.parse(e.data);
UpdateClient(c_obj.name, c_obj.value);
};
xhttpViewSet.onreadystatechange = function() {
if (xhttpViewSet.readyState == 4) {
var resp = xhttpViewSet.responseText;
if (xhttpViewSet.status == 200 && resp.length > 0) {
container.innerHTML = resp;
if (view == 'list') {
$('#computer-table').DataTable({
"lengthMenu": [[25, 50, 100], [25, 50, 100]]
});
}
} else {
container.innerHTML = '<error>No computers matched your search or an error occured.</error>';
}
}
}
SetView(); //This successfully does all but make the EventSource connection, and only fails to do that on first load
document.getElementById('list-view').addEventListener('click', function() {
view = 'list';
SetView();
});
document.getElementById('tile-view').addEventListener('click', function() {
view = 'tile';
SetView();
});
document.getElementById('search').addEventListener('keyup', function() {
search = this.value.toUpperCase();
SetView();
});
document.getElementById('clear-search').addEventListener('click', function() {
document.getElementById('search').value = '';
search = '';
SetView();
});
};
window.onload = main;

It is a bit hard to know for sure without a lot more information, but based on what you have said so far, I think it is one of:
HEAD/OPTIONS: Some browsers will send a HEAD or OPTIONS http call to a server script, before they send the GET or POST. The purpose of sending OPTIONS is to ask what headers are allowed to be sent. It is possible this is happening as part of the login process; that might explain why it works when you reload. See chapter 9 of Data Push Apps with HTML5 SSE (disclaimer: my book) for more details; basically, at the top of your SSE script you need to check the value of $_SERVER["REQUEST_METHOD"] and if it is "OPTIONS", intercept and say what headers you want to accept. I've used this one before:
header("Access-Control-Allow-Headers: Last-Event-ID,".
" Origin, X-Requested-With, Content-Type, Accept,".
" Authorization");`
CORS: The HTML page URL and the SSE page URL must have identical origins. There are detailed explanations (specific to SSE) in chapter 9 of Data Push Apps with HTML5 SSE (again), or (less specifically) at Wikipedia. If this is the problem, look into adding header("Access-Control-Allow-Origin: *"); to your SSE script.
withCredentials: There is a second parameter to the SSE constructor, and you use it like this: var stream = new EventSource(stream_url, { withCredentials: true }); It is saying it is okay to send the auth credentials. (Again, chapter 9 of the book goes into more detail - sorry for the repeated plugs!) There is a second step, over on the server-side: at the top of your PHP SSE script you need to add the following.
header("Access-Control-Allow-Origin: ".#$_SERVER["HTTP_ORIGIN"]);
header("Access-Control-Allow-Credentials: true");
PHP Sessions locking: This normally causes the opposite problem, which is that the SSE script has locked the PHP session, so no other PHP scripts work. See https://stackoverflow.com/a/30878764/841830 for how to handle it. (It is a good idea to do this anyway, even if it isn't your problem.)

AJAX response text undefined

I am following below 2 posts:
Ajax responseText comes back as undefined
Can't return xmlhttp.responseText?
I have implemented the code in same fashion. But I am getting
undefined is not a function
wherever i am using callback() funtion in my code.
CODE:
function articleLinkClickAction(guid,callback){
var host = window.location.hostname;
var action = 'http://localhost:7070/assets/find';
var url = action + '?listOfGUID=' + guid.nodeValue;
console.log("URL "+url);
xmlhttp = getAjaxInstance();
xmlhttp.onreadystatechange = function()
{
if (xmlhttp.readyState == 4 && xmlhttp.status == 200) {
var response = JSON.parse(xmlhttp.responseText);
console.log(response);
console.log(xmlhttp.responseText);
callback(null, xmlhttp.responseText);// this is line causing error
}
else{
callback(xmlhttp.statusText);// this is line causing error
}
};
xmlhttp.open("GET", url, true);
xmlhttp.send(null);
}
And I am calling it from this code:
var anchors = document.getElementsByTagName("a");
var result = '';
for(var i = 0; i < anchors.length; i++) {
var anchor = anchors[i];
var guid = anchor.attributes.getNamedItem('GUID');
if(guid)
{
articleLinkClickAction(guid,function(err, response) { // pass an anonymous function
if (err) {
return "";
} else {
var res = response;
html = new EJS({url:'http://' + host + ':1010/OtherDomain/article-popup.ejs'}).render({price:res.content[i].price});
document.body.innerHTML += html;
}
});
}
}

You are using a single global variable for your xmlhttp and trying to run multiple ajax calls at the same time. As such each successive ajax call will overwrite the previous ajax object.
I'd suggest adding var in front of the xmlhttp declaration to make it a local variable in your function so each ajax request can have its own separate state.
function articleLinkClickAction(guid,callback){
var host = window.location.hostname;
var action = 'http://localhost:7070/assets/find';
var url = action + '?listOfGUID=' + guid.nodeValue;
console.log("URL "+url);
// add var in front of xmlhttp here to make it a local variable
var xmlhttp = getAjaxInstance();
xmlhttp.onreadystatechange = function()
{
if (xmlhttp.readyState == 4 && xmlhttp.status == 200) {
var response = JSON.parse(xmlhttp.responseText);
console.log(response);
console.log(xmlhttp.responseText);
callback(null, xmlhttp.responseText);// this is line causing error
}
else{
callback(xmlhttp.statusText);// this is line causing error
}
};
xmlhttp.open("GET", url, true);
xmlhttp.send(null);
}
In the future, you should consider using Javascript's strict mode because these "accidental" global variables are not allowed in strict mode and will report an error to make you explicitly declare all variables as local or global (whichever you intend).
I can't say if this is the only error stopping your code from working, but it is certainly a significant error that is in the way of proper operation.
Here's another significant issue. In your real code (seen in a private chat), you are using:
document.body.innerHTML += html
in the middle of the iteration of an HTMLCollection obtained like this:
var anchors = document.getElementsByTagName("a");
In this code, anchors will be a live HTMLCollection. That means it will change dynamically anytime an anchor element is added or removed from the document. But, each time you do document.body.innerHTML += html that recreates the entire body elements from scratch and thus completely changes the anchors HTMLCollection. Doing document.body.innerHTML += html in the first place is just a bad practice. Instead, you should append new elements to the existing DOM. I don't know exactly what's in that html, but you should probably just create a div, put the HTML in it and append the div like this:
var div = document.createElement("div");
div.innerHTML = html;
document.body.appendChild(div);
But, this isn't quite all yet because if this new HTML contains more <a> tags, then your live HTMLCollection in anchors will still change.
I'd suggestion changing this code block:
var anchors = document.getElementsByTagName("a");
var result = '';
for(var i = 0; i < anchors.length; i++) {
var anchor = anchors[i];
var guid = anchor.attributes.getNamedItem('GUID');
if(guid)
{
articleLinkClickAction(guid,function(err, response) { // pass an anonymous function
if (err) {
return "";
} else {
var res = response;
html = new EJS({url:'http://' + host + ':1010/OtherDomain/article-popup.ejs'}).render({price:res.content[i].price});
document.body.innerHTML += html;
}
});
}
}
to this:
(function() {
// get static copy of anchors that won't change as document is modified
var anchors = Array.prototype.slice.call(document.getElementsByTagName("a"));
var result = '';
for (var i = 0; i < anchors.length; i++) {
var anchor = anchors[i];
var guid = anchor.attributes.getNamedItem('GUID');
if (guid) {
articleLinkClickAction(guid, function (err, response) { // pass an anonymous function
if (err) {
//return "";
console.log('error : ' + err);
} else {
var res = response;
var html = new EJS({
url: 'http://' + host + ':1010/OtherDomain/article-popup.ejs'
}).render({
price: res.content[i].price
});
var div = document.createElement("div");
div.innerHTML = html;
document.body.appendChild(html);
}
});
}
}
})();
This makes the following changes:
Encloses the code in an IIFE (self executing function) so the variables declared in the code block are not global.
Changes from document.body.innerHTML += html to use document.body.appendChild() to avoid recreating all the DOM elements every time.
Declares var html so it's a local variable, not another accidental global.
Makes a copy of the result from document.getElementsByTagName("a") using Array.prototype.slice.call() so the array will not change as the document is modified, allowing us to accurately iterate it.

Use HTTP-on-modify-request to redirect URL

I am building an add-on for Firefox that redirect request to a new URL if the URL match some conditions. I've tried this, and it does not work.
I register an observer on HTTP-on-modify-request to process the URL, if the URL match my condition, I will redirect to a new URL.
Here is my code:
var Cc = Components.classes;
var Ci = Components.interfaces;
var Cr = Components.results;
var newUrl = "https://google.com";
function isInBlacklist(url) {
// here will be somemore condition, I just use youtube.com to test
if (url.indexOf('youtube.com') != -1) {
return true;
}
return false;
}
exports.main = function(options,callbacks) {
// Create observer
httpRequestObserver =
{
observe: function (subject, topic, data) {
if (topic == "http-on-modify-request") {
var httpChannel = subject.QueryInterface(Ci.nsIHttpChannel);
var uri = httpChannel.URI;
var domainLoc = uri.host;
if (isInBlacklist(domainLoc) === true) {
httpChannel.cancel(Cr.NS_BINDING_ABORTED);
var gBrowser = utils.getMostRecentBrowserWindow().gBrowser;
var domWin = channel.notificationCallbacks.getInterface(Ci.nsIDOMWindow);
var browser = gBrowser.getBrowserForDocument(domWin.top.document);
browser.loadURI(newUrl);
}
}
},
register: function () {
var observerService = Cc["#mozilla.org/observer-service;1"].getService(Ci.nsIObserverService);
observerService.addObserver(this, "http-on-modify-request", false);
},
unregister: function () {
var observerService = Cc["#mozilla.org/observer-service;1"].getService(Ci.nsIObserverService);
observerService.removeObserver(this, "http-on-modify-request");
}
};
//register observer
httpRequestObserver.register();
};
exports.onUnload = function(reason) {
httpRequestObserver.unregister();
};
I am new to Firefox add-on development.

You can redirect a channel by calling nsIHttpChannel.redirectTo.
This is not possible once the channel is opened, but in http-on-modify-request it will work.
So in your code, you can do something like:
Cu.import("resource://gre/modules/Services.jsm");
// ...
if (condition) {
httpChannel.redirectTo(
Services.io.newURI("http://example.org/", null, null));
}
It looks like you might be using the Add-on SDK. In that case, read up on Using Chrome Authority.

You could simply do a
httpChannel.URI.spec = newUrl;
instead of
httpChannel.cancel(Cr.NS_BINDING_ABORTED);
...
browser.loadURI(newUrl);
Not sure how 'safe' it would be in your case, since I'm not exactly sure how other headers in the request (e.g. Cookie) would be manipulated when you change the URL to point to an entirely different domain at this stage.

Phantomjs interprocess communication

I am trying to implement a solution where by using PhantomJS a web location is open evaluated and the output is saved to a file for processing. Specifically the scanning for malicious scripts. I have been able to implement the solution using PhantomJS running once. For example this works perfectly...
var system = require('system');
var page = require('webpage').create();
var lastReceived = new Date().getTime();
var requestCount = 0;
var responseCount = 0;
var requestIds = [];
var fileSystem = require('fs');
var startTime = new Date().getTime();
page.onResourceReceived = function (response) {
if(requestIds.indexOf(response.id) !== -1) {
lastReceived = new Date().getTime();
responseCount++;
requestIds[requestIds.indexOf(response.id)] = null;
}
};
page.onResourceRequested = function (request) {
if(requestIds.indexOf(request.id) === -1) {
requestIds.push(request.id);
requestCount++;
}
};
page.open('http://adserver.example.com/adserve/;ID=164857;size=300x250;setID=162909;type=iframe', function () {});
var checkComplete = function () {
// We don't allow it to take longer than 5 seconds but
// don't return until all requests are finished
if((new Date().getTime() - lastReceived > 300 && requestCount === responseCount) || new Date().getTime() - startTime > 5000) {
clearInterval(checkCompleteInterval);
console.log(page.content);
phantom.exit();
}
}
var checkCompleteInterval = setInterval(checkComplete, 1);
However, I have had immense difficulty trying to create and automated system that doesn't require PhantomJS to continually be restarted which has a fair bit of overhead.
I tried using a named pipe to read from and then attempt to open the passed url, but for some reason it will not open properly. I would love and deeply appreciate any guidance on this.

One thing to mention is that PhantomJS excels in HTTP communications. That's why for advanced features & better performance, I always use resource pooling pattern + webserver module. This module is still tagged EXPERIMENTAL, but I have always found it quite stable until now.
So, I think the best in your case it's better to communicate via HTTP than via files IO.
Here is a very basic example :
var page = require('webpage').create();
var server = require('webserver').create();
var system = require('system');
var host, port;
if (system.args.length !== 2) {
console.log('Usage: server.js <some port>');
phantom.exit(1);
} else {
port = system.args[1];
var listening = server.listen(port, function (request, response) {
var page=require('webpage').create();
page.open(request.post.target, function(status){
response.write("Hello "+page.title);
response.close();
});
});
if (!listening) {
console.log("could not create web server listening on port " + port);
phantom.exit();
}
//test only
var url = "http://localhost:" + port + "/";
console.log("SENDING REQUEST TO:");
console.log(url);
var data='target=http://stackoverflow.com/';
page.open(url,'post', data, function (status) {
if (status !== 'success') {
console.log('FAIL to load the address');
} else {
console.log("GOT REPLY FROM SERVER:");
console.log(page.content);
}
phantom.exit();
});
}

We Keep Coding

JavaScript is the programming language of the Web.

How can I exit PhantomJS when a specific request is not found? - javascript

Related

AJAX: questions not rendering to the browser

JavaScript EventSource won't open connection after Apache basic auth

AJAX response text undefined

Use HTTP-on-modify-request to redirect URL

Phantomjs interprocess communication

Categories

Resources