Read a local HTML file from CasperJS start() - javascript

I am writing a simple casperjs script to fill a rather complex form on a website. The HTML code of the website is a bit messy and I don't want to go through the navigation steps to reach the page everytime when I am testing my script.
I have the form page saved as HTML file but I couldn't even properly load a testing HTML file into casperjs. Here is the code, file and result:
var casper = require('casper').create();
casper.start('file://test.html').then(function() {
this.echo('started')
this.echo(this.getPageContent())
});
casper.run(function(){
this.echo('ended');
casper.done();
});
The test file:
<html>
<head>
<meta charset="utf-8">
<title>My page</title>
</head>
<body>
<h1 class="page-title">Hello</h1>
<ul>
<li>one</li>
<li>two</li>
<li>three</li>
</ul>
<footer><p>2012 myself</p></footer>
</body>
</html>
The execution result:
C:>started
<html><head></head><body></body></html>
ended
Why the tags within the HTML body are gone?

All works fine, with an absolute path:
var casper = require('casper').create();
casper.start('file:///home/root2/pjs/test.html').then(function() {
this.echo('started')
this.echo(this.getPageContent())
});
casper.run(function(){
this.echo('ended');
casper.done();
});
The execution result:
started
<html><head>
<meta charset="utf-8">
<title>My page</title>
</head>
<body>
<h1 class="page-title">Hello</h1>
<ul>
<li>one</li>
<li>two</li>
<li>three</li>
</ul>
<footer><p>2012 myself</p></footer>
</body></html>
ended
You can also try to specify an absolute path like this:
file:///C://Full/Path/To/test.html

FYI, you can use these functions to get an absolute file uri for a relative one:
function getAbsoluteFilePath(relativePath) {
return "file:///" + currentDir() + relativePath;
};
// Courtesy https://github.com/casperjs/casperjs/issues/141
function currentDir() {
var sep = "/";
var pathParts = fs.absolute(casper.test.currentTestFile).split(sep);
pathParts.pop();
return pathParts.join(sep) + sep;
}
To use that in your scenario, use this:
// ...
casper.start(getAbsoluteFilePath('test.html')).then(function() {
this.echo('started')
this.echo(this.getPageContent())
});
// ...

You can make use of the fs module to get the absolute path of the working directory, and then concatenate the protocol "file://" and the relativePath.
E.g.
const fs = require('fs');
var casper = require('casper').create();
// your casper logic here
console.log(getAbsoluteFilePath('test.html'));
casper.run();
function getAbsoluteFilePath(relativePath) {
return "file://" + fs.workingDirectory + '/' + relativePath;
};

Related

test html page with jest

I'm trying to setup a test for prototype.js.
I would like to avoid starting a local server to use cypress / puppeteer / etc. My goal here is just use jest.
My idea was to have for each test a minimal index.html like the following:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<script src="https://ajax.googleapis.com/ajax/libs/prototype/1.7.2.0/prototype.js"></script>
</head>
<body>
<span id="el">foo foo</span>
</body>
</html>
and I tried first to run jest as
const fs = require('fs');
const path = require('path');
const html = fs.readFileSync(path.resolve(__dirname, './index.html'), 'utf8');
jest.dontMock('fs');
describe('$', function () {
beforeEach(() => {
document.documentElement.innerHTML = html.toString();
});
afterEach(jest.resetModules);
it('finds a node', function () {
expect($('el')).toBeInTheDocument()
});
});
but $ as all the prototype.js global stuff is not available.
ouhhh .... looks like wrapping the expect into a window.onload is enough, which makes also sense.

How to load javascript files using event listeners

This question is not duplicate of
Conditionally load JavaScript file
and nor this
How to include an external javascript file conditionally?
I have gone through them, they are kind of similar to what I want to do but not exactly same. Here is how, in the above question the user just wants to load the script file once based on a condition. But in my case I want to load different js files based on click events.
So here in my case I have an HTML document:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Experiment</title>
<link href="s.css" rel="stylesheet" type="text/css">
</head>
<body>
<div class="navigation">
<nav>
<ul>
<li id="home_btn"> Home</li>
<li id="about_btn"> About </li>
</ul>
</nav>
</div>
<canvas id="myCanvas">
</canvas>
<div class="notePane">
<p> This is just a bunch of text not explanation</p>
</div>
</body>
<script src="./exp.js" type="text/javascript"></script>
</html>
and this h.html file is linked to an exp.js file. Now in the exp.js file :
var h_btn = document.getElementById("home_btn");
var a_btn = document.getElementById("about_btn");
var head = document.getElementsByTagName("head")[0];
var js = document.createElement("script");
js.type="module";
h_btn.addEventListener("click", showHome );
a_btn.addEventListener("click", showAbout);
function showHome() {
js.src="./j1.js";
head.appendChild(js);
}
function showAbout() {
js.src="./j2.js";
head.appendChild(js);
}
So things work fine when I click the h_btn on the web page. It loads j1.js. But then when I click on the a_btn on the web page I expect to see j2.js linked but I don't see it. I have to refresh the page and then click on a_btn to see j2.js linked. How do I link j1.js and j2.js such that I don't have to refresh the page again and again to load the correct script.
Update: OP has updated the question requirements such that he wants to "unload" a JS file when another is clicked. There is no way to undo all the runtime logic once a JS file is loaded: the only way is to reload the page. Removing the <script> tag or changing the src attribute will not magically unbind event listeners or "undeclare" variables.
Therefore, if OP wants to "start anew", the only way is to check if a custom script has been loaded before: if it has, we force reload the page. There are of course many ways to "inform" the next page which source to load, if available: in the example below, we use query strings:
var h_btn = document.getElementById("home_btn");
var a_btn = document.getElementById("about_btn");
var head = document.getElementsByTagName("head")[0];
var appendedScriptKey;
var scripts = {
'home': './j1.js',
'about': './j2.js'
}
h_btn.addEventListener("click", showHome);
a_btn.addEventListener("click", showAbout);
// Check query string if a specific script is set
var params = (new URL(document.location)).searchParams;
var scriptKey = params.get('scriptKey');
if (scriptKey && scriptKey in scripts) {
appendScript(scriptKey);
}
function appendScript(key) {
if (hasAppendedScript) {
location.href = location.href + (location.search ? '?' : '&') + 'script=' + key;
location.reload();
}
var js = document.createElement("script");
js.type="module";
js.src = scripts[key];
head.appendChild(js);
appendedScript = key;
}
function showHome() {
appendedScriptKey('home');
}
function showAbout() {
appendScript('about');
}
This is because of how Node.appendChild() works. The first click works because you're creating a new element and inserting it into your document. However, the second click will not work as you've expected because the node already exists:
The Node.appendChild() method adds a node to the end of the list of children of a specified parent node. If the given child is a reference to an existing node in the document, appendChild() moves it from its current position to the new position
This means that the second click will only mutate the src attribute of the already-injected <script> element instead of creating a new one, and that also means that the second script src will not be loaded.
A solution will be to use a function that will create a script tag every single time:
var h_btn = document.getElementById("home_btn");
var a_btn = document.getElementById("about_btn");
var head = document.getElementsByTagName("head")[0];
h_btn.addEventListener("click", showHome);
a_btn.addEventListener("click", showAbout);
function insertScript(src) {
var js = document.createElement("script");
js.type = "module";
js.src = src;
head.appendChild(js);
}
function showHome() {
insertScript('./j1.js');
}
function showAbout() {
insertScript('./j2.js');
}
But this will also mean that multiple clicks on the same button will cause the script to be injected multiple times. This does not affect browser performance much since the browser has the loaded script cached somewhere, but to guard against this, it might be a good idea to implement some kind of unique identifier per script, and check against that before injection. There are many ways to do this, and this is just one way:
var h_btn = document.getElementById("home_btn");
var a_btn = document.getElementById("about_btn");
var head = document.getElementsByTagName("head")[0];
h_btn.addEventListener("click", showHome);
a_btn.addEventListener("click", showAbout);
// Store scripts that you've injected
var scripts = [];
function insertScript(src) {
// If we have previously attempted injection, then do nothing
if (scripts.indexOf(src) !== -1) {
return;
}
var js = document.createElement("script");
js.type = "module";
js.src = src;
head.appendChild(js);
// Push script to array
scripts.push(src);
}
function showHome() {
insertScript('./j1.js');
}
function showAbout() {
insertScript('./j2.js');
}
Alternative unique script injection strategies and ideas:
Use ES6 Map() to track unique script sources being injected
Perhaps only store src to array/dict/map when the script has successfully loaded
You have to create the element twice, as there can only be one element with 1 src.
var h_btn = document.getElementById("home_btn");
var a_btn = document.getElementById("about_btn");
var js1 = document.createElement("script");
var js2 = document.createElement("script");
h_btn.addEventListener("click", showHome);
a_btn.addEventListener("click", showAbout);
function showHome() {
js1.src = "j1.js";
document.body.appendChild(js1);
}
function showAbout() {
js2.src = "j2.js";
document.body.appendChild(js2);
}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Experiment</title>
<link href="s.css" rel="stylesheet" type="text/css">
</head>
<body>
<div class="navigation">
<nav>
<ul>
<li id="home_btn"> Home</li>
<li id="about_btn"> About </li>
</ul>
</nav>
</div>
<canvas id="myCanvas">
</canvas>
<div class="notePane">
<p> This is just a bunch of text not explanation</p>
</div>
</body>
<script src="exp.js" type="text/javascript"></script>
</html>

Replace text before other scripts load

I have piece of code:
<!doctype html>
<html lang="en">
<head>
<script src="main.js"></script>
</head>
<body>
<script src="custom.js?token={{token}}"></script>
</body>
</html>
main.js is my script which replace in body tag {{token}} for value provided from Query.
window.onload = function() {
var link_sid = query.get('link_sid');
document.body.innerHTML = document.body.innerHTML.replace(/{{token}}/g, util.protocol() + '://c.' + util.env() + util.domain() + '/' + link_sid);
}
custom.js?token={{token}} is user content which I can't change.
The problem is that user JS put some HTML code based on my {{token}} value.
So after open page all {{token}} are changed but not inside custom JS cos token was changed after JS was loaded.
How can I replace {{token}} in custom.js query before it loads? It needs to be done in pure JS.
[Updated 1]
I can't move anything from body also can't change. Body is user-provided content.
You need to load your script dynamically. Here is one way to do it by creating script tag and appending it to the page
<!doctype html>
<html lang="en">
<head>
<script>
var script = document.createElement("script")
script.type = "text/javascript";
script.src = "custom.js?token=" + "your_token"
document.getElementsByTagName("head")[0].appendChild(script);
</script>
</head>
<body>
</body>
</html>
I can see the way you are passing {{token}} to custom.js is a hacky way. I can suggest you to change the way it should be like this:
Since main.js is your script, you can try to assign the {{token}} to a global value, example:
Inside main.js:
window.TOKEN = util.protocol() + '://c.' + util.env() + util.domain() + '/' + link_sid`;
In the html will still be the same, except you don't need to add the {{token}} string:
Then inside custom.js, you can use window.TOKEN to do whatever you want with it.
If the window.TOKEN is evaluated asynchronously, you will also need a callback function from main.js to tell custom.js when the token is ready, then it can start using it, example:
When TOKEN have to be asynchronous:
In main.js:
var TOKEN = util.protocol() + '://c.' + util.env() + util.domain() + '/' + link_sid`;
typeof window.tokenReady === 'function' && window.tokenReady(token); // make sure it's a function before calling it
In custom.js:
window.tokenReady = function (token) {
// Do something with token
}

AngularJS Webpage Can't Find Referenced Angular Module or Controller

Sorry for the possible repost, but I don't know how else to write this. I've been using this website to create a small angularJS webpage using nodeJS as well, and I've gotten to the part of separating the angular code from the view, or HTML. What I've found was that when I tried to separate them nothing worked any more.
Here's my code so far:
Home.html:
<!DOCTYPE html>
<html lang="en-US">
<head>
<script src="https://ajax.googleapis.com/ajax/libs/angularjs/1.6.4/angular.min.js"></script>
<!-- <script>
var myApp = angular.module('myApp', []).controller('myCtrl', function($scope) {
$scope.firstName = "John";
$scope.lastName = "Doe";
$scope.myColor = "red";
});
</script> -->
</head>
<body>
<!-- <script>
myApp.directive('myDirective', function() {
return {
template: "This is a test directive."
};
})
</script> -->
<h2>myApp Test</h2>
<div ng-app="myApp" ng-controller="myCtrl" ng-init="quantity = 10; cost = 5">
<p>Color: <input type="text" style="background-color:{{myColor}}" ng-model="myColor" value="{{myColor}}"></p>
<p>Total in dollar (raw exp): {{quantity * cost}}</p>
<p>Total in dollar (span): <span ng-bind="quantity * cost"></span></p>
<p> First Name: <input type="text" ng-model="firstName"></p>
<p> Last Name: <input type="text" ng-model="lastName"></p>
<h1>Hello, {{firstName + " " + lastName}}</h1>
</div>
Are you even changing?!
</body>
<script type="javascript" src="../JS/myApp.js"></script>
<!-- <script type="javascript" src="../JS/myApp.module.js"></script> -->
<script type="javascript" src="../JS/myCtrl.js"></script>
<!-- <script type="javascript" src="../JS/myCtrl.component.js"></script> -->
</html>
myApp.js / myApp.module.js:
var myApp = angular.module('myApp', []);
myCtrl.js / myCtrl.component.js:
myApp.controller('myCtrl', function($scope) {
$scope.firstName = "John";
$scope.lastName = "Doe";
$scope.myColor = "red";
});
// app.directive('myDirective', function() {
// return {
// template: '<p><h3>This is a test directive.<h3></p>'
// };
// });
Finally, here's my folder hierarchy:
If what I've done wrong is already evident, there's no need for you to continue reading.
Now the commented-out code is important as well. These are both the other things I've tried, and what I wanted to implement. I have tried:
Re-adding all the angular code back to the head tag to make sure it was still working at all. It worked, aside from the directive stuff (which, at point, I believe would have to be part of a separate module, but not the point, nonetheless).
Moving the script references, which are, and were, located below the body, to the head.
Moving the script references to the top of the body.
Moving everything into just *one* file (myApp.module.js).
Renaming both "myCtrl.component.js" and "myApp.module.js" to "myCtrl.js" and "myApp.js", respectively, to ensure possibly-antiquated angularJS nomenclature wasn't an attribution.
Adding "type="javascript"" to the script references.
"Hard refreshing" to make sure old documents weren't cached.
Tested to see if it was even requesting the files from the server using node. It doesn't look like it is.
If you need the nodeJS part of it, it's here as well (index.js):
var http = require('http');
var url = require('url');
var fs = require('fs');
var path = require('path');
http.createServer(function(req, res) {
var myUrl = url.parse(req.url);
var basename = path.basename(myUrl.path);
console.log('request url: ' + req.url);
console.log('url path: ' + myUrl.path);
console.log('basename: ' + basename);
fs.readFile('../HTML/Home.html', function(err, data) {
res.writeHead(200, { 'ContentType': 'text/html' });
res.write(data);
res.end();
});
}).listen(8080);
The problem is in your script tag
<script type="javascript" src="../JS/myCtrl.js"></script>
It should not be
type="javascript"
Either change it to
type="text/javascript"
or remove the type totally. Due to incorrect type , it is not loading controller and other js files to browser.

Scrape html with js

I'm trying to get the html of www.soccerway.com. In particular this:
that have the label-wrapper class I also tried with: select.nav-select but I can't get any content. What I did is:
1) Created a php filed called grabber.php, this file have this code:
<?php echo file_get_contents($_GET['url']); ?>
2) Created a index.html file with this content:
<!DOCTYPE html>
<html>
<head>
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"></script>
<meta charset=utf-8 />
<title>test</title>
</head>
<body>
<div id="response"></div>
</body>
<script>
$(function(){
var contentURI= 'http://soccerway.com';
$('#response').load('grabber.php?url='+ encodeURIComponent(contentURI) + ' #label-wrapper');
});
var LI = document.querySelectorAll(".list li");
var result = {};
for(var i=0; i<LI.length; i++){
var el = LI[i];
var elData = el.dataset.value;
if(elData) result[el.innerHTML] = elData; // Only if element has data-value attr
}
console.log( result );
</script>
</html>
in the div there is no content grabbed, I tested my js code for get all the link and working but I've inserted the html page manually.
I see a couple issues here.
var contentURI= 'http:/soccerway.com #label-wrapper';
You're missing the second slash in http://, and you're passing a URL with a space and an ID to file_get_contents. You'll want this instead:
var contentURI = 'http://soccerway.com/';
and then you'll need to parse out the item you're interested in from the resulting HTML.
The #label-wrapper needs to be in the jQuery load() call, not the file_get_contents, and the contentURI variable needs to be properly escaped with encodeURIComponent:
$('#response').load('grabber.php?url='+ encodeURIComponent(contentURI) + ' #label-wrapper');
Your code also contains a massive vulnerability that's potentially very dangerous, as it allows anyone to access grabber.php with a url value that's a file location on your server. This could compromise your database password or other sensitive data on the server.

Categories