How to use HTML DOM using npm test (CLI) - javascript

I am trying to find a way to run npm test using mocha over a HTML DOM. In this case, I am using the global document to retrieve a table out of the DOM. However, when I run npm test I get something like the error:
ReferenceError: document is not defined
at /home/luiz/Projects/linguist-unknown/src/scripts/ling-loader.js:92:61
at extFunc (/home/luiz/Projects/linguist-unknown/src/scripts/ling-loader.js:49:11)
at Array.every (native)
at Utilities.tryMatchUrlExtension (/home/luiz/Projects/linguist-unknown/src/scripts/ling-loader.js:60:25)
at Utilities.<anonymous> (/home/luiz/Projects/linguist-unknown/src/scripts/ling-loader.js:90:16)
at xhr.onload (/home/luiz/Projects/linguist-unknown/src/scripts/ling-loader.js:24:11)
at dispatchEvent (/home/luiz/Projects/linguist-unknown/node_modules/xmlhttprequest/lib/XMLHttpRequest.js:591:25)
at setState (/home/luiz/Projects/linguist-unknown/node_modules/xmlhttprequest/lib/XMLHttpRequest.js:614:14)
at IncomingMessage.<anonymous> (/home/luiz/Projects/linguist-unknown/node_modules/xmlhttprequest/lib/XMLHttpRequest.js:447:13)
at emitNone (events.js:91:20)
at IncomingMessage.emit (events.js:185:7)
at endReadableNT (_stream_readable.js:974:12)
at _combinedTickCallback (internal/process/next_tick.js:80:11)
at process._tickCallback (internal/process/next_tick.js:104:9)
1) should refresh table
16 passing (3s)
1 failing
1) Loader Utilities should refresh table:
Error: Timeout of 2000ms exceeded. For async tests and hooks, ensure "done()" is called; if returning a Promise, ensure it resolves.
I understand that the document is undefined and that I need to, somehow, create one myself, however, I believe that my main problems are:
My first time using npm and mocha and I cannot find anything related to it in their documentation.
Mostly, all problems people have regarding that are related to webbrowsers // I am using CLI, it will be tested with Travis on Github
In my code below you'll see that I solved a similar problem with XMLHttpRequest. However, I just can't figure out the best approach for including the document variable properly into my tests.
Thus, pardon me asking that shall this answer be already there on stackoverflow
My code is the following:
test-utilities.js
...
global.XMLHttpRequest = require('xmlhttprequest').XMLHttpRequest;
global.jsyaml = require('../src/scripts-min/js-yaml.min.js');
global.LinguistHighlighter = require('../src/scripts/ling-highlighter.js').LinguistHighlighter;
var LinguistLoader = require('../src/scripts/ling-loader.js').LinguistLoader;
describe('Loader', function () {
var utilities = new LinguistLoader.Utilities();
it('should refresh table', function(done) {
var location = {
hostname: "github.com",
href: "https://github.com/github-aux/linguist-unknown/blob/chrome/examples/Brain/human_jump.brain",
pathname: "/github-aux/linguist-unknown/blob/chrome/examples/Brain/human_jump.brain"
};
// check if it is not breaking
utilities.refresh(location, function(langObj, table){
done();
});
});
});
...
utilities.js:
...
Utilities.prototype.refresh = function(location, callback) {
var new_url = location.href;
if (new_url === current_url || !this.isGithub(location)) {
return;
}
current_url = new_url;
if (linguistObj === null) {
linguistObj = {
path: this.getPossibleFilepath(location)
};
}
setTimeout(function() {
var downloadHelper = new DownloadHelper();
downloadHelper.load(linguistObj.path, function(objs){
this.tryMatchUrlExtension(current_url, objs, function(langObj){
var table = document.getElementsByClassName("blob-wrapper")[0]
.getElementsByTagName("table")[0];
new LinguistHighlighter.Highlighter(langObj).draw(table);
// callback for tests purposes only
if (callback) {
callback(langObj, table);
}
});
}.bind(this));
}.bind(this), 100);
};
...
Any help is appreciated. Thank you!

I found a very good tool: JSDOM. Its goal is to emulate a subset of a web browser, such as the DOM. With that, I could implement my test-utilities.js file without even touching my utilities.js file, which is pretty much what I wanted.
Here goes the resolution of the file test-utilities.js
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
global.XMLHttpRequest = require('xmlhttprequest').XMLHttpRequest;
global.jsyaml = require('../src/scripts-min/js-yaml.min.js');
global.LinguistHighlighter = require('../src/scripts/ling-highlighter.js').LinguistHighlighter;
var LinguistLoader = require('../src/scripts/ling-loader.js').LinguistLoader;
describe('Loader', function () {
var utilities = new LinguistLoader.Utilities();
it('should refresh the code table', function(done) {
// Download the HTML string and parse it to JSDOM
JSDOM.fromURL("https://github.com/github-aux/linguist-unknown/blob/chrome/examples/Brain/human_jump.brain").then(dom => {
// JSDOM does not support 'innerText' and that is why I am creating this property for all objects.
var o = Object.prototype;
Object.defineProperty(o, "innerText", {
get: function jaca() {
if (this.innerHTML === undefined)
return "";
return this.innerHTML;
}
});
var location = {
hostname: "github.com",
href: "https://github.com/github-aux/linguist-unknown/blob/chrome/examples/Brain/human_jump.brain",
pathname: "/github-aux/linguist-unknown/blob/chrome/examples/Brain/human_jump.brain"
};
// check if it is not breaking
utilities.refresh(location, function(langObj, table) {
done();
});
});
});
That is working properly now! I hope it helps anyone! :D

Related

require node-pty results in TypeError: Object.setPrototypeOf: expected an object or null, got undefined

TL;DR: If I try to do var pty = require('node-pty'); results in TypeError: Object.setPrototypeOf: expected an object or null, got undefined keep reading for context
Hi, I'm trying to build a proof of concept by creating a terminal using React. For that, I used xterm-for-react which I made it work fine, and node-pty with this last library is with the one I'm having problems.
Initially I created a file in which I would try to make calls to it, it looks like this:
var os = require('os');
var pty = require('node-pty');
var shell = os.platform() === 'win32' ? 'powershell.exe' : 'bash';
var ptyProcess;
function createNewTerminal(FE){
ptyProcess = pty.spawn(shell, [], {
name: 'xterm-color',
cols: 80,
rows: 30,
cwd: process.env.HOME,
env: process.env
});
ptyProcess.onData((data) => FE.write(data));
}
function writeOnTerminal(data){
ptyProcess.write(data);
}
module.exports = {
createNewTerminal,
writeOnTerminal
}
I know it may not be the best code out there, but I was doing it just to try to see if this was possible. My plan was to call the functions from the react component like this:
import {createNewTerminal, writeOnTerminal} from './terminal-backend';
function BashTerminal() {
const xtermRef = React.useRef(null)
React.useEffect(() => {
// You can call any method in XTerm.js by using 'xterm xtermRef.current.terminal.[What you want to call]
xtermRef.current.terminal.writeln("Hello, World!")
createNewTerminal(xtermRef.current.terminal)
}, [])
const onData = (data) => {
writeOnTerminal(data);
}
return (
<XTerm ref={xtermRef} onData={onData}/>
);
}
But I was surprised that this was not working, and returned the error in the title. So, in order to reduce noise, I tried to change my functions to just console logs and just stay with the requires. My file now looked like this:
var os = require('os');
var pty = require('node-pty');
function createNewTerminal(FE){
console.log("Creating new console");
}
function writeOnTerminal(data){
console.log("Writing in terminal");
}
module.exports = {
createNewTerminal,
writeOnTerminal
}
Still got the same error. I'm currently not sure if this is even possible to do, or why this error occurs. Trying to look things online doesn't give any results, or maybe it does and I'm just not doing it right. Well, thanks for reading, I'm completely lost, so, if someone knows something even if it's not the complete answer I will be very thankful

Node Js: Using a WebWorker to resize images

I am making a node.js application that can resize images. I am able to do this successfully with jimp. However resizing an image is not asynchronous, and freezes the UI while resizing. I want to avoid this, so I tried using a webworker.
// main.js
var worker = new Worker(__dirname + '\\worker.js');
worker.addEventListener('message', function(e) {
if (e.data == 'done') { worker.terminate() } // Done
}, false);
worker.postMessage({'buff': buf, 'filename': filename}); // Start the worker
|
// worker.js
const Jimp = require('jimp'); // Oops, this doesn't work
self.addEventListener('message', function(e) {
resize(e.data.buf, e.data.filename);
}, false);
function resize(buf, filename) {
Jimp.read(buf).then(image => {
image.resize(1920, Jimp.AUTO);
image.writeAsync(filename).then(cb => { self.postMessage('done') });
});
}
What I found is that I cannot use node.js functions like require() in a webworker. How can I use Jimp in a webworker or resize an image in a different asynchronous way?
Edit: I am trying to use webworkify, copying the answer from #RubyJunk. When I try to create the worker I get the error Uncaught TypeError: Cannot convert undefined or null to object at Function.keys (<anonymous>). Does anyone know how to fix that?
Edit 2: I am using electron to create my Node.js app. They have a window property (nodeIntegrationInWorker) that makes it seem like I can run Node.js functions in a Web Worker, but when I try to use require() it still tells me it is not a function.
Try using webworkify, and create your worker like so instead
var work = require('webworkify');
var worker = work(require('./worker.js');
and put your worker method into module.exports like so
const Jimp = require('jimp');
module.exports = function(self) {
self.addEventListener('message', function(e) {
resize(e.data.buf, e.data.filename);
}, false);
function resize(buf, filename) {
Jimp.read(buf).then(image => {
image.resize(1920, Jimp.AUTO);
image.writeAsync(filename).then(cb => { self.postMessage('done') });
});
}
}

Scraping fully rendered webpage with nodejs

I am trying to get amazon pricing information with nodejs.
Here's the target url:
http://aws.amazon.com/ec2/pricing/
But the content of the pricing tables which I am reading in nodejs is not fully rendered and there are only javascripts.
So far I have used jsdom, jquerygo and phantom but I was not successful. Even setting timeouts does not help. Can anyone please provide me with a working solution for this specific case?
Thanks and best regards.
There are different ways to scrape a web page using node.js
I was inspired by spookjs
var Spooky = require('spooky');
var spooky = new Spooky({
child: {
transport: 'http'
},
casper: {
logLevel: 'debug',
verbose: true
}
}, function (err) {
if (err) {
e = new Error('Failed to initialize SpookyJS');
e.details = err;
throw e;
}
spooky.start(
'http://en.wikipedia.org/wiki/Spooky_the_Tuff_Little_Ghost');
spooky.then(function () {
this.emit('hello', 'Hello, from ' + this.evaluate(function () {
return document.title;
}));
});
spooky.run();
});
spooky.on('error', function (e, stack) {
console.error(e);
if (stack) {
console.log(stack);
}
});
spooky.on('console', function (line) {
console.log(line);
});
spooky.on('hello', function (greeting) {
console.log(greeting);
});
spooky.on('log', function (log) {
if (log.space === 'remote') {
console.log(log.message.replace(/ \- .*/, ''));
}
});
Note: Gives flexibility to run casperjs and phantom js using node.js
This solved my issue:
I noticed that when installing phantom module in node, it was complaining about version of phantomjs (version 2) and was downloading version (1.9.8) in some temporary location.
Thus I installed version 1.9.8 instead and set the PATH variable to that. And it worked!
Also must note that inside page.open(...) function you must setTimeout for quite a long time (in my case about 35 seconds) so that the whole page is fully loaded and rendered.

Can I mock console in NodeJs?

In my JS test, I need to check if the console.info is called. That's why I want to mock console. However, it seems that the console variable cannot be assigned with a different object. Did I make any mistake?
Here is the code I used:
var oldConsole = console;
var infoContent;
console = {
info: function(content) {
infoContent = content;
}
};
game.process('a command');
infoContent.should.equal('a command is processed');
console = oldConsole;
You can use rewire to replace the whole of console to silence it, or to inject a mock. I use deride but sinon would also work.
var rewire = require('rewire');
var deride = require('deride');
var Game = rewire('../lib/game');
describe('game testing', function() {
var stubConsole, game;
beforeEach(function() {
stubConsole = deride.stub(['info']);
stubConsole.setup.info.toReturn();
Game.__set__({
console: stubConsole
});
game = new Game();
});
it('logs info messages', function() {
game.process('a command');
stubConsole.expect.info.called.withArgs(['a command is processed']);
});
});
I find the solution. I can change the method info of console.
console.info = function(content) {
infoContent = content;
};
The question is now why console object itself cannot be reassigned?
you can use sinon npm to count the call to a function :
it("calls the original function only once", function () {
var callback = sinon.spy();
var proxy = once(callback);
proxy();
proxy();
assert(callback.calledOnce);
// ...or:
// assert.equals(callback.callCount, 1);
});
You can find the docs here : sinonjs.org
I thought I had the same problem and my solution was using this std-mocks module:
https://github.com/neoziro/std-mocks
This has the advantage of not taking over the global "console" but allows you to see what gets logged to the stdout / stderr. This solves the problem in a different way than the question was explicitly looking for; however I believe it is a good answer for the problem the question implies and may be useful for others.
const stdMocks = require('std-mocks');
stdMocks.use(); console.log('test'); stdMocks.restore();
// => undefined [nothing gets output, stdout intercepted]
const logged = stdMocks.flush();
console.log(logged)
// => { stdout: [ 'test\n' ], stderr: [] }

Save and render a webpage with PhantomJS and node.js

I'm looking for an example of requesting a webpage, waiting for the JavaScript to render (JavaScript modifies the DOM), and then grabbing the HTML of the page.
This should be a simple example with an obvious use-case for PhantomJS. I can't find a decent example, the documentation seems to be all about command line use.
From your comments, I'd guess you have 2 options
Try to find a phantomjs node module - https://github.com/amir20/phantomjs-node
Run phantomjs as a child process inside node - http://nodejs.org/api/child_process.html
Edit:
It seems the child process is suggested by phantomjs as a way of interacting with node, see faq - http://code.google.com/p/phantomjs/wiki/FAQ
Edit:
Example Phantomjs script for getting the pages HTML markup:
var page = require('webpage').create();
page.open('http://www.google.com', function (status) {
if (status !== 'success') {
console.log('Unable to access network');
} else {
var p = page.evaluate(function () {
return document.getElementsByTagName('html')[0].innerHTML
});
console.log(p);
}
phantom.exit();
});
With v2 of phantomjs-node it's pretty easy to print the HTML after it has been processed.
var phantom = require('phantom');
phantom.create().then(function(ph) {
ph.createPage().then(function(page) {
page.open('https://stackoverflow.com/').then(function(status) {
console.log(status);
page.property('content').then(function(content) {
console.log(content);
page.close();
ph.exit();
});
});
});
});
This will show the output as it would have been rendered with the browser.
Edit 2019:
You can use async/await:
const phantom = require('phantom');
(async function() {
const instance = await phantom.create();
const page = await instance.createPage();
await page.on('onResourceRequested', function(requestData) {
console.info('Requesting', requestData.url);
});
const status = await page.open('https://stackoverflow.com/');
const content = await page.property('content');
console.log(content);
await instance.exit();
})();
Or if you just want to test, you can use npx
npx phantom#latest https://stackoverflow.com/
I've used two different ways in the past, including the page.evaluate() method that queries the DOM that Declan mentioned. The other way I've passed info from the web page is to spit it out to console.log() from there, and in the phantomjs script use:
page.onConsoleMessage = function (msg, line, source) {
console.log('console [' +source +':' +line +']> ' +msg);
}
I might also trap the variable msg in the onConsoleMessage and search for some encapsulate data. Depends on how you want to use the output.
Then in the Nodejs script, you would have to scan the output of the Phantomjs script:
var yourfunc = function(...params...) {
var phantom = spawn('phantomjs', [...args]);
phantom.stdout.setEncoding('utf8');
phantom.stdout.on('data', function(data) {
//parse or echo data
var str_phantom_output = data.toString();
// The above will get triggered one or more times, so you'll need to
// add code to parse for whatever info you're expecting from the browser
});
phantom.stderr.on('data', function(data) {
// do something with error data
});
phantom.on('exit', function(code) {
if (code !== 0) {
// console.log('phantomjs exited with code ' +code);
} else {
// clean exit: do something else such as a passed-in callback
}
});
}
Hope that helps some.
Why not just use this ?
var page = require('webpage').create();
page.open("http://example.com", function (status)
{
if (status !== 'success')
{
console.log('FAIL to load the address');
}
else
{
console.log('Success in fetching the page');
console.log(page.content);
}
phantom.exit();
});
Late update in case anyone stumbles on this question:
A project on GitHub developed by a colleague of mine exactly aims at helping you do that: https://github.com/vmeurisse/phantomCrawl.
It still a bit young, it certainly is missing some documentation, but the example provided should help doing basic crawling.
Here's an old version that I use running node, express and phantomjs which saves out the page as a .png. You could tweak it fairly quickly to get the html.
https://github.com/wehrhaus/sitescrape.git

Categories