perform browser action with node.js with using API - javascript

I want to do some event eg. clicks in a website. I can do it in chrome with javascript (or chrome extension), but is it possible to do without opening chrome but with server side code? No API is provided. It's not scraping but perform some sort of action.

NodeJS uses Google V8 engine to interpret the JavaScript code. It does not run in a browser environment and therefore it lacks DOM and event handling. However, you can actually mock browser in NodeJS environment using mock-browser package.
const MockBrowser = require('mock-browser/lib/MockBrowser')
const mockBrowser = new MockBrowser()
global.window = mockBrowser.getWindow()
global.document = mockBrowser.getDocument()
global.navigator = mockBrowser.getNavigator()
However, you should be careful with this approach, as some methods (e.g. getComputedStyle) still will not work.
Maybe you should reconsider why you want to use DOM and events on the server side.
PhantomJS: Headless browser for NodeJS
PhantomJS is a headless browser for NodeJS that is used for testing, scraping, etc. It provides you with a full-featured browser that can simulate a browser.
Using CasperJS for scraping
If you want to scrape websites, you may use a library called CasperJS that itself uses PhantomJS. An example:
var casper = require('casper').create();
var links;
function getLinks() {
// Scrape the links from top-right nav of the website
var links = document.querySelectorAll('ul.navigation li a');
return Array.prototype.map.call(links, function (e) {
return e.getAttribute('href')
});
}
// Opens casperjs homepage
casper.start('http://casperjs.org/');
casper.then(function () {
links = this.evaluate(getLinks);
});
casper.run(function () {
for(var i in links) {
console.log(links[i]);
}
casper.done();
});

Related

How can I check whether an application is installed from a web browser?

This is for Windows.
I have a flash application I am converting to AIR. I built a captive installer using NSIS and it works fine. However I would like to have an icon on a website which checks if the application is already installed and ask the user if they wish to run it. If it is not installed, they get the option to download it.
I am fairly certain this is doable, because Zoom and GoToMeeting both do this.
My searching skills seem to be failing me when looking for this.
Edit:
It appears the best/only way to do this is to create a custom protocol for the application. Something like DoDaApp://.
Which brings up the next set of questions;
How to create an NSIS file which will create the appropriate registry entries on the client computer? As a user, not admin.
How to check if the protocol is currently installed on the computer?
This is a partial answer as it does not work in Edge. I'll explain the issue below.
As recommended in How to detect browser's protocol handlers you can use timeout & blur event handlers. Here is my interpretation of the code;
function checkCustomProtocol(inProtocol,inInstalLink,inTimeOut)
{
var timeout = inTimeOut;
window.addEventListener('blur',function(e)
{
window.clearTimeout(timeout);
}
)
timeout = window.setTimeout(function()
{
console.log('timeout');
window.location = inInstalLink;
}, inTimeOut
);
window.location = inProtocol;
}
Microsoft Edge is ever so helpful by popping up a dialog box telling you "You'll Need a new app to open this" which "blurs" the screen, not allowing download of the file.
So I will be posting another question on how to make it work in Edge. I have reviewed ismailhabib's code but the known issues section says it doesn't work with Edge either.
Here is a more complete answer. It has been lightly tested in IE 11, Microsoft Edge, Chrome and Firefox. I also added comments;
/*
checkCustomProtocol - check if custom protocol exists
inProtocol - URL of application to run eg: MyApp://
inInstallLink - URL to run when the protocol does not exist.
inTimeOut - time in miliseconds to wait for application to Launch.
*/
function checkCustomProtocol(inProtocol,inInstalLink,inTimeOut)
{
// Check if Microsoft Edge
if (navigator.msLaunchUri)
{
navigator.msLaunchUri(inProtocol, function ()
{
//It launched, nothing to do
},
function()
{
window.location = inInstalLink; //Launch alternative, typically app download.
}
);
}
else
{
// Not Edge
var timeout = inTimeOut;
//Set up a listener to see if it navigates away from the page.
// If so we assume the papplication launched
window.addEventListener('blur',function(e)
{
window.clearTimeout(timeout);
}
)
//Set a timeout so that if the application does not launch within the timeout we
// assume the protocol does not exist
timeout = window.setTimeout(function()
{
console.log('timeout');
window.location = inInstalLink; //Try to launch application
}, inTimeOut
);
window.location = inProtocol; //Launch alternative, typically app download.
}
}

Open browser and reload on source code changes

Basically I want to open the default browser (which I handled already) and when the development folder has any changes (which I handled already) to use the browser reference to reload the url.
This is what I have done so far:
var open = requires('opn');
var fs = requires('fs');
var promise = open('http://localhost/my-developemnt-path/:80');
var browser;
promise.then((cp) => {
//get a reference to the browser from the child process cp
browser = cp;//...??
});
fs.watch('my-developemnt-path', {recursive: true}, (eventType, filename) => {
console.log(`event type is: ${eventType}`);
if (filename) {
console.log(`filename provided: ${filename}`);
browser && browser.location.reload();
} else {
console.log('filename not provided');
}
});
So how do I get the browser reference out of the child-process and how can I use it to force a reload?
CLARIFICATIONS
I am not using any Express or other particular application. Just a common web app I am running on Apache.
I am using nodejs just to open a browser window and monitor files changes under the working dir.

Retrieve html content of a page several seconds after it's loaded

I'm coding a script in nodejs to automatically retrieve data from an online directory.
Knowing that I had never done this, I chose javascript because it is a language I use every day.
I therefore from the few tips I could find on google use request with cheerios to easily access components of dom of the page.
I found and retrieved all the necessary information, the only missing step is to recover the link to the next page except that the one is generated 4 seconds after loading of page and link contains a hash so that this step Is unavoidable.
What I would like to do is to recover dom of page 4-5 seconds after its loading to be able to recover the link
I looked on the internet, and much advice to use PhantomJS for this manipulation, but I can not get it to work after many attempts with node.
This is my code :
#!/usr/bin/env node
require('babel-register');
import request from 'request'
import cheerio from 'cheerio'
import phantom from 'node-phantom'
phantom.create(function(err,ph) {
return ph.createPage(function(err,page) {
return page.open(url, function(err,status) {
console.log("opened site? ", status);
page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js', function(err) {
//jQuery Loaded.
//Wait for a bit for AJAX content to load on the page. Here, we are waiting 5 seconds.
setTimeout(function() {
return page.evaluate(function() {
var tt = cheerio.load($this.html())
console.log(tt)
}, function(err,result) {
console.log(result);
ph.exit();
});
}, 5000);
});
});
});
});
but i get this error :
return ph.createPage(function (page) {
^
TypeError: ph.createPage is not a function
Is what I am about to do is the best way to do what I want to do? If not what is the simplest way? If so, where does my error come from?
If You dont have to use phantomjs You can use nightmare to do it.
It is pretty neat library to solve problems like yours, it uses electron as web browser and You can run it with or without showing window (You can also open developer tools like in Google Chrome)
It has only one flaw if You want to run it on server without graphical interface that You must install at least framebuffer.
Nightmare has method like wait(cssSelector) that will wait until some element appears on website.
Your code would be something like:
const Nightmare = require('nightmare');
const nightmare = Nightmare({
show: true, // will show browser window
openDevTools: true // will open dev tools in browser window
});
const url = 'http://hakier.pl';
const selector = '#someElementSelectorWitchWillAppearAfterSomeDelay';
nightmare
.goto(url)
.wait(selector)
.evaluate(selector => {
return {
nextPage: document.querySelector(selector).getAttribute('href')
};
}, selector)
.then(extracted => {
console.log(extracted.nextPage); //Your extracted data from evaluate
});
//this variable will be injected into evaluate callback
//it is required to inject required variables like this,
// because You have different - browser scope inside this
// callback and You will not has access to node.js variables not injected
Happy hacking!

Automate daily csv file download from website button click

I would like to automate the process of visiting a website, clicking a button, and saving the file. The only way to download the file on this site is to click a button. You can't navigate to the file using a url.
I have been trying to use phantomjs and casperjs to automate this process, but haven't had any success.
I recently tried to use brandon's solution here
Grab the resource contents in CasperJS or PhantomJS
Here is my code for that
var fs = require('fs');
var cache = require('./cache');
var mimetype = require('./mimetype');
var casper = require('casper').create();
casper.start('http://www.example.com/page_with_download_button', function() {
});
casper.then(function() {
this.click('#download_button');
});
casper.on('resource.received', function (resource) {
"use strict";
for(i=0;i < resource.headers.length; i++){
if(resource.headers[i]["name"] == "Content-Type" && resource.headers[i]["value"] == "text/csv; charset-UTF-8;"){
cache.includeResource(resource);
}
}
});
casper.on('load.finished', function(status) {
for(i=0; i< cache.cachedResources.length; i++){
var file = cache.cachedResources[i].cacheFileNoPath;
var ext = mimetype.ext[cache.cachedResources[index].mimetype];
var finalFile = file.replace("."+cache.cacheExtension,"."+ext);
fs.write('downloads/'+finalFile,cache.cachedResources[i].getContents(),'b');
}
});
casper.run();
I think the problem could be caused by my cachePath being incorrect in cache.js
exports.cachePath = 'C:/Users/username/AppData/Local/Ofi Labs/PhantomJS';
Should I be using something in adition to the backslashes to define the path?
When I try
casperjs --disk-cache=true export_script.js
Nothing is downloaded. After a little debugging I have found that cache.cachedResources is always empty.
I would also be open to solutions outside of phantomjs/casperjs.
UPDATE
I am not longer trying to accomplish this with CasperJS/PhantomJS.
I am using the chrome extension Tampermonkey suggested by dandavis.
Tampermonkey was extremely easy to figure out.
I installed Tampermonkey, navigated to the page with the download link, and then clicked New Script under tampermonkey and added my javascript code.
document.getElementById("download_button").click();
Now every time I navigate to the page in my browser, the file is downloaded. I then created a batch script that looks like this
set date=%DATE:~10,4%_%DATE:~4,2%_%DATE:~7,2%
chrome "http://www.example.com/page-with-dl-button"
timeout 10
move "C:\Users\user\Downloads\export.csv" "C:\path\to\dir\export_%date%.csv"
I set that batch script to run nightly using the windows task scheduler.
Success!
Your button most likely issues a POST request to the server.
In order to track it:
Open Network tab in Chrome developer tools
Navigate to the page and hit the button.
Notice which request led to file download. Right click on it and copy as cURL
Run copied cURL
Once you have cURL working you can schedule downloads using cron or Task Scheduler depending on operation system you are using.

Identify tab that made request in Firefox Addon SDK

I'm using the Firefox Addon SDK to build something that monitors and displays the HTTP traffic in the browser. Similar to HTTPFox or Live HTTP Headers. I am interested in identifying which tab in the browser (if any) generated the request
Using the observer-service I am monitoring for "http-on-examine-response" events. I have code like the following to identify the nsIDomWindow that generated the request:
const observer = require("observer-service"),
{Ci} = require("chrome");
function getTabFromChannel(channel) {
try {
var noteCB= channel.notificationCallbacks ? channel.notificationCallbacks : channel.loadGroup.notificationCallbacks;
if (!noteCB) { return null; }
var domWin = noteCB.getInterface(Ci.nsIDOMWindow);
return domWin.top;
} catch (e) {
dump(e + "\n");
return null;
}
}
function logHTTPTraffic(sub, data) {
sub.QueryInterface(Ci.nsIHttpChannel);
var ab = getTabFromChannel(sub);
console.log(tab);
}
observer.add("http-on-examine-response", logHTTPTraffic);
Mostly cribbed from the documentation for how to identify the browser that generated the request. Some is also taken from the Google PageSpeed Firefox addon.
Is there a recommended or preferred way to go from the nsIDOMWindow object domWin to a tab element in the SDK tabs module?
I've considered something hacky like scanning the tabs list for one with a URL that matches the URL for domWin, but then I have to worry about multiple tabs having the same URL.
You have to keep using the internal packages. From what I can tell, getTabForWindow() function in api-utils/lib/tabs/tab.js package does exactly what you want. Untested code:
var tabsLib = require("sdk/tabs/tab.js");
return tabsLib.getTabForWindow(domWin.top);
The API has changed since this was originally asked/answered...
It should now (as of 1.15) be:
return require("sdk/tabs/utils").getTabForWindow(domWin.top);
As of Addon SDK version 1.13 change:
var tabsLib = require("tabs/tab.js");
to
var tabsLib = require("sdk/tabs/helpers.js");
If anyone still cares about this:
Although the Addon SDK is being deprecated in support of the newer WebExtensions API, I want to point out that
var a_tab = require("sdk/tabs/utils").getTabForContentWindow(window)
returns a different 'tab' object than the one you would typically get by using
worker.tab in a PageMod.
For example, a_tab will not have the 'id' attribute, but would have linkedPanel property that's similar to the 'id' attribute.

Categories