Download a file using Nightmare - javascript

I am using Nightmare to create an automated downloader for today's newspaper. I managed to log in and go to the specified page. However, I could not figure out how to download a file with Nightmare.
var Nightmare = require('nightmare');
new Nightmare()
  .goto('https://login.nrc.nl/login?service=http://digitaleeditie.nrc.nl/welkom')
  .type('input[name="username"]', 'Username')
  .type('input[name="password"]', 'Password')
  .click('button[type="submit"]')
  .wait()
  .goto('http://digitaleeditie.nrc.nl/digitaleeditie/NH/2014/10/20141124___/downloads.html')
  .wait()
  .click('a[href="/digitaleeditie/helekrant/epub/nrc_20141124.epub"]')
  .wait()
  .url(function (url) {
    console.log(url);
  })
  .run(function (err, nightmare) {
    if (err) return console.log(err);
    console.log('Done!');
  });
I tried to download the file by clicking the download link, but this does not seem to work.

PhantomJS (and CasperJS and Nightmare) don't trigger a download (dialog) when you click on something that should be downloaded. So, it is necessary to download it yourself. If you can find out the URL of the file, then it can be easily downloaded using an XMLHttpRequest from the page context.
So you need to exchange
.click('a[href="/digitaleeditie/helekrant/epub/nrc_20141124.epub"]')
for
.evaluate(function ev() {
  // Runs in the page context: fetch the file synchronously as a binary string
  var el = document.querySelector("[href*='nrc_20141124.epub']");
  var xhr = new XMLHttpRequest();
  xhr.open("GET", el.href, false);
  xhr.overrideMimeType("text/plain; charset=x-user-defined");
  xhr.send();
  return xhr.responseText;
}, function cb(data) {
  // Runs in the Node context: write the binary string to disk
  var fs = require("fs");
  fs.writeFileSync("book.epub", data, "binary");
})
You can also use the newer way of requesting binary data.
.evaluate(function ev() {
  var el = document.querySelector("[href*='nrc_20141124.epub']");
  var xhr = new XMLHttpRequest();
  xhr.open("GET", el.href, false);
  xhr.responseType = "arraybuffer";
  xhr.send();
  // Copy the typed array into a plain array so it survives serialization
  // back to the Node context
  var bytes = [];
  var array = new Uint8Array(xhr.response);
  for (var i = 0; i < array.length; i++) {
    bytes[i] = array[i];
  }
  return bytes;
}, function cb(data) {
  var fs = require("fs");
  fs.writeFileSync("book.epub", new Buffer(data), "binary");
})
Both of these approaches are described on MDN.
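A minimal proof-of-concept sketch of the first approach, using the same old Nightmare API as the question (the login steps from the question are omitted here and would still be needed for this particular site):
var Nightmare = require('nightmare');
var fs = require('fs');
new Nightmare()
  .goto('http://digitaleeditie.nrc.nl/digitaleeditie/NH/2014/10/20141124___/downloads.html')
  .evaluate(function () {
    // page context: fetch the file synchronously as a binary string
    var el = document.querySelector("[href*='nrc_20141124.epub']");
    var xhr = new XMLHttpRequest();
    xhr.open("GET", el.href, false);
    xhr.overrideMimeType("text/plain; charset=x-user-defined");
    xhr.send();
    return xhr.responseText;
  }, function (data) {
    // Node context: persist the binary string to disk
    fs.writeFileSync("book.epub", data, "binary");
  })
  .run(function (err) {
    if (err) return console.log(err);
    console.log('Done!');
  });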

There is a Nightmare download plugin.
You can download the file with just the code below:
var Nightmare = require('nightmare');
require('nightmare-download-manager')(Nightmare);
var nightmare = Nightmare();
nightmare.on('download', function (state, downloadItem) {
  if (state == 'started') {
    nightmare.emit('download', '/some/path/file.zip', downloadItem);
  }
});
nightmare
  .downloadManager()
  .goto('https://github.com/segmentio/nightmare')
  .click('a[href="/segmentio/nightmare/archive/master.zip"]')
  .waitDownloadsComplete()
  .then(() => {
    console.log('done');
  });
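The download manager is a separate package, so install it alongside Nightmare first, e.g. npm install nightmare nightmare-download-manager.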

I got my downloads working easily using the request module.
var Nightmare = require('nightmare');
var fs = require('fs');
var request = require('request');

new Nightmare()
  .goto('https://login.nrc.nl/login?service=http://digitaleeditie.nrc.nl/welkom')
  .insert('input[name="username"]', 'Username')
  .insert('input[name="password"]', 'Password')
  .click('button[type="submit"]')
  .wait()
  .goto('http://digitaleeditie.nrc.nl/digitaleeditie/NH/2014/10/20141124___/downloads.html')
  .wait()
  .then(function () {
    download('http://digitaleeditie.nrc.nl/digitaleeditie/helekrant/epub/nrc_20141124.epub', 'myBook.epub', function () {
      console.log('done');
    });
  })
  .catch(function (err) {
    console.log(err);
  });

function download(uri, filename, callback) {
  request.head(uri, function () {
    request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
  });
}
Run npm i request in order to use request.
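Note that request has since been deprecated; a rough equivalent of the download() helper can be written with Node's built-in modules (a sketch only; it does not handle redirects or HTTP error statuses):
var fs = require('fs');
var http = require('http'); // use require('https') for https:// URLs

function download(uri, filename, callback) {
  http.get(uri, function (res) {
    // stream the response body straight to disk
    res.pipe(fs.createWriteStream(filename)).on('close', callback);
  });
}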

Nightmare will download it properly if you click on the download link.
const Nightmare = require('nightmare');
// guard against a missing command-line argument
const show = (process.argv[2] || "").includes("true");
const nightmare = Nightmare({ show: show });
nightmare
  .goto("https://github.com/segmentio/nightmare")
  .click('a[href="/segmentio/nightmare/archive/master.zip"]')
  .end(() => "Done!")
  .then((value) => console.log(value));
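Since show is read from the command line, run the script with an extra argument, e.g. node yourscript.js true, to watch the Electron window while it downloads.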

Related

Download a PDF with JavaScript/Node.js after generating data and storing the file into a path

I created a PDF generator that creates the file and saves it to a specific path automatically after creation. I also want to download it right afterwards, but I don't know how to do that. Any help would be appreciated. This is my generatorPdf.js:
module.exports = {
  pdfGenerator: function (data, pathfile) {
    var fonts = {
      Roboto: {
        normal: 'server/pdfgenerator/fonts/Roboto-Regular.ttf',
        bold: 'server/pdfgenerator/fonts/Roboto-Medium.ttf',
        italics: 'server/pdfgenerator/fonts/Roboto-Italic.ttf',
        bolditalics: 'server/pdfgenerator/fonts/Roboto-MediumItalic.ttf'
      }
    };
    var datePaiements = '';
    var dateFinPaiements = '';
    if (data.abonnement[0].datePaiement != null)
      datePaiements = new Date(data.abonnement[0].datePaiement.toString());
    if (datePaiements !== '') {
      dateFinPaiements = ('0' + datePaiements.getDate()).slice(-2).toString() + '/' + ('0' + (datePaiements.getMonth() + 1)).slice(-2).toString() + '/' + (datePaiements.getFullYear() + 1).toString();
      datePaiements = ('0' + datePaiements.getDate()).slice(-2).toString() + '/' + ('0' + (datePaiements.getMonth() + 1)).slice(-2).toString() + '/' + datePaiements.getFullYear().toString();
    }
    var dateFacture = new Date(data.abonnement[0].timestampCreation.toString());
    dateFacture = ('0' + dateFacture.getDate()).slice(-2).toString() + '/' + ('0' + (dateFacture.getMonth() + 1)).slice(-2).toString() + '/' + dateFacture.getFullYear().toString();
    var PdfPrinter = require('pdfmake/src/printer');
    var printer = new PdfPrinter(fonts);
    var fs = require('fs');
    var dd = {
      content: [..............],
      footer: {.............}
    };
    try {
      var pdfDoc = printer.createPdfKitDocument(dd);
      if (fs.existsSync(pathfile)) { //server/pdfgenerator/documentpdf/basics21.pdf
        fs.unlink(pathfile, (err) => { //server/pdfgenerator/documentpdf/basics21.pdf
          if (err) {
            console.error(err);
            return;
          }
        });
      }
      pdfDoc.pipe(fs.createWriteStream(pathfile)).on('finish', function () { //server/pdfgenerator/documentpdf/basics21.pdf
      });
    }
    catch (e) {
      console.log(e);
      return null;
    }
  }
}
And this is my remote method in LoopBack that writes the PDF to a path, and where I probably have to trigger the download of the file:
cm_abonnements.getAbonnementById = async (options, req, res) => {
  const token = options && options.accessToken;
  const userId = token && token.userId;
  try {
    if (userId !== null) {
      let dataComedien = await app.models.cm_comediens.getComedienByUser(userId);
      let argAbn = {};
      const form = new formidable.IncomingForm();
      var formPromise = await new Promise(function (resolve, reject) {
        form.parse(req, function (err, fields, files) {
          if (err) {
            reject(err);
            return -1;
          }
          console.log(fields.key);
          argAbn.idAbonnement = fields.key;
          resolve();
        });
      });
      let dataFac = await cm_abonnements.find({ where: { and: [{ idAbonnement: argAbn.idAbonnement }, { idComedien: dataComedien.idComedien }] } });
      var data = { abonnement: [] };
      data.abonnement = dataFac;
      var str_date = new Date(dataFac[0].timestampCreation.toString());
      var nameFile = 'Fac_' + dataFac[0].idFacture + '_' + str_date.getFullYear().toString() + '-' + ('0' + str_date.getMonth() + 1).slice(-2).toString() + '-' + ('0' + str_date.getDate()).slice(-2).toString() + '.pdf';
      var path = 'public/upload/Comediens/' + dataComedien.idComedien.toString() + '/factures/' + nameFile;
      createPdf.pdfGenerator(data, path);
      return dataFac;
    }
    return null;
  }
  catch (e) {
    console.log(e);
    return null;
  }
};

cm_abonnements.remoteMethod(
  'getAbonnementById', {
    http: {
      verb: 'POST'
    },
    description: 'Get detail facture by number facture',
    accepts: [
      { arg: "options", "type": "object", "http": "optionsFromRequest" },
      { arg: 'req', type: 'object', 'http': { source: 'req' } },
      { arg: 'res', type: 'object', 'http': { source: 'res' } }
    ],
    returns: { arg: 'data', root: true }
  }
);
Any help would be appreciated. Thank you
You need to send the following HTTP headers:
Content-Type: application/pdf
Content-Disposition: inline; filename="download.pdf"
After the data is generated and the PDF file is stored, there are 2 steps left to implement the "download" feature:
Return an HTTP response to the browser, with the Content-Type header set to application/pdf and the Content-Disposition header set to attachment; filename="yourname.pdf". Normally this is handled automatically by the web framework. I'm not familiar with LoopBack, so take Express for example:
In generatorPdf.js, add a callback parameter to listen for the finish event:
pdfGenerator: function (data, pathfile, callback) {
  ...
  pdfDoc.pipe(fs.createWriteStream(pathfile)).on('finish', callback);
  ...
}
When the pdfGenerator function is used, pass a callback function as the extra parameter. Once the PDF work is "finished", return the response to the browser using res.download() (this is the Express API, but I believe LoopBack has a similar API, as LoopBack is built on top of Express):
var nameFile = ...
var path = ...
createPdf.pdfGenerator(data, path, function () {
  res.download(path, nameFile);
});
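For context, this is roughly what that flow looks like as a plain Express route on its own (a sketch only; the /download route and file names are made up for illustration, and the real code would build the data and path as in the question):
var express = require('express');
var app = express();

app.post('/download', function (req, res) {
  var path = 'server/pdfgenerator/documentpdf/basics21.pdf'; // example file path
  var nameFile = 'facture.pdf';                              // name offered to the browser
  // res.download() streams the file and sets Content-Disposition: attachment
  res.download(path, nameFile);
});

app.listen(3000);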
On the browser side, if it's an AJAX request (I guess so, since you mentioned it is a POST request), you need to handle the response with some blob operations. Here is an example snippet, with explanatory comments:
var req = new XMLHttpRequest();
req.open('POST', '/download', true); // Open an async AJAX request.
req.setRequestHeader('Content-Type', 'application/json'); // Send JSON data
req.responseType = 'blob'; // Define the expected data as blob
req.onreadystatechange = function () {
  if (req.readyState === 4) {
    if (req.status === 200) { // When data is received successfully
      var data = req.response;
      var defaultFilename = 'default.pdf';
      // Or, you can get filename sent from backend through req.getResponseHeader('Content-Disposition')
      if (typeof window.navigator.msSaveBlob === 'function') {
        // If it is IE that support download blob directly.
        window.navigator.msSaveBlob(data, defaultFilename);
      } else {
        var blob = data;
        var link = document.createElement('a');
        link.href = window.URL.createObjectURL(blob);
        link.download = defaultFilename;
        document.body.appendChild(link);
        link.click(); // create an <a> element and simulate the click operation.
      }
    }
  }
};
req.send(JSON.stringify({test: 'test'}));
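If older browsers are not a concern, the same browser-side logic can be written more compactly with fetch (a sketch assuming the same /download endpoint; it skips the IE-specific msSaveBlob branch):
fetch('/download', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ test: 'test' })
})
  .then(function (res) { return res.blob(); }) // read the response body as a Blob
  .then(function (blob) {
    var link = document.createElement('a');
    link.href = window.URL.createObjectURL(blob);
    link.download = 'default.pdf';
    document.body.appendChild(link);
    link.click(); // trigger the save dialog
  });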

iterate node.js request function

This question is about a crawler in Node.js.
A start_url is given, from which the crawler collects URLs and "pushes" them to a .json file (output.json).
At the moment, it runs the request function only with the start_url and saves the collected URLs in output.json. I want it to use the saved URLs, replacing the start_url with the first collected URL and collecting links again, and so on.
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var start_url = ["http://stackoverflow.com/"]

var req = function (url) {
  request(url, function (error, response, html) {
    var $ = cheerio.load(html);
    var data = [];
    $("a").each(function () {
      var link = $(this);
      var exurls = {exurl: new Array(link.attr("href"))}
      data.push(exurls);
      // Queue "exurls" for "start_url" and call the same function with the new URL (endless loop)
      // save to "output.json" from time to time, so you can stop it anytime
    });
    fs.writeFile("output.json", JSON.stringify(data, null, 4), function (err) {
      if (err) {
        console.log(err);
      } else {
        console.log("File successfully written!");
      }
    });
  });
}

for (var i = 0; i < start_url.length; i++) {
  req(start_url[i]);
}
So what you can do is call the function recursively. The example below should work:
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var start_url = ["http://stackoverflow.com/"]
var count = 0; // index of the next URL in start_url to crawl

var req = function (url) {
  request(url, function (error, response, html) {
    var $ = cheerio.load(html);
    $("a").each(function () {
      var link = $(this);
      var href = link.attr("href");
      // Queue the found links onto "start_url" so the same function can be
      // called again with each new URL
      if (href) start_url.push(href);
    });
    // save to "output.json" after every page, so you can stop it anytime
    try {
      fs.writeFileSync("output.json", JSON.stringify(start_url, null, 4));
      console.log("File successfully written!");
    } catch (err) {
      console.log(err);
    }
    ++count;
    if (start_url.length > count) {
      req(start_url[count]);
    }
  });
}

req(start_url[0]);
The problem with this is that you completely rewrite the file each time. If this goes on for a while, you are going to run out of memory. Another option is to create a write stream:
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var start_url = ["http://stackoverflow.com/"]
var wstream = fs.createWriteStream("output.json");

var req = function (url) {
  request(url, function (error, response, html) {
    var $ = cheerio.load(html);
    $("a").each(function () {
      var link = $(this);
      var href = link.attr("href");
      if (!href) return;
      // Queue the found link so it gets crawled later, and stream it to
      // "output.json" right away, so you can stop the crawler at any time
      start_url.push(href);
      wstream.write('"' + href + '",');
    });
    start_url.shift();
    if (start_url.length > 0) {
      return req(start_url[0]);
    }
    wstream.end();
  });
}

req(start_url[0]);
Edit: switched to a basic queue to combat memory problems.
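One further refinement worth considering (not part of the original answer): track visited URLs in a Set so the queue does not grow without bound and pages are not fetched twice. A rough sketch of the idea, to be used with the write-stream version above:
var seen = new Set(start_url); // URLs already queued or crawled

function enqueue(href) {
  if (href && !seen.has(href)) {
    seen.add(href);
    start_url.push(href);
    wstream.write('"' + href + '",');
  }
}
// ...then call enqueue(link.attr("href")) inside the $("a").each() loop.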

Using NodeJS with AppJS to Package Zip with JSZip

My original file wouldn't download using the local method, so I decided to use Node.js, as it's already packaged with AppJS, but the zip file still won't open when generated in AppJS.
$(".export").on("click", function() {
var fs = require("fs");
var JSZip = require("jszip");
var zip = new JSZip();
zip.file("hello.txt", "Hello node!");
var content = zip.generate({type:"nodebuffer"});
// saveAs(content, "test.zip");
fs.writeFile("test.zip", content, function(err) {
if (err) throw err;
});
});
body {
background: #fff;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<script src="http://stuk.github.io/jszip/dist/jszip.min.js"></script>
<script src="http://stuk.github.io/jszip-utils/dist/jszip-utils.js"></script>
<script src="http://stuk.github.io/jszip/vendor/FileSaver.js"></script>
<button class="export">Download</button>
Note: I've tried saving files using the File API, but the only way I've been able to successfully write a file in AppJS is by using Node.js, as seen below.
var fs = require("fs");
fs.writeFile("hello.txt", "Hi", function (err) {
  if (err) throw err;
});
I wasn't using Node but instead doing something similar in the browser that required FileSaver.js as well.
Have you tried:
var JSZip = require('jszip');
var saveAs = require('filesaver.js');
var zip = new JSZip();
zip.file("hello.txt", "Hello node!");
var blob = zip.generate({type: 'blob'});
saveAs(blob, 'images.zip');
The main difference here is using {type: 'blob'} instead of nodebuffer. I was also successful setting the response type to arraybuffer using a simple XHR download module:
module.exports = function download(url, callback) {
  var xhr = new XMLHttpRequest();
  xhr.open('GET', url, true);
  xhr.responseType = 'arraybuffer';
  xhr.onload = function (e) {
    callback(null, xhr.response);
  };
  xhr.onerror = function (e) {
    callback(e); // report the error to the caller
  };
  xhr.send(null);
};
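A hypothetical usage of that download module (in an environment like AppJS where both require and XMLHttpRequest are available; the module path and URL are placeholders), reading the result with the same JSZip 2.x API used above:
var JSZip = require('jszip');
var download = require('./download'); // the module shown above, assumed filename

download('http://example.com/archive.zip', function (err, arraybuffer) {
  if (err) return console.error(err);
  var zip = new JSZip(arraybuffer);    // JSZip 2.x can load an ArrayBuffer directly
  console.log(Object.keys(zip.files)); // list the entry names in the archive
});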

How to Pipe Response to a File in Co-Request module & NodeJs?

I am using co-request to read a zip file from an HTTP URL, and I have the code below to read it from the server.
The code already works, but I don't know how to write the zip response to a file.
var co = require("co");
var request = require("co-request");

var options = {
  url: "http://www.example.com/sample.zip",
  headers: {
    'Token': Appconfig.Affiliate_Token,
    'Affiliate-Id': Appconfig.Affiliate_Id
  }
}

console.log("Downloading : zip file");
var j = yield request(options);
co-request is actually a wrapper around request, and I have found the code below to pipe a file to a stream, but I am not sure how to write the same thing using co-request with yield.
request.get('http://example.com/img.png').pipe(request.put('http://example.com/img.png'))
Please help me figure out how to write the zip response to a file using yield and co-request.
I think request can't pipe after data has been emitted from the response.
Use request instead of co-request and write a promise to achieve this:
var co = require('co');
var request = require('request');
var fs = require('fs');

var url = 'http://google.com/doodle.png';

var requestPipToFile = function (url, filepath) {
  return new Promise(function (resolve, reject) {
    try {
      var stream = fs.createWriteStream(filepath);
      stream.on('finish', function () {
        console.log("pipe finish");
        return resolve(true);
      });
      return request(url).pipe(stream);
    } catch (e) {
      return reject(e);
    }
  });
};

co(function* () {
  var value = (yield requestPipToFile(url, './outfile'));
  return value;
}).then(function (value) {
  return console.log(value);
}).catch(function (err) {
  return console.error(err);
});
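One caveat with that promise wrapper: the try/catch only catches synchronous errors, so a network failure from request itself never rejects the promise. Hooking the request's error event would cover that case (an addition of mine, not part of the original answer):
return request(url).on('error', reject).pipe(stream);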

using zip.js to read a zip file via xmlhttp/ajax call on Node.js

I am trying to:
Send a zip file via xmlhttp to the client
then read the file using zip.js and render its contents
I successfully receive the binary of the file, i.e. the success callback is called, but I get an error when I try to call getEntries. I think the error is in the way the stream is sent; please help.
Error msg :
Error in reading zip file
My client-side code (using Angular):
$http.get(window.location.origin + '/book/' + bookName, {responseType: "Blob"})
  .success(function (data, error) {
    var a = new Uint8Array(data);
    //var dataView = new DataView(data);
    //var blob = new Blob(dataView.buffer);
    zip.useWebWorkers = true;
    zip.workerScriptsPath = '/js/app/';
    zip.createReader(new zip.BlobReader(data), function (reader) {
      // get all entries from the zip
      reader.getEntries(function (entries) { // HERE I GET THE ERROR
        if (entries.length) {
          // get first entry content as text
          entries[0].getData(new zip.TextWriter(), function (text) {
            // text contains the entry data as a String
            console.log(text);
            // close the zip reader
            reader.close(function () {
              // onclose callback
              var a = 0;
            });
          }, function (current, total) {
            // onprogress callback
            var a = 0;
          });
        }
      });
    },
    function (error) {
      // onerror callback
      var a = 0;
    });
  })
  .error(function (data, error) {
    var a = 0;
  });
My server-side code on Node:
router.get('/book/:bookName', function (req, res) {
  console.log('Inside book reading block : ' + req.params.bookName);
  req.params.bookName += '.zip';
  var filePath = path.join(__dirname, '/../\\public\\books\\', req.params.bookName);
  var stat = fileSystem.statSync(filePath);
  res.writeHead(200, {
    //'Content-Type': 'application/zip',
    'Content-Type': 'blob',
    'Content-Length': stat.size
  });
  var readStream = fileSystem.createReadStream(filePath);
  // replace all the event handlers with a simple call to readStream.pipe()
  readStream.pipe(res);
});
You have probably already found a solution by now, but I faced the same problem today and this is how I solved it in plain JavaScript:
var xhr = new XMLHttpRequest();
xhr.open('GET', 'assets/object/sample.zip', true);
xhr.responseType = 'arraybuffer';
xhr.onload = function (e) {
  // response is unsigned 8 bit integer
  var responseArray = new Uint8Array(this.response);
  var blobData = new Blob([responseArray], {
    type: 'application/zip'
  });
  zip.createReader(new zip.BlobReader(blobData), function (zipReader) {
    zipReader.getEntries(displayEntries);
  }, onerror);
};
xhr.send();
The problem I see in your code is that you convert the response to a Uint8Array and assign it to var a, but then still pass the raw data to the BlobReader. Also, the BlobReader requires a Blob, not an array, so you should have converted var a into a Blob and then used that for reading.
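Applied to the Angular callback in the question, the fix might look roughly like this (a sketch; it assumes the response arrives as an ArrayBuffer, e.g. with responseType: 'arraybuffer'):
$http.get(window.location.origin + '/book/' + bookName, { responseType: 'arraybuffer' })
  .success(function (data) {
    // wrap the raw bytes in a Blob before handing them to zip.BlobReader
    var blob = new Blob([new Uint8Array(data)], { type: 'application/zip' });
    zip.createReader(new zip.BlobReader(blob), function (reader) {
      reader.getEntries(function (entries) {
        console.log(entries.length + ' entries found');
      });
    }, function (error) {
      console.error('Error in reading zip file', error);
    });
  });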
