Unable to createWriteStream to save downloaded file - javascript

I have a function in my NW.js app that downloads a bunch of files from the server and saves them in the folder chosen by the user, with the names sent from the server. I do not know the names of the files in advance: the URLs I am using are randomly generated strings that I got from another server, and this server looks up each hash to see which file it corresponds to.
var regexp = /filename=\"(.*)\"/gi;
media_urls.forEach(function(url) {
  var req = client.request(options, function(res) {
    var file_size = parseInt(res.headers['content-length'], 10);
    var content_disposition = res.headers['content-disposition'];
    // pull the filename out of the Content-Disposition header
    var name = regexp.exec(content_disposition)[1];
    var path = Path.join(save_dir, name);
    var file = fs.createWriteStream(path);
    file.on('error', function(e) {
      console.log(e);
      req.abort();
    });
    res.on('data', function(chunk) {
      file.write(chunk);
    });
    res.on('end', function() {
      file.end();
    });
  });
  req.on('error', function(e) {
    console.log(e);
  });
  req.end();
});
I keep getting ENOENT errors when this code runs. That doesn't make sense to me: createWriteStream is supposed to create the file at this point, so of course it doesn't exist yet!
Why am I getting this error instead of having the file downloaded?

The file names coming from the server had colons (:) in them, which is a valid filename character on Linux (ext4) but not on Windows (NTFS).
Changing
var name = regexp.exec(content_disposition)[1];
to
var name = regexp.exec(content_disposition)[1].replace(/:/g, '-');
solved this particular problem (using a regex with the /g flag replaces every colon, not just the first).
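If the server can send other characters that Windows rejects in file names, it may be worth sanitizing more aggressively. Here is a minimal sketch; the helper name and the reserved-character set are my own additions, not something from the original code:

// Hypothetical helper: replace every character NTFS rejects in file names.
// The set below (\ / : * ? " < > | plus control characters) is the usual
// Windows list; adjust it if your target filesystems differ.
function sanitizeFilename(name) {
  return name.replace(/[\\/:*?"<>|\x00-\x1f]/g, '-');
}

var name = sanitizeFilename(regexp.exec(content_disposition)[1]);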

Related

File uploads through socket.io (JavaScript & FileReader)

I am creating a chat app (in React Native), but for now I have made some tests in vanilla JavaScript. The server is a Node.js server.
It works for sending text messages, but now I have some questions about sending photos/videos/audio files. I've been doing a lot of research online about the best way to do this.
I came up with the idea of using the FileReader API to split the file into chunks and send it chunk by chunk via socket.emit().
This is my code so far (simplified):
Please note that I will create a React Native app, but for now (for testing) I've just created an HTML file with an upload form.
// index.html
// the page where my upload form is
var reader = {};
var file = {};
var sliceSize = 1000 * 1024;
var socket = io('http://localhost:8080');

const startUpload = e => {
  e.preventDefault();
  reader = new FileReader();
  file = $('#file')[0].files[0];
  uploadFile(0);
};
$('#start-upload').on('click', startUpload);

const uploadFile = start => {
  var slice = start + sliceSize + 1;
  var blob = file.slice(start, slice);
  // FileReader has no .on() method; assign the onloadend handler instead
  reader.onloadend = e => {
    if (slice < file.size) {
      socket.emit('message', JSON.stringify({
        fileName: file.name,
        fileType: file.type,
        fileChunk: e.target.result
      }));
      // read and send the next chunk
      uploadFile(slice);
    } else {
      console.log('Upload completed!');
    }
  };
  reader.readAsDataURL(blob);
};
// app.js
// my Node.js server file
var file;
var files = {};

io.on('connection', socket => {
  console.log('User connected!');

  // when a message is received
  socket.on('message', data => {
    file = JSON.parse(data);
    if (!files[file.fileName]) {
      // this is the first chunk received
      // create a new string
      files[file.fileName] = '';
    }
    // append the binary data
    files[file.fileName] = files[file.fileName] + file.fileChunk;
  });

  // on disconnect
  socket.on('disconnect', () => {
    console.log('User disconnected!');
  });
});
I did not include any checks for file type (I'm not at that point yet); first I want to make sure this is the right approach at all.
Stuff I need to do:
Send a message (like socket.emit('uploaddone', ...)) from the client to the server to notify the server that the upload is done (and the server can emit the complete file to another user).
My questions are:
Is it okay to send chunks of binary data (base64) over a socket, or would it take up too much bandwidth?
Will I lose some quality (photos/videos/audio files) when splitting them up into chunks?
If there is a better way to do this, please let me know. I'm not asking for working code examples, just some guidance in the good direction.
You can send raw binary data over a WebSocket; base64 adds roughly 33% size overhead.
You also won't have to JSON.stringify the whole (possibly large) payload and parse it on the receiving side.
Will I lose some quality?
No; the underlying protocol (TCP) delivers data in order and without corruption.
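For illustration, here is a minimal server-side sketch of the binary approach. The event names ('file-chunk', 'file-done'), the metadata shape, and the ./uploads/ path are my own assumptions, not from the question; the point is that socket.io hands binary payloads to Node as Buffer objects, which you can append to a write stream instead of concatenating base64 strings.

// app.js (sketch): receive binary chunks and stream them to disk
var fs = require('fs');
var streams = {};   // one write stream per in-progress upload

io.on('connection', socket => {
  // meta is a small JSON object like { fileName: ... }; chunk arrives as a Buffer
  socket.on('file-chunk', (meta, chunk) => {
    if (!streams[meta.fileName]) {
      streams[meta.fileName] = fs.createWriteStream('./uploads/' + meta.fileName);
    }
    streams[meta.fileName].write(chunk);
  });

  socket.on('file-done', meta => {
    streams[meta.fileName].end();
    delete streams[meta.fileName];
  });
});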
I realize this answer is a couple of months late, but for future reference you should look into using socket.io's acknowledgement callbacks here:
// with acknowledgement
let message = JSON.stringify({
  fileName: file.name,
  fileType: file.type,
  fileChunk: e.target.result
});
socket.emit("message", message, (ack) => {
  // send next chunk...
});
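To make that concrete, here is a minimal client-side sketch of an acknowledgement-driven loop. The event names and helper are my own (matching the server sketch above), and it assumes the server invokes the acknowledgement callback it receives as the last argument; it also sends raw ArrayBuffer slices rather than base64 strings.

// sketch: send one chunk, wait for the server's ack, then send the next
var sliceSize = 1000 * 1024;

function sendChunk(file, start) {
  if (start >= file.size) {
    socket.emit('file-done', { fileName: file.name });
    return;
  }
  var reader = new FileReader();
  reader.onloadend = function (e) {
    // the third argument is the ack callback; it fires once the server has
    // handled this chunk, so chunks are never sent out of order
    socket.emit('file-chunk', { fileName: file.name }, e.target.result, function () {
      sendChunk(file, start + sliceSize);
    });
  };
  reader.readAsArrayBuffer(file.slice(start, start + sliceSize));
}

sendChunk($('#file')[0].files[0], 0);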

local PDF file scraping in node.js

I have uploaded a PDF via a MEAN stack web application using fs. I want to extract certain fields from the PDF and display them on the web app. I have looked at a couple of npm packages, like pdf.js and pdf2json, but I can't figure out the documentation and JavaScript callbacks used in the examples available. Please help!
I hope I can help answer your question. pdf2json can be used to parse a PDF and extract the text. There are a couple of steps that need to be taken to get it working. I have adapted the example from https://github.com/modesty/pdf2json.
The setup is to install pdf2json in the Node app, along with underscore. The example page didn't explain the need to define your own callback functions, and it used self instead of this to register them. So, with the appropriate changes, the code to extract all the text from the PDF looks something like this:
// Get the dependencies that have already been installed
// to ./node_modules with `npm install <dep>` in the root directory
// of your app
var _ = require('underscore'),
    PDFParser = require('pdf2json');

var pdfParser = new PDFParser();

// Create a function to handle the pdf once it has been parsed.
// In this case we cycle through all the pages and extract
// all the text blocks and print them to console.
// If you do `console.log(JSON.stringify(pdf))` you will
// see how the parsed pdf is composed. Drill down into it
// to find the data you are looking for.
var _onPDFBinDataReady = function (pdf) {
  console.log('Loaded pdf:\n');
  for (var i in pdf.data.Pages) {
    var page = pdf.data.Pages[i];
    for (var j in page.Texts) {
      var text = page.Texts[j];
      console.log(text.R[0].T);
    }
  }
};

// Create an error handling function
var _onPDFBinDataError = function (error) {
  console.log(error);
};

// Use underscore to bind the data ready function to the pdfParser
// so that when the data ready event is emitted your function will
// be called. As opposed to the example, I have used `this` instead
// of `self` since self had no meaning in this context
pdfParser.on('pdfParser_dataReady', _.bind(_onPDFBinDataReady, this));

// Register error handling function
pdfParser.on('pdfParser_dataError', _.bind(_onPDFBinDataError, this));

// Construct the file path of the pdf
var pdfFilePath = 'test3.pdf';

// Load the pdf. When it is loaded your data ready function will be called.
pdfParser.loadPDF(pdfFilePath);
I am running the code from my server-side controller:
module.exports = (function() {
  return {
    add: function(req, res) {
      var tmp_path = req.files.pdf.path;
      var target_path = './uploads/' + req.files.pdf.name;
      fs.rename(tmp_path, target_path, function(err) {
        if (err) throw err;
        // delete the temporary file, so that the explicitly set temporary upload dir
        // does not get filled with unwanted files
        fs.unlink(tmp_path, function(err) {
          if (err) throw err;
          // edit here: pdf parser
          res.redirect('#/');
        });
      });
    },
    show: function(req, res) {
      var pdfParser = new PDFParser();
      var _onPDFBinDataReady = function (pdf) {
        console.log('Loaded pdf:\n');
        for (var i in pdf.data.Pages) {
          var page = pdf.data.Pages[i];
          // console.log(page.Texts);
          for (var j in page.Texts) {
            var text = page.Texts[j];
            // console.log(text.R[0].T);
          }
        }
        console.log(JSON.stringify(pdf));
      };
      // Create an error handling function
      var _onPDFBinDataError = function (error) {
        console.log(error);
      };
      pdfParser.on('pdfParser_dataReady', _.bind(_onPDFBinDataReady, this));
      // Register error handling function
      pdfParser.on('pdfParser_dataError', _.bind(_onPDFBinDataError, this));
      // Construct the file path of the pdf
      var pdfFilePath = './uploads/Invoice_template.pdf';
      // Load the pdf. When it is loaded your data ready function will be called.
      pdfParser.loadPDF(pdfFilePath);
    }
    // end controller
  };
})();
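One more detail when extracting specific fields: the text runs in the parsed output are URI-encoded (spaces come through as %20 and so on), so you will likely want to run them through decodeURIComponent. A small sketch of picking out a field this way; the 'Invoice No' label is just an illustrative placeholder, not something from the question:

// Inside _onPDFBinDataReady: decode each run, then look for the field you need
var allText = [];
for (var i in pdf.data.Pages) {
  pdf.data.Pages[i].Texts.forEach(function (t) {
    allText.push(decodeURIComponent(t.R[0].T));
  });
}
// e.g. take the run that follows a label such as "Invoice No"
var idx = allText.indexOf('Invoice No');
if (idx !== -1) {
  console.log('Invoice number:', allText[idx + 1]);
}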

How can I get a buffer for a file (image) from CollectionFS

I'm trying to insert an image into a PDF I'm creating server-side with PDFKit. I'm using cfs:dropbox to store my files. Before, when I was using cfs:filesystem, it was easy to add the images to my PDFs because they were right there. Now that they're stored remotely, I'm not sure how to add them, since PDFKit does not support adding images with just the URL. It will, however, accept a buffer. How can I get a buffer from my CollectionFS files?
So far I have something like this:
var portrait = Portraits.findOne('vS2yFy4gxXdjTtz5d');
readStream = portrait.createReadStream('portraits');
I tried getting the buffer two ways so far:
First using dataMan, but the last command never comes back:
var dataMan = new DataMan.ReadStream(readStream, portrait.type());
var buffer = Meteor.wrapAsync(Function.prototype.bind(dataMan.getBuffer, dataMan))();
Second buffering the stream manually:
var buffer = new Buffer(0);
readStream.on('readable', function() {
  buffer = Buffer.concat([buffer, readStream.read()]);
});
readStream.on('end', function() {
  console.log(buffer.toString('base64'));
});
That never seems to come back either. I double-checked my doc to make sure it was there; it has a valid URL, and the image appears when I put the URL in my browser. Am I missing something?
I had to do something similar and since there's no answer to this question, here is how I do it:
// take a cfs file and return a base64 string
var getBase64Data = function(file, callback) {
  // callback has the form function (err, res) {}
  var readStream = file.createReadStream();
  var chunks = [];
  readStream.on('data', function(chunk) {
    chunks.push(chunk);
  });
  readStream.on('error', function(err) {
    callback(err, null);
  });
  readStream.on('end', function() {
    // join all the collected chunks into one Buffer, then base64-encode it
    callback(null, Buffer.concat(chunks).toString('base64'));
  });
};

// wrap it to make it sync
var getBase64DataSync = Meteor.wrapAsync(getBase64Data);

// get a cfs file
var file = Files.findOne();

// get the base64 string
var base64str = getBase64DataSync(file);

// get the buffer from the string
var buffer = new Buffer(base64str, 'base64');
Hope it'll help!
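Since PDFKit will accept a Buffer directly, you can also skip the base64 round-trip entirely. A minimal variation of the helper above (same assumptions as the answer) that resolves straight to a Buffer:

// take a cfs file and return a Buffer directly (no base64 round-trip)
var getBufferData = function(file, callback) {
  var readStream = file.createReadStream();
  var chunks = [];
  readStream.on('data', function(chunk) { chunks.push(chunk); });
  readStream.on('error', function(err) { callback(err, null); });
  readStream.on('end', function() { callback(null, Buffer.concat(chunks)); });
};

var getBufferDataSync = Meteor.wrapAsync(getBufferData);
var imageBuffer = getBufferDataSync(Portraits.findOne('vS2yFy4gxXdjTtz5d'));
// imageBuffer can now be passed to PDFKit, e.g. doc.image(imageBuffer)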

Create plugin gulp with stream

I created a plugin that collects data and writes it out as a JSON file.
But I don't understand why I should send my JSON object down the pipe instead of writing the file directly in my plugin.
I want to use my plugin with this syntax:
gulp.task('js-hash', function()
{
  // Get all js in redis
  gulp.src('./build/js/**/*.js')
    .pipe(getHashFile('/build/js/'))
    .pipe(gulp.dest('./build/js/hash.json'));
});
And not that:
gulp.task('js-hash', function()
{
  // Get all js in redis
  gulp.src('./build/js/**/*.js')
    .pipe(getHashFile('./build/js/hash.json', '/build/js/'));
});
This is my plugin:
var through = require('through2');
var gutil = require('gulp-util');
var crypto = require('crypto');
var fs = require('fs');
var PluginError = gutil.PluginError;

// Consts
const PLUGIN_NAME = 'get-hash-file';

var json = {};

function getHashFile(filename, basename)
{
  if (!filename) {
    throw new PluginError(PLUGIN_NAME, "Missing filename !");
  }
  // Creating a stream through which each file will pass
  var stream = through.obj(function (file, enc, callback) {
    if (file.isNull()) {
      this.push(file); // Do nothing if no contents
      return callback();
    }
    if (file.isBuffer()) {
      var hash = crypto.createHash('sha256').update(String(file.contents)).digest('hex');
      json[file.path.replace(file.cwd + basename, '')] = hash;
      return callback();
    }
    if (file.isStream()) {
      this.emit('error', new PluginError(PLUGIN_NAME, 'Stream not supported!'));
      return callback();
    }
  }).on('finish', function () {
    fs.writeFile(filename, JSON.stringify(json), function(err) {
      if (err) {
        throw err;
      }
    });
  });

  // returning the file stream
  return stream;
}

// Exporting the plugin main function
module.exports = getHashFile;
Do you have any ideas?
Nothing prevents you from doing this... besides not respecting the plugin guidelines!
Users actually assume a plugin will stream files and that they can pipe them to other plugins.
If I get your code right, you're trying to generate a file that contains all sha hashes of inbound files. Why not let users take this file and pipe it to other plugins? You'd be surprised what people could do.
While this question looks a bit opinion-based, you could definitely put the focus on how to deal with files that may not belong to the main stream of files. Issues like this can be found in many plugins; for example, the gulp-uglify authors are wondering how they can add source maps without mixing JS and source maps downstream.
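To follow the guidelines, instead of calling fs.writeFile in the 'finish' handler, the plugin can push the generated JSON downstream as a new vinyl file in through.obj's flush callback. A minimal sketch under that assumption (using gulp-util's File, as in the rest of the plugin, and keeping the json object local to the function):

// getHashFile(basename) now only takes the base path; the output name is fixed here
function getHashFile(basename) {
  var json = {};
  return through.obj(function (file, enc, callback) {
    if (file.isBuffer()) {
      var hash = crypto.createHash('sha256').update(String(file.contents)).digest('hex');
      json[file.path.replace(file.cwd + basename, '')] = hash;
    }
    callback();  // drop the original files; only the hash file goes downstream
  }, function (callback) {
    // flush: emit a single hash.json file so users can .pipe(gulp.dest(...)) it
    this.push(new gutil.File({
      path: 'hash.json',
      contents: new Buffer(JSON.stringify(json, null, 2))
    }));
    callback();
  });
}

With this version the task becomes gulp.src('./build/js/**/*.js').pipe(getHashFile('/build/js/')).pipe(gulp.dest('./build/js')); note that gulp.dest takes a directory, not a file path, so the result ends up at ./build/js/hash.json.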

Downloading N number of remote files using Node.js synchronously

I'm working on a simple app using Node.js which needs to do the following when given a valid URL:
1. Retrieve the HTML of the remote page and save it locally.
2. Spider the HTML (using cheerio) and record all JS and CSS file references.
3. Make an HTTP request for each JS/CSS file and save it to the server by file name.
4. Zip up the HTML, CSS, and JS files and stream the resulting file to the browser.
I've got 1 and 2 working, and the first half of 3, but I'm running into issues with the asynchronous nature of the downloads. My code runs through too fast and generates file names for the CSS and JS files, but none of the content. I'm guessing this is because my code isn't synchronous. The problem is that I don't know in advance how many files there might be, and all of them have to be there before the ZIP file can be generated.
Here's the flow of my app as it currently exists. I've left out the helper methods as they don't affect synchronicity. Can any of you provide input as to what I should do?
http.get(fullurl, function(res) {
  res.on('data', function (chunk) {
    var $source = $('' + chunk),
        js = getJS($source, domain),
        css = getCSS($source, domain),
        uniqueName = pw(),
        dir = [baseDir, 'jsd-', uniqueName, '/'].join(''),
        jsdir = dir + 'js/',
        cssdir = dir + 'css/',
        html = rewritePaths($source);

    // create tmp directory
    fs.mkdirSync(dir);

    console.log('creating index.html');
    // save index file
    fs.writeFileSync(dir + 'index.html', html);

    // create js directory
    fs.mkdirSync(jsdir);

    // Save JS files
    js.forEach(function(jsfile) {
      var filename = jsfile.split('/').reverse()[0];
      request(jsfile).pipe(fs.createWriteStream(jsdir + filename));
      console.log('creating ' + filename);
    });

    // create css directory
    fs.mkdirSync(cssdir);

    // Save CSS files
    css.forEach(function(cssfile) {
      var filename = cssfile.split('/').reverse()[0];
      request(cssfile).pipe(fs.createWriteStream(cssdir + filename));
      console.log('creating ' + filename);
    });

    // write zip file to /tmp
    writeZip(dir, uniqueName);
    // https://npmjs.org/package/node-zip
    // http://stuk.github.com/jszip/
  });
}).on('error', function(e) {
  console.log("Got error: " + e.message);
});
The way you are downloading each file through the request module is asynchronous:
request(cssfile).pipe(fs.createWriteStream(cssdir + filename));
Instead of downloading like that, create a separate download function:
function download(localFile, remotePath, callback) {
  var localStream = fs.createWriteStream(localFile);
  var out = request({ uri: remotePath });
  out.on('response', function (resp) {
    if (resp.statusCode === 200) {
      out.pipe(localStream);
      localStream.on('close', function () {
        callback(null, localFile);
      });
    } else {
      callback(new Error("No file found at given url."), null);
    }
  });
}
Then use the async module by caolan (https://github.com/caolan/async):
// Save JS files
async.forEach(js, function(jsfile, cb) {
  var filename = jsfile.split('/').reverse()[0];
  download(jsdir + filename, jsfile, function(err, result) {
    // handle error here
    console.log('creating ' + filename);
    cb();
  });
}, function(err) {
  // create css directory
  fs.mkdirSync(cssdir);
  // Save CSS files the same way, so the zip is only written
  // once every download has finished
  async.forEach(css, function(cssfile, cb) {
    var filename = cssfile.split('/').reverse()[0];
    download(cssdir + filename, cssfile, function(err, result) {
      console.log('creating ' + filename);
      cb();
    });
  }, function(err) {
    // write zip file to /tmp
    writeZip(dir, uniqueName);
  });
});
