Well the title says it all, I'm trying to write a script (that runs in a nodejs/express server-side application) that leverages libraries request, unzip and xml2js to perform a task consisting of fetching a zip file from a given url, whose content is an xml file which I need to parse to a javascript object for some further processing.
So far I've managed to come up with:
var express = require("express");
var app = express();
/* some init code omitted */
var request = require("request");
var unzip = require("unzip");
var xml2js = require("xml2js");
var parser = new xml2js.Parser();
app.get("/import", function(req, res) {
request("http://path.to/file.zip")
.pipe(unzip.Parse())
.on("entry", function(entry) {
//This is what I'm trying to avoid, which doesn't even work
entry.pipe(fs.createWriteStream(entry.path));
fs.readFile(entry.path, function(err, data) {
if(err) {
return res.status(500).send(err);
}
parser.parseString(data, function(err, obj) {
console.log(util.inspect(obj));
/* further processing of obj */
});
});
});
});
Albeit the fact the contents of the xml file are correctly written to disk, I'm looking for an alternative to this approach for two reasons:
to save disk space, since I don't really need to keep the xml file anyway once it has been converted to js
it doesn't even work: fs.readFile probably starts reading the file before fs.createWriteStream is done writing it, because the line console.log(utils.inspect(obj)) logs null (whereas if I run only the innermost fs.readFile block and replace entry.path with the name of the previously written file, it produces the desired output)
I wish I could jot down a jsFiddle for this but I'm clueless as to how, when it comes to expressjs applications. Cheers.
EDITED
Piping is unnecessary, parse data directly from the entry stream:
app.get("/import", function(req, res) {
request("http://link-top.zip")
.pipe(unzip.Parse())
.on("entry", function(entry) {
var chunks = [];
var res;
if(entry.path == 'needed.xml') {
entry.on('data', function(data) {
chunks.push(data.toString());
});
entry.on('end', function () {
res = chunks.join("");
parser.parseString(res, function(err, obj) {
console.log(util.inspect(obj));
/* further processing of obj */
});
});
}
});
});
I have uploaded a pdf via a MEAN stack web application using fs. I want to extract certain fields from the pdf and display them on the web app. I have looked at a couple npm packages like pdf.js, pdf2json. I can't figure out the documentation and javascript callbacks used in the examples available. Please help!
I hope I can help answer your question. Using pdf2json can be used to parse a pdf and extract the text. There are a couple of steps that need to be taken to get it working. I have adapted the example from https://github.com/modesty/pdf2json.
The setup is to install pdf2json in the node app, and also underscore. The example page didn't explain the need to define your own callback functions. It also used self instead of this to register them. So, with the appropriate changes the code to extract all the text from the pdf will be something like this:
// Get the dependencies that have already been installed
// to ./node_modules with `npm install <dep>`in the root director
// of your app
var _ = require('underscore'),
PDFParser = require('pdf2json');
var pdfParser = new PDFParser();
// Create a function to handle the pdf once it has been parsed.
// In this case we cycle through all the pages and extraxt
// All the text blocks and print them to console.
// If you do `console.log(JSON.stringify(pdf))` you will
// see how the parsed pdf is composed. Drill down into it
// to find the data you are looking for.
var _onPDFBinDataReady = function (pdf) {
console.log('Loaded pdf:\n');
for (var i in pdf.data.Pages) {
var page = pdf.data.Pages[i];
for (var j in page.Texts) {
var text = page.Texts[j];
console.log(text.R[0].T);
}
}
};
// Create an error handling function
var _onPDFBinDataError = function (error) {
console.log(error);
};
// Use underscore to bind the data ready function to the pdfParser
// so that when the data ready event is emitted your function will
// be called. As opposed to the example, I have used `this` instead
// of `self` since self had no meaning in this context
pdfParser.on('pdfParser_dataReady', _.bind(_onPDFBinDataReady, this));
// Register error handling function
pdfParser.on('pdfParser_dataError', _.bind(_onPDFBinDataError, this));
// Construct the file path of the pdf
var pdfFilePath = 'test3.pdf';
// Load the pdf. When it is loaded your data ready function will be called.
pdfParser.loadPDF(pdfFilePath);
I am running the code out of my server side controller.
module.exports = (function() {
return {
add: function(req, res) {
var tmp_path = req.files.pdf.path;
var target_path = './uploads/' + req.files.pdf.name;
fs.rename(tmp_path, target_path, function(err) {
if (err) throw err;
// delete the temporary file, so that the explicitly set temporary upload dir does not get filled with unwanted files
fs.unlink(tmp_path, function() {
if (err) throw err;
//edit here pdf parser
res.redirect('#/');
});
})
},
show: function(req, res) {
var pdfParser = new PDFParser();
var _onPDFBinDataReady = function (pdf) {
console.log('Loaded pdf:\n');
for (var i in pdf.data.Pages) {
var page = pdf.data.Pages[i];
// console.log(page.Texts);
for (var j in page.Texts) {
var text = page.Texts[j];
// console.log(text.R[0].T);
}
}
console.log(JSON.stringify(pdf));
};
// Create an error handling function
var _onPDFBinDataError = function (error) {
console.log(error);
};
pdfParser.on('pdfParser_dataReady', _.bind(_onPDFBinDataReady, this));
// Register error handling function
pdfParser.on('pdfParser_dataError', _.bind(_onPDFBinDataError, this));
// Construct the file path of the pdf
var pdfFilePath = './uploads/Invoice_template.pdf';
// Load the pdf. When it is loaded your data ready function will be called.
pdfParser.loadPDF(pdfFilePath);
},
//end controller
}
I just have a quick question there:
I am using Node.JS to write a commandline tool that validates JSON Files with JSON Schemas. So, now I have a problem that when wanting to get all the schemas, that I always get "undefined" for using a async function but otherwise only sync functions.
For this commandline tool async is NOT needed.
Could someone help me out and give me a hand on how to make it work just fine?
var getJSONSchemaFiles = function (dir) {
results2 = [];
var recursive = require('recursive-readdir');
recursive(dir, function (err, files) {
// Files is an array of filename
// console.log(files);
files.forEach(function (entry) {
if (entry.indexOf(".schema.json") > -1) {
results2.push(entry);
}
});
console.log(results2);
});
return results2;
};
I am using the npm "recursive-readdir" but I think that I do not even need a npm for this kind of thing?
Ok, this enumerates all files under the given path synchronously:
var fs = require('fs');
function recursiveReaddir(path) {
var stat = fs.lstatSync(path);
if(stat.isFile())
return [path];
if(!stat.isDirectory())
return [];
return [].concat.apply([], fs.readdirSync(path).map(function(fname) {
return recursiveReaddir(path + '/' + fname);
}));
}
Use glob module https://github.com/isaacs/node-glob. There is async and Sync methods like: glob.sync(pattern, [options]); and glob(pattern, [options], cb);
Example from their docs:
var glob = require("glob")
// options is optional
glob("**/*.js", options, function (er, files) {
// files is an array of filenames.
// If the `nonull` option is set, and nothing
// was found, then files is ["**/*.js"]
// er is an error object or null.
})
For Node.js, what is the best way to prepend to a file in a way SIMILAR to
fs.appendFile(path.join(__dirname, 'app.log'), 'appendme', 'utf8')
Personally, the best way really revolves around a asynchronous solution to create a log where I can basically push onto the file from the top.
This solution isn't mine and I don't know where it's from but it works.
const data = fs.readFileSync('message.txt')
const fd = fs.openSync('message.txt', 'w+')
const insert = Buffer.from("text to prepend \n")
fs.writeSync(fd, insert, 0, insert.length, 0)
fs.writeSync(fd, data, 0, data.length, insert.length)
fs.close(fd, (err) => {
if (err) throw err;
});
It is impossible to add to a beginning of a file. See this question for the similar problem in C or this question for the similar problem in C#.
I suggest you do your logging in the conventional way (that is, log to the end of file).
Otherwise, there is no way around reading the file, adding the text to the start and writing it back to the file which can get really costly really fast.
It seems it is indeed possible with https://www.npmjs.com/package/prepend-file
Here is an example of how to prepend text to a file using gulp and a custom built function.
var through = require('through2');
gulp.src('somefile.js')
.pipe(insert('text to prepend with'))
.pipe(gulp.dest('Destination/Path/'))
function insert(text) {
function prefixStream(prefixText) {
var stream = through();
stream.write(prefixText);
return stream;
}
let prefixText = new Buffer(text + "\n\n"); // allocate ahead of time
// creating a stream through which each file will pass
var stream = through.obj(function (file, enc, cb) {
//console.log(file.contents.toString());
if (file.isBuffer()) {
file.contents = new Buffer(prefixText.toString() + file.contents.toString());
}
if (file.isStream()) {
throw new Error('stream files are not supported for insertion, they must be buffered');
}
// make sure the file goes through the next gulp plugin
this.push(file);
// tell the stream engine that we are done with this file
cb();
});
// returning the file stream
return stream;
}
Sources: [cole_gentry_github_dealingWithStreams][1]
Its possible by using the prepend-file node module. Do the following:
npm i prepend-file -S
import prepend-file module in your respective code.
Example:
let firstFile = 'first.txt';
let secondFile = 'second.txt';
prependFile(firstFile, secondFile, () => {
console.log('file prepend successfully');
})
I've been trying to find a way to write to a file when using Node.js, but with no success. How can I do that?
There are a lot of details in the File System API. The most common way is:
const fs = require('fs');
fs.writeFile("/tmp/test", "Hey there!", function(err) {
if(err) {
return console.log(err);
}
console.log("The file was saved!");
});
// Or
fs.writeFileSync('/tmp/test-sync', 'Hey there!');
Currently there are three ways to write a file:
fs.write(fd, buffer, offset, length, position, callback)
You need to wait for the callback to ensure that the buffer is written to disk. It's not buffered.
fs.writeFile(filename, data, [encoding], callback)
All data must be stored at the same time; you cannot perform sequential writes.
fs.createWriteStream(path, [options])
Creates a WriteStream, which is convenient because you don't need to wait for a callback. But again, it's not buffered.
A WriteStream, as the name says, is a stream. A stream by definition is “a buffer” containing data which moves in one direction (source ► destination). But a writable stream is not necessarily “buffered”. A stream is “buffered” when you write n times, and at time n+1, the stream sends the buffer to the kernel (because it's full and needs to be flushed).
In other words: “A buffer” is the object. Whether or not it “is buffered” is a property of that object.
If you look at the code, the WriteStream inherits from a writable Stream object. If you pay attention, you’ll see how they flush the content; they don't have any buffering system.
If you write a string, it’s converted to a buffer, and then sent to the native layer and written to disk. When writing strings, they're not filling up any buffer. So, if you do:
write("a")
write("b")
write("c")
You're doing:
fs.write(new Buffer("a"))
fs.write(new Buffer("b"))
fs.write(new Buffer("c"))
That’s three calls to the I/O layer. Although you're using “buffers”, the data is not buffered. A buffered stream would do: fs.write(new Buffer ("abc")), one call to the I/O layer.
As of now, in Node.js v0.12 (stable version announced 02/06/2015) now supports two functions:
cork() and
uncork(). It seems that these functions will finally allow you to buffer/flush the write calls.
For example, in Java there are some classes that provide buffered streams (BufferedOutputStream, BufferedWriter...). If you write three bytes, these bytes will be stored in the buffer (memory) instead of doing an I/O call just for three bytes. When the buffer is full the content is flushed and saved to disk. This improves performance.
I'm not discovering anything, just remembering how a disk access should be done.
You can of course make it a little more advanced. Non-blocking, writing bits and pieces, not writing the whole file at once:
var fs = require('fs');
var stream = fs.createWriteStream("my_file.txt");
stream.once('open', function(fd) {
stream.write("My first row\n");
stream.write("My second row\n");
stream.end();
});
Synchronous Write
fs.writeFileSync(file, data[, options])
fs = require('fs');
fs.writeFileSync("foo.txt", "bar");
Asynchronous Write
fs.writeFile(file, data[, options], callback)
fs = require('fs');
fs.writeFile('foo.txt', 'bar', (err) => { if (err) throw err; });
Where
file <string> | <Buffer> | <URL> | <integer> filename or file descriptor
data <string> | <Buffer> | <Uint8Array>
options <Object> | <string>
callback <Function>
Worth reading the offical File System (fs) docs.
Update: async/await
fs = require('fs');
util = require('util');
writeFile = util.promisify(fs.writeFile);
fn = async () => { await writeFile('foo.txt', 'bar'); }
fn()
var path = 'public/uploads/file.txt',
buffer = new Buffer("some content\n");
fs.open(path, 'w', function(err, fd) {
if (err) {
throw 'error opening file: ' + err;
}
fs.write(fd, buffer, 0, buffer.length, null, function(err) {
if (err) throw 'error writing file: ' + err;
fs.close(fd, function() {
console.log('file written');
})
});
});
The answers provided are dated and a newer way to do this is:
const fsPromises = require('fs').promises
await fsPromises.writeFile('/path/to/file.txt', 'data to write')
see documents here for more info
I liked Index of ./articles/file-system.
It worked for me.
See also How do I write files in node.js?.
fs = require('fs');
fs.writeFile('helloworld.txt', 'Hello World!', function (err) {
if (err)
return console.log(err);
console.log('Wrote Hello World in file helloworld.txt, just check it');
});
Contents of helloworld.txt:
Hello World!
Update:
As in Linux node write in current directory , it seems in some others don't, so I add this comment just in case :
Using this ROOT_APP_PATH = fs.realpathSync('.'); console.log(ROOT_APP_PATH); to get where the file is written.
I know the question asked about "write" but in a more general sense "append" might be useful in some cases as it is easy to use in a loop to add text to a file (whether the file exists or not). Use a "\n" if you want to add lines eg:
var fs = require('fs');
for (var i=0; i<10; i++){
fs.appendFileSync("junk.csv", "Line:"+i+"\n");
}
OK, it's quite simple as Node has built-in functionality for this, it's called fs which stands for File System and basically, NodeJS File System module...
So first require it in your server.js file like this:
var fs = require('fs');
fs has few methods to do write to file, but my preferred way is using appendFile, this will append the stuff to the file and if the file doesn't exist, will create one, the code could be like below:
fs.appendFile('myFile.txt', 'Hi Ali!', function (err) {
if (err) throw err;
console.log('Thanks, It\'s saved to the file!');
});
You may write to a file using fs (file system) module.
Here is an example of how you may do it:
const fs = require('fs');
const writeToFile = (fileName, callback) => {
fs.open(fileName, 'wx', (error, fileDescriptor) => {
if (!error && fileDescriptor) {
// Do something with the file here ...
fs.writeFile(fileDescriptor, newData, (error) => {
if (!error) {
fs.close(fileDescriptor, (error) => {
if (!error) {
callback(false);
} else {
callback('Error closing the file');
}
});
} else {
callback('Error writing to new file');
}
});
} else {
callback('Could not create new file, it may already exists');
}
});
};
You might also want to get rid of this callback-inside-callback code structure by useing Promises and async/await statements. This will make asynchronous code structure much more flat. For doing that there is a handy util.promisify(original) function might be utilized. It allows us to switch from callbacks to promises. Take a look at the example with fs functions below:
// Dependencies.
const util = require('util');
const fs = require('fs');
// Promisify "error-back" functions.
const fsOpen = util.promisify(fs.open);
const fsWrite = util.promisify(fs.writeFile);
const fsClose = util.promisify(fs.close);
// Now we may create 'async' function with 'await's.
async function doSomethingWithFile(fileName) {
const fileDescriptor = await fsOpen(fileName, 'wx');
// Do something with the file here...
await fsWrite(fileDescriptor, newData);
await fsClose(fileDescriptor);
}
You can write to files with streams.
Just do it like this:
const fs = require('fs');
const stream = fs.createWriteStream('./test.txt');
stream.write("Example text");
var fs = require('fs');
fs.writeFile(path + "\\message.txt", "Hello", function(err){
if (err) throw err;
console.log("success");
});
For example : read file and write to another file :
var fs = require('fs');
var path = process.cwd();
fs.readFile(path+"\\from.txt",function(err,data)
{
if(err)
console.log(err)
else
{
fs.writeFile(path+"\\to.text",function(erro){
if(erro)
console.log("error : "+erro);
else
console.log("success");
});
}
});
Here we use w+ for read/write both actions and if the file path is not found then it would be created automatically.
fs.open(path, 'w+', function(err, data) {
if (err) {
console.log("ERROR !! " + err);
} else {
fs.write(data, 'content', 0, 'content length', null, function(err) {
if (err)
console.log("ERROR !! " + err);
fs.close(data, function() {
console.log('written success');
})
});
}
});
Content means what you have to write to the file and its length, 'content.length'.
Here is the sample of how to read file csv from local and write csv file to local.
var csvjson = require('csvjson'),
fs = require('fs'),
mongodb = require('mongodb'),
MongoClient = mongodb.MongoClient,
mongoDSN = 'mongodb://localhost:27017/test',
collection;
function uploadcsvModule(){
var data = fs.readFileSync( '/home/limitless/Downloads/orders_sample.csv', { encoding : 'utf8'});
var importOptions = {
delimiter : ',', // optional
quote : '"' // optional
},ExportOptions = {
delimiter : ",",
wrap : false
}
var myobj = csvjson.toSchemaObject(data, importOptions)
var exportArr = [], importArr = [];
myobj.forEach(d=>{
if(d.orderId==undefined || d.orderId=='') {
exportArr.push(d)
} else {
importArr.push(d)
}
})
var csv = csvjson.toCSV(exportArr, ExportOptions);
MongoClient.connect(mongoDSN, function(error, db) {
collection = db.collection("orders")
collection.insertMany(importArr, function(err,result){
fs.writeFile('/home/limitless/Downloads/orders_sample1.csv', csv, { encoding : 'utf8'});
db.close();
});
})
}
uploadcsvModule()
fs.createWriteStream(path[,options])
options may also include a start option to allow writing data at some position past the beginning of the file. Modifying a file rather than replacing it may require a flags mode of r+ rather than the default mode w. The encoding can be any one of those accepted by Buffer.
If autoClose is set to true (default behavior) on 'error' or 'finish' the file descriptor will be closed automatically. If autoClose is false, then the file descriptor won't be closed, even if there's an error. It is the application's responsibility to close it and make sure there's no file descriptor leak.
Like ReadStream, if fd is specified, WriteStream will ignore the path argument and will use the specified file descriptor. This means that no 'open' event will be emitted. fd should be blocking; non-blocking fds should be passed to net.Socket.
If options is a string, then it specifies the encoding.
After, reading this long article. You should understand how it works.
So, here's an example of createWriteStream().
/* The fs.createWriteStream() returns an (WritableStream {aka} internal.Writeable) and we want the encoding as 'utf'-8 */
/* The WriteableStream has the method write() */
fs.createWriteStream('out.txt', 'utf-8')
.write('hello world');
Point 1:
If you want to write something into a file.
means: it will remove anything already saved in the file and write the new content. use fs.promises.writeFile()
Point 2:
If you want to append something into a file.
means: it will not remove anything already saved in the file but append the new item in the file content.then first read the file, and then add the content into the readable value, then write it to the file. so use fs.promises.readFile and fs.promises.writeFile()
example 1: I want to write a JSON object in my JSON file .
const fs = require('fs');
const data = {table:[{id: 1, name: 'my name'}]}
const file_path = './my_data.json'
writeFile(file_path, data)
async function writeFile(filename, writedata) {
try {
await fs.promises.writeFile(filename, JSON.stringify(writedata, null, 4), 'utf8');
console.log('data is written successfully in the file')
}
catch (err) {
console.log('not able to write data in the file ')
}
}
example2 :
if you want to append data to a JSON file.
you want to add data {id:1, name:'my name'} to file my_data.json on the same folder root. just call append_data (file_path , data ) function.
It will append data in the JSON file if the file existed . or it will create the file and add the data to it.
const fs = require('fs');
const data = {id: 2, name: 'your name'}
const file_path = './my_data.json'
append_data(file_path, data)
async function append_data(filename, data) {
if (fs.existsSync(filename)) {
var read_data = await readFile(filename)
if (read_data == false) {
console.log('not able to read file')
} else {
read_data.table.push(data) //data must have the table array in it like example 1
var dataWrittenStatus = await writeFile(filename, read_data)
if (dataWrittenStatus == true) {
console.log('data added successfully')
} else {
console.log('data adding failed')
}
}
}
}
async function readFile(filePath) {
try {
const data = await fs.promises.readFile(filePath, 'utf8')
return JSON.parse(data)
}
catch (err) {
return false;
}
}
async function writeFile(filename, writedata) {
try {
await fs.promises.writeFile(filename, JSON.stringify(writedata, null, 4), 'utf8');
return true
}
catch (err) {
return false
}
}
You can use library easy-file-manager
install first from npm
npm install easy-file-manager
Sample to upload and remove files
var filemanager = require('easy-file-manager')
var path = "/public"
var filename = "test.jpg"
var data; // buffered image
filemanager.upload(path,filename,data,function(err){
if (err) console.log(err);
});
filemanager.remove(path,"aa,filename,function(isSuccess){
if (err) console.log(err);
});
You can write in a file by the following code example:
var data = [{ 'test': '123', 'test2': 'Lorem Ipsem ' }];
fs.open(datapath + '/data/topplayers.json', 'wx', function (error, fileDescriptor) {
if (!error && fileDescriptor) {
var stringData = JSON.stringify(data);
fs.writeFile(fileDescriptor, stringData, function (error) {
if (!error) {
fs.close(fileDescriptor, function (error) {
if (!error) {
callback(false);
} else {
callback('Error in close file');
}
});
} else {
callback('Error in writing file.');
}
});
}
});