I have yet to find an answer to my problem with the examples from questions others have asked on this.
I wrote a little web scraper that stores data to 1 array and would like to write it (the arrays) to a file. I'm having trouble setting things up correctly.
I am using nodejs. Could someone write a sample that takes an array content then writes to a file. please break it down to basic, I am still new at programming.
Thanks the code is below
var content = [];
var request = require('request');
var cheerio = require('cheerio');
var URL = 'http://www.amazon.com';

// Fetch the page and, for every <td>, collect the whitespace-normalized text
// of the element that follows it into `content`.
request(URL, function (error, response, html) {
    // On a transport error `response` and `html` are undefined, so stop here
    // instead of falling through to `response.statusCode`.
    if (error) {
        console.log('Error:', error);
        return;
    }
    // Do not parse a page the server reported as failed.
    if (response.statusCode !== 200) {
        console.log('Invalid Status Code Returned:', response.statusCode);
        return;
    }
    var $ = cheerio.load(html);
    $('td').each(function (i, element) {
        // Text of the next sibling, trimmed, with every run of two or more
        // whitespace characters removed.
        var text = $(this).next().text().trim();
        content.push(text.replace(/\s\s+/g, ""));
    });
    console.log(content);
});
Simplest possible answer:
var fs = require('fs');

var filename = 'output.txt';
var arr = ['cat', 'dog', 'bird'];
// Serialize the array as JSON with a four-space indent so the file is readable.
var str = JSON.stringify(arr, null, 4);

// Reports the outcome of the asynchronous write.
function onWritten(err) {
    if (!err) {
        console.log('File written!');
        return;
    }
    console.log(err)
}

fs.writeFile(filename, str, onWritten);
Here, arr would be your array of data, which you're converting to a string because fs.writeFile expects a string. I used the additional null, 4 arguments to pretty-print the output so you can see it with a four-space indent.
Hope this helps!
It's not possible to store a real array/object in a file – its contents are bytes, however you can store a stringified format of this object, then parse this same format, using JSON for example (I think this ref equivals to Node.js):
// Serialize `content` to a JSON string; this string is what gets stored in the file.
var json_format = JSON.stringify(content);
So, if you want to read the array in the file after getting its contents
JSON.parse(json_format)
Keep in mind that JSON has no function declarations. All primitive values are supported except NaN, Infinity, and undefined (which is not a JSON value), etc., while special number syntaxes (exponent (+ | -), ...) are still allowed: JSON. For values that JSON doesn't support, JSON.stringify writes null for NaN and Infinity; undefined and functions become null inside arrays and are omitted from objects. I'm not sure how it exactly works between different platforms, though (I only use browser JS).
Now, to save/write the file we currently have
asynchronous fs.writeFile and synchronous fs.writeFileSync. I don't know much about Node.js, though. When using these methods you must include the File System in somewhere, normally so (File System is in a module):
fs = require('fs'); var fs
Related
I have a really huge json object (created with a JavaScript parser called espree, contains an array of objects). I want to write it to a .json file, but it fails every time with memory allocation problems (My heap size is 22 Gb).
As far as I understand, the buffer gets overloaded while the data is not being written into file.
If I use synchronous file operations only, the output gets written into the file, but the running time of my application explodes.
Solutions I have tried and failed (tried to serialize the whole object, then tried to serialize the items of the array):
JSON.stringify
JSONStream
big-json (which should serialize the object as a stream, but the buffer still gets overloaded..)
watching for drain events
Here is the current code:
const bjson= require('big-json');
/**
 * Serialize `result` to JSON with big-json and stream it into the output file.
 *
 * The stringify stream is piped into the write stream, so the source is paused
 * whenever the file stream's buffer is full and resumed on 'drain'. The file
 * stream is ended automatically when the source ends. The previous hand-rolled
 * 'data'/'drain' handling called an undefined `writeData` and never paused the
 * source, so chunks kept piling up in memory.
 *
 * @param {*} result - value to serialize as the JSON body.
 */
function save(result) {
    const outputStream = fs.createWriteStream(/*path*/);
    const stringifyStream = bjson.createStringifyStream({
        body: result
    });

    // A serialization failure must also tear down the file stream; otherwise
    // the descriptor stays open.
    stringifyStream.on('error', function (err) {
        outputStream.destroy(err);
    });
    outputStream.on('error', function (err) {
        console.error('Failed to write JSON output:', err);
    });

    stringifyStream.pipe(outputStream);
}
let results = [/*results as an array, containing lots of json objects*/];
// Hand every element to save(). The loop previously indexed `result`, a name
// that does not exist at this scope; the array is `results`.
// NOTE(review): save() opens its output path on every call - confirm each
// element is meant to go to its own file.
for (const item of results) {
    save(item);
}
Performance issue came from JSON transformation to string. I have the same issue and I solved that by storing data with msgpack format.
As explained here, I installed msgpack-lite in my project with npm:
npm install msgpack-lite
I code that for storing my JSON object :
// Write `myBigObject` to "file.msp" in MessagePack format by piping a
// msgpack-lite encode stream into a file write stream.
var fs = require("fs");
var msgpack = require("msgpack-lite");
var writeStream = fs.createWriteStream("file.msp");
var encodeStream = msgpack.createEncodeStream();
// Everything written to encodeStream is forwarded to the file.
encodeStream.pipe(writeStream);
// Each write() call sends one object to the stream; call it again for further objects.
encodeStream.write(myBigObject);
// call this once you're done writing to the stream.
encodeStream.end();
And this for reading my file and restoring my object. I don't know why it doesn't work with streams:
// Restore the object: read "file.msp" fully and synchronously, then decode it.
var fs = require("fs");
var msgpack = require("msgpack-lite");
// No encoding is passed to readFileSync here.
var buffer = fs.readFileSync("file.msp");
var myBigObject = msgpack.decode(buffer);
Using Node.js, what is the best way to stream a file from a filesystem into Node.js, but reading it backwards, from bottom to top? I have a large file, and there doesn't seem to be much sense in reading from the top if I only want the last 10 lines. Is this possible?
Right now I have this horrible code, where we do a GET request with a browser to view the server logs, and pass a query string parameter to tell the server how many lines at the end of the log file we want to read:
/**
 * Respond with the last N lines of the log file at `stderr_path`.
 *
 * N comes from the `num_lines` query parameter and defaults to 10 when it is
 * missing or not a positive integer. Lines are returned in file order (oldest
 * first). If the file has fewer than N lines, all of them are returned.
 *
 * This still reads the whole file into memory; it fixes correctness, not the
 * cost of scanning from the top.
 *
 * @param {object} req - request; reads `req.query.num_lines`.
 * @param {object} res - response; receives status 200 with
 *   `{"smartconnect_stdout_log": string[]}` or status 500 with an error body.
 * @param {function} next - unused.
 */
function get(req, res, next) {
    // Query-string values are strings; parse explicitly and reject junk.
    var requested = parseInt(req.query.num_lines, 10);
    var numOfLinesToRespondWith = requested > 0 ? requested : 10;
    var chunks = [];
    var responded = false; // guards against answering twice

    fs.createReadStream(stderr_path, {encoding: 'utf8'})
        .on('data', function collectChunk(chunk) {
            // Chunks are arbitrary slices of the file, not lines; a line may
            // be split across two chunks, so only collect them here.
            chunks.push(String(chunk));
        })
        .on('error', function handleReadError(err) {
            // Read failures are reported through 'error'; 'end' never carries one.
            if (responded) {
                return;
            }
            responded = true;
            log.error(colors.bgRed(err));
            res.status(500).send({"error reading from smartconnect_stdout_log": err.toString()});
        })
        .on('end', function handleEndOfFileData() {
            if (responded) {
                return;
            }
            responded = true;
            var lines = chunks.join('').split(/\r?\n/);
            // A file ending in a newline yields one empty trailing element.
            if (lines[lines.length - 1] === '') {
                lines.pop();
            }
            res.status(200).send({"smartconnect_stdout_log": lines.slice(-numOfLinesToRespondWith)});
        });
}
the code above reads the whole file and then adds the number of lines requested to the response after reading the whole file. This is bad, is there a better way to do this? Any recommendations will be met gladly.
(one problem with the code above is that it's writing out the last lines of the log but the lines are in reverse order...)
One potential way to do this is:
process.exec('tail -r ' + file_path).pipe(process.stdout);
but that syntax is incorrect - so my question there would be - how do I pipe the result of that command into an array in Node.js and eventually into a JSON HTTP response?
I created a module called fs-backwards-stream that may meet your needs. https://www.npmjs.com/package/fs-backwards-stream
If you need the result parsed by lines rather than byte chunks, you should use the module fs-reverse https://www.npmjs.com/package/fs-reverse
Both of these modules stream; alternatively, you could simply read the last n bytes of a file.
here is an example using plain node fs apis and no dependencies.
https://gist.github.com/soldair/f250fb497ce592c3694a
hope that helps.
One easy way if you're on a linux computer would be to execute the tac command in node as process.exec("tac yourfile.dat") and pipe it to your write stream
You could also use slice-file and then reverse the order yourself.
Also, look at what #alexmills said in the comments
this is the best answer I got, for now
the tail command on Mac/UNIX reads files from the end and pipes to stdout (correct me if this is loose language)
var cp = require('child_process');
module.exports = function get(req, res, next) {
var numOfLinesToRespondWith = req.query.num_lines || 100;
cp.exec('tail -n 5 ' + stderr_path, function(err,stdout,stderr){
if(err){
log.error(colors.bgRed(err));
res.status(500).send({"error reading from smartconnect_stderr_log": err.toString()});
}
else{
var data = String(stdout).split('\n');
res.status(200).send({"stderr_log": data});
}
});
}
This seems to work really well. It does, however, run in a separate process, which is expensive in its own way, but probably better than reading an entire 10,000-line log file.
I am very new to node.js and I think I understand the basics of how it functions but I feel like I am not seeing something that is vital to how fs.write and buffers function.
I am trying to send a user defined variable over socket.io and write it into an html file. I have a main site that has the button, when clicked it sends the information to the socket in a variable.
The thing I can't figure out is how to insert the variable into the html file.
I can save strings that I type, into a file:
(e.g.) var writeBuffer = new Buffer ('13');
But not variables that I put in:
(e.g.) var writeBuffer = new Buffer ($(newval));
I even tried different encoding methods, I think I am missing something.
Server.js
var newval = "User String";
// Open the file for synchronous-mode read/write and overwrite bytes starting
// at offset 937 with the contents of `newval`.
// fs.open delivers the descriptor to the callback; its return value is not used.
fsC.open(fileName, 'rs+', function (error, fd) {
    if (error) { throw error }
    // Pass the variable itself. `$(newval)` is jQuery syntax and is not
    // defined in Node. Buffer.from replaces the deprecated `new Buffer(...)`
    // and encodes the string as UTF-8.
    var writeBuffer = Buffer.from(String(newval));
    var bufferLength = writeBuffer.length;
    fsC.write(fd, writeBuffer, 0, bufferLength, 937,
        function (writeError, written) {
            // Close the descriptor whether or not the write succeeded, then
            // surface the write error.
            fsC.close(fd, function () {
                console.log('File Closed');
                if (writeError) { throw writeError }
            });
        }
    );
});
If you are using a version of jsdom 4.0.0 or later, it will not work with Node.js. As per the jsdom github readme:
Note that as of our 4.0.0 release, jsdom no longer works with
Node.js™, and instead requires io.js. You are still welcome to install
a release in the 3.x series if you use Node.js™.
I'm developing a web app that can upload large file into the Azure Blob Storage.
As a backend, I am using Windows Azure Mobile Services (the web app will generate contents for mobile devices) in nodeJS.
My client can successfully send chunks of data to the backend, everything looks fine but, at the end, the uploaded file is empty. The data upload has been prepared by following this tutorial: it works perfectly when the file is small enough to be uploaded in a single requests. The process fails when the file needs to be broken in chunks. It uses the ReadableStreamBuffer from the tutorial.
Can somebody help me?
Here the code:
Back-end : createBlobBlockFromStream
[...]
// Handler fragment: decode one base64-encoded chunk from the request body and
// upload it as a single block of the blob "<file>.<ext>" in the "zips" container.
//Get references
var azure = require('azure');
// NOTE(review): `qs` is not used anywhere in the visible fragment.
var qs = require('querystring');
var appSettings = require('mobileservice-config').appSettings;
var accountName = appSettings.STORAGE_NAME;
var accountKey = appSettings.STORAGE_KEY;
var host = accountName + '.blob.core.windows.net';
var container = "zips";
//console.log(request.body);
// Fields read from the request body: file name, extension, block id and the
// base64-encoded chunk.
var blobName = request.body.file;
var blobExt = request.body.ext;
var blockId = request.body.blockId;
var data = new Buffer(request.body.data, "base64");
// ReadableStreamBuffer is defined outside this fragment.
var stream = new ReadableStreamBuffer(data);
// NOTE(review): the block length passed below is whatever stream.size()
// reports; confirm it equals data.length, since the question describes
// blocks that end up empty.
var streamLen = stream.size();
var blobFull = blobName+"."+blobExt;
console.log("BlobFull: "+blobFull+"; id: "+blockId+"; len: "+streamLen+"; "+stream);
var blobService = azure.createBlobService(accountName, accountKey, host);
//console.log("blockId: "+blockId+"; container: "+container+";\nblobFull: "+blobFull+"streamLen: "+streamLen);
// Upload the block, then answer the request with 500 on error or 200 on success.
blobService.createBlobBlockFromStream(blockId, container, blobFull, stream, streamLen,
function(error, response){
if(error){
request.respond(statusCodes.INTERNAL_SERVER_ERROR, error);
} else {
request.respond(statusCodes.OK, {message : "block created"});
}
});
[...]
Back-end: commitBlobBlock
[...]
// Handler fragment: commit the block list sent by the client for the blob
// "<file>.<ext>" in the "zips" container.
var azure = require('azure');
// NOTE(review): `qs` is not used anywhere in the visible fragment.
var qs = require('querystring');
var appSettings = require('mobileservice-config').appSettings;
var accountName = appSettings.STORAGE_NAME;
var accountKey = appSettings.STORAGE_KEY;
var host = accountName + '.blob.core.windows.net';
var container = "zips";
var blobName = request.body.file;
var blobExt = request.body.ext;
var blobFull = blobName+"."+blobExt;
// The block list is taken from the request body and passed through unchanged.
var blockIdList = request.body.blockList;
console.log("blobFull: "+blobFull+"; blockIdList: "+JSON.stringify(blockIdList));
var blobService = azure.createBlobService(accountName, accountKey, host);
// Commit, then answer the request with 500 on error or 200 plus the result.
blobService.commitBlobBlocks(container, blobFull, blockIdList, function(error, result){
if(error){
request.respond(statusCodes.INTERNAL_SERVER_ERROR, error);
} else {
request.respond(statusCodes.OK, result);
// NOTE(review): called after the response is sent, with no callback, so
// its result is never observed here.
blobService.listBlobBlocks(container, blobFull)
}
});
[...]
The second method returns the correct list of blockId, so I think that the second part of the process works fine. I think that it is the first method that fails to write the data inside the block, as if it creates some empty blocks.
In the client-side, I read the file as an ArrayBuffer, by using the FileReader JS API.
Then I convert it to a Base64-encoded string by using the following code. This approach works perfectly if I create the blob in a single call, which is fine for small files.
[...]
// `data` holds the ArrayBuffer read by the FileReader API.
var requestData = new Uint8Array(data);
// Build a binary string with one character per byte of the buffer.
var binary = Array.prototype.map.call(requestData, function (byteValue) {
    return String.fromCharCode(byteValue);
}).join("");
[...]
Any idea?
Thank you,
Ric
Which version of the Azure Storage Node.js SDK are you using? It looks like you might be using an older version; if so I would recommend upgrading to the latest (0.3.0 as of this writing). We’ve improved many areas with the new library, including blob upload; you might be hitting a bug that has already been fixed. Note that there may be breaking changes between versions.
Download the latest Node.js Module (code is also on Github)
https://www.npmjs.org/package/azure-storage
Read our blog post: Microsoft Azure Storage Client Module for Node.js v. 0.2.0 http://blogs.msdn.com/b/windowsazurestorage/archive/2014/06/26/microsoft-azure-storage-client-module-for-node-js-v-0-2-0.aspx
If that’s not the issue, can you check a Fiddler trace (or equivalent) to see if the raw data blocks are being sent to the service?
Not too sure if your still suffering from this problem but i was experiencing the exact same thing and came across this looking for a solution. Well i found one and though id share.
My problem was not with how i push the block but in how i committed it. My little proxy server had no knowledge of prior commits, it just pushes the data its sent and commits it. Trouble is i wasn't providing the commit message with the previously committed blocks so it was overwriting them with the current commit each time.
So my solution:
// Block list for the commit: the block uploaded just now goes under
// UncommittedBlocks, and every block committed earlier goes under
// CommittedBlocks. The identifiers in the arrays are placeholders for real
// block ids.
var opts = {
UncommittedBlocks: [IdOfJustCommitedBlock],
CommittedBlocks: [IdsOfPreviouslyCommittedBlocks]
}
// 'containerName' and 'blobName' are placeholders; the callback here ignores
// both the error and the result.
blobService.commitBlobBlocks('containerName', 'blobName', opts, function(e, r){});
For me the bit that broke everything was the format of the opts object. I wasn't providing an array of previously committed block names. Its also worth noting that i had to base64 decode the existing block names as:
blobService.listBlobBlocks('containerName', 'fileName', 'type IE committed', fn)
Returns an object for each block with the name being base64 encoded.
Just for completeness here's how i push my blocks, req is from the express route:
// Push one block using the incoming request as the data stream. The quoted
// arguments are placeholders describing what to pass, not literal values.
var blobId = blobService.getBlockId('blobName', 'lengthOfPreviouslyCommittedArray + 1 as Int');
// Block length is taken from the request's content-length header.
var length = req.headers['content-length'];
blobService.createBlobBlockFromStream(blobId, 'containerName', 'blobName', req, length, fn);
Also with the upload i had a strange issue where the content-length header caused it to break and so had to delete it from the req.headers object.
Hope this helps and is detailed enough.
I am developing a Firefox addon. I need to save a bunch of data URI images to the disk. How do I approach to this?
I have browsed through the file I/O snippets on MDN, but the snippets don't help me much.
There are async and sync methods.I would like to use async method but how can I write a binary file using async method
Components.utils.import("resource://gre/modules/NetUtil.jsm");
Components.utils.import("resource://gre/modules/FileUtils.jsm");
// Goal: write a base64-encoded image to test.png in the "Desk" directory
// using NetUtil.asyncCopy.
// file is nsIFile
var file = FileUtils.getFile("Desk", ["test.png"]);
// You can also optionally pass a flags parameter here. It defaults to
// FileUtils.MODE_WRONLY | FileUtils.MODE_CREATE | FileUtils.MODE_TRUNCATE;
var ostream = FileUtils.openSafeFileOutputStream(file);
// base64 image that needs to be saved. Declared with `var` so the assignment
// does not create an implicit global.
var image = "iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==";
// How can I create an inputstream from the image data URI?
// `createInputstream` is a placeholder for the missing piece; it is not
// defined anywhere in this snippet.
var inputstream = createInputstream(image);
// The last argument (the callback) is optional.
NetUtil.asyncCopy(inputstream, ostream, function (status) {
    if (!Components.isSuccessCode(status)) {
        // Handle error!
        return;
    }
    // Data has been written to the file.
});
It sounds like you'd like to write not the data URI but the binary data it "contains", so I'll answer that.
First, lets assume we got some actual data URI, (if not, adding data:application/octet-stream;base64, isn't too hard ;)
// Placeholder data URI used by the examples below: the payload after
// ";base64," is btoa("helloworld").
var imageDataURI = "data:application/octet-stream;base64,aGVsbG93b3JsZA==";
Option 1 - Using OS.File
OS.File has the benefit that it is truly async. On the other hand, NetUtil is only mostly async, in that there will be stat calls on the main thread and the file will be opened and potentially closed on the main thread as well (which can lead to buffer flushes and hence block the main thread while the flush is happening).
After constructing a path (with some constants help), OS.File.writeAtomic is suited for the job.
Components.utils.import("resource://gre/modules/osfile.jsm");
// Destination path: test.png inside the desktop directory.
var file = OS.Path.join(OS.Constants.Path.desktopDir, "test.png");
// Strip everything up to and including ";base64,", then decode the payload to
// a byte string.
var str = atob(imageDataURI.replace(/^.*?;base64,/, ""));
// OS.File.writeAtomic expects an ArrayBuffer(View), so copy each character
// code of the byte string into a Uint8Array.
var data = new Uint8Array(str.length);
var idx = str.length;
while (idx--) {
    data[idx] = str.charCodeAt(idx);
}
// To support Firefox 24 and earlier, you'll need to provide a tmpPath. See MDN.
// There is in my opinion no need to support these, as they are end-of-life and
// contain known security issues. Let's not encourage users. ;)
var promised = OS.File.writeAtomic(file, data);

// Called when the write completed.
function onWritten() {
    // Success!
}

// Called when the write failed.
function onWriteFailed(ex) {
    // Failed. Error information in ex
}

promised.then(onWritten, onWriteFailed);
Option 2 - Using NetUtil
NetUtil has some drawbacks in that is is not fully async, as already stated above.
We can take a shortcut in that we can use NetUtil.asyncFetch to directly fetch the URL, which gives us a stream we can pass along to .asyncCopy.
Components.utils.import("resource://gre/modules/NetUtil.jsm");
Components.utils.import("resource://gre/modules/FileUtils.jsm");
// file is nsIFile
var file = FileUtils.getFile("Desk", ["test.png"]);

// Completion callback for the copy into the file.
function onCopyDone(status) {
    if (Components.isSuccessCode(status)) {
        // Data has been written to the file.
        return;
    }
    // Handle error!
}

// Receives the stream for the fetched data URI and copies it into `file`.
function onFetched(inputstream, status) {
    var fetched = inputstream && Components.isSuccessCode(status);
    if (!fetched) {
        // Failed to read data URI.
        // Handle error!
        return;
    }
    // You can also optionally pass a flags parameter here. It defaults to
    // FileUtils.MODE_WRONLY | FileUtils.MODE_CREATE | FileUtils.MODE_TRUNCATE;
    var ostream = FileUtils.openSafeFileOutputStream(file);
    // The last argument (the callback) is optional.
    NetUtil.asyncCopy(inputstream, ostream, onCopyDone);
}

NetUtil.asyncFetch(imageDataURI, onFetched);