Node.js - Reading CSV-file not working with line numbers > 500 - javascript

I am currently struggling to run my Node.js server.
What I want to do:
Upload a CSV file from a mobile device to my local server and save it on the file system
Read each line of the CSV file and save each row to my MongoDB database
Uploading and saving the file works flawlessly. Reading the CSV file and saving each row to the database only works for files with few lines.
I don't know the exact number of lines at which it stops working; it seems to differ every time I read a file.
Sometimes (if the file has more than 1000 lines) the CSV reader I use doesn't even start processing the file. Other times it reads only 100-200 lines and then stops.
Here is the code I use to upload the file:
var fs = require('fs');
var sys = require("sys");
var url = require('url');
var http = require('http');

http.createServer(function(request, response) {
    sys.puts("Got new file to upload!");

    var urlString = url.parse(request.url).pathname;
    var pathParts = urlString.split("/");
    var deviceID = pathParts[1];
    var fileName = pathParts[2];
    sys.puts("DeviceID: " + deviceID);
    sys.puts("Filename: " + fileName);

    sys.puts("Start saving file");
    var tempFile = fs.createWriteStream(fileName);
    request.pipe(tempFile);
    sys.puts("File saved");

    // Starting a new child process which reads the file
    // and inserts each row to the database
    var task = require('child_process').fork('databaseInsert.js');
    task.on('message', function(childResponse) {
        sys.puts('Finished child process!');
    });
    task.send({
        start : true,
        deviceID : deviceID,
        fileName : fileName
    });
    sys.puts("After task");

    response.writeHead(200, {
        "Content-Type" : "text/plain"
    });
    response.end('MESSAGE');
}).listen(8080);
This all works fine.
Now the code of the child process (databaseInsert.js):
var sys = require("sys");
var yaCSV = require('ya-csv');
var Db = require('mongodb').Db;
var dbServer = require('mongodb').Server;

process.on('message', function(info) {
    sys.puts("Doing work in child process");
    var fileName = info.fileName;
    var deviceID = info.deviceID;

    sys.puts("Starting db insert!");
    var dbClient = new Db('test', new dbServer("127.0.0.1", 27017, {}), {
        w : 1
    });

    dbClient.open(function(err, client) {
        if (err) {
            sys.puts(err);
        }
        dbClient.createCollection(deviceID, function(err, collection) {
            if (err) {
                sys.puts("Error creating collection: " + err);
            } else {
                sys.puts("Created collection: " + deviceID);

                var csvReader = yaCSV.createCsvFileReader(fileName, {
                    columnsFromHeader : true,
                    'separator' : ';'
                });
                csvReader.setColumnNames([ 'LineCounter', 'Time', 'Activity',
                        'Latitude', 'Longitude' ]);

                var lines = 0;
                csvReader.addListener('data', function(data) {
                    lines++;
                    sys.puts("Line: " + data.LineCounter);
                    var docRecord = {
                        fileName : fileName,
                        lineCounter : data.LineCounter,
                        time : data.Time,
                        activity : data.Activity,
                        latitude : data.Latitude,
                        longitude : data.Longitude
                    };
                    collection.insert(docRecord, {
                        safe : true
                    }, function(err, res) {
                        if (err) {
                            sys.puts(err);
                        }
                    });
                });
            }
        });
    });

    process.send('finished');
});
At first I didn't use a child process, but the behaviour was the same as it is now, so I tried this approach as well.
Hopefully someone with some Node.js experience can help me.

I think your issue is that you are trying to read the tempFile while it is still being written to. Right now you are piping the request to the file stream (which proceeds in parallel and asynchronously) and starting the reader process at the same time. The reader process will then start reading the file in parallel with the write operations. If the reader is faster (it usually will be), it will read the first couple of records but then encounter an end of file and stop reading.
To remedy this, only start the reader process after writing has completely finished, i.e., move everything from sys.puts("File saved"); onward into a callback that runs once tempFile has finished writing (see http://nodejs.org/api/stream.html#stream_writable_end_chunk_encoding_callback).
Reading a file while it is still being written to, akin to the Unix tail command, is fairly hard as far as I understand (search for details on how difficult it is to implement a proper tail).
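For illustration, here is a minimal sketch of that idea, assuming a Node version whose writable streams emit a 'finish' event; only the relevant part of the upload handler is shown:

var tempFile = fs.createWriteStream(fileName);
request.pipe(tempFile);

// only fork the reader once the whole request body has been flushed to disk
tempFile.on('finish', function() {
    sys.puts("File saved");
    var task = require('child_process').fork('databaseInsert.js');
    task.on('message', function(childResponse) {
        sys.puts('Finished child process!');
    });
    task.send({
        start : true,
        deviceID : deviceID,
        fileName : fileName
    });
});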

Are you familiar with mongoimport/mongoexport?
I have used it in the past to export from my database to a CSV file, so you can do the opposite after you upload the file from the mobile client to the server.
It is a shell tool, but you can invoke it from code using Node's child_process.spawn.
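As a rough sketch of what that could look like (database, collection and column names taken from the question; note that mongoimport expects comma- or tab-separated input, so a semicolon-delimited file would have to be converted first):

var spawn = require('child_process').spawn;

// deviceID and fileName are assumed to be in scope, as in the question
var mongoimport = spawn('mongoimport', [
    '--db', 'test',
    '--collection', deviceID,
    '--type', 'csv',
    '--file', fileName,
    '--fields', 'LineCounter,Time,Activity,Latitude,Longitude'
]);
mongoimport.on('exit', function(code) {
    console.log('mongoimport exited with code ' + code);
});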

Related

File uploads through socket.io (JavaScript & FileReader)

I am creating a chat app (in React Native), but for now I have made some tests in vanilla JavaScript. The server is a Node.js server.
Sending text messages works, but now I have some questions about sending photos/videos/audio files. I'm doing a lot of research online on the best way to do this.
I came up with the idea of using the FileReader API to split the file into chunks and send it chunk by chunk via socket.emit().
This is my code so far (simplified):
Please note that I will create a React Native app, but for now (for testing) I've just created an HTML file with an upload form.
// index.html
// the page where my upload form is
// index.html
// the page where my upload form is
var reader = {};
var file = {};
var sliceSize = 1000 * 1024;

var socket = io('http://localhost:8080');

const startUpload = e => {
    e.preventDefault();
    reader = new FileReader();
    file = $('#file')[0].files[0];
    uploadFile(0);
};

$('#start-upload').on('click', startUpload);

const uploadFile = start => {
    var slice = start + sliceSize + 1;
    var blob = file.slice(start, slice);

    reader.onloadend = e => {
        if (slice < file.size) {
            socket.emit('message', JSON.stringify({
                fileName: file.name,
                fileType: file.type,
                fileChunk: e.target.result
            }));
        } else {
            console.log('Upload completed!');
        }
    };

    reader.readAsDataURL(blob);
};
// app.js
// my NodeJS server-file
// app.js
// my NodeJS server-file
var file;
var files = {};

io.on('connection', socket => {
    console.log('User connected!');

    // when a message is received
    socket.on('message', data => {
        file = JSON.parse(data);
        if (!files[file.fileName]) {
            // this is the first chunk received
            // create a new string
            files[file.fileName] = '';
        }
        // append the binary data
        files[file.fileName] = files[file.fileName] + file.fileChunk;
    });

    // on disconnect
    socket.on('disconnect', () => {
        console.log('User disconnected!');
    });
});
I did not include any checks for file type (I'm not at that point yet); I first want to make sure this is the right approach at all.
Stuff I need to do:
Send a message (like socket.emit('uploaddone', ...)) from the client to the server to notify the server that the upload is done (and the server can emit the complete file to another user).
My questions are:
Is it okay to send chunks of binary data (base64) over a socket, or would it take up too much bandwidth?
Will I lose some quality (photos/videos/audio files) when splitting them up into chunks?
If there is a better way to do this, please let me know. I'm not asking for working code examples, just some guidance in the good direction.
You can send raw bytes over WebSocket; base64 has a 33% size overhead.
You also won't have to JSON.stringify the whole (possibly large) payload and parse it again on the receiving side.
"Will I lose some quality"
No, the underlying protocol (TCP) delivers data in order and without corruption.
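As an example, here is a rough sketch of the binary variant, assuming socket.io 1.x or later (which transmits ArrayBuffer/Buffer payloads natively); the 'chunk' event name is made up for illustration:

// client: read the slice as raw bytes instead of a base64 data URL
reader.onloadend = e => {
    socket.emit('chunk', {
        fileName: file.name,
        fileType: file.type,
        data: e.target.result   // ArrayBuffer, sent as binary
    });
};
reader.readAsArrayBuffer(blob);

// server: the binary part arrives as a Node Buffer, ready to collect or write to disk
socket.on('chunk', msg => {
    chunks.push(msg.data);
});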
I realize this answer is a couple of months late, but just for future reference you should look into using socket.io's acknowledgement feature:
// with acknowledgement
let message = JSON.stringify({
    fileName: file.name,
    fileType: file.type,
    fileChunk: e.target.result
});
socket.emit("message", message, (ack) => {
    // send next chunk...
});
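The server-side counterpart would call the acknowledgement callback once the chunk has been stored, roughly like this (a sketch based on the server code from the question):

socket.on('message', (data, ack) => {
    const file = JSON.parse(data);
    if (!files[file.fileName]) {
        files[file.fileName] = '';
    }
    files[file.fileName] += file.fileChunk;
    ack();   // tells the client it can send the next chunk
});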

Writing to file only writes last item, not all items, why?

I'm trying to write a feed to a file using Node.js. The problem is, it doesn't write all the feed items, only the last one.
var fs = require('fs');
var feedParser = require('ortoo-feedparser');
var url = "http://iwnsvg.com/feed";

feedParser.parseUrl(url).on('article', function(article) {
    console.log('title; ', article.title);
    fs.writeFile("articles.json", JSON.stringify(article.title), function(err) {
        if (err) {
            console.log(err);
        }
    });
});
Why?
Just change fs.writeFile( to fs.appendFile( and you're fine.
fs.writeFile overwrites your file each time you call it, whereas fs.appendFile appends to it.
As @Robert says, you should use appendFile, but also note that that change won't write out valid JSON. I'm not sure what output you're trying to achieve; if you just want the titles, you could write out a text file with one title per line like so:
var fs = require('fs');
var feedParser = require('ortoo-feedparser');
var url = "http://iwnsvg.com/feed";

feedParser.parseUrl(url).on('article', function(article) {
    console.log('title; ', article.title);
    fs.appendFile("articles.txt", article.title + "\n", function(err) {
        if (err) {
            console.log(err);
        }
    });
});
To write out JSON you can do:
var fs = require('fs');
var feedParser = require('ortoo-feedparser');
var url = "http://iwnsvg.com/feed";

let titles = [];
feedParser.parseUrl(url)
    .on('article', function (article) {
        console.log('title; ', article.title);
        titles.push(article.title);
    })
    .on('end', function () {
        fs.writeFile('articles.json', JSON.stringify({ titles }), function (err) {
            if (err) {
                console.log(err);
            }
        });
    });
fs.writeFile accepts some options, among them flag. The default value of flag is w (write), so your data is replaced by the new data on every call.
Use 'a' (append) instead:
{ flag: 'a' }
and you'll be fine.
But keep in mind that writeFile and appendFile are higher-level helpers in the fs module that open and close the file each time you add data.
Preferably, use fs.createWriteStream, which returns a writable stream (a writable file handle in other languages), and reuse that stream whenever you need to write data to your file.
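A minimal sketch of that stream-based variant, reusing the feed from the question (plain-text output, one title per line):

var fs = require('fs');
var feedParser = require('ortoo-feedparser');

// open the file once and keep the handle for the whole feed
var out = fs.createWriteStream('articles.txt', { flags: 'a' });

feedParser.parseUrl("http://iwnsvg.com/feed")
    .on('article', function(article) {
        out.write(article.title + "\n");
    })
    .on('end', function() {
        out.end();   // close the file when the feed is done
    });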

Nodejs: wget, unzip and convert to js without writing to file

Well, the title says it all: I'm trying to write a script (running in a Node.js/Express server-side application) that uses the request, unzip and xml2js libraries to fetch a zip file from a given URL, whose content is an XML file that I need to parse into a JavaScript object for further processing.
So far I've managed to come up with:
var express = require("express");
var app = express();
/* some init code omitted */
var request = require("request");
var unzip = require("unzip");
var xml2js = require("xml2js");

var parser = new xml2js.Parser();

app.get("/import", function(req, res) {
    request("http://path.to/file.zip")
        .pipe(unzip.Parse())
        .on("entry", function(entry) {
            // This is what I'm trying to avoid, which doesn't even work
            entry.pipe(fs.createWriteStream(entry.path));
            fs.readFile(entry.path, function(err, data) {
                if (err) {
                    return res.status(500).send(err);
                }
                parser.parseString(data, function(err, obj) {
                    console.log(util.inspect(obj));
                    /* further processing of obj */
                });
            });
        });
});
Although the contents of the XML file are correctly written to disk, I'm looking for an alternative to this approach for two reasons:
to save disk space, since I don't really need to keep the XML file once it has been converted to a JavaScript object
it doesn't even work: fs.readFile probably starts reading the file before fs.createWriteStream is done writing it, because the line console.log(util.inspect(obj)) logs null (whereas if I run only the innermost fs.readFile block and replace entry.path with the name of a previously written file, it produces the desired output)
I wish I could put together a jsFiddle for this, but I'm clueless as to how when it comes to Express applications. Cheers.
EDITED
Piping to a file is unnecessary; parse the data directly from the entry stream:
app.get("/import", function(req, res) {
request("http://link-top.zip")
.pipe(unzip.Parse())
.on("entry", function(entry) {
var chunks = [];
var res;
if(entry.path == 'needed.xml') {
entry.on('data', function(data) {
chunks.push(data.toString());
});
entry.on('end', function () {
res = chunks.join("");
parser.parseString(res, function(err, obj) {
console.log(util.inspect(obj));
/* further processing of obj */
});
});
}
});
});
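If the XML may contain multi-byte characters, it is slightly safer to collect raw Buffers and decode once at the end rather than calling toString() per chunk; a sketch of the same handlers with that change:

entry.on('data', function(data) {
    chunks.push(data);                                  // keep raw Buffers
});
entry.on('end', function() {
    var xml = Buffer.concat(chunks).toString('utf8');   // decode once
    parser.parseString(xml, function(err, obj) {
        console.log(util.inspect(obj));
    });
});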

Pass Buffer to ChildProcess Node.js

Here I have a Node.js script in which I want to do image processing in a subprocess.
As you will see, I take the file image.jpg and want to write it back as hello.jpg from a subprocess:
var node = require('child_process').spawn('node', ['-i']);
var fs = require('fs');

node.stdout.on('data', function(data) {
    var fs = require('fs');
    var gm = require('gm').subClass({ imageMagick: true });
    gm(data)
        .resize(500, 500)
        .toBuffer("jpg", function(err, buffer) {
            if (err) {
                console.log(err);
            } else {
                fs.writeFile("hello.jpg", buffer);
            }
        });
});

var buffer = fs.readFileSync(__dirname + "/image.jpg");
node.stdin.write(buffer);
However when I run this file I get this error:
[Error: Stream yields empty buffer]
To me it seems like the buffer is not passed correctly to the subprocess?
What am I doing wrong? What can I do to run image processing in a subtask? It's important to me that the image is not read from a file in the subprocess, because I want to read the file once and then send the buffer to several subprocesses that do image transformations. Thanks!
You are not doing any work in a subprocess; it is just node -i and nothing else, so all your image processing happens in the main process.
To fix it, you can run another Node process and give it a script to execute, say worker.js:
process.stdin.on('data', function(data) {
    var fs = require('fs');
    var gm = require('gm').subClass({ imageMagick: true });
    gm(data)
        .resize(500, 500)
        .toBuffer("jpg", function(err, buffer) {
            if (err) {
                console.log(err);
            } else {
                fs.writeFile("hello.jpg", buffer);
            }
        });
});
Then you would create a subprocess from your main script:
var node = require('child_process').spawn('node', ['worker.js']);
var fs = require('fs');
var buffer = fs.readFileSync(__dirname + "/image.jpg");
node.stdin.end(buffer);
Note that I used node.stdin.end in the last line, so the worker's stdin is closed and the worker can exit once it is done.
Take a look at the cluster module for an alternative approach.
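Since the question mentions fanning the same buffer out to several subprocesses, that part could look roughly like this (a sketch; in practice each worker would also need its own output file name instead of the hard-coded hello.jpg):

var spawn = require('child_process').spawn;
var fs = require('fs');

var buffer = fs.readFileSync(__dirname + "/image.jpg");

// start a few workers and hand each of them the same buffer via stdin
for (var i = 0; i < 3; i++) {
    var worker = spawn('node', ['worker.js']);
    worker.stdin.end(buffer);
}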

Node.js - Organising code and closures - SFTP/Inotify

I was hoping I could get some advice on why my Node.js program is behaving the way it is.
I am using two modules, node-sftp and node-inotify. I have set up node-inotify to watch a directory and call a function when something is written there, the function being an SFTP upload.
The problem is that processing one file at a time works fine, but when I drop 4 files there in one go, the function is called four times yet only one SFTP upload goes through.
Do I need to structure my code in a particular way to ensure that the SFTP upload happens once per file? Is this something to do with closures, perhaps?
This is a basic version of my code...
"event_handler" is called when something happens on a "watched" directory
"check_event" figures out if this type of event is one we want, in this case it's a "write"
"ftp_to_server" prepare connection details
"do_ftp" basically uses the node-sftp module to perform the sftp upload
event_handler = function(event) {
    var supplier;
    check_event(event, supplier, type, ftp_to_server);
};

=================

function check_event(event, handler)
{
    if (event.type === 'xxxxxx') {
        var file_to_process_name = 'abc';
        var file_to_process_dir = 'abc';
        var remote_dir = 'abc';
        handler(file_to_process_name, file_to_process_dir, remote_dir);
    }
}

function ftp_to_server(file_to_process_name, file_to_process_dir, remote_dir) {
    var connection_details = conf.ftp.connections;
    do_ftp(connection_details, file_to_process_name, file_to_process_dir, remote_dir);
}

function do_ftp(connection_details, file_to_process_name, file_to_process_dir, remote_dir) {
    var credentials = {
        // FTP settings here
    };
    var local_file = file_to_process_dir + file_to_process_name;
    var remote_file = remote_dir + file_to_process_name;

    connection = new sftp(credentials, function(err) {
        if (err) {
            throw err;
        }
        connection.writeFile(remote_file, fs.readFileSync(local_file, "utf8"), null, function(err) {
            if (err) {
                throw err;
            }
            console.info('FTP PUT DONE');
        });
    });
};
Your "connection = new sftp(credentials, function(err) {"
should be
var connection = new sftp(credentials, function(err) {
The way you currently have it coded, "connection" is a global, so each new upload overwrites it and earlier uploads lose their connection object before they finish.
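A minimal sketch of the corrected function (credentials and paths as before):

function do_ftp(connection_details, file_to_process_name, file_to_process_dir, remote_dir) {
    var credentials = {
        // FTP settings here
    };
    var local_file = file_to_process_dir + file_to_process_name;
    var remote_file = remote_dir + file_to_process_name;

    // 'var' keeps the connection local to this call, so parallel uploads
    // no longer overwrite each other's connection object
    var connection = new sftp(credentials, function(err) {
        if (err) {
            throw err;
        }
        connection.writeFile(remote_file, fs.readFileSync(local_file, "utf8"), null, function(err) {
            if (err) {
                throw err;
            }
            console.info('FTP PUT DONE');
        });
    });
}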
