Uploading and processing multiple files in node.js API

I'm new to Node.js. Let's say I have to upload several text files in this kind of format to a Node.js endpoint (they could add up to a total of 200 MB):
AU Olsen, BI
Lund, NW
Ellingsen, G
Hartvigsen, G
PY 2012
ER
AU Ming, X
Hajid, H
PY 2012
ER
What I want to do is read those files and generate an array of objects to be returned in the response, something like this:
publications = [{"author": ["Olsen, BI", "Lund, NW", "Ellingsen, G", "Hartvigsen, G"], "publicationYear": "2012"}, {"author": ["Ming, X", "Hajid, H"], "publicationYear": "2012"}]
At the moment I try to read all the uploaded files in the folder line by line so I can add to the publications variable later on. The problem is that the publications variable always ends up being returned empty; it looks like nothing I do inside the ifs has any effect. It's probably an asynchronous callback problem, but I have no idea how to fix it.
const express = require('express');
const multer = require('multer');
const lineReader = require('line-reader');
var fs = require('fs');

const upload = multer({dest: 'uploads/'});
const app = express();
app.use(express.static('public'));

app.post('/uploadAndProcess', upload.array('publications'), (req, res) => {
    var publications = [{}];
    fs.readdir('./uploads', (err, dir) => {
        for (var i = 0; i < dir.length; i++) {
            var fileName = dir[i];
            lineReader.eachLine('./uploads/' + fileName, function(line) {
                if (line.includes('ER')) {
                    //do something and add to publications variable
                }
                if (line.includes('PY')) {
                    //do something and add to publications variable
                }
            });
        }
    });
    return res.json({pub: publications});
});

app.listen(3001, () => console.log('App is listening...'));
Any thoughts? Thank you!
EDIT: Tried with sync readdir and it still didn't work :(
app.post('/uploadAndProcess', upload.array('publications'), (req, res) => {
    var publications = [{}];
    var files = fs.readdirSync('./uploads');
    for (var file in files) {
        lineReader.eachLine('./uploads/' + files[file], function(line) {
            if (line.includes('ER')) {
                publications.push({'test': 'test'});
            }
        });
    }
    return res.json({pub: publications});
});

I think you are on the right track with the async callback idea. I imagine the response is being sent out before the callback from fs.readdir completes, since fs.readdir is async and file reading is generally an expensive operation. Try using the fs.readdirSync function to process these synchronously and see if it works! See the Node documentation on fs.readdirSync to get started.
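Note that lineReader.eachLine is also asynchronous, which is why the synchronous readdir alone didn't help in the edit above. As a minimal sketch (assuming the uploaded files land in ./uploads and follow the AU/PY/ER format shown in the question, and reusing the question's app and upload), reading each file with fs.readFileSync keeps everything synchronous, so the array is fully built before the response is sent:

const fs = require('fs');

app.post('/uploadAndProcess', upload.array('publications'), (req, res) => {
    const publications = [];
    fs.readdirSync('./uploads').forEach((fileName) => {
        // Read the whole file at once and split it into lines
        const lines = fs.readFileSync('./uploads/' + fileName, 'utf8').split('\n');
        let current = { author: [] };
        let inAuthors = false;
        lines.forEach((rawLine) => {
            const line = rawLine.trim();
            if (line.startsWith('AU ')) {           // first author line
                inAuthors = true;
                current.author.push(line.slice(3));
            } else if (line.startsWith('PY ')) {    // publication year
                inAuthors = false;
                current.publicationYear = line.slice(3);
            } else if (line.startsWith('ER')) {     // end of record
                publications.push(current);
                current = { author: [] };
                inAuthors = false;
            } else if (inAuthors && line) {         // continuation author line
                current.author.push(line);
            }
        });
    });
    return res.json({ pub: publications });
});

For files totalling 200 MB you would eventually want a streaming approach, but the synchronous version at least demonstrates that nothing is returned before the parsing has finished.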

Related

For Loop in Protractor is not working properly

I am working on Protractor for testing an AngularJS application. I have written code to read data from an Excel sheet. My scenario is an end-to-end flow that should execute: the code takes the URL, username, and password from the Excel sheet and executes the entire flow, then iterates again with the next set of values. But it's not going into the loop.
My code is:
var Excel = require('exceljs');
var XLSX = require('xlsx');
var os = require('os');
var TEMP_DIR = os.tmpdir();
var wrkbook = new Excel.Workbook();

//---------------------Duration as Days------------------------------------------
describe('Open the clinicare website by logging into the site', function () {
    it('IP Medication Simple flows for Patient Keerthi for Days,Weeks and Months', function () {
        console.log("hello6");
        browser.driver.manage().window().maximize();
        var wb = XLSX.readFile('E:\\LAM WAH EE_Testing Enviornment\\IP_Medication_Flow\\Patients_Entry.xlsx');
        var ws = wb.Sheets.Sheet1;
        var json = XLSX.utils.sheet_to_json(wb.Sheets.Sheet1);
        console.log("json", json);
        //var json = XLSX.utils.sheet_to_json(wb.Sheets.Sheet1);
        //console.log("json", json);
        for (var a = 0; a < json.length; a++) {
            console.log("Test_URL", json[a].Test_URL);
            console.log("User_Name", json[a].User_Name);
            console.log("Password", json[a].Password);
            browser.get(json[a].Test_URL);
            console.log("hello10");
            //Perform Login:UserName
            element(by.model('accessCode')).sendKeys(json[a].User_Name);
            browser.sleep(6000);
            // browser.driver.sleep(6000);
            //Perform Login:Password
            element(by.model('password')).sendKeys(json[a].Password);
            browser.sleep(6000);
            //Hospital Name
            element(by.cssContainingText('option', 'HLWE')).click();
            browser.sleep(6000);
            //Perform Login:LoginButton
            element(by.css('.btn.btn-primary.pull-right')).click();
            browser.sleep(6000);
            //Clicking on Admitted Tab
            element(by.xpath("//span[contains(text(),' Admitted(25)')]")).click();
            browser.sleep(6000);
            // browser.driver.sleep(6000);
            //Clicking on First Admitted Patient
            element(by.cssContainingText('span.clearfloat', '35690')).element(by.xpath('//*[@id="searchPatientImgAdmittedF"]')).click();
            jasmine.DEFAULT_TIMEOUT_INTERVAL = 600000;
            // browser.sleep(600);
            //Clicking anywhere to proceed
            element(by.xpath('/html/body/div[3]/div[1]/div[16]/div[1]/div/table[4]/tbody/tr[2]/td/div/div/div[3]/table/tbody/tr[1]/td[3]')).click();
            jasmine.DEFAULT_TIMEOUT_INTERVAL = 10000;
            browser.sleep(800);
Anyone's help is appreciated. Thanks in advance.
Alright, I was initially confused by the 'exceljs' node module; it is not actually used in your test. I think the major problem here is that the file does not exist.
readFile and ENOENT
First off, XLSX.readFile is an alias for readFileSync, which calls readSync, which (probably) calls read_binary, which offloads to node's fs.readFileSync. More than likely fs.readFileSync is throwing the ENOENT because the path does not exist.
Looking at your path, you might need a backslash before your spaces.
var wb = XLSX.readFile('E:\\LAM\ WAH\ EE_Testing Enviornment\\IP_Medication_Flow\\Patients_Entry.xlsx');
It could be a good practice to get the file path with path.resolve prior to calling the read file method.
var path = require('path');
var patientEntryFilePath = path.resolve('E:\\LAM\ WAH\ EE_Testing Enviornment\\IP_Medication_Flow\\Patients_Entry.xlsx');
console.log(patientEntryFilePath);
var wb = XLSX.readFile(patientEntryFilePath);
Additional comments and thoughts about the original code snippet
Some additional comments about the code snippet from the original question. Maybe considerations for future cleanup.
Think about using a beforeAll or beforeEach for setting your browser driver window size and reading in a file. Reading in the file once is potentially a time and resource saver.
describe('Open the clinicare website by logging into the site', function () {
    var json = null;
    beforeAll(() => {
        browser.driver.manage().window().maximize();
        var wb = XLSX.readFile('E:\\LAM\ WAH\ EE_Testing Enviornment\\IP_Medication_Flow\\Patients_Entry.xlsx');
        var ws = wb.Sheets.Sheet1;
        json = XLSX.utils.sheet_to_json(wb.Sheets.Sheet1);
    });
    it('IP Medication Simple flows for Patient Keerthi for Days,Weeks and Months', function () {
        console.log("json", json);
        ...
Looking at your test, it is a login flow that appears to be the same each time, so you really only need to test it once. The for loop is acceptable since the json file is resolved and each line is executed in the control flow that Protractor uses.
Avoid using xpath. It is better to find elements by css, id, or a partial path. If the developer adds an additional div to the list of divs, it will break your test, making your test more fragile and requiring more upkeep.
This is because the Protractor API executes asynchronously, while the for loop executes synchronously; a detailed explanation can be found elsewhere, and it is the same issue as yours.
To fix your issue, we can use a JavaScript closure:
for (var a = 0; a < json.length; a++) {
    (function (a) {
        console.log("Test_URL", json[a].Test_URL);
        console.log("User_Name", json[a].User_Name);
        console.log("Password", json[a].Password);
        browser.get(json[a].Test_URL);
        console.log("hello10");
        //Perform Login:UserName
        element(by.model('accessCode')).sendKeys(json[a].User_Name);
        browser.sleep(6000);
        // browser.driver.sleep(6000);
        //Perform Login:Password
        element(by.model('password')).sendKeys(json[a].Password);
        browser.sleep(6000);
        ...
    })(a)
}
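As a side note (an alternative not mentioned above): if your Protractor setup runs on a Node version with ES6 support, declaring the loop variable with let gives each iteration its own binding, which removes the need for the IIFE. A sketch:

// `let` scopes `a` to each iteration, so async callbacks see the right index
for (let a = 0; a < json.length; a++) {
    browser.get(json[a].Test_URL);
    element(by.model('accessCode')).sendKeys(json[a].User_Name);
    element(by.model('password')).sendKeys(json[a].Password);
    // ...rest of the flow as above
}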

Node.js: event emitters not firing properly

I created a project that recursively looks for valid media file types from an array of directories. The issue is that two of my event emitters are working correctly and two are not. I've read the docs, watched some videos, and searched this site looking for answers. I think there is something fundamental that I'm missing or not understanding that is getting in the way.
I have a file called directoryWatch.js with the following code.
let util = require('util');
let EventEmitter = require('events').EventEmitter;
let recursive = require('recursive-readdir');
let mm = require("minimatch");
let local = require('../_localData/environmentConfig');

let validMediaFiles = function (dirs) {
    let self = this;
    self.emit('start', `${local.dirScanStart}`);
    dirs.forEach(function (dirName) {
        recursive(dirName, function (err, files) {
            if (err) {
                self.emit('error', `${local.dirScanError} ${dirName}`);
            } else {
                self.emit('success', `${local.dirScanSuccess} ${dirName}`,
                    mm.match(files, local.validFileExtensions,
                        {matchBase: true, nocase: true, nonull: true}));
            }
        });
    });
    self.emit('end', `${local.errScanMsg}`);
};

util.inherits(validMediaFiles, EventEmitter);
module.exports = validMediaFiles;
I have 4 events I want to emit.
The start step of looking at the directories.
The errors from trying to find a directory.
The success from reading a directory.
The end step of looking at the directories.
My main.js file that I'm executing looks like this.
let local = require('./_localData/environmentConfig');
let dirWatch = require('./fileIO/directoryWatch');

let results = new dirWatch(local.directories);

results.on('start', function (msg) {
    console.log(msg);
});
results.on('error', function (msg) {
    console.log(msg);
});
results.on('success', function (msg, files) {
    console.log(msg);
    console.log(files);
});
results.on('end', function (msg) {
    console.log(msg);
});
The output in PowerShell looks like this:
PS G:\Javascrpt Projects\Node\Misc\Test2>
PS G:\Javascrpt Projects\Node\Misc\Test2> node main.js
Error scanning directory: ssdfdsfasf
Getting media from: G:\Javascrpt Projects\Node\Misc\Test2\TestFiles\music
Getting media from: G:\Javascrpt Projects\Node\Misc\Test2\TestFiles\music2
{....big list of file names goes here...}
The issue is that my "start" event and "end" event are getting ignored, but I'm not sure why.
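For what it's worth, a likely cause: 'start' and 'end' are emitted synchronously inside the constructor, before main.js has had a chance to attach any listeners, and 'end' additionally fires before the async recursive calls complete. A minimal sketch of one possible fix, deferring the emits with process.nextTick and emitting 'end' only once every directory has been processed (reusing local, recursive, and mm from the module above; names are illustrative):

let validMediaFiles = function (dirs) {
    let self = this;
    let pending = dirs.length;   // directories still being scanned
    process.nextTick(function () {
        // Deferred, so listeners registered after the constructor still hear it
        self.emit('start', `${local.dirScanStart}`);
        dirs.forEach(function (dirName) {
            recursive(dirName, function (err, files) {
                if (err) {
                    self.emit('error', `${local.dirScanError} ${dirName}`);
                } else {
                    self.emit('success', `${local.dirScanSuccess} ${dirName}`,
                        mm.match(files, local.validFileExtensions,
                            {matchBase: true, nocase: true, nonull: true}));
                }
                if (--pending === 0) {
                    self.emit('end', `${local.errScanMsg}`);  // all scans done
                }
            });
        });
    });
};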

Synchronous and recursive filesystem file finding

I just have a quick question:
I am using Node.js to write a command-line tool that validates JSON files against JSON Schemas. The problem is that when I try to gather all the schemas, I always get "undefined", because I am using an async function while everything else uses only sync functions.
For this command-line tool, async is NOT needed.
Could someone help me out and give me a hand on how to make it work?
var getJSONSchemaFiles = function (dir) {
    results2 = [];
    var recursive = require('recursive-readdir');
    recursive(dir, function (err, files) {
        // 'files' is an array of filenames
        // console.log(files);
        files.forEach(function (entry) {
            if (entry.indexOf(".schema.json") > -1) {
                results2.push(entry);
            }
        });
        console.log(results2);
    });
    return results2;
};
I am using the 'recursive-readdir' npm module, but I suspect I don't even need a module for this kind of thing?
Ok, this enumerates all files under the given path synchronously:
var fs = require('fs');

function recursiveReaddir(path) {
    var stat = fs.lstatSync(path);
    if (stat.isFile())
        return [path];
    if (!stat.isDirectory())
        return [];
    return [].concat.apply([], fs.readdirSync(path).map(function (fname) {
        return recursiveReaddir(path + '/' + fname);
    }));
}
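With that helper in place, the original getJSONSchemaFiles reduces to a synchronous filter (a sketch, reusing the question's ".schema.json" suffix; './schemas' is an example path):

function getJSONSchemaFiles(dir) {
    // Keep only the schema files among everything found under dir
    return recursiveReaddir(dir).filter(function (entry) {
        return entry.indexOf('.schema.json') > -1;
    });
}

console.log(getJSONSchemaFiles('./schemas'));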
Use the glob module (https://github.com/isaacs/node-glob). It has both async and sync methods: glob.sync(pattern, [options]) and glob(pattern, [options], cb).
Example from their docs:
var glob = require("glob")

// options is optional
glob("**/*.js", options, function (er, files) {
    // files is an array of filenames.
    // If the `nonull` option is set, and nothing
    // was found, then files is ["**/*.js"]
    // er is an error object or null.
})

How to use promises to return the final result of an array?

I am currently trying to return a request of all the file names (in each existing folder) on a particular website. My web application uses NodeJS, Express, Cheerio, and Request to web scrape. My code first gets a list of all the folder names, then goes inside each folder to get its file names and stores them in the 'files' array. Finally, the 'files' array is what will be sent to the client side.
Right now I am having a big issue with the asynchronous flow, because my request always returns an empty list of 'files'. I have the Q node module installed and have tried using promises, but have had no luck getting the results I want. I am still new to NodeJS and would love it if someone could help me out.. :)
exports.getAllImages = function (req, res) {
    var folders = [];
    var files = [];

    //Step 1: Get folder names and store all of them in the 'folders' array
    var foldersUrl = 'http://students.washington.edu/jmzhwng/Images/';
    request(foldersUrl, function (error, response, html) {
        if (!error) {
            var $ = cheerio.load(html);
            $("a:contains('-')").filter(function () {
                var data = $(this)[0].attribs.href;
                folders.push(data);
            })

            //Step 2: Using the 'folders' array, get file names in each folder and store all of them in the 'files' array
            for (var i = 0; i < folders.length; i++) {
                var imagesUrl = 'http://students.washington.edu/jmzhwng/Images/' + folders[i];
                request(imagesUrl, function (error, response, html) {
                    if (!error) {
                        var $ = cheerio.load(html);
                        $("a:contains('.')").filter(function () {
                            var data = $(this)[0].attribs.href;
                            files.push(data);
                        })
                    }
                })
            }

            //Step 3: Return all file names to client-side
            res.json({
                images: files
            }, 200);
            console.log('GET ALL IMAGES - ' + JSON.stringify(files));
        }
    })
};
For better readability or support, you can view the JSFiddle I created here: http://jsfiddle.net/fKGrm/
You don’t necessarily need promises for this—you’re 95% of the way there already without them. The main issue, as I think you’re aware, is that your response is being sent before the image requests come back. You just need to wait for those to finish before you send the response.
The most basic way is to count the number of callbacks you receive in your Step 2. When it equals the folders.length, then send your response.
Here’s a simplified version of that:
var request = require('request'),
    cheerio = require('cheerio');

var baseUrl = 'http://students.washington.edu/jmzhwng/Images/';
var files = [];

request(baseUrl, function (error, res, body) {
    var folders = folderLinks(cheerio.load(body));
    var count = 0;
    folders.forEach(function (folder) {
        request(baseUrl + folder, function (error, res, body) {
            files.push.apply(files, fileLinks(cheerio.load(body)));
            if (++count == folders.length) {
                console.log(files);
            }
        });
    });
});
function folderLinks($) {
    return $('a:contains(-)').get().map(function (a) {
        return a.attribs.href;
    });
}

function fileLinks($) {
    return $('a:contains(.)').get().map(function (a) {
        return a.attribs.href;
    });
}
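Since the question specifically asks about promises, here is a hedged sketch of the same flow using native Promises and Promise.all (wrapping request in a small fetch helper; this assumes a Node version with Promise support and reuses baseUrl, folderLinks, and fileLinks from above):

// Wrap request in a promise that resolves with the response body
function fetch(url) {
    return new Promise(function (resolve, reject) {
        request(url, function (error, res, body) {
            if (error) reject(error);
            else resolve(body);
        });
    });
}

fetch(baseUrl)
    .then(function (body) {
        var folders = folderLinks(cheerio.load(body));
        // Fire one request per folder and wait for all of them
        return Promise.all(folders.map(function (folder) {
            return fetch(baseUrl + folder).then(function (body) {
                return fileLinks(cheerio.load(body));
            });
        }));
    })
    .then(function (fileArrays) {
        // Flatten the per-folder arrays into one list
        var files = [].concat.apply([], fileArrays);
        console.log(files);
    })
    .catch(console.error);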

node.js never exits after insert to couchbase, opposite of most node questions

My problem seems to be the opposite of every node.js question :-) I have a simple forEach loop to read a list of files and insert them into a Couchbase database. This works great, but it never exits after reading all the lines. So I added a counter to shutdown the couchbase connection after all inserts are complete. This works.
This process is intended to load hundreds of thousands of files, so I brought the async module into the mix to batch the inserts into groups of 100. async.eachLimit is used to iterate over the array and insert documents in batches. Now the original problem is back: whatever magic async.eachLimit uses to recognize that the process is complete is not happening.
I've been reading up on javascript scoping, callbacks, async, etc. Google searches hit the keywords but not this issue. I've reduced the code to the following test case. To test, create three files and add their names to testlist.txt.
The async.eachLimit in place works up until it hits the limit, then hangs. Comment it out and uncomment the array.forEach line and it works. Thanks in advance!
var fs = require('fs');
var couchbase = require('couchbase');
var async = require('async');

var filelist = 'testlist.txt';
var key_count = 0;
var cb_config = { host: 'localhost:8091', bucket: 'default' };

var db = new couchbase.Connection(cb_config, function (err) {
    if (err) {
        console.log('ERRR connect to couchbase at config[' + cb_config + ']');
        throw err;
    }
});

var insertFile = function (line) {
    console.log('LOAD [' + line + ']');
    fs.readFile(line, function (file_err, f_doc) {
        if (file_err) throw file_err;
        db.set(line, f_doc, function (db_err, db_res) {
            if (db_err) {
                console.log('FAIL [' + line + '] err[' + db_err + ']');
            } else {
                console.log('PASS [' + line + ']');
            }
            key_count--;
            if (key_count == 0) {
                console.log('DONE Shutting down client, no more keys');
                db.shutdown();
            }
        });
    });
}

// read list of files into data array from file filelist
fs.readFile(filelist, function (filelist_err, lines) {
    if (filelist_err) throw filelist_err;
    // HACK split adds empty line to array, use replace to fix
    var array = lines.toString().replace(/\n$/, '').split('\n');
    key_count = array.length;
    console.log('INIT lines[' + key_count + ']');
    async.eachLimit(array, 2, insertFile, function (err) { console.log('FAIL async err[' + err + ']'); });
    //array.forEach(function(data){insertFile(data);return;});
});
Testcase output using array.forEach:
INIT lines[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
LOAD [files.txt]
PASS [files.little.txt]
PASS [files.big.txt]
PASS [files.txt]
DONE Shutting down client, no more keys
Testcase output using async.eachLimit:
INIT lines[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
PASS [files.little.txt]
PASS [files.big.txt]
... hang, never gets to 3...
After reviewing with a coworker, they spotted my mistake: I had missed the async callback in my insertFile function. Adding it in works and lets me remove the key counter! Solution code below:
var fs = require('fs');
var couchbase = require('couchbase');
var async = require('async');

var filelist = 'testlist.txt';
var key_count = 0;
var cb_config = { host: 'localhost:8091', bucket: 'default' };

var db = new couchbase.Connection(cb_config, function (err) {
    if (err) {
        console.log('ERRR connect to couchbase at config[' + cb_config + ']');
        throw err;
    }
});

var insertFile = function (line, callback) {
    console.log('LOAD [' + line + ']');
    fs.readFile(line, function (file_err, f_doc) {
        if (file_err) throw file_err;
        db.set(line, f_doc, function (db_err, db_res) {
            if (db_err) {
                console.log('FAIL [' + line + '] err[' + db_err + ']');
                callback(db_err);
            } else {
                console.log('PASS [' + line + ']');
                callback();
            }
        });
    });
}

// read list of files into data array from file filelist
fs.readFile(filelist, function (filelist_err, data) {
    if (filelist_err) throw filelist_err;
    // HACK stoopid bug split adds empty line to array, use replace to fix
    var array = data.toString().replace(/\n$/, '').split('\n');
    key_count = array.length;
    console.log('READ files[' + key_count + ']');
    async.eachLimit(array, 2, insertFile, function (err) {
        if (err) console.log('LAST with async err[' + err + ']');
        console.log('DONE Shutting down client, no more keys');
        db.shutdown();
    });
});
And successful output:
$ node testcase.js
READ files[3]
LOAD [files.big.txt]
LOAD [files.little.txt]
PASS [files.little.txt]
LOAD [files.txt]
PASS [files.big.txt]
PASS [files.txt]
DONE Shutting down client, no more keys
