Multiple file upload to s3 node.js - javascript

I am working on a MEAN stack application and I am using busboy together with the AWS SDK to upload multiple files to S3.
Code:
var inputObj = {};
var busboy = new Busboy({ headers: req.headers });

// The file upload has completed
busboy.on('finish', function() {
  console.log('Upload finished.....');
  var file = [];
  const file1 = req.files.clogo;
  const file2 = req.files.cbanner1;
  const file3 = req.files.cbanner2;
  const file4 = req.files.cbanner3;
  const file5 = req.files.cbanner4;
  const file6 = req.files.clongHeader;
  file.push(file1);
  file.push(file2);
  file.push(file3);
  file.push(file4);
  file.push(file5);
  file.push(file6);
  multipleUploadToS3(req.body.cname, file, function(fileName) {
    console.log("client file upload finished.....");
    if (fileName.length == 6) {
      inputObj.clogo = fileName[0];
      inputObj.cbanner1 = fileName[1];
      inputObj.cbanner2 = fileName[2];
      inputObj.cbanner3 = fileName[3];
      inputObj.cbanner4 = fileName[4];
      inputObj.clongHeader = fileName[5];
      console.log(inputObj);
      var clientObj = new client(inputObj);
      clientObj.save(function(err, client) {
        console.log("Client Saved.....");
        if (err) {
          return res.status(400).send({
            message: errorHandler.getErrorMessage(err)
          });
        } else {
          res.json(client);
        }
      });
    }
  });
});
req.pipe(busboy);
File Upload Method:
function multipleUploadToS3(client, file, callback) {
  console.log("multipleUpload to S3");
  console.log(client);
  console.log(file);
  let s3bucket = new AWS.S3({
    accessKeyId: IAM_USER_KEY,
    secretAccessKey: IAM_USER_SECRET,
    Bucket: BUCKET_NAME,
  });
  var fileNames = [];
  for (var i = 0; i < file.length; i++) {
    s3bucket.createBucket(function () {
      var params = {
        Bucket: BUCKET_NAME,
        Key: client + '/' + file[i].name,
        Body: file[i].data,
      };
      s3bucket.upload(params, function (err, data) {
        if (err) {
          console.log('error in callback');
          console.log(err);
        }
        console.log('success');
        //console.log(data.key);
        fileNames.push(data.key);
        if (i == file.length) { callback(fileNames); }
      });
    });
  }
};
The issue: the file upload is asynchronous, so, for example, if the file1 I am uploading is honest.jpg, I want the multipleUploadToS3 method to return the file names only after they have finished uploading to S3. I bind these names to the inputObj keys, which are then saved to MongoDB, so inputObj.clogo should contain the logo file name and not one of the banner images, which is what happens because of the asynchronous calls.
This works for a single file but fails for multiple files.

The problem is that the for loop is synchronous while the file upload is asynchronous.
Take a look at this example below,
for (var i = 0; i < 5; i++) {
  setTimeout(function() { console.log(i); }, 100);
}
The above loop will print 5 five times, i.e. 55555.
This behaviour occurs because the for loop runs to completion immediately, leaving i = 5; when each timeout fires it prints that value of i. It prints five times because setTimeout was pushed onto the queue five times.
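For comparison, the same loop with the counter declared with let prints 0 1 2 3 4, because let creates a fresh binding of i for every iteration:
for (let i = 0; i < 5; i++) {
  setTimeout(function() { console.log(i); }, 100); // logs 0, 1, 2, 3, 4
}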
There are two ways to solve the problem you are facing:
You can use recursion.
You can use the neo-async (async-parallel) library to control the async flow in JavaScript (Node.js); click here for the neo-async lib.
Hope this clears your doubt. Please comment for more info.
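For illustration only (not the asker's original code), here is a minimal sketch of a promise-based alternative. It assumes AWS SDK v2, where upload(params).promise() returns a promise, and reuses the client, file, IAM_USER_KEY, IAM_USER_SECRET and BUCKET_NAME names from the question; Promise.all preserves input order, so each uploaded key lands on the right inputObj field:
// Sketch only: resolves with the uploaded keys in the same order as the input array.
function multipleUploadToS3(client, file) {
  const s3bucket = new AWS.S3({
    accessKeyId: IAM_USER_KEY,
    secretAccessKey: IAM_USER_SECRET
  });
  const uploads = file.map(function (f) {
    const params = {
      Bucket: BUCKET_NAME,
      Key: client + '/' + f.name,
      Body: f.data
    };
    // upload(...).promise() resolves with the upload result, including the object Key
    return s3bucket.upload(params).promise().then(function (data) {
      return data.Key;
    });
  });
  return Promise.all(uploads); // order matches the input, so clogo stays at index 0
}

// Usage:
// multipleUploadToS3(req.body.cname, file).then(function (fileNames) { /* save to Mongo */ });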

Related

Issue with JSZip, Filesaver and saving multiple URLs

When using JSZip to generate a ZIP file from multiple file URLs, some users get empty ZIP files. On macOS there does not seem to be a problem in various browsers, but on Windows the error occurs.
I suspect it is related to promises, await and async.
This is my function to which I can pass a string as filename, and an array of URLs (located on the same server):
function saveToZip(filename, urls) {
  let zip = new JSZip();
  let count = 0;
  urls.forEach((url) => {
    const blobPromise = fetch(url).then(r => {
      if (r.status === 200) return r.blob()
      return Promise.reject(new Error(r.statusText))
    })
    let name = url.substring(url.lastIndexOf('/'));
    name = name.toLowerCase();
    zip.file(name, blobPromise, {binary: true});
    count++;
    checkCount();
  });

  function checkCount() {
    if (count == urls.length) {
      console.log('All files added to ZIP');
      createAndSaveZip()
    }
  }

  function createAndSaveZip() {
    zip.generateAsync({
      type: "blob",
      streamFiles: false
    }, function updateCallback(metadata) {
      console.log(Math.round(metadata.percent) + "%");
    }).then(function (blob) {
      saveAs(blob, filename); // Filesaver.js
      console.log("Finished");
    }).catch(function (e) {
      console.log(e);
    });
  }
}
As you can see, I first create a new JSZip, then loop through each URL in the array and add each one to the ZIP file.
A function then checks whether all URLs have been added; if so, generateAsync is used together with FileSaver to generate and save the ZIP file.
You will notice the use of blob. Is this not working for browsers on Windows?
Is there something wrong in the way I have constructed the logic?
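For comparison, here is a minimal sketch of the same flow that explicitly waits for every fetch to resolve before generating the archive (an illustration only, assuming the same JSZip and FileSaver saveAs globals; it does not by itself explain the Windows behaviour):
// Sketch: resolve every blob first, then add them to the ZIP and save it.
async function saveToZip(filename, urls) {
  const zip = new JSZip();
  const files = await Promise.all(urls.map(async (url) => {
    const r = await fetch(url);
    if (!r.ok) throw new Error(r.statusText);
    return {
      name: url.substring(url.lastIndexOf('/')).toLowerCase(),
      blob: await r.blob()
    };
  }));
  files.forEach(({ name, blob }) => zip.file(name, blob, { binary: true }));
  const content = await zip.generateAsync({ type: 'blob', streamFiles: false });
  saveAs(content, filename); // FileSaver.js
}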

Watson Assistant context is not updated

I use Watson Assistant v1.
My problem is that every time I make a call to my Node.js code, where I return the context to keep the conversation coordinated, the context is only updated once and I get stuck in one node of the conversation.
This is my code:
client.on('message', message => {
  // general variables
  var carpetaIndividual = <../../../>
  var cuerpoMensaje = <....>
  var emisorMensaje = <....>

  // detect if context exists
  if (fs.existsSync(carpetaIndividual + '/contexto.json')) {
    var watsonContexto = require(carpetaIndividual + '/contexto.json');
    var variableContexto = watsonContexto;
  } else {
    var variableContexto = {}
  }

  // connection with Watson Assistant
  assistant.message(
    {
      input: { text: cuerpoMensaje },
      workspaceId: '<>',
      context: variableContexto,
    })
    .then(response => {
      let messageWatson = response.result.output.text[0];
      let contextoWatson = response.result.context;
      console.log('Chatbot: ' + messageWatson);
      // Save and create JSON file for context
      fs.writeFile(carpetaIndividual + '/contexto.json', JSON.stringify(contextoWatson), 'utf8', function (err) {
        if (err) {
          console.error(err);
        }
      });
      // Send messages to my application
      client.sendMessage(emisorMensaje, messageWatson)
    })
    .catch(err => {
      console.log(err);
    });
});
client.initialize();
The contexto.json file is updated, but when it is read, the code only picks up the first update of the file and not the later ones.
This will be because you are using require to read the .json file. For all subsequent requires of an already-required file, the data is cached and reused.
You will need to use fs.readFileSync (or fs.readFile) and JSON.parse:
// detect if context exists
if (fs.existsSync(carpetaIndividual + '/contexto.json')) {
  var watsonContexto = fs.readFileSync(carpetaIndividual + '/contexto.json');
  // Converting to JSON
  var variableContexto = JSON.parse(watsonContexto);
} else {
  var variableContexto = {}
}
There is another subtle problem with your code: you are relying on your async call to fs.writeFile completing before you read the file. That will be the case most of the time, but as you don't wait for fs.writeFile to complete, there is a chance that you try to read the file before it has been written.
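A minimal sketch of one way to wait for the write before replying, assuming Node 10+ where the built-in fs.promises.writeFile is available, and reusing the fs, carpetaIndividual, contextoWatson, client, emisorMensaje and messageWatson names from the question:
// Sketch: only send the reply once the updated context is safely on disk.
fs.promises
  .writeFile(carpetaIndividual + '/contexto.json', JSON.stringify(contextoWatson), 'utf8')
  .then(() => client.sendMessage(emisorMensaje, messageWatson))
  .catch((err) => console.error(err));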

Downloading multiple files from aws to Node js server

I need to download a large number of files (say 100k, each 0.2 - 1 MB in size) from AWS S3 to a Node.js server. The code I am using is:
app.get('/api/download-all', function(req, res) {
  res.json({status: 'download initiated'})
  downloadFromS3(getDocs());
});
The function that downloads the audio files is:
function downloadFromS3(docs) {
  docs.forEach((doc, fileIndex) => {
    var s3FilePath = doc.wav
    var fileName = s3FilePath.split('/').pop();
    var s3Params = {Bucket: 'zzzzz', Key: s3FilePath};
    var file = fs.createWriteStream(dir + '/' + fileName);
    console.log(downloadSession);
    s3.getObject(s3Params)
      .on('httpData', function (chunk) {
        console.log("file writing happening", fileName);
        file.write(chunk);
      })
      .send();
  });
}
Here the download function fires an s3.getObject call for as many files as there are to download; it doesn't wait for any file to finish, so almost all 100k (in my case) getObject calls are issued before a single file has been downloaded. Is this the right way, or should I wait for one file to finish downloading and only then invoke the next S3 call? What would be the right approach?
2) There is one other issue I am facing with this code. Once I make the download API call from the UI, the server gets busy with the download and stops responding to other requests from the UI; they all stay pending. Is there any way to do the download in the background? I have looked at approaches such as forking a child process or using a web worker to handle this, but I am not sure which one to use. What is the best way to handle this?
I'd advise an in-between approach. Kicking off 100k downloads in parallel is really not a good idea. But similarly, waiting for each download to fully complete won't utilise your full bandwidth. I'd suggest a solution that "pools" jobs - e.g., you create a pool of promises, each of which downloads one file at a time and, as soon as it finishes, starts the next.
I've been using a function like this:
Promise.pool = function pool(funcs, inParallel, progressCallback) {
  const promises = [];
  const results = [];
  function getNext() {
    if (funcs.length) {
      return funcs.pop()()
        .catch(() => {})
        .then((res) => {
          results.push(res);
          if (progressCallback) {
            progressCallback(results);
          }
          return getNext();
        });
    }
  }
  for (let i = 0; i < Math.min(inParallel, funcs.length); i++) {
    promises.push(getNext());
  }
  return Promise.all(promises)
    .then(() => results);
};
Then you'd define an array of functions, each of which downloads one file and returns a promise that resolves on completion:
const funcs = docs.map((doc) => {
  return () => {
    return new Promise((resolve) => {
      var s3FilePath = doc.wav
      var fileName = s3FilePath.split('/').pop();
      var s3Params = {Bucket: 'zzzzz', Key: s3FilePath};
      var file = fs.createWriteStream(dir + '/' + fileName);
      console.log(downloadSession);
      s3.getObject(s3Params)
        .on('httpData', function (chunk) {
          console.log("file writing happening", fileName);
          file.write(chunk);
        })
        .on("end", () => resolve())
        .send();
    });
  }
});
Finally, you'd use it like this:
const inParallel = 32;
function callback(partialResults) {
  // console log, whatever
}
Promise.pool(funcs, inParallel, callback)
  .then(() => console.log("all done!"));

Nodejs download s3 images (getObject) in loop

I'm trying to download images from an S3 bucket in a loop. My bucket is not public, and using getSignedUrl directly doesn't work (Forbidden error). I need to download (between 10 and 30) images from S3 upon user selection in the user interface (and later just delete them after creating a GIF).
It downloads the correct number of images (with the correct names), but on the local machine the content of every image is that of the last image. I even wrote a Promise to call inside the loop (hoping each getObject call would complete before moving on to the next), but that didn't work. Apart from bluebird, I tried all the solutions from this, but got the same result. My code looks like this:
var urlParams = {Bucket: 'bucket_name', Key: ''};
for (i = 0; i < imageNames.length; i += increment) {
  urlParams.Key = imageNames[i] + '.jpg';
  pathToSave = '/local-files/' + urlParams.Key;
  var tempFile = fs.createWriteStream(pathToSave);
  // I tried a Promise (and setTimeout) here too but it gives me the same result
  var stream = s3.getObject(urlParams).createReadStream().pipe(tempFile);
  var had_error = false;
  stream.on('error', function(err) {
    had_error = true;
  });
  stream.on('close', function() {
    if (!had_error) {
      console.log("Image saved");
    }
  });
}
After the above code finishes, as I mentioned, all images are saved with the correct names, but due to the non-blocking issue here, every image contains the content of the last image in the array (imageNames). The Promise I wrote and tried is below:
function getBucketObject(urlParams) {
  return new Promise((resolve, reject) => {
    var pathToSave = '/local-files/' + urlParams.Key;
    var tempFile = fs.createWriteStream(pathToSave);
    var stream = s3.getObject(urlParams).createReadStream().pipe(tempFile);
    var had_error = false;
    stream.on('error', function(err) {
      had_error = true;
    });
    stream.on('close', function() {
      if (!had_error) {
        resolve(pathToSave);
      }
    });
  })
}
Neither setTimeout nor the Promise works for my issue. Any help will be appreciated. Thanks.
You should use let instead of var, so that each loop iteration gets its own block-scoped bindings instead of sharing one variable.
Modify your code as:
for (var i = 0; i < imageNames.length; i += increment) {
  let urlParams = {Bucket: 'bucket_name', Key: imageNames[i] + '.jpg'};
  let pathToSave = 'img/analysis/' + urlParams.Key;
  getBucketObject(urlParams).then(function(pathToSave) {
    console.log("image saved");
  })
}
function getBucketObject(urlParams) {
  return new Promise((resolve, reject) => {
    let pathToSave = '/local-files/' + urlParams.Key;
    let tempFile = fs.createWriteStream(pathToSave);
    let stream = s3.getObject(urlParams).createReadStream().pipe(tempFile);
    let had_error = false;
    stream.on('error', function(err) {
      had_error = true;
    });
    stream.on('close', function() {
      if (!had_error) {
        resolve(pathToSave);
      }
    });
  })
}
Following what @Anshuman Jaiswal suggested in the comment, I tried the code with let instead of var and it works now. Thanks, Anshuman. The code in the loop looks like this:
for (var i = 0; i < imageNames.length; i += increment) {
  let urlParams = {Bucket: 'bucket_name', Key: imageNames[i] + '.jpg'};
  let pathToSave = '/local-files/' + urlParams.Key;
  getBucketObject(urlParams).then(function(pathToSave) {
    console.log("image saved");
  })
}
And the promise function is below
function getBucketObject(urlParams) {
  return new Promise((resolve, reject) => {
    let pathToSave = '/local-files/' + urlParams.Key;
    let tempFile = fs.createWriteStream(pathToSave);
    let stream = s3.getObject(urlParams).createReadStream().pipe(tempFile);
    let had_error = false;
    stream.on('error', function(err) {
      had_error = true;
    });
    stream.on('close', function() {
      if (!had_error) {
        resolve(pathToSave);
      }
    });
  })
}
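If you also need to know when every image is on disk (for example, before building the GIF), one option is to collect the promises and wait for all of them; a minimal sketch, reusing the imageNames, increment and getBucketObject names from above:
// Sketch: gather every download promise so we know when all files are saved.
const downloads = [];
for (let i = 0; i < imageNames.length; i += increment) {
  downloads.push(getBucketObject({Bucket: 'bucket_name', Key: imageNames[i] + '.jpg'}));
}
Promise.all(downloads).then(function(paths) {
  console.log('All images saved:', paths); // safe to build the GIF here
});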

Recursive reading on all files in a directory - FileSystem API

I am getting confused about how to read a list of files recursively.
Assume I have 3 text files in my FileSystem API root directory:
text1.txt
text2.txt
text3.txt
My goal is to read each text file one by one and then concatenate all the entries from each file into one string, but I am currently at a loss as to how to do this with the JavaScript FileSystem API.
window.requestFileSystem(window.TEMPORARY, 1024*1024, onInitFs, errorHandler);

function onInitFs(fs) {
  var dirReader = fs.root.createReader();
  var entries = [];
  // Call the reader.readEntries() until no more results are returned.
  var readEntries = function() {
    dirReader.readEntries(function(results) {
      if (!results.length) {
        readAllFiles(entries);
      } else {
        entries = entries.concat(toArray(results));
        readEntries();
      }
    }, errorHandler);
  };
  readEntries(); // Start reading dirs.
}

function readAllFiles(entries) {
  // Loop thru all the files and read each entry
}
I have seen how to read a single text file, but I don't know how to implement reading all the files and concatenating their contents.
They all use callback functions, so I am getting confused about how to handle it. Any pointers please?
I have actually been basing all my work on this: http://www.html5rocks.com/en/tutorials/file/filesystem
UPDATE 2
As per @Johan, I have changed my code to make use of callbacks:
window.requestFileSystem(window.TEMPORARY, 1024*1024, onInitFs, errorHandler);

function onInitFs(fs) {
  var dirReader = fs.root.createReader();
  var entries = [];
  // Call the reader.readEntries() until no more results are returned.
  var readEntries = function() {
    dirReader.readEntries(function(results) {
      if (!results.length) {
        readAllFiles(entries, concatMessages);
      } else {
        entries = entries.concat(toArray(results));
        readEntries();
      }
    }, errorHandler);
  };
  readEntries(); // Start reading dirs.
}

var concatMessage = '';
function concatMessages(message) {
  concatMessage += message;
}

function readAllFiles(logs, callBack) {
  logs.forEach(function(entry, iCtr) {
    var message;
    entry.file(function(file) {
      var reader = new FileReader();
      reader.onloadend = function(e) {
        //message += this.result;
        if (callBack)
          callBack('==================' + iCtr + '==========================');
        callBack(this.result);
      };
      reader.readAsText(file); // Read the file as plaintext.
    }, function(err) {
      console.log(err);
    });
  });
}
My only problem is this: the callbacks are not sequential.
It reads text2.txt first, then text3.txt, then text1.txt, so the end result is not in order, which is not what I want. Any more hints?
I highly recommend you consider using something like caolan's async library to accomplish this.
You can do something like this:
async.each(openFiles, function(file, callback) {
  // Perform operation on file here.
  console.log('Processing file ' + file);
  callback();
}, function(err) {
  // if any of the file processing produced an error, err would equal that error
  if (err) {
    // One of the iterations produced an error.
    // All processing will now stop.
    console.log('A file failed to process');
  } else {
    // do your concatenation here
  }
});
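Since the order of the concatenated text matters here, a series variant such as async.eachSeries may be a better fit: it only starts reading a file once the previous one has finished. A minimal sketch, assuming the same entries array from onInitFs and reading each entry as plain text:
// Sketch: read the entries strictly one after another so the output stays in order.
function readAllFiles(entries, onDone) {
  var concatMessage = '';
  async.eachSeries(entries, function (entry, next) {
    entry.file(function (file) {
      var reader = new FileReader();
      reader.onloadend = function () {
        concatMessage += this.result;  // append this file's text
        next();                        // only now move on to the next entry
      };
      reader.readAsText(file);
    }, next);
  }, function (err) {
    if (err) return console.log(err);
    onDone(concatMessage);             // all files read, in order
  });
}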
