I have a Lambda function that is meant to download a directory of files from S3, convert them, delete the old files, and upload the new output files back to S3. The output for each file will be at least one file and a folder.
Everything seems to work as intended except the upload: no errors are thrown, the function just ends without ever putting the objects.
I'm a novice, so feel free to point out I've done it all wrong.
// Assumed imports; the original post does not show them.
const AWS = require('aws-sdk');
const fs = require('fs');
const path = require('path');
const { execSync } = require('child_process');
const s3 = new AWS.S3();

exports.handler = async ({ dirName }) => {
  // const jsonIn = JSON.parse(event.body);
  // const dirName = jsonIn.dirName;
  const localDir = `/tmp/${dirName}`;
  const params = {
    Bucket: 'to-pdf-test',
    Delimiter: '/',
    Prefix: dirName + '/',
    StartAfter: dirName + '/'
  };
  var localList = [];
  execSync(`mkdir ${localDir}`);
  const s3List = await s3.listObjectsV2(params).promise();
  await Promise.all(
    s3List.Contents.map(async (file) => {
      let f = await getFiles(file);
      localList.push(f);
    })
  ).then(res => { console.log('Get Successful' + res); })
    .catch(err => { console.log('error' + err); });
  await Promise.all(
    localList.map(async (file) => {
      convertFile(file);
    })
  ).then(res => { console.log('Convert Successful' + res); })
    .catch(err => { console.log('error' + err); });
  // NOTE: dirSync itself is synchronous, but it is handed an async callback.
  // Nothing awaits the promises those callbacks return, so the handler
  // resolves (and Lambda freezes the environment) before any put completes.
  dirSync(localDir, async (filePath, stat) => {
    let bucketPath = filePath.substring(5); // strip the leading '/tmp/'
    let uploadParams = {
      Bucket: 'to-pdf-test',
      Key: `${bucketPath}`,
      Body: fs.readFileSync(filePath)
    };
    console.log('DS fPath ' + filePath);
    console.log('DS bPath ' + bucketPath);
    console.log(uploadParams.Body);
    try {
      let res = await s3.putObject(uploadParams).promise();
      console.log('Upload Complete', res);
    } catch (e) {
      console.log('Error', e);
    }
  });
};
async function getFiles(file) {
  let filePath = `/tmp/${file.Key}`;
  let fileParams = {
    Bucket: 'to-pdf-test',
    Key: file.Key
  };
  const { Body: inputFileBuffer } = await s3.getObject(fileParams).promise();
  fs.writeFileSync(filePath, inputFileBuffer);
  return filePath;
}
function convertFile(file) {
  const noPath = getFilename(file);
  const fPath = getFilePath(file);
  if (path.extname(noPath) === '.msg') {
    // `command` is the external conversion binary, defined elsewhere
    execSync(`cd ${fPath} && ${command} ${noPath}`);
  } else {
    console.log(`${noPath} not run. Not .msg`);
  }
  fs.unlinkSync(file); // delete the original once converted
}

function getFilename(fullPath) {
  return fullPath.replace(/^.*[\\\/]/, '');
}

function getFilePath(fullPath) {
  // substring swaps its arguments when start > end, so this yields
  // everything before the last '/'
  return fullPath.substring(fullPath.lastIndexOf('/'), 0);
}
function dirSync(dirPath, callback) {
  fs.readdirSync(dirPath).forEach((name) => {
    var filePath = path.join(dirPath, name);
    var stat = fs.statSync(filePath);
    if (stat.isDirectory()) {
      dirSync(filePath, callback);
    } else {
      callback(filePath, stat);
    }
  });
}
I had the upload working in a previous version of this function, thanks to this post.
My solution for the moment: read the local directory separately, push the paths of the files into localList, then .map over the array of paths to upload them.
localList = [];
// read the dir and push paths to the localList array
// (dirSync is synchronous, so there is nothing to await here)
dirSync(localDir, (filePath, stat) => {
  localList.push(filePath);
});
console.log(localList);
await Promise.all(
  localList.map(async (file) => {
    let bucketPath = file.substring(5);
    let uploadParams = {
      Bucket: 'to-pdf-test',
      Key: bucketPath,
      Body: fs.readFileSync(file)
    };
    console.log('Uploading', file);
    await s3.putObject(uploadParams).promise()
      .then((res) => { console.log('Upload Successful', bucketPath); })
      .catch((err) => { console.log('error' + err); });
  })
);
If there is a better (or proper) way to do this, someone let me know :)
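For reference, a rough sketch of one tidier pattern, assuming the same bucket, /tmp layout and AWS SDK v2 client as above (plus Node 11+ for flatMap): have the walker return the paths instead of taking a callback, then run all the uploads through a single Promise.all so the handler cannot resolve until every put has finished.

function walkSync(dirPath) {
  return fs.readdirSync(dirPath).flatMap((name) => {
    const filePath = path.join(dirPath, name);
    return fs.statSync(filePath).isDirectory() ? walkSync(filePath) : [filePath];
  });
}

const uploads = walkSync(localDir).map((filePath) =>
  s3.putObject({
    Bucket: 'to-pdf-test',
    Key: filePath.substring(5), // strip the leading '/tmp/'
    Body: fs.readFileSync(filePath)
  }).promise()
);
await Promise.all(uploads); // resolves only after every put has finished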
I am trying to upload multiple files to Google Cloud Storage, using a for loop to iterate over the list of files I want to upload.
The problem is that the for loop does not pause to wait for each upload to finish before moving on to the next one. The files do eventually upload, but the loop finishes first, which sends the empty urlList back to the client.
How do I make it pause and wait for each upload to complete before moving on to the next file in the for loop?
const processFile = require('../middleware');
const { format } = require('util');
let uuidv4 = require('uuid/v4');
const Cloud = require('@google-cloud/storage');
const { Storage } = Cloud;

const storage = new Storage({
  keyFilename: './xxx.json',
  projectId: 'xxx'
});
const bucket = storage.bucket('xxx');
exports.upload = async (req, res) => {
  const urlList = [];
  await processFile(req, res); // multer
  for (var i = 0; i < req.files.length; i++) {
    if (!req.files[i]) {
      return res.status(400).send({ message: 'Please upload a file!' });
    }
    const { originalname, buffer } = req.files[i];
    var filename = originalname
      .toLowerCase()
      .split(' ')
      .join('-');
    filename = uuidv4() + '-' + filename;
    console.log(filename);
    const blob = bucket.file(filename);
    const blobStream = blob.createWriteStream({
      resumable: false
    });
    blobStream.on('error', err => {
      res.status(500).send({ message: err.message });
    });
    blobStream.on('finish', async data => {
      const publicUrl = format(
        `https://storage.googleapis.com/${bucket.name}/${blob.name}`
      );
      urlList.push(publicUrl);
      try {
        await bucket.file(filename).makePublic();
      } catch (err) {
        console.log('failed to make it public');
        reject(err); // NOTE: reject is not defined in this scope
      }
    });
    blobStream.end(buffer);
  }
  return res.status(200).send({
    message: 'Uploaded the files successfully',
    url: urlList
  });
};
Just put your "upload" code in a Promise that you can await in the loop. Otherwise, because this is event-based code (the .on handlers), the for loop just runs straight through it and has nothing to await. This should do the trick:
const uploadFile = (f) => {
  return new Promise((resolve, reject) => {
    const { originalname, buffer } = f;
    var filename = originalname.toLowerCase().split(" ").join("-");
    filename = uuidv4() + "-" + filename;
    console.log(filename);
    const blob = bucket.file(filename);
    const blobStream = blob.createWriteStream({
      resumable: false,
    });
    blobStream.on("error", (err) => {
      reject(err); // let the caller decide how to respond (res is not in scope here)
    });
    blobStream.on("finish", async (data) => {
      const publicUrl = format(
        `https://storage.googleapis.com/${bucket.name}/${blob.name}`
      );
      try {
        await bucket.file(filename).makePublic();
        resolve(publicUrl);
      } catch (err) {
        console.log("failed to make it public");
        reject(err);
      }
    });
    blobStream.end(buffer);
  });
};

exports.upload = async (req, res) => {
  const urlList = [];
  await processFile(req, res); // multer
  for (var i = 0; i < req.files.length; i++) {
    if (!req.files[i]) {
      return res.status(400).send({ message: "Please upload a file!" });
    }
    try {
      const publicUrl = await uploadFile(req.files[i]);
      urlList.push(publicUrl);
    } catch (err) {
      return res.status(500).send({ message: err.message });
    }
  }
  return res.status(200).send({
    message: "Uploaded the files successfully",
    url: urlList,
  });
};
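A side note on this design: awaiting uploadFile inside the loop uploads the files one at a time, which keeps memory use predictable. If concurrent uploads are acceptable, the whole loop collapses to one line (a sketch, reusing the uploadFile above):

const urlList = await Promise.all(req.files.map(f => uploadFile(f)));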
The scenario: I have two large CSV files, csv1.csv and csv2.csv. Both files have an email column. I have to read csv1.csv row by row, check whether each email exists in csv2.csv, and if it matches, write that row of csv2.csv into csv3.csv. I have tried read streams as well, but they are not working as expected. Any guidance or help is appreciated.
Thanks to all in advance.
Following are the CSV files
csv1.csv
email,header1,header2
test1@example.com,test1,test1
test2@example.com,test2,test2
test3@example.com,test3,test3
test4@example.com,test4,test4
test5@example.com,test5,test5
csv2.csv
email,header1,header2
test4@example.com,test4,test4
test5@example.com,test5,test5
test6@example.com,test6,test6
test7@example.com,test7,test7
test8@example.com,test8,test8
Following is the code that I tried
const fs = require('fs');
const csv = require('fast-csv');

class CsvHelper {
  static write(filestream, rows, options) {
    return new Promise((res, rej) => {
      csv.writeToStream(filestream, rows, options)
        .on('error', err => rej(err))
        .on('finish', () => res());
    });
  }

  constructor(opts) {
    this.headers = opts.headers;
    this.path = opts.path;
    this.writeOpts = {
      headers: this.headers,
      includeEndRowDelimiter: true // note the spelling; the misspelled option is silently ignored
    };
  }

  create(rows) {
    return CsvHelper.write(fs.createWriteStream(this.path, { flags: 'a' }), rows, { ...this.writeOpts });
  }

  append(rows) {
    return CsvHelper.write(fs.createWriteStream(this.path, { flags: 'a' }), rows, {
      ...this.writeOpts,
      writeHeaders: false,
    });
  }
}
class Helper {
  // NOTE: this re-reads csv2.csv from disk once for every row of csv1.csv
  async matchCsv(outerRow) {
    const filePath2 = "csv2.csv";
    const filePath3 = "csv3.csv";
    let row = [];
    const csvFile = new CsvHelper({
      path: filePath3,
      headers: ["Email", "Active"]
    });
    return new Promise((resolve, reject) => {
      fs.createReadStream(filePath2)
        .on("error", err => {
          reject(err);
        })
        .pipe(csv.parse({ headers: true }))
        .on("error", err => {
          reject(err);
        })
        .on("data", innerRow => {
          if (outerRow["email"] === innerRow["email"]) {
            console.log("====================");
            console.log("match found");
            console.log(innerRow);
            console.log("====================");
            row.push([innerRow["email"], "yes"]);
            console.log("row: ", row);
          }
        })
        .on("finish", async () => {
          if (!fs.existsSync(filePath3)) {
            await csvFile.create(row);
            resolve("Done from matchCsv");
          } else {
            await csvFile.append(row);
            resolve("Done from matchCsv");
          }
        });
    });
  }

  async generateCsv() {
    const filePath1 = "csv1.csv";
    return new Promise((resolve, reject) => {
      fs.createReadStream(filePath1)
        .on("error", err => {
          reject(err);
        })
        .pipe(csv.parse({ headers: true }))
        .on("data", async outerRow => {
          // NOTE: awaiting here does not pause the stream, so "finish"
          // can fire before every matchCsv call has completed
          const result = await this.matchCsv(outerRow);
          console.log("result: ", result);
        })
        .on("finish", () => {
          resolve("Generated csv3.csv file.");
        });
    });
  }
}

async function main() {
  const helper = new Helper();
  const result = await helper.generateCsv();
  console.log(result);
}

main();
The question is a little confusing, but I think I know what you want. Here's what I would do to check whether the email exists: add all the rows to an array, cycle through them, and if the email address matches the one you're looking for, do something with it. I think you said you wanted to write that row out to another CSV file, which should be simple enough.
const csv = require('csv-parser');
const fs = require('fs');

const filepath = "./example_data.csv";
const emailAdd = "myemail@email.com";
var rowsArr = [];

fs.createReadStream(filepath)
  .on('error', () => {
    // handle error
  })
  .pipe(csv())
  .on('data', (row) => {
    rowsArr.push(row);
  })
  .on('end', () => {
    // note: < rather than <=, or the last iteration reads past the array
    for (var i = 0; i < rowsArr.length; i++) {
      if (rowsArr[i].emailAddress == emailAdd) { // key must match the CSV header
        // do something
      }
    }
  });
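For the two-file scenario in the question, re-reading csv2.csv once per row of csv1.csv gets expensive fast. A minimal sketch of an alternative, assuming csv1's email column fits in memory: collect csv1's emails into a Set, then stream csv2.csv once and write the matching rows straight to csv3.csv (csv-parser for reading, as above; plain writes for output):

const csv = require('csv-parser');
const fs = require('fs');

const emails = new Set();
fs.createReadStream('csv1.csv')
  .pipe(csv())
  .on('data', row => emails.add(row.email))
  .on('end', () => {
    const out = fs.createWriteStream('csv3.csv');
    out.write('email,header1,header2\n');
    fs.createReadStream('csv2.csv')
      .pipe(csv())
      .on('data', row => {
        // keep the csv2 row when its email appeared anywhere in csv1
        if (emails.has(row.email)) {
          out.write(`${row.email},${row.header1},${row.header2}\n`);
        }
      })
      .on('end', () => out.end());
  });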
I'm trying to launch an .exe file from an Electron app with React/Redux.
From the component I'm doing dispatch(launch(titleId, titleName)).
The problem is that path is undefined when I await readFolders() in launch.
Any idea what I'm doing wrong and what I should change in my approach?
Thanks in advance!
launch.js
// Assumed imports; the original excerpt does not show them.
import { execFile } from 'child_process';
import { readFolders } from './readFolders';

export const launch = async (titleId, titleName) => {
  const path = await readFolders(titleId);
  console.log('path:', path); // undefined
  execFile(path, (err, data) => {
    if (err) {
      console.log('err', err);
    } else if (data) {
      console.log('data:', data);
    } else {
      console.log('success');
    }
  });
  return {
    type: 'LAUNCH',
  };
};
readFolders.js
import fs from 'fs';
import { homedir } from 'os';

const fsPromises = fs.promises;
const isExeFile = file => file.match(/.*\.exe$/i);

export const readFolders = async titleId => {
  const userDir = homedir();
  const folderPath = `${userDir}/downloads`;
  const fullPath = `${folderPath}/${titleId}`;
  try {
    const contents = await fsPromises.readdir(fullPath);
    contents.forEach(async item => {
      if (isExeFile(item)) {
        console.log('isExeFile');
        return `${fullPath}/${item}`; // returns from this callback, not from readFolders
      }
      try {
        const nestedFolder = await fsPromises.readdir(`${fullPath}/${item}`);
        nestedFolder.forEach(nestedItem => {
          if (isExeFile(nestedItem)) {
            return `${fullPath}/${item}/${nestedItem}`; // same problem here
          }
          return null;
        });
      } catch (err) {
        console.log('err:', err);
      }
    });
    // no return statement down here, so readFolders resolves to undefined
  } catch (err) {
    console.log('err main:', err);
  }
};
Edit:
I also tried the following. Now const path = await readFolders(titleId); returns the correct result, but ESLint complains (https://eslint.org/docs/rules/no-async-promise-executor) and it doesn't feel like a good solution.
return new Promise(async (resolve, reject) => {
  try {
    const contents = await fsPromises.readdir(fullPath);
    contents.forEach(async item => {
      if (isExeFile(item)) {
        console.log(`${fullPath}/${item}`);
        return resolve(`${fullPath}/${item}`);
      }
      try {
        const nestedFolder = await fsPromises.readdir(`${fullPath}/${item}`);
        nestedFolder.forEach(nestedItem => {
          if (isExeFile(nestedItem)) {
            console.log(`${fullPath}/${item}/${nestedItem}`);
            return resolve(`${fullPath}/${item}/${nestedItem}`);
          }
          return null;
        });
      } catch (err) {
        console.log('err:', err);
        reject(err);
      }
    });
  } catch (err) {
    console.log('err main:', err);
    reject(err);
  }
});
The return at the end is missing. When you return inside forEach, it only returns from the anonymous callback function, so return `${fullPath}/${item}/${nestedItem}`; never leaves readFolders.
For more, you can read my blog post on it:
https://medium.com/@deepak_v/weird-part-how-to-break-the-loop-in-javascript-8bba3e658267
Updated code:
export const readFolders = async (titleId) => {
  const userDir = homedir();
  const folderPath = `${userDir}/downloads`;
  const fullPath = `${folderPath}/${titleId}`;
  try {
    const contents = await fsPromises.readdir(fullPath);
    // Use for...of rather than forEach/some with an async callback:
    // it lets await pause the loop and lets return exit readFolders itself.
    for (const item of contents) {
      if (isExeFile(item)) {
        console.log("isExeFile");
        return `${fullPath}/${item}`;
      }
      try {
        const nestedFolder = await fsPromises.readdir(`${fullPath}/${item}`);
        for (const nestedItem of nestedFolder) {
          if (isExeFile(nestedItem)) {
            return `${fullPath}/${item}/${nestedItem}`;
          }
        }
      } catch (err) {
        // not a readable directory; skip it
      }
    }
  } catch (err) {
    console.log("err main:", err);
  }
};
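With readFolders now actually returning the path (or undefined when nothing matches), launch can guard before spawning. A small sketch, reusing the launch.js shown earlier; the LAUNCH_FAILED action type is hypothetical:

const path = await readFolders(titleId);
if (!path) {
  console.log('no .exe found for', titleId);
  return { type: 'LAUNCH_FAILED' }; // hypothetical action type
}
execFile(path, (err, data) => { /* as before */ });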
I am coding a POST request handler that downloads the HTML for a list of URLs, zips it, and emails the archive back. This should all happen in the backend. I am storing all the data in an array and extracting the first element to start these operations.
I have a while loop inside which I am calling some functions, each of which takes a certain amount of time to execute. I used async, await and promises to make sure they run one after the other.
Coming to my problem: my while loop starts its next iteration before all the functions inside it have finished executing.
// assumes app, stack, isFunctionRunning, request, zipper, rimraf, fs,
// transporter, set_mail_options and directory are defined elsewhere
app.post('/?', async (req, res) => {
  var urls = req.query.urls;
  var email = req.query.email;
  var new_stack = [urls, email];
  stack.push(new_stack);
  res.send("Mail sent");
  if (isFunctionRunning === false) { // initially it is false
    console.log(isFunctionRunning, stack.length);
    send_mails();
  }
});

const getGoogleIndexHTML = (url) => {
  return new Promise((resolve, reject) => {
    request(url, (err, res, body) => err ? reject(err) : resolve(body));
  });
};

const some_function_to_download = async (url) => {
  try {
    const a = url.split(".");
    let googleIndexHTML = await getGoogleIndexHTML(url);
    // fs.writeFile takes a callback and returns undefined,
    // so this await does not actually wait for the write
    await fs.writeFile(directory + '/' + a[1] + '.html', googleIndexHTML, (err) => {
      if (err) throw err;
    });
    console.log('File created.');
  } catch (err) {
    console.log(err);
  }
};

const html_to_zip_file = async () => {
  // zipper.zip is callback-based too, so nothing is awaited here either
  await zipper.zip(directory, function (error, zipped) {
    if (!error) {
      zipped.compress();
      zipped.save('./package.zip', function (error) {
        if (!error) {
          console.log("Saved successfully !");
        }
      });
    } else {
      console.log(error);
    }
  });
};

const send_mails = async () => {
  while (stack.length > 0) {
    isFunctionRunning = true;
    var a = stack.shift();
    var urls = a[0];
    var collection_urls = urls.split(",");
    var to_email = a[1];
    rimraf(directory, function () {
      console.log("done");
    });
    fs.mkdirSync(directory);
    for (url of collection_urls) {
      await some_function_to_download(url); // 5 sec per download
    }
    await html_to_zip_file() // takes 5 sec to zip
      .then(result => {
        transporter.sendMail(set_mail_options(to_email)) // 2 sec to send mail
          .then(result => {
            console.log("Mail sent");
          })
          .catch(err => {
            console.log(err);
          });
      })
      .catch(err => {
        console.log(err);
      });
    console.log("reached"); // this is reached before zip is done and mail sent. I want to prevent this
  }
  isFunctionRunning = false;
};
You need to return transporter.sendMail in sendMail, fs.writeFile in someFunctionToDownload, and zipper.zip in htmlToZipFile, otherwise the await won't work as expected (I'm assuming they actually do return promises; I'm only familiar with fs.writeFile).
Also: camelCase is used in JS, not snake_case 🙃
And are you sure rimraf is synchronous?
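(It isn't in its classic form: rimraf takes a callback. A minimal sketch of awaiting it, assuming the older callback-style rimraf API rather than the promise-based v4+:)

const { promisify } = require('util');
const rimraf = require('rimraf');

// inside an async function: resolves once the directory is gone
await promisify(rimraf)(directory);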
const sendMails = async () => {
  while (stack.length > 0) {
    isFunctionRunning = true;
    const [urls, toEmail] = stack.shift();
    var collectionUrls = urls.split(",");
    rimraf(directory, function () {
      console.log("done");
    });
    await fs.mkdir(directory); // assumes the promise-based fs.promises API
    await Promise.all(collectionUrls.map(someFunctionToDownload)); // 5 sec per download
    await htmlToZipFile() // takes 5 sec to zip
      .then(result => transporter.sendMail(set_mail_options(toEmail))) // 2 sec to send mail
      .then(result => {
        console.log("Mail sent");
      })
      .catch(err => {
        console.log(err);
      });
    console.log("reached"); // this is reached before zip is done and mail sent. I want to prevent this
  }
  isFunctionRunning = false;
};
const someFunctionToDownload = async url => {
  const a = url.split(".");
  const googleIndexHTML = await getGoogleIndexHTML(url);
  return fs.writeFile(`${directory}/${a[1]}.html`, googleIndexHTML, err => {
    if (err) throw err;
  });
};

const htmlToZipFile = async () => {
  return zipper.zip(directory, function (error, zipped) {
    if (!error) {
      zipped.compress();
      zipped.save("./package.zip", function (error) {
        if (!error) {
          console.log("Saved successfully!");
        }
      });
    } else {
      console.log(error);
    }
  });
};
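One design note on the rewrite above: Promise.all starts every download at once rather than one at a time. If the downloads must stay sequential (rate limits, ordering), a plain loop is the safer shape:

for (const url of collectionUrls) {
  await someFunctionToDownload(url); // one at a time, as in the original
}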
Try using the following
while (stack.length > 0) {
  isFunctionRunning = true;
  var a = stack.shift();
  var urls = a[0];
  var collection_urls = urls.split(",");
  var to_email = a[1];
  rimraf(directory, function () {
    console.log("done");
  });
  fs.mkdirSync(directory);
  for (url of collection_urls) {
    await some_function_to_download(url); // 5 sec per download
  }
  try {
    const result = await html_to_zip_file(); // takes 5 sec to zip
    const sendMailResult = await transporter.sendMail(set_mail_options(to_email));
  } catch (e) {
    console.log(e);
  }
  console.log("reached");
}
Since html_to_zip_file() and the sendMail call are independent, we can also run them in parallel:
const result = await Promise.all([html_to_zip_file(), transporter.sendMail(set_mail_options(to_email))]);
(That only holds if the mail does not attach the zip; if it does, keep the two awaits sequential.)
I want to zip all the images from a list of S3 URLs. I am doing it server side on the sailsjs framework.
I tried using axios to download the images and 'zipdir' to zip them. The images are getting downloaded into the temp folder, but they're not getting zipped properly.
this.downloadFiles = function (req, res) {
  var resObj = {};
  async.waterfall([
    this.createFolder.bind(undefined, req),
    this.downloadFilesAxios.bind(undefined, req),
    this.zipTheFiles.bind(undefined, req)
  ], function final(err, result) {
    if (err) {
      console.log('SOME ERROR', err);
      resObj.statusCode = err.statusCode || 500;
    } else {
      resObj.statusCode = 200;
      resObj.result = result.questionList;
    }
    console.log('------', resObj.statusCode);
    resObj.messageKey = sails.config.statusCode[resObj.statusCode].key;
    resObj.message = sails.config.statusCode[resObj.statusCode].message;
    return res.send(resObj);
  });
};
}
this.downloadFilesAxios = function (req, obj, callback) {
  SurveyDocs.find({ surveyId: req.body.surveyId })
    .exec(function (err, docsDetails) {
      async.map(docsDetails, function (img, cb) {
        const url = img.docS3Url;
        let imageName = img.docFileName;
        const path = Path.resolve(__dirname, "temp", imageName);
        const writer = Fs.createWriteStream(path);
        Axios({
          method: 'get',
          url: url,
          responseType: 'stream'
        })
          .then(function (response) {
            response.data.pipe(writer);
          });
        // NOTE: no .catch on the Axios promise, so request failures are silently dropped
        writer.on('finish', (done) => {
          console.log('success!!!');
          cb(null, null);
        });
        writer.on('error', (err) => {
          console.log('failed!!!');
          cb(err, null);
        });
      }, (err, data) => {
        if (err) {
          console.log('errrr', err);
        }
        callback(null, obj);
      });
    });
};
this.zipTheFiles = function (req, obj, callback) {
  var surveyId = req.body.surveyId;
  var tempDir = 'assets/zip/' + surveyId + '.zip';
  zipdir('temp', { saveTo: tempDir }, function (err, buffer) {
    callback(null, obj);
  });
  // BUG: this second callback fires immediately, before zipdir finishes,
  // so the waterfall completes while the zip is still being written
  callback(null, obj);
};
Here I am getting a corrupt zip file. Please suggest the solution.
I tried out your example; there are a few things you need to consider in order to make it work.
const async = require('async');
const fs = require('fs');
const path = require('path');
const zipDir = require('zip-dir');
const axios = require('axios');

let writer;

async.waterfall([
  createFolder,
  downLoadFileAxios,
  zip
], function (err, result) {
  if (err) {
    console.log(err);
  } else {
    console.log('result :', result);
  }
});
Let's assume this method creates the temp folder:
function createFolder(callback) {
  setTimeout(function () {
    callback(null, 'temp');
  }, 1000);
}
Here, the writeStream object and its events should be put inside the then block, so that the stream is written to the file correctly.
Another important thing: you don't have a catch block attached to the promise, so if any exception occurs it will simply be eaten up.
function downLoadFileAxios(dirPath, callback) {
  // Hard-coded the image URLs for the sake of simplicity
  let files = [
    'https://free-images.com/lg/be5e/climbing_helmets_climbing_equipment.jpg',
    'https://free-images.com/lg/87ce/lilac_lilac_bush_lilac.jpg'
  ];
  async.mapSeries(files, function (img, cb) {
    let name = img.slice(img.lastIndexOf('/') + 1);
    let imagePath = path.resolve(__dirname, "newDir", name);
    writer = fs.createWriteStream(imagePath);
    axios({
      method: 'get',
      url: img,
      responseType: 'stream'
    })
      .then(function (response) {
        response.data.pipe(writer);
        writer.on('finish', (done) => {
          console.log('success!!!');
          cb(null, null);
        });
        writer.on('error', (err) => {
          console.log('failed!!!');
          cb(err, null);
        });
      })
      .catch((err) => {
        console.log(err);
      });
  }, function (err, result) {
    if (err) {
      console.log('errrr', err);
    }
    callback(null, 'done downloading');
  });
}
function zip(dirPath, callback) {
  let zipPath = path.resolve(__dirname, "assets", "file.zip");
  // console.log(`got directory path : ${dirPath}`);
  zipDir("newDir", {
    saveTo: zipPath
  }, function (err, buffer) {
    if (err) {
      callback(err, null);
    } else {
      callback(null, 'done');
    }
  });
}
This can also be done easily using async/await, like the following.
const fs = require('fs');
const path = require('path');
const zipDir = require('zip-dir');
const axios = require('axios');

// faking the directory creation part
async function createFolder(callback) {
  return new Promise((resolve, reject) => {
    setTimeout(() => {
      resolve(true);
    }, 2000);
  });
}

// Executes in the specified order.
(async () => {
  await createFolder();
  await downLoadFile();
  await zipTheFile();
})();
async function downLoadFile() {
  let files = [
    'https://free-images.com/lg/be5e/climbing_helmets_climbing_equipment.jpg',
    'https://free-images.com/lg/87ce/lilac_lilac_bush_lilac.jpg'
  ];
  for (let i = 0; i < files.length; i++) {
    await downLoadFileAxios(files[i]);
  }
}

async function downLoadFileAxios(url) {
  let name = url.slice(url.lastIndexOf('/') + 1);
  let imagePath = path.resolve(__dirname, "newDir", name);
  let writer = fs.createWriteStream(imagePath);

  const response = await axios({
    url,
    method: 'GET',
    responseType: 'stream'
  });
  response.data.pipe(writer);

  return new Promise((resolve, reject) => {
    writer.on('finish', resolve);
    writer.on('error', reject);
  });
}
function zipTheFile() {
  let zipPath = path.resolve(__dirname, "assets", "file.zip");
  return new Promise((resolve, reject) => {
    zipDir("newDir", {
      saveTo: zipPath
    }, function (err, buffer) {
      if (err) {
        return reject(err);
      }
      return resolve('done');
    });
  });
}
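One small addition worth considering: the top-level IIFE above swallows any rejection. A catch at the end keeps failures visible (a sketch, same function names as above):

(async () => {
  await createFolder();
  await downLoadFile();
  await zipTheFile();
})().catch(err => console.error('pipeline failed:', err));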
Hope this helps!