I am trying to extract multiple files from an AWS S3 bucket and want to merge their contents afterwards.
E.g. I have the following files:
my-bucket/mainfile1.json.gz
my-bucket/mainfile2.json.gz
my-bucket/mainfile3.json.gz
Currently I am accessing a single file like this:
const unzipFromS3 = (key, bucket) => {
  return new Promise((resolve, reject) => {
    AWS.config.loadFromPath(process.env["PWD"] + '/private/awss3/s3_config.json');
    var s3 = new AWS.S3();
    let options = {
      'Bucket': bucket,
      'Key': key,
    };
    s3.getObject(options, function(err, res) {
      if (err) return reject(err);
      resolve(zlib.unzipSync(res.Body).toString());
    });
  });
};

unzipFromS3("mainfile1.json.gz", "my-bucket").then(function(result) {
  console.dir(result);
});
Now this works perfectly for a single file, but how can I achieve the same with multiple files when I want to merge the data from 3 separate files?
Here's an initial idea of how to read the gzipped JSON files from S3, unzip them, then merge the resulting JavaScript objects, and finally gzip and write the merged results back to S3.
const aws = require('aws-sdk');
const zlib = require('zlib');

const s3 = new aws.S3();

const BUCKET = 'mybucket';
const PREFIX = '';
const FILES = ['test1.json.gz', 'test2.json.gz', 'test3.json.gz'];

(async () => {
  const promises = [];
  try {
    // Kick off all the downloads in parallel
    for (let ii = 0; ii < FILES.length; ii++) {
      const params = {
        Bucket: BUCKET,
        Key: `${PREFIX}${FILES[ii]}`,
      };
      console.log('Get:', params.Key, 'from:', params.Bucket);
      promises.push(s3.getObject(params).promise());
    }

    const results = await Promise.all(promises);

    // Unzip each body and parse it as JSON
    const buffers = results.map(result => result.Body);
    const content = buffers.map(buffer => JSON.parse(zlib.unzipSync(buffer).toString()));
    console.log('Read OK', JSON.stringify(content));

    // Shallow-merge the parsed objects
    const merged = Object.assign({}, ...content);
    console.log('Merged content', JSON.stringify(merged));

    // Gzip the merged object and write it back to S3
    const params = {
      Bucket: BUCKET,
      Key: `${PREFIX}result/test.json.gz`,
      Body: zlib.gzipSync(JSON.stringify(merged)),
    };
    console.log('Put:', params.Key, 'to:', params.Bucket);
    await s3.putObject(params).promise();
  } catch (err) {
    console.log(err, err.stack);
    throw err;
  }
})();
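Note that Object.assign does a shallow merge of top-level keys, with later files overwriting earlier ones on key collisions. If your three files instead contain top-level JSON arrays, you would concatenate rather than merge; a minimal sketch, reusing the content variable from above:

const merged = [].concat(...content); // flatten the three parsed arrays into one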
I am currently working on an API just to get to know Node.js, as I am learning it.
I successfully created a route for uploading an mp3 file into an S3 bucket, but when I try to fetch a file from S3 in Uint8List format, I don't get the results I want. (Flutter requires me to send a Uint8List; if this is not a good approach, I can also convert it into a Uint8List on the client side.)
I am able to create a Readable stream, and when the stream receives chunks it logs them to the console. But I am not quite sure how I can send the data back to the client in buffers; I am only able to send the data in one big list, but of course for efficiency this is not the best option.
Is anyone able to help me? This is the code I currently have:
var AWS = require('aws-sdk');

AWS.config.update(
  {
    accessKeyId: AWS_ACCESS_KEY,
    secretAccessKey: AWS_SECRET_ACCESS_KEY,
    region: AWS_REGION
  }
);

var s3 = new AWS.S3();

router.get('/assets/:fileKey', auth, async function (req, res, next) {
  try {
    const fileKey = req.params.fileKey;
    const options = {
      Bucket: AWS_BUCKET_NAME,
      Key: fileKey,
    };

    const chunks = [];

    const getAsBytes = new Promise((resolve, reject) => {
      const readStream = s3.getObject(options).createReadStream();
      readStream.on('data', (chunk) => {
        // console.log('-------new data received--------')
        // console.log(chunk);
        chunks.push(chunk);
        // res.write(chunk);
      });
      readStream.on('error', reject)
      readStream.on('end', resolve);
    }).catch((err) => next(err));

    await getAsBytes;

    res.write(Uint8Array.from(chunks));
    res.end();
  } catch (error) {
    next(error);
  }
});
When I try to pipe the read stream directly, I get a response full of question marks and weird symbols.
Try this: each chunk is actually a Buffer, so you need to convert that Buffer to actual data using .toString().
var AWS = require('aws-sdk');

AWS.config.update({
  accessKeyId: AWS_ACCESS_KEY,
  secretAccessKey: AWS_SECRET_ACCESS_KEY,
  region: AWS_REGION
});

var s3 = new AWS.S3();

router.get('/assets/:fileKey', auth, async function (req, res, next) {
  try {
    const fileKey = req.params.fileKey;
    const options = {
      Bucket: AWS_BUCKET_NAME,
      Key: fileKey,
    };

    const chunks = [];

    const getAsBytes = new Promise((resolve, reject) => {
      const readStream = s3.getObject(options).createReadStream();
      readStream.on('data', (chunk) => {
        // console.log('-------new data received--------')
        // console.log(chunk);
        chunks.push(chunk.toString());
        // res.write(chunk);
      });
      readStream.on('error', reject)
      readStream.on('end', resolve);
    }).catch((err) => next(err));

    await getAsBytes;

    res.write(Uint8Array.from(chunks));
    res.end();
  } catch (error) {
    next(error);
  }
});
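Since the object being fetched here is an mp3 (binary data), converting chunks to strings can corrupt the bytes. A possible alternative, assuming this is an Express route like the one above, is to keep the chunks as raw Buffers and concatenate them before sending; a minimal sketch of the relevant part:

await getAsBytes;
const body = Buffer.concat(chunks); // one Buffer holding the raw bytes
res.set('Content-Type', 'audio/mpeg'); // assumption: the object is an mp3
res.send(body); // the client can read the binary body into a Uint8List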
The issue I am running into is that when I test the function against a .zip file, once it gets to the fs.createReadStream and unzip step it neither runs to completion nor returns an error. I would like to understand what I am doing wrong and what a correct solution would look like.
const AWS = require('aws-sdk');
const fs = require('fs');
const mkdirp = require('mkdirp');
const unzipper = require('unzipper');

exports.handler = async (event, context) => {
  // Variables for bucket init
  let sourceBucket = 'am-doc-mgmt-s3-dev-landing';
  let storageBucket = 'am-doc-mgmt-s3-dev';

  // Variables for folder init and Buffer config
  const localZippedFolder = '/tmp/ZippedStudentData/';
  const localUnzippedFolder = '/tmp/UnzippedStudentData/';
  const ZipBuffer = Buffer.from(localZippedFolder, 'base64');
  const UnzippedBuffer = Buffer.from(localUnzippedFolder, 'base64');

  // Inits AWS S3 Bucket and DynamoDB
  let s3 = new AWS.S3();
  let docClient = new AWS.DynamoDB.DocumentClient({ region: 'us-east-1' });

  // Gets the file bucket and file name of the s3 object from the event
  let fileBucket = event.Records[0].s3.bucket.name;
  let fileName = event.Records[0].s3.object.key;

  let params = {
    Bucket: fileBucket,
    Key: fileName
  };

  // Creates temporary variables
  let tempFile = localZippedFolder + fileBucket;
  let tempUnzippedFile = localUnzippedFolder + fileBucket;

  // Make directories for zipped and unzipped files
  try {
    const zipDirFolder = await mkdirp(localZippedFolder, { recursive: true });
    const unZipDirFolder = await mkdirp(localUnzippedFolder, { recursive: true });
    console.log('SUCCESS: unzipped directory created!');
    console.log('SUCCESS: zipped directory created!');

    // Download files from s3 Bucket
    let newFolder = await s3.getObject(params).promise()
      .then(data => {
        console.log(data);
        return data;
      });

    // Extract files from zipped folder and store them in a local directory
    fs.createReadStream(params.Key)
      .pipe(unzipper.Extract({ path: unZipDirFolder }))
      .on('finish', () => {
        fs.readdir(unZipDirFolder);
      }).on('error', (err) => {
        // error handling here
        console.log(err);
      });
  } catch (error) {
    console.log(error);
  }
};
Error: I'm not getting anything back from the fs.createReadStream call. It's as if it just skips over that part.
It's honestly hard to figure out what problem you're really trying to solve, since you aren't very specific about that. If you want the containing async function not to resolve its promise until the unzipping is done, you can wrap the stream in a promise like this:
const AWS = require('aws-sdk');
const fs = require('fs');
const mkdirp = require('mkdirp');
const unzipper = require('unzipper');

exports.handler = async (event, context) => {
  // Variables for bucket init
  let sourceBucket = 'am-doc-mgmt-s3-dev-landing';
  let storageBucket = 'am-doc-mgmt-s3-dev';

  // Variables for folder init and Buffer config
  const localZippedFolder = '/tmp/ZippedStudentData/';
  const localUnzippedFolder = '/tmp/UnzippedStudentData/';
  const ZipBuffer = Buffer.from(localZippedFolder, 'base64');
  const UnzippedBuffer = Buffer.from(localUnzippedFolder, 'base64');

  // Inits AWS S3 Bucket and DynamoDB
  let s3 = new AWS.S3();
  let docClient = new AWS.DynamoDB.DocumentClient({ region: 'us-east-1' });

  // Gets the file bucket and file name of the s3 object from the event
  let fileBucket = event.Records[0].s3.bucket.name;
  let fileName = event.Records[0].s3.object.key;

  let params = {
    Bucket: fileBucket,
    Key: fileName
  };

  // Creates temporary variables
  let tempFile = localZippedFolder + fileBucket;
  let tempUnzippedFile = localUnzippedFolder + fileBucket;

  // Make directories for zipped and unzipped files
  try {
    const zipDirFolder = await mkdirp(localZippedFolder, { recursive: true });
    const unZipDirFolder = await mkdirp(localUnzippedFolder, { recursive: true });
    console.log('SUCCESS: unzipped directory created!');
    console.log('SUCCESS: zipped directory created!');

    // Download files from s3 Bucket
    let newFolder = await s3.getObject(params).promise();

    // Wrap the stream in a promise so this async function doesn't resolve
    // until the extraction has finished (or failed)
    await new Promise((resolve, reject) => {
      // Extract files from zipped folder and store them in a local directory
      fs.createReadStream(params.Key)
        .pipe(unzipper.Extract({ path: unZipDirFolder }))
        .on('finish', resolve)
        .on('error', reject);
    });
  } catch (error) {
    console.log(error);
    // rethrow error so caller sees the error
    throw error;
  }
};
The caller of this exported function will have to use .then() or await on the returned promise to know when it's done, and .catch() or try/catch around the await to catch errors.
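For example, a minimal sketch of how a caller might consume this exported handler outside of Lambda (say, in a quick local test). The file path is an assumption; the event shape simply mirrors the fields the handler reads:

const { handler } = require('./index'); // assumption: the handler above lives in index.js

const fakeEvent = {
  Records: [{ s3: { bucket: { name: 'am-doc-mgmt-s3-dev-landing' }, object: { key: 'some.zip' } } }]
};

handler(fakeEvent, {})
  .then(() => console.log('unzip finished'))
  .catch(err => console.error('unzip failed', err));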
If someone's open to using Python, they can use a buffer to read and unzip the files. Something like this:
import io
import zipfile
from io import BytesIO

# s3_resource, sourcebucketname, filekey, destinationbucket and logger are
# assumed to be set up elsewhere (boto3 resource, bucket names, logging)
zipped_file = s3_resource.Object(bucket_name=sourcebucketname, key=filekey)
buffer = BytesIO(zipped_file.get()["Body"].read())
zipped = zipfile.ZipFile(buffer)

for file in zipped.namelist():
    logger.info(f'current file in zipfile: {file}')
    final_file_path = file + '.extension'
    with zipped.open(file, "r") as f_in:
        content = f_in.read()
        destinationbucket.upload_fileobj(
            io.BytesIO(content),
            final_file_path,
            ExtraArgs={"ContentType": "text/plain"}
        )
There's also a tutorial here: https://betterprogramming.pub/unzip-and-gzip-incoming-s3-files-with-aws-lambda-f7bccf0099c9
I'm trying to get an s3.getObject() call running inside an async getInitialProps() function in a Next.js project, but I can't for the life of me figure out how to get the results prepped so they can be returned as an object (which is needed for getInitialProps() and Next.js' SSR to work properly).
Here is the code:
static async getInitialProps({ query }) {
  const AWS = require('aws-sdk');
  const s3 = new AWS.S3({
    credentials: {
      accessKeyId: KEY,
      secretAccessKey: KEY
    }
  });

  // The id from the route (e.g. /img/abc123987)
  let filename = query.id;

  const params = {
    Bucket: BUCKETNAME,
    Key: KEYDEFAULTS + '/' + filename
  };

  const res = await s3.getObject(params, (err, data) => {
    if (err) throw err;
    let imgData = 'data:image/jpeg;base64,' + data.Body.toString('base64');
    return imgData;
  });

  return ...
}
The idea is to fetch an image from S3 and return it as base64 code (just to clear things up).
In your code, s3.getObject works with a callback; you need to wait for the callback to be called.
You can achieve this by wrapping the callback in a promise.
static async getInitialProps({ query }) {
  const AWS = require('aws-sdk');
  const s3 = new AWS.S3({
    credentials: {
      accessKeyId: KEY,
      secretAccessKey: KEY
    }
  });

  // The id from the route (e.g. /img/abc123987)
  let filename = query.id;

  const params = {
    Bucket: BUCKETNAME,
    Key: KEYDEFAULTS + '/' + filename
  };

  const res = await new Promise((resolve, reject) => {
    s3.getObject(params, (err, data) => {
      if (err) return reject(err);
      let imgData = 'data:image/jpeg;base64,' + data.Body.toString('base64');
      resolve(imgData);
    });
  });

  return ...
}
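As a side note, the AWS SDK for JavaScript (v2) also exposes a .promise() method on the request object, so the manual promise wrapper can be shortened; a minimal sketch using the same params:

const data = await s3.getObject(params).promise();
const imgData = 'data:image/jpeg;base64,' + data.Body.toString('base64');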
I'm a bit confused about how to proceed. I am using Archiver (a Node.js module) to write data to a zip file. Currently, I have my code working when I write to a file (local storage).
var fs = require('fs');
var archiver = require('archiver');

var output = fs.createWriteStream(__dirname + '/example.zip');
var archive = archiver('zip', {
  zlib: { level: 9 }
});

archive.pipe(output);
archive.append(mybuffer, { name: 'msg001.txt' });
I'd like to modify the code so that the archive target is an AWS S3 bucket. Looking at the code examples, I can specify the bucket name and key (and body) either when I call upload or when I create the S3 object, as in:
var s3 = new AWS.S3();
var params = { Bucket: 'myBucket', Key: 'myMsgArchive.zip', Body: myStream };
s3.upload(params, function (err, data) {
  …
});
Or
s3 = new AWS.S3({ params: { Bucket: 'myBucket', Key: 'myMsgArchive.zip' } });
s3.upload({ Body: myStream })
  .send(function (err, data) {
    …
  });
With regard to my S3 example(s), myStream appears to be a readable stream, and I am confused about how to make this work, since archive.pipe requires a writable stream. Is this a case where we need to use a pass-through stream? I've found an example where someone created a pass-through stream, but it is too terse to gain a proper understanding from. The specific example I am referring to is:
Pipe a stream to s3.upload()
Any help would be greatly appreciated. Thanks.
This could be useful for anyone else wondering how to use pipe.
Since you correctly referenced the example using the pass-through stream, here's my working code:
1 - The routine itself, zipping files with node-archiver
exports.downloadFromS3AndZipToS3 = () => {
  // These are my input files I'm willing to read from S3 to ZIP them
  const files = [
    `${s3Folder}/myFile.pdf`,
    `${s3Folder}/anotherFile.xml`
  ]

  // Just in case you like to rename them as they have a different name in the final ZIP
  const fileNames = [
    'finalPDFName.pdf',
    'finalXMLName.xml'
  ]

  // Use promises to get them all
  const promises = []

  files.map((file) => {
    promises.push(s3client.getObject({
      Bucket: yourBucket,
      Key: file
    }).promise())
  })

  // Define the ZIP target archive
  let archive = archiver('zip', {
    zlib: { level: 9 } // Sets the compression level.
  })

  // Pipe!
  archive.pipe(uploadFromStream(s3client, 'someDestinationFolderPathOnS3', 'zipFileName.zip'))

  archive.on('warning', function (err) {
    if (err.code === 'ENOENT') {
      // log warning
    } else {
      // throw error
      throw err;
    }
  })

  // Good practice to catch this error explicitly
  archive.on('error', function (err) {
    throw err;
  })

  // The actual archive is populated here
  return Promise
    .all(promises)
    .then((data) => {
      data.map((thisFile, index) => {
        archive.append(thisFile.Body, { name: fileNames[index] })
      })
      archive.finalize()
    })
}
2 - The helper method
const uploadFromStream = (s3client, someFolder, aFilename) => {
  const pass = new stream.PassThrough()

  const s3params = {
    Bucket: yourBucket,
    Key: `${someFolder}/${aFilename}`,
    Body: pass,
    ContentType: 'application/zip'
  }

  s3client.upload(s3params, (err, data) => {
    if (err)
      console.log(err)
    if (data)
      console.log('Success')
  })

  return pass
}
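For completeness, the two snippets above assume the usual setup is already in scope; something along these lines, where the bucket and folder names are the answer's placeholders:

const AWS = require('aws-sdk')
const archiver = require('archiver')
const stream = require('stream')

const s3client = new AWS.S3()
const yourBucket = 'your-bucket-name' // placeholder
const s3Folder = 'some/source/folder' // placeholder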
The following example takes the accepted answer and makes it work with local files as requested.
const archiver = require("archiver")
const fs = require("fs")
const AWS = require("aws-sdk")
const s3 = new AWS.S3()
const stream = require("stream")

const zipAndUpload = async () => {
  const files = [`test1.txt`, `test2.txt`]
  const fileNames = [`test1target.txt`, `test2target.txt`]

  const archive = archiver("zip", {
    zlib: { level: 9 } // Sets the compression level.
  })

  files.map((thisFile, index) => {
    archive.append(fs.createReadStream(thisFile), { name: fileNames[index] })
  })

  const uploadStream = new stream.PassThrough()
  archive.pipe(uploadStream)
  archive.finalize()

  archive.on("warning", function (err) {
    if (err.code === "ENOENT") {
      console.log(err)
    } else {
      throw err
    }
  })

  archive.on("error", function (err) {
    throw err
  })

  archive.on("end", function () {
    console.log("archive end")
  })

  await uploadFromStream(uploadStream)
  console.log("all done")
}

const uploadFromStream = async pass => {
  const s3params = {
    Bucket: "bucket-name",
    Key: `streamtest.zip`,
    Body: pass,
    ContentType: "application/zip"
  }
  return s3.upload(s3params).promise()
}

zipAndUpload()
I am trying to download a zip file in my MERN application. I am getting the file in the response; however, the client does not download the actual file. I am using archiver to zip files and then return them in a fetch call.
Archive Service:
const archiver = require('archiver')
const zip = archiver('zip')
const path = require('path')
const fs = require('fs')
const appDir = path.dirname(require.main.filename)

exports.FileArchiver = function (feed, res) {
  // const app = this.app;
  const uploadsDir = path.join(appDir, '/uploads/');
  const templatesDir = path.join(appDir, '/templates/');
  const feedArray = feed.feed.data;
  const extensions = [".jpg", ".png", ".svg"];
  const feedArrayString = JSON.stringify(feedArray);
  const feedArrayObject = JSON.parse(feedArrayString);
  let imageArray = [];
  let templateType = 'b'; //test

  // grab image names from object
  feedArrayObject.forEach(function (x) { iterate(x) });

  // remove duplicates
  imageArray = uniq_fast(imageArray);

  // zip images
  for (let i = 0; i < imageArray.length; i++) {
    console.log(imageArray[i])
    const filePath = path.join(uploadsDir, imageArray[i]);
    zip.append(fs.createReadStream(filePath), { name: 'images/' + imageArray[i] });
  }

  res.attachment(feed.name + '.zip');

  zip.pipe(res);
  zip.append(feedArrayString, { name: 'feed.json' })
  zip.directory(templatesDir + '/' + templateType, false);

  zip.on('error', (err) => { throw err; });

  zip.on('warning', (err) => {
    if (err.code === 'ENOENT') {
      console.log('ENOENT for archive')
    } else {
      throw err;
    }
  });

  zip.finalize();

  return this;
}
Client side fetch:
export const downloadData = (url, _id, name, type) => {
  return fetch(url, { method: 'GET' })
    .then((res) => {
      console.log(res);
      return res;
    })
}
Client side Headers attached:
content-disposition: attachment; filename="a_gpo.zip"
content-type: application/zip
The network request returns a 200 status, and I can also see that the response in the client contains the zip file's binary content. However, the browser never actually downloads the file.
Try redirecting the browser location to the URL:
export const downloadData = (url) => {
  window.location = url;
}
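If you would rather keep using fetch (for example, to send auth headers), another common option is to read the response as a Blob and trigger the download from an object URL. A minimal sketch, independent of the code above; the filename parameter is an assumption:

export const downloadData = async (url, filename) => {
  const res = await fetch(url, { method: 'GET' });
  const blob = await res.blob();
  const objectUrl = window.URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = objectUrl;
  a.download = filename; // e.g. 'a_gpo.zip'
  document.body.appendChild(a);
  a.click();
  a.remove();
  window.URL.revokeObjectURL(objectUrl);
}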