how to pipe an archive (zip) to an S3 bucket - javascript

I’m a bit confused with how to proceed. I am using Archive ( node js module) as a means to write data to a zip file. Currently, I have my code working when I write to a file (local storage).
var fs = require('fs');
var archiver = require('archiver');
var output = fs.createWriteStream(__dirname + '/example.zip');
var archive = archiver('zip', {
zlib: { level: 9 }
});
archive.pipe(output);
archive.append(mybuffer, {name: ‘msg001.txt’});
I’d like to modify the code so that the archive target file is an AWS S3 bucket. Looking at the code examples, I can specify the bucket name and key (and body) when I create the bucket object as in:
var s3 = new AWS.S3();
var params = {Bucket: 'myBucket', Key: 'myMsgArchive.zip' Body: myStream};
s3.upload( params, function(err,data){
…
});
Or
s3 = new AWS.S3({ parms: {Bucket: ‘myBucket’ Key: ‘myMsgArchive.zip’}});
s3.upload( {Body: myStream})
.send(function(err,data) {
…
});
With regards to my S3 example(s), myStream appears to be a readable stream and I am confused as how to make this work as archive.pipe requires a writeable stream. Is this something where we need to use a pass-through stream? I’ve found an example where someone created a pass-through stream but the example is too terse to gain proper understanding. The specific example I am referring to is:
Pipe a stream to s3.upload()
Any help someone can give me would greatly be appreciated. Thanks.

This could be useful for anyone else wondering how to use pipe.
Since you correctly referenced the example using the pass-through stream, here's my working code:
1 - The routine itself, zipping files with node-archiver
exports.downloadFromS3AndZipToS3 = () => {
// These are my input files I'm willing to read from S3 to ZIP them
const files = [
`${s3Folder}/myFile.pdf`,
`${s3Folder}/anotherFile.xml`
]
// Just in case you like to rename them as they have a different name in the final ZIP
const fileNames = [
'finalPDFName.pdf',
'finalXMLName.xml'
]
// Use promises to get them all
const promises = []
files.map((file) => {
promises.push(s3client.getObject({
Bucket: yourBubucket,
Key: file
}).promise())
})
// Define the ZIP target archive
let archive = archiver('zip', {
zlib: { level: 9 } // Sets the compression level.
})
// Pipe!
archive.pipe(uploadFromStream(s3client, 'someDestinationFolderPathOnS3', 'zipFileName.zip'))
archive.on('warning', function(err) {
if (err.code === 'ENOENT') {
// log warning
} else {
// throw error
throw err;
}
})
// Good practice to catch this error explicitly
archive.on('error', function(err) {
throw err;
})
// The actual archive is populated here
return Promise
.all(promises)
.then((data) => {
data.map((thisFile, index) => {
archive.append(thisFile.Body, { name: fileNames[index] })
})
archive.finalize()
})
}
2 - The helper method
const uploadFromStream = (s3client) => {
const pass = new stream.PassThrough()
const s3params = {
Bucket: yourBucket,
Key: `${someFolder}/${aFilename}`,
Body: pass,
ContentType: 'application/zip'
}
s3client.upload(s3params, (err, data) => {
if (err)
console.log(err)
if (data)
console.log('Success')
})
return pass
}

The following example takes the accepted answer and makes it work with local files as requested.
const archiver = require("archiver")
const fs = require("fs")
const AWS = require("aws-sdk")
const s3 = new AWS.S3()
const stream = require("stream")
const zipAndUpload = async () => {
const files = [`test1.txt`, `test2.txt`]
const fileNames = [`test1target.txt`, `test2target.txt`]
const archive = archiver("zip", {
zlib: { level: 9 } // Sets the compression level.
})
files.map((thisFile, index) => {
archive.append(fs.createReadStream(thisFile), { name: fileNames[index] })
})
const uploadStream = new stream.PassThrough()
archive.pipe(uploadStream)
archive.finalize()
archive.on("warning", function (err) {
if (err.code === "ENOENT") {
console.log(err)
} else {
throw err
}
})
archive.on("error", function (err) {
throw err
})
archive.on("end", function () {
console.log("archive end")
})
await uploadFromStream(uploadStream)
console.log("all done")
}
const uploadFromStream = async pass => {
const s3params = {
Bucket: "bucket-name",
Key: `streamtest.zip`,
Body: pass,
ContentType: "application/zip"
}
return s3.upload(s3params).promise()
}
zipAndUpload()

Related

Execute function from S3 downloaded JavaScript file without creating a local file on disk

Assume that we download the script below from S3:
const sum_two = (a, b) => {
return a + b;
}
Downloading using this:
async function downloadFileS3(filenameDownload) {
const AWS = require('aws-sdk');
const fs = require('fs');
AWS.config.update(
{
accessKeyId: "XXXXXXXXXXXXXXXXXXXXXXXX",
secretAccessKey: "YYYYYYYYYYYYYYYYYYYYY",
correctClockSkew: true,
}
);
const params = {
Bucket: 'ZZZZZZZZZZZZZZZZZZZZZZZZ',
Key: filenameDownload,
}
const s3 = new AWS.S3();
try {
const data = await s3.getObject(params).promise();
if (data) {
// do something with data.Body
const downloaded = data.Body.toString('utf-8');
// grab sum_two from "downloaded" and use below
.......
.......
.......
}
}
catch (error) {
console.log(error);
}
}
How can we extract sum_two (and use it afterwards) from downloaded WITHOUT creating a local file ?
Meaning I don't want to use the code below !!!
// fs.writeFile(`${filenameDownload}`, downloaded, function (err) {
// if (err) {
// console.log('Failed to create file', err);
// }
// console.log(sum_two(.... , ....));
// });
You can use node.js native vm module for this.
Create a Script from the downloaded code
Run it in the current context
After that the downloaded function will become available for usage:
const vm = require('vm');
// ...
const downloaded = data.Body.toString('utf-8');
const script = new vm.Script(downloaded);
script.runInThisContext()
console.log(sum_two(1,2))

Upload Image from form-data to S3 using a Lambda

So I am writing a Lambda that will take in some form data via a straight POST through API Gateway (testing using Postman for now) and then send that image to S3 for storage. Every time I run it, the image uploaded to S3 is corrupted and won't open properly. I have seen people having to decode/encode the incoming data but I feel like I have tried everything using Buffer.from. I am only looking to store either .png or .jpg. The below code does not reflect my attempts using Base64 encoding/decoding seeing they all failed. Here is what I have so far -
Sample Request in postman
{
image: (uploaded .jpg/.png),
metadata: {tag: 'iPhone'}
}
Lambda
const AWS = require('aws-sdk')
const multipart = require('aws-lambda-multipart-parser')
const s3 = new AWS.S3();
exports.handler = async (event) => {
const form = multipart.parse(event, false)
const s3_response = await upload_s3(form)
return {
statusCode: '200',
body: JSON.stringify({ data: data })
}
};
const upload_s3 = async (form) => {
const uniqueId = Math.random().toString(36).substr(2, 9);
const key = `${uniqueId}_${form.image.filename}`
const request = {
Bucket: 'bucket-name',
Key: key,
Body: form.image.content,
ContentType: form.image.contentType,
}
try {
const data = await s3.putObject(request).promise()
return data
} catch (e) {
console.log('Error uploading to S3: ', e)
return e
}
}
EDIT:
I am now atempting to save the image into the /tmp directory then use a read stream to upload to s3. Here is some code for that
s3 upload function
const AWS = require('aws-sdk')
const fs = require('fs')
const s3 = new AWS.S3()
module.exports = {
upload: (file) => {
return new Promise((resolve, reject) => {
const key = `${Date.now()}.${file.extension}`
const bodyStream = fs.createReadStream(file.path)
const params = {
Bucket: process.env.S3_BucketName,
Key: key,
Body: bodyStream,
ContentType: file.type
}
s3.upload(params, (err, data) => {
if (err) {
return reject(err)
}
return resolve(data)
}
)
})
}
}
form parser function
const busboy = require('busboy')
module.exports = {
parse: (req, temp) => {
const ctype = req.headers['Content-Type'] || req.headers['content-type']
let parsed_file = {}
return new Promise((resolve) => {
try {
const bb = new busboy({
headers: { 'content-type': ctype },
limits: {
fileSize: 31457280,
files: 1,
}
})
bb.on('file', function (fieldname, file, filename, encoding, mimetype) {
const stream = temp.createWriteStream()
const ext = filename.split('.')[1]
console.log('parser -- ext ', ext)
parsed_file = { name: filename, path: stream.path, f: file, type: mimetype, extension: ext }
file.pipe(stream)
}).on('finish', () => {
resolve(parsed_file)
}).on('error', err => {
console.err(err)
resolve({ err: 'Form data is invalid: parsing error' })
})
if (req.end) {
req.pipe(bb)
} else {
bb.write(req.body, req.isBase64Encoded ? 'base64' : 'binary')
}
return bb.end()
} catch (e) {
console.error(e)
return resolve({ err: 'Form data is invalid: parsing error' })
}
})
}
}
handler
const form_parser = require('./form-parser').parse
const s3_upload = require('./s3-upload').upload
const temp = require('temp')
exports.handler = async (event, context) => {
temp.track()
const parsed_file = await form_parser(event, temp)
console.log('index -- parsed form', parsed_file)
const result = await s3_upload(parsed_file)
console.log('index -- s3 result', result)
temp.cleanup()
return {
statusCode: '200',
body: JSON.stringify(result)
}
}
The above edited code is a combination of other code and a github repo I found that is trying to achieve the same results. Even with this solution the file is still corrupted
Figured out this issue. Code works perfectly fine - it was an issue with API Gateway. Need to go into the API Gateway settings and set thee Binary Media Type to multipart/form-data then re-deploy the API. Hope this helps someone else who is banging their head against the wall on figuring out sending images via form data to a lambda.

Node.js process exiting in the middle, with no error (using streams)

I'm writing a Lambda function which is given a list of text files on S3, and concatenates them together, and then zips that resulting file. For some reason, the function is bombing out in the middle of the process, with no errors.
The payload sent to the Lambda func looks like this:
{
"sourceFiles": [
"s3://bucket/largefile1.txt",
"s3://bucket/largefile2.txt"
],
"destinationFile": "s3://bucket/concat.zip",
"compress": true,
"omitHeader": false,
"preserveSourceFiles": true
}
The scenarios in which this function works totally fine:
The two files are small, and compress === false
The two files are small, and compress === true
The two files are large, and compress === false
If I try to have it compress two large files, it quits in the middle. The concatenation process itself works fine, but when it tries to use zip-stream to add the stream to an archive, it fails.
The two large files together are 483,833 bytes. When the Lambda function fails, it reads either 290,229 or 306,589 bytes (it's random) then quits.
This is the main entry point of the function:
const packer = require('zip-stream');
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3({ apiVersion: '2006-03-01' });
const { concatCsvFiles } = require('./csv');
const { s3UrlToParts } = require('./utils');
function addToZip(archive, stream, options) {
return new Promise((resolve, reject) => {
archive.entry(stream, options, (err, entry) => {
console.log('entry done', entry);
if (err) reject(err);
resolve(entry);
});
});
}
export const handler = async event => {
/**
* concatCsvFiles returns a readable stream to pass to either the archiver or
* s3.upload.
*/
let bytesRead = 0;
try {
const stream = await concatCsvFiles(event.sourceFiles, {
omitHeader: event.omitHeader,
});
stream.on('data', chunk => {
bytesRead += chunk.length;
console.log('read', bytesRead, 'bytes so far');
});
stream.on('end', () => {
console.log('this is never called :(');
});
const dest = s3UrlToParts(event.destinationFile);
let archive;
if (event.compress) {
archive = new packer();
await addToZip(archive, stream, { name: 'concat.csv' });
archive.finalize();
}
console.log('uploading');
await s3
.upload({
Body: event.compress ? archive : stream,
Bucket: dest.bucket,
Key: dest.key,
})
.promise();
console.log('done uploading');
if (!event.preserveSourceFiles) {
const s3Objects = event.sourceFiles.map(s3Url => {
const { bucket, key } = s3UrlToParts(s3Url);
return {
bucket,
key,
};
});
await s3
.deleteObjects({
Bucket: s3Objects[0].bucket,
Delete: {
Objects: s3Objects.map(s3Obj => ({ Key: s3Obj.key })),
},
})
.promise();
}
console.log('## Never gets here');
// return {
// newFile: event.destinationFile,
// };
} catch (e) {
if (e.code) {
throw new Error(e.code);
}
throw e;
}
};
And this is the concatenation code:
import MultiStream from 'multistream';
import { Readable } from 'stream';
import S3 from 'aws-sdk/clients/s3';
import { s3UrlToParts } from './utils';
const s3 = new S3({ apiVersion: '2006-03-01' });
/**
* Takes an array of S3 URLs and returns a readable stream of the concatenated results
* #param {string[]} s3Urls Array of S3 URLs
* #param {object} options Options
* #param {boolean} options.omitHeader Omit the header from the final output
*/
export async function concatCsvFiles(s3Urls, options = {}) {
// Get the header so we can use the length to set an offset in grabbing files
const firstFile = s3Urls[0];
const file = s3UrlToParts(firstFile);
const data = await s3
.getObject({
Bucket: file.bucket,
Key: file.key,
Range: 'bytes 0-512', // first 512 bytes is pretty safe for header size
})
.promise();
const streams = [];
const [header] = data.Body.toString().split('\n');
for (const s3Url of s3Urls) {
const { bucket, key } = s3UrlToParts(s3Url);
const stream = s3
.getObject({
Bucket: bucket,
Key: key,
Range: `bytes=${header.length + 1}-`, // +1 for newline char
})
.createReadStream();
streams.push(stream);
}
if (!options.omitHeader) {
const headerStream = new Readable();
headerStream.push(header + '\n');
headerStream.push(null);
streams.unshift(headerStream);
}
const combinedStream = new MultiStream(streams);
return combinedStream;
}
Got it. The problem was actually with the zip-stream library. Apparently it doesn't work well with S3 + streaming. I tried yazl and it works perfectly.

Angular - how to get S3 bucket contents as Observable

How do I structure a service method in Angular4 to make an s3.listObjects call to return the contents of an S3 bucket as an Observable?
Here's what I'm trying at present, failing miserably:
public loadFilesFromS3(): Observable<any[]> {
const s3 = new AWS.S3();
const params = {
Bucket: 'bucket-name',
Prefix: 'prefix-name'
};
return (
s3.listObjects(params, (err, data) => {
if (err) {
throw err;
} else {
return(data);
}
})
)
}
Just completely stuck on this for the moment! :-|
Well, after some homework here's what I came up with. Seems to work well:
private tracksListSubject = new BehaviorSubject([]);
public tracksList$: Observable<Track[]> = this.tracksListSubject.asObservable();
public loadTracksFromS3() {
console.log('loading tracks from S3...');
this.authenticate();
const s3 = new AWS.S3();
const params = {
Bucket: config.AWS_BUCKET_NAME
};
s3.listObjects(params, (err, data) => {
if (err) {
console.log('error!', err);
}
const raw = data.Contents;
const tracks: Track[] = [];
raw.forEach((item) => {
tracks.push({
id: item.ETag,
title: item.Key,
url: config.AWS_BASE_URL + item.Key,
size: Math.round(item.Size / 1024 / 1024 * 10) / 10
})
});
console.log(tracks.length, 'tracks loaded');
this.tracksListSubject.next(tracks);
});
}
Then in other components I can just inject this service and subscribe to the tracksList$ property on it. Every time I modify the list of tracks inside the service, I issue this.tracksListSubject.next(newTracksList).
You should be able to use the Observable.bindNodeCallback function to convert the s3.listObjects function from a node style callback to a function that returns an Observable.
const listObjectsAsObservable = Observable.bindNodeCallback(s3.listObjects.bind(s3));
Note that as shown above you need to bind the s3 object to the function, to tell it that s3 is this. Otherwise, you will get an error.
Then you can use it as follows:
const params = {
Bucket: config.AWS_BUCKET_NAME
};
listObjectsAsObservable(params)
.subscribe({
next: (response) => console.log(response),
error: (err) => console.log(err)
});

Write to a CSV in Node.js

I am struggling to find a way to write data to a CSV in Node.js.
There are several CSV plugins available however they only 'write' to stdout.
Ideally I want to write on a row-by-row basis using a loop.
You can use fs (https://nodejs.org/api/fs.html#fs_fs_writefile_file_data_options_callback):
var dataToWrite;
var fs = require('fs');
fs.writeFile('form-tracking/formList.csv', dataToWrite, 'utf8', function (err) {
if (err) {
console.log('Some error occured - file either not saved or corrupted file saved.');
} else{
console.log('It\'s saved!');
}
});
The docs for node-csv-parser (npm install csv) specifically state that it can be used with streams (see fromStream, toStream). So it's not hard-coded to use stdout.
Several other CSV parsers also come up when you npm search csv -- you might want to look at them too.
Here is a simple example using csv-stringify to write a dataset that fits in memory to a csv file using fs.writeFile.
import stringify from 'csv-stringify';
import fs from 'fs';
let data = [];
let columns = {
id: 'id',
name: 'Name'
};
for (var i = 0; i < 10; i++) {
data.push([i, 'Name ' + i]);
}
stringify(data, { header: true, columns: columns }, (err, output) => {
if (err) throw err;
fs.writeFile('my.csv', output, (err) => {
if (err) throw err;
console.log('my.csv saved.');
});
});
If you want to use a loop as you say you can do something like this with Node fs:
let fs = require("fs")
let writeStream = fs.createWriteStream('/path/filename.csv')
someArrayOfObjects.forEach((someObject, index) => {
let newLine = []
newLine.push(someObject.stringPropertyOne)
newLine.push(someObject.stringPropertyTwo)
....
writeStream.write(newLine.join(',')+ '\n', () => {
// a line was written to stream
})
})
writeStream.end()
writeStream.on('finish', () => {
console.log('finish write stream, moving along')
}).on('error', (err) => {
console.log(err)
})
In case you don't wanna use any library besides fs, you can do it manually.
let fileString = ""
let separator = ","
let fileType = "csv"
let file = `fileExample.${fileType}`
Object.keys(jsonObject[0]).forEach(value=>fileString += `${value}${separator}`)
fileString = fileString.slice(0, -1)
fileString += "\n"
jsonObject.forEach(transaction=>{
Object.values(transaction).forEach(value=>fileString += `${value}${separator}`)
fileString = fileString.slice(0, -1)
fileString += "\n"
})
fs.writeFileSync(file, fileString, 'utf8')
For those who prefer fast-csv:
const { writeToPath } = require('#fast-csv/format');
const path = `${__dirname}/people.csv`;
const data = [{ name: 'Stevie', id: 10 }, { name: 'Ray', id: 20 }];
const options = { headers: true, quoteColumns: true };
writeToPath(path, data, options)
.on('error', err => console.error(err))
.on('finish', () => console.log('Done writing.'));
**In case you don't wanna use any library besides fs, you can do it manually. More over you can filter the data as you want to write to CSV file
**
router.get('/apiname', (req, res) => {
const data = arrayOfObject; // you will get from somewhere
/*
// Modify old data (New Key Names)
let modifiedData = data.map(({ oldKey1: newKey1, oldKey2: newKey2, ...rest }) => ({ newKey1, newKey2, ...rest }));
*/
const path = './test'
writeToFile(path, data, (result) => {
// get the result from callback and process
console.log(result) // success or error
});
});
writeToFile = (path, data, callback) => {
fs.writeFile(path, JSON.stringify(data, null, 2), (err) => { // JSON.stringify(data, null, 2) help you to write the data line by line
if (!err) {
callback('success');
// successfull
}
else {
callback('error');
// some error (catch this error)
}
});
}
this is the code that worked for me in nest js
import { Parser } from "json2csv";
const csv = require('csvtojson');
const csvFilePath = process.cwd() + '/' + file.path;
let csv data = await csv().fromFile(csvFilePath); /// read data from csv into an array of json
/// * from here how to write data into csv *
data.push({
label: value,
.......
})
}
const fields = [
'field1','field2', ...
]
const parser = new Parser({ fields, header:false }); /// if dont want header else remove header: false
const csv = parser.parse(data);
appendFileSync('./filename.csv',`${csv}\n`); // remove /n if you dont want new line at the end

Categories