I am creating a service which uploads a JSON file to an S3 bucket; later on I need to export it again to perform further operations.
let record = [{a:1,b:1,c:1}]
/**
 * Helper for JSON "worksheets" (collections of rows) that are stored as
 * `<tableName>.json` objects and fetched through the injected S3 service.
 */
export class JsonService {
  constructor(public s3Service: S3Service) {}

  /**
   * Returns the result of `worksheet.concat(raw_data)`.
   * The combined value is RETURNED; callers must keep the return value.
   */
  addRowsJSON(raw_data, worksheet) {
    return worksheet.concat(raw_data)
  }

  /** Creates a worksheet from the given rows; the rows are returned as-is. */
  createJSONSheet(raw_data) {
    return raw_data
  }

  /**
   * Fetches `<tableName>.json` via `s3Service.getFile(key, id)` and parses
   * the UTF-8 decoded body with JSON5.
   */
  async readJSONSheet(tableName, id) {
    const objectKey = `${tableName}.json`
    const { Body } = await this.s3Service.getFile(objectKey, id)
    return JSON5.parse(Body.toString('utf-8'))
  }
}
I am using the JSON service to perform basic operations.
The caller uses it like this:
try {
  // Read the first batch; an empty first batch means there is nothing to export.
  rows = await cursor.read(batchSize);
  if (rows.length) {
    worksheet = this.jsonService.createJSONSheet(rows)
    // Keep reading until the cursor returns an empty batch.
    while (rows.length) {
      rows = await cursor.read(batchSize);
      if (rows.length) {
        // addRowsJSON returns the concatenated worksheet instead of modifying
        // `worksheet` in place, so the result has to be assigned back.
        // Previously it was discarded and only the first batch was uploaded.
        worksheet = this.jsonService.addRowsJSON(rows, worksheet)
      }
    }
  } else {
    worksheet = this.jsonService.createJSONSheet([])
  }
  // NOTE(review): uploadFile is declared as (imageBuffer, fileName, id); no `id`
  // (bucket) is passed here, so the bucket name becomes "undefined" - confirm
  // which value should be supplied.
  await this.s3Service.uploadFile(worksheet, tableName)
}
The problem is that, while uploading, everything needs to be held in memory. Is there a way to stream the upload, pushing the batches as they are read?
/**
 * Minimal wrapper around the AWS SDK S3 client for storing and fetching
 * JSON documents.
 */
export class S3Service {
  constructor() {}

  /**
   * Serialises `imageBuffer` with JSON5 and uploads it as `<fileName>.json`.
   *
   * @param imageBuffer value to serialise (despite the name, it is stringified, not sent as raw bytes)
   * @param fileName object key without the `.json` suffix
   * @param id name of the target bucket
   * @returns the promise returned by `s3.upload(...).promise()`
   */
  async uploadFile(imageBuffer, fileName: string, id: string) {
    Logger.log(`Upload File for ${fileName}`)
    const s3 = new S3();
    const params = {
      Bucket: `${id}`,
      Body: JSON5.stringify(imageBuffer),
      Key: `${fileName}.json`,
      ContentType: 'application/json'
    }
    Logger.log(`Uploading file for ${fileName}`)
    // The SDK already hands back a promise; wrapping it in `new Promise`
    // inside an async method added nothing.
    return s3.upload(params).promise()
  }

  /**
   * Fetches the object `key` from bucket `id`.
   *
   * @returns the promise result of `s3.getObject(...).promise()`
   */
  async getFile(key, id) {
    const s3 = new S3();
    const params = {
      Bucket: `${id}`,
      Key: key
    }
    return await s3.getObject(params).promise()
  }
}
How can I create a read stream? I am currently using:
// Loads the sheet for `table_name` through the JSON service; the awaited value
// is the complete parsed result, not a stream.
const data = await this.jsonService.readJSONSheet(`${table_name}`, id)
// Performing operation on data
This increases memory usage.
Related
I am initializing a Node.js app in index.ts with data from a database that is crucial for the app to work.
index.ts
import {getInitialData} from 'initData.ts';
// Start-up data shared with other modules; it stays undefined until
// initializeAppData() has completed successfully.
export let APP_DATA: AppData;

/**
 * Loads the initial data into APP_DATA and then, outside of the test
 * environment, initialises Mongoose and starts the server.
 * Any error is logged and not rethrown.
 */
export const initializeAppData = async () => {
  try {
    const initialData = await getInitialData();
    APP_DATA = initialData as AppData;

    // Under NODE_ENV === 'test' only the data is loaded.
    if (process.env.NODE_ENV === 'test') {
      return;
    }
    initializeMongoose();
    startServer();
  } catch (error) {
    console.log(error);
  }
};
initData.ts
// Database that holds the start-up data; test runs use their own database.
const dbName: string =
  process.env.NODE_ENV === 'test' ? 'testDb' : 'initialData';
// Connection string built from the MONGODB_URI environment variable.
const uri = `${process.env.MONGODB_URI}/?maxPoolSize=20&w=majority`;
/**
 * Reads the complete `config` and `aao` collections (without `_id`) from the
 * database `dbName` and returns them as arrays.
 *
 * @returns `{ config, aao }`, or `undefined` when connecting or reading
 *          failed (the error is logged)
 */
export async function getInitialData() {
  const client = new MongoClient(uri);
  try {
    await client.connect();
    const database = client.db(dbName);
    const withoutId = { projection: { _id: 0 } };
    const config = await database
      .collection('config')
      .find({}, withoutId)
      .toArray();
    const aao = await database.collection('aao').find({}, withoutId).toArray();
    return { config, aao };
  } catch (err) {
    // The previous catch body only created an arrow function and never
    // called it, so failures vanished without any log output.
    console.log(err);
    return undefined;
  } finally {
    // Always release the connection, also on failure.
    await client.close();
  }
}
I'm using this array in another file and import it there.
missionCreateHandler
import { APP_DATA } from '../index';
/**
 * Appends one `unarranged` resource entry per required unit to
 * `newMission.resources`, based on the entry in `APP_DATA.aao` whose first
 * key equals `alarmKeyword`.
 *
 * @param alarmKeyword keyword to look up in `APP_DATA.aao`
 * @param newMission mission whose `resources` array is extended in place
 * @throws Error when `APP_DATA` is not set or holds no entry for the keyword
 */
export const addMissionResources = (
  alarmKeyword: AlarmKeyword,
  newMission: MissionDocument
) => {
  const alarmKeywordObject = APP_DATA?.aao.find(
    (el) => Object.keys(el)[0] === alarmKeyword
  );
  // Fail with an explicit message instead of the opaque
  // "Cannot convert undefined or null to object" from Object.values.
  if (alarmKeywordObject == null) {
    throw new Error(
      `No resource definition found for alarm keyword "${alarmKeyword}" (is APP_DATA initialized?)`
    );
  }
  for (const resourceCommand of Object.values(alarmKeywordObject)) {
    // Each entry maps a resource type to the number of units required.
    for (const [key, value] of Object.entries(resourceCommand)) {
      for (let ii = 1; ii <= value; ii++) {
        newMission.resources?.push({
          initialType: key,
          status: 'unarranged',
        });
      }
    }
  }
};
I'm setting up a mongodb-memory-server in globalSetup.ts for Jest and copying the relevant data from JSON files into the database.
globalSetup.ts
/**
 * Jest global setup: starts an in-memory MongoDB, exposes its base URI and a
 * JWT secret through environment variables, and seeds the `aao` collection
 * of `testDb`.
 * Seeding errors are logged and not rethrown.
 */
export = async function globalSetup() {
  const instance = await MongoMemoryServer.create({
    instance: { dbName: 'testDb' },
  });
  const uri = instance.getUri();
  // Stored on the global object so that it can be reached again later.
  (global as any).__MONGOINSTANCE = instance;
  // Keep only the part of the URI before its last '/'.
  process.env.MONGODB_URI = uri.slice(0, uri.lastIndexOf('/'));
  process.env.JWT_SECRET = 'testSECRET';
  const client = new MongoClient(
    `${process.env.MONGODB_URI}/?maxPoolSize=20&w=majority`
  );
  try {
    await client.connect();
    const database = client.db('testDb');
    // Awaited so that a failure lands in the catch below instead of becoming
    // an unhandled rejection, and so that seeding starts only afterwards.
    await database.createCollection('aao');
    // @ts-ignore -- directive restored; the original read "#ts-ignore", which the compiler ignores
    await database.collection('aao').insertMany(aao['default']);
  } catch (error) {
    console.log(error);
  } finally {
    await client.close();
  }
};
missionCreateHandler.test.ts
test('it adds the correct mission resources to the array', async () => {
  // Create a minimal mission for alarm keyword R1.
  const newMission = await Mission.create({
    address: { street: 'test', houseNr: 23 },
    alarmKeyword: 'R1',
  });

  addMissionResources('R1', newMission);

  // The first resource added for R1 is expected to be an unarranged 'rtw'.
  const firstResource = newMission.resources[0];
  expect(firstResource.initialType).toEqual('rtw');
  expect(firstResource.status).toEqual('unarranged');
});
When running the test, I get a 'TypeError: Cannot convert undefined or null to object at Function.values ()'. So it seems that the APP_DATA object is not set. I checked that the mongodb-memory-server is set up correctly and fed with the needed data.
When i hardcode the content of APP_DATA in index.ts, the test runs without problems.
So my questions are: What is the best practice for setting up initial data in a Node.js app, and where should it be stored (a global object, or a simple variable that is imported in the files where it is needed)? How can the test run successfully, or is my code just untestable?
Thank you!
The script used when trying to get contents from the csv stored in the s3 bucket
const mysql = require("mysql");
const fs = require("fs");
const { google } = require("googleapis");
const AWS = require("aws-sdk");
const client = new AWS.SecretsManager({ region: "eu-west-1" });
const analyticsreporting = google.analyticsreporting("v4");
const csv = require('ya-csv')
const fastCsv = require('fast-csv')
const s3 = new AWS.S3();
const getParams = {
Bucket: 'data',
Key: 'athena_test/nameplate.csv'
};
exports.handler = async (context, event) => {
const data = await s3.getObject(getParams, function (err, data){
if(err){console.log("ERROR: ",err)}
else {return data}
})
console.log(data.Body)
}
The console.log call prints undefined rather than the contents of the CSV.
You can try this:
const csv = require('@fast-csv/parse');

/**
 * Parses a CSV stream (first line = headers) and resolves with the parsed
 * rows once the stream has been consumed completely.
 * Rejects with the underlying error if the source stream or the parser fails.
 *
 * @param csvFile readable stream with the CSV content
 */
const returnDataFromCSV = (csvFile) =>
  new Promise((resolve, reject) => {
    const rows = [];
    // Errors of the source stream are not forwarded through the parser, so
    // listen for them separately.
    csvFile.on("error", reject);
    csv
      .parseStream(csvFile, { headers: true })
      .on("data", (row) => {
        console.log('Parsed Data:-', row);
        rows.push(row);
      })
      .on("end", () => {
        resolve(rows);
      })
      .on("error", reject);
  });

// The lines below must run inside an async function (for example the Lambda
// handler), because they use `await`.
// createReadStream() returns the stream directly; it does not need `await`.
const s3Stream = s3.getObject(params).createReadStream();
const data = await returnDataFromCSV(s3Stream);
console.log(data);
CreateStream: https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Request.html#createReadStream-property
So I am writing a Lambda that will take in some form data via a straight POST through API Gateway (testing using Postman for now) and then send that image to S3 for storage. Every time I run it, the image uploaded to S3 is corrupted and won't open properly. I have seen people having to decode/encode the incoming data but I feel like I have tried everything using Buffer.from. I am only looking to store either .png or .jpg. The below code does not reflect my attempts using Base64 encoding/decoding seeing they all failed. Here is what I have so far -
Sample Request in postman
{
image: (uploaded .jpg/.png),
metadata: {tag: 'iPhone'}
}
Lambda
const AWS = require('aws-sdk')
const multipart = require('aws-lambda-multipart-parser')
const s3 = new AWS.S3();
exports.handler = async (event) => {
const form = multipart.parse(event, false)
const s3_response = await upload_s3(form)
return {
statusCode: '200',
body: JSON.stringify({ data: data })
}
};
const upload_s3 = async (form) => {
const uniqueId = Math.random().toString(36).substr(2, 9);
const key = `${uniqueId}_${form.image.filename}`
const request = {
Bucket: 'bucket-name',
Key: key,
Body: form.image.content,
ContentType: form.image.contentType,
}
try {
const data = await s3.putObject(request).promise()
return data
} catch (e) {
console.log('Error uploading to S3: ', e)
return e
}
}
EDIT:
I am now attempting to save the image into the /tmp directory and then use a read stream to upload it to S3. Here is some code for that:
s3 upload function
const AWS = require('aws-sdk')
const fs = require('fs')
const s3 = new AWS.S3()
module.exports = {
upload: (file) => {
return new Promise((resolve, reject) => {
const key = `${Date.now()}.${file.extension}`
const bodyStream = fs.createReadStream(file.path)
const params = {
Bucket: process.env.S3_BucketName,
Key: key,
Body: bodyStream,
ContentType: file.type
}
s3.upload(params, (err, data) => {
if (err) {
return reject(err)
}
return resolve(data)
}
)
})
}
}
form parser function
const busboy = require('busboy')
module.exports = {
parse: (req, temp) => {
const ctype = req.headers['Content-Type'] || req.headers['content-type']
let parsed_file = {}
return new Promise((resolve) => {
try {
const bb = new busboy({
headers: { 'content-type': ctype },
limits: {
fileSize: 31457280,
files: 1,
}
})
bb.on('file', function (fieldname, file, filename, encoding, mimetype) {
const stream = temp.createWriteStream()
const ext = filename.split('.')[1]
console.log('parser -- ext ', ext)
parsed_file = { name: filename, path: stream.path, f: file, type: mimetype, extension: ext }
file.pipe(stream)
}).on('finish', () => {
resolve(parsed_file)
}).on('error', err => {
console.err(err)
resolve({ err: 'Form data is invalid: parsing error' })
})
if (req.end) {
req.pipe(bb)
} else {
bb.write(req.body, req.isBase64Encoded ? 'base64' : 'binary')
}
return bb.end()
} catch (e) {
console.error(e)
return resolve({ err: 'Form data is invalid: parsing error' })
}
})
}
}
handler
const form_parser = require('./form-parser').parse
const s3_upload = require('./s3-upload').upload
const temp = require('temp')
exports.handler = async (event, context) => {
temp.track()
const parsed_file = await form_parser(event, temp)
console.log('index -- parsed form', parsed_file)
const result = await s3_upload(parsed_file)
console.log('index -- s3 result', result)
temp.cleanup()
return {
statusCode: '200',
body: JSON.stringify(result)
}
}
The above edited code is a combination of other code and a github repo I found that is trying to achieve the same results. Even with this solution the file is still corrupted
Figured out this issue. The code works perfectly fine - it was an issue with API Gateway. You need to go into the API Gateway settings, set the Binary Media Type to multipart/form-data, and then re-deploy the API. Hope this helps someone else who is banging their head against the wall trying to figure out how to send images via form data to a Lambda.
I'm writing a Lambda function which is given a list of text files on S3, and concatenates them together, and then zips that resulting file. For some reason, the function is bombing out in the middle of the process, with no errors.
The payload sent to the Lambda func looks like this:
{
"sourceFiles": [
"s3://bucket/largefile1.txt",
"s3://bucket/largefile2.txt"
],
"destinationFile": "s3://bucket/concat.zip",
"compress": true,
"omitHeader": false,
"preserveSourceFiles": true
}
The scenarios in which this function works totally fine:
The two files are small, and compress === false
The two files are small, and compress === true
The two files are large, and compress === false
If I try to have it compress two large files, it quits in the middle. The concatenation process itself works fine, but when it tries to use zip-stream to add the stream to an archive, it fails.
The two large files together are 483,833 bytes. When the Lambda function fails, it reads either 290,229 or 306,589 bytes (it's random) then quits.
This is the main entry point of the function:
const packer = require('zip-stream');
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3({ apiVersion: '2006-03-01' });
const { concatCsvFiles } = require('./csv');
const { s3UrlToParts } = require('./utils');
function addToZip(archive, stream, options) {
return new Promise((resolve, reject) => {
archive.entry(stream, options, (err, entry) => {
console.log('entry done', entry);
if (err) reject(err);
resolve(entry);
});
});
}
/**
 * Lambda entry point: concatenates the source CSV files, optionally zips
 * the result, uploads it to `event.destinationFile` and, unless
 * `preserveSourceFiles` is set, deletes the source objects.
 *
 * @throws Error carrying `e.code` when the caught error has a code,
 *         otherwise the caught error itself
 */
export const handler = async event => {
  /**
   * concatCsvFiles returns a readable stream to pass to either the archiver or
   * s3.upload.
   */
  let bytesRead = 0;
  try {
    const stream = await concatCsvFiles(event.sourceFiles, {
      omitHeader: event.omitHeader,
    });
    // Progress logging only.
    stream.on('data', chunk => {
      bytesRead += chunk.length;
      console.log('read', bytesRead, 'bytes so far');
    });
    stream.on('end', () => {
      console.log('finished reading', bytesRead, 'bytes');
    });
    const dest = s3UrlToParts(event.destinationFile);
    const archive = event.compress ? new packer() : undefined;

    console.log('uploading');
    // Start the upload BEFORE waiting for the zip entry. The archive is a
    // stream: if nothing reads from it, its buffer fills up, the entry never
    // completes and the function ends with pending work. Previously the
    // entry was awaited first, so larger inputs stalled half-way.
    const uploading = s3
      .upload({
        Body: archive || stream,
        Bucket: dest.bucket,
        Key: dest.key,
      })
      .promise();
    const archiving = archive
      ? addToZip(archive, stream, { name: 'concat.csv' }).then(() => {
          archive.finalize();
        })
      : Promise.resolve();
    await Promise.all([archiving, uploading]);
    console.log('done uploading');

    if (!event.preserveSourceFiles) {
      const s3Objects = event.sourceFiles.map(s3Url => {
        const { bucket, key } = s3UrlToParts(s3Url);
        return {
          bucket,
          key,
        };
      });
      // NOTE(review): all keys are deleted from the bucket of the FIRST
      // source file - confirm that sources never span several buckets.
      await s3
        .deleteObjects({
          Bucket: s3Objects[0].bucket,
          Delete: {
            Objects: s3Objects.map(s3Obj => ({ Key: s3Obj.key })),
          },
        })
        .promise();
    }
    console.log('done');
    // return {
    //   newFile: event.destinationFile,
    // };
  } catch (e) {
    if (e.code) {
      throw new Error(e.code);
    }
    throw e;
  }
};
And this is the concatenation code:
import MultiStream from 'multistream';
import { Readable } from 'stream';
import S3 from 'aws-sdk/clients/s3';
import { s3UrlToParts } from './utils';
const s3 = new S3({ apiVersion: '2006-03-01' });
/**
 * Takes an array of S3 URLs and returns a readable stream of the concatenated results.
 * The header line is read from the first file; every file is then streamed
 * from the byte after that header line, and the header is emitted once at
 * the start unless `options.omitHeader` is set.
 *
 * @param {string[]} s3Urls Array of S3 URLs
 * @param {object} options Options
 * @param {boolean} options.omitHeader Omit the header from the final output
 * @returns {Promise<MultiStream>} stream of the concatenated content
 */
export async function concatCsvFiles(s3Urls, options = {}) {
  // Get the header so we can use the length to set an offset in grabbing files
  const firstFile = s3Urls[0];
  const file = s3UrlToParts(firstFile);
  const data = await s3
    .getObject({
      Bucket: file.bucket,
      Key: file.key,
      // Range units are written "bytes=first-last". The previous value
      // "bytes 0-512" is not valid range syntax.
      Range: 'bytes=0-512', // first 513 bytes is pretty safe for header size
    })
    .promise();
  const [header] = data.Body.toString().split('\n');
  // Range offsets count bytes, not characters, so measure the encoded header.
  const bodyOffset = Buffer.byteLength(header) + 1; // +1 for newline char

  const streams = s3Urls.map(s3Url => {
    const { bucket, key } = s3UrlToParts(s3Url);
    return s3
      .getObject({
        Bucket: bucket,
        Key: key,
        Range: `bytes=${bodyOffset}-`,
      })
      .createReadStream();
  });

  if (!options.omitHeader) {
    const headerStream = new Readable();
    headerStream.push(header + '\n');
    headerStream.push(null); // end of the header stream
    streams.unshift(headerStream);
  }
  return new MultiStream(streams);
}
Got it. The problem was actually with the zip-stream library. Apparently it doesn't work well with S3 + streaming. I tried yazl and it works perfectly.
I’m a bit confused about how to proceed. I am using archiver (a Node.js module) as a means to write data to a zip file. Currently, I have my code working when I write to a file (local storage).
var fs = require('fs');
var archiver = require('archiver');
// Target zip file on local disk.
var output = fs.createWriteStream(__dirname + '/example.zip');
var archive = archiver('zip', {
  zlib: { level: 9 }
});
// The archive is a readable stream; its bytes are written to `output`.
archive.pipe(output);
// Typographic quotes replaced by plain quotes (they are a syntax error).
archive.append(mybuffer, {name: 'msg001.txt'});
// NOTE(review): no archive.finalize() call is shown here - confirm that it
// happens elsewhere.
I’d like to modify the code so that the archive target file is an AWS S3 bucket. Looking at the code examples, I can specify the bucket name and key (and body) when I create the bucket object as in:
var s3 = new AWS.S3();
// Bucket, key and body are passed per call (the comma before Body was missing).
var params = {Bucket: 'myBucket', Key: 'myMsgArchive.zip', Body: myStream};
s3.upload(params, function(err, data) {
  // ...
});
Or
// Bucket and key are bound to the client through the `params` option
// (was misspelled `parms`); only the body is passed per call.
s3 = new AWS.S3({ params: {Bucket: 'myBucket', Key: 'myMsgArchive.zip'} });
s3.upload({Body: myStream})
  .send(function(err, data) {
    // ...
  });
With regard to my S3 example(s), myStream appears to be a readable stream, and I am confused as to how to make this work, as archive.pipe requires a writable stream. Is this something where we need to use a pass-through stream? I’ve found an example where someone created a pass-through stream, but the example is too terse to gain a proper understanding. The specific example I am referring to is:
Pipe a stream to s3.upload()
Any help would be greatly appreciated. Thanks.
This could be useful for anyone else wondering how to use pipe.
Since you correctly referenced the example using the pass-through stream, here's my working code:
1 - The routine itself, zipping files with node-archiver
exports.downloadFromS3AndZipToS3 = () => {
// These are my input files I'm willing to read from S3 to ZIP them
const files = [
`${s3Folder}/myFile.pdf`,
`${s3Folder}/anotherFile.xml`
]
// Just in case you like to rename them as they have a different name in the final ZIP
const fileNames = [
'finalPDFName.pdf',
'finalXMLName.xml'
]
// Use promises to get them all
const promises = []
files.map((file) => {
promises.push(s3client.getObject({
Bucket: yourBubucket,
Key: file
}).promise())
})
// Define the ZIP target archive
let archive = archiver('zip', {
zlib: { level: 9 } // Sets the compression level.
})
// Pipe!
archive.pipe(uploadFromStream(s3client, 'someDestinationFolderPathOnS3', 'zipFileName.zip'))
archive.on('warning', function(err) {
if (err.code === 'ENOENT') {
// log warning
} else {
// throw error
throw err;
}
})
// Good practice to catch this error explicitly
archive.on('error', function(err) {
throw err;
})
// The actual archive is populated here
return Promise
.all(promises)
.then((data) => {
data.map((thisFile, index) => {
archive.append(thisFile.Body, { name: fileNames[index] })
})
archive.finalize()
})
}
2 - The helper method
const uploadFromStream = (s3client) => {
const pass = new stream.PassThrough()
const s3params = {
Bucket: yourBucket,
Key: `${someFolder}/${aFilename}`,
Body: pass,
ContentType: 'application/zip'
}
s3client.upload(s3params, (err, data) => {
if (err)
console.log(err)
if (data)
console.log('Success')
})
return pass
}
The following example takes the accepted answer and makes it work with local files as requested.
const archiver = require("archiver")
const fs = require("fs")
const AWS = require("aws-sdk")
const s3 = new AWS.S3()
const stream = require("stream")
/**
 * Zips two local files under new names and streams the archive to S3
 * through a PassThrough stream.
 * Rejects when the upload fails; archive errors are forwarded to the upload
 * by destroying the pass-through stream.
 */
const zipAndUpload = async () => {
  const files = [`test1.txt`, `test2.txt`]
  const fileNames = [`test1target.txt`, `test2target.txt`]
  const archive = archiver("zip", {
    zlib: { level: 9 } // Sets the compression level.
  })
  const uploadStream = new stream.PassThrough()

  // Register the listeners BEFORE any work starts so that no event is missed.
  // Errors thrown inside a handler cannot be caught by callers, so they are
  // passed on by destroying the upload stream.
  archive.on("warning", function (err) {
    if (err.code === "ENOENT") {
      console.log(err)
    } else {
      uploadStream.destroy(err)
    }
  })
  archive.on("error", function (err) {
    uploadStream.destroy(err)
  })
  archive.on("end", function () {
    console.log("archive end")
  })

  archive.pipe(uploadStream)
  // Start consuming the stream before the archive produces data.
  const upload = uploadFromStream(uploadStream)

  files.forEach((thisFile, index) => {
    archive.append(fs.createReadStream(thisFile), { name: fileNames[index] })
  })
  // finalize() may return a promise; failures are already handled by the
  // "error" listener above, so only prevent a duplicate unhandled rejection.
  Promise.resolve(archive.finalize()).catch(() => {})

  await upload
  console.log("all done")
}
const uploadFromStream = async pass => {
const s3params = {
Bucket: "bucket-name",
Key: `streamtest.zip`,
Body: pass,
ContentType: "application/zip"
}
return s3.upload(s3params).promise()
}
// Run the example; report failures instead of leaving the promise unhandled.
zipAndUpload().catch((err) => {
  console.error(err)
  process.exitCode = 1
})