I am attempting to grab a PDF stored in Azure Blob Storage via a node backend and then serve that PDF file to a React Frontend. I am using Microsofts #azure/storage-blob with a BlockBlobClient but every example I find online converts the readableStreamBody to a string. The blob has a content type of application/pdf. Ive tried passing the readableStreamBody and the pure output to the frontend but those result in broken pdf's. I also followed the documentation online and made it a string and passed that to the frontend. That produced a PDF that would open and had the proper amount of pages but was completly blank.
Node.js Code on the Backend
app.get('/api/file/:company/:file', (req, res) => {
const containerClient = blobServiceClient.getContainerClient(req.params.company);
const blockBlobClient = containerClient.getBlockBlobClient(req.params.file);
blockBlobClient.download(0)
.then(blob => streamToString(blob.readableStreamBody))
.then(response => res.send(response))
});
FrontEnd Code
getFileBlob = (company,file) => {
axios(`/api/file/${company}/${file}`, { method: 'GET', responseType: 'blob'})
.then(response => {
const file = new Blob(
[response.data],
{type: 'application/pdf'});
const fileURL = URL.createObjectURL(file);
window.open(fileURL);
})
.catch(error => {
console.log(error);
});
}
This might help you, its working for me.
Node
var express = require('express');
const { BlobServiceClient } = require('#azure/storage-blob');
var router = express.Router();
const AZURE_STORAGE_CONNECTION_STRING =
'YOUR_STRING';
async function connectAzure() {
// Create the BlobServiceClient object which will be used to create a container client
const blobServiceClient = BlobServiceClient.fromConnectionString(
AZURE_STORAGE_CONNECTION_STRING
);
const containerName = 'filestorage';
const blobName = 'sample.pdf';
console.log('\nConnecting container...');
console.log('\t', containerName);
// Get a reference to a container
const containerClient = blobServiceClient.getContainerClient(containerName);
// Get a block blob client
const blockBlobClient = containerClient.getBlockBlobClient(blobName);
for await (const blob of containerClient.listBlobsFlat()) {
console.log('\t', blob.name);
}
const downloadBlockBlobResponse = await blockBlobClient.download(0);
const data = await streamToString(downloadBlockBlobResponse.readableStreamBody)
return data;
}
async function streamToString(readableStream) {
return new Promise((resolve, reject) => {
const chunks = [];
readableStream.on('data', data => {
chunks.push(data.toString());
});
readableStream.on('end', () => {
resolve(chunks.join(''));
});
readableStream.on('error', reject);
});
}
router.get('/', async function(req, res, next) {
const data = await connectAzure();
res.send({data}).status(200);
});
module.exports = router;
Front-end
function createFile() {
fetch('/createfile').then(res => {
res.json().then(data => {
var blob = new Blob([data.data], { type: 'application/pdf' });
var fileURL = URL.createObjectURL(blob);
if (filename) {
if (typeof a.download === 'undefined') {
window.location.href = fileURL;
} else {
window.open(fileURL, '_blank');
}
}
})
}).catch(err => console.log(err))
}
HTML
<body><h1>Express</h1><p>Welcome to Express</p><button onclick="createFile()">Create File</button></body>
Related
i am trying to make a component that take a pdf from input or an already uploaded one and then extract pages from it and uploaded again
when choosing a file from input (choosing file from my computer)
i am using this
const handleFileChange = async (event) => {
const file = event.target.files[0];
setFiles(event.target.files[0])
const fileName = event.target.files[0].name
setFileName(fileName);
const fileReader = new FileReader();
fileReader.onload = async () => {
const pdfBytes = new Uint8Array(fileReader.result);
const pdfDoc = await PDFDocument.load(pdfBytes);
setPdfDoc(pdfDoc);
setPdfBlob(pdfBytes)
};
fileReader.readAsArrayBuffer(file);
setShowPdf(true)
};
we get a pdfDoc and a Unit8Array
then i use the pdfDoc to get pages and extract a new pdf file....
this works fine
now when selecting a file that we already uploaded
i use this to ping the api to get the file
const handleGetFile = async (url) => {
const headers = {
Authorization: "Bearer " + (localStorage.getItem("token")),
Accept: 'application/pdf'
}
await axios.put(`${process.env.NEXT_PUBLIC_API_URL}getPdfFileBlob`, {
pdfUrl: `https://handle-pdf-photos-project-through-compleated-task.s3.amazonaws.com/${url}`
}, { responseType: 'arraybuffer', headers }).then((res) => {
const handlePdf = async () => {
const uint8Array = new Uint8Array(res.data);
const pdfBlob = new Blob([uint8Array], { type: 'application/pdf' });
setPdfBlob(uint8Array)
// setPdfDoc(pdfBlob) .....? how do i create a pdf doc from the unit8array
}
handlePdf()
}).catch((err) => {
console.log(err)
})
}
this the the end point i am pinging
app.put('/getPdfFileBlob',async function(req,res){
try {
console.log(req.body.pdfUrl)
const url =req.body.pdfUrl;
const fileName = 'file.pdf';
const file = fs.createWriteStream(fileName);
https.get(url, (response) => {
response.pipe(file);
file.on('finish', () => {
file.close();
// Serve the file as a response
const pdf = fs.readFileSync(fileName);
res.setHeader('Content-Type', 'application/pdf');
res.setHeader( 'Content-Transfer-Encoding', 'Binary'
);
res.setHeader('Content-Disposition', 'inline; filename="' + fileName + '"');
res.send(pdf);
});
});
} catch (error) {
res.status(500).json({success:false,msg:"server side err"})
}
})
after getting this file here is what am trying to do
const handlePageSelection = (index) => {
setSelectedPages(prevSelectedPages => {
const newSelectedPages = [...prevSelectedPages];
const pageIndex = newSelectedPages.indexOf(index);
if (pageIndex === -1) {
newSelectedPages.push(index);
} else {
newSelectedPages.splice(pageIndex, 1);
}
return newSelectedPages;
});
};
const handleExtractPages = async () => {
for (let i = pdfDoc.getPageCount() - 1; i >= 0; i -= 1) {
if (!selectedPages.includes(i + 1)) {
pdfDoc.removePage(i);
}
}
await pdfDoc.save();
};
well in the first case where i upload the pdf file from local storage i get a pdfDoc
console of pdf Doc and pdfBlob
and when i select already existing file i can't find a way to transfer unit8array buffer to pdf doc
log of pdfBlob and no pdf doc
what i want is transform the pdfblob to pdfDcoument or get the pdf document from the array buffer so i can use getpages on it
The script used when trying to get contents from the csv stored in the s3 bucket
const mysql = require("mysql");
const fs = require("fs");
const { google } = require("googleapis");
const AWS = require("aws-sdk");
const client = new AWS.SecretsManager({ region: "eu-west-1" });
const analyticsreporting = google.analyticsreporting("v4");
const csv = require('ya-csv')
const fastCsv = require('fast-csv')
const s3 = new AWS.S3();
const getParams = {
Bucket: 'data',
Key: 'athena_test/nameplate.csv'
};
exports.handler = async (context, event) => {
const data = await s3.getObject(getParams, function (err, data){
if(err){console.log("ERROR: ",err)}
else {return data}
})
console.log(data.Body)
}
the console log returns undefined rather than the contents of the csv
Hey you can try this one:-
const csv = require('#fast-csv/parse');
const s3Stream = await s3.getObject(params).createReadStream();
const data = await returnDataFromCSV();
console.log(data.Body);
const returnDataFromCSV =()=> {
let promiseData = new Promise((resolve, reject) => {
const parser = csv
.parseStream(csvFile, { headers: true })
.on("data", (data) => {
console.log('Parsed Data:-', data);
})
.on("end", ()=> {
resolve("CSV finished here");
})
.on("error",()=> {
reject("if failed");
});
});
try {
return await promiseData;
} catch (error) {
console.log("Get Error: ", error);
return error;
}
}
CreateStream: https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Request.html#createReadStream-property
So I am writing a Lambda that will take in some form data via a straight POST through API Gateway (testing using Postman for now) and then send that image to S3 for storage. Every time I run it, the image uploaded to S3 is corrupted and won't open properly. I have seen people having to decode/encode the incoming data but I feel like I have tried everything using Buffer.from. I am only looking to store either .png or .jpg. The below code does not reflect my attempts using Base64 encoding/decoding seeing they all failed. Here is what I have so far -
Sample Request in postman
{
image: (uploaded .jpg/.png),
metadata: {tag: 'iPhone'}
}
Lambda
const AWS = require('aws-sdk')
const multipart = require('aws-lambda-multipart-parser')
const s3 = new AWS.S3();
exports.handler = async (event) => {
const form = multipart.parse(event, false)
const s3_response = await upload_s3(form)
return {
statusCode: '200',
body: JSON.stringify({ data: data })
}
};
const upload_s3 = async (form) => {
const uniqueId = Math.random().toString(36).substr(2, 9);
const key = `${uniqueId}_${form.image.filename}`
const request = {
Bucket: 'bucket-name',
Key: key,
Body: form.image.content,
ContentType: form.image.contentType,
}
try {
const data = await s3.putObject(request).promise()
return data
} catch (e) {
console.log('Error uploading to S3: ', e)
return e
}
}
EDIT:
I am now atempting to save the image into the /tmp directory then use a read stream to upload to s3. Here is some code for that
s3 upload function
const AWS = require('aws-sdk')
const fs = require('fs')
const s3 = new AWS.S3()
module.exports = {
upload: (file) => {
return new Promise((resolve, reject) => {
const key = `${Date.now()}.${file.extension}`
const bodyStream = fs.createReadStream(file.path)
const params = {
Bucket: process.env.S3_BucketName,
Key: key,
Body: bodyStream,
ContentType: file.type
}
s3.upload(params, (err, data) => {
if (err) {
return reject(err)
}
return resolve(data)
}
)
})
}
}
form parser function
const busboy = require('busboy')
module.exports = {
parse: (req, temp) => {
const ctype = req.headers['Content-Type'] || req.headers['content-type']
let parsed_file = {}
return new Promise((resolve) => {
try {
const bb = new busboy({
headers: { 'content-type': ctype },
limits: {
fileSize: 31457280,
files: 1,
}
})
bb.on('file', function (fieldname, file, filename, encoding, mimetype) {
const stream = temp.createWriteStream()
const ext = filename.split('.')[1]
console.log('parser -- ext ', ext)
parsed_file = { name: filename, path: stream.path, f: file, type: mimetype, extension: ext }
file.pipe(stream)
}).on('finish', () => {
resolve(parsed_file)
}).on('error', err => {
console.err(err)
resolve({ err: 'Form data is invalid: parsing error' })
})
if (req.end) {
req.pipe(bb)
} else {
bb.write(req.body, req.isBase64Encoded ? 'base64' : 'binary')
}
return bb.end()
} catch (e) {
console.error(e)
return resolve({ err: 'Form data is invalid: parsing error' })
}
})
}
}
handler
const form_parser = require('./form-parser').parse
const s3_upload = require('./s3-upload').upload
const temp = require('temp')
exports.handler = async (event, context) => {
temp.track()
const parsed_file = await form_parser(event, temp)
console.log('index -- parsed form', parsed_file)
const result = await s3_upload(parsed_file)
console.log('index -- s3 result', result)
temp.cleanup()
return {
statusCode: '200',
body: JSON.stringify(result)
}
}
The above edited code is a combination of other code and a github repo I found that is trying to achieve the same results. Even with this solution the file is still corrupted
Figured out this issue. Code works perfectly fine - it was an issue with API Gateway. Need to go into the API Gateway settings and set thee Binary Media Type to multipart/form-data then re-deploy the API. Hope this helps someone else who is banging their head against the wall on figuring out sending images via form data to a lambda.
I manually upload the JSON file to google cloud storage by creating a new project. I am able to read the metadata for a file but I don't know how to read the JSON content.
The code I used to read the metadata is:
var Storage = require('#google-cloud/storage');
const storage = Storage({
keyFilename: 'service-account-file-path',
projectId: 'project-id'
});
storage
.bucket('project-name')
.file('file-name')
.getMetadata()
.then(results => {
console.log("results is", results[0])
})
.catch(err => {
console.error('ERROR:', err);
});
Can someone guide me to the way to read the JSON file content?
I've used the following code to read a json file from Cloud Storage:
'use strict';
const Storage = require('#google-cloud/storage');
const storage = Storage();
exports.readFile = (req, res) => {
console.log('Reading File');
var archivo = storage.bucket('your-bucket').file('your-JSON-file').createReadStream();
console.log('Concat Data');
var buf = '';
archivo.on('data', function(d) {
buf += d;
}).on('end', function() {
console.log(buf);
console.log("End");
res.send(buf);
});
};
I'm reading from a stream and concat all the data within the file to the buf variable.
Hope it helps.
UPDATE
To read multiple files:
'use strict';
const {Storage} = require('#google-cloud/storage');
const storage = new Storage();
listFiles();
async function listFiles() {
const bucketName = 'your-bucket'
console.log('Listing objects in a Bucket');
const [files] = await storage.bucket(bucketName).getFiles();
files.forEach(file => {
console.log('Reading: '+file.name);
var archivo = file.createReadStream();
console.log('Concat Data');
var buf = '';
archivo.on('data', function(d) {
buf += d;
}).on('end', function() {
console.log(buf);
console.log("End");
});
});
};
I was using the createWriteStream method like the other answers but I had a problem with the output in that it randomly output invalid characters (�) for some characters in a string. I thought it could be some encoding problems.
I came up with my workaround that uses the download method. The download method returns a DownloadResponse that contains an array of Buffer. We then use Buffer.toString() method and give it an encoding of utf8 and parse the result with JSON.parse().
const downloadAsJson = async (bucket, path) => {
const file = await new Storage()
.bucket(bucket)
.file(path)
.download();
return JSON.parse(file[0].toString('utf8'));
}
There exists a convenient method:'download' to download a file into memory or to a local destination. You may use download method as follows:
const bucketName='bucket name here';
const fileName='file name here';
const storage = new Storage.Storage();
const file = storage.bucket(bucketName).file(fileName);
file.download(function(err, contents) {
console.log("file err: "+err);
console.log("file data: "+contents);
});
A modern version of this:
const { Storage } = require('#google-cloud/storage')
const storage = new Storage()
const bucket = storage.bucket('my-bucket')
// The function that returns a JSON string
const readJsonFromFile = async remoteFilePath => new Promise((resolve, reject) => {
let buf = ''
bucket.file(remoteFilePath)
.createReadStream()
.on('data', d => (buf += d))
.on('end', () => resolve(buf))
.on('error', e => reject(e))
})
// Example usage
(async () => {
try {
const json = await readJsonFromFile('path/to/json-file.json')
console.log(json)
} catch (e) {
console.error(e)
}
})()
I am trying to download a zip file in my MERN application. I am getting the file in the response, how ever the client does not download the actual file. I am using archiver to zip files then return them in a fetch call.
Archive Service:
const archiver = require('archiver')
const zip = archiver('zip')
const path = require('path')
const fs = require('fs')
const appDir = path.dirname(require.main.filename)
exports.FileArchiver = function (feed, res) {
// const app = this.app;
const uploadsDir = path.join(appDir, '/uploads/');
const templatesDir = path.join(appDir, '/templates/');
const feedArray = feed.feed.data;
const extensions = [".jpg", ".png", ".svg"];
const feedArrayString = JSON.stringify(feedArray);
const feedArrayObject = JSON.parse(feedArrayString);
let imageArray = [];
let templateType = 'b'; //test
// grab image names from object
feedArrayObject.forEach(function(x){iterate(x)});
// remove duplicates
imageArray = uniq_fast(imageArray);
// zip images
for (let i = 0; i < imageArray.length; i++) {
console.log(imageArray[i])
const filePath = path.join(uploadsDir, imageArray[i]);
zip.append(fs.createReadStream(filePath), { name: 'images/'+imageArray[i] });
}
res.attachment(feed.name + '.zip');
zip.pipe(res);
zip.append(feedArrayString, { name: 'feed.json' })
zip.directory(templatesDir + '/' + templateType, false);
zip.on('error', (err) => { throw err; });
zip.on('warning', (err) => {
if (err.code === 'ENOENT') {
console.log('ENOENT for archive')
} else {
throw err;
}
});
zip.finalize();
return this;
}
Client side fetch:
export const downloadData = (url, _id, name, type) => {
return fetch(url, {method: 'GET'})
.then((res) => {
console.log(res);
return res;
})
}
Client side Headers attached:
content-disposition: attachment; filename="a_gpo.zip"
content-type: application/zip
The network request response returns 200 status and I can also see the attachment response in client contains zip file jargon. However the client does not return the actual file download.
Try to redirect the browser location to the URL
export const downloadData = (url) => {
window.location = url;
}