How to parse a 500MB JSON file? - javascript

I have an upload in my application which receives an encrypted file. During decryption I write JSON to another file, and when the operation finishes I end up with a 500MB JSON file.
Note: the properties of my JSON are dynamic.
How can I parse this JSON? I read about stream-json/Parser but could not implement it.
const {parser} = require('stream-json/Parser');

static async _recoverObj(pathAnalise) {
  let readStream = fs.createReadStream(pathAnalise, {encoding: 'utf8', highWaterMark: 1024});

  function createJson(stream) {
    let decipher = crypto.createDecipher('aes-128-ecb', 'AmbientalRuralBR');
    return new Promise((resolve) => {
      stream.on('data', chunk => {
        let newDecrypt = decipher.update(chunk, 'base64', 'utf8');
        fs.appendFile('./data.json', newDecrypt, function() {
          console.log('Saved!');
        });
      });
      stream.on('end', () => {
        fs.appendFile('./data.json', '"}', function() {
          resolve();
        });
      });
    });
  }

  const result = await createJson(readStream);

  function getParse() {
    return new Promise((resolve) => {
      let jsonData = './data.json';
      let pipeline = fs.createReadStream(jsonData).pipe(parser());
      let objectCounter = 0;
      pipeline.on('data', data => {
        data.name === 'startObject' && ++objectCounter;
      });
      pipeline.on('end', () => {
        console.log(`Found ${objectCounter} objects.`);
        resolve();
      });
    });
  }

  let dataAnalyze = await getParse();
  return dataAnalyze;
}
Does anyone have any idea how to do this parsing?
MY SOLUTION:
I used https://www.npmjs.com/package/big-json
const json = require('big-json');
static async _recoverObj(pathAnalise) {
  var readStream = fs.createReadStream(pathAnalise, {encoding: 'utf8', highWaterMark: 1024 * 2});

  function createJson(stream) {
    var decipher = crypto.createDecipher('aes-128-ecb', 'AmbientalRuralBR');
    return new Promise((resolve) => {
      stream.on('data', chunk => {
        var newDecrypt = decipher.update(chunk, 'base64', 'utf8');
        fs.appendFile('./data.json', newDecrypt, function() {
          console.log('Saved!');
        });
      });
      stream.on('end', () => {
        fs.appendFile('./data.json', '"}', function() {
          resolve();
        });
      });
    });
  }

  await createJson(readStream);

  function getParse() {
    return new Promise((resolve) => {
      var jsonData = './data.json';
      var myObj = [];
      const readStream = fs.createReadStream(jsonData);
      const parseStream = json.createParseStream();
      parseStream.on('data', function(pojo) {
        myObj = pojo; // big-json emits the fully assembled object here
      });
      parseStream.on('end', function() {
        resolve(myObj);
      });
      readStream.pipe(parseStream);
    });
  }

  let dataAnalyze = await getParse();
  return dataAnalyze;
}
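For anyone who still wants the stream-json route the question originally asked about, here is a minimal sketch (assuming data.json holds one top-level object with dynamic keys, as in the question); it is an illustration, not the poster's code:

const fs = require('fs');
const {parser} = require('stream-json');
const {streamObject} = require('stream-json/streamers/StreamObject');

// Stream the file token by token and emit each top-level key/value pair
// as it is parsed, so the whole 500MB document never sits in memory at once.
const pipeline = fs.createReadStream('./data.json')
  .pipe(parser())
  .pipe(streamObject());

pipeline.on('data', ({ key, value }) => {
  console.log('property:', key); // handle one dynamic property at a time
});
pipeline.on('end', () => console.log('Done parsing.'));
pipeline.on('error', err => console.error(err));

The trade-off versus big-json is that you never get the whole object back at once; you handle each property as it streams past.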

Once you have the text string containing your JSON, you can use JSON.parse() to translate it into an object you can interact with:
const json = '{"result":true, "count":42}';
const obj = JSON.parse(json);
console.log(obj.count);
// expected output: 42
console.log(obj.result);
// expected output: true
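For a file in the 500MB range, though, this only works if the whole text fits comfortably in memory; very large documents can also run up against V8's maximum string length, which is why the streaming approaches above exist. A minimal read-then-parse sketch, assuming the decrypted JSON already sits in ./data.json:

// Hedged sketch: read the whole file into one string, then parse it in one go.
// Simple, but the full text and the parsed object both live in memory at once.
const fs = require('fs/promises');

async function loadJson(path) {
  const text = await fs.readFile(path, 'utf8');
  return JSON.parse(text);
}

loadJson('./data.json').then(obj => console.log(Object.keys(obj).length));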

Related

Run code after executing promise in Javascript

I am trying to save the values returned from the Indeed API to JSON. I use the indeed-scraper code from GitHub: https://github.com/rynobax/indeed-scraper
My code:
// ... required files ...

const parsedResults = []

indeed.query(queryOptions).then(response => {
  response.forEach((res, i) => {
    setTimeout(function() {
      let url = res.url
      let resultCount = 0
      console.log(`\n Scraping of ${url} initiated...\n`)
      const getWebsiteContent = async (url) => {
        try {
          const response = await axios.get(url)
          const $ = cheerio.load(response.data)
          // ...get scraped data...
          parsedResults.push(metadata)
        } catch (error) {
          exportResults(parsedResults)
          console.error(error)
        }
      }
      getWebsiteContent(url)
    }, i * 3000);
  });
});

const outputFile = 'data.json'
const fs = require('fs');

const exportResults = (parsedResults) => {
  fs.writeFile(outputFile, JSON.stringify(parsedResults, null, 4), (err) => {
    if (err) {
      console.log(err)
    }
    console.log(`\n ${parsedResults.length} Results exported successfully to ${outputFile}\n`)
  })
}
parsedResults is not accessible in the last portion of the script, so I can't save it as a JSON file.
Any help appreciated!
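One way to approach this is sketched below (not from the thread; it reuses indeed, queryOptions, axios, cheerio and exportResults from the question, and the metadata placeholder stands in for the elided scraping code): turn the staggered timeouts into sequential awaits so that exportResults only runs after every page has been scraped.

const delay = ms => new Promise(resolve => setTimeout(resolve, ms));

indeed.query(queryOptions).then(async response => {
  const parsedResults = [];
  for (let i = 0; i < response.length; i++) {
    if (i > 0) await delay(3000);     // keep the 3-second spacing between requests
    const url = response[i].url;
    console.log(`\n Scraping of ${url} initiated...\n`);
    try {
      const page = await axios.get(url);
      const $ = cheerio.load(page.data); // used by the elided "...get scraped data..." step
      const metadata = { url };          // placeholder for the scraped fields from the question
      parsedResults.push(metadata);
    } catch (error) {
      console.error(error);
    }
  }
  exportResults(parsedResults);          // runs only after the loop has finished
});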

Nodejs AWS Lambda s3 getObject method returns nothing

The script used when trying to get the contents of the CSV stored in the S3 bucket:
const mysql = require("mysql");
const fs = require("fs");
const { google } = require("googleapis");
const AWS = require("aws-sdk");
const client = new AWS.SecretsManager({ region: "eu-west-1" });
const analyticsreporting = google.analyticsreporting("v4");
const csv = require('ya-csv')
const fastCsv = require('fast-csv')
const s3 = new AWS.S3();
const getParams = {
Bucket: 'data',
Key: 'athena_test/nameplate.csv'
};
exports.handler = async (context, event) => {
const data = await s3.getObject(getParams, function (err, data){
if(err){console.log("ERROR: ",err)}
else {return data}
})
console.log(data.Body)
}
The console log prints undefined rather than the contents of the CSV.
Hey, you can try this one:
const csv = require('@fast-csv/parse');

const s3Stream = s3.getObject(params).createReadStream();
const data = await returnDataFromCSV(s3Stream);
console.log(data);

async function returnDataFromCSV(csvStream) {
  const promiseData = new Promise((resolve, reject) => {
    const rows = [];
    csv
      .parseStream(csvStream, { headers: true })
      .on("data", (row) => {
        console.log('Parsed Data:-', row);
        rows.push(row); // collect every parsed row so the caller gets the data back
      })
      .on("end", () => {
        resolve(rows);
      })
      .on("error", (error) => {
        reject(error);
      });
  });
  try {
    return await promiseData;
  } catch (error) {
    console.log("Get Error: ", error);
    return error;
  }
}
createReadStream: https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Request.html#createReadStream-property
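For the original handler, a minimal sketch that relies only on the SDK's .promise() (AWS SDK v2, bucket and key reused from the question; note the (event, context) parameter order) avoids mixing a callback with await:

const AWS = require("aws-sdk");
const s3 = new AWS.S3();

exports.handler = async (event, context) => {
  // getObject(...).promise() resolves with the full response, including Body as a Buffer
  const data = await s3.getObject({
    Bucket: 'data',
    Key: 'athena_test/nameplate.csv'
  }).promise();

  const csvText = data.Body.toString('utf-8');
  console.log(csvText);
  return csvText;
};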

Node Function Returning Empty Array

The following Node function is returning an empty array, and I'm not sure why. Could this be an async/await issue? Would appreciate any help. Thank you.
const folderPath = '/public/home.html'

function getCircuitAndFuse(folderPath) {
  // List containing circuit name with its fuse
  let temporaryList = [];
  let finalCircuitAndFuseList = []
  fs.readFile(__dirname + folderPath, (error, data) => {
    if (error) {
      console.log(`Unable to read file: ${error}`)
    } else {
      var $ = cheerio.load(data)
      $('img').each(function(index, element) {
        let getClassAtr = element.attribs.class
        temporaryList.push(getClassAtr.slice(0, getClassAtr.lastIndexOf(" ")))
      })
      finalCircuitAndFuseList = [...new Set(temporaryList)]
    }
  })
  return finalCircuitAndFuseList;
}

let getInfo = getCircuitAndFuse(folderPath)
// Returning empty array
console.log(getInfo)

*** Server code ***

const server = http.createServer(function(req, res) {
  res.writeHead(200, {'Content-Type': 'text/plain'})
  res.end()
}).listen(port, () => {
  console.log(`Server listening on port ${port}. Press Ctrl-C to terminate...`)
})
getCircuitAndFuse must return a Promise, like this:
function getCircuitAndFuse(folderPath) {
  return new Promise((resolve, reject) => {
    // List containing circuit name with its fuse
    let temporaryList = [];
    fs.readFile(__dirname + folderPath, (error, data) => {
      if (error) {
        console.log(`Unable to read file: ${error}`);
        reject(error); // reject so callers are not left waiting forever
      } else {
        var $ = cheerio.load(data);
        $('img').each(function (index, element) {
          let getClassAtr = element.attribs.class;
          temporaryList.push(
            getClassAtr.slice(0, getClassAtr.lastIndexOf(' '))
          );
        });
        resolve([...new Set(temporaryList)]);
      }
    });
  });
}

getCircuitAndFuse(folderPath).then((getInfo) => {
  // do something with `getInfo`
});
Another alternative to Faruk's answer would be to just use fs.readFileSync instead of wrapping your function in a promise and requiring some of that extra ceremony. Using fs.readFileSync will ensure that your function doesn't return prematurely.
Here is your code rewritten with that in mind:
function getCircuitAndFuse(folderPath) {
  try {
    let temporaryList = [];
    const data = fs.readFileSync(__dirname + folderPath);
    const $ = cheerio.load(data);
    $("img").each(function (index, element) {
      let getClassAtr = element.attribs.class;
      temporaryList.push(getClassAtr.slice(0, getClassAtr.lastIndexOf(" ")));
    });
    return [...new Set(temporaryList)];
  } catch (error) {
    console.log(error);
  }
}
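A third variant, sketched here and not taken from the answers above (it assumes Node 14+ and the same cheerio setup), keeps the read non-blocking by using the promise-based fs API, so callers simply await the result:

const fs = require('fs/promises');
const cheerio = require('cheerio');

async function getCircuitAndFuse(folderPath) {
  // Read the HTML without blocking the event loop, then extract the class names.
  const data = await fs.readFile(__dirname + folderPath);
  const $ = cheerio.load(data);
  const temporaryList = [];
  $('img').each(function (index, element) {
    const getClassAtr = element.attribs.class;
    temporaryList.push(getClassAtr.slice(0, getClassAtr.lastIndexOf(' ')));
  });
  return [...new Set(temporaryList)];
}

// usage:
// getCircuitAndFuse(folderPath).then((list) => console.log(list));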

Why can't I populate an array outside .map()

I have been trying for two days to figure this out and now I'm just coding in circles.
All the data gets processed, but I cannot return the data from the s3.upload function outside the .map function. What am I missing?
if (req.files['altImage']) {
  let result = '';
  let dataArr = new Array();
  const fileArr = req.files['altImage'];
  fileArr.map(async file => {
    const fileName = file.filename;
    const filePath = file.path;
    const stream = fs.createReadStream(filePath);
    const params = {
      ACL: 'public-read',
      Bucket: process.env.AWS_S3_BUCKET,
      Body: stream,
      Key: `${folder}/${fileName}`,
    };
    const s3Upload = s3.upload(params).promise()
    s3Upload
      .then(async () => {
        await fs.unlinkSync(filePath);
        result = await Promise.resolve(s3Upload);
        // returns the exact data I need
        console.log('result', result);
        await dataArr.push(result);
      })
      .catch((err) => {
        console.log(err);
      });
  })
  console.log('dataArr', dataArr);
  // dataArr is empty
  return {message: 'uploaded', data: dataArr};
}
});
This is probably the quickest way to fix your code:
if (req.files['altImage']) {
  const fileArr = req.files['altImage'];
  const requests = fileArr.map(file => {
    const fileName = file.filename;
    const filePath = file.path;
    const stream = fs.createReadStream(filePath);
    const params = {
      ACL: 'public-read',
      Bucket: process.env.AWS_S3_BUCKET,
      Body: stream,
      Key: `${folder}/${fileName}`,
    };
    return s3.upload(params).promise()
      .then(result => {
        fs.unlinkSync(filePath); // unlinkSync is synchronous, no await needed
        return result;
      })
      .catch((err) => {
        console.log(err);
      });
  });

  Promise.all(requests).then(dataArr => res.send({
    message: 'uploaded',
    data: dataArr
  }));
}
Explanation: you are filling dataArr asynchronously (because the s3 requests are asynchronous), so you need to wait for the requests to end before returning it.
Also, with Express.js, you don't simply return the result, but you pass it to the res object which represents the response.
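The same fix can also be written with async/await inside the route handler. This is only a sketch; the route path, the folder variable and the fs/s3 setup are assumptions carried over from the question:

app.post('/upload', async (req, res) => {
  if (!req.files['altImage']) {
    return res.status(400).send({ message: 'no files uploaded' });
  }

  // Map every file to an upload promise, then wait for all of them.
  const uploads = req.files['altImage'].map(async (file) => {
    const params = {
      ACL: 'public-read',
      Bucket: process.env.AWS_S3_BUCKET,
      Body: fs.createReadStream(file.path),
      Key: `${folder}/${file.filename}`,
    };
    const result = await s3.upload(params).promise(); // resolves with the S3 response
    fs.unlinkSync(file.path);                         // remove the temp file afterwards
    return result;
  });

  const dataArr = await Promise.all(uploads);
  res.send({ message: 'uploaded', data: dataArr });
});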

javascript FileReader - how to parse a long file in chunks?

Initially, I implemented the file loading like this:
export function сonvertFilesToByteArray(e) {
  const MAX_FILE_SIZE = 1024 * 1024 * 50; // 50MB
  const files = Object.keys(e.target.files);
  const asyncReadFile = eachFile =>
    new Promise((resolve, reject) => {
      if (e.target.files[eachFile].size > MAX_FILE_SIZE) {
        return reject([{ message: `File ${e.target.files[eachFile].name} too large` }]);
      }
      const reader = new FileReader();
      const targetFileInfo = {
        contentType: e.target.files[eachFile].type,
        filename: e.target.files[eachFile].name,
      };
      reader.readAsArrayBuffer(e.target.files[eachFile]);
      reader.onload = () => {
        resolve({ ...targetFileInfo, body: Array.from(new Uint8Array(reader.result)) });
      };
      reader.onerror = error => reject(error);
    });
  return Promise.all(files.map(asyncReadFile));
}
Here, the constant files tells me how many files there are, and I apply a function to each of them. Then I receive my file(s) in the component:
handleFileUpload = (e) => {
  сonvertFilesToByteArray(e)
    .then((result) => {
      runInAction(() => {
        this.files = [
          ...this.files,
          ...result,
        ];
      });
    })
    .catch(err => runInAction(() => {
      this.errors = [...this.errors, err[0].message];
    }));
}
The result is put into this.files, so this.files ends up looking like [{contentType: 'plain/text', filename: 'blabla', body: [123, 456, 23, ...] }], where [123, 456, 23, ...] is my ArrayBuffer.
With this approach, despite using Promise.all, the page freezes when loading a file heavier than about 2MB; it becomes impossible to interact with it in any way (although I can still scroll). The only fix I could think of was splitting each file into chunks.
OK, so I tried to rewrite the code with chunks:
export function сonvertFilesToByteArray(e) {
  const MAX_FILE_SIZE = 1024 * 1024 * 50; // 50MB
  const files = Object.keys(e.target.files);
  const asyncReadFile = eachFile =>
    new Promise((resolve, reject) => {
      if (e.target.files[eachFile].size > MAX_FILE_SIZE) {
        return reject([{ message: `File ${e.target.files[eachFile].name} too large` }]);
      }
      const file = e.target.files[eachFile];
      let offset = 0;
      console.log(offset, 'offset', file.size, 'size');
      const defaultChunkSize = 64 * 1024; // bytes
      const fileReader = new FileReader();
      const blob = file.slice(offset, offset + defaultChunkSize);
      const isEndOfFile = () => offset >= file.size;
      const testEndOfFile = () => {
        if (isEndOfFile()) {
          console.log('Done reading file');
        }
      };
      fileReader.readAsArrayBuffer(blob);
      fileReader.onloadend = (event) => {
        const target = (event.target);
        if (target.error == null) {
          const result = target.result;
          offset += result.length;
          testEndOfFile();
          console.log(result, 'result');
          resolve(result);
        } else {
          reject(target.error);
        }
      };
    });
  return Promise.all(files.map(asyncReadFile));
}
Here I receive the file and slice it. But the problem is that if the file is larger than one chunk, I have to read chunk after chunk and reassemble the pieces, and I can't figure out how to do that in my case.
Please help me :) What do I need to do to read the file in chunks and still receive it as a single ArrayBuffer?
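There was no accepted answer in the thread, but a minimal sketch of the reassembly step might look like the following: read the File slice by slice, copy each chunk into a preallocated Uint8Array, and resolve once the offset reaches the file size (the chunk size and function name here are arbitrary, not from the question):

function readFileInChunks(file, chunkSize = 64 * 1024) {
  return new Promise((resolve, reject) => {
    const result = new Uint8Array(file.size); // final buffer for the whole file
    let offset = 0;
    const reader = new FileReader();

    const readNext = () => {
      const blob = file.slice(offset, offset + chunkSize);
      reader.readAsArrayBuffer(blob);
    };

    reader.onload = () => {
      result.set(new Uint8Array(reader.result), offset); // copy this chunk into place
      offset += reader.result.byteLength;
      if (offset >= file.size) {
        resolve(result); // all chunks have been read
      } else {
        readNext();      // continue with the next slice
      }
    };
    reader.onerror = () => reject(reader.error);

    readNext();
  });
}

// usage:
// readFileInChunks(e.target.files[0]).then(bytes => console.log(bytes.length));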
