Node: How to async await folder file reads - javascript

How to read files asynchronously in node js, here is a simple function.
There are a lot of convoluted answers on the internet, does anyone agree if this is the simplest?
// API route handler: reads every file under data/companies/ and returns
// the { filename: content } map as JSON with a 200 status.
export default async function handler(req, res) {
  const data = await readFiles('data/companies/');
  res.status(200).json(data);
}
// async file reader
// async file reader: resolves with a { filename: content } map for every
// file directly inside dirname (dirname must end with a path separator,
// matching how callers concatenate dirname + filename).
function readFiles(dirname) {
  return new Promise(function (resolve, reject) {
    fs.readdir(dirname, function (err, filenames) {
      if (err) {
        // the original ignored readdir errors, leaving the promise pending
        return reject(err);
      }
      if (filenames.length === 0) {
        // an empty directory previously never resolved
        return resolve({});
      }
      const data = {};
      let pending = filenames.length;
      let settled = false;
      filenames.forEach(function (filename) {
        fs.readFile(dirname + filename, 'utf-8', function (err, content) {
          if (settled) return; // ignore stragglers after a rejection
          if (err) {
            settled = true;
            // `return` here prevents falling through and resolving later
            return reject(err);
          }
          data[filename] = content;
          // count completions instead of Object.keys(data).length, which
          // could resolve early if two filenames collided in the map
          if (--pending === 0) {
            settled = true;
            resolve(data);
          }
        });
      });
    });
  });
}

A bit cleaner and easier using the built in promise support in fs.promises:
const fs = require('fs');
const fsp = fs.promises;
const path = require('path');
// async file reader
// async file reader: sequentially reads every file in dirname and returns
// a { filename: content } map of their utf-8 contents.
async function readFiles(dirname) {
  const entries = await fsp.readdir(dirname);
  const data = {};
  for (const name of entries) {
    data[name] = await fsp.readFile(path.join(dirname, name), { encoding: 'utf8' });
  }
  return data;
}
Or, if you want to run your file operations in parallel (at least to the limit of the thread pool), you might get slightly faster end-to-end performance like this:
// async file reader
// async file reader: starts all reads concurrently (bounded only by the
// libuv thread pool) and returns a { filename: content } map.
async function readFiles(dirname) {
  const filenames = await fsp.readdir(dirname);
  const contents = await Promise.all(
    filenames.map((name) => fsp.readFile(path.join(dirname, name), { encoding: 'utf8' }))
  );
  const data = {};
  filenames.forEach((name, i) => {
    data[name] = contents[i];
  });
  return data;
}
Also, this:
res.status(200).json(data);
can be replaced with:
res.json(data);
200 is already the default status so there is no reason to specify it.

Related

Read txt file, run text filter using a Node module and write to a new .txt file using NodeJS

I am trying to open a file, run a filter from an NPM module on its contents, and then write the result as a new .txt file.
It works, but the output is blank. Does the file need to be streamed in both read and write process?
Here is the code:
// Reads a text file, runs it through paragraph-builder, and writes the
// result to "<filename>-para.txt".
// NOTE(review): this quoted question code contains two bugs, marked below,
// which explain the blank output the author reports.
async function main() {
const fs = require('fs');
const parBuild = require('paragraph-builder');
const util = require('util');
const filename = "Guardian-Yvette-TextBlock.txt";
var data;
// BUG: this promisified readFile is commented out, so the `readFile` used
// in the await below is undefined (ReferenceError at runtime).
//const readFile = util.promisify(fs.readFile);
fs.readFile('Guardian-Yvette-TextBlock.txt', 'utf8', function(err, text) {
if (err) throw err;
console.log(text);
});
// BUG: the .then() callback returns nothing, so sourceTXT is always
// undefined even when the read succeeds.
const sourceTXT = await readFile(filename, 'utf8')
.then((text) => {
console.log('TXT File:', text);
})
.catch((err) => {
console.log('Error', err);
});
var resultText = parBuild.toString(sourceTXT);
const writeTXTFile = util.promisify(fs.writeFile);
await writeTXTFile(filename + "-para.txt", resultText, 'utf-8');
console.log('Paragraph TXT file created and written to local directory');
}
main().catch(console.error);
These are the text (.txt) files from the newspaper article I am working with.
https://friendly-mccarthy-005993.netlify.app/
Uncomment this //const readFile = util.promisify(fs.readFile); on line 8 and check a cleaner solution:
<!-- language: javascript -->
const fs = require('fs');
const parBuild = require('paragraph-builder');
const util = require('util');

/**
 * Reads the source text file, rebuilds it into paragraphs with
 * paragraph-builder, and writes the result to "<filename>-para.txt".
 */
async function main() {
  const filename = "Guardian-Yvette-TextBlock.txt";
  const readFile = util.promisify(fs.readFile);
  const content = await readFile(filename, 'utf8');
  const resultText = parBuild.toString(content);
  const writeFile = util.promisify(fs.writeFile);
  await writeFile(filename + "-para.txt", resultText, 'utf-8');
  console.log('Paragraph TXT file created and written to local directory');
}

// A synchronous try/catch around a call to an async function never sees the
// rejection (the function returns a pending promise); handle errors on the
// returned promise instead.
main().catch((error) => {
  console.error(error);
});

fs.readdir not reading files extracted to a local directory (function not even running)

I have extracted zip files to a local directory within my lambda function however fs.readdir function is not working and is also not returning an error. I think maybe I have a syntax error with the way I set up the function to run asynchronously.
// Make Directories for Zipped and Unzipped files
// Make Directories for Zipped and Unzipped files
// NOTE(review): fragment quoted from the question — the matching catch or
// finally for this `try` lies outside the quoted snippet.
try {
const zipDirFolder = await mkdirp(localZippedFolder, { recursive: true });
const unZipDirFolder = await mkdirp(localUnzippedFolder, { recursive: true });
console.log(unZipDirFolder);
// Download files from s3 Bucket
let newFolder = await s3.getObject(params).promise()
.then(data => {
console.log(data);
return data;
});
const newZipFile = newFolder.Body;
// Extract files from zipped folder and store them in a local directory
// BUG (explained in the answer below): .pipe() does not return a promise,
// so this await does not wait for extraction to complete.
let filezFolder = await fs.createReadStream(params.Key)
.pipe(unzipper.Extract({path: unZipDirFolder}));
console.log(unZipDirFolder);
// Read Contents of that local directory
// Because of the bug above, this runs before the unzip has finished.
await fs.readdir(unZipDirFolder, function (err, files) {
if (err) {
console.log('THERE WAS AN ERROR:' + err);
} else {
console.log(files);
}
});
}
Problem: fs.readdir is not running nor is it returning an error message.
The problem is in this statement:
let filezFolder = await fs.createReadStream(params.Key)
.pipe(unzipper.Extract({path: unZipDirFolder}));
fs.createReadStream().pipe() does not return a promise. So using await on it doesn't do anything useful. So, you have not waited until the .pipe() is done. Thus, you are calling fs.readdir() before the unzip operation is done.
At the moment, streams are not particularly promise-friendly. You will probably need to register for some sort of event on the resulting stream that indicates when your unzip operation is done and then do the fs.readdir() from within that event handler. I don't know your unzip library myself to know which event or how to best monitor it for completion.
Since .pipe() returns a writable stream, you can probably do this:
// Wait for the writable (unzip) side to finish before reading the
// extracted directory; 'error' covers failures on the stream.
fs.createReadStream(params.Key)
.pipe(unzipper.Extract({path: unZipDirFolder}))
.on('finish', () => {
// placeholder: list the extracted files here
fs.readdir(...)
}).on('error', (err) => {
// error handling here
console.log(err);
});
Here's a simple little stand-alone program that shows the concept I'm talking about:
const unzipper = require('unzipper');
const fs = require('fs');
const fsp = fs.promises;
const path = require('path');

const inputZip = path.join(__dirname, "zip", "photos.zip");
const outputZipDir = path.join(__dirname, "zip-output")

// Unzips inputZip into outputZipDir, then lists the extracted files once
// the write side of the pipe has finished.
function run() {
  fs.createReadStream(inputZip)
    .pipe(unzipper.Extract({ path: outputZipDir }))
    .on('finish', async () => {
      let files = await fsp.readdir(outputZipDir);
      console.log(files);
      // use the files here
    }).on('error', err => { // was 'err': a misspelled event name never fires
      console.log(err);
    });
}
run();
The setup for this program is to put photos.zip into a zip subdirectory from where this program is run and to create a zip-output subdirectory from where this program is run for the unzipped files to go.
And, here's a promisified version that uses the same setup:
const unzipper = require('unzipper');
const fs = require('fs');
const fsp = fs.promises;
const path = require('path');

const inputZip = path.join(__dirname, "zip", "photos.zip");
const outputZipDir = path.join(__dirname, "zip-output")

// returns a promise that resolves/rejects when the unzip operation is done
function unzip(inputZip, outputDir) {
  return new Promise((resolve, reject) => {
    fs.createReadStream(inputZip)
      .pipe(unzipper.Extract({ path: outputDir }))
      .on('finish', resolve)
      .on('error', reject); // was 'err': streams emit 'error', so rejections were lost
  });
}

// Unzip, then list the extracted files.
async function run2() {
  await unzip(inputZip, outputZipDir);
  let files = await fsp.readdir(outputZipDir);
  console.log(files);
}

run2().catch(err => {
  console.log(err);
});
This is probably because you're trying to await fs.readdir while also passing it a callback. You shouldn't do both. Either remove the await, or make your lambda function async and remove the callback.
// Using await (don't forget to place this in an async function).
// Note: the callback-style fs.readdir does not return a promise, so the
// awaitable form must come from the fs.promises API.
const res = await fs.promises.readdir(yourFilePath);

// Using callbacks
fs.readdir(yourFilePath, (err, files) => {
  if (err) {
    console.log('THERE WAS AN ERROR:' + err);
  } else {
    console.log(files);
  }
});
Or you could use fs.readdirSync and do
const res = fs.readdirSync(yourFilePath)

How to write this with Promises?

var pdfParser = require('pdf-parser')
var fs = require('fs')
var PDF_PATH = __dirname + '/pdfs'
var results = []
var failed = []
// Parse every PDF in PDF_PATH, collecting parsed output in `results` and
// the names of unparseable files in `failed`.
fs.readdir(PDF_PATH, function(err, files){
if(err){
return console.log(err)
}
for(const file of files){
let the_ent = {
'name': '',
'other data': []
}
// pdf2json is asynchronous: each of these callbacks runs after the loop
// (and the logging below) has already completed.
pdfParser.pdf2json(PDF_PATH + '/' + file, function(error, pdf){
if(error != null){
console.log(error)
}else if(pdf['pages'] == undefined){
failed.push(file)
console.log(file +' failed')
}else{
//populate 'results' array
}
console.log(/*pdf_data*/)
results.push(/*pdf_data*/)
})
}
// BUG (the question's problem): these run before any pdf2json callback has
// fired, so both arrays are still empty here.
console.log(results)
console.log(failed)
results = JSON.stringify(results)
//fs.writeFileSync() write results to json
})
I don't know what is wrong with me this morning, I can't work out how to write this in async; obviously the logs/writefile at the bottom fire as soon as the script executes.
I have tried wrapping in async functions and awaiting the readdir / pdf parsing instead of using callbacks - clearly not correctly. I'm just trying to parse every pdf in a folder - push what I want to some arrays and then log them once the loop finishes zzz.
Wrap the smallest asynchronous tasks into Promises, then use async/await to combine them:
// the Promise wrapper (the original was missing a closing parenthesis,
// which made the line a syntax error):
const parsePdf = file => new Promise((res, rej) => pdfParser.pdf2json(file, (err, r) => err ? rej(err) : res(r)));

(async function () { // start an asynchronous context
  const PDF_PATH = __dirname + '/pdfs';
  const results = [], failed = []; // prefer const over let
  // reading files in a promising way is already provided natively:
  const files = await fs.promises.readdir(PDF_PATH);
  for (const file of files) { // this is in series, in parallel would probably be faster
    let the_ent = {
      name: '',
      'other data': [], // avoid whitespaces!
    };
    const pdf = await parsePdf(PDF_PATH + '/' + file);
    if (pdf.pages === undefined) { // prefer equality (===) over comparison (==)
      failed.push(file);
      console.log(file + ' failed');
    } else {
      // populate 'results' array
    }
  }
  console.log(results, failed);
})();
You can probably process the files in parallel too.
I would promisify the async operations and use async/await. For the fs operations, use the new fs.promises API. For others, use util.promisify() to make promisified versions.
The resolved value of the parsePDFs function I create will be an array of JSON and an array of failed filenames so you get both pieces of information back:
const util = require('util');
const pdfParser = require('pdf-parser');
const fsp = require('fs').promises;
const path = require('path');

// make promisified version of the function
// (the original `const pdfParser.pdf2jsonP = ...` is a syntax error — you
// cannot declare a const on a property access; bind it to a plain name)
const pdf2jsonP = util.promisify(pdfParser.pdf2json);

const PDF_PATH = path.join(__dirname, 'pdfs');

/**
 * Parses every PDF found in `dir`.
 * Resolves with { results, failed }: an array of parsed JSON objects and an
 * array of the filenames that could not be parsed.
 */
async function parsePDFs(dir) {
    const files = await fsp.readdir(dir);
    const results = [];
    const failed = [];
    for (const file of files) {
        try {
            let pdf = await pdf2jsonP(path.join(dir, file));
            if (!pdf || pdf.pages === undefined) {
                throw new Error("pdf.pages is empty")
            }
            results.push(pdf);
        } catch(e){
            console.log(e);
            failed.push(file);
        }
    }
    // this will be the resolved value of the returned promise
    return {results, failed};
}

parsePDFs(PDF_PATH).then(data => {
    console.log("failed filenames: ", data.failed); // was missing the comma
    console.log("json results: ", data.results);
    // do something with data.results and data.failed
}).catch(err => {
    console.log(err);
});
Note: You declare, but never use the variable the_ent.
You can use util.promisify to promisify the sync functions:
const readdir = util.promisify(fs.readdir);
const reader = util.promisify(pdfParser.pdf2json);
Minimal demo:
const fs = require('fs');
const util = require('util');
var pdfParser = require('pdf-parser');

const readdir = util.promisify(fs.readdir);
const reader = util.promisify(pdfParser.pdf2json);
var PDF_PATH = __dirname + '/pdfs';

(async () => {
  // Parse every PDF under PDF_PATH sequentially and log the collected output.
  async function processFiles() {
    const parsed = [];
    for (const name of await readdir(PDF_PATH)) {
      parsed.push(await reader(PDF_PATH + '/' + name));
    }
    return parsed;
  }
  console.info(await processFiles());
})();

function is being executed twice when called only once

I am making a program that will read the content of files inside nested folders. For now I am just trying to log the content of the file to the console, but I am getting two logs instead of only one. Here is what I have done till now:
const fs = require('fs');
const path = require('path');
const { promisify } = require('util');
const getStats = promisify(fs.stat);
const readdir = promisify(fs.readdir);
const http = require('http');
// HTTP handler: scans the log directory and responds with "Hello".
// NOTE(review): assigned without var/let/const — an implicit global (the
// answer's side note 2). It also runs once per request, so a browser
// fetching the page plus favicon.ico triggers it twice (answer below).
handle_files = async (req, res) => {
let files = await scanDir("logs_of_109");
let result = await read_content(files)
check_file_content(result)
res.writeHead(200, { 'Content-Type': 'text/html' });
// console.log(result)
res.write("Hello");
res.end();
};
// Logs the second collected file's content — assumes at least two entries.
check_file_content = (file_data) => {
console.log(file_data[1])
}
// Synchronously reads each file whose extension is 'data'; others skipped.
async function read_content(files) {
let file_data = []
files.map(file => {
let start_index = file.toString().lastIndexOf('.') + 1
let ext = file.substring(start_index, file.length)
if (ext == 'data') {
file_data.push(fs.readFileSync(file, { encoding: 'utf-8' }))
}
})
return file_data
}
http.createServer(handle_files).listen(8080)
// Recursively walks `dir`, appending the path of every non-directory entry
// to `fileList`, which is also the resolved value. Entries that cannot be
// stat'ed are skipped silently.
async function scanDir(dir, fileList = []) {
  const entries = await readdir(dir);
  for (const entry of entries) {
    // join new folder name after the parent folder, e.g. logs_of_109/24
    const entryPath = path.join(dir, entry);
    try {
      const stats = await getStats(entryPath);
      if (stats.isDirectory()) {
        await scanDir(entryPath, fileList);
      } else {
        // add the filepath to the array
        fileList.push(entryPath);
      }
    } catch (err) {
      // Drop on the floor..
    }
  }
  return fileList;
}
I expect the file content to be logged only once but it is logging twice on my console. Why is this happening and how do I stop this?
Your browser is making two requests to your server, most likely one for the URL you put in the address bar and another for favicon.ico. (You can quickly tell by opening the dev tools on your browser and going to the Network tab.)
handleFiles should look at req (specifically its url property) and act according to what's being requested. (This is something the code should be doing anyway.)
Side note 1: You're passing an async function into something (createServer) that won't do anything with the promise it returns. If you do that, it's important to catch any errors in the function locally within the function, since (again) nothing else is going to handle them. E.g.:
// createServer ignores the promise an async handler returns, so rejections
// must be caught locally inside the handler itself.
handle_files = async (req, res) => {
try {
let files = await scanDir("logs_of_109");
let result = await read_content(files)
check_file_content(result)
res.writeHead(200, { 'Content-Type': 'text/html' });
// console.log(result)
res.write("Hello");
res.end();
} catch (e) {
// ...handle error here...
}
};
Side note 2: That code is falling prey to The Horror of Implicit Globals¹. Declare your variables in the appropriate scope. Not declaring them, in loose mode, makes them globals. (Also recommend using strict mode, so you get an error for this.)
¹ (that's a post on my anemic little blog)
The answer above is correct.
My approach is to solve it via 'routing' of any kind.
Here is small basic example of how it can be done
const fs = require('fs');
const path = require('path');
const { promisify } = require('util');
const getStats = promisify(fs.stat);
const readdir = promisify(fs.readdir);
const http = require('http');
// Minimal router: dispatches /files to handle_files, everything else to a
// default log line.
handle_routes = async (req, res) => {
  switch (req.url) {
    case '/files':
      handle_files(req, res);
      break; // without this the case falls through and also runs default
    default:
      console.log('for default page');
  }
}
// Responds to /files: scans the log directory and replies with "Hello".
handle_files = async (req, res) => {
let files = await scanDir("logs_of_109");
let result = await read_content(files)
check_file_content(result)
res.writeHead(200, { 'Content-Type': 'text/html' });
res.write("Hello");
res.end();
};
// Logs the second collected file's content (index 1).
check_file_content = (file_data) => {
console.log(file_data[1])
}
// Synchronously reads each file whose extension is 'data'; others skipped.
async function read_content(files) {
let file_data = []
files.map(file => {
let start_index = file.toString().lastIndexOf('.') + 1
let ext = file.substring(start_index, file.length)
if (ext == 'data') {
file_data.push(fs.readFileSync(file, { encoding: 'utf-8' }))
}
})
return file_data
}
http.createServer(handle_routes).listen(8080)
// Recursively walks `dir`, appending the path of every non-directory entry
// to `fileList`, which is also the resolved value. Entries that cannot be
// stat'ed are skipped silently.
async function scanDir(dir, fileList = []) {
  const entries = await readdir(dir);
  for (const entry of entries) {
    // join new folder name after the parent folder, e.g. logs_of_109/24
    const entryPath = path.join(dir, entry);
    try {
      const stats = await getStats(entryPath);
      if (stats.isDirectory()) {
        await scanDir(entryPath, fileList);
      } else {
        // add the filepath to the array
        fileList.push(entryPath);
      }
    } catch (err) {
      // Drop on the floor..
    }
  }
  return fileList;
}
This gives you possibility to call handle_files function by going to localhost:8080/files url

how can i download a video mp4 file using node.js?

I want to let users download a video from my AWS S3 bucket. The video format is MP4:
app.get("/download_video", function(req,res) {
// NOTE(review): `filename` is assigned without var/let/const (implicit
// global), and the URL has no scheme — presumably https:// is intended.
filename = "s3.xxx.amazon.com/bucketname/folder/video_example.mp4";
// im stuck on what i can do here
});
There are a lot of examples on how to download images and textfiles online using nodejs, but I can't find anything on videos.
'use strict' // must be a string literal; a bare `use strict` is a syntax error

const Fs = require('fs')
const Path = require('path')
const Listr = require('listr')
const Axios = require('axios')

// Runs the task list; exits 0 on success and 1 (after logging) on failure.
function one (tasks) {
  tasks.run()
    .then(() => process.exit(0))
    .catch((err) => {
      // passing process.exit directly as a handler discards the error and
      // uses the Error object as the exit code
      console.error(err)
      process.exit(1)
    })
}

if (process.argv) {
  const tasks = [{
    title: 'Downloading',
    task: async (ctx, task) => {
      // the original URL string had a stray trailing double quote
      const url = 'https://s3.xxx.amazon.com/bucketname/folder/video_example.mp4'
      const path = Path.resolve(__dirname, 'media', 'video.mp4')
      const response = await Axios({
        method: 'GET',
        url: url,
        responseType: 'stream'
      })
      response.data.pipe(Fs.createWriteStream(path))
      // resolve when the download stream has been fully consumed
      return new Promise((resolve, reject) => {
        response.data.on('end', () => {
          resolve()
        })
        response.data.on('error', err => {
          reject(err)
        })
      })
    }
  }]
  one(new Listr(tasks))
}
Try this
const fetch = require('node-fetch');
const fs = require('fs');

// `await` at the top level is not valid in a CommonJS (require-based)
// script, so wrap the download in an async IIFE.
(async () => {
  const response = await fetch(yourUrl);
  const buffer = await response.buffer();
  fs.writeFile(`./videos/name.mp4`, buffer, () =>
    console.log('finished downloading video!'));
})();
Third-party modules are no longer needed as of Node.js v18.
import { createWriteStream } from 'node:fs';
import { Readable } from 'node:stream';
import { pipeline } from 'node:stream/promises';

const videoFileUrl = 'https://sveltejs.github.io/assets/caminandes-llamigos.mp4';
const videoFileName = 'video.mp4';

if (typeof (fetch) === 'undefined') throw new Error('Fetch API is not supported.');

const response = await fetch(videoFileUrl);
if (!response.ok) throw new Error('Response is not ok.');

// Reference https://stackoverflow.com/a/66629140/12817553
// pipeline() resolves only after the write stream has flushed and rejects
// on errors from either side — waiting for 'end' on the readable (as the
// original did) can resolve before the file is fully written and misses
// write-side errors.
await pipeline(Readable.fromWeb(response.body), createWriteStream(videoFileName));

Categories