function is being executed twice when called only once - javascript

I am making a program that will read the content of files inside nested folders. For now I am just trying to log the content of the file in the console, but I am getting two logs instead of only one. Here is what I have done so far:
const fs = require('fs');
const path = require('path');
const { promisify } = require('util');
const getStats = promisify(fs.stat);
const readdir = promisify(fs.readdir);
const http = require('http');
// HTTP request handler: gathers the file list under "logs_of_109", hands it to
// read_content, passes the result to check_file_content and replies "Hello".
handle_files = async (req, res) => {
  const discovered = await scanDir("logs_of_109");
  const contents = await read_content(discovered);
  check_file_content(contents);

  res.writeHead(200, { 'Content-Type': 'text/html' });
  res.write("Hello");
  res.end();
};
// Prints the entry at index 1 of the collected file contents.
check_file_content = (file_data) => {
  const secondEntry = file_data[1];
  console.log(secondEntry);
};
// Synchronously reads every path in `files` whose text after the last '.'
// equals "data" and returns the UTF-8 contents in input order.
async function read_content(files) {
  const contents = [];
  files.forEach((entry) => {
    const dotAt = entry.toString().lastIndexOf('.');
    const suffix = entry.substring(dotAt + 1, entry.length);
    if (suffix !== 'data') {
      return;
    }
    contents.push(fs.readFileSync(entry, { encoding: 'utf-8' }));
  });
  return contents;
}
// Start an HTTP server on port 8080; handle_files is registered as the request listener.
http.createServer(handle_files).listen(8080)
// Recursively walks `dir`, appending the path of every non-directory entry to
// `fileList`, and returns that same array. Entries that fail to stat (or whose
// sub-walk fails) are skipped on purpose.
async function scanDir(dir, fileList = []) {
  // names of everything directly inside the given directory
  const entries = await readdir(dir);
  for (const name of entries) {
    // e.g. logs_of_109/24
    const target = path.join(dir, name);
    try {
      const info = await getStats(target);
      if (info.isDirectory()) {
        // descend; nested results land in the shared list
        await scanDir(target, fileList);
      } else {
        fileList.push(target);
      }
    } catch (err) {
      // Drop on the floor..
    }
  }
  return fileList;
}
I expect the file content to be logged only once but it is logging twice on my console. Why is this happening and how do I stop this?

Your browser is making two requests to your server, most likely one for the URL you put in the address bar and another for favicon.ico. (You can quickly tell by opening the dev tools on your browser and going to the Network tab.)
handle_files should look at req (specifically its url property) and act according to what's being requested. (This is something the code should be doing anyway.)
Side note 1: You're passing an async function into something (createServer) that won't do anything with the promise it returns. If you do that, it's important to catch any errors in the function locally within the function, since (again) nothing else is going to handle them. E.g.:
// Request handler with local error handling. http.createServer ignores the
// promise an async listener returns, so every failure must be dealt with here.
// Declared with `const` so it is not an implicit global.
const handle_files = async (req, res) => {
  try {
    const files = await scanDir("logs_of_109");
    const result = await read_content(files);
    check_file_content(result);
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write("Hello");
    res.end();
  } catch (e) {
    console.error(e);
    // Only send a status line if the success path has not already sent one.
    if (!res.headersSent) {
      res.writeHead(500, { 'Content-Type': 'text/plain' });
    }
    res.end("Internal Server Error");
  }
};
Side note 2: That code is falling prey to The Horror of Implicit Globals¹. Declare your variables in the appropriate scope. Not declaring them, in loose mode, makes them globals. (Also recommend using strict mode, so you get an error for this.)
¹ (that's a post on my anemic little blog)

The answer above is correct.
My approach is to solve it via 'routing' of any kind.
Here is small basic example of how it can be done
const fs = require('fs');
const path = require('path');
const { promisify } = require('util');
const getStats = promisify(fs.stat);
const readdir = promisify(fs.readdir);
const http = require('http');
// Minimal router: dispatches on req.url.
//   '/files'      -> handle_files
//   anything else -> 404 (this includes the browser's automatic /favicon.ico request)
// Each case ends with `break` so '/files' does not fall through into the
// default branch, and every branch finishes the response.
const handle_routes = async (req, res) => {
  try {
    switch (req.url) {
      case '/files':
        await handle_files(req, res);
        break;
      default:
        console.log('for default page');
        res.writeHead(404, { 'Content-Type': 'text/plain' });
        res.end('Not Found');
    }
  } catch (err) {
    // createServer ignores the returned promise, so failures are handled here.
    console.error(err);
    if (!res.headersSent) {
      res.writeHead(500, { 'Content-Type': 'text/plain' });
    }
    res.end('Internal Server Error');
  }
};
// Handler for the '/files' route: scans "logs_of_109", reads the files, logs
// one entry and replies "Hello". Declared with `const` (no implicit global);
// failures are answered with a 500 instead of becoming an unhandled rejection.
const handle_files = async (req, res) => {
  try {
    const files = await scanDir("logs_of_109");
    const result = await read_content(files);
    check_file_content(result);
    res.writeHead(200, { 'Content-Type': 'text/html' });
    res.write("Hello");
    res.end();
  } catch (err) {
    console.error(err);
    if (!res.headersSent) {
      res.writeHead(500, { 'Content-Type': 'text/plain' });
    }
    res.end("Internal Server Error");
  }
};
// Logs the entry at index 1 of the file contents.
// Declared with `const` so it is a real binding rather than an implicit
// global (which is a ReferenceError in strict mode / ES modules).
const check_file_content = (file_data) => {
  console.log(file_data[1]);
};
/**
 * Reads every `.data` file in `files` and returns the contents in input order.
 *
 * The extension is taken with path.extname, so a dot inside a directory name
 * or an extension-less file that happens to be called "data" is not mistaken
 * for a `.data` file.
 *
 * @param {string[]} files - file paths to consider
 * @returns {Promise<string[]>} UTF-8 contents of the `.data` files
 */
async function read_content(files) {
  return files
    .filter((file) => path.extname(String(file)) === '.data')
    .map((file) => fs.readFileSync(file, { encoding: 'utf-8' }));
}
// Start an HTTP server on port 8080; handle_routes is registered as the request listener.
http.createServer(handle_routes).listen(8080)
// Collects the paths of all non-directory entries below `dir` (recursively)
// into `fileList` and returns it. Any entry whose stat or sub-scan throws is
// silently skipped.
async function scanDir(dir, fileList = []) {
  // fetch the list of entries from the given directory
  const names = await readdir(dir);
  for (const name of names) {
    // parent folder + entry name, e.g. logs_of_109/24
    const candidate = path.join(dir, name);
    try {
      const isDir = (await getStats(candidate)).isDirectory();
      if (isDir) {
        await scanDir(candidate, fileList);
      } else {
        fileList.push(candidate);
      }
    } catch (err) {
      // Drop on the floor..
    }
  }
  return fileList;
}
This gives you possibility to call handle_files function by going to localhost:8080/files url

Related

Node: How to async await folder file reads

How to read files asynchronously in node js, here is a simple function.
There are a lot of convoluted answers on the internet, does anyone agree if this is the simplest?
// API handler: loads everything readFiles returns for 'data/companies/' and
// sends it back as JSON with status 200.
export default async function handler(req, res) {
  const companies = await readFiles('data/companies/');
  const reply = res.status(200);
  reply.json(companies);
}
// async file reader
/**
 * Reads every file directly inside `dirname` and resolves with an object that
 * maps each file name to its UTF-8 content.
 *
 * @param {string} dirname - directory path; it is concatenated with the file
 *   names as-is, so it must end with a path separator (e.g. 'data/companies/')
 * @returns {Promise<Object<string, string>>} file name -> content
 *   (an empty object for an empty directory)
 * Rejects with the underlying error if the directory or any file cannot be read.
 */
function readFiles(dirname) {
  return new Promise(function (resolve, reject) {
    const data = {};
    fs.readdir(dirname, function (err, filenames) {
      if (err) {
        // Without this, `filenames` is undefined and the loop below would throw.
        reject(err);
        return;
      }
      // An empty directory triggers no readFile callbacks, so settle right away.
      if (filenames.length === 0) {
        resolve(data);
        return;
      }
      let remaining = filenames.length;
      filenames.forEach(function (filename) {
        fs.readFile(dirname + filename, 'utf-8', function (err, content) {
          if (err) {
            reject(err);
            return; // do not record a failed read as content
          }
          data[filename] = content;
          remaining -= 1;
          if (remaining === 0) {
            resolve(data);
          }
        });
      });
    });
  });
}
A bit cleaner and easier using the built in promise support in fs.promises:
const fs = require('fs');
const fsp = fs.promises;
const path = require('path');
// async file reader
// Reads the entries of `dirname` one after another and returns an object
// mapping each entry name to its UTF-8 content.
async function readFiles(dirname) {
  const contentsByName = {};
  const names = await fsp.readdir(dirname);
  for (const name of names) {
    const location = path.join(dirname, name);
    contentsByName[name] = await fsp.readFile(location, { encoding: 'utf8' });
  }
  return contentsByName;
}
Or, if you want to run your file operations in parallel (at least to the limit of the thread pool), you might get slightly faster end-to-end performance like this:
// async file reader
// Starts a read for every entry of `dirname` at once, waits for all of them,
// and returns an object mapping each entry name to its UTF-8 content.
async function readFiles(dirname) {
  const contentsByName = {};
  const loadOne = async (name) => {
    const location = path.join(dirname, name);
    contentsByName[name] = await fsp.readFile(location, { encoding: 'utf8' });
  };
  const names = await fsp.readdir(dirname);
  await Promise.all(names.map((name) => loadOne(name)));
  return contentsByName;
}
Also, this:
res.status(200).json(data);
can be replaced with:
res.json(data);
200 is already the default status so there is no reason to specify it.

getting fs.readFile to work with async await and promisify

I am trying to create a node script that looks into my inbox of recently created markdown notes, finds the metadata at the top of that note, finds the category within that metadata and then moves the file into its relevant folder. I keep getting the error message:
(node:82025) UnhandledPromiseRejectionWarning: ReferenceError: paths is not defined
I believe this is because my code is not waiting for the readFile function to finish, despite me having labeled it as async/await and having wrapped it in promisify at the top of the file. Could someone please point me in the direction as to where I'm going wrong? Complete code below (other dependencies are for another script):
const fs = require('fs');
const path = require('path');
const firstline = require('firstline');
const moment = require('moment');
const { promisify } = require('util');
const readFile = promisify(fs.readFile);
const pathToResources =
'/Users/Rob/Library/Mobile Documents/9CR7T2DMDG~com~ngocluu~onewriter/Documents/Test/Resources/';
// Keeps only the names ending in ".md". The filtered list is stored in the
// undeclared variable `notes` and also returned.
function markdownNotesOnly(files) {
  notes = files.filter((name) => path.extname(name) === '.md');
  return notes;
}
// Intended to work out where `note` lives now (oldPath) and where it should be
// moved to (newPath), based on the category found in its metadata block.
// NOTE(review): this function has no `return` of its own, so the promise it
// yields resolves to undefined; the `return paths` statements below return
// from the inner callback only.
async function getNewPath(note) {
let oldPath = path.join(__dirname, note);
// Local to this function - not visible to callers such as sortFiles.
let paths = {};
let newPath = '';
// NOTE(review): `readFile` is the util.promisify-wrapped version, yet it is
// called here with a Node-style callback and its result is not awaited.
readFile(oldPath, (err, data) => {
if (err) throw err;
// Grab the text delimited by two `---` lines (flags: global, multiline, dotAll).
let metadata = data
.toString()
.match(/-{3}\n.*\n-{3}/gms)
.toString();
// Look for a category tag of the form "#§-ddd-..." inside that text.
let category = metadata.match(/#§-\d{3}-.*/);
if (!category) {
// No category: newPath stays the empty string.
paths = { oldPath, newPath };
return paths;
}
// Drop the first three characters of the matched tag.
category = category.toString().substring(3);
newPath = path.join(pathToResources, category);
paths = { oldPath, newPath };
return paths;
});
}
// Lists `startPath`, keeps the markdown notes and tries to move each one to
// the folder computed by getNewPath.
function sortFiles(startPath) {
fs.readdir(startPath, (err, files) => {
if (err) throw err;
// The return value is discarded; the next line relies on the undeclared
// variable `notes` that markdownNotesOnly assigns as a side effect.
markdownNotesOnly(files); // returns array called notes
notes.map(async (note) => {
await getNewPath(note);
// NOTE(review): `paths` is not declared in this scope (it is a local inside
// getNewPath), which matches the reported "paths is not defined" error.
let { oldPath, newPath } = paths;
fs.rename(oldPath, path.join(newPath, note), (err) => {
if (err) throw err;
console.log('File moved successfully');
});
});
});
}
Update:
Followed the advice of Mestre San below and got it working this morning. Here is my finished code if it is of use to anybody.
import { readdir, rename } from 'fs';
import { promises as fsPromises } from 'fs';
import { resolve, extname, join } from 'path';
const __dirname = resolve();
const pathToResources =
'/Users/Rob/Library/Mobile Documents/9CR7T2DMDG~com~ngocluu~onewriter/Documents/Test/Resources/';
// Returns a new array holding only the names whose extension is ".md".
function markdownNotesOnly(files) {
  return files.filter((name) => extname(name) === '.md');
}
/**
 * Works out the current and the target location of a note.
 *
 * The note is read from `__dirname`; its metadata block (text delimited by two
 * `---` lines) is searched for a category tag of the form "#§-ddd-...". The
 * tag minus its first three characters is used as a folder name below
 * `pathToResources`.
 *
 * @param {string} note - file name of the note
 * @returns {Promise<{oldPath: string, newPath: string}>} `newPath` is '' when
 *   the note has no metadata block or no category tag
 * Rejects if the file cannot be read (fsPromises.readFile rejects; it never
 * resolves with an Error object, so no `instanceof Error` check is needed).
 */
async function getPaths(note) {
  const metadataRegex = /-{3}\n.*\n-{3}/gms;
  const categoryRegex = /#§-\d{3}-.*/;
  const oldPath = join(__dirname, note);
  const noTarget = { oldPath, newPath: '' };

  const text = (await fsPromises.readFile(oldPath)).toString();
  const metadata = text.match(metadataRegex);
  if (!metadata) {
    return noTarget;
  }
  const category = metadata.toString().match(categoryRegex);
  if (!category) {
    return noTarget;
  }
  // substring(3) strips the leading "#§-" marker from the matched tag.
  const folder = category.toString().substring(3);
  return { oldPath, newPath: join(pathToResources, folder) };
}
// Lists `startPath`, keeps the markdown notes, and renames each note from the
// oldPath to "<newPath>/<note>" as reported by getPaths.
export function sortFiles(startPath) {
  readdir(startPath, (listErr, files) => {
    if (listErr) throw listErr;
    markdownNotesOnly(files).forEach(async (note) => {
      const { oldPath, newPath } = await getPaths(note);
      const destination = join(newPath, note);
      rename(oldPath, destination, (moveErr) => {
        if (moveErr) throw moveErr;
        console.log('File moved successfully');
      });
    });
  });
}
The snippet you posted is definitely incomplete, and we can only give advice based on what we see here, so here are my considerations:
You are creating a promisified function but you are using it with a callback. You should pick one style. The way the code is now your callback will never be called. You have to either use fs.readFile there or readFile(oldPath).then(data =>. Even better would be to use fs.promises.readFile if it is available on the Node.js version you are using
I can see that the paths variable is indeed not defined inside the sortFiles function. The code declares that variable inside the function getNewPath, making it unavailable inside the sortFiles function.
The variable notes is not defined either. The code is creating the notes variable inside the function markdownNotesOnly. Although sharing these values across function using the global scope is not encouraged if you choose to do that you should definitely declare it outside that function with let notes.
The sortFiles is not being called but since it is using an async function and you are not passing any callback to it as an argument it will pretty much work as a fire and forget kinda thing
The sortFiles function has the word sort in its name but it has a rename call in its body, that can be confusing even for yourself in the future.
But, the way to make your code stop throwing that very same error is by doing something like this:
'use strict'
const fs = require('fs')
const path = require('path')
// const firstline = require('firstline')
// const moment = require('moment')
const { promisify } = require('util')
const readFile = promisify(fs.readFile)
const pathToResources =
'/Users/Rob/Library/Mobile Documents/9CR7T2DMDG~com~ngocluu~onewriter/Documents/Test/Resources/'
let notes, paths
// Filters `files` down to the ".md" names, stores the list in the outer
// `notes` variable and returns it.
function markdownNotesOnly (files) {
  'use strict'
  const markdown = files.filter((name) => path.extname(name) === '.md')
  notes = markdown
  return markdown
}
/**
 * Computes where `note` currently lives and where it should be moved to.
 *
 * Reads the note from `__dirname`, looks for a metadata block delimited by
 * `---` lines and, inside it, a category tag of the form "#§-ddd-...".
 *
 * @param {string} note - file name of the note
 * @returns {Promise<{oldPath: string, newPath: string}>} `newPath` is '' when
 *   the note has no metadata block or no category tag
 *
 * The result is also written to the outer `paths` variable for existing
 * callers that still read it; new callers should use the resolved value.
 */
async function getNewPath (note) {
  const oldPath = path.join(__dirname, note)
  let newPath = ''
  const data = await readFile(oldPath)
  // match() yields null for a note without a metadata block - guard it
  // instead of calling .toString() on null.
  const metadata = data.toString().match(/-{3}\n.*\n-{3}/gms)
  const category = metadata ? metadata.toString().match(/#§-\d{3}-.*/) : null
  if (category) {
    // substring(3) strips the leading "#§-" marker
    newPath = path.join(pathToResources, category.toString().substring(3))
  }
  paths = { oldPath, newPath }
  return paths
}
/**
 * Moves every markdown note found in `startPath` into the folder that
 * getNewPath computes for it.
 *
 * Fire-and-forget: returns nothing and reports progress on the console.
 *
 * @param {string} startPath - directory to scan for notes
 */
function sortFiles (startPath) {
  fs.readdir(startPath, (err, files) => {
    if (err) throw err
    const markdownNotes = markdownNotesOnly(files)
    markdownNotes.forEach(async (note) => {
      try {
        // Use the value this call resolved with; a shared outer variable could
        // already have been overwritten by another note being processed.
        const { oldPath, newPath } = await getNewPath(note)
        // An empty newPath means "no category": leave the note where it is
        // rather than renaming it relative to the current working directory.
        if (!newPath) return
        fs.rename(oldPath, path.join(newPath, note), (err) => {
          if (err) throw err
          console.log('File moved successfully')
        })
      } catch (e) {
        // Nothing awaits this callback, so report the failure here.
        console.error(e)
      }
    })
  })
}

fs.readdir not reading files extracted to a local directory (function not even running)

I have extracted zip files to a local directory within my lambda function however fs.readdir function is not working and is also not returning an error. I think maybe I have a syntax error with the way I set up the function to run asynchronously.
// Make Directories for Zipped and Unzipped files
try {
const zipDirFolder = await mkdirp(localZippedFolder, { recursive: true });
const unZipDirFolder = await mkdirp(localUnzippedFolder, { recursive: true });
console.log(unZipDirFolder);
// Download files from s3 Bucket
let newFolder = await s3.getObject(params).promise()
.then(data => {
console.log(data);
return data;
});
const newZipFile = newFolder.Body;
// Extract files from zipped folder and store them in a local directory
let filezFolder = await fs.createReadStream(params.Key)
.pipe(unzipper.Extract({path: unZipDirFolder}));
console.log(unZipDirFolder);
// Read Contents of that local directory
await fs.readdir(unZipDirFolder, function (err, files) {
if (err) {
console.log('THERE WAS AN ERROR:' + err);
} else {
console.log(files);
}
});
}
Problem: fs.readdir is not running nor is it returning an error message.
There is a problem in this statement:
let filezFolder = await fs.createReadStream(params.Key)
.pipe(unzipper.Extract({path: unZipDirFolder}));
fs.createReadStream().pipe() does not return a promise. So using await on it doesn't do anything useful. So, you have not waited until the .pipe() is done. Thus, you are calling fs.readdir() before the unzip operation is done.
At the moment, streams are not particularly promise-friendly. You will probably need to register for some sort of event on the resulting stream that indicates when your unzip operation is done and then do the fs.readdir() from within that event handler. I don't know your unzip library myself to know which event or how to best monitor it for completion.
Since .pipe() returns the destination writable stream, you can probably do this:
fs.createReadStream(params.Key)
.pipe(unzipper.Extract({path: unZipDirFolder}))
.on('finish', () => {
fs.readdir(...)
}).on('error', (err) => {
// error handling here
console.log(err);
});
Here's a simple little stand-alone program that shows the concept I'm talking about:
const unzipper = require('unzipper');
const fs = require('fs');
const fsp = fs.promises;
const path = require('path');
const inputZip = path.join(__dirname, "zip", "photos.zip");
const outputZipDir = path.join(__dirname, "zip-output")
// Unzips `inputZip` into `outputZipDir` and, once the extraction stream
// reports 'finish', lists the extracted files.
// Streams emit 'error' (not 'err'), and .pipe() does not forward errors from
// the source, so both the read stream and the extractor get a handler.
// NOTE(review): unzipper's documentation describes 'close' as the event that
// signals extraction to disk is complete - confirm which event suits your version.
function run() {
  const reportError = err => {
    console.log(err);
  };
  fs.createReadStream(inputZip)
    .on('error', reportError)
    .pipe(unzipper.Extract({ path: outputZipDir }))
    .on('finish', async () => {
      try {
        const files = await fsp.readdir(outputZipDir);
        console.log(files);
        // use the files here
      } catch (err) {
        // nothing awaits this async listener, so handle its failure locally
        reportError(err);
      }
    })
    .on('error', reportError);
}
run();
The setup for this program is to put photos.zip into a zip subdirectory from where this program is run and to create a zip-output subdirectory from where this program is run for the unzipped files to go.
And, here's a promisified version that uses the same setup:
const unzipper = require('unzipper');
const fs = require('fs');
const fsp = fs.promises;
const path = require('path');
const inputZip = path.join(__dirname, "zip", "photos.zip");
const outputZipDir = path.join(__dirname, "zip-output")
// returns a promise that resolves/rejects when the unzip operation is done
/**
 * Extracts `inputZip` into `outputDir`.
 *
 * @param {string} inputZip - path of the zip file to read
 * @param {string} outputDir - directory handed to unzipper.Extract
 * @returns {Promise<void>} resolves when the extraction stream emits
 *   'finish'; rejects on an 'error' from either the read stream or the
 *   extraction stream (.pipe() does not forward source errors by itself)
 */
function unzip(inputZip, outputDir) {
  return new Promise((resolve, reject) => {
    fs.createReadStream(inputZip)
      .on('error', reject)
      .pipe(unzipper.Extract({ path: outputDir }))
      .on('finish', resolve)
      .on('error', reject); // the event is named 'error'; 'err' is never emitted
  });
}
// Waits for the unzip to complete, then prints the names found in the output directory.
async function run2() {
  await unzip(inputZip, outputZipDir);
  const extracted = await fsp.readdir(outputZipDir);
  console.log(extracted);
}
// Kick off the promisified flow; a rejection from run2 is logged here.
run2().catch(err => {
console.log(err);
});
This is probably because you're trying to await fs.readdir and pass it a callback at the same time. You shouldn't do both. Try removing the await, or make your lambda function async and remove the callback.
//Using await (Don't forget to place this in an async function)
// The callback-style fs.readdir does not return a promise, so awaiting it
// yields nothing useful; the promise-returning variant lives on fs.promises.
const res = await fs.promises.readdir(yourFilePath);
//Using Callbacks
fs.readdir(yourFilePath, (err, files) => {
if (err) {
console.log('THERE WAS AN ERROR:' + err);
} else {
console.log(files);
}
});
Or you could use fs.readdirSync and do
const res = fs.readdirSync(yourFilePath)

How to test recursive function in Jest.js

I have a script that loops over directories and matches files of a specific type. Unfortunately, Jest passes this test before it finishes. I know why, but I don't know how to make the script wait for the end of the loop.
import fs from 'fs'
const path = require('path');
describe('something', () => {
  it('should something', () => {
    // Synchronous depth-first walk: `callback` receives the path of every
    // entry (directories included) and directories are then descended into.
    const traverseDir = (dir, callback) => {
      for (const entry of fs.readdirSync(dir)) {
        const fullPath = path.join(dir, entry);
        const isDir = fs.lstatSync(fullPath).isDirectory();
        callback(fullPath);
        if (isDir) {
          traverseDir(fullPath, callback);
        }
      }
    };

    traverseDir('src/', (fullPath) => {
      // last '/'-separated segment of the path
      const filename = fullPath.split('/').pop();
      if (!filename.match(/.*.foo/)) {
        return;
      }
      fs.readFile(fullPath, 'utf8', (err, data) => {
        expect(err).toBe(null)
        // some assertion
      });
    });
  });
});
You could pass done in the test parameter and call it when the test ends.
You can read more about async testing here.
import fs from "fs";
const path = require("path");
describe("something", () => {
  it("should something", done => {
    // Bookkeeping so that done() is called exactly once: after the synchronous
    // walk has finished AND every asynchronous readFile callback has run.
    // (Calling done() inside traverseDir would fire it once per directory
    // level and before any of the reads complete.)
    let pendingReads = 0;
    let walkFinished = false;
    let settled = false;
    const settle = error => {
      if (settled) return;
      if (error) {
        settled = true;
        done(error); // passing an error to done() fails the test with that reason
        return;
      }
      if (walkFinished && pendingReads === 0) {
        settled = true;
        done();
      }
    };

    // Synchronous recursive walk; `callback` sees every entry, directories included.
    const traverseDir = (dir, callback) => {
      fs.readdirSync(dir).forEach(file => {
        let fullPath = path.join(dir, file);
        if (fs.lstatSync(fullPath).isDirectory()) {
          callback(fullPath);
          traverseDir(fullPath, callback);
        } else {
          callback(fullPath);
        }
      });
    };

    traverseDir("src/", fullPath => {
      const splitted = fullPath.split("/");
      const filename = splitted[splitted.length - 1];
      if (filename.match(/.*.foo/)) {
        pendingReads += 1;
        fs.readFile(fullPath, "utf8", (err, data) => {
          pendingReads -= 1;
          try {
            expect(err).toBe(null);
          } catch (assertionError) {
            // expect() throws inside this callback, outside Jest's reach;
            // forward the failure explicitly.
            settle(assertionError);
            return;
          }
          settle();
        });
      }
    });

    walkFinished = true;
    settle(); // covers the case where no file matched and nothing is pending
  });
});
You should use fs.promises functions to list the contents of your directory recursively to obtain a single unified file list.
Unit test this function separately from any code that actually reads the file. (e.g.: your filename.match and readFile code should be tested separately from the traverseDir code.)
Example of walking directories asynchronously to get a unified file list:
This asynchronous allFilesIn function gets all files within a directory recursively and returns the list as a single array with full (relative) paths.
const fs = require('fs').promises;
const path = require('path');
/**
 * Recursively collects the paths of all non-directory entries below `dir`.
 *
 * @param {string} dir - directory to walk
 * @param {string[]} [results=[]] - accumulator shared across the recursion
 * @returns {Promise<string[]>} the `results` array, filled with paths built by
 *   joining `dir` with each entry name
 */
const allFilesIn = async (dir, results = []) => {
  const files = await fs.readdir(dir);
  // `const` keeps the loop variable local; an undeclared `file` would be an
  // implicit global (a ReferenceError in strict mode / ES modules, and shared
  // between concurrently running calls otherwise).
  for (const file of files) {
    const fullPath = path.join(dir, file);
    const stat = await fs.stat(fullPath);
    if (stat.isDirectory()) {
      await allFilesIn(fullPath, results);
    } else {
      results.push(fullPath);
    }
  }
  return results;
}
// Example call: walk 'src/' and print the resulting flat array of paths.
allFilesIn('src/').then(files => {
console.log(files); // e.g.: [ 'src\\foo.cpp', 'src\\bar.cpp', 'src\\include\\foo.h' ]
});
Once you have a single array of all the files it should be easy to use a single forEach to do something for all the files in the unified list.

How to write this with Promises?

var pdfParser = require('pdf-parser')
var fs = require('fs')
var PDF_PATH = __dirname + '/pdfs'
var results = []
var failed = []
// Lists PDF_PATH and hands every entry to pdfParser.pdf2json, collecting
// results and failures in the outer `results` / `failed` arrays.
fs.readdir(PDF_PATH, function(err, files){
if(err){
return console.log(err)
}
for(const file of files){
// Placeholder record for one PDF; not used further in the code shown.
let the_ent = {
'name': '',
'other data': []
}
// The parse result is delivered to this callback, not returned from pdf2json.
pdfParser.pdf2json(PDF_PATH + '/' + file, function(error, pdf){
if(error != null){
console.log(error)
}else if(pdf['pages'] == undefined){
failed.push(file)
console.log(file +' failed')
}else{
//populate 'results' array
}
console.log(/*pdf_data*/)
results.push(/*pdf_data*/)
})
}
// NOTE(review): these statements sit after the loop but outside the pdf2json
// callbacks; if those callbacks are invoked asynchronously (as the question
// reports) they run before any result has been pushed.
console.log(results)
console.log(failed)
results = JSON.stringify(results)
//fs.writeFileSync() write results to json
})
I don't know what is wrong with me this morning, I can't work out how to write this in async; obviously the logs/writefile at the bottom fire as soon as the script executes.
I have tried wrapping in async functions and awaiting the readdir / pdf parsing instead of using callbacks - clearly not correctly. I'm just trying to parse every pdf in a folder - push what I want to some arrays and then log them once the loop finishes zzz.
Wrap the smallest asynchronous tasks into Promises, then use async/await to combine them:
// the Promise wrapper:
// Wraps the callback-style pdfParser.pdf2json in a promise: resolves with the
// parsed result, rejects with the error passed to the callback.
const parsePdf = file =>
  new Promise((res, rej) =>
    pdfParser.pdf2json(file, (err, r) => (err ? rej(err) : res(r)))
  );
(async function () { // start an asynchronous context
  const PDF_PATH = __dirname + '/pdfs';
  const results = [], failed = []; // prefer const over let
  // reading files in a promising way is already provided natively:
  const files = await fs.promises.readdir(PDF_PATH);
  for (const file of files) { // this is in series, in parallel would probably be faster
    // placeholder record, to be filled in when populating 'results'
    let the_ent = {
      name: '',
      'other data': [], // avoid whitespaces!
    };
    let pdf;
    try {
      pdf = await parsePdf(PDF_PATH + '/' + file);
    } catch (error) {
      // one unreadable PDF must not abort the whole run: log it, record it, go on
      console.log(error);
      failed.push(file);
      continue;
    }
    if (pdf.pages === undefined) { // prefer equality (===) over comparison (==)
      failed.push(file);
      console.log(file + ' failed');
    } else {
      // populate 'results' array
    }
  }
  console.log(results, failed);
})().catch(error => {
  // nothing awaits this IIFE, so report failures (e.g. readdir) here
  console.log(error);
});
You can probably process the files in parallel too.
I would promisify the async operations and use async/await. For the fs operations, use the new fs.promises API. For others, use util.promisify() to make promisified versions.
The resolved value of the parsePDFs function I create will be an array of JSON and an array of failed filenames so you get both pieces of information back:
const util = require('util');
const pdfParser = require('pdf-parser');
// make promisified version of the function
// (`const` cannot declare a property, so assign it onto the module object;
// it is later invoked as pdfParser.pdf2jsonP(...))
pdfParser.pdf2jsonP = util.promisify(pdfParser.pdf2json);
const fsp = require('fs').promises;
const path = require('path');
const PDF_PATH = path.join(__dirname, 'pdfs');
/**
 * Parses every entry of `dir` with pdfParser.pdf2jsonP.
 *
 * @param {string} dir - directory containing the PDFs
 * @returns {Promise<{results: Array, failed: string[]}>} `results` holds the
 *   parsed objects; `failed` holds the names of the files that threw or came
 *   back without a `pages` property
 * Rejects only if the directory itself cannot be listed.
 */
async function parsePDFs(dir) {
  const files = await fsp.readdir(dir);
  const results = [];
  const failed = [];
  for (const file of files) {
    try {
      const pdf = await pdfParser.pdf2jsonP(path.join(dir, file));
      if (!pdf || pdf.pages === undefined) {
        throw new Error("pdf.pages is empty")
      }
      results.push(pdf);
    } catch (e) {
      // per-file failure: log it, remember the name, continue with the rest
      console.log(e);
      failed.push(file);
    }
  }
  // this will be the resolved value of the returned promise
  return { results, failed };
}
// Run the parser and report both outcome lists; a rejection is logged.
parsePDFs(PDF_PATH).then(data => {
  console.log("failed filenames: ", data.failed);
  console.log("json results: ", data.results);
  // do something with data.results and data.failed
}).catch(err => {
  console.log(err);
});
Note: You declare, but never use the variable the_ent.
You can use util.promisify to convert the callback-based asynchronous functions into promise-returning ones:
const readdir = util.promisify(fs.readdir);
const reader = util.promisify(pdfParser.pdf2json);
Minimal demo:
const fs = require('fs');
const util = require('util');
var pdfParser = require('pdf-parser');
const readdir = util.promisify(fs.readdir);
const reader = util.promisify(pdfParser.pdf2json);
var PDF_PATH = __dirname + '/pdfs';
(async () => {
async function processFiles() {
let results = [];
let files = await readdir(PDF_PATH);
for (const file of files) {
let pdf = await reader(PDF_PATH + '/' + file);
results.push(pdf);
}
return results;
}
const result = await processFiles();
console.info(result);
})();

Categories