getting fs.readFile to work with async await and promisify - javascript

I am trying to create a node script that looks into my inbox of recently created markdown notes, finds the metadata at the top of that note, finds the category within that metadata and then moves the file into its relevant folder. I keep getting the error message:
(node:82025) UnhandledPromiseRejectionWarning: ReferenceError: paths is not defined
I believe this is because my code is not waiting for the readFile function to finish, despite me having labeled it as async/await and having wrapped it in promisify at the top of the file. Could someone please point me in the direction as to where I'm going wrong? Complete code below (other dependencies are for another script):
const fs = require('fs');
const path = require('path');
const firstline = require('firstline');
const moment = require('moment');
const { promisify } = require('util');
const readFile = promisify(fs.readFile);
const pathToResources =
'/Users/Rob/Library/Mobile Documents/9CR7T2DMDG~com~ngocluu~onewriter/Documents/Test/Resources/';
// Returns the entries of `files` whose extension is exactly '.md'.
// NOTE: `notes` is assigned without let/const/var, so the filtered array is
// also stored in an undeclared (implicit global) variable as a side effect.
function markdownNotesOnly(files) {
return (notes = files.filter((file) => path.extname(file) === '.md'));
}
// Builds the current path of `note` (relative to __dirname) and tries to derive
// a destination folder from the category tag in the note's metadata block.
// NOTE: `readFile` is the promisified fs.readFile declared at the top of the
// file, yet it is called here with a callback, and the promise it returns is
// neither awaited nor returned. This async function therefore resolves with
// undefined, and the `return paths` statements only return from the callback.
async function getNewPath(note) {
let oldPath = path.join(__dirname, note);
// `paths` is local to this function; it is not visible to callers.
let paths = {};
let newPath = '';
readFile(oldPath, (err, data) => {
if (err) throw err;
// Everything between the '---' fences, joined into a single string.
let metadata = data
.toString()
.match(/-{3}\n.*\n-{3}/gms)
.toString();
// Category tag of the form "#§-NNN-<name>".
let category = metadata.match(/#§-\d{3}-.*/);
if (!category) {
paths = { oldPath, newPath };
return paths;
}
// Drop the leading "#§-" so only "NNN-<name>" remains.
category = category.toString().substring(3);
newPath = path.join(pathToResources, category);
paths = { oldPath, newPath };
return paths;
});
}
// Lists `startPath`, keeps the Markdown notes and renames each one into the
// folder computed for it.
// NOTE: `notes` is only available here because markdownNotesOnly assigns it
// without declaring it; `paths` is never declared in this scope, and the value
// produced by `await getNewPath(note)` is discarded.
function sortFiles(startPath) {
fs.readdir(startPath, (err, files) => {
if (err) throw err;
markdownNotesOnly(files); // returns array called notes
notes.map(async (note) => {
await getNewPath(note);
let { oldPath, newPath } = paths;
fs.rename(oldPath, path.join(newPath, note), (err) => {
if (err) throw err;
console.log('File moved successfully');
});
});
});
}
Update:
Followed the advice of Mestre San below and got it working this morning. Here is my finished code if it is of use to anybody.
import { readdir, rename } from 'fs';
import { promises as fsPromises } from 'fs';
import { resolve, extname, join } from 'path';
const __dirname = resolve();
const pathToResources =
'/Users/Rob/Library/Mobile Documents/9CR7T2DMDG~com~ngocluu~onewriter/Documents/Test/Resources/';
/**
 * Select the Markdown notes from a directory listing.
 *
 * @param {string[]} files - File names to inspect.
 * @returns {string[]} The names whose extension is exactly '.md'.
 */
function markdownNotesOnly(files) {
  const isMarkdown = (name) => extname(name) === '.md';
  return files.filter(isMarkdown);
}
/**
 * Work out where a note currently lives and where it should be moved to.
 *
 * The note is read from `__dirname`. Its metadata block (the text between
 * '---' fences) is searched for a category tag of the form "#§-NNN-<name>";
 * the destination is `pathToResources` joined with "NNN-<name>".
 *
 * @param {string} note - File name of the note, relative to `__dirname`.
 * @returns {Promise<{oldPath: string, newPath: string}>} `newPath` is '' when
 *   the note has no metadata block or no category tag.
 * @throws Rejects with the underlying fs error when the note cannot be read.
 */
async function getPaths(note) {
  const metadataRegex = /-{3}\n.*\n-{3}/gms;
  const categoryRegex = /#§-\d{3}-.*/;
  const oldPath = join(__dirname, note);

  // readFile rejects on failure, so the resolved value is never an Error and
  // no explicit error check is needed here.
  const contents = await fsPromises.readFile(oldPath, 'utf8');

  const metadata = contents.match(metadataRegex);
  if (!metadata) {
    return { oldPath, newPath: '' };
  }

  const category = metadata.toString().match(categoryRegex);
  if (!category) {
    return { oldPath, newPath: '' };
  }

  // Strip the leading "#§-" so only "NNN-<name>" is used as the folder name.
  const folder = category[0].substring(3);
  return { oldPath, newPath: join(pathToResources, folder) };
}
/**
 * Move every Markdown note found in `startPath` into the folder named by its
 * category metadata.
 *
 * Notes for which getPaths() yields an empty `newPath` (no metadata or no
 * category) are left where they are instead of being renamed onto themselves.
 *
 * NOTE(review): getPaths() resolves notes against `__dirname`, so `startPath`
 * presumably has to be that same directory — confirm with callers.
 *
 * @param {string} startPath - Directory to scan for notes.
 * @returns {Promise<void>} Resolves once every note has been processed;
 *   rejects with the first listing, read or rename error.
 */
export async function sortFiles(startPath) {
  const files = await fsPromises.readdir(startPath);
  const notes = markdownNotesOnly(files);

  // Process the notes in parallel, but keep hold of the promises so that
  // failures reach the caller instead of being thrown inside a callback.
  await Promise.all(
    notes.map(async (note) => {
      const { oldPath, newPath } = await getPaths(note);
      if (newPath === '') {
        return;
      }
      await fsPromises.rename(oldPath, join(newPath, note));
      console.log('File moved successfully');
    })
  );
}

The snippet you posted is definitely incomplete. Since we can only give advice based on what we see here, these are my considerations:
You are creating a promisified function but you are using it with a callback. You should pick one style. The way the code is now your callback will never be called. You have to either use fs.readFile there or readFile(oldPath).then(data =>. Even better would be to use fs.promises.readFile if it is available on the Node.js version you are using
I can see that the paths variable is indeed not defined inside the sortFiles function. The code declares that variable inside the getNewPath function, which makes it unavailable inside sortFiles.
The variable notes is not defined either. The code is creating the notes variable inside the function markdownNotesOnly. Although sharing these values across function using the global scope is not encouraged if you choose to do that you should definitely declare it outside that function with let notes.
The sortFiles is not being called but since it is using an async function and you are not passing any callback to it as an argument it will pretty much work as a fire and forget kinda thing
The sortFiles function has the word sort in its name but it has a rename call in its body, that can be confusing even for yourself in the future.
But, the way to make your code stop throwing that very same error is by doing something like this:
'use strict'
const fs = require('fs')
const path = require('path')
// const firstline = require('firstline')
// const moment = require('moment')
const { promisify } = require('util')
const readFile = promisify(fs.readFile)
const pathToResources =
'/Users/Rob/Library/Mobile Documents/9CR7T2DMDG~com~ngocluu~onewriter/Documents/Test/Resources/'
const readdir = promisify(fs.readdir)
const rename = promisify(fs.rename)

/**
 * Keep only the Markdown files from a directory listing.
 *
 * @param {string[]} files - File names to inspect.
 * @returns {string[]} The names whose extension is exactly '.md'.
 */
function markdownNotesOnly (files) {
  return files.filter((file) => path.extname(file) === '.md')
}

/**
 * Work out the current and the destination path of a note.
 *
 * The result is returned rather than stored in a shared variable, so notes
 * processed concurrently cannot overwrite each other's paths.
 *
 * @param {string} note - File name of the note.
 * @param {string} [baseDir=__dirname] - Directory that contains the note.
 * @returns {Promise<{oldPath: string, newPath: string}>} `newPath` is '' when
 *   the note has no metadata block or no "#§-NNN-<name>" category tag.
 */
async function getNewPath (note, baseDir = __dirname) {
  const oldPath = path.join(baseDir, note)
  const data = await readFile(oldPath, 'utf8')
  // match() returns null when there is no '---' fenced block; guard it so a
  // note without metadata does not crash the whole run.
  const metadata = data.match(/-{3}\n.*\n-{3}/gms)
  const category = metadata && metadata.toString().match(/#§-\d{3}-.*/)
  if (!category) {
    return { oldPath, newPath: '' }
  }
  // Drop the leading "#§-" so only "NNN-<name>" is used as the folder name.
  const newPath = path.join(pathToResources, category[0].substring(3))
  return { oldPath, newPath }
}

/**
 * Move every Markdown note in `startPath` into the folder of its category.
 * Notes without a category stay where they are.
 *
 * @param {string} startPath - Directory to scan for notes.
 * @returns {Promise<void>} Rejects with the first listing, read or rename error.
 */
async function sortFiles (startPath) {
  const notes = markdownNotesOnly(await readdir(startPath))
  await Promise.all(notes.map(async (note) => {
    const { oldPath, newPath } = await getNewPath(note, startPath)
    if (newPath === '') {
      return
    }
    await rename(oldPath, path.join(newPath, note))
    console.log('File moved successfully')
  }))
}

Related

How do I get an array of folders name in an certain directory? [duplicate]

I was hoping this would be a simple thing, but I cannot find anything out there to do so.
I just want to get all folders/directories within a given folder/directory.
So for example:
<MyFolder>
|- SomeFolder
|- SomeOtherFolder
|- SomeFile.txt
|- SomeOtherFile.txt
|- x-directory
I would expect to get an array of:
["SomeFolder", "SomeOtherFolder", "x-directory"]
Or the above with the path if that was how it was served...
So does anything already exist to do the above?
Promise
import { readdir } from 'fs/promises'
/**
 * List the names of the directories directly inside `source`.
 *
 * @param {string} source - Directory to inspect.
 * @returns {Promise<string[]>} Names of the entries that are directories.
 */
const getDirectories = async source => {
  const entries = await readdir(source, { withFileTypes: true })
  const names = []
  for (const entry of entries) {
    if (entry.isDirectory()) {
      names.push(entry.name)
    }
  }
  return names
}
Callback
import { readdir } from 'fs'
/**
 * List the names of the directories directly inside `source` (callback style).
 *
 * Follows the Node.js error-first convention: the callback receives
 * `(err)` on failure and `(null, names)` on success, so a caller can tell an
 * error apart from a result.
 *
 * @param {string} source - Directory to inspect.
 * @param {(err: Error|null, names?: string[]) => void} callback
 */
const getDirectories = (source, callback) =>
  readdir(source, { withFileTypes: true }, (err, files) => {
    if (err) {
      callback(err)
      return
    }
    const names = files
      .filter(dirent => dirent.isDirectory())
      .map(dirent => dirent.name)
    callback(null, names)
  })
Synchronous
import { readdirSync } from 'fs'
/**
 * Synchronously list the names of the directories directly inside `source`.
 *
 * @param {string} source - Directory to inspect.
 * @returns {string[]} Names of the entries that are directories.
 */
const getDirectories = source => {
  const entries = readdirSync(source, { withFileTypes: true })
  return entries.reduce((names, dirent) => {
    if (dirent.isDirectory()) {
      names.push(dirent.name)
    }
    return names
  }, [])
}
List directories using a path.
/**
 * Synchronously list the sub-directories of `path`.
 *
 * @param {string} path - Directory to inspect.
 * @returns {string[]} Names (not full paths) of the entries that stat as directories.
 */
function getDirectories(path) {
  const directories = [];
  for (const entry of fs.readdirSync(path)) {
    const stats = fs.statSync(`${path}/${entry}`);
    if (stats.isDirectory()) {
      directories.push(entry);
    }
  }
  return directories;
}
Recursive solution
I came here in search of a way to get all of the subdirectories, and all of their subdirectories, etc. Building on the accepted answer, I wrote this:
const fs = require('fs');
const path = require('path');
/**
 * Flatten one level of nesting.
 *
 * @param {Array} lists - Array whose items are arrays (or single values).
 * @returns {Array} A new array with every item concatenated in order.
 */
function flatten(lists) {
  let merged = [];
  lists.forEach((item) => {
    merged = merged.concat(item);
  });
  return merged;
}
/**
 * Synchronously list the sub-directories of `srcpath` as joined paths.
 *
 * @param {string} srcpath - Directory to inspect.
 * @returns {string[]} `srcpath` joined with each entry that stats as a directory.
 */
function getDirectories(srcpath) {
  const directories = [];
  for (const name of fs.readdirSync(srcpath)) {
    const candidate = path.join(srcpath, name);
    if (fs.statSync(candidate).isDirectory()) {
      directories.push(candidate);
    }
  }
  return directories;
}
/**
 * Collect `srcpath` and every directory below it.
 *
 * @param {string} srcpath - Directory to start from.
 * @returns {string[]} `srcpath` first, followed by its sub-directories
 *   (each followed by its own descendants).
 */
function getDirectoriesRecursive(srcpath) {
  const nested = getDirectories(srcpath).map(getDirectoriesRecursive);
  return [srcpath].concat(flatten(nested));
}
This should do it:
CoffeeScript (sync)
fs = require 'fs'
# Synchronously list the names of the non-hidden directories inside rootDir.
# Entries whose name starts with '.' are skipped without being stat-ed.
getDirs = (rootDir) ->
  isVisibleDir = (name) ->
    name[0] != '.' and fs.statSync("#{rootDir}/#{name}").isDirectory()
  fs.readdirSync(rootDir).filter isVisibleDir
CoffeeScript (async)
fs = require 'fs'
# Asynchronously list the names of the non-hidden directories inside rootDir.
#
# cb is called exactly once with the array of names, after every stat has
# completed. If the directory cannot be read, cb receives an empty array and
# the error as a second argument.
getDirs = (rootDir, cb) ->
  fs.readdir rootDir, (err, files) ->
    return cb([], err) if err
    visible = files.filter (file) -> file[0] != '.'
    dirs = []
    pending = visible.length
    # Nothing to stat: report straight away instead of never calling back.
    return cb(dirs) if pending == 0
    # forEach gives every callback its own `file`; a plain loop would leave
    # all stat callbacks looking at the last file.
    visible.forEach (file) ->
      fs.stat "#{rootDir}/#{file}", (statErr, stat) ->
        dirs.push(file) if not statErr and stat.isDirectory()
        # Count completions rather than comparing indexes: stats can finish
        # in any order.
        pending -= 1
        cb(dirs) if pending == 0
JavaScript (async)
var fs = require('fs');
/**
 * Asynchronously list the names of the non-hidden directories in `rootDir`.
 *
 * `cb` is called exactly once with the array of names, after every stat has
 * completed. If the directory cannot be read, `cb` receives an empty array
 * and the error as an optional second argument.
 *
 * @param {string} rootDir - Directory to inspect.
 * @param {function(string[], Error=)} cb - Receives the directory names.
 */
var getDirs = function(rootDir, cb) {
  fs.readdir(rootDir, function(err, files) {
    if (err) {
      return cb([], err);
    }
    var visible = files.filter(function(file) {
      return file[0] !== '.';
    });
    var dirs = [];
    var pending = visible.length;
    // Nothing to stat: report straight away instead of never calling back.
    if (pending === 0) {
      return cb(dirs);
    }
    visible.forEach(function(file) {
      fs.stat(rootDir + '/' + file, function(statErr, stat) {
        if (!statErr && stat.isDirectory()) {
          dirs.push(file);
        }
        // Count completions rather than comparing indexes: stats can finish
        // in any order, and skipped dot-files never reach this callback.
        pending -= 1;
        if (pending === 0) {
          cb(dirs);
        }
      });
    });
  });
};
Alternatively, if you are able to use external libraries, you can use filehound. It supports callbacks, promises and sync calls.
Using promises:
const Filehound = require('filehound');
Filehound.create()
.path("MyFolder")
.directory() // only search for directories
.find()
.then((subdirectories) => {
console.log(subdirectories);
});
Using callbacks:
const Filehound = require('filehound');
Filehound.create()
.path("MyFolder")
.directory()
.find((err, subdirectories) => {
if (err) return console.error(err);
console.log(subdirectories);
});
Sync call:
const Filehound = require('filehound');
const subdirectories = Filehound.create()
.path("MyFolder")
.directory()
.findSync();
console.log(subdirectories);
For further information (and examples), check out the docs: https://github.com/nspragg/filehound
Disclaimer: I'm the author.
With node.js version >= v10.13.0, fs.readdirSync will return an array of fs.Dirent objects if withFileTypes option is set to true.
So you can use,
const fs = require('fs')
/**
 * Synchronously list the names of the directories directly inside `source`,
 * using the Dirent objects returned by readdirSync.
 *
 * @param {string} source - Directory to inspect.
 * @returns {string[]} Names of the entries that are directories.
 */
const directories = source =>
  fs.readdirSync(source, { withFileTypes: true })
    .filter(entry => entry.isDirectory())
    .map(entry => entry.name)
/**
 * List the sub-directories of `rootdir` and hand them to `cb`.
 *
 * The listing is asynchronous, but each entry is checked with a synchronous
 * stat. A listing error is thrown from inside the readdir callback.
 *
 * @param {string} rootdir - Directory to inspect.
 * @param {function(string[])} cb - Receives `rootdir` joined with each directory name.
 */
var getDirectories = (rootdir, cb) => {
  fs.readdir(rootdir, (err, files) => {
    if (err) throw err;
    var dirs = [];
    files.forEach(filename => {
      var pathname = path.join(rootdir, filename);
      if (fs.statSync(pathname).isDirectory()) {
        dirs.push(pathname);
      }
    });
    return cb(dirs);
  });
};
// Example call: the directory to scan comes first, the callback second.
getDirectories('MyFolder', myDirectories => console.log(myDirectories));
Using fs-extra, which promises the async fs calls, and the new await async syntax:
const fs = require("fs-extra");
/**
 * List the names of the directories directly inside `path`.
 *
 * All entries are stat-ed in parallel; the result keeps the order of the
 * directory listing rather than the order in which the stats complete.
 *
 * @param {string} path - Directory to inspect, with or without a trailing "/".
 * @returns {Promise<string[]>} Names of the entries that are directories.
 */
async function getDirectories(path){
  // Plain concatenation only works when `path` already ends with a
  // separator, so add one when it is missing.
  const base = path.endsWith("/") ? path : path + "/";
  const names = await fs.readdir(path);
  const stats = await Promise.all(names.map(name => fs.stat(base + name)));
  return names.filter((name, index) => stats[index].isDirectory());
}
let directories = await getDirectories("/")
This answer does not use blocking functions like readdirSync or statSync. It does not use external dependencies nor find itself in the depths of callback hell.
Instead we use modern JavaScript conveniences like Promises and async-await syntax. And asynchronous results are processed in parallel, not sequentially -
const { readdir, stat } =
require ("fs") .promises
const { join } =
require ("path")
/**
 * Recursively collect `path` and every directory below it.
 * Sibling entries are processed in parallel.
 *
 * @param {string} [path="."] - Where to start.
 * @returns {Promise<string[]>} `path` followed by all nested directories,
 *   or an empty array when `path` is not a directory.
 */
const dirs = async (path = ".") => {
  const info = await stat(path)
  if (!info.isDirectory()) {
    return []
  }
  const entries = await readdir(path)
  const nested = await Promise.all(entries.map(p => dirs(join(path, p))))
  return [].concat(path, ...nested)
}
I'll install an example package, and then test our function -
$ npm install ramda
$ node
Let's see it work -
> dirs (".") .then (console.log, console.error)
[ '.'
, 'node_modules'
, 'node_modules/ramda'
, 'node_modules/ramda/dist'
, 'node_modules/ramda/es'
, 'node_modules/ramda/es/internal'
, 'node_modules/ramda/src'
, 'node_modules/ramda/src/internal'
]
Using a generalised module, Parallel, we can simplify the definition of dirs -
const Parallel =
require ("./Parallel")
const dirs = async (path = ".") =>
(await stat (path)) .isDirectory ()
? Parallel (readdir (path))
.flatMap (f => dirs (join (path, f)))
.then (results => [ path, ...results ])
: []
The Parallel module used above was a pattern that was extracted from a set of functions designed to solve a similar problem. For more explanation, see this related Q&A.
And an async version of getDirectories; you need the async module for this:
var fs = require('fs');
var path = require('path');
var async = require('async'); // https://github.com/caolan/async
// Original function
/**
 * Synchronously list the names of the sub-directories of `srcpath`.
 *
 * @param {string} srcpath - Directory to inspect.
 * @returns {string[]} Names (not full paths) of the entries that stat as directories.
 */
function getDirsSync(srcpath) {
  var isDirectory = function (file) {
    var stats = fs.statSync(path.join(srcpath, file));
    return stats.isDirectory();
  };
  var entries = fs.readdirSync(srcpath);
  return entries.filter(function (file) {
    return isDirectory(file);
  });
}
// Asynchronous counterpart of getDirsSync: lists `srcpath`, stats every entry
// and passes the entries that are directories on to `cb` via async.filter.
// A listing error is logged and reported to `cb` as an empty array.
// NOTE(review): the iterator calls its callback with a single boolean and
// `cb` is handed straight to async.filter; whether that matches the callback
// contract depends on the installed version of the `async` library — confirm.
function getDirs(srcpath, cb) {
fs.readdir(srcpath, function (err, files) {
if(err) {
console.error(err);
return cb([]);
}
// Truth test for a single entry: "is this a directory?"
var iterator = function (file, cb) {
fs.stat(path.join(srcpath, file), function (err, stats) {
if(err) {
// An entry that cannot be stat-ed is logged and treated as "not a directory".
console.error(err);
return cb(false);
}
cb(stats.isDirectory());
})
}
async.filter(files, iterator, cb);
});
}
Fully async version with ES6, only native packages, fs.promises and async/await, does file operations in parallel:
const fs = require('fs');
const path = require('path');
/**
 * List the sub-directories of `rootPath`, stat-ing all entries in parallel.
 *
 * @param {string} rootPath - Directory to inspect.
 * @returns {Promise<string[]>} `rootPath` joined with each directory name,
 *   in listing order.
 */
async function listDirectories(rootPath) {
  const names = await fs.promises.readdir(rootPath);
  const checks = names.map(async (name) => {
    const fullPath = path.join(rootPath, name);
    const stats = await fs.promises.stat(fullPath);
    return stats.isDirectory() ? fullPath : null;
  });
  const resolved = await Promise.all(checks);
  return resolved.filter((fullPath) => fullPath !== null);
}
Tested, it works nicely.
You can use graph-fs
const {Node} = require("graph-fs");
const directory = new Node("/path/to/directory");
const subDirectories = directory.children.filter(child => child.is.directory);
Using the glob package, just add a trailing slash to find directories only:
import {promise as glob} from "glob-promise"
const firstLevelFolders = await glob("MyFolder/*/")
const recursiveFolders = await glob("MyFolder/**/")
CoffeeScript version of this answer, with proper error handling:
fs = require "fs"
{join} = require "path"
async = require "async"
get_subdirs = (root, callback)->
fs.readdir root, (err, files)->
return callback err if err
subdirs = []
async.each files,
(file, callback)->
fs.stat join(root, file), (err, stats)->
return callback err if err
subdirs.push file if stats.isDirectory()
callback null
(err)->
return callback err if err
callback null, subdirs
Depends on async
Alternatively, use a module for this!
(There are modules for everything. [citation needed])
If you need to use all async version. You can have something like this.
Record the directory length, uses it as an indicator to tell if all async stat tasks are finished.
If the async stat tasks are finished, all the file stat has been checked, so call the callback
This will only work as long as Node.js is single thread, because it assumes no two async tasks will increase the counter at the same time.
'use strict';
var fs = require("fs");
var path = require("path");
var basePath = "./";
/**
 * Print every collected entry.
 *
 * @param {Array<{fileName: string, isFile: boolean}>} results - Entries to print.
 */
function result_callback(results) {
  results.forEach((obj) => {
    // Each label now matches the property it prints (they were swapped).
    console.log("fileName: " + obj.fileName);
    console.log("isFile: " + obj.isFile);
  });
}
// Stat every entry of basePath in parallel and report the plain files once
// all stats have completed. Completion is detected with a counter.
fs.readdir(basePath, (err, files) => {
  if (err) {
    console.error(err);
    return;
  }
  var results = [];
  var total = files.length;
  var finished = 0;
  // An empty directory starts no stat call, so report immediately.
  if (total === 0) {
    result_callback(results);
    return;
  }
  files.forEach((fileName) => {
    var fullPath = path.join(basePath, fileName);
    fs.stat(fullPath, (err, stat) => {
      // this will work because Node.js is single thread
      // therefore, the counter will not increment at the same time by two callback
      finished++;
      if (err) {
        // A failed stat still counts as finished, otherwise the final
        // callback would never fire.
        console.error(err);
      } else if (stat.isFile()) {
        results.push({
          fileName: fileName,
          isFile: stat.isFile()
        });
      }
      if (finished === total) {
        result_callback(results);
      }
    });
  });
});
As you can see, this is a "depth first" approach, and this could result in callback hell, and it is not quite "functional". People try to solve this problem with Promises, by wrapping the async task in a Promise object.
'use strict';
var fs = require("fs");
var path = require("path");
var basePath = "./";
/**
 * Print every collected entry.
 *
 * @param {Array<{fileName: string, isFile: boolean}>} results - Entries to print.
 */
function result_callback(results) {
  results.forEach((obj) => {
    // Each label now matches the property it prints (they were swapped).
    console.log("fileName: " + obj.fileName);
    console.log("isFile: " + obj.isFile);
  });
}
// Stat every entry of basePath in parallel, one Promise per entry, and print
// the plain files once all of them have resolved.
fs.readdir(basePath, (err, files) => {
  if (err) {
    console.error(err);
    return;
  }
  const promises = files.map((fileName) => {
    const fullPath = path.join(basePath, fileName);
    return new Promise((resolve, reject) => {
      // try to replace fullPath with "aaa", it will reject
      fs.stat(fullPath, (err, stat) => {
        if (err) {
          reject(err);
          return;
        }
        resolve({
          fileName: fileName,
          isFile: stat.isFile()
        });
      });
    });
  });
  Promise.all(promises).then((values) => {
    console.log("All the promise resolved");
    console.log(values);
    console.log("Filter out folder: ");
    values
      .filter((obj) => obj.isFile)
      .forEach((obj) => {
        console.log(obj.fileName);
      });
  }, (reason) => {
    console.log("Not all the promise resolved");
    console.log(reason);
  });
});
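Using the fs and path modules you can get the folders. The example below uses the callback form of readdir together with a synchronous stat check. If you want the files instead, change isDirectory() to isFile() (see fs.Stats in the Node.js fs documentation). Finally, you can get each file's name, extension and so on with the Node.js path module.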
var fs = require("fs"),
path = require("path");
//your <MyFolder> path
var p = "MyFolder"
fs.readdir(p, function (err, files) {
if (err) {
throw err;
}
//this can get all folder and file under <MyFolder>
files.map(function (file) {
//return file or folder path, such as **MyFolder/SomeFile.txt**
return path.join(p, file);
}).filter(function (file) {
//use sync judge method. The file will add next files array if the file is directory, or not.
return fs.statSync(file).isDirectory();
}).forEach(function (files) {
//The files is array, so each. files is the folder name. can handle the folder.
console.log("%s", files);
});
});
Just in case anyone else ends up here from a web search, and has Grunt already in their dependency list, the answer to this becomes trivial. Here's my solution:
/**
 * Return all the subfolders of this path
 * @param {String} parentFolderPath - valid folder path
 * @param {String} glob ['/*'] - optional glob so you can do recursive if you want
 * @returns {String[]} subfolder paths
 */
const getSubfolders = (parentFolderPath, glob = '/*') => {
  // grunt.file.expand is asked to keep only the matches that are directories.
  return grunt.file.expand({filter: 'isDirectory'}, parentFolderPath + glob);
}
Another recursive approach
Thanks to Mayur for letting me know about withFileTypes. I wrote the following code to get the files of a particular folder recursively. It can be easily modified to get only directories.
/**
 * Recursively collect the regular files below `dir`.
 *
 * Each returned Dirent is annotated with `__fullPath` (joined onto `dir`) and
 * `__relativePath` (relative to the directory the walk started in).
 * Entries that are neither directories nor regular files (symlinks, sockets,
 * ...) are skipped.
 *
 * @param {string} dir - Directory to walk.
 * @param {string} [base=''] - Relative prefix accumulated during recursion.
 * @returns {import('fs').Dirent[]} The annotated file entries.
 */
const getFiles = (dir, base = '') =>
  readdirSync(dir, {withFileTypes: true}).reduce((files, file) => {
    const filePath = path.join(dir, file.name)
    const relativePath = path.join(base, file.name)
    if (file.isDirectory()) {
      return files.concat(getFiles(filePath, relativePath))
    }
    if (file.isFile()) {
      file.__fullPath = filePath
      file.__relativePath = relativePath
      return files.concat(file)
    }
    // Always hand the accumulator back; returning nothing here would make the
    // next iteration call concat on undefined.
    return files
  }, [])
functional programming
const fs = require('fs')
const path = require('path')
const R = require('ramda')
const getDirectories = pathName => {
const isDirectory = pathName => fs.lstatSync(pathName).isDirectory()
const mapDirectories = pathName => R.map(name => path.join(pathName, name), fs.readdirSync(pathName))
const filterDirectories = listPaths => R.filter(isDirectory, listPaths)
return {
paths:R.pipe(mapDirectories)(pathName),
pathsFiltered: R.pipe(mapDirectories, filterDirectories)(pathName)
}
}
You could use dree, if using a module is affordable
const dree = require('dree');
// Only look at the direct children of the scanned path.
const options = {
  depth: 1
};
// Files are of no interest here, so the file callback does nothing.
const fileCallback = function() {};
const directories = [];
// Collect the name of every directory dree reports.
const dirCallback = function(dir) {
  directories.push(dir.name);
};
// The options and both callbacks must be passed to scan(); otherwise
// `directories` stays empty.
// NOTE(review): confirm against the dree docs whether the scanned root itself
// is also reported to dirCallback.
dree.scan('./dir', options, fileCallback, dirCallback);
console.log(directories);
The directories which are directed children of the specified path ("./dir") will be printed.
If you do not put the option depth: 1, you would even obtain all the directories in a recursively way, so not only the directed children of the specified path.

How to test recursive function in Jest.js

I have a script that loops over directories and matches files of a specific type. Unfortunately Jest passes this test before it ends. I know why, but I don't know how to make the script wait for the end of the loop.
import fs from 'fs'
const path = require('path');
describe('something', () => {
it('should something', () => {
const traverseDir = (dir, callback) => {
fs.readdirSync(dir).forEach(file => {
let fullPath = path.join(dir, file);
if (fs.lstatSync(fullPath).isDirectory()) {
callback(fullPath)
traverseDir(fullPath, callback);
} else {
callback(fullPath)
}
});
}
traverseDir('src/', (fullPath) => {
const splitted = fullPath.split('/')
const filename = splitted[splitted.length - 1]
if (filename.match(/.*.foo/)) {
fs.readFile(fullPath, 'utf8', (err, data) => {
expect(err).toBe(null)
// some assertion
})
}
})
})
})
You could pass done in the test parameter and call it when the test ends.
You can read more about async testing here.
import fs from "fs";
const path = require("path");
describe("something", () => {
  it("should something", done => {
    // Synchronously walk `dir`, invoking `callback` with every path found
    // (directories included).
    const traverseDir = (dir, callback) => {
      fs.readdirSync(dir).forEach(file => {
        const fullPath = path.join(dir, file);
        const isDirectory = fs.lstatSync(fullPath).isDirectory();
        callback(fullPath);
        if (isDirectory) {
          traverseDir(fullPath, callback);
        }
      });
    };

    let pendingReads = 0;
    let traversalFinished = false;
    let finished = false;

    // Tell Jest the test is over, exactly once. Passing an error fails it.
    const finish = error => {
      if (finished) return;
      finished = true;
      if (error) {
        done(error);
      } else {
        done();
      }
    };

    // The test is complete only when the walk is over AND every
    // asynchronous read has reported back.
    const finishIfIdle = () => {
      if (traversalFinished && pendingReads === 0) finish();
    };

    traverseDir("src/", fullPath => {
      const filename = path.basename(fullPath);
      // Match the ".foo" extension; the unescaped /.*.foo/ also matched any
      // name that merely contained "foo".
      if (/\.foo$/.test(filename)) {
        pendingReads += 1;
        fs.readFile(fullPath, "utf8", (err, data) => {
          try {
            expect(err).toBe(null);
            // some assertion
          } catch (assertionError) {
            // A throw inside this callback would not reach Jest by itself.
            finish(assertionError);
            return;
          }
          pendingReads -= 1;
          finishIfIdle();
        });
      }
    });

    traversalFinished = true;
    finishIfIdle();
  });
});
You should use fs.promises functions to list the contents of your directory recursively to obtain a single unified file list.
Unit test this function separately from any code that actually reads the file. (e.g.: your filename.match and readFile code should be tested separately from the traverseDir code.)
Example of walking directories asynchronously to get a unified file list:
This asynchronous allFilesIn function gets all files within a directory recursively and returns the list as a single array with full (relative) paths.
const fs = require('fs').promises;
const path = require('path');
/**
 * Recursively collect every file below `dir`.
 *
 * @param {string} dir - Directory to walk.
 * @param {string[]} [results=[]] - Accumulator shared by the recursive calls.
 * @returns {Promise<string[]>} `results`, filled with `dir`-prefixed paths of
 *   all non-directory entries.
 */
const allFilesIn = async (dir, results = []) => {
  const files = await fs.readdir(dir);
  // `const` keeps the loop variable local; without a declaration it is an
  // implicit global (and a ReferenceError in strict mode / ES modules).
  for (const file of files) {
    const fullPath = path.join(dir, file);
    const stat = await fs.stat(fullPath);
    if (stat.isDirectory()) {
      await allFilesIn(fullPath, results);
    } else {
      results.push(fullPath);
    }
  }
  return results;
}
// Example call:
allFilesIn('src/').then(files => {
console.log(files); // e.g.: [ 'src\\foo.cpp', 'src\\bar.cpp', 'src\\include\\foo.h' ]
});
Once you have a single array of all the files it should be easy to use a single forEach to do something for all the files in the unified list.

How to write this with Promises?

var pdfParser = require('pdf-parser')
var fs = require('fs')
var PDF_PATH = __dirname + '/pdfs'
var results = []
var failed = []
fs.readdir(PDF_PATH, function(err, files){
if(err){
return console.log(err)
}
for(const file of files){
let the_ent = {
'name': '',
'other data': []
}
pdfParser.pdf2json(PDF_PATH + '/' + file, function(error, pdf){
if(error != null){
console.log(error)
}else if(pdf['pages'] == undefined){
failed.push(file)
console.log(file +' failed')
}else{
//populate 'results' array
}
console.log(/*pdf_data*/)
results.push(/*pdf_data*/)
})
}
console.log(results)
console.log(failed)
results = JSON.stringify(results)
//fs.writeFileSync() write results to json
})
I don't know what is wrong with me this morning, I can't work out how to write this in async; obviously the logs/writefile at the bottom fire as soon as the script executes.
I have tried wrapping in async functions and awaiting the readdir / pdf parsing instead of using callbacks - clearly not correctly. I'm just trying to parse every pdf in a folder - push what I want to some arrays and then log them once the loop finishes zzz.
Wrap the smallest asynchronous tasks into Promises, then use async/await to combine them:
// the Promise wrapper:
/**
 * Promise-returning wrapper around the callback-based pdfParser.pdf2json.
 *
 * @param {string} file - Path of the PDF to parse.
 * @returns {Promise<object>} Resolves with the parsed PDF, rejects with the
 *   error reported by the parser.
 */
const parsePdf = file => new Promise((res, rej) =>
  pdfParser.pdf2json(file, (err, r) => (err ? rej(err) : res(r)))
);
(async function () { // start an asynchronous context
  const PDF_PATH = __dirname + '/pdfs';
  const results = [], failed = []; // prefer const over let
  // reading files in a promising way is already provided natively:
  const files = await fs.promises.readdir(PDF_PATH);
  for (const file of files) { // this is in series, in parallel would probably be faster
    let the_ent = {
      name: '',
      'other data': [], // avoid whitespaces!
    };
    let pdf;
    try {
      pdf = await parsePdf(PDF_PATH + '/' + file);
    } catch (error) {
      // Log the failure and carry on, so one unreadable PDF does not abort
      // the whole run.
      console.log(error);
      continue;
    }
    if (pdf.pages === undefined) { // prefer equality (===) over comparison (==)
      failed.push(file);
      console.log(file + ' failed');
    } else {
      // populate 'results' array
    }
  }
  console.log(results, failed);
})().catch(console.error); // nothing else observes this promise, so report failures here
You can probably process the files in parallel too.
I would promisify the async operations and use async/await. For the fs operations, use the new fs.promises API. For others, use util.promisify() to make promisified versions.
The resolved value of the parsePDFs function I create will be an array of JSON and an array of failed filenames so you get both pieces of information back:
const util = require('util');
const pdfParser = require('pdf-parser');
// make promisified version of the function
// It is attached as a property (a `const` declaration cannot target a
// property), which is how parsePDFs calls it: pdfParser.pdf2jsonP(...).
pdfParser.pdf2jsonP = util.promisify(pdfParser.pdf2json);
const fsp = require('fs').promises;
const path = require('path');
const PDF_PATH = path.join(__dirname, 'pdfs');
/**
 * Parse every file in `dir` as a PDF, one after the other.
 *
 * @param {string} dir - Directory containing the PDFs.
 * @returns {Promise<{results: object[], failed: string[]}>} `results` holds the
 *   parsed PDFs; `failed` holds the names of files that could not be parsed
 *   or that produced no `pages`.
 */
async function parsePDFs(dir) {
  const results = [];
  const failed = [];
  const files = await fsp.readdir(dir);
  for (const file of files) {
    let the_ent = {
      'name': '',
      'other data': []
    }
    try {
      const pdf = await pdfParser.pdf2jsonP(path.join(dir, file));
      const hasPages = Boolean(pdf) && pdf.pages !== undefined;
      if (!hasPages) {
        throw new Error("pdf.pages is empty")
      }
      results.push(pdf);
    } catch (e) {
      // Any parse problem is logged and the file is recorded as failed.
      console.log(e);
      failed.push(file);
    }
  }
  // this will be the resolved value of the returned promise
  return { results, failed };
}
// Run the parser and report both outcomes.
parsePDFs(PDF_PATH).then(data => {
  console.log("failed filenames: ", data.failed);
  console.log("json results: ", data.results);
  // do something with data.results and data.failed
}).catch(err => {
  console.log(err);
});
Note: You declare, but never use the variable the_ent.
You can use util.promisify to promisify the callback-based functions:
const readdir = util.promisify(fs.readdir);
const reader = util.promisify(pdfParser.pdf2json);
Minimal demo:
const fs = require('fs');
const util = require('util');
var pdfParser = require('pdf-parser');
const readdir = util.promisify(fs.readdir);
const reader = util.promisify(pdfParser.pdf2json);
var PDF_PATH = __dirname + '/pdfs';
(async () => {
async function processFiles() {
let results = [];
let files = await readdir(PDF_PATH);
for (const file of files) {
let pdf = await reader(PDF_PATH + '/' + file);
results.push(pdf);
}
return results;
}
const result = await processFiles();
console.info(result);
})();

function is being executed twice when called only once

I am making a program that will read content of files inside nested folders.For now I am just trying to log the content of the file in console. But I am getting two logs instead of only one.Here is what I have done till now
const fs = require('fs');
const path = require('path');
const { promisify } = require('util');
const getStats = promisify(fs.stat);
const readdir = promisify(fs.readdir);
const http = require('http');
handle_files = async (req, res) => {
let files = await scanDir("logs_of_109");
let result = await read_content(files)
check_file_content(result)
res.writeHead(200, { 'Content-Type': 'text/html' });
// console.log(result)
res.write("Hello");
res.end();
};
check_file_content = (file_data) => {
console.log(file_data[1])
}
async function read_content(files) {
let file_data = []
files.map(file => {
let start_index = file.toString().lastIndexOf('.') + 1
let ext = file.substring(start_index, file.length)
if (ext == 'data') {
file_data.push(fs.readFileSync(file, { encoding: 'utf-8' }))
}
})
return file_data
}
http.createServer(handle_files).listen(8080)
async function scanDir(dir, fileList = []) {
// fetch list of files from the giver directory
let files = await readdir(dir);
// loop through all the files
for (let file of files) {
// join new folder name after the parent folder
// logs_of_109/24
let filePath = path.join(dir, file);
try {
//
let stats = await getStats(filePath);
if (!stats.isDirectory()) {
// add the filepath to the array
fileList.push(filePath);
}
if (stats.isDirectory()) {
await scanDir(filePath, fileList);
}
} catch (err) {
// Drop on the floor..
}
}
return fileList;
}
I expect the file content to be logged only once but it is logging twice on my console. Why is this happening and how do I stop this?
Your browser is making two requests to your server, most likely one for the URL you put in the address bar and another for favicon.ico. (You can quickly tell by opening the dev tools on your browser and going to the Network tab.)
handleFiles should look at req (specifically its url property) and act according to what's being requested. (This is something the code should be doing anyway.)
Side note 1: You're passing an async function into something (createServer) that won't do anything with the promise it returns. If you do that, it's important to catch any errors in the function locally within the function, since (again) nothing else is going to handle them. E.g.:
/**
 * Request handler: scans the log directory, reads the data files and answers
 * with a fixed "Hello" page.
 *
 * http.createServer ignores the promise an async handler returns, so every
 * failure is handled here: it is logged and answered with a 500 response
 * instead of leaving the request hanging.
 *
 * @param {import('http').IncomingMessage} req
 * @param {import('http').ServerResponse} res
 */
const handle_files = async (req, res) => {
  try {
    const files = await scanDir("logs_of_109");
    const result = await read_content(files);
    check_file_content(result);
    res.writeHead(200, { 'Content-Type': 'text/html' });
    // console.log(result)
    res.write("Hello");
    res.end();
  } catch (e) {
    console.error(e);
    // Headers can only be written once; skip them if the failure happened
    // after the 200 status line was sent.
    if (!res.headersSent) {
      res.writeHead(500, { 'Content-Type': 'text/plain' });
    }
    res.end('Internal Server Error');
  }
};
Side note 2: That code is falling prey to The Horror of Implicit Globals¹. Declare your variables in the appropriate scope. Not declaring them, in loose mode, makes them globals. (Also recommend using strict mode, so you get an error for this.)
¹ (that's a post on my anemic little blog)
The answer above is correct.
My approach is to solve it via 'routing' of any kind.
Here is small basic example of how it can be done
const fs = require('fs');
const path = require('path');
const { promisify } = require('util');
const getStats = promisify(fs.stat);
const readdir = promisify(fs.readdir);
const http = require('http');
/**
 * Minimal router: dispatches on the request URL.
 *
 * '/files' is served by handle_files; every other URL (for example the
 * browser's automatic /favicon.ico request) is answered with a 404, so no
 * request is left without a response.
 *
 * @param {import('http').IncomingMessage} req
 * @param {import('http').ServerResponse} res
 */
const handle_routes = async (req, res) => {
  try {
    switch (req.url) {
      case '/files':
        await handle_files(req, res);
        // Without this break, execution would fall through to the default route.
        break;
      default:
        console.log('for default page');
        res.writeHead(404, { 'Content-Type': 'text/plain' });
        res.end('Not Found');
    }
  } catch (e) {
    // http.createServer ignores the returned promise, so failures are
    // reported to the client here.
    console.error(e);
    if (!res.headersSent) {
      res.writeHead(500, { 'Content-Type': 'text/plain' });
    }
    res.end('Internal Server Error');
  }
};
handle_files = async (req, res) => {
let files = await scanDir("logs_of_109");
let result = await read_content(files)
check_file_content(result)
res.writeHead(200, { 'Content-Type': 'text/html' });
res.write("Hello");
res.end();
};
check_file_content = (file_data) => {
console.log(file_data[1])
}
/**
 * Read the contents of every file whose name ends in the extension "data".
 *
 * The extension is whatever follows the last '.' in the path; a path without
 * any '.' is compared as a whole.
 *
 * @param {string[]} files - Paths to consider.
 * @returns {Promise<string[]>} UTF-8 contents of the matching files, in order.
 */
async function read_content(files) {
  const isDataFile = (file) => {
    const extensionStart = file.toString().lastIndexOf('.') + 1;
    return file.substring(extensionStart, file.length) === 'data';
  };
  return files
    .filter((file) => isDataFile(file))
    .map((file) => fs.readFileSync(file, { encoding: 'utf-8' }));
}
http.createServer(handle_routes).listen(8080)
/**
 * Recursively collect the paths of all non-directory entries below `dir`.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} [fileList=[]] - Accumulator shared by the recursive calls.
 * @returns {Promise<string[]>} `fileList`, with every file path found appended.
 */
async function scanDir(dir, fileList = []) {
  // fetch the entries of the given directory
  const entries = await readdir(dir);
  for (const entry of entries) {
    // e.g. logs_of_109/24
    const entryPath = path.join(dir, entry);
    try {
      const stats = await getStats(entryPath);
      if (stats.isDirectory()) {
        // descend into sub-directories, filling the same list
        await scanDir(entryPath, fileList);
      } else {
        fileList.push(entryPath);
      }
    } catch (err) {
      // Best effort: an entry that cannot be processed is skipped.
    }
  }
  return fileList;
}
This gives you possibility to call handle_files function by going to localhost:8080/files url

How to set variable = a value from a function result inside async function

Inside a function, I would like to set the value of a variable (foldersInDir) to the results of getting the contents of a directory using fs.readdir();
I thought using await would force the console.log line to wait for a response, but it's not.
How can I set foldersInDir = the return value?
/*Begin function*/
const listContents = async (myPath) => {
var fs = require('fs');
let foldersInDir = await fs.readdir(myPath, function(err, items) {
console.log(items); //works
return items;
});
console.log(foldersInDir); //does not work, undefined
}
You need to convert readdir to a promise, e.g.:
/**
 * Promise-returning wrapper around the callback form of fs.readdir.
 *
 * @param {string} path - Directory to list.
 * @returns {Promise<string[]>} Resolves with the entry names, rejects with
 *   the fs error.
 */
const foldersPromised = (path) =>
  new Promise((resolve, reject) =>
    fs.readdir(path, (err, items) =>
      // On success Node passes `null` (not `undefined`) as the error, so test
      // for truthiness; `err !== undefined` would reject every call.
      err ? reject(err) : resolve(items)
    )
  );
try {
let foldersInDir = await foldersPromised(myPath);
} catch(err) {
console.log(err);
}
const fs = require('fs');
const test = () => {
let folders = fs.readdirSync('.');
return folders;
}
console.log(test());
Edit: sorry, need to promisify() the function
const fs = require('fs');
const { promisify } = require('util') // available in node v8 onwards
const readdir = promisify(fs.readdir)
/**
 * Log the entries of a directory.
 *
 * @param {string} myPath - Directory to list.
 * @returns {Promise<void>} Never rejects; a failure is logged instead.
 */
async function listContents(myPath) {
  try { // wrap in try-catch in lieu of .then().catch() syntax
    const foldersInDir = await readdir(myPath) // call promised function
    console.log('OK, folders:', foldersInDir) // success
  } catch (e) {
    console.log('FAIL reading dir:', e) // fail
  }
}
listContents('path/to/folder') // run test
I recommend using the promisify function provided by Node.js to fix the problem. This function will convert a callback-based function to a promise-based function, which can then be used using the await keyword.
const fs = require('fs');
const {
promisify
} = require('util');
const readdirAsync = promisify(fs.readdir);
/*Begin function*/
const listContents = async(myPath) => {
let foldersInDir = await readdirAsync(myPath);
console.log(foldersInDir);
}

Categories