Confused about Node's event loop when using promises

I'm writing a recursive function that creates an object tree of a selected file directory. My code works, but not in the order I expected, and I can't see the final output. Here is the code:
const fs = require("fs");
const basePath = process.argv[2];
const result = {};

const isDirectory = path => {
  return new Promise((resolve, reject) => {
    fs.lstat(path, (err, stats) => {
      if (err) reject("No such file or Directory");
      resolve(stats.isDirectory());
    });
  });
};

const createTree = (path, target) => {
  return new Promise((reject, resolve) => {
    fs.readdir(path, (err, list) => {
      for (const item of list) {
        const currentLocation = `${path}/${item}`;
        isDirectory(currentLocation).then(isDir => {
          console.log(result); //I CAN SEE THE RESULT HERE
          if (!isDir) {
            target[item] = true;
          } else {
            target[item] = {};
            resolve(createTree(currentLocation, target[item]));
          }
        });
      }
    });
    reject("Somthing went wrong while getting the list of files");
  });
};

createTree(basePath, result)
  .then(() => console.log("result --->", result)) //BUT NOT HERE
  .catch(err => console.log("Consume Error ==>", err));
I also did it with async/await, but I'm curious why it doesn't work with promises.
Here is the fully working example with async/await:
const fs = require("fs");
const basePath = process.argv[2]; //Getting the path
const result = {};

//Function to check the path exists and is a directory
const isDirectory = async path => {
  try {
    const stats = await fs.promises.lstat(path); //Used lstat to get access to the "isDirectory()" method
    return stats.isDirectory();
  } catch (error) {
    throw new Error("No such file or Directory");
  }
};

//Recursive function that should create the object tree of the file system
const createTree = async (path, target) => {
  try {
    const list = await fs.promises.readdir(path);
    for (const item of list) {
      const currentLocation = `${path}/${item}`;
      const isDir = await isDirectory(currentLocation);
      //If it's a file, then assign it to true
      //Otherwise create an object for that directory and do the same for it
      if (!isDir) {
        target[item] = true;
      } else {
        target[item] = {};
        await createTree(currentLocation, target[item]);
      }
    }
  } catch (err) {
    console.log("Something went wrong while getting the list of files");
  }
};

//Consuming the createTree function
(async () => {
  try {
    await createTree(basePath, result);
    console.log(result);
  } catch (error) {
    console.log(error.message);
  }
})();

I'm just curious whether it's possible to do the same with promises only.
async and await are simply syntactic sugar that makes it easier to work with Promise-based programs. Any program depending on those keywords can be rewritten not to use them -
// main.js
import { readdir } from "fs/promises"
import { join } from "path"

function createTree (init = ".")
{ const one = path => p =>
    p.isDirectory()
      ? many(join(path, p.name)).then(r => ({ [p.name]: r }))
      : { [p.name]: true }

  const many = path =>
    readdir(path, { withFileTypes: true })
      .then(r => Promise.all(r.map(one(path))))
      .then(r => Object.assign(...r))

  return many(init)
}

createTree(".")
  .then(v => console.log(JSON.stringify(v, null, 2)))
  .catch(console.error)
Now let's add some sample files so we can see our program working correctly -
$ yarn add immutable # (any example package)
$ node main.js
Output -
{
  "main.js": true,
  "node_modules": {
    ".yarn-integrity": true,
    "immutable": {
      "LICENSE": true,
      "README.md": true,
      "contrib": {
        "cursor": {
          "README.md": true,
          "__tests__": {
            "Cursor.ts.skip": true
          },
          "index.d.ts": true,
          "index.js": true
        }
      },
      "dist": {
        "immutable-nonambient.d.ts": true,
        "immutable.d.ts": true,
        "immutable.es.js": true,
        "immutable.js": true,
        "immutable.js.flow": true,
        "immutable.min.js": true
      },
      "package.json": true
    }
  },
  "package.json": true,
  "yarn.lock": true
}
If you would like the init path to be included in the tree, only a small modification is necessary -
// main.js
import { readdir } from "fs/promises"
import { join, basename } from "path" // !

function createTree (init = ".")
{ const one = path => p =>
    p.isDirectory()
      ? many(join(path, p.name)) // !
      : { [p.name]: true }

  const many = path =>
    readdir(path, { withFileTypes: true })
      .then(r => Promise.all(r.map(one(path))))
      .then(r => ({ [basename(path)]: Object.assign(...r) })) // !

  return many(init)
}
Now the tree contains our initial path -
createTree(".")
.then(v => console.log(JSON.stringify(v, null, 2)))
.catch(console.error)
{ ".": // <- starting path
{ ... }
}
To see how to write this program using async generators, please see the original Q&A.
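For a rough idea of what that might look like, here is my own sketch (not the code from that Q&A): an async generator that walks the tree depth-first and yields every path, which you could then fold into the nested-object shape used above:

// my own sketch, not the code from the original Q&A
import { readdir } from "fs/promises"
import { join } from "path"

async function* walk (path = ".") {
  for (const p of await readdir(path, { withFileTypes: true })) {
    const full = join(path, p.name)
    yield full
    if (p.isDirectory())
      yield* walk(full)
  }
}

// usage: for await (const p of walk(".")) console.log(p)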

Related

Node.js fs.access returns undefined no matter what

Here's the code:
import * as path from 'path';
import * as fs from 'fs';

const doesPathExist = async (path: string) => {
  return await fs.access(path, fs.constants.R_OK, (err) => {
    return err ? false : true;
  });
};

const someFunc = async (documentName: string) => {
  const documentPath = path.join(__dirname, documentName);
  const pathExists = doesPathExist(documentName);
};
The function doesPathExist seems to return Promise<void>, making the pathExists variable undefined no matter the outcome. I've tried initializing a temp variable at the top of the function before running fs.access and changing its value inside the callback, but still no luck.
The issue is with fs.access: this callback-based function does not return a Promise (or anything else useful).
There are a few options to solve it.
You can always use fs.accessSync():
const doesPathExist = async (path: string) => {
  try {
    fs.accessSync(path, fs.constants.R_OK)
    return true
  } catch (e) {
    return false
  }
};
Use fs.promises
const fsPromises = fs.promises;

const doesPathExistB = async (path: string) => {
  try {
    await fsPromises.access(path, fs.constants.R_OK)
    return true
  } catch (e) {
    return false
  }
};

// OR
const doesPathExistA = async (path: string) => {
  return new Promise(async (resolve) => {
    await fsPromises.access(path, fs.constants.R_OK)
      .then(() => resolve(true))
      .catch(() => resolve(false))
  })
};
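For completeness, a small usage sketch (my own, reusing the names from the question). Note that the question's someFunc was also missing an await, so pathExists held a pending Promise rather than a boolean. Also, the new Promise wrapper in doesPathExistA isn't strictly needed, since an async function already returns a Promise:

const someFunc = async (documentName) => {
  const documentPath = path.join(__dirname, documentName);
  const pathExists = await doesPathExistB(documentPath); // without await this is a pending Promise
  console.log(pathExists ? "path is readable" : "path is not readable");
};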

How to read files present in array nodejs

I would like to know how to read the files and search for the keyword "sample" in nodejs.
If the keyword is found, display the path.
const allfiles = [
  'C:\\Users\\public',
  'C:\\Users\\public\\images',
  'C:\\Users\\public\\javascripts\\index1.js',
  'C:\\Users\\public\\javascripts\\index2.js'
]

const readFile = (path, opts = 'utf8') =>
  new Promise((resolve, reject) => {
    try {
      let result = [];
      fs.readFile(path, opts, (err, data) => {
        if (err) reject(err)
        else {
          if (data.indexOf("sample") >= 0) {
            result.push(data);
            resolve(result);
          }
        }
      })
    }
    catch (e) {
      console.log("e", e);
    }
  })

const run = async () => {
  allfiles.forEach(e => {
    const s = await readFile(e);
    console.log(s);
  })
}
run();
Expected Output
[
  'C:\\Users\\public\\javascripts\\index1.js',
  'C:\\Users\\public\\javascripts\\index2.js'
]
Some tips:
What happens when "sample" isn't found in readFile?
You're currently pushing the data into result instead of the path.
Think about what you're trying to accomplish with readFile. To me, what you want to do is see if the file contains the word "sample", returning true if so and false if not. So I'd name the function checkIfFileHasSample and have it return a boolean. Then in your run function, inside the forEach, you have the path, so that is where I'd add the path to a list of results.
Maybe you already realized this, but run is never actually called in your code sample. Ie. run() doesn't happen.
Solution:
You had some syntax errors and a tricky gotcha with async/await in run: await only pauses inside an async function, so it can't be used inside a plain forEach callback, which is why the solution below uses a for loop. For the syntax errors, it'll come with experience, but I'd also recommend using ESLint to help you catch them, as well as making sure your code is always properly indented.
const fs = require("fs");

const allFiles = [
  "C:\\Users\\public",
  "C:\\Users\\public\\images",
  "C:\\Users\\public\\javascripts\\index1.js",
  "C:\\Users\\public\\javascripts\\index2.js",
];

const checkIfFileHasSample = (path, opts = "utf8") =>
  new Promise((resolve, reject) => {
    fs.readFile(path, opts, (err, data) => {
      if (err) {
        reject(err);
      } else {
        if (data.includes("sample")) {
          resolve(true);
        } else {
          resolve(false);
        }
      }
    });
  });

const run = async () => {
  const results = [];
  for (let i = 0; i < allFiles.length; i++) {
    try {
      const file = allFiles[i];
      const hasSample = await checkIfFileHasSample(file);
      if (hasSample) {
        results.push(file);
      }
    } catch (e) {
      console.log(e);
    }
  }
  console.log(results);
};

run();
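If the files may be checked in parallel, a shorter variation is possible. This is my own sketch (not part of the answer above) using fs.promises; unreadable paths and directories simply don't match:

const { readFile } = require("fs").promises;

const run = async () => {
  const results = [];
  await Promise.all(
    allFiles.map(async (file) => {
      try {
        const data = await readFile(file, "utf8");
        if (data.includes("sample")) results.push(file);
      } catch (e) {
        // directories and unreadable paths are simply skipped
      }
    })
  );
  console.log(results);
};

run();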

Read and write to csv file with Node.js fast-csv library

I may be lacking some in-depth understanding of streams in general. However, I would like to know whether what I need can be done efficiently, and how.
I want to read a csv file, make a query to the database (or an api) for each row and attach the returned data to the row, and then write the row with the attached data to a new csv file. I am using the fast-csv node library for this.
Here is my implementation:
const fs = require("fs");
const csv = require("fast-csv");

const delay = t => new Promise(resolve => setTimeout(resolve, t));

const asyncFunction = async (row, csvStream) => {
  // Imitate some stuff with database
  await delay(1200);
  row.data = "data";
  csvStream.write(row);
};

const array = [];

const csvStream = csv.format({ headers: true });
const writeStream = fs.createWriteStream("output.csv");

csvStream.pipe(writeStream).on("finish", () => {
  console.log("End of writing");
});

fs.createReadStream("input.csv")
  .pipe(csv.parse({ headers: true }))
  .transform(async function(row, next) {
    array.push(asyncFunction(row, csvStream));
    next();
  })
  .on("finish", function() {
    console.log("finished reading file");
    //Wait for all database requests and writings to be finished to close write stream
    Promise.all(array).then(() => {
      csvStream.end();
      console.log("finished writing file");
    });
  });
In particular, I would like to know whether there are ways to optimize what I am doing here, because I feel that I am missing something important about how this library can be used for these types of cases.
Regards,
Rokas
I was able to find a solution in the fast-csv issues section. A good person, doug-martin, provided this gist on how you can do this kind of operation efficiently via a Transform stream:
const path = require('path');
const fs = require('fs');
const { Transform } = require('stream');
const csv = require('fast-csv');

class PersistStream extends Transform {
  constructor(args) {
    super({ objectMode: true, ...(args || {}) });
    this.batchSize = 100;
    this.batch = [];
    if (args && args.batchSize) {
      this.batchSize = args.batchSize;
    }
  }

  _transform(record, encoding, callback) {
    this.batch.push(record);
    if (this.shouldSaveBatch) {
      // we have hit our batch size, so process the records as a batch
      this.processRecords()
        // we successfully processed the records so callback
        .then(() => callback())
        // An error occurred!
        .catch(err => callback(err));
      return;
    }
    // we shouldnt persist so ignore
    callback();
  }

  _flush(callback) {
    if (this.batch.length) {
      // handle any leftover records that were not persisted because the batch was too small
      this.processRecords()
        // we successfully processed the records so callback
        .then(() => callback())
        // An error occurred!
        .catch(err => callback(err));
      return;
    }
    // no records to persist so just call callback
    callback();
  }

  pushRecords(records) {
    // emit each record for down stream processing
    records.forEach(r => this.push(r));
  }

  get shouldSaveBatch() {
    // this could be any check, for this example it is the record count
    return this.batch.length >= this.batchSize;
  }

  async processRecords() {
    // save the records
    const records = await this.saveBatch();
    // be sure to emit them
    this.pushRecords(records);
    return records;
  }

  async saveBatch() {
    const records = this.batch;
    this.batch = [];
    console.log(`Saving batch [noOfRecords=${records.length}]`);
    // This is where you should save/update/delete the records
    return new Promise(res => {
      setTimeout(() => res(records), 100);
    });
  }
}

const processCsv = ({ file, batchSize }) =>
  new Promise((res, rej) => {
    let recordCount = 0;
    fs.createReadStream(file)
      // catch file read errors
      .on('error', err => rej(err))
      .pipe(csv.parse({ headers: true }))
      // catch any parsing errors
      .on('error', err => rej(err))
      // pipe into our processing stream
      .pipe(new PersistStream({ batchSize }))
      .on('error', err => rej(err))
      .on('data', () => {
        recordCount += 1;
      })
      .on('end', () => res({ event: 'end', recordCount }));
  });

const file = path.resolve(__dirname, `batch_write.csv`);

processCsv({ file, batchSize: 5 })
  .then(({ event, recordCount }) => {
    console.log(`Done Processing [event=${event}] [recordCount=${recordCount}]`);
  })
  .catch(e => {
    console.error(e.stack);
  });
https://gist.github.com/doug-martin/b434a04f164c81da82165f4adcb144ec
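A lighter-weight variation on the question's own code is to await each row inside the transform before calling next(), so the parser applies back-pressure instead of buffering a pending promise per row. This is my own sketch (not from the gist), and it assumes fast-csv's two-argument transform callback, which waits for next() before parsing further rows:

fs.createReadStream("input.csv")
  .pipe(csv.parse({ headers: true }))
  .transform((row, next) => {
    asyncFunction(row, csvStream) // the question's database-imitating helper
      .then(() => next(null, row))
      .catch(next);
  })
  .on("finish", () => {
    csvStream.end();
    console.log("finished writing file");
  });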

.then() does not appear to be waiting for the previous .then()

I'm creating a process that converts multiple markdown files into a single pdf. It creates a pdf file for each .md file found in the source directory. Then it merges the individual pdf files into one pdf. It is this last step that is failing, saying the individual pdf files do not exist.
const markdownpdf = require('markdown-pdf')
const path = require('path')
const PDFMerge = require('pdf-merge')
const fse = require('fs-extra')

const srcDir = '../manuscript'
const outDir = 'out'

const main = () => {
  fse.pathExists(outDir)
    .then(() => {
      fse.remove(outDir).then(() => {
        fse.ensureDir(outDir)
      }).then(() => {
        return fse.readdir(srcDir)
      }).then((srcDirFiles) => {
        console.log('source directory file count = ', srcDirFiles.length)
        return srcDirFiles.filter(f => path.extname(f) === '.md')
      }).then((mdFiles) => {
        console.log('number of md files', mdFiles.length);
        return mdFiles.map(file => {
          const outFileName = `${path.basename(file, '.md')}.pdf`
          fse.createReadStream(`${srcDir}/${file}`)
            .pipe(markdownpdf())
            .pipe(fse.createWriteStream(`${outDir}/${outFileName}`))
          return `${outDir}/${outFileName}`
        })
      }).then(outFiles => {
        console.log('number of pdf files created =', outFiles.length)
        PDFMerge(outFiles, { output: `${__dirname}/3.pdf` })
      })
    })
}

main()
If I wrap the PDFMerge() line in setTimeout() it does work
setTimeout(() => {
  PDFMerge(outFiles, { output: `${__dirname}/3.pdf` })
}, 1000)
I'm wondering why the setTimeout() is needed and what needs to be changed so it isn't.
I also wrote an async/await version that had the same problem and also worked with setTimeout().
Edit
In response to Zach Holt's suggestion, here is the async/await version:
const markdownpdf = require('markdown-pdf')
const path = require('path')
const PDFMerge = require('pdf-merge')
const fse = require('fs-extra')

const srcDir = '../manuscript'
const outDir = 'out'

const createPdf = async (file) => {
  try {
    const outFileName = `${path.basename(file, '.md')}.pdf`
    await fse.createReadStream(`${srcDir}/${file}`)
      .pipe(markdownpdf())
      .pipe(await fse.createWriteStream(`${outDir}/${outFileName}`))
  }
  catch (e) {
    console.log(e)
  }
}

const makePdfFiles = (files) => {
  files.forEach(file => {
    if (path.extname(file) === '.md') {
      createPdf(file)
    }
  })
}

const mergeFiles = async (files) => {
  try {
    await PDFMerge(files, {output: `${__dirname}/3.pdf`})
  }
  catch (e) {
    console.log(e)
  }
}

const addPathToPdfFiles = (files) => {
  return files.map(file => {
    return `${outDir}/${file}`
  })
}

const main = async () => {
  try {
    const exists = await fse.pathExists(outDir)
    if (exists) {
      await fse.remove(outDir)
    }
    await fse.ensureDir(outDir)
    const mdFiles = await fse.readdir(srcDir)
    const filesMade = await makePdfFiles(mdFiles)
    const pdfFiles = await fse.readdir(outDir)
    const pdfFilesWithPath = addPathToPdfFiles(pdfFiles)
    mergeFiles(pdfFilesWithPath)
    // setTimeout(() => {
    //   mergeFiles(pdfFilesWithPath)
    // }, 1000)
  } catch (e) {
    console.log(e)
  }
}
It has the same problem.
I also tried:
const makePdfFiles = files => {
  return new Promise((resolve, reject) => {
    try {
      files.forEach(file => {
        if (path.extname(file) === '.md') {
          createPdf(file)
        }
      })
      resolve(true)
    } catch (e) {
      reject(false)
      console.log('makePdfFiles ERROR', e)
    }
  })
}
But it made no difference.
You need to return the promise from ensureDir() to make it wait for it.
I think the issue might be that you're creating a read stream for each of the .md files, but not waiting for the reads to finish before trying to merge outFiles.
You could likely wait until the outFiles length is the same as the number of md files found before merging.
Also, you should stick with async/await for this. It'll keep the code much clearer
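A sketch of that idea (my own, reusing the question's variables): wrap each pipeline in a promise that settles when the write stream emits 'finish', then Promise.all the results before merging:

const createPdf = (file) =>
  new Promise((resolve, reject) => {
    const outFileName = `${path.basename(file, '.md')}.pdf`
    fse.createReadStream(`${srcDir}/${file}`)
      .pipe(markdownpdf())
      .pipe(fse.createWriteStream(`${outDir}/${outFileName}`))
      .on('finish', () => resolve(`${outDir}/${outFileName}`))
      .on('error', reject)
  })

// const outFiles = await Promise.all(mdFiles.map(createPdf))
// await PDFMerge(outFiles, { output: `${__dirname}/3.pdf` })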
Let me over-simplify your code to illustrate the problem:
p1.then(() => {
  p2.then().then().then()
}).then(/* ??? */)
which is the same as:
p1.then(() => {
  p2.then().then().then()
  return undefined
}).then(/* undefined */)
What you need for chaining is to return the inner Promise:
p1.then(() => // no {code block} here, just return value
  p2.then().then().then()
).then(/* ??? */)
which is the same as:
p1.then(() => {
  p3 = p2.then()
  p4 = p3.then()
  p5 = p4.then()
  return p5
}).then(/* p5 */)
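Applied to the original main, a sketch with each promise returned (together with the createPdf-as-promise idea from the earlier sketch), so the final merge only runs once everything before it has settled:

const main = () =>
  fse.pathExists(outDir)
    .then(exists => exists ? fse.remove(outDir) : undefined)
    .then(() => fse.ensureDir(outDir))
    .then(() => fse.readdir(srcDir))
    .then(files => files.filter(f => path.extname(f) === '.md'))
    .then(mdFiles => Promise.all(mdFiles.map(createPdf)))
    .then(outFiles => PDFMerge(outFiles, { output: `${__dirname}/3.pdf` }))
    .catch(console.error)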
As far as I can tell the original problem was the approach and not the obvious errors correctly pointed out by others. I found a much simpler solution to the overall goal of producing a single pdf from multiple md files.
const markdownpdf = require('markdown-pdf')
const path = require('path')
const fse = require('fs-extra')

const srcDir = '../manuscript'

const filterAndAddPath = (files) => {
  try {
    const mdFiles = files
      .filter(f => path.extname(f) === '.md')
      .map(f => `${srcDir}/${f}`)
    return mdFiles
  }
  catch (e) {
    console.log('filterAndAddPath', e)
  }
}

const main4 = async () => {
  const allFiles = await fse.readdir(srcDir)
  const mdFiles = filterAndAddPath(allFiles)
  const bookPath = 'book.pdf'
  markdownpdf()
    .concat.from(mdFiles)
    .to(bookPath, function() {
      console.log('Created', bookPath)
    })
}

main4()

Chaining a variable number of promises synchronously that share a resource in Javascript

I'm trying to make use of Chrome Lighthouse to check a number of urls from a list. I can do this successfully with a single url using their sample code.
function launchChromeAndRunLighthouse(url, opts, config = null) {
  return chromeLauncher.launch({chromeFlags: opts.chromeFlags}).then(chrome => {
    opts.port = chrome.port;
    return lighthouse(url, opts, config).then(results => {
      // use results.lhr for the JS-consumeable output
      // https://github.com/GoogleChrome/lighthouse/blob/master/types/lhr.d.ts
      // use results.report for the HTML/JSON/CSV output as a string
      // use results.artifacts for the trace/screenshots/other specific case you need (rarer)
      return chrome.kill().then(() => results.lhr)
    });
  });
}
However, I would like to check multiple urls. This will require creating the browser window with chromeLauncher.launch, executing the lighthouse function on each url, and then finally calling chrome.kill() on the window, before returning the results.
Initially, I tried solving this using Promise.all
return chromeLauncher.launch({chromeFlags: opts.chromeFlags}).then(chrome => {
  opts.port = chrome.port;
  const checks = urls.map((url) => {
    console.log('checking', url)
    return lighthouse(url, opts, config)
  })
  return Promise.all(checks).then((results) => {
    return chrome.kill().then(() => results)
  })
});
but since each call to lighthouse uses a shared resource, each call must wait for the previous one to return before continuing, whereas Promise.all tries to make all the lighthouse calls in parallel and then resolves. This doesn't work with the single shared browser resource.
Then I tried experimenting with using a reducer, with the chrome launcher promise as the initial value.
const checks = urls.reduce((previous, url) => {
  return previous.then((previousValue) => {
    return lighthouse(url, opts, config)
  })
}, chromeLauncher.launch({chromeFlags: opts.chromeFlags}))

checks
  .then((results) => {
    console.log('something')
    return chrome.kill()
  })
  .then((results) => results.lhr)
but this also doesn't work when calling the lighthouse function. Still, I think this is the correct approach for calling a chain of promises sequentially.
Does anyone have any suggestions where I'm going wrong, or what else I could try?
With async await:
const lighthouse = require('lighthouse');
const chromeLauncher = require('chrome-launcher');

async function launchChromeAndRunLighthouse(urls, opts, config = null) {
  const chrome = await chromeLauncher.launch({chromeFlags: opts.chromeFlags})
  opts.port = chrome.port;
  const results = [];
  for (const url of urls) {
    results.push(await lighthouse(url, opts, config));
  }
  await chrome.kill();
  return results;
}

const opts = {
  chromeFlags: ['--show-paint-rects']
};

// Usage:
const urls = ['http://www.google.de', 'http://www.heise.de'];
launchChromeAndRunLighthouse(urls, opts).then(results => {
  console.log(results);
});
Classic:
const lighthouse = require('lighthouse');
const chromeLauncher = require('chrome-launcher');

function launchChromeAndRunLighthouse(urls, opts, config = null) {
  return chromeLauncher.launch({chromeFlags: opts.chromeFlags}).then(chrome => {
    opts.port = chrome.port;
    const results = [];
    return urls.reduce((p, url) => {
      return p.then(() => {
        return lighthouse(url, opts, config).then((data) => {
          results.push(data);
          return results;
        });
      });
    }, Promise.resolve())
      .then(() => chrome.kill())
      .then(() => results)
  });
}

const opts = {
  chromeFlags: ['--show-paint-rects']
};

// Usage:
const urls = ['http://www.google.de', 'http://www.heise.de'];
launchChromeAndRunLighthouse(urls, opts).then(results => {
  console.log(results);
})
Or using a library like Bluebird:
const Promise = require('bluebird');
const lighthouse = require('lighthouse');
const chromeLauncher = require('chrome-launcher');

function launchChromeAndRunLighthouse(urls, opts, config = null) {
  return chromeLauncher.launch({chromeFlags: opts.chromeFlags}).then(chrome => {
    opts.port = chrome.port;
    return Promise.mapSeries(urls, url => lighthouse(url, opts, config))
      .then((results) => chrome.kill().then(() => results))
  });
}

const opts = {
  chromeFlags: ['--show-paint-rects']
};

// Usage:
const urls = ['http://www.google.de', 'http://www.heise.de'];
launchChromeAndRunLighthouse(urls, opts).then(results => {
  console.log(results);
});
