I'm using through2 to generate multiple files from a Gulp stream. I'm on Node.js 10.6.0, so I thought I'd make full use of async/await, but I'm not fully understanding the mechanics yet. Currently the through2 done() callback is being fired before all the files have been written.
Here's what I have (simplified). Note that I'm not returning the stream at the end, as there's no need to.
async function createDirectory(pathDir) {
  return new Promise((resolve, reject) => {
    mkdirp(pathDir, (err) => {
      if (err) reject(err);
      else resolve();
    });
  });
}
async function writeFile(outputFilePath, outputFileContent) {
  return new Promise((resolve, reject) => {
    fs.writeFile(outputFilePath, outputFileContent, (err) => {
      if (err) reject(err);
      else resolve();
    });
  });
}
async function doWriteFile(outputFolderPath, outputFilePath, outputContent) {
  await createDirectory(outputFolderPath);
  await writeFile(outputFilePath, outputContent);
}
async function doGenerateVariant(data, variantArr) {
  for (const variant of variantArr) {
    /* Do a load of stuff */
    const variantOutputFolderPath = blah;
    const variantOutputFilePath = blah;
    const variantOutputContent = blah;
    await doWriteFile(variantOutputFolderPath, variantOutputFilePath, variantOutputContent);
  }
}
const generateVariant = () => {
  return through.obj((file, enc, done) => {
    const data = JSON.parse(file.contents.toString());
    /* Do a load of stuff */
    const { variant } = data;
    const variantArr = Object.values(variant);
    doGenerateVariant(data, variantArr);
    return done();
  });
};
This doesn't work: done() gets called before all the files have been written. I'm guessing I'm missing a return or two, but nothing I do seems to work.
If I pass done into doGenerateVariant and call it after doWriteFile, everything works as expected, but I know that isn't the right approach.
You need to wait for doGenerateVariant to do its job before calling done. Remember, an async function always returns a Promise. So you could do it this way:
const generateVariant = () => {
  return through.obj((file, enc, done) => {
    const data = JSON.parse(file.contents.toString());
    /* Do a load of stuff */
    const { variant } = data;
    const variantArr = Object.values(variant);
    doGenerateVariant(data, variantArr).then(() => done());
  });
};
or, using async/await:
const generateVariant = () => {
  return through.obj(async (file, enc, done) => {
    const data = JSON.parse(file.contents.toString());
    /* Do a load of stuff */
    const { variant } = data;
    const variantArr = Object.values(variant);
    await doGenerateVariant(data, variantArr);
    done();
  });
};
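One caveat worth adding: if doGenerateVariant rejects, forward the error to done so the stream emits it instead of dying on an unhandled rejection. A sketch of both styles:

// Promise style:
doGenerateVariant(data, variantArr)
  .then(() => done())
  .catch(done); // pass the error to the stream

// async/await style:
try {
  await doGenerateVariant(data, variantArr);
  done();
} catch (err) {
  done(err);
}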
Related
I have an application with a Node.js backend and some scripts in Python.
The problem is making 'PythonShell' (the function that runs the scripts) behave as an async function. I do not know why, but it is not working.
I'll put the code from my router.js file here; inside it I put three console.log('step') calls to check the sequence.
It should be Step01 > Step02 > Step03, but since it is not working, it always prints Step01 > Step03 > Step02.
Everything else is working fine, except for this async problem! To me it looks like it should work as it is.
How can I edit my functions to execute 'goToScript/PythonShell' first, and only then execute 'res.json(responseScript)'?
Thanks
router.put("/uploads/script-03", async (req, res) => {
  let options = {
    scriptPath: "scripts",
    args: JSON.stringify(req.body)
  };
  const goToScript = async () => {
    await PythonShell.run("script-01.py", options, (err, res) => {
      if (err) {
      }
      if (res) {
        responseScript = JSON.parse(res)
        console.log('Step 02')
      }
    });
  }
  console.log('Step 01')
  goToScript()
  console.log('Step 03')
  res.json(responseScript)
});
module.exports = router
A couple things:
1. Your goToScript is not actually async/returning a Promise
From what I can tell, PythonShell doesn't support promises, only callbacks, so you can rewrite your goToScript like so:
const goToScript = () => {
  return new Promise((resolve, reject) => {
    PythonShell.run("script-01.py", options, (err, res) => {
      if (err) {
        reject(err)
      }
      if (res) {
        responseScript = JSON.parse(res)
        console.log('Step 02')
        resolve(responseScript)
      }
    })
  })
}
const scriptResult = await goToScript()
This code will work like a regular async function: the promise will resolve to the parsed JSON, and reject with the error if it encounters one.
2. You are not awaiting your call to goToScript
When you want to make an async call that finishes in sequence with everything else, you need to await it. Take these two examples:
In this first chunk of code, waitFn waits 100ms before logging "Step 2!":
const waitFn = () => {
  return new Promise((resolve) => {
    setTimeout(() => {
      console.log('Step 2!')
      resolve()
    }, 100)
  })
}
console.log('Step 1!')
waitFn()
console.log('Step 3!')
Because you do not await the result of the Promise, your code doesn't care that it has not finished, and will print:
Step 1!
Step 3!
Step 2!
Instead, however, if you await the result of the Promise returned in waitFn, it will execute in order:
const waitFn = () => {
  return new Promise((resolve) => {
    setTimeout(() => {
      console.log('Step 2!')
      resolve()
    }, 100)
  })
}
console.log('Step 1!')
await waitFn() // Finishes before moving on
console.log('Step 3!')
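Note that a bare top-level await like the one above is only valid inside an async function (or at the top level of an ES module in newer Node versions); in a plain script you would wrap it in an async IIFE:

(async () => {
  console.log('Step 1!')
  await waitFn() // Finishes before moving on
  console.log('Step 3!')
})()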
You can read a bunch more about Promises and async/await on MDN :)
To be able to await a function, that function needs to return a promise. The npm page for the library describes what PythonShell.run returns: it does not return a promise. So it is asynchronous, but not awaitable; it is callback-based.
All you need to do is promisify this function. Additionally, you need to await the call to goToScript().
router.put("/uploads/script-03", async (req, res) => {
  let options = {
    scriptPath: "scripts",
    args: JSON.stringify(req.body)
  };
  const goToScript = async () => {
    return new Promise((resolve, reject) => {
      PythonShell.run("script-01.py", options, (err, res) => {
        console.log("Step 02");
        if (err) return reject(err);
        return resolve(JSON.parse(res));
      });
    });
  };
  console.log("Step 01");
  const responseScript = await goToScript();
  console.log("Step 03");
  res.json(responseScript);
});
module.exports = router;
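Since PythonShell.run follows Node's error-first callback convention, you could also let util.promisify do the wrapping instead of writing new Promise by hand. A sketch, mirroring the JSON.parse from the answers above:

const { promisify } = require("util");

// .bind keeps `this` intact in case PythonShell.run uses it internally
const runScript = promisify(PythonShell.run.bind(PythonShell));

// inside the route handler:
const responseScript = JSON.parse(await runScript("script-01.py", options));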
I am new to Node and stuck on this issue. Here's the file:
I am running the startProcess function, and I want to run downloadFiles, wait until it's completed, and save the files before executing any code after it.
This code always ends up running runVideoUploadEngine even before the download has completed.
const downloadAndSaveFiles = async ({ url, dir }) => {
  try {
    https.get(url, (res) => {
      // File will be stored at this path
      console.log('dir: ', dir);
      var filePath = fs.createWriteStream(dir);
      res.pipe(filePath);
      filePath.on('finish', () => {
        filePath.close();
        console.log('Download Completed');
      });
    });
    return true;
  } catch (e) {
    console.log(e);
    throw e;
  }
};
const downloadFiles = async ({ data }) => {
  try {
    mediaUrl = data.mediaUrl;
    thumbnailUrl = data.thumbnailUrl;
    const mediaExt = path.extname(mediaUrl);
    const thumbExt = path.extname(thumbnailUrl);
    mediaDir = `${__dirname}/temp/${'media'}${mediaExt}`;
    thumbDir = `${__dirname}/temp/${'thumb'}${thumbExt}`;
    await downloadAndSaveFiles({ url: mediaUrl, dir: mediaDir });
    await downloadAndSaveFiles({ url: thumbnailUrl, dir: thumbDir });
    return { mediaDir, thumbDir };
  } catch (e) {
    console.log(e);
    throw e;
  }
};
module.exports = {
  startProcess: async ({ message }) => {
    // check if message is proper
    data = JSON.parse(message.Body);
    // download video and thumbnail and store in temp
    console.log('starting download..');
    const { mediaDir, thumbDir } = await downloadFiles({ data });
    console.log('dir:- ', mediaDir, thumbDir);
    pageAccessToken = 'myRandomToken';
    _pageId = 'myRandomPageID';
    console.log('running engine');
    await runVideoUploadEngine({ pageAccessToken, _pageId, mediaDir, thumbDir });
    // start videoUploadEngine
    // on success: delete video/thumbnail
  },
};
What am I doing wrong?
downloadAndSaveFiles returns a promise (because the function is async), but that promise doesn't "wait" for https.get or fs.createWriteStream to finish, and therefore none of the code that calls downloadAndSaveFiles can properly "wait".
If you interact with callback APIs, you cannot use async/await directly. You have to create the promise manually. For example:
const downloadAndSaveFiles = ({ url, dir }) => {
  return new Promise((resolve, reject) => {
    // TODO: Error handling
    https.get(url, (res) => {
      // File will be stored at this path
      console.log('dir: ', dir);
      var filePath = fs.createWriteStream(dir);
      filePath.on('finish', () => {
        filePath.close();
        console.log('Download Completed');
        resolve(); // resolve promise once everything is done
      });
      res.pipe(filePath);
    });
  });
};
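To fill in that TODO: both the request and the write stream can fail, and each failure should reject the promise. A sketch, still using the https/fs setup from the question:

const downloadAndSaveFiles = ({ url, dir }) => {
  return new Promise((resolve, reject) => {
    const req = https.get(url, (res) => {
      console.log('dir: ', dir);
      var filePath = fs.createWriteStream(dir);
      filePath.on('finish', () => {
        filePath.close();
        console.log('Download Completed');
        resolve();
      });
      filePath.on('error', reject); // e.g. the target directory doesn't exist
      res.pipe(filePath);
    });
    req.on('error', reject); // e.g. DNS failure or connection refused
  });
};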
I was working on a scraper script using cheerio and puppeteer. Inside a cheerio each loop I wanted to resolve a redirected URL field. Using a Promise I can console.log it, but what is the best way to insert it into the metadata?
I am also confused about the control flow.
(async function main() {
  const browser = await puppeteer.launch({
    headless: true,
  });
  const page = await browser.newPage();
  await page.goto('https://www.example.com/?q=async+urls&s=s');
  const content = await page.content();
  const $ = cheerio.load(content);
  var parsedResults = [];
  const fetchRedirect = async (url) => {
    try {
      let response = await doRequest(url);
      return response;
    } catch (err) {
      return false;
    }
  };
  const videoBlocks = $('td[itemprop="subjectOf"]').first().each(function (i, element) {
    const url = ($(this).find('a.title').attr('href'));
    const fetchUrl = fetchRedirect(url);
    // ** I can console.log the redirected url here **
    fetchUrl.then(url => console.log(url));
    const title = ($(this).find('a.title').text());
    var metadata = {
      title
    };
    parsedResults.push(metadata);
  });
  function doRequest(url) {
    return new Promise(function (resolve, reject) {
      request(url, function (error, res, body) {
        if (!error) {
          resolve(getPathFromUrl(res.request.uri.href));
        } else {
          reject(error);
        }
      });
    });
  }
  function getPathFromUrl(url) {
    return url.split(/[?#]/)[0];
  }
  console.log(parsedResults);
  await page.close()
  await browser.close();
})();
// ...............await!
const fetchUrl = await fetchRedirect(url);
// ** then instead of that **
// fetchUrl.then(url => console.log(url));
// just
console.log(fetchUrl)
const title = ($(this).find('a.title').text());
var metadata = {
  title
};
parsedResults.push(metadata);
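One caveat on the snippet above: await is only legal if the enclosing each callback is declared async, and cheerio's each will not wait for async callbacks anyway, so parsedResults could still be logged before the requests finish. A safer pattern (a sketch reusing the question's fetchRedirect; the redirectedUrl field name is just illustrative) is to collect the element data synchronously, then resolve all the redirects with Promise.all:

const elements = $('td[itemprop="subjectOf"]').first().map(function (i, element) {
  return {
    url: $(this).find('a.title').attr('href'),
    title: $(this).find('a.title').text()
  };
}).get();

const parsedResults = await Promise.all(
  elements.map(async (meta) => ({
    title: meta.title,
    redirectedUrl: await fetchRedirect(meta.url) // false if the request failed
  }))
);
console.log(parsedResults);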
Personally, I would go for a Node module. Code like this is hard to read, as multiple functions are declared inside one function (main). Whether to apply OOP or functional programming is up to your preference (the second is much more popular in the Node environment, but I prefer the first when building functionality around a specific entity). Avoiding nesting and abstract functions (functions declared inside other functions) is crucial to creating reusable and readable code.
Here is a working prototype (not tested). It makes the puppeteer instance reusable. The module exposes three methods: start, stop, and crawlWeb.
'use strict'
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
const request = require('request');

var browser;

async function crawlWeb(options) {
  // validate options and throw errors
  if (!options.url) {
    throw new Error('url is invalid');
  }
  if (!browser) {
    throw new Error('puppeteer is not started');
  }
  const page = await browser.newPage();
  await page.goto(options.url);
  const content = await page.content();
  await page.close();
  const $ = cheerio.load(content);
  const metas = extractMetadata($);
  for (let metadata of metas) {
    // you can verify if the site is valid
    // you can use await
    try {
      await doRequest(metadata.url);
    } catch (err) {
      // do something if not valid
    }
  }
  return metas;
}

async function start(options) {
  browser = await puppeteer.launch(options);
}

async function stop() {
  if (!browser) {
    throw new Error('puppeteer is not started');
  }
  await browser.close();
}

function extractMetadata($) {
  const metas = [];
  $('td[itemprop="subjectOf"]').first().each(function (i, element) {
    const url = ($(this).find('a.title').attr('href'));
    const title = ($(this).find('a.title').text());
    var metadata = {
      url,
      title
    };
    metas.push(metadata);
  });
  return metas;
}

function doRequest(url) {
  return new Promise(function (resolve, reject) {
    request(url, function (error, res, body) {
      if (!error) {
        resolve(getPathFromUrl(res.request.uri.href));
      } else {
        reject(error);
      }
    });
  });
}

function getPathFromUrl(url) {
  return url.split(/[?#]/)[0];
}

module.exports = {
  crawlWeb,
  start,
  stop
};
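Usage would then look something like this (a sketch; it assumes the module above is saved as crawler.js, and reuses the URL from the question):

'use strict'
const crawler = require('./crawler');

(async () => {
  await crawler.start({ headless: true });
  const metas = await crawler.crawlWeb({ url: 'https://www.example.com/?q=async+urls&s=s' });
  console.log(metas);
  await crawler.stop();
})();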
I would like to know how to read the files and search for the keyword "sample" in Node.js.
If the keyword is found, display the path.
const allfiles = [
  'C:\\Users\\public',
  'C:\\Users\\public\\images',
  'C:\\Users\\public\\javascripts\\index1.js',
  'C:\\Users\\public\\javascripts\\index2.js'
]
const readFile = (path, opts = 'utf8') =>
  new Promise((resolve, reject) => {
    try {
      let result = [];
      fs.readFile(path, opts, (err, data) => {
        if (err) reject(err)
        else {
          if (data.indexOf("sample") >= 0) {
            result.push(data);
            resolve(result);
          }
        }
      })
    } catch (e) {
      console.log("e", e);
    }
  })

const run = async () => {
  allfiles.forEach(e => {
    const s = await readFile(e);
    console.log(s);
  })
}
run();
Expected Output
[
'C:\\Users\\public\\javascripts\\index1.js',
'C:\\Users\\public\\javascripts\\index2.js'
]
Some tips:
What happens when "sample" isn't found in readFile?
You're currently pushing the data into result instead of the path.
Think about what you're trying to accomplish with readFile. To me, what you want is to check whether the file contains the word "sample": return true if so, false if not. So I'd name the function checkIfFileHasSample and have it return a boolean. Then in run's loop you have the path, so that's where I'd add the path to a list of results.
Finally, double-check that run() is actually invoked at the end of your script; forgetting the call is an easy mistake to make.
Solution:
You had some syntax errors and a tricky async/await gotcha in run: await is only valid inside an async function, and a forEach callback is not one. For the syntax errors, it'll come with experience, but I'd also recommend using ESLint to help you catch them, as well as keeping your code properly indented.
const fs = require("fs");

const allFiles = [
  "C:\\Users\\public",
  "C:\\Users\\public\\images",
  "C:\\Users\\public\\javascripts\\index1.js",
  "C:\\Users\\public\\javascripts\\index2.js",
];

const checkIfFileHasSample = (path, opts = "utf8") =>
  new Promise((resolve, reject) => {
    fs.readFile(path, opts, (err, data) => {
      if (err) {
        reject(err);
      } else {
        if (data.includes("sample")) {
          resolve(true);
        } else {
          resolve(false);
        }
      }
    });
  });

const run = async () => {
  const results = [];
  for (let i = 0; i < allFiles.length; i++) {
    try {
      const file = allFiles[i];
      const hasSample = await checkIfFileHasSample(file);
      if (hasSample) {
        results.push(file);
      }
    } catch (e) {
      console.log(e);
    }
  }
  console.log(results);
};

run();
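Since the files are independent, the checks can also run in parallel instead of one await at a time. A sketch (per-file errors are simplified to treat unreadable files as non-matches):

const run = async () => {
  const checks = allFiles.map((file) =>
    checkIfFileHasSample(file).then(
      (hasSample) => (hasSample ? file : null),
      () => null // unreadable file: treat as no match
    )
  );
  const results = (await Promise.all(checks)).filter(Boolean);
  console.log(results);
};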
In the first function, get_files(), I can log the file_list variable and it is correct; however, when I log it again in my second function, get_diffs(), it is undefined.
// Get files
async function get_files() {
  await fs.readdir(dirPath, function (err, files) {
    (async () => {
      if (await err) {
        console.log("Error getting directory information.", err)
      } else {
        var file_list = []; // Reset
        await files.forEach(function (file) {
          file_list.push(file);
        });
        console.log('1st Call = ' + file_list); // Correct
        return await file_list;
      }
    })();
  });
}
// Get Diffs
async function get_diffs() {
  const file_list = await get_files();
  console.log('2nd Call = ' + file_list); // Undefined
  const dates = await get_dates();
  return await files.filter(x => !dates.includes(x));
}
You have misunderstood async/await; it's worth learning the basics first. To make get_files awaitable, wrap the callback API in a Promise yourself:
function get_files() {
  return new Promise((resolve, reject) => {
    fs.readdir(dirPath, function (err, files) {
      if (err) {
        reject(err);
      } else {
        var file_list = []; // Reset
        files.forEach(function (file) {
          file_list.push(file);
        });
        console.log('1st Call = ' + file_list); // Correct
        resolve(file_list);
      }
    });
  })
}
fs.readdir does not return a promise. Use the promise-based function fs.promises.readdir instead.
async function get_diffs() {
  const file_list = await fs.promises.readdir(dirPath);
  // ...
}
So you don't really need the other function; it had many problems anyway. await makes little sense when used with an expression that is not a promise, and in every place you used await in get_files, the expression that follows it does not represent a promise.
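Putting it together, get_diffs no longer needs get_files at all. A sketch (it assumes get_dates resolves to an array of names, as the question implies, and fixes the stray files reference, which should have been file_list):

const fs = require('fs');

async function get_diffs() {
  const file_list = await fs.promises.readdir(dirPath);
  const dates = await get_dates(); // assumed to resolve to an array of names
  return file_list.filter(x => !dates.includes(x));
}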