I would like to know how to read files and search for the keyword "sample" in Node.js.
If the keyword is found, display the path.
const allfiles = [
  'C:\\Users\\public',
  'C:\\Users\\public\\images',
  'C:\\Users\\public\\javascripts\\index1.js',
  'C:\\Users\\public\\javascripts\\index2.js'
]

const readFile = (path, opts = 'utf8') =>
  new Promise((resolve, reject) => {
    try {
      let result = [];
      fs.readFile(path, opts, (err, data) => {
        if (err) reject(err)
        else {
          if (data.indexOf("sample") >= 0) {
            result.push(data);
            resolve(result);
          }
        }
      })
    } catch (e) {
      console.log("e", e);
    }
  })

const run = async () => {
  allfiles.forEach(e => {
    const s = await readFile(e);
    console.log(s);
  })
}

run();
Expected Output
[
  'C:\\Users\\public\\javascripts\\index1.js',
  'C:\\Users\\public\\javascripts\\index2.js'
]
Some tips:
What happens when "sample" isn't found in readFile? Neither resolve nor reject is called, so the promise never settles.
You're currently pushing the data into result instead of the path.
Think about what you're trying to accomplish with readFile. To me, what you want is to check whether the file contains the word "sample", returning true if so and false if not. So I'd name the function checkIfFileHasSample and have it return a boolean. Then in your run function, inside the forEach you already have the path, so that is where I'd add the path to a list of results.
Maybe you already realized this, but run is never actually called in your code sample, i.e. run() doesn't happen.
Solution:
You had some syntax errors and a tricky async-await gotcha in run. Spotting syntax errors will come with experience, but I'd also recommend using ESLint to help catch them, as well as keeping your code properly indented.
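To spell out the gotcha: await is only valid directly inside an async function, and Array.prototype.forEach neither awaits its callback nor lets the surrounding function's await apply inside it, so the original run could never wait for the reads. A minimal sketch of the difference (the delay helper is hypothetical, purely for illustration):

const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

const broken = async () => {
  [1, 2, 3].forEach(async (n) => {
    await delay(100); // forEach fires these off and moves on immediately
    console.log("forEach:", n);
  });
  console.log("done"); // prints before any of the numbers
};

const fixed = async () => {
  for (const n of [1, 2, 3]) {
    await delay(100); // each iteration genuinely waits
    console.log("for...of:", n);
  }
  console.log("done"); // prints after all of the numbers
};

The corrected solution below avoids the trap with a plain for loop.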
const fs = require("fs");

const allFiles = [
  "C:\\Users\\public",
  "C:\\Users\\public\\images",
  "C:\\Users\\public\\javascripts\\index1.js",
  "C:\\Users\\public\\javascripts\\index2.js",
];
const checkIfFileHasSample = (path, opts = "utf8") =>
  new Promise((resolve, reject) => {
    fs.readFile(path, opts, (err, data) => {
      if (err) {
        reject(err);
      } else {
        if (data.includes("sample")) {
          resolve(true);
        } else {
          resolve(false);
        }
      }
    });
  });
const run = async () => {
  const results = [];
  for (let i = 0; i < allFiles.length; i++) {
    try {
      const file = allFiles[i];
      const hasSample = await checkIfFileHasSample(file);
      if (hasSample) {
        results.push(file);
      }
    } catch (e) {
      console.log(e);
    }
  }
  console.log(results);
};
run();
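Since each file check is independent, you could also run the checks concurrently rather than one at a time. A minimal sketch using Promise.all with the same allFiles and checkIfFileHasSample as above (you'd call runConcurrent() in place of run()); treating an unreadable path as a non-match is my own assumption here:

const runConcurrent = async () => {
  // Start all checks at once; Promise.all keeps results in input order
  const checks = await Promise.all(
    allFiles.map((file) => checkIfFileHasSample(file).catch(() => false))
  );
  const results = allFiles.filter((file, i) => checks[i]);
  console.log(results);
};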
Related
As the title describes, I would like to read all files from a specific directory and return the names of all the files that have a specific string in their contents.
Here is how my code looks so far:
const dirname = path.resolve("./results/");

async function readFiles(dirname) {
  const allResults = [];
  fs.readdir(dirname, function(err, filenames) {
    if (err) {
      console.log(err);
      return;
    }
    filenames.forEach(async function(filename) {
      fs.readFile(dirname + "/" + filename, 'utf-8', function(err, content) {
        if (err) {
          console.log(err);
          return;
        }
        if (content.includes('content string')) {
          allResults.push(filename);
        }
      });
    });
  });
  return allResults;
}

readFiles(dirname).then((res) => {
  console.log(res);
});
The result I'm getting is []
so I understand it's an issue with promises and async functions. However, this is not a concept I fully grasp yet, and despite trying several combinations (new Promise(), .then, await, readdirSync and readFileSync) I had no success.
What am I missing so that it returns the allResults array only once all files have been read?
You should ditch the callback syntax and use the fs.promises API. It looks much cleaner:
const fs = require("fs").promises;
const path = require("path");

const dirname = path.resolve("./results/");

async function readDir(dirname) {
  const allResults = [];
  try {
    const files = await fs.readdir(dirname);
    for (const fileName of files) {
      try {
        const content = await fs.readFile(`${dirname}/${fileName}`, {
          encoding: "utf-8"
        });
        if (content.includes("content string")) {
          allResults.push(fileName);
        }
      } catch (error) {
        console.log(error.message);
      }
    }
    return allResults;
  } catch (error) {
    console.log(error);
  }
}

readDir(dirname).then(data => {
  console.log(data);
});
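One caveat worth noting: fs.readdir returns subdirectory names as well as file names, and fs.readFile rejects when handed a directory (the inner catch above just logs that). If you want to skip directories up front, a small sketch using the withFileTypes option (available since Node 10.10), reusing the fs.promises binding from the answer:

async function listFilesOnly(dirname) {
  // Dirent entries expose isFile(), so subdirectories can be filtered out
  // without an extra fs.stat call per entry
  const entries = await fs.readdir(dirname, { withFileTypes: true });
  return entries.filter((entry) => entry.isFile()).map((entry) => entry.name);
}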
I've created a script using the request and cheerio libraries to scrape the links of different provinces from this webpage, then use those URLs to parse the links of different offices from here, and finally use those office links to scrape the title from here.
When I run the script, I can see that it does its job accordingly until it gets stuck somewhere in its execution. When it gets stuck, it doesn't throw any error.
These are the steps the script follows:
Firstly, the script grabs links from here.
Secondly, it grabs links from here.
And finally, the script parses the title from here.
Here is what I've tried:
const request = require('request');
const cheerio = require('cheerio');

const link = 'https://www.egyptcodebase.com/en/p/all';
const base_link = 'https://www.egyptcodebase.com/en/';

let getLinks = (link) => {
  const items = [];
  return new Promise((resolve, reject) => {
    request(link, function(error, response, html) {
      let $ = cheerio.load(html);
      if (error) return reject(error);
      try {
        $('.table tbody tr').each(function() {
          items.push(base_link + $(this).find("a[href]").attr("href"));
        });
        resolve(items);
      } catch (e) {
        reject(e);
      }
    });
  });
};

let getData = (links) => {
  const nitems = [];
  const promises = links
    .map(nurl => new Promise((resolve, reject) => {
      request(nurl, function(error, response, html) {
        let $ = cheerio.load(html);
        if (error) return reject(error);
        try {
          $('.table tbody tr').each(function() {
            nitems.push(base_link + $(this).find("a[href]").attr("href"));
          });
          resolve(nitems);
        } catch (e) {
          reject(e);
        }
      });
    }));
  return Promise.all(promises);
};

let FetchData = (links) => {
  const promises = links
    .map(turl => new Promise((resolve, reject) => {
      request(turl, function(error, response, html) {
        if (error) return reject(error);
        let $ = cheerio.load(html);
        try {
          const title = $(".home-title > h2").eq(0).text();
          console.log({
            title: title,
            itemLink: turl
          });
          resolve(title);
        } catch (e) {
          reject(e);
        }
      });
    }));
  return Promise.all(promises);
};

(async function main() {
  const result = await getLinks(link);
  const resultSecond = await getData(result);
  const merged = resultSecond.flat(1);
  const resultFinal = await FetchData(merged);
  for (const title of resultFinal) {
    console.log(title);
  }
})().catch(console.error);
How can I make the script finish its execution process?
PS: Although the script appears to be big, the functions used in it are almost identical to each other, except for the selectors.
OK, so on testing this code, I ran across two problems right off the bat:
resultSecond, containing the data from getData(), returned an Array-like Object rather than an Array, so I wasn't able to use flat(). I therefore created a toArray function that converts Objects to Arrays, added a variable after resultSecond called resultThird, and used the function on resultSecond to turn it into an array.
flat() did not exist on the Array prototype, so I had to add it manually.
After resolving those issues, I was able to run your code and experienced the hang you were talking about.
An ECONNRESET error occurred, and the script then proceeded to make probably a couple thousand requests before hanging. An ECONNRESET usually results from unhandled asynchronous network errors, or from the server you're requesting deciding to kill the connection. I'm not sure how the request module handles such an event, but it seems the module may not be handling network errors or terminated connections properly.
The issue was that you were making 15,000 requests to this site's API. The API probably has a rate limiter: it saw the volume of requests and terminated most of them, but allowed a couple thousand through. Since the terminated connections are never handled (most likely because the request module swallows those errors), the script "hangs" with the node process unable to exit.
So I batched the requests into groups of 300 using the async module and it worked like a charm: no terminated connections, because the rate limit was never reached. You could probably raise the limit higher than 300.
However, I would suggest dropping the request module in favour of another HTTP client such as axios, which most likely handles these issues better. I'd also suggest the async module whenever you're doing a ton of asynchronous requests; it has many helpful methods. Let me know if you need more explanation of what the async module is doing here, but I'd advise reading the documentation first: https://caolan.github.io/async/v3/docs.html#mapLimit
const request = require('request');
const cheerio = require('cheerio');
const _async = require('async');

const link = 'https://www.egyptcodebase.com/en/p/all';
const base_link = 'https://www.egyptcodebase.com/en/';

const toArray = (obj) => {
  const arr = [];
  for (const prop in obj) {
    arr.push(obj[prop]);
  }
  return arr;
};

Object.defineProperty(Array.prototype, 'flat', {
  value: function(depth = 1) {
    return this.reduce(function(flat, toFlatten) {
      return flat.concat((Array.isArray(toFlatten) && (depth > 1)) ? toFlatten.flat(depth - 1) : toFlatten);
    }, []);
  }
});

let getLinks = (link) => {
  const items = [];
  return new Promise((resolve, reject) => {
    request(link, function(error, response, html) {
      if (error) return reject(error);
      let $ = cheerio.load(html);
      try {
        $('.table tbody tr').each(function() {
          items.push(base_link + $(this).find("a[href]").attr("href"));
        });
        resolve(items);
      } catch (e) {
        reject(e);
      }
    });
  });
};

let getData = (links) => {
  const nitems = [];
  const promises = links
    .map(nurl => new Promise((resolve, reject) => {
      request(nurl, function(error, response, html) {
        if (error) return reject(error);
        let $ = cheerio.load(html);
        try {
          $('.table tbody tr').each(function() {
            nitems.push(base_link + $(this).find("a[href]").attr("href"));
          });
          return resolve(nitems);
        } catch (e) {
          return reject(e);
        }
      });
    }));
  return Promise.all(promises);
};

let FetchData = (links) => {
  const limit = 300;
  return new Promise((resolve, reject) => {
    const itr = (col, cb) => {
      request(col, function(error, response, html) {
        if (error) return cb(error);
        let $ = cheerio.load(html);
        try {
          const title = $(".home-title > h2").eq(0).text();
          console.log({
            title: title,
            itemLink: col
          });
          cb(null, title);
        } catch (e) {
          cb(e);
        }
      });
    };
    _async.mapLimit(links, limit, itr, function(err, results) {
      if (err) return reject(err);
      resolve(results);
    });
  });
};

(async function main() {
  const result = await getLinks(link);
  const resultSecond = await getData(result);
  const resultThird = toArray(resultSecond);
  const merged = resultThird.flat(1);
  const resultFinal = await FetchData(merged);
  for (const title of resultFinal) {
    console.log("title: ", title);
  }
})().catch(err => console.log(err));

// good to listen to these
process.on('uncaughtException', err => { console.log(err) });
process.on('unhandledRejection', err => { console.log(err) });
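Following up on the axios suggestion above, here is a rough sketch of what the title-fetching step might look like without request. This is not the tested code from the answer, just an illustration; it assumes the same cheerio selector and caps concurrency by processing the links in sequential batches:

const axios = require('axios');
const cheerio = require('cheerio');

const fetchTitles = async (links, batchSize = 300) => {
  const titles = [];
  for (let i = 0; i < links.length; i += batchSize) {
    const batch = links.slice(i, i + batchSize);
    // Promise.all keeps results in input order, so titles stay aligned with links
    const results = await Promise.all(
      batch.map(async (turl) => {
        try {
          const { data: html } = await axios.get(turl);
          const $ = cheerio.load(html);
          return $(".home-title > h2").eq(0).text();
        } catch (e) {
          return null; // one failed request shouldn't sink the whole batch
        }
      })
    );
    titles.push(...results);
  }
  return titles; // usage: const titles = await fetchTitles(merged);
};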
I am new to Node.js.
I want to push elements into the files array in order of URL, but I am getting a random order.
Below is the code. Can anyone suggest what I am doing wrong here?
const mergePdf = async (urlArray, files) => {
  for (let i = 0; i < urlArray.length; i++) {
    try {
      const pdfBytes = await fetch(urlArray[i]).then((res) => {
        return res.arrayBuffer();
      });
      let bytes = new Uint8Array(pdfBytes);
      files[i] = bytes;
    } catch (err) {
      console.log(err);
    }
  }
}
Write a separate function for your API call that returns a promise, like:
const other_func = (url) => {
  return new Promise(async (resolve, reject) => {
    try {
      const res = await fetch(url);
      const pdfBytes = await res.arrayBuffer();
      resolve(new Uint8Array(pdfBytes)); // the return value is passed via resolve
    } catch (err) {
      reject(err); // settle the promise so the caller's await doesn't hang
    }
  });
};
and in your original function, call it using await:
const mergePdf = async (urlArray, files) => {
  for (let i = 0; i < urlArray.length; i++) {
    files[i] = await other_func(urlArray[i]);
  }
};
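A side note from me, since the original concern was ordering: the sequential await above preserves order but downloads one PDF at a time. A sketch of my own (not part of the answer above) that fetches in parallel while still preserving order, because Promise.all resolves to its results in input order:

const mergePdf = async (urlArray) => {
  // map starts every fetch immediately; Promise.all keeps input order
  const files = await Promise.all(
    urlArray.map(async (url) => {
      const res = await fetch(url);
      return new Uint8Array(await res.arrayBuffer());
    })
  );
  return files;
};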
I created multiple files under a directory called data and filled them with some random data returned by a function called generateRandomData.
To create the files I wrote these functions:
const createFile = (fileName, data) => {
  if (fs.existsSync(fileName)) throw new Error('Filename already exists');
  fs.writeFile(fileName, data, {
    encoding: 'utf8',
    flag: 'w',
  }, (error) => {
    if (error) return error;
    console.log('File created successfully');
    return null;
  });
};

const createFiles = (dirPath, sizeList) => {
  if (sizeList && !sizeList.length) throw new Error('The list of sizes should not be empty');
  const fileCreationPromises = sizeList.map(async (size) => {
    const data = generateRandomData(size);
    const fileName = resolve(dirPath, `./data_${size}.txt`);
    await createFile(fileName, data);
  });
  return Promise.all(fileCreationPromises);
};
Then I call the function generateData, which generates the random data and calls the functions described above to create the files:
const generateData = async (dirPath, sizeList) => {
  if (!dirPath) throw new Error('No directory path was provided');
  if (!sizeList || (sizeList && !sizeList.length)) throw new Error('Size list should not be empty');
  await createFiles(dirPath, sizeList);
};
I call another function called execute, which reads the data from those files in order to continue the treatment:
const execute = async (func, dirPath, label) => {
  const files = fs.readdirSync(dirPath);
  const result = [];
  if (files && files.length) {
    for (const file of files) {
      const filename = resolve(dirPath, `./${file}`);
      const parsedData = readDataFromFile(filename);
      const data = parsedData.split(',').map((d) => Number(d));
      const { length } = data;
      result.push({
        label: length,
        value: getExecutionTime(func, data),
      });
    }
  }
  await createFile(resolve(dirPath, `./${label}`), result);
};
Finally, I call the function initialize:
const { resolve } = require('path');
const fs = require('fs');
const { generateData, sizeList, execute } = require('../utils/helpers');
const { underscorePartial } = require('../main/partial');

const dirPath = resolve(__dirname, '../data');

const initialize = () => {
  if (!fs.existsSync(dirPath)) {
    fs.mkdir(dirPath, async (error) => {
      if (error) throw error;
      await generateData(dirPath, sizeList);
      await execute(underscorePartial, dirPath, 'uExecutionTime.txt');
    });
  }
};

try {
  initialize();
} catch (error) {
  console.log(error);
}
However, I realized that uExecutionTime.txt, created in the final step, contains undefined, because readDataFromFile returns undefined.
I guess readDataFromFile starts reading from the files before the data creation has finished. Any suggestions to fix my code? Is anything missing or wrong in it?
The problem is your createFile function. You are awaiting it, but it doesn't return a promise; it's callback style. It should be wrapped in a promise:
const createFile = (fileName, data) => {
  if (fs.existsSync(fileName)) throw new Error('Filename already exists');
  return new Promise((resolve, reject) => {
    fs.writeFile(fileName, data, {
      encoding: 'utf8',
      flag: 'w',
    }, (error) => {
      if (error) return reject(error);
      console.log('File created successfully');
      resolve(null);
    });
  });
};
Hope this resolves the issue.
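As a side note, on Node versions that ship fs.promises you could skip the manual wrapper entirely; a minimal sketch of the same createFile under that assumption:

const fs = require('fs');

const createFile = async (fileName, data) => {
  if (fs.existsSync(fileName)) throw new Error('Filename already exists');
  // fs.promises.writeFile already returns a promise, so it can be awaited directly
  await fs.promises.writeFile(fileName, data, { encoding: 'utf8', flag: 'w' });
  console.log('File created successfully');
};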
I'm using through2 to generate multiple files from a Gulp stream. I'm on Node.js 10.6.0, so I thought I'd make full use of async/await, but I'm not fully understanding the mechanics yet. Currently the through2 done() callback is being fired before all files have been written.
Here's what I have (simplified). Note that I'm not returning the stream at the end, as there is no need to.
async function createDirectory(pathDir) {
  return new Promise((resolve, reject) => {
    mkdirp(pathDir, (err) => {
      if (err) reject(err);
      else resolve();
    });
  });
}

async function writeFile(outputFilePath, outputFileContent) {
  return new Promise((resolve, reject) => {
    fs.writeFile(outputFilePath, outputFileContent, (err) => {
      if (err) reject(err);
      else resolve();
    });
  });
}

async function doWriteFile(outputFolderPath, outputFilePath, outputContent) {
  await createDirectory(outputFolderPath);
  await writeFile(outputFilePath, outputContent);
}

async function doGenerateVariant(data, variantArr) {
  for (const variant of variantArr) {
    /* Do a load of stuff */
    const variantOutputFolderPath = blah;
    const variantOutputFilePath = blah;
    const variantOutputContent = blah;
    await doWriteFile(variantOutputFolderPath, variantOutputFilePath, variantOutputContent);
  }
}

const generateVariant = () => {
  return through.obj((file, enc, done) => {
    const data = JSON.parse(file.contents.toString());
    /* Do a load of stuff */
    const { variant } = data;
    const variantArr = Object.values(variant);
    doGenerateVariant(data, variantArr);
    return done();
  });
};
This doesn't work, as done() gets called before all files have been written. I'm guessing I'm missing a return or two, but nothing I try seems to work.
If I pass done into doGenerateVariant and call it after doWriteFile, everything works as expected, but I know this isn't correct.
You need to wait for doGenerateVariant to do its job before calling done. Remember, an async function always returns a Promise. So you could do it this way:
const generateVariant = () => {
  return through.obj((file, enc, done) => {
    const data = JSON.parse(file.contents.toString());
    /* Do a load of stuff */
    const { variant } = data;
    const variantArr = Object.values(variant);
    doGenerateVariant(data, variantArr).then(() => done());
  });
};
or using async/await:
const generateVariant = () => {
  return through.obj(async (file, enc, done) => {
    const data = JSON.parse(file.contents.toString());
    /* Do a load of stuff */
    const { variant } = data;
    const variantArr = Object.values(variant);
    await doGenerateVariant(data, variantArr);
    done();
  });
};
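One further refinement worth considering (my own suggestion, not part of the answer): if doGenerateVariant rejects, the async version above leaves the error as an unhandled rejection instead of failing the stream. Forwarding errors through done, which is how through2 callbacks report failure, would look roughly like this:

const generateVariant = () => {
  return through.obj(async (file, enc, done) => {
    try {
      const data = JSON.parse(file.contents.toString());
      /* Do a load of stuff */
      const { variant } = data;
      const variantArr = Object.values(variant);
      await doGenerateVariant(data, variantArr);
      done();
    } catch (err) {
      done(err); // surface the failure to the gulp pipeline
    }
  });
};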