Creating An Object From A Directory - javascript

Im just wondering how to create a JavaScript Object from a directory using node js.
Here's What I Mean:
If the root folder was "test" and the tree would look like this
So the object structure would look like this:
{ test: { joe: {mama: "(file contents)"}, pp: {help: "very mature example\r\ni'm aware"} } }
edit: here's my attempt
const fs = require("fs");
const path = require("path");
const root = "test";
const dir = __dirname + "\\" + root + "\\";
var currentFolder = dir.replace(__dirname + "\\", "");
var data = {};
const getFileSize = function (dirPath) {
files = fs.readdirSync(dirPath);
var length = 0;
files.forEach(function (file) {
length++;
});
return length;
};
fs.readdirSync(dir).forEach((rootFile, rootIndex) => {
fs.readdirSync(currentFolder).forEach((file, index) => {
if (getFileSize(currentFolder) - 1 == index) {
console.log(index, file, rootFile, currentFolder);
currentFolder = currentFolder.replace(`\\file`, "");
index++;
}
// if (file.includes("."))
// data[currentFolder + file] = fs.readFileSync(currentFolder + file, "utf8");
// else currentFolder = currentFolder + file + "\\";
});
});
console.log(data);

Yeh after hours of googling and stack overflow I found this:
var fs = require("fs");
var path = require("path");
var walk = function (dir, done) {
var results = [];
fs.readdir(dir, function (err, list) {
if (err) return done(err);
var i = 0;
(function next() {
var file = list[i++];
if (!file) return done(null, results);
file = path.resolve(dir, file);
fs.stat(file, function (err, stat) {
if (stat && stat.isDirectory()) {
walk(file, function (err, res) {
results = results.concat(res);
next();
});
} else {
results.push(file);
next();
}
});
})();
});
};
var root = "test";
var data = {};
walk("test", function (err, results) {
if (err) throw err;
for (i in results) {
data[
results[i].replace(__dirname + "\\" + root + "\\", "")
] = fs.readFileSync(results[i], "utf8");
}
console.log(data);
});

Related

How to download N number of files asynchronously using wget and node

I have the following Node script:
var exec = require('child_process').exec;
var download_file_wget = function(file_url) {
// compose the wget command
var wget = `wget ${file_url}`;
// execute wget using child_process' exec function
var child = exec(wget, function(err) {
if (err) throw err;
else console.log(`${file_url} downloaded`);
});
};
download_file_wget('http://placekitten.com/10/10');
It does successfully download 1 file. I need to asynchronously download 128 files at once. How can I do this with this code?
If the files requested are big consider use spawn instead of exec.
const http = require('http');
const exec = require('child_process').exec;
const DOWNLOAD_DIR = './downloads/';
const generate_width_and_height = function() {
const random = Math.floor((Math.random() * 100) + 200);
console.log(random);
return random
}
const create_file_url = function() {
return "http://placekitten.com/" + generate_width_and_height() + "/" + generate_width_and_height()
}
const oneHundredTwentyEightElementsArray = Array.from(Array(127), (_,x) => x);
const oneHundredTwentyEightUrlsArray = oneHundredTwentyEightElementsArray.map( _ => create_file_url())
const download_file_wget = function(file_url, file_number) {
// extract the file name
const file_name = "file_" + file_number
// compose the wget command
const wget = 'wget -P ' + DOWNLOAD_DIR + ' ' + file_url;
// excute wget using child_process' exec function
const child = exec(wget, function(err, stdout, stderr) {
if (err) throw err;
else console.log(file_name + ' downloaded to ' + DOWNLOAD_DIR);
});
};
for (let index = 0; index < oneHundredTwentyEightElementsArray.length; index++) {
const url = oneHundredTwentyEightUrlsArray[index];
download_file_wget(url, index)
}
You can use Javascript Promises to download multiple files with node and wget.
First wrap your inner code in a promise:
const downloadFile = (url) => {
return new Promise((resolve) => {
console.log(`wget ${url} --no-check-certificate`)
exec(`wget ${url} --no-check-certificate`, function(err, stdout, stderr) {
if (err) {
console.log('ERR', err, url)
} else {
console.log('SUCCESS ' + url);
resolve(1)
}
});
})
}
Then use Promise.all to process all the downloads asynchronously:
const files = [
'http://placekitten.com/10/10',
'http://placekitten.com/10/10'
// etc
]
(async () => {
await Promise.all(files.map(url => downloadFile(url)))
})()

Edit this function to return the most recent files path

I tried and have looked at StackOverflow and the other posts are not answering my questions. This is a unique question. How can I edit to have the path of the most recently uploaded file returned from the getMostRecent() function and saved to the lastdownloadedimage variable?
var pathtocheck = "C:\Users\user1\Downloads";
var path = require('path');
var fs = require('fs');
var getMostRecent = function (dir, cb) {
var dir = path.resolve(dir);
var files = fs.readdir(dir, function (err, files) {
var sorted = files.map(function(v) {
var filepath = path.resolve(dir, v);
return {
name:v,
time:fs.statSync(filepath).mtime.getTime()
};
})
.sort(function(a, b) { return b.time - a.time; })
.map(function(v) { return v.name; });
if (sorted.length > 0) {
cb(null, sorted[0]);
} else {
cb('Y U NO have files in this dir?');
}
})
}
await getMostRecent(pathtocheck, function (err, recent) {
if (err) console.error(err);
console.log(recent);
});
var lastdownloadedimage = ;
Declare lastdownloadedimage before the call to await getMostRecent(...), and change the value of lastdownloadedimage once the call to getMostRecent() returns:
var lastdownloadedimage;
await getMostRecent(pathtocheck, function (err, recent) {
if (err) console.error(err);
lastdownloadedimage = recent;
console.log(recent);
});

Next is not defined next(err)

var fs = require('fs');
var path = require('path');
var walk = require('walk');
var xml2js = require('xml2js');
var jsonDir ;
var convertXml = function (xml_dir, json_dir) {
jsonDir = json_dir;
var walker = walk.walk(xml_dir, { followLinks: true });
walker.on("errors", fDirWalkError);
walker.on("end", fDirWalkEnd);
walker.on("file", fDirWalkFile);
}
function fDirWalkError (err) {
console.log ("fDirWalkError: " + err);
next (err);
}
function fDirWalkEnd () {
console.log ("======= End of directory walk");
}
function fDirWalkFile (root, fileStat, next) {
if (fileStat.name.indexOf(".xml") < 0) {
console.log ("skipping file " + fileStat.name + " (does not end in .xml)");
return;
} else {
var xml_file = path.resolve(root, fileStat.name);
console.log ("xml file: " + xml_file);
fs.readFile('xml_file', function (err, data) {
if (err) {
console.log ("error reading file:" + xml_file);
next (err);
}
xml2js.parseString (data, function (err, json_obj) {
if (err) {
console.log (err);
next (err);
}
var json_string = JSON.stringify(json_obj, null, 2);
var json_file = path.resolve (jsonDir, path.basename(xml_file).replace(/\.xml$/, ".json"));
console.log ("json file: ", json_file);
fs.writeFile(json_file, json_string, "utf8", function (err) {
if (err) {
console.log ("error converting yin (%s) to json(%s)", xml_file, json_file);
next (new Error ("error converting xml(" + xml_file + ") to json(" + json_file + ")"));
}
else {
console.log ("Converted xml (%s) to json(%s)", xml_file, json_file);
}
});
});
});
}
next ();
}
module.exports.convertXml = convertXml;
var path = require ('path');
var xml2js = require ('./xml2js');
console.log ("__dirname: " + __dirname);
var templateDir = path.resolve (__dirname);
var xmlDir = path.resolve (templateDir, "xml");
var jsonDir = path.resolve (templateDir, "jsons");
console.log( templateDir);
xml2js.convertXml (xmlDir, jsonDir)
/Users//Documents/GitHub//xml2js.js:19
next (err);
^
ReferenceError: next is not defined
at Walker.fDirWalkError (/Users//Documents/GitHub//xml2js.js:19:9)
at Walker.emit (events.js:321:20)
at Walker._wPostFilesHandler (/UsersDocuments/GitHub//node_modules/walk/lib/walk.js:134:10)
at /Users//Documents/GitHub//node_modules/foreachasync/forEachAsync.js:15:16
at Array.forEach ()
at Walker.next [as _wCurFileCallback] (/Users//Documents/GitHub/node_modules/foreachasync/forEachAsync.js:14:15)
at Walker._wLstatHandler (/Users//Documents/GitHub//node_modules/walk/lib/walk.js:84:10)
at /Users//Documents/GitHub//node_modules/walk/lib/walk.js:106:12
depends on the walk documentation at https://www.npmjs.com/package/walk
the next param is the third param that the event send
in your case you are using
function fDirWalkError (err) {
console.log ("fDirWalkError: " + err);
next (err);
}
but you didnt get next function from the module in your function args
try to put it in your handler args
like this
function fDirWalkError (err, nodeStatsArray, next) {
console.log ("fDirWalkError: " + err);
next (err);
}
it should work

Find Email-adresses in the mailbody with Mailparser

I'm quite new to the topic and i'm still having some issues with my mailparser. Though searching and finding emails in the email header (mail.from) does work, it doesn't work in the email body. Does anybody have some experience with that and is willing to help? You can find the function i'm talking about under the "// Check for other addresses in Mail-Body (Doesn't work yet)"-comment. I think, that my Regex is correct. Also if the matchAll-Function give back an array and it can't be saved in the the subscriber.email-object, it shall be at least logged to the console. Also i checked manually in the inbox if there are mails with email adresses in the mail body. There are at least two, which shall be found..
The part of the App.js, that does the mailparsing:
const simpleParser = require('mailparser').simpleParser;
//const htmlparser = require("htmlparser2");
var fs = require('fs');
var config = require('./config');
var Imap = require('imap');
var imap = new Imap(config.imap);
var blacklistString = '';
String.prototype.matchAll = function(regexp) {
var matches = [];
this.replace(regexp, function() {
var arr = ([]).slice.call(arguments, 0);
var extras = arr.splice(-2);
arr.index = extras[0];
arr.input = extras[1];
matches.push(arr);
});
return matches.length ? matches : null;
};
function openInbox(subbox,cb) {
imap.openBox('INBOX.'+subbox, true, cb);
}
function getBoxes(cb) {
imap.getBoxes(cb);
}
function showBoxes(boxes) {
imap.end();
}
function logArrayElements(element) {
if(element[1].indexOf('placeholder.de')==-1){
addToBlacklistString(element[1]);
}
}
function addToBlacklistString(str) {
blacklistString += str+"\n";
}
function writeBlacklistFile() {
fs.appendFile('data/data.csv', blacklistString, function (err) {
if (err) throw err;
console.log('Saved!');
});
}
function search(searchArray, regex){
imap.search(searchArray, function(err, results) {
if (err) throw err;
var temp = 0;
var mailtemp = [];
var f = imap.fetch(results, { bodies: '' });
f.on('message', function(msg, seqno) {
console.log('Message #%d', seqno);
var prefix = '(#' + seqno + ') ';
msg.on('body', function(stream, info) {
simpleParser(stream, (err, mail)=>{
//console.log(temp);
//console.log(mail.subject);
/*fs.writeFile('data/'+seqno+'.txt',mail.text, function(err){
console.log(err);
});*/
//var text = mail.text;
// New Subscriber Object
var subscr = new Subscriber({nr: '', mailIdent: '', from: '', emails: '', text:'', uLink: '', anwalt: false });
subscr.nr = seqno;
//Check for From-Address
if(!!mail.from) {
//console.log(mail.from.value);
for(var i = 0; i < mail.from.value.length; i++) {
mailtemp = mail.from.value[i].address.matchAll(regex);
mailtemp.forEach(function(element){
/*fs.appendFile('data/data.csv', element[0] + "\n", function(error){
console.log(error);
});*/
subscr.from = element[0];
});
if(!!mailtemp) {
mailtemp.forEach(logArrayElements);
}
}
}else{
//console.log(mail.text);
}
// Message-ID
if(!!mail.messageId) {
subscr.mailIdent = mail.messageId;
}
console.log(mail.messageId);
// Check for other addresses in Mail-Body (Doesn't work yet)
var regexEmails = new RegExp('/([\w\.\-\_\#\+]+#[\w\.\-\_äüö]+\.[a-zA-Z]+)/g');
if(!!mail.text){
if(mail.text.matchAll(regexEmails)!=null) {
subscr.emails = mail.text.matchAll(regexEmails);
console.log(subscr.emails);
}
}
/* Split mail.text at substrings in substr-array. Extend if necessary..
*
* Also check for 'Anwalt'-Expression in splitted Substring
*
* If mail.text doesn't exist -> Check for html body and convert it to text-format
*/
//var regexLink = new RegExp('\.de\/(unsubscribe|austragen)\/([^\"]+)');
var regexAnwalt = new RegExp('nwalt|echtsanwalt|rechtlicher');
if(!!mail.text) {
var substr = ["schrieb pplaceholder.de", "Von: \"placeholder.de", "Von: pplaceholder.de", "From: placeholder.de", "Ursprüngliche Nachricht"];
for (var i = 0; i<substr.length; i++) {
if(mail.text.indexOf(substr[i]) > -1) {
var textTemp = mail.text;
var arr = textTemp.split(substr[i]);
if(arr[0].matchAll(regexAnwalt)!=null) {
subscr.anwalt = true;
};
subscr.text = arr[0];
break;
} else {
subscr.text = mail.text;
}
}
//console.log(arr);
}
else
{
var html = mail.html;
var text = htmlToText.fromString(html, {
noLinkBrackets: true,
ignoreImage: true,
uppercaseHeadings: false,
preserveNewlines: false,
wordwrap:130,
format: {
heading: function (node, fn, options) {
var h = fn(node.children, options);
return '\n==== ' + h + ' ====\n\n';
}
}
});
subscr.text = text;
}
mail.headers.forEach(function(value, key) {
//console.log(value);
});
subscr.save();
//console.log(subscr);
temp++;
});
});
msg.once('end', function() {
console.log(prefix + 'Finished');
});
});
f.once('error', function(err) {
console.log('Fetch error: ' + err);
});
f.once('end', function() {
console.log('Done fetching all messages!');
//writeBlacklistFile();
imap.end();
});
});
}
imap.once('ready', function() {
openInbox('Test',function(err, box) {
var searchArray = [['FROM', '#']];
search(searchArray,/([\w\.\-\_\#\+]+#[\w\.\-\_äüö]+\.[a-zA-Z]+)/g);
});
});
imap.once('error', function(err) {
console.log(err);
});
imap.once('end', function() {
console.log('Connection ended');
});
imap.connect();
app.listen(2700, function(){
console.log("Listening on Port 2700")
});
module.exports = app;
subscriber.js
const mongoose = require('mongoose');
var subscriberSchema = mongoose.Schema({
nr: Number,
mailIdent: String,
from: String,
emails: String,
text: String,
uLink: String,
anwalt: Boolean
});
var Subscriber = module.exports = mongoose.model('Subscriber', subscriberSchema);
//get Subscriber
module.exports.getSubscribers = function(callback, limit){
Subscriber.find(callback).limit(limit);
};
module.exports.getSubscriberByID = function(_id, callback){
Subscriber.findById(_id, callback);
};
The Regex for the Emails was a little bit wrong.
Also i didn't noticed that the matchAll-Fct. is giving back a two-dimensional Array. Here is the changed part of the code:
var regexEmails = new RegExp("([\\w\\.\\-\\_\\#\\+]+#[\\w\\.\\-\\_äüö]+\\.[a-zA-Z]+)");
var temp1 = mail.text.matchAll(regexEmails);
if(!!temp1){
//console.log(temp1);
for(var i =0; i<temp1.length; i++) {
if(temp1[0][i]!=='info#service.placeholder.de' && temp1[0][i] !== "info#placeholder.de"){
subscr.emails += temp1[0][i];
}
}
}

NodeJS recursively list files in directory

I am trying to list all files in a directory (and files within any subdirectories) with the following code:
var fs = require('fs')
var walk = function(directoryName) {
fs.readdir(directoryName, function(e, files) {
files.forEach(function(file) {
fs.stat(file, function(e, f) {
if (f.isDirectory()) {
walk(file)
} else {
console.log('- ' + file)
}
})
})
})
}
walk(__dirname)
However, when my code attempts to invoke walk(file) on line 8 I get the following error:
TypeError: Cannot call method 'isDirectory' of undefined
at program.js:7:15
at Object.oncomplete (fs.js:107:15)
Why is f undefined? If I have the directory structure below, shouldn't the code identify aaa.txt and bbb.txt as files, my_dir as a directory at which point it recursively calls walk and begins the process again (with zzz.txt being the value of f)?
- aaa.txt
- bbb.txt
+ my_dir
- zzz.txt
Function fs.readdir lists the simple file names in that directory, not their absolute path. This is why the program failed to find them, thus leading to an error in fs.stat.
Here's the solution: concatenate the directory path name to the file.
var fs = require('fs');
var path = require('path');
var walk = function(directoryName) {
fs.readdir(directoryName, function(e, files) {
if (e) {
console.log('Error: ', e);
return;
}
files.forEach(function(file) {
var fullPath = path.join(directoryName,file);
fs.stat(fullPath, function(e, f) {
if (e) {
console.log('Error: ', e);
return;
}
if (f.isDirectory()) {
walk(fullPath);
} else {
console.log('- ' + fullPath);
}
});
});
});
};
var fs = require('fs');
var path = require('path');
var walk = function(directoryName) {
fs.readdir(directoryName, function(e, files) {
files.forEach(function(file) {
fs.stat(directoryName + path.sep + file, function(e, f) {
if (f.isDirectory()) {
walk(directoryName + path.sep + file)
} else {
console.log(' - ' + file)
}
})
})
})
}
walk(__dirname)
A fully synchronous version, for those situations where you cannot use async:
const walk = (dir, files = []) => {
const dirFiles = fs.readdirSync(dir)
for (const f of dirFiles) {
const stat = fs.lstatSync(dir + path.sep + f)
if (stat.isDirectory()) {
walk(dir + path.sep + f, files)
} else {
files.push(dir + path.sep + f)
}
}
return files
}
const allFiles = walk(someDir)
Here's a version for async/await:
const { promises: fs } = require("fs");
const path = require("path");
async function walk(dir) {
const entries = await fs.readdir(dir);
let ret = [];
for (const entry of entries) {
const fullpath = path.resolve(dir, entry);
const info = await fs.stat(fullpath);
if (info.isDirectory()) {
ret = [...ret, ...(await walk(fullpath))];
} else {
ret = [...ret, fullpath];
}
}
return ret;
}
(async function () {
console.log(await walk("/path/to/some/dir"));
})();

Categories