I wrote a pipeline that reads an input stream from a file, transforms it and streams it out to an output file. The pipeline works well for one file. Now I want to iterate through all files in a folder, producing a separate output file for each input file.
I am completely new to JS and to asynchronous streams. I did my best to figure it out; however, given n input files, what I currently get is n output files that all contain the processed contents of the last input file.
const fs = require('fs');
const { pipeline } = require("stream");

async function processFolder(inPath, outPath) {
    const dir = await fs.promises.opendir(inPath);
    for await (const dirent of dir) {
        let inFile = inPath.concat(dirent.name);
        let outFile = outPath.concat(dirent.name.slice(0, -4), ".json");
        console.log(inFile);
        await pipeline(
            fs.createReadStream(inFile).setEncoding('utf8'),
            transform,
            fs.createWriteStream(outFile),
            (err) => {
                if (err) {
                    console.error('Pipeline failed', err);
                } else {
                    console.log('Pipeline succeeded');
                }
            }
        );
    }
}

processFolder(inPath, outPath);
How can I change the code above so that it processes all files in the directory?
Thank you!
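For reference: the callback form of pipeline returns a stream, not a promise, so the await above never actually pauses the loop, and the single shared transform ends up serving every file at once. Below is a minimal sketch of one fix, using the promise-based pipeline from stream/promises (Node 15+) and a hypothetical makeTransform() factory standing in for however the original transform is built:

const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream/promises'); // promise-based pipeline, Node 15+

// makeTransform() is a hypothetical factory: it must return a NEW
// transform stream on every call so no state is shared between files.
async function processFolder(inPath, outPath) {
    const dir = await fs.promises.opendir(inPath);
    for await (const dirent of dir) {
        const inFile = path.join(inPath, dirent.name);
        const outFile = path.join(outPath, path.parse(dirent.name).name + '.json');
        // this pipeline resolves on completion and rejects on error,
        // so the await genuinely pauses the loop until the file is done
        await pipeline(
            fs.createReadStream(inFile, { encoding: 'utf8' }),
            makeTransform(),
            fs.createWriteStream(outFile)
        );
        console.log('Pipeline succeeded for', inFile);
    }
}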
I am having an issue to which I cannot seem to find a solution.
I have written a Discord bot with Discord.JS that needs to send a list of file names from a directory as one message. So far I have tried using fs.readdir with path.join, and fs.readFileSync(). Below is one example.
const server = message.guild.id;
const serverpath = `./sounds/${server}`;
const fs = require('fs');
const path = require('path');

const directoryPath = path.join('/home/pi/Pablo', serverpath);
fs.readdir(directoryPath, function(err, files) {
    if (err) {
        return console.log('Unable to scan directory: ' + err);
    }
    files.forEach(function(file) {
        message.channel.send(file);
    });
});
Whilst this does send a message for every file within the directory, it sends each file as a separate message. This causes it to take a while due to Discord API rate limits. I want them all to be within the same message, separated by line breaks, with a maximum of 2000 characters (the limit for Discord messages).
Can someone assist with this?
Thanks in advance.
Jake
I recommend using fs.readdirSync(); it will return an array of the file names in the given directory. Use Array#filter() to filter the files down to the ones that are JavaScript files (extension ".js"). To remove ".js" from the file names, use Array#map() to replace each ".js" with "" (effectively removing it entirely), then use Array#join() to join them into a string and send.
const server = message.guild.id;
const serverpath = `./sounds/${server}`;
const { readdirSync } = require('fs');
const path = require('path');

const directoryPath = path.join('/home/pi/Pablo', serverpath);
const files = readdirSync(directoryPath)
    .filter(fileName => fileName.endsWith('.js'))
    .map(fileName => fileName.replace('.js', ''))
    .join('\n');
message.channel.send(files);
Regarding handling the sending of a message greater than 2000 characters:
You can use the Util.splitMessage() method from Discord.JS and provide a maxLength option of 2000. As long as the number of chunks needed is not more than a few, you should be fine with regard to API rate limits.
const { Util } = require('discord.js');

// Defining "files"
const textChunks = Util.splitMessage(files, {
    maxLength: 2000
});

// inside an async function: for...of (unlike forEach) lets await
// pause between sends, which avoids hammering the rate limit
for (const chunk of textChunks) {
    await message.channel.send(chunk);
}
Build an array of strings (the names of the files), then join it with "\n".

let names = [];
fs.readdir(directoryPath, function(err, files) {
    if (err) {
        return console.log('Unable to scan directory: ' + err);
    }
    files.forEach(function(file) {
        names.push(file);
    });
    // send inside the callback, once readdir has delivered the list
    message.channel.send(names.join("\n"));
});
I am working on a small project. Discussing it step by step:
At first I am uploading zip files through multer
extracting those files (How can I call the extract function after the upload completes with multer?)
after extracting I am trying to filter those files
after filtering those files I want to move some of them to another directory
In my main index.js I have:
A simple route to upload files, which is working:
// MAIN API ENDPOINT
app.post("/api/zip-upload", upload, async (req, res, next) => {
    console.log("Files - ", req.files);
});
Continuously checking whether there is any zip file that needs to be unzipped, but the problem is that after uploading it does not show any files or directories:
// UNZIP FILES
const dir = `${__dirname}/uploads`;
const files = fs.readdirSync("./uploads");

const filesUnzip = async () => {
    try {
        if (fs.existsSync(dir)) {
            console.log("files - ", files);
            for (const file of files) {
                console.log("file - ", file);
                try {
                    const extracted = await extract("./uploads/" + file, { dir: __dirname + "/uploads/" });
                    console.log("Extracted - ", extracted);
                    // const directories = await fs.statSync(dir + '/' + file).isDirectory();
                } catch (bufErr) {
                    // console.log("------------");
                    console.log(bufErr.syscall);
                }
            }
            // const directories = await files.filter(function (file) { return fs.statSync(dir + '/' + file).isDirectory(); });
            // console.log(directories);
        }
    } catch (err) {
        console.log(err);
    }
    return;
}

setInterval(() => {
    filesUnzip();
}, 2000);
Moving files to the static directory, but here is the same problem: no directory found.
const getAllDirs = async () => {
    // console.log(fs.existsSync(dir));
    // FIND ALL DIRECTORIES
    if (fs.existsSync(dir)) {
        const directories = await files.filter(function (file) { return fs.statSync(dir + '/' + file).isDirectory(); });
        console.log("Directories - ", directories);
        if (directories.length > 0) {
            for (let d of directories) {
                const subdirFiles = fs.readdirSync("./uploads/" + d);
                for (let s of subdirFiles) {
                    if (s.toString().match(/\.xml$/gm) || s.toString().match(/\.xml$/gm) !== null) {
                        console.log("-- ", d + "/" + s);
                        const move = await fs.rename("uploads/" + d + "/" + s, __dirname + "/static/" + s, (err) => { console.log(err) });
                        console.log("Move - ", move);
                    }
                }
            }
        }
    }
}

setInterval(getAllDirs, 3000);
There are so many issues with your code, I don't know where to begin:
Why are you using fs.xxxSync() methods if all your functions are async? Using xxxSync() methods is highly discouraged because it blocks the server (i.e. parallel requests can't/won't be accepted while a sync read is in progress). The fs module supports a promise API ...
Your "Continuous checking" for new files is always checking the same (probably empty) files array because it seems you are executing files = fs.readdirSync("./uploads"); only once (probably at server start, but I can't tell for sure because there isn't any context for that snippet)
You shouldn't be polling that uploads directory. Because writing a file (if done properly) is an asynchronous process, you may end up reading incomplete files. Instead, you should trigger the unzipping from your endpoint handler. Once it is hit, req.files contains the files that have been uploaded, so you can simply use this array to start any further processing instead of frequently polling a directory.
At some points you are using the callback version of the fs API (for instance fs.rename()). You cannot await a function that expects a callback. Again, use the promise API of fs.
EDIT
So I'm trying to address your issues. Maybe I can't solve all of them because of missing information, but you should get the general idea.
First of all, you should use the promise API of the fs module. And for path manipulation, you should use the available path module, which will take care of some OS-specific issues.
const fs = require('fs').promises;
const path = require('path');
Your API endpoint isn't currently returning anything. I suppose you stripped away some code, but still. Furthermore, you should trigger your file handling from here, so you don't have to do directory polling, which is
error prone,
a waste of resources, and
blocking the server if done synchronously (as you currently do).
app.post("/api/zip-upload", upload, async (req, res, next) => {
console.log("FIles - ", req.files);
//if you want to return the result only after the files have been
//processed use await
await handleFiles(req.files);
//if you want to return to the client immediately and process files
//skip the await
//handleFiles(req.files);
res.sendStatus(200);
});
Handling the files seems to consist of two different steps:
unzipping the uploaded zip files
copying some of the extracted files into another directory
const source = path.join(".", "uploads");
const target = path.join(__dirname, "uploads");
const statics = path.join(__dirname, "statics");

const handleFiles = async (files) => {
    // a random folder, which will be unique for this upload request
    const tmpfolder = path.join(target, `tmp_${new Date().getTime()}`);
    // create this folder
    await fs.mkdir(tmpfolder, { recursive: true });
    // extract all uploaded files to the folder;
    // this will wait for a list of promises and resolve once all of them resolved
    await Promise.all(files.map(f => extract(path.join(source, f), { dir: tmpfolder })));
    await copyFiles(tmpfolder);
    // you probably should delete the uploaded zipfiles and the tempfolder
    // after they have been handled
    await Promise.all(files.map(f => fs.unlink(path.join(source, f))));
    await fs.rmdir(tmpfolder, { recursive: true });
}
const copyFiles = async (tmpfolder) => {
    // get all file/directory names in the tmpfolder
    const allfiles = await fs.readdir(tmpfolder);
    // get their stats
    const stats = await Promise.all(allfiles.map(f => fs.stat(path.join(tmpfolder, f))));
    // filter directories only
    const dirs = allfiles.filter((_, i) => stats[i].isDirectory());
    for (let d of dirs) {
        // read all filenames in the subdirectory
        const files = await fs.readdir(path.join(tmpfolder, d));
        // filter by extension .xml
        const xml = files.filter(x => path.extname(x) === ".xml");
        // move all xml files
        await Promise.all(xml.map(f => fs.rename(path.join(tmpfolder, d, f), path.join(statics, f))));
    }
}
That should do the trick. Of course, you may notice there is no error handling in this code. You should add that.
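For instance, a minimal sketch of adding it at the endpoint, handing failures to Express's error-handling middleware via next():

app.post("/api/zip-upload", upload, async (req, res, next) => {
    try {
        await handleFiles(req.files);
        res.sendStatus(200);
    } catch (err) {
        // let the express error-handling middleware deal with it
        next(err);
    }
});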
And I'm not 100% sure about your paths. You should consider the following
./uploads refers to a directory uploads in the current working directory (whereever that may be)
${__dirname}/uploads refers to a directory uploads which is in the same directory as the script file currently executing. Not sure if that is the directory you want ...
./uploads and ${__dirname}/uploads may point to the same folder or to completely different folders. No way knowing that without additional context.
Furthermore, in your code you extract the ZIP files from ./uploads to ${__dirname}/uploads and then later try to copy XML files from ./uploads/xxx to ${__dirname}/statics, but there won't be any directory xxx in ./uploads because you extracted the ZIP file to a (probably) completely different folder.
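A quick sketch to check where the two notations actually point on a given machine (the output depends on the working directory node was started from and on the script's location):

const path = require('path');

// "./uploads" is resolved against the current working directory,
// i.e. wherever node was started from:
console.log(path.resolve('./uploads'));
// "${__dirname}/uploads" is resolved against the location of the
// script file itself:
console.log(path.join(__dirname, 'uploads'));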
Has anybody written the file names in a folder to a CSV file using JavaScript?
My folder structure is:
Data
+IMG
+test
-1.png
-2.png
+train
-3.png
-4.png
The output CSV file should look like this:
Data/IMG/test/1.png Data/IMG/train/3.png
Data/IMG/test/2.png Data/IMG/train/4.png
You just need to loop through all folders and find all files.
You can refer to this answer to do this.
While finding all the file paths, you can append each path to a string for the CSV file:
const fs = require('fs');
const path = require('path');

let csvStr = "";

async function loop(startPath) {
    // Our starting point
    try {
        // Get the files as an array
        const files = await fs.promises.readdir(startPath);
        // Loop them all with the new for...of
        for (const file of files) {
            // Get the full paths
            const currentPath = path.join(startPath, file);
            // Stat the file to see if we have a file or dir
            const stat = await fs.promises.stat(currentPath);
            if (stat.isFile()) {
                console.log("'%s' is a file.", currentPath);
                // put the file into the csv string
                csvStr += currentPath + ", ";
            } else if (stat.isDirectory()) {
                console.log("'%s' is a directory.", currentPath);
                // enter the directory and loop
                await loop(currentPath);
            }
        } // End for...of
    } catch (e) {
        // Catch anything bad that happens
        console.error("We've thrown! Whoops!", e);
    }
}

// Make an async function that gets executed immediately
(async () => {
    // start the loop from the path where you run node
    await loop("./");
    fs.writeFileSync("your.csv", csvStr);
})();
I am parsing multiple large JSON files into my MongoDB database. At the moment I am using the stream-json npm package. After I load one file, I change the filename that I am loading and relaunch the script to load the next file. This is unnecessarily time-consuming. So how can I iterate through all the files automatically? At the moment my code looks like this:
const StreamArray = require('stream-json/utils/StreamArray');
const path = require('path');
const fs = require('fs');

const filename = path.join(__dirname, './data/xa0.json'); // the next file is named xa1.json, and so on
const stream = StreamArray.make();

stream.output.on('data', function (object) {
    // my function block
});
stream.output.on('end', function () {
    console.log('File Complete');
});
fs.createReadStream(filename).pipe(stream.input);
I tried iterating through the names of the files by adding a loop that would add +1 to the filename (i.e. xa0 to xa1) at the point where the script logs 'File Complete', but this didn't work. Any ideas how I might be able to achieve this or something similar?
Just scan your JSON files directory using fs.readdir. It will return a list of file names that you can then iterate over, something like this:
fs.readdir("./jsonfiles", async (err, files) => {
for( file in files ){
await saveToMongo("./jsonfiles/" + file)
}
})
So you just launch your script once and wait until full completion.
Of course, in order for it to be awaited, you need to promisify the saveToMongo function, something like :
const saveToMongo = fileName => {
    return new Promise((resolve, reject) => {
        // ... logic here
        stream.output.on('end', function () {
            console.log('File Complete');
            resolve(); // will trigger the next await
        });
    });
}
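Putting the two pieces together, a sketch of how the StreamArray setup from the question could move inside the promisified function, so that every file gets its own fresh stream (the actual MongoDB insert is left as a stub):

const StreamArray = require('stream-json/utils/StreamArray');
const fs = require('fs');

const saveToMongo = fileName => {
    return new Promise((resolve, reject) => {
        // a fresh stream pair per file, instead of one shared instance
        const stream = StreamArray.make();
        stream.output.on('data', function (object) {
            // ... insert "object" into mongo here (stub)
        });
        stream.output.on('end', function () {
            console.log('File Complete');
            resolve(); // will trigger the next await
        });
        fs.createReadStream(fileName)
            .on('error', reject)
            .pipe(stream.input);
    });
}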
The purpose is to extract the contents of a .sketch file.
I have a file named myfile.sketch. On renaming it to myfile.zip and extracting it in Finder, I'm able to view the files within. I tried doing the same on the server using Node.js by renaming the file extension to .zip, but I wasn't able to extract the files; rather, I got some ZIP files within the files.
var oldPath = __dirname + '/uploads/myfile.sketch',
    newPath = __dirname + '/uploads/myfile.zip';

fs.rename(oldPath, newPath, function (err) {
    console.log('rename callback ', err);
});
Is it possible to extract a non-ZIP file using frameworks like JSzip?
As a .sketch file is essentially a ZIP file, the extension does not matter. Any tool that is capable of unpacking a ZIP file will work.
You can verify this with the file command:
$ file myfile.sketch
myfile.sketch: Zip archive data, at least v1.0 to extract
As you are working on the server already, there is nothing stopping you from just using the OS's command line tools like unzip.
Like this:
const util = require('util');
const exec = util.promisify(require('child_process').exec);

async function unzip() {
    const filename = 'myfile.sketch';
    const { stdout, stderr } = await exec('unzip ' + filename);
    console.log('stdout:', stdout);
    console.log('stderr:', stderr);
}

unzip();
Doing it with JSZip is straightforward as well:
var fs = require('fs');
var JSZip = require('jszip');

new JSZip.external.Promise(function (resolve, reject) {
    fs.readFile('myfile.sketch', function (err, data) {
        if (err) {
            reject(err);
        } else {
            resolve(data);
        }
    });
}).then(function (data) {
    return JSZip.loadAsync(data);
}).then(function (zip) {
    // list the entries to confirm the archive was readable
    zip.forEach(function (relativePath, file) {
        console.log(relativePath);
    });
});
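And since the goal is extracting the contents: each entry of the loaded archive can be read out as a buffer and written to disk. A sketch, assuming the archive contains an entry named document.json (substitute any entry name printed by the listing above):

// continuing with the "zip" object from the previous step
zip.file('document.json')
    .async('nodebuffer')
    .then(function (content) {
        return fs.promises.writeFile('document.json', content);
    });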