Adding files into a gulp stream during the pipeline - javascript

I've got a gulp stream that reads a few hundred markdown files and runs through a series of pipes like so:
var mystream = src('app/pages/**/*.md')
    .pipe(frontMatter({
        property: 'fm',
        remove: true
    }))
    .pipe(data(function (file) {
        if (file.fm.title) file.fm.title_html = markdown.marked.parseInline(file.fm.title)
        if (file.fm.description) file.fm.description_html = markdown.marked(file.fm.description)
    }))
    .pipe(data(process_last_update))
    // ...and like 10 more pipes, writing the stream at the end...
    .pipe(dest('site'))
This all works. What I'm trying to do: during one of those pipe stages, I check each markdown file's frontmatter, and if it has a certain value (an array of strings), I want to duplicate the file for each entry, setting the path to the value found in the frontmatter, and add those copies to the stream.
This is the code I'm trying:
.pipe(data(function (file) {
    if (file.fm.slug_history) {
        file.fm.slug_history.forEach(slug => {
            let newfile = Object.assign({}, file);
            newfile.path = file.base + "/" + slug + "/index.html";
            mystream.write(newfile);
        })
    }
}))
But the .pipe(dest('site')) at the end just isn't writing the additional files I've seemingly added to the stream. Any idea what I'm doing wrong?
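For reference, a minimal sketch (an assumption about the usual pattern, not a confirmed fix) of how extra files are typically emitted from inside a gulp pipeline: an object-mode through2 transform can clone the Vinyl file and push() the copies into the same stream, instead of writing back into the head of the pipeline with mystream.write(). Note that Object.assign({}, file) does not produce a real Vinyl file, so file.clone() is used instead.
var through2 = require('through2');

// in place of the data() stage shown above:
.pipe(through2.obj(function (file, enc, cb) {
    if (file.fm && file.fm.slug_history) {
        file.fm.slug_history.forEach(slug => {
            var copy = file.clone();                            // a proper Vinyl copy
            copy.path = file.base + "/" + slug + "/index.html"; // new output path from the frontmatter
            this.push(copy);                                    // add the copy to this stream
        });
    }
    cb(null, file); // always pass the original file through
}))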

Related

Discord.JS - List all files within a directory as one message

I am having an issue where I cannot seem to find a solution.
I have written a Discord bot with Discord.JS that needs to send a list of file names from a directory as one message. So far I have tried using fs.readdir with path.join and fs.readFileSync(). Below is one example.
const server = message.guild.id;
const serverpath = `./sounds/${server}`;
const fs = require('fs');
const path = require('path');
const directoryPath = path.join('/home/pi/Pablo', serverpath);
fs.readdir(directoryPath, function(err, files) {
    if (err) {
        return console.log('Unable to scan directory: ' + err);
    }
    files.forEach(function(file) {
        message.channel.send(file);
    });
});
Whilst this does send a message of every file within the directory, it sends each file as a separate message. This causes it to take a while due to Discord API rate limits. I want them to all be within the same message, separated with a line break, with a max of 2000 characters (max limit for Discord messages).
Can someone assist with this?
Thanks in advance.
Jake
I recommend using fs.readdirSync(); it will return an array of the file names in the given directory. Use Array#filter() to narrow the files down to JavaScript files (extensions ending in ".js"). To remove ".js" from the file names, use Array#map() to replace each ".js" with "" (effectively removing it entirely), then use Array#join() to join them into a string and send it.
const server = message.guild.id;
const serverpath = `./sounds/${server}`;
const { readdirSync } = require('fs');
const path = require('path');
const directoryPath = path.join('/home/pi/Pablo', serverpath);
const files = readdirSync(directoryPath)
    .filter(fileName => fileName.endsWith('.js'))
    .map(fileName => fileName.replace('.js', ''))
    .join('\n');
message.channel.send(files);
Regarding handling the sending of a message greater than 2000 characters:
You can use the Util.splitMessage() method from Discord.JS and provide a maxLength option of 2000. As long as the number of chunks needed is not more than a few, you should be fine with the API rate limits.
const { Util } = require('discord.js');
// Defining "files"
const textChunks = Util.splitMessage(files, {
    maxLength: 2000
});
textChunks.forEach(async chunk => {
    await message.channel.send(chunk);
});
Build an array of strings (the file names), then join it with "\n".
let names = [];
fs.readdir(directoryPath, function(err, files) {
    if (err) {
        return console.log('Unable to scan directory: ' + err);
    }
    files.forEach(function(file) {
        names.push(file);
    });
    message.channel.send(names.join("\n"));
});

folders and files are not visible after uploading file through multer

I am working on a small project. Discussing it step by step:
At first I am uploading zip files through multer
Extracting those files (How can I call extract function after completing upload using multer?)
After extracting, I am trying to filter those files
After filtering those files I want to move some files to another directory
In my main index.js I have:
A simple route to upload files, which is working:
// MAIN API ENDPOINT
app.post("/api/zip-upload", upload, async (req, res, next) => {
    console.log("FIles - ", req.files);
});
Continuously checking whether there is any zip file that needs to be unzipped, but the problem is that after uploading it's not showing any files or directories:
// UNZIP FILES
const dir = `${__dirname}/uploads`;
const files = fs.readdirSync("./uploads");
const filesUnzip = async () => {
    try {
        if (fs.existsSync(dir)) {
            console.log("files - ", files);
            for (const file of files) {
                console.log("file - ", file);
                try {
                    const extracted = await extract("./uploads/" + file, { dir: __dirname + "/uploads/" });
                    console.log("Extracted - ", extracted);
                    // const directories = await fs.statSync(dir + '/' + file).isDirectory();
                } catch (bufErr) {
                    // console.log("------------");
                    console.log(bufErr.syscall);
                }
            }
            // const directories = await files.filter(function (file) { return fs.statSync(dir + '/' + file).isDirectory(); });
            // console.log(directories);
        }
    } catch (err) {
        console.log(err);
    }
    return;
}
setInterval(() => {
    filesUnzip();
}, 2000);
Moving files to a static directory, but here is the same problem: no directory found.
const getAllDirs = async () => {
    // console.log(fs.existsSync(dir));
    // FIND ALL DIRECTORIES
    if (fs.existsSync(dir)) {
        const directories = await files.filter(function (file) { return fs.statSync(dir + '/' + file).isDirectory(); });
        console.log("Directories - ", directories);
        if (directories.length > 0) {
            for (let d of directories) {
                const subdirFiles = fs.readdirSync("./uploads/" + d);
                for (let s of subdirFiles) {
                    if (s.toString().match(/\.xml$/gm) || s.toString().match(/\.xml$/gm) !== null) {
                        console.log("-- ", d + "/" + s);
                        const move = await fs.rename("uploads/" + d + "/" + s, __dirname + "/static/" + s, (err) => { console.log(err) });
                        console.log("Move - ", move);
                    }
                }
            }
        }
    }
}
setInterval(getAllDirs, 3000);
There are so many issues with your code, I don't know where to begin:
Why are you using fs.xxxSync() methods if all your functions are async? Using xxxSync() methods is highly discouraged because it blocks the server (i.e. parallel requests can't/won't be accepted while a sync read is in progress). The fs module supports a promise API ...
Your "Continuous checking" for new files is always checking the same (probably empty) files array, because it seems you are executing files = fs.readdirSync("./uploads"); only once (probably at server start, but I can't tell for sure because there isn't any context for that snippet).
You shouldn't be polling that "uploads" directory. Because writing a file (if done properly) is an asynchronous process, you may end up reading incomplete files. Instead you should trigger the unzipping from your endpoint handler. Once it is hit, req.files contains the files that have been uploaded, so you can simply use this array to start any further processing instead of frequently polling a directory.
At some points you are using the callback version of the fs API (for instance fs.rename()). You cannot await a function that expects a callback. Again, use the promise API of fs.
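For illustration only (the paths and function name here are placeholders, not taken from the question), this is the difference that last point refers to:
const fsp = require('fs').promises;

async function moveFile(oldPath, newPath) {
    // The callback version fs.rename(oldPath, newPath, cb) returns undefined immediately,
    // so awaiting it does not wait for anything; the promise API resolves once the rename
    // has actually completed and rejects on error.
    await fsp.rename(oldPath, newPath);
}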
EDIT
So I'm trying to address your issues. Maybe I can't solve all of them because of missing information, but you should get the general idea.
First of all, you should use the promise API of the fs module. And for path manipulation, you should use the built-in path module, which will take care of some OS-specific issues.
const fs = require('fs').promises;
const path = require('path');
Your API endpoint isn't currently returning anything. I suppose you stripped away some code, but still. Furthermore, you should trigger your file handling from here, so you don't have to do directory polling, which is
error prone,
wasting resources, and
blocking the server if you do it synchronously like you do.
app.post("/api/zip-upload", upload, async (req, res, next) => {
    console.log("FIles - ", req.files);
    // if you want to return the result only after the files have been
    // processed, use await
    await handleFiles(req.files);
    // if you want to return to the client immediately and process files
    // in the background, skip the await
    // handleFiles(req.files);
    res.sendStatus(200);
});
Handling the files seems to consist of two different steps:
unzipping the uploaded zip files
copying some of the extracted files into another directory
const source = path.join(".", "uploads");
const target = path.join(__dirname, "uploads");
const statics = path.join(__dirname, "statics");

const handleFiles = async (files) => {
    // a random folder, which will be unique for this upload request
    const tmpfolder = path.join(target, `tmp_${new Date().getTime()}`);
    // create this folder
    await fs.mkdir(tmpfolder, { recursive: true });
    // extract all uploaded files to the folder
    // this will wait for a list of promises and resolve once all of them resolved
    await Promise.all(files.map(f => extract(path.join(source, f), { dir: tmpfolder })));
    await copyFiles(tmpfolder);
    // you probably should delete the uploaded zipfiles and the tempfolder
    // after they have been handled
    await Promise.all(files.map(f => fs.unlink(path.join(source, f))));
    await fs.rmdir(tmpfolder, { recursive: true });
}
const copyFiles = async (tmpfolder) => {
    // get all files/directory names in the tmpfolder
    const allfiles = await fs.readdir(tmpfolder);
    // get their stats
    const stats = await Promise.all(allfiles.map(f => fs.stat(path.join(tmpfolder, f))));
    // filter directories only
    const dirs = allfiles.filter((_, i) => stats[i].isDirectory());
    for (let d of dirs) {
        // read all filenames in the subdirectory
        const files = await fs.readdir(path.join(tmpfolder, d));
        // filter by extension .xml
        const xml = files.filter(x => path.extname(x) === ".xml");
        // move all xml files
        await Promise.all(xml.map(f => fs.rename(path.join(tmpfolder, d, f), path.join(statics, f))));
    }
}
That should do the trick. Of course you may notice there is no error handling with this code. You should add that.
And I'm not 100% sure about your paths. You should consider the following:
./uploads refers to a directory uploads in the current working directory (wherever that may be)
${__dirname}/uploads refers to a directory uploads which is in the same directory as the script file currently executing. Not sure if that is the directory you want ...
./uploads and ${__dirname}/uploads may point to the same folder or to completely different folders. No way of knowing that without additional context.
Furthermore, in your code you extract the ZIP files from ./uploads to ${__dirname}/uploads and then later try to copy XML files from ./uploads/xxx to ${__dirname}/statics, but there won't be any directory xxx in ./uploads because you extracted the ZIP file to a (probably) completely different folder.
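A small illustration of that path distinction, with assumed example locations (the real values depend on where the process is started and where the script file lives):
const path = require('path');

// Assume the process was started from /home/user and this script is /home/user/src/index.js:
path.resolve('./uploads');        // -> /home/user/uploads      (follows the current working directory)
path.join(__dirname, 'uploads');  // -> /home/user/src/uploads  (follows the script's location)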

node.js stream pipeline separately over all files in folder

I wrote a pipeline that reads an input stream from a file, transforms it and streams it out to an output file. The pipeline works well for one file. Now I want to iterate through all files in a folder, producing a separate output file for each input file.
I am completely new to JS and synchronous streams. Did my best to figure it out. However, given n input files, what I currently get is n output files that are all copies processed from the last input file.
const fs = require('fs');
const { pipeline } = require("stream");

async function processFolder(inPath, outPath) {
    const dir = await fs.promises.opendir(inPath)
    for await (const dirent of dir) {
        let inFile = inPath.concat(dirent.name);
        let outFile = outPath.concat(dirent.name.slice(0, -4), ".json");
        console.log(inFile);
        await pipeline(
            fs.createReadStream(inFile).setEncoding('utf8'),
            transform,
            fs.createWriteStream(outFile),
            (err) => {
                if (err) {
                    console.error('Pipeline failed', err);
                } else {
                    console.log('Pipeline succeeded');
                }
            }
        );
    }
}
processFolder(inPath, outPath);
How can I change the code above so that it processes all files in the directory?
Thank you!
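No answer is recorded here, but as a rough sketch only (not from any posted answer): stream.pipeline with a callback is not awaitable, so each iteration starts before the previous one finishes, and a single transform stream instance cannot be reused across pipelines. Promisifying pipeline and creating a fresh transform per file (via a hypothetical createTransform() factory standing in for the original transform) addresses both.
const fs = require('fs');
const path = require('path');
const util = require('util');
const { pipeline } = require('stream');

// A promisified pipeline can be awaited, so the loop really handles one file at a time.
const pipelineAsync = util.promisify(pipeline);

async function processFolder(inPath, outPath) {
    const dir = await fs.promises.opendir(inPath);
    for await (const dirent of dir) {
        const inFile = path.join(inPath, dirent.name);
        const outFile = path.join(outPath, path.parse(dirent.name).name + '.json');
        await pipelineAsync(
            fs.createReadStream(inFile, { encoding: 'utf8' }),
            createTransform(),               // hypothetical factory: build a NEW transform per file
            fs.createWriteStream(outFile)
        );
        console.log('Pipeline succeeded for', inFile);
    }
}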

Parsing multiple large JSON files with node to mongoDB

I am parsing multiple large JSON files to my mongoDB database. At the moment I am using stream-json npm package. After I load one file I change the filename that I am loading and relaunch the script to load the next file. This is unnecessarily time consuming. So how can I iterate through all the files automatically? At the moment my code looks like this:
const StreamArray = require('stream-json/utils/StreamArray');
const path = require('path');
const fs = require('fs');
const filename = path.join(__dirname, './data/xa0.json'); // The next file is named xa1.json and so on.
const stream = StreamArray.make();

stream.output.on('data', function (object) {
    // my function block
});
stream.output.on('end', function () {
    console.log('File Complete');
});
fs.createReadStream(filename).pipe(stream.input);
I tried iterating through the names of the files by adding a loop that would add +1 to the filename (i.e. xa0 to xa1) at the point where the script logs 'File Complete', but this didn't work. Any ideas how I might be able to achieve this or something similar?
Just scan your JSON files directory using fs.readdir. It will return a list of file names that you can then iterate, something like this :
fs.readdir("./jsonfiles", async (err, files) => {
    for (const file of files) {
        await saveToMongo("./jsonfiles/" + file)
    }
})
So you just launch your script once and wait until full completion.
Of course, in order for it to be awaited, you need to promisify the saveToMongo function, something like :
const saveToMongo = fileName => {
    return new Promise((resolve, reject) => {
        // ... logic here
        stream.output.on('end', function () {
            console.log('File Complete');
            resolve() // Will trigger the next await
        });
    })
}

How to get Gulp4 to wait for file write tasks to complete?

I'm trying to write a workflow with Gulp 4 (see below for specific version info) that will
watch a local folder for an .html file
strip multiple tables out into individual .html files per table
convert said tables into .csv for further processing
clean the temporary directory all these files are dumped to.
The problem I'm running into is no matter what I try I cannot get my cleaning task to wait for the rest of the tasks to write files to the disk. I've tried nesting the data collection functions, including all the alteration methods into one long stream, and a handful of other clumsy solutions offered up here and other places - none of them work though. Any pointers would be a great help.
var gulp = require('gulp');
var exec = require('child_process').exec;
var rename = require('gulp-rename');
var inject = require('gulp-inject-string');
var htmlSplit = require('gulp-htmlsplit');
var del = require('del');

// Clean all non-csv files from ./data/temp
function clean() {
    return del(['data/temp/*', '!data/temp/*.csv']);
}

// Convert HTML tables to CSV files
function convertCSV(filename) {
    return exec('node node_modules/html-table-to-csv data/temp/' + filename + '.html data/temp/' + filename + '.csv');
}

// Move a renamed copy of original report to ./data/temp/
function getData() {
    return gulp.src('data/report/*.html')
        .pipe(rename('injected.html'))
        .pipe(gulp.dest('data/temp'));
}

// Inject split start comments before each <table> tag
function injectBefore() {
    return gulp.src('data/temp/*.html')
        .pipe(inject.beforeEach('<table', '<!-- split table.html -->\n'))
        .pipe(gulp.dest('data/temp'));
}

// Inject split stop comments after each </table> tag
function injectAfter() {
    return gulp.src('data/temp/*.html')
        .pipe(inject.afterEach('</table>', '\n<!-- split stop -->'))
        .pipe(gulp.dest('data/temp'));
}

// Split each table into its own HTML file for CSV conversion
function htmlCSV(done) {
    var i = 0;
    return gulp.src('data/temp/injected.html')
        .pipe(htmlSplit())
        .pipe(rename(function(file) {
            // Append unique number to end of each HTML file
            file.basename += i >= 9 ? ++i : '0' + ++i;
            // Send unique numbered HTML file to convertCSV()
            convertCSV(file.basename);
        }))
        .pipe(gulp.dest('data/temp'));
    done();
}

gulp.task('default', gulp.series(getData, injectBefore, injectAfter, htmlCSV, clean));

// FILE STRUCTURE
// analytics
// |_bower_components
// |_data
//   |_report <-- Original report in HTML dumped here
//   |_temp   <-- Injected and converted files dumped here
// |_node_modules
// |_gulpfile.js and other files
//
// Gulp - CLI version 1.2.2
// Gulp - Local version 4.0.0-alpha.2
// Node - v6.9.5
// NPM - 3.10.10
// OS - Windows 7 6.1.7601 Service pack 1 Build 7601
I removed the regular gulp plugins and the actual CSV transformation, as that is just a child_process execution.
The main issue with your code is that Node core child_process.exec() is async and will not signal completion unless you give it a callback. Replacing it with sync-exec allows a synchronous process call, since the function you pass to gulp-rename does not get a callback of its own to wait on.
var gulp = require('gulp');
var exec = require('sync-exec');
var rename = require('gulp-rename');
var del = require('del');

// Clean all non-csv files from ./data/temp
function clean() {
    return del(['temp']);
}

// Convert HTML tables to CSV files
function convertCSV(filename) {
    // return exec('node node_modules/html-table-to-csv data/temp/' + filename + '.html data/temp/' + filename + '.csv');
    return exec('sleep 5;');
}

// Move a renamed copy of original report to ./data/temp/
function getData() {
    return gulp.src('t.html')
        .pipe(gulp.dest('temp/'));
}

// Split each table into its own HTML file for CSV conversion
function htmlCSV() {
    var i = 0;
    return gulp.src('t.html')
        .pipe(rename(function(file) {
            // Append unique number to end of each HTML file
            file.basename += i >= 9 ? ++i : '0' + ++i;
            // Send unique numbered HTML file to convertCSV()
            convertCSV(file.basename);
        }))
        .pipe(gulp.dest('dist'));
}

gulp.task('default', gulp.series(getData, htmlCSV, clean));
Use es7 async/await syntax as well as util.promisify to wait for it to finish:
const util = require('util');
const exec = util.promisify(require('child_process').exec);

// Convert HTML tables to CSV files
async function convertCSV(filename) {
    // the promisified exec takes a single command string (plus optional options)
    return await exec('node node_modules/html-table-to-csv ' +
        'data/temp/' + filename + '.html ' +
        'data/temp/' + filename + '.csv');
}
No need for third party libraries
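Neither snippet above shows how the task graph actually waits for the conversions before clean() runs, so here is a hypothetical glue sketch (names like generatedNames and convertAll are illustrative, not from either answer): gulp 4 treats a returned promise as a task's completion signal, so collecting the split file names during htmlCSV and converting them in a separate task lets gulp.series() hold clean() back until every CSV exists.
// Names collected while htmlSplit()/rename() run; the .html files are on disk
// once the htmlCSV stream has finished, so conversion can safely start afterwards.
const generatedNames = [];

function htmlCSV() {
    var i = 0;
    return gulp.src('data/temp/injected.html')
        .pipe(htmlSplit())
        .pipe(rename(function (file) {
            file.basename += i >= 9 ? ++i : '0' + ++i;
            generatedNames.push(file.basename); // just remember the name here
        }))
        .pipe(gulp.dest('data/temp'));
}

// gulp waits for the returned promise, so clean() only runs after all CSVs are written.
function convertAll() {
    return Promise.all(generatedNames.map(name => convertCSV(name)));
}

gulp.task('default', gulp.series(getData, injectBefore, injectAfter, htmlCSV, convertAll, clean));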
