I'm trying to watch for any newly added files to an ftp server, which has the directory mapped to a drive on the server that's running the node application. The problem is that it doesn't register any events for files added through ftp; when files are modified or created through the node application they are picked up fine.
I'm currently using chokidar to watch the directory and log any events with the simple code below:
// Watch ./myDir; awaitWriteFinish is set in the hope that events are only
// reported once a file has finished being written.
const watchOptions = {
  persistent: true,
  awaitWriteFinish: {
    stabilityThreshold: 2000,
    pollInterval: 100,
  },
};
const watcher = chokidar.watch('./myDir', watchOptions);

// Log every reported addition and modification.
const reportAdded = (path) => console.log(`File ${path} has been added`);
const reportChanged = (path) => console.log(`File ${path} has been changed`);

watcher.on('add', reportAdded).on('change', reportChanged);
I've added the awaitWriteFinish option to try to see if it will register when the file is completed from the ftp transfer, but with no joy.
Any suggestions?
You can watch a directory using the native module fs:
const fs = require('fs');
const folderPath = './test';
const pollInterval = 300;
// Last seen modification time per file name. A Map is used instead of a plain
// object so that names such as "constructor" or "__proto__" cannot collide
// with properties inherited from Object.prototype.
const folderItems = new Map();

// Every `pollInterval` ms, list `folderPath` (top level only) and log each
// entry that is new or whose mtime differs from the previous scan.
setInterval(() => {
  for (const file of fs.readdirSync(folderPath)) {
    const path = `${folderPath}/${file}`;
    let lastModification;
    try {
      lastModification = fs.statSync(path).mtimeMs;
    } catch (err) {
      // The entry can disappear between readdirSync and statSync; skip it
      // instead of letting the exception escape from the timer callback.
      if (err.code === 'ENOENT') continue;
      throw err;
    }
    if (!folderItems.has(file)) {
      folderItems.set(file, lastModification);
      console.log(`File ${path} has been added`);
    } else if (folderItems.get(file) !== lastModification) {
      folderItems.set(file, lastModification);
      console.log(`File ${path} has been changed`);
    }
  }
}, pollInterval);
However, the example above does not watch files in subfolders. Another approach, which covers all subfolders, is to run the Unix find command through Node's child_process module.
const fs = require('fs');
const { execFileSync } = require('child_process');

const folderPath = './test';
const pollInterval = 500;
// Last seen modification time per path. A Map avoids collisions between path
// names and properties inherited from Object.prototype.
const folderItems = new Map();

// Every `pollInterval` ms, list everything below `folderPath` with the Unix
// `find` command and log each path that is new or whose mtime has changed.
setInterval(() => {
  // execFileSync passes the folder as an argument without going through a
  // shell, and -print0 separates results with NUL so that paths containing
  // spaces or newlines are handled correctly.
  const fileList = execFileSync('find', [folderPath, '-print0']).toString().split('\0');
  for (const file of fileList) {
    if (file.length < 1) continue;
    let lastModification;
    try {
      lastModification = fs.statSync(file).mtimeMs;
    } catch (err) {
      // The path can disappear between `find` and statSync; skip it instead
      // of letting the exception escape from the timer callback.
      if (err.code === 'ENOENT') continue;
      throw err;
    }
    if (!folderItems.has(file)) {
      folderItems.set(file, lastModification);
      console.log(`File ${file} has been added`);
    } else if (folderItems.get(file) !== lastModification) {
      folderItems.set(file, lastModification);
      console.log(`File ${file} has been changed`);
    }
  }
}, pollInterval);
Related
This is the code I currently have. How would I adapt it to check each sub-directory?
const fs = require('fs')
module.exports = (client, Discord) =>{
const command_files = fs.readdirSync('./commands/').filter(file => file.endsWith('.js'))
for(const file of command_files){
const command = require(`../commands/${file}`);
if(command.name) {
client.commands.set(command.name, command);
} else {
continue
}
}
}
And this is the layout I have for the commands folder (image: the folder layout).
You need to wrap the whole code into a function and use some recursion.
Please note that, when using recursion, a depth limit is a wise way to keep it under control.
Something like this should do it:
const fs = require('fs')
module.exports = (client, Discord) =>{
const depth = 3;
const finder = (path, currentDepth = 0) => {
if (currentDepth >= depth) {
return; // Breaks here
}
const dirContent = fs.readdirSync(path);
const command_files = dirContent.filter(file => file.endsWith('.js'));
const folders = dirContent.filter(file => {
const dirPath = path + file;
// Exists + is a directory verification
return fs.existsSync(dirPath) && fs.lstatSync(dirPath).isDirectory();
);
for(const file of command_files){
const filePath = '../' + path + file;
const command = require(filePath);
if(command.name) {
client.commands.set(command.name, command);
} else {
continue
}
}
// Loops through folders
folders.map((folder) => finder(path + folder + '/', currentDepth + 1));
}
finder('./commands/');
}
Cloud Functions for Firebase has this nice sample where they create a thumbnail for each uploaded image. This is done by making use of ImageMagick.
I tried to convert the sample to convert PDFs to images. This is something ImageMagick can do, but I can't make it work with Cloud Functions for Firebase. I keep getting a code 1 error:
ChildProcessError: `convert /tmp/cd9d0278-16b2-42be-aa3d-45b5adf89332.pdf[0] -density 200 /tmp/cd9d0278-16b2-42be-aa3d-45b5adf89332.pdf` failed with code 1
at ChildProcess.<anonymous> (/user_code/node_modules/child-process-promise/lib/index.js:132:23)
at emitTwo (events.js:106:13)
at ChildProcess.emit (events.js:191:7)
at maybeClose (internal/child_process.js:877:16)
at Socket.<anonymous> (internal/child_process.js:334:11)
at emitOne (events.js:96:13)
at Socket.emit (events.js:188:7)
at Pipe._handle.close [as _onclose] (net.js:498:12)
Of course, one possibility is that converting PDFs is simply not supported.
const functions = require('firebase-functions');
const gcs = require('#google-cloud/storage')();
const spawn = require('child-process-promise').spawn;
// [END import]
// [START generateThumbnail]
/**
* When an image is uploaded in the Storage bucket We generate a thumbnail automatically using
* ImageMagick.
*/
// [START generateThumbnailTrigger]
exports.generateThumbnail = functions.storage.object().onChange(event => {
// [END generateThumbnailTrigger]
// [START eventAttributes]
const object = event.data; // The Storage object.
const fileBucket = object.bucket; // The Storage bucket that contains the file.
const filePath = object.name; // File path in the bucket.
const contentType = object.contentType; // File content type.
const resourceState = object.resourceState; // The resourceState is 'exists' or 'not_exists' (for file/folder deletions).
// [END eventAttributes]
// [START stopConditions]
// Exit if this is triggered on a file that is not an image.
if (!contentType.startsWith('application/pdf')) {
console.log('This is not a pdf.');
return;
}
// Get the file name.
const fileName = filePath.split('/').pop();
// Exit if the image is already a thumbnail.
if (fileName.startsWith('thumb_')) {
console.log('Already a Thumbnail.');
return;
}
// Exit if this is a move or deletion event.
if (resourceState === 'not_exists') {
console.log('This is a deletion event.');
return;
}
// [END stopConditions]
// [START thumbnailGeneration]
// Download file from bucket.
const bucket = gcs.bucket(fileBucket);
const tempFilePath = `/tmp/${fileName}`;
return bucket.file(filePath).download({
destination: tempFilePath
}).then(() => {
console.log('Pdf downloaded locally to', tempFilePath);
// Generate a thumbnail of the first page using ImageMagick.
return spawn('convert', [tempFilePath+'[0]' ,'-density', '200', tempFilePath]).then(() => {
console.log('Thumbnail created at', tempFilePath);
// Convert pdf extension to png
const thumbFilePath = filePath.replace('.pdf', 'png');
// Uploading the thumbnail.
return bucket.upload(tempFilePath, {
destination: thumbFilePath
});
});
});
// [END thumbnailGeneration]
});
Node modules can install native code that is in the same directory as the Cloud Function's source code. I found some Node libraries on GitHub that do this for Ghostscript, which is a very useful library for PDF processing:
Node library that wraps Ghostscript command line:
https://github.com/sina-masnadi/node-gs
Compiled Ghostscript, which is used via git submodule:
https://github.com/sina-masnadi/lambda-ghostscript
I put lambda-ghostscript into a sub-directory of my functions directory, then add the node-gs as a dependency in my package file like this:
{
"name": "functions",
"dependencies": {
"@google-cloud/storage": "^1.3.1",
"child-process-promise": "^2.2.1",
"firebase-admin": "~5.4.0",
"firebase-functions": "^0.7.2",
"gs": "https://github.com/sina-masnadi/node-gs/tarball/master"
}
}
Then in my index.js file I can just require the node library to easily use ghostscript from JavaScript. Here's the complete code for the Cloud Function that uses a Google Cloud Storage trigger:
const functions = require('firebase-functions');
const gcs = require('#google-cloud/storage')();
const spawn = require('child-process-promise').spawn;
const path = require('path');
const os = require('os');
const fs = require('fs');
var gs = require('gs');
exports.makePNG = functions.storage.object().onChange(event => {
// ignore delete events
if (event.data.resourceState == 'not_exists') return false;
const filePath = event.data.name;
const fileDir = path.dirname(filePath);
const fileName = path.basename(filePath);
const tempFilePath = path.join(os.tmpdir(), fileName);
if (fileName.endsWith('.png')) return false;
if (!fileName.endsWith('.pdf')) return false;
const newName = path.basename(filePath, '.pdf') + '.png';
const tempNewPath = path.join(os.tmpdir(), newName);
// // Download file from bucket.
const bucket = gcs.bucket(event.data.bucket);
return bucket.file(filePath).download({
destination: tempFilePath
}).then(() => {
console.log('Image downloaded locally to', tempFilePath);
return new Promise(function (resolve, reject) {
gs()
.batch()
.nopause()
.option('-r' + 50 * 2)
.option('-dDownScaleFactor=2')
.executablePath('lambda-ghostscript/bin/./gs')
.device('png16m')
.output(tempNewPath)
.input(tempFilePath)
.exec(function (err, stdout, stderr) {
if (!err) {
console.log('gs executed w/o error');
console.log('stdout',stdout);
console.log('stderr',stderr);
resolve();
} else {
console.log('gs error:', err);
reject(err);
}
});
});
}).then(() => {
console.log('PNG created at', tempNewPath);
// Uploading the thumbnail.
return bucket.upload(tempNewPath, {destination: newName});
// Once the thumbnail has been uploaded delete the local file to free up disk space.
}).then(() => {
fs.unlinkSync(tempNewPath);
fs.unlinkSync(tempFilePath);
}).catch((err) => {
console.log('exception:', err);
return err;
});
});
Here's the project on github: https://github.com/ultrasaurus/ghostscript-cloud-function
Disclaimer: This uses compiled native code. I verified experimentally that it works for this case, so it is probably fine. I didn't look into the specific compile options or validate whether they are exactly correct for the Cloud Functions environment.
WORKING SOLUTION
Thank you @Ultrasaurus for pointing out this approach! However, it did not work for me, and in your GitHub repo you also stated "I haven't tested them". I modified your solution a little bit and ended up with the following code, which is 100% working for me:
{
"dependencies": {
"@google-cloud/firestore": "^4.4.0",
"@google-cloud/storage": "^5.3.0",
"ghostscript": "https://github.com/musubu/node-ghostscript/tarball/master",
"pdf-image": "^2.0.0",
"rimraf": "^3.0.2",
"uuid": "^8.3.1"
}
}
The function is triggered by a Firestore event:
const Storage = require('@google-cloud/storage')
const fs = require('fs')
const rimraf = require('rimraf')
const os = require('os')
const gs = require('ghostscript')

// Placeholders: replace with the real project id and bucket name.
const GOOGLE_PROJECT_ID = 'MY_GOOGLE_PROJECT_ID'
const GOOGLE_STORAGE_BUCKET_NAME = 'MY_GOOGLE_STORAGE_BUCKET_NAME'

// Storage client shared by the download and upload helpers.
const storage = new Storage.Storage({
  projectId: GOOGLE_PROJECT_ID
})
exports.createImage = async (event) => {
let {
appointment,
name
} = event.value.fields
name = getFileName(name.stringValue)
appointment = appointment.stringValue
console.log(`Processing document ${name} in appointment ${appointment}`)
const tempDir = createTempDir(appointment)
const tmpDocumentPath = await downloadPdf(tempDir, name, appointment)
const imagePath = await convertPdfToImage(tmpDocumentPath)
await uploadImage(imagePath, appointment)
deleteDir(tempDir)
}
/**
 * Returns the part of `name` after its last '/', or the whole string when it
 * contains no '/'.
 * @param {string} name
 * @returns {string}
 */
function getFileName (name) {
  const lastSlash = name.lastIndexOf('/')
  return name.slice(lastSlash + 1)
}
/**
 * Creates a new, uniquely named working directory inside the OS temp folder.
 * @param {string} appointment - Used as the prefix of the directory name.
 * @returns {string} Path of the directory that was created.
 */
function createTempDir (appointment) {
  // mkdtempSync appends random characters to the prefix and creates the
  // directory, so two calls can never end up with the same name.
  const tempDir = fs.mkdtempSync(`${os.tmpdir()}/${appointment}_`)
  console.log(`Created dir ${tempDir}`)
  return tempDir
}
/**
 * Downloads `${appointment}/${name}` from the configured bucket into `tempDir`.
 * @param {string} tempDir - Local directory that receives the file.
 * @param {string} name - File name of the document.
 * @param {string} appointment - Folder of the document inside the bucket.
 * @returns {Promise<string>} Local path of the downloaded file.
 */
async function downloadPdf (tempDir, name, appointment) {
  const destination = `${tempDir}/${name}`
  const bucket = storage.bucket(GOOGLE_STORAGE_BUCKET_NAME)
  const remoteFile = bucket.file(`${appointment}/${name}`)

  await remoteFile.download({ destination })
  console.log(`Successfully downloaded document ${name}`)

  return destination
}
/**
 * Renders a PDF to a PNG with Ghostscript.
 *
 * The image is written next to the PDF: a trailing ".pdf" (any case) is
 * replaced by ".png"; if the path has no such suffix, ".png" is appended so
 * the output never overwrites the input.
 *
 * @param {string} pdfPath - Path of the PDF to convert.
 * @returns {Promise<string>} Path of the generated PNG. Rejects when
 *   Ghostscript reports an error or when starting it throws.
 */
async function convertPdfToImage (pdfPath) {
  // Only the extension is touched; a "pdf" elsewhere in the path (for example
  // in a directory name) is left alone.
  const imagePath = pdfPath.replace(/(\.pdf)?$/i, '.png')

  return new Promise(function (resolve, reject) {
    try {
      gs()
        .batch()
        .nopause()
        .device('png16m')
        .output(imagePath)
        .input(pdfPath)
        .exec(function (err, stdout, stderr) {
          if (!err) {
            console.log('gs executed w/o error')
            console.log('stdout', stdout)
            console.log('stderr', stderr)
            resolve(imagePath)
          } else {
            console.log('gs error:', err)
            reject(err)
          }
        })
    } catch (error) {
      console.log(error)
      // Without this the promise would stay pending forever and the caller
      // would hang.
      reject(error)
    }
  })
}
/**
 * Uploads a local image to `${appointment}/<image file name>` in the
 * configured bucket and tags it with the appointment as custom metadata.
 * @param {string} imagePath - Local path of the image; '/' is the separator.
 * @param {string} appointment - Target folder in the bucket.
 * @returns {Promise<void>}
 */
async function uploadImage (imagePath, appointment) {
  const imageName = imagePath.split('/').pop()
  console.log(`Starting upload for ${imageName} at ${imagePath} to storage ${appointment}/${imageName}`)

  const uploadOptions = {
    destination: `${appointment}/${imageName}`,
    metadata: {
      metadata: { appointment }
    }
  }
  const bucket = storage.bucket(GOOGLE_STORAGE_BUCKET_NAME)
  await bucket.upload(imagePath, uploadOptions)

  console.log(`Successfully uploaded image for appointment ${appointment}`)
}
/**
 * Deletes `dir` by delegating to rimraf's synchronous API.
 * @param {string} dir - Path handed to rimraf.sync; createImage passes its
 *   temporary working directory here.
 */
function deleteDir (dir) {
rimraf.sync(dir)
}
// codenotworking
const path = require("path");
const fs = require("fs");
// Shorthand for console.log (assigned without a declaration keyword).
log = console.log;
// Collected entries; filled from the asynchronous callbacks below.
const names = [];
/**
 * Lists the given directory, descends into sub-directories and pushes every
 * entry into `names`. Callback based: it returns nothing and does not signal
 * when the traversal has finished.
 *
 * The parameter is named `path`, the same name as the `path` module required
 * at the top of the file, so inside this function `path` is the string
 * argument rather than the module.
 */
function collectFileNamesRecursively(path) {
fs.readdir(path, (err, files) => {
err ? log(err) : log(files);
// replacing paths
for (const index in files) {
const file = files[index];
files[index] = path.resolve(path, file);
}
for (let file of files) {
fs.stat(file, (err, stat) => {
// An error is only logged; `stat` is still dereferenced on the next line.
err ? log(err) : null;
if (stat.isDirectory()) {
collectFileNamesRecursively(file);
}
names.push(file);
});
}
});
}
// Start at ../public relative to this script's directory.
collectFileNamesRecursively(path.join(__dirname, "../public"));
I am using Node.js v10.8.0 and the directory structure is
- project/
- debug/
- codenotworking.js
- public/
- js/
- file2.js
- file.html
whenever i run this code i get the following error
TypeError: path.resolve is not a function
at fs.readdir (C:\backup\project\debug\codenotworking.js:17:24)
at FSReqWrap.oncomplete (fs.js:139:20)
what am i doing wrong here ?
You're shadowing your path import by specifying the path parameter in collectFileNamesRecursively. Change the parameter name to something else.
Apart from that using recursion with callbacks this way won't work - I would recommend using async/await. Something like:
const path = require('path');
const fs = require('fs');
/**
 * Walks `currBasePath` recursively and collects the names of all
 * non-directory entries.
 *
 * @param {string} currBasePath - Directory to start from.
 * @param {string[]} [foundFileNames=[]] - Array that receives the entry names
 *   (base names, not full paths). It is filled in place; if omitted, a new
 *   array is created.
 * @returns {Promise<string[]>} The array that was filled.
 */
async function collectFileNamesRecursively(currBasePath, foundFileNames = []) {
  const dirContents = await fs.promises.readdir(currBasePath);
  for (const file of dirContents) {
    const currFilePath = path.resolve(currBasePath, file);
    const stat = await fs.promises.stat(currFilePath);
    if (stat.isDirectory()) {
      // Pass the same array down so all levels accumulate into one list.
      await collectFileNamesRecursively(currFilePath, foundFileNames);
    } else {
      foundFileNames.push(file);
    }
  }
  return foundFileNames;
}
The issue I am running into is that when I test the function against a .zip file, execution reaches the fs.createReadStream / unzip step and then nothing happens: the step neither runs nor returns an error. I would like to understand what I am doing wrong and what a correct solution would look like.
const AWS = require('aws-sdk');
const fs = require('fs');
const mkdirp = require('mkdirp');
const unzipper = require('unzipper');
/**
 * Lambda handler for an S3 event: creates two folders under /tmp, fetches the
 * object named in the first event record and pipes a read stream into
 * unzipper.Extract.
 *
 * Errors thrown inside the try block are logged and not rethrown.
 */
exports.handler = async (event, context) => {
// Variables for bucket init
let sourceBucket = 'am-doc-mgmt-s3-dev-landing';
let storageBucket = 'am-doc-mgmt-s3-dev';
// Variables for folder init and Buffer config
const localZippedFolder = '/tmp/ZippedStudentData/';
const localUnzippedFolder = '/tmp/UnzippedStudentData/';
// NOTE(review): these decode the folder path strings as base64; neither
// buffer is referenced again in this handler.
const ZipBuffer = Buffer.from(localZippedFolder, 'base64');
const UnzippedBuffer = Buffer.from(localUnzippedFolder, 'base64');
// Inits AWS s3 Bucket and DynamoDB
let s3 = new AWS.S3();
let docClient = new AWS.DynamoDB.DocumentClient({ region: 'us-east-1' });
// Gets the file bucket and file name of the s3 object from the event
let fileBucket = event.Records[0].s3.bucket.name;
let fileName = event.Records[0].s3.object.key;
let params = {
Bucket: fileBucket,
Key: fileName
};
// Creates temporary variables
let tempFile = localZippedFolder + fileBucket;
let tempUnzippedFile = localUnzippedFolder + fileBucket;
// Make Directories for Zipped and Unzipped files
try {
// NOTE(review): the value resolved by mkdirp is used below as the extraction
// path - confirm that it is always the directory path.
const zipDirFolder = await mkdirp(localZippedFolder, { recursive: true })
const unZipDirFolder = await mkdirp(localUnzippedFolder, { recursive: true });
console.log('SUCCESS: unzipped directory created!');
console.log('SUCCESS: zipped directory create!')
// Download files from s3 Bucket
// The response is only logged and kept in `newFolder`; it is not written to disk.
let newFolder = await s3.getObject(params).promise()
.then(data => {
console.log(data);
return data;
});
// Extract files from zipped folder and store them in a local directory
// params.Key (the S3 object key) is used here as a local file path.
// The stream is not awaited, so this async function can return before
// 'finish' or 'error' fires.
fs.createReadStream(params.Key)
.pipe(unzipper.Extract({path: unZipDirFolder}))
.on('finish', () => {
// fs.readdir is called without a callback here.
fs.readdir(unZipDirFolder);
}).on('error', (err) => {
// error handling here
console.log(err);
});
}
catch (error) {
console.log(error);
}
};
error: not getting anything back from the fs.createReadStream function. Its as if it just skips over the function.
It's honestly hard to figure out what problem you're really trying to solve, since you aren't very specific about it. If you want the containing async function to not resolve its promise until the unzipping is done, you can wrap the stream in a promise like this:
const AWS = require('aws-sdk');
const fs = require('fs');
const mkdirp = require('mkdirp');
const unzipper = require('unzipper');
exports.handler = async (event, context) => {
// Variables for bucket init
let sourceBucket = 'am-doc-mgmt-s3-dev-landing';
let storageBucket = 'am-doc-mgmt-s3-dev';
// Variables for folder init and Buffer config
const localZippedFolder = '/tmp/ZippedStudentData/';
const localUnzippedFolder = '/tmp/UnzippedStudentData/';
const ZipBuffer = Buffer.from(localZippedFolder, 'base64');
const UnzippedBuffer = Buffer.from(localUnzippedFolder, 'base64');
// Inits AWS s3 Bucket and DynamoDB
let s3 = new AWS.S3();
let docClient = new AWS.DynamoDB.DocumentClient({ region: 'us-east-1' });
// Gets the file bucket and file name of the s3 object from context
let fileBucket = event.Records[0].s3.bucket.name;
let fileName = event.Records[0].s3.object.key;
let params = {
Bucket: fileBucket,
Key: fileName
};
// Creates temporary variables
let tempFile = localZippedFolder + fileBucket;
let tempUnzippedFile = localUnzippedFolder + fileBucket;
// Make Directories for Zipped and Unzipped files
try {
const zipDirFolder = await mkdirp(localZippedFolder, { recursive: true })
const unZipDirFolder = await mkdirp(localUnzippedFolder, { recursive: true });
console.log('SUCCESS: unzipped directory created!');
console.log('SUCCESS: zipped directory create!')
// Download files from s3 Bucket
let newFolder = await s3.getObject(params).promise();
await new Promise((resolve, reject) => {
// Extract files from zipped folder and store them in a local directory
fs.createReadStream(params.Key)
.pipe(unzipper.Extract({path: unZipDirFolder}))
.on('finish', resolve);
.on('error', reject);
});
} catch (error) {
console.log(error);
// rethrow error so caller sees the error
throw error;
}
};
And, your caller using this exported function will HAVE to use .then() or await on the returned promise to know when it's done. And, use .catch() or try/catch around await to catch errors.
If someone's open to using Python, they can use a buffer to read and unzip the files. Something like this:
# Read the zipped S3 object fully into memory and open it as a zip archive.
zipped_file = s3_resource.Object(bucket_name=sourcebucketname, key=filekey)
buffer = BytesIO(zipped_file.get()["Body"].read())
zipped = zipfile.ZipFile(buffer)

# Upload every archive member to the destination bucket as its own object.
for file in zipped.namelist():
    logger.info(f'current file in zipfile: {file}')
    final_file_path = file + '.extension'

    with zipped.open(file, "r") as f_in:
        content = f_in.read()
        destinationbucket.upload_fileobj(
            io.BytesIO(content),
            final_file_path,
            ExtraArgs={"ContentType": "text/plain"}
        )
There's also a tutorial here: https://betterprogramming.pub/unzip-and-gzip-incoming-s3-files-with-aws-lambda-f7bccf0099c9
I want to check if the path is a file or a directory. If it's a directory then Log the directory and file separately. Later I want to send them as json object.
const testFolder = './data/';

// List the entries of `testFolder` and log each name.
fs.readdir(testFolder, (err, files) => {
  if (err) {
    // `files` is undefined when the folder cannot be read, so stop here
    // instead of failing on files.forEach.
    console.error(`Could not read ${testFolder}:`, err);
    return;
  }
  files.forEach((file) => {
    console.log(`FILES: ${file}`);
  });
});
Edit:
If I try this:
// For each entry of `testFolder`, log whether it is a directory or a file.
// The bare entry name returned by readdir is passed to statSync as-is.
fs.readdir(testFolder, (err, files) => {
  for (const file of files) {
    const entryIsDirectory = fs.statSync(file).isDirectory();
    if (!entryIsDirectory) {
      console.log(`FILE: ${file}`)
      continue;
    }
    console.log(`DIR: ${file}`);
  }
});
I get this error:
nodejs binding.lstat(pathModule._makeLong(path))
Update: Found the solution. I had to add testFolder + file like this :
if (fs.statSync(testFolder + file).isDirectory()) {
quick google search..
// Check whether a path refers to a regular file and print the result.
const fs = require('fs');
const targetPath = "c:\\dog.jpg";
const isRegularFile = fs.statSync(targetPath).isFile();
console.log('is file ? ' + isRegularFile);
read: http://www.technicalkeeda.com/nodejs-tutorials/how-to-check-if-path-is-file-or-directory-using-nodejs
Since Node 10.10+, fs.readdir has withFileTypes option which makes it return directory entry fs.Dirent instead of just the filename. Directory entry contains useful methods such as isDirectory or isFile.
Your example then would be solved by:
const testFolder = './data/';

// Read the folder as fs.Dirent objects, so each entry can report its own type
// without a separate stat call.
fs.readdir(testFolder, { withFileTypes: true }, (err, dirEntries) => {
  if (err) {
    // `dirEntries` is undefined when the folder cannot be read, so stop here
    // instead of failing on dirEntries.forEach.
    console.error(`Could not read ${testFolder}:`, err);
    return;
  }
  dirEntries.forEach((dirEntry) => {
    const { name } = dirEntry;
    if (dirEntry.isDirectory()) {
      console.log(`DIR: ${name}`);
    } else {
      console.log(`FILE: ${name}`);
    }
  });
});