How to download N number of files asynchronously using wget and node

How to download N number of files asynchronously using wget and node - javascript

I have the following Node script:
var exec = require('child_process').exec;
/**
 * Downloads a single URL by shelling out to `wget`.
 * Logs a confirmation once the command finishes without error; a failed run
 * throws from inside the exec callback.
 *
 * @param {string} file_url - URL placed on the wget command line as-is.
 */
var download_file_wget = function (file_url) {
  // Build the command line first so the exec call below stays short.
  var command = `wget ${file_url}`;

  // exec reports completion (or failure) through the callback.
  exec(command, function (error) {
    if (error) {
      throw error;
    }
    console.log(`${file_url} downloaded`);
  });
};
download_file_wget('http://placekitten.com/10/10');
It does successfully download 1 file. I need to asynchronously download 128 files at once. How can I do this with this code?

If the requested files are big, consider using spawn instead of exec.
const http = require('http');
const exec = require('child_process').exec;
const DOWNLOAD_DIR = './downloads/';
/**
 * Picks a random integer dimension, logs it, and returns it.
 *
 * @returns {number} Integer between 200 and 299 inclusive.
 */
const generate_width_and_height = function () {
  // Math.random() is in [0, 1), so the floored value lands in 200..299.
  const dimension = Math.floor(200 + Math.random() * 100);
  console.log(dimension);
  return dimension;
};
/**
 * Builds a placekitten URL with a freshly generated width and height.
 *
 * @returns {string} URL of the form http://placekitten.com/<width>/<height>.
 */
const create_file_url = function () {
  // Width is generated before height, matching the order the values appear in the URL.
  const width = generate_width_and_height();
  const height = generate_width_and_height();
  return `http://placekitten.com/${width}/${height}`;
};
// Index placeholders 0..127: one entry per download, 128 in total.
// (The previous Array(127) produced only 127 entries.)
const oneHundredTwentyEightElementsArray = Array.from({ length: 128 }, (_, x) => x);
const oneHundredTwentyEightUrlsArray = oneHundredTwentyEightElementsArray.map( _ => create_file_url())
/**
 * Downloads one URL into DOWNLOAD_DIR by shelling out to `wget -P`.
 *
 * @param {string} file_url - URL placed on the wget command line.
 * @param {number} file_number - Index used only to build the label in the success log;
 *   it is not passed to wget.
 */
const download_file_wget = function (file_url, file_number) {
  const label = `file_${file_number}`;
  const command = `wget -P ${DOWNLOAD_DIR} ${file_url}`;

  // Execute wget through child_process' exec; the callback receives the outcome.
  exec(command, function (error) {
    if (error) {
      throw error;
    }
    console.log(`${label} downloaded to ${DOWNLOAD_DIR}`);
  });
};
// Start one download per placeholder entry; the URL at the same position is used for each.
oneHundredTwentyEightElementsArray.forEach((_, position) => {
  download_file_wget(oneHundredTwentyEightUrlsArray[position], position);
});

You can use Javascript Promises to download multiple files with node and wget.
First wrap your inner code in a promise:
/**
 * Runs `wget` for one URL and reports the outcome through a promise.
 *
 * @param {string} url - URL placed on the wget command line.
 * @returns {Promise<number>} Resolves with 1 when the command finishes without error.
 *   Rejects with the exec error otherwise, so callers such as Promise.all settle
 *   instead of waiting forever on a failed download.
 */
const downloadFile = (url) => {
  const command = `wget ${url} --no-check-certificate`;
  return new Promise((resolve, reject) => {
    console.log(command);
    exec(command, function (err, stdout, stderr) {
      if (err) {
        console.log('ERR', err, url);
        // Settle the promise on failure too; previously it stayed pending forever.
        reject(err);
        return;
      }
      console.log('SUCCESS ' + url);
      resolve(1);
    });
  });
};
Then use Promise.all to process all the downloads asynchronously:
// URLs to fetch; extend as needed.
const files = [
  'http://placekitten.com/10/10',
  'http://placekitten.com/10/10',
  // etc
];

// The semicolon after the array literal is required: without it the parenthesised
// function below is parsed as a call on the array and throws a TypeError.
(async () => {
  try {
    // All downloads are started at once; Promise.all waits for every one of them.
    await Promise.all(files.map((url) => downloadFile(url)));
  } catch (err) {
    // Report a rejected download instead of leaving an unhandled rejection.
    console.log('ERR', err);
  }
})();

Related

Ignore errors returned from API calls nodeJS

I have a NodeJS script which is making thousands of REST API GET calls to SharePoint to retrieve all folders and files recursively. A problem I have encountered is once in a while I am receiving ECONNRESET errors and 404 Not Found for some folders with unsupported characters. This causes my code to stop and I lose my progress in retrieving all the files and folders. Is there something I can do to prevent these errors from stopping my code and just have it ignore the error and continue onto the next folder?
I thought catching the error and console logging it would suffice but this still ends the execution of the code. Thanks
//Imports and Declarations
const prompt = require("prompt-sync")({ sigint: true });
const { getCreds, pullFromSharePoint } = require('./helper');
const sprequest = require('sp-request');
var path = require('path');
let creds = getCreds();
let spr = sprequest.create(creds)
let SPUrl = 'url to sharepoint site';
let files = [];
let folders = [];
let scannedFolders = [];
let reiteration = false;
let processingReq = false;
//Prompt user for which folder to copy
let targetFolder = prompt("Folder to copy: ");
//Call function to get list of files and folders in specified folder
// getFolderContents(targetFolder);
processInfo([], targetFolder)
//Gets list of files/folders inside folder
/**
 * Lists one SharePoint folder: appends the files returned by the Files query to the
 * module-level `files` array and the sub-folders to the module-level `folders` array.
 *
 * Request failures (for example ECONNRESET or 404) are logged and skipped. The returned
 * promise still resolves, so a caller awaiting it moves on to the next folder instead of
 * waiting forever on a promise that never settles.
 *
 * @param {string} targetFolder - Folder path; inserted after 'Shared%20Documents/' in the request URLs.
 * @returns {Promise<string[]>} Resolves with the shared `folders` array once both requests have settled.
 */
async function getFolderContents(targetFolder) {
  // NOTE(review): `logger` is not declared in the visible part of this script — confirm it is in scope.
  logger.info('Scanning this folder: ' + targetFolder);

  // Double every single quote so it stays inside the quoted URL argument built below.
  let folderPath = targetFolder.replace(/'/g, "''");

  //If reiteration = true, format the target folder
  if (reiteration === true) {
    folderPath = folderPath.replace('/sites/sharepointsite', '');
  }
  console.log('Scanning this folder: ' + folderPath);
  folderPath = encodeURIComponent(folderPath);

  const filesUrl = `${SPUrl}/_api/web/GetFolderByServerRelativeUrl('Shared%20Documents/${folderPath}')/Files?$filter=TimeLastModified gt datetime'2021-07-01T00:00:00'`;
  const foldersUrl = `${SPUrl}/_api/web/GetFolderByServerRelativeUrl('Shared%20Documents/${folderPath}')?$expand=Folders`;

  // Call to get list of files. Its catch keeps a failed request from rejecting the whole scan.
  const filesRequest = spr.get(filesUrl).then((response) => {
    console.log('Calling SP API to get files');
    for (const file of response.body.d.results) {
      files.push(file.ServerRelativeUrl);
    }
    console.log(files.length);
  }).catch((err) => {
    console.log('Fetch Error :-S', err);
  });

  //Call to get list of folders
  const foldersRequest = spr.get(foldersUrl).then((response) => {
    for (const folder of response.body.d.Folders.results) {
      folders.push(folder.ServerRelativeUrl);
    }
    //Push current folder read through to another array so we dont scan it multiple times
    // NOTE(review): folderPath is URI-encoded here and joined without a '/', so this entry
    // may not match the ServerRelativeUrl values it is later compared with — confirm.
    scannedFolders.push('/sites/sharepointsite/Shared Documents' + folderPath);
  }).catch((err) => {
    console.log('Fetch Error :-S', err);
  });

  // Wait for both requests: neither can reject because each has its own catch above.
  await Promise.all([filesRequest, foldersRequest]);
  return folders;
}
//If folders exist in folders array scan them; once all folders have been scanned, send files to be copied
/**
 * Scans `firstFolder`, then walks the folder list returned by that scan, scanning each
 * folder that has not been scanned yet.
 *
 * The `folders` parameter is not read anywhere in the body. getFolderContents resolves
 * with the module-level `folders` array, so `firstRun` is that same array and it grows
 * while the loop below is running.
 */
async function processInfo(folders, firstFolder) {
let firstRun = await getFolderContents(firstFolder);
// NOTE(review): firstGone is assigned but never read.
let firstGone = false;
if (firstRun) {
if (firstRun.length > 0) {
for (let k = 0; k < firstRun.length; k++) {
// Pause 500 ms between iterations.
await sleep(500);
//If folder has already been scanned, remove from array and skip to next iteration
if (scannedFolders.includes(firstRun[k])) {
if (k === 0) {
firstRun.splice(k, 1)
}
// NOTE(review): splice(k, k) removes k elements starting at index k, not one;
// and since k still increments after a removal, the element that shifts into
// position k is never examined — confirm this is intended.
firstRun.splice(k, k)
continue;
}
//Else if folder has not been scanned, send it to be scanned
else {
// NOTE(review): both sides of the || are the same test.
if (firstRun[k].includes('%') || firstRun[k].includes('%')) {
console.log('skipping')
continue;
}
// From here on getFolderContents strips the '/sites/sharepointsite' prefix.
reiteration = true;
// NOTE(review): foldersScanned is only referenced by the commented-out condition below.
let foldersScanned = await getFolderContents(firstRun[k]).catch(function (err) {
console.log('Fetch Error :-S', err);
});;
//Send each file to pullFile() function to be downloaded
// if (foldersScanned && k == firstRun.length - 1) {
// NOTE(review): this compares a folder URL with the number 10, so the download loop
// presumably never runs — looks like a debugging leftover; confirm.
if (firstRun[k] == 10) {
do {
await pullFile(files[0]);
files.shift();
} while (files.length > 0)
}
}
}
}
}
console.log(files.length)
}
//Manipulate file string to get SP folder and File Name, then call the helper function to download the file
/**
 * Splits a server-relative file URL into SharePoint folder and file name, then asks the
 * helper to download that single file.
 *
 * @param {string} file - Server-relative URL of the file.
 * @returns {Promise<true|Error>} Settles after the helper call has settled: `true` when it
 *   resolved, or the caught error when it rejected (the error is returned, not rethrown).
 */
async function pullFile(file) {
  // Everything after the last slash or backslash is the file name.
  const filename = file.replace(/^.*[\\\/]/, '');
  const relativeUrl = file.replace('/sites/sharepointsite/', '');
  // Cut the name off the end; replace(filename, '') would hit the first occurrence,
  // which is wrong when a folder name contains the file name.
  const spFolder = relativeUrl.slice(0, relativeUrl.length - filename.length);
  const downloadPath = path.join('./testfolder', spFolder);

  const sppullContext = {
    siteUrl: SPUrl, //SharePoint URL
    creds: creds //Credentials
  };
  const sppullOptions = {
    spRootFolder: spFolder, // The folder path for the file in SharePoint
    dlRootFolder: downloadPath, // Where the file is saved locally
    strictObjects: [filename], // Only download the filename specified as a query parameter
    muteConsole: true
  };

  try {
    // Awaited so that `await pullFile(...)` in the caller really waits for the helper.
    await pullFromSharePoint(sppullContext, sppullOptions); //Pull file with filename from SharePoint
    return true;
  } catch (err) {
    return err;
  }
}
//Timer to wait specified amount of time
/**
 * Returns a promise that resolves (with no value) once a timer of `ms` milliseconds fires.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => setTimeout(wake, ms));
}
Code from helper.js
//Imports and Declarations
var fs = require('fs');
const { SPPull } = require('sppull');
const winston = require('winston');
// Winston logger shared by the helper functions in this file.
// Configured with JSON formatting, a default level of 'info', and two file transports.
const logger = winston.createLogger({
level: 'info',
format: winston.format.json(),
// Metadata object handed to winston as defaultMeta.
defaultMeta: { service: 'user-service' },
transports: [
// error.log transport is configured with level 'error'.
new winston.transports.File({ filename: 'error.log', level: 'error' }),
// info.log transport sets no level of its own.
new winston.transports.File({ filename: 'info.log' }),
],
});
/**
 * Builds the SharePoint credentials object.
 *
 * @returns {{username: string, password: string}} A new object on every call,
 *   holding the hard-coded placeholder values.
 */
function getCreds() {
  const credentials = { username: 'UN', password: 'PW' };
  return credentials;
}
module.exports.getCreds = getCreds;
/**
 * Pulls a file from SharePoint and logs the outcome.
 *
 * The download is awaited, so the returned promise settles only after SPPull has
 * finished; previously the chain was not returned and callers could not wait for it.
 *
 * @param sppullContext context object for sppull
 * @param sppullOptions options object for sppull
 * @returns {Promise<*>} Resolves with the SPPull result on success, or with the caught
 *   error (already logged) on failure. It does not reject for download errors.
 */
async function pullFromSharePoint(sppullContext, sppullOptions) {
  try {
    const res = await SPPull.download(sppullContext, sppullOptions);
    logger.info(res);
    return res;
  } catch (err) {
    logger.error(err);
    // Returned rather than rethrown, matching the original catch handler's contract.
    return err;
  }
}

node.js async/await function called multiple times (before it ends)

My main file is a "file watcher" that watches a folder using the "node-watch" module. For each file placed in the folder, it has to make a couple of MSSQL calls and then process the file accordingly.
The file watcher script looks like that:
var watch = require('node-watch');
const timestamp = require('./SHR_modules/timestamp');
const sql = require('./SHR_modules/sql');
const configuration = require('./SHR_modules/config');
const logger = require('./SHR_modules/logger');
// Watches ./Incoming (recursively). For every 'update' event it reads the load specs for
// the file and logs the outcome; any other event is logged as a removal.
// Events for different files can still interleave at each `await`, which is why nothing
// in this callback may be stored in variables shared between invocations.
watch('./Incoming', { recursive: true }, async function (evt, name) {
  // Function-scoped on purpose: both are printed after the if/else, for every event type.
  let startTime;
  let finishTime;

  if (evt === 'update') {
    logger.addLog(configuration.logFile.watcher, 'File found: ' + name);
    startTime = await timestamp.dateTimeDelimited(); //capture time at processing start

    // Declared with const so each event owns its values; as implicit globals they could
    // be overwritten by another event while this one was suspended at an await.
    const segments = name.split('\\');
    const fileName = segments[segments.length - 1];
    const filePath = name.slice(0, name.length - fileName.length);

    const myLoad = await sql.readLoadSpec(fileName, filePath).catch(err => {
      logger.addLog(configuration.logFile.error, 'Error while reading Load Specs for file: ' + name);
      logger.addLog(configuration.logFile.error, err);
    });
    if (typeof myLoad !== 'undefined' && myLoad) {
      console.log(evt);
      logger.addLog(configuration.logFile.watcher, 'Finished processing: ' + name);
    }
    finishTime = await timestamp.dateTimeDelimited(); //capture time at processing finish
  } else {
    logger.addLog(configuration.logFile.watcher, 'File removed: ' + name);
  }
  console.log(startTime);
  console.log(finishTime);
});
and the SQL function:
/**
 * Loads every load specification found for an incoming file.
 *
 * @param fileName name of the incoming file
 * @param filePath folder part of the incoming file's path
 * @returns {Promise<Array|undefined>} Resolves with the collected load specs, or with
 *   undefined when findSpecs yields nothing for the file. Rejects when findSpecs or a
 *   query fails.
 */
var readLoadSpec = async (fileName, filePath) => {
  // Local binding: as an implicit global, specIDs was shared by every call in flight,
  // so a second file could replace it while this call was waiting on a query.
  let specIDs;
  try {
    specIDs = await findSpecs(fileName, filePath);
  } catch (err) {
    console.log(fileName + ' not found');
    throw err;
  }

  // Nothing to load for this file: settle the promise so the caller's await completes.
  if (typeof specIDs === 'undefined' || !specIDs) {
    return undefined;
  }

  console.log('FOUND!!!!');
  const addLoadSpec = [];
  for (let i = 0; i < specIDs.length; i++) {
    console.log(specIDs);
    // NOTE(review): the IDs are interpolated straight into the SQL text; if they can
    // ever come from untrusted input, switch to parameterised queries.
    const tempQuery1 = `select * from LoadSpec where LoadID = ${specIDs[i]}`;
    const tempQuery2 = `select * from Mappings where LoadID = ${specIDs[i]} ORDER BY OutputColumn`;
    console.log(specIDs); // <========= code works up to this line
    const results = await queryDB(tempQuery1);
    const resultsMapping = await queryDB(tempQuery2);
    console.log(fileName);
    console.log(results.recordset[0].OutputFileName);
    console.log(specIDs);
    const inputFrmt = {
      name: results.recordset[0].InputFormatName,
      format: results.recordset[0].InputFormat,
      delimiter: results.recordset[0].InputDelimiter,
      headerRows: results.recordset[0].InputHeaderRows
    };
    // . . . (the original post elides the code that builds `loadSpec` here)
    console.log(results.recordset[0].OutputColumnCount);
    addLoadSpec.push(loadSpec);
  }
  return addLoadSpec;
};
So the code runs to the row I've marked (before the await for query results) and then loops back to the beginning.
So all is fine if I drop a single file into the folder, but when multiple files arrive at more or less the same time, the specIDs var takes the value of another file before the code resumes after the query await part is completed. specIDs is just an array, so let's say for file 'a.csv' it's [1,2], for file 'b.csv' it's [3,4] and for file 'c.csv' it's undefined. So before it gets the answer from the DB, this loop:
specIDs = await findSpecs(fileName, filePath)
.catch(err => {
reject(err);
console.log(fileName + ' not found')
});
if (typeof specIDs !== 'undefined' && specIDs) {
console.log('FOUND!!!!');
var addLoadSpec = [];
for (let i = 0; i < specIDs.length; i++) {
gets an error for index of undefined.
How do I make it process files one by one?

Creating An Object From A Directory

I'm just wondering how to create a JavaScript object from a directory using Node.js.
Here's What I Mean:
If the root folder was "test" and the tree would look like this
So the object structure would look like this:
{ test: { joe: {mama: "(file contents)"}, pp: {help: "very mature example\r\ni'm aware"} } }
edit: here's my attempt
const fs = require("fs");
const path = require("path");
const root = "test";
const dir = __dirname + "\\" + root + "\\";
var currentFolder = dir.replace(__dirname + "\\", "");
var data = {};
/**
 * Counts the entries directly inside a directory (despite the name, not a size in bytes).
 *
 * @param {string} dirPath - Directory to read.
 * @returns {number} Number of entries returned by fs.readdirSync.
 */
const getFileSize = function (dirPath) {
  // readdirSync already returns an array; no implicit global or manual counter needed.
  return fs.readdirSync(dirPath).length;
};
// Attempt at building `data` from the directory tree.
// For every entry of `dir`, the entries of `currentFolder` are listed again.
fs.readdirSync(dir).forEach((rootFile, rootIndex) => {
fs.readdirSync(currentFolder).forEach((file, index) => {
// True only for the last entry of currentFolder.
if (getFileSize(currentFolder) - 1 == index) {
console.log(index, file, rootFile, currentFolder);
// NOTE(review): `\\file` is the literal text backslash + "file"; the `file` variable is
// not interpolated here — presumably `\\${file}` was intended; confirm.
currentFolder = currentFolder.replace(`\\file`, "");
// NOTE(review): this only changes the callback's local copy of the index.
index++;
}
// if (file.includes("."))
// data[currentFolder + file] = fs.readFileSync(currentFolder + file, "utf8");
// else currentFolder = currentFolder + file + "\\";
});
});
// `data` is never assigned above (the assignments are commented out), so this prints {}.
console.log(data);

Yeh after hours of googling and stack overflow I found this:
var fs = require("fs");
var path = require("path");
/**
 * Recursively collects the absolute paths of all non-directory entries under `dir`.
 * Entries are processed one at a time, in the order fs.readdir returns them.
 *
 * @param {string} dir - Directory to walk.
 * @param {function(Error|null, string[]=)} done - Called once, with an error from reading
 *   a directory (at any depth) or with the collected paths.
 */
var walk = function (dir, done) {
  var results = [];
  fs.readdir(dir, function (err, list) {
    if (err) return done(err);
    var i = 0;
    (function next() {
      if (i >= list.length) return done(null, results);
      var file = path.resolve(dir, list[i++]);
      fs.stat(file, function (statErr, stat) {
        // An entry that cannot be stat'ed falls through to the "file" branch,
        // as in the original code.
        if (stat && stat.isDirectory()) {
          walk(file, function (walkErr, res) {
            // Propagate a failure from the sub-directory; previously it was ignored
            // and `undefined` was concatenated into the results.
            if (walkErr) return done(walkErr);
            results = results.concat(res);
            next();
          });
        } else {
          results.push(file);
          next();
        }
      });
    })();
  });
};
var root = "test";
var data = {};
// Walk the tree and store each file's contents in `data`, keyed by the file path with
// the leading "<__dirname>\<root>\" prefix removed.
walk("test", function (err, results) {
  if (err) throw err;
  // for...of iterates the array values and keeps the loop variable local
  // (the previous `for (i in results)` created an implicit global).
  for (const file of results) {
    const key = file.replace(__dirname + "\\" + root + "\\", "");
    data[key] = fs.readFileSync(file, "utf8");
  }
  console.log(data);
});

Count the number of files in a directory using JavaScript/nodejs?

How can I count the number of files in a directory using nodejs with just plain JavaScript or packages? I want to do something like this:
How to count the number of files in a directory using Python
Or in bash script I'd do this:
# Sets DIRLENGTH to the highest N for which DIR-2 .. DIR-N all exist as directories
# in the current working directory (1 when DIR-2 is absent).
getLength() {
  DIRLENGTH=1
  while [ -d "DIR-$((DIRLENGTH + 1))" ]; do
    DIRLENGTH=$((DIRLENGTH + 1))
  done
}

Using fs, I found retrieving the directory file count to be straightforward.
const fs = require('fs');
const dir = './directory';
// Print the number of entries in `dir`.
fs.readdir(dir, (err, files) => {
  if (err) {
    // `files` is undefined when the read fails (e.g. the directory does not exist),
    // so report the error instead of dereferencing it.
    console.error(err);
    return;
  }
  console.log(files.length);
});

const fs = require('fs')
const length = fs.readdirSync('/home/directory').length

1) Download shelljs and Node.js (if you don't have them)
2) Go to the folder where you downloaded shelljs and create a file there named countFiles.js
var sh = require('shelljs');
var count = 0;
/**
 * Recursively counts the non-directory entries under `folderPath`, adding to the
 * module-level `count`. Changes the working directory while it runs and steps back
 * up after each sub-folder.
 *
 * @param {string} folderPath - Folder to enter, relative to the current working directory.
 */
function annotateFolder (folderPath) {
  sh.cd(folderPath);
  var entries = sh.ls() || [];
  for (var i = 0; i < entries.length; i++) {
    var entry = entries[i];
    // Ask the file system whether the entry is a directory. Testing for a dot in the
    // name miscounts folders such as "v1.2" and files without an extension.
    if (sh.test('-d', entry)) {
      annotateFolder(entry);
      sh.cd('../');
    } else {
      count++;
    }
  }
}
// Entry point: the first command-line argument after the script name is the folder to count.
const [targetFolder] = process.argv.slice(2);
if (!targetFolder) {
  console.log('There is no folder');
} else {
  annotateFolder(targetFolder);
}
console.log(count);
3) Open the command prompt in the shelljs folder (where countFiles.js is) and run node countFiles "DESTINATION_FOLDER" (e.g. node countFiles "C:\Users\MyUser\Desktop\testFolder")

Alternative solution without external module, maybe not the most efficient code, but will do the trick without external dependency:
var fs = require('fs');
/**
 * Walks `files` one entry at a time and collects the names that are directories.
 *
 * @param {string} path - Directory the names in `files` belong to.
 * @param {string[]} files - Entry names inside `path`.
 * @param {function(string[])} callback - Called once with the directory names.
 * @param {number} [i] - Index of the entry to test; used by the recursion, omit when calling.
 * @param {string[]} [dir] - Accumulator for directory names; used by the recursion.
 */
function sortDirectory(path, files, callback, i, dir) {
  var index = i || 0; //Init
  var found = dir || [];
  if (index >= files.length) {
    callback(found); //Once all files have been tested, return
    return;
  }
  // '/' is accepted as a separator on Windows as well as POSIX systems.
  fs.lstat(path + '/' + files[index], function (err, stat) { //Get stats of the file
    if (err) {
      // Log and skip the entry; `stat` is undefined in this case.
      console.log(err);
    } else if (stat.isDirectory()) { //Check if directory
      found.push(files[index]); //If so, add it to the list
    }
    // Pass path and files along again: the recursive call previously dropped them.
    sortDirectory(path, files, callback, index + 1, found); //Iterate
  });
}
/**
 * Reads `path` and reports the names of its sub-directories.
 *
 * @param {string} path - Directory to list.
 * @param {function(*)} callback - Receives the array of directory names, or the
 *   readdir error object when the directory cannot be read.
 */
function listDirectory(path, callback) {
  fs.readdir(path, function (readError, entries) {
    if (readError) {
      // The error is handed to the same single-argument callback.
      callback(readError);
      return;
    }
    // Keep only the entries that are directories.
    sortDirectory(path, entries, function (directories) {
      callback(directories);
    });
  });
}
listDirectory('C:\\My\\Test\\Directory', function (dir) {
console.log('There is ' + dir.length + ' directories: ' + dir);
});

Here is the simple code:
import RNFS from 'react-native-fs';
// Print how many entries RNFS.readDir returns for dirPath.
RNFS.readDir(dirPath)
  .then((result) => {
    console.log(result.length);
  })
  .catch((error) => {
    // Handle a failed read instead of leaving the rejection unhandled.
    console.log(error);
  });

Okay, I got a bash script like approach for this:
const shell = require('shelljs')
const path = require('path')
// Runs a shell pipeline in path/to/folder that lists the directories, filters them with
// grep, and counts the matching lines with wc -l; returns the `output` property of
// shelljs' exec result.
// NOTE(review): as a grep pattern, 'page-*' means "page" followed by zero or more '-',
// so it matches any name containing "page" — confirm that is the intended filter.
// NOTE(review): `.output` presumably depends on the shelljs version in use (newer
// releases expose `.stdout`) — verify against the installed version.
module.exports.count = () => shell.exec(`cd ${path.join('path', 'to', 'folder')} || exit; ls -d -- */ | grep 'page-*' | wc -l`, { silent:true }).output
That's it.

/**
 * Promise wrapper around the callback form of fs.readdir.
 *
 * @param {string} path - Directory to read.
 * @returns {Promise<string[]>} Resolves with the entry names; rejects with the readdir error.
 */
const readdir = (path) => {
  return new Promise((resolve, reject) => {
    fs.readdir(path, (error, files) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(files);
    });
  });
};
readdir("---path to directory---").then((files) => {
console.log(files.length);
});

I think many people are looking for a function like this:
/**
 * Counts the regular files in `dir` and all of its sub-directories.
 * Entries that are neither directories nor regular files (per lstat) are not counted.
 *
 * @param dir directory to start from
 * @returns total number of regular files found
 */
const countFiles = (dir: string): number => {
  let total = 0;
  for (const entry of fs.readdirSync(dir)) {
    const entryPath = `${dir}/${entry}`;
    const stats = fs.lstatSync(entryPath);
    if (stats.isDirectory()) {
      total += countFiles(entryPath);
    } else if (stats.isFile()) {
      total += 1;
    }
  }
  return total;
};
They count all files in the entire file tree.

NodeJS recursively list files in directory

I am trying to list all files in a directory (and files within any subdirectories) with the following code:
var fs = require('fs')
// Lists every file under directoryName by recursing into sub-directories.
// This is the failing version from the question: fs.readdir yields bare entry names,
// and each name is passed to fs.stat (and to the recursive call) without being joined
// to directoryName. Neither callback checks its error argument before using the result.
var walk = function(directoryName) {
fs.readdir(directoryName, function(e, files) {
files.forEach(function(file) {
// `file` is only the entry name here, not a path that includes directoryName.
fs.stat(file, function(e, f) {
if (f.isDirectory()) {
walk(file)
} else {
console.log('- ' + file)
}
})
})
})
}
walk(__dirname)
However, when my code attempts to invoke walk(file) on line 8 I get the following error:
TypeError: Cannot call method 'isDirectory' of undefined
at program.js:7:15
at Object.oncomplete (fs.js:107:15)
Why is f undefined? If I have the directory structure below, shouldn't the code identify aaa.txt and bbb.txt as files, my_dir as a directory at which point it recursively calls walk and begins the process again (with zzz.txt being the value of f)?
- aaa.txt
- bbb.txt
+ my_dir
- zzz.txt

Function fs.readdir lists the simple file names in that directory, not their absolute path. This is why the program failed to find them, thus leading to an error in fs.stat.
Here's the solution: concatenate the directory path name to the file.
var fs = require('fs');
var path = require('path');
/**
 * Prints the path of every non-directory entry under directoryName, recursing into
 * sub-directories. Read and stat failures are logged and that branch is skipped.
 *
 * @param {string} directoryName - Directory to list.
 */
var walk = function (directoryName) {
  fs.readdir(directoryName, function (readError, entries) {
    if (readError) {
      console.log('Error: ', readError);
      return;
    }
    for (const entry of entries) {
      // readdir yields bare names, so join each one to its parent directory.
      const fullPath = path.join(directoryName, entry);
      fs.stat(fullPath, function (statError, stats) {
        if (statError) {
          console.log('Error: ', statError);
          return;
        }
        if (stats.isDirectory()) {
          walk(fullPath);
          return;
        }
        console.log('- ' + fullPath);
      });
    }
  });
};

var fs = require('fs');
var path = require('path');
/**
 * Prints the name (not the full path) of every non-directory entry under directoryName,
 * recursing into sub-directories.
 * A directory that cannot be read, or an entry that cannot be stat'ed, is reported on
 * stderr and skipped instead of crashing the process.
 *
 * @param {string} directoryName - Directory to list.
 */
var walk = function (directoryName) {
  fs.readdir(directoryName, function (e, files) {
    if (e) {
      // `files` is undefined on failure; stop here for this directory.
      console.error('Error: ', e);
      return;
    }
    files.forEach(function (file) {
      var fullPath = path.join(directoryName, file);
      fs.stat(fullPath, function (e, f) {
        if (e) {
          console.error('Error: ', e);
          return;
        }
        if (f.isDirectory()) {
          walk(fullPath);
        } else {
          console.log(' - ' + file);
        }
      });
    });
  });
};
walk(__dirname)

A fully synchronous version, for those situations where you cannot use async:
/**
 * Synchronously collects the paths of all non-directory entries under `dir`.
 *
 * @param {string} dir - Directory to walk.
 * @param {string[]} [files] - Accumulator; found paths are appended to it.
 * @returns {string[]} The accumulator, with every found path appended.
 */
const walk = (dir, files = []) => {
  fs.readdirSync(dir).forEach((name) => {
    const entryPath = `${dir}${path.sep}${name}`;
    // lstat: a symbolic link is judged by the link itself, not by its target.
    if (fs.lstatSync(entryPath).isDirectory()) {
      walk(entryPath, files);
      return;
    }
    files.push(entryPath);
  });
  return files;
};
const allFiles = walk(someDir)

Here's a version for async/await:
const { promises: fs } = require("fs");
const path = require("path");
/**
 * Collects the absolute paths of all non-directory entries under `dir`, visiting
 * entries one after another in the order readdir returns them.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>} Resolves with the collected paths.
 */
async function walk(dir) {
  const collected = [];
  for (const name of await fs.readdir(dir)) {
    const target = path.resolve(dir, name);
    const stats = await fs.stat(target);
    if (!stats.isDirectory()) {
      collected.push(target);
      continue;
    }
    // Append the sub-directory's results in place, preserving their order.
    for (const nested of await walk(target)) {
      collected.push(nested);
    }
  }
  return collected;
}
(async function () {
console.log(await walk("/path/to/some/dir"));
})();

We Keep Coding

JavaScript is the programming language of the Web.

How to download N number of files asynchronously using wget and node - javascript

Related

Ignore errors returned from API calls nodeJS

node.js async/await function called multiple times (before it ends)

Creating An Object From A Directory

Count the number of files in a directory using JavaScript/nodejs?

NodeJS recursively list files in directory

Categories

Resources