Async Loop seems to partially parallel - javascript

I'm trying to implement a function, which slices a file into chunks and then sends them to my backend one after another.
The function has to hash each file & validate if the hash is already known before starting the upload.
The following code is the code part, where my problematic function is called.
process: async (
fieldName,
file,
metadata,
load,
error,
progress,
abort,
transfer,
options,
) => {
// fieldName is the name of the input field - No direct relevance for us
// logger.log(`fieldName: ${fieldName}`);
// Usually Empty - Can be added with Metadata-Plugin
// logger.log(metadata);
const source = this.$axios.CancelToken.source();
const abortProcess = () => {
// This function is entered if the user has tapped the cancel button
source.cancel('Operation cancelled by user');
// Let FilePond know the request has been cancelled
abort();
};
let chunks = [];
const {
chunkForce,
chunkRetryDelays,
chunkServer,
chunkSize,
chunkTransferId,
chunkUploads,
} = options;
// Needed Parameters of file
const { name, size } = file;
if (chunkTransferId) {
/** Here we handle what happens, when Retry-Button is pressed */
logger.log(`Already defined: ${chunkTransferId}`);
return { abortProcess };
}
this.hashFile(file)
.then((hash) => {
logger.log(`File Hashed: ${hash}`);
if (hash.length === 0) {
error('Hash not computable');
}
return hash;
})
.then((hash) => {
logger.log(`Hash passed through: ${hash}`);
return this.requestTransferId(file, hash, source.token)
.then((transferId) => {
logger.log(`T-ID receieved: ${transferId}`);
return transferId;
})
.catch((err) => {
error(err);
});
})
.then((transferId) => {
transfer(transferId);
logger.log(`T-ID passed through: ${transferId}`);
// Split File into Chunks to prepare Upload
chunks = this.splitIntoChunks(file, chunkSize);
// Filter Chunks - Remove all those which have already been uploaded with success
const filteredChunks = chunks.filter(
(chunk) => chunk.status !== ChunkStatus.COMPLETE,
);
logger.log(filteredChunks);
return this.uploadChunks(
filteredChunks,
{ name, size, transferId },
progress,
error,
source.token,
).then(() => transferId);
})
.then((transferId) => {
// Now Everything should be uploaded -> Set Progress to 100% and make item appear finished
progress(true, size, size);
load(transferId);
logger.log(transferId);
})
.catch((err) => error(err));
return { abortProcess };
},
uploadChunks is where the problem starts.
/**
 * Uploads the given chunks one after another and reports progress after
 * each successful upload.
 *
 * Each chunk entry is marked PROCESSING before its upload starts and
 * COMPLETE or ERROR once the upload settles. A failed chunk is reported
 * through `error` and the loop continues with the next chunk.
 *
 * @param {Array<{status: number, chunk: Blob}>} chunks - Entries to upload.
 * @param {{name: string, size: number, transferId: string}} options
 * @param {Function} progress - Called as `progress(true, offset, size)`.
 * @param {Function} error - Called with the error of a failed chunk.
 * @param {*} cancelToken - Currently not used.
 */
async uploadChunks(chunks, options, progress, error, cancelToken) {
  const { size } = options;
  for (const entry of chunks) {
    // Declared inside the loop body, so every iteration starts again at 0.
    let offset = 0;
    entry.status = ChunkStatus.PROCESSING;

    const onUploaded = () => {
      entry.status = ChunkStatus.COMPLETE;
      offset += entry.chunk.size;
      progress(true, offset, size);
      logger.log(offset); // This is always chunk.chunk.size, instead of getting bigger
    };
    const onFailed = (err) => {
      entry.status = ChunkStatus.ERROR;
      error(err);
    };

    // eslint-disable-next-line no-await-in-loop
    await this.uploadChunk(entry.chunk, options, offset)
      .then(onUploaded)
      .catch(onFailed);
  }
},
uploadChunk(fileChunk, options, offset) {
const { name, size, transferId } = options;
const apiURL = `${this.$config.api_url}/filepond/patch?id=${transferId}`;
return this.$axios.$patch(apiURL, fileChunk, {
headers: {
'content-type': 'application/offset+octet-stream',
'upload-name': name,
'upload-length': size,
'upload-offset': offset,
},
});
},
As you can see uploadChunks takes an array of chunks, some options, two functions (progress & error) and a cancelToken (which I currently don't use, since I'm still stuck at this problem)
Each chunk in the array has the form of:
{
status: 0, // Some Status indicating, if it's completed or not
chunk: // binary data
}
The Function uploadChunks iterates over the chunk array and should in theory upload one chunk after another and always increment offset after each upload and then call progress. After this it should start the next iteration of the loop, where offset would be bigger than in the call before.
The calls themselves get executed one after another, but every call has the same offset and progress does not get called repeatedly. Instead, my progress bar locks until everything is uploaded and then jumps to 100%, due to the load call right at the end of the first function.
So the upload itself works fine in the correct order, but all the code after the await this.uploadChunk... doesn't get called after each chunk and blocks somehow.

You are setting offset to 0 inside the loop. So offset is always 0. You should move this line:
let offset = 0;
before the for statement.

Related

How to reset forEach() function if statement is matched

const downloadurl = ['url1', 'url2'];
const filename = 'run.jar';

// Pipes one HTTPS response into `${installationdirectory}/${filename}` and,
// once the file is fully written, logs success unless it is smaller than 1Kb.
const saveResponse = (response) => {
  const target = `${installationdirectory}/${filename}`;
  const output = fs.createWriteStream(target);
  response.pipe(output);
  output.on('finish', () => {
    output.close();
    const sizeInKb = fs.statSync(target).size / 1024;
    // Goal: when the file is smaller than 1Kb, skip the log and try the next
    // download URL instead (hence the downloadurl.forEach).
    if (sizeInKb >= 1) {
      console.log('File downloaded!');
    }
  });
};

// A request is started for every URL in the list.
downloadurl.forEach((url) => {
  https.get(url, saveResponse);
});
I'm using the https module from NodeJS which doesn't give an error if the download url doesn't exist, as it just creates a 1-byte file with the run.jar name instead. So, I want to make it stop executing code and try to download with the other URL if the size of the file is smaller than 1Kb, any ideas?
You can use an every() loop. When you want to break out of the loop, return false (or simply don't return anything).
// Logs 1, 2 and 3, then stops iterating
[1, 2, 3, 4, 5].every((value) => {
  const keepGoing = value <= 3;
  if (keepGoing) {
    console.log(value);
  }
  // `every()` stops at the first falsy return value, so the callback has to
  // return true for as long as the iteration should continue.
  return keepGoing;
});
Or you can truncate the array's length through the third callback argument (the array itself):
// Logs 1, 2, 3 and 4: once a value greater than 3 has been seen, the array is
// truncated right after it, so forEach finds no further elements to visit.
// Note that this modifies `myNums` itself.
const myNums = [1, 2, 3, 4, 5];
myNums.forEach((v, index, arr) => {
  console.log(v);
  if (v > 3) {
    arr.length = index + 1; // Behaves like `break`
  }
});
If using a callback structure, as you are at the moment, you could try a recursive function to try further downloads.
/**
 * Tries the given download URLs in order: requests the first one and, if the
 * result is too small, recurses with the remaining URLs.
 *
 * This is a sketch: the parts marked `// ...` (reading the response and
 * measuring `filesize`) still have to be filled in.
 *
 * @param {string[]} downloadUrls - URLs still left to try.
 */
function tryDownload(downloadUrls) {
  // Every URL has been tried without success: stop recursing.
  if (downloadUrls.length === 0) {
    return;
  }
  // try download the first
  https.get(downloadUrls[0], (res) => {
    // read it, etc
    // ...
    // if too small, try the next urls
    // (`filesize` stands for the size measured by the elided code above)
    if ((filesize / 1024) < 1) {
      tryDownload(downloadUrls.slice(1));
    } else {
      // success!
      // ...
    }
  });
}
You may find it clear to restructure this as an asynchronous function, though. In pseudocode:
for each url in download urls
res = await download url
if res indicates success
handle the res
return
I used @Matt's comment, as it was the simplest one to implement (and I didn't know that the HTTPS module's response exposes a status code to check).
const downloadurl = ['url1', 'url2'];
const filename = 'run.jar';

/**
 * Downloads from `downloadurl[index]` and falls back to the next URL when the
 * request fails, the status code is not 200, or the saved file is smaller
 * than 1Kb. URLs are tried one at a time so that two downloads never write to
 * the same target file concurrently.
 *
 * @param {number} [index=0] - Position in `downloadurl` to try.
 */
const downloadFromIndex = (index = 0) => {
  if (index >= downloadurl.length) {
    console.log('All download URLs failed!');
    return;
  }
  const request = https.get(downloadurl[index], (httpsres) => {
    if (httpsres.statusCode !== 200) {
      // Discard the body so the connection is released.
      httpsres.resume();
      console.log(`Attempt of downloading failed with ${httpsres.statusCode} error! Retrying with another download URL`);
      downloadFromIndex(index + 1);
      return;
    }
    const targetPath = `${installationdirectory}/${filename}`;
    const fileStream = fs.createWriteStream(targetPath);
    httpsres.pipe(fileStream);
    fileStream.on('finish', () => {
      fileStream.close();
      const filesize = fs.statSync(targetPath).size;
      // A file smaller than 1Kb is treated as a failed download
      if ((filesize / 1024) < 1) {
        downloadFromIndex(index + 1);
        return;
      }
      console.log('File downloaded!');
    });
  });
  // Network-level failures (DNS, refused connection, ...) also move on to the next URL.
  request.on('error', (err) => {
    console.log(`Attempt of downloading failed with ${err.message}! Retrying with another download URL`);
    downloadFromIndex(index + 1);
  });
};

downloadFromIndex();

How can I return different values from a function depending on code inside an Axios promise? NodeJS - a

I have a block of code that calls an API and saves the results depending on whether there are differences or not. I would like to return different values for DATA, as laid out in the code, but this is obviously not working since it's returning undefined.
// Fetches the used listings for `term` from the MercadoLibre search API, page by page,
// and compares them with the snapshot stored in ./compare/yesterday-<term>.json.
// NOTE: the arrow function has no `return` statement at its own level, so calling
// compare(term) evaluates to undefined; every `return DATA` below returns from an
// inner callback instead.
let compare = (term) => {
let DATA;
//declare empty array where we will push every thinkpad computer for sale.
let arrayToStore = [];
//declare page variable, that will be the amount of pages based on the primary results
let pages;
//this is the Initial get request to calculate amount of iterations depending on result quantities.
axios.get('https://api.mercadolibre.com/sites/MLA/search?q='+ term +'&condition=used&category=MLA1652&offset=' + 0)
.then(function (response) {
//begin calculation of pages
let amount = response.data.paging.primary_results;
//since we only care about the primary results, this is fine. Since there are 50 items per page, we divide
//amount by 50, and round it up, since the last page can contain less than 50 items
pages = Math.ceil(amount / 50);
//here we begin the for loop.
// `i` is assigned here without let/const/var inside this function, so the loop
// does not create a separate binding per iteration: the response callbacks
// below all read the same variable, with whatever value it holds when they run.
for(i = 0; i < pages; i++) {
// So for each page we will do an axios request in order to get results
//Since each page is 50 as offset, then i should be multiplied by 50.
axios.get('https://api.mercadolibre.com/sites/MLA/search?q='+ term +'&condition=used&category=MLA1652&offset=' + i * 50)
.then((response) => {
const cleanUp = response.data.results.map((result) => {
let image = result.thumbnail.replace("I.jpg", "O.jpg");
// Assigns the object to `importante` (not declared in this function) and returns it.
return importante = {
id: result.id,
title: result.title,
price: result.price,
link: result.permalink,
image: image,
state: result.address.state_name,
city: result.address.city_name
}
});
// Each page's array is pushed as one element, so arrayToStore is an array of per-page arrays.
arrayToStore.push(cleanUp);
console.log(pages, i)
// Evaluated when this response arrives, using the shared `i` (see the loop header).
if (i === pages) {
let path = ('./compare/yesterday-' + term +'.json');
if (fs.existsSync(path)) {
console.log("Loop Finished. Reading data from Yesterday")
fs.readFile('./compare/yesterday-' + term +'.json', (err, data) => {
if (err) throw err;
let rawDataFromYesterday = JSON.parse(data);
// test
//first convert both items to check to JSON strings in order to check them.
if(JSON.stringify(rawDataFromYesterday) !== JSON.stringify(arrayToStore)) {
//Then Check difference using id, otherwise it did not work. Using lodash to help.
// Only the first element (index 0) of each side is compared.
let difference = _.differenceBy(arrayToStore[0], rawDataFromYesterday[0],'id');
fs.writeFileSync('./compare/New'+ term + '.json', JSON.stringify(difference));
//if they are different save the new file.
//Then send it via mail
console.log("different entries, wrote difference to JSON");
let newMail = mail(difference, term);
fs.writeFileSync('./compare/yesterday-' + term +'.json', JSON.stringify(arrayToStore));
DATA = {
content: difference,
message: "These were the differences, items could be new or deleted.",
info: "an email was sent, details are the following:"
}
// Returns from the fs.readFile callback, not from compare().
return DATA;
} else {
console.log("no new entries, cleaning up JSON");
// NOTE(review): an array literal is passed as the data argument - confirm this writes what is intended.
fs.writeFileSync('./compare/New'+ term + '.json', []);
DATA = {
content: null,
message: "There were no difference from last consultation",
info: "The file" + './compare/New'+ term + '.json' + ' was cleaned'
}
// Returns from the fs.readFile callback, not from compare().
return DATA;
}
});
} else {
console.error("error");
console.log("file did not exist, writing new file");
fs.writeFileSync('./compare/yesterday-' + term +'.json', JSON.stringify(arrayToStore));
DATA = {
content: arrayToStore,
message: "There were no registries of the consultation",
info: "Writing new file to ' " + path + "'"
}
// Returns from the per-page .then callback, not from compare().
return DATA;
}
}
})
}
// Rejections of the initial request (and errors thrown in its handler) are only logged.
}).catch(err => console.log(err));
}
module.exports = compare
So I export this compare function, which I call on my app.js.
What I want is to make this compare function return the DATA object, so I can display the actual messages on the front end,
My hopes would be, putting this compare(term) function inside a route in app.js like so:
// GET /api/compare/:term - sends back whatever compare(term) returns.
app.get("/api/compare/:term", (req, res) => {
  const term = req.params.term;
  const data = compare(term);
  res.send(data);
})
But as I said, it's returning undefined. I tried async/await, and I tried returning the whole first axios call, but I always get undefined.
Thank you

Dealing with multiple asynchronous function calls in a for loop

I am trying to do multiple asynchronous actions: Axios requests inside of a for loop. I want to do something after everything is resolved but there is so much going on I don't know how to do it.
I thought of making my sourcer function async and awaiting it on each iteration (and wrapping the for loop in an async function), but one problem is that sourcer doesn't actually return anything. I don't know how to return from sourcer from inside an Axios "finally" clause. Another problem is that I don't want to await each sourcer call because it would be a hit on performance.
Promise.all sounds like the right direction to take but I don't know how to implement it with this for loop.
Here is the relevant part of my code (ts is a large array of objects):
// For every media entry found in `ts`, checks whether its original URL can be
// fetched and stores the chosen source (original URL or server fallback) in
// component state.
.then(ts => {
// Create an association object that determines each media item's source
// sourcer has no return statement: the requests it starts are neither returned
// nor collected, so the caller has nothing to wait on.
const sourcer = media => { // Input is either [image filename, image url] or [image filename, image url, video filename, video url]
// Test to see if the original URL works
let validURL = true
axios.get(media[1])
.then(resp => {
if (resp.status.toString()[0] !== '2') validURL = false
})
// Runs when the request rejects.
// NOTE(review): assumes the rejection value exposes `status` - confirm.
.catch(resp => {
if (resp.status.toString()[0] !== '2') validURL = false
})
// Runs after either branch: copies the current state objects through a JSON
// round trip, records the source for this image and calls setState.
.finally(() => {
let newSources = JSON.parse(JSON.stringify(this.state.sources))
let newModals = JSON.parse(JSON.stringify(this.state.modals))
if (validURL) newSources[media[0]] = media[1]
// If the original URL does not work, pull media item from server
else newSources[media[0]] = `http://serveripaddress/get_media?filename=${media[0]}`
newModals[media[0]] = false
this.setState({ sources: newSources, modals: newModals })
})
if (media.length > 2) { // If the media item is a video, do the same checks
let validVURL = true
axios.get(media[3])
.then(resp => {
if (resp.status.toString()[0] !== '2') validVURL = false
})
.catch(resp => {
if (resp.status.toString()[0] !== '2') validVURL = false
})
.finally(() => {
let newSources2 = JSON.parse(JSON.stringify(this.state.sources))
let newThumbnails = JSON.parse(JSON.stringify(this.state.thumbnails))
if (validVURL) newSources2[media[2]] = media[3]
else newSources2[media[2]] = `http://serveripaddress/get_media?filename=${media[2]}`
newThumbnails[media[0]] = media[2] // Add an association for the video and its thumbnail
this.setState({ sources: newSources2, thumbnails: newThumbnails })
})
}
}
// Starts the checks for every media entry; the loop does not wait for any of them.
for (let t of ts) {
if (t.media) for (let m of t.media) sourcer(m)
if (t.preview_media) sourcer(t.preview_media)
if (t.video) sourcer(t.video)
}
})
I want to do something after ts has been iterated through and all sourcer calls are completed.
I'm not fishing for someone to write my code for me but a nudge in the right direction would be greatly appreciated.
axios.get will return a Promise, so simply build up your array of Promises and use Promise.all
So, in your case, instead of executing the http call and waiting on the response, just add it to your array.
Something like this will work. I removed your code that was handling the response of each individual get request. You can merge that code (or just copy/paste) into where I put the placeholder below:
.then(ts => {
// Create an association object that determines each media item's source
const sourcer = media => { // Input is either [image filename, image url] or [image filename, image url, video filename, video url]
// Test to see if the original URL works
let validURL = true;
const promises = [];
promises.push(axios.get(media[1]));
if (media.length > 2) { // If the media item is a video, do the same checks
let validVURL = true;
promises.push(axios.get(media[3]));
}
}
for (let t of ts) {
if (t.media)
for (let m of t.media) sourcer(m)
if (t.preview_media) sourcer(t.preview_media)
if (t.video) sourcer(t.video)
}
// Execute the Promises
Promise.all(promises).then( results => {
const media1 = results[0];
const media3 = results[1];
// TODO: Run your code for media1/media3 results
})
})

Node fs.readdir freezing in folders with too many files

In Node.js I have to read files in a folder and for each file get file handler info, this is my simplest implementation using fs.readdir:
/**
 * Lists the `.mp3` files of the media directory together with size and
 * timestamp metadata.
 *
 * The directory listing is cut to `limit` entries first and filtered by
 * extension afterwards, so fewer than `limit` items may be returned.
 *
 * @param {object} [params] - Overrides for the defaults below.
 * @param {number} [params.limit=100] - Maximum number of directory entries considered.
 * @param {*} [params.desc=1] - Truthy: most recently modified first; falsy: oldest first.
 * @returns {Promise<object[]>} Resolves with the items; rejects when the
 *   directory cannot be read or an entry cannot be inspected.
 */
FileServer.prototype.listLocal = function (params) {
  var self = this;
  // defaults, overridden by params
  var options = Object.assign({ limit: 100, desc: 1 }, params);
  // media path is the media folder
  var mediaDir = path.join(self._options.mediaDir, path.sep);
  return new Promise((resolve, reject) => {
    fs.readdir(mediaDir, (error, results) => {
      if (error) {
        self.logger.error("FileServer.list error:%s", error);
        return reject(error);
      }
      // Anything thrown in here would otherwise escape the readdir callback
      // and leave the promise pending forever.
      try {
        var items = results
          // cut to max files
          .slice(0, options.limit)
          // filter default ext
          .filter(file => file.indexOf('.mp3') > -1)
          // format meta data
          .map(file => {
            var filePath = path.join(self._options.mediaDir, path.sep, file);
            // statSync needs no file descriptor, so nothing can be left open on failure
            var fstat = fs.statSync(filePath);
            return {
              name: file,
              path: filePath,
              // file size in bytes
              size: fstat.size,
              sizehr: self.formatSizeUnits(fstat.size),
              // "Birth Time" Time of file creation. Set once when the file is created.
              birthtime: fstat.birthtime,
              // "Modified Time" Time when file data last modified.
              mtime: fstat.mtime,
              // "Access Time" Time when file data last accessed.
              atime: fstat.atime,
              timestamp: new Date(fstat.mtime).getTime(),
              media_id: path.basename(filePath, '.mp3')
            };
          });
        // desc: most recent first, otherwise oldest first
        var direction = options.desc ? -1 : 1;
        items.sort((a, b) => direction * (a.timestamp - b.timestamp));
        return resolve(items);
      } catch (itemError) {
        self.logger.error("FileServer.list error:%s", itemError);
        return reject(itemError);
      }
    });
  });
}
so that for each file I get an array of items
{
"name": "sample121.mp3",
"path": "/data/sample121.mp3",
"size": 5751405,
"sizehr": "5.4850 MB",
"birthtime": "2018-10-08T15:26:08.397Z",
"mtime": "2018-10-08T15:26:11.650Z",
"atime": "2018-10-10T09:01:48.534Z",
"timestamp": 1539012371650,
"media_id": "sample121"
}
That said, the problem is that node.js fs.readdir is known to freeze the Node I/O loop when the folder to list has a large number of files, say from tens of thousands to hundreds of thousands or more.
This is a known issue - see here for more info.
There are also plans to improve fs.readdir in a some way, like streaming - see here about this.
In the meanwhile I'm searching for like a patch to this, because my folders are pretty large.
Since the problem is that the event loop gets frozen, someone proposed a solution using process.nextTick, which I have assembled here:
// Variant of listLocal that hands each directory entry to process.nextTick
// instead of mapping the entries synchronously.
// Returns a promise that resolves with `results` as it is when resolve() is
// called at the end of the readdir callback.
FileServer.prototype.listLocalNextTick = function (params) {
var self = this;
var options = {
limit: 100,
desc: 1
};
// override defaults
for (var attrname in params) { options[attrname] = params[attrname]; }
// media path is the media folder
var mediaDir = path.join(self._options.mediaDir, path.sep);
return new Promise((resolve, reject) => {
// Calls inEntryProcessingFunction once per element of inArray, scheduling each
// call through process.nextTick. It returns right after scheduling the first
// call and offers no way to learn when the last element has been processed.
var AsyncArrayProcessor = function (inArray, inEntryProcessingFunction) {
var elemNum = 0;
var arrLen = inArray.length;
var ArrayIterator = function () {
inEntryProcessingFunction(inArray[elemNum]);
elemNum++;
if (elemNum < arrLen) process.nextTick(ArrayIterator);
}
if (elemNum < arrLen) process.nextTick(ArrayIterator);
}
fs.readdir(mediaDir, function (error, results) {
if (error) {
self.logger.error("FileServer.list error:%s", error);
return reject(error);
}
// cut to max files
results = results.slice(0, options.limit);
// filter default ext
results = results.filter(item => {
return (item.indexOf('.mp3') > -1);
});
// Builds the metadata item for one file name.
var ProcessDirectoryEntry = function (file) {
// This may be as complex as you may fit in a single event loop
var filePath = path.join(self._options.mediaDir, path.sep, file);
var item = {
name: file,
path: filePath
};
// The descriptor opened here is not closed anywhere in this function.
const fd = fs.openSync(filePath, 'r');
var fstat = fs.fstatSync(fd);
// file size in bytes
item.size = fstat.size;
item.sizehr = self.formatSizeUnits(fstat.size);
// "Birth Time" Time of file creation. Set once when the file is created.
item.birthtime = fstat.birthtime;
// "Modified Time" Time when file data last modified.
item.mtime = fstat.mtime;
// "Access Time" Time when file data last accessed.
item.atime = fstat.atime;
item.timestamp = new Date(item.mtime).getTime();
item.media_id = path.basename(filePath, '.mp3');
// map to file item
// Only rebinds the local parameter `file`; the `results` array is not
// changed and `item` is not stored anywhere else.
file = item;
}//ProcessDirectoryEntry
// LP: fs.readdir() callback is finished, event loop continues...
// The entries are only scheduled here; ProcessDirectoryEntry runs after this
// callback has returned, i.e. after the sort and resolve() below.
AsyncArrayProcessor(results, ProcessDirectoryEntry);
// At this point `results` still holds the filtered names from readdir, not
// metadata items, so `timestamp` is read from those entries.
if (options.desc) { // sort by most recent
results.sort(function (a, b) {
return b.timestamp - a.timestamp;
});
} else { // sort by older
results.sort(function (a, b) {
return a.timestamp - b.timestamp;
});
}
return resolve(results);
});
});
}//listLocalNextTick
This seems to avoid the original issue, but I can no longer map the file list to the items with file handler info as I did before. AsyncArrayProcessor runs ProcessDirectoryEntry on each file entry through process.nextTick, and because of that asynchronous nature I cannot get the modified results array back, whereas in the previous listLocal function I simply did a synchronous array.map over the results array.
How to patch the listLocalNextTick to behave like the listLocal but keeping process.nextTick approach?
[UPDATE]
According to the proposed solution, this is the best implementation so far:
/**
* Scan files in directory
* #param {String} needle
* #param {object} options
* #returns {nodeStream}
*/
scanDirStream : function(needle,params) {
var options = {
type: 'f',
name: '*'
};
for (var attrname in params) { options[attrname] = params[attrname]; }
return new Promise((resolve, reject) => {
var opt=[needle];
for (var k in options) {
var v = options[k];
if (!Util.empty(v)) {
opt.push('-' + k);
opt.push(v);
}
};
var data='';
var listing = spawn('find',opt)
listing.stdout.on('data', _data => {
var buff=Buffer.from(_data, 'utf-8').toString();
if(buff!='') data+=buff;
})
listing.stderr.on('data', error => {
return reject(Buffer.from(error, 'utf-8').toString());
});
listing.on('close', (code) => {
var res = data.split('\n');
return resolve(res);
});
});
Example of usage:
// Counts the .mp3 files found below mediaRoot.
scanDirStream(mediaRoot, {
  name: '*.mp3'
})
  .then(results => {
    // %d expects a number: log how many entries were found, not the array itself
    console.info("files:%d", results.length);
  })
  .catch(error => {
    console.error("error %s", error);
  });
This can eventually be modified to add a tick callback on every stdout.on event emitted when a new file is found in the directory listing.
I have Created a wrapper around find for it but you could use dir or ls in the same way.
const { spawn } = require('child_process');
/**
 * findNodeStream - starts a `find` child process.
 * @param {String} dir - First argument handed to `find`.
 * @param {String|String[]} [options] - Further `find` arguments; falsy entries are dropped.
 * @returns {ChildProcess} The spawned process, whose stdout/stderr can be listened to.
 */
const findNodeStream = (dir, options) => {
  const findArgs = [dir, options].flat().filter(Boolean);
  return spawn('find', findArgs);
};
/**
 * Usage Example:
 *   let listing = findNodeStream('dir', [options])
 *   listing.stdout.on('data', d => console.log(d.toString()))
 *   listing.stderr.on('data', d => console.log(d.toString()))
 *   listing.on('close', (code) => {
 *     console.log(`child process exited with code ${code}`);
 *   });
 */
this allows you to stream a directory chunked and not in a whole as fs.readdir does.
Important
NodeJS > 12.11.1 will have async readdir support
Landed in cbd8d71 ( https://github.com/nodejs/node/commit/cbd8d715b2286e5726e6988921f5c870cbf74127 ) as fs{Promises}.opendir(), which returns an fs.Dir, which exposes an async iterator. tada
https://nodejs.org/api/fs.html#fs_fspromises_opendir_path_options
const fs = require('fs');
/**
 * Logs the name of every entry of the directory at `path`, reading the
 * entries through the directory's async iterator.
 * @param {string} path - Directory to open.
 * @returns {Promise<void>}
 */
async function print(path) {
  const directory = await fs.promises.opendir(path);
  for await (const { name } of directory) {
    console.log(name);
  }
}

print('./').catch(console.error);

Conditional async callbacks

I'm writing an Electron program which takes a CSV file as input, and does file operations depending on the CSV content and file existence (it's to manage MAME arcade roms).
In order to have a progress bar on the UI side, I have switched the code from fully synchronous (because it was much easier) to asynchronous.
I just cannot find out how to reliably display a message to the user when all the lines in the CSV file are processed, and all the zip files are copied or removed.
Here is a (simplified) sample method:
// Reads the CSV file and, for every line, copies the matching rom zip (and,
// when present, the extra `game` entry) from `romset` to `selection`.
// `callback` is meant to fire once everything has been processed.
fs.readFile(file, { 'encoding': 'utf8' }, (err, fileContents) => {
let fileCsv = csvparse(fileContents);
let lines = fileCsv.length;
fileCsv.forEach((line) => {
// Decremented once per line while forEach iterates, i.e. before the copy
// started for this line has reported back through its callback.
lines--;
let zip = line.name + '.zip';
let sourceRom = path.join(romset, zip);
let destRom = path.join(selection, zip);
this.emit('progress.add', fileCsv.length, fileCsv.length - lines, zip);
if (fs.existsSync(sourceRom) && !fs.existsSync(destRom)) {
fs.copy(sourceRom, destRom, (err) => {
// NOTE(review): `game` is not defined anywhere in this snippet - confirm where it comes from.
let sourceChd = path.join(romset, game);
if (fs.existsSync(sourceChd)) {
fs.copy(sourceChd, path.join(selection, game), (err) => {
if (lines <= 0) { callback(); } // chd is copied
});
} else {
if (lines <= 0) { callback(); } // no chd, rom is copied
}
});
} else {
if (lines <= 0) { callback(); } // no source found or already exists
}
});
});
The problem is that the CSV file is processed really fast, but the files are not copied as fast. So the lines counter is decremented to 0 first, and then after each file copy the code finds that it's zero and triggers the callback.
How do I reliably trigger the callback at the end of all these nested callbacks and conditions?
Thanks
I tried to change the code without massively overwriting your style - assuming there is a reason to avoid things like bluebird, async/await & native Promises, and the async lib.
You need to decrement lines after a line is processed. I pulled the processing logic out into a function to make this clearer:
/**
 * Copies one rom zip to its destination and, when `romset/game` exists, copies
 * that entry to `selection/game` as well. `callback` is called exactly once:
 * with no argument on success (or when there is nothing to copy), or with the
 * error of the copy that failed.
 *
 * `selection` is not a parameter; it is taken from the enclosing scope.
 *
 * @param {object} args
 * @param {string} args.sourceRom - Path of the zip to copy.
 * @param {string} args.destRom - Path the zip is copied to.
 * @param {string} args.romset - Directory containing the source entries.
 * @param {string} args.game - Name of the additional entry to copy.
 * @param {Function} args.callback - Completion callback, `callback(err?)`.
 */
function processLine({
  sourceRom, destRom, romset, game, callback
}) {
  if (!fs.existsSync(sourceRom) || fs.existsSync(destRom)) {
    callback(); // no source found or already exists
    return;
  }
  fs.copy(sourceRom, destRom, (err) => {
    if (err) {
      // Do not attempt the second copy when the rom itself could not be copied.
      callback(err);
      return;
    }
    const sourceChd = path.join(romset, game);
    if (!fs.existsSync(sourceChd)) {
      callback();
      return;
    }
    fs.copy(sourceChd, path.join(selection, game), (chdErr) => {
      if (chdErr) {
        callback(chdErr);
        return;
      }
      callback();
    });
  });
}
// Reads the CSV file, processes every line with processLine and calls
// `callback` once, after the last line has reported completion.
fs.readFile(file, { 'encoding': 'utf8' }, (err, fileContents) => {
  if (err) {
    // Nothing can be parsed without the file contents.
    callback(err);
    return;
  }
  const fileCsv = csvparse(fileContents);
  // Number of lines whose processing has not completed yet.
  let lines = fileCsv.length;
  if (lines === 0) {
    // No line will ever report completion, so finish right away.
    callback();
    return;
  }
  fileCsv.forEach((line, index) => {
    const zip = line.name + '.zip';
    const sourceRom = path.join(romset, zip);
    const destRom = path.join(selection, zip);
    // `lines` only changes when a line completes, so the progress position is
    // derived from the loop index instead.
    this.emit('progress.add', fileCsv.length, index + 1, zip);
    // NOTE(review): `game` is not defined in this snippet - confirm where it comes from.
    processLine({ sourceRom, destRom, game, romset, callback: () => {
      lines--;
      if (lines <= 0) {
        callback();
      }
    }});
  });
});

Categories