nodejs download image from url synchronously - javascript

I am trying to build a small node app it calls an api which returns an array of urls which point to image blob png files.
I am then trying to loop over the array and download the files using a utility function. I need this to work synchronously. Once the downloads are complete I then want to fire an additional function.
I started off using some asynchronous code which I took from here: https://sabe.io/blog/node-download-image
The async code in my utils file looked like this:
import { promises as fs } from "fs";
import fetch from "node-fetch";
const downloadFile = async (url, path, cb) => {
const response = await fetch(url);
const blob = await response.blob();
const arrayBuffer = await blob.arrayBuffer();
const buffer = Buffer.from(arrayBuffer);
fs.writeFile(path, buffer);
cb();
}
export { downloadFile };
I have tried to convert it to be purely synchronous using this code:
import fs from "fs";
import fetch from "node-fetch";
const downloadFile = (url, path, cb) => {
const response = fetch(url);
const blob = response.blob();
const arrayBuffer = await blob.arrayBuffer();
const buffer = Buffer.from(arrayBuffer);
fs.writeFileSync(path, buffer);
cb();
}
export { downloadFile };
Then in my index.js file I am using it like so:
import { downloadFile } from './utils/downloadFiles.js';
let imagesArray = [];
let newImageNames = [];
imagesArray.forEach((item, index) => {
const fileName = `${prompt}__${index}_${uuid.v4()}.png`;
const filePath = path.join('src', 'images');
newImageNames.push(fileName);
downloadFile(item, filePath, fileDownloadCallback);
});
processDataCallback(); // This is the function which is being fired before the previous downloadFile functions have finished processing.
const fileDownloadCallback = () => {
console.log(`File download callback`);
}
My images array is being populated and looks like this as an example:
data: [
{
url: 'https://someurl.com/HrwNAzC8YW/A%3D'
},
{
url: 'https://someurl.com/rGL7UeTeWTfhAuLWPg%3D'
},
{
url: 'https://someurl.com/xSKR36gCdOI3/tofbQrR8YTlN6W89DI%3D'
},
{
url: 'https://someurl.com/2y9cgRWkH9Ff0%3D'
}
]
When I try and use the synchronous method I get this error TypeError: response.blob is not a function. This function does work when using it asynchronously, but then it is firing my next function before the image downloads have finished.
I have tried several iterations, first off using createWriteStream and createWriteStreamSync (which I believe are deprecated). So switched to fileWrite. I also tried using a synchronous fileWriteSync inside the async function, but still no dice. The other issue is that fetch works asynchronously, so I still don't know how to wire this up to only work synchronously. I was also wondering If I could chain a then onto the end of my fileDownload util function.
All of my code is in github, so I can share a url if required. Or please ask for more explanation if needed.
Is there something equivalent to jsfiddle for Node? If so I am more than happy to try and make a demo.
Any help greatly appreciated.

We can leave the original async downloadFile util alone (though there's a little room for improvement there).
In the index file...
import { downloadFile } from './utils/downloadFiles.js';
let imagesArray = [];
let newImageNames = [];
// I'm a little confused about how we get anything out of iterating an empty array
// but presuming it get's filled with URLs somehow...
const promises = imagesArray.map((item, index) => {
const fileName = `${prompt}__${index}_${uuid.v4()}.png`;
const filePath = path.join('src', 'images');
newImageNames.push(fileName);
// we can use the callback for progress, but we care about finishing all
return downloadFile(item, filePath, () => {
console.log('just wrote', filePath);
});
});
Promise.all(promises).then(() => {
console.log('all done')
})

Array.forEach doesnt work with async code.
Convert your code into a for...of or for code and it will works.
Also. You don't use callbacks with async/await code.
Your code will look like this:
let index = 0;
for(const item of imageArray) => {
const fileName = `${prompt}__${index}_${uuid.v4()}.png`;
const filePath = path.join('src', 'images');
newImageNames.push(fileName);
downloadFile(item, filePath);
fileDownloadCallback();
index++;
});

Related

How do I make a function wait for the completion of another function before running

The problem I'm facing is that I want to create a temporary work folder for a certain function to hold its assets and work on them.
So:
await function someFunc() {
createFolder()
...
makeSomeFiles()
doOtherStuff()
...
deleteFolder()
}
But the functions that I am using, in node.js, are all async. Creating a folder is fs.mkdir() and deleting a folder is fs.rmdir() and downloading images and saving them is also an async procedure of some kind.
The problem is such: the folder gets created, and deleted, before any of the code in the middle executes. So I get errors from the middle section code that the folder doesn't exist, because it gets deleted prematurely. How do i make fs.rmdir(), at the end, wait for all the middle code to run first, before deleting the folder.
The specific code is this:
async function run() {
//GENERATE SESSION ID AND FOLDER
const sessionID = str.random(50);
fs.mkdir('images/'+sessionID, (err) => {
if (err) return err;
});
//DOWNLOAD IMAGE
https.get('https://en.wikipedia.org/wiki/Main_Page#/media/File:RE_Kaja_Kallas.jpg', (file) => {
file.pipe(fs.createWriteStream('images/'+sessionID+'/image.jpeg'));
});
//CLEANUP
fs.rmdir('images/'+sessionID, { recursive: true }, (err) => {
if (err) return err;
});
}
I would use promise-based versions of functions that do these operations and then use async/await with those promises:
const stream = require('stream');
const {promisify} = require('util');
const fs = require('fs');
const fsp = fs.promises;
const got = require('got');
const pipeline = promisify(stream.pipeline);
async function run() {
const sessionID = str.random(50);
const dir = 'images/'+sessionID;
await fsp.mkdir(dir);
await pipeline(
got.stream('https://en.wikipedia.org/wiki/Main_Page#/media/File:RE_Kaja_Kallas.jpg'),
fs.createWriteStream(dir + '/image.jpeg')
);
// not sure why you're trying to remove a directory that you just
// put a file in so it's not empty
await fsp.rmdir(dir, { recursive: true })
}
run().then(() => {
console.log("all done");
}).catch(err => {
console.log(err);
});
But, this function isn't making a lot of sense to me because you're creating a directory, downloading a file to it and then trying to remove a non-empty directory.
This uses the library got() for downloading the file because it's my goto library for http requests since it has both stream and promise interfaces.

Accessing Parsed CSV data , CSV-Parser Node.js

Im using csv-parser npm package and doing a sample csv parse. My only confusion is accessing the parsed array after running these functions. I understand im pushing the data in .on('data') , then doing a console.log(results); statement in .on('end'); to show what's being stored. Why do I get undefined when i try to access results after running those functions. Doesn't results get the information stored?
const csv = require('csv-parser');
const fs = require('fs');
const results = [];
fs.createReadStream('demo.csv')
.pipe(csv())
.on('data', (data) => results.push(data))
.on('end', () => {
console.log(results);
});
I came here to find the solution to the same issue.
Since this is an async operation, what works here is to call that function that acts on your parsed data once the end handler is called. Something like this should work in this situation:
const csv = require('csv-parser');
const fs = require('fs');
const results = [];
fs.createReadStream('demo.csv')
.pipe(csv())
.on('data', (data) => results.push(data))
.on('end', () => {
console.log(results);
csvData(results);
});
const csvData = ((csvInfo) => {
console.log(csvInfo);
console.log(csvInfo.length);
})
I can get results in .on('end', () => { console.log(results);}); , but
if I put a console.log() after the createReadStream , results is
undefined, does that make sense? – Videoaddict101
Your stream acts asynchronously, that means your data and your end handler will be called later, meanwhile your javascript continue to be executed. So accessing your array just after fs.createReadStream instruction will result of an empty array.
Understanding async is very important using javascript, even more for nodejs.
Please have a look on differents resources for handling async like Promise, Async/Await ...
You should you neat-csv which is the endorsed wrapper for csv-parser that gives you a promise interface.
That said, you can create a promise and resolve it in the on("end", callback)
import fs from "fs";
import csv from "csv-parser";
function getCsv(filename) {
return new Promise((resolve, reject) => {
const data = [];
fs.createReadStream(filename)
.pipe(csv())
.on("error", (error) => reject(error))
.on("data", (row) => data.push(row))
.on("end", () => resolve(data));
});
}
console.log(await getCsv("../assets/logfile0.csv"));

NodeJS: How to read from two files and write to single output file using pipes?

Context
I am using event-stream module to help me in reading and writing to these local files for which I hope to return a resulting file. Long story short, the 2 input files(sent through express API as multipart/form-data) I am expecting can be upwards of 200MB in size containing a list of entries (1 per line). What I would like to do is to combine those entries in the following format <entry1>:<entry2> where entry1 is the entry from the first file and entry2 is from the second file. I did this in a way earlier where I was able to store and return inputs/outputs in memory, but seeing as I have very limited memory space on my application server, I was running out of memory on the heap. I read that I could use event-stream and piping to read in each file line by line and output to a file instead of to a large string in memory using read-streams. The issue is that I can't seem to resolve in the right way/time in order for the resulting output file to be ready to send back to the caller.
What I have so far
What I have so far worked in that I get the correct file output I am expecting, however, this seems to be an asynchronicity problem, in that, I am resolving the promise before the file has actually completed writing/saving. Please see my code below...
const fs = require('fs');
const es = require('event-stream');
const uuid = require('uuid');
const buildFile = async (fileOne, fileTwo) =>
await new Promise((resolve, reject) => {
try {
// Output stream
let fileID = uuid.v4();
let outStream = fs
.createWriteStream(`files/outputFile-${fileID}.txt`, {
flags : 'a',
encoding : 'utf-8'
});
let fileOneRS = fs
.createReadStream(fileOne.path, {
flags : 'r',
encoding : 'utf-8'
})
.pipe(es.split())
.pipe(
es.mapSync((lineOne) => {
fileOneRS.pause();
let fileTwoRS = fs
.createReadStream(fileTwo.path, {
flags : 'r',
encoding : 'utf-8'
})
.pipe(es.split())
.pipe(
es.mapSync((lineTwo) => {
fileTwoRS.pause();
// Write combo to file
outStream.write(`${lineOne}:${lineTwo}\n`);
fileTwoRS.resume();
})
);
fileOneRS.resume();
})
); // This is where I have tried doing .on('end', () => resolve), but it also does not work :(
} catch (err) {
reject(err);
}
});
Note: This function is called from another service function as follows:
buildFile(fileOne, fileTwo)
.then((result) => {
resolve(result);
})
.catch((err) => {
console.log(err);
reject(err);
});
As a novice Javascript developer and even newer to NodeJS, I have been stuck trying to figure this out on my own for over 2 weeks now. If anyone is able to help, I would greatly appreciate some wisdom here!
Thanks 🙂
Edit: Updated the code to conform to the OP's expected output.
The promise' resolve() function should be called once the write stream completes. The comment provided in the OP snippet indicate that the resolve function might have been called upon draining fileOneRS (at the end of the pipe() chain).
Rather than creating a new read stream for each line in the first file, the code should only instantiate the read streams once.
The following example illustrate how this code flow could be refactored to read each line only once, and concatenate the lines from file A and B line-by-line:
import stream from "stream";
import util from "util";
import readline from "readline";
import fs from "fs";
import os from "os";
/** Returns a readable stream as an async iterable over text lines */
function lineIteratorFromFile( fileStream ){
return readline.createInterface({
input: fileStream,
crlfDelay: Infinity
})
}
// Use stream.pipeline to handle errors and to stream the combined output
// to a Writable stream. The promise will resolve once the data has finished
// writing to the output stream.
await util
.promisify(stream.pipeline)(
async function*(){
for await ( const lineA of lineIteratorFromFile(fs.createReadStream( "./in1.txt" ))){
for await (const lineB of lineIteratorFromFile(fs.createReadStream( "./in2.txt" ))){
yield `${lineA}: ${lineB}${os.EOL}`
}
}
},
fs.createWriteStream( outputFile )
);
A runnable example with NodeJS v13+ is available in the collapsed snippet below:
// in1.txt:
foo1
foo2
// in2.txt:
bar1
bar2
// out.txt (the file created by this script, with expected output):
foo1: bar1
foo1: bar2
foo2: bar1
foo2: bar2
// main.mjs:
import stream from "stream";
import util from "util";
import readline from "readline";
import fs from "fs";
import os from "os";
/** Returns a readable stream as an async iterable over text lines */
function lineIteratorFromFile( fileStream ){
return readline.createInterface({
input: fileStream,
crlfDelay: Infinity
})
}
(async ()=>{
await util
.promisify(stream.pipeline)(
async function*(){
for await ( const lineA of lineIteratorFromFile(fs.createReadStream( "./in1.txt" ))){
for await (const lineB of lineIteratorFromFile(fs.createReadStream( "./in2.txt" ))){
yield `${lineA}: ${lineB}${os.EOL}`
}
}
},
fs.createWriteStream( "./out.txt" )
);
})()
.catch(console.error);

How to attach multiple files from the same directory with supertest package in Node?

I want to attach around 100 files into a POST request using supertest, but I haven't found any way to do this.
const supertest = require('supertest);
const fs = require('fs');
const { promisify } = require('utils');
const request = supertest('http://localhost:3000');
const readdir = promisify(fs.readdir);
(async () => {
try {
const files = await readdir(path.resolve(__dirname, '/path/to');
request
.post('/upload')
.attach('file', files)
.end((response => {
console.log(response);
});
} catch(err) => {
console.error(err);
}
)();
Given the following piece of code, the request needs to chain the attach method every time a fileN is wanted to be send in the request, so I want how to attach recursively every file inside the same folder using the fs node core module
supertest extends superagent with testing functionality and works the same way. request.post('/upload') creates request instance and is chainable. Instance methods return an instance itself for chaining:
const requestInstance = request.post('/upload');
requestInstance === requestInstance.attach('file', ...);
supertest and superagent are thenable, it's preferable to chain the result as a promise for correct control flow when it's used with async..await:
try {
const files = await readdir(path.resolve(__dirname, '/path/to');
let requestInstance = request.post('/upload');
for (const file of files) {
// no need to reassign requestInstance because it's same instance
// requestInstance = requestInstance.attach('file', file);
requestInstance.attach('file', file);
}
const response = await requestInstance;
} catch (err) {
console.error(err);
}
You might just want to make several individual requests with supertest and Promise.all. You can use globby to get a list of files eg. without async might even be more straightforward, unless you need the result somewhere else.
const files = glob(path.resolve(__dirname, '/path/to'));
const requests = files.map((file) => {
return request
.post('/upload')
.attach('file', file)
.end((response => {
console.log(response);
});
});
Promise.all(requests).then(console.log('done')).catch(console.error);

How to use Promise.all in the following readFile code?

In the following code, I'm reading some files and getting their filename and text. After that, I'm storing data in an option variable to generate an epub file:
const Epub = require("epub-gen")
const folder = './files/'
const fs = require('fs')
let content = []
fs.readdir(folder, (err, files) => {
files.forEach(filename => {
const title = filename.split('.').slice(0, -1).join('.')
const data = fs.readFileSync(`${folder}${filename}`).toString('utf-8')
content.push({ title, data })
})
})
const option = {
title: "Alice's Adventures in Wonderland", // *Required, title of the book.
content
}
new Epub(option, "./text.epub")
The problem is, new Epub runs before the files are read, before content is ready. I think Promise.all is the right candidate here. I checked the Mozilla docs. But it shows various promises as example, but I have none. So, I'm not very sure how to use Promise.all here.
Any advice?
Your problem is with readdir, which is asynchronous so new Epub, like you already figured out, is called before it's callback parameter.
Switch to using readdirSync or move const option ... new Epub... inside the callback parameter of readdir, after files.forEach.
At the moment you can do everything synchronous since you use readFileSync.
So you can place the Epub creation after the forEach loop.
If you want to go async, my first question would be:
Does your node.js version support util.promisify ( node version 8.x or higher iirc )?
If so, that can be used to turn the callback functions like readFile and such into promises. If not, you can use the same logic, but then with nested callbacks like the other solutions show.
const FS = require( 'fs' );
const { promisify } = require( 'util' );
const readFile = promisify( FS.readFile );
const readFolder = promisify( FS.readFolder );
readFolder( folder )
// extract the file paths. Maybe using `/${filename}` suffices here.
.then( files => files.map( filename => `${folder}${filename}`))
// map the paths with readFile so we get an array with promises.
.then( file_paths => file_paths.map( path => readFile( path )))
// fecth all the promises using Promise.all() .
.then( file_promises => Promise.all( file_promises ))
.then( data => {
// do something with the data array that is returned, like extracting the title.
// create the Epub objects by mapping the data values with their titles
})
// error handling.
.catch( err => console.error( err ));
Add promises to an array. Each promise should resolve with the value you were pushing into content
When all promises resolve, the returned value will be the array previously known as content.
Also, you can, and should, use all async fs calls. So readFileSync can be replaced with readFile (async). I did not replace your code with this async call however, so you can clearly see what was required to answer your original question.
Not sure if I got the nesting right in snippet.
const Epub = require("epub-gen")
const folder = './files/'
const fs = require('fs')
let promises = []
fs.readdir(folder, (err, files) => {
files.forEach(filename => {
promises.push(new Promise((resolve, reject) => {
const title = filename.split('.').slice(0, -1).join('.')
const data = fs.readFile(`${folder}${filename}`).toString('utf-8')
resolve({
title,
data
})
}))
})
})
const option = {
title: "Alice's Adventures in Wonderland", // *Required, title of the book.
content
}
new Epub(option, "./text.epub")
Promise.all(promises).then((content) => {
//done
})

Categories