NodeJS Async Programming - javascript

Totally new to programming with async functions. Also new to node.js, which could be adding to my issue. I've read a lot and keep running into similar problems, and it seems like I've been randomly getting some portions of the async code to work while others don't. Here is a simplified version of what I have:
Essentially I'm searching a site for music, scraping all the results (scraper_start.js), and then each result is sent to scrape_individual.js to gather data. It is currently able to get all the data, but when it downloads the album art it comes in "too late".
The image does get logged to the console, but only after info gets returned. Also, if you have any good resources to learn async programming, please share them - I haven't been able to find a website that is nice and clean and gets into examples big enough that they become realistic (such as multiple async functions working at once and sometimes relying upon each other). Please critique my code as well - I am trying to learn!
File scraper_start.js:
const rp = require('request-promise');
const cheerio = require('cheerio');
const scrape = require('./scrape_individual.js');
const base_url = 'https://www.test.ca';
const url = 'https://www.test.ca/search?mysearchstring';
// Fetch the search page, collect every result link, scrape each result page
// in parallel, and print the combined data once all scrapes have settled.
rp(url)
  .then(function (html) {
    const $ = cheerio.load(html);
    // Each search hit is an <a> directly inside an <h3>.
    const links = $('h3 > a');
    console.log("TOTAL HITS: " + links.length);
    const results = links
      .map(function (i, v) {
        return $(v).attr('href');
      })
      .get();
    // Returning the combined promise makes the next .then() wait for every scrape.
    return Promise.all(
      results.map(function (href) {
        return scrape(base_url + href);
      })
    );
  })
  .then(function (my_data) {
    console.log(my_data);
  })
  .catch(function (err) {
    // Without a handler here, a failed request or scrape is only reported as
    // an unhandled rejection.
    console.error(err);
  });
File scrape_individual.js:
const rp = require('request-promise');
const cheerio = require('cheerio');
var info = {}
/**
 * Scrapes one result page and resolves with its title and cover image.
 *
 * @param {string} url - URL of the page to scrape.
 * @returns {Promise<{title: string, img3: *}|undefined>} Resolves with the
 *   scraped info only after the image lookup has settled (`img3` stays `null`
 *   when the image could not be loaded), or with `undefined` when the page is
 *   marked as a podcast.
 */
const scrape = function (url) {
  return rp(url).then(function (html) {
    const $ = cheerio.load(html);
    if (html.includes('contentType = "Podcast"')) {
      return undefined;
    }

    // A fresh object per call: concurrent scrapes must not share one record.
    const info = {
      title: $('h2.bc-heading:first').text(),
      img3: null,
    };

    // Return the image promise chain so the caller receives `info` only after
    // img3 has been filled in, instead of getting it back while still null.
    return img_data($('.bc-image-inset-border').attr('src'))
      .then(function (v) {
        console.log(v);
        info.img3 = v;
      })
      .catch(function (err) {
        // The image is best-effort: report the failure and keep img3 as null.
        console.error(err);
      })
      .then(function () {
        return info;
      });
  });
};
function img_data(src)
{
return new Promise(function(resolve, reject)
{
const { createCanvas, loadImage } = require('canvas');
loadImage(src).then((image) =>
{
const canvas = createCanvas(image.width, image.height);
const ctx = canvas.getContext('2d');
ctx.drawImage(image, 0, 0);
resolve(canvas.toDataURL());
});
});
}
module.exports = scrape;
UPDATE: New Code with ASYNC / AWAIT
scraper_start.js:
const rp = require('request-promise');
const cheerio = require('cheerio');
const scrape = require('./scrape_individual.js');
const base_url = 'https://www.test.ca';
const url = 'https://www.test.ca/search?mysearchstring';
var data = [];
/**
 * Fetches the search page, scrapes every linked result in parallel and
 * appends the scraped records to the module-level `data` array.
 *
 * @param {string} url - URL of the search results page.
 * @returns {Promise<Array>} Resolves with `data` once every scrape has finished.
 */
async function get_links(url) {
  const html = await rp(url);
  const $ = cheerio.load(html);
  // Each search hit is an <a> directly inside an <h3>.
  const links = $('h3 > a');
  console.log("TOTAL HITS: " + links.length);
  // Declared locally; assigning an undeclared name creates a global (or
  // throws in strict mode).
  const hrefs = links.map(function (i, v) { return $(v).attr('href'); }).get();
  // Promise.all keeps results in the order of `hrefs`, whereas pushing from
  // inside each task ordered them by whichever request finished first.
  // `scrape_individual` is the name the scraper module exports.
  const scraped = await Promise.all(
    hrefs.map((href) => scrape.scrape_individual(base_url + href))
  );
  data.push(...scraped);
  //QUESTION AREA 1: This data works great with all info.
  console.log(data);
  return data;
}
// Fire-and-forget call: get_links() is async, so this line only starts the work; the promise it returns is not awaited.
get_links(url);
//QUESTION AREA 2: This data gets printed before getting the actual data returned.
// The log below runs before get_links() has pushed anything, so it shows `data` in its initial state.
console.log(data);
scrape_individual.js:
const rp = require('request-promise');
const cheerio = require('cheerio');
var info = {}
//scrape2(url)
module.exports.scrape_individual = scrape2;
/**
 * Scrapes one result page and resolves with its title and cover image.
 *
 * @param {string} url - URL of the page to scrape.
 * @returns {Promise<{title: string, img3: *}|undefined>} The scraped info, or
 *   `undefined` when the page is marked as a podcast.
 */
async function scrape2(url) {
  const html = await rp(url);
  const $ = cheerio.load(html);
  if (html.includes('contentType = "Podcast"')) {
    return undefined;
  }
  const my_image = await img_data($('.bc-image-inset-border').attr('src'));
  // Build and return a fresh object per call; many calls run concurrently, so
  // they must not all write their result through one shared module variable.
  return {
    title: $('h2.bc-heading:first').text(),
    img3: my_image,
  };
}
async function img_data(src)
{
const { createCanvas, loadImage } = require('canvas');
let image = await loadImage(src);
const canvas = createCanvas(image.width, image.height);
const ctx = canvas.getContext('2d');
ctx.drawImage(image, 0, 0);
//console.log(canvas.toDataURL());
return canvas.toDataURL();
}
This code works great now. It is also easier to understand. Please feel free to critique it, as I am trying to master this. My question now is more of a general coding question.
Within scraper_start.js where the end result ends up (data), I marked two comments with "QUESTION AREA 1" and "QUESTION AREA 2"
QUESTION AREA 1: works entirely fine, which I would assume because it is in the async function
QUESTION AREA 2: outside of async function, does not have the object returned yet as there is nothing to say await. Is there a way to make it wait?
My question is pretty loaded. I can't use await since it's not in an async function from my understanding. Does this mean all my code needs to be in functions if I want to maintain an important order? What is best practice? Why not call every function as async?
edit: Fixing typos
edit2: Added ASYNC / AWAIT modifications

Question Area 2 is returning before the data is captured because you are telling JS that the last function is not waiting on anyone, so it is run synchronously.
But the truth is that it is actually waiting for someone, it is waiting for get_links()
So a way to get to print the data would be:
// Waits for get_links() to finish and then prints what it resolved with.
// `await` is only valid inside an async function, hence this small wrapper.
async function printer() {
  // `url` is the search URL constant; the capitalised `URL` is a built-in
  // global constructor, not that string.
  const returnedData = await get_links(url);
  console.log(returnedData);
}
// Nothing else depends on printer(), so it is not awaited; the catch keeps a
// failure from becoming an unhandled rejection.
printer().catch(console.error);
if you want to use the data returned from an async function you need to call it with await, so JS knows that it needs to wait for a resolved or rejected promise before going on, or else it will return a Promise<pending>. And all await need to be inside an async.
In the beginning, sounds like an endless circle but it really is not.
For instance in our example you don't need any other async to call the printer() (because no other function is depending on that one)
I hope it makes sense, but promises at the beginning need some time to be digested before understanding them.
Async/await in my opinion are a blessing to understand promises, once you get your head around it and how they function you are better prepared to understand the resolve/reject way

Related

NodeJS - | Jimp Write("image.png") | not saving the image until the script ends

I really need some help, I'm new to coding and I'm trying to make a script
The script is supposed to achieve the following:
Takes a picture
Finds text within the image using tesseract
Searches for a specific string within the text found
Performs an action based on whether the specific string has been found or not
The problem I am having is that every time I run the script, it uses the previous version of the image saved, giving me the wrong result at the time.
I could really use some help.
const robot = require('robotjs')
const Jimp = require('jimp')
const Tesseract = require('tesseract.js');
const { Console, log } = require("console");
const fs = require('fs');
const {readFileSync, promises: fsPromises} = require('fs');
const { resolve } = require('path');
// Two Console instances whose stdout is redirected into text files in the
// working directory instead of the terminal.
const normalStdoutStream = fs.createWriteStream("normalStdout.txt");
const myLogger = new Console({ stdout: normalStdoutStream });
const normalStdout2Stream = fs.createWriteStream("normalStdout2.txt");
const myLogger2 = new Console({ stdout: normalStdout2Stream });
//////////////////////////////////////////////////////////////////////////////////////////
// Entry point: block for two seconds, then run one capture-and-read pass.
function main() {
  const startupDelayMs = 2000;
  sleep(startupDelayMs);
  performRead();
}
//Edited function to sync instead of async - The problem is still persisting
//Edited function to include tesseractimage() in callback of writeimage()
/**
 * Captures a fixed screen region, writes it to image.png and starts text
 * recognition once the file has been written.
 *
 * The work continues in callbacks after this function has returned, so
 * "Image 1 created" is logged before the file actually exists on disk.
 */
function writeImage() {
  const width = 498;
  const height = 135;
  const img4 = robot.screen.capture(0, 862, width, height).image;
  new Jimp({ data: img4, width, height }, (err, image) => {
    // When construction fails `image` is not usable; report and stop instead
    // of failing on `image.write`.
    if (err) {
      console.error(err);
      return;
    }
    image.write("image.png", (writeErr) => {
      // Do not run recognition against a stale image.png if the write failed.
      if (writeErr) {
        console.error(writeErr);
        return;
      }
      tesseractimage();
    });
  });
  console.log("Image 1 created");
}
/**
 * Runs text recognition on image.png and writes the result through myLogger
 * (which is set up to write to normalStdout.txt).
 *
 * @returns {Promise<void>} Settles once recognition has finished and been
 *   logged; failures are reported on stderr and do not reject.
 */
function tesseractimage() {
  return Tesseract.recognize("image.png", 'eng')
    .then((out) => {
      myLogger.log(out);
      // Logged here, not right after starting the call, so the message only
      // appears once recognition has actually completed.
      console.log("Tesseracted image");
    })
    .catch((err) => {
      console.error(err);
    });
}
/**
 * Reports whether a text file contains a given string.
 *
 * @param {string} [normalStdout="normalStdout.txt"] - Path of the file to read.
 * @param {string} [Viverz="Viverz"] - Text to look for.
 * @returns {boolean} `true` when the file contents include the text.
 */
function readTest(normalStdout = "normalStdout.txt", Viverz = "Viverz") {
  // The parameters were previously ignored in favour of hard-coded values;
  // those values are now the defaults, so a bare readTest() behaves as before.
  const contents = readFileSync(resolve(normalStdout), 'utf-8');
  const result = contents.includes(Viverz);
  console.log(result);
  return result;
}
//Edited performRead removing the call for tesseractimage();, it is now in writeimage();
// One pass of the workflow: start the screenshot capture, then check the
// log file for the search text. Both helpers are called without arguments.
function performRead() {
  writeImage();

  readTest();
}
// Blocks the calling thread for `ms` milliseconds by waiting on a private
// shared-memory cell that nothing ever notifies. Always returns null.
function sleep(ms) {
  const cell = new Int32Array(new SharedArrayBuffer(4));
  const index = 0;
  const expected = 0;
  Atomics.wait(cell, index, expected, ms);
  return null;
}
main();
I have tried changing functions to async functions,
I've tried numerous methods, pauses,
reiterations of functions multiple times,
nothing saves the file until the script ends and
then after it finds the correct string from the
previously saved screenshot, not the new one.
Current output:
Image 1 created a false Tesseracted image
Even when forcing tesseractimage() to call before the result is published it still has the same problem of not reading the file until the script is over
One way to call tesseractimage() from writeImage() when using image.write():
// Start recognition from the write callback, i.e. only after Jimp reports
// that image.png has been written.
new Jimp({data: img4, width, height}, (err, image) => {
  const onWritten = function() {
    tesseractimage();
  };
  image.write("image.png", onWritten);
});
One way to call tesseractimage() from writeImage() when using image.writeAsync():
// Promise flavour of the same idea: writeAsync() returns a promise, and
// recognition starts once that promise fulfils.
new Jimp({data: img4, width, height}, (err, image) => {
  const written = image.writeAsync("image.png");
  written
    .then(() => {
      tesseractimage();
    })
    .catch((error) => {
      // Handle error
    });
});
Also remove the function call from within performRead().
For reference look under "Writing to files and buffers".
Solved**
I removed the readTest() altogether, and restructured the tesseractimage to a new function
/**
 * Runs text recognition on image.png and checks the recognised text for
 * "Prayer potion".
 *
 * @returns {Promise<boolean>} `true` when the recognised text contains
 *   "Prayer potion".
 */
async function tesseracttest() {
  // Await the result directly instead of mixing `await` with `.then()`.
  const { data: { text } } = await Tesseract.recognize(
    "image.png",
    'eng',
    { logger: m => console.log(m) }
  );
  const extractedText = text.toString();
  const finalText = extractedText.includes("Prayer potion");
  console.log(extractedText);
  console.log(finalText);
  // Return the verdict; it was previously computed but never handed back.
  return finalText;
}

Data not returning from async function with database connection

The goal is to call a function from my main script that connects to a database, reads a document from it, stores pieces of that document in a new object, and returns that object to my main script. The problem is I cannot get it all to work together. If I try one thing, I get the results but my program locks up. If I try something else I get undefined results.
Long story short, how do I open a database and retrieve something from it to another script.
The program is a quiz site and I want to return the quiz name and the questions.
const myDb = require('./app.js');
// Ask the database module for the quiz and print its name once the callback
// receives it.
var myData = myDb.fun(function (quiz) {
  console.log(quiz.quizName);
});
Here is the script that tries to open the database and find the data
const { MongoClient } = require("mongodb");
const {mongoClient} = require("mongodb");
const uri = connection uri goes here but my name is hard coded into it at the moment so I removed for privacy
const client = new MongoClient(uri);
/**
 * Connects to the database, loads the "CIS01" quiz and hands back its name
 * and questions.
 *
 * @param {function(Object): void} [cback] - Optional callback that receives
 *   the quiz object after a successful lookup.
 * @returns {Promise<{quizName: *, quizQuestions: *}>} Resolves with the quiz
 *   object; rejects when connecting or querying fails or no quiz matches.
 */
const fun = async (cback) => {
  let quizObject;
  try {
    await client.connect();
    const database = client.db('Quiz-Capstone');
    const quizzes = database.collection('Quiz');
    const query = {quizName: "CIS01"};
    const options = {
      sort: {},
      projection: {}
    };
    const quiz = await quizzes.findOne(query, options);
    if (quiz == null) {
      // No match: fail with a clear message rather than a TypeError on
      // `quiz.quizName`.
      throw new Error(`Quiz not found: ${query.quizName}`);
    }
    quizObject = {
      quizName: quiz.quizName,
      quizQuestions: quiz.quizQuestions
    };
  } finally {
    // Always release the connection, whether the lookup worked or not.
    await client.close();
  }
  // Only reached on success. Calling the callback from `finally` ran it with
  // `undefined` after a failure, and threw when no callback was supplied.
  if (typeof cback === 'function') {
    cback(quizObject);
  }
  return quizObject;
};
fun().catch(console.dir);
module.exports = {
fun: fun
}
UPDATE: Still stuck. I have read several different threads here about asynchronous calls and callbacks but I cannot get my function located in one file to return a value to the caller located in another file.

Google Apps Script Hoisting and ReferenceError

I've been trying to find an answer to this for a few weeks, but couldn't quite do this, so I decided to ask.
I sometimes get this error:
ReferenceError: <SomeObject> is not defined
...whereas I know for sure that it is. The problem is that the object is located in a different file, so if I call the object from that file (or maybe even a third file), the code does work.
I believe this must have to do with how hoisting works, meaning I'm trying to call an object before it's declared.
But then how does it work exactly when you have different files?
Here's an example:
If I have the following code in one file and I run getID(), it works:
// Small API client. The URL and the encoded credentials live in WeakMaps
// keyed by instance, so they are not exposed as properties of the object.
const SomeAPI = (function () {
  const credentials = new WeakMap();
  const endpoints = new WeakMap();

  class SomeAPI {
    /**
     * @param {string} url - Endpoint to request.
     * @param {string} [user=DEFAULT_USER] - User name for basic auth.
     * @param {string} [pwd=DEFAULT_PWD] - Password for basic auth.
     */
    constructor(url, user = DEFAULT_USER, pwd = DEFAULT_PWD) {
      endpoints.set(this, url);
      credentials.set(this, Utilities.base64Encode(`${user}:${pwd}`));
    }

    /**
     * Requests the endpoint with a Basic Authorization header.
     * @returns {Promise<*>} The response parsed as JSON.
     */
    async fetch() {
      const options = {
        headers: { Authorization: `Basic ${credentials.get(this)}` },
      };
      const response = await UrlFetchApp.fetch(endpoints.get(this), options);
      return JSON.parse(response);
    }
  }

  return SomeAPI;
})();
// Runs immediately when this file is evaluated: calls getListData() and keeps the resulting promise, which getID() later awaits.
const LIST_DATA = (async () => await getListData())();
/**
 * Fetches the list data from the URL stored under 'List API URL' in
 * ALL_SETTINGS.
 * @returns {Promise<*>} The first element when the response is an array,
 *   otherwise the response itself.
 */
async function getListData() {
  const api = new SomeAPI(ALL_SETTINGS['List API URL']);
  const payload = await api.fetch();
  if (Array.isArray(payload)) {
    return payload[0];
  }
  return payload;
}
/**
 * Combines the active spreadsheet's id with the list id taken from LIST_DATA.
 * @returns {Promise<{sheetId: *, listId: *}>} The id pair, which is also logged.
 */
const getID = async () => {
  const { list_id: listId } = await LIST_DATA;
  const sheetId = SpreadsheetApp.getActive().getId();
  const id = { sheetId, listId };
  console.log(id);
  return id;
};
However, if I move LIST_DATA, getListData() and getID() to a different file, I get:
ReferenceError: SomeAPI is not defined
Overall the project is composed of 17 different files.
All help is greatly appreciated!
Per the comment by @Cooper on your question, moving the file up works. I had the same problem with a class I created. I moved the file containing the class to the top of the file list and it solved my problem.

How to wait for the promise when using get in Firestore

I am just trying a simple get command with Firestore, using this code from Google it doesn't work because it's not waiting for the promise?
Earlier I had put only a snippet of code, this is the entirety of index.js -- I'm using Firestore with Dialogflow to build a Google Assistant app and trying to call a function from the welcome intent that gets a field from Firestore, then writes that field to a string (named question1), and then this string should be spoken by the assistant as part of the ssml response. I've been on this for at least 30 hours already, can't seem to comprehend promises in regards to intents, firestore, etc. I've tried about 10 different solutions, this one works, only it says "undefined" in other variations I have tried it would say undefined several times but after 2-3 passes the get command would be complete and then the variable would be read out. I'm just trying to figure out how to get the get command and variable set before moving onto the SSML response. Can anyone point me in the right direction?
'use strict';
const functions = require('firebase-functions'); //don't forget this one
// Import Admin SDK
var admin = require("firebase-admin");
admin.initializeApp(functions.config().firebase);
var db = admin.firestore();
const collectionRef = db.collection('text');
// Scoped npm package names start with '@'.
const Firestore = require('@google-cloud/firestore');
var doc;
var question1;
const url = require('url');
const {
dialogflow,
Image,
Permission,
NewSurface,
} = require('actions-on-google');
const {ssml} = require('./util');
const config = functions.config();
const WELCOME_INTENT = 'Default Welcome Intent';
const app = dialogflow({debug: true});
/**
 * Loads the document `question-<rando>` from the `text` collection and stores
 * its `n111` field in the module-level `question1`.
 *
 * @param {number} rando - Number used to build the document id.
 * @returns {Promise<number>} Resolves with `rando` once the read has been
 *   attempted; a failed read is reported on stderr and leaves `question1`
 *   unchanged.
 */
async function dbaccess(rando) {
  console.log("dbaseaccess started");
  const currentquestion2 = 'question-' + rando.toString();
  try {
    // Await the read. Returning the un-awaited promise from here ended the
    // function before question1 could be assigned.
    const cityRef = await db.collection('text').doc(currentquestion2).get();
    console.log("get command completed");
    // presumably n111 holds the question text; verify against the stored documents
    question1 = cityRef.data().n111;
  } catch (e) {
    console.error(e);
  }
  console.log("one line above return something");
  return rando;
}
// Fallback handler: routes on the intent name and, for the welcome intent,
// speaks a randomly chosen question loaded from Firestore.
app.fallback(async (conv) => {
  // intent contains the name of the intent
  // you defined in the Intents area of Dialogflow
  const intent = conv.intent;
  switch (intent) {
    case WELCOME_INTENT: {
      const rando = Math.floor(Math.random() * 3) + 1;
      // Wait for the read to finish so question1 is set before the reply is
      // built; without the await the response said "undefined".
      await dbaccess(rando);
      // Named `speech` so it does not shadow the `ssml` helper imported from ./util.
      const speech =
        '<speak>' +
        question1 +
        '</speak>';
      conv.ask(speech);
      break;
    }
  }
});
exports.dialogflowFirebaseFulfillment = functions.https.onRequest(app);
You have 2 options: you can use async/await or you can use Promise.then() depending on how you want the code to execute.
Async/await:
/**
 * Reads the `SF` document from the `cities` collection.
 * @returns {Promise<*>} The result of `get()`, or `undefined` when the read
 *   fails (the error is reported on stderr).
 */
async function databasetest() {
  try {
    // doc() only builds a reference; get() is the call that returns a
    // promise, so that is what must be awaited.
    const cityRef = db.collection('cities').doc('SF');
    const doc = await cityRef.get();
    // do stuff
    return doc;
  } catch (e) {
    // error!
    console.error(e);
    return undefined;
  }
}
Promise.then():
// doc() returns a reference, not a promise, so `.then` is chained on get().
db.collection('cities').doc('SF').get()
  .then(doc => { /* do stuff */ })
  .catch(err => { /* error! */ });
maybe a little of work around could help you, I'm not sure yet how you are trying to implement it.
// Returns whatever get() returns for the `SF` document in the `cities`
// collection, so the caller can chain on it.
function databasetest () {
  return db.collection('cities').doc('SF').get();
}
// so you can implement it like
// Print the document's data, or a notice when it does not exist; a failed
// read is logged by the catch handler.
databasetest()
  .then(doc => {
    if (doc.exists) {
      console.log('Document data:', doc.data());
      return;
    }
    console.log('No such document!');
  })
  .catch(err => {
    console.log('Error getting document', err);
  });
More context would help to understand your use case better :)

async.each does not finish without error

I have a simple function where I get the word count from a URL. The script works if I have a low number of URLs. I limit async to 4 at a time. I watch my RAM and CPU and they don't go near the max on my machine. After about 70 URLs the script just sits there, with no error. I have it in a try/catch block and it never catches. Any help would be appreciated.
I have tried lodash forEach instead of async and I get the same issue.
const async = require('async')
const wordcount = require('wordcount')
const afterLoad = require('after-load')
const htmlToText = require('html-to-text')
/**
 * Logs the word count of each URL's page text, processing four URLs at a time.
 *
 * @param {string[]} urls - Pages to load.
 * @param {function(*): void} cb - Called once at the end with the error
 *   reported by the iteration, or `null`.
 */
function getWordCount(urls, cb) {
  async.eachLimit(urls, 4, function(url, cbe) {
    // Declared outside the try so the catch branch can reach it; a `let`
    // inside `try` is not in scope in `catch`.
    let urlWordCount = 0;
    try {
      const html = afterLoad(url); // https://www.npmjs.com/package/after-load
      const text = htmlToText.fromString(html);
      urlWordCount = wordcount(text); // https://www.npmjs.com/package/wordcount
      console.log(url, urlWordCount);
    } catch(err) {
      console.log(err);
      urlWordCount = 0;
      console.log(url, urlWordCount, err);
    }
    // Signalled exactly once, outside the try: a throw after signalling inside
    // `try` would have reached the catch and signalled a second time.
    cbe(null);
  }, function(err) {
    console.log("finished getting wordcount", err);
    cb(err || null);
  });
}
// Example run against two pages; prints whatever the final callback receives.
getWordCount(
  ["https://stackoverflow.com/", "https://caolan.github.io/async/docs.html#eachLimit"],
  (err) => {
    console.log(err);
  }
);
I think the issue is in the synchronous implementation of that after-load module, but it's indeed hard to judge unless you get an actual error (you could put some console.logs here and there on every line and see where your code actually gets stuck - or use a debugger for the same purpose).
What I'd propose though is to use proper asynchronous code - I run the example below with a set of 1000 urls and it did not get stuck - with usage of [scramjet] it's also more readable:
const {StringStream} = require('scramjet');
const wordcount = require('wordcount');
const fetch = require('node-fetch');
const htmlToText = require('html-to-text');
const {promisify} = require('util');
// Pipeline: fetch each URL (maxParallel is set to 4), convert the page to
// plain text, count its words and print {url, count}.
StringStream.fromArray(["https://stackoverflow.com/", "https://caolan.github.io/async/docs.html#eachLimit"])
  .setOptions({maxParallel: 4})
  .parse(async url => ({
    url,
    response: await fetch(url)
  }))
  .map(async ({url, response}) => {
    const html = await response.text();
    // Pass the downloaded markup in; fromString() was called with no argument,
    // so the page content never reached the converter.
    const text = htmlToText.fromString(html);
    const count = wordcount(text);
    return {
      url,
      count
    };
  })
  .each(console.log)
;
I actually run this from a file with the URL's by changing the first lines to:
// Alternative start of the pipeline: read the input from ./urls-list.txt and split it into lines instead of using a hard-coded array.
// NOTE(review): `fs` is not required in the snippet above; confirm it is imported where this is used.
StringStream.from(fs.createReadStream('./urls-list.txt'), 'utf-8')
.lines()
.setOptions({maxParallel: 4})
// and so on.

Categories