Nodejs: Fully executing websocket responses sequentially with async calls - javascript

Below is a simple example of what I'm currently working with: a websocket stream which does some asynchronous calls as part of the logic when consuming the incoming data. I'm mimicking async calls with a Promise-ified setTimeout function:
// Simulates an asynchronous operation: fulfills with a fixed status
// string after a 5 ms timer, standing in for real async I/O.
function someAsyncWork() {
  return new Promise((resolve) => {
    setTimeout(() => resolve('async work done'), 5);
  });
}
// Awaits the simulated async work and logs whatever it resolves with.
async function msg() {
  const text = await someAsyncWork();
  console.log(text);
}
// Subscribes to web3 pending-transaction events.
// NOTE(review): the "data" handler is async, but the emitter does not
// await it, so handlers for successive events interleave — exactly the
// behavior the question goes on to describe.
const main = async() => {
web3.eth.subscribe('pendingTransactions').on("data", async function(tx){
console.log('1st print: ',tx);
await msg();
console.log('2nd print: ',tx);
})
}
main();
Running the above results in a console output like so:
1st print: 0x8be207fcef...
1st print: 0x753c308980...
1st print: 0x4afa9c548d...
async work done
2nd print: 0x8be207fcef...
async work done
2nd print: 0x753c308980...
async work done
2nd print: 0x4afa9c548d...
.
.
.
I get what's happening here. The '1st print' is executed, followed by await-ing the async calls for each piece of data response. The '2nd print' is only executed after the 'async work done' occurs.
However this isn't quite what I'm looking for.
My logic has conditionals in place, where each data response will first use a global variable to check for a condition, followed by some async work if condition is met. Problem is that there are instances where some data responses will go ahead and execute async work when they shouldn't have: Nodejs's event loop hasn't had a chance to transfer some previous data response's async calls from the callback queue to the call stack, as the stack was too busy processing new incoming data. This means the '2nd prints' haven't executed (where the global variable is updated) before new incoming data has been processed. I imagine the someAsyncWork is only resolved when there is a free period in the websocket with no data incoming.
My question is: is there a way to ensure full, sequential processing of each piece of new data? Ideally the console output would look something like this:
1st print: 0x8be207fcef...
async work done
2nd print: 0x8be207fcef...
1st print: 0x753c308980...
async work done
2nd print: 0x753c308980...
1st print: 0x4afa9c548d...
async work done
2nd print: 0x4afa9c548d...
.
.
.

You can have a queue-like promise that keeps on accumulating promises to make sure they run sequentially:
// Tail of the task chain; each enqueued task runs after the previous one
// has settled.
let cur = Promise.resolve();

/**
 * Appends `f` to the sequential task chain.
 *
 * Fix: the original `cur = cur.then(f)` left `cur` permanently rejected
 * after one failing task, so every later task was silently skipped (and
 * the rejection was unhandled). Here the stored tail swallows the
 * failure so the chain stays usable, while the task's own promise is
 * returned so callers can still observe its result or error.
 *
 * @param {() => any} f task to run once all previously enqueued tasks settle
 * @returns {Promise<any>} settles with f's outcome
 */
function enqueue(f) {
  const task = cur.then(f);
  cur = task.catch(() => {}); // keep the chain alive after a failure
  return task;
}
// Stand-in for real asynchronous work: a promise that fulfills with a
// fixed message once a short 5 ms timer fires.
function someAsyncWork() {
  return new Promise((resolve) => {
    setTimeout(() => {
      resolve('async work done');
    }, 5);
  });
}
// Runs the simulated async work and prints its resolved value.
async function msg() {
  const done = await someAsyncWork();
  console.log(done);
}
// Same subscription as the question, but the handler only pushes work
// onto the enqueue() chain, so each transaction is fully processed
// (1st print, async work, 2nd print) before the next one starts.
const main = async() => {
web3.eth.subscribe('pendingTransactions').on("data", function(tx) {
enqueue(async function() {
console.log('1st print: ',tx);
await msg();
console.log('2nd print: ',tx);
});
})
}
main();

Related

JS await function with empty Promise

I have a question, I somewhat understand how Promises work but I would like to wait for a function to finish even if it doesn't return anything. I have a similar code but it doesn't wait for b to finish before starting the extra steps on a.
For example in this case I want to create tables in a sqlite db, then load data and run some tests. The thing here is that it's starting to run the tests before finishing to load the data and the arrays created in the loadData method are coming empty. I would even prefer not to return those arrays but to always wait for this process to be completed before running tests.
/**
 * Connects to the DB, queues the CREATE TABLE statements, loads the
 * data, then runs the tests.
 *
 * Fix: the original wrapped the body in `try { ... } catch (e) { throw e; }`,
 * a no-op rethrow — errors propagate to the caller identically without
 * it, so it has been removed.
 */
async function initialize(){
console.log("Beginning connection");
db = await connectToDB();
console.log(`Database: ${db}`)
// serialize() queues the statements so they execute in order.
db.serialize(function() {
console.log("Creating tables")
db.run(queries.__parent_company);
db.run(queries.parent_company);
db.run(queries.__sales_rep);
db.run(queries.sales_rep);
db.run(queries.advertiser);
console.log("Tables created");
});
// loadData must fully complete before testData reads the inserted rows.
let response = await loadData();
console.log("Response", response)
testData();
}
/**
 * Inserts every row from `data.rows` into the advertiser / sales-rep /
 * parent-company tables and resolves with the inserted records.
 *
 * Fixes two defects in the original:
 *  - `data.rows.forEach(async row => ...)` launched fire-and-forget
 *    callbacks, so `resolve(...)` ran before ANY row was processed and
 *    the arrays came back empty. A `for...of` loop awaits each row
 *    before moving on.
 *  - `new Promise(async (resolve, reject) => ...)` around already
 *    promise-based code is the explicit-construction anti-pattern; a
 *    plain async function returns an equivalent promise.
 *
 * @returns {Promise<{insertedAdvertisers: Array, insertedSales: Array, insertedCOmpanies: Array}>}
 */
async function loadData() {
  console.log("Beggning data insertion");
  try {
    const insertedAdvertisers = [];
    const insertedSales = [];
    const insertedCOmpanies = [];
    for (const row of data.rows) {
      const advertiser = await loadAdvertisers(row);
      const sale = await loadSalesRep(row);
      const company = await loadParentCompany(row);
      insertedAdvertisers.push(advertiser);
      insertedSales.push(sale);
      insertedCOmpanies.push(company);
      console.log("HERE", insertedAdvertisers, insertedSales, insertedCOmpanies)
    }
    return { insertedAdvertisers, insertedSales, insertedCOmpanies };
  }
  catch (e) {
    console.log(e);
    throw e; // reject the returned promise, as the original reject(e) did
  }
}
Talking about Promises, I advise you to read about the event loop and how it works.
What happens in your code is that JS knows this is a promise, so it moves the continuation to the callback queue until the promise settles, carries on executing the rest of your code, and only then pushes the callback onto the stack to be executed.
So if you want to execute certain logic after an async function finishes, you need to provide it as a callback or use .then().
// Illustrative stub: chain .then() on b()'s promise to run follow-up
// code after b() resolves. NOTE(review): a() does not await the chain,
// so a() itself resolves before the .then() callback runs.
async function a() {
b().then(()=>{
// your code that you want to be executed after a finishes
})
}
This image might illustrate what I'm trying to explain

JavaScript event handler working with an async data store API causing race condition

I need to update some data every time certain browser event fires (for example, when a browser tab closes):
// Read-modify-write against an async data store from an event handler.
// NOTE(review): `...` is a placeholder, not valid JS. Overlapping
// invocations can read stale data before an earlier setData completes —
// the race condition this question is about.
chrome.tabs.onRemoved.addListener(async (tabId) => {
let data = await getData(); // async operation
... // modify data
await setData(data); // async operation
});
The problem is, when multiple such event triggers in quick succession, the async getData() could return stale result in subsequent invocation of the event handler before setData() gets a chance to finish in earlier ones, leading to inconsistent result.
If the event handler can execute synchronously then this problem wouldn't occur, but getData() and setData() both are async operations.
Is this a race condition? What's the recommended pattern to handle this type of logic?
--- Update ---
To provide more context, getData() and setData() are simply promisified version of some Chrome storage API:
/**
 * Promisified chrome.storage.local.get for `key`.
 *
 * Fixes two defects in the original snippet:
 *  - `function(data) => {` mixes function and arrow syntax (SyntaxError);
 *  - `resolve` was never called, so the promise never settled.
 *
 * @returns {Promise<Object>} the stored data for `key`
 */
async function getData() {
  return new Promise((resolve) => {
    chrome.storage.local.get(key, (data) => {
      resolve(data);
    });
  });
}
/**
 * Promisified chrome.storage.local.set for `{ key: value }`.
 *
 * Fixes the same two defects as the original getData snippet: the mixed
 * `function() => {` syntax (SyntaxError) and the never-called `resolve`.
 *
 * @returns {Promise<void>} settles once the write completes
 */
async function setData() {
  return new Promise((resolve) => {
    chrome.storage.local.set({ key: value }, () => {
      resolve();
    });
  });
}
I wrapped the API call in a Promise for readability purposes, but I think it's an async op either way?
You have a fairly classic race condition for a data store with an asynchronous API and the race condition is even worse if you use asynchronous operations in the processing of the data (between the getData() and setData(). The asynchronous operations allow another event to run in the middle of your processing, ruining the atomicity of your sequence of events.
Here's an idea for how to put the incoming tabId in a queue and make sure you're only processing one of these events at a time:
// Serializes onRemoved handling: every event id is queued, and only the
// invocation that finds the queue previously empty drains it; later
// invocations return immediately after enqueueing.
// NOTE(review): `...` is a placeholder, not valid JS as written.
const queue = [];
chrome.tabs.onRemoved.addListener(async (newTabId) => {
queue.push(newTabId);
if (queue.length > 1) {
// already in the middle of processing one of these events
// just leave the id in the queue, it will get processed later
return;
}
async function run() {
// we will only ever have one of these "in-flight" at the same time
try {
let tabId = queue[0];
let data = await getData(); // async operation
... // modify data
await setData(data); // async operation
} finally {
queue.shift(); // remove this one from the queue
}
}
while (queue.length) {
try {
await run();
} catch(e) {
console.log(e);
// decide what to do if you get an error
}
}
});
This could be made more generic so it could be reusably used in more than place (each with their own queue) like this:
/**
 * Wraps an async handler so its invocations never overlap: each call's
 * arguments are queued, and a single drain loop processes them strictly
 * one at a time, in arrival order. Errors thrown by `fn` are logged and
 * the loop moves on to the next queued item.
 * @param {(...args: any[]) => Promise<any>} fn handler to serialize
 * @returns {(...args: any[]) => Promise<void>} serialized wrapper
 */
function enqueue(fn) {
  const pending = [];
  return async function (...callArgs) {
    pending.push(callArgs); // newest call goes to the back of the line
    if (pending.length > 1) {
      // a drain loop is already running; it will pick this call up
      return;
    }
    // Drain the queue one item at a time; only one fn() is ever in flight.
    while (pending.length > 0) {
      try {
        await fn(...pending[0]); // oldest queued call first
      } catch (e) {
        console.log(e);
        // decide what to do if you get an error
      } finally {
        pending.shift(); // done (or failed) — drop it from the queue
      }
    }
  };
}
// Usage: wrap the handler with enqueue() so each tab-removal event is
// fully processed before the next begins. NOTE(review): `...` is a
// placeholder, not valid JS.
chrome.tabs.onRemoved.addListener(enqueue(async function(tabId) {
let data = await getData(); // async operation
... // modify data
await setData(data); // async operation
}));
JS async/await does not really turn JS code synchronous.
What you could do is block on getData using Promise.all.
So,
// NOTE(review): this snippet is incorrect as written. Promise.all takes
// an array of promises and itself returns a promise; indexing that
// promise with [0] yields undefined, and `getData` is passed without
// being invoked. Nothing here "blocks the event loop" — JS cannot.
chrome.tabs.onRemoved.addListener(async (tabId) => {
... // turns in a composition
await setData(Promise.all([getData])[0]); // async composition
});
You could do an async composition with a block on the event loop: when the event is triggered, the VM will have a list of pending events, and a block on the awaited getData.
In fact, "async composition" does not really exist; it is just a trick with the VM to wait for the result of an operation, because the VM processes these as a list and lists don't wait.
Be careful to keep your code readable when using compositions.

how the async function work in javascript?

Let's say there are two async functions:
// Logs '1' fifty thousand times. Despite being declared async, the body
// contains no await, so the loop runs synchronously to completion
// without ever yielding the thread.
async function asyncCall() {
  let i = 0;
  while (i < 50000) {
    console.log('1');
    i += 1;
  }
}
// Logs '2' five times, synchronously, for the same reason as asyncCall:
// no await, so nothing yields the thread mid-loop.
async function asyncCall2() {
  let i = 0;
  while (i < 5) {
    console.log('2');
    i += 1;
  }
}
And I call both functions:
// Both bodies run synchronously in call order, so all 50000 '1's print
// before the '2's — async alone does not interleave anything.
asyncCall();
asyncCall2();
This is the output:
1 50.000 times, then 2 one time.
Shouldn't the 2 be logged somewhere between 50.000 and 1 time?
There is no threading in JavaScript. Since all your code is just executed in the one single thread of js, the output is expected.
Your misconception about async is quite common. You think that async implies some kind of "background work" but it is not! There is no threading involved in async! Within an async function you can just say "I did what I want to do, but know I need to wait.. maybe later I can carry on.. you can get the thread to do other stuff". But if other stuff started, than that other stuff is executed until it is done (or yields back the thread again).
To get your expected result you would need to yield back execution to the rest of the code from within the function - but still it is no threading and hence, if our awaited result arrived it is not processed until the currently running code is done.
// Returns a promise that fulfills with the string 'resolved' once a
// 2-second timer fires.
function resolveAfter2Seconds() {
  return new Promise((resolve) => {
    setTimeout(() => resolve('resolved'), 2000);
  });
}
// Logs 'calling', yields the thread while the 2 s timer runs, then logs
// the resolved value ('resolved'). Whatever synchronous code runs during
// the await finishes uninterrupted before the result is processed.
async function asyncCall() {
  console.log('calling');
  // The await frees the single JS thread to do other work until the
  // promise settles.
  const result = await resolveAfter2Seconds();
  console.log(result);
}
// Logs 'fn2', then the numbers 0 through 499. Plain synchronous code:
// once started, the loop cannot be interrupted by a settling promise.
function fnCall2() {
  console.log('fn2');
  let i = 0;
  while (i < 500) {
    console.log(i);
    i += 1;
  }
}
// asyncCall starts first; at its await, fnCall2 runs to completion, and
// only then is asyncCall's resolved result printed.
asyncCall();
fnCall2();
The result of asyncCall will arrive after 2 seconds, but it does not mean it is processed immediately. If fn2 needs more time than the processing of the result is delayed. Try setting fn2 to iterate to 50000 - the resolve message will always be printed afterwards.

Append items to an array from an async function

I am having trouble appending items to an array and would like assistance in performing such. I have reviewed this and understood zero of it.
Here is my current code which is being ran in AWS Lambda (Node.js 10.x):
// Accumulator for scan results; read later by generateEmailParams.
var sesData = ["array0", "array1"];
// DynamoDB scan callback: appends each item to sesData and re-issues the
// scan while LastEvaluatedKey indicates more pages remain.
// NOTE(review): the push DOES work — but it happens asynchronously, so
// any code that reads sesData immediately after calling
// dynamoDB.scan(...) sees the array before these pushes have run.
function onScan(err, data) {
if (err) {
console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
} else {
console.log("Scan succeeded.");
data.Items.forEach(function(itemdata) {
// append new value to the sesData array
sesData.push("Item :" + JSON.stringify(itemdata));
console.log(sesData);
console.log("Item :",JSON.stringify(itemdata));
});
// continue scanning if we have more items in case it is lots of data
if (typeof data.LastEvaluatedKey != "undefined") {
console.log("Scanning for more...");
params2.ExclusiveStartKey = data.LastEvaluatedKey;
dynamoDB.scan(params2, onScan);
}
}
}
/**
 * Builds the SES sendEmail parameter object. The message body embeds the
 * current contents of the module-level sesData array.
 * NOTE(review): the `body` parameter is accepted but unused, as in the
 * original.
 * @param {*} body unused
 * @returns {Object} SES sendEmail parameters
 */
function generateEmailParams (body) {
  const textPart = {
    Charset: 'UTF-8',
    Data: `Message sent. \nContent: \n${sesData}`
  };
  const subjectPart = {
    Charset: 'UTF-8',
    Data: `Subject`
  };
  return {
    Source: myEmail,
    Destination: { ToAddresses: [myEmail] },
    ReplyToAddresses: [myEmail],
    Message: {
      Body: { Text: textPart },
      Subject: subjectPart
    }
  };
}
//End email Params
// Lambda entry point. NOTE(review): dynamoDB.scan is asynchronous, so
// generateEmailParams runs — and the email body is built — before onScan
// has pushed anything into sesData. That is why the email never shows
// the scanned items. The fix is to await the scan (e.g.
// dynamoDB.scan(params2).promise()) before building the email.
exports.handler = function(event, context) {
console.log("Incoming: ", event);
dynamoDB.scan(params2, onScan); // scanning DDB
console.log('===SENDING EMAIL===');
const emailParams = generateEmailParams(event.body)
var email = ses.sendEmail(emailParams, function(err, data){
if(err) console.log(err);
else {
console.log("===EMAIL SENT===");
console.log(data); // log data
console.log("EMAIL CODE END"); //log end of email
console.log('EMAIL: ', email); // log email
context.succeed(event);
}
});
};
All the ses items are just sending the onScan function data via email. That works fine and is not the trouble, its that sesData never appends. The console.log’s print out the data from dynamoDB without issue but the push to array does not work.
After searching all over the internet, I do not really understand what’s going on as there are no errors so I am missing some sort of logic.
Async means the order of the code doesn't get executed in the same order you wrote it.
dynamoDB.scan is an async function. You are talking to your DynamoDB, and it takes time — it might be only a few ms — but NodeJS will continue on to the next line of code while the scan finishes.
Lets take the following example
// Demonstration: prints 1, 3, 2. The setTimeout callback fires ~1 s
// later, after the synchronous code below it has already run.
let count = 1
console.log(count)
setTimeout(function() {
count = 2
console.log(count)
}, 1000)
count = 3
console.log(count)
setTimeout is a async function, it executes after x ms, in this case 1000 ms = 1 sec. So its similar to your dynamoDB.scan function, it's starts right away, but it takes some time to finish and meanwhile, nodeJS will continue running your code line by line.
So the order in the code is 1, 2, 3. But when you run the snippet, it comes out 1, 3, 2. Even if you set the timeout to 0 ms, it will still be 1, 3, 2.
// Same demo with a 0 ms delay: still prints 1, 3, 2, because the timer
// callback is queued and only runs once the current synchronous code
// has finished.
let count = 1
console.log(count)
setTimeout(function() {
count = 2
console.log(count)
}, 0)
count = 3
console.log(count)
This is because its an async function and will be put on the bottom of the callstack. Callstack is a fancy javascript word.
To understand this checkout: https://www.youtube.com/watch?v=8aGhZQkoFbQ
Its a really good video that explains how JavaScript works and not to difficult to understand.
Note that the onScan function is a callback function and is executed when the dynamoDB.scan method has finished. So it's like "Hey DynamoDB, call the scan function and then do this onScan stuff I created".
You programmed it so that when dynamoDB.scan has finished it should call onScan, which adds to the sesData array; but the generateEmailParams call is outside the callback, so it gets called right away after you called dynamoDB.scan, not after it has finished.
So what is happening in your code is this:
You create array sesData
You call dynamoDB.scan and pass it the onScan callback function.
The function starts, but it async and it takes time to connect to dynamoDB, etc
You call generateParams, but the onScan function hasn't finished.
Your code generated the email before the onScan function added items to
sesData.
To fix this, you need to:
Include the generateParams in the onScan callback.
Use promise chains with .then
Use async/await
I haven't used AWS but quick googling shows that scan can return a promise by doing
dyanmoDb.scan(params).promise()
Notice that there is no callback function here, because it returns a promise.
You can then do something like
// Sketch of the async/await fix: awaiting the promisified scan
// guarantees sesData is populated before it is read.
// NOTE(review): `...` is a placeholder, not valid JS.
exports.handler = async function(event, context) {
...
await dynamoDb.scan(params).promise()
...
console.log(sesData)
}

Node asynchronous calls in sequence

I have a couple of asynchronous requests that fetch some data from a url. The problem I am having is that I actually want to delay sending the json response until all requests have come back. The code looks something like this:
// Fix: the original dropped the `function` keyword —
// `getFirstStuff(callback) { ... }` is a SyntaxError at top level.
// Stands in for an async operation that eventually invokes `callback`
// with an array of 3 items. NOTE(review): `stuff` is not defined within
// the snippet itself.
function getFirstStuff(callback) //imagine this is async
{
  console.log('gettingFirstStuff');
  callback(stuff);
}
// Fix: the body called `callBack` (capital B) but the parameter is
// `callback` — a ReferenceError at runtime. Stands in for a second
// async operation that eventually invokes `callback` with a value.
// NOTE(review): `thing` is not defined within the snippet itself.
function getFurtherStuff(callback) //imagine this is async
{
  console.log('gettingFurtherStuff');
  callback(thing);
}
// Fix: the parameter was named `callBack` but the body invoked
// `callback` — a ReferenceError. The parameter is renamed to match
// (positional, so callers are unaffected).
// NOTE(review): the question's demonstrated bug is deliberately kept:
// map is synchronous, so callback(stuff) fires before any
// getFurtherStuff result arrives; also `stuff.thing = thing` keeps
// overwriting a single property on the array rather than attaching a
// result per item.
function getStuff(callback)
{
  getFirstStuff(function(stuff) // async
  {
    // stuff is an array of 3 items
    stuff = stuff.map(function(item) // map is synchronous
    {
      // For each item in stuff make another async request
      getFurtherStuff( function(thing) { // this is also async
        stuff.thing = thing;
      });
      return item;
    });
    callback(stuff);
  });
}
// Express route: responds with the stuff array as JSON. Because of the
// synchronous-map issue described in the question, this currently sends
// the response before the getFurtherStuff results arrive.
router.get('/getstuff', function(req, res, next) {
getStuff(function(stuff)
{
console.log('finished stuff');
// RETURN RESPONSE AS JSON
res.json(stuff);
});
});
The output will be:
gettingFirstStuff
finished stuff
gettingFurtherStuff
gettingFurtherStuff
gettingFurtherStuff
but it should be:
gettingFirstStuff
gettingFurtherStuff
gettingFurtherStuff
gettingFurtherStuff
finished stuff
I understand that the reason is that getFurtherStuff is async and item will be returned from map before the getFurtherStuff async calls are back with a result. My question is, what is the standard way to wait for these calls to finish before calling the final callback 'callback(stuff)'
There are a bunch of ways to solve this problem. Libraries like async and queue would probably be the best choice, if you have no problem adding dependencies.
The easiest option without external libs is just to count the async jobs and finish when they're all done:
// Counting pattern: launch every async job, and invoke the final
// callback only once the completion counter reaches the job count.
// NOTE(review): if `stuff` is empty, no job ever increments the counter
// and `callback` is never invoked — real code should guard with an
// upfront `if (jobCount === 0) return callback(stuff);`. Also, map is
// used purely for side effects here; forEach would state the intent.
// assuming stuff is an array
var counter = 0;
var jobCount = stuff.length;
// wrap callback in one that checks the counter
var doneCallback = function() {
if (counter >= jobCount) {
// we're ready to go
callback(stuff);
}
};
// run jobs
stuff.map(function(item) {
getFurtherStuff(item, function(itemThing) {
// process async response
stuff.thing = itemThing;
// increment counter;
counter++;
// call the wrapped callback, which won't fire
// until all jobs are complete
doneCallback();
});
});
npm install async
You would then simply throw your functions in to an async.parallel()
More info at https://www.npmjs.com/package/async

Categories