I am having trouble appending items to an array and would like some help. I have reviewed this and understood none of it.
Here is my current code, which is being run in AWS Lambda (Node.js 10.x):
var sesData = ["array0", "array1"];

/**
 * Callback for dynamoDB.scan: records every returned item in sesData and
 * requests the next page while the response carries a LastEvaluatedKey.
 *
 * @param {*} err - Scan error, if any; it is logged as JSON and nothing else happens.
 * @param {*} data - Scan response; data.Items and data.LastEvaluatedKey are read.
 */
function onScan(err, data) {
  if (err) {
    console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
    return;
  }

  console.log("Scan succeeded.");
  data.Items.forEach((record) => {
    const serialized = JSON.stringify(record);
    // append new value to the sesData array
    sesData.push("Item :" + serialized);
    console.log(sesData);
    console.log("Item :", serialized);
  });

  // continue scanning if we have more items in case it is lots of data
  const nextKey = data.LastEvaluatedKey;
  if (nextKey === undefined) {
    return;
  }
  console.log("Scanning for more...");
  params2.ExclusiveStartKey = nextKey;
  dynamoDB.scan(params2, onScan);
}
/**
 * Builds the parameter object handed to ses.sendEmail.
 * Source, recipient and reply-to are all myEmail; the text body embeds the
 * contents of the module-level sesData array at the moment of the call.
 *
 * @param {*} body - Not read by this function.
 * @returns {object} Object with Source, Destination, ReplyToAddresses and Message.
 */
function generateEmailParams (body) {
  const charset = 'UTF-8';
  const textPart = { Charset: charset, Data: `Message sent. \nContent: \n${sesData}` };
  const subjectPart = { Charset: charset, Data: `Subject` };

  return {
    Source: myEmail,
    Destination: { ToAddresses: [myEmail] },
    ReplyToAddresses: [myEmail],
    Message: {
      Body: { Text: textPart },
      Subject: subjectPart
    }
  };
}
//End email Params
/**
 * Lambda entry point: starts the DynamoDB scan and sends the notification email.
 *
 * NOTE: dynamoDB.scan only starts the scan; onScan runs later. The email
 * parameters are generated on the following lines, before onScan has pushed
 * anything into sesData.
 */
exports.handler = function(event, context) {
console.log("Incoming: ", event);
dynamoDB.scan(params2, onScan); // scanning DDB
console.log('===SENDING EMAIL===');
// Built immediately after the scan was started, not after it completed.
const emailParams = generateEmailParams(event.body)
var email = ses.sendEmail(emailParams, function(err, data){
if(err) console.log(err);
else {
console.log("===EMAIL SENT===");
console.log(data); // log data
console.log("EMAIL CODE END"); //log end of email
console.log('EMAIL: ', email); // log email
// context.succeed is only reached on this (no error) branch.
context.succeed(event);
}
});
};
All the ses items are just sending the onScan function data via email. That works fine and is not the trouble, its that sesData never appends. The console.log’s print out the data from dynamoDB without issue but the push to array does not work.
After searching all over the internet, I do not really understand what’s going on as there are no errors so I am missing some sort of logic.
Async means the code doesn't get executed in the same order you wrote it.
dynamoDB.scan is an async function. You are talking to your DynamoDB, and that takes time; it might be only a few ms, but Node.js will continue on to the next line of code while the scan finishes.
Lets take the following example
// Demo: a callback scheduled with setTimeout runs after the surrounding code.
let count = 1
console.log(count) // printed first: 1
setTimeout(function() {
// runs roughly 1000 ms later, after the lines below have already executed
count = 2
console.log(count) // printed last: 2
}, 1000)
count = 3
console.log(count) // printed second: 3
setTimeout is an async function; it executes its callback after x ms, in this case 1000 ms = 1 sec. So it's similar to your dynamoDB.scan function: it starts right away, but it takes some time to finish, and meanwhile Node.js will continue running your code line by line.
So the order in the code is 1, 2, 3. But when you run the snippet it prints 1, 3, 2. Even if you set the timeout to 0 ms, it will still be 1, 3, 2.
// Same demo with a 0 ms delay: the callback still runs after the synchronous code.
let count = 1
console.log(count) // printed first: 1
setTimeout(function() {
// deferred even with a 0 ms timeout
count = 2
console.log(count) // printed last: 2
}, 0)
count = 3
console.log(count) // printed second: 3
This is because it's an async function: its callback is put on the callback queue and only runs once the call stack is empty. "Call stack" is a fancy JavaScript term.
To understand this checkout: https://www.youtube.com/watch?v=8aGhZQkoFbQ
It's a really good video that explains how JavaScript works, and it is not too difficult to understand.
Note that the onScan function is a callback function and is executed when the dynamoDB.scan method has finished. So it's like "Hey DynamoDB, run the scan and then do this onScan stuff I created".
You programmed it so that when dynamoDB.scan has finished it calls onScan, which adds to the sesData array, but the generateEmailParams call is outside the callback, so it runs right away after you call dynamoDB.scan, not after the scan has finished.
So what is happening in your code is this:
You create array sesData
You call dynamoDB.scan and pass it the onScan callback function.
The function starts, but it is async and it takes time to connect to DynamoDB, etc.
You call generateEmailParams, but the onScan function hasn't run yet.
Your code generated the email before the onScan function added items to
sesData.
To fix this, you can do one of the following:
Include the generateEmailParams call in the onScan callback.
Use promise chains with .then
Use async/await
I haven't used AWS but quick googling shows that scan can return a promise by doing
// Identifier spelled "dynamoDb", matching the handler example that follows.
dynamoDb.scan(params).promise()
Notice that there is no callback function here, because it returns a promise.
You can then do something like
// Sketch of an async handler; the "..." lines stand for code that is not shown.
exports.handler = async function(event, context) {
...
// The handler pauses here until the scan promise settles.
await dynamoDb.scan(params).promise()
...
console.log(sesData)
}
Related
I'm working in a NodeJS project, this project I decided to change the way I'm doing it because this way wasn't working, let me try to explain it.
I need to insert data into a SQL Server DB, so I did a function insertOffice() this function opens a connection using Tedious, then fetchs data to an url with data from an array data2 to load coords, and then with this coords creates an object, then inserts this object into a DB. When inserting only one part of my data2 array, it works, by only sendind data[0] it adds:
{
latjson: 1,
lonjson: 1,
idoficina: "1",
}
But I want to insert both of the parts of my array, changing data2[0] to data2[index], to be able to insert all my array, so I tried creating another function functionLooper()that loops insertOffice() to insert my data from my array data2.
I builded this little code to learn how to loop a function, this prints index that is the value I use for bringing idoficina.
As you can see functionLooper() runs the code twice, so it can read fully data2 array, I have this little code that works with the same logic, I builded my full code using this:
// Stand-in for the real insert: prints the index it receives.
function insertOffice(index) {
  console.log(index);
}

// Calls insertOffice once for each index from 0 to 4, in order.
function functionLooper() {
  let i = 0;
  while (i < 5) {
    insertOffice(i);
    i += 1;
  }
}

functionLooper();
This prints:
0
1
2
3
4
So my code it's supposed to send index
I'm expecting my code to loop my insertOffice() and being able to insert my objects, the issue is that this doesn't seems to work as I am getting this error:
C:\...\node_modules\tedious\lib\connection.js:993
throw new _errors.ConnectionError('`.connect` can not be called on a Connection in `' + this.state.name + '` state.');
^
ConnectionError: `.connect` can not be called on a Connection in `Connecting` state.
this is my code:
var config = {
....
};
const data2 = [
...
];
// One Connection object is created here and shared by every insertOffice() call.
var connection = new Connection(config);
/**
 * Registers a "connect" listener, calls connection.connect(), builds the
 * stored-procedure request and executes it on the shared connection.
 *
 * @param index - value passed in by functionLooper(); only logged in the code shown.
 */
function insertOffice(index) {
console.log(index)
// A new listener is added on every call; the err argument is not inspected.
connection.on("connect", function (err) {
console.log("Successful connection");
});
// Called on every invocation, always on the same shared connection object.
connection.connect();
const request = new Request(
"EXEC SPInsert #Data1, ... ",
function (err) {
if (err) {
console.log("Couldn't insert, " + err);
} else {
console.log("Inserted")
}
}
);
// NOTE(review): myObject is not defined in the code shown — presumably built elsewhere; confirm.
console.log(myObject.Id_Oficina)
request.addParameter("Data1", TYPES.SmallInt, myObject.Id_Oficina);
request.on("row", function (columns) {
columns.forEach(function (column) {
if (column.value === null) {
console.log("NULL");
} else {
console.log("Product id of inserted item is " + column.value);
}
});
});
// The shared connection is closed as soon as this request completes.
request.on("requestCompleted", function () {
connection.close();
});
// Issued in the same synchronous pass as connect(), without waiting for the "connect" event.
connection.execSql(request);
}
// Runs insertOffice for index 0 and then index 1, one call right after the other.
function functionLooper() {
  [0, 1].forEach((i) => {
    insertOffice(i);
  });
}

functionLooper();
I do not know if this is the right way to do it (looping the inserting function insertOffice()twice), if you know a better way to do it and if you could show me how in an example using a similar code to mine, would really appreciate it.
You're approaching an asynchronous problem as if it's a synchronous one. You're also making your life a bit harder by mixing event based async tasks with promise based ones.
For example, connection.connect() is asynchronous (meaning that it doesn't finish all its work before the next lines of code is executed), it is only done when connection emits the connect event. So the trigger for starting the processing of your data should not be started until this event is fired.
For each of the events in your loop they are not running one at a time but all at the same time because the fetch() is a promise (asynchronous) it doesn't complete before the next iteration of the loop. In some cases it may have even finished before the database connection is ready, meaning the code execution has moved on to DB requests before the connection to the database is established.
To allow your code to be as manageable as possible you should aim to "promisify" the connection / requests so that you can then write an entirely promise based program, rather than mixing promises and events (which will be pretty tricky to manage - but is possible).
For example:
const connection = new Connection(config);

// turn the connection event into a promise
/**
 * Starts connecting and returns a promise for the outcome of the next
 * "connect" event: it resolves with the connection, or rejects with the
 * error passed to the event.
 */
function connect() {
  return new Promise((resolve, reject) => {
    const onConnect = (err) => {
      if (err) {
        reject(err);
      } else {
        resolve(connection);
      }
    };
    // The listener is registered before connect() is called.
    connection.once('connect', onConnect);
    connection.connect();
  });
}
// insert your data once the connection is ready and then close it when all the work is done
/**
 * Connects, runs the work placed in the first step, then closes the connection.
 *
 * @returns {Promise} Settles when the whole chain has finished, so callers can
 *   await it or attach their own .catch() for connection or insert errors.
 */
function insertOffices() {
  // Return the chain: otherwise callers cannot wait for it or observe a rejection.
  return connect().then((conn) => {
    // connection is ready I can do what I want
    // NB: Make sure you return a promise here otherwise the connection.close() call will fire before it's done
  }).then(() => {
    connection.close();
  });
}
The same approach can be taken to "promisify" the inserts.
// turn a DB request into a promise
/**
 * Executes one request on the given connection and returns a promise that
 * resolves on the request's 'requestCompleted' event and rejects on its
 * 'error' event.
 *
 * @param conn - connection whose execSql() runs the request.
 */
function request(conn) {
return new Promise((resolve, reject) => {
// "..." is a placeholder for the real Request arguments.
const request = new Request(...);
request.once('error', reject);
request.once('requestCompleted', resolve);
conn.execSql(request);
});
}
This can then be combined to perform a loop where it's executed one at a time:
/**
 * Connects, runs `limit` inserts strictly one after the other, then closes
 * the connection.
 *
 * The connection is closed whether the inserts succeed or fail; a failure is
 * re-thrown after closing so the caller still sees the rejection.
 *
 * @returns {Promise<void>} Resolves once every insert is done and the
 *   connection is closed; rejects with the first insert (or connect) error.
 */
function doInserts() {
  return connect().then((conn) => {
    // create a "chain" of promises that execute one after the other
    let inserts = Promise.resolve();
    for (let i = 0; i < limit; i++) {
      inserts = inserts.then(() => request(conn));
    }
    return inserts.then(
      () => {
        conn.close();
      },
      (err) => {
        // Release the connection before propagating the failure.
        conn.close();
        throw err;
      }
    );
  });
}
or in parallel:
function doInserts() {
return connect().then((conn) => {
// create an array of promises that all execute independently
// NB - this probably won't work currently because it would need
// multiple connections to work (rather than one)
let inserts = [];
for (let i = 0; i < limit; i++) {
inserts.push(request(conn));
}
return Promise.all(inserts);
}).then(() => connection.close())
}
I finally fixed it. I'm sharing my code so that everyone can use it to do multiple inserts. Thanks to Dan Hensby (I didn't do it his way, but I used part of what he said), and thanks to RbarryYoung and MichaelSun90, who told me how. All I did was move my
var connection = new Connection(config);
to run inside my
function insertOffice(index) { ... }
Looking like this:
function insertOffice(index) {
var connection = new Connection(config);
....
}
Below is a simple example of what I'm currently working with: a websocket stream which does some asynchronous calls as part of the logic when consuming the incoming data. I'm mimicking async calls with a Promise-ified setTimeout function:
/**
 * Simulates an asynchronous call.
 * @returns {Promise<string>} Resolves with 'async work done' after a 5 ms timer.
 */
function someAsyncWork() {
  const delayMs = 5;
  return new Promise((resolve) => {
    const finish = () => {
      resolve('async work done');
    };
    setTimeout(finish, delayMs);
  });
}
// Waits for someAsyncWork() and prints the value it resolved with.
async function msg() {
  console.log(await someAsyncWork());
}
// Subscribes to pending transactions; every "data" event starts its own async handler.
const main = async() => {
web3.eth.subscribe('pendingTransactions').on("data", async function(tx){
console.log('1st print: ',tx);
// Only this handler invocation waits here; handlers for other events are not held back.
await msg();
console.log('2nd print: ',tx);
})
}
main();
Running the above results in a console output like so:
1st print: 0x8be207fcef...
1st print: 0x753c308980...
1st print: 0x4afa9c548d...
async work done
2nd print: 0x8be207fcef...
async work done
2nd print: 0x753c308980...
async work done
2nd print: 0x4afa9c548d...
.
.
.
I get what's happening here. The '1st print' is executed, followed by await-ing the async calls for each piece of data response. The '2nd print' is only executed after the 'async work done' occurs.
However this isn't quite what I'm looking for.
My logic has conditionals in place, where each data response will first use a global variable to check for a condition, followed by some async work if condition is met. Problem is that there are instances where some data responses will go ahead and execute async work when they shouldn't have: Nodejs's event loop hasn't had a chance to transfer some previous data response's async calls from the callback queue to the call stack, as the stack was too busy processing new incoming data. This means the '2nd prints' haven't executed (where the global variable is updated) before new incoming data has been processed. I imagine the someAsyncWork is only resolved when there is a free period in the websocket with no data incoming.
My question is: is there a way to ensure full, sequential processing of each piece of new data? Ideally the console output would look something like this:
1st print: 0x8be207fcef...
async work done
2nd print: 0x8be207fcef...
1st print: 0x753c308980...
async work done
2nd print: 0x753c308980...
1st print: 0x4afa9c548d...
async work done
2nd print: 0x4afa9c548d...
.
.
.
You can have a queue-like promise that keeps on accumulating promises to make sure they run sequentially:
// Tail of the queue: each new task is chained onto whatever was queued last.
let cur = Promise.resolve();

/**
 * Queues `f` to run after every previously queued task has settled.
 *
 * A failing task is logged and does not stop the queue: tasks queued after
 * it still run, in order.
 *
 * @param {Function} f - Task to run; may return a promise.
 * @returns {Promise} Settles with the outcome of `f`, for callers that want
 *   to await this particular task or handle its error themselves.
 */
function enqueue(f) {
  const result = cur.then(f);
  // Keep the tail fulfilled so one rejection does not skip all later tasks.
  cur = result.catch((err) => {
    console.error('Queued task failed:', err);
  });
  return result;
}
/**
 * Stand-in for real asynchronous work.
 * @returns {Promise<string>} Resolves with 'async work done' after 5 ms.
 */
function someAsyncWork() {
  return new Promise((done) => {
    // Extra setTimeout arguments are handed to the callback, i.e. to done().
    setTimeout(done, 5, 'async work done');
  });
}
// Prints whatever someAsyncWork() resolves with once it has finished.
async function msg() {
  const outcome = await someAsyncWork();
  console.log(outcome);
}
// Subscribes to pending transactions and pushes the work for each event onto the queue.
const main = async() => {
// The "data" listener itself is synchronous: it only enqueues the async task.
web3.eth.subscribe('pendingTransactions').on("data", function(tx) {
enqueue(async function() {
console.log('1st print: ',tx);
await msg();
console.log('2nd print: ',tx);
});
})
}
main();
I'm trying to understand the async library for Node.js and apply it, but it doesn't work as expected. Calls are made in parallel when I was expecting them to be made in series (i.e. one after another).
Here is my code
// For each user (outer series), fetch daily and monthly earnings from every
// income provider (inner series), then send emails.
var users = [{_id:'1',username:'user1'},{_id:'2',username:'user2'}];
async.eachSeries(users,function function1(user,callbackEach){
var username = user.username;
var incomes = [{source:'sourceA',provider:providerA},{source:'sourceB',provider:providerB},{source:'sourceC',provider:providerC}];
async.eachSeries(incomes,function function2(income,callbackSmallEach){
var source = income.source;
income.provider.getEarnings(user._id,username,yesterday,function callbackFromGetEarnings(err,result){
if (err){
// error
// NOTE(review): callbackSmallEach is not called on this branch.
} else {
income.earnings = {day : new Number(result)};
income.provider.getMonthEarnings(user._id,username,yesterday,function callbackFromGetMonthEarnings(err,monthTotal){
if (err){
// error
// NOTE(review): callbackSmallEach is not called on this branch either.
} else {
income.earnings.month = new Number(monthTotal);
// Signals that this income is finished so the inner series can move on.
callbackSmallEach();
}
});
}
});
},
// Final callback of the inner series.
function sendEmails(err){
if (err) {
// error
} else {
// send email
}
});
// These two lines run as soon as the inner series has been started, not when
// it has finished, so the outer series advances to the next user immediately.
console.log("Just before calling callbackEach()");
callbackEach();
});
getEarnings and getMonthEarnings use an external provider, so some time can occur until callback functions callbackFromGetEarnings and callbackFromGetMonthEarnings are called.
My problem is that I dont want both calls to getEarnings to be executed in parallel. I want function getEarnings for user2 to be called only after getEarnings for user1 has returned (and corresponding callbacks have been made).
As you can see, I have tried to make the calls for user1 and user2 in series with async.eachSeries, but the calls are made in parallel. When I execute, the log Just before calling callbackEach() is always printed before the callback function callbackFromGetEarnings is called...
I hope this is clear enough.
The problem is where you are calling your callback. Example:
// Outer series over `something`; each item only completes once the inner
// series over item.somethingElse has completed.
async.eachSeries(something, (item, nextItem) => {
  const handleSubitem = (subitem, nextSubitem) => {
    //do something
    return nextSubitem();
  };
  //when all item.somethingElse is done, call the upper callback
  const subitemsDone = () => nextItem();

  async.eachSeries(item.somethingElse, handleSubitem, subitemsDone);
}, () => {
  console.log('done');
});
This way, for each item in something, you will execute in series all the item.somethingElse, before going to the next item.
I have Node code that fetches the list of websites to crawl and, while iterating through the result, calls a crawling function written using phantom. But before the crawling function returns its result, the loop has already iterated several more times, so the crawling function is called many times at once, which it is not able to handle. I need an immediate answer for my issue.
Please somebody take me out of this well.
my main page code
// Fetches every service entity and, for each one, looks up its entity id,
// counts its reviews and crawls the new ones.
db.fetch_serviceEntity(function(serviceEntityData){
if(serviceEntityData!=""){
// forEach starts the lookups for all items without waiting for any of them.
serviceEntityData.forEach(function(item){
console.log(item.website_url);
db.fetch_entityId(item.id,function(entityId){
crawler.getCount(item.website_url, item.name, function(rCount){
console.log("number of fresh reviews to crawl : ", parseInt(rCount) - parseInt(item.review_count));
if(rCount > item.review_count){
fetchReviews(item.website_url, entityId.id, parseInt(rCount) - parseInt(item.review_count), function(){
db.updateReviewCount(item.id, rCount, function(){
// Exits the whole process after the first item that finishes updating.
process.exit(0);
});
});
}
});
});
// NOTE(review): the closers on the next two lines do not balance as posted
// ("};" where the forEach call would need "});") — confirm against the real file.
};
});
}
else {
console.log("No websites to crawl/database error");
}
// Runs as soon as this callback returns, i.e. while the callbacks started above are still pending.
process.exit(0);
});
my crawl function is here
/**
 * Opens webUrl in a new phantom browser page, injects jQuery, waits 5 seconds,
 * scrapes the reviews and passes the result to callback.
 *
 * @param webUrl - address of the page to open.
 * @param callback - called with the value returned by the page evaluation
 *   ({ reviews: [...] } in the code shown).
 *
 * NOTE(review): none of the err arguments below are checked, so browser or
 * page may be undefined when they are used.
 */
crawler.prototype.crawl = function(webUrl, callback){
console.log(webUrl);
this.driver.create({ path: require('phantomjs').path }, function (err, browser) {
return browser.createPage(function (err,page) {
return page.open(webUrl, function (err,status) {
console.log("opened site? ", status);
page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js', function (err) {
// Wait for a bit for AJAX content to load on the page. Here, we are waiting 5 seconds.
setTimeout(function () {
return page.evaluate(function () {
//Get what you want from the page using jQuery.
var reviews = [];
$('li div.review').each(function () {
// Only review nodes with non-empty div.ypassport text are collected.
if($(this).find('div.ypassport').text()){
// NOTE(review): d is computed but not used in the code shown.
var d = new Date($(this).find('span.rating-qualifier').text().trim());
var temp = {
id : $(this).attr('data-review-id'),
entity_id : "",
user_id : $(this).attr('data-signup-object').split(":")[1],
}
reviews.push(temp);
}
});
return {
reviews: reviews
};
}, function (err,result) {
// The browser is shut down before the result is handed to the caller.
browser.exit();
callback(result);
});
}, 5000);
});
});
});
});
};
I am using node-phantom-simple for writing crawling function
My problem here is that, because the loop makes a number of calls to it, the crawl function gives me errors saying that some object or other was not created.
For example, it says "createPage is not a function of undefined", meaning the browser object was not created.
Sometimes it says "open is not a function of undefined", meaning the page object was not created.
You have async functions, but if you call process.exit(0) when you return from your first function, all database connections are dropped and db.updateReviewCount is never called. So you get more or less arbitrary results, depending on which operation finishes first.
(Beside that, the code is a callback hell. Maybe you want to create smaller functions and then chain them with a library like async or co or even by hand.)
I'm trying out the framework node.js on one of my projects.
I'm really seeing some good advantages in what they call the "event-driven, non-blocking I/O model"; however, in my project there are some moments where I don't necessarily want asynchronous calls, and where I want to be able to perform several operations before launching an asynchronous call.
Especially when I want to refactor my code and create some functions.
Typically I have the following case:
I know that in several parts of my program I have to check whether a media item exists in my database for a given string or id.
So, as someone who tries to stay organized, I want to create a function that I will call each time I need to check this.
However, I did not find a way to do that with Node.js and pg (the npm PostgreSQL library, https://github.com/brianc/node-postgres/). Indeed, there is always a callback in the function, and the return value is null because of the callback. Here is an example below:
/*
Function which is supposed to check whether a media item exists.
url_or_id: a url (non-numeric string) or a numeric id; isNaN() picks the query.
The function itself has no return statement: both "return query" statements
below return from the pg.connect callback, not from is_media_existing.
*/
function is_media_existing (url_or_id){
log.debug("is_media_existing : begin of the function", {"Parameter" : url_or_id});
pg.connect(connectionstring, function (err, client, done) {
if (err) {
log.warning("is_media_existing : Problem with Database connection", {
"Parameter": url_or_id,
"Error": err
});
// NOTE(review): execution continues after a connection error; client is still used below.
}
if (isNaN(url_or_id)) {
// Case: parameter is not a number (string), so look it up by url
var query = client.query('SELECT COUNT(*) as count FROM media WHERE url = $1::string ', url_or_id);
query.on('error', function (error) {
log.warning("is_media_existing : Problem with Database query (connection to db passed but not query " +
"", {"Parameter": url_or_id, "Error": error});
});
return query;
} else {
// Case: parameter is a number, so look it up by id
log.debug("is_media_existing : Type of Parameter is a string");
// NOTE(review): the debug message above says "string" although this is the numeric branch.
var query = client.query('SELECT COUNT(*) as count FROM media WHERE id = $1::id ', url_or_id);
query.on('error', function (error) {
log.warning("is_media_existing : Problem with Database query (connection to db passed but not query " +
"", {"Parameter": url_or_id, "Error": error});
});
return query;
}
});
}
// Executing the function
var test = is_media_existing("http://random_url_existing_in_db");
// test never holds the query (strictly, it is undefined): the return statements are inside the asynchronous callback
i have the feeling my question is touching the core concepts of node.js, and perhaps my approach is wrong and I apologize in advance.
I know it's not good to wait for a response before doing something.
But what's the alternative? How can I factorize my code into functions when I need some functionalities in several part of my code?
So if there would be anyone who could explain how to do that with a best practice of programming it would be great.
Thanks
Anselme
As Cody says, you probably don't want a synchronous function.
The way you should handle the situation in your example is to pass in your own callback, like this:
function is_media_existing (url_or_id, callback){
and then instead of return query; use your callback like this-
callback(query);
or probably better to follow the node convention for callback functions to have two parameters (err, result) so your callback would look like this
callback(null, query);
Here is a rework of your sample
/**
 * Counts the media rows matching a url or an id and reports the outcome
 * through a node-style callback.
 *
 * @param {string|number} url_or_id - Non-numeric values are matched against
 *   media.url, numeric values against media.id (decided with isNaN).
 * @param {function} callback - callback(err, result): err is the connection
 *   or query error (result is null for a connection error), otherwise result
 *   is whatever client.query produced.
 */
function is_media_existing (url_or_id, callback){ /* callback(err, result) */
  log.debug("is_media_existing : begin of the function", {"Parameter" : url_or_id});
  pg.connect(connectionstring, function (err, client, done) {
    if (err) {
      done(err);
      log.warning("is_media_existing : Problem with Database connection", {
        "Parameter": url_or_id,
        "Error": err
      });
      /* this return only exits the connect callback; the error reaches the
       * caller through callback() */
      return callback(err, null);
    }
    var qrystr;
    // No explicit cast on $1: "string" and "id" are not PostgreSQL type names,
    // and the server infers the parameter type from the compared column.
    if (isNaN(url_or_id)) {
      log.debug("is_media_existing : Type of Parameter is a string");
      qrystr = 'SELECT COUNT(*) as count FROM media WHERE url = $1;';
    } else {
      qrystr = 'SELECT COUNT(*) as count FROM media WHERE id = $1;';
    }
    client.query(qrystr, [url_or_id], function(err, result){
      // Hand the client back before reporting, whatever the outcome.
      done();
      if(err){
        log.warning("is_media_existing : Problem with Database query (connection to db passed but not query " +
          "", {"Parameter": url_or_id, "Error": err});
      }
      callback(err, result);
    });
  });
}
// Executing the function
// The outcome arrives in the callback; is_media_existing has no return statement of its own.
var test = is_media_existing("http://random_url_existing_in_db", function(err, result){
if(err){
// connection or query failed: handle err here
}else {
// query succeeded: use result here
}
});
If you end up with a hard nest of callbacks, promises are really worth looking into.
I don't think you really do want a synchronous call. The problem with synchronous calls in Node is that they stop the entire process from doing anything while the synchronous function is running, because they block the event loop. As an example, let's say your sync function takes 2 seconds to complete. Your server will then do nothing for 2 full seconds. That includes everything (accepting new connections, everything else, etc.). The reason blocking functions don't exist is that they are (very) bad. Here is an example of how your function would work in an async manner.
// Usage: the answer is delivered to the callback as a single boolean-like argument.
is_media_existing("http://random_url_existing_in_db", function(exists){
if (exists){
//do stuff
} else {
//do this other stuff
}
});
Then within is_media_existing you will need to call that callback function when your query completes.
// pseudo
/**
 * Asks the database whether a media row exists and reports the answer to
 * callback as a single boolean.
 *
 * @param url - value bound to $1 in the query.
 * @param {function} callback - callback(exists); receives false when the
 *   query fails, otherwise whether the reported count is above zero.
 *
 * NOTE(review): `query` is a placeholder helper; result.count assumes it
 * exposes the count directly — adapt to the real driver's result shape.
 */
function is_media_existing(url, callback){
  // Arguments are separated by commas: (sql, params, callback).
  query('select COUNT(*) as count FROM media WHERE id = $1', [url], function(err, result){
    if (err) {
      // A failed query is reported as "does not exist".
      callback(false);
    } else {
      callback(result.count > 0);
    }
  });
}
With the new ES6-plus async features and Babel, it's simpler. You can run npm i -g babel and npm i babel-runtime, then compile and run the following with babel test.js --optional runtime --stage 2 | node. Please read the following example carefully to see how to adapt it to your use case:
// In-memory stand-in for a table; a record's position in the array is its id.
let testData = [
  { id: 0, childIds: [1,2]},
  { id: 1, childIds:[] }
];

/**
 * Looks up the records for the given ids.
 *
 * Like a real query, only records that exist are returned: ids with no
 * record are left out instead of producing undefined entries.
 *
 * @param {number[]} ids - Ids to look up.
 * @returns {Promise<object[]>} The matching records, in the order requested.
 */
function dbGet(ids) {
  return new Promise((resolve) => {
    // this an example; you could do any db
    // query here and call resolve with the results
    const rows = ids
      .map((id) => testData[id])
      .filter((row) => row !== undefined);
    resolve(rows);
  });
}
/**
 * Reports whether dbGet() found anything for the given ids.
 * @returns {Promise} true/false for a non-empty/empty result; a falsy result
 *   from dbGet is passed through unchanged.
 */
async function dbExists(ids) {
  const rows = await dbGet(ids);
  if (!rows) {
    return rows;
  }
  return rows.length > 0;
}
// Checks whether id 0 exists and prints the answer.
async function test() {
  console.log(await dbExists([0]));
}

// Run it; a failure is printed with an 'e' prefix.
test()
  .then(() => {})
  .catch((err) => {
    console.log('e', err);
  });