I've created a script to migrate data from Dynamo to a Mysql DB.
First I was not using Async, but I started getting bottlenecks on the sql side, so I decided to "throttle" the dymano part using the async lib.
The problem: I have a recursion in the middle of the path, as long as dynamo has data I have to continue the process (ultra simple ETL), but I don't know how to perform the recursion inside the waterfall.
My code :
function main() {
async.waterfall([getMaxTimestamp, scanDynamoDB, printout, saveToMySQL], function(err, result) {
if(err) console.log(err)
console.log(result)
});
}
function getMaxTimestamp(callback) {
console.time("max query");
connection.query("SELECT MAX(created_at) as start_date from Tracking;", function(err, data) {
console.timeEnd("max query");
callback(err, data);
})
}
function scanDynamoDB(data, callback) {
if (data[0].start_date != null && data[0].start_date)
query.ExpressionAttributeValues[':v_ca'].N = data[0].start_date;
console.time("dynamo read");
dynamoDB.scan(query, function(err, data) {
console.timeEnd("dynamo read");
callback(err, data);
// if (!err) {
// if (data != undefined && data.Count > 0) {
// printout(data.Items) // Print out the subset of results.
// if (data.LastEvaluatedKey) { // Result is incomplete; there is more to come.
// query.ExclusiveStartKey = data.LastEvaluatedKey;
// scanDynamoDB(query);
// }
// } else {
// console.log('No fresh data found on Dynamo')
// } else console.dir(err);
});
};
function assembleSql() {
insertSql = "insert into Tracking (";
for (var i = 0; i < headers.length; i++) {
insertSql += headers[i];
if (i < headers.length - 1)
insertSql += ",";
}
insertSql += ") values ?;"
previousInsertSql = insertSql;
}
function saveToMySQL(items, callback) {
assembleSql();
//connection.connect();
console.time("insert sql")
connection.query(insertSql, [items], function(err, result) {
console.timeEnd("insert sql")
if (err){
callback(err, null)
return;
}
totalInserts += result.affectedRows;
callback(err, totalInserts)
//connection.end();
})
}
function printout(items, callback) {
var headersMap = {};
var values;
var header;
var value;
var out = [];
if (headers.length == 0) {
if (items.length > 0) {
for (var i = 0; i < items.length; i++) {
for (var key in items[i]) {
headersMap[key] = true;
}
}
}
for (var key in headersMap) {
headers.push(key);
}
}
for (index in items) {
values = [];
for (i = 0; i < headers.length; i++) {
value = "";
header = headers[i];
// Loop through the header rows, adding values if they exist
if (items[index].hasOwnProperty(header)) {
if (items[index][header].N) {
value = items[index][header].N;
} else if (items[index][header].S) {
value = items[index][header].S;
} else if (items[index][header].SS) {
value = items[index][header].SS.toString();
} else if (items[index][header].NS) {
value = items[index][header].NS.toString();
} else if (items[index][header].B) {
value = items[index][header].B.toString('base64');
} else if (items[index][header].M) {
value = JSON.stringify(items[index][header].M);
} else if (items[index][header].L) {
value = JSON.stringify(items[index][header].L);
} else if (items[index][header].BOOL !== undefined) {
value = items[index][header].BOOL.toString();
}
}
values.push(value)
}
out.push(values)
}
callback(null, out);
}
main();
The commented part is where the recursion happens, but I don't know where to place this inside my flow !
Any help would be appreciated !
Just don't call callback function inside scanDynamoDB while fetching data. You can implement additional function and call it recursive while errors is not appears, like below
function scanDynamoDB(data, callback) {
if (data[0].start_date != null && data[0].start_date)
query.ExpressionAttributeValues[':v_ca'].N = data[0].start_date;
console.time("dynamo read");
var result = []; // for accumulate data of each query
function readNext(err, data) {
if (err)
return callback(err);
if (!data || !data.Count)
return callback(null, result);
// add data to result
dynamoDB.scan(query, readNext);
}
dynamoDB.scan(query, readNext);
};
Actually I was able to figure it out by myself.
async.whilst(function() { return canInsert}, function (callback){
scanDynamoDB(query, callback)
}, function(err, res) {}
function scanDynamoDB(data, callback) {
console.time("dynamo read");
dynamoDB.scan(query, function(err, data) {
console.timeEnd("dynamo read");
if (!err) {
if (data != undefined && data.Count > 0) {
canInsert = data.LastEvaluatedKey;
if (data.LastEvaluatedKey) // Result is incomplete; there is more to come.
query.ExclusiveStartKey = data.LastEvaluatedKey;
}
} else console.dir(err);
});
};
I could have done it just with a while(canInsert). Anyway, I avoided recursion and memory usage is way way lower.
Related
My use case demands me to call an sql recursively till no rows are returned for which I have written the below code which due to async nature doesn't work as expected.
The piece of code which does this invocation is:
let Response = await getData(userId);
async function getData(userId) {
console.log("Invoking Get Data Function");
let arrayOfUserId = [userId];
let fetchMore = true,
j = 1;
let keyWithQoutes = -1;
return new Promise((resolve, reject) => {
do {
console.log(arrayOfUserId, j)
j++;
if (arrayOfUserId.length > 0) {
keyWithQoutes = arrayOfUserId.map((it) => {
return `'${it}'`;
});
}
const sql = ` Select userId from USER where reportingTo in (${arrayOfUserId})`;
console.log(' SQL Query ', sql);
con.query(sql, [], async(error, response) => {
if (error) {
fetchMore = false;
reject(error);
}
console.log(
" Response for ",
userId,
response,
response.length
);
if (response.length == 0) {
fetchMore = false;
resolve(arrayOfUserId);
}
else {
for (let i = 0; i < response.length; i++) {
console.log(response[i].userId);
arrayOfUserId.push(response[i].userId);
}
}
});
} while (fetchMore);
});
}
i having Api Call which execute in For Loop some of the value which returns 10 sec itself some may take nearly 60 sec i have to maintain proper Timeout and clear session (i.e if results comes at 15 sec means it should goes to next input values and run the code) but currenly its waiting for 45 sec each single record how to optimize it
here my sample code :
if (selectedrows.length >= 1) {
for (var i = 0; i < selectedrows.length; i++) {
var myVar = setTimeout (function (k) {
var ob = { results: "Appending ..." };
child.update(selectedrows[k][4], selectedrows[k][4], ob);
var fullName = selectedrows[k][1] + ' ' + selectedrows[k][2];
math.ResultCall.async(fullName,function (err, res) {
if (err) throw err;
var returnedValue = JSON.parse(res);
console.log(returnedValue);
if(returnedValue.Result == null || returnedValue.Result.FOUND_Result == null)
{
console.log("None found")
}
else{
var obj = { results: “res” };
child.update(selectedrows[k][4], selectedrows[k][4], obj);
}
}
});
}, i * 45000,i);
}
}
Rephrasing your question, you need to return the data when your api gets resolved.
For this please go through https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/resolve
JavaScript, by default it work asynchronously because of its event loop.
You have promises and resolve to get notified when your api returns a data
Hope I helped :)
There are several approaches to implement the solution
1. Async-Await: in-case the records-processing order is important
for( let i=0; i<selectedrows.length; i++)
{
let ob = { results: "Appending ..." };
child.update(selectedrows[i][4], selectedrows[i][4], ob);
let fullName = selectedrows[i][1] + ' ' + selectedrows[i][2];
await new Promise((resolve,reject)=>
{
math.ResultCall.async(fullName,(err, res) => {
if (err) reject(err);
let returnedValue = JSON.parse(res);
console.log(returnedValue);
if(returnedValue.Result == null || returnedValue.Result.FOUND_Result == null) {
console.log("None found")
} else {
let obj = { results: “res” };
child.update(selectedrows[i][4], selectedrows[i][4], obj);
}
resolve();
});
}
**don't forget this means the wrapping function should be async as well (which returns a promise that can be resolved if necessary)
2.Promise.All: if the order is not important
let promArray = [];
for( let i=0; i<selectedrows.length; i++)
{
let ob = { results: "Appending ..." };
child.update(selectedrows[i][4], selectedrows[i][4], ob);
let fullName = selectedrows[i][1] + ' ' + selectedrows[i][2];
promArray.push( new Promise((resolve,reject)=>
{
math.ResultCall.async(fullName,(err, res) => {
if (err) reject(err);
let returnedValue = JSON.parse(res);
console.log(returnedValue);
if(returnedValue.Result == null || returnedValue.Result.FOUND_Result == null) {
console.log("None found")
} else {
let obj = { results: “res” };
child.update(selectedrows[i][4], selectedrows[i][4], obj);
}
resolve();
});
);
}
Promise.all(promArray);
** this will also return a Promise that can be resolved if necessary.
I have an array of items and that need to be found and saved to MongoDB. Find from one model and get data from it and save via another model. assets is the array and count is the length of the array.
Problem - Do I need to check out all records that are saved to DB or not?
I need to get JSON response after complete pushing all records to the database.
router.route('/move_qa')
.post(function (req, res) {
console.log('/move_qa');
console.log("*assets" + req.body.assets);
var assets = req.body.assets;
var count = req.body.count;
for (var i = 0; i < count; i++) {
var aBarcode = assets[i];
console.log("$" + aBarcode);
searchAndSave(aBarcode, function (resulttt) {
console.log("#"+resulttt);
});
}
res.json({"result":true});
});
function searchAndSave(assetBarcode, callb) {
pallet.findOne({assets: assetBarcode}, function (err, count) {
if (err) {
console.log("1 /pallet_details");
console.log(err)
} else {
console.log("2 /pallet_details");
if (count == null) {
console.log("3/pallet_details");
} else {
var pbarcode = count.pBarcode;
var date = new Date();
var status = "NOT_COMPLETE";
var newMoveToQA = new movetoqa({
keg_barcode: assetBarcode,
relevant_pbarcode: pbarcode,
move_date: date,
status: status
});
newMoveToQA.save(function (err) {
console.log("####******");
if (err) {
console.log("4");
} else {
console.log("5");
callb(true);
}
});
}
}
});
}
I want to use method in other method in the same class to calc some data from db as below but Im only getting
error
[TypeError: Object #<Query> has no method 'methodMaxLct']"
exports.UserClass = function() {
this.methodMaxLct = function(lct, callback) {
var counting = Math.ceil(Math.pow(1.15, (lct - 1)) * 10) * 10;
callback(counting);
this.methodGetData = function(idu, callback) {
connection = mysql.createConnection(dbconfig);
connection.query(dataUserResources, [idu], function(err, results, fields) {
if (err) throw err;
if (results.length == 0) {
callback = 0;
} else {
for (var i in results) {
var dataU = results[i];
}
dataU.enMax = 30;
var ap = this.methodMaxLct(dataU.lct, function(answer) {
dataU.lctMax = answer;
});
callback(dataU);
}
connection.end();
});
};
};
Can anyone give me a tip or same clue how to do that in right way?
Try it like so. You have to store the this object into a variable so you can use it in an inner function. The this is different based on the execution context.
exports.UserClass = function() {
var self = this;
this.methodMaxLct = function(lct, callback) {
var counting = Math.ceil(Math.pow(1.15, (lct - 1)) * 10) * 10;
callback(counting);
this.methodGetData = function(idu, callback) {
connection = mysql.createConnection(dbconfig);
connection.query(dataUserResources, [idu], function(err, results, fields) {
if (err) throw err;
if (results.length == 0) {
callback = 0;
} else {
for (var i in results) {
var dataU = results[i];
}
dataU.enMax = 30;
var ap = self.methodMaxLct(dataU.lct, function(answer) {
dataU.lctMax = answer;
});
callback(dataU);
}
connection.end();
});
};
};
The context (this) of the callback function is determined when the callback function is called. So, you should use either arrow function or .bind( this )
Using an arrow function
exports.UserClass = function() {
this.methodMaxLct = function(lct, callback) {
var counting = Math.ceil(Math.pow(1.15, (lct - 1)) * 10) * 10;
callback(counting);
this.methodGetData = function(idu, callback) {
connection = mysql.createConnection(dbconfig);
// use arrow function
connection.query(dataUserResources, [idu], (err, results, fields) => {
if (err) throw err;
if (results.length == 0) {
callback = 0;
} else {
for (var i in results) {
var dataU = results[i];
}
dataU.enMax = 30;
var ap = this.methodMaxLct(dataU.lct, function(answer) {
dataU.lctMax = answer;
});
callback(dataU);
}
connection.end();
});
};
};
Using .bind(...) method
exports.UserClass = function() {
this.methodMaxLct = function(lct, callback) {
var counting = Math.ceil(Math.pow(1.15, (lct - 1)) * 10) * 10;
callback(counting);
this.methodGetData = function(idu, callback) {
connection = mysql.createConnection(dbconfig);
connection.query(dataUserResources, [idu], function(err, results, fields) {
if (err) throw err;
if (results.length == 0) {
callback = 0;
} else {
for (var i in results) {
var dataU = results[i];
}
dataU.enMax = 30;
var ap = this.methodMaxLct(dataU.lct, function(answer) {
dataU.lctMax = answer;
});
callback(dataU);
}
connection.end();
}.bind( this ) );
};
};
I develop application using AngularJS , NodeJS and MongoDB. I'd like to load Product with classified by ProductCategoryCode sending from
AngualrJS to NodeJS. First, I need to find Products by ProductCategoryCode and then iterate for each product to find Uoms by UomCode and ContainUomCode
which each product should has 2 uoms. How can I set uom object docUom back to product document doc[i] and update to product collection doc?
For following code line
doc[i].Uom = docUom;
The system throw error cannot set property 'Uom' of undefined.
Here is product.js snippet code.
router.get("/LoadProductByProductCategoryCode/:productCategoryCode", function (req, res) {
console.log('user.js -> /users ');
var productCategoryCode = req.params.productCategoryCode;
console.log(productCategoryCode );
var MongoClient = require('mongodb').MongoClient,
format = require('util').format;
MongoClient.connect('mongodb://localhost:27017/NodeDB', function (err, db) {
if (err) throw err;
var query = { ProductCategoryCode : productCategoryCode}
var new_product = [];
findProduct(db, query, function (err, doc) {
if(err) {
// something went wrong
console.log(err);
return;
}
if (doc) {
console.log("Found Product..."+doc.length);
for (var i = 0; i < doc.length; i++) {
console.log(doc[i].ProductCode + " each document " + doc[i].UomCode + " " + doc[i].ContainUomCode);
var qUom = {
$or: [ { UomCode: doc[i].UomCode}, { UomCode: doc[i].ContainUomCode } ]
}
// Find uom
findUom(db, qUom, function(errUom, docUom) {
if(errUom) {
console.log("error " + errUom);
return;
}
if (docUom) {
doc[i].Uom = docUom;
console.dir(product);
}
});
}
res.json(doc);
} else {
console.log('something happen');
}
}); //End
}); // MongoClient
var findProduct = function (db, query, callback) {
db.collection('Product').find(query).toArray(function (err, doc) {
if(err) {
callback(err);
}
else {
callback(null, doc);
}
});
}
var findUom = function(db, queryUom, callback) {
db.collection('Uom').find(queryUom).toArray(function (err, doc) {
// db.close();
if(err) {
callback(err);
}
else {
callback(null, doc);
}
});
}
});
Any idea? THANKS
Because of the asynchronous nature of the Node.js MongoDB driver, both the findProduct() and findUom() methods start, but don't necessarily complete by the time you reach res.json(doc) meaning doc will still be empty. You are expecting this to work in a linear fashion, but node works differently.
Instead, you should send your response back once all asynchronous calls complete meaning you could try something like:
findProduct(db, query, function (err, doc) {
if(err) {
// something went wrong
console.log(err);
return;
}
var processedProduct = function (item) {
console.log(item.ProductCode + " each document " + item.UomCode + " " + item.ContainUomCode);
var qUom = {
$or: [ { UomCode: item.UomCode}, { UomCode: item.ContainUomCode } ]
}
// Find uom
findUom(db, qUom, function(errUom, docUom) {
if(errUom) {
console.log("error " + errUom);
return;
}
if (docUom) {
item.Uom = docUom;
console.dir(product);
return item;
}
});
}
if (doc) {
var productsToFind = doc.length;
var products = [];
console.log("Found Products..." + productsToFind);
for (var i = 0; i < doc.length; i++) {
product = doc[i];
product = processedProduct(product);
products.push(product);
productsToFind -= 1;
if(productsToFind === 0){
res.json(products);
}
}
} else {
console.log('something happen');
}
}); //End
I could explain better about asynchronous calls and callbacks as this topic is a bit broad but from the above you can get the idea that I have used a counter productsToFind for all of the inner async calls that once each findUom() call completes this counter decrements and once it reaches 0 it means that all callbacks have fired.