I'm current trying to use sails.js with mongodb, I need some custom mapReduce function to group data.
Now I could achieve what I want by using waterline's native function, but have some questions.
These function has only small variation actually, but I found myself keep repeating codes like the following one:
function getSomeData() {
// First-query
Log.native(function(err, logCollection) {
var mapFunction = function() {
function dateFormatter(date) {
return date.getFullYear() + "-" + (date.getMonth() + 1)
}
//! Generate Grouping Key
emit(dateFormatter(this.emb_date), this.bad_qty)
}
var reduceFunction = function (key, values) {
return Array.sum(values);
}
var outputControl = {
out: {inline: 1},
//! Filters
query: {order_type: product}
}
logCollection.mapReduce(mapFunction, reduceFunction, outputControl, function (err, result) {
if (err) {
callback(err);
return;
}
var resultSet = [];
//! post-processing
for (var i = 0; i < result.length; i++) {
//.....
}
callback(err, resultSet);
});
});
}
Second-query:
function getAnotherData() {
Log.native(function(err, logCollection) {
var mapFunction = function() {
//! Generate Grouping Key
emit(dateFormatter(this.product), this.bad_qty)
}
var reduceFunction = function (key, values) {
return Array.sum(values);
}
var outputControl = {
out: {inline: 1},
//! Filters
query: {order_type: product}
}
logCollection.mapReduce(mapFunction, reduceFunction, outputControl, function (err, result) {
if (err) {
callback(err);
return;
}
var resultSet = [];
//! post-processing
for (var i = 0; i < result.length; i++) {
//......
}
callback(err, resultSet);
});
});
}
As you can see, these two snippet shares lots of common code, only has difference in three place (Generate grouping key, filters, post-process).
So I would really like to extract the common part to make my code cleaner, but have no success.
I first try to make dateFromatter is provided by a callback instead of hard-coding like the following:
function dateFormatter(data) {
return data.emb_date.getFullYear() + "-" + (data.emb_date.getMonth() + 1)
}
function getSomeData(groupingKey) {
// First-query
Log.native(function(err, logCollection) {
var mapFunction = function() {
//! Generate Grouping Key
emit(groupingKey(this.emb_date), this.bad_qty)
}
var reduceFunction = function (key, values) {
return Array.sum(values);
}
var outputControl = {
out: {inline: 1},
//! Filters
query: {order_type: product}
}
logCollection.mapReduce(mapFunction, reduceFunction, outputControl, function (err, result) {
if (err) {
callback(err);
return;
}
var resultSet = [];
//! post-processing
for (var i = 0; i < result.length; i++) {
//.....
}
callback(err, resultSet);
});
});
}
But without any luck, I keep getting error like the following one:
MongoError: exception: ReferenceError: groupingKey is not defined near 'emit(groupingKey(this), this.bad_qty' (line 3)
at Object.toError (/home/brianhsu/zh800/dashboard/node_modules/sails-mongo/node_modules/mongodb/lib/mongodb/utils.js:114:11)
What should I do if I would like to reduce those duplicate part of code?
Finally I found that I need pass the option called 'scope' to mongodb, I come up with the following solution which works quite well.
exports.defineOn = function(options) {
var model = options.model
var groupingFunction = options.groupingFunction
var mongoFilters = options.mongoFilters
var customFilter = options.customFilter
var converter = options.converter
var sorting = options.sorting
return function(callback) {
model.native(function(err, collection) {
var mapFunction = function() { emit(groupingFunction(this), this.bad_qty) }
var reduceFunction = function(key, values) { return Array.sum(values); }
var mapReduceOptions = {
out: {inline: 1},
query: mongoFilters,
scope: {
groupingFunction: groupingFunction,
mongoFilters: mongoFilters,
customFilter: customFilter,
converter: converter
}
}
var processCallback = function (err, result) {
if (err) {
callback(err);
return;
}
if (sorting) {
result.sort(sorting);
}
var resultSet = [];
for (var i = 0; i < result.length; i++) {
if (customFilter && customFilter(result[i])) {
resultSet.push(converter(result[i]));
} else if (!customFilter) {
resultSet.push(converter(result[i]));
}
}
callback(err, resultSet);
}
collection.mapReduce(mapFunction, reduceFunction, mapReduceOptions, processCallback);
});
}
}
Usage:
function machineDetail (year, month, date, machine, callback) {
var startDate = new Date(+year, +(month-1), +date);
var endDate = new Date(+year, +(month-1), (+date) + 1);
var mapReducer = MapReducer.defineOn({
model: Log,
groupingFunction: function(data) {
return {date: data.emb_date, error: data.defact_id};
},
mongoFilters: {
mach_id: machine,
emb_date: {$gte: startDate, $lt: endDate}
},
converter: function (data) {
return {
name: data._id,
value: data.value,
};
}
});
mapReducer(callback);
}
Related
I am trying to read some data from 2 different tables and parse a CSV file before rendering an ejs file.
I can get the data from both tables and from the CSV file but I seem to be unable to return the result.
Pretty sure this is a problem with the way I handle async execution but I fail to see what I am doing wrong.
I've spent the last 2 days reading about this (including the threads around here) and browsing but somehow the answer still escapes me.
First file - usercms.js
app.get('/userscms', function(req, res)
{
existingUsers.getExistingUsers()
.then(function(appUsers)
{
//global users array
//I can display these in my ejs file
globalAppUsers = appUsers;
})
.then(existingUsersAttributesQlik.getExistingUsersAttributesQlik())
.then(function(usersQlikAttributes)
{
//global user attributes array
//undefined data
globalUsersQlikAttributes = usersQlikAttributes;
})
.then(existingSuppliers.parseSuppliersCSV())
.then(function(supplierData)
{
//the result I am expecting
//this prints undefined
console.log(supplierData);
}).then(function()
{
res.render('userscms.ejs',
{
users: globalAppUsers,
attributes: globalUsersQlikAttributes
});
});
});
Second function - getxistingUsers.js (identical to the getExistingUsersAttributesQlik, except for the query)
var userData = [];
var appUsers = [];
(function (exports)
{
exports.getExistingUsers = function ()
{
return promisemysql.createConnection(dbconfig.development).then(function(conn)
{
var result = conn.query("SELECT id, username, firstName, lastName, email, phone, lastLogin, isAdmin, isValid, isPhoneValid, accountCreationDateTime FROM Users");
conn.end();
return result;
}).then(function(rows)
{
return rows;
}).then(function(rows)
{
if (rows.length)
{
userData = [];
appUsers = [];
rows.forEach(function (elem)
{
userData.push(_.toArray(elem));
});
for (i = 0; i < userData.length; i++)
{
var appUser = new appUserModel.AppUser(
userData[i][0],
userData[i][1],
userData[i][2],
userData[i][3],
userData[i][4],
userData[i][5],
userData[i][6],
userData[i][7],
userData[i][8],
userData[i][9],
userData[i][10]);
appUsers.push(_.toArray(appUser));
}
return appUsers;
}
else
{
console.log("NOPE");
return null;
}
}).then(function(appUsers)
{
console.log(appUsers);
return appUsers;
});
};
})(typeof exports === 'undefined' ? this['getExistingUsers'] = {} : exports);
Third file - parseSuppliersCSV.js
var supplierData = [];
var suppliersData = [];
var csvCount = 0;
(function (exports)
{
exports.parseSuppliersCSV = function ()
{
return new Promise(function(resolve, reject)
{
var fileStream = fs.createReadStream("myCSV.csv");
var parser = fastCsv();
csvCount = 0;
supplierData = [];
suppliersData = [];
fileStream
.on("readable", function ()
{
var data;
while ((data = fileStream.read()) !== null)
{
parser.write(data);
}
})
.on("end", function ()
{
parser.end();
});
parser
.on("readable", function ()
{
var data;
while ((data = parser.read()) !== null)
{
if(csvCount >= 1)
{
csvCount++;
var arrayOfStrings = data[0].split(';');
var supplier = new supplierModel.Supplier(arrayOfStrings[0],arrayOfStrings[1]);
suppliersData.push(_.toArray(supplier));
}
else
{
csvCount++;
}
}
})
.on("end", function ()
{
console.log("done");
//all OK here
console.log(suppliersData);
//this doesn't seem to return anything
return suppliersData;
});
});
};
})(typeof exports === 'undefined' ? this['parseSuppliersCSV'] = {} : exports);
Any ideas what I am doing wrong? Am I approaching this the wrong way?
I'll take a guess here and assume the promise you created should resolve to something...instead of returning a value.
.on("end", function ()
{
console.log("done");
//all OK here
console.log(suppliersData);
//this doesn't seem to return anything
return resolve(suppliersData);
});
I'm quite new to the topic and i'm still having some issues with my mailparser. Though searching and finding emails in the email header (mail.from) does work, it doesn't work in the email body. Does anybody have some experience with that and is willing to help? You can find the function i'm talking about under the "// Check for other addresses in Mail-Body (Doesn't work yet)"-comment. I think, that my Regex is correct. Also if the matchAll-Function give back an array and it can't be saved in the the subscriber.email-object, it shall be at least logged to the console. Also i checked manually in the inbox if there are mails with email adresses in the mail body. There are at least two, which shall be found..
The part of the App.js, that does the mailparsing:
const simpleParser = require('mailparser').simpleParser;
//const htmlparser = require("htmlparser2");
var fs = require('fs');
var config = require('./config');
var Imap = require('imap');
var imap = new Imap(config.imap);
var blacklistString = '';
String.prototype.matchAll = function(regexp) {
var matches = [];
this.replace(regexp, function() {
var arr = ([]).slice.call(arguments, 0);
var extras = arr.splice(-2);
arr.index = extras[0];
arr.input = extras[1];
matches.push(arr);
});
return matches.length ? matches : null;
};
function openInbox(subbox,cb) {
imap.openBox('INBOX.'+subbox, true, cb);
}
function getBoxes(cb) {
imap.getBoxes(cb);
}
function showBoxes(boxes) {
imap.end();
}
function logArrayElements(element) {
if(element[1].indexOf('placeholder.de')==-1){
addToBlacklistString(element[1]);
}
}
function addToBlacklistString(str) {
blacklistString += str+"\n";
}
function writeBlacklistFile() {
fs.appendFile('data/data.csv', blacklistString, function (err) {
if (err) throw err;
console.log('Saved!');
});
}
function search(searchArray, regex){
imap.search(searchArray, function(err, results) {
if (err) throw err;
var temp = 0;
var mailtemp = [];
var f = imap.fetch(results, { bodies: '' });
f.on('message', function(msg, seqno) {
console.log('Message #%d', seqno);
var prefix = '(#' + seqno + ') ';
msg.on('body', function(stream, info) {
simpleParser(stream, (err, mail)=>{
//console.log(temp);
//console.log(mail.subject);
/*fs.writeFile('data/'+seqno+'.txt',mail.text, function(err){
console.log(err);
});*/
//var text = mail.text;
// New Subscriber Object
var subscr = new Subscriber({nr: '', mailIdent: '', from: '', emails: '', text:'', uLink: '', anwalt: false });
subscr.nr = seqno;
//Check for From-Address
if(!!mail.from) {
//console.log(mail.from.value);
for(var i = 0; i < mail.from.value.length; i++) {
mailtemp = mail.from.value[i].address.matchAll(regex);
mailtemp.forEach(function(element){
/*fs.appendFile('data/data.csv', element[0] + "\n", function(error){
console.log(error);
});*/
subscr.from = element[0];
});
if(!!mailtemp) {
mailtemp.forEach(logArrayElements);
}
}
}else{
//console.log(mail.text);
}
// Message-ID
if(!!mail.messageId) {
subscr.mailIdent = mail.messageId;
}
console.log(mail.messageId);
// Check for other addresses in Mail-Body (Doesn't work yet)
var regexEmails = new RegExp('/([\w\.\-\_\#\+]+#[\w\.\-\_äüö]+\.[a-zA-Z]+)/g');
if(!!mail.text){
if(mail.text.matchAll(regexEmails)!=null) {
subscr.emails = mail.text.matchAll(regexEmails);
console.log(subscr.emails);
}
}
/* Split mail.text at substrings in substr-array. Extend if necessary..
*
* Also check for 'Anwalt'-Expression in splitted Substring
*
* If mail.text doesn't exist -> Check for html body and convert it to text-format
*/
//var regexLink = new RegExp('\.de\/(unsubscribe|austragen)\/([^\"]+)');
var regexAnwalt = new RegExp('nwalt|echtsanwalt|rechtlicher');
if(!!mail.text) {
var substr = ["schrieb pplaceholder.de", "Von: \"placeholder.de", "Von: pplaceholder.de", "From: placeholder.de", "Ursprüngliche Nachricht"];
for (var i = 0; i<substr.length; i++) {
if(mail.text.indexOf(substr[i]) > -1) {
var textTemp = mail.text;
var arr = textTemp.split(substr[i]);
if(arr[0].matchAll(regexAnwalt)!=null) {
subscr.anwalt = true;
};
subscr.text = arr[0];
break;
} else {
subscr.text = mail.text;
}
}
//console.log(arr);
}
else
{
var html = mail.html;
var text = htmlToText.fromString(html, {
noLinkBrackets: true,
ignoreImage: true,
uppercaseHeadings: false,
preserveNewlines: false,
wordwrap:130,
format: {
heading: function (node, fn, options) {
var h = fn(node.children, options);
return '\n==== ' + h + ' ====\n\n';
}
}
});
subscr.text = text;
}
mail.headers.forEach(function(value, key) {
//console.log(value);
});
subscr.save();
//console.log(subscr);
temp++;
});
});
msg.once('end', function() {
console.log(prefix + 'Finished');
});
});
f.once('error', function(err) {
console.log('Fetch error: ' + err);
});
f.once('end', function() {
console.log('Done fetching all messages!');
//writeBlacklistFile();
imap.end();
});
});
}
imap.once('ready', function() {
openInbox('Test',function(err, box) {
var searchArray = [['FROM', '#']];
search(searchArray,/([\w\.\-\_\#\+]+#[\w\.\-\_äüö]+\.[a-zA-Z]+)/g);
});
});
imap.once('error', function(err) {
console.log(err);
});
imap.once('end', function() {
console.log('Connection ended');
});
imap.connect();
app.listen(2700, function(){
console.log("Listening on Port 2700")
});
module.exports = app;
subscriber.js
const mongoose = require('mongoose');
var subscriberSchema = mongoose.Schema({
nr: Number,
mailIdent: String,
from: String,
emails: String,
text: String,
uLink: String,
anwalt: Boolean
});
var Subscriber = module.exports = mongoose.model('Subscriber', subscriberSchema);
//get Subscriber
module.exports.getSubscribers = function(callback, limit){
Subscriber.find(callback).limit(limit);
};
module.exports.getSubscriberByID = function(_id, callback){
Subscriber.findById(_id, callback);
};
The Regex for the Emails was a little bit wrong.
Also i didn't noticed that the matchAll-Fct. is giving back a two-dimensional Array. Here is the changed part of the code:
var regexEmails = new RegExp("([\\w\\.\\-\\_\\#\\+]+#[\\w\\.\\-\\_äüö]+\\.[a-zA-Z]+)");
var temp1 = mail.text.matchAll(regexEmails);
if(!!temp1){
//console.log(temp1);
for(var i =0; i<temp1.length; i++) {
if(temp1[0][i]!=='info#service.placeholder.de' && temp1[0][i] !== "info#placeholder.de"){
subscr.emails += temp1[0][i];
}
}
}
I have an array of items and that need to be found and saved to MongoDB. Find from one model and get data from it and save via another model. assets is the array and count is the length of the array.
Problem - Do I need to check out all records that are saved to DB or not?
I need to get JSON response after complete pushing all records to the database.
router.route('/move_qa')
.post(function (req, res) {
console.log('/move_qa');
console.log("*assets" + req.body.assets);
var assets = req.body.assets;
var count = req.body.count;
for (var i = 0; i < count; i++) {
var aBarcode = assets[i];
console.log("$" + aBarcode);
searchAndSave(aBarcode, function (resulttt) {
console.log("#"+resulttt);
});
}
res.json({"result":true});
});
function searchAndSave(assetBarcode, callb) {
pallet.findOne({assets: assetBarcode}, function (err, count) {
if (err) {
console.log("1 /pallet_details");
console.log(err)
} else {
console.log("2 /pallet_details");
if (count == null) {
console.log("3/pallet_details");
} else {
var pbarcode = count.pBarcode;
var date = new Date();
var status = "NOT_COMPLETE";
var newMoveToQA = new movetoqa({
keg_barcode: assetBarcode,
relevant_pbarcode: pbarcode,
move_date: date,
status: status
});
newMoveToQA.save(function (err) {
console.log("####******");
if (err) {
console.log("4");
} else {
console.log("5");
callb(true);
}
});
}
}
});
}
I've created a script to migrate data from Dynamo to a Mysql DB.
First I was not using Async, but I started getting bottlenecks on the sql side, so I decided to "throttle" the dymano part using the async lib.
The problem: I have a recursion in the middle of the path, as long as dynamo has data I have to continue the process (ultra simple ETL), but I don't know how to perform the recursion inside the waterfall.
My code :
function main() {
async.waterfall([getMaxTimestamp, scanDynamoDB, printout, saveToMySQL], function(err, result) {
if(err) console.log(err)
console.log(result)
});
}
function getMaxTimestamp(callback) {
console.time("max query");
connection.query("SELECT MAX(created_at) as start_date from Tracking;", function(err, data) {
console.timeEnd("max query");
callback(err, data);
})
}
function scanDynamoDB(data, callback) {
if (data[0].start_date != null && data[0].start_date)
query.ExpressionAttributeValues[':v_ca'].N = data[0].start_date;
console.time("dynamo read");
dynamoDB.scan(query, function(err, data) {
console.timeEnd("dynamo read");
callback(err, data);
// if (!err) {
// if (data != undefined && data.Count > 0) {
// printout(data.Items) // Print out the subset of results.
// if (data.LastEvaluatedKey) { // Result is incomplete; there is more to come.
// query.ExclusiveStartKey = data.LastEvaluatedKey;
// scanDynamoDB(query);
// }
// } else {
// console.log('No fresh data found on Dynamo')
// } else console.dir(err);
});
};
function assembleSql() {
insertSql = "insert into Tracking (";
for (var i = 0; i < headers.length; i++) {
insertSql += headers[i];
if (i < headers.length - 1)
insertSql += ",";
}
insertSql += ") values ?;"
previousInsertSql = insertSql;
}
function saveToMySQL(items, callback) {
assembleSql();
//connection.connect();
console.time("insert sql")
connection.query(insertSql, [items], function(err, result) {
console.timeEnd("insert sql")
if (err){
callback(err, null)
return;
}
totalInserts += result.affectedRows;
callback(err, totalInserts)
//connection.end();
})
}
function printout(items, callback) {
var headersMap = {};
var values;
var header;
var value;
var out = [];
if (headers.length == 0) {
if (items.length > 0) {
for (var i = 0; i < items.length; i++) {
for (var key in items[i]) {
headersMap[key] = true;
}
}
}
for (var key in headersMap) {
headers.push(key);
}
}
for (index in items) {
values = [];
for (i = 0; i < headers.length; i++) {
value = "";
header = headers[i];
// Loop through the header rows, adding values if they exist
if (items[index].hasOwnProperty(header)) {
if (items[index][header].N) {
value = items[index][header].N;
} else if (items[index][header].S) {
value = items[index][header].S;
} else if (items[index][header].SS) {
value = items[index][header].SS.toString();
} else if (items[index][header].NS) {
value = items[index][header].NS.toString();
} else if (items[index][header].B) {
value = items[index][header].B.toString('base64');
} else if (items[index][header].M) {
value = JSON.stringify(items[index][header].M);
} else if (items[index][header].L) {
value = JSON.stringify(items[index][header].L);
} else if (items[index][header].BOOL !== undefined) {
value = items[index][header].BOOL.toString();
}
}
values.push(value)
}
out.push(values)
}
callback(null, out);
}
main();
The commented part is where the recursion happens, but I don't know where to place this inside my flow !
Any help would be appreciated !
Just don't call callback function inside scanDynamoDB while fetching data. You can implement additional function and call it recursive while errors is not appears, like below
function scanDynamoDB(data, callback) {
if (data[0].start_date != null && data[0].start_date)
query.ExpressionAttributeValues[':v_ca'].N = data[0].start_date;
console.time("dynamo read");
var result = []; // for accumulate data of each query
function readNext(err, data) {
if (err)
return callback(err);
if (!data || !data.Count)
return callback(null, result);
// add data to result
dynamoDB.scan(query, readNext);
}
dynamoDB.scan(query, readNext);
};
Actually I was able to figure it out by myself.
async.whilst(function() { return canInsert}, function (callback){
scanDynamoDB(query, callback)
}, function(err, res) {}
function scanDynamoDB(data, callback) {
console.time("dynamo read");
dynamoDB.scan(query, function(err, data) {
console.timeEnd("dynamo read");
if (!err) {
if (data != undefined && data.Count > 0) {
canInsert = data.LastEvaluatedKey;
if (data.LastEvaluatedKey) // Result is incomplete; there is more to come.
query.ExclusiveStartKey = data.LastEvaluatedKey;
}
} else console.dir(err);
});
};
I could have done it just with a while(canInsert). Anyway, I avoided recursion and memory usage is way way lower.
I want to use method in other method in the same class to calc some data from db as below but Im only getting
error
[TypeError: Object #<Query> has no method 'methodMaxLct']"
exports.UserClass = function() {
this.methodMaxLct = function(lct, callback) {
var counting = Math.ceil(Math.pow(1.15, (lct - 1)) * 10) * 10;
callback(counting);
this.methodGetData = function(idu, callback) {
connection = mysql.createConnection(dbconfig);
connection.query(dataUserResources, [idu], function(err, results, fields) {
if (err) throw err;
if (results.length == 0) {
callback = 0;
} else {
for (var i in results) {
var dataU = results[i];
}
dataU.enMax = 30;
var ap = this.methodMaxLct(dataU.lct, function(answer) {
dataU.lctMax = answer;
});
callback(dataU);
}
connection.end();
});
};
};
Can anyone give me a tip or same clue how to do that in right way?
Try it like so. You have to store the this object into a variable so you can use it in an inner function. The this is different based on the execution context.
exports.UserClass = function() {
var self = this;
this.methodMaxLct = function(lct, callback) {
var counting = Math.ceil(Math.pow(1.15, (lct - 1)) * 10) * 10;
callback(counting);
this.methodGetData = function(idu, callback) {
connection = mysql.createConnection(dbconfig);
connection.query(dataUserResources, [idu], function(err, results, fields) {
if (err) throw err;
if (results.length == 0) {
callback = 0;
} else {
for (var i in results) {
var dataU = results[i];
}
dataU.enMax = 30;
var ap = self.methodMaxLct(dataU.lct, function(answer) {
dataU.lctMax = answer;
});
callback(dataU);
}
connection.end();
});
};
};
The context (this) of the callback function is determined when the callback function is called. So, you should use either arrow function or .bind( this )
Using an arrow function
exports.UserClass = function() {
this.methodMaxLct = function(lct, callback) {
var counting = Math.ceil(Math.pow(1.15, (lct - 1)) * 10) * 10;
callback(counting);
this.methodGetData = function(idu, callback) {
connection = mysql.createConnection(dbconfig);
// use arrow function
connection.query(dataUserResources, [idu], (err, results, fields) => {
if (err) throw err;
if (results.length == 0) {
callback = 0;
} else {
for (var i in results) {
var dataU = results[i];
}
dataU.enMax = 30;
var ap = this.methodMaxLct(dataU.lct, function(answer) {
dataU.lctMax = answer;
});
callback(dataU);
}
connection.end();
});
};
};
Using .bind(...) method
exports.UserClass = function() {
this.methodMaxLct = function(lct, callback) {
var counting = Math.ceil(Math.pow(1.15, (lct - 1)) * 10) * 10;
callback(counting);
this.methodGetData = function(idu, callback) {
connection = mysql.createConnection(dbconfig);
connection.query(dataUserResources, [idu], function(err, results, fields) {
if (err) throw err;
if (results.length == 0) {
callback = 0;
} else {
for (var i in results) {
var dataU = results[i];
}
dataU.enMax = 30;
var ap = this.methodMaxLct(dataU.lct, function(answer) {
dataU.lctMax = answer;
});
callback(dataU);
}
connection.end();
}.bind( this ) );
};
};