Adding only Distinct numbers during map reduce - javascript

I have a csv as to which number called which number and the call details(duration and time etc.)
i want to have all the numbers a particular number called in an array.
that array should be an array of documents and so,in each document i can have all the call details also.
so finally i need documents with a "caller" number and a "called" array(that array is as defined above).
for this i had come up with a map reduce solution.(quite basic and intuitive).
but my problem is that i need only distinct numbers that a "caller" number has called.
my current mapreduce script repeats the dialled numbers.
how can i only consider unique numbers during the reduce phase?
my code looks like this:(i enter this in the mongo shell)
db.contacts.mapReduce(
function(){
numbers = [];
value={phone:this.<<called_number>>};
numbers.push(value);
emit(this.<<caller_number>>,{called:numbers});
},
function(key,values) {
result={called:[]};
values.forEach(function (v) {
var i,j;
for(i=0;i<v.called.length;i++) {
var flag=0;
for(j=0;j<result.called.length;j++) {
if(v.called[i].phone==result.called[j].phone){
flag=1;
}
}
if(flag==0) {
result.called.push(v.called[i])
}
}
});
return result;
},
{"query": {},"out":"new_collection"}
)
I understand that the map and reduce functions are java script functions.
so even the javascript coders can help me out here(to create the reduce function).

Try this.
db.contacts.mapReduce(function(){
emit(this.<<caller_number>>, {called:this.<<called_number>>, callDuration:this.<<callDuration>>,...});}
,function(key,values)
{
var map = {};
var called=values.filter(function removeDuplicated(it){
if (!map[it.called]){
map[it.called] = 1;
return true;
}
return false;
})
return {caller:key, called:called};},
{"query": {},"out":"new_collection"})

Related

Group objects in array by property

I have just started to pick up coding seriously. :)
I came across a problem that seems too complicated for me.
How to group the following products by promotions type?
var data = [
{
name:'product1',
price:'40',
promotion:[
{
name:'Buy 3 get 30% off',
code:'ewq123'
},
{
name:'Free Gift',
code:'abc140'
}
]
},
{
name:'product2',
price:'40',
promotion:[
{
name:'Buy 3 get 30% off',
code:'ewq123'
}
]
},
{
name:'product3',
price:'40',
promotion:[
{
name:'Buy 3 get 30% off',
code:'ewq123'
}
]
},
{
name:'product4',
price:'40'
},
{
name:'product5',
price:'40',
promotion:[
{name:'30% off', code:'fnj245'}
]
},
{
name:'product6',
price:'0',
promotion:[
{
name:'Free Gift',
code:'abc140'
}
]
}
];
I would like to get result in the following format
result =[
{
name : 'Buy 3 get 30% off',
code: 'ewq123',
products: [
... array of products
]
},
{
name : '30% off',
code: 'fnj245',
products: [
... array of products
]
},
{
...
}
];
I am able to get a list of products by promotion code, but how can I make it generic?
function productHasPromo(product, promotion){
if(!product.hasOwnProperty('promotion')) return false;
var productPromo = product.promotion;
for(var i=0; i<productPromo.length; i++){
if(productPromo[i].code === promotion){
return true;
}
}
return false;
}
function groupProductByPromo(products, promotion){
var arr = [];
for(var i=0; i<products.length; i++){
if(productHasPromo(products[i], promotion)){
arr.push(products[i]);
}
}
return arr;
}
Explanation
You could write a function that loops through your array and search for the unique values within a specified property. That is easily done when working with simple data types, but can be done with more complex structures as arrays of objects (like in your example), using a helper grouping function.
Since you also need the output to be in a specific format after the grouping, we will have to work on a transformer also. This transformer will receive the original data and the unique values extracted by the grouping function, and will generate the desired output.
The following functions were used in the example:
Array.prototype.groupBy = function (property, grouping, transformer) {
var values = [];
this.forEach(function (item) {
grouping.call(this, item, property).forEach(function (item) {
if (!values.contains(property, item[property])) {
values.push(item);
}
});
});
return transformer.call(this, values);
};
Array.prototype.contains = function (key, value) {
return this.find(function (elm) {
return elm[key] === value;
});
};
function transformerFunction(values) {
this.forEach(function (item) {
if (!item.promotion) return;
item.promotion.forEach(function (promotion) {
values.forEach(function (option) {
if (option.code === promotion.code) {
if (option.products) {
option.products.push(item);
} else {
option.products = [item];
}
}
});
});
});
return values;
}
function groupingFunction(item, property) {
if (!item.promotion) return [];
var values = [];
item.promotion.forEach(function (promotion) {
if (!values.contains(property, promotion[property])) {
values.push(promotion);
}
});
return values;
}
Usage as follows:
var items = data.groupBy('code', groupFunction, transformFunction);
Example
Check the example i've prepared at jsfiddle
Welcome to the coding world. A lot of people start off with a problem by trying to write some code, then they wonder why it doesn't work and scratch their heads, don't know the basics of debugging it, and then post here to SO. They're missing the crucial first step in programming which is to figure out how you are going to do it. This is also called designing the algorithm. Algorithms are often described using something called pseudo-code. It has the advantage that it can be looked at and understood and established to do the right thing, without getting bogged down in all the mundane details of a programming language.
There are some algorithms that are figured out by some very smart people--like the Boyer-Moore algorithm for string matching--and then there are other algorithms that programmers devise every day as part of their job.
The problem with SO is that all too often someone posts a question which essentially about an algorithm, and then all the keyboard-happy code jockeys pounce it and come up with a code fragment, which in many cases is so contorted and obtuse that one cannot even see what the underlying algorithm is.
What is the algorithm you propose for solving your problem? You could post that, and people would probably give you reasonable comments, and/or if you also give an actual implementation that doesn't work for some reason, help you understand where you've gone wrong.
At the risk of robbing you the pleasure of devising your own algorithm for solving this problem, here's an example:
Create an empty array for the results.
Loop through the products in the input.
For each product, loop through its promotions.
Find the promotion in the array of results.
If there is no such promotion in the array of results, create a new one, with an empty list of products.
Add the product to the array of products in the promotion entry in the array.
In pseudo-code:
results = new Array // 1
for each product in products (data) // 2
for each promotion in promotions field of product // 3
if results does not contain promotion by that name // 4
add promotion to results, with empty products field // 5
add product to products field of results.promotion // 6
If we believe this is correct, we can now try writing this in JavaScript.
var result = []; // 1
for (var i = 0; i < data.length; i++) { // 2
var product = data[i];
var promotions = product.promotion;
for (var j = 0; j < promotions.length; j++) { // 3
var promotion = promotions[i];
var name = promotion.name;
var result_promotion = find_promotion_by_name(name);
if (!result_promotion) { // 4
result_promotion = { name: name, products: [], code: promotion.code };
result.push(result_promotion); // 5
}
result_promotion.products.push(name); // 6
}
}
This code is OK, and it should get the job done (untested). However, it is still a bit unreadable. It does not follow the pseudo-code very closely. It somehow still hides the algorithm. It is hard to be sure that it is completely correct. So, we want to rewrite it. Functions like Array#foreach make it easier to do this. the top level can simply be:
var result = [];
data.forEach(processProduct);
In other words, call the processProduct function for each element of data (the list of products). It will be very hard for this code to be wrong, as long as `processProduct is implemented incorrectly.
function processProduct(product) {
product.promotion.forEach(processPromotion);
}
Again, this logic is provably correct, assuming processPromotion is implemented correctly.
function processPromotion(promotion) {
var result_promotion = getPromotionInResults(promotion);
result_promotion.products.push(name);
}
This could hardly be clearer. We obtain the entry for this promotion in the results array, then add the product to its list of products.
Now we need to simply implement getPromotionInResults. This will include the logic to create the promotion element in the results array if it doesn't exist.
function getPromotionInResults(promotion) {
var promotionInResults = findPromotionInResultsByName(promotion.name);
if (!promotionInResults) {
promotionInResults = {name: promotion.name, code: promotion.code, products: []};
result.push(promotionInResults);
}
return promotionInResults;
}
This also seems demonstrably correct. But we still have to implement findPromotionInResultsByName. For that, we can use Array#find, or some equivalent library routine or polyfill:
function findPromotionInResultsByName(name) {
return result.find(function(promotion) {
return promotion.name === name;
});
}
The entire solution is thus
function transform(data) {
// Given a product, update the result accordingly.
function processProduct(product) {
product.promotion.forEach(processPromotion);
}
// Given a promotion, update its list of products in results.
function processPromotion(promotion) {
var result_promotion = getPromotionInResults(promotion);
result_promotion.products.push(name);
}
// Find or create the promotion entries in results.
function getPromotionInResults(promotion) {
var promotionInResults = findPromotionInResultsByName(promotion.name);
if (!promotionInResults) {
promotionInResults = {name: promotion.name, code: promotion.code, products: []};
result.push(promotionInResults);
}
return promotionInResults;
}
// Find an existing entry in results, by its name.
function findPromotionInResultsByName(name) {
return result.find(function(promotion) {
return promotion.name === name;
});
}
var result = [];
data.forEach(processProduct);
return result;
}
Ok, after a few hours of works, with lots of help online and offline, I finally made it works. Thanks for the people who has helped.
Please do comment if you have a more elegant solution, always love to learn.
For people who ran into similar problem:
Here is my solution
function groupProductsByPromo(data){
var result = [];
// filter only product with promotion
var productsWithPromo = data.filter(function(product){
return product.hasOwnProperty('promotions');
});
// create promotions map
var mappedProducts = productsWithPromo.map(function(product) {
var mapping = {};
product.promotions.forEach(function(promotion) {
mapping[promotion.code] = {
promotion: promotion
};
});
return mapping;
});
// reduce duplicates in promotion map
mappedProducts = mappedProducts.reduce(function(flattenObject, mappedProducts) {
for (var promoCode in mappedProducts) {
if (flattenObject.hasOwnProperty(promoCode)) {
continue;
}
flattenObject[promoCode] = {
code: promoCode,
name: mappedProducts[promoCode].promotion.name
};
}
return flattenObject;
}, {});
// add products to promo item
for(var promoCode in mappedProducts){
mappedProducts[promoCode].products = productsWithPromo.filter(function(product){
return product.promotions.some(function(promo){
return promo.code === promoCode;
});
});
result.push(mappedProducts[promoCode]);
}
return result;
}
Check out lodash - a nifty library for doing all sorts of transforms.
lodash.groupBy is what you're looking for.

Access the nested value in JSON without looping through it

This is my JSON, I want to directly get the zipCodes values from the JSON without looping through the JSON. How can I do it?
countries:[
{
name:'India',
states:[{
name:'Orissa',
cities:[{
name:'Sambalpur',
zipCodes:{'768019','768020'}
}]
}]
}
]
I think you are looking for
countries[0].states[0].cities[0].zipCodes
Please note, this works for the above JSON as there is only 1 country in countries array and same as for states and cities. However, if there are more than 1 country, state or city then, you will have to iterate to extract information until and unless you know the exact index.
As this is not an associative array, your option is only to use indexes like this:
countries[x].states[y].cities[0].zipCodes
Where x would be each representation of state in your array, in case, of course, that you have more than one.
Similarly y would be each state in each state in each country, in case you have more of those and you can do the same for cities if you need to.
EDIT:
Here's how you can iterate the array:
for(var c in countries)
{
var name = countries[c].name;
if (name === "CountryIAmLookingFor")
{
var statesList = countries[c].states;
for (var s in statesList)
{
var stateName = statesList[s].name;
.....
}
}
}
You can keep iterating until you find the country, state, and city you need, then extract the zipCodes from there as shown in the previous code snippet.
Without "looping"
You can do this crazy trick (not saying this is the best way, but this way you aren't looping through the JSON):
var myData = { 'Put Your Data': 'HERE' };
function getCodes(name, data) {
var sv = data.match(new RegExp(name+'([\\S\\s]*?}][\\S\\s]*?}])'))[1].match(/zipCodes":\[(.*?)\]/g), r = [];
sv.forEach(function (item) {
item.match(/\d+/g).forEach(function (sub) {
r.push(+sub);
});
});
return r;
}
getCodes('India', JSON.stringify(myData));
If your data is already string, then you don't need the JSON.stringify. The forEach you see isn't actually "looping" through the JSON. It's already extracted the zip codes and the code just adds the zip codes to the array. . This line:
var sv = JSON.stringify(data).match(new RegExp(name+'([\\S\\s]*?}][\\S\\s]*?}])'))[1].match(/zipCodes":\[(.*?)\]/g), r = [];
is what grabs the zip codes, it gets something like:
["zipCodes":["768019","768020"]"]
The next line:
item.match(/\d+/g)
will grab the numbers outputting something like:
["768019", "768020"]
The loop just adds the zip-codes to another array
With looping
You're better off looping through the JSON:
var myData = {}, // Your data
zips = [];
myData.countries.forEach(function(i) {
if (i.name === 'India') {
i.states.forEach(function(j) {
j.cities.forEach(function(l) {
l.zipCodes.forEach(function(m) {
zips.push(m);
});
});
});
}
});
//use "zips" array
PERFORMANCE AND SPEED TESTS
After testing copying an array about 500MB (half a gig) took about 30 seconds. That's a lot. Considering an extremely large JSON would be about ~5MB, looping through a little over 5MB of JSON takes about 0.14 seconds. You should never worry about speed.
Here's my "trick" for avoiding explicit iteration. Let JSON.parse or JSON.stringify do the work for you. If your JSON is in string form, try this:
var array = [];
JSON.parse(jsonString, function (key, value) {
if (key === "zipCodes") {
array = array.concat(value);
}
return value;
});
console.log(array); // all your zipCodes
Suppose your Json is like
countries =[
{
name:'India',
states:[{
name:'Orissa',
cities:[{
name:'Sambalpur',
zipCodes:768019768020
}]
},{
name:'mumbai',
cities:[{
name:'rea',
zipCodes:324243
}]
}]
}
]
So now we use MAP it will give you ZipCode of every cities
countries.map(function(s){
s.states.map(function(c){
c.cities.map(function(z){
console.log(z.zipCodes)
})
})
})
OR
If you use return statement then it will give you 2 array with two zip code as per over JSON
var finalOP = countries.map(function(s){
var Stalist = s.states.map(function(c){
var zip = c.cities.map(function(z){
return z.zipCodes
})
return zip
})
return Stalist
})
console.log(finalOP)

How to use javascript to loop through key , values and add up one key's value when the other's match

I have a dataset of records that look like this :
[{
"d1d":"2015-05-28T00:00:00.000Z",
"d1h":0,
"d15m":0,
"ct":3
},
{
"d1d":"2015-05-28T00:00:00.000Z",
"d1h":0,
"d15m":0,
"ct":1
}
]
The ct value changes in every record. If d1d, d1h, and d15m are the same in one or more records, I need to combine those records into one with the sum of all the ct values.
I do have jquery, can I use grep for this?
I realize the server side could do a better job of getting me this data , but I have zero control over that.
You don't have to use jQuery for this, vanilla JavaScript will do.
I'll show you two solutions to your problem;
Example 1: Abusing Array#reduce as an iterator
var intermediaryArray = [];
dataset.reduce(function(prev, curr) {
if(prev.d1d === curr.d1d && prev.d1h === curr.d1h && prev.d15m === curr.d15m) {
intermediaryArray.push({
d1d: prev.d1d,
d1h: prev.d1h,
d15m: prev.d15m,
ct: prev.ct + curr.ct
});
} else {
// push the one that wasn't the same
intermediaryArray.push(curr);
}
// return current element so reduce has something to work on
// for the next iteration.
return curr;
});
Example 2: Using Array#Map and Array#Reduce in conjunction
This example utilises underscore.js to demonstrate the logic behind what you want to do.
.map() produces the new array of grouped objects.
.groupBy() produces an array of subarrays containing the objects that pass the predicate that all objects must share the same d1d or grouping function.
.reduce() boils all subarrays down to one value, your object with both cts added to each other.
var merged = _.map(_.groupBy(a, 'd1d'), function(subGroup) {
return subGroup.reduce(function(prev, curr) {
return {
d1d: prev.d1d,
d1h: prev.d1h,
d15m: prev.d15m,
ct: prev.ct + curr.ct
};
});
});
Here's one possible solution:
var dataset = [{
"d1d":"2015-05-28T00:00:00.000Z",
"d1h":0,
"d15m":0,
"ct":3
},
{
"d1d":"2015-05-28T00:00:00.000Z",
"d1h":0,
"d15m":0,
"ct":1
}
]
function addCt(dataset) {
var ctMap = {}
var d1d, d1h, d15m, ct, key, value
for (var ii=0, record; record=dataset[ii]; ii++) {
key = record.d1d+"|"+record.d1h+"|"+record.d15m
value = ctMap[key]
if (!value) {
value = 0
}
value += record.ct
ctMap[key] = value
}
return ctMap
}
ctMap = addCt(dataset)
console.log(ctMap)
// { "2015-05-28T00:00:00.000Z|0|0": 4 }
You may want to construct the key in a different way. You may want set the value to an object containing the d1d, d1h, d15m and cumulated ct values, with a single object for all matching d1d, d1h and d15m values.

couchdb reduce on non-key value

I have a data set in couchdb with multiple documents that list a timestamp and a set of signals from sensors. In this example I've only used a few different names, but there can be an infinite amount of different names as additional sensors are added to the system. Here's an example of three sample documents:
{ timestamp: 12345,
signals: ["highTemperature", "highPressure"]
}
{ timestamp: 12346,
signals: ["highTemperature"]
}
{ timestamp: 12347,
signals: ["lowPressure", "highTemperature"]
}
What I'd like to be able to do is to get the frequency of each tag. A simple way to do this is to create a map function like this:
function (doc) {
for (var idx in doc.signals) {
emit(doc.signals[idx], 1);
}
Along with a reduce function like this:
function(signal, counts) {
var sum = 0;
for(var i = 0; i < counts.length; i++) {
sum += counts[i];
};
return sum;
}
This will return a nice set of data like this:
{"rows":[
{"key":"highTemperature","value":3},
{"key":"highPressure","value":1},
{"key":"lowPressure","value":1}
]}
This is great if I wanted to know the signal distribution over all time, but I really want to know is the distribution of tags for a subset of data points, say timestamp 12346 - 12349. However, what I can't do is slice the data by timestamp using startkey and endkey because timestamp is not part of a key. If I make timestamp is the key then I can't reduce to get a distribution of signals.
Is there a way to do such a grouping so you reduce on an element that isn't part of the key? Ideally I'd like to specify the grouping interval via a URL parameter such as: /mydb/_design/main/_view/signalsByTime?startkey=12346&endkey=12347 and have it return the distribution of signals for just that time period, like this:
{"rows":[
{"key":"highTemperature","value":2},
{"key":"lowPressure","value":1}
]}
If you want timestamp to be the key and number of possible signals is very small (O(1), lets assume 3 as in your example), then you can emit in map characteristic vector of your signal:
if (doc.signal == "highTemperature") {
emit(doc.timestamp, [1,0,0]);
} else if (doc.signal == "highPressure") {
emit(doc.timestamp, [0,1,0]);
} ...
and sum-up vectors in reduce, possibly like this:
function(keys, values) {
var sum = [0,0,0];
for (v in values) {
for (s in sum) {
sum[s] += values[v][s];
}
}
return sum;
}

Mongodb MapReduce to concatenate strings?

All of the MongoDB MapReduce examples I have seen have dealt with counting/adding numbers. I need to combine strings, and it looks like MapReduce is the best tool for the job. I have a large MongoDB collection in this format:
{name: userone, type: typeone}
{name: usertwo, type: typetwo}
{name: userthree, type: typeone}
Each name only has one type, but names are not necessarily unique. I want to end up with a collection that lists all names for a particular type, either in a comma separated list or an array, like this:
{type: typeone, names: userone, usertwo}
{type: typetwo, names: userthree}
I was trying to use MapReduce to accomplish this. My function works correctly when there is only one user for a type. However, when there is more than one user, 'undefined' is stored in the names field.
I'm not very good at Javascript, and I'm still learning MongoDB so it's probably a simple data type or scope error.
Here are my map and reduce functions. What's wrong with them?
map = function() {
emit(this.user,{type:this.type});
}
reduce = function(key, values) {
var all="";
for(var i in values) {
all+=values[i]['type']+",";
}
return all;
}
It looks to me like you're trying to do a group-by via type. If so, you should be emitting type first. From there, its pretty much the same as your code, but I took the liberty of cleaning it up a bit.
Beware, the reduce function could get called multiple times on smaller groups. Therefore, if you used your code in a sharded environment, you may get extra trailing commas. See Reduce Function for more information.
Map:
m = function(){ emit(this.type, {names:this.name}); }
Reduce:
r = function(key, values){
var all = [];
values.forEach(function(x){
all.push(x.names)
})
return {"names": all.join(", ")};
}
Usage:
res = db.users.mapReduce(m,r); db[res.result].find()
Alternate:
Per OP request, here is a version that returns an array for names instead of a comma separated list string:
m = function () {
emit(this.type, {names:this.name});
}
r = function (key, values) {
var all = [];
values.forEach(function (x) {all.push(x.names);});
return {type:key, names:all};
}
f = function (w, r) {
r.names = r.names[0];
return r
}
res = db.users.mapReduce(m,r, {finalize:f}); db[res.result].find()
Cheers!

Categories