Mongodb MapReduce to concatenate strings? - javascript

All of the MongoDB MapReduce examples I have seen have dealt with counting/adding numbers. I need to combine strings, and it looks like MapReduce is the best tool for the job. I have a large MongoDB collection in this format:
{name: userone, type: typeone}
{name: usertwo, type: typetwo}
{name: userthree, type: typeone}
Each name only has one type, but names are not necessarily unique. I want to end up with a collection that lists all names for a particular type, either in a comma separated list or an array, like this:
{type: typeone, names: userone, usertwo}
{type: typetwo, names: userthree}
I was trying to use MapReduce to accomplish this. My function works correctly when there is only one user for a type. However, when there is more than one user, 'undefined' is stored in the names field.
I'm not very good at Javascript, and I'm still learning MongoDB so it's probably a simple data type or scope error.
Here are my map and reduce functions. What's wrong with them?
map = function() {
emit(this.user,{type:this.type});
}
reduce = function(key, values) {
var all="";
for(var i in values) {
all+=values[i]['type']+",";
}
return all;
}

It looks to me like you're trying to do a group-by via type. If so, you should be emitting type first. From there, its pretty much the same as your code, but I took the liberty of cleaning it up a bit.
Beware, the reduce function could get called multiple times on smaller groups. Therefore, if you used your code in a sharded environment, you may get extra trailing commas. See Reduce Function for more information.
Map:
m = function(){ emit(this.type, {names:this.name}); }
Reduce:
r = function(key, values){
var all = [];
values.forEach(function(x){
all.push(x.names)
})
return {"names": all.join(", ")};
}
Usage:
res = db.users.mapReduce(m,r); db[res.result].find()
Alternate:
Per OP request, here is a version that returns an array for names instead of a comma separated list string:
m = function () {
emit(this.type, {names:this.name});
}
r = function (key, values) {
var all = [];
values.forEach(function (x) {all.push(x.names);});
return {type:key, names:all};
}
f = function (w, r) {
r.names = r.names[0];
return r
}
res = db.users.mapReduce(m,r, {finalize:f}); db[res.result].find()
Cheers!

Related

Import text json for node js template literals

For my node js projects i typically have a text.json file and require it, instead of having static text within my code. something like below
JSON file
{
"greet":"Hello world"
}
var text = require('./text.json');
var greet = text.greet
I am having a little trouble in figuring out how this would work with template literals ?
I know this is an old issue but I just came up with a need for the same thing and.. yeah there are node modules that help do this but, this isn't that complex so I just made my own solution
function injectVariables( replacements, input ) {
const entries = Object.entries(replacements)
const result = entries.reduce( (output, entry) => {
const [key, value] = entry
const regex = new RegExp( `\\$\{${key}\}`, 'g')
return output.replace( regex, value )
}, input )
return result
}
const template = 'Hello my name is ${name} and I like ${language}'
const inputs = { name: 'David', language: 'JavaScript' }
const replaced = injectVariables(inputs, template)
console.log(replaced)
So, in this, it takes an input string and an object where the keys are the variable names in the string and the values are, you guessed it, the values.
It creates an array the values using Object.entries and then runs reduce across the entries to keep an updated version of the string as you go. On each iteration it makes a regex to match the variable expression and replaces that value with the one passed it.
This in particular won't look through nested objects (I didn't need that) but if for example your string had ${name.last} in it, since object keys can be strings, your input variable could be inputs = { 'name.last': 'Smith' } and it should work.
Hopefully this helps someone else.
I often use a very tiny templating helper library (tim - https://github.com/premasagar/tim) and it can be used to accomplish this:
//in my json file
var strings = {
'Hello': 'Hello {{name}}!',
'Goodbye': 'Goodbye {{name}}!'
};
//in my app
var tim = require('tim'); //templating library
var strings = require('./strings.json');
//replace
console.log(tim(strings.Hello,{name:'Fred'}));
Relevant JSFiddle:
https://jsfiddle.net/rtresjqv/
Alternatively, you could turn your strings into functions and then pass in the arguments:
//in my json file
var strings = {
'Hello': function() { return `Hello ${arguments[0]}!`; },
'Goodbye': function() { return `Goodbye {$arguments[0]}!`; }
};
//in my app
var strings = require('./strings.json');
//replace
console.log(strings.Hello('Fred'));
Fiddle:
https://jsfiddle.net/t6ta0576/

Angular LoadAsh - filtering records matching by fields on Multiple Objects of different Type

I've been struggling a bit learning loadash and how to correctly pull the data I want with some more advanced tricks. Single objects and lookups are pretty simple but i'm trying to pull all array records by a groupId, if that groupId exists in another object that isn't the same.
For example:
Generic JSON example of objects, each are arrays of records.
Groups ..
{
groupId:
name:
code:
}
Options ..
{
groupId:
optionId:
name:
code:
}
The problem I'm having is pulling all Options only if that groupId exist in the Groups array in loadash.
I've tried some stuff like
var results = [];
_.forEach(Groups, function(g) {
var found _.find(Options, g, function(option, group) {
return option.groupId === group.groupId;
})
results.push(found);
});
I haven't had much luck figuring out the best way to filter these down.
Any words if wisdom would be appreciated, thanks!
Something like this should work,
var result = _.filter(Options, function(o) {
return _.filter(Groups, function(g) { return g.groupId == o.groupid; }).length > 0;
});
actually i think the inner search would perform better with find, since it returns the first match, not sure though
var result = _.filter(Options, function(o) {
return _.find(Groups, { 'groupId': o.groupid });
});
hope this helps.

Meteor 1.0 - Mongo queries using variables as key, including $inc

I'm working with a large dataset that needs to be efficient with its Mongo queries. The application uses the Ford-Fulkerson algorithm to calculate recommendations and runs in polynomial time, so efficiency is extremely important. The syntax is ES6, but everything is basically the same.
This is an approximation of the data I'm working with. An array of items and one item being matched up against the other items:
let items = ["pen", "marker", "crayon", "pencil"];
let match = "sharpie";
Eventually, we will iterate over match and increase the weight of the pairing by 1. So, after going through the function, my ideal data looks like this:
{
sharpie: {
pen: 1,
marker: 1,
crayon: 1,
pencil: 1
}
}
To further elaborate, the value next to each key is the weight of that relationship, which is to say, the number of times those items have been paired together. What I would like to have happen is something like this:
// For each in the items array, check to see if the pairing already
// exists. If it does, increment. If it does not, create it.
_.each(items, function(item, i) {
Database.upsert({ match: { $exist: true }}, { match: { $inc: { item: 1 } } });
})
The problem, of course, is that Mongo does not allow bracket notation, nor does it allow for variable names as keys (match). The other problem, as I've learned, is that Mongo also has problems with deeply nested $inc operators ('The dollar ($) prefixed field \'$inc\' in \'3LhmpJMe9Es6r5HLs.$inc\' is not valid for storage.' }).
Is there anything I can do to make this in as few queries as possible? I'm open to suggestions.
EDIT
I attempted to create objects to pass into the Mongo query:
_.each(items, function(item, i) {
let selector = {};
selector[match] = {};
selector[match][item] = {};
let modifier = {};
modifier[match] = {};
modifier[match]["$inc"] = {};
modifier[match]["$inc"][item] = 1
Database.upsert(selector, modifier);
Unfortunately, it still doesn't work. The $inc breaks the query and it won't let me go more than 1 level deep to change anything.
Solution
This is the function I ended up implementing. It works like a charm! Thanks Matt.
_.each(items, function(item, i) {
let incMod = {$inc:{}};
let matchMod = {$inc:{}};
matchMod.$inc[match] = 1;
incMod.$inc[item] = 1;
Database.upsert({node: item}, matchMod);
Database.upsert({node: match}, incMod);
});
I think the trouble comes from your ER model. a sharpie isn't a standalone entity, a sharpie is an item. The relationship between 1 item and other items is such that 1 item has many items (1:M recursive) and each item-pairing has a weight.
Fully normalized, you'd have an items table & a weights table. The items table would have the items. The weights table would have something like item1, item2, weight (in doing so, you can have asymmetrical weighting, e.g. sharpie:pencil = 1, pencil:sharpie = .5, which is useful when calculating pushback in the FFA, but I don't think that applies in your case.
Great, now let's mongotize it.
When we say 1 item has many items, that "many" is probably not going to exceed a few thousand (think 16MB document cap). That means it's actually 1-to-few, which means we can nest the data, either using subdocs or fields.
So, let's check out that schema!
doc =
{
_id: "sharpie",
crayon: 1,
pencil: 1
}
What do we see? sharpie isn't a key, it's a value. This makes everything easy. We leave the items as fields. The reason we don't use an array of objects is because this is faster & cleaner (no need to iterate over the array to find the matching _id).
var match = "sharpie";
var items = ["pen", "marker", "crayon", "pencil"];
var incMod = {$inc:{}};
var matchMod = {$inc:{}};
matchMod.$inc[match] = 1;
for (var i = 0; i < items.length; i++) {
Collection.upsert({_id: items[i]}, matchMod);
incMod.$inc[items[i]] = 1;
}
Collection.upsert({_id: match}, incMod);
That's the easy part. The hard part is figuring out why you want to use an FFA for a suggestion engine :-P.

Access the nested value in JSON without looping through it

This is my JSON, I want to directly get the zipCodes values from the JSON without looping through the JSON. How can I do it?
countries:[
{
name:'India',
states:[{
name:'Orissa',
cities:[{
name:'Sambalpur',
zipCodes:{'768019','768020'}
}]
}]
}
]
I think you are looking for
countries[0].states[0].cities[0].zipCodes
Please note, this works for the above JSON as there is only 1 country in countries array and same as for states and cities. However, if there are more than 1 country, state or city then, you will have to iterate to extract information until and unless you know the exact index.
As this is not an associative array, your option is only to use indexes like this:
countries[x].states[y].cities[0].zipCodes
Where x would be each representation of state in your array, in case, of course, that you have more than one.
Similarly y would be each state in each state in each country, in case you have more of those and you can do the same for cities if you need to.
EDIT:
Here's how you can iterate the array:
for(var c in countries)
{
var name = countries[c].name;
if (name === "CountryIAmLookingFor")
{
var statesList = countries[c].states;
for (var s in statesList)
{
var stateName = statesList[s].name;
.....
}
}
}
You can keep iterating until you find the country, state, and city you need, then extract the zipCodes from there as shown in the previous code snippet.
Without "looping"
You can do this crazy trick (not saying this is the best way, but this way you aren't looping through the JSON):
var myData = { 'Put Your Data': 'HERE' };
function getCodes(name, data) {
var sv = data.match(new RegExp(name+'([\\S\\s]*?}][\\S\\s]*?}])'))[1].match(/zipCodes":\[(.*?)\]/g), r = [];
sv.forEach(function (item) {
item.match(/\d+/g).forEach(function (sub) {
r.push(+sub);
});
});
return r;
}
getCodes('India', JSON.stringify(myData));
If your data is already string, then you don't need the JSON.stringify. The forEach you see isn't actually "looping" through the JSON. It's already extracted the zip codes and the code just adds the zip codes to the array. . This line:
var sv = JSON.stringify(data).match(new RegExp(name+'([\\S\\s]*?}][\\S\\s]*?}])'))[1].match(/zipCodes":\[(.*?)\]/g), r = [];
is what grabs the zip codes, it gets something like:
["zipCodes":["768019","768020"]"]
The next line:
item.match(/\d+/g)
will grab the numbers outputting something like:
["768019", "768020"]
The loop just adds the zip-codes to another array
With looping
You're better off looping through the JSON:
var myData = {}, // Your data
zips = [];
myData.countries.forEach(function(i) {
if (i.name === 'India') {
i.states.forEach(function(j) {
j.cities.forEach(function(l) {
l.zipCodes.forEach(function(m) {
zips.push(m);
});
});
});
}
});
//use "zips" array
PERFORMANCE AND SPEED TESTS
After testing copying an array about 500MB (half a gig) took about 30 seconds. That's a lot. Considering an extremely large JSON would be about ~5MB, looping through a little over 5MB of JSON takes about 0.14 seconds. You should never worry about speed.
Here's my "trick" for avoiding explicit iteration. Let JSON.parse or JSON.stringify do the work for you. If your JSON is in string form, try this:
var array = [];
JSON.parse(jsonString, function (key, value) {
if (key === "zipCodes") {
array = array.concat(value);
}
return value;
});
console.log(array); // all your zipCodes
Suppose your Json is like
countries =[
{
name:'India',
states:[{
name:'Orissa',
cities:[{
name:'Sambalpur',
zipCodes:768019768020
}]
},{
name:'mumbai',
cities:[{
name:'rea',
zipCodes:324243
}]
}]
}
]
So now we use MAP it will give you ZipCode of every cities
countries.map(function(s){
s.states.map(function(c){
c.cities.map(function(z){
console.log(z.zipCodes)
})
})
})
OR
If you use return statement then it will give you 2 array with two zip code as per over JSON
var finalOP = countries.map(function(s){
var Stalist = s.states.map(function(c){
var zip = c.cities.map(function(z){
return z.zipCodes
})
return zip
})
return Stalist
})
console.log(finalOP)

Combining JSON Arrays with jQuery

I've spent all morning messing with this now and reading on here, but have found myself going round in circles!
I am trying to draw a chart using the excellent AmCharts Javascript Charts, to show me stock holding as a bar chart and stock turn as a line chart.
I cannot get both sets of data from one query to my database, and cannot use AmCharts StockChart as it is not time based data... therefore, I have two sets of data which need combining with Javascript.
The data is being pulled from a database and returned successfully as JSON arrays similar to this:
SALES DATA:
[{"brandName":"Fender","gearShiftedPerMonth":"35","retailSalesPerMonth":"55"},
{"brandName":"Gibson","gearShiftedPerMonth":"23","retailSalesPerMonth":"43"},
{"brandName":"Epiphone","gearShiftedPerMonth":"10","retailSalesPerMonth":"13"}]
STOCK DATA:
[{"brandName":"Gibson","stockValue":"1234"},
{"brandName":"Fender","stockValue":"975"},
{"brandName":"Epiphone","stockValue":"834"}]
Obviously the actual figures are made up in that example!
Now, what I need to do is to combine those to create this:
COMBINED DATA
[{"brandName":"Fender","gearShiftedPerMonth":"35","retailSalesPerMonth":"55","stockValue":"975"},
{"brandName":"Gibson","gearShiftedPerMonth":"23","retailSalesPerMonth":"43","stockValue":"1234"},
{"brandName":"Epiphone","gearShiftedPerMonth":"10","retailSalesPerMonth":"13","stockValue":"834"}]
What we have there is the Sales Dataset combined with Stock Dataset to add the additional data of stockValue added to the corresponding brandName record.
I have tried using $.extend but I can't figure out how to use it in this situation.
It is perhaps important to note that the data pairs might not necessarily be in the right order, and it is possible, though unlikely, that there might not be a match, so some kind of zeroing error catching must be implemented.
What you'll need to do first is transform the data into two objects, whose properties are the values you want to merge together:
{
"Fender" : {"gearShiftedPerMonth":"35","retailSalesPerMonth":"55"},
"Gibson" : {"gearShiftedPerMonth":"23","retailSalesPerMonth":"43"},
"Epiphone" : {"gearShiftedPerMonth":"10","retailSalesPerMonth":"13"}
}
and
{
"Gibson": {"stockValue":"1234"},
"Fender": { "stockValue":"975"},
"Epiphone": { "stockValue":"834"}
}
Once the transformation is done, you'll have two objects that you can merge using $.extend or other functions.
Update
For large sets, this gives results in nearly linear time:
var salesa = {}, stocka = {};
$.each(sales, function(i, e) {
salesa[e.brandName] = e;
});
$.each(stock, function(i, e) {
stocka[e.brandName] = e;
});
var combine = {};
$.extend(true, combine, salesa, stocka)
More speed can be tweaked if the merging happened during the second transformation callback ($each(stock...) instead of a separate call to $.extend() but it loses some of its obviousness.
I think what's he's trying to do is join the two datasets as if they were tables, joining by the brandName. From what I've been testing jQuery's $.extend() function does not take care of that, but merges objects according to their index in the Object arrays that it receives.
I think the matching of the key would need to be done manually.
stock = [{"brandName":"Fender","gearShiftedPerMonth":"35","retailSalesPerMonth":"55"},
{"brandName":"Gibson","gearShiftedPerMonth":"23","retailSalesPerMonth":"43"},
{"brandName":"Epiphone","gearShiftedPerMonth":"10","retailSalesPerMonth":"13"}];
value = [{"brandName":"Gibson","stockValue":"1234"},
{"brandName":"Fender","stockValue":"975"},
{"brandName":"Epiphone","stockValue":"834"}];
var results = [];
$(stock).each(function(){
datum1 = this;
$(value).each(function() {
datum2 = this;
if(datum1.brandName == datum2.brandName)
results.push($.extend({}, datum1, datum2));
});
});
Which would result in:
[{"brandName":"Fender","gearShiftedPerMonth":"35","retailSalesPerMonth":"55","stockValue":"975"},
{"brandName":"Gibson","gearShiftedPerMonth":"23","retailSalesPerMonth":"43","stockValue":"1234"},
{"brandName":"Epiphone","gearShiftedPerMonth":"10","retailSalesPerMonth":"13","stockValue":"834"}]
Instead of what the use of $.extend() returns:
[{"brandName":"Gibson","gearShiftedPerMonth":"35","retailSalesPerMonth":"55","stockValue":"1234"},
{"brandName":"Fender","gearShiftedPerMonth":"23","retailSalesPerMonth":"43","stockValue":"975"},
{"brandName":"Epiphone","gearShiftedPerMonth":"10","retailSalesPerMonth":"13","stockValue":"834"}]
If your example code reflects reality, then jQuery's $.extend will be the wrong tool for this.
It blindly copies data from one object to another. Notice that the order of your data is not consistent. The SALES DATA has Fender first, while the STOCK DATA has gibson first.
So jQuery's $.extend is mixing the two results. The "gearShifted" and "retailSales" for Fender is ending up with the "brandName" and "stockValue" for Gibson.
What you'll need is to iterate one array, and look up the "brandName" in the other, and then copy over the data you want. You could use $.extend for that part of it if you like...
var sales_data =
[{"brandName":"Fender","gearShiftedPerMonth":"35","retailSalesPerMonth":"55"},
{"brandName":"Gibson","gearShiftedPerMonth":"23","retailSalesPerMonth":"43"},
{"brandName":"Epiphone","gearShiftedPerMonth":"10","retailSalesPerMonth":"13"}]
var stock_data =
[{"brandName":"Gibson","stockValue":"1234"},
{"brandName":"Fender","stockValue":"975"},
{"brandName":"Epiphone","stockValue":"834"}]
var combined = $.map(sales_data, function(obj, i) {
return $.extend({}, obj, $.grep(stock_data, function(stock_obj) {
return obj.brandName === stock_obj.brandName
})[0]);
});
Note that this is not terribly efficient, but unless the data set is enormous, it shouldn't be an issue.
DEMO: http://jsfiddle.net/sDyKx/
RESULT:
[
{
"brandName": "Fender",
"gearShiftedPerMonth": "35",
"retailSalesPerMonth": "55",
"stockValue": "975"
},
{
"brandName": "Gibson",
"gearShiftedPerMonth": "23",
"retailSalesPerMonth": "43",
"stockValue": "1234"
},
{
"brandName": "Epiphone",
"gearShiftedPerMonth": "10",
"retailSalesPerMonth": "13",
"stockValue": "834"
}
]
In vanilla javascript you can do:
var sales = [{"brandName":"Fender","gearShiftedPerMonth":"35","retailSalesPerMonth":"55"},
{"brandName":"Gibson","gearShiftedPerMonth":"23","retailSalesPerMonth":"43"},
{"brandName":"Epiphone","gearShiftedPerMonth":"10","retailSalesPerMonth":"13"}];
var stock = [{"brandName":"Gibson","stockValue":"1234"},
{"brandName":"Fender","stockValue":"975"},
{"brandName":"Epiphone","stockValue":"834"}];
var combined = stock.slice(0);
for (var i = 0; i < stock.length; i++) {
for (var j = 0; j < sales.length; j++) {
if (stock[i].brandName === sales[j].brandName) {
for (var attrname in sales[j]) { combined[i][attrname] = sales[j][attrname]; }
}
}
}
JSON.stringify(combined)
produces
[
{"brandName":"Gibson","stockValue":"1234","gearShiftedPerMonth":"23","retailSalesPerMonth":"43"},
{"brandName":"Fender","stockValue":"975","gearShiftedPerMonth":"35","retailSalesPerMonth":"55"},
{"brandName":"Epiphone","stockValue":"834","gearShiftedPerMonth":"10","retailSalesPerMonth":"13"}
]

Categories