Get sums by grouping a collection in Meteor

Get sums by grouping a collection in Meteor - javascript

I have a collection with fields: number, a, b, c.
I want to divide the collection in three based on the number and get separate sums of a, b, and c for each group division.
I have done this with
/**
 * Add up a list of numeric amounts.
 *
 * @param {Array<number>} amountList - Values to total (e.g. the output of `_.pluck`).
 * @returns {number} The sum of all amounts; 0 for an empty list.
 */
function sumList(amountList) {
  // Seed the fold with 0 (the additive identity); any other seed skews every total.
  return _.reduce(amountList, function(sum, amount) {
    return sum + amount;
  }, 0);
}
// Documents of each number range; fetch() turns the cursor into a plain array
var group1 = Groups.find({ number: { $lte: 32 } }).fetch();
var group2 = Groups.find({ number: { $gte: 33, $lte: 70 } }).fetch();
var group3 = Groups.find({ number: { $gte: 71 } }).fetch();
// sums for group1
var group1SumA = sumList(_.pluck(group1, "a"));
var group1SumB = sumList(_.pluck(group1, "b"));
var group1SumC = sumList(_.pluck(group1, "c"));
// sums for group2
var group2SumA = sumList(_.pluck(group2, "a"));
var group2SumB = sumList(_.pluck(group2, "b"));
var group2SumC = sumList(_.pluck(group2, "c"));
// sums for group3
var group3SumA = sumList(_.pluck(group3, "a"));
var group3SumB = sumList(_.pluck(group3, "b"));
var group3SumC = sumList(_.pluck(group3, "c"));
It works but I think the code is very ugly.
I wonder if this can be done with some smart mapping. Besides, I guess it might have bad performance.
How can these sums be optimized?

Use the aggregation framework which will have the $match pipeline operator to filter the collection on the number field. The $group pipeline step then groups all the filtered input documents and applies the accumulator expression $sum to each field to get the sums.
Your pipeline would look like this:
var pipeline = [
{
"$match": { "number": { "$lte": 32 } } /* group1 filter */
},
{
"$group": {
"_id": 0,
"sumA": { "$sum": "$a" },
"sumB": { "$sum": "$b" },
"sumC": { "$sum": "$c" }
}
}
];
You can add the meteorhacks:aggregate package to implement the aggregation in Meteor:
Add to your app with
meteor add meteorhacks:aggregate
Since this package exposes .aggregate method on Mongo.Collection instances, you can then call the method to get the resulting array with the document that has the sums. For example
if (Meteor.isServer) {
var Coll = new Mongo.Collection('collectionName');
Meteor.methods({
sumList: function (filter) {
var pipeline = [
{
"$match": filter
},
{
"$group": {
"_id": 0,
"sumA": { "$sum": "$a" },
"sumB": { "$sum": "$b" },
"sumC": { "$sum": "$c" }
}
}
];
var result = Coll.aggregate(pipeline);
return result[0];
}
});
}
if (Meteor.isClient) {
// filters
var group1 = { "number": { "$lte": 32 } };
var group2 = { "number": { "$gte": 33, "$lte": 70 } };
var group3 = { "number": { "$gte": 71 } };
Meteor.call('sumList', group1, callback);
//Meteor.call('sumList', group2, callback);
//Meteor.call('sumList', group3, callback);
function callback(err, result) {
console.log(result)
}
}

Related

Why does Mongoose always get an older snapshot of my database?

I have a database of football matches and have the following situation:
Promotion >= 6 points & Relegation < 4 points.
I am in Season 1, Division 8. I have 1 match in my database that is for season 1, it was a win so 3 points.
I then have [{"season": "1", "Score": "1-0"}, {"season": "1", "Score": "2-0"}, {"season": "2", "Score": "3-0"}]
The first two matches in the array are for season 1 so I know this is division 8.
For the third match I need to check the result of Season 1 to know what division Season 2 should be. My issue is that when I check this it is only checking based on the very first match and it is saying 3 points when it should be 9 points.
How do I force Mongoose to use the latest snapshot of my database and not one from the very start of the function?
Matches.js
const router = require('express').Router();
const Match = require('../../models/match.model');
const getSeasonData = require('./getSeasonData');
router.route('/getNewMatches').post(auth, async (req, res) => {
const matches = await Match.find();
const getDivisionBasedOnSeasonResult = async () => {
const seasonData = await getSeasonData(seasonOfLastGame);
console.log({ seasonData });
switch (seasonData[0].seasonResult) {
case "Promoted":
return seasonData[0].division - 1;
case "Remained":
return seasonData[0].division;
case "Relegated":
return seasonData[0].division + 1;
default:
console.log("result not one of the three values");
}
}
const eaMatches = [{"season": "1", "Score": "1-0"}, {"season": "1", "Score": "2-0"}, {"season": "2", "Score": "3-0"}]
let seasonOfLastGame = 1;
for (const match of eaMatches) {
if (seasonOfLastGame === season) {
division = 8;
} else {
division = await getDivisionBasedOnSeasonResult();
}
seasonOfLastGame = season;
const newMatch = new Match({
division,
});
newMatch.save()
.then(() => {
res.json('Match added!')
})
.catch(err => res.status(400).json('Error: ' + err));
};
});
module.exports = router;
getSeasonData.js
const Match = require('../../models/match.model');
/**
 * Aggregate the stored matches of one season into a season result.
 *
 * Pipeline: keep only the matches of the given season, total the points
 * (3 per win) and the number of matches, then map the points onto
 * "Promoted" (>= 6), "Remained" (>= 4) or "Relegated" (< 4).
 *
 * @param {*} seasonOfLastGame - Season value to match against the `season` field.
 * @returns {Promise<Array<Object>>} Whatever `Match.aggregate` resolves to for the pipeline.
 */
const getSeasonData = async seasonOfLastGame => {
  const stages = [
    { "$match": { season: seasonOfLastGame } },
    {
      "$group": {
        "_id": "$season",
        // A win is worth 3 points; every other result contributes 0 here.
        "points": {
          "$sum": {
            "$add": [{ "$sum": { $cond: [{ $eq: ['$result', "Win"] }, 3, 0] } }]
          }
        },
        "teamPlayed": { $sum: 1 }
      }
    },
    { "$sort": { "_id": 1 } },
    {
      "$project": {
        "seasonResult": {
          $switch: {
            // Branches are evaluated in order, so the first matching threshold wins.
            branches: [
              {
                case: { $gte: ["$points", 6] },
                then: "Promoted"
              },
              {
                case: { $gte: ["$points", 4] },
                then: "Remained"
              },
              {
                case: { $lt: ["$points", 4] },
                then: "Relegated"
              }
            ],
            default: "Result not available"
          }
        }
      }
    }
  ];
  return Match.aggregate(stages);
};
module.exports = getSeasonData;

I have fixed this by adding this
const updatedMatches = await Match.find();
into the else statement just above
division = await getDivisionBasedOnSeasonResult();

javascript grouping linked nodes

I have 2 lists - nodes and links... Now what I would want is the most efficient way to add all the directly/indirectly linked elements into different groups.... For eg, 0 is connected to 1 which is connected to 2 so nodes 0,1,2 become group 1.... node 3 is connected to 4 so it becomes group 2 and so on.... Thanks in advance for your help :) This is part of a d3 implementation am doing..
PS: These lists will easily be in tens of thousands of nodes and links.
"nodes":[
{
"id":0,
"x":1509.9862,
"y":-609.1013
},
{
"id":1,
"x":1645.9578,
"y":-85.06705
},
{
"id":2,
"x":1948.1533,
"y":-469.3646
},
{
"id":3,
"x":348.1533,
"y":-669.3646
},
{
"id":4,
"x":1448.1533,
"y":-1469.3646
}
...
]
"links":[
{
"from":0,
"to":1
},
{
"from":1,
"to":2
},
{
"from":3,
"to":4
}
...
]

This is a classic union-find (disjoint-set) problem. The idea is to treat each node as a set with a pointer to its parent (its "father"); nodes that end up under the same root are in the same group. So for your problem, we create n single-node sets at the beginning and then iterate through the links, merging the sets of the two nodes joined by each link. The running time is roughly linear in the number of nodes and links.
nodes = [{
"id": 0,
"x": 1509.9862,
"y": -609.1013
},
{
"id": 1,
"x": 1645.9578,
"y": -85.06705
},
{
"id": 2,
"x": 1948.1533,
"y": -469.3646
},
{
"id": 3,
"x": 348.1533,
"y": -669.3646
},
{
"id": 4,
"x": 1448.1533,
"y": -1469.3646
}
];
links = [{
"from": 0,
"to": 1
},
{
"from": 1,
"to": 2
},
{
"from": 3,
"to": 4
}
];
// Union-find (disjoint-set): a structure that can merge two sets and tell
// whether two elements belong to the same set.
// `father` maps every node id to its parent id; a root is its own parent.
var father = {};

/**
 * Partition nodes into groups of directly or indirectly linked nodes.
 *
 * Every node gets a `group` property: nodes without any link share group 0,
 * linked components are numbered 1, 2, ... in the order their first node
 * appears in `nodes`.
 *
 * @param {Array<{id: (number|string)}>} nodes - Nodes to group (mutated: `group` is set).
 * @param {Array<{from: (number|string), to: (number|string)}>} links - Edges between node ids.
 * @returns {Array<Array<Object>>} One array of nodes per group, ordered by group number.
 */
function group(nodes, links) {
  // Start from a clean forest so ids from an earlier call cannot leak in.
  father = {};
  // create n sets, each holding a single node
  nodes.forEach(function(node) {
    father[node.id] = node.id;
  });
  // merge the sets of every linked pair; links to unknown ids are ignored
  links.forEach(function(link) {
    var known = Object.prototype.hasOwnProperty;
    if (known.call(father, link.from) && known.call(father, link.to)) {
      union(link.from, link.to);
    }
  });
  // number each merged set and count its members
  var id = 1;
  var groupIdCnt = {};
  var groupIds = {};
  nodes.forEach(function(node) {
    var root = find(node.id);
    if (typeof groupIds[root] === 'undefined') {
      groupIds[root] = id;
      groupIdCnt[id] = 1;
      id++;
    } else {
      groupIdCnt[groupIds[root]]++;
    }
  });
  // tag every node and collect the nodes per group
  var groups = {};
  nodes.forEach(function(node) {
    var groupId = groupIds[find(node.id)];
    // a set with a single member has no links: it goes to the shared group 0
    node['group'] = groupIdCnt[groupId] === 1 ? 0 : groupId;
    if (typeof groups[node['group']] === 'undefined') {
      groups[node['group']] = [];
    }
    groups[node['group']].push(node);
  });
  return Object.values(groups);
}

/**
 * Find the root of the set containing `node`, compressing the path on the way.
 * Iterative on purpose: a long chain of links would overflow the call stack
 * with a recursive implementation.
 *
 * @param {(number|string)} node - Node id.
 * @returns {(number|string)} Id of the set's root.
 */
function find(node) {
  var root = node;
  while (father[root] !== root) {
    root = father[root];
  }
  // second pass: point every node on the walked path directly at the root
  var current = node;
  while (father[current] !== root) {
    var next = father[current];
    father[current] = root;
    current = next;
  }
  return root;
}

/**
 * Merge the set containing node1 into the set containing node2.
 *
 * @param {(number|string)} node1 - Id in the first set.
 * @param {(number|string)} node2 - Id in the second set.
 */
function union(node1, node2) {
  father[find(node1)] = find(node2);
}
// O(n), since we visit each node once
var groups = group(nodes, links);
console.log(nodes);
console.log(groups);

This code spits out an object whose keys are the node id and whose values are a group id, not necessarily sequential.
var obj = {
"links":[
{
"from":0,
"to":1
},
{
"from":1,
"to":2
},
{
"from":5,
"to":4
},
{
"from":3,
"to":4
}
]
};
var groups = {};
var nextGrp = 1;
for (var i=0, l; l = obj.links[i]; i++) {
if (groups[l.from]) {
if (groups[l.to]) {
if (groups[l.to] != groups[l.from]) {
// the two items span two different groups which must now be joined into 1
for (var j in groups) {
if (groups[j] == groups[l.to]) {
groups[j] = groups[l.from];
}
}
}
} else {
groups[l.to] = groups[l.from];
}
} else if (groups[l.to]) {
groups[l.from] = groups[l.to];
} else {
groups[l.from] = nextGrp;
groups[l.to] = nextGrp;
nextGrp++;
}
}
console.log(groups);

In the solution below I am creating groups of links that are, well, linked to each other. I do so by looping through all of the from/to combinations, and finding out if either has already been added to any of the accumulating groups of links. If they have, then I just add (or re-add) both the from and to value to that group. If neither the from nor to value has yet been grouped, then I make a new group and add both the from and to values to it. Note that these "groups" are actually Javascript sets, a new ES6/ES2015 data type that makes it much easier to deal with "groups" of elements for which no duplicates are needed and/or allowed.
Once the groups/sets of links are established, I then simply add an attribute to each node that indicates which group of links it is a part of.
Note that, for the sake of this demo code, I've simplified/de-cluttered some node values. I've also added some extra links, just to demonstrate some further cases that need handling.
/**
 * Tag every linked node with the index of the connected group it belongs to.
 *
 * Groups are numbered from 0 in the order their first link appears in `links`.
 * Nodes that take part in no link are left without a `group` property.
 * Two groups that are bridged by a later link are merged into one.
 *
 * @param {Array<{id: *}>} nodes - Nodes to tag (mutated in place).
 * @param {Array<{from: *, to: *}>} links - Edges between node ids.
 * @returns {Array<Object>} The same `nodes` array.
 */
const groupNodes = (nodes, links) => {
  // Disjoint-set forest over the ids that occur in links: id -> parent id.
  const parent = new Map();

  // Root lookup with path compression (iterative, safe for long chains).
  const find = (id) => {
    let root = id;
    while (parent.get(root) !== root) root = parent.get(root);
    let current = id;
    while (parent.get(current) !== root) {
      const next = parent.get(current);
      parent.set(current, root);
      current = next;
    }
    return root;
  };

  for (const { from, to } of links) {
    if (!parent.has(from)) parent.set(from, from);
    if (!parent.has(to)) parent.set(to, to);
    // Joining the two roots also merges groups that were separate until now.
    parent.set(find(to), find(from));
  }

  // Number the final groups by the first link that touches them.
  const groupIndex = new Map();
  for (const { from } of links) {
    const root = find(from);
    if (!groupIndex.has(root)) groupIndex.set(root, groupIndex.size);
  }

  nodes.forEach((node) => {
    if (parent.has(node.id)) node.group = groupIndex.get(find(node.id));
  });
  return nodes;
};
const nodes = [
{
"id":0,
"x":0,
"y":0
},
{
"id":1,
"x":11,
"y":-11
},
{
"id":2,
"x":22,
"y":-22
},
{
"id":3,
"x":33,
"y":-33
},
{
"id":4,
"x":44,
"y":-44
},
{
"id":5,
"x":55,
"y":-55
},
{
"id":6,
"x":66,
"y":-66
}
];
const links = [
{
"from": 0,
"to" : 1
},
{
"from": 1,
"to" : 2
},
{
"from": 2,
"to" : 0
},
{
"from": 3,
"to" : 4
},
{
"from": 4,
"to" : 5
},
{
"from": 6,
"to" : 0
}
];
console.log(JSON.stringify(groupNodes(nodes, links)));

Mongodb: use $sample after $group

I have the following data set:
{company:"One", employee:"John"},
{company:"One", employee:"Mike"},
{company:"One", employee:"Donald"},
{company:"One", employee:"Mickey"},
{company:"Two", employee:"Johnny"},
{company:"Two", employee:"David"},
Ideally, I want a query that returns all distinct companies, number of employees for each company, random employee for each company
{Company: "One" , employee_count=4, randomemployee="Donald"},
{Company: "Two" , employee_count=2, randomemployee="David"},
I do find a way to get company and employee_count using aggregate/group
However I don't find a way to add the randomemployee with the same query.
My aggregation:
function aggr (collection,cb){
collection.aggregate(([{$group:{_id:'$company',total:{$sum:1}}},{$sort:{total:-1}}]),function(err, l1){
cb(null, l1)
})
}
I began an other Sample function:
function onesample (collection,arg,cb){
collection.aggregate(([{ $match: { "company": arg }},{ $sample: { size: 1 }}]),function(err, item){
cb(null, item[0].employee)
})
}
But I'm losing myself in the callbacks and loops.
Any elegant way to do this within one query?
Thanks a lot.
following your answer, I tried the following code.
I have an issue with the callback of async.foreachof, seems it doesn't finish before leaving to next step: any clue?
var async = require("async");
var MongoClient = require('mongodb').MongoClient;
var assert = require('assert');
var url = 'mongodb://localhost:27017/eyc0';
async.waterfall ([
function(cb) {
MongoClient.connect(url, function(err, db) {
cb(null,db)
})
},
function (db, cb) {
db.collection('kodes', function(err, coll) {
cb(null,db,coll)
})
},
function (db,coll, cb) {
var pipeline = [
{"$group": {"_id": "$ouat","total": { "$sum": 1}}},
{"$sort":{"total":-1} },
{"$project":{"_id": 0,"total":1,"company": "$_id"}}];
coll.aggregate(pipeline).toArray(function(err, dlist){
cb(null,db,coll,dlist)
})
},
function (db,coll,dlist, cb) {
// console.log(dlist)
cb(null,db,coll,dlist)
},
function (db,coll,dlist, cb) {
var dlist2 = []
async.forEachOf(
dlist,
function(item, key, cb){
var pipeline = [{ "$match": { "ouat": item.company } },{ "$sample": { size: 1 } }];
coll.aggregate(pipeline, function (err, data) {
item["randref"] = data[0].code;
console.log(item.company)
dlist2.push(item)
cb()
});
}
);
cb(null,db,coll,dlist,dlist2);
},
function (db,coll,dlist,dlist2, cb) {
console.log(dlist2)
console.log(dlist)
},
])

There is one approach that needs only a single query. It comes close, but it is not as performant (because it uses $unwind) and it won't give you exactly the desired result (it returns only the filtered company):
// Single-query variant: count the employees per company, then pick one random
// employee of the company given in `arg`.
var pipeline = [
  // One document per company with its head count and all employee names
  {
    "$group": {
      "_id": "$company",
      "total": { "$sum": 1 },
      "employees": { "$push": "$employee" }
    }
  },
  // Rename the fields to the requested output shape
  {
    "$project": {
      "_id": 0,
      "company": "$_id",
      "employee_count": "$total",
      "randomemployee": "$employees"
    }
  },
  // One document per employee, so that $sample can pick a single one
  { "$unwind": "$randomemployee" },
  { "$match": { "company": arg } },
  { "$sample": { size: 1 } }
];
collection.aggregate(pipeline, function(err, result){
  if (err) return console.error(err);
  console.log(result);
});
However, for a solution that uses callbacks from multiple queries, this can be handled easily with use of async module.
To get all distinct companies, number of employees for each company, random employee for each company consider using the async.waterfall() function where the first task returns the aggregation results with all distinct companies and number of employees for each company.
The second task iterates over the results from task 1 using async.forEachOf(). This allows you to perform an asynchronous task for each item and, when they're all done, do something else. For each document in the array, run the aggregation operation that uses the $sample operator to get a random document for the specified company. With each result, create an extra field holding the random employee and push the document to an array with the final results, which you can access once all the tasks are done.
Below shows this approach:
var async = require("async");
async.waterfall([
  // Task 1: all distinct companies with their employee count
  function(callback) {
    var pipeline = [
      {
        "$group": {
          "_id": "$company",
          "total": { "$sum": 1 }
        }
      },
      {
        "$project": {
          "_id": 0,
          "company": "$_id",
          // "$total" (with the $) references the field computed by $group
          "employee_count": "$total"
        }
      }
    ];
    collection.aggregate(pipeline, function(err, results){
      if (err) return callback(err);
      // Error-first convention: the first argument is reserved for the error,
      // so the data for the next task goes second.
      callback(null, results);
    });
  },
  // Task 2: load a random employee for each of the aggregated results of task 1
  // (only runs after task 1 has called its callback)
  function(results, callback) {
    var docs = [];
    async.forEachOf(
      results,
      function(value, key, done) {
        var pipeline = [
          { "$match": { "company": value.company } },
          { "$sample": { size: 1 } }
        ];
        collection.aggregate(pipeline, function (err, data) {
          if (err) return done(err);
          value["randomemployee"] = data[0].employee;
          docs.push(value);
          done();
        });
      },
      // Called once every item has been processed, or as soon as one of them fails
      function(err) {
        if (err) return callback(err);
        callback(null, docs);
      }
    );
  },
], function(err, result) {
    // NOTE(review): `next` is not defined in this snippet; presumably it comes
    // from an enclosing Express-style handler - confirm before reuse.
    if (err) return next(err);
    console.log(JSON.stringify(result, null, 4));
  }
);
The async.series() function is useful if you need to execute a set of async functions in a certain order.
Consider the following approach if you wish to get all the distinct companies and their employee counts as one result and a random employee as another:
var async = require("async"),
    locals = {},
    company = "One";
async.series([
  // Task 1: load one random document of the chosen company
  function(callback) {
    var pipeline = [
      { "$match": { "company": company } },
      { "$sample": { size: 1 } }
    ];
    collection.aggregate(pipeline, function(err, result){
      if (err) return callback(err);
      locals.randomcompany = result[0];
      callback();
    });
  },
  // Task 2: load the full aggregation results
  // (won't be called before task 1's "task callback" has been called)
  function(callback) {
    var pipeline = [
      {
        "$group": {
          "_id": "$company",
          "total": { "$sum": 1 }
        }
      },
      {
        "$project": {
          "_id": 0,
          "company": "$_id",
          // "$total" (with the $) references the field computed by $group
          "employee_count": "$total"
        }
      }
    ];
    collection.aggregate(pipeline, function(err, result){
      if (err) return callback(err);
      locals.aggregation = result;
      callback();
    });
  }
], function(err) { // This function gets called after the two tasks have called their "task callbacks"
    // NOTE(review): `next` is not defined in this snippet; presumably it comes
    // from an enclosing Express-style handler - confirm before reuse.
    if (err) return next(err);
    // Here locals will be populated with 'randomcompany' and 'aggregation'
    console.log(JSON.stringify(locals, null, 4));
  }
);

db.comp.aggregate([
{$group:{_id:'$company',emp:{$addToSet:'$employee'}}},
{$project:{emp:1,employee_count:{'$size':'$emp'},
randomvalue:{'$literal':Math.random()}}},
{$project:{emp:1,employee_count:1,
randomposition:{'$floor':
{'$multiply':['$randomvalue', '$employee_count']}}}},
{$project:{'Company':'$_id', _id:0, employee_count:1,
randomemployee:{'$arrayElemAt':['$emp','$randomposition']}}},
{$sort:{Company:1}} ])
Seems to work!
A couple of results:
{ "employee_count" : 4, "Company" : "One", "randomemployee" : "Mike" }
{ "employee_count" : 2, "Company" : "Two", "randomemployee" : "Johnny" }
{ "employee_count" : 4, "Company" : "One", "randomemployee" : "Mickey" }
{ "employee_count" : 2, "Company" : "Two", "randomemployee" : "David" }

skipped count 0 in aggregate function

I'm stuck on this for couple of days. I'm trying to get the count: 0 where there is no documents in the given time period. This is the aggregate function I'm using at the moment:
var getCount = function(timeBlock, start, end, cb) {
Document.aggregate(
{
$match: {
time: {
$gte: new Date(start),
$lt: new Date(end)
}
}
},
{
$project: {
time: 1,
delta: { $subtract: [
new Date(end),
'$time'
]}
}
},
{
$project: {
time: 1,
delta: { $subtract: [
"$delta",
{ $mod: [
"$delta",
timeBlock
]}
]}
}
},
{
$group: {
_id: { $subtract: [
end,
"$delta"
]},
count: { $sum: 1 }
}
},
{
$project: {
time: "$_id",
count: 1,
_id: 0
}
},
{
$sort: {
time: 1
}
}, function(err, results) {
if (err) {
cb(err)
} else {
cb(null, results)
}
})
}
I tried using $cond, but with no luck

The group stage is producing documents based on grouping on your given _id and counting the number of documents from the previous stage that end up in the group. Hence, a count of zero would be the result of a document being created from 0 input documents belonging to the group. Thinking about it this way, it's clear that there's no way the aggregation pipeline can do this for you. It doesn't know what all of the "missing" time periods are and it can't invent the appropriate documents out of thin air. Reapplying your extra knowledge about the missing time periods to complete the picture at the end seems like a reasonable solution (not "hacky") if you need to have an explicit count of 0 for empty time periods.

Though it has already been said the best thing to do here is "merge" your results post process rather than expect "keys" that do not exist to appear or to issue multiple queries with explicit keys that are possibly not going to aggregate results and combine them.
What has not already been said is how you actually do this, so I'll give you a MongoDB "thinking" kind of way to collect your results.
As a quick disclaimer, you could possibly employ much the same approach by "seeding" empty keys for each interval using mapReduce, or possibly even by altering your data so that there is always an empty value within each possible block. Those approaches seem basically "hacky", and the mapReduce one is not going to provide the best performance or multiple results.
What I would suggest is that working with collection results for the MongoDB brain can be made simple. There is a neat little solution called neDB, which is billed as a kind of SQL Lite for MongoDB. It supports a subset of functionality and is therefore perfect for "in memory" manipulation of results with a MongoDB mindset:
var async = require('async'),
mongoose = require('mongoose'),
DataStore = require('nedb'),
Schema = mongoose.Schema;
var documentSchema = new Schema({
time: { type: Date, default: Date.now }
});
var Document = mongoose.model( "Document", documentSchema );
mongoose.connect('mongodb://localhost/test');
var getCount = function(timeBlock, start, end, callback) {
async.waterfall(
[
// Fill a blank series
function(callback) {
var db = new DataStore();
var current = start;
async.whilst(
function() { return current < end },
function(callback) {
var delta = end - current;
db.insert({ "_id": end - delta, "count": 0 },function(err,doc) {
//console.log( doc );
current += timeBlock;
callback(err);
});
},
function(err) {
callback(err,db);
}
);
},
// Get data and update
function(db,callback) {
var cursor = Document.collection.aggregate(
[
// Match documents
{ "$match": {
"time": {
"$gte": new Date(start),
"$lt": new Date(end)
}
}},
// Group. 1 step and less hacky
{ "$group": {
"_id": {
"$let": {
"vars": {
"delta": {
"$subtract": [
{ "$subtract": [ new Date(end), "$time" ] },
{ "$mod": [
{ "$subtract": [ new Date(end), "$time" ] },
timeBlock
]}
]
}
},
"in": { "$subtract": [ end, "$$delta" ] }
}
},
"count": { "$sum": 1 }
}}
],
{ "cursor": { "batchSize": 100 } }
);
cursor.on("data",function(item) {
cursor.pause();
console.log( "called" );
db.update(
{ "_id": item._id },
{ "$inc": { "count": item.count } },
{ "upsert": true },
function(err) {
cursor.resume();
}
);
});
cursor.on("end",function() {
console.log( "done" );
db.find({},function(err,result) {
callback(err,result);
});
});
}
],
function(err,result) {
callback(err,result);
}
);
}
mongoose.connection.on("open", function(err,conn) {
getCount(
1000 * 60 * 60, // each hour
new Date("2014-07-01").valueOf(), // start
new Date("2014-07-02").valueOf(), // end
function(err,result) {
if (err) throw err;
console.log( result );
}
);
});
So essentially create each interval as in memory collection and then just update those interval records with the actual data retrieved. I can't think of another way to do that where it would be more simple and natural to the way of thinking.
Just a footnote, the "interval" logic is just replicated from your question, but in fact the time periods are "rounded up" where 15 minutes would appear in hour 1. It usually is the practice to round down so that everything belongs to the interval it falls in and not the next one.

this is hacky fix I did for now:
/**
 * Count documents per time block and fill blocks without documents with 0.
 *
 * @param {number} timeBlock - Length of one block in milliseconds.
 * @param {number} start - Start of the period (epoch milliseconds).
 * @param {number} end - End of the period (epoch milliseconds).
 * @param {function(?Error, Array<{count: number, time: number}>=)} cb -
 *   Error-first callback receiving one entry per block, ordered by time.
 */
var getCount = function(timeBlock, start, end, cb) {
  Document.aggregate(
    {
      $match: {
        time: {
          $gte: new Date(start),
          $lt: new Date(end)
        }
      }
    },
    // Distance of every document from the end of the period
    {
      $project: {
        time: 1,
        delta: { $subtract: [
          new Date(end),
          '$time'
        ]}
      }
    },
    // Round the distance down to a whole number of blocks
    {
      $project: {
        time: 1,
        delta: { $subtract: [
          "$delta",
          { $mod: [
            "$delta",
            timeBlock
          ]}
        ]}
      }
    },
    {
      $group: {
        _id: { $subtract: [
          end,
          "$delta"
        ]},
        count: { $sum: 1 }
      }
    },
    {
      $project: {
        time: "$_id",
        count: 1,
        _id: 0
      }
    },
    {
      $sort: {
        time: 1
      }
    }, function(err, results) {
      if (err) {
        cb(err);
        return;
      }
      var numOfTimeBlocks = ( end - start ) / timeBlock;
      // If every block already has a result there are no gaps to fill.
      if ( results.length === numOfTimeBlocks ) {
        cb(null, results);
        return;
      }
      // Index the results by block time so each block is a constant-time lookup.
      var byTime = new Map();
      results.forEach(function(item) {
        if (!byTime.has(item.time)) {
          byTime.set(item.time, item);
        }
      });
      var time = start;
      var details = [];
      for ( var i = 0; i < numOfTimeBlocks; i++ ) {
        // Blocks are labelled by their end time, hence the increment comes first.
        time += timeBlock;
        details.push(byTime.has(time) ? byTime.get(time) : { count: 0, time: time });
      }
      cb(null, details);
    });
};
I was also thinking about doing one query per time block, which gives the same result but I think is inefficient because you query the database N times.

How to sort array inside collection record in MongoDB?

I have a collection of students, each with a record that looks like the following and I want to sort the scores array in descending order of score.
what does that incantation look like on the mongo shell?
> db.students.find({'_id': 1}).pretty()
{
"_id" : 1,
"name" : "Aurelia Menendez",
"scores" : [
{
"type" : "exam",
"score" : 60.06045071030959
},
{
"type" : "quiz",
"score" : 52.79790691903873
},
{
"type" : "homework",
"score" : 71.76133439165544
},
{
"type" : "homework",
"score" : 34.85718117893772
}
]
}
I'm trying this incantation....
doc = db.students.find()
for (_id,score) in doc.scores:
print _id,score
but it's not working.

You will need to manipulate the embedded array in your application code or using the new Aggregation Framework in MongoDB 2.2.
Example aggregation in the mongo shell:
db.students.aggregate(
// Initial document match (uses index, if a suitable one is available)
{ $match: {
_id : 1
}},
// Expand the scores array into a stream of documents
{ $unwind: '$scores' },
// Filter to 'homework' scores
{ $match: {
'scores.type': 'homework'
}},
// Sort in descending order
{ $sort: {
'scores.score': -1
}}
)
Sample output:
{
"result" : [
{
"_id" : 1,
"name" : "Aurelia Menendez",
"scores" : {
"type" : "homework",
"score" : 71.76133439165544
}
},
{
"_id" : 1,
"name" : "Aurelia Menendez",
"scores" : {
"type" : "homework",
"score" : 34.85718117893772
}
}
],
"ok" : 1
}

Starting in Mongo 5.2, it's the exact use case for the new $sortArray aggregation operator:
// {
// name: "Aurelia Menendez",
// scores: [
// { type: "exam", score: 60.06 }
// { type: "quiz", score: 52.79 }
// { type: "homework", score: 71.76 }
// { type: "homework", score: 34.85 }
// ]
// }
db.collection.aggregate([
{ $set: {
scores: {
$sortArray: {
input: "$scores",
sortBy: { score: -1 }
}
}
}}
])
// {
// name: "Aurelia Menendez",
// scores: [
// { type: "homework", score: 71.76 },
// { type: "exam", score: 60.06 },
// { type: "quiz", score: 52.79 },
// { type: "homework", score: 34.85 }
// ]
// }
This:
sorts ($sortArray) the scores array (input: "$scores")
by applying a sort on scores (sortBy: { score: -1 })
without having to apply a combination of expensive $unwind, $sort and $group stages

Since this question can be handled in different ways, I want to point out another solution: "insert and sort". The array is sorted at the moment you update it, so you get it already ordered whenever you run a find().
Consider this data:
{
"_id" : 5,
"quizzes" : [
{ "wk": 1, "score" : 10 },
{ "wk": 2, "score" : 8 },
{ "wk": 3, "score" : 5 },
{ "wk": 4, "score" : 6 }
]
}
Here we will update the Document, make the Sort.
db.students.update(
{ _id: 5 },
{
$push: {
quizzes: {
$each: [ { wk: 5, score: 8 }, { wk: 6, score: 7 }, { wk: 7, score: 6 } ],
$sort: { score: -1 },
$slice: 3 // keep the first 3 values
}
}
}
)
Result is:
{
"_id" : 5,
"quizzes" : [
{ "wk" : 1, "score" : 10 },
{ "wk" : 2, "score" : 8 },
{ "wk" : 5, "score" : 8 }
]
}
Documentation:
https://docs.mongodb.com/manual/reference/operator/update/sort/#up._S_sort

That's how we could solve this with JS and mongo console:
// For every student with homework scores, remove the lowest homework score.
db.students.find({"scores.type": "homework"}).forEach(
  function(s){
    // Look at homework entries only; other score types must not influence the minimum.
    var homeworkScores = s.scores
      .filter(function(entry){ return entry.type == "homework"; })
      .map(function(entry){ return entry.score; });
    if (homeworkScores.length === 0) {
      return;
    }
    var lowestHomeworkScore = Math.min.apply(Math, homeworkScores);
    // Match on type as well, so an exam or quiz with the same score is kept.
    // Note: $pull removes every array element that matches the condition.
    db.students.update({_id: s._id},{$pull: {scores: {type: "homework", score: lowestHomeworkScore}}}, {multi: true});
  })

In order to sort array, follow these steps:
1) use unwind to iterate through array
2) sort array
3) use group to merge objects of array into one array
4) then project other fields
Query
db.taskDetails.aggregate([
{$unwind:"$counter_offer"},
{$match:{_id:ObjectId('5bfbc0f9ac2a73278459efc1')}},
{$sort:{"counter_offer.Counter_offer_Amount":1}},
{$unwind:"$counter_offer"},
{"$group" : {_id:"$_id",
counter_offer:{ $push: "$counter_offer" },
"task_name": { "$first": "$task_name"},
"task_status": { "$first": "$task_status"},
"task_location": { "$first": "$task_location"},
}}
]).pretty()

Here is the java code which can be used to find out the lowest score in the array and remove it.
public class sortArrayInsideDocument{
public static void main(String[] args) throws UnknownHostException {
MongoClient client = new MongoClient();
DB db = client.getDB("school");
DBCollection lines = db.getCollection("students");
DBCursor cursor = lines.find();
try {
while (cursor.hasNext()) {
DBObject cur = cursor.next();
BasicDBList dbObjectList = (BasicDBList) cur.get("scores");
Double lowestScore = new Double(0);
BasicDBObject dbObject = null;
for (Object doc : dbObjectList) {
BasicDBObject basicDBObject = (BasicDBObject) doc;
if (basicDBObject.get("type").equals("homework")) {
Double latestScore = (Double) basicDBObject
.get("score");
if (lowestScore.compareTo(Double.valueOf(0)) == 0) {
lowestScore = latestScore;
dbObject = basicDBObject;
} else if (lowestScore.compareTo(latestScore) > 0) {
lowestScore = latestScore;
dbObject = basicDBObject;
}
}
}
// remove the lowest score here.
System.out.println("object to be removed : " + dbObject + ":"
+ dbObjectList.remove(dbObject));
// update the collection
lines.update(new BasicDBObject("_id", cur.get("_id")), cur,
true, false);
}
} finally {
cursor.close();
}
}
}

It's easy enough to guess, but anyway, try not to cheat on the MongoDB University courses, because then you won't understand the basics.
// Print every student's scores with the lowest homework score removed.
// Only the in-memory copy is changed; nothing is written back to the collection.
db.students.find({}).forEach(function(student){
  var scoresObjects = student.scores,
      // scores of the homework entries only
      homeworkArray = scoresObjects
        .filter(function(obj){ return obj.type === "homework"; })
        .map(function(obj){ return obj.score; });
  if (homeworkArray.length > 0) {
    var minHomeworkScore = Math.min.apply(Math, homeworkArray);
    // Locate the entry itself: searching the array of objects for the bare
    // number never matches and would make splice() drop the last element.
    var lowestIndex = -1;
    for (var i = 0; i < scoresObjects.length; i++) {
      if (scoresObjects[i].type === "homework" && scoresObjects[i].score === minHomeworkScore) {
        lowestIndex = i;
        break;
      }
    }
    if (lowestIndex !== -1) {
      scoresObjects.splice(lowestIndex, 1);
    }
  }
  printjson(scoresObjects);
});

Order Title and Array title also and return whole collection data Collection name is menu
[
{
"_id": "5f27c5132160a22f005fd50d",
"title": "Gift By Category",
"children": [
{
"title": "Ethnic Gift Items",
"s": "/gift?by=Category&name=Ethnic"
},
{
"title": "Novelty Gift Items",
"link": "/gift?by=Category&name=Novelty"
}
],
"active": true
},
{
"_id": "5f2752fc2160a22f005fd50b",
"title": "Gift By Occasion",
"children": [
{
"title": "Gifts for Diwali",
"link": "/gift-for-diwali"
},
{
"title": "Gifts for Ganesh Chathurthi",
"link": "/gift-for-ganesh-chaturthi",
}
],
"active": true
}
]
Query as below
let menuList = await Menu.aggregate([
{
$unwind: '$children'
},
{
$sort:{"children.title":1}
},
{
$group : { _id : "$_id",
root: { $mergeObjects: '$$ROOT' },
children: { $push: "$children" }
}
},
{
$replaceRoot: {
newRoot: {
$mergeObjects: ['$root', '$$ROOT']
}
}
},
{
$project: {
root: 0
}
},
{
$match: {
$and:[{'active':true}],
}
},
{
$sort:{"title":1}
}
]);

I believe you are doing M101P: MongoDB for Developers where homework 3.1 is to remove the lower one from two homework scores. Since aggregations were not taught up to that point you can do something like this:
import pymongo
conn = pymongo.MongoClient('mongodb://localhost:27017')
db = conn.school
students = db.students
for student_data in students.find():
smaller_homework_score_seq = None
smaller_homework_score_val = None
for score_seq, score_data in enumerate(student_data['scores']):
if score_data['type'] == 'homework':
if smaller_homework_score_seq is None or smaller_homework_score_val > score_data['score']:
smaller_homework_score_seq = score_seq
smaller_homework_score_val = score_data['score']
students.update({'_id': student_data['_id']}, {'$pop': {'scores': smaller_homework_score_seq}})

This is my approach using pyMongo, the Python driver to MongoDB:
import pymongo

conn = pymongo.MongoClient('mongodb://localhost')


def remove_lowest_hw():
    """Remove the lowest homework score from every student in school.students.

    The stored `scores` arrays are first sorted by ascending score, so the
    first homework entry of each array is that student's lowest one; that
    entry is then pulled from the array. As a side effect the arrays stay
    sorted by score afterwards.
    """
    db = conn.school
    students = db.students
    # first sort scores in ascending order
    students.update_many({}, {'$push': {'scores': {'$each': [], '$sort': {'score': 1}}}})
    # then collect the lowest homework score for each student via projection
    cursor = students.find({}, {'scores': {'$elemMatch': {'type': 'homework'}}})
    # iterate over each student, trimming each of the lowest homework score
    for stu in cursor:
        # The projection omits `scores` when a student has no homework entry.
        lowest = stu.get('scores')
        if not lowest:
            continue
        # Match on the type as well, otherwise an exam or quiz that happens
        # to have the same score would be removed too.
        students.update_one(
            {'_id': stu['_id']},
            {'$pull': {'scores': {'type': 'homework', 'score': lowest[0]['score']}}},
        )


try:
    remove_lowest_hw()
finally:
    # Release the connection even when the update fails part-way.
    conn.close()

This works for me. The code is a little rough, but it correctly finds the lowest homework score for each student.
// Remove the lowest homework score from every student that has homework.
// Each student is handled on its own, so the script no longer depends on
// every student having exactly two homework entries stored side by side.
db.students.find({"scores.type": "homework"}).forEach(
    function(s){
        // Index of the lowest homework entry inside s.scores, -1 if none.
        var lowest_index = -1;
        s.scores.forEach(
            function(ss, index){
                if(ss.type !== "homework"){
                    return;
                }
                if(lowest_index === -1 || ss.score < s.scores[lowest_index].score){
                    lowest_index = index;
                }
            }
        );
        if(lowest_index === -1){
            return;
        }
        // Drop exactly that one entry and write the array back. A $pull on
        // the score value alone would also remove any exam/quiz (or second
        // homework) that happens to share the same score.
        s.scores.splice(lowest_index, 1);
        db.students.update({_id: s._id}, {$set: {scores: s.scores}});
    }
);

This is how I have implemented it in Java (I have kept it simple so that it's easier to understand) -
Approach :
Get scores array from student collection
Get all score values from scores array where type == homework
Sort the score values so that lowest becomes 1st element [score.get(0)]
Then, loop through the main scores and create new copy of scores array while skipping elements where type == homework && score == scores.get(0)
Finally, update the new scores array to student document.
Below is working Java code:
/**
 * Removes the lowest "homework" score from the scores array of every
 * document in the school.students collection.
 *
 * Students without a scores array or without any homework entry are left
 * untouched. When several homework entries share the lowest value, only the
 * first of them is removed.
 */
public void removeLowestScore(){
    // try-with-resources: the client (and its connections) is closed even
    // when an update throws.
    try (MongoClient client = new MongoClient("localhost")) {
        MongoDatabase database = client.getDatabase("school");
        MongoCollection<Document> collection = database.getCollection("students");

        for (Document document : collection.find()) {
            //Get scores array
            ArrayList<Document> scores = document.get("scores", ArrayList.class);
            if (scores == null) {
                continue;
            }

            //Find the position of the lowest score where type = homework
            int lowestIndex = -1;
            double lowestValue = 0;
            for (int i = 0; i < scores.size(); i++) {
                Document score = scores.get(i);
                // Constant first so a missing "type" cannot cause a NullPointerException
                if (!"homework".equalsIgnoreCase(score.getString("type"))) {
                    continue;
                }
                Double value = score.getDouble("score");
                if (value == null) {
                    continue;
                }
                // Compare the unboxed values; == on two Double objects
                // would compare references, not numbers.
                if (lowestIndex == -1 || value < lowestValue) {
                    lowestIndex = i;
                    lowestValue = value;
                }
            }

            // No homework entry: nothing to remove for this student
            if (lowestIndex == -1) {
                continue;
            }

            //Copy the scores and drop the lowest homework entry
            List<Document> newScoresArray = new ArrayList<Document>(scores);
            newScoresArray.remove(lowestIndex);

            //Update the scores array using the student _id, whatever its type
            collection.updateOne(Filters.eq("_id", document.get("_id")), new Document("$set", new Document("scores", newScoresArray)));
        }
    }
}

Certainly it's late, but I just want to contribute my own solution on Mongo Shell:
// For every student, rewrite `scores` without its lowest homework entry.
// The cursor is consumed with forEach instead of being indexed like an
// array, and no loop counter leaks into the global scope.
db.getCollection('students').find({}).forEach(function (student) {
    var scores = student.scores || [];

    // Position of the lowest homework entry; null while none has been seen
    // (a numeric sentinel such as -1 could collide with a real score).
    var minIndex = null;
    for (var j = 0; j < scores.length; j++) {
        if (scores[j].type != 'homework') {
            continue;
        }
        if (minIndex === null || scores[j].score < scores[minIndex].score) {
            minIndex = j;
        }
    }

    // Nothing to remove when the student has no homework entry.
    if (minIndex === null) {
        return;
    }

    // Keep every entry except the lowest homework one, in original order.
    var tmp = scores.filter(function (score, index) {
        return index !== minIndex;
    });
    db.students.updateOne({_id: student._id},
        {$set: {scores: tmp}});
});

The answer from @Stennie is fine; a $group operator might also be useful to keep the original document, without exploding it into many documents (one per score).
I am just adding another solution for when you use JavaScript in your application.
If you query only one document, it's sometimes easier to sort the embedded array in JS instead of doing an aggregate.
When your document has a lot of fields, it's even better than using the $push operator; otherwise you have to push all the fields one by one, or use the $$ROOT operator (am I wrong?).
My example code uses Mongoose.js:
Suppose you have initialized your Students model.
// Comparator that orders score entries by ascending `score`:
// negative when a ranks first, positive when b ranks first, 0 on a tie.
function compare(a, b) {
    const { score: left } = a;
    const { score: right } = b;
    return left - right;
}
// Load a single student and order its embedded scores in application code.
Students.findById('1', function(err, foundDocument){
    if (err) {
        // Report the failed lookup instead of dereferencing a missing document.
        console.error(err);
        return;
    }
    if (!foundDocument) {
        // No student with this id: nothing to sort.
        return;
    }
    foundDocument.scores = foundDocument.scores.sort(compare);
    // do what you want here...
    // foundDocument keeps all its fields
});

Sorting by the score can be as simple as:
// Fetch student 137 and sort the result on `score`, descending, pretty-printed.
// NOTE(review): sort() orders the matched documents by a top-level `score`
// field; it presumably does not reorder the embedded `scores` array — confirm.
db.students.find({_id:137}).sort({score:-1}).pretty()
but you need to find the one for type:homework ...

it should be something like this:
// sort() takes a sort document; dot notation reaches the `score` field of the
// entries in the embedded `scores` array (the original call was not valid syntax).
db.students.find().sort({"scores.score": -1});

We Keep Coding

JavaScript is the programming language of the Web.

Get sums by grouping a collection in Meteor - javascript

Related

Why does Mongoose always get an older snapshot of my database?

javascript grouping linked nodes

Mongodb: use $sample after $group

skipped count 0 in aggregate function

How to sort array inside collection record in MongoDB?

Categories

Resources