Calculating multiple averages using MongoDB Aggregation

Calculating multiple averages using MongoDB Aggregation - javascript

I've been tasked with generating averages for day, week, month, and year for a rather large set of documents in MongoDB.
All of the jobs have a created field, and I need to base the average values off of the outputs array...
Here's what a document looks like:
{
__v: 0,
_id: ObjectId("535837911393fd0200d8e1eb"),
created: ISODate("2014-04-23T21:58:41.446Z"),
output: [
{
ref: {
img: false
},
type: "image/png",
methods: [
{
options: {
height: 200,
width: 200
},
method: "resize"
}
]
},
{
ref: {
img: false
},
type: "image/png",
methods: [
{
options: {
height: 400,
width: 400
},
method: "resize"
}
]
}
]
}
And here is what my current script looks like:
JobModel.aggregate([
{
$unwind: '$output'
},
{
$group: {
_id: { $dayOfYear: '$created' },
day: { $sum: 1 }
}
},
{
$group: {
_id: null,
avgDay: { $avg: '$day' }
}
},
{
$project: {
_id: 0,
average: {
day: '$avgDay'
}
}
}
],
function(err, data) {
if (err) {
console.log(err);
return;
}
res.send(data);
next();
});
I cannot seem to figure out the right order for this. Any suggestions?

Really not that sure what you are after here. You say that you want "multiple" averages but that brings up the question of "muliple" over what basis? The average "output" entries over a individual day would be different from the average output entries per month or even per daily average per month. So the scale changes with each selection and is not really a single query for "daily", "monthly" and "yearly"
I would seem that you really was "discrete" totals which would be best approached by first finding the "size" of the output entries and then applying an average per scale:
JobModel.aggregate(
[
{ "$unwind": "$output" },
// Count the array entries on the record
{ "$group": {
"_id": "$_id",
"created": { "$first": "$created" },
"count": { "$sum": 1 }
}},
// Now get the average per day
{ "$group": {
"_id": { "$dayOfYear": "$created" },
"avg": { "$avg": "$count" }
}}
],
function(err,result) {
}
);
Or actually with MongoDB 2.6 and greater you can just use the $size operator on the array:
JobModel.aggregate(
[
// Now get the average per day
{ "$group": {
"_id": { "$dayOfYear": "$created" },
"avg": { "$avg": { "$size": "$output" } }
}}
],
function(err,result) {
}
);
So the logical thing is to run each of those within your required $match range other your aggregation key of either "day", "month" or "year"
You could do something like combining the daily averages per day, with the daily average per month and then daily for year by combining results into arrays, otherwise you would just be throwing items away, which can be alternately done if you "just" wanted the daily average for the year, but as full results:
JobModel.aggregate(
[
// Now get the average per day
{ "$group": {
"_id": {
"year": { "$year": "$created" },
"month": { "$month": "$created" },
"day": { "$dayOfYear": "$created" }
},
"dayAvg": { "$avg": { "$size": "$output" } }
}},
// Group for month
{ "$group": {
"_id": {
"year": "$_id.year",
"month": "$_id.month"
},
"days": {
"$push": {
"day": "$_id.day",
"avg": "$dayAvg"
}
},
"monthAvg": { "$avg": "$dayAvg" }
}},
// Group for the year
{ "$group": {
"_id": "$_id.year",
"daily": { "$avg": "$monthAvg" },
"months": {
"$push": {
"month": "$_id.month",
"daily": "$monthAvg",
"days": "$days"
}
}
}}
],
function(err,result) {
}
);
However you want to apply that, but the main thing missing from your example is finding the "size" or "count" of the original "output" array per document from which to obtain an average.

Related

Mongo Organize by object inside aggregate request

My project is in nodeJs with express and i use mongoose for the request to my database mongoDb.
I have a model Media the structure is:
{
"_id": {
"$oid": "6354f982a11464ff4f7bac60"
},
"userId": {
"$oid": "6353aa119d39ccbb3263123f"
},
"date": "2022-10-23",
"base64": "data:image/png;base64,iVBORw0KGgoAAA....=",
"label": "Noamount",
"uriPath": "xxxx",
"s3Path": "xxxx",
"amount": 0,
"__v": 0,
"type": "tva"
}
Like you seen there is a field date, imagine that i have 3 medias on differents month:
{
"date": "2022-10-23",
"label": "monthTen",
"type" : "tva",
... // Other property
},
{
"date": "2022-09-10",
"label": "monthNineFirst",
"type" : "tva",
... // Other property
},
{
"date": "2022-09-19",
"label": "monthNineSecond",
"type" : "other",
... // Other property
}
I want to output something like this:
// Ordery by type in first place and by month
// number like 9 = the month inside the date property
{
tva: {
9: [{ label: "monthNineFirst"... }],
10: [{ label: "monthNineTen"... }]
},
other: {
9: [{ label: "monthNineSecond"... }]
}
}
Is found the property aggregate but i don't understand it correctly.
I know how to dow that in JS it's easy, but can i do that directly on the request with the property aggregate and $group?
There is what i have done so far https://mongoplayground.net/p/-vAbdnnqOfD

Here's one way to do it by extending your mongoplayground.net start.
db.collection.aggregate([
{ // group all docs by month
$group: {
_id: {
$month: {
$dateFromString: {
dateString: "$date",
format: "%Y-%m-%d"
}
}
},
data: {"$push": "$$ROOT"}
}
},
{ // group all groups into a single doc
"$group": {
"_id": null,
"groupData": {
"$push": {
// k,v for $arrayToObject
"k": "$_id",
"v": {
"$sortArray": {
"input": "$data",
"sortBy": {"date": 1}
}
}
}
}
}
},
{
"$replaceWith": {
"$arrayToObject": {
"$map": {
"input": {
// sort by month
"$sortArray": {
"input": "$groupData",
"sortBy": {"k": 1}
}
},
"in": {
"$mergeObjects": [
"$$this",
{ // rewrite k as string
"k": {"$toString": "$$this.k"}
}
]
}
}
}
}
}
])
Try it on mongoplayground.net.

Mongo get value from 2 collections same time

I'm trying to get data from 2 collections, and return one array with merge data of both collection.
The best solution for me was :
const bothValues = await ValueA.aggregate([
{ $unionWith: { coll: 'valueB' } },
{ $sort: { rank: -1, _id: -1 } },
{
$match: {
isAvailable: true,
},
},
{ $skip: skip },
{ $limit: 30 },
]);
which work perfectly. But.. $unionWith was not implemented my MongoDB version (4.0.X) so I can't use it.
const bothValues = await ValueA.aggregate(
[
{ $limit: 1 },
{
$lookup: {
from: 'valueB',
pipeline: [{ $limit: 15 }],
as: 'valueB',
},
},
{
$lookup: {
from: 'ValueA',
pipeline: [{ $limit: 15 }, { $sort: { rank: -1, _id: -1 } }],
as: 'ValueA',
},
},
{
$project:
{
Union: { $concatArrays: ['$valueB', '$ValueA'] },
},
},
{ $unwind: '$Union' },
{ $replaceRoot: { newRoot: '$Union' } },
],
);
but now, I got 2 problems :
I can't use a $skip, which is important, where use it ?
How to use $match ?
Thanks

Query
your query made with some changes to work like the first query
match in both pipelines, sort in both, (limit limitN+skipN)
(this way we make sure that we always have enough documents even if all are taken from valueA or valueB)
Take sorted 70 from each, so in all ways we will have the 70 needed in the final sort/skip/limit after the union.
concat,unwind,replace-root like in your query
sort again (to sort the union now), skip, limit
no matter we always have enough documents to skip
this example query is made for skip=40 and limit=30 so in the first 2 pipelines we limit=70
db.ValueA.aggregate([
{
"$limit": 1
},
{
"$lookup": {
"from": "valueB",
"pipeline": [
{
"$match": {
"isAvailable": true
}
},
{
"$sort": {
"rank": -1,
"_id": -1
}
},
{
"$limit": 70
}
],
"as": "valueB"
}
},
{
"$lookup": {
"from": "valueA",
"pipeline": [
{
"$match": {
"isAvailable": true
}
},
{
"$sort": {
"rank": -1,
"_id": -1
}
},
{
"$limit": 70
}
],
"as": "valueA"
}
},
{
"$project": {
"union": {
"$concatArrays": [
"$valueA",
"$valueB"
]
}
}
},
{
"$unwind": {
"path": "$union"
}
},
{
"$replaceRoot": {
"newRoot": "$union"
}
},
{
"$sort": {
"rank": -1,
"_id": -1
}
},
{
"$skip": 40
},
{
"$limit": 30
}
])

Group Array of Objects by key and sum values from mongoose aggregation result

I have a query result from a mongoose aggregation query that I need to further process, or at best do it in the aggregation itself.
The aggregation looks like this
result = await TokenBalance.aggregate([
{
$match: {
$and: [
{ ethervalue: { $gte: minBalance } },
{
ethervalue: { $lte: maxBalance }
}
]
}
},
{ $limit:limit }
])
This returns an array of Objects of this format
{
"_id": "61013d6dda7d7c0015af5ccf",
"balances": [
{
"address": "0x1fc3ddeb035310930a444c0fa59c01618d5902af",
"symbol": "HBTC",
"balance": 5.21419339e-10,
"usdvalue": 0.000020969961637162402
},
{
"address": "0x1fc3ddeb035310930a444c0fa59c01618d5902af",
"symbol": "NSBT",
"balance": 1.258566,
"usdvalue": 27.427343477595258
},
{
"address": "0x1fc3ddeb035310930a444c0fa59c01618d5902af",
"symbol": "CRV",
"balance": 517.985955847106,
"usdvalue": 806.7017064052314
},
{
"address": "0x1fc3ddeb035310930a444c0fa59c01618d5902af",
"symbol": "USDT",
"balance": 0.003469,
"usdvalue": 0.003470159747979122
}
],
"address": "0x1fc3ddeb035310930a444c0fa59c01618d5902af",
"ethervalue": 0.7604598621232733,
"createdAt": "2021-07-28T11:20:13.927Z",
"updatedAt": "2021-07-28T11:20:13.927Z",
"__v": 0
},
What I need, is the "balances" property to be processed as grouped by symbol and for each of these symbols sum the balance and usdvalue fields.
I would prefer this do be done in the aggregation if possible, but I can not seem to get it right, even not in pure nodejs.
I want the result to be like this:
[
{
symbol: USDC, balance: xxx, usdvalue: yyy
},
{
symbol: USDT, balance: zzz, usdvalue: jjj
}
]

You can use the below approach,
$unwind to deconstruct the balances array
$group by symbol and sum balance and usdvalue
$addFields to rename _id field to symbol and and remove _id field
result = await TokenBalance.aggregate([
{
$match: {
$and: [
{ ethervalue: { $gte: minBalance } },
{ ethervalue: { $lte: maxBalance } }
]
}
},
{ $unwind: "$balances" },
{
$group: {
_id: "$balances.symbol",
balance: { $sum: "$balances.balance" },
usdvalue: { $sum: "$balances.usdvalue" }
}
},
{
$addFields: {
symbol: "$_id",
_id: "$$REMOVE"
}
},
{ $limit:limit }
])
Playground

Return Latest Sorted Date from Multiple Arrays

I currently have this schema
var dataSchema = new Schema({
hid: { type: String },
sensors: [{
nid: { type: String },
sid: { type: String },
data: {
param1: { type: String },
param2: { type: String },
data: { type: String }
},
date: { type: Date, default: Date.now }
}],
actuators: [{
nid: { type: String },
aid: { type: String },
control_id: { type: String },
data: {
param1: { type: String },
param2: { type: String },
data: { type: String }
},
date: { type: Date, default: Date.now }
}],
status: [{
nid: {type: String},
status_code: {type: String},
date: { type: Date, default: Date.now }
}],
updated: { type: Date, default: Date.now },
created: { type: Date }
});
And the query that I'm trying to build should search the schema by "hid" and then only pick the last object (by date) from the "sensors", "actuators" and "status" arrays but I can't figure out how to do that.
With this query I can partially achieve what I'm trying to get but it only give me one array at the time so I have to query the database three times and I would avoid doing so
db.getCollection('data').aggregate([
{ $match : { hid : "testhid" } },
{$project : {"sensors" : 1}},
{$unwind : "$sensors"},
{$sort : {"sensors.date" : -1}},
{$limit : 1}
])
Thanks in advance for any help

The best advice here would be to "store" the arrays as sorted in the first place. Chances are that they probably already are considering that any $push operation ( or even if you used .push() ) will actually just "append" to the array so that the latest item is "last" anyway.
So unless you are actually "changing" the "date" properties after you create, then the "latest date" is always the "last" item anyway. In which case, just $slice the entries:
Data.find({ "hid": "testhid" }).select({
"sensors": { "$slice": -1 },
"actuators": { "$slice": -1 },
"status": { "$slice": -1 }
}).exec(function(err,data) {
]);
"If", some reason you actually did manage to store in a different way or altered the "date" properties so they latest is no longer the "last", then it's probably a good idea to have all future updates use the $sort modifier with $push. This can "ensure" that additions to the array are consistently sorted. You can even modify the whole collection in one simple statement:
Date.update(
{},
{
"$push": {
"sensors": { "$each": [], "$sort": { "date": 1 } },
"actuators": { "$each": [], "$sort": { "date": 1 } },
"status": { "$each": [], "$sort": { "date": 1 } }
}
},
{ "multi": true },
function(err,num) {
}
)
In that one statement, every document in the collection is having every array mentioned re-sorted to that the "latest date" is the "last" entry for each array. This then means that the above usage of $slice is perfectly fine.
Now "If", absolutely none of that is possible for your case and you actually have some reason why the array entries are not to be commonly stored in "date" order, then ( and only really then ) should you resort to using .aggregate() in order to the the results:
Data.aggregate(
[
{ "$match": { "hid": "testhid" } },
{ "$unwind": "$sensors" },
{ "$sort": { "_id": 1, "sensors.date": -1 } },
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}},
{ "$unwind": "$actuators" },
{ "$sort": { "_id": 1, "actuators.date": -1 } },
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}},
{ "$unwind": "$status" },
{ "$sort": { "_id": 1, "status.date": -1 } },
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}}
],
function(err,data) {
}
)
The reality there is that MongoDB has no way to "inline sort" array content in a return from any query or aggregation pipeline statement. You can only really do this by processing with $unwind then using $sort and finally a $group using $first to effectively get the single item from the sorted array.
This you need to do "per" array, since the process of $unwind is creating seperate documents for each array item. You "could" do it all in one go like:
Data.aggregate(
[
{ "$match": { "hid": "testhid" } },
{ "$unwind": "$sensors" },
{ "$unwind": "$actuators" },
{ "$unwind": "$status" }
{ "$sort": {
"_id": 1,
"sensors.date": -1,
"actuators.date": -1,
"actuators.status": -1
}},
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}}
],
function(err,data) {
}
)
But it's really not that much improvement on the other process with all things considered.
The real lesson here should be to "keep the array sorted" and then doing an operation to $slice the last item is a very simple process.

Aggregate from array where date was before today

I am trying to aggregate a collection which has an array. In this array, there's a reminders array. The document might look like this:
{
_id: "1234",
dates: {
start: ISODate(),
end: ISODate()
},
reminders: [{
sendAt: ISODate(),
status: 'closed'
}, {
sendAt: ISODate(),
status: 'open'
}]
}
Say the first one is before today and the next one is after today. What I want to do is get the array of all that come before today, OR, an empty array if none came before today. I tried the following aggregation
db.reminders.aggregate([
{ $match: { 'dates.end': { $gt: new Date } } },
{ $unwind: '$reminders' },
{
$match: {
reminders: {
$elemMatch: {
sendAt: { $lt: new Date() },
status: { $ne: 'open' }
}
}
}
}
])
However, if there are no reminders before today, it will fail and give nothing back.
Is there a way to construct this structure with mongodb aggregation?
NOTE: I can't use $filter because that is in 3.2

You can use the $redact operator, to filter out sub-documents for versions >=2.6 .
It also avoids the unnecessary $unwind stage.
$match all the documents that have their dates.end attribute greater than the search criteria.
$redact through all sub-documents and do the following, $$DESCEND into those documents, that match the conditions, else $$PRUNE.
sample code:
var endDateToMatch = ISODate("2014-01-01T00:00:00Z");
var currentDate = ISODate();
db.t.aggregate([
{
$match:{"dates.end":{$gt:endDateToMatch}}
},
{
$redact:{$cond:[
{$and:[
{$ne:[{$ifNull:["$status",""]},
"open"]},
{$lt:[{$ifNull:["$sendAt",currentDate-1]},
currentDate]}
]
},
"$$DESCEND","$$PRUNE"]}
}
])
This would give you one document per document that matches the $match stage. If you need to accumulate all the sub-documents, then you need to $unwind "reminders" and $group by _id as null.

So you basically want $filter behavior but need to do it in an earlier version, with your main case being returning documents even if the array content ends up empty.
For MongoDB 2.6 you can do "almost" the same thing with $map and $setDifference:
db.reminders.aggregate([
{ "$match": { "dates.end": { "$gt": new Date() } } },
{ "$project": {
"dates": 1,
"reminders": {
"$setDifference": [
{ "$map": {
"input": "$reminders",
"as": "reminder",
"in": {
"$cond": [
{ "$and": [
{ "$lt": [ "$$reminder.sendAt", new Date() ] },
{ "$ne": [ "$$reminder.status", "open" ] }
]},
"$$reminder",
false
]
}
}},
[false]
]
}
}}
])
And that is okay as long as the resulting "set" from $setDifference is all unqiuely identified items. So the $map method applies the test, either returning the content or false if there was no match to conditions. The $setDifferene essentially removes any false elements from the results, but of course as a "set" would count any items exactly the same as one.
If your MongoDB is less than 2.6 ( or the case of "sets" makes the above unusable), it just requires being a bit more careful when looking at the content to filter:
db.reminders.aggregate([
{ "$match": { "dates.end": { "$gt": new Date() } } },
{ "$unwind": "$reminders" },
// Count where condition matched
{ "$group": {
"_id": "$_id",
"dates": { "$first": "$dates" },
"reminders": { "$push": "$reminders" },
"matched": { "$sum": {
"$cond": [
{ "$and": [
{ "$lt": [ "$reminders.sendAt", new Date() ] },
{ "$ne": [ "$reminders.status", "open" ] }
]},
1,
0
]
}}
}},
// Substitute array where no count just for brevity
{ "$project": {
"dates": 1,
"reminders": { "$cond": [
{ "$eq": [ "$matched", 0 ] },
{ "$const": [false] },
"$reminders"
]},
"matched": 1
}},
// Unwind again
{ "$unwind": "$reminders" },
// Filter for matches "or" where there were no matches to keep
{ "$match": {
"$or": [
{
"reminder.sendAt": { "$lt": new Date() },
"reminder.status": { "$ne": "open" }
},
{ "matched": 0 }
]
}},
// Group again
{ "$group": {
"_id": "$_id",
"dates": { "$first": "$dates" },
"reminders": { "$push": "$reminders" }
}},
// Replace the [false] array with an empty one
{ "$project": {
"dates": 1,
"reminders": { "$cond": [
{ "$eq": [ "$reminders", [false] ] },
{ "$const": [] },
"$reminders"
]}
}}
])
It's a bit long winded, but it's basically doing the same thing.
Also note that $elemMatch does not apply after processing $unwind, since the content is in fact no longer an array. Simple dot notation applies to the elements that are now in individual documents.

We Keep Coding

JavaScript is the programming language of the Web.

Calculating multiple averages using MongoDB Aggregation - javascript

Related

Mongo Organize by object inside aggregate request

Mongo get value from 2 collections same time

Group Array of Objects by key and sum values from mongoose aggregation result

Return Latest Sorted Date from Multiple Arrays

Aggregate from array where date was before today

Categories

Resources