Reshape the mongodb aggregation result with dynamic keys - javascript

Sample Collection document:
{
"_id" : ObjectId("5fec3b978b34e8b047b7ae14"),
"duration" : 20.0,
"createdOn" : ISODate("2020-12-16T22:28:44.000Z"),
"ClockInTime" : ISODate("2020-12-31T14:57:13.041Z"),
"states" : "PROCESSED"
}
Currently i'm using the following query.
db.collection.aggregate([{
$match: {
states: 'PROCESSED'
}
},
{
$group: {
_id: {
month: {
$month: "$createdOn"
},
year: {
$year: "$createdOn"
}
},
count: {
$sum: 1
},
date: {
$first: "$createdOn"
}
}
},
{
$project: {
_id: 0,
year: {
$year: "$date"
},
month: {
$month: "$date"
},
count: 1
}
}
]);
Which gives me the result in the following format.
[{
"count" : 2.0,
"year" : 2020,
"month" : 11
}, {
"count" : 5.0,
"year" : 2020,
"month" : 12
}, ...]
But i want the following format.
{
"2020": {
"11": 2,
"12": 5
}
}
Right now i'm able to get the above output by application level coding but i'm trying to get the same output from the mongodb query itself.

Based on the result you already have, add this one:
// Reshape each { year, month, count } document into { "<year>": { "<month>": count } }.
// Every input document is reshaped on its own, so documents that share a year
// are NOT merged into a single object by these stages.
db.collection.aggregate([
// Build a one-element key/value array: the month (as a string) maps to the count.
{ $set: { result: [{ k: { $toString: "$month" }, v: "$count" }] } },
// Convert that key/value array into an object, e.g. { "11": 2 }.
{ $set: { result: { $arrayToObject: "$result" } } },
// Wrap the month object in another key/value pair keyed by the year (as a string).
{ $set: { result: [{ k: { $toString: "$year" }, v: "$result" }] } },
// Promote the resulting { "<year>": { ... } } object to be the whole document.
{ $replaceRoot: { newRoot: { $arrayToObject: "$result" } } }
])
Note, date: { $first: "$createdOn" } is not deterministic, because the documents reach $group in no guaranteed order. Either use date: { $min: "$createdOn" } or insert a {$sort: {...}} stage before you run $group. Of course, if you always get just one document, it does not matter (but then you would not need count: { $sum: 1 } either).
Update based on additional input
// Count PROCESSED documents per (year, month) and emit one document per year
// shaped as { "<year>": { "<month>": count, ... } }.
db.collection.aggregate([
// Keep only documents whose state is PROCESSED.
{ $match: { states: "PROCESSED" } },
{
// First grouping: one bucket per (month, year) of createdOn, counting its documents.
$group: {
_id: {
month: { $month: "$createdOn" },
year: { $year: "$createdOn" }
},
count: { $sum: 1 },
// NOTE(review): `date` is not read by any later stage in this pipeline.
date: { $first: "$createdOn" }
}
},
// Second grouping: collect all month buckets of the same year into one `data` array.
{ $group: { _id: "$_id.year", data: { $push: "$$ROOT" } } },
{
// Turn each month bucket into a { k, v } pair; $arrayToObject needs string keys,
// hence the $toString on the numeric month.
$set: {
data: {
$map: {
input: "$data",
in: {
k: { $toString: "$$this._id.month" },
v: "$$this.count"
}
}
}
}
},
// Collapse the { k, v } pairs into a single object, e.g. { "11": 2, "12": 5 }.
{ $set: { data: { $arrayToObject: "$data" } } },
// Wrap the month object in a { k, v } pair keyed by the year (the group _id).
{ $set: { data: [ { k: { $toString: "$_id" }, v: "$data" } ] } },
// Promote { "<year>": { ... } } to be the whole document.
{ $replaceRoot: { newRoot: { $arrayToObject: "$data" } } }
])
See Mongo Playground
In older MongoDB versions (before 4.2) use $addFields instead; $set is an alias for $addFields.
following code works for older mongodb versions:
db.collection.aggregate([{
$match: {
states: "PROCESSED"
}
}, {
$group: {
_id: {
month: {
$dateToString: { format: "%m", date: "$createdOn" }
},
year: {
$dateToString: { format: "%Y", date: "$createdOn" }
}
},
count: {
$sum: 1
},
date: {
$first: "$createdOn"
}
}
}, {
$group: {
_id: "$_id.year",
data: {
$push: "$$ROOT"
}
}
}, {
$addFields: {
data: {
$map: {
input: "$data",
in: {
k: "$$this._id.month",
v: "$$this.count"
}
}
}
}
}, {
$addFields: {
data: {
$arrayToObject: "$data"
}
}
}, {
$addFields: {
data: [{
k: "$_id",
v: "$data"
}]
}
}, {
$replaceRoot: {
newRoot: {
$arrayToObject: "$data"
}
}
}])

Related

Meteor Collection Find Documents Based on Latest Date Time in an Array

How to get the latest documents from a collection using date time?
I have searched in SO for this specific problem, but couldn't find an example that is similar to my data structure. I have this kind of data structure:
[
{
stationId: 'xxxxx',
stationName: 'xxxx',
state: 'xxxx',
lat: 'xxxxx',
long: 'xx.xxxxx',
waterLevel: [
{
wlDateTime: '11/04/2022 11:30',
wlSeverity: 'Danger',
wlLevel: 7.5
},
{
wlDateTime: '11/04/2022 09:00',
wlSeverity: 'Danger',
wlLevel: 7.3
},
{
wlDateTime: '11/04/2022 03:00',
wlSeverity: 'Normal',
wlLevel: 5.2
}
],
rainfallData: [
{
rfDateTime: '11/04/2022 11:30',
rfSeverity: 'Heavy',
rfLevel: 21
},
{
rfDateTime: '11/04/2022 10:30',
rfSeverity: 'Heavy',
rfLevel: 21
},
{
rfDateTime: '11/04/2022 9:30',
rfSeverity: 'Heavy',
rfLevel: 21
}
]
}
]
The question is: how can I get documents that have a wlDateTime equal to today and a wlSeverity equal to Danger, returning only the latest record from the waterLevel array? The same applies to the rainfallData array, i.e. it should return only the latest reading for today.
Sample expected return will be like this:
[
{
stationId: 'xxxxx',
stationName: 'xxxx',
state: 'xxxx',
lat: 'xxxxx',
long: 'xx.xxxxx',
waterLevelData: [
{
wlDateTime: '11/04/2022 11:30', //latest data compared to the array
wlSeverity: 'Danger',
wlLevel: 7.5
}
],
rainfallData: [
{
rfDateTime: '11/04/2022 11:30', //latest data compared to the array
rfSeverity: 'Heavy',
rfLevel: 21
}
]
}
]
I've tried querying it like this:
Meteor.publish('Alerts', function(){
return AlertLatest.find({
'waterLevelData.wlSeverity':'Danger',
}, {
fields : {
'stationName' : 1,
'state' : 1,
'lat' : 1,
'long' : 1,
'waterLevelData.wlDateTime' : 1,
'waterLevelData.wlSeverity' : 1,
'waterLevelData.wlLevel' : 1,
'rainfallData.rfSeverity' : 1,
}},{sort: { 'waterLevelData.wlDateTime' : -1}});
})
but the query returned data that isn't how I wanted. Any help will be much appreciated.
UPDATE
I've tried the solution provided by #YuTing, which is using aggregate to customise the publication query. I went ahead and read a bit about Mongodb Aggregation, and found a Meteorjs community package (tunguska:reactive-aggregate) which simplifies the process.
This is the sample of a working aggregation so far:
Meteor.publish('PIBDataAlerts', function(){
const start = dayjs().startOf('day'); // set to 12:00 am today
const end = dayjs().endOf('day'); // set to 23:59 pm today
ReactiveAggregate(this, PIBLatest, [
{
$match: {
'stationStatus' : 'ON',
'waterLevelData': { //trying to get only today's docs
"$elemMatch" : {
"wlDateTime" : {
$gte: start.format() , $lt: end.format()
}
}
}
}
},
{
$set: {
waterLevelHFZ: {
$filter: {
input: "$waterLevelData",
as: "w",
cond: {
$and: [
{ $or : [
{ $eq: [ "$$w.wlSeverity", "Alert" ] },
{ $eq: [ "$$w.wlSeverity", "Warning" ] },
{ $eq: [ "$$w.wlSeverity", "Danger" ] },
]},
{ $eq: [ "$$w.wlDateTime", { $max: "$waterLevelData.wlDateTime" } ] }
],
}
}
},
rainfallDataHFZ: {
$filter: {
input: "$rainfallData",
as: "r",
cond: { $eq: [ "$$r.rfDateTime", { $max: "$rainfallData.rfDateTime" } ] }
}
}
}
},
{
$project : {
"stationId": 1,
"stationName" :1,
"state": 1,
"waterLevelHFZ": 1,
"rainfallDataHFZ": 1
}
}
]);
})
I'm struggling to get only the documents whose wlDateTime equals today. I've tried a query in the $match, but it returned an empty array. If the $match is set to {}, it returns all 1548 records, including those whose wlDateTime is not equal to today.
change your date string to date
filter the array to find the max one
db.collection.aggregate([
{
$match: {
$expr: {
$or: [
{
$ne: [
{
$filter: {
input: "$waterLevel",
as: "w",
cond: {
$eq: [
{
$dateTrunc: {
date: {
$dateFromString: {
dateString: "$$w.wlDateTime",
format: "%d/%m/%Y %H:%M"
}
},
unit: "day"
}
},
{
$dateTrunc: {
date: "$$NOW",
unit: "day"
}
}
]
}
}
},
[]
]
},
{
$ne: [
{
$filter: {
input: "$rainfallData",
as: "r",
cond: {
$eq: [
{
$dateTrunc: {
date: {
$dateFromString: {
dateString: "$$r.rfDateTime",
format: "%d/%m/%Y %H:%M"
}
},
unit: "day"
}
},
{
$dateTrunc: {
date: "$$NOW",
unit: "day"
}
}
]
}
}
},
[]
]
}
]
}
}
},
{
$set: {
waterLevel: {
$map: {
input: "$waterLevel",
as: "w",
in: {
$mergeObjects: [
"$$w",
{
wlDateTime: {
$dateFromString: {
dateString: "$$w.wlDateTime",
format: "%d/%m/%Y %H:%M"
}
}
}
]
}
}
},
rainfallData: {
$map: {
input: "$rainfallData",
as: "r",
in: {
$mergeObjects: [
"$$r",
{
rfDateTime: {
$dateFromString: {
dateString: "$$r.rfDateTime",
format: "%d/%m/%Y %H:%M"
}
}
}
]
}
}
}
}
},
{
$set: {
waterLevel: {
$filter: {
input: "$waterLevel",
as: "w",
cond: {
$and: [
{
$in: [
"$$w.wlSeverity",
[
"Alert",
"Warning",
"Danger"
]
]
},
{
$eq: [
"$$w.wlDateTime",
{
$max: "$waterLevel.wlDateTime"
}
]
},
{
$eq: [
{
$dateTrunc: {
date: "$$w.wlDateTime",
unit: "day"
}
},
{
$dateTrunc: {
date: "$$NOW",
unit: "day"
}
}
]
}
]
}
}
},
rainfallData: {
$filter: {
input: "$rainfallData",
as: "r",
cond: {
$and: [
{
$eq: [
"$$r.rfDateTime",
{
$max: "$rainfallData.rfDateTime"
}
]
},
{
$eq: [
{
$dateTrunc: {
date: "$$r.rfDateTime",
unit: "day"
}
},
{
$dateTrunc: {
date: "$$NOW",
unit: "day"
}
}
]
}
]
}
}
}
}
}
])
mongoplayground
I don't think you can sort by embedded document in an array field. It's not how mongodb works.
but I just want the latest
If you are only interested in the latest docs, you can omit the sort and instead use a reverse natural-order cursor:
// Publishes stations that have at least one 'Danger' water-level reading,
// restricted to the listed fields and scanned in reverse natural order
// (most recently stored documents first).
Meteor.publish('Alerts', function () {
  return AlertLatest.find(
    {
      'waterLevelData.wlSeverity': 'Danger',
    },
    {
      fields: {
        'stationName': 1,
        'state': 1,
        'lat': 1,
        'long': 1,
        'waterLevelData.wlDateTime': 1,
        'waterLevelData.wlSeverity': 1,
        'waterLevelData.wlLevel': 1,
        'rainfallData.rfSeverity': 1,
      },
      // `hint` has to live in the same options object as `fields`:
      // find() accepts only (selector, options), so a separate third
      // argument would be silently ignored.
      hint: { $natural: -1 },
    }
  );
})
It will start counting docs from the end, instead of the beginning.
https://docs.meteor.com/api/collections.html#Mongo-Collection-find

Find total unique counter based on unique id and date - MongoDB

I'm trying to count all events and unique events on a daily basis, based on the following data shape:
{
username: "jack",
events: [
{
eventType: "party",
createdAt: "2022-01-23T10:26:11.214Z",
visitorInfo: {
visitorId: "87654321-0ebb-4238-8bf7-87654321"
}
},
{
eventType: "party",
createdAt: "2022-01-23T10:26:11.214Z",
visitorInfo: {
visitorId: "87654321-0ebb-4238-8bf7-87654321"
}
},
{
eventType: "party",
createdAt: "2022-01-23T10:26:11.214Z",
visitorInfo: {
visitorId: "01234567-0ebb-4238-8bf7-01234567"
}
},
{
eventType: "party",
createdAt: "2022-01-30T10:26:11.214Z",
visitorInfo: {
visitorId: "12345678-0ebb-4238-8bf7-12345678"
}
},
{
eventType: "party",
createdAt: "2022-01-30T10:16:11.214Z",
visitorInfo: {
visitorId: "12345678-0ebb-4238-8bf7-12345678"
}
}
]
}
I'm trying to count events (all and unique ones based on visitorId) on date (daily).
This is what I have so far (thanks to #R2D2's guide on the approach):
Event.aggregate([
{ $match: { username: 'jack' } },
{ $unwind: "$events" },
{
$project: {
total: {
$cond: [
{
$eq: ["$events.eventType", "party"],
},
1,
0,
],
},
unique: { // where I'm stuck. I need to count unique events based on visitorId on current date.
$cond: [
{
$eq: ["$events.eventType", "party"],
},
1,
0,
],
},
date: "$events.createdAt",
},
},
{
$group: {
_id: {
$dateToString: { format: "%Y-%m-%d", date: "$date" },
},
total: {
$sum: "$total",
},
uniqueTotal: {
$sum: "$unique",
},
},
},
{
$project: {
date: "$_id",
total: 1,
uniqueTotal: 1,
},
},
{
$group: {
_id: "0",
dateAndEventFrequency: {
$push: "$$ROOT",
},
},
},
{
$project: {
_id: 0,
dateAndEventFrequency: 1,
},
},
]);
I tried using $addToSet but it's not used with $project (it works with $group).
Any new approach is welcome based on the data shape and the desired result I'm expecting. I used $project because I was already using it.
Basically what I'm hoping to get in the end:
dateAndEventFrequency: [
{
_id: "2022-01-23",
uniqueTotal: 2,
total: 3,
date: "2022-01-23",
},
{
_id: "2022-01-30",
uniqueTotal: 1,
total: 2,
date: "2022-01-30",
},
]
Any help or guidance is appreciated. Thanks!
first group by date and visitorId together and then do another group just by date
you can test it here mongo playground
// Per day, compute the total number of events and the number of distinct visitors
// for user "jack", by grouping twice: first by (day, visitorId), then by day alone.
db.collection.aggregate([
{
$match: {
username: "jack"
}
},
{
// Emit one document per element of the events array.
"$unwind": "$events"
},
{
// First grouping: one bucket per (day, visitor); `count` is that visitor's
// number of events on that day.
"$group": {
"_id": {
date: {
// NOTE(review): assumes events.createdAt is stored as a date value rather
// than a plain string — confirm against the collection.
"$dateToString": {
format: "%Y-%m-%d",
date: "$events.createdAt"
}
},
"visitorId": "$events.visitorInfo.visitorId",
},
"count": {
// NOTE(review): the $count accumulator needs a recent server version
// (5.0+, as far as I know) — confirm; { $sum: 1 } is the older equivalent.
"$count": {}
}
}
},
{
// Second grouping: one bucket per day. Each incoming document is one distinct
// visitor, so counting documents gives uniqueTotal, and summing the per-visitor
// counts gives the overall total.
"$group": {
"_id": "$_id.date",
"uniqueTotal": {
"$count": {}
},
total: {
"$sum": "$count"
}
}
}
])

is there is way to multi group in mongodb

there is way to multi group in mongodb ?
document i have and want to query it
[ {
_id: '1615658138236',
englishName: 'samsung smart tv 50',
screen_resulation: '4K',
screen_size: '50' }, {
_id: '1615750981674',
englishName: 'lg tv 55 led uhd',
screen_resulation: 'UHD',
screen_size: '55' }, {
_id: '1615791834538',
englishName: 'samsung smart 55 inch crystal 4k',
screen_resulation: '4K',
screen_size: '55' } ]
for example i have 2 unknown fields i use this method to get them
for (let i = 0; i < result[0].filters.length; i++) {
const item = result[0].filters[i].key;
groupBy[item] = `$${item}`;
}
and i try to query mongodb to get count of every field
const products = await Product.aggregate([
{
$match: {
category,
},
},
{
$group: {
_id: groupBy,
count: {
$sum: 1,
},
},
},
{
$sort: { count: -1 },
},
]);
result i get
[
{ _id: { screen_size: '50', screen_resulation: '4K' }, count: 1 },
{ _id: { screen_size: '55', screen_resulation: 'UHD' }, count: 1 },
{ _id: { screen_size: '55', screen_resulation: '4K' }, count: 1 }
]
what i expect is :
[
{ _id: { screen_size: '50' }, count: 1 },
{ _id: { screen_size: '55' }, count: 2 },
{ _id: { screen_resulation: '4K' }, count: 2 },
{ _id: { screen_resulation: 'UHD' }, count: 1 },
]
i really find mongodb is great but very hard for me i dont know why
You can use $facet for multiple aggregation pipelines.
// Run two independent $group pipelines over the same input documents in a
// single $facet stage. The result is one document with two arrays:
// `screen_size_count` and `screen_resulation_count`, each holding
// { _id: <field value>, count: <number of documents> } entries.
db.collection.aggregate([
  {
    "$facet": {
      // Number of documents per distinct screen_size.
      "screen_size_count": [
        {
          "$group": {
            "_id": "$screen_size",
            "count": {
              $sum: 1
            }
          }
        }
      ],
      // Number of documents per distinct screen_resulation.
      // (Key fixed: it previously contained a stray trailing ':' inside the
      // string, which made the output field name "screen_resulation_count:".)
      "screen_resulation_count": [
        {
          "$group": {
            "_id": "$screen_resulation",
            "count": {
              $sum: 1
            }
          }
        }
      ]
    }
  }
])
Mongo Playground: https://mongoplayground.net/p/cnEY8NV4HNs

Build the total sum of aggregated result

Is it somehow possible to add a field to the aggregated result? My goal is to have a total sum for all results. Currently, I just reduce the result, but I believe this is not as performant as solving it through a query.
aggregate([
{
$match: {
time: { $gte: start, $lte: end },
},
},
{
$group:
{
_id: { $dateToString: { format: '%Y-%m-%d', date: '$time' } },
totalAmount: { $sum: '$payment.amount' },
},
},
]).exec().then((result) => {
return {
total: result.reduce(((acc, curr) => acc + curr.totalAmount), 0),
dates: result,
};
});
result is:
{
"_id":"2020-06-06",
"totalAmount":12
},
{
"_id":"2020-07-06",
"totalAmount":12
}
Any idea how I can get the total amount for all, looking like this but without that reduce part?
{
"total": 24,
"dates": [
{
"_id": "2020-06-06",
"totalAmount": 12,
},
{
"_id": "2020-07-06",
"totalAmount": 12,
}
]
}
Either you can use two queries simultaneously
const [result, totalAmount] = await Promise.all([
Model.aggregate([
{ $match: { time: { $gte: start, $lte: end } } },
{
$group: {
_id: { $dateToString: { format: "%Y-%m-%d", date: "$time" } },
totalAmount: { $sum: "$payment.amount" },
}
},
]),
Model.aggregate([
{ $match: { time: { $gte: start, $lte: end } } },
{
$group: {
_id: null,
totalAmount: { $sum: "$payment.amount" },
}
},
])
])
return {
total: result,
dates: totalAmount,
}
Or can use $facet
const result = await Model.aggregate([
{ $match: { time: { $gte: start, $lte: end } } },
{
$facet: {
result: [
{
$group: {
_id: {
$dateToString: { format: "%Y-%m-%d", date: "$time" },
},
totalAmount: { $sum: "$payment.amount" },
},
},
],
totalAmount: [
{
$group: {
_id: null,
totalAmount: { $sum: "$payment.amount" },
},
},
],
},
},
]);
return {
total: _.get(result, "[0].result", []),
dates: _.get(result, "[0].totalAmount.totalAmount", 0),
}

MongoDB aggregation performance issue

I'm using MongoDB for my Nodejs project and when using the below piece of code with aggregation the performance seems to be very slow.
masterSchema.hotelDetails.aggregate({
$match: {
"hotel.basicInfo.city.id": cityId.toString()
}
}, {
$sort: {
"hotel.basicInfo.district.name": -1
}
}, {
$group: {
_id: "$hotel.basicInfo.district.id",
name: {
$first: "$hotel.basicInfo.district.name"
}
}
}).exec(function(err, enData) {
if(err) {
console.log(err);
}
for(var i in enData) {
for(var j in filterObj.district) {
if(filterObj.district[j]._id === enData[i]._id) {
filterObj.district[j].en = (enData[i].name !== null) ? enData[i].name : "Others";
break;
}
}
}
callback();
});
I COMMAND [conn13994] command production.hotelDetailsAr command: aggregate { aggregate: "hotelDetailsAr", pipeline: [ { $match: { hotel.basicInfo.city.id: "54350" } }, { $sort: { hotel.basicInfo.district.name: -1 } }, { $group: { _id: "$hotel.basicInfo.district.id", districtCount: { $sum: 1 }, name: { $first: "$hotel.basicInfo.district.name" } } } ] } keyUpdates:0 writeConflicts:0 numYields:1129 reslen:112 locks:{ Global: { acquireCount: { r: 2264 } }, Database: { acquireCount: { r: 1132 } }, Collection: { acquireCount: { r: 1132 } } } protocol:op_query 802ms
filterQuery.aggregate({
$match: {
"hotel.basicInfo.city.id": cityId.toString()
}
}, {
$group: {
_id: null,
0: {
$sum: {
$cond: [{
$eq: ["$hotel.basicInfo.starRating", "5"]
}, 1, 0]
}
},
1: {
$sum: {
$cond: [{
$eq: ["$hotel.basicInfo.starRating", "4"]
}, 1, 0]
}
},
2: {
$sum: {
$cond: [{
$eq: ["$hotel.basicInfo.starRating", "3"]
}, 1, 0]
}
},
3: {
$sum: {
$cond: [{
$eq: ["$hotel.basicInfo.starRating", "2"]
}, 1, 0]
}
},
4: {
$sum: {
$cond: [{
$eq: ["$hotel.basicInfo.starRating", "1"]
}, 1, 0]
}
},
5: {
$sum: {
$cond: [{
$eq: ["$hotel.basicInfo.starRating", "0"]
}, 1, 0]
}
},
6: {
$sum: {
$cond: [{
$eq: ["$hotel.basicInfo.starRating", null]
}, 1, 0]
}
}
}
}).exec(function (err, data) {
filterObj.starRating = data;
callback();
});
Server information:
Centos 5
8 Core CPU
40GB RAM
MongoDB Info
240,370 Documents
480MB
Version 3.2
Am i doing something wrong with the query?

Categories