I have a collection named Vote that looks like the following:
{
postId: "1",
comment:{
text_sentiment: "positive",
topic: "A"
}
}, // DOC-1
{
postId: "2",
comment:{
text_sentiment: "negative",
topic: "A"
}
}, // DOC-2
{
postId: "3",
comment:{
text_sentiment: "positive",
topic: "B"
}
},..//DOC-3 ..
I want to do an aggregation on this collection such that it returns the following structure.
[
{
_id: "hash",
topic: "A",
topicOccurance: 2,
sentiment: {
positive: 1,
negative: 1,
neutral: 0
},
postIds: [1,2]
},
..
]
I created the following aggregation:
db.Vote.aggregate([
{
$match: {
surveyId: "e6d38e1ecd",
"comment.topic": {
$exists: 1
},
}
},
{
$group: {
_id: {
topic: "$comment.topic",
text_sentiment: "$comment.text_sentiment"
},
total: {
$sum: 1
},
}
},
{
$group: {
_id: "$_id.topic",
total: {
$sum: "$total"
},
text_sentiments: {
$push: {
k: "$_id.text_sentiment",
v: "$total"
}
}
}
},
{
$project: {
topic: "$_id",
topicOccurance: "$total",
sentiment: {
"$arrayToObject": "$text_sentiments"
}
}
},
{
$sort: {
"topicOccurance": -1
}
}
])
This works fine but I do not know how can I also get an array in the response holding the key postIds. Each document inside the collection vote has postId and I want to group the posts having the same topic and push to an array. How can I do this?
2nd stage ($group) - Add postId into postIds array via $push.
3rd stage ($group) - Add postIds array into postIds array via $push. This will leads postIds become nested array.
[[1,2], ...]
4th stage ($project) - For postIds field, use $reduce operator to flatten the postIds array by $concat. Update: with $setUnion to distinct items in array.
db.collection.aggregate([
// match stage
{
$group: {
_id: {
topic: "$comment.topic",
text_sentiment: "$comment.text_sentiment"
},
total: {
$sum: 1
},
postIds: {
$push: "$postId"
}
}
},
{
$group: {
_id: "$_id.topic",
total: {
$sum: "$total"
},
text_sentiments: {
$push: {
k: "$_id.text_sentiment",
v: "$total"
}
},
postIds: {
"$push": "$postIds"
}
}
},
{
$project: {
topic: "$_id",
topicOccurance: "$total",
sentiment: {
"$arrayToObject": "$text_sentiments"
},
postIds: {
$setUnion: [
{
$reduce: {
input: "$postIds",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
}
]
}
}
},
// sort stage
])
Sample Mongo Playground
Related
I need to return the matching documents in such a way that their _id and status gets added to an object result and their aggregation ($sum) gets added to another object called aggregate. The documents look like this -
Document 1:
{
_id: 5efbd2ffc93f2352ad91666e,
varId: 'KL63D4280',
status: 'completed',
collection: 40,
cash: 3,
upi: 6000,
driverSalary: 4
}
Document 2:
{
_id: 5efbd2ffc93f2352ad916672,
varId: 'KL63D4280',
status: 'completed',
collection: 5,
cash: 3,
upi: 187,
driverSalary: 3
}
The output should look like the following :
{
result: [
{
_id: 5efbd2ffc93f2352ad91666e,
varId: 'KL63D4280',
status: 'completed'
},
{
_id: 5efbd2ffc93f2352ad916672,
varId: 'KL63D4280',
status: 'completed'
}
],
aggregates: {
_count: 2,
collection: 45,
cash: 6,
upi: 6187,
driverSalary: 7,
}
}
My code looks is like the below but it doesn't give the right output and looks like it would take much time -
const res = await BusDayWiseBreakDown.aggregate([
{
$match: {
varId,
status
}
},
{
$facet: {
result: [
{
$project: {
_id: 1,
varId:1,
status:1
}
}
]
}
},
{
$facet: {
aggregates: [
{
$project: {
_count: 1,
collection: 1,
cash: 1,
upi: 1,
driverSalary: 1,
}
},
{
$group: {
_id: null,
collection: { $sum: "$collection" },
cash: { $sum: "$cash" },
upi: { $sum: "$upi" },
driverSalary: { $sum: "$driverSalary" },
}
}
]
}
}
]).session(mongoSession);
return res;
}
There are lots of ways this could be done. Here's one way.
db.collection.aggregate([
{
"$match": {
varId: "KL63D4280",
status: "completed"
}
},
{
"$group": {
"_id": null,
"results": {
"$push": {
"_id": "$_id",
"varId": "$varId",
"status": "$status"
}
},
"_count": {"$count": {}},
"collection": {"$sum": "$collection"},
"cash": {"$sum": "$cash"},
"upi": {"$sum": "$upi"},
"driverSalary": {"$sum": "$driverSalary"}
}
},
{
"$project": {
"_id": 0,
"results": 1,
"aggregates": {
"_count": "$_count",
"collection": "$collection",
"cash": "$cash",
"upi": "$upi",
"driverSalary": "$driverSalary"
}
}
}
])
Try it on mongoplayground.net.
I'm trying to count all and unique events on daily based based on the following data shape:
{
username: "jack",
events: [
{
eventType: "party",
createdAt: "2022-01-23T10:26:11.214Z",
visitorInfo: {
visitorId: "87654321-0ebb-4238-8bf7-87654321"
}
},
{
eventType: "party",
createdAt: "2022-01-23T10:26:11.214Z",
visitorInfo: {
visitorId: "87654321-0ebb-4238-8bf7-87654321"
}
},
{
eventType: "party",
createdAt: "2022-01-23T10:26:11.214Z",
visitorInfo: {
visitorId: "01234567-0ebb-4238-8bf7-01234567"
}
},
{
eventType: "party",
createdAt: "2022-01-30T10:26:11.214Z",
visitorInfo: {
visitorId: "12345678-0ebb-4238-8bf7-12345678"
}
},
{
eventType: "party",
createdAt: "2022-01-30T10:16:11.214Z",
visitorInfo: {
visitorId: "12345678-0ebb-4238-8bf7-12345678"
}
}
]
}
I'm trying to count events (all and unique ones based on visitorId) on date (daily).
This is what I have so far (thanks to #R2D2's guide on the approach):
Event.aggregate([
{ $match: { username: 'jack' } },
{ $unwind: "$events" },
{
$project: {
total: {
$cond: [
{
$eq: ["$events.eventType", "party"],
},
1,
0,
],
},
unique: { // where I'm stuck. I need to count unique events based on visitorId on current date.
$cond: [
{
$eq: ["$events.eventType", "party"],
},
1,
0,
],
},
date: "$events.createdAt",
},
},
{
$group: {
_id: {
$dateToString: { format: "%Y-%m-%d", date: "$date" },
},
total: {
$sum: "$total",
},
uniqueTotal: {
$sum: "$unique",
},
},
},
{
$project: {
date: "$_id",
total: 1,
uniqueTotal: 1,
},
},
{
$group: {
_id: "0",
dateAndEventFrequency: {
$push: "$$ROOT",
},
},
},
{
$project: {
_id: 0,
dateAndEventFrequency: 1,
},
},
]);
I tried using $addToSet but it's not used with $project (it works with $group).
Any new approach is welcome based on the data shape and the desired result I'm expecting. I used $project because I was already using it.
Basically what I'm hoping to get in the end:
dateAndEventFrequency: [
{
_id: "2022-01-23",
uniqueTotal: 2,
total: 3,
date: "2022-01-23",
},
{
_id: "2022-01-30",
uniqueTotal: 1,
total: 2,
date: "2022-01-30",
},
]
Any help or guidance is appreciated. Thanks!
first group by date and visitorId together and then do another group just by date
you can test it here mongo playground
db.collection.aggregate([
{
$match: {
username: "jack"
}
},
{
"$unwind": "$events"
},
{
"$group": {
"_id": {
date: {
"$dateToString": {
format: "%Y-%m-%d",
date: "$events.createdAt"
}
},
"visitorId": "$events.visitorInfo.visitorId",
},
"count": {
"$count": {}
}
}
},
{
"$group": {
"_id": "$_id.date",
"uniqueTotal": {
"$count": {}
},
total: {
"$sum": "$count"
}
}
}
])
I would like to return this only if there are 2 by in the data array. The number of _id can be unlimited.
However, the code { $size: { data: 2 }, } does not work because I get $size is not allowed in this atlas tier error.
Expected return:
[
{
"_id": "Something1?",
"data": [
{
"by": "user1",
},
{
"by": "user2",
}
]
},
]
I want to include something like $size in the code, otherwise it will return the data even if there is only 1 by, or 3 by, or 0 by. I only want to return the data if there are 2 by.
What should I do? Full code without $size:
let x = await Answer.aggregate([
{
$match: {
$and: [
{
by: {
$in: [user.email, user2[0].email],
},
},
],
},
},
{
$group: {
_id: "$question",
data: {
$push: "$$ROOT",
},
},
},
{
$project: {
"data._id": 0,
"data.question": 0,
"data.__v": 0,
},
},
{ $sort: { "data.date": -1 } },
]);
Looks like your atlas tier doesn't support $size.
But you can have a field like count that increments by 1 when grouping:
db.collection.aggregate([
{
$group: {
_id: "$question",
data: {
$push: "$$ROOT",
},
count: {
$sum: 1
}
}
},
{
$match: {
count: 2
}
}
])
Try this in playground
Update
Finally, your aggregation should look like this:
[
{
$match: {
$and: [
{
by: {
$in: [user.email, user2[0].email],
},
},
],
},
},
{
$group: {
_id: "$question",
data: {
$push: "$$ROOT",
},
count: {
$sum: 1
}
},
},
{
$match: {
count: 2
}
},
{
$project: {
"data._id": 0,
"data.question": 0,
"data.__v": 0,
"count": 0
},
},
{ $sort: { "data.date": -1 } },
]
You can learn more about $sum here.
Presuming your model is called Employee:
Employee.find({ { "social_account.2": { "$exists": false }} },function(err,docs) {
})
As $exists asks for the 2 index of an array which means it has something in it.
The same applies to a maximum number:
Employee.find({ { "social_account.9": { "$exists": true}} },function(err,docs) {
})
For your perspective I think this should be your answer:
Employee.find({ { "data.2": { "$exists": false }} },function(err,docs) {
})
I have ten stations stored in the stations collection: Station A, Station B, Station C, Station D, Station E, Station F, Station G, Station H, Station I, Station J.
Right now, to create a count list of all inter-station rides between all possible pairs of stations, I do the following in my Node.js code (using Mongoose):
const stationCombinations = []
// get all stations from the stations collection
const stationIds = await Station.find({}, '_id name').lean().exec()
// list of all possible from & to combinations with their names
stationIds.forEach(fromStation => {
stationIds.forEach(toStation => {
stationCombinations.push({ fromStation, toStation })
})
})
const results = []
// loop through all station combinations
for (const stationCombination of stationCombinations) {
// create aggregation query promise
const data = Ride.aggregate([
{
$match: {
test: false,
state: 'completed',
duration: { $gt: 2 },
fromStation: mongoose.Types.ObjectId(stationCombination.fromStation._id),
toStation: mongoose.Types.ObjectId(stationCombination.toStation._id)
}
},
{
$group: {
_id: null,
count: { $sum: 1 }
}
},
{
$addFields: {
fromStation: stationCombination.fromStation.name,
toStation: stationCombination.toStation.name
}
}
])
// push promise to array
results.push(data)
}
// run all aggregation queries
const stationData = await Promise.all(results)
// flatten nested/empty arrays and return
return stationData.flat()
Executing this function give me the result in this format:
[
{
"fromStation": "Station A",
"toStation": "Station A",
"count": 1196
},
{
"fromStation": "Station A",
"toStation": "Station B",
"count": 1
},
{
"fromStation": "Station A",
"toStation": "Station C",
"count": 173
},
]
And so on for all other combinations...
The query currently takes a lot of time to execute and I keep getting alerts from MongoDB Atlas about excessive load on the database server because of these queries. Surely there must be an optimized way to do something like this?
You need to use MongoDB native operations. You need to $group by fromStation and toStation and with $lookup join two collections.
Note: I assume you have MongoDB >=v3.6 and Station._id is ObjectId
db.ride.aggregate([
{
$match: {
test: false,
state: "completed",
duration: {
$gt: 2
}
}
},
{
$group: {
_id: {
fromStation: "$fromStation",
toStation: "$toStation"
},
count: {
$sum: 1
}
}
},
{
$lookup: {
from: "station",
let: {
fromStation: "$_id.fromStation",
toStation: "$_id.toStation"
},
pipeline: [
{
$match: {
$expr: {
$in: [
"$_id",
[
"$$fromStation",
"$$toStation"
]
]
}
}
}
],
as: "tmp"
}
},
{
$project: {
_id: 0,
fromStation: {
$reduce: {
input: "$tmp",
initialValue: "",
in: {
$cond: [
{
$eq: [
"$_id.fromStation",
"$$this._id"
]
},
"$$this.name",
"$$value"
]
}
}
},
toStation: {
$reduce: {
input: "$tmp",
initialValue: "",
in: {
$cond: [
{
$eq: [
"$_id.toStation",
"$$this._id"
]
},
"$$this.name",
"$$value"
]
}
}
},
count: 1
}
},
{
$sort: {
fromStation: 1,
toStation: 1
}
}
])
MongoPlayground
Not tested:
const data = Ride.aggregate([
{
$match: {
test: false,
state: 'completed',
duration: { $gt: 2 }
}
},
{
$group: {
_id: {
fromStation: "$fromStation",
toStation: "$toStation"
},
count: { $sum: 1 }
}
},
{
$lookup: {
from: "station",
let: {
fromStation: "$_id.fromStation",
toStation: "$_id.toStation"
},
pipeline: [
{
$match: {
$expr: {
$in: [
"$_id",
[
"$$fromStation",
"$$toStation"
]
]
}
}
}
],
as: "tmp"
}
},
{
$project: {
_id: 0,
fromStation: {
$reduce: {
input: "$tmp",
initialValue: "",
in: {
$cond: [
{
$eq: [
"$_id.fromStation",
"$$this._id"
]
},
"$$this.name",
"$$value"
]
}
}
},
toStation: {
$reduce: {
input: "$tmp",
initialValue: "",
in: {
$cond: [
{
$eq: [
"$_id.toStation",
"$$this._id"
]
},
"$$this.name",
"$$value"
]
}
}
},
count: 1
}
},
{
$sort: {
fromStation: 1,
toStation: 1
}
}
])
I have this aggregation pipeline:
aggregate.lookup({
from: 'tags',
localField: 'tags',
foreignField: '_id',
as: 'tags'
});
aggregate.match({
productType: 'product',
available: true,
categories: {
$elemMatch: {
url: '/category/test'
}
}
});
aggregate.facet({
products: [
{ $sort: { [order]: sort } },
{ $skip: skip },
{ $limit: pageSize },
{
$project: {
_id: 1,
images: 1,
onSale: 1,
price: 1,
quantity: 1,
slug: 1,
sale: 1,
sku: 1,
status: 1,
title: 1,
brand: 1,
tags: 1
}
}
],
tags: [
{ $unwind: '$tags' },
{
$group: {
_id: {
name: '$name.label',
slug: '$slug'
},
count: {
$sum: 1
}
}
}
],
range: [
{
$bucketAuto: {
groupBy: '$price',
buckets: 1,
output: {
min: { $min: '$price' },
max: { $max: '$price' }
}
}
}
],
total: [{ $group: { _id: null, count: { $sum: 1 } } }]
});
aggregate.addFields({
total: {
$arrayElemAt: ['$total', 0]
}
});
aggregate.addFields({
range: {
$arrayElemAt: ['$range', 0]
}
});
Every product has it's own tags and I can't figure out how to:
Get the tags that belong only to the matched products and return an array from the $facet that contains
tags: [{name: 'tag1', slug: 'slug1', count: 10}, {name: 'tag2', slug: 'slug2', count: 5} ]
Where count: 10 are the products that have the tag.
Right now it returns all the tags found in the database.
2. Why the range property returns an object like this:
"range": {
"_id": {
"min": 5.9,
"max": 47
},
"min": 5.9,
"max": 47
}
and not like this since i provide an output object in $bucketAuto:
"range": {
"min": 5.9,
"max": 47
}
As of 2, this is normal mongodb behavior for $bucketAuto.
Here's what i've done and it works:
just right before $facet:
aggregate.lookup({
from: 'tags',
let: { tags: '$tags' },
pipeline: [
{
$match: {
$expr: { $in: ['$_id', '$$tags'] }
}
}
],
as: 'tags'
});
and then inside $facet:
tags: [
{ $unwind: { path: '$tags' } },
{
$group: {
_id: '$tags',
count: {
$sum: 1
}
}
}
],