How can I add a filter after an $lookup or is there any other method to do this?
My data collection test is:
{ "_id" : ObjectId("570557d4094a4514fc1291d6"), "id" : 100, "value" : "0", "contain" : [ ] }
{ "_id" : ObjectId("570557d4094a4514fc1291d7"), "id" : 110, "value" : "1", "contain" : [ 100 ] }
{ "_id" : ObjectId("570557d4094a4514fc1291d8"), "id" : 120, "value" : "1", "contain" : [ 100 ] }
{ "_id" : ObjectId("570557d4094a4514fc1291d9"), "id" : 121, "value" : "2", "contain" : [ 100, 120 ] }
I select id 100 and aggregate the childs:
db.test.aggregate([ {
$match : {
id: 100
}
}, {
$lookup : {
from : "test",
localField : "id",
foreignField : "contain",
as : "childs"
}
}]);
I get back:
{
"_id":ObjectId("570557d4094a4514fc1291d6"),
"id":100,
"value":"0",
"contain":[ ],
"childs":[ {
"_id":ObjectId("570557d4094a4514fc1291d7"),
"id":110,
"value":"1",
"contain":[ 100 ]
},
{
"_id":ObjectId("570557d4094a4514fc1291d8"),
"id":120,
"value":"1",
"contain":[ 100 ]
},
{
"_id":ObjectId("570557d4094a4514fc1291d9"),
"id":121,
"value":"2",
"contain":[ 100, 120 ]
}
]
}
But I want only childs that match with "value: 1"
At the end I expect this result:
{
"_id":ObjectId("570557d4094a4514fc1291d6"),
"id":100,
"value":"0",
"contain":[ ],
"childs":[ {
"_id":ObjectId("570557d4094a4514fc1291d7"),
"id":110,
"value":"1",
"contain":[ 100 ]
},
{
"_id":ObjectId("570557d4094a4514fc1291d8"),
"id":120,
"value":"1",
"contain":[ 100 ]
}
]
}
The question here is actually about something different and does not need $lookup at all. But for anyone arriving here purely from the title of "filtering after $lookup" then these are the techniques for you:
MongoDB 3.6 - Sub-pipeline
db.test.aggregate([
{ "$match": { "id": 100 } },
{ "$lookup": {
"from": "test",
"let": { "id": "$id" },
"pipeline": [
{ "$match": {
"value": "1",
"$expr": { "$in": [ "$$id", "$contain" ] }
}}
],
"as": "childs"
}}
])
Earlier - $lookup + $unwind + $match coalescence
db.test.aggregate([
{ "$match": { "id": 100 } },
{ "$lookup": {
"from": "test",
"localField": "id",
"foreignField": "contain",
"as": "childs"
}},
{ "$unwind": "$childs" },
{ "$match": { "childs.value": "1" } },
{ "$group": {
"_id": "$_id",
"id": { "$first": "$id" },
"value": { "$first": "$value" },
"contain": { "$first": "$contain" },
"childs": { "$push": "$childs" }
}}
])
If you question why would you $unwind as opposed to using $filter on the array, then read Aggregate $lookup Total size of documents in matching pipeline exceeds maximum document size for all the detail on why this is generally necessary and far more optimal.
For releases of MongoDB 3.6 and onwards, then the more expressive "sub-pipeline" is generally what you want to "filter" the results of the foreign collection before anything gets returned into the array at all.
Back to the answer though which actually describes why the question asked needs "no join" at all....
Original
Using $lookup like this is not the most "efficient" way to do what you want here. But more on this later.
As a basic concept, just use $filter on the resulting array:
db.test.aggregate([
{ "$match": { "id": 100 } },
{ "$lookup": {
"from": "test",
"localField": "id",
"foreignField": "contain",
"as": "childs"
}},
{ "$project": {
"id": 1,
"value": 1,
"contain": 1,
"childs": {
"$filter": {
"input": "$childs",
"as": "child",
"cond": { "$eq": [ "$$child.value", "1" ] }
}
}
}}
]);
Or use $redact instead:
db.test.aggregate([
{ "$match": { "id": 100 } },
{ "$lookup": {
"from": "test",
"localField": "id",
"foreignField": "contain",
"as": "childs"
}},
{ "$redact": {
"$cond": {
"if": {
"$or": [
{ "$eq": [ "$value", "0" ] },
{ "$eq": [ "$value", "1" ] }
]
},
"then": "$$DESCEND",
"else": "$$PRUNE"
}
}}
]);
Both get the same result:
{
"_id":ObjectId("570557d4094a4514fc1291d6"),
"id":100,
"value":"0",
"contain":[ ],
"childs":[ {
"_id":ObjectId("570557d4094a4514fc1291d7"),
"id":110,
"value":"1",
"contain":[ 100 ]
},
{
"_id":ObjectId("570557d4094a4514fc1291d8"),
"id":120,
"value":"1",
"contain":[ 100 ]
}
]
}
Bottom line is that $lookup itself cannot "yet" query to only select certain data. So all "filtering" needs to happen after the $lookup
But really for this type of "self join" you are better off not using $lookup at all and avoiding the overhead of an additional read and "hash-merge" entirely. Just fetch the related items and $group instead:
db.test.aggregate([
{ "$match": {
"$or": [
{ "id": 100 },
{ "contain.0": 100, "value": "1" }
]
}},
{ "$group": {
"_id": {
"$cond": {
"if": { "$eq": [ "$value", "0" ] },
"then": "$id",
"else": { "$arrayElemAt": [ "$contain", 0 ] }
}
},
"value": { "$first": { "$literal": "0"} },
"childs": {
"$push": {
"$cond": {
"if": { "$ne": [ "$value", "0" ] },
"then": "$$ROOT",
"else": null
}
}
}
}},
{ "$project": {
"value": 1,
"childs": {
"$filter": {
"input": "$childs",
"as": "child",
"cond": { "$ne": [ "$$child", null ] }
}
}
}}
])
Which only comes out a little different because I deliberately removed the extraneous fields. Add them in yourself if you really want to:
{
"_id" : 100,
"value" : "0",
"childs" : [
{
"_id" : ObjectId("570557d4094a4514fc1291d7"),
"id" : 110,
"value" : "1",
"contain" : [ 100 ]
},
{
"_id" : ObjectId("570557d4094a4514fc1291d8"),
"id" : 120,
"value" : "1",
"contain" : [ 100 ]
}
]
}
So the only real issue here is "filtering" any null result from the array, created when the current document was the parent in processing items to $push.
What you also seem to be missing here is that the result you are looking for does not need aggregation or "sub-queries" at all. The structure that you have concluded or possibly found elsewhere is "designed" so that you can get a "node" and all of it's "children" in a single query request.
That means just the "query" is all that is really needed, and the data collection ( which is all that is happening since no content is really being "reduced" ) is just a function of iterating the cursor result:
var result = {};
db.test.find({
"$or": [
{ "id": 100 },
{ "contain.0": 100, "value": "1" }
]
}).sort({ "contain.0": 1 }).forEach(function(doc) {
if ( doc.id == 100 ) {
result = doc;
result.childs = []
} else {
result.childs.push(doc)
}
})
printjson(result);
This does exactly the same thing:
{
"_id" : ObjectId("570557d4094a4514fc1291d6"),
"id" : 100,
"value" : "0",
"contain" : [ ],
"childs" : [
{
"_id" : ObjectId("570557d4094a4514fc1291d7"),
"id" : 110,
"value" : "1",
"contain" : [
100
]
},
{
"_id" : ObjectId("570557d4094a4514fc1291d8"),
"id" : 120,
"value" : "1",
"contain" : [
100
]
}
]
}
And serves as proof that all you really need to do here is issue the "single" query to select both the parent and children. The returned data is just the same, and all you are doing on either server or client is "massaging" into another collected format.
This is one of those cases where you can get "caught up" in thinking of how you did things in a "relational" database, and not realize that since the way the data is stored has "changed", you no longer need to use the same approach.
That is exactly what the point of the documentation example "Model Tree Structures with Child References" in it's structure, where it makes it easy to select parents and children within one query.
Related
Suppose I have data in bookOrder as:
{
"_id" : ObjectId("615fc295257d6d7cf57a39fe"),
"orderId" : "2001",
"itemId" : [
"615fc232257d6d7cf57a39d4",
"615fc251257d6d7cf57a39e0"
],
"Discount" : 10
}
Item data as:
{
"_id" : ObjectId("615fc232257d6d7cf57a39d4"),
"itemId" : "1001",
"Price" : 10.21
}
{
"_id" : ObjectId("615fc251257d6d7cf57a39e0"),
"itemId" : "1002",
"Price" : 100
}
I want to calculate the total price of order after discount,
i.e. total price as : 100+10.21-10 = 100.21
For this I tried as:
const data = await db.order.aggregate(
[
{
"$match": {
"orderId": orderId
}
},
{
"$lookup": {
"from": "item",
let: {
eid: "$itemId"
},
pipeline: [
{
"$match": {
$expr: {
$in: [
"$_id",
"$$eid"
]
}
}
},
],
"as": "items"
}
},
{
"$unwind": {
path: "$items"
}
},
]
)
So, I get the value as:
{
"orderId" : "2001",
"Discount":10,
"itemId":[{
"itemId" : "1001",
"Price" : 10.21
},
"itemId" : "1002",
"Price" : 100
]}
}
SO instead of having to loop over the itemId price and get total sun, and then subtracting from the discount price of order can we do all these calculations of db itself.
Is there any way that I can query the total price from DB only instead of having to fetch data and applying any loop and then calculating the total price?
Please let me know if anyone needs any further explanation from my side.
use sum
aggregate
db.orders.aggregate([
{
"$match": {
"orderId": "2001"
}
},
{
"$lookup": {
"from": "items",
"localField": "itemId",
"foreignField": "_id",
"as": "items"
}
},
{
"$project": {
"orderId": 1,
"total": {
$subtract: [
{
"$sum": "$items.Price"
},
"$Discount"
]
}
}
}
])
mongoplayground
You can do this in a couple of ways, here is the most straight forward one using $map and some math operators.
db.order.aggregate([
{
"$match": {
"orderId": "2001"
}
},
{
"$lookup": {
"from": "item",
let: {
eid: "$itemId"
},
pipeline: [
{
"$match": {
$expr: {
$in: [
"$_id",
"$$eid"
]
}
}
},
],
"as": "items"
}
},
{
$project: {
orderId: 1,
finalSum: {
$subtract: [
{
$sum: {
$map: {
input: "$items",
in: "$$this.Price"
}
}
},
"$Discount"
]
}
}
}
])
Mongo Playground
Here I am trying to get entire data but if date less then current then do not fetch that date from the database.
{
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ab5"),
"highlights" : [
"highlights-1",
],
"notes" : [
"Listen"
],
"soldout" : false,
"active" : false,
"operator" : ObjectId(""),
"title" : "2D1N Awesome trip to Knowhere 99",
"destinations" : [
{
"coordinatesType" : "Point",
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ab6"),
}
],
"difficulty" : "Easy",
"duration" : {
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ab7"),
"days" : NumberInt(2),
"nights" : NumberInt(1)
},
"media" : {
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ab8"),
"images" : [
],
"videos" : [
]
},
"description" : "Surrounded ",
"inclusions" : [
{
"_id" : ObjectId(""),
"text" : "Included"
}
],
"itinerary" : "Surrounded .",
"thingsToCarry" : [
{
"_id" : ObjectId(""),
"text" : "Yourself"
}
],
"exclusions" : [
{
"_id" : ObjectId(""),
"text" : "A Lot"
}
],
"policy" : "Fully refundable 7777 Days before the date of Experience",
"departures" : [
{
"dates" : [
ISODate("2019-11-19T02:44:58.989+0000"),
ISODate("2019-11-23T17:19:47.878+0000")
],
"_id" : ObjectId(""),
"bookingCloses" : "2 Hours Before",
"maximumSeats" : NumberInt(20),
"source" : {
"coordinatesType" : "Point",
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ac2"),
"code" : "code",
"name" : "Manali",
"state" : "Himachal Pradesh",
"region" : "North",
"country" : "India",
"coordinates" : [
23.33,
NumberInt(43),
NumberInt(33)
]
},
"pickupPoints" : [
{
"coordinatesType" : "Point",
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ac3"),
"name" : "name-3",
"address" : "address-3",
"time" : "time-3",
"coordinates" : [
23.33,
NumberInt(43),
NumberInt(33)
]
}
],
"prices" : {
"3" : NumberInt(5)
},
"mrps" : {
"3" : NumberInt(5)
},
"markup" : NumberInt(25),
"discount" : NumberInt(0),
"b2m" : {
"3" : NumberInt(5)
},
"m2c" : {
"3" : 6.25
},
"minimumOccupancy" : NumberInt(3),
"maximumOccupancy" : NumberInt(3)
}
],
"bulkDiscounts" : [
{
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ac4")
}
],
}
In this I am trying to get all the data except the date section should be different. Means I should get my output as below
{
"_id": "5d6fad0f9e0dc027fc6b5ab5",
"highlights": [
"highlights-1",
"highlights-2",
"highlights-3",
"highlights-4",
"highlights-5"
],
"notes": [
"Listen"
],
"soldout": false,
"active": false,
"operator": "5d5d84e8c89fbf00063095f6",
"title": "2D1N Awesome trip to Knowhere 99",
"destinations": [
{
"code": "code",
"name": "Manali",
"coordinates": [
23.33,
43,
33
]
}
],
"difficulty": "Easy",
"duration": {
"_id": "5d6fad0f9e0dc027fc6b5ab7",
"days": 2,
"nights": 1
},
"media": {
"_id": "5d6fad0f9e0dc027fc6b5ab8",
"images": [
],
"videos": []
},
"description": "Surrounded.",
"inclusions": [
{
"_id": "5d6fad0f9e0dc027fc6b5abe",
"text": "Included"
}
],
"itinerary": "Surrounded",
"thingsToCarry": [
{
"_id": "5d6fad0f9e0dc027fc6b5abf",
"text": "Yourself"
}
],
"exclusions": [
{
"_id": "5d6fad0f9e0dc027fc6b5ac0",
"text": "A Lot"
}
],
"policy": "Fully refundable 7777 Days before the date of Experience",
"departures": [
{
"dates": [
"2019-11-23T17:19:47.878Z"
],
"_id": "5d6fad0f9e0dc027fc6b5ac1",
"bookingCloses": "2 Hours Before",
"maximumSeats": 20,
"source": {
"code": "code",
"name": "Manali",
"coordinates": [
23.33,
43,
33
]
},
"pickupPoints": [
{
"coordinatesType": "Point",
"_id": "5d6fad0f9e0dc027fc6b5ac3",
"name": "name-3",
"address": "address-3",
"time": "time-3",
"coordinates": [
23.33,
43,
33
]
}
],
"mrps": {
"3": 5
},
"markup": 25,
"discount": 0,
"b2m": {
"3": 5
},
"m2c": {
"3": 6.25
},
"minimumOccupancy": 3,
"maximumOccupancy": 3
}
],
"bulkDiscounts": [
{
"_id": "5d6fad0f9e0dc027fc6b5ac4"
}
],
"url": "",
}
]
I mean to say that no difference in output except dates array. If dates are less than current date then no need to fetch else fetch from DB with filtered dates array.
If you use mongo 3.4> then you can try with $addFields and $filter:
myCollection.aggregate([
{$match: {
'departures.dates': {
$elemMatch: {$gt: new Date()}}
}
},
{$addFields: {
'departures.dates': {
$filter: {
input: '$departures.dates',
as: 'date',
cond: {
$gt: ['$$date', new Date()]
}
}
}
}}
])
I was missing one terms here that my documnet structure is like below
{
_id: ObjecId(),
departure: [{
dates: [Array]
}]
}
So, here is my solution in the below code
pipeline = [
{ $unwind: '$departures' },
{
$addFields: {
'departures.dates': {
$filter: {
input: '$departures.dates',
as: 'date',
cond: {
$gt: ['$$date', new Date()]
}
}
}
}
}
];
My documents looks like this.
{
"_id" : ObjectId("572c4bffd073dd581edae045"),
"name" : "What's New in PHP 7",
"description" : "PHP 7 is the first new major version number of PHP since 2004. This course shows what's new, and what's changed.",
"difficulty_level" : "Beginner",
"type" : "Normal",
"tagged_skills" : [
{
"_id" : "5714e894e09a0f7d804b2254",
"name" : "PHP"
}
],
"created_at" : 1462520831.649,
"updated_at" : 1468233074.243 }
Is it possible to get recent 5 documents and total count in a single query.
I am using two queries for this requirement as given below.
db.course.find().sort({created_at:-1}).limit(5)
db.course.count()
This is a perfect job for the aggregation framework.
db.course.aggregate(
[
{ "$sort": { "created_at": -1 }},
{ "$group": {
"_id": null,
"docs": { "$push": "$$ROOT" },
"count": { "$sum": 1 }
}},
{ "$project": { "_id": 0, "count": 1, "docs": { "$slice": [ "$docs", 5 ] } }}
]
)
If your MongoDB server doesn't support $slice then you need to use the ugly and inefficient approach.
db.course.aggregate(
[
{ "$sort": { "created_at": -1 }},
{ "$group": {
"_id": null,
"docs": { "$push": "$$ROOT" },
"count": { "$sum": 1 }
}},
{ "$unwind": "$docs" },
{ "$limit": 5 }
]
)
You can implement this easily with $facet
myCollection.aggregate([
{
$facet: {
count: [{ $count: "value" }],
data: [{ $sort: { _id: -1 } }, { $skip: skip }, { $limit: limit }]
}
},
{ $unwind: "$count" },
{ $set: { count: "$count.value" } }
])
the return result will be like:
[
{
"count": 234,
"data": [
// ...
]
}
]
#styvane I tested in person, this query is even less efficient than twice queries.
// get count
db.course.aggregate([{$match:{}}, {$count: "count"}]);
// get docs
db.course.aggregate(
[
{$match:{}},
{ "$sort": { "created_at": -1 }},
{"$skip": offset},
{"$limit": limit}
]
)
No, there is no other way. Two queries - one for count - one with limit.
Given a collection like:
{
"_id": "XXXX",
"JobId": [
100
],
"PersonalDetails": [
{
"Level": 1,
"Zone": [
{
"Id": 1,
"Code": "XXXXXXXX",
"IsAvailable": true
},
{
"Id": 45,
"Code": "ZZZZZZZZZ",
"IsAvailable": false
}
]
}
],
"Timestamp": ISODate("2015-11-01T00:00:00.000Z")
}
I need to get all Zone ids and codes that have the IsAvailable flag set to true.
I have tried the following:
var details = db.test.find(
{
JobId: {$in: [100]},
'PersonalDetails': {$elemMatch: {Zone : {$elemMatch: {IsAvailable: true}}}}
},
{
'PersonalDetails.Zone.Id': 1,
'PersonalDetails.Zone.Code': 1,
'PersonalDetails.Zone.IsAvailable': 1
});
details.forEach(function(doc){
var myDetails = doc.PersonalDetails;
myDetails.forEach(function(doc2){
var myZones = doc2.Zone;
print(myZones);
This gives me
{
"0" : {
"Id": 1,
"Code": "XXXXXXXX",
"IsAvailable": true
},
"1" : {
"Id": 45,
"Code": "ZZZZZZZZZ",
"IsAvailable": false
}
}
But I just want only where the IsAvailable flag is set to true returned.
Am I going about this the wrong way?? I tried using aggregate but ran into the same problem - returning all and not filtering the IsAvailable flag.
You need to use the .aggregate() method.
First of all you need to reduce the size of the documents to process using the $match operator. From there you will need to denormalize your "PersonalDetails" array using the $unwind operator.
You can then use the $project operator to return only sub-documents that match your criteria.
The $map operator in the project stage is used to return array of sub-documents.
db.collection.aggregate([
{ "$match": {
"JobId": 100,
"PersonalDetails.Zone.IsAvailable": true
}},
{ "$unwind": "$PersonalDetails" },
{ "$project": {
"zone": {
"$setDifference": [
{ "$map": {
"input": "$PersonalDetails.Zone",
"as": "z",
"in": { "$cond": [ "$$z.IsAvailable", "$$z", false ] }
}},
[false]
]
}
}}
])
Which returns:
{
"_id" : "XXXX",
"zone" : [
{
"Id" : 1,
"Code" : "XXXXXXXX",
"IsAvailable" : true
}
]
}
Starting from MongoDB 3.2 we can use the $filter operator to do this efficiently
db.collection.aggregate([
{ "$match": {
"JobId": 100,
"PersonalDetails.Zone.IsAvailable": true
}},
{ "$unwind": "$PersonalDetails" },
{ "$project": {
"zone": {
"$filter": {
"input": "$PersonalDetails.Zone",
"as": "z", "cond": "$$z.IsAvailable"
}
}
}}
])
Assuming I have a schema that looks something like this:
{
field: [{
subDoc: ObjectId,
...
}],
...
}
and I have some list of ObjectIds (user input), how would I get a count of those specific ObjectIds? For exmaple, if I have data like this:
[
{field: [ {subDoc: 123}, {subDoc: 234} ]},
{field: [ {subDoc: 234}, {subDoc: 345} ]},
{field: [ {subDoc: 123}, {subDoc: 345}, {subDoc: 456} ]}
]
and the list of ids given by the user is 123, 234, 345, I need to get a count the given ids, so a result approximating this:
{
123: 2,
234: 2,
345: 2
}
What would be the best way to go about this?
The aggregation framework itself if not going to dynamically name keys the way you have presented as a proposed output, and that probably is a good thing really. But you can probably just do a query like this:
db.collection.aggregate([
// Match documents that contain the elements
{ "$match": {
"field.subDoc": { "$in": [123,234,345] }
}},
// De-normalize the array field content
{ "$unwind": "$field" },
// Match just the elements you want
{ "$match": {
"field.subDoc": { "$in": [123,234,345] }
}},
// Count by the element as a key
{ "$group": {
"_id": "$field.subDoc",
"count": { "$sum": 1 }
}}
])
That gives you output like this:
{ "_id" : 345, "count" : 2 }
{ "_id" : 234, "count" : 2 }
{ "_id" : 123, "count" : 2 }
But if you really want to go nuts on this, you are specifying the "keys" that you want as part of your query, so you could form a pipeline like this:
db.collection.aggregate([
{ "$match": {
"field.subDoc": { "$in": [123,234,345] }
}},
{ "$unwind": "$field" },
{ "$match": {
"field.subDoc": { "$in": [123,234,345] }
}},
{ "$group": {
"_id": "$field.subDoc",
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": null,
"123": {
"$max": {
"$cond": [
{ "$eq": [ "$_id", 123 ] },
"$count",
0
]
}
},
"234": {
"$max": {
"$cond": [
{ "$eq": [ "$_id", 234 ] },
"$count",
0
]
}
},
"345": {
"$max": {
"$cond": [
{ "$eq": [ "$_id", 345 ] },
"$count",
0
]
}
}
}}
])
Which is a relatively simple thing to construct that last stage in code by just processing the list of arguments:
var list = [123,234,345];
var group2 = { "$group": { "_id": null } };
list.forEach(function(id) {
group2["$group"][id] = {
"$max": {
"$cond": [
{ "$eq": [ "$_id", id ] },
"$count",
0
]
}
};
});
And that comes out more or less how you want it.
{
"_id" : null,
"123" : 2,
"234" : 2,
"345" : 2
}
Not exactly what you're asking for but it can give you an idea:
db.test.aggregate([
{
$unwind: '$field'
},
{
$group: {
_id: {
subDoc: '$field.subDoc'
},
count: {
$sum: 1
}
}
},
{
$project: {
subDoc: '$subDoc.subDoc',
count: '$count'
}
}
]);
Output:
{
"result": [
{
"_id": {
"subDoc": 456
},
"count": 1
},
{
"_id": {
"subDoc": 345
},
"count": 2
},
{
"_id": {
"subDoc": 234
},
"count": 2
},
{
"_id": {
"subDoc": 123
},
"count": 2
}
],
"ok": 1
}