How to improve this aggregate with many $projects - javascript

I have created an aggregate function and I feel it's pretty long and non-DRY. I'm wondering what ways I can improve it.
My Thread model has a sub-document called revisions. The function tries to get the most recent revision that has the status of APPROVED.
Here is the full model.
{
"_id": ObjectId("56dc750769faa2393a8eb656"),
"slug": "my-thread",
"title": "my-thread",
"created": 1457249482555.0,
"user": ObjectId("56d70a491128bb612c6c9220"),
"revisions": [
{
"body": "This is the body!",
"status": "APPROVED",
"_id": ObjectId("56dc750769faa2393a8eb657"),
"comments": [
],
"title": "my-thread"
}
]
}
And here is the aggregate function I want to improve.
Thread.aggregate([
{ $match: {
slug: thread
} },
{ $project: {
user: '$user',
created: '$created',
slug: '$slug',
revisions: {
$filter: {
input: '$revisions',
as: 'revision',
cond: { $eq: [ '$$revision.status', 'APPROVED' ] }
}
}
} },
{ $sort: { 'revisions.created': -1 } },
{ $project: {
user: '$user',
created: '$created',
slug: '$slug',
revisions: { $slice: ["$revisions", 0, 1] }
} },
{ $unwind: '$revisions'},
{ $project: {
body: '$revisions.body',
title: '$revisions.title',
user: '$user',
slug: '$slug',
created: '$created'
}}
])

Well you cannot really since there are $sort and $unwind stages in between on purpose. It's also basically "wrong", since the $sort cannot re-order the array until you $unwind it first.
Then it is better to use $group and $first instead, to just get the first element from the sort in each document:
Thread.aggregate([
{ "$match": {
"slug": thread
} },
{ "$project": {
"user": 1,
"created": 1,
"slug": 1,
"revisions": {
"$filter": {
"input": "$revisions",
"as": "revision",
"cond": { "$eq": [ "$$revision.status", "APPROVED" ] }
}
}
} },
// Cannot sort until you $unwind
{ "$unwind": "$revisions" },
// Now that will sort the elements
{ "$sort": { "_id": 1, "revisions.created": -1 } },
// And just grab the $first boundary for everything
{ "$group": {
"_id": "$_id",
"body": { "$first": "$revisions.body" },
"title": { "$first": "$revisions.title" },
"user": { "$first": "$user" },
"slug": { "$first": "$slug" },
"created": { "$first": "$created" }
}}
])
You could always reform the array with $push and then apply $arrayElemAt instead of the $slice to yield just a single element, but it's kind of superflous considering it would need another $project after the $group in the first place.
So even though there are "some" operations you can do without using $unwind, unfortunately "sorting" the arrays generated out of functions like $filter is not something that can be presently done, until you $unwind the array first.
If you didn't "need" the $sort on the "revisions.created" ( notably missing from your sample document ) then you can instead just use normal projection instead:
Thread.find(
{ "slug": slug, "revisions.status": "APPROVED" },
{ "revisions.$": 1 },
)
Only when sorting array elements would you need anything else, since the $ positional operator will just return the first matched element anyway.

Related

how to find array element using auto generated id in mongodb

currently, I am struggling with how the MongoDB document system works. I want to fetch array elements with an auto-generated id but how to fetch that specific data that I don't know.
my current schema is
const ItemPricesSchema = new mongoose.Schema({
_id : {
type: String
},
ItemsPrices: {
type: [{
barcode : {
type: String
},
itemName : {
type: String
},
price : {
type: String
}
}]
}
});
current data is stored in this way
{
"_id": "sha#c.c",
"ItemsPrices": [
{
"barcode": "345345",
"itemName": "maggie",
"price": "45",
"_id": "620a971e11120abbde5f4c3a"
},
{
"barcode": "356345",
"itemName": "monster",
"price": "70",
"_id": "620a971e11120abbde5f4c3b"
}
],
"__v": 0
}
what I want to achieve is that I want to find array elements through ids
if I want a specific array element with id "620a971e11120abbde5f4c3b" what should I do??
I have tried $unwind , $in, $match...
the result should be like
{
"_id": "sha#c.c",
"ItemsPrices": [
{
"barcode": "356345",
"itemName": "monster",
"price": "70",
"_id": "620a971e11120abbde5f4c3b"
}
],
"__v": 0
}
what I tried is like this from the answer
router.get('/filter/:id', async (req, res) => {
try {
const item = await ItemPricesSchema.aggregate([
{$project: {
"ItemsPrices": {
$filter: {
input: "$ItemsPrices",
as: "item",
cond: {
$eq: [
"$$item._id",
"620a8dd1c88ae3eb88a8107a"
]
}
}
}
}
}
])
res.json(item);
console.log(item);
} catch (error) {
res.status(500).json({message: error.message});
}
})
and returns something like this (Empty arrays)
[
{
"_id": "xvz#zyx.z",
"ItemsPrices": []
},
{
"_id": "zxc#xc.czx",
"ItemsPrices: []
},
{
"_id": "asd#asd.asd",
"ItemsPrices": []
},
{
"_id": "qwe#qwe.qwe",
"ItemsPrices": []
}
]
but If I search for price $$item.price
cond: {
$eq: [
"$$item.price",
"30"
]
}
it returns the perfect output
[
{
"_id": "xvz#zyx.z",
"ItemsPrices": []
},
{
"_id": "zxc#xc.czx",
"ItemsPrices: []
},
{
"_id": "asd#asd.asd",
"ItemsPrices": []
},
{
"_id": "qwe#qwe.qwe",
"ItemsPrices": [
{
"barcode":"234456345",
"price":"30",
"itemName":"monster",
"_id":"620a8dd1c88ae3eb88a8107a"
}
]
}
]
You can do an aggregation with $project and apply $filter on the array part. In mongoose you can apply the aggregation query in a more or less similar way https://mongoosejs.com/docs/api/aggregate.html
db.collection.aggregate([
{
$project: {
"ItemsPrices": {
$filter: {
input: "$ItemsPrices",
as: "item",
cond: {
$eq: [
"$$item._id",
mongoose.Types.ObjectId("620a971e11120abbde5f4c3b")
]
}
}
},
"__v": 1 //when projecting 1 means in the final result this field appears
}
}
])
more examples
demo
Option 1:
Use $filter in an aggregation query as explained by cmgchess
Option 2:
If you only want one object from array you can use $elemMatch like this:
db.collection.find({
"ItemsPrices._id": "620a971e11120abbde5f4c3b"
},
{
"ItemsPrices": {
"$elemMatch": {
"_id": "620a971e11120abbde5f4c3b"
}
}
})
Example here
But take care, using $elemMatch only the first element is returned. Check this other example where there are two objects with the desired _id but only returns one.
As said before, if you only one (or only exists one) maybe you can use find and $elemMatch to avoid a filter by the entire array. But if can be multiple values use $filter.

mongodb - unwinding nested subdocuments

For the following dataset example:
lists
{ _id: 1, included_lists: [ 2 ], items: [ "i1" ]}
{ _id: 2, included_lists: [], items: [ "i2", "i3" ]}
items
{ _id: "i1", details: [{}, {}, {}]}
{ _id: "i2", details: [{}, {}, {}]}
{ _id: "i3", details: [{}, {}, {}]}
I want to grab all the items for a list, including the ones attached to the included_lists
For example: if we're looking at list _id 1, we should get items i1, i2, i3
I have an idea how to do this, which involves using populate or $lookup, but I'm not sure how to unwind the nested items inside the included_lists and join them with the items in the original list.
In the end, I would like to have a dataset where I am able to use limit, skip and match.
I'm using mongoose, but vanilla mongodb code would also be fine.
Update
My current idea of how to do this is to retrieve all of the list ids first in one query i.e.
List.find({ _id: id}, { included_lists: 1})
Then, with the list ids, make an array of that i.e.
var all_ids = [id, ...included_lists]
Then just find the items and unwind
Psuedo-code:
List
.aggregate([
{
$match: {
_id: {
$in: all_ids
}
}
},
{ $lookup: {} }
{
$unwind: "$items"
},
{
$project: {
"list.name": 1,
"list._id": 1,
"items": 1
}
}
])
But I don't want to have to do a first query to retrieve all the list_ids, I should be able to retrieve all related items just through one _id which would then be able to retrieve the rest of the items through included_lists
You can try below aggregation from mongodb 3.6 and above
List.aggregate([
{ "$match": { "_id": id }},
{ "$lookup": {
"from": Items.collection.name,
"let": { "items": "$items" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$items" ] } } }
],
"as": "items"
}},
{ "$lookup": {
"from": Lists.collection.name,
"let": { "included_lists": "$included_lists", "items": "$items" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$included_lists" ] } } },
{ "$lookup": {
"from": Items.collection.name,
"let": { "items": "$items" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$items" ] } } }
],
"as": "items"
}},
{ "$project": { "allItems": { "$concatArrays": [ "$$items", "$items" ]}}}
],
"as": "included_lists"
}},
{ "$unwind": "$included_lists" },
{ "$replaceRoot": { "newRoot": "$included_lists" }}
])
You can try below aggregation in 3.4.
Initial $lookup to get the items values for included_lists followed by $concatArrays to merge the looked up items and items.
Second $lookup to get the item details followed by $unwind to flatten the results.
List.aggregate([
{"$lookup":{
"from":name of the list collection,
"localField":"included_lists",
"foreignField":"_id",
"as":"included_items"
}},
{"$unwind":"$included_items"},
{"$project":{"allItems":{"$concatArrays":["$items","$included_items.items"]}}},
{"$lookup":{
"from":name of the item collection,
"localField":"allItems",
"foreignField":"_id",
"as":"lookedup_items"
}},
{"$unwind":"$lookedup_items"},
{"$skip": some number},
{"$limit": some number}
])

Return Latest Sorted Date from Multiple Arrays

I currently have this schema
var dataSchema = new Schema({
hid: { type: String },
sensors: [{
nid: { type: String },
sid: { type: String },
data: {
param1: { type: String },
param2: { type: String },
data: { type: String }
},
date: { type: Date, default: Date.now }
}],
actuators: [{
nid: { type: String },
aid: { type: String },
control_id: { type: String },
data: {
param1: { type: String },
param2: { type: String },
data: { type: String }
},
date: { type: Date, default: Date.now }
}],
status: [{
nid: {type: String},
status_code: {type: String},
date: { type: Date, default: Date.now }
}],
updated: { type: Date, default: Date.now },
created: { type: Date }
});
And the query that I'm trying to build should search the schema by "hid" and then only pick the last object (by date) from the "sensors", "actuators" and "status" arrays but I can't figure out how to do that.
With this query I can partially achieve what I'm trying to get but it only give me one array at the time so I have to query the database three times and I would avoid doing so
db.getCollection('data').aggregate([
{ $match : { hid : "testhid" } },
{$project : {"sensors" : 1}},
{$unwind : "$sensors"},
{$sort : {"sensors.date" : -1}},
{$limit : 1}
])
Thanks in advance for any help
The best advice here would be to "store" the arrays as sorted in the first place. Chances are that they probably already are considering that any $push operation ( or even if you used .push() ) will actually just "append" to the array so that the latest item is "last" anyway.
So unless you are actually "changing" the "date" properties after you create, then the "latest date" is always the "last" item anyway. In which case, just $slice the entries:
Data.find({ "hid": "testhid" }).select({
"sensors": { "$slice": -1 },
"actuators": { "$slice": -1 },
"status": { "$slice": -1 }
}).exec(function(err,data) {
]);
"If", some reason you actually did manage to store in a different way or altered the "date" properties so they latest is no longer the "last", then it's probably a good idea to have all future updates use the $sort modifier with $push. This can "ensure" that additions to the array are consistently sorted. You can even modify the whole collection in one simple statement:
Date.update(
{},
{
"$push": {
"sensors": { "$each": [], "$sort": { "date": 1 } },
"actuators": { "$each": [], "$sort": { "date": 1 } },
"status": { "$each": [], "$sort": { "date": 1 } }
}
},
{ "multi": true },
function(err,num) {
}
)
In that one statement, every document in the collection is having every array mentioned re-sorted to that the "latest date" is the "last" entry for each array. This then means that the above usage of $slice is perfectly fine.
Now "If", absolutely none of that is possible for your case and you actually have some reason why the array entries are not to be commonly stored in "date" order, then ( and only really then ) should you resort to using .aggregate() in order to the the results:
Data.aggregate(
[
{ "$match": { "hid": "testhid" } },
{ "$unwind": "$sensors" },
{ "$sort": { "_id": 1, "sensors.date": -1 } },
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}},
{ "$unwind": "$actuators" },
{ "$sort": { "_id": 1, "actuators.date": -1 } },
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}},
{ "$unwind": "$status" },
{ "$sort": { "_id": 1, "status.date": -1 } },
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}}
],
function(err,data) {
}
)
The reality there is that MongoDB has no way to "inline sort" array content in a return from any query or aggregation pipeline statement. You can only really do this by processing with $unwind then using $sort and finally a $group using $first to effectively get the single item from the sorted array.
This you need to do "per" array, since the process of $unwind is creating seperate documents for each array item. You "could" do it all in one go like:
Data.aggregate(
[
{ "$match": { "hid": "testhid" } },
{ "$unwind": "$sensors" },
{ "$unwind": "$actuators" },
{ "$unwind": "$status" }
{ "$sort": {
"_id": 1,
"sensors.date": -1,
"actuators.date": -1,
"actuators.status": -1
}},
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}}
],
function(err,data) {
}
)
But it's really not that much improvement on the other process with all things considered.
The real lesson here should be to "keep the array sorted" and then doing an operation to $slice the last item is a very simple process.

Aggregate from array where date was before today

I am trying to aggregate a collection which has an array. In this array, there's a reminders array. The document might look like this:
{
_id: "1234",
dates: {
start: ISODate(),
end: ISODate()
},
reminders: [{
sendAt: ISODate(),
status: 'closed'
}, {
sendAt: ISODate(),
status: 'open'
}]
}
Say the first one is before today and the next one is after today. What I want to do is get the array of all that come before today, OR, an empty array if none came before today. I tried the following aggregation
db.reminders.aggregate([
{ $match: { 'dates.end': { $gt: new Date } } },
{ $unwind: '$reminders' },
{
$match: {
reminders: {
$elemMatch: {
sendAt: { $lt: new Date() },
status: { $ne: 'open' }
}
}
}
}
])
However, if there are no reminders before today, it will fail and give nothing back.
Is there a way to construct this structure with mongodb aggregation?
NOTE: I can't use $filter because that is in 3.2
You can use the $redact operator, to filter out sub-documents for versions >=2.6 .
It also avoids the unnecessary $unwind stage.
$match all the documents that have their dates.end attribute greater than the search criteria.
$redact through all sub-documents and do the following, $$DESCEND into those documents, that match the conditions, else $$PRUNE.
sample code:
var endDateToMatch = ISODate("2014-01-01T00:00:00Z");
var currentDate = ISODate();
db.t.aggregate([
{
$match:{"dates.end":{$gt:endDateToMatch}}
},
{
$redact:{$cond:[
{$and:[
{$ne:[{$ifNull:["$status",""]},
"open"]},
{$lt:[{$ifNull:["$sendAt",currentDate-1]},
currentDate]}
]
},
"$$DESCEND","$$PRUNE"]}
}
])
This would give you one document per document that matches the $match stage. If you need to accumulate all the sub-documents, then you need to $unwind "reminders" and $group by _id as null.
So you basically want $filter behavior but need to do it in an earlier version, with your main case being returning documents even if the array content ends up empty.
For MongoDB 2.6 you can do "almost" the same thing with $map and $setDifference:
db.reminders.aggregate([
{ "$match": { "dates.end": { "$gt": new Date() } } },
{ "$project": {
"dates": 1,
"reminders": {
"$setDifference": [
{ "$map": {
"input": "$reminders",
"as": "reminder",
"in": {
"$cond": [
{ "$and": [
{ "$lt": [ "$$reminder.sendAt", new Date() ] },
{ "$ne": [ "$$reminder.status", "open" ] }
]},
"$$reminder",
false
]
}
}},
[false]
]
}
}}
])
And that is okay as long as the resulting "set" from $setDifference is all unqiuely identified items. So the $map method applies the test, either returning the content or false if there was no match to conditions. The $setDifferene essentially removes any false elements from the results, but of course as a "set" would count any items exactly the same as one.
If your MongoDB is less than 2.6 ( or the case of "sets" makes the above unusable), it just requires being a bit more careful when looking at the content to filter:
db.reminders.aggregate([
{ "$match": { "dates.end": { "$gt": new Date() } } },
{ "$unwind": "$reminders" },
// Count where condition matched
{ "$group": {
"_id": "$_id",
"dates": { "$first": "$dates" },
"reminders": { "$push": "$reminders" },
"matched": { "$sum": {
"$cond": [
{ "$and": [
{ "$lt": [ "$reminders.sendAt", new Date() ] },
{ "$ne": [ "$reminders.status", "open" ] }
]},
1,
0
]
}}
}},
// Substitute array where no count just for brevity
{ "$project": {
"dates": 1,
"reminders": { "$cond": [
{ "$eq": [ "$matched", 0 ] },
{ "$const": [false] },
"$reminders"
]},
"matched": 1
}},
// Unwind again
{ "$unwind": "$reminders" },
// Filter for matches "or" where there were no matches to keep
{ "$match": {
"$or": [
{
"reminder.sendAt": { "$lt": new Date() },
"reminder.status": { "$ne": "open" }
},
{ "matched": 0 }
]
}},
// Group again
{ "$group": {
"_id": "$_id",
"dates": { "$first": "$dates" },
"reminders": { "$push": "$reminders" }
}},
// Replace the [false] array with an empty one
{ "$project": {
"dates": 1,
"reminders": { "$cond": [
{ "$eq": [ "$reminders", [false] ] },
{ "$const": [] },
"$reminders"
]}
}}
])
It's a bit long winded, but it's basically doing the same thing.
Also note that $elemMatch does not apply after processing $unwind, since the content is in fact no longer an array. Simple dot notation applies to the elements that are now in individual documents.

MongoDB aggregate count of items in array across different documents?

Here is my MongoDB collection schema:
company: String
model: String
tags: [String]
I need to aggregate it so I get the following output:
[{
"_id": {
"company": "Lenovo",
"model": "T400"
},
"tags": {
tag: "SomeTag"
count: 124 // number of times, this tag was found in `Lenovo T400`
}
}...]
I tried to do the following:
var aggParams = {};
aggParams.push({ $unwind: '$tags' });
aggParams.push({ $group: {
_id: { company: '$company', model: '$model' },
tags: { $push: { tag: '$tags', count: { $sum: 1 } } },
}});
But I got the following error:
invalid operator '$sum'
What is the right way to do this with aggregation?
You need to process $unwind on an array in order to deal with it meaninfully in aggregation. Also you adding to an array and "counts" stage a separate. As well an aggregation pipeline in an "array" of arguments itself, and not an object as you have defined:
Model.aggregate([
{ "$unwind": "$tags" },
{ "$group": {
"_id": {
"company": "$company",
"model": "$model",
"tag": "$tags"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": {
"company": "$_id.company",
"model": "$_id.model",
},
"tags": { "$push": { "tag": "$_id.tag", "count": "$count" }
}}
], function(err,result) {
})
So two $group stages does the job here. One to sum up the tags within company and model and the other to group on just the "company" and "model" and add the distinct tags and counts to an array.

Categories