Slow query performance: MongoDB with $lookup and $and/$or

Slow query performance: MongoDB with $lookup and $and/$or - javascript

I'm trying to find a way to create an engine that translates GraphQL query filters to MongoDB aggregations while keeping the performance. Our application has the requisite of limiting the results from collection A by applying filters to collection B, C and even D sometimes.
For better understanding, here's a sample about how a filter is translated to MongoDB.
This:
{
"filter": {
"return": null,
"AND": [{
"customer_WITH": {
"OR": [{
"code": "CUSTOMER NAME"
}, {
"commercialName_LIKE": "CUSTOMER NAME"
}, {
"corporateName_LIKE": "CUSTOMER NAME"
}]
}
}],
"OR": [{
"dispatcher_WITH": {
"company_WITH": {
"corporateName_LIKE": "COMPANY NAME"
}
}
}, {
"redispatcher_WITH": {
"company_WITH": {
"corporateName_LIKE": "COMPANY NAME"
}
}
}],
"reversal": null
}
}
Gets translated to this:
[{
"$match": {
"return": {
"$eq": null
},
"reversal": {
"$eq": null
},
"company": {
"$eq": ObjectId("xxxxxxxxxxxxxxxxxxxxxxxx")
}
}
}, {
"$lookup": {
"as": "dispatcher",
"from": "shippers",
"localField": "dispatcher",
"foreignField": "_id"
}
}, {
"$unwind": {
"path": "$dispatcher",
"preserveNullAndEmptyArrays": true
}
}, {
"$lookup": {
"as": "dispatcher.company",
"from": "companies",
"localField": "dispatcher.company",
"foreignField": "_id"
}
}, {
"$unwind": {
"path": "$dispatcher.company",
"preserveNullAndEmptyArrays": true
}
}, {
"$lookup": {
"as": "redispatcher",
"from": "shippers",
"localField": "redispatcher",
"foreignField": "_id"
}
}, {
"$unwind": {
"path": "$redispatcher",
"preserveNullAndEmptyArrays": true
}
}, {
"$lookup": {
"as": "redispatcher.company",
"from": "companies",
"localField": "redispatcher.company",
"foreignField": "_id"
}
}, {
"$unwind": {
"path": "$redispatcher.company",
"preserveNullAndEmptyArrays": true
}
}, {
"$lookup": {
"as": "customer",
"from": "customers",
"localField": "customer",
"foreignField": "_id"
}
}, {
"$match": {
"$or": [{
"dispatcher.company.corporateName": {
"$regex": /\sCOMPANY\sNAME/
}
}, {
"redispatcher.company.corporateName": {
"$regex": /\sCOMPANY\sNAME/
}
}],
"$and": [{
"$or": [{
"customer.code": {
"$eq": "CUSTOMER NAME"
}
}, {
"customer.commercialName": {
"$regex": /CUSTOMER\sNAME/
}
}, {
"customer.corporateName": {
"$regex": /CUSTOMER\sNAME/
}
}]
}]
}
}, {
"$unwind": {
"path": "$customer",
"preserveNullAndEmptyArrays": true
}
}, {
"$group": {
"_id": "$invoiceNo",
"__rootId": {
"$first": "$_id"
},
"company": {
"$first": "$company"
},
"customer": {
"$first": "$customer._id"
},
"dispatcher": {
"$first": "$dispatcher._id"
},
"redispatcher": {
"$first": "$redispatcher._id"
},
"driverPlate": {
"$first": "$driverPlate"
},
"key": {
"$first": "$key"
},
"activities": {
"$first": "$activities"
},
"serialNo": {
"$first": "$serialNo"
},
"invoiceNo": {
"$first": "$invoiceNo"
},
"incidents": {
"$first": "$incidents"
},
"deliveries": {
"$first": "$deliveries"
},
"return": {
"$first": "$return"
}
}
}, {
"$project": {
"_id": "$__rootId",
"company": "$company",
"customer": "$customer",
"dispatcher": "$dispatcher",
"redispatcher": "$redispatcher",
"driverPlate": "$driverPlate",
"key": "$key",
"activities": "$activities",
"serialNo": "$serialNo",
"invoiceNo": "$invoiceNo",
"incidents": "$incidents",
"deliveries": "$deliveries",
"return": "$return"
}
}, {
"$sort": {
"invoiceNo": -1
}
}, {
"$limit": 51
}]
The engine is smart enough to reallocate to the first position $match properties that don't require $lookups and right after $lookups if they do, however if they are within a $and/$or condition block, then they are reallocated after the last $lookup, regardless of what properties are there.
I could scan for what is used inside the $and and deconstruct it into new reallocated $match phases, but I need to figure how to handle the $or operator: I can't apply the same desconstruction idea on it because this would invalidate the condition.
So my question is: Is there an alternative way to use the phase $lookup along with $and/$or and improve the performance drastically?
Creating more indexes won't help because they're not used for the $lookup. Moving up $match phases, as the MongoDB team would suggest is also not possible because it would break the conditions. So I'm out of ideas now.
Best regards.

Related

How do I get comments count while fetching posts

I have two collections Posts an comments. I am storing comments with postID. I want to show comments count field when fetching all the posts data.
How do I achieve this?
// posts
{
postID: '123',
title: 'abc'
}
// comments
{
postID: '123',
comments: [
{
commentID: 'comment123',
comment: 'my Comment'
}
]
}
// Looking for this
{
postID: '123',
title: 'abc',
commentCount: 1
}

Here's one way you could do it.
db.posts.aggregate([
{
"$lookup": {
"from": "comments",
"localField": "postID",
"foreignField": "postID",
"pipeline": [
{
"$project": {
"_id": 0,
"commentCount": {"$size": "$comments"}
}
}
],
"as": "commentCount"
}
},
{
"$project": {
"_id": 0,
"postID": 1,
"title": 1,
"commentCount": {"$first": "$commentCount.commentCount"}
}
}
])
Try it on mongoplayground.net.

Try This.
pipeline = [{
"$lookup": {
"from": "comments",
"let": {
"postId": "$postId",
},
"pipeline": [
{
"$match": {
"$expr": {
"$eq": ["$postId", "$$postId"]
},
}
},
{
"$group": {
"_id": "$postId",
"comments_count": {"$sum": 1}
}
}
],
"as": "comments"
}
},
{
"$project": {
"_id": 0,
"postId": 1,
"title":1,
"comments_count": "$comments.comments_count"
}
}]
db.posts.aggregate(pipeline)

mongodb: How to use variable inside group operator in aggregate operation?

I am tring to make a query where use the value and try to interpolate a string in a new field.
Mongo Database:
[
{
"state": "1",
"events": {
"1": [
{
"date": 123.2,
"msg": "msg1"
},
{
"date": 124.2,
"msg": "msg2"
}
],
"2": [
{
"date": 125.2,
"msg": "msg3"
},
{
"date": 126.2,
"msg": "msg4"
}
],
}
},
{
"state": "2",
"events": {
"1": [
{
"date": 123.2,
"msg": "msg1"
},
{
"date": 124.2,
"msg": "msg2"
}
],
"2": [
{
"date": 125.2,
"msg": "msg3"
},
{
"date": 126.2,
"msg": "msg4"
}
],
}
}
]
Aggregate query:
db.collection.aggregate({
"$match": {
"state": {
"$in": [
"1",
"2"
]
}
}
},
{
"$group": {
"_id": {
"state": "$state"
},
"this_path": {
"$first": {
"$concat": [
"events.",
"$state",
".0.date"
]
}
}
}
})
"this_path" gets "events.1.0.date", but how to use this value, in another query(line), I would like to do like a string interpolation. Some thing like
...
"date": {
"$first": { `\$${this_path}`}
...
so it become the "events.1.date" then "$events.1.0.date" then "123.2"

you can define it by let just for example a fragment from pipeline:
$lookup: {
from: contentCollectionName,
as: 'content',
let: {
parentId: '$id',
},
The id is taken from above matched documents, but it can be anything

sort documents by their children in mongoose

I have a mongodb collection as follows:
[
{
"_id": { "$oid": "609b8f06a28f6728d19b486d" },
"user1": "609952c2b112741634d27d89",
"user2": "609b8202b5a389099cae3ce6",
"messages": [
{
"body": "Hello user 2",
"user": "609952c2b112741634d27d89",
"readed": true,
"created": { "$date": "2021-05-13T01:38:07.502Z" }
},
{
"body": "How old are you?",
"user": "609952c2b112741634d27d89",
"readed": false,
"created": "2021-05-13T01:40:07.502Z"
},
{
"body": "I am fine. Are you ready?",
"user": "609b8202b5a389099cae3ce6",
"readed": false,
"created": "2021-05-13T01:42:07.502Z"
},
{
"body": "Yes. Lighgui start",
"user": "609952c2b112741634d27d89",
"readed": false,
"created": "2021-05-13T01:38:50.502Z"
}
]
}
]
I want to sort chats that have messages up first how do I do that?
also if possible i would like to get the latest message of each chat and the number of messages with readed=false

You can use
$unwind to destructure the array
$sort to sort the array based on created of messages
$group to restructure the array
Here is the code
db.collection.aggregate([
{
$unwind: {
path: "$messages",
preserveNullAndEmptyArrays: true
}
},
{
"$sort": {"messages.created": 1 }
},
{
"$group": {
"_id": "$_id",
user1: { $first: "$user1" },
user2: { $first: "$user2" },
"messages": { "$push": "$messages" }
}
}
])
Working Mongo playground

MongoDb return both matched results and matched results on subdocument

Hello i am trying to get my database to return both matched and empty results on a sub-document.
I am joining two tables using aggregate and lookup, below is the code
db.collection.aggregate([
{
$addFields: {
cut_off_date: { $toDate: "$shipment_cutoff_date" },
},
},
{
$lookup: {
from: "updates",
localField: "_id",
foreignField: "shipment_id",
as: "updates",
},
},
{
$match: {
"updates.description": { $ne: "All updates completed" },
},
},
]);
Challenge is i am trying to get All rows where all updates have been completed as well as all empty updates. If i remove the match parameters i get all the results including where the updates have been completed and i am trying to avoid doing a foreach after getting all my results.
Here is a snippet of the result without the match
{
"_id": "609927e31233700004370cfb",
"title": "Hello World",
"createdAt": "2021-05-10T12:32:35.799Z",
"updatedAt": "2021-05-10T15:58:59.149Z",
"updates": []
},
{
"_id": "60940ad73ced476b2d0b3626",
"createdAt": "2021-05-06T15:27:19.814Z",
"updatedAt": "2021-05-10T12:49:08.167Z",
"updates": [
{
"_id": "60952c0ed31c6283f302eb23",
"post_id": "60940ad73ced476b2d0b3626",
"description": "This is an update description",
"createdAt": "2021-05-07T12:01:18.815Z",
"updatedAt": "2021-05-07T12:01:18.815Z",
},
]
},
{
"_id": "60940ad73ced476b2d0b3626",
"createdAt": "2021-05-06T15:27:19.814Z",
"updatedAt": "2021-05-10T12:49:08.167Z",
"updates": [
{
"_id": "60952c0ed31c6283f302eb23",
"post_id": "60940ad73ced476b2d0b3626",
"description": "All updates completed",
"createdAt": "2021-05-07T12:01:18.815Z",
"updatedAt": "2021-05-07T12:01:18.815Z",
},
]
}
Here is a snippet of what i will like to achieve after the match
{
"_id": "609927e31233700004370cfb",
"title": "Hello World",
"createdAt": "2021-05-10T12:32:35.799Z",
"updatedAt": "2021-05-10T15:58:59.149Z",
"updates": []
},
{
"_id": "60940ad73ced476b2d0b3626",
"createdAt": "2021-05-06T15:27:19.814Z",
"updatedAt": "2021-05-10T12:49:08.167Z",
"updates": [
{
"_id": "60952c0ed31c6283f302eb23",
"post_id": "60940ad73ced476b2d0b3626",
"description": "This is an update description",
"createdAt": "2021-05-07T12:01:18.815Z",
"updatedAt": "2021-05-07T12:01:18.815Z",
},
]
},
I am trying to get the results without the section where update description is not "All updates completed
Any help here please, MondoDb version is 4+

You can use $filter
$facet to categorize incoming doucment into two. 1. updates == empty array and 2. update != empty array
$redact use to keep or eliminate the document based on the condition we give
$concatArray to combined to both arrays which were produced after $facet
$unwind to deconstruct the array
$replaceRoot to make to root
He is the script
db.collection.aggregate([
{
"$facet": {
"emptyUpdates": [
{
"$match": {
$expr: { $eq: [ "$updates", [] ] }
}
}
],
"withoutUpdate": [
{
"$match": {
$expr: { $ne: [ "$updates", [] ] }
}
},
{
"$redact": {
"$cond": [
{
"$anyElementTrue": {
"$filter": {
"input": "$updates",
"cond": {
$eq: [ "$$this.description","All updates completed" ]
}
}
}
},
"$$PRUNE",
"$$KEEP",
]
}
}
]
}
},
{
"$project": {
combined: {
"$concatArrays": ["$emptyUpdates", "$withoutUpdate" ]
}
}
},
{ "$unwind": "$combined" },
{
"$replaceRoot": {"newRoot": "$combined" }
}
])
Working Mongo playground
Let me know anything goes wrong

Return Latest Sorted Date from Multiple Arrays

I currently have this schema
var dataSchema = new Schema({
hid: { type: String },
sensors: [{
nid: { type: String },
sid: { type: String },
data: {
param1: { type: String },
param2: { type: String },
data: { type: String }
},
date: { type: Date, default: Date.now }
}],
actuators: [{
nid: { type: String },
aid: { type: String },
control_id: { type: String },
data: {
param1: { type: String },
param2: { type: String },
data: { type: String }
},
date: { type: Date, default: Date.now }
}],
status: [{
nid: {type: String},
status_code: {type: String},
date: { type: Date, default: Date.now }
}],
updated: { type: Date, default: Date.now },
created: { type: Date }
});
And the query that I'm trying to build should search the schema by "hid" and then only pick the last object (by date) from the "sensors", "actuators" and "status" arrays but I can't figure out how to do that.
With this query I can partially achieve what I'm trying to get but it only give me one array at the time so I have to query the database three times and I would avoid doing so
db.getCollection('data').aggregate([
{ $match : { hid : "testhid" } },
{$project : {"sensors" : 1}},
{$unwind : "$sensors"},
{$sort : {"sensors.date" : -1}},
{$limit : 1}
])
Thanks in advance for any help

The best advice here would be to "store" the arrays as sorted in the first place. Chances are that they probably already are considering that any $push operation ( or even if you used .push() ) will actually just "append" to the array so that the latest item is "last" anyway.
So unless you are actually "changing" the "date" properties after you create, then the "latest date" is always the "last" item anyway. In which case, just $slice the entries:
Data.find({ "hid": "testhid" }).select({
"sensors": { "$slice": -1 },
"actuators": { "$slice": -1 },
"status": { "$slice": -1 }
}).exec(function(err,data) {
]);
"If", some reason you actually did manage to store in a different way or altered the "date" properties so they latest is no longer the "last", then it's probably a good idea to have all future updates use the $sort modifier with $push. This can "ensure" that additions to the array are consistently sorted. You can even modify the whole collection in one simple statement:
Date.update(
{},
{
"$push": {
"sensors": { "$each": [], "$sort": { "date": 1 } },
"actuators": { "$each": [], "$sort": { "date": 1 } },
"status": { "$each": [], "$sort": { "date": 1 } }
}
},
{ "multi": true },
function(err,num) {
}
)
In that one statement, every document in the collection is having every array mentioned re-sorted to that the "latest date" is the "last" entry for each array. This then means that the above usage of $slice is perfectly fine.
Now "If", absolutely none of that is possible for your case and you actually have some reason why the array entries are not to be commonly stored in "date" order, then ( and only really then ) should you resort to using .aggregate() in order to the the results:
Data.aggregate(
[
{ "$match": { "hid": "testhid" } },
{ "$unwind": "$sensors" },
{ "$sort": { "_id": 1, "sensors.date": -1 } },
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}},
{ "$unwind": "$actuators" },
{ "$sort": { "_id": 1, "actuators.date": -1 } },
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}},
{ "$unwind": "$status" },
{ "$sort": { "_id": 1, "status.date": -1 } },
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}}
],
function(err,data) {
}
)
The reality there is that MongoDB has no way to "inline sort" array content in a return from any query or aggregation pipeline statement. You can only really do this by processing with $unwind then using $sort and finally a $group using $first to effectively get the single item from the sorted array.
This you need to do "per" array, since the process of $unwind is creating seperate documents for each array item. You "could" do it all in one go like:
Data.aggregate(
[
{ "$match": { "hid": "testhid" } },
{ "$unwind": "$sensors" },
{ "$unwind": "$actuators" },
{ "$unwind": "$status" }
{ "$sort": {
"_id": 1,
"sensors.date": -1,
"actuators.date": -1,
"actuators.status": -1
}},
{ "$group": {
"_id": "$_id",
"sensors": { "$first": "$sensors" },
"actuators": { "$first": "$actuators" },
"status": { "$first": "$status" },
"updated": { "$first": "$updated" },
"created": { "$first": "$created" }
}}
],
function(err,data) {
}
)
But it's really not that much improvement on the other process with all things considered.
The real lesson here should be to "keep the array sorted" and then doing an operation to $slice the last item is a very simple process.

We Keep Coding

JavaScript is the programming language of the Web.

Slow query performance: MongoDB with $lookup and $and/$or - javascript

Related

How do I get comments count while fetching posts

mongodb: How to use variable inside group operator in aggregate operation?

sort documents by their children in mongoose

MongoDb return both matched results and matched results on subdocument

Return Latest Sorted Date from Multiple Arrays

Categories

Resources