MongoDB Aggregate Match - check if value contains any of the given strings - javascript

I have the following types of documents in my MongoDB collection. How can I use a $match stage to check whether the key2 array contains 'Mermaid / Fairy' or 'Superhero'?
{
_id: 123,
key2: [ 'Mermaid / Fairy', 'Superhero' ]
}
{
_id: 456,
key2: [ 'Slug']
}
This is how i am doing matches for individual words, however i would like to pass in a couple, and if it matches any of them, then it gets returned
{
$match: { key2: /.*Superhero.*/ },
},

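You can use this aggregate. Because key2 is an array, $in matches a document when any element of key2 equals one of the listed values: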
itemsSchema.aggregate([
{
$match: {
"key2": {
$in: [
"Mermaid / Fairy",
"Superhero"
]
}
}
}])

Here are a couple of ways ...
to check if the key2 value contains 'Mermaid / Fairy' or 'Superhero'
... by checking if the "$size" of the "$setIntersection" of "$key2" and ["Mermaid / Fairy", "Superhero"]
// Matches documents whose key2 array shares at least one value with the
// wanted list, by checking that the intersection of the two sets is non-empty.
db.collection.aggregate([
  {
    "$match": {
      "$expr": {
        "$gt": [
          {
            "$size": {
              "$setIntersection": [
                // Treat a missing/null key2 as an empty array: otherwise
                // $setIntersection yields null and $size raises an error
                // instead of simply not matching the document.
                { "$ifNull": ["$key2", []] },
                ["Mermaid / Fairy", "Superhero"]
              ]
            }
          },
          0
        ]
      }
    }
  }
])
Try it on mongoplayground.net.
Another way is to use "$reduce" by checking each "$key2" value to see if it is "$in" ["Mermaid / Fairy", "Superhero"].
// Folds over key2, OR-ing a running flag with "is this element one of the
// wanted values?"; the document matches when the final flag is true.
db.collection.aggregate([
{
"$match": {
"$expr": {
"$reduce": {
"input": "$key2",
"initialValue": false, // no matching element seen yet
"in": {
"$or": [
"$$value", // flag accumulated from the previous elements
{
"$in": [
"$$this", // current element of key2
["Mermaid / Fairy", "Superhero"]
]
}
]
}
}
}
}
}
])
Try it on mongoplayground.net.

Related

MongoDB hash object projection

If there is an object with unknown keys:
{
data: {
someObjectIdStringThatCantBePutInProjection: {
dontReturn: 123,
return: 321
},
someOtherObjectIdStringThatCantBePutInProjection: {
dontReturn: 1234,
return: 4321
}
}
}
And I want MongoDB to return only the `return` property of each of the nested objects. What would the projection look like?
For example a projection
{
data: { **allProperties**: { return: 1 } }
}
should return:
{
data: {
someObjectIdStringThatCantBePutInProjection: {
return: 321
},
someOtherObjectIdStringThatCantBePutInProjection: {
return: 4321
}
}
}
Using dynamic values as field names is considered an anti-pattern and introduces unnecessary complexity to queries. Nevertheless, you can convert the data object to an array of k-v tuples by $objectToArray. Use $map to get only the return field you need. Finally, use $arrayToObject to revert back to original form.
// Keeps only the `return` field of every dynamically-keyed sub-document of `data`.
db.collection.aggregate([
  {
    $set: {
      data: {
        // Rebuild the object from the trimmed key/value pairs.
        $arrayToObject: {
          $map: {
            // Turn { key: subDoc, ... } into [{ k: key, v: subDoc }, ...].
            input: { $objectToArray: "$data" },
            as: "entry",
            in: {
              k: "$$entry.k",
              v: { return: "$$entry.v.return" }
            }
          }
        }
      }
    }
  }
])
Mongo Playground

How to accumulate category counted fixed-size array in `$group` stage?

Let books collection be,
db.books.insertMany([
{ "name": "foo", "category": 0, publishedAt: ISODate("2008-09-14T00:00:00Z") },
{ "name": "bar", "category": 1, publishedAt: ISODate("1945-08-17T00:00:00Z") },
{ "name": "baz", "category": 1, publishedAt: ISODate("2002-03-01T00:00:00Z") },
{ "name": "qux", "category": 2, publishedAt: ISODate("2002-01-21T00:00:00Z") },
{ "name": "quux", "category": 4, publishedAt: ISODate("2018-04-18T00:00:00Z") },
])
I want to calculate total amount of books published between 2000-2010 inclusive for each year and also count of published categories. Let category be defined as an enum with 5 variants represented with integer in MongoDB schema e.g Fiction, Fantasy, Classic, Horror, Comic.
I achieved other requirements with this aggregation pipeline.
db.books.aggregate([
{
$match: {
publishedAt: {
$gte: ISODate("2000-01-01T00:00:00Z"),
$lt: ISODate("2011-01-01T00:00:00Z"),
},
},
},
{
$group: {
_id: {
$year: "$publishedAt",
},
totalCount: {
$count: {},
},
},
},
{
$sort: {
_id: 1,
},
},
]);
With following output,
[
{
_id: 2002,
totalCount: 2,
},
{
_id: 2008,
totalCount: 1,
},
]
But I also want a field that represents number of categories in an array. For example,
[
{
_id: 2002,
totalCount: 2,
categoryCount: [0, 1, 1, 0, 0],
},
{
_id: 2008,
totalCount: 1,
categoryCount: [1, 0, 0, 0, 0],
},
]
Array's length needs to be 5 since category is defined with 5 variants. In the example, the year 2002 has total of 2 books, which totalCount represents and has 1 book in category 1 which is why categoryCount[1] is 1. Likewise 1 book in category 2.
Using $accumulator:
// Counts the books published per year (2000-2010 inclusive) and, for each
// year, how many of them fall into each of the 5 categories (index = category).
db.collection.aggregate([
  {
    $match: {
      publishedAt: {
        $gte: ISODate("2000-01-01T00:00:00Z"),
        $lt: ISODate("2011-01-01T00:00:00Z")
      }
    }
  },
  {
    $group: {
      _id: { $year: "$publishedAt" },
      totalCount: { $count: {} },
      categoryCount: {
        $accumulator: {
          // Start with one zeroed counter per category.
          init: function () {
            return [0, 0, 0, 0, 0];
          },
          // Increment the counter at the index given by the document's category.
          accumulate: function (state, category) {
            state[category] = state[category] + 1;
            return state;
          },
          accumulateArgs: ["$category"],
          // Combine two partial states by adding the counters index by index.
          // The result is written into state1 and returned; the loop index is
          // declared locally so it does not leak as a global.
          merge: function (state1, state2) {
            for (let i = 0; i < state1.length; i++) {
              state1[i] = state1[i] + state2[i];
            }
            return state1;
          },
          lang: "js"
        }
      }
    }
  },
  {
    $sort: { _id: 1 }
  }
]);
You can achieve results like that without accumulator, using two $group stages: first by year and category, and then by year only, and then apply some MongoDB functions to transform the result to the desired format
The resulting query is long and looks quite complicated, duh. But works on your data example:
// Per-year book totals plus a fixed-size (5 entries) per-category count array,
// built without $accumulator by grouping twice and reshaping the result.
db.collection.aggregate([
  // 1. Initial filter: books published from 2000 through 2010.
  {
    $match: {
      publishedAt: {
        $gte: ISODate("2000-01-01T00:00:00Z"),
        $lt: ISODate("2011-01-01T00:00:00Z")
      }
    }
  },
  // 2. Count books per (year, category) pair.
  {
    $group: {
      _id: {
        year: { $year: "$publishedAt" },
        category: "$category"
      },
      totalCount: { $count: {} }
    }
  },
  // 3. Regroup by year only, collecting one { k, v } pair per category seen.
  {
    $group: {
      "_id": "$_id.year",
      "totalCount": { "$sum": "$totalCount" },
      "categoryCount": {
        "$push": {
          // Object keys must be strings, so the category number is cast.
          "k": { "$toString": "$_id.category" },
          "v": "$totalCount"
        }
      }
    }
  },
  // 4. Turn the list of pairs into a single { category: count } object.
  {
    "$addFields": {
      "categoryCount": { "$arrayToObject": "$categoryCount" }
    }
  },
  // 5. Fill in zeros for categories that did not occur. Only the five valid
  //    categories (0-4) are listed so the final array has exactly 5 entries.
  {
    "$addFields": {
      "categoryCount": {
        "$mergeObjects": [
          {
            "0": 0,
            "1": 0,
            "2": 0,
            "3": 0,
            "4": 0
          },
          "$categoryCount"
        ]
      }
    }
  },
  // 6. Back to an array of { k, v } pairs so the values can be extracted.
  {
    "$addFields": {
      "categoryCount": { "$objectToArray": "$categoryCount" }
    }
  },
  // 7. Cast the keys back to integers so they sort numerically.
  {
    "$addFields": {
      "categoryCount": {
        "$map": {
          "input": "$categoryCount",
          "as": "x",
          "in": {
            "$mergeObjects": [
              "$$x",
              { "k": { "$toInt": "$$x.k" } }
            ]
          }
        }
      }
    }
  },
  // 8. Order the pairs by category. sortBy takes the plain field name "k"
  //    (no "$" prefix).
  {
    "$addFields": {
      "categoryCount": {
        "$sortArray": {
          "input": "$categoryCount",
          "sortBy": { "k": 1 }
        }
      }
    }
  },
  // 9. Keep only the counts, producing a flat array indexed by category.
  {
    "$addFields": {
      "categoryCount": "$categoryCount.v"
    }
  },
  {
    $sort: { _id: 1 }
  }
])
MongoDB playground
Step-by-step explanation:
$match - your initial filter
$group - pass both year and category into _id to preserve the count for each category
$group - group by year only, collect a "categoryCount" as a list of objects for each category that appeared in this year
$addFields - combine the list into a single document, where keys are categories and values are their counts. Notice that keys can only be strings, so we must cast them
$addFields - "densify" object to fill missing categories with zeros
$addFields - convert object back to the array, so we can extract values only
$addFields - cast categories back to numbers for correct sorting, if you have more than 10 of them
$addFields - sort by categories to ensure order (actually I'm not sure if this step is really needed)
$addFields - extract the count for each category into a flat list
Try to add these stages one by one to your query to see how it actually works.
In fact, my suggestion is not to use aggregation as an end-to-end transformation, but rather to stop at stage 3 or 4 and finish the transformation in your programming language, if you can. Good luck!

Count number of fields that match in an Array of Object in mongodb

What I have
I have a DB in MongoDB like this:
{
"_id": {
"$oid": "60ba531acbfed3545c51a49e"
},
"email": "shaswat.dharaiya#gmail.com",
"Formats": [{
"format": "AST-QC",
}],
"Series": [{
"seq": "AST-QC - 1",
},
{
"seq": "AST-QC - 2",
},
{
"seq": "AST-QD - 1",
}]
}
I am successfully getting the data from the Formats array using this query:
const pipeline = [
{ $match: { "email": email } },
{ $unwind: "$Formats" },
]
const res = await colc.aggregate(pipeline)
What I want
Along with the data in the Formats array, I need the count of every format that is used by seq in Series array.
I am certain that it can be done using $addFields, Something like this.
const pipeline = [
{ $match: { "email": email } },
{ $unwind: "$Formats" },
{ $addFields: {"Formats.count": 0} }
]
const res = await colc.aggregate(pipeline)
But I am not sure as to how.
I don't want to call another query using .count()
$filter to iterate loop of Series array
$regexMatch to search format in seq
$size to get total elements in filtered result
// For each unwound Formats entry, adds Formats.count: the number of Series
// entries whose seq matches that entry's format.
const pipeline = [
{ $match: { email: email } },
{ $unwind: "$Formats" }, // one output document per Formats element
{
$addFields: {
"Formats.count": {
// Size of the filtered Series array = number of matching entries.
$size: {
$filter: {
input: "$Series",
cond: {
$regexMatch: {
input: "$$this.seq",
// NOTE(review): the format value is interpreted as a regular expression,
// so any regex metacharacters in it would not be matched literally.
regex: "$Formats.format"
}
}
}
}
}
}
}
]
Playground

Building your MongoDB $match as a string dynamically

I'm trying to build my $match dynamically for my MongoDB request, and when I do the simple stuff it works perfectly, like this:
var matchStr = {};
matchStr.status = { "$lte": 3 };
matchStr.checkout = { $gte: Math.round(new Date(d).getTime()/1000) }
And then I run the
bookingTable.aggregate([
{
$match: {
$and: [ matchStr ]
}
}, etc....
Which gives a nice:
matchStr: {
"status": {
"$lte": 3
},
"checkout": {
"$gte": 1527669588
}
}
So thats all great, but what if I want to put something like this into the matchStr...
{ $or: [ { "managerDate": {$lte: managerLast} }, { "activityDate": {$lte: activityLast} } ] }
,
{ $or: [ { "expireDate": {$gt: oneDayBackward} }, { "status": {$lt: 9}} ] }
,
{ "status": { $in: [0, 1, 2, 9 ] } }
How can I do that?
There are multiple syntaxes for accessing a property of an object; bracket notation lets you use keys such as "$or":
// Builds the $match filter incrementally. Dot notation works for plain keys;
// bracket notation is used for the "$or" operator key.
const matchStr = {};
matchStr.status = { "$lte": 3 };
// Current time as a Unix timestamp in whole seconds.
matchStr.checkout = { "$gte": Math.round(new Date().getTime() / 1000) };
matchStr["$or"] = [
  // Pass the variables themselves: quoting their names would compare the
  // fields against the literal strings "managerLast" / "activityLast".
  { "managerDate": { "$lte": managerLast } },
  { "activityDate": { "$lte": activityLast } }
];
Or, if you want to push conditions onto the $or array one at a time:
// Make sure the $or array exists before appending, so push() cannot throw
// when no $or condition has been set yet.
matchStr["$or"] = matchStr["$or"] || [];
// Pass the variables, not their names as strings.
matchStr["$or"].push({ "managerDate": { "$lte": managerLast } });
matchStr["$or"].push({ "activityDate": { "$lte": activityLast } });

How to find the matched element using custom ranking algorithm

I need to calculate rank on each account. Rank depends on the account who requested the method.
var account; // assumed to be one requesting the method
const allMatches = Account.find({
$or: [
{ argA: account.argA },
{ argB: account.argB },
{ argC: account.argC }
],
_id: {$ne: account._id}
});
/**
 * Scores how closely `item` matches the requesting `account`.
 * A matching argB is worth 3 points, argA 2 points and argC 1 point.
 *
 * @param {object} item - candidate account to score
 * @returns {number} sum of the points for every matching field (0-6)
 */
const getRank = (item) => {
  // [field, points] pairs, checked in order.
  const scoring = [
    ['argB', 3],
    ['argA', 2],
    ['argC', 1],
  ];
  let total = 0;
  for (const [field, points] of scoring) {
    if (item[field] === account[field]) {
      total += points;
    }
  }
  return total;
};
/**
 * Comparator for Array.prototype.sort that orders items by descending rank,
 * so the best match ends up at index 0 (the element read after sorting).
 *
 * @param {object} a - first item
 * @param {object} b - second item
 * @returns {number} negative when `a` ranks higher, positive when `b` ranks
 *   higher, 0 when both ranks are equal
 */
const compare = (a, b) => {
  const rankA = getRank(a);
  const rankB = getRank(b);
  return rankB - rankA;
};
// use the compare method
allMatches.sort(compare);
account.set({match : allMatches[0]['id']});
However, I can't use the sort method like this, as it expects an object or a string.
I need some help to proceed in the correct direction.
For example -
If there are 3 accounts in the system
A1 - {argA: 'A', argB: 'B', argC: 'C'}
A2 - {argA: 'D', argB: 'B', argC: 'F'}
A3 - {argA: 'G', argB: 'H', argC: 'C'}
Now if A1 needs to find a match -
Rank score with A2 is = 3 // argB is same
Rank score with A3 is = 1 // argC is same
Hence A1 will match with A2, and thats what I need.
You can "sort on the database" using .aggregate()
// Ranks candidate accounts on the server and returns them best match first.
// Weights follow the ranking rules of the question: argB = 3, argA = 2, argC = 1.
let results = await Account.aggregate([
  // Candidates share at least one argument with the requester, excluding itself.
  { "$match": {
    "$or": [
      { "argA": account.argA },
      { "argB": account.argB },
      { "argC": account.argC }
    ],
    "_id": { "$ne": account._id }
  }},
  // rank = sum of the points for every matching argument.
  { "$addFields": {
    "rank": {
      "$sum": [
        { "$cond": [{ "$eq": ["$argB", account.argB] }, 3, 0 ] },
        { "$cond": [{ "$eq": ["$argA", account.argA] }, 2, 0 ] },
        { "$cond": [{ "$eq": ["$argC", account.argC] }, 1, 0 ] }
      ]
    }
  }},
  // Highest rank first.
  { "$sort": { "rank": -1 } }
  /*
   * Add $skip and $limit for paging if needed
  { "$skip": 0 },
  { "$limit": 25 },
  */
])
Also noting that Account.aggregate() or Account.find() actually returns a Promise since the method is async, so you need to await that or use .then() or provide a callback, depending on your preferred style:
// Promise-style variant: ranks candidate accounts on the server, best match first.
// Weights follow the ranking rules of the question: argB = 3, argA = 2, argC = 1.
Account.aggregate([
  // Candidates share at least one argument with the requester, excluding itself.
  { "$match": {
    "$or": [
      { "argA": account.argA },
      { "argB": account.argB },
      { "argC": account.argC }
    ],
    "_id": { "$ne": account._id }
  }},
  // rank = sum of the points for every matching argument.
  { "$addFields": {
    "rank": {
      "$sum": [
        { "$cond": [{ "$eq": ["$argB", account.argB] }, 3, 0 ] },
        { "$cond": [{ "$eq": ["$argA", account.argA] }, 2, 0 ] },
        { "$cond": [{ "$eq": ["$argC", account.argC] }, 1, 0 ] }
      ]
    }
  }},
  // Highest rank first.
  { "$sort": { "rank": -1 } }
  /*
   * Add $skip and $limit for paging if needed
  { "$skip": 0 },
  { "$limit": 25 },
  */
])
  .then((result) => {
    /* do something */
  })
  // Report failures instead of leaving the rejection unhandled.
  .catch((err) => {
    console.error(err);
  })
That promise/callback resolution is the basic error in your code.
But the general point is you probably want to calculate "rank" on the server in order to enable paging over larger result sets, and that's a lot smarter than trying to sort "everything" in a result array.

Categories