Let books collection be,
// Sample documents for the `books` collection; `category` is stored as an integer.
const sampleBooks = [
  { name: "foo", category: 0, publishedAt: ISODate("2008-09-14T00:00:00Z") },
  { name: "bar", category: 1, publishedAt: ISODate("1945-08-17T00:00:00Z") },
  { name: "baz", category: 1, publishedAt: ISODate("2002-03-01T00:00:00Z") },
  { name: "qux", category: 2, publishedAt: ISODate("2002-01-21T00:00:00Z") },
  { name: "quux", category: 4, publishedAt: ISODate("2018-04-18T00:00:00Z") },
];
db.books.insertMany(sampleBooks);
I want to calculate, for each year, the total number of books published between 2000 and 2010 (inclusive), and also the number of books published in each category. Let category be defined as an enum with 5 variants, represented as an integer in the MongoDB schema, e.g. Fiction, Fantasy, Classic, Horror, Comic.
I achieved other requirements with this aggregation pipeline.
// Count the books published in each year from 2000 through 2010.
const publishedBetween2000And2010 = {
  $match: {
    publishedAt: {
      $gte: ISODate("2000-01-01T00:00:00Z"),
      // Exclusive upper bound, so every date in 2010 is still included.
      $lt: ISODate("2011-01-01T00:00:00Z"),
    },
  },
};

const countPerYear = {
  $group: {
    _id: { $year: "$publishedAt" },
    totalCount: { $count: {} },
  },
};

const byYearAscending = { $sort: { _id: 1 } };

db.books.aggregate([publishedBetween2000And2010, countPerYear, byYearAscending]);
With following output,
[
{
_id: 2002,
totalCount: 2,
},
{
_id: 2008,
totalCount: 1,
},
]
But I also want a field that represents number of categories in an array. For example,
[
{
_id: 2002,
totalCount: 2,
categoryCount: [0, 1, 1, 0, 0],
},
{
_id: 2008,
totalCount: 1,
categoryCount: [1, 0, 0, 0, 0],
},
]
The array's length needs to be 5, since category is defined with 5 variants. In the example, the year 2002 has a total of 2 books, which is what totalCount represents. One of them is in category 1, which is why categoryCount[1] is 1; likewise, one book is in category 2, so categoryCount[2] is 1.
Using $accumulator
// Per-year totals plus a fixed-length per-category count, built with a custom
// $accumulator. The accumulator functions are sent to the server and executed
// there, so each one must be self-contained (no references to outer variables).
db.collection.aggregate([
  {
    $match: {
      publishedAt: {
        $gte: ISODate("2000-01-01T00:00:00Z"),
        $lt: ISODate("2011-01-01T00:00:00Z"),
      },
    },
  },
  {
    $group: {
      _id: {
        $year: "$publishedAt",
      },
      totalCount: {
        $count: {},
      },
      categoryCount: {
        $accumulator: {
          // Start with one zeroed slot per category variant (indexes 0-4).
          init: function () {
            return [0, 0, 0, 0, 0];
          },
          // Increment the slot at the index given by the document's category.
          accumulate: function (state, category) {
            state[category] = state[category] + 1;
            return state;
          },
          accumulateArgs: ["$category"],
          // Combine two partial states by adding them slot by slot.
          // The original read and returned an undefined `state` variable and
          // leaked `i` as an implicit global; fold into `state1` instead.
          merge: function (state1, state2) {
            for (let i = 0; i < state1.length; i++) {
              state1[i] = state1[i] + state2[i];
            }
            return state1;
          },
          lang: "js",
        },
      },
    },
  },
  {
    $sort: {
      _id: 1,
    },
  },
]);
You can achieve results like that without accumulator, using two $group stages: first by year and category, and then by year only, and then apply some MongoDB functions to transform the result to the desired format
The resulting query is long and looks quite complicated, duh. But works on your data example:
// Per-year totals plus a five-slot per-category count, without $accumulator:
// group by (year, category), regroup by year, then reshape the pairs into a
// dense, ordered array of counts.
db.collection.aggregate([
  // Keep only books published from 2000 through 2010.
  {
    $match: {
      publishedAt: {
        $gte: ISODate("2000-01-01T00:00:00Z"),
        $lt: ISODate("2011-01-01T00:00:00Z"),
      },
    },
  },
  // Count books per (year, category) pair.
  {
    $group: {
      _id: {
        year: { $year: "$publishedAt" },
        category: "$category",
      },
      totalCount: { $count: {} },
    },
  },
  // One document per year: sum the counts and collect {k, v} pairs for the
  // categories that actually occurred in that year.
  {
    $group: {
      _id: "$_id.year",
      totalCount: { $sum: "$totalCount" },
      categoryCount: {
        $push: {
          // Keys are cast to strings because $arrayToObject builds field names.
          k: { $toString: "$_id.category" },
          v: "$totalCount",
        },
      },
    },
  },
  // Turn the pair list into a single { "<category>": <count> } object.
  {
    $addFields: {
      categoryCount: { $arrayToObject: "$categoryCount" },
    },
  },
  // Fill in zeros for categories missing in this year. There are exactly five
  // variants (0-4); the previous extra "5" key produced a six-element array.
  {
    $addFields: {
      categoryCount: {
        $mergeObjects: [
          { "0": 0, "1": 0, "2": 0, "3": 0, "4": 0 },
          "$categoryCount",
        ],
      },
    },
  },
  // Back to an array of {k, v} pairs so the values can be extracted.
  {
    $addFields: {
      categoryCount: { $objectToArray: "$categoryCount" },
    },
  },
  // Cast the keys back to integers so the sort below is numeric.
  {
    $addFields: {
      categoryCount: {
        $map: {
          input: "$categoryCount",
          as: "x",
          in: {
            $mergeObjects: ["$$x", { k: { $toInt: "$$x.k" } }],
          },
        },
      },
    },
  },
  // Order by category. sortBy takes the plain field name (`k`), not a
  // "$"-prefixed path.
  {
    $addFields: {
      categoryCount: {
        $sortArray: {
          input: "$categoryCount",
          sortBy: { k: 1 },
        },
      },
    },
  },
  // Keep only the counts, as a flat list indexed by category.
  {
    $addFields: {
      categoryCount: "$categoryCount.v",
    },
  },
  {
    $sort: {
      _id: 1,
    },
  },
]);
MongoDB playground
Step-by-step explanation:
$match - your initial filter
$group - pass both year and category into _id to preserve the count for each category
$group - group by year only, collect a "categoryCount" as a list of objects for each category that appeared in this year
$addFields - combine the list into a single document whose keys are categories and whose values are their counts. Note that keys can only be strings, so we must cast them
$addFields - "densify" object to fill missing categories with zeros
$addFields - convert object back to the array, so we can extract values only
$addFields - cast categories back to numbers for correct sorting, if you have more than 10 of them
$addFields - sort by categories to ensure order (actually I'm not sure if this step is really needed)
$addFields - extract the count for each category into a flat list
Try to add these stages one by one to your query to see how it actually works.
In fact, my suggestion is not to use aggregation as an end-to-end transformation, but rather to stop at stage 3 or 4 and finish the transformation in your programming language, if you can. Good luck
So I am working on a generic Elastic search method. I can generate queries well but I need to find a way to check if a document has property x, it must match a value y
something like;
{
index: 'any-index',
query {
bool: { must: [...queries] },
// if has property companyId, only return objects with companyId == 4
}
}
I believe that a filter with exists-query can be a solution.
{
  "query": {
    "bool": {
      "filter": [
        {
          "bool": {
            "should": [
              {
                "match": {
                  "field_name": "xpto"
                }
              },
              {
                "bool": {
                  "must_not": {
                    "exists": {
                      "field": "field_name"
                    }
                  }
                }
              }
            ],
            "minimum_should_match": 1
          }
        }
      ]
    }
  }
}
Here I have an array. Some of the values listed below already exist in MongoDB, but I need the values that do not currently exist in the database
ex: -
Values currently in the database
[
{"id":100},
{"id":500},
{"id":606},
{"id":800}
]
the value I have
let x = [100,300,400,500,606,800];
I need the output to consist of the following values:
300,400
These values need to be added because they do not already exist in the database
// Report which values of `x` are not yet stored in the collection.
dbo.collection
  .aggregate([
    {
      // Gather every stored `id` into a single set. The stored documents keep
      // the value in `id`, so that is the field to collect (not `uid`).
      // NOTE: if the collection is empty, $group should emit no document and
      // the result is then an empty array rather than all of `x` - verify if
      // that case matters.
      $group: {
        _id: null,
        existingIds: {
          $addToSet: "$id",
        },
      },
    },
    {
      $replaceRoot: {
        newRoot: {
          // Keep only the candidates that are not in the stored set.
          results: {
            $filter: {
              input: x,
              as: "datum",
              cond: {
                $not: {
                  $setIsSubset: [["$$datum"], "$existingIds"],
                },
              },
            },
          },
        },
      },
    },
  ])
  .toArray()
  .then((result) => {
    console.log(result);
  })
  .catch((err) => {
    // Surface failures instead of logging an undefined result.
    console.error(err);
  });
What I have
I have a DB in MongoDB like this:
{
"_id": {
"$oid": "60ba531acbfed3545c51a49e"
},
"email": "shaswat.dharaiya#gmail.com",
"Formats": [{
"format": "AST-QC",
}],
"Series": [{
"seq": "AST-QC - 1",
},
{
"seq": "AST-QC - 2",
},
{
"seq": "AST-QD - 1",
}]
}
I am successfully getting the data from the Formats array using this query:
// Match documents whose `email` equals the `email` variable, then emit one
// document per element of the `Formats` array.
const pipeline = [
{ $match: { "email": email } },
{ $unwind: "$Formats" },
]
const res = await colc.aggregate(pipeline)
What I want
Along with the data in the Formats array, I need the count of every format that is used by seq in Series array.
I am certain that it can be done using $addFields, Something like this.
// Same match/unwind as before, plus a `count` field on each unwound format.
// The count is hard-coded to 0 here as a placeholder.
const pipeline = [
{ $match: { "email": email } },
{ $unwind: "$Formats" },
{ $addFields: {"Formats.count": 0} }
]
const res = await colc.aggregate(pipeline)
But I am not sure as to how.
I don't want to call another query using .count()
$filter to iterate loop of Series array
$regexMatch to search for the format in seq
$size to get total elements in filtered result
// Series entries whose `seq` matches the current (unwound) format, using the
// format value as the regular expression.
const seriesUsingFormat = {
  $filter: {
    input: "$Series",
    cond: {
      $regexMatch: { input: "$$this.seq", regex: "$Formats.format" },
    },
  },
};

// For each format of the matched document, attach how many Series entries use it.
const pipeline = [
  { $match: { email } },
  { $unwind: "$Formats" },
  { $addFields: { "Formats.count": { $size: seriesUsingFormat } } },
];
Playground
I am trying to rename properties in the returned objects, much like MySQL's AS does for columns.
Query
// Games of the given league whose result is not null, newest first, with the
// home_id and away_id references populated.
Games.find({ leagueID, result: { $ne: null } })
  .populate('home_id away_id')
  .sort({ date: -1 })
  .execAsync()
Output
{
home_id: {
...some details
},
away_id: {
...some details
}
}
Desired Output
{
home: {
...some details
},
away: {
...some details
}
}
So how can I get the desired outcome?
My solution is to use the transform function.
// toJSON transform: when `home` / `away` still hold a valid ObjectId value,
// move it to `homeId` / `awayId`. Any other value is left under its original key.
GamesSchema.set('toJSON', {
  transform(doc, ret, options) {
    const renames = [
      ['home', 'homeId'],
      ['away', 'awayId'],
    ];
    for (const [field, idField] of renames) {
      if (!mongoose.Types.ObjectId.isValid(ret[field])) {
        continue;
      }
      ret[idField] = ret[field];
      delete ret[field];
    }
  },
});
Without populate:
Input
{
"_id": "sD95OhsGrWVIqmTLVeuQdkna",
"leagueID": 1000,
"home": "404d1d9f68c3bb386b50f440" // ObjectId
"away": "504d1d9f68c3bb386b50f450" // ObjectId
}
Output
{
"_id": "sD95OhsGrWVIqmTLVeuQdkna",
"leagueID": 1000,
"homeId": "404d1d9f68c3bb386b50f440"
"awayId": "504d1d9f68c3bb386b50f450"
}
With populate:
Input
{
"_id": "sD95OhsGrWVIqmTLVeuQdkna",
"leagueID": 1000,
"home": "404d1d9f68c3bb386b50f440" // ObjectId
"away": "504d1d9f68c3bb386b50f450" // ObjectId
}
Output
{
"_id": "sD95OhsGrWVIqmTLVeuQdkna",
"leagueID": 1000,
"home": {
"_id": "404d1d9f68c3bb386b50f440",
"name": "Home"
}
"away": {
"_id": "504d1d9f68c3bb386b50f450",
"name": "Away"
}
}
You can use aggregation and manipulate the output field like this
// Expose home_id / away_id as home / away in the output and drop _id.
db.collection.aggregate([
  {
    $project: {
      _id: 0,
      home: "$home_id",
      away: "$away_id",
    },
  },
]);
Try lodash's _.mapKeys, like this:
// Copy of the serialized object with 'oldKey' renamed to 'newKey'; every other
// key is kept as is.
const newObject = _.mapKeys(oldObject.toJSON(), (value, key) =>
  key === 'oldKey' ? 'newKey' : key
);