Mongo aggregate several documents into one - javascript

I'm trying to merge a few complicated documents via the mongodb.collection.aggregate() command.
Let's say that I want to merge x of the collection's document (in the following example: x=2):
[
{
"_id": 1,
"Data": {
"children": {
"1": {
"name": "appear_only_in_first_doc",
"cost": 1,
"revenue": 4.5,
"grandchildren": {
"1t9dsqdqdvoj8pdppxjk": {
"cost": 0,
"revenue": 1.5
}
}
},
"2": {
"name": "appear_in_both_docs",
"cost": 2,
"revenue": 7,
"grandchildren": {
"jesrdt5qwef2222dgt": {
"cost": 1,
"revenue": 3
},
"klh352hk5367kf": {
"cost": 2,
"revenue": 7
}
}
}
}
}
},
{
"_id": 2,
"Data": {
"children": {
"2": {
"name": "appear_in_both_docs___but_diff_name",
"cost": 9,
"revenue": 7,
"grandchildren": {
"aaaaaaaaa": {
"cost": 3,
"revenue": 2
},
"jesrdt5qwef2222dgt": {
"cost": 6,
"revenue": 5
}
}
},
"3": {
"name": "appear_only_in_last_doc",
"cost": 4,
"revenue": 2,
"grandchildren": {
"cccccccccccc": {
"cost": 4,
"revenue": 2
}
}
}
}
}
}
]
Challenges:
The keys under the "children" and "grandchildren" keys are dynamic and unknown while writing the query.
If a child or grandchild appears only in one doc (e.g. "1", "3", "1t9dsqdqdvoj8pdppxjk", "klh352hk5367kf", "aaaaaaaaa" and "cccccccccccc") - it should also appear in the final result.
If a child appears in more than one docs (e.g. "2" and "jesrdt5qwef2222dgt") - it should appear as one in the final result. The fields "cost" and "revenue" should be summed, and the last "name" field should be taken.
I've seen the following solutions:
unionWith - Irrelevant, unions 2 different collections.
merge - Irrelevant, cannot sum values of fields that appear in more than once (it takes the last instead).
mergeObjects - Irrelevant, cannot sum values of fields that appear in more than once (it takes the last instead).
The final result should look like this:
{
"Data": {
"children": {
"1": {
"name": "appear_only_in_first_doc",
"cost": 1,
"revenue": 4.5,
"grandchildren": {
"1t9dsqdqdvoj8pdppxjk": {
"cost": 0,
"revenue": 1.5
}
}
},
"2": {
"name": "appear_in_both_docs___but_diff_name",
"cost": 11,
"revenue": 14,
"grandchildren": {
"aaaaaaaaa": {
"cost": 3,
"revenue": 2
},
"jesrdt5qwef2222dgt": {
"cost": 7,
"revenue": 8
},
"klh352hk5367kf": {
"cost": 2,
"revenue": 7
}
}
},
"3": {
"name": "appear_only_in_last_doc",
"cost": 4,
"revenue": 2,
"grandchildren": {
"cccccccccccc": {
"cost": 4,
"revenue": 2
}
}
}
}
}
}

This is little lengthy process, might be there will be some easy one, I am just sharing the process,
$project to convert children to array format (k,v)
$unwind deconstruct children array
$group by children key and do sum of cost and revenue, and get last name using $last
$unwind deconstruct grandchildren array
$addFields to convert grandchildren to array format (k,v)
$unwind deconstruct grandchildren array
db.collection.aggregate([
{ $project: { "Data.children": { $objectToArray: "$Data.children" } } },
{ $unwind: "$Data.children" },
{
$group: {
_id: "$Data.children.k",
name: { $last: "$Data.children.v.name" },
cost: { $sum: "$Data.children.v.cost" },
revenue: { $sum: "$Data.children.v.revenue" },
grandchildren: { $push: "$Data.children.v.grandchildren" }
}
},
{ $unwind: "$grandchildren" },
{ $addFields: { grandchildren: { $objectToArray: "$grandchildren" } } },
{ $unwind: "$grandchildren" },
$group by children key and grandchildren key, count the sum of cost and revenue of grandchildren
{
$group: {
_id: {
ck: "$_id",
gck: "$grandchildren.k"
},
cost: { $first: "$cost" },
revenue: { $first: "$revenue" },
name: { $first: "$name" },
grandchildren_cost: { $sum: "$grandchildren.v.cost" },
grandchildren_revenue: { $sum: "$grandchildren.v.revenue" }
}
},
$group by children key and re-construct grandchildren array
{
$group: {
_id: "$_id.ck",
cost: { $first: "$cost" },
revenue: { $first: "$revenue" },
name: { $last: "$name" },
grandchildren: {
$push: {
k: "$_id.gck",
v: {
cost: "$grandchildren_cost",
revenue: "$grandchildren_revenue"
}
}
}
}
},
$group by null and re-construct children array and convert grandchildren to object from (k,v) array using $arrayToObject
{
$group: {
_id: null,
children: {
$push: {
k: "$_id",
v: {
name: "$name",
cost: "$cost",
revenue: "$revenue",
grandchildren: { $arrayToObject: "$grandchildren" }
}
}
}
}
},
$project to convert children to object using $arrayToObject
{
$project: {
_id: 0,
"Data.children": { $arrayToObject: "$children" }
}
}
])
Playground

Related

Mongo Organize by object inside aggregate request

My project is in nodeJs with express and i use mongoose for the request to my database mongoDb.
I have a model Media the structure is:
{
"_id": {
"$oid": "6354f982a11464ff4f7bac60"
},
"userId": {
"$oid": "6353aa119d39ccbb3263123f"
},
"date": "2022-10-23",
"base64": "....=",
"label": "Noamount",
"uriPath": "xxxx",
"s3Path": "xxxx",
"amount": 0,
"__v": 0,
"type": "tva"
}
Like you seen there is a field date, imagine that i have 3 medias on differents month:
{
"date": "2022-10-23",
"label": "monthTen",
"type" : "tva",
... // Other property
},
{
"date": "2022-09-10",
"label": "monthNineFirst",
"type" : "tva",
... // Other property
},
{
"date": "2022-09-19",
"label": "monthNineSecond",
"type" : "other",
... // Other property
}
I want to output something like this:
// Ordery by type in first place and by month
// number like 9 = the month inside the date property
{
tva: {
9: [{ label: "monthNineFirst"... }],
10: [{ label: "monthNineTen"... }]
},
other: {
9: [{ label: "monthNineSecond"... }]
}
}
Is found the property aggregate but i don't understand it correctly.
I know how to dow that in JS it's easy, but can i do that directly on the request with the property aggregate and $group?
There is what i have done so far https://mongoplayground.net/p/-vAbdnnqOfD
Here's one way to do it by extending your mongoplayground.net start.
db.collection.aggregate([
{ // group all docs by month
$group: {
_id: {
$month: {
$dateFromString: {
dateString: "$date",
format: "%Y-%m-%d"
}
}
},
data: {"$push": "$$ROOT"}
}
},
{ // group all groups into a single doc
"$group": {
"_id": null,
"groupData": {
"$push": {
// k,v for $arrayToObject
"k": "$_id",
"v": {
"$sortArray": {
"input": "$data",
"sortBy": {"date": 1}
}
}
}
}
}
},
{
"$replaceWith": {
"$arrayToObject": {
"$map": {
"input": {
// sort by month
"$sortArray": {
"input": "$groupData",
"sortBy": {"k": 1}
}
},
"in": {
"$mergeObjects": [
"$$this",
{ // rewrite k as string
"k": {"$toString": "$$this.k"}
}
]
}
}
}
}
}
])
Try it on mongoplayground.net.

How to find $count in mongoose for same group?

how to retrieve count for quantity and price in for same group in mongoose
db.inventory.find( { $or: [ { quantity: { $lt: 20 } }, { price: 10 } ] } )
You can try using $facet to get count of multiple group in single query. check the below query is helpful to your requirement.
db.inventory.aggregate([
{ "$facet": {
"quantity": [
{ "$match" : { "quantity": { "$lt": 20 }}},
{ "$count": "Total" },
],
"price": [
{ "$match" : {"price": { "$eq": 10 }}},
{ "$count": "Released" }
]
}},
{ "$project": {
"quantity": { "$arrayElemAt": ["$quantity.Total", 0] },
"price": { "$arrayElemAt": ["$price.Released", 0] }
}}
])

How to findOneAndUpdate single field with multi nested array documents

I'm stuck on how to update single value in multi nested array documents value with findOneAndUpdate.
My condition goes like this:
Update warehouse amount where the productCode is "abc123", size "41" in warehouse "Hamburg".
I just get back null or bot sizes 41 and 42.
Here is the part of the doc:
{
"_id": ObjectId("xxxx636309f84479ec0c7b"),
"productCode": "abc123",
"brand": "Nike",
"name": "aaa",
"model": "Runner",
"color": "Brown",
"image": "shoe.jpg",
"sizes": [{
"_id": ObjectId("xxxxc636309f84479ec0c7e"),
"size": "41",
"wares": [{
"_id": ObjectId("xxxx2c636309f84479ec0c80"),
"ware": "Hamburg",
"amount": 7
},
{
"_id": ObjectId("5db72c636309f84479ec0c7f"),
"ware": "Berlin",
"amount": 7
}
]
},
{
"_id": ObjectId("5db72c636309f84479ec0c7c"),
"size": "42",
"wares": [{
"_id": ObjectId("5db72c636309f84479ec0c7d"),
"ware": "Hamburg",
"amount": 16
}]
}
],
"__v": 0
}
This is what I've tried:
Product.findOneAndUpdate({
"productCode": "abc123",
"sizes.size": 41,
"sizes.wares.ware": "Hamburg"
}, {
"$set": {
"sizes.0.wares.amount": 99
}
}, {
useFindAndModify: false
},
(err, products) => {
if (err) {
return res.status(422).send(err)
}
return res.json(products)
}
);
How can I solve this?
And to fulfill #ambianBeing, this is how it would be done with findOneAndUpdate:
Product.findOneAndUpdate({
"productCode": "abc123",
"sizes": {
$elemMatch: {
$and: [
{ size: "41" },
{
wares: {
$elemMatch: {
ware: "Hamburg"
}
}
}]
}
}
}, {
$set: {
"sizes.$[theSize].wares.$[theWare].amount": 99
}
}, {
arrayFilters: [{
"theSize.size": "41"
}, {
"theWare.ware": "Hamburg"
}]
})
Can be done using filtered positional operator $[<identifier>] which is nifty in use cases of nested array updation.
Query (Mongo Shell):
db.collection.update(
{ productCode: "abc123" },
{ $set: { "sizes.$[outer].wares.$[inner].amount": 99 } },
{
arrayFilters: [{ "outer.size": "41" }, { "inner.ware": "Hamburg" }],
multi: false
}
);
Query with Mongoose Model:
Product.update(
{ productCode: "abc123" },
{ "sizes.$[outer].wares.$[inner].amount": 99 },
{
arrayFilters: [{ "outer.size": "41" }, { "inner.ware": "Hamburg" }],
multi: false
},
(err, rawDoc) => {
if (err) {
console.error(err);
}
console.info(rawDoc);
}
);

Elasticsearch Vs MongoDB Aggregation - Development Comparison with Example

I'm converting MongoDB Query to Elasticsearch in NodeJS platform. While developing I'm facing some difficulties with grouping and filtering data (getting nested objects like hits.hits._source) within Elasticsearch Query like we doing in MongoDB Query.
Example:-
UserModel.aggregate([
{
$match: {
uId: req.body.uId, timestamp: { $gte: req.body.date, $lte: new Date() }
},
},
{
$group: {
_id: "$eId",
location: {
$push: {
time: "$timestamp", lat: "$lat"
}
},
timestamp: {
$push: "$timestamp"
},
testId: { $first: "$testId" },
}
},
{
$project: {
eId: 1, location: 1, testId: 1, max: { $max: "$timestamp" }
}
},
{ $unwind: { path: "$location", preserveNullAndEmptyArrays: true } },
{
$redact: {
$cond: {
if: { $eq: ["$location.time", "$max"] },
then: "$$DESCEND",
else: "$$PRUNE"
}
}
},
{
$project: {
eId: 1, latitude: "$location.lat", testId: 1
}
},
]).exec(function (err, result) {
console.log(result)
});
What will be the equivalent query in Elasticsearch?
I'm looking for solution with grouping, unwinding and projecting (MongoDB concepts to Elasticsearch) required data with minimal nested response.
Thanks in Advance.
EDIT:-
Adding Elasticsearch Document:-
{
"timestamp": "2019-10-08T:02:50:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
2.000,
34.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4ef"
},
{
"timestamp": "2019-10-09T:02:50:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a408",
"location": [
2.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4ef"
},
{
"timestamp": "2019-10-09T:03:50:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
4.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4ef"
},
{
"timestamp": "2019-10-09T:03:40:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
2.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4e1"
},
{
"timestamp": "2019-10-10T:03:40:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
3.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4e1"
}
Match with status =1, and Group By eId
With that results, group by timestamp and get max timestamp value
Expected Result:-
[
{
"_id": "5d5d7ce0c89852e7bad4a407",
"max": "2019-10-10T:03:40:15.54Z", // max timestamp
"zId": [
"5d5d7ce0c89852e7bad4a4e1",
"5d5d7ce0c89852e7bad4a4ef"
]
},
{
"_id": "5d5d7ce0c89852e7bad4a408",
"max": "2019-10-09T:02:50:15.54Z",
"zId": [
"5d5d7ce0c89852e7bad4a4ef"
]
}, // ...etc
]
Thanks for the documents. Sadly, I do not know any way to retrieve only the documents having the max timestamp field value.
The following query will allow you to filter by status and group by eId then get the max timestamp value, but it will not return the documents having the max timestamp value.
{
"size": 0,
"query": {
"term": {
"status": 1
}
},
"aggregations": {
"eId_group": {
"terms": {
"field": "eId"
},
"aggregations": {
"max_timestamp": {
"max": {
"field": "timestamp"
}
}
}
}
}
}
This second query use a top_hits aggregation to retrieve the documents grouped by eId. The returned documents are sorted by decreasing timestamp value so the documents having the max timestamp will be firsts, but you may also get documents with different timestamps.
{
"size": 0,
"query": {
"term": {
"status": 1
}
},
"aggregations": {
"eId_group": {
"terms": {
"field": "eId"
},
"aggregations": {
"max_timestamp": {
"max": {
"field": "timestamp"
}
},
"top_documents": {
"top_hits": {
"size": 20,
"sort": { "timestamp": "desc"}
}
}
}
}
}
}
I used the following mapping for the index
PUT /test_index
{
"mappings": {
"properties": {
"timestamp": {
"type": "date"
},
"eId": {
"type": "keyword"
},
"zId": {
"type": "keyword"
},
"status": {
"type": "keyword"
}
}
}
}

Calculating multiple averages using MongoDB Aggregation

I've been tasked with generating averages for day, week, month, and year for a rather large set of documents in MongoDB.
All of the jobs have a created field, and I need to base the average values off of the outputs array...
Here's what a document looks like:
{
__v: 0,
_id: ObjectId("535837911393fd0200d8e1eb"),
created: ISODate("2014-04-23T21:58:41.446Z"),
output: [
{
ref: {
img: false
},
type: "image/png",
methods: [
{
options: {
height: 200,
width: 200
},
method: "resize"
}
]
},
{
ref: {
img: false
},
type: "image/png",
methods: [
{
options: {
height: 400,
width: 400
},
method: "resize"
}
]
}
]
}
And here is what my current script looks like:
JobModel.aggregate([
{
$unwind: '$output'
},
{
$group: {
_id: { $dayOfYear: '$created' },
day: { $sum: 1 }
}
},
{
$group: {
_id: null,
avgDay: { $avg: '$day' }
}
},
{
$project: {
_id: 0,
average: {
day: '$avgDay'
}
}
}
],
function(err, data) {
if (err) {
console.log(err);
return;
}
res.send(data);
next();
});
I cannot seem to figure out the right order for this. Any suggestions?
Really not that sure what you are after here. You say that you want "multiple" averages but that brings up the question of "muliple" over what basis? The average "output" entries over a individual day would be different from the average output entries per month or even per daily average per month. So the scale changes with each selection and is not really a single query for "daily", "monthly" and "yearly"
I would seem that you really was "discrete" totals which would be best approached by first finding the "size" of the output entries and then applying an average per scale:
JobModel.aggregate(
[
{ "$unwind": "$output" },
// Count the array entries on the record
{ "$group": {
"_id": "$_id",
"created": { "$first": "$created" },
"count": { "$sum": 1 }
}},
// Now get the average per day
{ "$group": {
"_id": { "$dayOfYear": "$created" },
"avg": { "$avg": "$count" }
}}
],
function(err,result) {
}
);
Or actually with MongoDB 2.6 and greater you can just use the $size operator on the array:
JobModel.aggregate(
[
// Now get the average per day
{ "$group": {
"_id": { "$dayOfYear": "$created" },
"avg": { "$avg": { "$size": "$output" } }
}}
],
function(err,result) {
}
);
So the logical thing is to run each of those within your required $match range other your aggregation key of either "day", "month" or "year"
You could do something like combining the daily averages per day, with the daily average per month and then daily for year by combining results into arrays, otherwise you would just be throwing items away, which can be alternately done if you "just" wanted the daily average for the year, but as full results:
JobModel.aggregate(
[
// Now get the average per day
{ "$group": {
"_id": {
"year": { "$year": "$created" },
"month": { "$month": "$created" },
"day": { "$dayOfYear": "$created" }
},
"dayAvg": { "$avg": { "$size": "$output" } }
}},
// Group for month
{ "$group": {
"_id": {
"year": "$_id.year",
"month": "$_id.month"
},
"days": {
"$push": {
"day": "$_id.day",
"avg": "$dayAvg"
}
},
"monthAvg": { "$avg": "$dayAvg" }
}},
// Group for the year
{ "$group": {
"_id": "$_id.year",
"daily": { "$avg": "$monthAvg" },
"months": {
"$push": {
"month": "$_id.month",
"daily": "$monthAvg",
"days": "$days"
}
}
}}
],
function(err,result) {
}
);
However you want to apply that, but the main thing missing from your example is finding the "size" or "count" of the original "output" array per document from which to obtain an average.

Categories