Aggregate Totals From Multiple Arrays - javascript

I have some market feed data that I'm trying to summarize with MongoDB 3.4.5 on Ubuntu 16.04, producing a nice summary for a given date range.
Here is one example document.. each document might contain multiple batched data in Buys, Sells and Fills.
{
"MarketName" : "BIX",
"Nounce" : 12040,
"Buys" : [
{
"Type" : 2,
"Rate" : 0.08068147,
"Quantity" : 55.57280163
},
{
"Type" : 1,
"Rate" : 0.07980372,
"Quantity" : 0
},
{
"Type" : 0,
"Rate" : 0.07962334,
"Quantity" : 34.96018931
},
{
"Type" : 1,
"Rate" : 0.07960592,
"Quantity" : 0
}
],
"Sells" : [
{
"Type" : 0,
"Rate" : 0.08070098,
"Quantity" : 4.08189692
},
{
"Type" : 0,
"Rate" : 0.08112318,
"Quantity" : 10
},
{
"Type" : 1,
"Rate" : 0.08112319,
"Quantity" : 0
},
{
"Type" : 1,
"Rate" : 0.08149567,
"Quantity" : 0
}
],
"Fills" : [
{
"OrderType" : "SELL",
"Rate" : 0.08068147,
"Quantity" : 0.51627134,
"TimeStamp" : "2017-07-25T15:20:34.357"
},
{
"OrderType" : "BUY",
"Rate" : 0.08048147,
"Quantity" : 0.51007134,
"TimeStamp" : "2017-07-25T15:20:34.357"
}
],
"TimeStamp" : ISODate("2017-07-26T22:32:20.741+08:00")
}
I've been trying various combinations of $unwind, $group, $project, $push etc., but I'm not getting anywhere close to the output I want. Because the type is the object key name, I am struggling to find something to group on.
The output i'm looking for is something like this.
{
"MarketName" : "RRG",
"Buys" : {
totalCount: 99, //size/count of all items in Buys array
avgRate: 0.07980372, //avg rate of all buy object items
totalQuantity: 3.09239812, //sum of all buy array items, quantity values
totalValue: 306.14741388, //avgRate * totalQuantity
type0: {
totalCount: 19, //count of items in Buy array oftype 0
avgRate: 0.07980372, //avg rate of all buy object items of type 0
totalQuantity: 3.09239812, //sum of all buy object quantity values oftype 0
totalValue: 30.14741388, //avgRate * totalQuantity
},
type1: {
totalCount: 9,
avgRate: 0.07980372,
totalQuantity: 3.09239812,
totalValue: 30.14741388,
},
type2: {
totalCount: 12,
avgRate: 0.07980372,
totalQuantity: 3.09239812,
totalValue: 30.14741388,
}
},
"Sells" : {
..same as buys format
},
"Fills" : {
..same as buys format
}
}
If someone could assist with this in any way i'd be very grateful.
Here is the query I managed to get working, but it's a long way from what I want, and as I'm no MongoDB expert I'm struggling to know how to proceed.
// The original working attempt. Fixed: a stray "}," after the $match stage
// made this snippet a syntax error as pasted.
// NOTE(review): the three chained $unwind stages produce a cartesian product
// of Buys x Sells x Fills, so the counts/sums below are inflated — see the
// accepted "combined array" approach further down for the correct pipeline.
db.getCollection('tinfo').aggregate(
[
  {
    $match: {
      '$and': [
        {'Type': {$eq: 'market'}},
        {'TimeStamp': {$lte: new Date()}},
        {'TimeStamp': {$gte: new Date(Date.now() - 24 * 60 * 60 * 1000)}},
        {'MarketName': {$eq: 'BIX'}}
      ]
    }
  },
  { $unwind: "$Buys" },
  { $unwind: "$Sells" },
  { $unwind: "$Fills" },
  {
    // Per-document, per-Buy-type totals (only the Buys array is grouped here).
    $group: {
      _id: {_id: "$_id", type: "$Buys.Type"},
      count: {$sum: 1},
      avgRate: {$avg: "$Buys.Rate"},
      quantity: {$sum: "$Buys.Quantity"}
    }
  },{
    $project: {
      type: "$_id.type",
      count: 1,
      avgRate: 1,
      quantity: 1,
      total: {$multiply: ["$quantity", "$avgRate"]}
    }
  },{
    // Collect the per-type results back into k/v pairs per source document.
    $group: {
      "_id": {
        "_id" : "$_id._id"
      },
      "results" : {
        $push: {
          "k": "$type",
          "v": {
            "count": "$count",
            "avgRate": "$avgRate",
            "quantity": "$quantity"
          }
        }
      }
    }
  }
])

It's certainly possible, and note that I'm actually quite deliberately using the actual MongoDB 3.4 features "in the last stage only", which generally highlights that you do not really need them since the main output can be achieved without the "named keys" in output.
The general listing goes like this:
// Date range covering the last 24 hours.
// Fixed: the original multiplied by an extra factor of 60
// (24 * 60 * 60 * 60 * 1000 ms is 60 days, not one day).
var endDate = new Date(),
    startDate = new Date(endDate.valueOf() - 24 * 60 * 60 * 1000);
db.getCollection('tinfo').aggregate([
/*
{ "$match": {
"TimeStamp": {
"$gte": startDate, "$lt": endDate
}
}},
*/
{ "$project": {
"MarketName": 1,
"combined": {
"$concatArrays": [
{ "$map": {
"input": "$Buys",
"as": "b",
"in": {
"ttype": "Buys",
"Type": "$$b.Type",
"Rate": "$$b.Rate",
"Quantity": "$$b.Quantity"
}
}},
{ "$map": {
"input": "$Sells",
"as": "s",
"in": {
"ttype": "Sells",
"Type": "$$s.Type",
"Rate": "$$s.Rate",
"Quantity": "$$s.Quantity"
}
}},
{ "$map": {
"input": "$Fills",
"as": "f",
"in": {
"ttype": "Fills",
"Type": "$$f.OrderType",
"Rate": "$$f.Rate",
"Quantity": "$$f.Quantity"
}
}}
]
}
}},
{ "$unwind": "$combined" },
{ "$group": {
"_id": {
"MarketName": "$MarketName",
"ttype": "$combined.ttype",
"Type": "$combined.Type"
},
"totalCount": { "$sum": 1 },
"avgRate": { "$avg": "$combined.Rate" },
"totalQuantity": { "$sum": "$combined.Quantity" },
}},
{ "$group": {
"_id": {
"MarketName": "$_id.MarketName",
"ttype": "$_id.ttype",
},
"Types": {
"$push": {
"k": {
"$concat": [
"type",
{ "$cond": {
"if": { "$eq": [ "$_id.ttype", "Fills" ] },
"then": "$_id.Type",
"else": { "$substr": ["$_id.Type",0,1] }
}}
]
},
"v": {
"totalCount": "$totalCount",
"avgRate": "$avgRate",
"totalQuantity": "$totalQuantity",
"totalValue": { "$multiply": [ "$totalQuantity", "$avgRate" ] }
}
}
},
"totalCount": { "$sum": "$totalCount" },
"avgRate": { "$avg": "$avgRate" },
"totalQuantity": { "$sum": "$totalQuantity" }
}},
{ "$group": {
"_id": "$_id.MarketName",
"data": {
"$push": {
"k": "$_id.ttype",
"v": {
"totalCount": "$totalCount",
"avgRate": "$avgRate",
"totalQuantity": "$totalQuantity",
"totalValue": { "$multiply": [ "$totalQuantity", "$avgRate" ] },
"Types": "$Types"
}
}
}
}},
{ "$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[{ "k": "MarketName", "v": "$_id" }],
{ "$map": {
"input": "$data",
"as": "d",
"in": {
"k": "$$d.k",
"v": {
"$arrayToObject": {
"$concatArrays": [
[
{ "k": "totalCount", "v": "$$d.v.totalCount" },
{ "k": "avgRate", "v": "$$d.v.avgRate" },
{ "k": "totalQuantity", "v": "$$d.v.totalQuantity" },
{ "k": "totalValue", "v": "$$d.v.totalValue" }
],
"$$d.v.Types"
]
}
}
}
}}
]
}
}
}}
])
The broad strokes of which are.
Project a combined array which is needed to avoid a "cartesian product" which would otherwise occur if you used $unwind on each array separately. So you want to combine all the arrays into a single one with a consistent format for later processing. This is what we do using $concatArrays and $map to do a little "reshaping" and identify which "ttype" so we know the "source" array.
Gradually Group Since the "totals" occur at different levels we first want to $group on the "inner" "type" property and then gradually work outwards until you get back to the top level. At each stage you do the other calculations such as the $multiply on the $sum and $avg results.
Finally Reshape Which is all essentially about $arrayToObject when used in the aggregation framework. Here we just supply everything as "key" and "value" pairs in array format ( also explaining the "k" and "v" as used in earlier stages ) so that this operator can "transform" into an object format with "named keys".
Of course that final stage can also be done in client code, but since the version is supported I include the actual pipeline stage to do it.
The output is then:
{
"MarketName" : "BIX",
"Buys" : {
"totalCount" : 4.0,
"avgRate" : 0.08000321,
"totalQuantity" : 90.53299094,
"totalValue" : 7.24292988610092,
"type2" : {
"totalCount" : 1.0,
"avgRate" : 0.08068147,
"totalQuantity" : 55.57280163,
"totalValue" : 4.4836953275268
},
"type1" : {
"totalCount" : 2.0,
"avgRate" : 0.07970482,
"totalQuantity" : 0.0,
"totalValue" : 0.0
},
"type0" : {
"totalCount" : 1.0,
"avgRate" : 0.07962334,
"totalQuantity" : 34.96018931,
"totalValue" : 2.7836470398945
}
},
"Sells" : {
"totalCount" : 4.0,
"avgRate" : 0.081110755,
"totalQuantity" : 14.08189692,
"totalValue" : 1.14219329101337,
"type1" : {
"totalCount" : 2.0,
"avgRate" : 0.08130943,
"totalQuantity" : 0.0,
"totalValue" : 0.0
},
"type0" : {
"totalCount" : 2.0,
"avgRate" : 0.08091208,
"totalQuantity" : 14.08189692,
"totalValue" : 1.13939557014279
}
},
"Fills" : {
"totalCount" : 2.0,
"avgRate" : 0.08058147,
"totalQuantity" : 1.02634268,
"totalValue" : 0.0827042018781396,
"typeBUY" : {
"totalCount" : 1.0,
"avgRate" : 0.08048147,
"totalQuantity" : 0.51007134,
"totalValue" : 0.0410512912480698
},
"typeSELL" : {
"totalCount" : 1.0,
"avgRate" : 0.08068147,
"totalQuantity" : 0.51627134,
"totalValue" : 0.0416535306300698
}
}
}
Based on the data provided of course. The actual "calcs" may vary in implementation ( I'm just following your own notes ), but this is the general structure to follow.
Earlier Versions
As noted the output format here really does not need the new features of $replaceRoot and $arrayToObject in order to get to the end result. All that is being transformed is the final document response on the cursor.
So if you look at the output before the $replaceRoot stage is invoked you see:
{
"_id" : "BIX",
"data" : [
{
"k" : "Buys",
"v" : {
"totalCount" : 4.0,
"avgRate" : 0.08000321,
"totalQuantity" : 90.53299094,
"totalValue" : 7.24292988610092,
"Types" : [
{
"k" : "type2",
"v" : {
"totalCount" : 1.0,
"avgRate" : 0.08068147,
"totalQuantity" : 55.57280163,
"totalValue" : 4.4836953275268
}
},
{
"k" : "type1",
"v" : {
"totalCount" : 2.0,
"avgRate" : 0.07970482,
"totalQuantity" : 0.0,
"totalValue" : 0.0
}
},
{
"k" : "type0",
"v" : {
"totalCount" : 1.0,
"avgRate" : 0.07962334,
"totalQuantity" : 34.96018931,
"totalValue" : 2.7836470398945
}
}
]
}
},
{
"k" : "Sells",
"v" : {
"totalCount" : 4.0,
"avgRate" : 0.081110755,
"totalQuantity" : 14.08189692,
"totalValue" : 1.14219329101337,
"Types" : [
{
"k" : "type1",
"v" : {
"totalCount" : 2.0,
"avgRate" : 0.08130943,
"totalQuantity" : 0.0,
"totalValue" : 0.0
}
},
{
"k" : "type0",
"v" : {
"totalCount" : 2.0,
"avgRate" : 0.08091208,
"totalQuantity" : 14.08189692,
"totalValue" : 1.13939557014279
}
}
]
}
},
{
"k" : "Fills",
"v" : {
"totalCount" : 2.0,
"avgRate" : 0.08058147,
"totalQuantity" : 1.02634268,
"totalValue" : 0.0827042018781396,
"Types" : [
{
"k" : "typeBUY",
"v" : {
"totalCount" : 1.0,
"avgRate" : 0.08048147,
"totalQuantity" : 0.51007134,
"totalValue" : 0.0410512912480698
}
},
{
"k" : "typeSELL",
"v" : {
"totalCount" : 1.0,
"avgRate" : 0.08068147,
"totalQuantity" : 0.51627134,
"totalValue" : 0.0416535306300698
}
}
]
}
}
]
}
And we can easily do that same transformation in client code when processing the cursor using .map() and .reduce() JavaScript functions as a shell example:
// Date range covering the last 24 hours.
// Fixed: the original multiplied by an extra factor of 60
// (24 * 60 * 60 * 60 * 1000 ms is 60 days, not one day).
var endDate = new Date(),
    startDate = new Date(endDate.valueOf() - 24 * 60 * 60 * 1000);
db.getCollection('tinfo').aggregate([
/*
{ "$match": {
"TimeStamp": {
"$gte": startDate, "$lt": endDate
}
}},
*/
{ "$project": {
"MarketName": 1,
"combined": {
"$concatArrays": [
{ "$map": {
"input": "$Buys",
"as": "b",
"in": {
"ttype": "Buys",
"Type": "$$b.Type",
"Rate": "$$b.Rate",
"Quantity": "$$b.Quantity"
}
}},
{ "$map": {
"input": "$Sells",
"as": "s",
"in": {
"ttype": "Sells",
"Type": "$$s.Type",
"Rate": "$$s.Rate",
"Quantity": "$$s.Quantity"
}
}},
{ "$map": {
"input": "$Fills",
"as": "f",
"in": {
"ttype": "Fills",
"Type": "$$f.OrderType",
"Rate": "$$f.Rate",
"Quantity": "$$f.Quantity"
}
}}
]
}
}},
{ "$unwind": "$combined" },
{ "$group": {
"_id": {
"MarketName": "$MarketName",
"ttype": "$combined.ttype",
"Type": "$combined.Type"
},
"totalCount": { "$sum": 1 },
"avgRate": { "$avg": "$combined.Rate" },
"totalQuantity": { "$sum": "$combined.Quantity" },
}},
{ "$group": {
"_id": {
"MarketName": "$_id.MarketName",
"ttype": "$_id.ttype",
},
"Types": {
"$push": {
"k": {
"$concat": [
"type",
{ "$cond": {
"if": { "$eq": [ "$_id.ttype", "Fills" ] },
"then": "$_id.Type",
"else": { "$substr": ["$_id.Type",0,1] }
}}
]
},
"v": {
"totalCount": "$totalCount",
"avgRate": "$avgRate",
"totalQuantity": "$totalQuantity",
"totalValue": { "$multiply": [ "$totalQuantity", "$avgRate" ] }
}
}
},
"totalCount": { "$sum": "$totalCount" },
"avgRate": { "$avg": "$avgRate" },
"totalQuantity": { "$sum": "$totalQuantity" }
}},
{ "$group": {
"_id": "$_id.MarketName",
"data": {
"$push": {
"k": "$_id.ttype",
"v": {
"totalCount": "$totalCount",
"avgRate": "$avgRate",
"totalQuantity": "$totalQuantity",
"totalValue": { "$multiply": [ "$totalQuantity", "$avgRate" ] },
"Types": "$Types"
}
}
}
}},
/*
{ "$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[{ "k": "MarketName", "v": "$_id" }],
{ "$map": {
"input": "$data",
"as": "d",
"in": {
"k": "$$d.k",
"v": {
"$arrayToObject": {
"$concatArrays": [
[
{ "k": "totalCount", "v": "$$d.v.totalCount" },
{ "k": "avgRate", "v": "$$d.v.avgRate" },
{ "k": "totalQuantity", "v": "$$d.v.totalQuantity" },
{ "k": "totalValue", "v": "$$d.v.totalValue" }
],
"$$d.v.Types"
]
}
}
}
}}
]
}
}
}}
*/
])
.map( doc => Object.assign(
{ "MarketName": doc._id },
doc.data.map( d => ({
"k": d.k,
"v": Object.assign(
Object.keys(d.v)
.filter(k => k !== 'Types')
.map( k => ({ [k]: d.v[k] }))
.reduce((acc,curr) => Object.assign(acc,curr),{}),
d.v.Types.reduce((acc,curr) => Object.assign(acc,{ [curr.k]: curr.v }),{})
)
}))
.reduce((acc,curr) => Object.assign(acc,{ [curr.k]: curr.v }),{})
))
Which of course produces the exact same output:
{
"MarketName" : "BIX",
"Buys" : {
"totalCount" : 4.0,
"avgRate" : 0.08000321,
"totalQuantity" : 90.53299094,
"totalValue" : 7.24292988610092,
"type2" : {
"totalCount" : 1.0,
"avgRate" : 0.08068147,
"totalQuantity" : 55.57280163,
"totalValue" : 4.4836953275268
},
"type1" : {
"totalCount" : 2.0,
"avgRate" : 0.07970482,
"totalQuantity" : 0.0,
"totalValue" : 0.0
},
"type0" : {
"totalCount" : 1.0,
"avgRate" : 0.07962334,
"totalQuantity" : 34.96018931,
"totalValue" : 2.7836470398945
}
},
"Sells" : {
"totalCount" : 4.0,
"avgRate" : 0.081110755,
"totalQuantity" : 14.08189692,
"totalValue" : 1.14219329101337,
"type1" : {
"totalCount" : 2.0,
"avgRate" : 0.08130943,
"totalQuantity" : 0.0,
"totalValue" : 0.0
},
"type0" : {
"totalCount" : 2.0,
"avgRate" : 0.08091208,
"totalQuantity" : 14.08189692,
"totalValue" : 1.13939557014279
}
},
"Fills" : {
"totalCount" : 2.0,
"avgRate" : 0.08058147,
"totalQuantity" : 1.02634268,
"totalValue" : 0.0827042018781396,
"typeBUY" : {
"totalCount" : 1.0,
"avgRate" : 0.08048147,
"totalQuantity" : 0.51007134,
"totalValue" : 0.0410512912480698
},
"typeSELL" : {
"totalCount" : 1.0,
"avgRate" : 0.08068147,
"totalQuantity" : 0.51627134,
"totalValue" : 0.0416535306300698
}
}
}

Related

How to group by sub documents and get unique value of value field?

This is my database collection:
{"productId" : 1,
"isVariant": 1,
"isComplete" : 1,
"variantId" : 1,
"attributeSet" : [
{
"name" : "Capacity",
"value" : "500 GB",
"id" : 3
},
{
"name" : "Form Factor",
"value" : "5 inch",
"id" : 4
},
{
"id" : 5,
"name" : "Memory Components",
"value" : "3D NAND",
"isVariation" : 0
}
]
},
{"productId" : 2,
"isVariant": 1,
"isComplete" : 1,
"variantId" : 1,
"attributeSet" : [
{
"name" : "Capacity",
"value" : "1 TB",
"id" : 3
},
{
"name" : "Form Factor",
"value" : "5 inch",
"id" : 4
},
{
"id" : 5,
"name" : "Memory Components",
"value" : "3D NAND",
"isVariation" : 0
}
]
},
{"productId" : 3,
"isVariant": 1,
"isComplete" : 0,
"variantId" : 1,
"attributeSet" : [
{
"name" : "Capacity",
"value" : "500 GB",
"id" : 3
},
{
"name" : "Form Factor",
"value" : "2.5 inch",
"id" : 4
},
{
"id" : 5,
"name" : "Memory Components",
"value" : "3D NAND",
"isVariation" : 0
}
]
},
{"productId" : 4,
"isVariant": 1,
"isComplete" : 0,
"variantId" : 1,
"attributeSet" : [
{
"name" : "Capacity",
"value" : "1 TB",
"id" : 3
},
{
"name" : "Form Factor",
"value" : "2.5 inch",
"id" : 4
},
{
"id" : 5,
"name" : "Memory Components",
"value" : "3D NAND",
"isVariation" : 0
}
]
}
Now I want to send the data of only the attribute where isVariation is not 0. Also I want to send the variant values of each attribute where isComplete =1. Hence the result should look like this
result : [{
"id": 3,
"name": "Capacity",
"value": [
"500 GB",
"1 TB"
]
}, {
"id": 4,
"name": "Form Factor",
"value": [
"5 inch"
]
}]
The above result does not have value of 2.5 inch as the isComplete is 0 for this document. Can anyone help me with the query
$match isComplete is 1
$project to show required fields
$unwind deconstruct attributeSet array
$match attributeSet.isVariation is not 0
$group by attributeSet.id and get first name and get unique value using $addToSet
db.collection.aggregate([
{ $match: { isComplete: 1 } },
{
$project: {
_id: 0,
attributeSet: 1
}
},
{ $unwind: "$attributeSet" },
{ $match: { "attributeSet.isVariation": { $ne: 0 } } },
{
$group: {
_id: "$attributeSet.id",
name: { $first: "$attributeSet.name" },
value: { $addToSet: "$attributeSet.value" }
}
}
])
Playground
The $project stage is not required in your query; I have added it because it will optimize your query's performance.

How to remove data from an array in aggregation without disturbing outside data in MongoDB

Here I am trying to get the entire document, but if a date is earlier than the current date then that date should not be fetched from the database.
{
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ab5"),
"highlights" : [
"highlights-1",
],
"notes" : [
"Listen"
],
"soldout" : false,
"active" : false,
"operator" : ObjectId(""),
"title" : "2D1N Awesome trip to Knowhere 99",
"destinations" : [
{
"coordinatesType" : "Point",
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ab6"),
}
],
"difficulty" : "Easy",
"duration" : {
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ab7"),
"days" : NumberInt(2),
"nights" : NumberInt(1)
},
"media" : {
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ab8"),
"images" : [
],
"videos" : [
]
},
"description" : "Surrounded ",
"inclusions" : [
{
"_id" : ObjectId(""),
"text" : "Included"
}
],
"itinerary" : "Surrounded .",
"thingsToCarry" : [
{
"_id" : ObjectId(""),
"text" : "Yourself"
}
],
"exclusions" : [
{
"_id" : ObjectId(""),
"text" : "A Lot"
}
],
"policy" : "Fully refundable 7777 Days before the date of Experience",
"departures" : [
{
"dates" : [
ISODate("2019-11-19T02:44:58.989+0000"),
ISODate("2019-11-23T17:19:47.878+0000")
],
"_id" : ObjectId(""),
"bookingCloses" : "2 Hours Before",
"maximumSeats" : NumberInt(20),
"source" : {
"coordinatesType" : "Point",
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ac2"),
"code" : "code",
"name" : "Manali",
"state" : "Himachal Pradesh",
"region" : "North",
"country" : "India",
"coordinates" : [
23.33,
NumberInt(43),
NumberInt(33)
]
},
"pickupPoints" : [
{
"coordinatesType" : "Point",
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ac3"),
"name" : "name-3",
"address" : "address-3",
"time" : "time-3",
"coordinates" : [
23.33,
NumberInt(43),
NumberInt(33)
]
}
],
"prices" : {
"3" : NumberInt(5)
},
"mrps" : {
"3" : NumberInt(5)
},
"markup" : NumberInt(25),
"discount" : NumberInt(0),
"b2m" : {
"3" : NumberInt(5)
},
"m2c" : {
"3" : 6.25
},
"minimumOccupancy" : NumberInt(3),
"maximumOccupancy" : NumberInt(3)
}
],
"bulkDiscounts" : [
{
"_id" : ObjectId("5d6fad0f9e0dc027fc6b5ac4")
}
],
}
In this I am trying to get all the data except the date section should be different. Means I should get my output as below
{
"_id": "5d6fad0f9e0dc027fc6b5ab5",
"highlights": [
"highlights-1",
"highlights-2",
"highlights-3",
"highlights-4",
"highlights-5"
],
"notes": [
"Listen"
],
"soldout": false,
"active": false,
"operator": "5d5d84e8c89fbf00063095f6",
"title": "2D1N Awesome trip to Knowhere 99",
"destinations": [
{
"code": "code",
"name": "Manali",
"coordinates": [
23.33,
43,
33
]
}
],
"difficulty": "Easy",
"duration": {
"_id": "5d6fad0f9e0dc027fc6b5ab7",
"days": 2,
"nights": 1
},
"media": {
"_id": "5d6fad0f9e0dc027fc6b5ab8",
"images": [
],
"videos": []
},
"description": "Surrounded.",
"inclusions": [
{
"_id": "5d6fad0f9e0dc027fc6b5abe",
"text": "Included"
}
],
"itinerary": "Surrounded",
"thingsToCarry": [
{
"_id": "5d6fad0f9e0dc027fc6b5abf",
"text": "Yourself"
}
],
"exclusions": [
{
"_id": "5d6fad0f9e0dc027fc6b5ac0",
"text": "A Lot"
}
],
"policy": "Fully refundable 7777 Days before the date of Experience",
"departures": [
{
"dates": [
"2019-11-23T17:19:47.878Z"
],
"_id": "5d6fad0f9e0dc027fc6b5ac1",
"bookingCloses": "2 Hours Before",
"maximumSeats": 20,
"source": {
"code": "code",
"name": "Manali",
"coordinates": [
23.33,
43,
33
]
},
"pickupPoints": [
{
"coordinatesType": "Point",
"_id": "5d6fad0f9e0dc027fc6b5ac3",
"name": "name-3",
"address": "address-3",
"time": "time-3",
"coordinates": [
23.33,
43,
33
]
}
],
"mrps": {
"3": 5
},
"markup": 25,
"discount": 0,
"b2m": {
"3": 5
},
"m2c": {
"3": 6.25
},
"minimumOccupancy": 3,
"maximumOccupancy": 3
}
],
"bulkDiscounts": [
{
"_id": "5d6fad0f9e0dc027fc6b5ac4"
}
],
"url": "",
}
]
I mean to say that no difference in output except dates array. If dates are less than current date then no need to fetch else fetch from DB with filtered dates array.
If you use mongo 3.4> then you can try with $addFields and $filter:
myCollection.aggregate([
{$match: {
'departures.dates': {
$elemMatch: {$gt: new Date()}}
}
},
{$addFields: {
'departures.dates': {
$filter: {
input: '$departures.dates',
as: 'date',
cond: {
$gt: ['$$date', new Date()]
}
}
}
}}
])
I was missing one detail here: my document structure is like below
{
_id: ObjecId(),
departure: [{
dates: [Array]
}]
}
So, here is my solution in the below code
pipeline = [
{ $unwind: '$departures' },
{
$addFields: {
'departures.dates': {
$filter: {
input: '$departures.dates',
as: 'date',
cond: {
$gt: ['$$date', new Date()]
}
}
}
}
}
];

Dynamically add Keys From Array Content

How can I add fields dynamically in MongoDB? I have an array, with which I want to create columns. For example, from this,
/* 1 */
{
"date" : "2017-07-30",
"brand" : [
{
"name" : "Apple",
"quantity" : 31
}
],
"total" : 31
}
/* 2 */
{
"date" : "2017-08-02",
"brand" : [
{
"name" : "Apple",
"quantity" : 1
},
{
"name" : "Samsung",
"quantity" : 6
}
],
"total" : 7
}
I want to make this,
/* 1 */
{
"date" : "2017-07-30",
"Apple": 31,
"Samsung": 0,
"total" : 31
}
/* 2 */
{
"date" : "2017-08-02",
"Apple": 1,
"Samsung": 6,
"total" : 7
}
There $addFields operator can add a field, but how can I use it for multiple fields?
You can do it, but honestly it's kind of a waste. Use $replaceRoot and $arrayToObject where you have it:
db.collection.aggregate([
{ "$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[{ "k": "date", "v": "$date" }],
{ "$map": {
"input": "$brand",
"as": "b",
"in": { "k": "$$b.name", "v": "$$b.quantity" }
}},
[{ "k": "total", "v": "$total" }]
]
}
}
}}
])
It's actually a lot better to simply transform from the cursor. As in the shell:
// Flatten each document client-side: every { name, quantity } entry in the
// "brand" array becomes a top-level "<name>: <quantity>" field, with the
// date first and the total last (later duplicate names overwrite earlier
// ones, exactly as the chained Object.assign in the pipeline-free form).
db.collection.find().map(doc => {
  const byBrand = {};
  for (const entry of doc.brand) {
    byBrand[entry.name] = entry.quantity;
  }
  return Object.assign({ date: doc.date }, byBrand, { total: doc.total });
})
Which does the same thing:
[
{
"date" : "2017-07-30",
"Apple" : 31.0,
"total" : 31.0
},
{
"date" : "2017-08-02",
"Apple" : 1.0,
"Samsung" : 6.0,
"total" : 7.0
}
]

Aggregation filter after $lookup

How can I add a filter after an $lookup or is there any other method to do this?
My data collection test is:
{ "_id" : ObjectId("570557d4094a4514fc1291d6"), "id" : 100, "value" : "0", "contain" : [ ] }
{ "_id" : ObjectId("570557d4094a4514fc1291d7"), "id" : 110, "value" : "1", "contain" : [ 100 ] }
{ "_id" : ObjectId("570557d4094a4514fc1291d8"), "id" : 120, "value" : "1", "contain" : [ 100 ] }
{ "_id" : ObjectId("570557d4094a4514fc1291d9"), "id" : 121, "value" : "2", "contain" : [ 100, 120 ] }
I select id 100 and aggregate the childs:
db.test.aggregate([ {
$match : {
id: 100
}
}, {
$lookup : {
from : "test",
localField : "id",
foreignField : "contain",
as : "childs"
}
}]);
I get back:
{
"_id":ObjectId("570557d4094a4514fc1291d6"),
"id":100,
"value":"0",
"contain":[ ],
"childs":[ {
"_id":ObjectId("570557d4094a4514fc1291d7"),
"id":110,
"value":"1",
"contain":[ 100 ]
},
{
"_id":ObjectId("570557d4094a4514fc1291d8"),
"id":120,
"value":"1",
"contain":[ 100 ]
},
{
"_id":ObjectId("570557d4094a4514fc1291d9"),
"id":121,
"value":"2",
"contain":[ 100, 120 ]
}
]
}
But I want only childs that match with "value: 1"
At the end I expect this result:
{
"_id":ObjectId("570557d4094a4514fc1291d6"),
"id":100,
"value":"0",
"contain":[ ],
"childs":[ {
"_id":ObjectId("570557d4094a4514fc1291d7"),
"id":110,
"value":"1",
"contain":[ 100 ]
},
{
"_id":ObjectId("570557d4094a4514fc1291d8"),
"id":120,
"value":"1",
"contain":[ 100 ]
}
]
}
The question here is actually about something different and does not need $lookup at all. But for anyone arriving here purely from the title of "filtering after $lookup" then these are the techniques for you:
MongoDB 3.6 - Sub-pipeline
db.test.aggregate([
{ "$match": { "id": 100 } },
{ "$lookup": {
"from": "test",
"let": { "id": "$id" },
"pipeline": [
{ "$match": {
"value": "1",
"$expr": { "$in": [ "$$id", "$contain" ] }
}}
],
"as": "childs"
}}
])
Earlier - $lookup + $unwind + $match coalescence
db.test.aggregate([
{ "$match": { "id": 100 } },
{ "$lookup": {
"from": "test",
"localField": "id",
"foreignField": "contain",
"as": "childs"
}},
{ "$unwind": "$childs" },
{ "$match": { "childs.value": "1" } },
{ "$group": {
"_id": "$_id",
"id": { "$first": "$id" },
"value": { "$first": "$value" },
"contain": { "$first": "$contain" },
"childs": { "$push": "$childs" }
}}
])
If you question why would you $unwind as opposed to using $filter on the array, then read Aggregate $lookup Total size of documents in matching pipeline exceeds maximum document size for all the detail on why this is generally necessary and far more optimal.
For releases of MongoDB 3.6 and onwards, then the more expressive "sub-pipeline" is generally what you want to "filter" the results of the foreign collection before anything gets returned into the array at all.
Back to the answer though which actually describes why the question asked needs "no join" at all....
Original
Using $lookup like this is not the most "efficient" way to do what you want here. But more on this later.
As a basic concept, just use $filter on the resulting array:
db.test.aggregate([
{ "$match": { "id": 100 } },
{ "$lookup": {
"from": "test",
"localField": "id",
"foreignField": "contain",
"as": "childs"
}},
{ "$project": {
"id": 1,
"value": 1,
"contain": 1,
"childs": {
"$filter": {
"input": "$childs",
"as": "child",
"cond": { "$eq": [ "$$child.value", "1" ] }
}
}
}}
]);
Or use $redact instead:
db.test.aggregate([
{ "$match": { "id": 100 } },
{ "$lookup": {
"from": "test",
"localField": "id",
"foreignField": "contain",
"as": "childs"
}},
{ "$redact": {
"$cond": {
"if": {
"$or": [
{ "$eq": [ "$value", "0" ] },
{ "$eq": [ "$value", "1" ] }
]
},
"then": "$$DESCEND",
"else": "$$PRUNE"
}
}}
]);
Both get the same result:
{
"_id":ObjectId("570557d4094a4514fc1291d6"),
"id":100,
"value":"0",
"contain":[ ],
"childs":[ {
"_id":ObjectId("570557d4094a4514fc1291d7"),
"id":110,
"value":"1",
"contain":[ 100 ]
},
{
"_id":ObjectId("570557d4094a4514fc1291d8"),
"id":120,
"value":"1",
"contain":[ 100 ]
}
]
}
Bottom line is that $lookup itself cannot "yet" query to only select certain data. So all "filtering" needs to happen after the $lookup
But really for this type of "self join" you are better off not using $lookup at all and avoiding the overhead of an additional read and "hash-merge" entirely. Just fetch the related items and $group instead:
db.test.aggregate([
{ "$match": {
"$or": [
{ "id": 100 },
{ "contain.0": 100, "value": "1" }
]
}},
{ "$group": {
"_id": {
"$cond": {
"if": { "$eq": [ "$value", "0" ] },
"then": "$id",
"else": { "$arrayElemAt": [ "$contain", 0 ] }
}
},
"value": { "$first": { "$literal": "0"} },
"childs": {
"$push": {
"$cond": {
"if": { "$ne": [ "$value", "0" ] },
"then": "$$ROOT",
"else": null
}
}
}
}},
{ "$project": {
"value": 1,
"childs": {
"$filter": {
"input": "$childs",
"as": "child",
"cond": { "$ne": [ "$$child", null ] }
}
}
}}
])
Which only comes out a little different because I deliberately removed the extraneous fields. Add them in yourself if you really want to:
{
"_id" : 100,
"value" : "0",
"childs" : [
{
"_id" : ObjectId("570557d4094a4514fc1291d7"),
"id" : 110,
"value" : "1",
"contain" : [ 100 ]
},
{
"_id" : ObjectId("570557d4094a4514fc1291d8"),
"id" : 120,
"value" : "1",
"contain" : [ 100 ]
}
]
}
So the only real issue here is "filtering" any null result from the array, created when the current document was the parent in processing items to $push.
What you also seem to be missing here is that the result you are looking for does not need aggregation or "sub-queries" at all. The structure that you have concluded or possibly found elsewhere is "designed" so that you can get a "node" and all of it's "children" in a single query request.
That means just the "query" is all that is really needed, and the data collection ( which is all that is happening since no content is really being "reduced" ) is just a function of iterating the cursor result:
var result = {};
db.test.find({
"$or": [
{ "id": 100 },
{ "contain.0": 100, "value": "1" }
]
}).sort({ "contain.0": 1 }).forEach(function(doc) {
if ( doc.id == 100 ) {
result = doc;
result.childs = []
} else {
result.childs.push(doc)
}
})
printjson(result);
This does exactly the same thing:
{
"_id" : ObjectId("570557d4094a4514fc1291d6"),
"id" : 100,
"value" : "0",
"contain" : [ ],
"childs" : [
{
"_id" : ObjectId("570557d4094a4514fc1291d7"),
"id" : 110,
"value" : "1",
"contain" : [
100
]
},
{
"_id" : ObjectId("570557d4094a4514fc1291d8"),
"id" : 120,
"value" : "1",
"contain" : [
100
]
}
]
}
And serves as proof that all you really need to do here is issue the "single" query to select both the parent and children. The returned data is just the same, and all you are doing on either server or client is "massaging" into another collected format.
This is one of those cases where you can get "caught up" in thinking of how you did things in a "relational" database, and not realize that since the way the data is stored has "changed", you no longer need to use the same approach.
That is exactly what the point of the documentation example "Model Tree Structures with Child References" in it's structure, where it makes it easy to select parents and children within one query.

Converting JSON document to Google Chart format

I'm fairly new to Javascript and my situation is this:
I'm using Google Charts to visualize some data and the data is contained in Elasticsearch.
I'm querying the data with an Ajax command however the data that is returned is not usable in Google Charts in its current format.
The query returns data like this:
{
took: 5
timed_out: false
_shards: {
total: 5
successful: 5
failed: 0
}
hits: {
total: 11
max_score: 1
hits: [
{
_index: inventory
_type: on_hand
_id: 4
_score: 1
_source: {
warehouse_id: 107
date: 03-28-2013
lane: M01
routes: 383
}
}
I need to have it formatted like this for Google Charts:
{
"cols": [
{"id":"","label":"Lane","type":"string"},
{"id":"","label":"Routes","type":"number"}
],
"rows": [
{"c":[{"v":"M01"},{"v":4657}]},
{"c":[{"v":"M02"},{"v":4419}]},
{"c":[{"v":"M03"},{"v":4611}]},
{"c":[{"v":"M04"},{"v":4326}]},
{"c":[{"v":"M05"},{"v":4337}]},
{"c":[{"v":"M06"},{"v":5363}]}
]
}
While I don't expect someone to write the code for me, I would really appreciate if someone could give me a good starting point for pulling out the needed data, and adding in the proper formatting such as "cols": [... and "rows":[... etc. Thank you!
EDIT:
I was able to run an updated query which returns the results in a valid JSON format:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 7,
"max_score" : 1.0,
"hits" : [ {
"_index" : "wcs",
"_type" : "routes",
"_id" : "4",
"_score" : 1.0, "_source" : {"lane":"M04","routes":"102"}
}, {
"_index" : "wcs",
"_type" : "routes",
"_id" : "5",
"_score" : 1.0, "_source" : {"lane":"M03","routes":"143"}
}, {
"_index" : "wcs",
"_type" : "routes",
"_id" : "1",
"_score" : 1.0, "_source" : {"lane":"M07","routes":"80"}
}, {
"_index" : "wcs",
"_type" : "routes",
"_id" : "6",
"_score" : 1.0, "_source" : {"lane":"M02","routes":"157"}
}, {
"_index" : "wcs",
"_type" : "routes",
"_id" : "2",
"_score" : 1.0, "_source" : {"lane":"M06","routes":"101"}
}, {
"_index" : "wcs",
"_type" : "routes",
"_id" : "7",
"_score" : 1.0, "_source" : {"lane":"M01","routes":"105"}
}, {
"_index" : "wcs",
"_type" : "routes",
"_id" : "3",
"_score" : 1.0, "_source" : {"lane":"M05","routes":"160"}
} ]
}
}
However the JSON document required actually needs to be exactly as I've shown in my original post for Google Charts to be able to use it. The "lane" and "routes" values need to be extracted from the returned data (shown just above) and formatted as the JSON doc in the original post. Thank you again.
You should be able to do something like:
// Transform an Elasticsearch search response into the Google Charts
// DataTable JSON layout: { cols: [...], rows: [{ c: [{ v: ... }, ...] }] }.
var json = {
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 7,
    "max_score": 1,
    "hits": [
      {
        "_index": "wcs",
        "_type": "routes",
        "_id": "4",
        "_score": 1,
        "_source": {
          "lane": "M04",
          "routes": "102"
        }
      }
    ]
  }
};
var data = {};
// Column metadata: Google Charts requires the declared type to match the
// cell values, so "Routes" cells must be numbers (see the coercion below).
data.cols = [
  {
    "id": "",
    "label": "Lane",
    "type": "string"
  },
  {
    "id": "",
    "label": "Routes",
    "type": "number"
  }
];
// One row per hit (the original only converted hits[0], although a real
// response contains several hits). "_source.routes" arrives as a string
// from Elasticsearch, so coerce it to match the "number" column type.
data.rows = json.hits.hits.map(function (hit) {
  return {
    "c": [
      { "v": hit._source.lane },
      { "v": Number(hit._source.routes) }
    ]
  };
});
console.log(data);

Categories