My project is in nodeJs with express and i use mongoose for the request to my database mongoDb.
I have a model Media the structure is:
{
"_id": {
"$oid": "6354f982a11464ff4f7bac60"
},
"userId": {
"$oid": "6353aa119d39ccbb3263123f"
},
"date": "2022-10-23",
"base64": "....=",
"label": "Noamount",
"uriPath": "xxxx",
"s3Path": "xxxx",
"amount": 0,
"__v": 0,
"type": "tva"
}
Like you seen there is a field date, imagine that i have 3 medias on differents month:
{
"date": "2022-10-23",
"label": "monthTen",
"type" : "tva",
... // Other property
},
{
"date": "2022-09-10",
"label": "monthNineFirst",
"type" : "tva",
... // Other property
},
{
"date": "2022-09-19",
"label": "monthNineSecond",
"type" : "other",
... // Other property
}
I want to output something like this:
// Ordery by type in first place and by month
// number like 9 = the month inside the date property
{
tva: {
9: [{ label: "monthNineFirst"... }],
10: [{ label: "monthNineTen"... }]
},
other: {
9: [{ label: "monthNineSecond"... }]
}
}
Is found the property aggregate but i don't understand it correctly.
I know how to dow that in JS it's easy, but can i do that directly on the request with the property aggregate and $group?
There is what i have done so far https://mongoplayground.net/p/-vAbdnnqOfD
Here's one way to do it by extending your mongoplayground.net start.
db.collection.aggregate([
{ // group all docs by month
$group: {
_id: {
$month: {
$dateFromString: {
dateString: "$date",
format: "%Y-%m-%d"
}
}
},
data: {"$push": "$$ROOT"}
}
},
{ // group all groups into a single doc
"$group": {
"_id": null,
"groupData": {
"$push": {
// k,v for $arrayToObject
"k": "$_id",
"v": {
"$sortArray": {
"input": "$data",
"sortBy": {"date": 1}
}
}
}
}
}
},
{
"$replaceWith": {
"$arrayToObject": {
"$map": {
"input": {
// sort by month
"$sortArray": {
"input": "$groupData",
"sortBy": {"k": 1}
}
},
"in": {
"$mergeObjects": [
"$$this",
{ // rewrite k as string
"k": {"$toString": "$$this.k"}
}
]
}
}
}
}
}
])
Try it on mongoplayground.net.
Related
I have the following collection in MongoDB
[
{
"acronym": "front",
"references": [
{
"date": "2020-03-04",
"value": "5.6"
},
{
"date": "2020-03-05",
"value": "6.3"
}
]
}
]
I want to use the function $addToSet in order to add new document into references. I know that it can be done with the following code:
db.collection.update({
"acronym": "front"
},
{
$addToSet: {
"references": {
"date": "2020-03-06",
"value": "6"
}
}
})
And it will add the new document to the array references, so the result is the following:
[
{
"acronym": "front",
"references": [
{
"date": "2020-03-04",
"value": "5.6"
},
{
"date": "2020-03-05",
"value": "6.3"
},
{
"date": "2020-03-06",
"value": "6"
}
]
}
]
QUESTION: What I want to obtain is that in the case of adding a date that is already in the array, the update will no be produced.
Here is the playground: https://mongoplayground.net/p/DPER2RuROEs
Thanks!
You can add another qualifier to the update to prevent duplicated dates
db.collection.update({
"acronym": "front",
"references.date": {
$ne: "2020-03-04"
}
},
{
$addToSet: {
"references": {
"date": "2020-03-04",
"value": "6"
}
}
})
I got the solution from here
I am tring to make a query where use the value and try to interpolate a string in a new field.
Mongo Database:
[
{
"state": "1",
"events": {
"1": [
{
"date": 123.2,
"msg": "msg1"
},
{
"date": 124.2,
"msg": "msg2"
}
],
"2": [
{
"date": 125.2,
"msg": "msg3"
},
{
"date": 126.2,
"msg": "msg4"
}
],
}
},
{
"state": "2",
"events": {
"1": [
{
"date": 123.2,
"msg": "msg1"
},
{
"date": 124.2,
"msg": "msg2"
}
],
"2": [
{
"date": 125.2,
"msg": "msg3"
},
{
"date": 126.2,
"msg": "msg4"
}
],
}
}
]
Aggregate query:
db.collection.aggregate({
"$match": {
"state": {
"$in": [
"1",
"2"
]
}
}
},
{
"$group": {
"_id": {
"state": "$state"
},
"this_path": {
"$first": {
"$concat": [
"events.",
"$state",
".0.date"
]
}
}
}
})
"this_path" gets "events.1.0.date", but how to use this value, in another query(line), I would like to do like a string interpolation. Some thing like
...
"date": {
"$first": { `\$${this_path}`}
...
so it become the "events.1.date" then "$events.1.0.date" then "123.2"
you can define it by let just for example a fragment from pipeline:
$lookup: {
from: contentCollectionName,
as: 'content',
let: {
parentId: '$id',
},
The id is taken from above matched documents, but it can be anything
I'm converting MongoDB Query to Elasticsearch in NodeJS platform. While developing I'm facing some difficulties with grouping and filtering data (getting nested objects like hits.hits._source) within Elasticsearch Query like we doing in MongoDB Query.
Example:-
UserModel.aggregate([
{
$match: {
uId: req.body.uId, timestamp: { $gte: req.body.date, $lte: new Date() }
},
},
{
$group: {
_id: "$eId",
location: {
$push: {
time: "$timestamp", lat: "$lat"
}
},
timestamp: {
$push: "$timestamp"
},
testId: { $first: "$testId" },
}
},
{
$project: {
eId: 1, location: 1, testId: 1, max: { $max: "$timestamp" }
}
},
{ $unwind: { path: "$location", preserveNullAndEmptyArrays: true } },
{
$redact: {
$cond: {
if: { $eq: ["$location.time", "$max"] },
then: "$$DESCEND",
else: "$$PRUNE"
}
}
},
{
$project: {
eId: 1, latitude: "$location.lat", testId: 1
}
},
]).exec(function (err, result) {
console.log(result)
});
What will be the equivalent query in Elasticsearch?
I'm looking for solution with grouping, unwinding and projecting (MongoDB concepts to Elasticsearch) required data with minimal nested response.
Thanks in Advance.
EDIT:-
Adding Elasticsearch Document:-
{
"timestamp": "2019-10-08T:02:50:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
2.000,
34.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4ef"
},
{
"timestamp": "2019-10-09T:02:50:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a408",
"location": [
2.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4ef"
},
{
"timestamp": "2019-10-09T:03:50:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
4.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4ef"
},
{
"timestamp": "2019-10-09T:03:40:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
2.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4e1"
},
{
"timestamp": "2019-10-10T:03:40:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
3.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4e1"
}
Match with status =1, and Group By eId
With that results, group by timestamp and get max timestamp value
Expected Result:-
[
{
"_id": "5d5d7ce0c89852e7bad4a407",
"max": "2019-10-10T:03:40:15.54Z", // max timestamp
"zId": [
"5d5d7ce0c89852e7bad4a4e1",
"5d5d7ce0c89852e7bad4a4ef"
]
},
{
"_id": "5d5d7ce0c89852e7bad4a408",
"max": "2019-10-09T:02:50:15.54Z",
"zId": [
"5d5d7ce0c89852e7bad4a4ef"
]
}, // ...etc
]
Thanks for the documents. Sadly, I do not know any way to retrieve only the documents having the max timestamp field value.
The following query will allow you to filter by status and group by eId then get the max timestamp value, but it will not return the documents having the max timestamp value.
{
"size": 0,
"query": {
"term": {
"status": 1
}
},
"aggregations": {
"eId_group": {
"terms": {
"field": "eId"
},
"aggregations": {
"max_timestamp": {
"max": {
"field": "timestamp"
}
}
}
}
}
}
This second query use a top_hits aggregation to retrieve the documents grouped by eId. The returned documents are sorted by decreasing timestamp value so the documents having the max timestamp will be firsts, but you may also get documents with different timestamps.
{
"size": 0,
"query": {
"term": {
"status": 1
}
},
"aggregations": {
"eId_group": {
"terms": {
"field": "eId"
},
"aggregations": {
"max_timestamp": {
"max": {
"field": "timestamp"
}
},
"top_documents": {
"top_hits": {
"size": 20,
"sort": { "timestamp": "desc"}
}
}
}
}
}
}
I used the following mapping for the index
PUT /test_index
{
"mappings": {
"properties": {
"timestamp": {
"type": "date"
},
"eId": {
"type": "keyword"
},
"zId": {
"type": "keyword"
},
"status": {
"type": "keyword"
}
}
}
}
Given a collection like:
{
"_id": "XXXX",
"JobId": [
100
],
"PersonalDetails": [
{
"Level": 1,
"Zone": [
{
"Id": 1,
"Code": "XXXXXXXX",
"IsAvailable": true
},
{
"Id": 45,
"Code": "ZZZZZZZZZ",
"IsAvailable": false
}
]
}
],
"Timestamp": ISODate("2015-11-01T00:00:00.000Z")
}
I need to get all Zone ids and codes that have the IsAvailable flag set to true.
I have tried the following:
var details = db.test.find(
{
JobId: {$in: [100]},
'PersonalDetails': {$elemMatch: {Zone : {$elemMatch: {IsAvailable: true}}}}
},
{
'PersonalDetails.Zone.Id': 1,
'PersonalDetails.Zone.Code': 1,
'PersonalDetails.Zone.IsAvailable': 1
});
details.forEach(function(doc){
var myDetails = doc.PersonalDetails;
myDetails.forEach(function(doc2){
var myZones = doc2.Zone;
print(myZones);
This gives me
{
"0" : {
"Id": 1,
"Code": "XXXXXXXX",
"IsAvailable": true
},
"1" : {
"Id": 45,
"Code": "ZZZZZZZZZ",
"IsAvailable": false
}
}
But I just want only where the IsAvailable flag is set to true returned.
Am I going about this the wrong way?? I tried using aggregate but ran into the same problem - returning all and not filtering the IsAvailable flag.
You need to use the .aggregate() method.
First of all you need to reduce the size of the documents to process using the $match operator. From there you will need to denormalize your "PersonalDetails" array using the $unwind operator.
You can then use the $project operator to return only sub-documents that match your criteria.
The $map operator in the project stage is used to return array of sub-documents.
db.collection.aggregate([
{ "$match": {
"JobId": 100,
"PersonalDetails.Zone.IsAvailable": true
}},
{ "$unwind": "$PersonalDetails" },
{ "$project": {
"zone": {
"$setDifference": [
{ "$map": {
"input": "$PersonalDetails.Zone",
"as": "z",
"in": { "$cond": [ "$$z.IsAvailable", "$$z", false ] }
}},
[false]
]
}
}}
])
Which returns:
{
"_id" : "XXXX",
"zone" : [
{
"Id" : 1,
"Code" : "XXXXXXXX",
"IsAvailable" : true
}
]
}
Starting from MongoDB 3.2 we can use the $filter operator to do this efficiently
db.collection.aggregate([
{ "$match": {
"JobId": 100,
"PersonalDetails.Zone.IsAvailable": true
}},
{ "$unwind": "$PersonalDetails" },
{ "$project": {
"zone": {
"$filter": {
"input": "$PersonalDetails.Zone",
"as": "z", "cond": "$$z.IsAvailable"
}
}
}}
])
I've been tasked with generating averages for day, week, month, and year for a rather large set of documents in MongoDB.
All of the jobs have a created field, and I need to base the average values off of the outputs array...
Here's what a document looks like:
{
__v: 0,
_id: ObjectId("535837911393fd0200d8e1eb"),
created: ISODate("2014-04-23T21:58:41.446Z"),
output: [
{
ref: {
img: false
},
type: "image/png",
methods: [
{
options: {
height: 200,
width: 200
},
method: "resize"
}
]
},
{
ref: {
img: false
},
type: "image/png",
methods: [
{
options: {
height: 400,
width: 400
},
method: "resize"
}
]
}
]
}
And here is what my current script looks like:
JobModel.aggregate([
{
$unwind: '$output'
},
{
$group: {
_id: { $dayOfYear: '$created' },
day: { $sum: 1 }
}
},
{
$group: {
_id: null,
avgDay: { $avg: '$day' }
}
},
{
$project: {
_id: 0,
average: {
day: '$avgDay'
}
}
}
],
function(err, data) {
if (err) {
console.log(err);
return;
}
res.send(data);
next();
});
I cannot seem to figure out the right order for this. Any suggestions?
Really not that sure what you are after here. You say that you want "multiple" averages but that brings up the question of "muliple" over what basis? The average "output" entries over a individual day would be different from the average output entries per month or even per daily average per month. So the scale changes with each selection and is not really a single query for "daily", "monthly" and "yearly"
I would seem that you really was "discrete" totals which would be best approached by first finding the "size" of the output entries and then applying an average per scale:
JobModel.aggregate(
[
{ "$unwind": "$output" },
// Count the array entries on the record
{ "$group": {
"_id": "$_id",
"created": { "$first": "$created" },
"count": { "$sum": 1 }
}},
// Now get the average per day
{ "$group": {
"_id": { "$dayOfYear": "$created" },
"avg": { "$avg": "$count" }
}}
],
function(err,result) {
}
);
Or actually with MongoDB 2.6 and greater you can just use the $size operator on the array:
JobModel.aggregate(
[
// Now get the average per day
{ "$group": {
"_id": { "$dayOfYear": "$created" },
"avg": { "$avg": { "$size": "$output" } }
}}
],
function(err,result) {
}
);
So the logical thing is to run each of those within your required $match range other your aggregation key of either "day", "month" or "year"
You could do something like combining the daily averages per day, with the daily average per month and then daily for year by combining results into arrays, otherwise you would just be throwing items away, which can be alternately done if you "just" wanted the daily average for the year, but as full results:
JobModel.aggregate(
[
// Now get the average per day
{ "$group": {
"_id": {
"year": { "$year": "$created" },
"month": { "$month": "$created" },
"day": { "$dayOfYear": "$created" }
},
"dayAvg": { "$avg": { "$size": "$output" } }
}},
// Group for month
{ "$group": {
"_id": {
"year": "$_id.year",
"month": "$_id.month"
},
"days": {
"$push": {
"day": "$_id.day",
"avg": "$dayAvg"
}
},
"monthAvg": { "$avg": "$dayAvg" }
}},
// Group for the year
{ "$group": {
"_id": "$_id.year",
"daily": { "$avg": "$monthAvg" },
"months": {
"$push": {
"month": "$_id.month",
"daily": "$monthAvg",
"days": "$days"
}
}
}}
],
function(err,result) {
}
);
However you want to apply that, but the main thing missing from your example is finding the "size" or "count" of the original "output" array per document from which to obtain an average.