Aggregate array occourances - javascript

I have a set of documents (posts) which have an array of users mentioned in each post.
{
"title": "Some post title",
[ ... ]
"mentions": ["johnsmith", "johndoe", "paul"]
}
I want to aggregate a list of unique mentions, and the number of times they've been mentioned across all posts. For example:
[{ user: "johnsmith", count: 5 }, { user: "benlewis", count: 9 }, { user: "johndoe", count: 1 }]
With Mongo, I'd do something like:
"mentions": [{
"$unwind": "$mentions"
}, {
"$group": {
"_id": "$mentions",
"count": { "$sum": 1 }
}
}]
What's the equivalent in Elasticsearch?

You can use a Terms aggregation for that. A small (5.x) example:
PUT test
{
"mappings": {
"test" : {
"properties": {
"title": {
"type": "text"
},
"mentions": {
"type": "keyword"
}
}
}
}
}
POST test/test/1
{
"title": "Some post title",
"mentions": [
"johnsmith",
"johndoe",
"paul"
]
}
POST test/test/2
{
"title": "Some post title 2",
"mentions": [
"johnsmith"
]
}
GET test/_search
{
"size": 0,
"aggs": {
"test": {
"terms": {
"field": "mentions",
"size": 10
}
}
}
}
Gives the following response:
"aggregations": {
"test": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "johnsmith",
"doc_count": 2
},
{
"key": "johndoe",
"doc_count": 1
},
{
"key": "paul",
"doc_count": 1
}
]
}
}
}
Hope this helps :)

Related

How to update nested object in array (Mongoose/MongoDB)

I've been struggling to get my around how to update a object in a nested array with a particular id. I've attempted to implement $set as shown below. I want to be able to update the task with an _id of 62ff74bfe80b11ade2d34455 with the data from the request body.
{
"_id": "62fa5aa25778ec97bc6ee231",
"user": "62f0eb5ebebd0f236abcaf9d",
"name": "Marketing Plan",
"columns": [
{
"name": "todo",
"_id": "62fa5aa25778ec97bc6ee233",
"tasks": [
{ ====> here
"title": "Task Four",
"description": "This is task four",
"subtasks": [
{
"name": "wash dshes",
"completed": false,
"_id": "62ff74bfe80b11ade2d34456"
},
{
"name": "do homework",
"completed": false,
"_id": "62ff74bfe80b11ade2d34457"
}
],
"_id": "62ff74bfe80b11ade2d34455"
}
]
},
{
"name": "doing",
"_id": "62fa5aa25778ec97bc6ee234",
"tasks": []
},
{
"name": "done",
"_id": "62fa5aa25778ec97bc6ee235",
"tasks": []
}
],
"__v": 0
}
const updatedTask = await Board.findOneAndUpdate(
{
"columns.tasks._id": req.params.id,
},
{ $set: { "columns.$.tasks": req.body } },
{ new: true }
);
You can use the positional operator in combination with an arrayfilter. Here's an example how you'd update a specific field of the relevant task:
db.collection.update({
"columns.tasks._id": req.params.id
},
{
"$set": {
"columns.$[].tasks.$[t].title": "it works"
}
},
{
"arrayFilters": [
{
"t._id": req.params.id
}
]
})
You can also try this on mongoplayground.
If you're looking for a way to replace the matching task object itself you can do:
db.collection.update({
"columns.tasks._id": req.params.id
},
{
"$set": {
"columns.$[].tasks.$[t]": req.body
}
},
{
"arrayFilters": [
{
"t._id": req.params.id
}
]
})

mongodb: How to use variable inside group operator in aggregate operation?

I am tring to make a query where use the value and try to interpolate a string in a new field.
Mongo Database:
[
{
"state": "1",
"events": {
"1": [
{
"date": 123.2,
"msg": "msg1"
},
{
"date": 124.2,
"msg": "msg2"
}
],
"2": [
{
"date": 125.2,
"msg": "msg3"
},
{
"date": 126.2,
"msg": "msg4"
}
],
}
},
{
"state": "2",
"events": {
"1": [
{
"date": 123.2,
"msg": "msg1"
},
{
"date": 124.2,
"msg": "msg2"
}
],
"2": [
{
"date": 125.2,
"msg": "msg3"
},
{
"date": 126.2,
"msg": "msg4"
}
],
}
}
]
Aggregate query:
db.collection.aggregate({
"$match": {
"state": {
"$in": [
"1",
"2"
]
}
}
},
{
"$group": {
"_id": {
"state": "$state"
},
"this_path": {
"$first": {
"$concat": [
"events.",
"$state",
".0.date"
]
}
}
}
})
"this_path" gets "events.1.0.date", but how to use this value, in another query(line), I would like to do like a string interpolation. Some thing like
...
"date": {
"$first": { `\$${this_path}`}
...
so it become the "events.1.date" then "$events.1.0.date" then "123.2"
you can define it by let just for example a fragment from pipeline:
$lookup: {
from: contentCollectionName,
as: 'content',
let: {
parentId: '$id',
},
The id is taken from above matched documents, but it can be anything

Try to query and aggregate in ElasticSearch but aggregrating not working - elasticsearch.js client

I'm trying to query my dataset for two purposes:
Match a term (resellable = true)
Order the results by their price
lowest to highest
Data set/doc is:
"data" : {
"resellable" : true,
"startingPrice" : 0,
"id" : "4emEe_r_x5DRCc5",
"buyNowPrice" : 0.006493, //Changes per object
"sub_title" : "test 1",
"title" : "test 1",
"category" : "Education",
}
//THREE OBJECTS WITH THE VALUES OF 0.006, 0.7, 1.05 FOR BUYNOWPRICE
I have three objects of these with different buyNowPrice
Query with agg is:
{
"query": {
"bool": {
"must": [
{
"term": {
"data.resellable": true
}
}
]
}
},
"from": 0,
"size": 5,
"aggs": {
"lowestPrice": {
"terms": {
"field": "data.buyNowPrice",
"order": {
"lowest_price": "desc"
}
},
"aggs": {
"lowest_price": {
"min": {
"field": "data.buyNowPrice"
}
},
"lowest_price_top_hits": {
"top_hits": {
"size": 5,
"sort": [
{
"data.buyNowPrice": {
"order": "desc"
}
}
]
}
}
}
}
}
}
The query works fine, and the results are 3 objects that have resellable = true
The issue is, the agg is not organizing the results based off the lowest buy now price.
Each result, the order of buyNowPrice is: 1.06, 0.006, 0.7 - which is not ordered properly.
Switching to desc has no affect, so I don't believe the agg is running at all?
EDIT:
Using the suggestion below my query now looks like:
{
"query": {
"bool": {
"must": [
{
"term": {
"data.resellable": true
}
}
]
}
},
"from": 0,
"size": 5,
"aggs": {
"lowestPrice": {
"terms": {
"field": "data.buyNowPrice",
"order": {
"lowest_price": "asc"
}
},
"aggs": {
"lowest_price": {
"min": {
"field": "data.buyNowPrice"
}
},
"lowest_price_top_hits": {
"top_hits": {
"size": 5
}
}
}
}
}
}
With the results of the query being:
total: { value: 3, relation: 'eq' },
max_score: 0.2876821,
hits: [
{
_index: 'education',
_type: 'listing',
_id: '4emEe_r_x5DRCc5', <--- buyNowPrice of 0.006
_score: 0.2876821,
_source: [Object]
},
{
_index: 'education',
_type: 'listing',
_id: '4ee_r_x5DRCc5', <--- buyNowPrice of 1.006
_score: 0.18232156,
_source: [Object]
},
{
_index: 'education',
_type: 'listing',
_id: '4444_r_x5DRCc5', <--- buyNowPrice of 0.7
_score: 0.18232156,
_source: [Object]
}
]
}
EDIT 2:
Removing the query for resellable = true the aggregation will sort properly and return the items in the proper order. But with the query for resellable included, it does not.
I'm assuming this has to do with the _score property overriding the sorting from agg? How would this be fixed
You can use a bucket sort aggregation that is a parent pipeline
aggregation which sorts the buckets of its parent multi-bucket
aggregation. Zero or more sort fields may be specified together with
the corresponding sort order.
Adding a working example (using the same index data as given in the question), search query, and search result
Search Query:
{
"query": {
"bool": {
"must": [
{
"term": {
"data.resellable": true
}
}
]
}
},
"from": 0,
"size": 5,
"aggs": {
"source": {
"terms": {
"field": "data.buyNowPrice"
},
"aggs": {
"latest": {
"top_hits": {
"_source": {
"includes": [
"data.buyNowPrice",
"data.id"
]
}
}
},
"highest_price": {
"max": {
"field": "data.buyNowPrice"
}
},
"bucket_sort_order": {
"bucket_sort": {
"sort": {
"highest_price": {
"order": "desc"
}
}
}
}
}
}
}
}
Search Result:
"buckets": [
{
"key": 1.0499999523162842,
"doc_count": 1,
"highest_price": {
"value": 1.0499999523162842
},
"latest": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.08701137,
"hits": [
{
"_index": "stof_64364468",
"_type": "_doc",
"_id": "3",
"_score": 0.08701137,
"_source": {
"data": {
"id": "4emEe_r_x5DRCc5",
"buyNowPrice": 1.05 <-- note this
}
}
}
]
}
}
},
{
"key": 0.699999988079071,
"doc_count": 1,
"highest_price": {
"value": 0.699999988079071
},
"latest": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.08701137,
"hits": [
{
"_index": "stof_64364468",
"_type": "_doc",
"_id": "2",
"_score": 0.08701137,
"_source": {
"data": {
"id": "4emEe_r_x5DRCc5",
"buyNowPrice": 0.7 <-- note this
}
}
}
]
}
}
},
{
"key": 0.006000000052154064,
"doc_count": 1,
"highest_price": {
"value": 0.006000000052154064
},
"latest": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.08701137,
"hits": [
{
"_index": "stof_64364468",
"_type": "_doc",
"_id": "1",
"_score": 0.08701137,
"_source": {
"data": {
"id": "4emEe_r_x5DRCc5",
"buyNowPrice": 0.006 <-- note this
}
}
}
]
}
}
}
]
Update 1:
If you modify your search query as :
{
"query": {
"bool": {
"must": [
{
"term": {
"data.resellable": true
}
}
]
}
},
"aggs": {
"lowestPrice": {
"terms": {
"field": "data.buyNowPrice",
"order": {
"lowest_price": "asc" <-- change the order here
}
},
"aggs": {
"lowest_price": {
"min": {
"field": "data.buyNowPrice"
}
},
"lowest_price_top_hits": {
"top_hits": {
"size": 5
}
}
}
}
}
}
Running the above search query also, you will get your required results.

How to findOneAndUpdate single field with multi nested array documents

I'm stuck on how to update single value in multi nested array documents value with findOneAndUpdate.
My condition goes like this:
Update warehouse amount where the productCode is "abc123", size "41" in warehouse "Hamburg".
I just get back null or bot sizes 41 and 42.
Here is the part of the doc:
{
"_id": ObjectId("xxxx636309f84479ec0c7b"),
"productCode": "abc123",
"brand": "Nike",
"name": "aaa",
"model": "Runner",
"color": "Brown",
"image": "shoe.jpg",
"sizes": [{
"_id": ObjectId("xxxxc636309f84479ec0c7e"),
"size": "41",
"wares": [{
"_id": ObjectId("xxxx2c636309f84479ec0c80"),
"ware": "Hamburg",
"amount": 7
},
{
"_id": ObjectId("5db72c636309f84479ec0c7f"),
"ware": "Berlin",
"amount": 7
}
]
},
{
"_id": ObjectId("5db72c636309f84479ec0c7c"),
"size": "42",
"wares": [{
"_id": ObjectId("5db72c636309f84479ec0c7d"),
"ware": "Hamburg",
"amount": 16
}]
}
],
"__v": 0
}
This is what I've tried:
Product.findOneAndUpdate({
"productCode": "abc123",
"sizes.size": 41,
"sizes.wares.ware": "Hamburg"
}, {
"$set": {
"sizes.0.wares.amount": 99
}
}, {
useFindAndModify: false
},
(err, products) => {
if (err) {
return res.status(422).send(err)
}
return res.json(products)
}
);
How can I solve this?
And to fulfill #ambianBeing, this is how it would be done with findOneAndUpdate:
Product.findOneAndUpdate({
"productCode": "abc123",
"sizes": {
$elemMatch: {
$and: [
{ size: "41" },
{
wares: {
$elemMatch: {
ware: "Hamburg"
}
}
}]
}
}
}, {
$set: {
"sizes.$[theSize].wares.$[theWare].amount": 99
}
}, {
arrayFilters: [{
"theSize.size": "41"
}, {
"theWare.ware": "Hamburg"
}]
})
Can be done using filtered positional operator $[<identifier>] which is nifty in use cases of nested array updation.
Query (Mongo Shell):
db.collection.update(
{ productCode: "abc123" },
{ $set: { "sizes.$[outer].wares.$[inner].amount": 99 } },
{
arrayFilters: [{ "outer.size": "41" }, { "inner.ware": "Hamburg" }],
multi: false
}
);
Query with Mongoose Model:
Product.update(
{ productCode: "abc123" },
{ "sizes.$[outer].wares.$[inner].amount": 99 },
{
arrayFilters: [{ "outer.size": "41" }, { "inner.ware": "Hamburg" }],
multi: false
},
(err, rawDoc) => {
if (err) {
console.error(err);
}
console.info(rawDoc);
}
);

$group with nested array element

would like to know how to use MongoDB's aggregation to collectively summarize a count of each reason_id:
I would like to get known that there are 2 counts for having "reason_id = KW7Kcsv7835YZeE3n", and 1 count for having "reason_id = KNcKQCjhFzha3oLfE".
Here is my data:
[
{
"_id": "2DLQFJLbZScBXpSam",
"toilet_id": "bJsyfh3TCvpTzE2mJ",
"reason_ids": [
"KNcKQCjhFzha3oLfE",
"KW7Kcsv7835YZeE3n"
],
"score_id": null,
"toilet": {
"_id": "bJsyfh3TCvpTzE2mJ",
"name": "Toilet_M_1",
"gender": "m",
"mac_address": "11:11:11:11:11:11"
}
},
{
"_id": "akjsbcjascklsacas",
"toilet_id": "bJsyfh3TCvpTzE2mJ",
"reason_ids": [
"KW7Kcsv7835YZeE3n"
],
"score_id": null,
"toilet": {
"_id": "fsgndgklndsdsdsd",
"name": "Toilet_F_1",
"gender": "f",
"mac_address": "11:11:11:11:11:11"
}
},
]
You can try this
db.collection.aggregate([
{
"$unwind": "$reason_ids"
},
{
"$group": {
"_id": "$reason_ids",
"count": {
"$sum": 1
}
}
}
])
Output
[
{
"_id": "KW7Kcsv7835YZeE3n",
"count": 2
},
{
"_id": "KNcKQCjhFzha3oLfE",
"count": 1
}
]
Try it here

Categories