I need to calculate rank on each account. Rank depends on the account who requested the method.
var account; // assumed to be one requesting the method
const allMatches = Account.find({
$or: [
{ argA: account.argA },
{ argB: account.argB },
{ argC: account.argC }
],
_id: {$ne: account._id}
});
const getRank = (item) => {
let rank = 0;
if (item.argB === account.argB) {
rank += 3;
}
if (item.argA === account.argA) {
rank += 2;
}
if (item.argC === account.argC) {
rank += 1;
}
return rank;
};
const compare = (a,b) => {
const rankA = getRank(a);
const rankB = getRank(b);
if (rankA === rankB) return 0;
return rankA > rankB ? 1: -1;
}
// use the compare method
allMatches.sort(compare);
account.set({match : allMatches[0]['id']});
However, I cant use the sort method like this, as it expects an object or a string.
I need some help to proceed in the correct direction.
For example -
If there are 3 accounts in the system
A1 - {argA: 'A', argB: 'B', argC: 'C'}
A2 - {argA: 'D', argB: 'B', argC: 'F'}
A3 - {argA: 'G', argB: 'H', argC: 'C'}
Now if A1 needs to find a match -
Rank score with A2 is = 3 // argB is same
Rank score with A3 is = 1 // argC is same
Hence A1 will match with A2, and thats what I need.
You can "sort on the database" using .aggregate()
let results = await Account.aggregate([
{ "$match": {
"$or": [
{ "argA": account.argA },
{ "argB": account.argB },
{ "argC": account.argC }
],
"_id": { "$ne": account._id}
}},
{ "$addFields": {
"rank": {
"$sum": [
{ "$cond": [{ "$eq": ["$argA", account.argA] }, 3, 0 ] },
{ "$cond": [{ "$eq": ["$argB", account.argB] }, 2, 0 ] },
{ "$cond": [{ "$eq": ["$argC", account.argC] }, 1, 0 ] }
]
}
}},
{ "$sort": { "rank": -1 } }
/*
* Add $skip and $limit for paging if needed
{ "$skip": 0 },
{ "$limit": 25 },
*/
])
Also noting that Account.aggregate() or Account.find() actually returns a Promise since the method is async, so you need to await that or use .then() or provide a callback, depending on your preferred style:
Account.aggregate([
{ "$match": {
"$or": [
{ "argA": account.argA },
{ "argB": account.argB },
{ "argC": account.argC }
],
"_id": { "$ne": account._id}
}},
{ "$addFields": {
"rank": {
"$sum": [
{ "$cond": [{ "$eq": ["$argA", account.argA] }, 3, 0 ] },
{ "$cond": [{ "$eq": ["$argB", account.argB] }, 2, 0 ] },
{ "$cond": [{ "$eq": ["$argC", account.argC] }, 1, 0 ] }
]
}
}},
{ "$sort": { "rank": -1 } }
/*
* Add $skip and $limit for paging if needed
{ "$skip": 0 },
{ "$limit": 25 },
*/
]).then( result => /* do something */ )
That promise/callback resolution is the basic error in your code.
But the general point is you probably want to calculate "rank" on the server in order to enable paging over larger result sets, and that's a lot smarter than trying to sort "everything" in a result array.
Related
Let books collection be,
db.books.insertMany([
{ "name": "foo", "category": 0, publishedAt: ISODate("2008-09-14T00:00:00Z") },
{ "name": "bar", "category": 1, publishedAt: ISODate("1945-08-17T00:00:00Z") },
{ "name": "baz", "category": 1, publishedAt: ISODate("2002-03-01T00:00:00Z") },
{ "name": "qux", "category": 2, publishedAt: ISODate("2002-01-21T00:00:00Z") },
{ "name": "quux", "category": 4, publishedAt: ISODate("2018-04-18T00:00:00Z") },
])
I want to calculate total amount of books published between 2000-2010 inclusive for each year and also count of published categories. Let category be defined as an enum with 5 variants represented with integer in MongoDB schema e.g Fiction, Fantasy, Classic, Horror, Comic.
I achieved other requirements with this aggregation pipeline.
db.books.aggregate([
{
$match: {
publishedAt: {
$gte: ISODate("2000-01-01T00:00:00Z"),
$lt: ISODate("2011-01-01T00:00:00Z"),
},
},
},
{
$group: {
_id: {
$year: "$publishedAt",
},
totalCount: {
$count: {},
},
},
},
{
$sort: {
_id: 1,
},
},
]);
With following output,
[
{
_id: 2002,
totalCount: 2,
},
{
_id: 2008,
totalCount: 1,
},
]
But I also want a field that represents number of categories in an array. For example,
[
{
_id: 2002,
totalCount: 2,
categoryCount: [0, 1, 1, 0, 0],
},
{
_id: 2008,
totalCount: 1,
categoryCount: [1, 0, 0, 0, 0],
},
]
Array's length needs to be 5 since category is defined with 5 variants. In the example, the year 2002 has total of 2 books, which totalCount represents and has 1 book in category 1 which is why categoryCount[1] is 1. Likewise 1 book in category 2.
Using $accumulate
db.collection.aggregate([
{
$match: {
publishedAt: {
$gte: ISODate("2000-01-01T00:00:00Z"), $lt: ISODate("2011-01-01T00:00:00Z")
}
}
},
{
$group: {
_id: {
$year: "$publishedAt"
},
totalCount: {
$count: {}
},
categoryCount: {
$accumulator: {
init: function(){
return [0,0,0,0,0] //start with array with all entries as 0
},
accumulate: function(state, category) {
state[category] = state[category] + 1; //increment the value at index specified by the category
return state;
},
accumulateArgs: ["$category"],
merge: function(state1, state2) {
for (i = 0; i < state.length; i++) //incase the merge is needed add the values at each indexes
{
state[i] = state1[i] + state2[i];
}
return state;
},
lang: "js"
}
}
},
},
{
$sort: {
_id: 1
}
}
]);
You can achieve results like that without accumulator, using two $group stages: first by year and category, and then by year only, and then apply some MongoDB functions to transform the result to the desired format
The resulting query is long and looks quite complicated, duh. But works on your data example:
db.collection.aggregate([
{
$match: {
publishedAt: {
$gte: ISODate("2000-01-01T00:00:00Z"),
$lt: ISODate("2011-01-01T00:00:00Z")
}
}
},
{
$group: {
_id: {
year: {
$year: "$publishedAt"
},
category: "$category"
},
totalCount: {
$count: {}
}
}
},
{
$group: {
"_id": "$_id.year",
"totalCount": {
"$sum": "$totalCount"
},
"categoryCount": {
"$push": {
"k": {
"$toString": "$_id.category"
},
"v": "$totalCount"
}
}
}
},
{
"$addFields": {
"categoryCount": {
"$arrayToObject": "$categoryCount"
}
}
},
{
"$addFields": {
"categoryCount": {
"$mergeObjects": [
{
"0": 0,
"1": 0,
"2": 0,
"3": 0,
"4": 0,
"5": 0
},
"$categoryCount"
]
}
}
},
{
"$addFields": {
"categoryCount": {
"$objectToArray": "$categoryCount"
}
}
},
{
"$addFields": {
"categoryCount": {
"$map": {
"input": "$categoryCount",
"as": "x",
"in": {
"$mergeObjects": [
"$$x",
{
"k": {
"$toInt": "$$x.k"
}
}
]
}
}
}
}
},
{
"$addFields": {
"categoryCount": {
"$sortArray": {
"input": "$categoryCount",
"sortBy": {
"$k": 1
}
}
}
}
},
{
"$addFields": {
"categoryCount": "$categoryCount.v"
}
},
{
$sort: {
_id: 1
}
}
])
MongoDB playground
Step-by-step explanation:
$match - your initial filter
$group - pass both year and category into _id to preserve the count for each category
$group - group by year only, collect a "categoryCount" as a list of objects for each category that appeared in this year
$addFields - combine the list into a single document, keys are categories, and values are their counts. Notice, that keys can only be a strings, so we must cast them
$addFields - "densify" object to fill missing categories with zeros
$addFields - convert object back to the array, so we can extract values only
$addFields - cast categories back to numbers for correct sorting, if you have more than 10 of them
$addFields - sort by categories to ensure order (actually I'm not sure if this step is really needed)
$addFields - extract the count for each category into a flat list
Try to add these stages one by one to your query to see how it actually works.
In fact, my suggestion is to use aggregation as an end-to-end transformation, but rather stop at stage 3 or 4, and finish the transformation with your programming language, if you can. Good luck
I have the following types of documents in my mongodb. How can i use a match function to check if the key2 value contains 'Mermaid / Fairy' or 'Superhero'?
{
_id: 123,
key2: [ 'Mermaid / Fairy', 'Superhero' ]
}
{
_id: 456,
key2: [ 'Slug']
}
This is how i am doing matches for individual words, however i would like to pass in a couple, and if it matches any of them, then it gets returned
{
$match: { key2: /.*Superhero.*/ },
},
you can use this aggregate
itemsSchema.aggregate([
{
$match: {
"key2": {
$in: [
"Mermaid / Fairy",
"Superhero"
]
}
}
}])
Here are a couple of ways ...
to check if the key2 value contains 'Mermaid / Fairy' or 'Superhero'
... by checking if the "$size" of the "$setIntersection" of "$key2" and ["Mermaid / Fairy", "Superhero"]
db.collection.aggregate([
{
"$match": {
"$expr": {
"$gt": [
{
"$size": {
"$setIntersection": [
"$key2",
["Mermaid / Fairy", "Superhero"]
]
}
},
0
]
}
}
}
])
Try it on mongoplayground.net.
Another way is to use "$reduce" by checking each "$key2" value to see if it is "$in" ["Mermaid / Fairy", "Superhero"].
db.collection.aggregate([
{
"$match": {
"$expr": {
"$reduce": {
"input": "$key2",
"initialValue": false,
"in": {
"$or": [
"$$value",
{
"$in": [
"$$this",
["Mermaid / Fairy", "Superhero"]
]
}
]
}
}
}
}
}
])
Try it on mongoplayground.net.
Here is an example
testWidgetOrderSort = [
{ "_id": "name", "order": 1 },
{ "_id": "is", "order": 2 },
{ "_id": "my", "order": 0 },
{ "_id": "oh I would be very first" },
{ "_id": "adam", "order": 3 }
]
Here for the the object { "_id": "oh I would be very first" } does not have the property order so it should come first.
And then the rest of the objects should be sorted according to the property "order"
So after sorting it should be,
output= [ { _id: 'oh I would be very first' },
{ _id: 'my', order: 0 },
{ _id: 'name', order: 1 },
{ _id: 'is', order: 2 },
{ _id: 'adam', order: 3 } ]
Logic is basic array sorting logic.
If both a.order and b.order are defined return 1 or -1 depending on the largest value.
If either one of them is undefined return 1 or -1 depending on the defined value.
Please Note: The value 1 and -1 determines the relative position between the two nodes. Returning 1 places a after b and -1 places a before b.
const testWidgetOrderSort = [
{ "_id": "name", "order": 1 },
{ "_id": "is", "order": 2 },
{ "_id": "my", "order": 0 },
{ "_id": "oh I would be very first" },
{ "_id": "adam", "order": 3 }
];
const output = testWidgetOrderSort.sort((a, b) => {
if( a.order !== undefined && b.order !== undefined ) {
return a.order > b.order ? 1 : -1;
} else {
return a.order !== undefined ? 1 : -1
}
});
console.log(output);
I came up with something like this:
const test = [
{ "_id": "name", "order": 1 },
{ "_id": "is", "order": 2 },
{ "_id": "my", "order": 0 },
{ "_id": "oh I would be very first" },
{ "_id": "adam", "order": 3 }
];
const x = test.sort((a, b) => {
const [STAY, SWAP] = [-1, 1];
if (!a.hasOwnProperty('order')) { return STAY; }
if (!b.hasOwnProperty('order')) { return SWAP; }
return a.order - b.order;
});
console.log(x);
You just have to pass the custom comparator function
if (!("order" in a)) return -1;
if (!("order" in b)) return 1;
else return a.order - b.order;
1) return -1 if property order doesn't exist in a.
2) return 1 if property order doesn't exist in b.
3) if both the object has order property then just sort in ascending order.
const arr = [
{ _id: "name", order: 1 },
{ _id: "is", order: 2 },
{ _id: "my", order: 0 },
{ _id: "oh I would be very first" },
{ _id: "adam", order: 3 },
];
const result = arr.sort((a, b) => {
if (!("order" in a)) return -1;
if (!("order" in b)) return 1;
else return a.order - b.order;
});
console.log(result);
If you don't care about the performance too much, the below should be fine,
const testWidgetOrderSort = [
{ "_id": "name", "order": 1 },
{ "_id": "is", "order": 2 },
{ "_id": "my", "order": 0 },
{ "_id": "oh I would be very first" },
{ "_id": "adam", "order": 3 }
];
const finalArr = testWidgetOrderSort.filter(a => typeof a.order === "undefined");
const sortedArrWithOrderItems = testWidgetOrderSort.filter(a => typeof a.order !== "undefined").sort((a,b) => (a.order > b.order ? 1 : -1));
finalArr.push(...sortedArrWithOrderItems);
console.log(finalArr);
Note: Personally I would recommend going with #Nitheesh or #decpk solution, it is more clean and performance wise better. My solution is just to give another solution for the problem
I am trying to calculate the average duration for each stage. So in the array below - I should be able to get the average duration for 'test1', which would be 2.
jobs = [
{
"build_id": 1,
"stage_executions": [
{
"name": "test1"
"duration": 1,
},
{
"name": "test2"
"duration": 16408,
},
{
"name": "test3"
"duration": 16408,
},
]
},
{
"build_id": 2,
"stage_executions": [
{
"name": "test1"
"duration": 3,
},
{
"name": "test2"
"duration": 11408,
},
{
"name": "test3"
"duration": 2408,
},
]
}
]
My failed attempt:
avgDuration: function(jobs) {
let durationSum = 0
for (let item = 0; item < this.jobs.length; item++) {
for (let i = 0; i < this.jobs[item].stage.length; item++) {
durationSum += stage.duration
}
durationAverage = durationSum/this.jobs[item].stage.length
}
return durationAverage
What am I doing wrong? I'm not sure how to accomplish this since the duration is spread out between each job.
UPDATE:
This is return a single average for all stages rateher than per stage
<template>
<div class="stages">
<h3>
Average Duration
</h3>
<table>
<tbody>
<tr v-for="item in durations">
<td>
<b>{{ item.average}} {{ item.count }}</b>
// this returns only 1 average and 177 count instead of 10
<br />
</td>
</tr>
</tbody>
</table>
</div>
</template>
<script>
import { calculateDuration } from "../../helpers/time.js";
import { liveDuration } from "../../helpers/time.js";
import moment from "moment";
export default {
name: "Stages",
data() {
return {
jobs: [],
durations: []
};
},
methods: {
avgDuration: function(jobs) {
var averageByName = {}; // looks like { 'name': { average: 111, count: 0 }}
for (var job of jobs) {
for(var stage of job.stage_execution) {
if (averageByName[stage.name] == null) { // we need a new object
averageByName[stage.name] = { average: 0, count: 0 };
}
// just name it so its easier to read
var averageObj = averageByName[stage.name];
// update count
averageObj.count += 1;
// Cumulative moving average
averageObj.average = averageObj.average + ( (stage.duration - averageObj.average) / averageObj.count );
console.log(averageObj.count)
}
}
return averageByName
},
},
created() {
this.JobExecEndpoint =
process.env.VUE_APP_TEST_URL +
"/api/v2/jobs/?limit=10";
fetch(this.JobExecEndpoint)
.then(response => response.json())
.then(body => {
for (let i = 0; i < body.length; i++) {
this.jobs.push({
name: body[i].job.name,
job: body[i].job,
stage_execution: body[i].stage_executions,
});
}
})
.then(() => {
this.$emit("loading", true);
})
.then(() => {
this.durations = this.avgDuration(this.jobs);
})
.catch(err => {
console.log("Error Fetching:", this.JobExecEndpoint, err);
return { failure: this.JobExecEndpoint, reason: err };
});
}
};
</script>
We can do this pretty simply and without overflow from having too many numbers by using a Cumulative moving average and a few loops.
Here is a line the relevant Wikipedia page on Moving Averages and the most relvant formula below.
I will not go into much detail with the above as there are a lot of documents describing this sort of thing. I will however say that the main reason to this over adding all the values together is that there is a far lower chance of overflow and that is why I am using it for this example.
Here is my solution with comments made in code.
var jobs = [ { "build_id": 1, "stage_executions": [ { "name": "test1", "duration": 1, }, { "name": "test2", "duration": 16408, }, { "name": "test3", "duration": 16408, }, ] }, { "build_id": 2, "stage_executions": [ { "name": "test1", "duration": 3, }, { "name": "test2", "duration": 11408, }, { "name": "test3", "duration": 2408, }, ] } ];
var averageByName = {}; // looks like { 'name': { average: 111, count: 0 }}
for (var job of jobs) {
for(var stage of job.stage_executions) {
if (averageByName[stage.name] == null) { // we need a new object
averageByName[stage.name] = { average: 0, count: 0 };
}
// just name it so its easier to read
var averageObj = averageByName[stage.name];
// update count
averageObj.count += 1;
// Cumulative moving average
averageObj.average = averageObj.average + ( (stage.duration - averageObj.average) / averageObj.count );
}
}
// print the averages
for(var name in averageByName) {
console.log(name, averageByName[name].average);
}
Let me know if you have any questions or if anything is unclear.
You could collect the values in an object for each index and map later only the averages.
var jobs = [{ build_id: 1, stage_executions: [{ name: "test1", duration: 1 }, { name: "test2", duration: 16408 }, { name: "test3", duration: 16408 }] }, { build_id: 2, stage_executions: [{ name: "test1", duration: 3 }, { name: "test2", duration: 11408 }, { name: "test3", duration: 2408 }] }],
averages = jobs
.reduce((r, { stage_executions }) => {
stage_executions.forEach(({ duration }, i) => {
r[i] = r[i] || { sum: 0, count: 0 };
r[i].sum += duration;
r[i].avg = r[i].sum / ++r[i].count;
});
return r;
}, []);
console.log(averages.map(({ avg }) => avg));
console.log(averages);
.as-console-wrapper { max-height: 100% !important; top: 0; }
I've used Array.prototype.flatMap to flatten the jobs array into an array of {name:string,duration:number} object. Also, to make more solution a bit more dynamic the function takes in a field argument which returns the average for that specific field.
const jobs = [
{
"build_id": 1,
"stage_executions": [
{
"name": "test1",
"duration": 1,
},
{
"name": "test2",
"duration": 16408,
},
{
"name": "test3",
"duration": 16408,
},
]
},
{
"build_id": 2,
"stage_executions": [
{
"name": "test1",
"duration": 3,
},
{
"name": "test2",
"duration": 11408,
},
{
"name": "test3",
"duration": 2408,
},
]
}
];
const caller = function(jobs, field) {
const filtered = jobs
.flatMap((item) => item.stage_executions)
.filter(item => {
return item.name === field;
})
const total = filtered.reduce((prev, curr) => {
return prev + curr.duration;
}, 0)
return total / filtered.length;
}
console.log(caller(jobs, 'test1'))
console.log(caller(jobs, 'test2'))
console.log(caller(jobs, 'test3'))
In case you get the error flatMap is not a function. You can add this code snippet in your polyfill or at the top of your js file.
Array.prototype.flatMap = function(lambda) {
return Array.prototype.concat.apply([], this.map(lambda));
};
PS: for demostration, I obtained the flatMap implementation from here
Elasticsearch nested aggregations allow you to effectively group by multiple fields. But what it returns is buckets which are nested for each field you group by.
What I need is an array of objects for each group combination.
My query:
{
index : 'stats',
type : 'click',
size : 0,
body : {
aggs : {
publisher : {
terms : {
field : 'publisherData.id'
},
aggs : {
advertiser : {
terms : {
field : 'advertiserData.id'
},
aggs : {
country : {
terms : {
field : 'request.location.country.iso_code'
},
aggs : {
revenue : {
sum : {
field : 'revenueData.data.USD'
}
},
cost : {
sum : {
field : 'costData.data.USD'
}
}
}
}
}
}
}
}
}
}
}
The result, limited to one entry per field. Normally there would be more so all combinations of nested fields would have to be mapped to an array for display in a table.
{
"took": 562,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4812178,
"max_score": 0,
"hits": []
},
"aggregations": {
"publisher": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 3114671,
"buckets": [
{
"key": 4,
"doc_count": 1697507,
"advertiser": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 555390,
"buckets": [
{
"key": 5,
"doc_count": 1142117,
"country": {
"doc_count_error_upper_bound": 13807,
"sum_other_doc_count": 544585,
"buckets": [
{
"key": "us",
"doc_count": 424137,
"revenue": {
"value": 772282
},
"cost": {
"value": 53698.84903321415
}
}
]
}
}
]
}
}
]
}
}
}
What I need (normally there would be multiple objects here, one for each combination of nested fields) :
[{
publisher:4,
advertiser:5,
country:'us',
cost:53698.84903321415,
revenue:772282
}]
What's the best way to get this result from the above nested structure or even better and if possible, from elasticsearch itself.
Any help greatly appreciated.
In plain Javascript, you could use an iterative and recursive approach - but I suggest to use some feature of ES for getting the wanted result.
function getValues(object) {
function iter(o, p) {
var add = false;
Object.keys(o).forEach(function (k) {
if (['key', 'doc_count'].indexOf(k) !== -1) {
return;
}
if (Array.isArray(o[k].buckets)) {
o[k].buckets.forEach(function (a) {
iter(a, p.concat([[k, a.key]]));
});
return;
}
add = true;
p.push([k, o[k].value]);
});
add && result.push(Object.assign({}, ...p.map(a => ({[a[0]]: a[1]}))));
}
var result = [];
iter(object.aggregations, []);
return result;
}
var data = { took: 562, timed_out: false, _shards: { total: 5, successful: 5, failed: 0 }, hits: { total: 4812178, max_score: 0, hits: [] }, aggregations: { publisher: { doc_count_error_upper_bound: 0, sum_other_doc_count: 3114671, buckets: [{ key: 4, doc_count: 1697507, advertiser: { doc_count_error_upper_bound: 0, sum_other_doc_count: 555390, buckets: [{ key: 5, doc_count: 1142117, country: { doc_count_error_upper_bound: 13807, sum_other_doc_count: 544585, buckets: [{ key: "us", doc_count: 424137, revenue: { value: 772282 }, cost: { value: 53698.84903321415 } }] } }] } }] } } };
console.log(getValues(data));