skipped count 0 in aggregate function - javascript

I'm stuck on this for couple of days. I'm trying to get the count: 0 where there is no documents in the given time period. This is the aggregate function I'm using at the moment:
var getCount = function(timeBlock, start, end, cb) {
Document.aggregate(
{
$match: {
time: {
$gte: new Date(start),
$lt: new Date(end)
}
}
},
{
$project: {
time: 1,
delta: { $subtract: [
new Date(end),
'$time'
]}
}
},
{
$project: {
time: 1,
delta: { $subtract: [
"$delta",
{ $mod: [
"$delta",
timeBlock
]}
]}
}
},
{
$group: {
_id: { $subtract: [
end,
"$delta"
]},
count: { $sum: 1 }
}
},
{
$project: {
time: "$_id",
count: 1,
_id: 0
}
},
{
$sort: {
time: 1
}
}, function(err, results) {
if (err) {
cb(err)
} else {
cb(null, results)
}
})
}
I tried using $cond, but with no luck

The group stage is producing documents based on grouping on your given _id and counting the number of documents from the previous stage that end up in the group. Hence, a count of zero would be the result of a document being created from 0 input documents belonging to the group. Thinking about it this way, it's clear that there's no way the aggregation pipeline can do this for you. It doesn't know what all of the "missing" time periods are and it can't invent the appropriate documents out of thin air. Reapplying your extra knowledge about the missing time periods to complete the picture at the end seems like a reasonable solution (not "hacky") if you need to have an explicit count of 0 for empty time periods.

Though it has already been said the best thing to do here is "merge" your results post process rather than expect "keys" that do not exist to appear or to issue multiple queries with explicit keys that are possibly not going to aggregate results and combine them.
What has not already been said is how you actually do this, so I'll give you a MongoDB "thinking" kind of way to collect your results.
As a quick disclaimer, you could possibly employ much the same approach by "seeding" empty keys for each interval using mapReduce, or possibly even altering your data so that there is always an empty value within each possible block. Those approaches seem basically "hacky" and in the mapReduce case is not going to provide the best performance or muliple results.
What I would suggest is that working with collection results for the MongoDB brain can be made simple. There is a neat little solution called neDB, which is billed as a kind of SQL Lite for MongoDB. It supports a subset of functionality and is therefore perfect for "in memory" manipulation of results with a MongoDB mindset:
var async = require('async'),
mongoose = require('mongoose'),
DataStore = require('nedb'),
Schema = mongoose.Schema;
var documentSchema = new Schema({
time: { type: Date, default: Date.now }
});
var Document = mongoose.model( "Document", documentSchema );
mongoose.connect('mongodb://localhost/test');
var getCount = function(timeBlock, start, end, callback) {
async.waterfall(
[
// Fill a blank series
function(callback) {
var db = new DataStore();
var current = start;
async.whilst(
function() { return current < end },
function(callback) {
var delta = end - current;
db.insert({ "_id": end - delta, "count": 0 },function(err,doc) {
//console.log( doc );
current += timeBlock;
callback(err);
});
},
function(err) {
callback(err,db);
}
);
},
// Get data and update
function(db,callback) {
var cursor = Document.collection.aggregate(
[
// Match documents
{ "$match": {
"time": {
"$gte": new Date(start),
"$lt": new Date(end)
}
}},
// Group. 1 step and less hacky
{ "$group": {
"_id": {
"$let": {
"vars": {
"delta": {
"$subtract": [
{ "$subtract": [ new Date(end), "$time" ] },
{ "$mod": [
{ "$subtract": [ new Date(end), "$time" ] },
timeBlock
]}
]
}
},
"in": { "$subtract": [ end, "$$delta" ] }
}
},
"count": { "$sum": 1 }
}}
],
{ "cursor": { "batchSize": 100 } }
);
cursor.on("data",function(item) {
cursor.pause();
console.log( "called" );
db.update(
{ "_id": item._id },
{ "$inc": { "count": item.count } },
{ "upsert": true },
function(err) {
cursor.resume();
}
);
});
cursor.on("end",function() {
console.log( "done" );
db.find({},function(err,result) {
callback(err,result);
});
});
}
],
function(err,result) {
callback(err,result);
}
);
}
mongoose.connection.on("open", function(err,conn) {
getCount(
1000 * 60 * 60, // each hour
new Date("2014-07-01").valueOf(), // start
new Date("2014-07-02").valueOf(), // end
function(err,result) {
if (err) throw err;
console.log( result );
}
);
});
So essentially create each interval as in memory collection and then just update those interval records with the actual data retrieved. I can't think of another way to do that where it would be more simple and natural to the way of thinking.
Just a footnote, the "interval" logic is just replicated from your question, but in fact the time periods are "rounded up" where 15 minutes would appear in hour 1. It usually is the practice to round down so that everything belongs to the interval it falls in and not the next one.

this is hacky fix I did for now:
var getCount = function(timeBlock, start, end, cb) {
Document.aggregate(
{
$match: {
time: {
$gte: new Date(start),
$lt: new Date(end)
}
}
},
{
$project: {
time: 1,
delta: { $subtract: [
new Date(end),
'$time'
]}
}
},
{
$project: {
time: 1,
delta: { $subtract: [
"$delta",
{ $mod: [
"$delta",
timeBlock
]}
]}
}
},
{
$group: {
_id: { $subtract: [
end,
"$delta"
]},
count: { $sum: 1 }
}
},
{
$project: {
time: "$_id",
count: 1,
_id: 0
}
},
{
$sort: {
time: 1
}
}, function(err, results) {
if (err) {
cb(err)
} else {
// really hacky way
var numOfTimeBlocks = ( end - start ) / timeBlock
// in case there is no 0s in the given period of time there is no need
// to iterate through all of the results
if ( results.length === numOfTimeBlocks ) {
cb(results);
} else {
var time = start;
var details = [];
var times = results.map(function(item) {
return item.time;
});
for( var i = 0; i < numOfTimeBlocks; i++) {
time += timeBlock;
var idx = times.indexOf(time);
if (idx > -1) {
details.push(results[idx]);
} else {
var documentCount = { count: 0, time: time };
details.push(documentCount);
}
}
cb(details);
}
}
})
}
I was also thinking about doing one query per time block, which gives the same result but I think is inefficient because you query the database N times.

Related

How to find duplicates in mongoDB with two conditions

I want to find duplicates in the MongoDB atlas. I have seen examples for finding duplicates but they didn't take two conditions as I understand it. for some network issues, we discovered duplicates with a reference(refno) number in the database
so I want to find duplicate refno within these two conditions (currentDate and transactionTypeId)
const transactionRecord = await this.tranModel.aggregate(
[{ $group:
{ _id: "$refno", count: { $sum: 1 } } }, { $match: { count: { $gt: 1 } } }
])
With the above code I was able to get all the duplicates in the db, but I want to limit it to the condition
{ transactiontdate: currentDate, transactionTypeID: transactionTypesid });
I got it working
const Record = await this.tranModel.aggregate([
{
$match: { transactiontdate: currentDate, transactionTypeID: transactionTypeID }
},
{
$group: { _id: "$refno", count: { $sum: 1 } }
}, {
$match: { count: { $gt: 1 } }
}
])
if (Record != null) {
for (let i = 0; i < Record.length; i++) {
const element = Record[i]._id.toString()
tranArray.push(element)
}
}
}

Mongodb change the order of an array

I am stumped on this one. I have an images array with in my collection, the users can rearrange the order of images on the client side and I am trying to save the new order to the database. The imagesOrder array is the new images in the new order and it only has the url so I want to match the url to the urls in the database. I am not sure how to make the index a variable or if this is possible:
this is what I have so far. my code editor shows and error on [index] so I know that is not the proper format but not sure what is:
imagesOrder.forEach((index, image) => {
const imageUrl = image.url
const index = index
Users.update({
id
}, {
$set: {
images[index]: imageUrl
}
})
});
So that is not actually the way you would do this. Basically there is no need to actually send an update request to the server for every single indexed position for the array. Also the update() method is asynchronous, so it's not something you ever put inside a forEach() which does not respect awaiting the completion of an asynchronous call.
Instead what is usually the most practical solution is to just $set the entire array content in one request. Also mocking up your imagesOrder to something practical since forEach() even actually has the signature of .forEach((<element>,><index>) => ..., which seems different to what you were expecting given the code in the question.
var imagesOrder = [
{ index: 0, url: '/one' }, { index: 1, url: '/two' }, { index: 2, url: '/three' }
];
let response = await Users.updateOne(
{ id },
{ "$set": { "images": imagesOrder.map(({ url }) => url) } }
);
// { "$set": { "images": ["/one","/two","/three"] } }
Much like the forEach() a map() does the same array iteration but with the difference that it actually returns an array generated by the processing function. This is actually what you want since all that is needed here is to simply extract the values of the url property from each object.
Note the index properties are actually already in order and really redundant here, but I'm just approximating what it sounds like you have from your question. Since an "array" actually maintains it's own order then such a property "should" be redundant and it would be advisable that your source array data actually conforms to this.
If however you managed to record such index values in a way they are actually out of order, then the best solution is to add a sort():
var imagesOrder = [
{ index: 2, url: '/three' }, { index: 0, url: '/one' }, { index: 1, url: '/two' }
];
let response = await Users.updateOne(
{ id },
{ "$set": {
"images": imagesOrder.sort((a,b) => a.index - b.index)
.map(({ url }) => url)
}}
);
// { "$set": { "images": ["/one","/two","/three"] } }
As for what you "attempted", it's not really benefiting you in any way to actually attempt updating each element at a given position. But if you really wanted to see it done, then again you actually would just instead build up a single update request:
var imagesOrder = [
{ index: 2, url: '/three' }, { index: 0, url: '/one' }, { index: 1, url: '/two' }
];
var update = { $set: {} };
for ( let { url, index } of imagesOrder.sort((a,b) => a.index - b.index) ) {
update.$set['images.'+index] = url;
}
/*
* Creates:
*
* { "$set": {
* "images.0": "/one",
* "images.1": "/two",
* "images.2": "/three"
* }}
*/
let response = await Users.updateOne({ id }, update);
Or in the case where the index property was not there or irrelevant since the array is already ordered:
var imagesOrder = [
{ index: 2, url: '/three' }, { index: 0, url: '/one' }, { index: 1, url: '/two' }
];
for ( let [index, { url }] of Object.entries(imagesOrder) ) {
update.$set['images.'+index] = url;
}
/*
* Creates:
*
* { "$set": {
* "images.0": "/one",
* "images.1": "/two",
* "images.2": "/three"
* }}
*/
let response = await Users.updateOne({ id }, update);
So it's all pretty much the same thing. Note the common form of notation is actually a "string" for the key which includes the index position numerically. This is described in Dot Notation within the core documentation for the MongoDB query language.
The one main difference here is that should your new array contain more entries than the actual array stored in the document to be modified, that second form using the "dot notation" to the indexed position is going to fail since it cannot "set" an index position which does not exist.
For this reason even though there are other pitfalls to "replacing" the array as the original examples show, it's a lot safer than attempting to update via the positional index in the stored document.
Note that this should be enough to have you at least started in the right direction. Making this work with multiple users possibly updating the data at once can become pretty complicated in terms of update statements for both checking and merging changes.
In most cases the simple "replace" will be more than adequate at least for a while. And of course the main lesson here should be to not loop "async" methods in places where it is completely unnecessary. Most of the time what you really want to "loop" is the construction of the statement, if of course any looping is required at all and most of the time it really isn't.
Addendum
Just in case you or anyone had it in mind to actually store an array of objects with the index position values stored within them, this can become a little more complex, but it can also serve as an example of how to actually issue an update statement which does not "replace" the array and actually is safe considering it does not rely on indexed positions of the array but instead using matching conditions.
This is possible with the positional filtered $[<identifier>] syntax introduced in MongoDB 3.6. This allows conditions to specify which element to update ( i.e by matching url ) instead of including the index positions within the statement directly. It's safer since if no matching element is found, then the syntax allows for not attempting to change anything at all.
Also as demonstration the method to $sort the elements based on updated index values is shown. Noting this actually uses the $push modifier even though in this statement we are not actually adding anything to the array. Just reordering the elements. But it's how you actually do that atomically:
const { Schema, Types: { ObjectId } } = mongoose = require('mongoose');
const uri = 'mongodb://localhost:27017/longorder';
const opts = { useNewUrlParser: true };
// sensible defaults
mongoose.Promise = global.Promise;
mongoose.set('debug', true);
mongoose.set('useFindAndModify', false);
mongoose.set('useCreateIndex', true);
// schema defs
const imageSchema = new Schema({
index: Number,
url: String
})
const userSchema = new Schema({
images: [imageSchema]
});
const User = mongoose.model('User', userSchema);
// log helper
const log = data => console.log(JSON.stringify(data, undefined, 2));
(async function() {
try {
const conn = await mongoose.connect(uri, opts);
// clean models
await Promise.all(
Object.entries(conn.models).map(([k,m]) => m.deleteMany())
);
// Create data
let _id = new ObjectId();
let user = await User.findOneAndUpdate(
{ _id },
{
'$push': {
'images': {
'$each': [
{ index: 2, url: '/one' },
{ index: 0, url: '/three' },
{ index: 1, url: '/two' }
],
'$sort': { 'index': 1 }
}
}
},
{ 'new': true, 'upsert': true }
);
log(user);
// Change order request
let orderImages = [
{ index: 2, url: '/three' },
{ index: 0, url: '/one' },
{ index: 1, url: '/two' }
];
let $set = { };
let arrayFilters = [];
for ( { index, url } of orderImages ) {
let key = url.replace(/^\//,'');
arrayFilters.push({ [`${key}.url`]: url });
$set[`images.$[${key}].index`] = index;
}
let ops = [
// Update the index value of each matching item
{ 'updateOne': {
'filter': { _id },
'update': { $set },
arrayFilters
}},
// Re-sort the array by index value
{ 'updateOne': {
'filter': { _id },
'update': {
'$push': {
'images': { '$each': [], '$sort': { 'index': 1 } }
}
}
}}
];
log(ops);
let response = await User.bulkWrite(ops);
log(response);
let newuser = await User.findOne({ _id });
log(newuser);
} catch(e) {
console.error(e)
} finally {
mongoose.disconnect()
}
})()
And the output, showing initial document state, the update and actual changes made:
Mongoose: users.deleteMany({}, {})
Mongoose: users.findOneAndUpdate({ _id: ObjectId("5bf6116621293f2ab3dec3d3") }, { '$setOnInsert': { __v: 0 }, '$push': { images: { '$each': [ { _id: ObjectId("5bf6116621293f2ab3dec3d6"), index: 2, url: '/one' }, { _id: ObjectId("5bf6116621293f2ab3dec3d5"), index: 0, url: '/three' }, { _id: ObjectId("5bf6116621293f2ab3dec3d4"), index: 1, url: '/two' } ], '$sort': { index: 1 } } } }, { upsert: true, remove: false, projection: {}, returnOriginal: false })
{
"_id": "5bf6116621293f2ab3dec3d3",
"__v": 0,
"images": [
{
"_id": "5bf6116621293f2ab3dec3d5",
"index": 0,
"url": "/three"
},
{
"_id": "5bf6116621293f2ab3dec3d4",
"index": 1,
"url": "/two"
},
{
"_id": "5bf6116621293f2ab3dec3d6",
"index": 2,
"url": "/one"
}
]
}
[
{
"updateOne": {
"filter": {
"_id": "5bf6116621293f2ab3dec3d3"
},
"update": {
"$set": {
"images.$[three].index": 2,
"images.$[one].index": 0,
"images.$[two].index": 1
}
},
"arrayFilters": [
{
"three.url": "/three"
},
{
"one.url": "/one"
},
{
"two.url": "/two"
}
]
}
},
{
"updateOne": {
"filter": {
"_id": "5bf6116621293f2ab3dec3d3"
},
"update": {
"$push": {
"images": {
"$each": [],
"$sort": {
"index": 1
}
}
}
}
}
}
]
Mongoose: users.bulkWrite([ { updateOne: { filter: { _id: 5bf6116621293f2ab3dec3d3 }, update: { '$set': { 'images.$[three].index': 2, 'images.$[one].index': 0, 'images.$[two].index': 1 } }, arrayFilters: [ { 'three.url': '/three' }, { 'one.url': '/one' }, { 'two.url': '/two' } ] } }, { updateOne: { filter: { _id: 5bf6116621293f2ab3dec3d3 }, update: { '$push': { images: { '$each': [], '$sort': { index: 1 } } } } } } ], {})
{
"ok": 1,
"writeErrors": [],
"writeConcernErrors": [],
"insertedIds": [],
"nInserted": 0,
"nUpserted": 0,
"nMatched": 2,
"nModified": 2,
"nRemoved": 0,
"upserted": [],
"lastOp": {
"ts": "6626503031506599940",
"t": 139
}
}
Mongoose: users.findOne({ _id: ObjectId("5bf6116621293f2ab3dec3d3") }, { projection: {} })
{
"_id": "5bf6116621293f2ab3dec3d3",
"__v": 0,
"images": [
{
"_id": "5bf6116621293f2ab3dec3d6",
"index": 0,
"url": "/one"
},
{
"_id": "5bf6116621293f2ab3dec3d4",
"index": 1,
"url": "/two"
},
{
"_id": "5bf6116621293f2ab3dec3d5",
"index": 2,
"url": "/three"
}
]
}

Building your MongoDB $match as a string dynamically

Im trying to build my $match dynamically for my MongoDB request, and when I do the simple stuff it works perfect, like this:
var matchStr = {};
matchStr.status = { "$lte": 3 };
matchStr.checkout = { $gte: Math.round(new Date(d).getTime()/1000) }
And then I run the
bookingTable.aggregate([
{
$match: {
$and: [ matchStr ]
}
}, etc....
Which gives a nice:
matchStr: {
"status": {
"$lte": 3
},
"checkout": {
"$gte": 1527669588
}
}
So thats all great, but what if I want to put something like this into the matchStr...
{ $or: [ { "managerDate": {$lte: managerLast} }, { "activityDate": {$lte: activityLast} } ] }
,
{ $or: [ { "expireDate": {$gt: oneDayBackward} }, { "status": {$lt: 9}} ] }
,
{ "status": { $in: [0, 1, 2, 9 ] } }
How can I do that?
There are multiple syntax for accessing the property of an object
var matchStr = {}
matchStr.status = { "$lte": 3 }
matchStr.checkout = { "$gte": Math.round(new Date().getTime()/1000) }
matchStr["$or"] = [
{ "managerDate": { "$lte": "managerLast" }},
{ "activityDate": { "$lte": "activityLast" }}
]
or If you want to push to $or operator
matchStr["$or"].push({ "managerDate": { "$lte": "managerLast" } })
matchStr["$or"].push({ "activityDate": { "$lte": "activityLast" } })

MongoDB Node Driver Count of current aggregation

I am using mongodb for node and am trying to aggregate a collection of documents based on some set filters and then limit it to 10. I have it aggregating just fine and limiting just fine but I need to get the total number of that aggregated documents before I limit them to 10.
Here is my code.
var qry = [];
if (filter.FocusArea && filter.FocusArea != "(None)") {
qry.push({
$match: { 'ProgramAreaId': filter.FocusArea }
});
}
if (filter.Status && filter.Status != "(None)") {
qry.push({
$match: { 'StatusId': filter.Status }
});
}
if (filter.ProgOfficer && filter.ProgOfficer != "(None)") {
qry.push({
$match: { 'ProgramOfficerId': filter.ProgOfficer }
});
}
if (filter.Fund && filter.Fund != "(None)") {
qry.push({
$match: { 'FundId': filter.Fund }
});
}
var skipNbr = (parseInt(filter.Page) * 10 - 10);
qry.push({ $project: { _id: '$_id', count: { $sum: '$$ROOT' }, content: '$$ROOT'} }) // I would like to do the count here.
qry.push({ $skip: skipNbr })
qry.push({ $limit: 10 })
var apps = mongo.collection('Applications').aggregate(qry, function(err, docs) {
callback(docs);
});
Can this be done in one aggregation query or does it need to be split into two?
It's possible to do so in a single query.
You can project the filtered array using $project into two different fields: one with the content and one with the count.
You can use $slice to limit the content array.
db.collection.aggregate([
{
$match: {} // Filter
},
{
$group: {
_id: 1,
array: {$push: '$$ROOT'}
}
},
{
$project: {
content: {
$slice: ['$array', skip, limit]
},
total: {
$size: '$array'
}
}
}
], {allowDiskUse: true})

Get sums by grouping a collection in Meteor

I have a collection with fields: number, a, b, c.
I want to divide the collection in three based on the number and get separate sums of a, b, and c for each group division.
I have done this with
function sumList(amountList) {
return _.reduce(amountList, function(sum, amount) {
return sum + amount;
}, -1);
}
// cursors
var group1 = Groups.find({ number: { $lte: 32 } }).fetch();
var group2 = Groups.find({ number: { $gte: 33, $lte: 70 } }).fetch();
var group3 = Groups.find({ number: { $gte: 71 } }).fetch();
// sums for group1
var group1SumA = sumList(_.pluck(group1, "a"));
var group1SumB = sumList(_.pluck(group1, "b"));
var group1SumC = sumList(_.pluck(group1, "c"));
// sums for group2
var group2SumA = sumList(_.pluck(group2, "a"));
var group2SumB = sumList(_.pluck(group2, "b"));
var group2SumC = sumList(_.pluck(group2, "c"));
// sums for group3
var group3SumA = sumList(_.pluck(group3 "a"));
var group3SumB = sumList(_.pluck(group3, "b"));
var group3SumC = sumList(_.pluck(group3, "c"));
It works but I think the code is very ugly.
I wonder if this can be done with some smart mapping. Besides, I guess it might have bad performance.
How can these sums be optimized?
Use the aggregation framework which will have the $match pipeline operator to filter the collection on the number field. The $group pipeline step then groups all the filtered input documents and applies the accumulator expression $sum to each field to get the sums.
Your pipeline would look like this:
var pipeline = [
{
"$match": { "number": { "$lte": 32 } } /* group1 filter */
},
{
"$group": {
"_id": 0,
"sumA": { "$sum": "$a" },
"sumB": { "$sum": "$b" },
"sumC": { "$sum": "$c" }
}
}
];
You can add the meteorhacks:aggregate package to implement the aggregation in Meteor:
Add to your app with
meteor add meteorhacks:aggregate
Since this package exposes .aggregate method on Mongo.Collection instances, you can then call the method to get the resulting array with the document that has the sums. For example
if (Meteor.isServer) {
var Coll = new Mongo.Collection('collectionName');
Meteor.methods({
sumList: function (filter) {
var pipeline = [
{
"$match": filter
},
{
"$group": {
"_id": 0,
"sumA": { "$sum": "$a" },
"sumB": { "$sum": "$b" },
"sumC": { "$sum": "$c" }
}
}
];
var result = Coll.aggregate(pipeline);
return result[0];
}
});
}
if (Meteor.isClient) {
// filters
var group1 = { "number": { "$lte": 32 } };
var group2 = { "number": { "$gte": 33, "$lte": 70 } };
var group3 = { "number": { "$gte": 71 } };
Meteor.call('sumList', group1, callback);
//Meteor.call('sumList', group2, callback);
//Meteor.call('sumList', group3, callback);
function callback(err, result) {
console.log(result)
}
}

Categories