I'm trying to get the output of getQueryResults using the below code:
var AWS = require('aws-sdk');
var athena = new AWS.Athena();
const DEBUG = process.env.DEBUG;
const GLOCA_ENVID = process.env.GLOCA_ENVID;
const GLOCA_AWS_ACCOUNTID = process.env.GLOCA_AWS_ACCOUNTID;
const GLOCA_AWS_REGION = process.env.GLOCA_AWS_REGION;
exports.handler = function(event, context, callback) {
athena.getQueryResults({
QueryExecutionId: "a1b2c3d4-5678-90ab-cdef-EXAMPLE11111"
},function(err,data){
if (err) console.log(err);
else {
console.log("Body: ", data);
}
});
}
Below is the output:
{
UpdateCount: 0,
ResultSet: { Rows: [ [Object] ], ResultSetMetadata: { ColumnInfo: [Array] } }
}
The output should look something like this:
{
"ResultSet": {
"Rows": [
{
"Data": [
{
"VarCharValue": "date"
},
{
"VarCharValue": "location"
},
{
"VarCharValue": "browser"
},
{
"VarCharValue": "uri"
},
{
"VarCharValue": "status"
}
]
},
{
"Data": [
{
"VarCharValue": "2014-07-05"
},
{
"VarCharValue": "SFO4"
},
{
"VarCharValue": "Safari"
},
{
"VarCharValue": "/test-image-2.jpeg"
},
{
"VarCharValue": "200"
}
]
},
{
"Data": [
{
"VarCharValue": "2014-07-05"
},
{
"VarCharValue": "SFO4"
},
{
"VarCharValue": "IE"
},
{
"VarCharValue": "/test-image-2.jpeg"
},
{
"VarCharValue": "200"
}
]
}
],
"ResultSetMetadata": {
"ColumnInfo": [
{
"CatalogName": "hive",
"SchemaName": "",
"TableName": "",
"Name": "date",
"Label": "date",
"Type": "date",
"Precision": 0,
"Scale": 0,
"Nullable": "UNKNOWN",
"CaseSensitive": false
},
{
"CatalogName": "hive",
"SchemaName": "",
"TableName": "",
"Name": "location",
"Label": "location",
"Type": "varchar",
"Precision": 2147483647,
"Scale": 0,
"Nullable": "UNKNOWN",
"CaseSensitive": true
},
{
"CatalogName": "hive",
"SchemaName": "",
"TableName": "",
"Name": "browser",
"Label": "browser",
"Type": "varchar",
"Precision": 2147483647,
"Scale": 0,
"Nullable": "UNKNOWN",
"CaseSensitive": true
}
]
}
},
"UpdateCount": 0
}
The above output is an example output, but a similar outcome is what I'm expecting. When I run in AWS CLI:
aws athena --region "us-west-2" get-query-results --query-execution-id a1b2c3d4-5678-90ab-cdef-EXAMPLE11111
I get the expected output, so I'm unable to understand why I can't get the same result via lambda.
Thank you so much for all the help! :)
It actually looks like the code is fine. The response shows that there is an Object within ResultSet.Rows: console.log only prints nested objects down to a limited depth and abbreviates anything deeper as [Object] or [Array]. Try to stringify the result before logging, so that the handler looks like this:
exports.handler = function(event, context, callback) {
athena.getQueryResults({
QueryExecutionId: "a1b2c3d4-5678-90ab-cdef-EXAMPLE11111"
},function(err,data){
if (err) console.log(err);
else {
console.log("Body: ", JSON.stringify(data, null, 2));
}
});
}
Related
I am trying to write a query that takes a field's value and interpolates it into a string in a new field.
Mongo Database:
[
{
"state": "1",
"events": {
"1": [
{
"date": 123.2,
"msg": "msg1"
},
{
"date": 124.2,
"msg": "msg2"
}
],
"2": [
{
"date": 125.2,
"msg": "msg3"
},
{
"date": 126.2,
"msg": "msg4"
}
],
}
},
{
"state": "2",
"events": {
"1": [
{
"date": 123.2,
"msg": "msg1"
},
{
"date": 124.2,
"msg": "msg2"
}
],
"2": [
{
"date": 125.2,
"msg": "msg3"
},
{
"date": 126.2,
"msg": "msg4"
}
],
}
}
]
Aggregate query:
// Two-stage aggregation: filter documents by state, then group them per state.
db.collection.aggregate({
// Stage 1: keep only documents whose "state" is "1" or "2".
"$match": {
"state": {
"$in": [
"1",
"2"
]
}
}
},
{
// Stage 2: one output document per distinct "state" value.
"$group": {
"_id": {
"state": "$state"
},
// Concatenates "events.", the state value and ".0.date" into a plain string
// (e.g. "events.1.0.date") and keeps the value from the first document of the group.
// The result is text, not a resolved field reference.
"this_path": {
"$first": {
"$concat": [
"events.",
"$state",
".0.date"
]
}
}
}
})
"this_path" gets "events.1.0.date", but how to use this value, in another query(line), I would like to do like a string interpolation. Some thing like
...
"date": {
"$first": { `\$${this_path}`}
...
so that it becomes "events.1.0.date", then "$events.1.0.date", and finally resolves to 123.2.
You can define it with let. For example, here is a fragment from a pipeline:
$lookup: {
from: contentCollectionName,
as: 'content',
let: {
parentId: '$id',
},
Here the id is taken from the documents matched above, but it can be anything.
I have a top level JSON structure as follows:
{
"video": [],
"messages": [],
"notifications": []
}
and i have a database output (in variable "result") as follows i want to push into the "video" array:
[
{
"_id": "5f98ab906439155cfc6f9afb",
"status": "NOT_STARTED",
"date": "2020-10-27T23:21:52.683Z",
"callInvitees": [
{
"username": "user1"
},
{
"username": "user2"
}
]
},
{
"_id": "5f98aba0789e163e0c78908f",
"status": "NOT_STARTED",
"date": "2020-10-27T23:22:08.048Z",
"callInvitees": [
{
"username": "user1"
}
]
}
]
My code is:
// Empty top-level structure that the database output should be merged into.
let dashboardJSON = { "video": [], "messages": [], "notifications": [] };
// push() appends its argument as ONE element, so passing the array `result`
// stores the whole array inside "video" (an array nested in an array).
dashboardJSON.video.push(result)
It works but i am ending up with too many arrays (i think) - it looks as follows:
{
"video": [
[
{
"_id": "5f98ab906439155cfc6f9afb",
"status": "NOT_STARTED",
"date": "2020-10-27T23:21:52.683Z",
"callInvitees": [
{
"username": "user1"
},
{
"username": "user2"
}
]
},
{
"_id": "5f98aba0789e163e0c78908f",
"status": "NOT_STARTED",
"date": "2020-10-27T23:22:08.048Z",
"callInvitees": [
{
"username": "user1"
}
]
}
]
],
"messages": [],
"notifications": []
}
I want "video": [ { ... }, { ... } ] whereas i have "video": [[ { ... }, { ... } ]]
How can i resolve this?
You can use the spread operator as follows:
// Sample database output: two call records.
const result = [
  {
    _id: "5f98ab906439155cfc6f9afb",
    status: "NOT_STARTED",
    date: "2020-10-27T23:21:52.683Z",
    callInvitees: [{ username: "user1" }, { username: "user2" }],
  },
  {
    _id: "5f98aba0789e163e0c78908f",
    status: "NOT_STARTED",
    date: "2020-10-27T23:22:08.048Z",
    callInvitees: [{ username: "user1" }],
  },
];

const dashboardJSON = { video: [], messages: [], notifications: [] };

// Spreading passes every record as its own argument, so each one becomes a
// separate element of "video" instead of a single nested array.
dashboardJSON.video.push(...result);

console.log(dashboardJSON);
Just use Array.prototype.map() method. Map method returns a new array with the result by using the provided function.
// Target structure whose "video" list will receive the records.
const ret = { video: [], messages: [], notifications: [] };

// Sample database output: two call records.
const data = [
  {
    _id: '5f98ab906439155cfc6f9afb',
    status: 'NOT_STARTED',
    date: '2020-10-27T23:21:52.683Z',
    callInvitees: [{ username: 'user1' }, { username: 'user2' }],
  },
  {
    _id: '5f98aba0789e163e0c78908f',
    status: 'NOT_STARTED',
    date: '2020-10-27T23:22:08.048Z',
    callInvitees: [{ username: 'user1' }],
  },
];

// map() returns a new array; the identity callback keeps every record as-is,
// so "video" ends up as a flat list of the same record objects.
ret.video = data.map((record) => record);

console.log(ret);
I'm trying to query my dataset for two purposes:
Match a term (resellable = true)
Order the results by their price
lowest to highest
Data set/doc is:
"data" : {
"resellable" : true,
"startingPrice" : 0,
"id" : "4emEe_r_x5DRCc5",
"buyNowPrice" : 0.006493, //Changes per object
"sub_title" : "test 1",
"title" : "test 1",
"category" : "Education",
}
//THREE OBJECTS WITH THE VALUES OF 0.006, 0.7, 1.05 FOR BUYNOWPRICE
I have three objects of these with different buyNowPrice
Query with agg is:
{
"query": {
"bool": {
"must": [
{
"term": {
"data.resellable": true
}
}
]
}
},
"from": 0,
"size": 5,
"aggs": {
"lowestPrice": {
"terms": {
"field": "data.buyNowPrice",
"order": {
"lowest_price": "desc"
}
},
"aggs": {
"lowest_price": {
"min": {
"field": "data.buyNowPrice"
}
},
"lowest_price_top_hits": {
"top_hits": {
"size": 5,
"sort": [
{
"data.buyNowPrice": {
"order": "desc"
}
}
]
}
}
}
}
}
}
The query works fine, and the results are 3 objects that have resellable = true
The issue is, the agg is not organizing the results based off the lowest buy now price.
Each result, the order of buyNowPrice is: 1.06, 0.006, 0.7 - which is not ordered properly.
Switching to desc has no effect, so I don't believe the agg is running at all?
EDIT:
Using the suggestion below my query now looks like:
{
"query": {
"bool": {
"must": [
{
"term": {
"data.resellable": true
}
}
]
}
},
"from": 0,
"size": 5,
"aggs": {
"lowestPrice": {
"terms": {
"field": "data.buyNowPrice",
"order": {
"lowest_price": "asc"
}
},
"aggs": {
"lowest_price": {
"min": {
"field": "data.buyNowPrice"
}
},
"lowest_price_top_hits": {
"top_hits": {
"size": 5
}
}
}
}
}
}
With the results of the query being:
total: { value: 3, relation: 'eq' },
max_score: 0.2876821,
hits: [
{
_index: 'education',
_type: 'listing',
_id: '4emEe_r_x5DRCc5', <--- buyNowPrice of 0.006
_score: 0.2876821,
_source: [Object]
},
{
_index: 'education',
_type: 'listing',
_id: '4ee_r_x5DRCc5', <--- buyNowPrice of 1.006
_score: 0.18232156,
_source: [Object]
},
{
_index: 'education',
_type: 'listing',
_id: '4444_r_x5DRCc5', <--- buyNowPrice of 0.7
_score: 0.18232156,
_source: [Object]
}
]
}
EDIT 2:
Removing the query for resellable = true the aggregation will sort properly and return the items in the proper order. But with the query for resellable included, it does not.
I'm assuming this has to do with the _score property overriding the sorting from the agg? How would this be fixed?
You can use a bucket sort aggregation that is a parent pipeline
aggregation which sorts the buckets of its parent multi-bucket
aggregation. Zero or more sort fields may be specified together with
the corresponding sort order.
Adding a working example (using the same index data as given in the question), search query, and search result
Search Query:
{
"query": {
"bool": {
"must": [
{
"term": {
"data.resellable": true
}
}
]
}
},
"from": 0,
"size": 5,
"aggs": {
"source": {
"terms": {
"field": "data.buyNowPrice"
},
"aggs": {
"latest": {
"top_hits": {
"_source": {
"includes": [
"data.buyNowPrice",
"data.id"
]
}
}
},
"highest_price": {
"max": {
"field": "data.buyNowPrice"
}
},
"bucket_sort_order": {
"bucket_sort": {
"sort": {
"highest_price": {
"order": "desc"
}
}
}
}
}
}
}
}
Search Result:
"buckets": [
{
"key": 1.0499999523162842,
"doc_count": 1,
"highest_price": {
"value": 1.0499999523162842
},
"latest": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.08701137,
"hits": [
{
"_index": "stof_64364468",
"_type": "_doc",
"_id": "3",
"_score": 0.08701137,
"_source": {
"data": {
"id": "4emEe_r_x5DRCc5",
"buyNowPrice": 1.05 <-- note this
}
}
}
]
}
}
},
{
"key": 0.699999988079071,
"doc_count": 1,
"highest_price": {
"value": 0.699999988079071
},
"latest": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.08701137,
"hits": [
{
"_index": "stof_64364468",
"_type": "_doc",
"_id": "2",
"_score": 0.08701137,
"_source": {
"data": {
"id": "4emEe_r_x5DRCc5",
"buyNowPrice": 0.7 <-- note this
}
}
}
]
}
}
},
{
"key": 0.006000000052154064,
"doc_count": 1,
"highest_price": {
"value": 0.006000000052154064
},
"latest": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.08701137,
"hits": [
{
"_index": "stof_64364468",
"_type": "_doc",
"_id": "1",
"_score": 0.08701137,
"_source": {
"data": {
"id": "4emEe_r_x5DRCc5",
"buyNowPrice": 0.006 <-- note this
}
}
}
]
}
}
}
]
Update 1:
If you modify your search query as :
{
"query": {
"bool": {
"must": [
{
"term": {
"data.resellable": true
}
}
]
}
},
"aggs": {
"lowestPrice": {
"terms": {
"field": "data.buyNowPrice",
"order": {
"lowest_price": "asc" <-- change the order here
}
},
"aggs": {
"lowest_price": {
"min": {
"field": "data.buyNowPrice"
}
},
"lowest_price_top_hits": {
"top_hits": {
"size": 5
}
}
}
}
}
}
Running the above search query also, you will get your required results.
I'm converting a MongoDB query to Elasticsearch on the NodeJS platform. While developing, I'm facing some difficulties with grouping and filtering data (getting nested objects like hits.hits._source) in an Elasticsearch query the way we do in a MongoDB query.
Example:-
// Aggregation pipeline: for one user and a time window, groups records by eId
// and narrows each group down to the location entry with the latest timestamp.
UserModel.aggregate([
{
// Stage 1: records of the requested uId with timestamp between req.body.date and now.
$match: {
uId: req.body.uId, timestamp: { $gte: req.body.date, $lte: new Date() }
},
},
{
// Stage 2: one group per eId, collecting {time, lat} pairs and all timestamps,
// and keeping the testId of the first record in the group.
$group: {
_id: "$eId",
location: {
$push: {
time: "$timestamp", lat: "$lat"
}
},
timestamp: {
$push: "$timestamp"
},
testId: { $first: "$testId" },
}
},
{
// Stage 3: compute "max" as the largest value in the collected timestamp array.
// NOTE(review): after $group the grouping key is stored in _id, so `eId: 1`
// presumably selects nothing here — confirm.
$project: {
eId: 1, location: 1, testId: 1, max: { $max: "$timestamp" }
}
},
// Stage 4: emit one document per element of the location array.
{ $unwind: { path: "$location", preserveNullAndEmptyArrays: true } },
{
// Stage 5: documents whose location.time equals max continue ($$DESCEND);
// all others are removed ($$PRUNE).
$redact: {
$cond: {
if: { $eq: ["$location.time", "$max"] },
then: "$$DESCEND",
else: "$$PRUNE"
}
}
},
{
// Stage 6: final shape — expose location.lat as "latitude" alongside testId.
$project: {
eId: 1, latitude: "$location.lat", testId: 1
}
},
]).exec(function (err, result) {
// NOTE(review): err is never inspected; only result is logged.
console.log(result)
});
What will be the equivalent query in Elasticsearch?
I'm looking for solution with grouping, unwinding and projecting (MongoDB concepts to Elasticsearch) required data with minimal nested response.
Thanks in Advance.
EDIT:-
Adding Elasticsearch Document:-
{
"timestamp": "2019-10-08T:02:50:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
2.000,
34.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4ef"
},
{
"timestamp": "2019-10-09T:02:50:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a408",
"location": [
2.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4ef"
},
{
"timestamp": "2019-10-09T:03:50:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
4.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4ef"
},
{
"timestamp": "2019-10-09T:03:40:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
2.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4e1"
},
{
"timestamp": "2019-10-10T:03:40:15.54Z",
"status" : 1,
"eId": "5d5d7ce0c89852e7bad4a407",
"location": [
3.100,
35.5664111801
],
"zId": "5d5d7ce0c89852e7bad4a4e1"
}
Match with status =1, and Group By eId
With that results, group by timestamp and get max timestamp value
Expected Result:-
[
{
"_id": "5d5d7ce0c89852e7bad4a407",
"max": "2019-10-10T:03:40:15.54Z", // max timestamp
"zId": [
"5d5d7ce0c89852e7bad4a4e1",
"5d5d7ce0c89852e7bad4a4ef"
]
},
{
"_id": "5d5d7ce0c89852e7bad4a408",
"max": "2019-10-09T:02:50:15.54Z",
"zId": [
"5d5d7ce0c89852e7bad4a4ef"
]
}, // ...etc
]
Thanks for the documents. Sadly, I do not know any way to retrieve only the documents having the max timestamp field value.
The following query will allow you to filter by status and group by eId then get the max timestamp value, but it will not return the documents having the max timestamp value.
{
"size": 0,
"query": {
"term": {
"status": 1
}
},
"aggregations": {
"eId_group": {
"terms": {
"field": "eId"
},
"aggregations": {
"max_timestamp": {
"max": {
"field": "timestamp"
}
}
}
}
}
}
This second query uses a top_hits aggregation to retrieve the documents grouped by eId. The returned documents are sorted by decreasing timestamp value, so the documents having the max timestamp will come first, but you may also get documents with different timestamps.
{
"size": 0,
"query": {
"term": {
"status": 1
}
},
"aggregations": {
"eId_group": {
"terms": {
"field": "eId"
},
"aggregations": {
"max_timestamp": {
"max": {
"field": "timestamp"
}
},
"top_documents": {
"top_hits": {
"size": 20,
"sort": { "timestamp": "desc"}
}
}
}
}
}
}
I used the following mapping for the index
PUT /test_index
{
"mappings": {
"properties": {
"timestamp": {
"type": "date"
},
"eId": {
"type": "keyword"
},
"zId": {
"type": "keyword"
},
"status": {
"type": "keyword"
}
}
}
}
I want to group Dynamic Object keys of a json. The object values from each keys will be group to their respective key.
I tried using map and reduce to group it but the results are not grouped as I expected.
Here is my JSON Object
var data = [
{
"type": 6,
"data": {
"error": {
"cause": {
"root": {
"Extracted": {
"Body": {
"Error": {
"ErrorCode": "143",
"ErrorString": "NotFound",
"info": {
"Error": {
"errorDesc": "Data Not Found",
"subs": {
"attrib": {
"subs_name": "123com",
"subs_no": 4
}
}
}
}
}
}
}
}
}
}
},
"status": true
},
{
"type": 6,
"data": {
"error": {
"cause": {
"root": {
"Extracted": {
"Body": {
"Error": {
"ErrorCode": "143",
"ErrorString": "NotFound",
"info": {
"Error": {
"errorDesc": "Company Not Found",
"subs": {
"attrib": {
"subs_name": "QRS",
"subs_no": 4
}
}
}
}
}
}
}
}
}
}
},
"status": true
},
{
"type": 6,
"data": {
"error": {
"cause": {
"root": {
"Extracted": {
"Body": {
"Error": {
"ErrorCode": "123",
"ErrorString": "SystemFailure",
"info": {
"Error": {
"errorDesc": "Internal server error",
"subs": {
"attrib": {
"subs_name": "ABC",
"subs_no": 2
}
}
}
}
}
}
}
}
}
}
},
"status": true
},
{
"type": 6,
"data": {
"error": {
"cause": {
"root": {
"Extracted": {
"Body": {
"Error": {
"ErrorCode": "123",
"ErrorString": "SystemFailure",
"info": {
"Error": {
"errorDesc": "Insufficient Data",
"subs": {
"attrib": {
"subs_name": "DEF",
"subs_no": 3
}
}
}
}
}
}
}
}
}
}
},
"status": true
},
{
"type": 6,
"data": {
"error": {
"cause": {
"root": {
"Extracted": {
"Body": {
"Error": {
"ErrorCode": "999",
"ErrorString": "Unknown",
"info": {
"Unknown": {
"desc": "UnknownError",
"subs": "GHI"
}
}
}
}
}
}
}
}
}
},
"status": true
}
]
This is the code that I have tried but didn't get the result I want
// Turns every entry into its own single-key object: { <ErrorString>: [info] }.
// Entries sharing an ErrorString are NOT merged; each one yields a separate object.
var x = data.map(function (entry) {
  var errorNode = entry.data.error.cause.root.Extracted.Body.Error;
  var wrapped = {};
  wrapped[errorNode.ErrorString] = [errorNode.info];
  return wrapped;
});
console.log(x);
The result I got:
[
{
"NotFound": {....}
},
{
"NotFound": {....}
},
{
"SystemFailure": {....}
},
{
"SystemFailure": {....}
},
{
"Unknown": {....}
},
]
The result I expect:
[
{
"NotFound": [
{
"Error": {
"errorDesc": "Data Not Found",
"subs": {
"attrib": {
"subs_name": "123com",
"subs_no": 4
}
}
},
{
"Error": {
"errorDesc": "Company Not Found",
"subs": {
"attrib": {
"subs_name": "QRS",
"subs_no": 4
}
}
}
}
]
},
{
"SystemFailure": [
{
"Error": {
"errorDesc": "Internal server error",
"subs": {
"attrib": {
"subs_name": "ABC",
"subs_no": 2
}
}
},
{
"Error": {
"errorDesc": "Insufficient Data",
"subs": {
"attrib": {
"subs_name": "DEF",
"subs_no": 3
}
}
}
]
},
{
"Unknown": [
{
"Unknown": {
"desc": "UnknownError",
"subs": "GHI"
}
}
]
}
]
You could reduce the array. Create an accumulator object with each unique ErrorString as a key. Set the value to be an object with the same ErrorString as its key, holding an array. Then push each info object into that array based on its ErrorString. Use Object.values() to get the grouped values as an array.
// Sample input: five error reports, each carrying an ErrorString and an info payload.
const data = [
  {type: 6, data: {error: {cause: {root: {Extracted: {Body: {Error: {
    ErrorCode: "143", ErrorString: "NotFound",
    info: {Error: {errorDesc: "Data Not Found", subs: {attrib: {subs_name: "123com", subs_no: 4}}}}
  }}}}}}}, status: true},
  {type: 6, data: {error: {cause: {root: {Extracted: {Body: {Error: {
    ErrorCode: "143", ErrorString: "NotFound",
    info: {Error: {errorDesc: "Company Not Found", subs: {attrib: {subs_name: "QRS", subs_no: 4}}}}
  }}}}}}}, status: true},
  {type: 6, data: {error: {cause: {root: {Extracted: {Body: {Error: {
    ErrorCode: "123", ErrorString: "SystemFailure",
    info: {Error: {errorDesc: "Internal server error", subs: {attrib: {subs_name: "ABC", subs_no: 2}}}}
  }}}}}}}, status: true},
  {type: 6, data: {error: {cause: {root: {Extracted: {Body: {Error: {
    ErrorCode: "123", ErrorString: "SystemFailure",
    info: {Error: {errorDesc: "Insufficient Data", subs: {attrib: {subs_name: "DEF", subs_no: 3}}}}
  }}}}}}}, status: true},
  {type: 6, data: {error: {cause: {root: {Extracted: {Body: {Error: {
    ErrorCode: "999", ErrorString: "Unknown",
    info: {Unknown: {desc: "UnknownError", subs: "GHI"}}
  }}}}}}}, status: true},
];

/**
 * Groups the `info` payloads of the given entries by their ErrorString.
 *
 * @param {Array<object>} items - Entries shaped like the elements of `data`
 *   (ErrorString and info live under data.error.cause.root.Extracted.Body.Error).
 * @returns {object} Accumulator keyed by ErrorString; each value is
 *   `{ [ErrorString]: [info, ...] }`, in first-seen order.
 */
function groupInfoByErrorString(items) {
  return items.reduce((acc, o) => {
    const { ErrorString, info } = o.data.error.cause.root.Extracted.Body.Error;
    if (!(ErrorString in acc)) {
      acc[ErrorString] = { [ErrorString]: [] };
    }
    acc[ErrorString][ErrorString].push(info);
    return acc;
    // The accumulator has no prototype, so an ErrorString such as "constructor",
    // "toString" or "__proto__" is treated as ordinary data instead of hitting
    // an inherited Object.prototype member.
  }, Object.create(null));
}

const merged = groupInfoByErrorString(data);
// Drop the lookup keys and keep just the grouped objects as an array.
const output = Object.values(merged);
console.log(output)