I'm trying to fetch the most recently created items in a DynamoDB table. For that I'm using a pattern described by Alex DeBrie in his DynamoDB book, plus sharding.
When a new item is created in the table it also feeds a GSI with a GSIPK that is made out of the item creation day plus a random shard number between 0 and 9. The SK would be the item unique ID
GSI1
GSI1PK: truncated timestamp#[0-9]
GSI1SK: item id
There can be a few dozen recently created items, or thousands of items.
To fetch the most recent items I have three (3) parameters:
Date: The current day
Limit: total amount of items to fetch
Days:number of days back to look for items
As suggested by Alex Debrie book the method to retrieve the items is a recursive function with promises.
The problem that I'm facing is that my lambda function is very slow.
in the scenario that there are not so many items created recently, the function has to go through all the days+shards one after another to fetch items.
for example.
If I want to fetch the last 100 items in the last 7 days. and there are less than 100 items spread across the shards. The function will go through 70 Queries (7 days x 10 shards) and it takes around 10 seconds to finish
On the contrary, if I want to fetch 100 items in the last 7 days and hundreds of items were created recently, then it will take around a second to run.
items are small. around 400 bytes each.
I'm running an on-demand capacity dynamodb table
Lambda is configured with memorySize: 1536MB
Node.js 16.x
Any ideas on how I can make this run faster?
/**
 * Build the DynamoDB query params for one (day, shard) partition of GSI1.
 *
 * @param {Date} createdAt - day to query; truncated to midnight to form the partition key
 * @param {number} shard - shard suffix (0-9) appended to the partition key
 * @param {number} limit - maximum number of items DynamoDB should return
 * @returns {object} params object suitable for dynamoDb.query()
 */
const getQueryParams = (createdAt, shard, limit) => {
  const params = {
    TableName : "table",
    IndexName: 'GSI1',
    KeyConditionExpression: "#gsi1pk = :gsi1pk",
    ExpressionAttributeNames: {
      "#gsi1pk": 'GSI1PK'
    },
    ExpressionAttributeValues: {
      // BUG FIX: the original referenced an undefined `timestamp` variable;
      // the partition key must be derived from the `createdAt` parameter.
      ":gsi1pk": `${truncateTimestamp(createdAt).toISOString()}#${shard}` //e.g 2023-02-09T00:00:00.000Z#8
    },
    ScanIndexForward: false,
    Limit: limit
  };
  return params;
}
const getItems = async () => {
  // Items accumulated across all recursive queries (shared via closure).
  const items = []
  const number_of_days = 3;

  /**
   * Recursively query one (day, shard) partition at a time: shards 0-9
   * within a day, then one day back, until `limit` items are collected or
   * the day window is exhausted.
   *
   * BUG FIX: the original passed a shrinking `limit` down the recursion AND
   * compared the global `items.length` against that shrinking value, so when
   * results spanned several shards/days the function returned fewer than
   * `limit` items. The total `limit` is now kept constant; only the
   * per-query Limit shrinks.
   *
   * NOTE(review): the per-shard queries are independent; issuing a day's
   * 10 shard queries in parallel with Promise.all would cut latency roughly
   * 10x in the sparse-data case, at the cost of possible over-fetching.
   */
  const getLatestItems = async ({ createdAt = new Date(), limit = 100, days = 0, shard = 0 }) => {
    // Only ask DynamoDB for the number of items still missing.
    const remaining = limit - items.length;
    const query_params = getQueryParams(createdAt, shard, remaining);

    let data;
    try {
      data = await dynamoDb.query(query_params);
    } catch (error) {
      // Typo fixed in the message ("itmems" -> "items").
      throw new Error('Error getting all recent items');
    }

    if (data.Items) {
      for (const item of data.Items) {
        if (items.length < limit) {
          items.push(item);
        }
      }
    }
    if (items.length >= limit) {
      return items;
    }

    if (shard < 9) {
      // Same day, next shard. Copy the date instead of mutating the caller's.
      return getLatestItems({
        createdAt: new Date(createdAt),
        limit,
        days,
        shard: shard + 1,
      });
    }
    if (days < number_of_days) {
      // All shards of this day exhausted: step one day back, restart at shard 0.
      const previousDay = new Date(createdAt);
      previousDay.setDate(previousDay.getDate() - 1);
      return getLatestItems({
        createdAt: previousDay,
        limit,
        days: days + 1,
        shard: 0,
      });
    }
    return items;
  };

  return getLatestItems({});
};
/**
 * Lambda entry point: fetch the most recent items and log the elapsed time.
 *
 * @param {object} event - Lambda invocation event (unused).
 * @returns {Promise<Array>} the fetched items.
 */
export const main = async (event) => {
  const start = Date.now();
  // getItems() already returns a single promise — wrapping it in
  // Promise.all([...]) (as the original did) added nothing.
  const res = await getItems();
  const end = Date.now();
  console.log(`Execution time: ${end - start} ms`);
  return res; // the original computed `res` but never used or returned it
};
Related
Without going too much in the details, my question is, how would you go about reducing the repetition in NodeJS? I am very much a beginner so please have mercy.
I am getting an api with the information, and output my own api which is that information filtered and sorted according to the req.query parameters (from, to, date).
This is the code that works, but has too much repetition in it:
app.get('/search', async (req, res, next) => {
try {
const apiRes = await axios.get('https://thistheapi.net/api/TravelPrices');
result = apiRes.data;
searchFrom = req.query.from;
searchTo = req.query.to;
searchDate = req.query.date;
const routesArray = [];
for (let route of result) { routesArray.push(route) };
if (searchFrom.toLowerCase() == "mercury" && searchTo.toLowerCase() == "earth") {
finalResult = [];
// Finding and filtering the first flight
const fromFilterF1 = "Mercury";
// Create an array, which has the results of routes that match the req.query from name/ aka starting point
firstArrayF1 = routesArray.filter(obj => obj.routeInfo.from.name == fromFilterF1);
const toFilterF1 = "Venus";
// Filter the resulting array with the next 'to' location
secondArrayF1 = firstArrayF1.filter(obj => obj.routeInfo.to.name == toFilterF1);
// Create an array that has all the providers with their data for this specific route / flight
const providerArrayF1 = secondArrayF1.map(x => x.providers)
const trialArrayF1 = [];
for (let x of providerArrayF1) { for (let y of x) { trialArrayF1.push(y) } }
// Use the req.query selected date to filter all flights that match the date
dateFilterF1 = { flightStart: searchDate };
// options for the date variable, since in the api data it has specific time of day also added
const options = { year: 'numeric', month: 'numeric', day: 'numeric' };
thirdArrayF1 = trialArrayF1.filter(obj => new Date(obj.flightStart).toLocaleDateString('en-CA', options) == dateFilterF1.flightStart);
// Sort the resulting array of matching from-location, to-location, and date - starting from the earliest flights to the latest one
thirdArrayF1.sort((a, b) => { return new Date(a.flightStart) - new Date(b.flightStart) });
finalResult.push(thirdArrayF1[0]);
// ALL OF THIS REPEATS FOR THE SECOND & THIRD FLIGHT, except the flight start date/time has to be later than the flight end time of the previous flight
// Finding and filtering the second flight
if (thirdArrayF1.length == 0) { finalResult.push(null) } else {
const fromFilterF2 = "Venus";
firstArrayF2 = routesArray.filter(obj => obj.routeInfo.from.name == fromFilterF2);
const toFilterF2 = "Earth";
secondArrayF2 = firstArrayF2.filter(obj => obj.routeInfo.to.name == toFilterF2);
const providerArrayF2 = secondArrayF2.map(x => x.providers)
const trialArrayF2 = [];
for (let x of providerArrayF2) { for (let y of x) { trialArrayf2.push(y) } }
dateFilterF2 = { flightStart: thirdArrayF1[0].flightEnd };
thirdArrayF2 = trialArrayF2.filter(obj => new Date(obj.flightStart).toLocaleDateString('en-CA', options) >= dateFilterF2.flightStart);
thirdArrayF2.sort((a, b) => { return new Date(a.flightStart) - new Date(b.flightStart) });
finalResult.push(thirdArrayF2[0])
};
// Finding and filtering the third flight
if (thirdArrayF2.length == 0) { finalResult.push(null) } else {
const fromFilterF3 = "Earth";
firstArrayF3 = routesArray.filter(obj => obj.routeInfo.from.name == fromFilterF3);
const toFilterF3 = "Jupiter";
secondArrayF3 = firstArrayF3.filter(obj => obj.routeInfo.to.name == toFilterF3);
const providerArrayF3 = secondArrayF3.map(x => x.providers)
const trialArrayF3 = [];
for (let x of providerArrayF3) { for (let y of x) { trialArrayF3.push(y) } }
dateFilterF3 = { flightStart: thirdArrayF2[0].flightEnd };
thirdArrayF3 = trialArrayF3.filter(obj => new Date(obj.flightStart).toLocaleDateString('en-CA', options) >= dateFilterF3.flightStart);
thirdArrayF3.sort((a, b) => { return new Date(a.flightStart) - new Date(b.flightStart) });
finalResult.push(thirdArrayF3[0])
};
res.json(finalResult);
} else if (searchFrom.toLowerCase() == "mercury" && searchTo.toLowerCase() == "jupiter"){ etc...
As you can see, there is a lot of similar code, but I can't figure out how to make it more compact and less repetitive, without breaking the code and it stopping to work.
I appreciate all the help and advice!
Also, this is a snippet of the api that I use:
"legs":[{"id":"a0ee2c2b-667c-46d7-87c0-2ca32da88a46","routeInfo":{"id":"44edd88d-8904-4266-9df5-f37701741123","from":{"id":"0ee3379b-98fb-4b46-9aef-0a3a81a46ad4","name":"Earth"},"to":{"id":"a504bf72-2be2-4f2b-bab1-61d818757e3a","name":"Jupiter"},"distance":628730000},"providers":[{"id":"0257eab0-7c5c-4a4c-af79-cdf6f3ab9349","company":{"id":"27b1ce2f-c88a-45f4-96e1-dd9fcbb2db73","name":"Spacegenix"},"price":570774.60,"flightStart":"2022-02-04T07:17:16.4529653Z","flightEnd":"2022-02-08T13:57:16.4529653Z"},{"id":"e6ed4071-e29c-46a1-a38f-a082eff0e4de","company":{"id":"eb12838f-afb4-4447-9781-2d87b0641337","name":"Galaxy Express"},"price":180679.62,"flightStart":"2022-02-13T00:30:16.4529883Z","flightEnd":"2022-02-17T14:29:16.4529883Z"} et cetera.
Basically I'm doing custom connected flights between different locations. I am sure there is a way to make this less repetitive, but I can't figure it out.
I have written a cloud function that runs every 5 minutes on my Firebase app. In essence, the function gathers trends data from the Google Trends website and parses the JSON into a variable.
After doing so I want to then connect to the Twitter API and search for tweets using the trending topics fetched in the first part.
My Issue seems to lie with the second part. It fetches the data but the remainder of the function does not wait for the result before writing to Firebase.
I have tried two different methods but both don't seem to work as intended. I am struggling to understand how the function should wait for the second part to gather and store the information before writing to Firebase.
Method 1
// Method 1 (as posted): scheduled hourly at minute 5; fetches Google daily
// trends for Canada, then kicks off one Twitter search per trend.
// NOTE(review): the write to Firebase happens synchronously right after the
// for-loop, BEFORE any T.get callback has fired, so `twitterTrends` is still
// empty when persisted — this is the issue diagnosed in the answer below.
exports.callTo = functions.pubsub.schedule("5 * * * *").onRun((context) => {
let searchTrends;
// Build the current date parts for the trends request.
const ts = Date.now();
const dateOb = new Date(ts);
const date = dateOb.getDate();
// getMonth() is 0-based, hence the +1.
const month = dateOb.getMonth() + 1;
const year = dateOb.getFullYear();
const twitterTrends = [];
googleTrends.dailyTrends({
// NOTE(review): "YYYY-M-D" is not an ISO date string; Date parsing of
// non-ISO strings is implementation-dependent — verify.
trendDate: new Date(year + "-" + month + "-" + date),
geo: "CA",
}, function(err, res) {
if (err) {
functions.logger.error(err);
} else {
// Extract today's trending searches from the raw JSON response.
searchTrends = JSON.parse(res).default.trendingSearchesDays[0]
.trendingSearches;
functions.logger.info(searchTrends);
for (let i = 0; i < searchTrends.length; i++) {
functions.logger.log(searchTrends[i].title.query);
// Asynchronous callback — completes some time AFTER the loop and the
// dbRef.set() below have already run.
T.get("search/tweets", {q: searchTrends[i].title.query, count: 1},
function(err, data, response) {
if (err) {
functions.logger.error(err);
}
functions.logger.info("Twitter data" +
JSON.stringify(data.statuses));
twitterTrends[i] = JSON.stringify(data.statuses);
});
}
// Runs immediately after scheduling the Twitter calls; twitterTrends is
// still empty at this point.
const dbRef = admin.database().ref("searchTrends");
dbRef.set({google: searchTrends, twitter: twitterTrends});
}
});
});
Method 2
// Method 2 (as posted): same goal, attempted with async helper functions.
// NOTE(review): getTrends() is defined but never invoked in this handler,
// and the final dbRef.set() runs immediately, so `searchTrends` is still
// undefined and `twitterTrends` empty when they are written.
exports.callTo = functions.pubsub.schedule("5 * * * *").onRun((context) => {
let searchTrends;
const ts = Date.now();
const dateOb = new Date(ts);
const date = dateOb.getDate();
const month = dateOb.getMonth() + 1; // getMonth() is 0-based
const year = dateOb.getFullYear();
const twitterTrends = [];
async function getTrends(){
// NOTE(review): dailyTrends is callback-based and nothing here is awaited,
// so execution would continue to getTwitterTrends() before the callback
// has assigned searchTrends.
googleTrends.dailyTrends({
trendDate: new Date(year + "-" + month + "-" + date),
geo: "CA",
}, function(err, res) {
if (err) {
functions.logger.error(err);
} else {
searchTrends = JSON.parse(res).default.trendingSearchesDays[0]
.trendingSearches;
functions.logger.info(searchTrends);
}
});
await getTwitterTrends();
}
async function getTwitterTrends(){
for (let i = 0; i < 1; i++) {
// NOTE(review): if this ran before the dailyTrends callback fired,
// searchTrends would still be undefined and this access would throw.
functions.logger.log(searchTrends[i].title.query);
// T.get is callback-based as well; the `return "done"` below does not
// wait for these callbacks to complete.
T.get("search/tweets", {q: searchTrends[i].title.query, count: 1},
function(err, data, response) {
if (err) {
functions.logger.error(err);
} else {
functions.logger.info("Twitter data" +
JSON.stringify(data.statuses));
twitterTrends[i] = JSON.stringify(data.statuses);
}
});
}
return "done";
}
// Runs synchronously when the handler is invoked — before any of the async
// work above (which is never even started; see note on getTrends).
const dbRef = admin.database().ref("searchTrends");
dbRef.set({google: searchTrends, twitter: twitterTrends});
});
After checking your function it looks like a Promises issue. The reason you are seeing only the searchTrends data in Firestore is because the Firestore reference and upload is being done inside the callback for the dailyTrends method (taking for reference the method 1 code). However this does not wait for each request to the Twitter API to be resolved before writing to Firestore.
Based on the documentation for twit (which seems to be the wrapper you are using), it also supports standard promises. You could add each promise to an array, and then use Promise.all() to wait until they are all resolved to then write the data into Firestore. It would look something like this (which I haven’t tested since I don’t have Twitter API access).
exports.callTo = functions.pubsub.schedule("5 * * * *").onRun((context) => {
const ts = Date.now();
const dateOb = new Date(ts);
const date = dateOb.getDate();
const month = dateOb.getMonth() + 1;
const year = dateOb.getFullYear();
let searchTrends;
const twitterTrends = [];
const twPromises = [];
googleTrends.dailyTrends({
trendDate: new Date(year + "-" + month + "-" + date),
geo: "CA",
}, function(err, res) {
if (err) {
functions.logger.error(err);
} else {
searchTrends = JSON.parse(res).default.trendingSearchesDays[0]
.trendingSearches;
functions.logger.info(searchTrends);
for (let i = 0; i < searchTrends.length; i++) {
functions.logger.log(searchTrends[i].title.query);
twPromises.push(T.get("search/tweets", {q: searchTrends[i].title.query, count: 1})); // adds promises to the array
}
Promise.all(twPromises).then((responses) => { // runs when all promises from the array are resolved
responses.forEach((response) => {
twitterTrends.push(JSON.stringify(response.statuses));
})
const dbRef = admin.database().ref("searchTrends");
dbRef.set({google: searchTrends, twitter: twitterTrends});
})
}
});
});
I'm having an issue with a database query in Mongoose: I am setting the value but not getting the correct result, and I'm not sure why. I also want to optimize the query. With Mongoose, to count how many records match the query parameters (for pagination) I have to make one query, and to find the actual records with model.find({}) I have to make a separate query.
But actual problem is with pagination details I am trying to get
Example in below code if I set page = 1, page_size = 10 and my row_count is 3 then I suppose to get from 1, and to 1 but instead I am getting from 1 and to 11.
Not sure what I am doing wrong here.
// Generic Mongoose pagination helper (as posted — contains the reported bug:
// `to` is derived from `endIndex` (= page * page_size) and never capped at
// `resultCount`, so page=1, page_size=10, row_count=3 reports to=11; the
// accepted fix is shown below this snippet).
const pagination = async (model, query, page_number, page_size, order, order_by, next) => {
const pageS = parseInt(page_number) // NOTE(review): parseInt without a radix
let page = +pageS || 1;
const limit = parseInt(page_size)
let per_page = +page_size || 10;
// Clamp page and page size to at least 1.
if (page < 1) {
page = 1;
}
if (per_page < 1) {
per_page = 1;
}
// Zero-based offset of the first record on this page.
const startIndex = (
page - 1
) * per_page;
// NOTE(review): uses the raw `page_size` argument (possibly a string) rather
// than the sanitized `per_page`.
const endIndex = page * page_size
const key = `${order}`
// NOTE(review): `results` (next/previous links) is populated below but never
// returned — apparently dead code; verify against callers.
const results = {}
// here reading the data count from database
const resultCount = await model.countDocuments(query).exec();
if (endIndex < resultCount) {
results.next = {
page: page + 1,
page_size: limit
}
}
if (startIndex > 0) {
results.previous = {
page: page - 1,
page_size: limit
}
}
try {
// here trying to search the query with applied pagination
const data = await model.find(query)
.limit(per_page)
.skip(startIndex)
.sort({ [key] : order_by })
.exec()
// here I am passing details but not getting exact to and from; from is working expected but not to
// Example if I set page = 1, page_size = 10 and my row_count is 3 then I suppose to get from 1, and to 1 but intead I am getting from 1 and to 11.
const pagination_details = {
data: data,
meta: {
page,
page_size: per_page,
row_count: parseInt(resultCount, 10),
page_count: Math.ceil(resultCount / per_page ),
from:startIndex + 1,
to: endIndex + 1,
order: order,
order_by: order_by
}
}
return pagination_details
// NOTE(review): unreachable — this call sits after the return statement.
next()
} catch (e) {
// NOTE(review): the error is logged twice and swallowed; the caller
// receives `undefined` instead of a rejection.
console.log(e);
console.error(e);
}
};
Can anyone help me get the right data here? What mistake am I making? It might be something logical.
You have forgotten to divide the start and end indexes by the per_page to get page numbers, try replacing:
from:startIndex + 1,
to: endIndex + 1,
with:
from: Math.floor(startIndex / per_page) + 1,
to: Math.ceil(endIndex / per_page) + 1,
I want to access shopify api using Node.js with request method. I get first 50 items but i need to send the last id of the products i get as a response so it can loop through all the products until we don't have another id (i check that if the last array is not 50 in length.)
So when i get the response of lastID i want to feed that again to the same function until the Parraylength is not 50 or not 0.
The thing is, request works asynchronously, and I don't know how to feed the resulting lastID back into the same function in Node.js.
Here is my code
let importedData = JSON.parse(body);
//for ( const product in importedData.products ){
// console.log(`${importedData.products[product].id}`);
//}
lastID = importedData.products[importedData.products.length-1].id;
let lastIDD = lastID;
console.log(`This is ${lastID}`);
importedData ? console.log('true') : console.log('false');
let Prarraylength = importedData.products.length;
console.log(Prarraylength);
//console.log(JSON.stringify(req.headers));
return lastIDD;
});```
You can use a for loop and await to control the flow of your script in this case.
I'd suggest using the request-native-promise module to get items, since it has a promise based interface, but you could use node-fetch or axios (or any other http client) too.
In this case, to show you the logic, I've created a mock rp which normally you'd create as follows:
const rp = require("request-promise-native");
You can see we're looping through the items, 50 at a time. We're passing the last id as a url parameter to the next rp call. Now this is obviously going to be different in reality, but I believe you can easily change the logic as you require.
const totalItems = 155;
const itemsPerCall = 50;

// Mock items array...
const items = Array.from({ length: totalItems }, (v, n) => {
  return { id: n + 1, name: `item #${n + 1}` };
});

// Mock of request-promise (to show logic..)
// Replace with const rp = require("request-promise-native");
const rp = function (url) {
  // Parse the numeric cursor from the end of the url.
  // FIX: always pass a radix to parseInt (the original omitted it).
  let itemPointer = parseInt(url.split("/").slice(-1)[0], 10);
  return new Promise((resolve, reject) => {
    // Simulated network latency.
    setTimeout(() => {
      let slice = items.slice(itemPointer, itemPointer + itemsPerCall);
      itemPointer += itemsPerCall;
      resolve({ products: slice });
    }, 500);
  });
};

// Fetch pages of up to 50 items, passing the last seen id as the cursor for
// the next call, until a short page signals the end of the data.
async function getMultipleRequests() {
  let callIndex = 0;
  let lastID = 0;
  const MAX_CALLS = 20; // safety cap so a server bug cannot loop forever
  const EXPECTED_ARRAY_LENGTH = 50;
  for (let callCount = 1; callCount < MAX_CALLS; callCount++) {
    // Replace with the actual url..
    let url = "/products/" + lastID;
    let importedData = await rp(url);
    lastID = importedData.products[importedData.products.length - 1].id;
    console.log("Call #: " + ++callIndex + ", Item count: " + importedData.products.length + ", lastID: " + lastID);
    if (importedData.products.length < EXPECTED_ARRAY_LENGTH) {
      console.log("Reached the end of products...exiting loop...");
      break;
    }
  }
}

// FIX: the original left this promise floating; any async failure (e.g. an
// empty final page making products[length - 1] undefined) would surface as
// an unhandled rejection.
getMultipleRequests().catch((err) => console.error(err));
I am just trying to find the right sequence among N possible sequences. Suppose we have 5 machines and 20 jobs that have to be done on those machines. There are 20! — that is, 2,432,902,008,176,640,000 — possible sequences in which to do it. We have to find the best sequence based on completion time.
Unfortunately I am a little bit confused about how to find the most time-efficient sequence.
I'm stuck after producing the possible sequences, and I don't know how to pick the right one.
My try
// Number of machines available for scheduling.
var howManyMachines = 2;

// Jobs to be distributed, each with an id and a time to complete.
var Sequenxe = [
  { jobId: 1, timeToFinish: 5 },
  { jobId: 2, timeToFinish: 4 },
  { jobId: 3, timeToFinish: 4 },
];

// Build one descriptor per machine: an id, an accumulated-time counter, a
// job list, and a human-readable 1-based name ("M1", "M2", ...).
var machines = Array(howManyMachines).fill().map((machine, index) => ({
  id: index,
  value: 0,
  jobs: [],
  name: "M" + (index + 1),
}));
// Return every ordering (permutation) of the given array of items.
// A single-element array is its own (only) permutation; larger arrays are
// built by taking each element in turn as the head and recursing on the rest.
function permutations(items) {
  if (items.length == 1) return [items];
  const combos = [];
  items.forEach((first, index) => {
    // All items except the one chosen as the head.
    const rest = items.slice(0, index).concat(items.slice(index + 1));
    for (const tail of permutations(rest)) {
      combos.push([first].concat(tail));
    }
  });
  return combos;
}
// Enumerate every possible job ordering and inspect each one.
const allSequence = permutations(Sequenxe);
console.log(`${allSequence.length} Sequence to test`);
console.log(`${machines.length} Machines Available`);
for (const singleSequence of allSequence) {
  console.log("===>", singleSequence);
  // Placeholder from the question: evaluating each sequence against the
  // machines was never implemented.
}
I think the only way to get a perfect solution is to check all the possibilities.
If you care about performance, this approach should give you a correct solution in most cases while being reasonably quick ...
Main steps area:
Sort jobs by timeToFinish, longest to shortest
Add first job to the shortest thread
Sort threads by total time of execution, shortest to longest
Go to 2 and repeat until no more jobs available
var machines = 2;
var jobs = [{
  jobId: 1,
  timeToFinish: 5
}, {
  jobId: 2,
  timeToFinish: 4
}, {
  jobId: 3,
  timeToFinish: 4
}];

// Longest-processing-time-first greedy: sort jobs descending by duration.
jobs.sort((a, b) => b.timeToFinish - a.timeToFinish);

// One entry per machine.
// FIX: use the `machines` variable instead of a hard-coded 2, and use
// Array.from so each slot gets its OWN object — new Array(n).fill({...})
// makes every slot share a single reference.
var threads = Array.from({ length: machines }, () => ({
  jobs: [],
  totalTime: 0
}));

while (jobs.length > 0) {
  threads = threads.map(t => {
    // FIX: `const` — the original assigned `j` without declaring it,
    // creating an implicit global.
    const j = jobs.shift();
    return j ? {
      jobs: t.jobs.concat(j),
      totalTime: t.totalTime + j.timeToFinish
    } : t;
  });
  // Keep the least-loaded thread first so it receives the next (longest) job.
  threads.sort((a, b) => a.totalTime - b.totalTime);
}

console.log(JSON.stringify(threads, null, 2))
Best according to time of completion sounds like deadline scheduling.
Planning of these large jobs in advance sounds like the knapsack problem. I'd give knapsack.js a try. Source code is on GitHub.
You might do as follows; It will generate 20 jobs with random time for each and then will evenly distribute them into 5 machines.
// Distribute `jobs` across `machineCount` machines, greedily assigning each
// job (longest first) to the machine with the smallest accumulated time.
// Returns an array of per-machine job arrays, each carrying a `sum` property
// with its accumulated job time. Note: sorts the caller's `jobs` in place.
function groupTasks(jobs, machineCount) {
  var sum = jobs.reduce((total, job) => total + job.time, 0);
  // One bucket per machine: an array with a `sum` accumulator attached.
  var initial = [...Array(machineCount)].map(() => {
    var bucket = [];
    bucket.sum = 0;
    return bucket;
  });
  console.log("total number of jobs:", jobs.length, "\n");
  console.log("total job time:", sum, "\n");
  console.log("number of machines:", machineCount, "\n");
  console.log("target total job time per machine:", sum / machineCount, "\n");
  jobs.sort((a, b) => b.time - a.time);
  return jobs.reduce((machines, job) => {
    // Pick the least-loaded machine; `<=` reproduces the original reduce's
    // tie-breaking (a later machine wins ties).
    var machine = machines[0];
    for (var k = 1; k < machines.length; k++) {
      if (machines[k].sum <= machine.sum) machine = machines[k];
    }
    machine.push(job);
    machine.sum += job.time;
    return machines;
  }, initial);
}
// Generate 20 jobs with random durations in [1, 10] ...
var jobs = [...Array(20)].map((_, i) => ({ id: i, time: ~~(Math.random() * 10) + 1 }));
// ... and spread them evenly over 5 machines.
var result = groupTasks(jobs, 5);
console.log("jobs: \n", JSON.stringify(jobs));
console.log("jobs per machine:", "\n", JSON.stringify(result));