Retrieve paginated data recursively using promises - javascript

I'm using a function which returns data in a paginated form. So it'll return max 100 items and a key to retrieve the next 100 items. I want to retrieve all the items available.
How do I recursively achieve this? Is recursion a good choice here? Can I do it any other way without recursion?
I'm using Bluebird 3x as the promises library.
Here is a snippet of what I'm trying to achieve:
// Entry call: start with no pagination token and consume the combined list.
getEndpoints(null, platformApplication)
.then(function(allEndpoints) {
// process on allEndpoints
});
/**
 * Requests one page of endpoints for a platform application and tries to
 * append the following pages to it.
 *
 * @param {?string} nextToken - Pagination token; when falsy no NextToken is sent.
 * @param {string} platformApplication - Value sent as PlatformApplicationArn.
 * @returns The result of chaining .then on listEndpointsByPlatformApplicationAsync,
 *          whose handler returns data.Endpoints.
 */
function getEndpoints(nextToken, platformApplication) {
var params = {
PlatformApplicationArn: platformApplication
};
if (nextToken) {
params.NextToken = nextToken;
}
return sns.listEndpointsByPlatformApplicationAsync(params)
.then(function(data) {
if (data.NextToken) {
// There is more data available that I want to retrieve.
// But the problem here is that getEndpoints return a promise
// and not the array. How do I chain this here so that
// in the end I get an array of all the endpoints concatenated.
// NOTE(review): moreEndpoints is the value returned by getEndpoints (the
// result of a .then chain), not an array, so it cannot be used as one on
// the next line; the recursive result has to be returned/chained instead.
var moreEndpoints = getEndpoints(data.NextToken, platformApplication);
moreEndpoints.push.apply(data.Endpoints, moreEndpoints);
}
return data.Endpoints;
});
}
But the problem is that if there is more data to be retrieved (see if (data.NextToken) { ... }), how do I chain the promises up so that in the end I get the list of all endpoints etc.

Recursion is probably the easiest way to get all the endpoints.
/**
 * Convenience entry point: starts the paginated retrieval from the first page.
 *
 * @param {string} platformApplication - Forwarded unchanged to getEndpoints.
 * @returns Whatever getEndpoints returns when called with a null token.
 */
function getAllEndpoints(platformApplication) {
  // A null token means "no NextToken yet", i.e. begin at the first page.
  const firstPageToken = null;
  return getEndpoints(firstPageToken, platformApplication);
}
/**
 * Collects the endpoints of a platform application by following NextToken
 * from one page to the next.
 *
 * @param {?string} nextToken - Token of the page to request; a falsy value
 *   sends no NextToken.
 * @param {string} platformApplication - Value sent as PlatformApplicationArn.
 * @param {Array} [endpoints=[]] - Accumulator; every page is appended to this
 *   same array, which is also the final result.
 * @returns The chained result of listEndpointsByPlatformApplicationAsync,
 *   settling with the accumulator once a page has no NextToken.
 */
function getEndpoints(nextToken, platformApplication, endpoints = []) {
  const request = nextToken
    ? { PlatformApplicationArn: platformApplication, NextToken: nextToken }
    : { PlatformApplicationArn: platformApplication };

  return sns.listEndpointsByPlatformApplicationAsync(request).then((page) => {
    // Append in place so every recursion level shares one accumulator.
    endpoints.push.apply(endpoints, page.Endpoints);

    if (!page.NextToken) {
      return endpoints;
    }
    // Returning the recursive call lets the outer chain adopt its result.
    return getEndpoints(page.NextToken, platformApplication, endpoints);
  });
}

For a more general purpose example of recursively getting and returning data from a paginated endpoint, here is what I came up with:
/**
 * Recursively retrieves every page from a callback-style paginated endpoint.
 *
 * The endpoint gives no "next" token, so a page shorter than `pageSize` is
 * treated as the last one.
 *
 * @param {number} page - Page number to request.
 * @param {Array} [dataArray=[]] - Items collected so far (not mutated).
 * @param {number} [pageSize=50] - Items requested per page; also the
 *   threshold that decides whether another page is requested.
 * @returns {Promise<Array>} Resolves with all items from `page` onward
 *   appended to `dataArray`; rejects with the endpoint's error.
 */
function getData(page, dataArray = [], pageSize = 50) {
  return new Promise((resolve, reject) => {
    getEndpointHere({ page, pageSize }, (err, result) => {
      if (err) {
        console.error("there was a problem retrieving your data");
        // Reject so callers are notified; previously the promise never settled.
        reject(err);
        return;
      }

      const collected = dataArray.concat(result);
      if (result.length < pageSize) {
        resolve(collected);
      } else {
        // Resolving with a promise makes this promise follow the next page.
        resolve(getData(page + 1, collected, pageSize));
      }
    });
  });
}
// Start at page 1 with an empty accumulator and log the combined result.
getData(1, [])
  .then((res) => {
    console.log("SEVERAL PAGES OF DATA", res);
  })
  .catch((err) => {
    // Report failures instead of leaving the rejection unhandled.
    console.error(err);
  });
This example is employing a callback -- (err, result) -- however, that logic could be extracted out to handle the response from the endpoint. The resource I was consuming does not return a cursor or "next" token indicating if there are more records, so I used the logic of if the response has less than 50 records as the basis for continuing to request more data.

Related

Using the result from one API as a parameter for another and inserting both results into a JSON array

So I have 2 APIs:
localhost:8080/projects (gives me the list of the projects I have)
localhost:8080/tasks/{projectid} (gives me the list of tasks related to the project ID I used as parameter)
I'm using nodejs to fetch the results of the first API, then insert them into a JSON array and at the same time get each id from that same request and use them in the second api to fetch the tasks, finally I'd fetch those tasks and insert them into the same JSON array as the projects.
But I've run into a problem, after completing the operation I'd try to show the JSON array just to find out that it only contains the project names (which are the results from the first API), without finding the info from the second one.
Here's the methods I used:
first I made 2 methods, one to get the projects, and one to get the tasks, I used callbacks in both methods:
/**
 * Fetches the project list and reports it through a Node-style callback.
 *
 * @param {function(?string, *=)} callback - Called with ('error') on failure,
 *   or (undefined, body) with the parsed response body on success.
 */
// NOTE(review): the URL has no scheme (e.g. "http://"); confirm the request
// library accepts it as written.
const projects = (callback) => {
  const url = 'localhost:8080/projects';
  // The response is not destructured in the parameter list: on a failed
  // request it may be undefined, and destructuring it would throw.
  request({ url, json: true }, (error, response) => {
    if (error) {
      callback('error');
      return;
    }
    callback(undefined, response.body);
  });
};
/**
 * Fetches the tasks of one project and reports them through a Node-style
 * callback.
 *
 * @param {string|number} projectid - Project id; URL-encoded into the path.
 * @param {function(?string, *=)} callback - Called with
 *   ('unable to find data') on failure, or (undefined, body) on success.
 */
// NOTE(review): the URL has no scheme (e.g. "http://"); confirm the request
// library accepts it as written.
const tasks = (projectid, callback) => {
  const url = `localhost:8080/tasks/${encodeURIComponent(projectid)}`;
  // The response is not destructured in the parameter list: on a failed
  // request it may be undefined, and destructuring it would throw.
  request({ url, json: true }, (error, response) => {
    if (error) {
      callback('unable to find data');
      return;
    }
    callback(undefined, response.body);
  });
};
then I used express to define routes for these 2 methods:
// GET /projects: relays the outcome of func.projects to the client, either
// as { error } or as the body it produced.
app.get('/projects', (req, res) => {
  func.projects((error, body) => {
    if (!error) {
      res.send(body);
      return;
    }
    return res.send({ error });
  });
});
// GET /tasks?code=<id>: passes the `code` query parameter to func.tasks and
// relays the outcome, either as { error } or as the body it produced.
app.get('/tasks', (req, res) => {
  const projectCode = req.query.code;
  func.tasks(projectCode, (error, body) => {
    if (!error) {
      res.send(body);
      return;
    }
    return res.send({ error });
  });
});
Finally, I tried to fetch the data from these 2 routes into a JSON array using JavaScript and show them in a Handlebars page:
// Loads all projects, then requests the tasks of each project, collecting
// everything into data.todo before handing it to gantt.parse.
fetch('/projects').then((response) => {
response.json().then((res) => {
if (res.error) {
console.log(res.error)
} else {
// NOTE(review): `data` is assigned without a declaration in this snippet.
data = { todo: [] }
//insert each project into our json array
res.forEach((project) => {
data.todo.push({ id: project.id, text: project.id })
//Now we'll get the tasks using the project id as argument
// This fetch only starts the request; its .then callbacks run later.
fetch('/tasks?code='+encodeURIComponent(project.id)).then((response)=>{
response.json().then((res)=>{
if(res.error){
// NOTE(review): `error` is not declared anywhere in this snippet; the
// condition above tested res.error.
console.log(error)
} else {
//add each task to the same array
res.forEach((task) => {
// NOTE(review): `item` is not declared in this snippet; the enclosing
// loop variable is `project`.
data.todo.push({text: task.desc, parent: item.id})
})
}
})
})
})
// Runs right after the forEach above has started the task requests, i.e.
// before any of their .then callbacks have added tasks to data.todo.
gantt.parse(data);
}
})
})
Sorry for the long post I just wanted to explain all the details. Thanks.
Edit: The goal of my program was to use the JSON object as an argument for a Gantt chart, which is why I called the function named gantt. When I pass the JSON object, the function only shows the projects but not the tasks.
Even so, when I use the console.log function it shows that my object does indeed contain the tasks. I think the problem here is that the function executes both forEach loops at the same time, which is why it doesn't return the tasks, since it doesn't have the required argument yet.
I hope you understand that fetch is asynchronous and that's why you have a then block where code is executed after the asynchronous operation is done. I have taken your example and then added inline comments with numbers indicating the flow of execution.
// The numbered comments (1-5) mark the order in which the steps actually run.
// 1. Starts execution.
fetch('/projects').then((response) => {
response.json().then((res) => {
if (res.error) {
console.log(res.error)
} else {
// 2. Res has the list of projects.
data = { todo: [] }
//insert each project into our json array
res.forEach((project) => {
data.todo.push({ id: project.id, text: project.id })
// 3. Fetch is called for each project.
//Now we'll get the tasks using the project id as argument
fetch('/tasks?code='+encodeURIComponent(project.id)).then((response)=>{
response.json().then((res)=>{
if(res.error){
console.log(error)
} else {
// 5. Data is pushed to the array (only after step 4 has already run).
//add each task to the same array
res.forEach((task) => {
data.todo.push({text: task.desc, parent: item.id})
})
}
})
})
})
// 4. parse is called.
gantt.parse(data);
}
})
})
As you can see, gantt.parse(data) is called before data arrives for the subsequent fetch requests. This means you have to wait for all requests to complete before calling gantt.parse(data). You can use Promise.all for that.
You also mentioned that console.log shows the data. That is because console.log usually works by reference. Try console.log(JSON.stringify(data)) and you'll see the missing data.
To round it off, I'll add an example using Promise.all using a public API.
// Demonstrates waiting for several dependent requests with Promise.all:
// load the todos, request the comments of each one, and use the combined
// data only after every request has finished.
fetch('https://jsonplaceholder.typicode.com/todos')
  .then((response) => response.json())
  .then((todos) => {
    const data = {
      todos: [],
    };

    // Loads the comments of one todo and appends them to data.todos.
    const fetchComments = (id) =>
      fetch(`https://jsonplaceholder.typicode.com/todos/${id}/comments`)
        .then((res) => res.json())
        .then((comments) => {
          comments.forEach((comment) => {
            data.todos.push({
              id: comment.id,
              parent: id,
            });
          });
        });

    const requests = [];
    // I'm just taking the first 5 todos.
    for (const todo of todos.slice(0, 5)) {
      data.todos.push({
        id: todo.id,
        title: todo.title,
      });
      requests.push(fetchComments(todo.id));
    }

    // Returned so that a failure in any request reaches the .catch below.
    return Promise.all(requests).then(() => {
      console.log(data);
      // Here is where you will call gantt.parse
    });
  })
  .catch((error) => {
    // Report network or parsing failures instead of leaving them unhandled.
    console.error(error);
  });

Async, waterfall issue

Here i am trying to retrieve all the objects and push them into the json file. For some reason there is only one record being pushed into file when it should contain more objects. The response is being sent even before the execution. Can you help me out with this or let me know where I am going wrong? Here is my code:
// Request handler: runs an outer async.waterfall whose second step iterates
// over `results` with async.eachLimit (limit 100). For every entry it looks
// up the matching news document, pushes a summary object onto Responsearray
// and writes the whole array to abc.json. The final callback sends the JSON
// response.
exports.createjoson = (req, res) => {
const Responsearray = [];
async.waterfall(
[
function(waterfallCb) {
// ... first function
},
function(results, waterfallCb1) {
//second function
async.eachLimit(
results,
100,
function(singleResult, eachCallback) {
async.waterfall(
[
// NOTE(review): in this snippet innerWaterfallCb is only invoked on the
// error paths; no success call is visible after the file is written.
async function(innerWaterfallCb) {
try {
NewsModel.find(
{ _id: singleResult.newsId }, // #individual article
async (err, newsResult) => {
if (err) {
return innerWaterfallCb(
// #displaying error
"error in fetching news data"
);
}
// Tracks which item.source values were already seen for this lookup.
const map = new Map();
for (const item of newsResult) {
if (!map.has(item.source)) {
map.set(item.source, true);
// NOTE(review): `Response` is assigned without a declaration in this
// snippet, and each new source overwrites the previous value.
Response = {
newsId: item._id,
title: item.title,
comment: singleResult.comment
};
}
}
// NOTE(review): `resPond` is also assigned without a declaration here.
resPond = await Response;
Responsearray.push(resPond);
let data = JSON.stringify(Responsearray);
// Rewrites abc.json with the full array on every iteration.
await fs.writeFileSync("abc.json", data);
}
);
} catch (error) {
innerWaterfallCb(error);
}
}
],
// Inner waterfall completion: forwards the outcome to eachLimit.
function(err) {
if (err) {
return eachCallback(err);
}
eachCallback(null);
}
);
},
// eachLimit completion: forwards the outcome to the outer waterfall.
function(err) {
if (err) {
return waterfallCb1(err);
}
waterfallCb1(null);
}
);
}
],
// Outer waterfall completion: note the error branch still uses HTTP status
// 200 and reports "400" only inside the JSON body.
function(err) {
if (err) {
return res.status(200).json({ status: "400", message: err });
}
res.status(200).json({ status: "200", message: "success" });
}
);
};
There are a number of problems with the code:
fs.writeFileSync will overwrite the file, not append to it, so only the last data you write will be in abc.json. Also it does not return a Promise so there is no need to use await on it. It runs synchronously so will not return until it's complete (that's what the Sync in its function name means). To append instead of overwrite the file, you can set the flag option to "a" to append (the default is "w").
There doesn't seem to be a call to return innerWaterfallCb(null) anywhere - only in error conditions. The inner waterfall function shouldn't be marked async since it doesn't need to do any await calls really. But you should call return innerWaterfallCb(null) after the file is written.
It may be better to just collect the data in responseArray and write the file once at the end of the outer waterfall instead of writing it repeatedly deep inside the inner waterfall.
Variables should start with lowercase letters (like responseArray, not Responsearray), since an uppercase first letter usually indicates a class or module.
Don't mix async/await with the async module (waterfall and eachLimit). If you're using proper Promises and async/await then there should be no need to use the async module. It would be cleaner to remove the use of waterfall entirely and rewrite to use Promise objects properly.

How do I skip a callback when running .map in JavaScript/Node.js?

Kind of a sequel to this question, I need to accept multiple objects in a POST request and then for each object process it, save it, and then return the saved object to the frontend (so that the client can see which columns were successfully edited).
When I use .map, it does save to the database and I can confirm this. However, I have two problems:
It does not execute res.locals.retval.addData(dtoObject); correctly, and my returning payload has no data transfer objects inside of it.
My object validation cannot be done inside of the callback of map. I initially tried reduce, but that didn't work at all and just saved all the same values to each database object. How can I exclude invalid JSON objects while I'm mapping them?
// Processes every object of the POST body: objects with id 0 are built as
// new database objects, all others are looked up by id; each one then goes
// through baseExtract -> extract -> clean -> save.
var jsonObjects = req.body;
//for (var n in req.body) {
// NOTE(review): the callback below has no return statement in either branch,
// so `promises` holds undefined entries rather than the promise chains.
var promises = jsonObjects.map((jsonObject) => {
var transform = new Transform();
// VALIDATION OF jsonObject VARIABLE IS HERE
if (jsonObject.id == 0) {
var databaseObject = Database.getInstance().getModel(objectName).build(jsonObject);
transform.setNew(true);
transform.setJsonObject(jsonObject);
transform.setDatabaseObject(databaseObject);
// This chain is started but not returned from the map callback.
transform.baseExtract()
.then(() => transform.extract())
.then(() => transform.clean())
.then(() => transform.getDatabaseObject().save())
.then(function(data) {
// PROCESSING DATA
}).catch((e) => {
// ERROR
});
} else {
var queryParameters = {
where: {id: jsonObject.id}
};
console.log("Query parameters: ");
console.log(queryParameters);
// This chain is started but not returned from the map callback.
Database.getInstance().getModel(objectName).findOne(queryParameters).then((databaseObject) => {
transform.setJsonObject(jsonObject);
transform.setDatabaseObject(databaseObject);
})
.then(() => transform.baseExtract())
.then(() => transform.extract())
.then(() => transform.clean())
.then(() => transform.getDatabaseObject().save())
.then((data) => {
// PROCESSING DATA
}).catch((e) => {
// ERROR
});
}
});
// Because `promises` contains no promises, this does not wait for the saves
// started above before calling next().
Promise.all(promises)
.then((results) => {
return next();
}).catch((e) => {
throw e;
});
Here's the resulting payload:
{
"errors": [],
"warnings": [],
"data": []
}
As @KevinB said in the comments, you are missing the return statements inside your arrow functions. The database saves still go through because they are part of the promise chains, but the pushes to the response are not waited for, so the Express.js call resolves before the promises do. Add return Database.getInstance() and return transform.baseExtract() to your code to fix this.
Use Array.prototype.filter() to remove elements you want to ignore since you won't ever need to execute Promises on them, then call Array.prototype.map() on the resulting array. If you don't want to use the arrow functions, you can specify this as a parameter to filter and map:
// filter returns a new array, so its result must be kept and mapped;
// calling map on jsonObjects itself would still include the rejected items.
var validObjects = jsonObjects.filter(function(jsonObject) {
  // return true to keep jsonObject, false to skip it
}, this);
var promises = validObjects.map(function(jsonObject) {
  // return the promise chain for jsonObject
}, this);

How ensure async request has finished before running a function

I am performing an async request to pull data from a server and then call a function after the request. My question is how do I ensure the request is complete and all data loaded before processRecords() runs?
Thanks in advance.
// Requests every record id from Server, collects the records delivered to
// the callbacks, and passes the collection to processRecords.
function getRecords () {
var ids = Server.getIds();
var allTheRecords = [];
ids.forEach(function(recordId) {
// Each record arrives through this callback; errors are only logged.
Server.getRecord(recordId, function (error, data) {
if(error) {
console.log(error);
} else {
allTheRecords.push(data);
};
});
});
// NOTE(review): this runs as soon as the forEach returns. If Server.getRecord
// invokes its callback asynchronously (presumably it does), allTheRecords is
// still empty at this point.
processRecords(allTheRecords);
}
How are you performing the Asynchronous request? If it's an AJAX request, the API provides for callbacks to be supplied based on the result of the call.
https://developer.mozilla.org/en-US/docs/Web/API/XMLHttpRequest
You could use the native Promise api to perform the async actions for you.
Using Promise.all you can give it an array of promises that need to be resolved before calling the processRecords function.
It is also now more reusable, as you have a getRecord function that you could use elsewhere in your code.
You should probably think of a way to add in the ability to get multiple records from the server if you control it though. You don't really want to fire off a bunch of network requests if you can do it in just one.
// Stand-in for the Server API from the question: getIds returns a fixed id
// list and getRecord answers immediately with an object holding the id.
const Server = {
  getRecord: (id, callback) => {
    console.log('getRecord', id)
    const record = { id: id }
    callback(null, record)
  },
  getIds: () => [1, 2, 3]
}
/**
 * Loads the record for every id and hands the complete list to
 * processRecords once all of them have arrived.
 *
 * @param {Array} ids - Ids to load; each one is passed to getRecord.
 * @param {function(Array)} processRecords - Receives the records in the same
 *   order as `ids`.
 * @returns {Promise} Settles after processRecords has run, or after a
 *   failure has been logged; returned so callers can wait for completion.
 */
function getRecords (ids, processRecords) {
  console.log('getRecords', ids.join())
  // mapping the array of id's will convert them to an
  // array of Promises by calling getRecord with the id
  return Promise.all(ids.map(getRecord))
    // then is called once all the promises are resolved
    .then(processRecords)
    // this will be called if the reject function of any
    // promise is called
    .catch(console.error.bind(console))
}
/**
 * Promise adapter around the callback-based Server.getRecord.
 *
 * @param {*} recordId - Id passed through to Server.getRecord.
 * @returns {Promise} Resolves with the data given to the callback, or
 *   rejects with its error when that error is truthy.
 */
function getRecord(recordId) {
  const executor = (resolve, reject) => {
    // Translate the (error, data) callback into the promise's outcome.
    const onResult = (error, data) => {
      if (!error) {
        resolve(data)
        return
      }
      reject(error)
    }
    Server.getRecord(recordId, onResult)
  }
  return new Promise(executor)
}
// Demo run: load every id the server reports and print the loaded records.
getRecords(Server.getIds(), (loadedRecords) => {
  console.log('resolved all promises')
  console.log(loadedRecords)
})

Sequelize JS node updating multiple entities with new data from previous promise

I am new to sequelize and NodeJS promises in general. My app basically saves tweets from the Twitter API, but also needs to update some of the saved tweets' data in realtime, like the Retweet count or the Liked count.
But it seems like after fetching the new data, when trying to run all the update on my tweet instances, nothing happens. The promise doesn't go through.
To sum up : I find 100 saved tweets, chain on a callback that fetches their new data from Twitter, and then chain on updating every single 100 tweets with the new data. The later update doesn't go through.
// Loads the 100 most recent saved tweets, asks Twitter for their current
// data, and tries to write the new retweet/favorite counts back one by one.
// The outcome is reported through `callback`.
var Sequelize = require('sequelize');
...
//Getting 100 tweets previously saved in DB
// NOTE(review): the SQL reads "WHERE ORDER BY" with no condition after
// WHERE; confirm the intended filter.
Sequelize.query("SELECT * FROM tweets WHERE ORDER BY id DESC LIMIT 100", { model: Model }).then(function(result) {
if(result.length == 0) {
callback(false);
} else {
var ids = [];
// Index of the saved rows by id_str; it is filled here but not read again
// in this snippet.
var realData = {};
for (var i in result) {
realData[result[i].dataValues.id_str] = result[i];
ids.push(result[i].dataValues.id_str);
}
//getting twitter data for 100 tweets previously saved in DB
twitVendor.get('statuses/lookup', {
id : ids.join(',')
}, function (err, tweets, response) {
// NOTE(review): this only treats `undefined` as success; if the library
// passes null when there is no error, the else branch runs. Confirm.
if (typeof err == 'undefined') {
//to get a synchronous saving of all tweets
//this could be cleaned up with a Sequelize.Promise.push(...)
var i = 0;
var saving = false;
// NOTE(review): `i` and `saving` are only changed inside the .then/.catch
// callbacks below. Those callbacks cannot run while this synchronous loop
// is still spinning, so after the first query is issued the loop never
// advances and the code after it is never reached.
while (i < tweets.length) {
if (!saving) {
saving = true;
console.log('Updating tweet ', tweets[i].id_str);
//updating tweet with new data from twitter
// The statement is built by string concatenation from the tweet fields.
Sequelize.query("UPDATE tweets SET retweet_count = "+tweets[i].retweet_count+", favorite_count = "+tweets[i].favorite_count+" WHERE id_str = '"+tweets[i].id_str+"'", {
model: Model
}).then(function(result) {
console.log('Updated tweet');
saving = false;
i++;
}).catch(function (err) {
console.log('Failed to update post ', err);
saving = false;
i++;
});
}
}
callback(true);
console.log("Updated tweets");
} else {
console.log("Failed :", err);
callback(false, err);
}
});
}
}).catch(function (err) {
console.log("Failed :", err);
callback(false, err);
})
EDIT : If you want to execute the above code, I'd recommend using this Twit to hit the Twitter API : https://github.com/ttezel/twit
To get credentials to hit the API, you will need to set up an app on Twitter though : https://apps.twitter.com/
EDIT 2 : I already tried to use transactions and pure Sequelize functions to make my queries, but the issue persisted.
Don't nest promises inside of promises. Instead, chain them by returning promises. If you are returning something that is not a promise, use Promise.resolve(value) to turn it into a promise. And certainly don't put promises inside of callbacks, or even mix them at all; instead create a promise that calls the action, and then in the callback resolve the promise.
Here's my attempt to rewrite what you're trying to do. You may need to wrap the first in a Promise.resolve to take advantage of returning the new promise:
// Flat promise chain: load the saved tweets, fetch their current data from
// Twitter, update all rows in parallel, then report the outcome through
// `callback` (true on success, false on any failure).
//
// NOTE(review): the SELECT is kept as written; "WHERE ORDER BY" has no
// condition after WHERE, so confirm the intended filter before running.
// `Sequelize` is used as the query handle here, as in the question.
Sequelize.query("SELECT * FROM tweets WHERE ORDER BY id DESC LIMIT 100"
  , { model: Model }).then(function (results) {
    if (results.length === 0) {
      // Reject so the single .catch at the end handles the empty case.
      return Promise.reject(new Error("no saved tweets found"));
    }

    var ids = results.map(function (row) {
      return row.dataValues.id_str;
    });

    // Wrap the callback-style Twitter call so it can take part in the chain.
    return new Promise((resolve, reject) => {
      twitVendor.get('statuses/lookup', {
        id: ids.join(',')
      }, function (err, tweets, response) {
        if (err) {
          reject(err);
          return; // do not fall through to resolve after a failure
        }
        resolve(tweets);
      });
    });
  }).then(function (tweets) {
    // Updates one row; values are bound as named replacements instead of
    // being concatenated into the SQL text.
    function updateTweet(tweet) {
      return Sequelize.query(
        "UPDATE tweets SET retweet_count = :retweetCount, favorite_count = :favoriteCount WHERE id_str = :idStr",
        {
          replacements: {
            retweetCount: tweet.retweet_count,
            favoriteCount: tweet.favorite_count,
            idStr: tweet.id_str
          }
        }
      );
    }

    // Pass the array of promises itself; wrapping it in another array would
    // make Promise.all resolve without waiting for the updates.
    var updatesInParallel = tweets.map(updateTweet);
    return Promise.all(updatesInParallel);
  }).then(function () {
    callback(true);
  }).catch(function (error) {
    console.log("failed: ", error);
    callback(false)
  });

Categories