Node.js: Split large array and make multiple API calls - javascript

I have a CSV file that contains 21k records (one alphanumeric word per line). I need to read these records and send them to an API, in JSON key-value pair format, for some processing; the API accepts only 500 elements at a time. I have a solution in mind, but I wanted to know whether there is a better or more efficient solution/algorithm for this.
Algorithm:
Load the CSV into an array.
Split this 1D array into N arrays with a fixed length of 500 elements each.
For each of these N 500-element arrays, prepare a JSON payload and send it to the API.
Code:
var fs = require('fs');

fs.readFile(inputPath, 'utf8', function (err, data) {
    var dataArray = data.split(/\r?\n/);
    var temp = [];
    // walk the array in windows of up to 500 elements
    for (var i = 0; i < dataArray.length;) {
        temp = [];
        for (var j = 0; j < 500 && i < dataArray.length; j++) {
            temp.push(dataArray[i]);
            i++;
        }
        // make API call with current values of temp array
        makeCallToAPI(temp);
    }
});

I'd use lodash or underscore's _.chunk(). Also note that both the fs read and the API calls are better handled async.
const fs = require('fs');
const _ = require('lodash');

async function callApi(chunk) {
    // return a promise that resolves with the result of the api
}

async function readFS(inputPath) {
    return new Promise((resolve, reject) => {
        fs.readFile(inputPath, 'utf8', function (err, data) {
            if (err) reject(err);
            else resolve(data.split(/\r?\n/));
        });
    });
}

async function doTheWork(inputPath) {
    const data = await readFS(inputPath);
    const chunks = _.chunk(data, 500);
    const promises = chunks.map(callApi);
    return _.flatten(await Promise.all(promises));
}
Also note the use of _.flatten(): since each API call resolves with its own array of results, Promise.all() will resolve to an array of arrays, and flattening turns it back into a single array.
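For reference, a quick sketch of what those two lodash helpers do (the inputs here are made up for illustration):

const _ = require('lodash');

// _.chunk splits an array into groups of the given size
_.chunk(['a', 'b', 'c', 'd', 'e'], 2); // => [['a', 'b'], ['c', 'd'], ['e']]

// _.flatten removes one level of nesting
_.flatten([[1, 2], [3], [4, 5]]); // => [1, 2, 3, 4, 5]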

Related

How to get Object.keys iterating through object of arrays to wait for async fetched api results at the end of each iteration?

This node code
reads in some local data
fs.readFile('./local.json', 'utf8', (err, data) => {
Opens up a try {
Parses the data, then flattens it a little: const json_data = JSON.parse(data);
Loops through the resulting arrays and gathers data points
In the loop, code extracts urls and sends out two api requests for every iteration
The api requests are chained promises that return json
Returned data are checked and processed
During each iteration of the loop, data from the 3 sources are selected with ternary operators and conditions
Then string literals are composed using the preferred data points
At the end of the fetch promise chain, right before the next iteration of the loop, the properly composed string literals are printed to a file
Only they are not properly composed...
The loop doesn't know enough to wait for promises to resolve, so data from the first iteration gets written to subsequent iterations.
There is some code not shown here, for example helper function and variable definitions, string literals, and catch blocks, but this is more or less the structure:
fs.readFile('./local.json', 'utf8', (err, data) => {
    try {
        // parse data and extract api endpoints
        const processed_local_json = JSON.parse(data);
        // define functions and variables
        const functions_to_check_and_process_local_data = (data) => { //...
        }
        const functions_to_check_and_process_api_data = (data) => { //...
        }
        // define first api fetcher
        const first_api_fetcher = async (urla, urlb) => {
            const response = await fetch(urla);
            const contentType = response.headers.get("content-type");
            if (contentType && contentType.indexOf("application/json") !== -1) {
                let results = await response.json();
                functions_to_check_and_process_api_data(results);
            }
            await second_api_fetcher(urlb); // the second api request is chained to a promise in the first one
        } // end first api fetcher
        // define second api fetcher
        const second_api_fetcher = async (url) => {
            const response = await fetch(url);
            const contentType = response.headers.get("content-type");
            if (contentType && contentType.indexOf("application/json") !== -1) {
                let results = await response.json();
                functions_to_check_and_process_api_data(results);
            }
        } // end second api fetcher
        // run the processing loop
        Object.keys(processed_local_json).forEach(item => {
            // check, process local data, and extract urls
            functions_to_check_and_process_local_data(item);
            // processed_local_json['item'];
            // define data processing function, which waits on first_api_fetcher (which waits on second_api_fetcher)
            const processData = async () => {
                await first_api_fetcher(url1, url2).then(
                    // select whichever data is best and write to string literals
                    // join string literals into an array
                    // return array of string literals
                ) // end then
            } // end processData
            // print string template literals to file (waits on processData)
            const printFile = async () => {
                const printData = await processData();
                // print to file
                fs.appendFile("file", printData, (err) => {
                    if (err) {
                        console.log(err);
                    } else {
                        console.log('wrote string template literals to file');
                    }
                }); // end write output to file
            } // end printFile
            // call function to trigger api requests, data processing, and string template literal composition
            printFile();
        }); // end Object.keys processing loop
    } catch (err) {
        // catch block contents omitted in the question
        console.log(err);
    }
}) // end readfile
The code isn't perfect, but except for the major flaw, i.e. that the Object.keys loop doesn't wait on the api fetches, it gets the job done.
It has a .then() in the processing function just to make it clear (to me) that processing happens after the api fetches; however, variables are initialized in scope outside the loop, so it doesn't seem necessary to return an array of values from the api fetchers.
Does anyone know how to get the Object.keys loop to wait on api results at the end of each iteration? Is there some boolean to set to send the compiler back to the beginning of the loop until it's toggled with returned api data?
Or is there some trick to this I just missed in the docs?
One side note: in the Object.keys loop you define some async functions. I wouldn't recommend defining them on each loop iteration! They should be set up outside the loop.
As for “getting the loop to wait”: it's important to understand conceptually that you don't get the loop itself to wait, per se; that Object.keys line of code is synchronous. Maybe you knew that, but just to be clear.
When you invoke asynchronous code within that loop (the printFile function), you will fire off multiple asynchronous printFile calls, unless you await the asynchronous function invocation itself. That's how you can force the loop to stop further processing until each invocation completes.
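A minimal sketch of that idea, assuming printFile is reworked to take the current item as a parameter (in the question it closes over loop variables instead). A plain forEach cannot await, so a for...of loop inside an async wrapper is used:

const run = async () => {
    for (const item of Object.keys(processed_local_json)) {
        // each iteration now waits for printFile to finish before continuing
        await printFile(item);
    }
};
run();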

Javascript race condition multiple promises concat string

I have a function that receives an array of usernames. For each one I need to get the connection IDs and concatenate them into another array. But I think the way I did it has a race condition problem, where results may collide and I end up concatenating fewer results than I should...
const getConnections = async function (usernames) {
    let connections = [];
    await Promise.all(usernames.map(async (username) => {
        try {
            let connsFound = await dynamo.getConnectionsByUsername(username);
            if (connsFound && connsFound.length > 0)
                connections = connections.concat(connsFound);
        } catch (error) {
            console.error('ERROR GET CONNECTIONS', error)
        }
    }));
    return connections;
};
The result of each lookup is an array, which I would like to merge into the connections variable... but .concat does not merge in place, it creates a new array, so I need to do connections = connections.concat(newArray).
I am afraid this is not safe and that in some operation it will collide and overwrite some results...
Is there a better way to do it?
cheers
JavaScript is single threaded, so there is always at most one function that runs and accesses connections. You shouldn't have any problems.
However, there is no reason to do it that way. Since you are already using async/await, you can just create an array of arrays and flatten that:
const getConnections = async function (usernames) {
    const connections = await Promise.all(usernames.map(async (username) => {
        try {
            return await dynamo.getConnectionsByUsername(username);
        } catch (error) {
            console.error('ERROR GET CONNECTIONS', error);
            return [];
        }
    }));
    return connections.flat();
};
.flat is relatively new, but it should be easy to write a custom helper function to achieve the same.
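For example, a minimal sketch of such a helper (one level of flattening, which is all this case needs):

function flatten(arrays) {
    // concat each sub-array onto an accumulator, one level deep
    return arrays.reduce((acc, arr) => acc.concat(arr), []);
}

// usage: return flatten(connections); instead of connections.flat()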

how to get all keys and values in redis in javascript?

I am creating a node API using javascript. I have used redis as my key-value store.
I created a redis client in my app and am able to get values for a particular key.
I want to retrieve all keys along with their values.
So far I have done this:
app.get('/jobs', function (req, res) {
    var jobs = [];
    client.keys('*', function (err, keys) {
        if (err) return console.log(err);
        if (keys) {
            for (var i = 0; i < keys.length; i++) {
                client.get(keys[i], function (error, value) {
                    if (error) return console.log(error);
                    var job = {};
                    job['jobId'] = keys[i];
                    job['data'] = value;
                    jobs.push(job);
                });
            }
            console.log(jobs);
            res.json({data: jobs});
        }
    });
});
but I always get a blank array in the response.
Is there any way to do this in javascript?
Thanks
First of all, the issue in your question is that, inside the for loop, client.get is invoked with an asynchronous callback. The synchronous for loop does not wait for those callbacks, so the next line, res.json({data: jobs});, is called immediately after the loop, before any of the callbacks have run. At the moment res.json({data: jobs}); is invoked, the jobs array is still empty ([]) and that is what gets returned in the response.
To mitigate this, you should use a control-flow or promise library such as async, bluebird, or native ES6 Promises.
Modified code using the async module:
app.get('/jobs', function (req, res) {
    client.keys('*', function (err, keys) {
        if (err) return console.log(err);
        if (keys) {
            async.map(keys, function (key, cb) {
                client.get(key, function (error, value) {
                    if (error) return cb(error);
                    var job = {};
                    job['jobId'] = key;
                    job['data'] = value;
                    cb(null, job);
                });
            }, function (error, results) {
                if (error) return console.log(error);
                console.log(results);
                res.json({data: results});
            });
        }
    });
});
But note that the Redis documentation says KEYS is intended for debugging and special operations, such as changing your keyspace layout, and is not advisable in production environments.
Hence, I would suggest using another module called redisscan, which uses SCAN instead of KEYS as recommended by the Redis documentation.
Something like this:
var redisScan = require('redisscan');
var redis = require('redis').createClient();

redisScan({
    redis: redis,
    each_callback: function (type, key, subkey, value, cb) {
        console.log(type, key, subkey, value);
        cb();
    },
    done_callback: function (err) {
        console.log("-=-=-=-=-=--=-=-=-");
        redis.quit();
    }
});
Another approach is a combination of two requests, using ioredis:
import * as ioredis from 'ioredis';

const redis = new ioredis({
    port: redisPort,
    host: redisServer,
    password: '',
    db: 0
});

const keys = await redis.keys('*');
const values = await redis.mget(keys);
Order will be the same for both arrays.
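If the keyspace is large, a hedged alternative (assuming the same ioredis client as above) is scanStream, which iterates with SCAN in batches instead of one blocking KEYS call:

const stream = redis.scanStream({ match: '*' });
const found = [];
// each 'data' event delivers one batch of keys from a SCAN iteration
stream.on('data', (batch) => found.push(...batch));
stream.on('end', async () => {
    // guard against an empty keyspace: MGET requires at least one key
    const values = found.length ? await redis.mget(found) : [];
    console.log(found, values);
});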
This will get all keys, but with no values:

const redis = require('redis');
const client = redis.createClient();

client.keys('*', (err, keys) => {
    // ...
});

Now you need to get the values for those keys in the usual way. For example (getAsync assumes the client has been promisified, e.g. with bluebird):

Promise.all(keys.map(key => client.getAsync(key))).then(values => {
    // ...
});

or with the async module, or in any way you like.
You should never do this. First off, KEYS * is not recommended in production. Second, it does not scale (e.g. in a cluster).
You can organise your cached entries into SETs and query for the items within the SET, then retrieve the referenced keys. This also makes invalidation easier.
Have a look at some data storage best practices:
https://redis.io/topics/data-types-intro
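A minimal sketch of that SET-based pattern, assuming the node_redis client from the snippets above and a hypothetical jobs:index set:

// register each key in an index set when it is written
client.sadd('jobs:index', 'job:1');
client.set('job:1', JSON.stringify({ status: 'queued' }));

// later, fetch exactly the keys you indexed instead of scanning the keyspace
client.smembers('jobs:index', (err, keys) => {
    if (err) return console.log(err);
    client.mget(keys, (error, values) => {
        if (error) return console.log(error);
        console.log(keys, values);
    });
});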
You may find something useful in this link
https://github.com/NodeRedis/node_redis/tree/master/examples

How to use promises to wait for async API calls

I am creating an API where, on GET, a series of calls to the News API are made, news article titles are extracted into one giant string, and that string is processed into an object to be delivered to a wordcloud on the front-end. So far, I've been able to use underscore's _.after and request-promise to make my app wait until all API calls have completed before calling processWordBank(), which takes the giant string and cleans it up into an object. However, once processWordBank() is called, I don't understand where the flow of the program is. Ideally, processWordBank() returns obj to cloudObj in the router, so that the obj can be passed to res.json() and spit out as the response. I believe my use of _.after has put me in a weird situation, but it's the only way I've been able to get async calls to finish before proceeding to the next desired action. Any suggestions?
(I've tried to leave out all unnecessary code but let me know if this is insufficient)
// includes...
var sourceString = ""

// router
export default ({ config }) => {
    let news = Router()
    news.get('/', function (req, res) {
        var cloudObj = getSources()
        res.json({ cloudObj })
    })
    return news
}

// create list of words (sourceString) by pulling news data from various sources
function getSources() {
    return getNewsApi()
}

// NEWS API
// GET top 10 news article titles from News API (news sources are determined by the values of newsApiSource array)
function getNewsApi() {
    var finished = _.after(newsApiSource.length, processWordBank)
    for (var i = 0; i < newsApiSource.length; i++) {
        let options = {
            uri: 'https://newsapi.org/v1/articles?source=' + newsApiSource[i] + '&sortBy=' + rank + '&apiKey=' + apiKey,
            json: true
        }
        rp(options)
            .then(function (res) {
                let articles = res.articles // grab article objects from the response
                let articleTitles = " " + _.pluck(articles, 'title') // extract title of each news article
                sourceString += " " + articleTitles // add all titles to the word bank
                finished() // this async task has finished
            })
            .catch(function (err) {
                console.log(err)
            })
    }
}

// analyse word bank for patterns/trends
function processWordBank() {
    var sourceArray = refineSource(sourceString)
    sourceArray = combineCommon(sourceArray)
    sourceArray = getWordFreq(sourceArray)
    var obj = sortToObject(sourceArray[0], sourceArray[1])
    console.log(obj)
    return obj
}
A big issue in your asynchronous flow is that you use a shared variable sourceString to handle the results. When you have multiple calls to getNewsApi() your result is not predictable and will not always be the same, because there is no predefined order in which the asynchronous calls are executed. Not only that, but you never reset it, so all subsequent calls will also include the results of the previous calls. Avoid modifying shared variables in asynchronous calls and instead use the results directly.
I've been able to use underscore's _.after and request-promise to make my app wait till all API calls have completed before calling processWordBank()
Although it would possible to use _.after, this can be done very nicely with promises, and since you're already using promises for your requests, it's just a matter of collecting the results from them. So because you want to wait until all API calls are completed you can use Promise.all which returns a promise that resolves with an array of the values of all the promises, once all of them are fulfilled. Let's have a look at a very simple example to see how Promise.all works:
// Promise.resolve() creates a promise that is fulfilled with the given value
const p1 = Promise.resolve('a promise')
// A promise that completes after 1 second
const p2 = new Promise(resolve => setTimeout(() => resolve('after 1 second'), 1000))
const p3 = Promise.resolve('hello').then(s => s + ' world')
const promises = [p1, p2, p3]
console.log('Waiting for all promises')
Promise.all(promises).then(results => console.log('All promises finished', results))
console.log('Promise.all does not block execution')
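Run as-is, this logs 'Waiting for all promises' and then 'Promise.all does not block execution' immediately; only after the one-second timer fires does it log 'All promises finished' with ['a promise', 'after 1 second', 'hello world'], in the same order as the input array.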
Now we can modify getNewsApi() to use Promise.all. The array of promises given to Promise.all contains all the API requests you're making in your loop; it is created with Array.prototype.map. And instead of creating a string out of the array returned from _.pluck, we can just use the array directly, so you don't need to parse the string back into an array at the end.
function getNewsApi() {
    // Each element is a request promise
    const apiCalls = newsApiSource.map(function (source) {
        let options = {
            uri: 'https://newsapi.org/v1/articles?source=' + source + '&sortBy=' + rank + '&apiKey=' + apiKey,
            json: true
        }
        return rp(options)
            .then(function (res) {
                let articles = res.articles
                let articleTitles = _.pluck(articles, 'title')
                // The promise is fulfilled with the articleTitles
                return articleTitles
            })
            .catch(function (err) {
                console.log(err)
            })
    })
    // Return the promise that is fulfilled with all request values
    return Promise.all(apiCalls)
}
Then we need to use the values in the router. We know that the promise returned from getNewsApi() fulfils with an array of all the request results, which are themselves arrays of article titles. That is a 2d array, but presumably you want a 1d array with all the titles for your processWordBank() function, so we can flatten it first.
export default ({ config }) => {
    let news = Router()
    news.get('/', (req, res) => {
        const cloudObj = getSources()
        cloudObj.then(function (apiResponses) {
            // Flatten the array
            // From: [['source1article1', 'source1article2'], ['source2article1'], ...]
            // To: ['source1article1', 'source1article2', 'source2article1', ...]
            const articles = [].concat.apply([], apiResponses)
            // Pass the articles as parameter
            const processedArticles = processWordBank(articles)
            // Respond with the processed object
            res.json({ processedArticles })
        })
    })
    return news
}
And finally processWordBank() needs to be changed to use an input parameter instead of using the shared variable. refineSource is no longer needed, because you're already passing an array (unless you do some other modifications in it).
function processWordBank(articles) {
    let sourceArray = combineCommon(articles)
    sourceArray = getWordFreq(sourceArray)
    var obj = sortToObject(sourceArray[0], sourceArray[1])
    console.log(obj)
    return obj
}
As a bonus the router and getNewsApi() can be cleaned up with some ES6 features (without the comments from the snippets above):
export default ({ config }) => {
    const news = Router()
    news.get('/', (req, res) => {
        getSources().then(apiResponses => {
            const articles = [].concat(...apiResponses)
            const processedArticles = processWordBank(articles)
            res.json({ processedArticles })
        })
    })
    return news
}

function getNewsApi() {
    const apiCalls = newsApiSource.map(source => {
        const options = {
            uri: `https://newsapi.org/v1/articles?source=${source}&sortBy=${rank}&apiKey=${apiKey}`,
            json: true
        }
        return rp(options)
            .then(res => _.pluck(res.articles, 'title'))
            .catch(err => console.log(err))
    })
    return Promise.all(apiCalls)
}

convert async to Rx.js

So, we are trying to rewrite our express server into Rx. It is currently using async for all stream operations. The code looks like the following:
var async = require('async');

function getCountAndChannels(name, cb) {
    var tasks = [
        function (cb) {
            // does a mongoDB search and returns count
        },
        function (cb) {
            // does a findOne mongoDB search and returns
        }
    ];
    async.parallel(tasks, cb);
}

router.get('/data', function (req, res) { // router is the express router
    var recorders = req.query.recorders.split(',');
    async.map(recorders, function (name, cb) {
        getCountAndChannels(name, cb);
    }, function (err, countsAndChannels) {
        if (err) throw err;
        // here countsAndChannels is an array with first element the count
        // and second element the document.
        // do other async stuff based on the results
        res.status(200).json('send some calculations');
    });
});
What I have to do here is loop over the array of recorders and, for each one, run the two mongoDB searches. I have tried using Rx.Observable.merge, which doesn't return the results in an array but in 2 different calls of the callback. So then I tried Rx.Observable.zip, which I believe is what I'm looking for.
The problem is I don't know how to loop over the recorders and send the result when all operations are finished, because a simple forEach loop will throw a Cannot set headers after they are sent error.
This is what I have so far:
recorders.forEach(recorder => {
    Rx.Observable.zip([
        search1,
        search2
    ]).subscribe(
        (countsAndChannels) => {
            // do stuff
            res.send('the results');
        },
        err => res.status(500).json(err),
        () => res.send('OK')
    );
});
Haven't used Rx before, so any help is appreciated.
It might be easier to convert your list of recorders to an Observable stream, then flatMap over each recorder (i.e. perform your async processing), then call toArray to store all the results into an array:
var recorder$ = Rx.Observable.from(recorders);

var countsAndChannels$ = recorder$
    .flatMap(performAsyncTask);

// allResults$ will emit once all of the async work is complete
var allResults$ = countsAndChannels$.toArray();

allResults$.subscribe(results => {
    // Send response to client;
});
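For completeness, a minimal sketch of what performAsyncTask could look like, assuming RxJS 5 and that the two mongoDB searches are wrapped in hypothetical promise-returning helpers countSearch and findOneSearch:

function performAsyncTask(name) {
    // zip pairs the two results, so each recorder emits one [count, doc] array
    return Rx.Observable.zip(
        Rx.Observable.fromPromise(countSearch(name)),
        Rx.Observable.fromPromise(findOneSearch(name))
    );
}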
