NodeJs: Code sometimes works - async.map issue

I have a config JSON file as below.
"constraints": {
    "input": "input",
    "output": "output"
}
I am trying to read this file and create the input and output directories with child directories.
var fs = require("fs");
var async = require("async");
var node_xj = require("xls-to-json");
var cf = JSON.parse(fs.readFileSync("config.json", 'utf8'));
// Declare variables
var files = [];

function readAsync(file, callback) {
    node_xj(file, callback);
}

function create(currentDirPath, outputDirPath, callback) {
    // some code creating directories
}

create(cf.constraints.input, cf.constraints.output, function(stat, config) {
    files.push(config);
});

async.map(files, readAsync, function(err, results) {
    if (err) throw err;
});
The code works fine, but it sometimes does not. Let me walk through the code. First, I load some modules.
var fs = require("fs");
var async = require("async");
var node_xj = require("xls-to-json");
I am loading the config file.
var cf = JSON.parse(fs.readFileSync("config.json", 'utf8'));
Then I pass the cf object to the create function, which after its operation returns an object that I push into an array.
var files = [];

function readAsync(file, callback) {
    node_xj(file, callback);
}

function create(input, output, callback) {
    // some code creating directories and an object
}

create(cf.constraints.input, cf.constraints.output, function(stat, config) {
    files.push(config);
});

async.map(files, readAsync, function(err, results) {
    if (err) throw err;
});
Then I pass the files array to my async.map function, which passes each entry to the readAsync function for another operation.
Question:

1. Can anyone tell me whether the way I have written the code flow makes it flawed sometimes?
2. Is there a better way of writing the same code logic flow?
3. Should I use async.map to iterate files and then pass it to readAsync?

If you're depending on asynchronous operations to occur before other operations, you have to treat them asynchronously! Your code as described seems to create some directories (which will take measurable time) and immediately attempts to use them, which isn't kosher.
You might consider something like:
async.series([
    function(done) {
        // create your directories here, then call done()
    },
    function(done) {
        async.map(files, readAsync, done);
    }
]);
which will guarantee that the resources needed by your map exist before the map is called.
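For instance, a fuller sketch (assuming create() invokes its callback once the directories exist, as the question's usage suggests):

async.series([
    function(done) {
        create(cf.constraints.input, cf.constraints.output, function(stat, config) {
            files.push(config);
            done();
        });
    },
    function(done) {
        // Runs only after the directories have been created above
        async.map(files, readAsync, done);
    }
], function(err, results) {
    if (err) throw err;
    // results[1] holds the output of async.map
});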

how to get all keys and values in redis in javascript?

I am creating a Node API using JavaScript, with Redis as my key-value store.
I created a redis client in my app and am able to get the value for a particular key.
Now I want to retrieve all keys along with their values.
So far I have done this:
app.get('/jobs', function (req, res) {
    var jobs = [];
    client.keys('*', function (err, keys) {
        if (err) return console.log(err);
        if (keys) {
            for (var i = 0; i < keys.length; i++) {
                client.get(keys[i], function (error, value) {
                    if (err) return console.log(err);
                    var job = {};
                    job['jobId'] = keys[i];
                    job['data'] = value;
                    jobs.push(job);
                });
            }
            console.log(jobs);
            res.json({data: jobs});
        }
    });
});
But I always get a blank array in the response.
Is there any way to do this in JavaScript?
Thanks
First of all, the issue in your question is that, inside the for loop, client.get is invoked with an asynchronous callback. The synchronous for loop does not wait for those callbacks, so the next line, res.json({data: jobs});, runs immediately after the loop, before the asynchronous callbacks have fired. At the moment res.json({data: jobs}); is invoked, the jobs array is still empty ([]) and is returned with the response.
To mitigate this, you should use a control-flow or promise module such as async, bluebird, or ES6 Promises.
Modified code using the async module:
var async = require('async');

app.get('/jobs', function (req, res) {
    client.keys('*', function (err, keys) {
        if (err) return console.log(err);
        if (keys) {
            async.map(keys, function (key, cb) {
                client.get(key, function (error, value) {
                    if (error) return cb(error);
                    var job = {};
                    job['jobId'] = key;
                    job['data'] = value;
                    cb(null, job);
                });
            }, function (error, results) {
                if (error) return console.log(error);
                console.log(results);
                res.json({data: results});
            });
        }
    });
});
However, the Redis documentation notes that KEYS is intended for debugging and special operations, such as changing your keyspace layout, and its use in production environments is not advisable.
Hence, I would suggest using another module called redisscan, which uses SCAN instead of KEYS, as recommended in the Redis documentation.
Something like:
var redisScan = require('redisscan');
var redis = require('redis').createClient();

redisScan({
    redis: redis,
    each_callback: function (type, key, subkey, value, cb) {
        console.log(type, key, subkey, value);
        cb();
    },
    done_callback: function (err) {
        console.log("-=-=-=-=-=--=-=-=-");
        redis.quit();
    }
});
A combination of two requests:
import * as ioredis from 'ioredis';

const redis = new ioredis({
    port: redisPort,
    host: redisServer,
    password: '',
    db: 0
});

const keys = await redis.keys('*');
const values = await redis.mget(keys);
Order will be the same for both arrays.
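If you then want key/value objects like in the question, a small usage sketch (the field names are just examples):

const jobs = keys.map((key, i) => ({ jobId: key, data: values[i] }));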
This will get all keys but with no values:
const redis = require('redis');
const client = redis.createClient();

client.keys('*', (err, keys) => {
    // ...
});
Now you need to get the values for those keys in the usual way. For example (getAsync assumes the client has been promisified, e.g. with bluebird):
Promise.all(keys.map(key => client.getAsync(key))).then(values => {
    // ...
});
or with the async module, or in any way you like.
You should never do this. First, it is not recommended to use KEYS * in production. Second, it does not scale (in a cluster).
You can organise your cached entries into SETs, query for the items within the SET, and then retrieve the referenced keys, as sketched below. This also makes invalidation easier.
Have a look at some data storage best practices.
https://redis.io/topics/data-types-intro
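A minimal sketch of that approach with node_redis (the jobs:index set name and job:1 key are hypothetical):

var redis = require('redis');
var client = redis.createClient();

// When caching an entry, also record its key in a set:
client.set('job:1', JSON.stringify({name: 'first'}));
client.sadd('jobs:index', 'job:1');

// Later, fetch all tracked keys without KEYS *:
client.smembers('jobs:index', function (err, keys) {
    if (err) return console.error(err);
    client.mget(keys, function (err, values) {
        if (err) return console.error(err);
        console.log(keys, values);
    });
});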
You may find something useful in this link
https://github.com/NodeRedis/node_redis/tree/master/examples

convert async to Rx.js

So, we are trying to rewrite our express server using Rx. It currently uses async for all stream operations. The code looks like the following:
var async = require('async');

function getCountAndChannels(name, cb) {
    var tasks = [
        function(cb) {
            // does a mongoDB search and returns count
        },
        function(cb) {
            // does a findOne mongoDB search and returns
        }
    ];
    async.parallel(tasks, cb);
}

router.get('data', function(req, res) { // router is the express router
    var recorders = req.query.recorders.split(',');
    async.map(recorders, function(name, cb) {
        getCountAndChannels(name, cb);
    }, function(err, countsAndChannels) {
        if (err) throw err;
        // here countsAndChannels is an array whose first element is the count
        // and second element is the document.
        // do other async stuff based on the results
        res.status(200).json('send some calculations');
    });
});
What I have to do here is loop over the array of recorders and, for each one, run the two mongoDB searches. I have tried using Rx.Observable.merge, which doesn't return the results in an array but in two different calls of the callback. So I then tried Rx.Observable.zip, which I believe is what I'm looking for.
The problem is I don't know how to loop over the recorders and send the result only when all operations are finished, because a simple forEach loop will throw a "Cannot set headers after they are sent" error.
This is what I have so far:
recorders.forEach(recorder => {
    Rx.Observable.zip([
        search1,
        search2
    ]).subscribe(
        (countsAndChannels) => {
            // do stuff
            res.send('the results');
        },
        err => res.status(500).json(err),
        () => res.send('OK')
    );
});
Haven't used Rx before, so any help is appreciated.
It might be easier to convert your list of recorders to an Observable stream, then flatMap over each recorder (i.e. perform your async processing), then call toArray to collect all the results into an array:
var recorder$ = Rx.Observable.from(recorders);

var countsAndChannels$ = recorder$
    .flatMap(performAsyncTask);

// allResults$ will emit once all of the async work is complete
var allResults$ = countsAndChannels$.toArray();

allResults$.subscribe(results => {
    // Send response to client;
});
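To adapt the existing callback-style getCountAndChannels to that pattern, one option is Rx.Observable.bindNodeCallback (a sketch, assuming RxJS 5):

// bindNodeCallback turns a Node-style (err, result) callback API
// into a function that returns an Observable.
var getCountAndChannels$ = Rx.Observable.bindNodeCallback(getCountAndChannels);

Rx.Observable.from(recorders)
    .flatMap(name => getCountAndChannels$(name))
    .toArray()
    .subscribe(
        results => res.status(200).json(results),
        err => res.status(500).json(err)
    );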

Nodejs Running Functions in Series

So right now I'm trying to use Nodejs to access files in order to write them to a server and process them.
I've split it into the following steps:
Traverse directories to generate an array of all of the file paths
Put the raw text data from each of file paths in another array
Process the raw data
The first two steps are working fine, using these functions:
var walk = function(dir, done) {
    var results = [];
    fs.readdir(dir, function(err, list) {
        if (err) return done(err);
        var pending = list.length;
        if (!pending) return done(null, results);
        list.forEach(function(file) {
            file = path.resolve(dir, file);
            fs.stat(file, function(err, stat) {
                if (stat && stat.isDirectory()) {
                    walk(file, function(err, res) {
                        results = results.concat(res);
                        if (!--pending) done(null, results);
                    });
                } else {
                    results.push(file);
                    if (!--pending) done(null, results);
                }
            });
        });
    });
};
function processfilepaths(callback) {
    // reading each file
    for (var k in filepaths) { if (arrayHasOwnIndex(filepaths, k)) {
        fs.readFile(filepaths[k], function (err, data) {
            if (err) throw err;
            rawdata[k] = data.toString().split(/ *[\t\r\n\v\f]+/g);
            for (var j in rawdata[k]) { if (arrayHasOwnIndex(rawdata[k], j)) {
                rawdata[k][j] = rawdata[k][j].split(/: *|: +/);
            }}
        });
    }}
    if (callback) callback();
}
Obviously, I want to call the function processrawdata() after all of the data has been loaded. However, using callbacks doesn't seem to work.
walk(rootdirectory, function(err, results) {
    if (err) throw err;
    filepaths = results.slice();
    processfilepaths(processrawdata);
});
This never causes an error. Everything seems to run perfectly except that processrawdata() is always finished before processfilepaths(). What am I doing wrong?
You are having a problem with callback invocation and asynchronous function calls. I recommend using a library such as after-all to execute a callback once all your functions have run.
Here's an example: the function done will be called once all the functions wrapped with next have been called.
var afterAll = require('after-all');

// Call `done` once all the functions
// wrapped with next() get called
var next = afterAll(done);

// first execute this
setTimeout(next(function() {
    console.log('Step two.');
}), 500);

// then this
setTimeout(next(function() {
    console.log('Step one.');
}), 100);

function done() {
    console.log("Yay, we're done!");
}
I think for your problem, you can use the async module for Node.js:
async.series([
    function(callback) { /* ... */ callback(); },
    function(callback) { /* ... */ callback(); }
]);
To answer your actual question, I need to explain how Node.js works.
Say you call an async operation (e.g. a MySQL DB query). Node.js sends "execute this query" to MySQL. Since the query will take some time (maybe milliseconds), Node.js performs it using the MySQL async library, returning to the event loop and doing something else there while waiting for MySQL to get back to us, like handling that HTTP request.
So, in your case, both functions are independent and execute almost in parallel.
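The same ordering can be seen with any async API; a minimal illustration using setTimeout:

console.log('start');
setTimeout(function () {
    // Runs later, from the event loop, after the current code finishes.
    console.log('async result arrived');
}, 0);
console.log('end'); // logs before the async result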
For more information:
Async.js for use with Node.js
function processfilepaths(callback) {
    // reading each file
    for (var k in filepaths) { if (arrayHasOwnIndex(filepaths, k)) {
        fs.readFile(filepaths[k], function (err, data) {
            if (err) throw err;
            rawdata[k] = data.toString().split(/ *[\t\r\n\v\f]+/g);
            for (var j in rawdata[k]) { if (arrayHasOwnIndex(rawdata[k], j)) {
                rawdata[k][j] = rawdata[k][j].split(/: *|: +/);
            }}
        });
    }}
    if (callback) callback();
}
Realize that what you have is:

for
    readFile(err, callback) { ... }
if ...

Node will call each readFile asynchronously, which only sets up the event and callback; then, when it is done calling each readFile, it will execute the if, before the callbacks probably even have a chance to be invoked.
You need to use either Promises or a control-flow module like async to serialize it. What you would then do looks like:
async.XXXX(filepaths, processRawData,
    function (err, results) {
        // function for when all are done
        if (callback) callback();
    }
);
Here XXXX is one of the functions from the library like series, parallel, each, etc. The only other thing you need to know is that in your process-raw-data function, async gives you a callback to call when done. Unless you really need sequential access (I don't think you do), use parallel so that you can queue up as many I/O events as possible; it should execute faster, maybe only marginally, but it'll better leverage the hardware. A concrete sketch follows.
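For instance, a minimal sketch using the async module's eachOf (the splitting logic is copied from the question; the function shape is illustrative):

var fs = require('fs');
var async = require('async');

function processfilepaths(filepaths, callback) {
    var rawdata = [];
    // One fs.readFile per path runs in parallel; the final callback
    // fires only after every file has been read and parsed.
    async.eachOf(filepaths, function (filepath, k, done) {
        fs.readFile(filepath, function (err, data) {
            if (err) return done(err);
            rawdata[k] = data.toString()
                .split(/ *[\t\r\n\v\f]+/g)
                .map(function (line) { return line.split(/: *|: +/); });
            done();
        });
    }, function (err) {
        callback(err, rawdata);
    });
}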

Difference between readFile() and readFileSync()

The following code outputs the content of index.html (it just contains the text "hello world") to the browser. However, when I replace readFile() with readFileSync(), the request times out.
What am I missing? Is a different kind of buffer required? I am using node 0.61 and express 2.4.
var express = require('express');
var fs = require('fs');
var app = express.createServer(express.logger());

app.get('/', function(request, response) {
    fs.readFile('index.html', function(err, data) {
        response.send(data.toString());
    });
});

var port = process.env.PORT || 5000;
app.listen(port, function() {
    console.log("Listening on " + port);
});
fs.readFile takes a callback, which calls response.send as you have shown. Good. If you simply replace that with fs.readFileSync, you need to be aware it does not take a callback, so your callback which calls response.send will never be called; the response will never end, and the request will time out.
You need to show your readFileSync code if you're not simply replacing readFile with readFileSync.
Also, just so you're aware: you should never call readFileSync in a Node express/webserver, since it will tie up the single-threaded event loop while the I/O is performed. You want the Node loop to process other requests until the I/O completes and your callback-handling code can run.
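For reference, a sketch of what the handler would look like with readFileSync (workable, but not recommended in a server for the reason above):

app.get('/', function(request, response) {
    // readFileSync returns the file contents directly; no callback involved.
    var data = fs.readFileSync('index.html');
    response.send(data.toString());
});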
'use strict';
var fs = require("fs");

/***
 * implementation of readFileSync
 */
var data = fs.readFileSync('input.txt');
console.log(data.toString());
console.log("Program Ended");

/***
 * implementation of readFile
 */
fs.readFile('input.txt', function (err, data) {
    if (err) return console.error(err);
    console.log(data.toString());
});
console.log("Program Ended");
For a better understanding, run the above code and compare the results.
readFileSync() is synchronous and blocks execution until finished; it returns its result as a return value.
readFile() is asynchronous and returns immediately while the work happens in the background; you pass a callback function that gets called when it finishes.
Let's take an example of non-blocking.
The following reads a file in a non-blocking way:
var fs = require('fs');

fs.readFile(filename, "utf8", function(err, data) {
    if (err) throw err;
    console.log(data);
});
The following reads a file in a blocking, synchronous way:
var data = fs.readFileSync(filename);
If you don't want the blocking behavior of readFileSync(), you can get a similar sequential style by wrapping readFile() in a native Promise, as in the following code:
var fs = require('fs');

function readFileAsSync() {
    return new Promise((resolve, reject) => {
        fs.readFile(filename, "utf8", function(err, data) {
            if (err) return reject(err);
            resolve(data);
        });
    });
}

async function callRead() {
    let data = await readFileAsSync();
    console.log(data);
}

callRead();
This doesn't change what happens under the hood: the file is still read asynchronously, but the promise-based version with await reads like synchronous code.

How to aggregate array of json objects from forEach function?

I want to render an array of stories from all projects in Node.js. How do I do that?
app.get('/stories', function(request, response) {
    var project_ids = [1, 2];
    project_ids.forEach(function(id) {
        pivotal.getStories(id, function(err, project_stories) {
            console.log("Stories: " + project_stories);
            return JSON.stringify(project_stories);
        });
    });
    response.send(array_of_stories);
});
In the log I get:
Stories: [object Object]
Stories: [object Object]
Your current approach won't work, since getStories is async (an assumption based on its method signature). If you can, I would suggest adding a method to pivotal that can get stories for multiple project ids, so your code would read:
app.get('/stories', function(req, res) {
    var project_ids = [1, 2];
    pivotal.getStories(project_ids, function(err, project_stories) {
        res.send(project_stories);
    });
});
If that is not an option, I would suggest looking into a flow library such as node-seq. Then your code could read something like this:
app.get('/stories', function(req, res) {
    var project_ids = [1, 2];
    Seq(project_ids)
        .parEach(function(project_id) {
            pivotal.getStories(project_id, this);
        })
        .seq(function() {
            var aggregatedStories = [];
            Hash.map(this.args, function(arg) {
                // Each parEach apparently results in an array of objects
                aggregatedStories.push(arg[0]);
            });
            res.send(aggregatedStories);
        });
});
The Hash.map function is from a Node module called hashish.
Edit: To elaborate a little more: parEach executes the functions in parallel, and the following seq executes after all the callbacks from the parallel executions have finished. node-seq puts the result of each parallel execution into the parameter array of the following seq, hence the somewhat cryptic Hash.map(this.args).
You want your server to send an array of project_stories objects as a response to the request? I'm not sure where your return statement returns to, so omitting that, I would try something like the following to send an array of objects as the response. Edit: As pointed out by others, .getStories is asynchronous. Maybe you could try caolan's async module, implemented like this:
var async = require('async');

var array_of_stories = [];

var getStories = function(id, callback) {
    pivotal.getStories(id, function(err, project_stories) {
        if (err) return callback(err);
        array_of_stories.push(project_stories);
        callback();
    });
};

app.get('/stories', function(request, response) {
    var project_ids = [1, 2];
    async.forEach(project_ids, getStories, function(error) {
        if (!error) {
            response.send(array_of_stories);
        }
    });
});
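Note that array_of_stories above is shared across requests, so it keeps growing. A variant using async.map keeps the results local to each request (a sketch; response.status assumes Express 4):

app.get('/stories', function(request, response) {
    var project_ids = [1, 2];
    // async.map collects each callback's result into one array,
    // in the same order as project_ids.
    async.map(project_ids, function(id, callback) {
        pivotal.getStories(id, callback);
    }, function(error, stories) {
        if (error) return response.status(500).send(error);
        response.send(stories);
    });
});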
