I'm very new to JS and functional programming in general and am struggling to find a graceful solution to this problem. Essentially, I want to make async requests to a MongoDB server and return the results through an async.map call. The problem I am having is that the actual function within async.map is asynchronous itself. I would like to know a graceful solution here, or at least get a pointer in the right direction! Thanks!
// Question code: runs one distinct("author") query per entry of subQuery and,
// in the final callback, tries to write the collected results to a file.
async.map(subQuery,
// NOTE(review): async.map normally hands the iterator a second argument (a
// completion callback); this iterator declares only `item`, so nothing here
// tells async.map that an item is finished — confirm against the async docs.
function(item){
collection.distinct("author", item, function(err, authors){
counter++;
console.log("Finished query: " + counter);
var key = item['subreddit'];
// This returns to whoever invoked the distinct() callback, not to async.map.
// The literal also creates a property named "key"; the `key` variable is unused.
return { key: authors };
})
},
function(err, result){
if (err)
console.log(err);
else{
console.log("Preparing to write to file...");
// `result` is handed to fs.writeFile as-is (it is not serialised first).
fs.writeFile("michaAggregate.json", result, function() {
console.log("The file was saved!");
});
}
// Runs right after writeFile has been started, not after it completes.
db.close();
}
);
You should process each item only once its data has been fetched. Just use a callback — that is the common way of doing it in JavaScript. Like this:
/**
 * Turns the authors fetched for one sub-query into the value that async.map
 * collects for that item.
 *
 * @param {Object} item - The sub-query; its `subreddit` property names the result.
 * @param {Array} authors - Distinct authors returned for `item`.
 * @param {Function} onItemProcessed - Node-style callback `(err, result)`.
 */
var processItem = function(item, authors, onItemProcessed){
    // Do some street magic with your data to process it
    var processed = {};
    // Bracket assignment uses the subreddit name as the property name;
    // `{ key: authors }` would create a property literally called "key".
    processed[item['subreddit']] = authors;
    // Your callback function that will be called when item is processed.
    onItemProcessed(null, processed);
};

async.map(subQuery,
    // async.map passes `done` to every iterator call. The final callback below
    // runs only after `done` has been called for each item, or once with an error.
    function(item, done){
        collection.distinct("author", item, function(err, authors){
            counter++;
            console.log("Finished query: " + counter);
            if (err) {
                return done(err);
            }
            processItem(item, authors, done);
        });
    },
    function(err, result){
        if (err) {
            console.log(err);
        } else {
            // That string added **ADDED**
            console.log('HEEY! I done with processing all data so now I can do what I want!');
            console.log("Preparing to write to file...");
            // `result` is an array of objects, so serialise it before writing.
            fs.writeFile("michaAggregate.json", JSON.stringify(result), function(writeErr) {
                if (writeErr) {
                    console.log(writeErr);
                    return;
                }
                console.log("The file was saved!");
            });
        }
        db.close();
    }
);
ADDED
By the specification of async.map you can see:
https://github.com/caolan/async
async.map(arr, iterator, callback):
callback(err, results) - A callback which is called when all iterator functions have finished, or an error occurs. Results is an array of the transformed items from the arr. As you see that callback is exactly what you need!
Related
I'm just starting to work with Javascript and Node, and Async and callbacks concepts are not something I have under control right now.
I have to call a function for each element of a documents Array. This function will call to DB and get me an array of the document annotations. I want to get all the annotations and put them on the same array. Something similar to this:
//function in an async waterfall
// Waterfall step from the question: tries to gather the annotations of every
// document in `docs` into a single array for the next step.
function(docs,callback){
let annotationsArray = [];
async.each(docs, (doc, callback2) => {
// callback2 is handed straight to getAnnotationsFromDocument, so any result it
// produces is passed to callback2 rather than stored anywhere in this function.
getAnnotationsFromDocument(doc.Id, callback2);
}, function (err,annotations){
// The question reports that `annotations` is undefined at this point.
if (err){
// No return here, so `callback` is invoked again three lines below.
callback(err);
}
annotationsArray = annotationsArray.concat(annotations);
callback(null, annotationsArray);
});
},
//Next waterfall function
About the getAnnotationsFromDocument function, this is a simplified structure of it:
// Simplified outline from the question: runs three waterfall steps and reports
// the outcome through `callback` as (err) or (null, result).
// The `(...)` lines are the author's placeholders for omitted code.
function getAnnotationsFromDocument(docId,callback){
initDB();
var async = require('async');
async.waterfall([
// Each step's `callback` parameter shadows the outer `callback`.
function authorize(callback){
//checkAuthorization
(...)
},
function getRfpdocAnnotations(auth, metadata, callback){
//call to DB
(...)
},
function processRfpdocAnnotations(rfpDocAnnotations,metadata,callback){
(...)
callback(null,annotationsList);
}
], function (err, result) {
// Here `callback` is the outer one, i.e. the caller's.
if(err) {
callback(err);
} else {
callback(null, result);
}
});
}
Unfortunately, I'm unable to code it properly: I can't get the results from the function before exiting the async.each. Could somebody explain to me how to structure the code for this?
Debugging I've found that the function getAnnotationsFromDocument gets the data and execute the last callback(null, result); properly, but when I get to function (err,annotations){, annotations is undefined.
Ok, I think I got it:
The first problem was that async.each doesn't return the results in the callback like I was expecting. Unlike waterfall, it just returns the errors. I should have paid more attention when reading the documentation.
Secondly, I had to create a callback on the getAnnotationsFromDocument call to process the results.
And finally, I was not executing the call to the callback of async.each, so the execution didn't get to the async.each callback and didn't continue to the next async.waterfall function.
To be quite honest, I'm not sure it's a correct answer, but it does what I was trying to achieve.
// function part of an async.waterfall
function(docs,callback){
let annotationsArray = [];
async.each(docs, (doc,callback2) => {
getAnnotationsFromDocument(doc._id, function(err,result){
if (err){
callback2(err);
}else{
annotationsArray = annotationsArray.concat(result);
}
callback2();
})
}, (err) =>{
if( err ) {
callback(err);
} else {
callback(null,annotationsArray); //to the next waterfall function
}
});
Once fs.readFile has looped through all the files, collected the matching lines and pushed them to results, I want to call callback(results) so I can send the response to the client. With the code below I am getting the error "Error: Callback is already called". How can I resolve this issue using the async approach?
app.js
// Caller from the question: sends the matching lines back as JSON.
// Note the callback order is (lines, err), not the usual Node (err, lines).
searchFileService.readFile(searchTxt, logFiles, function(lines, err) {
console.log('Logs', lines);
// On error an empty response is sent instead of the lines.
if (err)
return res.send();
res.json(lines);
})
readFile.js
// Question code: searches each log file for lines containing the search string.
// Both variables are module-level, so they are shared by every readFile call.
var searchStr;
var results = [];
function readFile(str,logFiles,callback){
searchStr = str;
// The iterator's `callback` parameter shadows readFile's own `callback`.
async.map(logFiles, function(logfile, callback) {
fs.readFile('logs/dit/' + logfile.filename, 'utf8', function(err, data) {
if (err) {
// The error is passed in the second (result) position, and execution continues
// past this block because there is no return.
callback(null,err);
}
var lines = data.split('\n'); // get the lines
lines.forEach(function(line) { // for each line in lines
if (line.indexOf(searchStr) != -1) { // if the line contains searchStr
results.push(line);
// Invoked once per matching line, i.e. possibly many times per file, with
// `results` in the first (error) position.
callback(results,null);
}
});
});
// The `)` below closes the async.map call, so the function after the comma is
// not passed to async.map; it is just an unused expression.
}), function(error, result) {
results.map(result,function (result){
console.log(result);
});
};
}
Note: this answer is an extension to trincot's answer. So if this answers your question, kindly mark his as the answer!
You said: Once fs.readFile loop through all files and get the matching data and push it to results then I don't think .map is the appropriate function for this, to be honest. This is for transforming every element from an array into another which is not what you are doing.
A better method would be .eachSeries to read one file at a time.
It's a good idea to rename your second callback to something else e.g. done to not confuse yourself (and others). Calling done() is for telling that the operation on the file is completed as in we are "done" reading the file.
Lastly, be careful with your typos. The first one may have prevented you from getting into the last part.
/**
 * Searches the given log files, one at a time, for lines containing `str`.
 *
 * @param {string} str - Text to look for in each line.
 * @param {Array<{filename: string}>} logFiles - Files to read from `logs/dit/`.
 * @param {Function} callback - Called once as `callback(lines, err)`, matching
 *   the `(lines, err)` order the caller in app.js uses. `lines` holds the
 *   matches gathered before any error occurred.
 */
function readFile(str, logFiles, callback) {
    // Kept local so that repeated or overlapping searches never see each
    // other's matches; a module-level array would keep growing between calls.
    var results = [];
    // loop through each file
    async.eachSeries(logFiles, function (logfile, done) {
        // read file
        fs.readFile('logs/dit/' + logfile.filename, 'utf8', function (err, data) {
            if (err) {
                return done(err);
            }
            var lines = data.split('\n'); // get the lines
            lines.forEach(function (line) { // for each line in lines
                if (line.indexOf(str) !== -1) { // if the line contains the search string
                    results.push(line);
                }
            });
            // when you are done reading the file
            done();
        });
    // wrong: }), function (err) {
    }, function (err) {
        if (err) {
            console.log('error', err);
        }
        console.log('all done: ', results);
        // wrong: results.map(result, function (result){
        results.forEach(function (result) {
            console.log(result);
        });
        // send back results, plus the error (if any) so the caller can react
        callback(results, err);
    });
}
I have a Node.js function to fetch some value from DB table
// Question code: opens a DB connection and queries the points row for `msisdn`.
// The function has no return statement; the data only exists inside the callbacks.
var GetPoints = function(ibmdb, dbConnection, msisdn) {
ibmdb.open(dbConnection, function(err, conn) {
if (err) {
//Error
} else {
// NOTE(review): msisdn is concatenated directly into the SQL text.
conn.query('SELECT id,msisdn,points FROM t_points WHERE msisdn =' + msisdn, function(err, data) {
console.log(err);
if (!err) {
conn.close(function(err) {
if (!err) {}
});
// NOTE(review): `consele` looks like a typo for `console`.
consele.log(data);
//return data[0].POINTS;
} else {
//Error
}
});
}
// NOTE(review): `points` is not declared anywhere in this snippet.
console.log("points" + points);
});
}
I want to know how I can access the data object when I call this function from outside
var data = GetPoints(ibmdb, dbConnection, msisdn);
The value is coming correctly when I do a console.log
You can't return the value from an async function directly. Promises are generally used in this situation: you return a promise, and the caller later calls .then on it to retrieve the value.
var Promise = require('bluebird');
/**
 * Opens a connection and runs the points query, exposing the outcome as a promise.
 *
 * @param {Object} ibmdb - Driver object providing `open(connectionString, cb)`.
 * @param {string} dbConnection - Connection string passed to `ibmdb.open`.
 * @param {string} msisdn - Subscriber number the query is about. The query text
 *   below is a placeholder; prefer binding this as a query parameter over
 *   concatenating it into the SQL, if the driver supports that.
 * @returns {Promise} Resolves with the `data` handed to the query callback;
 *   rejects with the error from `open` or `query`.
 */
var GetPoints = function(ibmdb, dbConnection, msisdn) {
    // return a Promise
    return new Promise(function(resolve, reject){
        ibmdb.open(dbConnection, function(err, conn) {
            // A `throw` inside this asynchronous callback would not reject the
            // promise; pass the error to reject() so .catch() receives it.
            if (err) return reject(err);
            // …
            conn.query('SELECT …', function(err, data) {
                if (err) return reject(err);
                // …
                console.log(data);
                //return data[0].POINTS;
                resolve(data);
            });
        });
    });
};
// GetPoints still needs its three arguments; only the way the result is
// delivered (a promise instead of a return value) has changed.
GetPoints(ibmdb, dbConnection, msisdn).then(function(data){
    // do something with data
}).catch(function(err){
    // handle err
});
Additionally, Bluebird has a promisify function that turns an async function (that takes a callback) into a function that returns a Promise. It makes the above code much simpler:
Note: Although I was reluctant because if you're using MySQL with which the promisification could be a little tricky: 1, 2. But For now I've added .promisifyAll where it might seem redundant as it will likely be executed more than once, but hopefully bluebird's promisification is smart enough to handle this. Nonetheless if you manage to promisify more efficiently you can just remove the redundant promisifyAll and just use X.yyyAsync methods as described:
/**
 * Promisifies the driver and opens a connection.
 *
 * @param {Object} ibmdb - Driver object; gains `*Async` methods via promisifyAll.
 * @param {string} dbConnection - Connection string forwarded to `openAsync`.
 * @param {string} msisdn - Unused here; kept so the signature mirrors GetPoints.
 * @returns {Promise} Whatever `ibmdb.openAsync` returns (a promise for the connection).
 */
function GetConnection(ibmdb, dbConnection, msisdn){
    Promise.promisifyAll(ibmdb);
    // openAsync takes the same arguments as open, minus the trailing callback.
    return ibmdb.openAsync(dbConnection);
}
/**
 * Promisifies the connection and runs the (placeholder) query on it.
 *
 * @param {Object} conn - Connection object; gains `*Async` methods via promisifyAll.
 * @returns {*} Whatever `conn.queryAsync` returns.
 */
function getData(conn){
    var sql = 'SELECT …';
    Promise.promisifyAll(conn);
    return conn.queryAsync(sql);
}
// Pass the driver and connection string through, and end the chain with a
// rejection handler so a failed open or query is not left unhandled.
GetConnection(ibmdb, dbConnection, msisdn)
    .then(getData)
    .then(function(data){
        // do something with data
    })
    .catch(function(err){
        console.error(err);
    });
The callback function you gave after the SQL query gets executed asynchronously. Instead of trying to get that data outside the function, you should try to perform whatever you need to do inside. Keep in mind you can create another function and call it with the data to continue your work.
I am making consecutive http get calls using async.js series, all of which is inside a for loop. First I retrieve a number of results from an API query, then I run another query on each of the results. The results are stored in an array and then saved to a CouchDB database. Because the number of results is limited to 200, I have to do this multiple times (hence the for loop). The basic structure of the code is as follows (full code below)
for (...) {
async.series(
[
function(){ http get method },
function (){ async.eachSeries(){ http get method }, callback function }
],
function(){ database operations }
);//end series
}//end for loop
My problem is that the loop executes only once. Everything inside the loop works as expected, and the data is saved correctly to the database--but I can't figure out why the loop won't run again. I know if I put a method call in the async callback function it will run fine, so maybe I am missing something about how async works. I think that the for loop should be on the call stack, so when async is done, the loop should simply continue, but this obviously isn't the case.
Full code:
// Question code: for each chunk, fetch a result page, fetch every entry's
// document, then merge the documents into the 'unprocessed' database doc.
// The for loop only starts each async.series; it does not wait for it to finish.
for (var retstart = 0; retstart < elsvr_count; retstart += elsvr_retSize) {
// `var` is function-scoped, so every iteration shares this one variable.
var elsvr_resultChunk;
async.series(
[
// Step 1: fetch one page of search results.
function(callback){
// NOTE(review): retstart does not appear in this URL, so each pass builds the same query.
var elsvr_Query = String(elsvr_baseURL) + "apiKey="+ String(elsvr_apiKey) + "&query=af-id(" + String(elsvr_ID) + ")&httpAccept=application/" + String(elsvr_resultType) + "&count=" + String(elsvr_retSize) + "&view=";
$.get(elsvr_Query, function(result) {
// The loop bound is assigned here, inside an asynchronous callback.
elsvr_count = parseInt(result["search-results"]["opensearch:totalResults"]); //the number of results
console.log("count set at " + elsvr_count);
elsvr_resultChunk = result["search-results"]["entry"]; //the current chunk of the total result, the size of which elsvr_retSize
callback(null);
});//end get
},
// Step 2: fetch each entry's document, one at a time.
function(callback){
// The iterator's `callback` shadows this step's `callback`.
async.eachSeries(elsvr_resultChunk, function(item, callback){
var docQuery = item["prism:url"] + "?apiKey=" + String(elsvr_apiKey) + "&httpAccept=application/" + String(elsvr_resultType);
$.ajax({
url: docQuery,
type: 'GET',
dataType: 'json',
success: function(result){
elsvr_results.push(result);
return callback(null);
},
// Failures are counted and logged, then reported to eachSeries as success.
error: function(err){
console.log("error returned: "+err.statusText);
elsvr_errors = elsvr_errors+1;
return callback(null);
}
});
},
// On error this only logs; the step's callback is not called in that branch.
function(err, results) {
if (err) console.log("error: " + err);
else
callback(null, elsvr_results);
});
}
],
//callback from async.series
function (err, results){
if (err)
console.log("ERROR: " + JSON.stringify(err));
else {
db.getDoc('unprocessed', function(er, doc){
if (er) throw new Error(JSON.stringify(er));
// results[1] is what step 2 passed to its callback, i.e. elsvr_results.
if (doc.elsvr != undefined)
doc.elsvr = _.extend(results[1], doc.elsvr);
else
doc.elsvr = results[1];
db.saveDoc('unprocessed', doc, function(er, ok) {
if (er) throw new Error(JSON.stringify(er));
console.log('saved a chunk to the database: ' + db.name);
});
});
}
}
);//end async.series
}//end for loop
Okay, I finally solved it. Turns out the loop was not executing synchronously, and since I update the loop conditional (elsvr_count) inside the async series, the loop was finished before the conditional was actually set properly. However, that wasn't the only issue--if I removed that conditional update and simply set elsvr_count to be a high number outside the loop, the loop would still run asynchronously before the async series returned.
I'll post my solution in case anyone else runs into a similar problem: Basically, replace the for loop with an async.whilst loop as follows
// Replacement for the for loop: the body runs again only after it has called
// `callback`, and only while the test function (retstart < elsvr_count) is true.
// The //... lines are the author's placeholders for the elided async.series body.
async.whilst(function() { return retstart < elsvr_count; },
function(callback) {
var elsvr_resultChunk;
async.series(
//...
//same stuff as above goes here
//...
//save the document to the database
db.saveDoc('unprocessed', doc, function(er, ok) {
// If this throws, `callback` below is never reached.
if (er) throw new Error(JSON.stringify(er));
retstart += elsvr_retSize; //<-- to replace the increment of the for loop
callback(null); //<-- important
});
//...
//...
);//end async.series
},
//callback for the whilst loop. this will be called once the condition (retstart < elsvr_count) is no longer met
function(err) {
//do stuff in here
}
);//end async.whilst
So basically I am making a database query, to get all posts with a certain id, then add them to a list, so I can return. But the list is returned, before the callback has finished.
How do I prevent it from being returned before callback has finished?
// Question code: looks up all entries with the given opid and returns them as
// a list. Per the question, the list is returned before the find callback has run.
exports.getBlogEntries = function(opid) {
var list12 =[];
Entry.find({'opid' : opid}, function(err, entries) {
if(!err) {
console.log("adding");
entries.forEach( function(currentEntry){
list12.push(currentEntry);
});
}
else {
console.log("EEEERROOR");
}
//else {console.log("err");}
});
// These two lines sit outside the find callback, so they do not wait for it.
console.log(list12);
return list12;
};
These callbacks are all asynchronous, so there is no guarantee that they will run in the order in which they appear in the code.
To fix this, make the process "synchronous" and guarantee the order of execution, you have two solutions:
First: nest the calls inside one another:
instead of this:
// Counter-example: both queries are started back to back, so `callback` is
// invoked twice (once per query) and nothing here orders the two invocations.
MyModel1.find({}, function(err, docsModel1) {
callback(err, docsModel1);
});
MyModel2.find({}, function(err, docsModel2) {
callback(err, docsModel2);
});
use this:
// Nested form: the second query starts only after the first has finished, and
// `callback` is invoked exactly once.
MyModel1.find({}, function(err, docsModel1) {
    if (err) {
        // Report the first query's error instead of silently dropping it.
        return callback(err);
    }
    MyModel2.find({}, function(err, docsModel2) {
        callback(err, docsModel1, docsModel2);
    });
});
The last snippet above guarantees that the MyModel2 query is executed AFTER the MyModel1 query has finished.
Second: use a library such as Async. It is awesome and has several helper functions to execute code in series, in parallel, or whichever way we want.
Example:
// Runs the named tasks one after the other; each task's result is collected
// under the task's own property name.
async.series(
    {
        function1(next) {
            //your first code here
            //...
            next(null, 'some result here');
        },
        function2(next) {
            //your second code here (called only after the first one)
            next(null, 'another result here');
        }
    },
    function(err, collected) {
        //capture the results from function1 and function2
        //if function1 raise some error, function2 will not be called.
        collected.function1; // 'some result here'
        collected.function2; // 'another result here'
        //do something else...
    }
);
You could use sync database calls but that would work around the concept of node.js.
The proper way is to pass a callback to the function that queries the database and then call the provided callback inside the database query callback.
How do I prevent it from being returned before callback has finished?
The callback is asynchronous, and you cannot avoid that. Hence, you must not return a list.
Instead, offer a callback for when it's filled. Or return a Promise for the list. Example:
exports.getBlogEntries = function(opid, callback) {
Entry.find({'opid': opid}, callback); // yes, that's it.
// Everything else was boilerplate code
};
There is an alternate way to handle this scenario. You can use the async module and when the forEach has finished then make the return call. Please find the code snippet below for the same:
var async = requires('async');
exports.getBlogEntries = function(opid) {
var list12 =[];
Entry.find({'opid' : opid}, function(err, entries) {
if(!err) {
console.log("adding");
async.forEachSeries(entries,function(entry,returnFunction){
list12.push(entry);
},function(){
console.log(list12);
return list12;
});
}
else{
console.log("EEEERROOR");
}
});
};