forcing completion of an rxjs observer - javascript

I've got an rxjs observer (really a Subject) that tails a file forever, just like tail -f. It's awesome for monitoring logfiles, for example.
This "forever" behavior is great for my application, but terrible for testing. Currently my application works but my tests hang forever.
I'd like to force an observer chain to complete early, because my test code knows how many lines should be in the file. How do I do this?
I tried calling onCompleted on the Subject handle I returned, but at that point it's basically cast as an observer and you can't force it to close; the error is:
Object #<Object> has no method 'onCompleted'
Here's the source code:
function ObserveTail(filename) {
  var source = new Rx.Subject();
  if (fs.existsSync(filename) == false) {
    console.error("file doesn't exist: " + filename);
  }
  var lineSep = /[\r]{0,1}\n/;
  var tail = new Tail(filename, lineSep, {}, true);
  tail.on("line", function(line) {
    source.onNext(line);
  });
  tail.on('close', function(data) {
    console.log("tail closed");
    source.onCompleted();
  });
  tail.on('error', function(error) {
    console.error(error);
  });
  this.source = source;
}
And here's the test code, which can't figure out how to force "forever" to end (tape-style test). Note the "ILLEGAL" line:
test('tailing a file works correctly', function(tid) {
  var lines = 8;
  var i = 0;
  var filename = 'tape/tail.json';
  var handle = new ObserveTail(filename);
  touch(filename);
  handle.source
    .filter(function(x) {
      try {
        JSON.parse(x);
        return true;
      } catch (error) {
        tid.pass("correctly caught illegal JSON");
        return false;
      }
    })
    .map(function(x) { return JSON.parse(x); })
    .map(function(j) { return j.name; })
    .timeout(10000, "observer timed out")
    .subscribe(
      function(name) {
        tid.equal(name, "AssetMgr", "verified name field is AssetMgr");
        i++;
        if (i >= lines) {
          handle.onCompleted(); // XXX ILLEGAL
        }
      },
      function(err) {
        console.error(err);
        tid.fail("err leaked through to subscriber");
      },
      function() {
        tid.end();
        console.log("Completed");
      }
    );
})

It sounds like you solved your problem, but to your original question:
I'd like to force an observer chain to complete early, because my test code knows how many lines should be in the file. How do I do this?
In general the use of Subjects is discouraged when you have better alternatives, since they tend to be a crutch for people to use programming styles they are familiar with. Instead of trying to use a Subject, I would suggest that you think about what each event would mean in an Observable's life cycle.
Wrap Event Emitters
There already exists a wrapper for the EventEmitter#on/off pattern in the form of Observable.fromEvent. It handles cleanup and keeps the subscription alive only while there are listeners. Thus ObserveTail can be refactored into:
function ObserveTail(filename) {
  return Rx.Observable.create(function(observer) {
    var lineSep = /[\r]{0,1}\n/;
    var tail = new Tail(filename, lineSep, {}, true);
    var line = Rx.Observable.fromEvent(tail, "line");
    var close = Rx.Observable.fromEvent(tail, "close");
    var error = Rx.Observable.fromEvent(tail, "error")
      .flatMap(function(err) { return Rx.Observable.throw(err); });

    // Only take events until close occurs, and merge in the error for good measure.
    // The latter two are terminal events in this case.
    return line.takeUntil(close).merge(error).subscribe(observer);
  });
}
This has several benefits over the vanilla use of Subjects: first, you will now actually see the error downstream, and second, it will handle cleaning up your event listeners when you are done with them.
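As a quick hedged usage sketch (the filename and handler bodies are made up), errors from the tail now reach the subscriber, and disposing the subscription detaches the underlying event handlers:

var subscription = ObserveTail("app.log")
  .subscribe(
    function(line) { console.log("line:", line); },
    function(err) { console.error("tail error:", err); }, // errors are now visible downstream
    function() { console.log("done"); }
  );
// Later: disposing cleans up the fromEvent listeners automatically.
subscription.dispose();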
Avoid *Sync Methods
Then this can be rolled into your file existence checking without the use of existsSync:
// If it doesn't exist then we are done here.
// You could also throw from the filter if you want an error tracked.
// Note: fs.exists does not follow the (err, result) callback convention,
// so fromCallback (rather than fromNodeCallback) is the appropriate wrapper.
var source = Rx.Observable.fromCallback(fs.exists)(filename)
  .filter(function(exists) { return exists; })
  .flatMap(ObserveTail(filename));
Next you can simplify your filter/map/map sequence down by using flatMap instead.
var result = source.flatMap(function(x) {
    try {
      return Rx.Observable.just(JSON.parse(x));
    } catch (e) {
      return Rx.Observable.empty();
    }
  },
  // This result selector lets you map the parsed value
  function(x, json) {
    return json.name;
  })
  .timeout(10000, "observer timed out");
Don't signal, unsubscribe
How do you signal a stop when streams only travel in one direction? We rarely actually want an Observer to communicate directly with an Observable, so a better pattern is not to "signal" a stop at all, but simply to unsubscribe from the Observable and leave it to the Observable's behavior to determine what it should do from there.
Essentially your Observer really shouldn't care about your Observable beyond saying "I'm done here".
To do that you need to declare the condition under which you want to stop.
In this case, since you are simply stopping after a set number of lines in your test case, you can use take to unsubscribe. Thus the final subscribe block would look like:
result
  // After lines is reached, this will complete.
  .take(lines)
  .subscribe(
    function(name) {
      tid.equal(name, "AssetMgr", "verified name field is AssetMgr");
    },
    function(err) {
      console.error(err);
      tid.fail("err leaked through to subscriber");
    },
    function() {
      tid.end();
      console.log("Completed");
    }
  );
Edit 1
As pointed out in the comments, in the case of this particular API there isn't a real "close" event, since Tail is essentially an infinite operation. In this sense it is no different from a mouse event handler: we stop sending events when people stop listening. So your block would probably end up looking like:
function ObserveTail(filename) {
  return Rx.Observable.create(function(observer) {
    var lineSep = /[\r]{0,1}\n/;
    var tail = new Tail(filename, lineSep, {}, true);
    var line = Rx.Observable.fromEvent(tail, "line");
    var error = Rx.Observable.fromEvent(tail, "error")
      .flatMap(function(err) { return Rx.Observable.throw(err); });

    // There is no close event here; errors are still terminal, and the
    // finally block unwatches the tail once the last subscriber leaves.
    return line
      .finally(function() { tail.unwatch(); })
      .merge(error).subscribe(observer);
  }).share();
}
The addition of the finally and share operators creates an Observable which will attach to the tail when a new subscriber arrives and will remain attached as long as there is at least one subscriber still listening. Once all the subscribers are done, however, we can safely unwatch the tail.
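To illustrate the resulting refCount semantics, here is a hedged sketch (the filename and subscriber bodies are made up):

var tailed = ObserveTail("app.log");

var first = tailed.subscribe(function(l) { console.log("first:", l); });   // attaches the Tail
var second = tailed.subscribe(function(l) { console.log("second:", l); }); // shares the same Tail

first.dispose();  // the Tail stays attached; "second" is still listening
second.dispose(); // last subscriber gone: finally fires and tail.unwatch() runs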

Related

Any way to kill String.prototype.match after specified time passed? [duplicate]

Is it possible to cancel a regex.match operation if it takes more than 10 seconds to complete?
I'm using a huge regex to match specific text, and sometimes it works and sometimes it fails...
regex: MINISTÉRIO(?:[^P]*(?:P(?!ÁG\s:\s\d+\/\d+)[^P]*)(?:[\s\S]*?))PÁG\s:\s+\d+\/(\d+)\b(?:\D*(?:(?!\1\/\1)\d\D*)*)\1\/\1(?:[^Z]*(?:Z(?!6:\s\d+)[^Z]*)(?:[\s\S]*?))Z6:\s+\d+
Working example: https://regex101.com/r/kU6rS5/1
So... I want to cancel the operation if it takes more than 10 seconds. Is it possible? I'm not finding anything related on SO.
Thanks.
You could spawn a child process that does the regex matching and kill it off if it hasn't completed in 10 seconds. Might be a bit overkill, but it should work.
fork is probably what you should use, if you go down this road.
If you'll forgive my non-pure functions, this code would demonstrate the gist of how you could communicate back and forth between the forked child process and your main process:
index.js
const { fork } = require('child_process');

const processPath = __dirname + '/regex-process.js';
const regexProcess = fork(processPath);
let received = null;

regexProcess.on('message', function(data) {
  console.log('received message from child:', data);
  clearTimeout(timeout);
  received = data;
  regexProcess.kill(); // or however you want to end it. just as an example.
  // you have access to the regex data here.
  // send to a callback, or resolve a promise with the value,
  // so the original calling code can access it as well.
});

const timeoutInMs = 10000;
let timeout = setTimeout(() => {
  if (!received) {
    console.error('regexProcess is still running!');
    regexProcess.kill(); // or however you want to shut it down.
  }
}, timeoutInMs);

regexProcess.send('message to match against');
regex-process.js
function respond(data) {
  process.send(data);
}

function handleMessage(data) {
  console.log('handling message:', data);
  // run your regex calculations in here
  // then respond with the data when it's done.
  // the following is just to emulate
  // a synchronous computational delay
  for (let i = 0; i < 500000000; i++) {
    // spin!
  }
  respond('return regex process data in here');
}

process.on('message', handleMessage);
This might just end up masking the real problem, though. You may want to consider reworking your regex like other posters have suggested.
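For completeness, here is one hedged way to wrap that fork round trip in a Promise, so callers get either the match data or a timeout rejection (the file path and message contents are assumptions carried over from the sketch above):

const { fork } = require('child_process');

function matchWithTimeout(text, timeoutInMs) {
  return new Promise((resolve, reject) => {
    const child = fork(__dirname + '/regex-process.js');
    const timer = setTimeout(() => {
      child.kill(); // give up on the stuck regex
      reject(new Error('regex process timed out'));
    }, timeoutInMs);
    child.on('message', (data) => {
      clearTimeout(timer);
      child.kill(); // done with the worker
      resolve(data);
    });
    child.send(text);
  });
}

matchWithTimeout('message to match against', 10000)
  .then((result) => console.log('matched:', result))
  .catch((err) => console.error(err.message));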
Another solution I found here:
https://www.josephkirwin.com/2016/03/12/nodejs_redos_mitigation/
It's based on the vm module, with no process fork.
That's pretty neat.
const util = require('util');
const vm = require('vm');

var sandbox = {
  regex: /^(A+)*B/,
  string: "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC",
  result: null
};

var context = vm.createContext(sandbox);
console.log('Sandbox initialized: ' + vm.isContext(sandbox));
var script = new vm.Script('result = regex.test(string);');

try {
  // One could argue that if a RegExp hasn't finished in a given time,
  // then it's likely it will take exponential time.
  script.runInContext(context, { timeout: 1000 }); // milliseconds
} catch (e) {
  console.log('ReDoS occurred', e); // Take some remedial action here...
}

console.log(util.inspect(sandbox)); // Check the results
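If you reuse this, it could be folded into a small helper; a sketch assuming a fresh sandbox per call is acceptable:

const vm = require('vm');

function safeRegexTest(regex, string, timeoutMs) {
  const sandbox = { regex: regex, string: string, result: null };
  const context = vm.createContext(sandbox);
  const script = new vm.Script('result = regex.test(string);');
  try {
    script.runInContext(context, { timeout: timeoutMs });
  } catch (e) {
    return null; // timed out: report "no answer" instead of hanging
  }
  return sandbox.result;
}

// Likely null here: catastrophic backtracking should trip the timeout.
console.log(safeRegexTest(/^(A+)*B/, 'AAAAAAAAAAAAAAAAAAAAAAAAAC', 1000));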

Wait for a child process to finish before really going down

Within a module I am coding, I have an on-shutdown sequence that is meant to kill a good bunch of workers (32+). Problem is that the parent process simply cuts away and leaves the children on their own. They do exit through their own routines, but there is no way the parent would wait for the children to finish! (Dat bad daddy doe.)
I have been trying everything with Async and other stuff, but nothing would really stick. I even tried solutions such as deasync - which is based on fibers - but it simply doesn't do anything.
The Processes object looks like so:
{
"Worker group": {
config: {/*...*/},
children: [
{ /* child_process.spawn or cluster.Worker instance */ }
]
}
}
So what I am trying to do is iterate over each group of workers and SIGTERM them. But no matter what, the parent exits and leaves the children on their own. Here's my code:
// Shutdown handlers
var _shut = false;
this.addShutdownHandler(function(ctx, next) {
  if (_shut) return;
  _shut = true;
  for (var id in this.procs) {
    var p = this.procs[id];
    async.forEachOf(p.children, function(c, n, step) {
      c.on("exit", step).kill();
    }, next);
  }
}.bind(this));
As you can see, I assigned the step method as a listener...but that doesn't change anything. How can I make sure that step is called when the process is really offline AND without making node hiccup to the end with no further warning? Thanks!
Edit
I actually managed to find a solution, but it is not entirely what I had looked for. It does the job, but looking at the code, you may understand why I call this "hacky".
// Shutdown handlers
var _shut = false;
this.addShutdownHandler(function(ctx, next) {
  if (_shut) return;
  _shut = true;

  // Merge all the children together.
  var allChildren = [];
  for (var id in this.procs) {
    var p = this.procs[id];
    p.children.forEach(function(c) {
      // Trigger shutdown and add to list.
      c.on("exit", function() {
        c._exited = true;
        c._exitArgs = arguments;
      }).kill(PowerHouse.KILL_SIGNAL);
      allChildren.push(c);
    });
  }

  // Make sure they all are gone.
  var allDone = false;
  async.whilst(
    function() {
      return !allDone;
    },
    function(proceed) {
      var allTrue = [];
      var newChildren = [];
      allChildren.forEach(function(c, i, ref) {
        if (!c._exited) {
          if (c.pid) {
            try {
              process.kill(c.pid, 0);
            } catch (e) {
              c._exited = true;
            }
          } else if (c.process && c.process.pid) {
            try {
              process.kill(c.process.pid, 0);
            } catch (e) {
              c._exited = true;
            }
          } else if (c.isDead) {
            c._exited = c.isDead();
          }
        }
        // Overwriting the other array
        if (!c._exited) {
          newChildren.push(c);
        }
        allTrue.push(c._exited);
      });
      if (allTrue.length > 0) {
        for (var i in allTrue) {
          if (!allTrue[i]) {
            allDone = false;
            break;
          }
        }
      } else {
        // There are NO entries. It's safe to say...
        allDone = true;
      }
      allChildren = newChildren;
      // async.nextTick does NOT do this...? I am really surprised.
      // FIXME: ...an answer.
      async.setImmediate(proceed);
    },
    function(err) {
      next(err);
    }
  );
}.bind(this));
Further, I should mention that when the parent had exited, I would be returned to my shell and suddenly messages from exiting child processes appeared on the same line as the shell prompt - i.e. as if I had typed that stuff in. I could screenshot it if wanted, but I believe it's easily imaginable.
From what I gather (How to prevent child node processes from getting killed with the parent node process?) when the parent dies the children should as well.
Is there something in the child processes that would make them ignore SIGTERM? An active DB connection, etc.? I would try explicitly catching SIGTERM in your child processes and doing any necessary cleanup in the handler, as shown in the sketch below. At the very least this will get the children to exit closer to when the parent does.
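A minimal sketch of such a child-side handler (the cleanup calls are hypothetical placeholders for whatever actually keeps your workers alive):

// In each worker process
process.on('SIGTERM', function() {
  console.log('worker ' + process.pid + ' shutting down');
  // Close whatever keeps the event loop alive, e.g. (hypothetical):
  // dbConnection.end();
  // server.close();
  process.exit(0); // exit explicitly once cleanup is done
});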

RxJs Dynamically add events from another EventEmitter

I have an Observable coming from an EventEmitter which is really just an HTTP connection streaming events.
Occasionally I have to disconnect from the underlying stream and reconnect. I am not sure how to handle this with rxjs.
I am not sure if I can complete a source and then dynamically add another "source" to it, or if I have to do something like I have at the very bottom.
var Rx = require('rx'),
    EventEmitter = require('events').EventEmitter;

var eventEmitter = new EventEmitter();
var eventEmitter2 = new EventEmitter();

var source = Rx.Observable.fromEvent(eventEmitter, 'data');

var subscription = source.subscribe(function (data) {
  console.log('data: ' + data);
});

setInterval(function() {
  eventEmitter.emit('data', 'foo');
}, 500);

// eventEmitter stops emitting data, underlying connection closed.
// Now attach second eventemitter (new connection),
// something like this, but obviously doesn't work:
source
  .fromEvent(eventEmitter2, 'data')
Pseudo-code that is closer to what I am doing: I create a second stream connection before I close the first, so I don't "lose" any data. Here I am not sure how to stop the Observable without "losing" records, since onNext may not yet have been called due to the buffer.
var streams = [], notifiers = [];

// create initial stream
createNewStream();

setInterval(function() {
  if (params of stream have changed) createNewStream();
}, $1minutes / 3);

function createNewStream() {
  var stream = new eventEmitterStream();
  stream.once('connected', function() {
    stopOthers();
    streams.push(stream);
    createSource(stream, 'name', 'id');
  });
}

function stopOthers() {
  while (streams.length > 0) {
    streams.pop().stop(); // stop the old stream
  }
  while (notifiers.length > 0) {
    // if i call this, the buffer may lose records before onNext() is called
    //notifiers.pop()(Rx.Notification.createOnCompleted());
  }
}

function createObserver(tag) {
  return Rx.Observer.create(
    function (x) {
      console.log('Next: ', tag, x.length, x[0], x[x.length - 1]);
    },
    function (err) {
      console.log('Error: ', tag, err);
    },
    function () {
      console.log('Completed', tag);
    });
}

function createSource(stream, event, id) {
  var source = Rx.Observable
    .fromEvent(stream, event)
    .bufferWithTimeOrCount(time, max);
  var subscription = source.subscribe(createObserver(id));
  var notifier = subscription.toNotifier();
  notifiers.push(notifier);
}
First and foremost, you need to make sure you can remove all listeners from your previously "dead" emitter; otherwise you'll create a leaky application.
It seems like the only way you'll know that an EventEmitter has died is to watch frequency, unless you have an event that fires on error or completion (for disconnections). The latter is much preferable.
Regardless, the secret sauce of Rx is making sure to wrap your data stream creation and teardown in your Observable. If you wrap the creation of the emitter in your Observable, as well as a means to tear it down, you'll be able to use awesome things like the retry operator to recreate that Observable.
So if you have no way of knowing if it died, and you want to reconnect it, you can use something like this:
// I'll presume you have some function to get an EventEmitter that
// is already set up
function getEmitter() {
  var emitter = new EventEmitter();
  setInterval(function() {
    emitter.emit('data', 'foo');
  }, 500);
  return emitter;
}

var emitterObservable = Observable.create(function(observer) {
  // set up the data stream
  var emitter = getEmitter();
  var handler = function(d) {
    observer.onNext(d);
  };
  emitter.on('data', handler);
  return function() {
    // tear down the data stream in your disposal function
    emitter.removeListener('data', handler);
  };
});
// Now you can do Rx magic!
emitterObservable
  // if it doesn't emit in 700ms, throw a timeout error
  .timeout(700)
  // catch all* errors and retry;
  // this means the emitter will be torn down and recreated
  // if it times out!
  .retry()
  // do something with the values
  .subscribe(function(x) { console.log(x); });
* NOTE: retry catches all errors, so you may want to add a catch above it to handle non-timeout errors. Up to you.
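If you'd rather not add a catch, a hedged alternative is retryWhen, retrying only on timeouts. This assumes RxJS 4, where the timeout operator's default error message is 'Timeout'; double-check against your version:

emitterObservable
  .timeout(700)
  .retryWhen(function(errors) {
    return errors.map(function(err) {
      // Assumption: RxJS 4 timeout errors carry the message 'Timeout'.
      if (err.message === 'Timeout') { return err; } // emitting triggers a retry
      throw err; // anything else propagates to onError
    });
  })
  .subscribe(function(x) { console.log(x); });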

Message Manager API sendAsyncMessage callback

I can return a value if I send a sync message:
// frame script
var chromeBtnText = sendSyncMessage("getChromeToolbarButtonText");
if (chromeBtnText == 'blah') {
  alert('tool is blah');
}

// chrome script
messageManager.addMessageListener("getChromeToolbarButtonText", listener);
function listener(message) {
  return document.getElementById('myChromeToolbarButton').label.value;
}
How do I achieve this with a callback with sendAsyncMessage?
I was hoping to do something like:
// frame script
function myCallback(val) {
  var chromeBtnText = val;
  if (chromeBtnText == 'blah') {
    alert('tool is blah');
  }
}
var chromeBtnText = sendAsyncMessage("getChromeToolbarButtonText", null, myCallback);
There is no callback for replies. In fact, there is no reply at all. The return value from the chrome message listener is simply ignored for async messages.
To do fully async communication, you'd have to send another message containing the reply.
Frame script
addMessageListener("getChromeToolbarButtonTextReply", function(message) {
alert(message.data.btnText);
});
sendAsyncMessage("getChromeToolbarButtonText");
Chrome
messageManager.addMessageListener("getChromeToolbarButtonText", function(message) {
var btnText = document.getElementById('myChromeToolbarButton').label.value;
// Only send message to the frame script/message manager
// that actually asked for it.
message.target.messageManager.sendAsyncMessage(
"getChromeToolbarButtonTextReply",
{btnText: btnText}
);
});
PS: All messages share a namespace. So to avoid conflicts when another piece of code wants to use the same name getChromeToolbarButtonText, you'd better choose a more unique name in the first place, like prefixing your messages with your add-on name, e.g. my-unique-addon:getChromeToolbarButtonText or something like that. ;)
I was also hoping to do something similar:
messageManager.sendAsyncMessage("my-addon-framescript-message", null, myCallback);
I'm going in the other direction, so myCallback would be in chrome, but it's exactly the same principle.
I'd used approaches similar to #Noitidart's and #nmaier's before, but in this new case I wanted to bind to some local data, so that myCallback can behave differently based on the application state at the time the first message was sent rather than at the time the callback is executed, all while allowing for the possibility of multiple message round-trips being in progress concurrently.
Chrome:
let someLocalState = { "hello": "world" };
let callbackName = "my-addon-somethingUnique"; // based on current state, or maybe generate a UUID
let myCallback = function(message) {
  messageManager.removeMessageListener(callbackName, myCallback);
  // message.data.foo == "bar"
  // someLocalState.hello == "world"
}.bind(this); // .bind(this) is optional but useful if the local state is attached to the current object
messageManager.addMessageListener(callbackName, myCallback);
messageManager.sendAsyncMessage("my-addon-framescript-message", { callbackName: callbackName });
Framescript:
let messageHandler = function(message) {
  let responseData = { foo: "bar" };
  sendAsyncMessage(message.data.callbackName, responseData);
};
addMessageListener("my-addon-framescript-message", messageHandler);
There's a real-world example here: https://github.com/luckyrat/KeeFox/commit/c50f99033d2d07068140438816f8cc5e5e290da9
It should be possible for Firefox to be improved to encapsulate this functionality in the built-in messageManager one day, but in the meantime this approach works well and with a surprisingly small amount of boilerplate code.
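For what it's worth, the same round trip also wraps naturally in a Promise; a hedged sketch with made-up message names:

let requestCounter = 0;
function askFrameScript(payload) {
  return new Promise(function(resolve) {
    // Unique reply channel per request, so concurrent round-trips don't collide.
    let callbackName = 'my-addon:reply-' + (requestCounter++);
    let handler = function(message) {
      messageManager.removeMessageListener(callbackName, handler);
      resolve(message.data);
    };
    messageManager.addMessageListener(callbackName, handler);
    messageManager.sendAsyncMessage('my-addon-framescript-message',
      { callbackName: callbackName, payload: payload });
  });
}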
In the snippet below, I add the callback before sendAsyncMessage('my-addon-id#jetpack:getChromeToolbarButtonText'...), as I know it will send back. Then I remove it after the callback executes. I know I don't have to, but it makes it act like a real callback; maybe it helps someone understand.
Frame:
/////// frame script
function CALLBACK_getChromeToolbarButtonText(message) {
  removeMessageListener('my-addon-id#jetpack:getChromeToolbarButtonTextCallbackMessage', CALLBACK_getChromeToolbarButtonText); // remove the callback
  var chromeBtnText = message.data; // listeners receive a message object; the payload is in .data
  if (chromeBtnText == 'blah') {
    alert('tool is blah');
  }
}
addMessageListener('my-addon-id#jetpack:getChromeToolbarButtonTextCallbackMessage', CALLBACK_getChromeToolbarButtonText); // add the callback
sendAsyncMessage('my-addon-id#jetpack:getChromeToolbarButtonText', null);
Chrome:
////// chrome script
messageManager.addMessageListener("my-addon-id#jetpack:getChromeToolbarButtonText", listener);
function listener(message) {
  var val = document.getElementById('myChromeToolbarButton').label.value;
  // Reply via the message manager that asked, as in the answer above.
  message.target.messageManager.sendAsyncMessage('my-addon-id#jetpack:getChromeToolbarButtonTextCallbackMessage', val);
}

Do I ever need to synchronize node.js code like in Java?

I have only recently started developing for node.js, so forgive me if this is a stupid question - I come from Javaland, where objects still live happily sequentially and synchronously. ;)
I have a key generator object that issues keys for database inserts using a variant of the high-low algorithm. Here's my code:
function KeyGenerator() {
  var nextKey;
  var upperBound;

  this.generateKey = function(table, done) {
    if (nextKey > upperBound) {
      require("../sync/key-series-request").requestKeys(function(err, nextKey, upperBound) {
        if (err) { return done(err); }
        this.nextKey = nextKey;
        this.upperBound = upperBound;
        done(nextKey++);
      });
    } else {
      done(nextKey++);
    }
  }
}
Obviously, when I ask it for a key, I must ensure that it never, ever issues the same key twice. In Java, if I wanted to enable concurrent access, I would make this synchronized.
In node.js, is there any similar concept, or is it unnecessary? I intend to ask the generator for a bunch of keys for a bulk insert using async.parallel. My expectation is that since node is single-threaded, I need not worry about the same key ever being issued more than once; can someone please confirm this is correct?
Obtaining a new series involves an asynchronous database operation, so if I do 20 simultaneous key requests, but the series has only two keys left, won't I end up with 18 requests for a new series? What can I do to avoid that?
UPDATE
This is the code for requestKeys:
exports.requestKeys = function (done) {
  var db = require("../storage/db");
  db.query("select next_key, upper_bound from key_generation where type='issue'", function(err, results) {
    if (err) { done(err); } else {
      if (results.length === 0) {
        // Somehow we lost the "issue" row - this should never have happened
        done(new Error("Could not find 'issue' row in key generation table"));
      } else {
        var nextKey = results[0].next_key;
        var upperBound = results[0].upper_bound;
        db.query("update key_generation set next_key=?, upper_bound=? where type='issue'",
          [nextKey + KEY_SERIES_WIDTH, upperBound + KEY_SERIES_WIDTH],
          function (err, results) {
            if (err) { done(err); } else {
              done(null, nextKey, upperBound);
            }
          });
      }
    }
  });
}
UPDATE 2
I should probably mention that consuming a key requires db access even if a new series doesn't have to be requested, because the consumed key will have to be marked as used in the database. The code doesn't reflect this because I ran into trouble before I got around to implementing that part.
UPDATE 3
I think I got it using event emitting:
function KeyGenerator() {
  var nextKey;
  var upperBound;
  var emitter = new events.EventEmitter();
  var requesting = true;

  // Initialize the generator with the stored values
  db.query("select * from key_generation where type='use'", function(err, results) {
    if (err) { throw err; }
    if (results.length === 0) {
      throw new Error("Could not get key generation parameters: Row is missing");
    }
    nextKey = results[0].next_key;
    upperBound = results[0].upper_bound;
    console.log("Setting requesting = false, emitting event");
    requesting = false;
    emitter.emit("KeysAvailable");
  });

  this.generateKey = function(table, done) {
    console.log("generateKey, state is:\n  nextKey: " + nextKey + "\n  upperBound: " + upperBound + "\n  requesting: " + requesting);
    if (nextKey > upperBound) {
      if (!requesting) {
        requesting = true;
        console.log("Requesting new series");
        require("../sync/key-series-request").requestSeries(function(err, newNextKey, newUpperBound) {
          if (err) { return done(err); }
          console.log("New series available:\n  nextKey: " + newNextKey + "\n  upperBound: " + newUpperBound);
          nextKey = newNextKey;
          upperBound = newUpperBound;
          requesting = false;
          emitter.emit("KeysAvailable");
          done(null, nextKey++);
        });
      } else {
        console.log("Key request is already underway, deferring");
        var that = this;
        emitter.once("KeysAvailable", function() { console.log("Executing deferred call"); that.generateKey(table, done); });
      }
    } else {
      done(null, nextKey++);
    }
  }
}
I've peppered it with logging outputs, and it does do what I want it to.
As another answer mentions, you will potentially end up with results different from what you want. Taking things in order:
function KeyGenerator() {
  // at first I was thinking you wanted these as 'class' properties
  // and thus would want to precede them with this. rather than declare them as vars,
  // but I think you want them as 'private' member variables of the
  // class instance. That's dandy, you'll just want to do things differently
  // down below
  var nextKey;
  var upperBound;

  this.generateKey = function (table, done) {
    if (nextKey > upperBound) {
      // truncated the require path below for readability.
      // more importantly, renamed the parameters of the function
      require("key-series-request").requestKeys(function(err, nKey, uBound) {
        if (err) { return done(err); }
        // note that thanks to the miracle of closures, you have access to
        // the nextKey and upperBound variables from the enclosing scope,
        // but I needed to rename the parameters or else they would shadow/
        // obscure the variables with the same name.
        nextKey = nKey;
        upperBound = uBound;
        done(nextKey++);
      });
    } else {
      done(nextKey++);
    }
  }
}
Regarding the .requestKeys function, you will need to somehow introduce some kind of synchronization. In one way this isn't actually terrible, because with only one thread of execution you don't need to sweat the challenge of setting your semaphore in a single operation. But it is challenging to deal with multiple callers, because you will want the other callers to effectively (but not really) block waiting for the first call to requestKeys(), which is going to the DB, to return.
I need to think about this part a bit more. I had a basic solution in mind which involved setting a simple semaphore and queuing the callbacks, but when I was typing it up I realized I was actually introducing a more subtle potential synchronization bug when processing the queued callbacks.
UPDATE:
I was just finishing up one approach as you were writing about your EventEmitter approach, which seems reasonable. See this gist, which illustrates the approach I took. Just run it and you'll see the behavior. It has some console logging to show which calls get deferred for a new key block and which can be handled immediately. The primary moving part of the solution is below (note that the keyManager provides the stubbed-out implementation of your require('key-series-request')):
function KeyGenerator(km) {
  this.nextKey = undefined;
  this.upperBound = undefined;
  this.imWorkingOnIt = false;
  this.queuedCallbacks = [];
  this.keyManager = km;

  this.generateKey = function(table, done) {
    if (this.imWorkingOnIt) {
      this.queuedCallbacks.push(done);
      console.log('KG deferred call. Pending CBs: ' + this.queuedCallbacks.length);
      return;
    }
    var self = this;
    if ((typeof(this.nextKey) === 'undefined') || (this.nextKey > this.upperBound)) {
      // set a semaphore & add the callback to the queued callback list
      this.imWorkingOnIt = true;
      this.queuedCallbacks.push(done);
      this.keyManager.requestKeys(function(err, nKey, uBound) {
        if (err) { return done(err); }
        self.nextKey = nKey;
        self.upperBound = uBound;
        var theCallbackList = self.queuedCallbacks;
        self.queuedCallbacks = [];
        self.imWorkingOnIt = false;
        theCallbackList.forEach(function(f) {
          // rather than making the final callback directly,
          // call KeyGenerator.generateKey() with the original
          // callback
          setImmediate(function() { self.generateKey(table, f); });
        });
      });
    } else {
      console.log('KG immediate call', self.nextKey);
      var z = self.nextKey++;
      setImmediate(function() { done(z); });
    }
  }
}
If your Node.js code to calculate the next key didn't need to execute an async operation, then you wouldn't run into synchronization issues, because there is only one JavaScript thread executing code. Access to the nextKey/upperBound variables would be done in sequence by that one thread (i.e. request 1 accesses them first, then request 2, then request 3, et cetera). In the Java world you always need synchronization because multiple threads will be executing even if you don't make a DB call.
However, in your Node.js code, since you are making an async call to get the nextKey, you could get strange results. There is still only one JavaScript thread executing your code, but it would be possible for request 1 to make the call to the DB, then Node.js might accept request 2 (while request 1 is getting data from the DB) and this second request would also make a request to the DB to get keys. Let's say that request 2 gets data from the DB quicker than request 1 and updates the nextKey/upperBound variables with values 100/150. Once request 1 gets its data (say values 50/100), it will update nextKey/upperBound again. This scenario wouldn't result in duplicate keys, but you might see gaps in your keys (for example, not all keys 100 to 150 will be used, because request 1 eventually reset the values to 50/100).
This makes me think that you will need a way to synchronize access, but I am not exactly sure what the best way to achieve this would be.
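One hedged sketch of such synchronization, in the spirit of the gist above: let the first caller trigger the DB fetch and queue everyone else behind it (requestKeys is your existing function; this only serializes the series fetch, not key handout):

var pendingCallbacks = [];
var fetchInFlight = false;

function withFreshSeries(done) {
  pendingCallbacks.push(done);
  if (fetchInFlight) { return; } // a series request is already on its way
  fetchInFlight = true;
  requestKeys(function(err, nextKey, upperBound) {
    fetchInFlight = false;
    var callbacks = pendingCallbacks;
    pendingCallbacks = [];
    // Every queued caller sees the same freshly fetched series exactly once.
    callbacks.forEach(function(cb) { cb(err, nextKey, upperBound); });
  });
}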
