Access worker environment from master (Node.js Cluster)

Access worker environment from master (Node.js Cluster) - javascript

I fork workers in my Node.js application via the Cluster module and pass a custom ID to the environment of all my workers. That works well so far.
However, I do not know how I can access this id in my master when an 'online' or 'exit' event is emitted.
The documentation is not very helpful. Could you please point me the right way?
var cluster = require('cluster');

if (!cluster.isMaster) {
  // WORKER: the custom id handed to cluster.fork() shows up in the worker's own environment.
  console.log(process.env.worker_id); // my_custom_id
} else {
  // MASTER

  // Spawns a worker whose environment carries the custom id.
  const fork = () => {
    const worker_env = { worker_id: 'my_custom_id' };
    cluster.fork(worker_env);
  };

  cluster.on('online', (worker) => {
    console.log(worker.process.env.worker_id); // undefined
    //
    // How can I access my custom worker id here?
    //
  });

  cluster.on('exit', (worker, code, signal) => {
    //
    // And here...?
    //
    // Replace the worker that just went away.
    fork();
  });
}

There's no way; the worker process env is not exposed to the master.
One approach is to keep a map of the cluster (an object containing the needed info).
Something like this:
var cluster = require('cluster');

if (true === cluster.isMaster) {
  //CODE EXECUTED BY MASTER
  var cluster_map = {}; // cluster worker.id -> custom id given to that worker at fork time
  var restart_Limit = 10; // max global worker restart (10)

  /**
   * Forks one worker, records its custom id in cluster_map and schedules its
   * automatic shutdown after 3 seconds.
   *
   * @param {string} myWorkerId custom id exposed to the worker as process.env.worker_id
   */
  function fork_worker(myWorkerId) {
    // this makes worker_id available in the worker
    var worker = cluster.fork({
      worker_id: myWorkerId
    });

    // max restarts limit (global): worker.id grows with every fork, so it
    // doubles as a count of how many workers have been started so far.
    if (worker.id >= restart_Limit) {
      console.log('Restart limit reached, bye!');
      // process.kill() needs a pid and throws a TypeError when called without
      // one; process.exit() is what actually terminates the master.
      process.exit();
    }

    // here we add the key "myWorkerId" to the cluster map
    cluster_map[worker.id] = myWorkerId;

    // WORKER AUTO-KILL
    setTimeout(function() {
      console.log('stoping...' + myWorkerId);
      worker.kill();
    }, 3000);
  }

  cluster.on('online', function(worker) {
    // Look the custom id up by the cluster-assigned worker id.
    var online_proc = cluster_map[worker.id];
    console.log('worker online: ' + online_proc + '\n Restarts: ' + worker.id);
  });

  cluster.on('exit', function(worker, code, signal) {
    var exited_proc = cluster_map[worker.id];
    // delete the process from the cluster map
    delete cluster_map[worker.id];
    console.log("worker offline: " + exited_proc);

    // WORKER AUTO-RESTART: re-fork under the same custom id after 3 seconds
    setTimeout(function() {
      console.log('Restarting... ' + exited_proc);
      fork_worker(exited_proc);
    }, 3000);
  });

  // start the magic ( 3 workers )
  (function() {
    fork_worker('id_1');
    fork_worker('id_2');
    fork_worker('id_3');
  })();
} else {
  //CODE EXECUTED BY EACH WORKER (process env is present here).
  console.log('hi from the worker, process.env: ' + process.env.worker_id);
  // all the hard work for the workers here.
}

Related

Running node-rdkafka code in server

I'm running the below node-rdkafka code in Eclipse as Node.js application. This is the sample code from https://blizzard.github.io/node-rdkafka/current/tutorial-producer_.html
I want to run this in a test server and call from iOS Mobile application.
I knew about running node.js app in AWS.
Question I: Are there any other options for running this in a free test server environment, like Tomcat?
Question II: Even if I am able to run this Node.js app on a server, how do I call it from a mobile application? Do I need to call producer.on('ready', function(arg) {...}), or which function do I need to call from the mobile app?
var Kafka = require('node-rdkafka');
//console.log(Kafka.features);
//console.log(Kafka.librdkafkaVersion);

// Producer pointed at a local broker, with delivery reports enabled (dr_cb).
var producer = new Kafka.Producer({
  'metadata.broker.list': 'localhost:9092',
  'dr_cb': true
});

var topicName = 'MyTest';

//logging debug messages, if debug is enabled
producer.on('event.log', function(log) {
  console.log(log);
});

//logging all errors
producer.on('event.error', function(err) {
  console.error('Error from producer');
  console.error(err);
});

//counter to stop this sample after maxMessages are sent
var counter = 0;
var maxMessages = 10;

producer.on('delivery-report', function(err, report) {
  console.log('delivery-report: ' + JSON.stringify(report));
  counter++;
});

//Wait for the ready event before producing
producer.on('ready', function(arg) {
  console.log('producer ready.' + JSON.stringify(arg));

  for (var i = 0; i < maxMessages; i++) {
    // Buffer.from replaces the deprecated `new Buffer(string)` constructor.
    var value = Buffer.from('MyProducerTest - value-' + i);
    var key = "key-" + i;
    // if partition is set to -1, librdkafka will use the default partitioner
    var partition = -1;
    producer.produce(topicName, partition, value, key);
  }

  //need to keep polling for a while to ensure the delivery reports are received
  var pollLoop = setInterval(function() {
    producer.poll();
    // Stop once every message produced above has been reported.
    if (counter === maxMessages) {
      clearInterval(pollLoop);
      producer.disconnect();
    }
  }, 1000);
});

/*
producer.on('disconnected', function(arg) {
console.log('producer disconnected. ' + JSON.stringify(arg));
});*/

//starting the producer
producer.connect();

First of all, you need an HTTP server. ExpressJS can be used. Then, just tack on the Express code basically at the end, but move the producer loop into the request route.
So, start with what you had
var Kafka = require('node-rdkafka');
//console.log(Kafka.features);
//console.log(Kafka.librdkafkaVersion);

// Producer pointed at a local broker, with delivery reports enabled (dr_cb).
var producer = new Kafka.Producer({
  'metadata.broker.list': 'localhost:9092',
  'dr_cb': true
});

var topicName = 'MyTest';

//logging debug messages, if debug is enabled
producer.on('event.log', function(log) {
  console.log(log);
});

//logging all errors
producer.on('event.error', function(err) {
  console.error('Error from producer');
  console.error(err);
});

// The message counter from the original sample is gone, so this handler must
// not touch it any more (incrementing it here raised a ReferenceError).
producer.on('delivery-report', function(err, report) {
  if (err) {
    console.error('delivery-report error:', err);
  }
  console.log('delivery-report: ' + JSON.stringify(report));
});

//Wait for the ready event before producing
producer.on('ready', function(arg) {
  console.log('producer ready.' + JSON.stringify(arg));
  // Delivery reports and other queued events are only emitted while the
  // producer is polled; let the client poll on its own now that the manual
  // poll loop has been removed.
  producer.setPollInterval(100);
});

producer.on('disconnected', function(arg) {
  console.log('producer disconnected. ' + JSON.stringify(arg));
});

//starting the producer
producer.connect();
Then, you can add this in the same file.
var express = require('express');
var app = express();

// Upper bound on messages per request: the count comes straight from the URL,
// and an unbounded synchronous loop would block the event loop.
var MAX_MESSAGES_PER_REQUEST = 10000;

// Simple liveness route.
app.get('/', (req, res) => res.send('Ready to send messages!'));

/**
 * POST /:maxMessages
 * Produces `maxMessages` test messages to `topicName` and always answers the
 * caller (the handler previously never sent a response, so clients hung).
 */
app.post('/:maxMessages', function (req, res) {
  var maxMessages = parseInt(req.params.maxMessages, 10);

  // Reject anything that is not a sensible positive count.
  if (isNaN(maxMessages) || maxMessages < 1 || maxMessages > MAX_MESSAGES_PER_REQUEST) {
    return res
      .status(400)
      .send('maxMessages must be an integer between 1 and ' + MAX_MESSAGES_PER_REQUEST);
  }

  try {
    for (var i = 0; i < maxMessages; i++) {
      // Buffer.from replaces the deprecated `new Buffer(string)` constructor.
      var value = Buffer.from('MyProducerTest - value-' + i);
      var key = "key-" + i;
      // if partition is set to -1, librdkafka will use the default partitioner
      var partition = -1;
      producer.produce(topicName, partition, value, key);
    } // end for
  } catch (err) {
    // produce() throws synchronously, e.g. when the producer is not connected.
    console.error('Failed to produce messages', err);
    return res.status(500).send('Failed to produce messages');
  }

  res.send('Queued ' + maxMessages + ' messages');
}); // end app.post()

app.listen(3000, () => console.log('Example app listening on port 3000!'));
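I don't think the hand-written poll loop is necessary, since you don't care about the counter anymore. Note, however, that delivery reports and other producer events are only emitted while the producer is being polled, so if you want to see them, enable automatic polling with producer.setPollInterval(...).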
Now, connect your mobile app to http://<your server IP>:3000/ and send test messages with a POST request to http://<your server IP>:3000/10, for example, and adjust to change the number of messages to send

I might be late on this but this is how I did using promises and found it better than have a time out etc.
/**
 * Publishes the `data` field of the request body to Kafka and settles once the
 * broker acknowledges (or rejects) the message.
 *
 * Reads from req.body: `topicName`, `data`, optional `key`, optional `partition`.
 *
 * @param {object} req incoming request; only req.body is read
 * @param {object} res unused here; the caller sends the response from .then/.catch
 * @returns {Promise<{teamName: *, topicName: string, key: string}>} resolves with
 *   the acknowledged topic/key; rejects with the producer error, the produce()
 *   exception, or `{ value, error }` for a failed delivery report
 */
const postMessageToPublisher = (req, res) => {
  return new Promise((resolve, reject) => {
    // Declare requestBody first: reading it before its `const` declaration
    // throws a ReferenceError (temporal dead zone).
    const requestBody = req.body;
    const actualBody = requestBody.data;
    const topicName = requestBody.topicName;
    const key = requestBody.key || uuid();
    // Keep an explicit partition 0; only a missing value falls back to the default.
    const partition = requestBody.partition == null ? undefined : requestBody.partition;
    const data = Buffer.from(JSON.stringify(actualBody));

    /**
     * Actual messages are sent here when the producer is ready
     */
    producer.on(kafkaEvents.READY, () => {
      try {
        producer.produce(
          topicName,
          partition,
          data,
          key // setting key user provided or UUID
        );
      } catch (error) {
        reject(error);
      }
    });

    // Register listener for debug information; only invoked if debug option set in driver_options
    producer.on(kafkaEvents.LOG, log => {
      logger.info('Producer event log notification for debugging:', log);
    });

    // Register error listener
    producer.on(kafkaEvents.ERROR, err => {
      logger.error('Error from producer:' + JSON.stringify(err));
      reject(err);
    });

    // Register delivery report listener
    producer.on(kafkaEvents.PUBLISH_ACKNOWLEDGMENT, (err, ackMessage) => {
      if (err) {
        logger.error(
          'Delivery report: Failed sending message ' + ackMessage.value
        );
        logger.error('and the error is :', err);
        reject({ value: ackMessage.value, error: err });
      } else {
        resolve({
          teamName: globalConfigs.TeamNameService,
          topicName: ackMessage.topic,
          key: ackMessage.key.toString()
        });
      }
    });

    // Connect only after every listener is attached so no event can be missed.
    // NOTE(review): listeners are added on every call and never removed;
    // confirm whether `producer` is meant to be shared across requests.
    producer.connect();
    producer.setPollInterval(globalConfigs.producerPollingTime);
  });
};
Please note that kafkaEvents contains my constants for the events we listen to; it is just a reference, e.g. kafkaEvents.LOG is the same as 'event.log'.
Also, the calling function expects this to return a promise, so we use .then(data => 'send your response to user from here') and .catch(error => 'send error response to user').
This is how I achieved it using promises.

child-process on callback access global object nodejs

I am using the child-process npm module in Node to access another file that does computations for me. The problem is that inside the on('message') event callback (I am not sure what it is actually called) I am trying to access a global variable, and it says it is undefined. I would appreciate a well-explained solution.
// Links newBlock to the current chain tip, forks mining1.js as a child process,
// sends it the block, and tries to append the block the child sends back.
_addBlock(newBlock)
{
newBlock.previousHash = this._getLatestBlock().hash;
var child =
childProcess.fork('C:\\Users\\Yoana\\WebstormProjects\\ChildProcess\\mining1.js'
);
// NOTE: this handler is a plain `function`, so inside it `this` is whatever the
// event emitter binds (the child process object), not the object _addBlock was
// called on. That is why `this.chain` is undefined below. An arrow function, or
// capturing the outer `this` in a variable first, would keep the outer `this`.
child.on('message', function(newBlock)
{
// Receive results from child process
console.log('received: ' , newBlock);
this.chain.push(newBlock);
})
// Send child process some work
child.send(newBlock);
}
It says that this.chain.push is undefined. The method _addBlock is part of a class Blockchain and this.chain is globally accessible.

I'm not sure which model you are using, i.e. the Node.js master/worker architecture with the native cluster module, or the native child_process module with message passing, etc. Although sharing globals is not recommended (how would you handle shared memory? how would you handle protected memory?), you can do it this way:
/**
 * Process-wide lazy singleton factory.
 *
 * The first call constructs `new MyClass(options)`; every later call returns
 * that same object. The instance lives in a closure instead of an undeclared
 * `instance` variable, which would leak into the global scope and throws a
 * ReferenceError under strict mode / ES modules.
 *
 * @returns {object} the shared MyClass instance
 */
global.GlobalBotFactory = (function () {
  var instance;
  return function () {
    if (typeof instance === "undefined") {
      instance = new MyClass(options);
    }
    return instance;
  };
})();
and then you can reference it in other files like
this.instance = GlobalBotFactory(); // the shared factory instance
But this approach, although it works, could lead to several issues like
concurrent variable modification
shared memory corruption
reader/writer problem
etc. so I strongly suggest to follow a node cluster module with master/worker approach and then message passing:
/// node clustering
const cluster = require('cluster');
const numCPUs = require('os').cpus().length;

/**
 * Starts a master or worker node object and logs the outcome.
 * `role` is only used as the log prefix ("Master" / "Worker").
 */
const launch = (role, node, config) =>
  node.start()
    .then(() => {
      console.log(`${role} ${process.pid} running on ${config.pubsub.node}`);
    })
    .catch((error) => {
      console.error(`${role} ${process.pid} error`, error);
    });

if (cluster.isMaster) { // master node
  const masterConfig = require('./config/masterconfig.json');

  // Fork workers: the configured count, capped at the number of CPUs.
  let workerCount = masterConfig.cluster.worker.num;
  if (workerCount >= numCPUs) {
    workerCount = numCPUs;
  }
  for (let forked = 0; forked < workerCount; forked++) {
    cluster.fork();
  }

  const MasterNode = require('./lib/master');
  launch('Master', new MasterNode(masterConfig), masterConfig);
}
else if (cluster.isWorker) { // worker node
  const workerConfig = require('./config/workerconfig.json');
  const WorkerNode = require('./lib/worker');
  launch('Worker', new WorkerNode(workerConfig), workerConfig);
}
For the message passing part, take care: you will be dealing with asynchronously forked processes, and in Node.js there is no guarantee that a message will be delivered, so you need acknowledgement (ack) logic, or you can use a pub/sub approach (Redis offers this for free, please check here). Anyway, here is an example:
// Fork two workers; for each one, log whatever it sends to the master and send
// it one greeting from the master.
for (let i = 0; i < 2; i++) {
  const worker = cluster.fork();

  // Receive messages from this worker and handle them in the master process.
  // A cluster Worker has no `pid` of its own; the child's pid is exposed as
  // worker.process.pid.
  worker.on('message', function(msg) {
    console.log('Master ' + process.pid + ' received message from worker ' + worker.process.pid + '.', msg);
  });

  // Send a message from the master process to the worker.
  worker.send({msgFromMaster: 'This is from master ' + process.pid + ' to worker ' + worker.process.pid + '.'});
}
This will fork the workers, listen in the master for messages coming from each worker, and send one message from the master to each worker. But please keep in mind that the delivery logic is up to you. See here for more info about subprocess.send.

Promise.all() not resolving when running server - otherwise works fine

I've written a small tool that returns a promise after calling several other promises. This tool works great when I test it solo; it takes about 10 seconds in the example below. However, when I try to run it along with an http server instance, it takes on the order of several minutes to return, if at all!
I'm fairly sure I'm just misunderstanding something here, as I'm not extremely proficient in Node. If anyone can spot an issue, or suggest an alternative to using promises for handling asynchronous methods, please let me know!
Just to clarify, it's the Promise.all returned by the traceRoute function which is hanging. The sub-promises are all resolving as expected.
Edit: As suggested in the comments, I have also tried a recursive version with no call to Promise.all; same issue.
This is a working standalone version being called without any http server instance running:
const dns = require('dns');
const ping = require('net-ping');
/**
 * Starts one traceroute to `host` every `interval` ms until `duration` ms worth
 * of start offsets have been scheduled, and collects per-hop timings.
 *
 * @param host     target address handed to session.traceRoute
 * @param ttl      session ttl; also the number of hop records allocated
 * @param interval spacing in ms between scheduled traceroutes
 * @param duration upper bound (exclusive) in ms for the start offsets
 * @returns Promise resolving to the hop records whose `ttl` was filled in
 */
var traceRoute = (host, ttl, interval, duration) => {
  const session = ping.createSession({
    ttl: ttl,
    timeout: 5000
  });

  // One empty record per possible hop, indexed by (hop ttl - 1).
  const times = new Array(ttl);
  let slot = 0;
  while (slot < ttl) {
    times[slot] = {'ttl': null, 'ipv4': null, 'hostnames': [], 'times': []};
    slot += 1;
  }

  // Per-hop callback: a TimeExceededError is recorded against its hop record;
  // everything else is only logged.
  const onHop = (error, target, hopTtl, sent, rcvd) => {
    const ms = rcvd - sent;
    if (!error) {
      console.log(target + ": " +
        target + " (ttl=" + hopTtl + " ms=" + ms +")");
      return;
    }
    if (!(error instanceof ping.TimeExceededError)) {
      console.log(target + ": " +
        error.toString () +
        " (ttl=" + hopTtl + " ms=" + ms +")");
      return;
    }
    times[hopTtl-1].ttl = hopTtl;
    times[hopTtl-1].ipv4 = error.source;
    times[hopTtl-1].times.push(ms);
  };

  // Wraps a single delayed traceroute in a promise that resolves when the
  // traceroute's done-callback fires.
  const scheduleRun = (delay) => new Promise((resolve, reject) => {
    setTimeout(() => {
      session.traceRoute(
        host,
        { maxHopTimeouts: 5 },
        onHop,
        (e, t) => {
          console.log('traceroute done: resolving promise')
          resolve(); // resolve inner promise
        }
      );
    }, delay);
  });

  const pending = [];
  for (let delay = 0; delay < duration; delay += interval) {
    pending.push(scheduleRun(delay));
  }

  return Promise.all(pending)
    .then(() => {
      console.log('resolving traceroute');
      return times.filter((hop) => hop.ttl != null);
    });
}

traceRoute('195.146.144.8', 20, 500, 5000)
  .then((times) => console.log(times))
Below, is the same logic being called from inside the server instance, this is not working as it should. See the inline comment for where exactly it hangs.
const express = require('express');
const http = require('http');
const WebSocket = require('ws');
const app = express();
const server = http.createServer(app);
const wss = new WebSocket.Server({server: server, path: "/wss"});
const dns = require('dns');
const ping = require('net-ping');
/**
 * Starts one traceroute to `host` every `interval` ms until `duration` ms worth
 * of start offsets have been scheduled, and collects per-hop timings.
 *
 * @param host     target address handed to session.traceRoute
 * @param ttl      session ttl; also the number of hop records allocated
 * @param interval spacing in ms between scheduled traceroutes
 * @param duration upper bound (exclusive) in ms for the start offsets
 * @returns Promise resolving to the hop records whose `ttl` was filled in
 */
var traceRoute = (host, ttl, interval, duration) => {
  const session = ping.createSession({
    ttl: ttl,
    timeout: 5000
  });

  // One empty record per possible hop, indexed by (hop ttl - 1).
  const times = new Array(ttl);
  let slot = 0;
  while (slot < ttl) {
    times[slot] = {'ttl': null, 'ipv4': null, 'hostnames': [], 'times': []};
    slot += 1;
  }

  // Per-hop callback: a TimeExceededError is recorded against its hop record;
  // everything else is only logged.
  const onHop = (error, target, hopTtl, sent, rcvd) => {
    const ms = rcvd - sent;
    if (!error) {
      console.log(target + ": " + target +
        " (ttl=" + hopTtl + " ms=" + ms +")");
      return;
    }
    if (!(error instanceof ping.TimeExceededError)) {
      console.log(target + ": " +
        error.toString () + " (ttl=" + hopTtl + " ms=" + ms +")");
      return;
    }
    times[hopTtl-1].ttl = hopTtl;
    times[hopTtl-1].ipv4 = error.source;
    times[hopTtl-1].times.push(ms);
  };

  // Wraps a single delayed traceroute in a promise that resolves when the
  // traceroute's done-callback fires.
  const scheduleRun = (delay) => new Promise((resolve, reject) => {
    setTimeout(() => {
      session.traceRoute(
        host,
        { maxHopTimeouts: 5 },
        onHop,
        (e, t) => {
          console.log('traceroute done: resolving promise')
          resolve(); // resolve inner promise
        }
      );
    }, delay);
  });

  const pending = [];
  for (let delay = 0; delay < duration; delay += interval) {
    pending.push(scheduleRun(delay));
  }

  console.log('Promise all:', pending);
  // #####################
  // Hangs on this promise
  // i.e. console.log('resolving traceroute') is not called for several minutes.
  // #####################
  return Promise.all(pending)
    .then(() => {
      console.log('resolving traceroute')
      return times.filter((hop) => hop.ttl != null)
    });
}
// On every WebSocket connection, run the traceroute and push the result to the client.
wss.on('connection', function connection(ws, req) {
  traceRoute('195.146.144.8', 20, 500, 5000)
    .then((data) => {
      // The client may have gone away while the traceroutes were running.
      if (ws.readyState !== WebSocket.OPEN) {
        return;
      }
      // `data` is an array of plain objects, which ws.send cannot transmit
      // as-is; serialise it to a JSON text frame.
      ws.send(JSON.stringify(data));
    })
    .catch((err) => {
      // Without a handler a failed traceroute is an unhandled rejection.
      console.error('traceRoute failed:', err);
    });
});

// Static assets and templates.
app.use('/tools/static', express.static('./public/static'));
app.use('/tools/templates', express.static('./public/templates'));

// Every other GET falls through to the single-page index.
app.get('*', function (req, res) {
  res.sendFile(__dirname + '/public/templates/index.html');
});

server.listen(8081);
Note: I have tried calling it before the server.listen, after server.listen, from inside wss.on('connection', .... None of which makes a difference. Calling it anywhere, while the server is listening, causes it to behave in a non-deterministic manner.

I'm not going to accept this answer as it's only a workaround; it was just too long to put in the comments...
None of the promises, including the Promise.all, are throwing exceptions. However, Node seems to be parking the call to Promise.all. I accidentally discovered that if I keep a timeout loop running while waiting for the promise.all to resolve, then it will in fact resolve as and when expected.
I'd love if someone could explain exactly what is happening here as I don't really understand.
// Workaround fragment from inside traceRoute: `proms` and `times` come from the
// enclosing function, which is not shown here.
// While holdDoor is true, a timer is re-armed every 500 ms, so there is always
// a pending timeout until Promise.all settles.
var holdDoor = true
// Logs the promise array, then schedules itself again unless holdDoor was cleared.
var ps = () => {
setTimeout(function(){
console.log('status:', proms);
if (holdDoor) ps();
}, 500);
}
ps();
return Promise.all(proms)
.then(() => {
// Stop re-arming the timer; the callback that is already scheduled still runs once.
holdDoor = false
console.log('Resolving all!')
// Only hop records that were actually filled in are returned.
return times.filter((t)=> t.ttl != null)
});

Your code is working perfectly fine!
To reproduce this I've created a Dockerfile with a working version. You can find it in this git repository, or you can pull it with docker pull luxferresum/promise-all-problem.
You can run the docker image with docker run -ti -p 8081:8081 luxferresum/promise-all-problem. This will expose the webserver on localhost:8081.
You can also just run the problematic.js with node problematic.js and then opening localhost:8081 in the web browser.
The web socket will be opened by const ws = new WebSocket('ws://localhost:8081/wss'); which then triggers the code to run.
Its just very important to actually open the web socket, without that the code will not run.

I would suggest replacing the trace route with something else, like a DNS lookup, and seeing if the issue remains. At this point you cannot be sure it relates to raw-socket, since that uses libuv handles directly and does not affect other parts of the Node.js event loop.

Using Node.js Cluster workers for specific tasks

Using the cluster module in node I can allow for a http server to run on multiple cores, increasing concurrency, which is great. But if my app contains other tasks which I do not want to run multiple times, such as a scheduled event. How can I do this?
My basic code using the cluster module is like so...
var cluster = require('cluster');

if (!cluster.isMaster) {
  // Worker: answer every request with this process's pid.
  const http = require('http');
  const server = http.createServer((req, res) => {
    res.write(process.pid.toString());
    res.end();
  });
  server.listen(5000);
  console.log("Server is listening");
} else {
  // Master: one worker per CPU core.
  const numWorkers = require('os').cpus().length;
  let started = 0;
  while (started < numWorkers) {
    cluster.fork();
    started += 1;
  }

  //restart dead workers
  cluster.on('exit', (worker, code, signal) => {
    console.log(`Worker ${worker.process.pid} died with code: ${code}, and signal: ${signal}`);
    console.log('Starting a new worker');
    cluster.fork();
  });
}
Now if I have a task that I want to run every hour like.
// Run doSomething once per hour.
const ONE_HOUR_MS = 60 * 60 * 1000;
setInterval(() => {
  doSomething();
}, ONE_HOUR_MS);
Where would I put it? I have read that the master should only manage workers, to avoid any risk of the master throwing an error and crashing. Is it possible to give individual events to specific workers maybe?

Creating new processes using node js does not have pid

I am following the example presented here
to create a server that would run on multiple cores.I get the following error:
created worker: undefined
created worker: undefined
created worker: undefined
created worker: undefined
workers[m.process].lastCb=new Date().getTime();
^
TypeError: Cannot set property 'lastCb' of undefined
at Worker.<anonymous> (/home/anr/Desktop/node js/clustering2.js:56:29)
at Worker.EventEmitter.emit (events.js:98:17)
at ChildProcess.EventEmitter.emit (events.js:98:17)
at handleMessage (child_process.js:318:10)
at Pipe.channel.onread (child_process.js:345:11)
This is my code:
var cluster = require('cluster');
var os = require('os');
var http = require('http');

var numCores = os.cpus().length;
var rsswarn = (50 * 1024 * 1024);
var heapWarn = (50 * 1024 * 1024);
// Registry filled by createWorker(): key -> { worker, lastCb }.
var workers = {};

if (!cluster.isMaster) {
  // Worker: serve requests, but roughly one request in 200 spins forever to
  // simulate a stuck process.
  http.Server(function makeServer(req, res) {
    const shouldHang = Math.floor(Math.random() * 200) === 4;
    if (shouldHang) {
      console.log('Stopped ' + process.pid + ' from ever finishing');
      for (;;) { continue; }
    }
    res.writeHead(200);
    res.end('hello world from ' + process.pid + '\n');
  }).listen(8000);

  //Report stats once a second
  setInterval(function report() {
    process.send({cmd: "reportMem", memory: process.memoryUsage(), process: process.pid});
  }, 1000);
} else {
  // Master: one worker per core.
  for (let started = 0; started < numCores; started += 1) {
    createWorker();
  }

  // Once a second, replace every worker that has not reported for over 5 seconds.
  setInterval(function killWorkers() {
    const now = Date.now();
    // `pid` is deliberately left undeclared here, exactly as in the original.
    for (pid in workers) {
      if (!workers.hasOwnProperty(pid)) {
        continue;
      }
      if (!(workers[pid].lastCb + 5000 < now)) {
        continue;
      }
      console.log('Long running process ' + pid + ' killed');
      workers[pid].worker.kill();
      delete workers[pid];
      createWorker();
    }
  }, 1000);
}
// Forks a worker, registers it in `workers`, and refreshes its lastCb timestamp
// whenever the worker sends a 'reportMem' message.
function createWorker()
{
var worker=cluster.fork();
// NOTE: `worker.pid` is undefined here (this is the "created worker: undefined"
// output); the child's pid is available as worker.process.pid.
console.log('created worker: '+worker.pid);
// Because worker.pid is undefined, every worker is stored under the same key
// "undefined". lastCb starts one second in the past.
workers[worker.pid]={worker:worker,lastCb:(new Date().getTime()-1000)};
worker.on('message',function(m){
if(m.cmd==='reportMem')
{
// m.process is the real pid sent by the worker, which was never used as a key
// above, so workers[m.process] is undefined and this assignment throws the
// reported TypeError.
workers[m.process].lastCb=new Date().getTime();
// NOTE: the threshold is declared as `rsswarn` (lower-case w); `rssWarn` is a
// different, undeclared identifier.
if(m.memory.rss>rssWarn)
{
console.log('worker thread '+m.process+' taking too much memory');
}
}
});
}

You will find the pid in worker.process.pid. Either the example has a typo, or this has changed for newer versions of node.js.
// This works:
console.log('created worker: ' + worker.process.pid);
Reference: worker.process and ChildProcess.pid
After changing all worker.pid to worker.process.pid you will also notice there is a typo with the variable rsswarn which is later called as rssWarn
After fixing these two things, your code should work.
Have fun!

We Keep Coding

JavaScript is the programming language of the Web.

Access worker environment from master (Node.js Cluster) - javascript

Related

Running node-rdkafka code in server

child-process on callback access global object nodejs

Promise.all() not resolving when running server - otherwise works fine

Using Node.js Cluster workers for specific tasks

Creating new processes using node js does not have pid

Categories

Resources