Using Node.js Cluster workers for specific tasks - javascript

Using the cluster module in Node I can run an HTTP server on multiple cores, increasing concurrency, which is great. But what if my app contains other tasks that I do not want to run multiple times, such as a scheduled event? How can I do this?
My basic code using the cluster module is like so...
var cluster = require('cluster');

if (cluster.isMaster) {
    var numWorkers = require('os').cpus().length;

    for (var i = 0; i < numWorkers; i++) {
        cluster.fork();
    }

    // restart dead workers
    cluster.on('exit', function(worker, code, signal) {
        console.log('Worker ' + worker.process.pid + ' died with code: ' + code + ', and signal: ' + signal);
        console.log('Starting a new worker');
        cluster.fork();
    });
} else {
    var http = require('http');

    var server = http.createServer(function(req, res) {
        res.write(process.pid.toString());
        res.end();
    });

    server.listen(5000);
    console.log("Server is listening");
}
Now suppose I have a task that I want to run every hour, like:
setInterval(function(){
    doSomething();
}, 60*60*1000);
Where should I put it? I have read that the master should only manage workers, to avoid any risk of the master throwing an error and crashing. Is it possible to assign individual tasks to specific workers?
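One common pattern, sketched below purely as an illustration (the RUN_SCHEDULER flag name is an invented example, not an established convention), is to keep the schedule out of the master and designate exactly one worker for it by passing a flag through that worker's environment at fork time:

var cluster = require('cluster');

if (cluster.isMaster) {
    var numWorkers = require('os').cpus().length;

    for (var i = 0; i < numWorkers; i++) {
        // only the first worker gets the scheduler flag
        cluster.fork(i === 0 ? { RUN_SCHEDULER: '1' } : {});
    }
} else {
    // every worker runs the HTTP server...
    require('http').createServer(function(req, res) {
        res.end(process.pid.toString());
    }).listen(5000);

    // ...but only the designated worker runs the hourly task
    if (process.env.RUN_SCHEDULER === '1') {
        setInterval(function() {
            doSomething();
        }, 60 * 60 * 1000);
    }
}

Note that if the designated worker dies, the 'exit' handler would have to re-fork it with the same flag, otherwise the hourly task stops running.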

Related

child-process on callback access global object nodejs

I am using the child-process npm module in Node and accessing another file that does computations for me. The problem is that inside the on('message') event callback (I am not sure what it actually is) I am trying to access a global variable and it says it is undefined. Could somebody give a well-explained solution?
_addBlock(newBlock)
{
    newBlock.previousHash = this._getLatestBlock().hash;

    var child = childProcess.fork('C:\\Users\\Yoana\\WebstormProjects\\ChildProcess\\mining1.js');

    child.on('message', function(newBlock)
    {
        // Receive results from child process
        console.log('received: ', newBlock);
        this.chain.push(newBlock);
    })

    // Send child process some work
    child.send(newBlock);
}
It says that this.chain.push is undefined. The method _addBlock is part of a class Blockchain and this.chain is globally accessible.
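Note that inside a plain function passed to child.on('message', ...), this is bound to the ChildProcess emitter, not to the Blockchain instance, which is why this.chain comes back undefined. A minimal sketch of one way to keep the class instance's this (reusing the question's own fork path, and renaming the callback parameter only to avoid shadowing newBlock) is an arrow function:

_addBlock(newBlock)
{
    newBlock.previousHash = this._getLatestBlock().hash;

    var child = childProcess.fork('C:\\Users\\Yoana\\WebstormProjects\\ChildProcess\\mining1.js');

    // an arrow function keeps `this` bound to the Blockchain instance
    child.on('message', (minedBlock) => {
        console.log('received: ', minedBlock);
        this.chain.push(minedBlock);
    });

    child.send(newBlock);
}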
I'm not sure which model you are using, i.e. the Node.js master/worker architecture with the native cluster module, or the native child_process module with message passing, etc. In any case, even though sharing globals is not recommended (how do you handle shared memory? how do you handle protected memory?), you can do it this way:
global.GlobalBotFactory = function() {
    if (typeof(instance) == "undefined")
        instance = new MyClass(options);
    return instance;
}
and then you can reference it in other files like
this.instance = GlobalBotFactory(); // the shared factory instance
But this approach, despite working, could lead to several issues like:
concurrent variable modification
shared memory corruption
reader/writer problem
etc., so I strongly suggest following a master/worker approach with the Node cluster module and then message passing:
/// node clustering
const cluster = require('cluster');
const numCPUs = require('os').cpus().length;

if (cluster.isMaster) { // master node
    var masterConfig = require('./config/masterconfig.json');

    // Fork workers.
    var maxCPUs = masterConfig.cluster.worker.num;
    maxCPUs = (maxCPUs >= numCPUs) ? numCPUs : maxCPUs;

    for (let i = 0; i < maxCPUs; i++) {
        const worker = cluster.fork();
    }

    var MasterNode = require('./lib/master');
    var master = new MasterNode(masterConfig);
    master.start()
        .then(done => {
            console.log(`Master ${process.pid} running on ${masterConfig.pubsub.node}`);
        })
        .catch(error => {
            console.error(`Master ${process.pid} error`, error);
        });
}
else if (cluster.isWorker) { // worker node
    var workerConfig = require('./config/workerconfig.json');
    var WorkerNode = require('./lib/worker');
    var worker = new WorkerNode(workerConfig);
    worker.start()
        .then(done => {
            console.log(`Worker ${process.pid} running on ${workerConfig.pubsub.node}`);
        })
        .catch(error => {
            console.error(`Worker ${process.pid} error`, error);
        });
}
For the message passing part, take care: you will be dealing with asynchronously forked processes, and in Node.js there is no guarantee that a message will be delivered, so you need some ack logic, or you can use a pub/sub approach (Redis will offer this for free, please check here). By the way, here you are:
for (var i = 0; i < 2; i++) {
    var worker = cluster.fork();

    // Receive messages from this worker and handle them in the master process.
    worker.on('message', function(msg) {
        console.log('Master ' + process.pid + ' received message from worker ' + this.process.pid + '.', msg);
    });

    // Send a message from the master process to the worker.
    worker.send({msgFromMaster: 'This is from master ' + process.pid + ' to worker ' + worker.process.pid + '.'});
}
This will fork the workers and listen for incoming messages from the master or other workers. But please keep in mind that the delivery logic is up to you. See here for more info about subprocess.send.
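Since delivery is not guaranteed, a minimal ack sketch could look like the following (the message shape and the retry interval are assumptions for illustration, not an established API): the master tags each message with an id and retries until the worker acknowledges it.

// master: send a message with an id and retry until the worker acks it
function sendWithAck(worker, payload, retryMs) {
    var id = Date.now() + ':' + Math.random();
    var timer = setInterval(function() {
        worker.send({ id: id, payload: payload });
    }, retryMs);

    worker.on('message', function onMsg(msg) {
        if (msg && msg.ack === id) {
            clearInterval(timer);                    // stop retrying once acked
            worker.removeListener('message', onMsg); // clean up the listener
        }
    });

    worker.send({ id: id, payload: payload }); // first attempt
}

// worker: ack every message it receives
process.on('message', function(msg) {
    if (msg && msg.id) {
        // ...do the actual work with msg.payload here...
        process.send({ ack: msg.id });
    }
});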

How to run Node Cluster on Windows?

Does anyone know how to run Node Cluster on Windows? I haven't been able to find any articles on the web and cannot seem to solve this problem:
events.js:160
throw er; // Unhandled 'error' event
^
Error: write ENOTSUP
at exports._errnoException (util.js:1007:11)
at ChildProcess.target._send (internal/child_process.js:634:20)
at ChildProcess.target.send (internal/child_process.js:521:19)
at sendHelper (cluster.js:751:15)
at send (cluster.js:534:12)
at cluster.js:509:7
at SharedHandle.add (cluster.js:99:3)
at queryServer (cluster.js:501:12)
at Worker.onmessage (cluster.js:449:7)
at ChildProcess.<anonymous> (cluster.js:765:8)
And the code...
const cluster = require('cluster');
const dgram = require('dgram');
const numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
    for (let i = 0; i < numCPUs; i++) {
        cluster.fork();
    }

    cluster.on('online', (worker) => {
        console.log('Worker ' + worker.process.pid + ' is online');
    });

    cluster.on('exit', (worker, code, signal) => {
        console.log(`Worker ${worker.process.pid} died with code ${code} and signal ${signal}`);
    });
} else {
    console.log('else part ');
    openPort();
}

function openPort() {
    let server = dgram.createSocket('udp4');
    server.bind(port, host);
    server.on('message', processMessage);
}
Support for UDP clustering was added in v0.11.14 (for Linux and OSX).
Check the relevant file on the Node.js master branch, which says "dgram clustering is currently not supported on windows".
In the current Node.js version I am using the code below to create a cluster on Windows.
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
    // Fork workers.
    for (var i = 0; i < numCPUs; i++) {
        cluster.fork();
    }

    cluster.on('exit', function(worker, code, signal) {
        console.log(`worker ${worker.process.pid} died`);
        cluster.fork();
    });
} else {
    var express = require('express');
    var http = require('http');

    // init app
    var app = express();

    function createServer(app) {
        return http.createServer(app);
    }

    app.locals.server = createServer(app);
    app.locals.server.listen(port, function() {
        console.info("server online");
    });
}
This will create clusters on the same port.
So, in order to use UDP with Node cluster on Windows, you have to call server.bind like this:
server.bind({port: 1900, exclusive: true}, function () {
    console.log('PORT BIND SUCCESS');
    server.setBroadcast(true);
    server.setMulticastTTL(128);
    server.addMembership(multicastAddress, myIp);
});
The key part is to pass in the object {port: PORT, exclusive: true} to the bind function. I found the answer here: https://github.com/misterdjules/node/commit/1a87a95d3d7ccc67fd74145c6f6714186e56f571
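Applied to the openPort() function from the question above, a minimal sketch could look like this (port, host and processMessage are the question's own placeholders):

function openPort() {
    let server = dgram.createSocket('udp4');

    // exclusive: true is what lets the bind succeed inside a cluster worker on Windows
    server.bind({ port: port, address: host, exclusive: true }, () => {
        console.log('PORT BIND SUCCESS');
    });

    server.on('message', processMessage);
}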

Node.js TCP Connections

I am trying to write a Node.js app that will perform TCP connections to some TCP/IP modules we use in the office. These modules are installed in old consoles and therefore the software to manage them is very old and it is no longer supported. The way we operate them now is to use the command prompt in Windows and connect to them that way. Here is how we are currently doing it.
>telnet <XX.XX.XX.XX> <PORT>
if the connection is successful then we get a blank screen in which we then type
<Ctrl+A> 200
Where <Ctrl+A> means holding the Ctrl key and the A key at the same time, followed by the number 200. It then prints some simple details about the module, which is what we want. This method works for us, but the problem is that it is not practical when you have 20+ of these modules, each with a different IP. We have to connect to them every week to make sure they are working, and doing 20 of them really takes up a lot of your time, so we want to optimize the process. We began writing a mini app in Node.js that takes care of the connections:
var net = require('net');
var HOST_ARR = ['XX.XX.XX.XX'];
var PORT = 10001;
var client = new net.Socket();
var spawn = require('child_process').spawn('cmd');
var exec = require('child_process').exec;
var readline = require('readline');

var rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
});

function connect(host, port){
    return new Promise(function(resolve, reject){
        client.connect(port, host, function() {
            // Write a message to the socket as soon as the client is connected
            console.log('CONNECTED TO: ' + host + ':' + port);

            /*** EXECUTE <Ctrl+A> 200 COMMAND BEFORE CLOSING THE CONNECTION ***/

            resolve('Success');

            // end connection
            client.destroy();
        });

        // handle errors
        client.on('error', function(err) {
            reject(err.code);
        });
    });
}
We are able to connect to the modules just fine, and the process is extremely fast. We are just stuck on how to execute the command <Ctrl+A> 200 once the connection starts. Any suggestions?
I tried
exec('\0x01200', function(error, stdout, stderr) { // ASCII for <Ctrl+A> is \0x01
    console.log('stdout: ' + stdout);
    console.log('stderr: ' + stderr);
    if (error !== null) {
        console.log('exec error: ' + error);
    }
});
but it did not work. Thanks in advance.
How about just sending <Ctrl+A>200 through the client after it has connected? Like:
client.write(Buffer.from([
    0x01, // Ctrl-A
    0x32, // 2
    0x30, // 0
    0x30, // 0
    // uncomment if necessary
    // 0x0d, // \r
    // 0x0a, // \n
]));
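Dropped into the connect() function from the question, a minimal sketch could look like this (waiting for a single 'data' event before destroying the socket is an assumption about how the modules reply; resolve comes from the question's surrounding Promise):

client.connect(port, host, function() {
    console.log('CONNECTED TO: ' + host + ':' + port);

    // send <Ctrl+A>200 as raw bytes once the socket is open
    client.write(Buffer.from([0x01, 0x32, 0x30, 0x30]));
});

// collect whatever the module sends back, then close the connection
client.on('data', function(data) {
    console.log('RESPONSE: ' + data.toString());
    resolve(data.toString());
    client.destroy();
});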

Access worker environment from master (Node.js Cluster)

I fork workers in my Node.js application via the Cluster module and pass a custom ID to the environment of each of my workers. That works well so far.
However, I do not know how I can access this id in my master when an 'online' or 'exit' event is emitted.
The documentation is not very helpful. Could you please point me the right way?
var cluster = require('cluster');

if (cluster.isMaster) {
    // MASTER
    function fork() {
        var worker_env = {worker_id: 'my_custom_id'};
        cluster.fork(worker_env);
    }

    cluster.on('online', function(worker) {
        console.log(worker.process.env.worker_id); // undefined
        //
        // How can I access my custom worker id here?
        //
    });

    cluster.on('exit', function(worker, code, signal) {
        //
        // And here...?
        //
        fork();
    });
} else {
    // WORKER
    console.log(process.env.worker_id); // my_custom_id
}
There's no way; the worker's process env is not exposed to the master.
One approach can be a map of our cluster (an object containing the needed info).
Something like this:
var cluster = require('cluster');

if (true === cluster.isMaster) {
    // CODE EXECUTED BY MASTER
    var cluster_map = {};    // Here we store the workers info in an object
    var restart_Limit = 10;  // max global worker restarts (10)

    function fork_worker(myWorkerId) {
        // this makes worker_id available in the worker
        var worker = cluster.fork({
            worker_id: myWorkerId
        });

        // max restarts limit (global)
        if (worker.id >= restart_Limit) {
            console.log('Restart limit reached, bye!');
            process.exit();
        }

        // here we add the key "myWorkerId" to the cluster map
        cluster_map[worker.id] = myWorkerId;

        // WORKER AUTO-KILL
        setTimeout(function() {
            console.log('stopping...' + myWorkerId);
            worker.kill();
        }, 3000);
    }

    cluster.on('online', function(worker) {
        var online_proc = cluster_map[worker.id];
        console.log('worker online: ' + online_proc + '\n Restarts: ' + worker.id);
    });

    cluster.on('exit', function(worker, code, signal) {
        var exited_proc = cluster_map[worker.id];

        // delete the process from the cluster map
        delete cluster_map[worker.id];
        console.log("worker offline: " + exited_proc);

        // WORKER AUTO-RESTART
        setTimeout(function() {
            console.log('Restarting... ' + exited_proc);
            fork_worker(exited_proc);
        }, 3000);
    });

    // start the magic ( 3 workers )
    (function() {
        fork_worker('id_1');
        fork_worker('id_2');
        fork_worker('id_3');
    })();
} else {
    // CODE EXECUTED BY EACH WORKER (process env is present here).
    console.log('hi from the worker, process.env: ' + process.env.worker_id);
    // all the hard work for the workers goes here.
}
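A lighter variant of the same idea, sketched here only as an assumption about how one might do it, is to stash the custom id directly on the Worker object that cluster.fork() returns, so the 'online' and 'exit' handlers can read it without a separate map:

var cluster = require('cluster');

function fork(customId) {
    var worker = cluster.fork({ worker_id: customId });
    worker.customId = customId; // custom property, only visible on the master side
    return worker;
}

cluster.on('online', function(worker) {
    console.log('online: ' + worker.customId);
});

cluster.on('exit', function(worker, code, signal) {
    console.log('exited: ' + worker.customId);
    fork(worker.customId); // re-fork with the same custom id
});

fork('my_custom_id');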

Seeking Guidance on troubleshooting node memory issue

I'm trying to implement a message queue using Node, socket.io, and Redis. I am attempting to follow the reliable queue pattern outlined here. I am trying to read a log file (60M in size) line by line (I will be changing this later) and pump the lines into the queue for processing later. However, I am running into a memory allocation issue. I'm not sure how to troubleshoot this and would like some guidance on where to start. I can't tell if the issue is in reading the file or in the Redis client. I have been able to add messages to the queue one by one like this:
socket.emit('message', 'some sort of log line here');
Therefore I know the listener is working, but when I run the socketClient.js file it will spin for a bit and then ultimately fail with the following generic error message:
FATAL ERROR: JS Allocation failed - process out of memory
Is there some error handling, or profiling I can add to get more information on where this is failing?
Here is the code:
socketListener.js
var util = require("util"),
    redis = require("redis"),
    io = require('socket.io').listen(8088),
    client = redis.createClient("7777", "localhost");

util.log("Established connection to redis");

io.sockets.on('connection', function(socket) {
    util.log("socket connection established for socket : " + socket);

    socket.on('message', function (data) {
        util.log("received the following data : ");
        util.log(JSON.stringify(data, 0, 3));

        client.on("error", function(err) {
            util.log("Error " + err);
        });

        try {
            // reliable queue pattern implementation
            util.log("queuing up the data in the list");
            client.rpush('logList', data);
            client.brpoplpush('logList', 'dequeueList', 10);
        } catch (err) {
            util.log("An error occurred : ");
            util.log(JSON.stringify(err, 0, 3));
        }
    });

    socket.on('disconnect', function() {});
});
socketClient.js
var io = require("socket.io-client");
var socket = io.connect('http://localhost:8088');
var redis = require('redis');
var util = require('util');
var fs = require('fs');
var readline = require('readline');

socket.on('connect', function() {
    client = redis.createClient("7777", "localhost");

    var rd = readline.createInterface({
        input: fs.createReadStream('someLogFile.log'),
        terminal: false
    });

    rd.on('line', function(line) {
        util.log("reading line " + line);
        socket.emit('message', line);
    });

    client.lrange('dequeueList', 0, -1, function(err, results) {
        if (err) {
            util.log(err);
        } else {
            var multi = client.multi();
            for (var i = 0; i < results.length; i++) {
                util.log('got : ' + results[i]);
                multi.hgetall(results[i]);
            }
            multi.exec(function(err, logs) {
                util.log("executing the multi commands");
                util.log(JSON.stringify(logs, 0, 3));
            });
        }
    });
});
Thank you in advance for the help!
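One low-effort way to narrow down where the memory goes, sketched here only as a starting point for the profiling question above, is to log process.memoryUsage() periodically in both the listener and the client and watch which process grows while the file is being read:

// print heap/RSS figures every second so you can see which process balloons
setInterval(function() {
    var mem = process.memoryUsage();
    console.log('rss: ' + (mem.rss / 1048576).toFixed(1) + ' MB, ' +
                'heapUsed: ' + (mem.heapUsed / 1048576).toFixed(1) + ' MB');
}, 1000);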
