I want to understand how this code works in the Node.js/Express framework.
My assumption is this:
1. Request A comes in and is handled by the thatRequest route.
2. Request B comes in and thatRequest handles it too.
3. There are no shared variables between the A and B connections.
The connections are treated privately. They share some common configs but nothing more.
A comes from one computer and B comes from another computer.
My understanding is that each time checkStates is called, it starts with fresh copies of the variables defined outside the function.
What happens when using the Node cluster module, which spawns a new process for each processor the environment has?
// in thatRequest.js file
const RETRY_EVERY = 3; // seconds
const MAX_RETRY = 10; // maximum retry times

// Module-level state, defined outside the handlers
let checkInterval = 0;
let remainingRetry = 0;

async function checkStates (req, res) {
  remainingRetry = !remainingRetry ? MAX_RETRY * 1 : remainingRetry--;
  if (remainingRetry < 1) {
    clearInterval(checkInterval);
  }
}

const request = (req, res) => {
  checkInterval = setInterval(() => {
    checkStates(req, res);
  }, RETRY_EVERY * 1000);
  return checkStates(req, res);
};

export default request;
// in a routes.js file
import thatRequest from 'thatRequest';

export default (req, res) => {
  thatRequest(req, res);
};
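As a point of reference, here is a minimal sketch (a hypothetical /count route, not taken from the code above) showing that module-level variables persist across all requests handled by a single Node process, while each cluster worker, being its own process, loads its own copy of the module:
// counter.js (hypothetical illustration)
const express = require('express');
const app = express();

// Module-level variable: the module is loaded once per process, so every
// request served by this process sees and mutates the same variable.
// A cluster worker is a separate process with its own independent copy.
let hits = 0;

app.get('/count', (req, res) => {
  hits += 1; // requests from different computers both increment this
  res.send(`This process has served ${hits} requests`);
});

app.listen(3000);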
Related
I am trying to create a simple server that gives every new request to a different worker. The DATA object is a plain JavaScript object in a separate file. The problem I'm facing is the CONSISTENCY of this DATA object.
How do I prevent a worker from handling a request if the previous request is still being processed? For example, the first request is an UPDATE and takes longer, and the next request is a DELETE and finishes faster. What Node tool or pattern do I need to use to be 100% sure that the DELETE happens after the UPDATE?
I also need to run every worker on a different port.
const cluster = require('cluster');
const http = require('http');
const numCPUs = require('os').cpus().length;

cluster.schedulingPolicy = cluster.SCHED_RR;

const PORT = 4000;

if (cluster.isMaster) {
  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }
} else {
  http.createServer((req, res) => {
    if (req.url === '/users' && req.method === "PUT") {
      updateUser(req)
    } else if (req.url === '/users' && req.method === "DELETE") {
      deleteUser(req)
    }
  }).listen(PORT++);
}
Each worker must reserve ("lock") the DATA object for exclusive use before it can change it. This can be done by writing a lock file and deleting it again after the object has been changed successfully.
const fs = require("fs");

try {
  fs.openSync("path/to/lock/file", "wx+");
  /* Change DATA object */
  fs.rmSync("path/to/lock/file");
} catch (err) {
  if (err.code === "EEXIST") throw "locking conflict";
}
The worker executing the first (UPDATE) request will succeed in writing the lock file, but a concurrent worker executing a second (DELETE) request will experience a locking conflict. It can then either report the failure to the user, or re-try after a short waiting time.
(If you decide to implement the lock in this way, the asynchronous fs methods may be more efficient.)
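For instance, a minimal sketch of the same lock using the promise-based fs API (the lock path is a placeholder, just as above, and the error handling mirrors the synchronous version):
const fs = require("fs/promises");

async function changeDataWithLock(changeData) {
  let handle;
  try {
    // "wx+" fails with EEXIST if the lock file already exists
    handle = await fs.open("path/to/lock/file", "wx+");
  } catch (err) {
    if (err.code === "EEXIST") throw "locking conflict";
    throw err;
  }
  try {
    await changeData(); // change the DATA object here
  } finally {
    await handle.close();
    await fs.unlink("path/to/lock/file"); // release the lock
  }
}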
Your code won't even create multiple servers, let alone on different ports, and the PORT variable is a const, so it won't increment either.
What Node tool or pattern do I need to use to be 100% sure that the DELETE happens after the UPDATE?
JavaScript has no built-in lock primitive, so use a semaphore/mutex-style variable as the lock (see code).
Remember, JavaScript is a single-threaded language.
I need to run every worker on a different port
For each worker, derive the listening port from the worker ID (see code). Remember that the CPU cannot run more workers in parallel than it has cores.
Sample working code:
const express = require('express')
const cluster = require('cluster')
const os = require('os')

if (cluster.isMaster) {
  for (let i = 0; i < os.cpus().length; i++) {
    cluster.fork()
  }
} else {
  const app = express()

  // Global semaphore/mutex variable isUpdating (one per worker process)
  var isUpdating = false;

  const worker = {
    handleRequest(req, res) {
      console.log("handleRequest on worker /" + cluster.worker.id);
      if (req.method == "GET") { // FOR BROWSER TESTING, CHANGE IT LATER TO PUT
        isUpdating = true;
        console.log("updateUser GET");
        // do updateUser(req);
        isUpdating = false;
        res.end("update done");
      } else if (req.method == "DELETE") {
        if (!isUpdating) { // Check for update lock
          console.log("deleteUser DELETE");
          // do deleteUser(req)
          res.end("delete done");
        } else {
          res.end("locked, try again later");
        }
      }
    },
  }

  app.get('/users', (req, res) => {
    worker.handleRequest(req, res)
  })

  // Now each worker will run on a different port
  app.listen(4000 + cluster.worker.id, () => {
    console.log(`Worker ${cluster.worker.id} started listening on port ${4000 + cluster.worker.id}`)
  })
}
I've been trying to scrape the number of COVID cases from the official COVID website using Puppeteer and display it on an index.html page. I have this code for my API in my index.js file:
const func = require('./scrapers');

app.get('/creators', async (req, res) => {
  const numC = func.scrapeCases('https://covid19.ca.gov/state-dashboard/');
  var myFinal = numC.toString();
  numC.then(result => {
    const setter = result.toString();
    myFinal = setter;
    console.log(myFinal)
    //res.send('6')
    res.send(myFinal)
  });
})
The function "scrapeCases" is in a file called scrapers.js in the same directory as index.js. It returns the jsonValue() of the number I'm trying to extract from the COVID website (number of cases), which in reality gives me a Promise return value, which is why I also used toString() to change that.
When I do console.log(myFinal) as given, the proper number of cases is shown in the terminal as if it were a regular string. But when I do res.send(myFinal), it doesn't show up in my index.html page, but if I comment res.send(myFinal) and do res.send('6') as an arbitrary example, the 6 is displayed (after a short delay of time, not sure why). Why is it that it doesn't work with res.send?
Also, if I do this instead:
app.get('/creators', async (req, res) => {
  const numC = await func.scrapeCases('https://covid19.ca.gov/state-dashboard/');
  var myFinal = numC.toString();
  console.log(myFinal)
  //res.send('6')
  res.send(myFinal)
})
Here I add "await" to my func.scrapeCases statement, since I think this is another way to get the value of the numC Promise. Again, console.log(myFinal) gives me the proper number of cases as a simple number with commas in between (e.g. 1,366,234), and res.send('6') again displays on my index.html if I uncomment it, but res.send(myFinal) does not display anything.
Any help is appreciated thank you!
By wrapping the handler, res.send() is executed only after the async function has resolved.
Here is code that reproduces and solves the same situation as yours.
const express = require('express');
const app = express();

const asyncWrapper = (fn) => {
  return (req, res, next) => {
    return Promise.resolve(fn(req))
      .then((result) => res.send(result))
      .catch((err) => next(err))
  }
}

const count = async () => {
  let count = 0;
  for (let i = 0; i < 1000000; ++i) {
    count++;
  }
  return count.toString();
}

app.get('/creators', asyncWrapper(count));

app.listen(3000, () => {
  console.log(`API Server listening on port 3000!`);
});
Your code will probably look like this (I haven't tested it; it may need to be modified):
const express = require('express'); // express app setup as in your index.js
const app = express();
const func = require('./scrapers');

const asyncWrapper = (fn) => {
  return (req, res, next) => {
    return Promise.resolve(fn(req))
      .then((result) => res.send(result))
      .catch((err) => next(err))
  }
}

const count = async () => {
  let numC = await func.scrapeCases('https://covid19.ca.gov/state-dashboard/');
  return numC.toString();
}

app.get('/creators', asyncWrapper(count));
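Alternatively, since Express route handlers can themselves be async functions, here is a sketch of awaiting the scraper directly inside the handler (an assumption on my part, not from the original answer):
const express = require('express');
const func = require('./scrapers');
const app = express();

app.get('/creators', async (req, res, next) => {
  try {
    const numC = await func.scrapeCases('https://covid19.ca.gov/state-dashboard/');
    res.send(numC.toString());
  } catch (err) {
    next(err); // forward scraping failures to Express's error handler
  }
});

app.listen(3000);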
I would like this function to run by itself at set time intervals. As it is now, I have to visit the '/getCompanyInfo' path to trigger it. I would like it to run every minute, as if I were visiting the '/getCompanyInfo' path each minute. The app is on Heroku and I would like the function to execute without any pages open.
The original function that is triggered by visiting the path:
const express = require('express');
const app = express();

/**
 * getCompanyInfo ()
 */
app.get('/getCompanyInfo', function(req, res) {
  const companyID = oauthClient.getToken().realmId;
  console.log(companyID)
  const url = OAuthClient.environment.production;
  oauthClient.makeApiCall({url: url + 'v3/company/0000000000/salesreceipt/8?minorversion=41'})
    .then(function(authResponse) {
      console.log("The response for API call is :" + JSON.parse(JSON.stringify(authResponse)));
      res.send(authResponse);
    })
    .catch(function(e) {
      console.error(e);
    });
});
One of my attempts here was to put it in a function that executes each minute using node-schedule.
This one doesn't do anything other than print 'This will run once a minute.' to the console.
I tried removing
app.get(function(req,res){
and the
})
below it but that made the app (hosted on Heroku) fail to build.
const express = require('express');
const app = express();
var schedule = require('node-schedule');

var j = schedule.scheduleJob('* * * * *', function() {
  console.log('This will run once a minute.');
  app.get(function(req, res) {
    const companyID = oauthClient.getToken().realmId;
    console.log(companyID)
    const url = OAuthClient.environment.production;
    oauthClient.makeApiCall({url: url + 'v3/company/0000000000/salesreceipt/8?minorversion=41'})
      .then(function(authResponse) {
        console.log("The response for API call is :" + JSON.parse(JSON.stringify(authResponse)));
        res.send(authResponse);
      })
      .catch(function(e) {
        console.error(e);
      });
  });
});
More Context:
It is inside an app I have on Heroku. I would like to set up the app to make a request for JSON data from the API every x amount of time without me having to touch it.
app.get registers an API handler, i.e. it is your API route definition: the thing that will respond when you call GET /getCompanyInfo via a web browser or some other client. You should not redefine it repeatedly from your scheduled action.
The failed build after you removed the route handler is probably because of the res.send(authResponse); that was left behind.
You could have something like:
// function that will be used to get the data
const getCompanyInfo = (done) => {
  const companyID = oauthClient.getToken().realmId
  console.log(companyID)
  const url = OAuthClient.environment.production
  oauthClient.makeApiCall({url: url + 'v3/company/0000000000/salesreceipt/8?minorversion=41'})
    .then((authResponse) => {
      console.log("The response for API call is :" + JSON.parse(JSON.stringify(authResponse)))
      done(authResponse)
    })
    .catch((e) => {
      console.error(e)
    })
}

// this will trigger the function regularly on the specified interval
const j = schedule.scheduleJob('* * * * *', () => {
  getCompanyInfo((companyInfo) => {
    // ...do whatever you want with the info
  })
})

// this will return you the data on demand, when you call GET /getCompanyInfo via browser
app.get('/getCompanyInfo', function(req, res) {
  getCompanyInfo((companyInfo) => {
    res.send(companyInfo)
  })
})
Heroku has an add-on called Heroku Scheduler that does what you want. The node-schedule npm package might do the job, but as you mentioned, you probably aren't going to be able to see the execution/results/logs of your jobs that run every 24 hours without making some interface for it on your own.
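If you go the Heroku Scheduler route, the add-on runs a one-off command on a schedule, so a standalone script can simply call the existing endpoint. A rough sketch (the app URL is a placeholder, and Node's built-in https module is just one possible client):
// get-company-info-job.js (hypothetical file the scheduler could run with `node get-company-info-job.js`)
const https = require('https');

https.get('https://your-app.herokuapp.com/getCompanyInfo', (res) => {
  let body = '';
  res.on('data', (chunk) => { body += chunk; });
  res.on('end', () => {
    console.log('Company info:', body);
    // ...do whatever you want with the response here
  });
}).on('error', (err) => {
  console.error(err);
});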
For your issue, calling app.get doesn't make a lot of sense. That's just telling node about the route. Assuming you have your /getCompanyInfo route up and running, you just need to call it in your scheduled job, not re-register it every time.
You could also just do this (http being the http client you're using):
var j = schedule.scheduleJob('* * * * *', async function() {
  console.log('This will run once a minute.');
  const result = await http.get('/getCompanyInfo');
  console.log(result);
});
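As a concrete sketch, assuming a promise-based client such as axios (the original answer doesn't name one), the scheduled job would need the full URL of the running app:
const schedule = require('node-schedule');
const axios = require('axios'); // assumed HTTP client; swap in whichever you use

const j = schedule.scheduleJob('* * * * *', async function() {
  console.log('This will run once a minute.');
  // Full URL is a placeholder; point it at your Heroku app or localhost
  const result = await axios.get('https://your-app.herokuapp.com/getCompanyInfo');
  console.log(result.data);
});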
Couldn't find an answer, so I'm asking here -
I'm writing an API in Node.js (6.2.0) and I have a problem when serving multiple clients.
The code is -
"use strict";
const express = require('express');
const router = express.Router();
const FileRetriever = require('./FileRetriever');
function doSomething(uid, callback) {
let finalCount = 0;
let cb = null;
FileRetriever.foo(uid, function (err, data) {
finalCount = data.length;
cb = callback(finalCount);
data.forEach(function(obj, i) {
doSomething2(obj, cb);
});
})
}
function doSomething2(_obj, cb) {
let fn = null;
FileRetriever.bar(_obj, function(err, data){
cb(null, data);
})
}
router.route('/foo').get(function (req, res) {
let uid = req.query.uid;
function callback(_finalCount) {
let counter = 1;
let finalCount = _finalCount;
let output = [];
return function(err, data) {
output.push(data);
if (output.length === (finalCount -1)) {
res.send(output);
}
}
}
doSomething(uid, callback);
});
Obviously it's a bit more complicated than that, but this is the simplified version.
Please help me understand what I am missing.
This is how I think it should work:
A user goes to /foo with a parameter uid.
He gets to this route and doSomething is invoked for this user.
In doSomething, I first get the initial data and invoke callback with it; callback returns a function of its own, which from now on is known as cb.
When cb is passed to doSomething2 and gets invoked there, it's still within the same call stack, for the same user.
I ran a couple of tests, with one user that should return an output of length 6 and another of length 100.
When I run this code once per user, it all works fine, but if the route gets called at the same time by the two users, the lengths are not [6, 100] (but they are always the same).
What am I doing wrong?
It should work...
Hope that I was clear, thanks.
I'm trying to build a simple tool that pings a bunch of URLs to monitor their status and updates a variable with each app's status.
I also have another file which I'd like to be able to execute at any time to get the current status of each app from that variable.
Here's my main file; you can see there are 2 exports: start and getStatuses.
index.js
'use strict';

const rest = require('restler');
const time = require('simple-time');
const seconds = time.SECOND;

// The list of apps to check if they are running
var apps = {
  myApp: {
    url: 'http://myUrl.com',
    status: null,
    lastUpdatedAt: new Date()
  }
};

/**
 * Loop through and check the status of every app
 */
function checkAllStatuses() {
  for (var name in apps) {
    if (apps.hasOwnProperty(name)) {
      var app = apps[name];
      console.log('app = ', app);
      checkAppStatus(name, app);
    }
  }
}

/**
 * Checks the status of an app
 *
 * @param name - The name of the app
 * @param app - The app that we're checking the status of
 */
function checkAppStatus(name, app) {
  var req = rest.get(app.url);

  req.on('complete', function(result, response) {
    if (response.statusCode !== app.status) {
      updateStatus(name, response.statusCode);
    }
  });

  req.on('error', function(e) {
    console.log('ERROR: ' + e.message);
  });

  req.on('timeout', function(data, response) {
    console.log('Request timed out');
  });
}

/**
 * Updates the status of an app
 *
 * @param name - The name of the app to update the status of
 * @param status - The status to update the app to
 */
function updateStatus(name, status) {
  apps[name].status = status;
  apps[name].lastUpdatedAt = new Date();
}

function getStatuses() {
  return apps;
}

function start() {
  // Check every 5 seconds
  setInterval(checkAllStatuses, 5 * seconds);
}

module.exports.start = start;
module.exports.getStatuses = getStatuses;
Then I have a file which starts the process:
start.js
'use strict';
const status = require('./index');
status.start();
Then I have a file that I want to execute to get the current status of the apps:
consume.js
'use strict';
const status = require('./index');
console.log(status.getStatuses());
The problem is that consume.js just displays exactly what's in the initial apps variable in index.js, which is:
{
  myApp: {
    url: 'http://myUrl.com',
    status: null,
    lastUpdatedAt: new Date()
  }
};
while the process running the start() command is displaying an updated status that is NOT null.
How can I make it so consume.js can see the value of a variable that start.js is updating?
I'd like to not have to use a datastore if possible. Worst case scenario, I write to a file or run Redis, Mongo, or some other datastore, but I'm trying to avoid that to keep this app as simple as possible.
You are using the same code, index.js, in both start.js and consume.js, but you create two separate instances of it when you run each file, since each run is a separate Node process with its own copy of the module.
That is, the apps variable is changing in the instance created by start.js, but nothing in consume.js tells your code to change the apps variable.
If you are not saving a history of statuses, or saving the data to a datastore, what's the point in the start routine? You could just call checkAllStatuses and then return the results when you wish to consume the data.
Edit
Here's an example of combining the two files (start.js and consume.js) into one file. It also adds a sample socket.io implementation, since you stated that providing the statuses to clients via WebSockets was the eventual goal.
var app = require('http').createServer(handler)
var io = require('socket.io')(app);
var fs = require('fs');

//// Your status library
var status = require('./index');

//// Start getting statuses
status.start();

app.listen(80);

//
// This is just the default handler
// in the socket.io example
//
function handler (req, res) {
  fs.readFile(__dirname + '/index.html',
    function (err, data) {
      if (err) {
        res.writeHead(500);
        return res.end('Error loading index.html');
      }
      res.writeHead(200);
      res.end(data);
    });
}

io.on('connection', function (socket) {
  // Someone wants the list of statuses
  // This uses socket.io acknowledgements
  // to return the data. You may prefer to use
  // `socket.emit` instead or an altogether different socket library.
  socket.on('status_fetch', function (data, callback_fn) {
    callback_fn( status.getStatuses() );
  });
});
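For completeness, a sketch of the client side, assuming the socket.io-client package and the status_fetch event defined above:
// client.js: run with Node, or adapt for the browser bundle of socket.io-client
const io = require('socket.io-client');
const socket = io('http://localhost:80');

socket.on('connect', function () {
  // The last argument is the acknowledgement callback the server invokes
  // with the result of status.getStatuses()
  socket.emit('status_fetch', {}, function (statuses) {
    console.log('Current app statuses:', statuses);
  });
});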