I want to download 70 images. Their total size is around 100 MB.
This is the simplified relevant part of my code:
function downloadImage(src){
var dst = '...';
request(src).pipe(fs.createWriteStream(dst));
return dst;
}
arrayOf70.forEach(function(e){
var thing = new Thing({
// ...
image: downloadImage(url)
});
thing.save();
});
The problem is that there are too many concurrent downloads. Okay, first step: pass a huge timeout to request.
request({url: src, timeout: 120000000}).pipe(fs.createWriteStream(dst));
Well, that didn't work, since it exceeds the OS TCP timeout. At least I think that's the problem; in any case, I'm getting timed-out connections:
stream.js:94
throw er; // Unhandled stream error in pipe.
^
Error: connect ETIMEDOUT
at exports._errnoException (util.js:746:11)
at TCPConnectWrap.afterConnect [as oncomplete] (net.js:1000:19)
So: what is the way to go to limit the number of concurrent downloads?
Timeouts are not an ideal solution. What you really need is the ability to wait for a download to finish and then immediately start a new one, with a fixed number of downloads running in parallel.
You could do that by using a callback:
function downloadImage(src, callback){
var dst = '...';
http.get(src, function(res) {
res.pipe(fs.createWriteStream(dst))
.on("finish", function() {
callback(dst);
});
});
}
function downloadAllImages(array) {
var idx = 0;
function downloadLoop() {
if(idx >= array.length) return;
downloadImage(array[idx++], function(dst) {
var thing = new Thing({
// ...
image: dst
});
thing.save();
downloadLoop();
});
}
for(var i = 0; i < 5; i++) downloadLoop(); //start 5 concurrent download "loops"
}
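If you'd rather not hand-roll the loop, the async module (which other answers on this page also use) has eachLimit, which does exactly this kind of throttling. A minimal sketch, assuming the callback version of downloadImage from above:
var async = require('async');

// process the 70 URLs with at most 5 downloads in flight at a time
async.eachLimit(arrayOf70, 5, function(url, done) {
    downloadImage(url, function(dst) {
        var thing = new Thing({
            // ...
            image: dst
        });
        thing.save();
        done();
    });
}, function(err) {
    console.log('all downloads finished');
});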
Here's an example with setInterval, which spaces out the start of each request rather than capping how many run at once:
var array_length = arrayOf70.length;
var i = 0;
var request_interval = setInterval(makeRequest, 100);
function makeRequest(){
if(i<array_length){
var thing = new Thing({
// ...
image: downloadImage(url)
});
thing.save();
i++;
}else{
clearInterval(request_interval);
}
}
I'm working on a Homebridge plugin for a project. Homebridge is a Node.js server, running on my Raspberry Pi, that emulates an Apple HomeKit Bridge.
Using this link, I was able to execute Python code from the following Node.js code:
var Service, Characteristic;
var spawn = require('child_process').spawn;
var py = spawn('python', ['/home/pi/Desktop/RFbulb/nRF24L01PLUS.py']);
var data = [10,10,10];
var dataString = '';
var RFstatus = true;
module.exports = function(homebridge) {
Service = homebridge.hap.Service;
Characteristic = homebridge.hap.Characteristic;
homebridge.registerAccessory("homebridge-RFbulb", "RFbulb", RFbulbAccessory);
}
function RFbulbAccessory(log, config) {
this.log = log;
this.config = config;
this.name = config["name"];
this.address = config["address"];
this.service = new Service.Lightbulb(this.name);
this.service
.getCharacteristic(Characteristic.On)
.on('get', this.getOn.bind(this))
.on('set', this.setOn.bind(this));
}
RFbulbAccessory.prototype.setOn = function(on, callback) { // This is the function throwing the error
var state = on ? "on": "off";
if (state == "on") {
data = [1,parseInt(this.address, 10),100];
dataString = '';
py.stdout.on('data', function(data) {
dataString += data.toString();
});
py.stdout.on('end', function() {
console.log(dataString);
});
py.stdin.write(JSON.stringify(data));
py.stdin.end();
RFstatus = true;
}
callback(null);
}
RFbulbAccessory.prototype.getServices = function() {
return [this.service];
}
Interestingly enough, when I activate the setOn function the first time (for example, to turn the device on) it works fine, but when I activate the setOn function a second time (to turn the device off) I get the following errors and the server exits:
events.js:141
throw er; // Unhandled 'error' event
^
Error: write after end
at writeAfterEnd (_stream_writable.js:166:12)
at Socket.Writable.write (_stream_writable.js:211:5)
at Socket.write (net.js:642:40)
at RFbulbAccessory.setOn (/usr/lib/node_modules/homebridge-RFbulb/index.js:47:12)
at emitThree (events.js:97:13)
at emit (events.js:175:7)
at Characteristic.setValue (/usr/lib/node_modules/homebridge/node_modules/hap-nodejs/lib/Characteristic.js:155:10)
at Bridge.<anonymous> (/usr/lib/node_modules/homebridge/node_modules/hap-nodejs/lib/Accessory.js:710:22)
at Array.forEach (native)
at Bridge.Accessory._handleSetCharacteristics (/usr/lib/node_modules/homebridge/node_modules/hap-nodejs/lib/Accessory.js:655:8)
What could be causing this error, especially since the function appears to work fine for a single use?
You're getting that error because you're closing the input stream:
py.stdin.end();
After a stream has been closed, you can no longer write to it, as you are doing here:
py.stdin.write(JSON.stringify(data));
If the Python program you're running accepts multiple commands over STDIN then simply remove the py.stdin.end() line.
However, it's likely that your Python program runs once and then exits. If that's the case, you will need to respawn the process every time you want the program to run:
if (state === "on") {
py = spawn('python', ['/home/pi/Desktop/RFbulb/nRF24L01PLUS.py']);
...
}
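For illustration, a minimal sketch of setOn with the respawn approach (assuming the Python script reads a single JSON command from stdin and exits; the stdout listeners move inside the function so they attach to each fresh child):
RFbulbAccessory.prototype.setOn = function(on, callback) {
    var state = on ? "on" : "off";
    if (state == "on") {
        // spawn a fresh child process for every command
        // (reuses the spawn and RFstatus declared at the top of the file)
        var py = spawn('python', ['/home/pi/Desktop/RFbulb/nRF24L01PLUS.py']);
        var dataString = '';
        py.stdout.on('data', function(chunk) {
            dataString += chunk.toString();
        });
        py.stdout.on('end', function() {
            console.log(dataString);
        });
        py.stdin.write(JSON.stringify([1, parseInt(this.address, 10), 100]));
        py.stdin.end(); // safe now: this stream belongs to this spawn only
        RFstatus = true;
    }
    callback(null);
};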
I am running node.js on Raspbian and trying to save/update a file every 2 to 3 seconds using the following code:
var saveFileSaving = false;
function loop() {
mainLoop = setTimeout(function() {
// update data
saveSaveFile(data, function() {
//console.log("Saved data to file");
loop();
});
}, 1500);
}
function saveSaveFile(data, callback) {
if(!saveFileSaving) {
saveFileSaving = true;
var wstream = fs.createWriteStream(path.join(__dirname, 'save.json'));
wstream.on('finish', function () {
saveFileSaving = false;
callback(data);
});
wstream.on('error', function (error) {
console.log(error);
saveFileSaving = false;
wstream.end();
callback(null);
});
wstream.write(JSON.stringify(data));
wstream.end();
} else {
callback(null);
}
}
When I run this, it works fine for about an hour and then starts spitting out:
[25/May/2016 11:3:4 am] { [Error: EROFS, open '<path to file>']
errno: 56,
code: 'EROFS',
path: '<path to file>' }
I have tried the jsonfile plugin, which also gives a similar write error after an hour.
I have tried both fileSystem.writeFile and fileSystem.writeFileSync; both give the same error after an hour.
I was thinking it had to do with the file handle not being released before a new save occurs, which is why I started using the saveFileSaving flag.
Resetting the system via a hard reset fixes the issue (a soft reset does not work, as the system seems to be locked up).
Any suggestions? I have searched the web and only found one other slightly similar question from 4 years ago, which was left in limbo.
Note: I am using the callback function from the code to continue with the main loop.
I was able to get this working by unlinking the file before saving it each time. While it is not pretty, it works and shouldn't cause too much overhead.
I also added a backup solution which saves a backup every 5 minutes, in case the save file has issues.
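For reference, a rough sketch of that unlink-then-write workaround, reusing the names from the question (the callback convention stays the same: callback(data) on success, callback(null) on failure):
function saveSaveFile(data, callback) {
    // fs and path are already required at the top of the file
    var file = path.join(__dirname, 'save.json');
    // remove the old file first; ENOENT just means it didn't exist yet
    fs.unlink(file, function(err) {
        if (err && err.code !== 'ENOENT') {
            console.log(err);
            return callback(null);
        }
        fs.writeFile(file, JSON.stringify(data), function(err) {
            if (err) {
                console.log(err);
                return callback(null);
            }
            callback(data);
        });
    });
}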
Thank you for everyone's help.
Here are my ideas:
1) Check free space when the problem happens by typing this in a terminal:
df -h
2) Also check whether the file is still editable when the problem occurs, with nano, vim, etc. (or programmatically, as in the sketch after this list).
3) Your code is too complicated for simply scheduling data manipulation and writing it to a file. Whenever the file is busy (saveFileSaving), you lose that iteration's data until the next one. Try code like this:
var
async = require('async'),
fs = require('fs'),
path = require('path');
async.forever(function(next) {
// some data manipulation
try {
fs.writeFileSync(path.join(__dirname, 'save.json'), JSON.stringify(data));
}
catch(ex) {
console.error('Error writing data to file:', ex);
}
setTimeout(next, 2000);
});
4) How about keeping the file descriptor open?
var
async = require('async'),
fs = require('fs'),
path = require('path');
var file = fs.createWriteStream(path.join(__dirname, 'save.json'));
async.forever(function(next) {
// some data manipulation
file.write(JSON.stringify(data));
setTimeout(next, 2000);
});
var handleSignal = function (exc) {
// close file
file.end();
if(exc) {
console.log('STOPPING PROCESS BECAUSE OF:', exc);
}
process.exit(-1);
}
process.on('uncaughtException', handleSignal);
process.on('SIGHUP', handleSignal);
5) There could be hardware or software problems (perhaps OS driver related) with the Raspberry Pi's storage controller. Note that EROFS means the filesystem is read-only; Linux typically remounts a filesystem read-only when it detects I/O errors on the underlying device, which would match the symptoms here.
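On point 2, you can also test writability from within Node when the error starts appearing; a small sketch (fs.access needs Node 0.11.15 or newer):
var fs = require('fs'),
    path = require('path');

// EROFS on this check would confirm the filesystem has gone read-only
fs.access(path.join(__dirname, 'save.json'), fs.W_OK, function(err) {
    if (err) {
        console.error('save.json is not writable:', err.code);
    } else {
        console.log('save.json is writable');
    }
});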
I have a simple case where I'm requesting an upstream proxy server from my node.js server. As the load increases, requests take a lot of time to execute (though the time my upstream proxy server takes to respond is constant across requests). To demonstrate the issue, I've written a sample program below. When I execute it, the first request takes 118 ms and the last one takes 10970 ms, depending on the website you hit (I've changed the URL to Google; try it out with your favourite website). If you observe, I'm using async to parallelize my requests.
The question is: what is the reason node.js takes this much time to execute a request when run in parallel? To give some more context on the infra settings (CentOS 6.5): I have opened up my port range from 1024 to 65535, changed fin_timeout to 15 seconds, and enabled tw_reuse=1 for sockets in sysctl.conf.
var http = require('http');
var uuid = require('node-uuid');
var async = require('async');
function callExternalUrl(){
var uniqueId = uuid.v4();
console.time(uniqueId);
var options = {
host: 'google.com',
port: '80',
path: '/',
method: 'GET'
};
var req = http.request(options, function(res) {
var msg = '';
res.setEncoding('utf8');
res.on('data', function(chunk) {
msg += chunk;
console.timeEnd(uniqueId);
});
res.on('end', function() {
});
});
req.end();
}
function iterateAsync(callback){
var iter = [];
for(var i=0; i<1000; i++){
iter[i] = i;
}
async.each(iter,
function(item, callback) {
callExternalUrl();
},
function(err) {
callback(err);
}
);
}
iterateAsync(function(){console.log('done');});
To give more context, below is equivalent code in Ruby. I understand I can't compare the two languages apples to apples, but the idea is to show the time it takes to execute the same requests in sequence using Ruby. I don't see any increase in response times for requests going out in sequence, so I suspect the slowdown comes from sending the parallel requests out of the machine itself, not from the server's responses:
require 'rest_client'
1000.times do |number|
beginning = Time.now
response = RestClient.get 'http://google.com'
puts "Time elapsed #{Time.now - beginning} seconds"
end
For one, you're not calling the async iterator callback function:
function callExternalUrl(asyncCallback) {
...
res.on('end', function() {
asyncCallback();
});
...
}
function iterateAsync(callback) {
var iter = [];
for(var i=0; i<1000; i++){
iter[i] = i;
}
async.each(iter,
function(item, asyncCallback) { // <-- HERE
callExternalUrl(asyncCallback);
},
function(err) {
callback(err);
}
);
}
Also, depending on the Node version you're using, the http module may limit the number of parallel requests being made to a particular hostname:
$ node -pe 'require("http").globalAgent.maxSockets'
On Node 0.10, the default is 5; on Node 0.12, the default is Infinity ("unlimited"). So if you're not on Node 0.12, you should increase that value in your code:
var http = require('http');
http.globalAgent.maxSockets = Infinity;
...
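Alternatively, instead of changing the global agent, you can give these requests their own agent with a capped socket pool; a sketch (the cap of 50 here is an arbitrary choice):
var http = require('http');

// dedicated agent; only requests that opt in share its socket pool
var agent = new http.Agent();
agent.maxSockets = 50;

var options = {
    host: 'google.com',
    port: '80',
    path: '/',
    method: 'GET',
    agent: agent // use this agent instead of http.globalAgent
};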
I've tried to run your scenario using JXcore (a fork of Node.JS, now an open source project on GitHub), which offers multitasking (among many other new features).
var task = function (item) {
var http = require('http');
var uuid = require('node-uuid');
var uniqueId = uuid.v4() + "-" + process.threadId;
console.time(uniqueId);
var options = {
host: 'google.com',
port: '80',
path: '/',
method: 'GET'
};
var req = http.request(options, function (res) {
var msg = '';
res.setEncoding('utf8');
res.on('data', function (chunk) {
msg += chunk;
console.timeEnd(uniqueId);
});
res.on('end', function () {
process.release();
});
});
req.end();
process.keepAlive();
};
jxcore.tasks.setThreadCount(4);
console.time("total");
process.on('exit', function () {
console.timeEnd("total");
});
for (var i = 0; i < 1000; i++)
jxcore.tasks.addTask(task, i);
The sample is not really optimized, but the total of 1000 requests still runs a little faster with JXcore for me (I was able to measure up to a 20% gain on my platform). That may vary depending on the machine, since multitasking uses separate threads/instances within one single process (no need for clustering any more). My machine has just 4 threads, which is why I used jxcore.tasks.setThreadCount(4); you can try with your 32 :)
The way each single request is handled is not significantly different, so I'm not saying that each request takes less time; the key might be hidden in a different queuing mechanism as opposed to the "async" module, and of course in multitasking.
I need to connect to a websocket even if the first attempt is not successful, so I need some kind of loop.
Now I have:
ws = new WebSocket('ws://domain');
if(!ws) return;
ws.onopen = function() {
ws.send('getpayments '+ response );
}; ...
and I need to do this until it connects.
Please help me.
Not really a loop, but recursive retries:
var retry_connecting = function(domain, clb) {
var ws = new WebSocket(domain);
ws.onerror = function() {
console.log('WS Error! Retrying...');
// let the client breath for 100 millis
setTimeout(function() {
retry_connecting(domain, clb);
}, 100);
};
ws.onopen = function() {
clb(ws);
};
};
And usage:
retry_connecting('ws://domain', function(ws) {
console.log('We are connected!');
});
This code will try to connect ad infinitum. I don't recommend that, but I'm sure you'll be able to modify it to run only a finite number of times and then return an error after too many retries.
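For example, a variant with a retry cap might look like this (the maxRetries parameter and the error-first callback are my own additions):
var retry_connecting = function(domain, maxRetries, clb) {
    var ws = new WebSocket(domain);
    ws.onerror = function() {
        // give up once the retry budget is exhausted
        if (maxRetries <= 0) return clb(new Error('too many retries'));
        console.log('WS Error! Retrying...');
        // let the client breathe for 100 millis
        setTimeout(function() {
            retry_connecting(domain, maxRetries - 1, clb);
        }, 100);
    };
    ws.onopen = function() {
        clb(null, ws);
    };
};

retry_connecting('ws://domain', 10, function(err, ws) {
    if (err) return console.log('Could not connect:', err.message);
    console.log('We are connected!');
});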
I'm using a node.js server as a live chat feature, and the client side uses AJAX long polling.
I noticed that it always hangs completely after a while (a few hours), so I have to restart it. After it hangs, it doesn't respond to any client any more; all the long polling requests return an error saying the node server is not responding.
I have a lot of code there, so I'm going to paste it all here, since I know there could be many possibilities for this kind of thing. But what are the common reasons? What problems should I check first?
Usually, the output of top looks like this:
Cpu(s): 1.7%us, 0.3%sy, 0.0%ni, 98.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Mem: 3844592k total, 494764k used, 3349828k free, 98224k buffers
Swap: 0k total, 0k used, 0k free, 171324k cached
Here are some errors from the forever server error log. I noticed one socket hang up error; could that be the problem?
events.js:72
throw er; // Unhandled 'error' event
^
Error: socket hang up
at SecurePair.error (tls.js:999:23)
at EncryptedStream.CryptoStream._done (tls.js:695:22)
at CleartextStream.read [as _read] (tls.js:496:24)
at CleartextStream.Readable.read (_stream_readable.js:320:10)
at EncryptedStream.onCryptoStreamFinish (tls.js:301:47)
at EncryptedStream.g (events.js:175:14)
at EncryptedStream.EventEmitter.emit (events.js:117:20)
at finishMaybe (_stream_writable.js:354:12)
at endWritable (_stream_writable.js:361:3)
at EncryptedStream.Writable.end (_stream_writable.js:339:5)
at EncryptedStream.CryptoStream.end (tls.js:633:31)
(node) warning: possible EventEmitter memory leak detected. 11 listeners added. Use emitter.setMaxListeners() to increase limit.
Trace
at EventEmitter.addListener (events.js:160:15)
at Hub.<anonymous> (/home/ec2-user/AjaxIM-retail/server/middleware/im/hub.js:88:33)
at /home/ec2-user/AjaxIM-retail/server/libs/utils.js:43:23
at IncomingMessage.<anonymous> (/home/ec2-user/AjaxIM-retail/server/libs/authentication/lp/index.js:75:33)
at IncomingMessage.EventEmitter.emit (events.js:117:20)
at _stream_readable.js:920:16
at process._tickCallback (node.js:415:13)
(node) warning: possible EventEmitter memory leak detected. 11 listeners added. Use emitter.setMaxListeners() to increase limit.
....
These are all the listeners I have. Are there any problems here?
this.events.addListener('update', o_.bind(function(package) {
if(this.clear != 0){
delete this.sessions[this.clear];
}
var _package = package.toJSON();
if(package.type == 'status' && package.status == 'offline') {
var sids = Object.keys(this.sessions), sid, sess;
for(sid in this.sessions) {
sess = this.sessions[sid];
if(sess.data('username') == package.username) {
if(sess.listeners.length)
sess.send(200, {type: 'goodbye'});
delete this.sessions[sid];
break;
}
}
}
}, this));
};
Hub.prototype.destroy = function(sid, fn) {
this.set(sid, null, fn);
};
Hub.prototype.reap = function(ms) {
var threshold = +new Date - ms,
sids = Object.keys(this.sessions);
for(var i = 0, len = sids.length; i < len; ++i) {
var sid = sids[i], sess = this.sessions[sid];
if(sess.lastAccess < threshold) {
this.events.emit('update', new packages.Offline(sess.data('username')));
}
}
};
Hub.prototype.get = function(req, fn) {
if(this.sessions[req.sessionID]) {
fn(null, this.sessions[req.sessionID]);
} else {
this.auth.authenticate(req, o_.bind(function(data) {
if(data) {
var session = new User(req.sessionID, data);
this.set(req.sessionID, session);
this.auth.friends(req, data, o_.bind(function(friends) {
var friends_copy = friends.slice();
o_.values(this.sessions).filter(function(friend) {
return ~friends.indexOf(friend.data('username'));
}).forEach(function(friend) {
var username = friend.data('username');
friends_copy[friends_copy.indexOf(username)] =
[username, friend.status()];
}, this);
session._friends(friends_copy);
session.events.addListener('status',
o_.bind(function(value, message) {
this.events.emit(
'update',
new packages.Status(session.data('username'),
value,
message)
);
}, this));
this.events.addListener('update',
o_.bind(session.receivedUpdate, session));
this.set(req.sessionID, session);
fn(null, session);
}, this));
} else {
fn();
}
}, this));
}
};