I am trying out workers in JS, and I made a simple sort comparison using the built-in JS sort function. Each variant is an async function that sorts 60000 random numbers. The first sorts the random numbers the way we traditionally do it:
async function normalSort(arr) {
  return new Promise((res) => {
    let copy = arr; // note: this is a reference to the same array, not a copy
    copy.sort((a, b) => a > b ? 1 : -1);
    res(copy);
  });
}
The other is a normal function which is called from a workersHandler function:
const { parentPort, workerData } = require('worker_threads');

function sort(data) {
  let copy = data;
  copy.sort((a, b) => a > b ? 1 : -1);
  parentPort.postMessage(copy);
  process.exit();
}
sort(workerData);
The workers handler function:
const os = require('os');
const path = require('path');
const { Worker } = require('worker_threads');
async function workersHandler(arr) {
  const cpusAmount = os.cpus().length;
  const chSize = Math.ceil(arr.length / cpusAmount);
  const promises = [];
  for (let i = 0; i < arr.length; i += chSize) {
    const currentChunk = arr.slice(i, i + chSize);
    const promise = new Promise((res, rej) => {
      // @ts-ignore
      const worker = new Worker(path.join(__dirname, '..', '/utils/sort.js'), { workerData: currentChunk });
      worker.on('message', res);
      worker.on('error', rej);
    });
    promises.push(promise);
  }
  const result = await Promise.all(promises);
  return result;
}
And the main function which calls the other functions:
function main() {
  const arr = new Array(60000).fill(0).map(() => Math.round(Math.random() * 100));
  const startTime = Date.now();
  workersHandler(arr).then(() => console.log('workers sort', Date.now() - startTime + ' ms'));
  normalSort(arr).then(() => console.log('normal sort', Date.now() - startTime + ' ms'));
}
main();
Surprisingly, the normal sort function is way faster, and it runs in one thread.
I am getting 101 ms for the workers function
and 53 ms for the normal sort function.
Could someone explain these odd results to me? Are workers not that fast, or is my implementation wrong?
Basically, using a single worker thread and waiting for it to do the work will always be slower than doing the work in the local thread, because:
Creating threads takes time.
Sending data between threads takes time.
Where you might get gains is if you have isolated pieces of work that can be handled in parallel, and multiple CPU cores to work with. In that situation, you can send different pieces of work to multiple workers (up to as many CPU cores as are available), provided the work isn't constrained by some other single resource they'd all be competing for.
Below I've posted a program that sorts 12 arrays locally and via workers with repeated races. (When sorting in workers, it transfers the array data to the worker and then back rather than copying it.) It starts the workers in advance and reuses them, but it includes the time that took when determining the average time the workers took to do their work, so we're including all overhead.
On my workstation, with four CPU cores and letting it have a worker for each core, workers easily win:
# of workers: 4
Local average: 8790.010573029518ms
Workers' average: 3550.658817946911ms
Workers win, taking 40.39425% of the time local did
If I limit it to one worker, though, the worker is pure overhead and the local thread wins:
# of workers: 1
Local average: 8907.022233068943ms
Workers' average: 8953.339844942093ms
Local wins, taking 99.48268% of the time workers did
Even just two workers wins, because they can work in parallel on this multi-core machine:
# of workers: 2
Local average: 8782.853852927685ms
Workers' average: 4754.60275799036ms
Workers win, taking 54.13505% of the time local did
On a single core machine (if you can find one anymore), those two workers would be pure overhead again, and the local thread would win.
Here's main.js:
const os = require('os');
const { Worker } = require('worker_threads');
const { performance } = require('perf_hooks');
const MAX_UINT32 = (2**32)-1;
const ARRAY_SIZE = 100000;
const ARRAY_COUNT = 12;
const workerCount = +process.argv[2] || os.cpus().length;
const raceCount = +process.argv[3] || 5;
class WorkerQueue {
#workers;
#available;
#pending;
#checkPending = () => { // private methods still aren't unflagged in v13, so...
if (this.#available.length && this.#pending.length) {
const resolve = this.#pending.shift();
const worker = this.#available.shift();
resolve(worker);
}
};
constructor(...workers) {
this.#workers = new Set(workers);
this.#available = [...this.#workers];
this.#pending = [];
}
get() {
return new Promise(resolve => {
this.#pending.push(resolve);
this.#checkPending();
});
}
release(worker) {
if (!this.#workers.has(worker)) {
throw new Error("Unknown worker");
}
this.#available.push(worker);
this.#checkPending();
}
terminate() {
for (const worker of this.#workers) {
worker.terminate();
}
this.#workers = new Set();
this.#available = [];
this.#pending = [];
}
}
const {workers, workerCreationTime} = createWorkers();
main();
function createWorkers() {
const start = performance.now();
const workers = new WorkerQueue(
...Array.from({length: workerCount}, () => new Worker("./worker.js"))
);
const workerCreationTime = performance.now() - start;
return {workers, workerCreationTime};
}
async function main() {
try {
console.log(`Workers: ${workerCount} (in ${workerCreationTime}ms), races: ${raceCount}`);
let localAverage = 0;
let workersAverage = 0;
for (let n = 1; n <= raceCount; ++n) {
console.log(`Race #${n}:`);
const {localTime, workersTime} = await sortRace();
localAverage += localTime;
workersAverage += workersTime;
}
// Include the time it took to create the workers in the workers' average, as
// though we'd created them for each race. (We didn't because doing so would
// have given the local thread an advantage: after the first race, it's warmed
// up, but a new worker would be cold. So we let the workers be warm but add
// the full creation time into each race.)
workersAverage += workerCreationTime;
console.log("----");
console.log(`# of workers: ${workerCount}`);
console.log(`Local average: ${localAverage}ms`);
console.log(`Workers' average: ${workersAverage}ms`);
if (localAverage > workersAverage) {
showWinner("Workers win", "local", workersAverage, localAverage);
} else {
showWinner("Local wins", "workers", localAverage, workersAverage);
}
workers.terminate();
} catch (e) {
console.error(e.message, e.stack);
}
}
function showWinner(msg, loser, winnerAverage, loserAverage) {
const percentage = (winnerAverage * 100) / loserAverage;
console.log(`${msg}, taking ${percentage.toFixed(5)}% of the time ${loser} did`);
}
async function sortRace() {
// Create a bunch of arrays for local to sort
const localArrays = Array.from({length: ARRAY_COUNT}, () => createRandomArray(ARRAY_SIZE));
// Copy those arrays so the workers are dealing with the same values
const workerArrays = localArrays.map(array => new Uint32Array(array));
const localStart = performance.now();
const localResults = await Promise.all(localArrays.map(sortLocal));
const localTime = performance.now() - localStart;
checkResults(localResults);
console.log(`Local time: ${localTime}ms`);
const workerStart = performance.now();
const workersResults = await Promise.all(workerArrays.map(sortViaWorker));
const workersTime = performance.now() - workerStart;
checkResults(workersResults);
console.log(`Workers' time: ${workersTime}ms`);
return {localTime, workersTime};
}
async function sortLocal(array) {
await Promise.resolve(); // To make it start asynchronously, like `sortViaWorker` does
array.sort((a, b) => a - b);
return array;
}
async function sortViaWorker(array) {
const worker = await workers.get();
return new Promise(resolve => {
worker.once("message", result => {
workers.release(worker);
resolve(result.array);
});
worker.postMessage({array}, [array.buffer]);
});
}
function checkResults(arrays) {
for (const array of arrays) {
const badIndex = array.findIndex((value, index) => index > 0 && array[index-1] > value);
if (badIndex !== -1) {
throw new Error(
`Error, array entry ${badIndex} has value ${array[badIndex]} ` +
`which is < previous value ${array[badIndex-1]}`
);
}
}
}
function createRandomArray(length) {
// the Uint32Array constructor takes an element count, not a byte count
const array = new Uint32Array(length);
return randomFillArray(array);
}
function randomFillArray(array) {
for (let length = array.length, i = 0; i < length; ++i) {
array[i] = Math.random() * MAX_UINT32;
}
return array;
}
and worker.js:
const { parentPort } = require("worker_threads");
parentPort.on("message", ({array}) => {
array.sort((a, b) => a - b);
parentPort.postMessage({array}, [array.buffer]);
});
60000 elements may not be enough for the workers to pay off; IPC times matter.
About IPC: generic JavaScript datatypes, including generic JS arrays, are heavy when copied to workers, but there are binary array types: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray
postMessage() has a transfer argument, but it applies to a limited number of types only.
https://nodejs.org/api/worker_threads.html#worker_threads_port_postmessage_value_transferlist and https://developer.mozilla.org/en-US/docs/Web/API/Worker/postMessage:
postMessage(value[, transferList])
node: transferList may be a list of ArrayBuffer and MessagePort objects. After transferring, they will not be usable on the sending side of the channel anymore (even if they are not contained in value).
MDN: An optional array of Transferable objects to transfer ownership of. If the ownership of an object is transferred, it becomes unusable (neutered) in the context it was sent from and becomes available only to the worker it was sent to. Transferable objects are instances of classes like ArrayBuffer, MessagePort or ImageBitmap objects that can be transferred.
Effect of types:
let typ=prompt("Type: 0/1/2/3 (Array/Float64Array/Float32Array/Uint32Array)");
let len=parseInt(prompt("Length"));
let basearray;
switch(typ){
case "1":basearray=new Float64Array(len);break;
case "2":basearray=new Float32Array(len);break;
case "3":basearray=new Uint32Array(len);break;
default: basearray=new Array(len);break;
}
for(let i=0;i<basearray.length;i++)
basearray[i]=Math.random()*0x1000000;
let cpus=4,
chunksize=basearray.length/cpus,
chunks=[],chunksw=[];
for(let i=0;i<cpus;i++)
chunksw[i]=(chunks[i]=basearray.slice(i*chunksize,(i+1)*chunksize)).slice();
let start=Date.now();
for(let i=0;i<cpus;i++)
chunks[i].sort((a,b)=>a-b);
console.log("Seq:",Date.now()-start);
let code="onmessage=event=>postMessage(event.data.sort((a,b)=>a-b));";
let ws=[],cnt=0;
for(let i=0;i<cpus;i++){
ws[i]=new Worker("data:text/plain,"+escape(code));
let j=i;
ws[i].onmessage=event=>{
chunksw[j]=event.data;
if(++cnt===cpus){
console.log("Par:",Date.now()-start);
if(len<=20)
for(let i=0;i<cpus;i++)
console.log(chunks[i],chunksw[i]);
}
};
}
start=Date.now();
for(let i=0;i<cpus;i++)
ws[i].postMessage(chunksw[i]);
Specify a length divisible by 4. If the length is 20 or less, the resulting sorted chunks are logged too, for verification purposes. JS Arrays are reliably slower for me when passed around (compared to the thread-less run), regardless of whether they contain 20 or 6000000 elements (a 6-million-element JS array runs for 8 seconds on my older laptop, so it may be safer to start with something smaller). The other types are faster when threaded, Uint32Array being the fastest.
Note that anything which is not 1/2/3 results in a JS Array (the slowest one), including the empty string.
The effect of transfer is not as spectacular, but it already shows up at the smallest sizes (with 4 elements it is 59-69 ms vs. 20-22 ms on my PC):
let typ=prompt("Type: 0/1/2 (Float64Array/Float32Array/Uint32Array)");
let len=parseInt(prompt("Length"));
let basearray;
switch(typ){
case "1":basearray=new Float32Array(len);break;
case "2":basearray=new Uint32Array(len);break;
default:basearray=new Float64Array(len);
}
for(let i=0;i<basearray.length;i++)
basearray[i]=Math.random()*0x1000000;
let cpus=4,
chunksize=basearray.length/cpus,
chunksw=[],chunkswt=[];
for(let i=0;i<cpus;i++)
chunkswt[i]=(chunksw[i]=basearray.slice(i*chunksize,(i+1)*chunksize)).slice();
let start;
let code="onmessage=event=>postMessage(event.data.sort((a,b)=>a-b));";
let ws=[],cnt=0;
for(let i=0;i<cpus;i++){
ws[i]=new Worker("data:text/plain,"+escape(code));
let j=i;
ws[i].onmessage=event=>{
chunksw[j]=event.data;
if(++cnt===cpus){
console.log("Non-transfer:",Date.now()-start);
// launch transfer measurement
cnt=0;start=Date.now();
for(let i=0;i<cpus;i++)
wst[i].postMessage(chunkswt[i].buffer,[chunkswt[i].buffer]);
}
};
}
let codet;
switch(typ){
case "1":
codet="onmessage=event=>{"+
"let arr=new Float32Array(event.data);"+
"arr.sort((a,b)=>a-b);"+
"postMessage(event.data,[event.data]);};";
break;
case "2":
codet="onmessage=event=>{"+
"let arr=new Uint32Array(event.data);"+
"arr.sort((a,b)=>a-b);"+
"postMessage(event.data,[event.data]);};";
break;
default:
codet="onmessage=event=>{"+
"let arr=new Float64Array(event.data);"+
"arr.sort((a,b)=>a-b);"+
"postMessage(event.data,[event.data]);};";
}
let wst=[];
for(let i=0;i<cpus;i++){
wst[i]=new Worker("data:text/plain,"+escape(codet));
let j=i;
wst[i].onmessage=event=>{
switch(typ){
case "1":chunkswt[j]=new Float32Array(event.data);break;
case "2":chunkswt[j]=new Uint32Array(event.data);break;
default:chunkswt[j]=new Float64Array(event.data);
}
if(++cnt===cpus){
console.log("Transfer:",Date.now()-start);
if(len<=20)
for(let i=0;i<cpus;i++)
console.log(chunksw[i],chunkswt[i]);
}
};
}
// launch non-transfer measurement
start=Date.now();
for(let i=0;i<cpus;i++)
ws[i].postMessage(chunksw[i]);
This code is a bit messy because it is the buffer that can be transferred, not the typed arrays themselves; also, while the second measurement is initialized as a direct copy-paste (which already isn't that pretty), it is then launched from inside the completion handler of the first one.
(I do not wish to provide exact measurement results because my PC is doing some other things too. Just run the snippets a couple of times with varied or even repeated parameters.)
Related
I want to sort an array, using Web Workers. But this array might receive new values over time, while the worker is still performing the sort function.
So my question is, how can I "stop" the sorting computation on the worker after receiving the new item, so it can perform the sort on the array with that item, while still keeping the sorting that was already made?
Example:
let worker = new Worker('worker.js');
let list = [10,1,5,2,14,3];
worker.postMessage({ list });
setInterval(() => worker.postMessage({ num: SOME_RANDOM_NUM, list }), 100);
worker.onmessage = event => {
list = event.data.list;
}
So let's say that I've passed 50; the worker made some progress in the sorting before that, and now I have something like this:
[1, 2, 3, 10, 5, 14, 50], which means the sorting stopped at index 3. So I pass this new array back to the worker so it can continue the sorting from position 3.
How can I accomplish that, since there is no way to pause/resume a web worker?
Even though the Worker works on an other thread than the one of your main page, and can thus run continuously without blocking the UI, it still runs on a single thread.
This means that until your sort algorithm has finished, the Worker will delay the execution of the message event handler; it is as blocked as the main thread would be.
Even if you made use of an other Worker from inside this worker, the problem would be the same.
The only solution is to use a kind of generator function as the sorter, and to yield from it every now and then so that pending events can get executed.
But doing this will drastically slow down your sorting algorithm.
To make it better, you can hook into each iteration of the event loop thanks to a MessageChannel object: you post a message on one port and receive it in the next event loop iteration; if you then post to the other port again, you have your own hook into every event loop iteration.
Now, the best would be to run a good batch of steps in each of these event loop iterations, but for the demo I'll run only one step of our generator function (which I borrowed from this Q/A) per iteration:
const worker = new Worker(getWorkerURL());
worker.onmessage = draw;
onclick = e => worker.postMessage(0x0000FF/0xFFFFFF); // add a red pixel
// every frame we request the current state from Worker
function requestFrame() {
worker.postMessage('gimme a frame');
requestAnimationFrame(requestFrame);
}
requestFrame();
// drawing part
const ctx = canvas.getContext('2d');
const img = ctx.createImageData(50, 50);
const data = new Uint32Array(img.data.buffer);
ctx.imageSmoothingEnabled = false;
function draw(evt) {
// converts 0&1 to black and white pixels
const list = evt.data;
list.forEach((bool, i) =>
data[i] = (bool * 0xFFFFFF) + 0xFF000000
);
ctx.setTransform(1,0,0,1,0,0);
ctx.clearRect(0,0,canvas.width,canvas.height);
ctx.putImageData(img,0,0);
// draw bigger
ctx.scale(5,5);
ctx.drawImage(canvas, 0,0);
}
function getWorkerURL() {
const script = document.querySelector('[type="worker-script"]');
const blob = new Blob([script.textContent]);
return URL.createObjectURL(blob);
}
body{
background: ivory;
}
<script type="worker-script">
// our list
const list = Array.from({length: 2500}).map(_=>+(Math.random()>.5));
// our sorter generator
let sorter = bubbleSort(list);
let done = false;
/* inner messaging channel */
const msg_channel = new MessageChannel();
// Hook to every Event loop
msg_channel.port2.onmessage = e => {
// procede next step in sorting algo
// could be a few thousands in a loop
const state = sorter.next();
// while running
if(!state.done) {
msg_channel.port1.postMessage('');
done = false;
}
else {
done = true;
}
}
msg_channel.port1.postMessage("");
/* outer messaging channel (from main) */
self.onmessage = e => {
if(e.data === "gimme a frame") {
self.postMessage(list);
}
else {
list.push(e.data);
if(done) { // restart the sorter
sorter = bubbleSort(list);
msg_channel.port1.postMessage('');
}
}
};
function* bubbleSort(a) { // * is magic
var swapped;
do {
swapped = false;
for (var i = 0; i < a.length - 1; i++) {
if (a[i] > a[i + 1]) {
var temp = a[i];
a[i] = a[i + 1];
a[i + 1] = temp;
swapped = true;
yield swapped; // pause here
}
}
} while (swapped);
}
</script>
<pre> click to add red pixels</pre>
<canvas id="canvas" width="250" height="250"></canvas>
Note that the same can be achieved with an async function, which may be more practical in some cases:
const worker = new Worker(getWorkerURL());
worker.onmessage = draw;
onclick = e => worker.postMessage(0x0000FF/0xFFFFFF); // add a red pixel
// every frame we request the current state from Worker
function requestFrame() {
worker.postMessage('gimme a frame');
requestAnimationFrame(requestFrame);
}
requestFrame();
// drawing part
const ctx = canvas.getContext('2d');
const img = ctx.createImageData(50, 50);
const data = new Uint32Array(img.data.buffer);
ctx.imageSmoothingEnabled = false;
function draw(evt) {
// converts 0&1 to black and white pixels
const list = evt.data;
list.forEach((bool, i) =>
data[i] = (bool * 0xFFFFFF) + 0xFF000000
);
ctx.setTransform(1,0,0,1,0,0);
ctx.clearRect(0,0,canvas.width,canvas.height);
ctx.putImageData(img,0,0);
// draw bigger
ctx.scale(5,5);
ctx.drawImage(canvas, 0,0);
}
function getWorkerURL() {
const script = document.querySelector('[type="worker-script"]');
const blob = new Blob([script.textContent]);
return URL.createObjectURL(blob);
}
body{
background: ivory;
}
<script type="worker-script">
// our list
const list = Array.from({length: 2500}).map(_=>+(Math.random()>.5));
// our sorter state
let done = false;
/* outer messaging channel (from main) */
self.onmessage = e => {
if(e.data === "gimme a frame") {
self.postMessage(list);
}
else {
list.push(e.data);
if(done) { // restart the sorter
bubbleSort(list);
}
}
};
async function bubbleSort(a) { // async is magic
var swapped;
do {
swapped = false;
for (var i = 0; i < a.length - 1; i++) {
if (a[i] > a[i + 1]) {
const temp = a[i];
a[i] = a[i + 1];
a[i + 1] = temp;
swapped = true;
}
if( i % 50 === 0 ) { // by batches of 50?
await waitNextTask(); // pause here
}
}
} while (swapped);
done = true;
}
function waitNextTask() {
return new Promise( (resolve) => {
const channel = waitNextTask.channel ||= new MessageChannel();
channel.port1.addEventListener("message", (evt) => resolve(), { once: true });
channel.port2.postMessage("");
channel.port1.start();
});
}
bubbleSort(list);
</script>
<pre> click to add red pixels</pre>
<canvas id="canvas" width="250" height="250"></canvas>
There are two decent options.
Option 1: Worker.terminate()
The first is just to kill your existing web worker and start a new one. For that you can use Worker.terminate().
The terminate() method of the Worker interface immediately terminates the Worker. This does not offer the worker an opportunity to finish its operations; it is simply stopped at once.
The only downsides of this approach are:
You lose all worker state. If you had to copy a load of data into it for the request you have to do it all again.
It involves thread creation and destruction, which isn't as slow as most people think but if you terminate web workers a lot it might cause issues.
If neither of those are an issue it is probably the easiest option.
In my case I have lots of state. My worker is rendering part of an image, and when the user pans to a different area I want it to stop what it is doing and start rendering the new area. But the data needed to render the image is pretty huge.
In your case you have the state of your (presumably huge) list that you don't want to lose.
Option 2: Yielding
The second option is basically to do cooperative multitasking. You run your computation as normal, but every now and then you pause (yield) and say "should I stop?", like this (this is for some nonsense calculation, not sorting).
let requestId = 0;
onmessage = event => {
++requestId;
sortAndSendData(requestId, event.data);
}
function sortAndSendData(thisRequestId, data) {
let isSorted = false;
let total = 0;
while (data !== 0) {
// Do a little bit of computation.
total += data;
--data;
// Check if we are still the current request ID.
if (thisRequestId !== requestId) {
// Data was changed. Cancel this sort.
return;
}
}
postMessage(total);
}
This won't work though because sortAndSendData() runs to completion and blocks the web worker's event loop. We need some way to yield just before thisRequestId !== requestId. Unfortunately Javascript doesn't quite have a yield method. It does have async/await so we might try this:
let requestId = 0;
onmessage = event => {
console.log("Got event", event);
++requestId;
sortAndSendData(requestId, event.data);
}
async function sortAndSendData(thisRequestId, data) {
let isSorted = false;
let total = 0;
while (data !== 0) {
// Do a little bit of computation.
total += data;
--data;
await Promise.resolve();
// Check if we are still the current request ID.
if (thisRequestId !== requestId) {
console.log("Cancelled!");
// Data was changed. Cancel this sort.
return;
}
}
postMessage(total);
}
Unfortunately it doesn't work. I think it's because async/await executes things eagerly using "microtasks", which get executed before pending "macrotasks" (our web worker message) if possible.
We need to force our await to become a macrotask, which you can do using setTimeout(0):
let requestId = 0;
onmessage = event => {
console.log("Got event", event);
++requestId;
sortAndSendData(requestId, event.data);
}
function yieldToMacrotasks() {
return new Promise((resolve) => setTimeout(resolve));
}
async function sortAndSendData(thisRequestId, data) {
let isSorted = false;
let total = 0;
while (data !== 0) {
// Do a little bit of computation.
total += data;
--data;
await yieldToMacrotasks();
// Check if we are still the current request ID.
if (thisRequestId !== requestId) {
console.log("Cancelled!");
// Data was changed. Cancel this sort.
return;
}
}
postMessage(total);
}
This works! However it is extremely slow. await yieldToMacrotasks() takes approximately 4 ms on my machine with Chrome! This is because browsers set a minimum timeout on setTimeout(0) of something like 1 or 4 ms (the actual minimum seems to be complicated).
Fortunately another user pointed me to a quicker way. Basically sending a message on another MessageChannel also yields to the event loop, but isn't subject to the minimum delay like setTimeout(0) is. This code works and each loop only takes ~0.04 ms which should be fine.
let currentTask = {
cancelled: false,
}
onmessage = event => {
currentTask.cancelled = true;
currentTask = {
cancelled: false,
};
performComputation(currentTask, event.data);
}
async function performComputation(task, data) {
let total = 0;
let promiseResolver;
const channel = new MessageChannel();
channel.port2.onmessage = event => {
promiseResolver();
};
while (data !== 0) {
// Do a little bit of computation.
total += data;
--data;
// Yield to the event loop.
const promise = new Promise(resolve => {
promiseResolver = resolve;
});
channel.port1.postMessage(null);
await promise;
// Check if this task has been superseded by another one.
if (task.cancelled) {
return;
}
}
// Return the result.
postMessage(total);
}
I'm not totally happy about it - it relies on postMessage() events being processed in FIFO order, which I doubt is guaranteed. I suspect you could rewrite the code to make it work even if that isn't true.
You can do it with a trick: interrupting your function with setTimeout. For example, it is not possible to execute two functions in parallel without an additional thread, but with the setTimeout interruption trick we can do it as follows:
Example of parallel execution of functions
var count_0 = 0,
count_1 = 0;
function func_0()
{
if(count_0 < 3)
setTimeout(func_0, 0);//the same: setTimeout(func_0);
console.log('count_0 = '+count_0);
count_0++
}
function func_1()
{
if(count_1 < 3)
setTimeout(func_1, 0);
console.log('count_1 = '+count_1)
count_1++
}
func_0();
func_1();
You will get this output:
count_0 = 0
count_1 = 0
count_0 = 1
count_1 = 1
count_0 = 2
count_1 = 2
count_0 = 3
count_1 = 3
Why is it possible? Because a function scheduled with setTimeout does not run immediately, and that delay is enough for some of your following code to execute in the meantime.
Solution for you
For this case you have to write your own array sort function (or you can use the one below), because we cannot interrupt the native sort function. Inside this function of yours you then apply the same setTimeout interruption trick, and so your message event notifications can be received.
In the following example the interruption happens halfway through the passes over the array, and you can change that if you want.
Example with custom sort function interrupting
var numbers = [4, 2, 1, 3, 5];
// this is my bubble sort function with interruption
/**
* Sorting an array. You will get the same, but sorted array.
* @param {number[]} arr – array to sort
* @param {number} dir – if dir = -1 you will get an array like [5,4,3,2,1]
* and if dir = 1 in the opposite direction like [1,2,3,4,5]
* @param {number} passCount – it is used only for the setTimeout interruption trick.
*/
function sortNumbersWithInterruption(arr, dir, passCount)
{
var passes = passCount || arr.length,
halfOfArrayLength = (arr.length / 2) | 0; // for ex. 2.5 | 0 = 2
// Why we need while loop: some values are on
// the end of array and we have to change their
// positions until they move to the first place of array.
while(passes--)
{
if(!passCount && passes == halfOfArrayLength)
{
// if you want a full stop of the sorting instead, just omit the following line
setTimeout(function(){sortNumbersWithInterruption(arr, dir, passes)}, 0);
/*
You can do here all what you want. Place 1
*/
break
}
for(var i = 0; i < arr.length - 1; i++)
{
var a = arr[i],
b = arr[i+1];
if((a - b) * dir > 0)
{
arr[i] = b;
arr[i+1] = a;
}
}
console.log('array is: ' + arr.join());
}
if(passCount)
console.log('END string is: ' + arr.join());
}
sortNumbersWithInterruption(numbers, -1); //without passCount parameter
/*
You can do here all what you want. Place 2
*/
console.log('The execution is here now!');
You will get this output:
array is: 4,2,3,5,1
array is: 4,3,5,2,1
The execution is here now!
array is: 4,5,3,2,1
array is: 5,4,3,2,1
END string is: 5,4,3,2,1
You can do it with insertion sort (kind of).
Here is the idea:
Start your worker with an internal empty array (empty array is sorted obviously)
Your worker receives only elements not the entire array
Your worker insert any received element right in correct position into the array
Every n seconds, the worker posts a message with the current array if it has changed since the last message. (If you prefer, you can send the array on every insertion, but it is more efficient to buffer somehow.)
Eventually you get the entire array; if any item is added later, you will receive the updated array too.
NOTE: Because your array is always sorted, you can find the correct insertion position using binary search. This is very efficient.
I think the case comes down to careful management of postMessage calls and the amount of data passed to be processed at a time. I was dealing with a problem of this kind: think about not sending all new data into the function at once, but rather creating your own queue; when a small enough portion of the task has been accomplished by the webworker thread, it sends a message back to the main thread, which decides whether to send the next portion, wait, or quit.
In your case, e.g. one time you get 9000 new items, next time 100k; maybe create a queue/buffer that adds the next 10k new elements each time the webworker is done processing the last data change.
const someWorker = new Worker('abc.js');
var processingLock = false;
var queue = [];
function newDataAction(arr = null) {
if (arr != null) {
queue = queue.concat(arr);
}
if (!processingLock) {
processingLock = true;
var data = [];
for (let i = 0; i < 10000 && queue.length > 0; i++) {
data.push(queue.pop());
}
someWorker.postMessage(data);
}
}
someWorker.addEventListener('message', function(e) {
if (e.data == 'finished-last-task') {
processingLock = false;
if (queue.length > 0) {
newDataAction();
}
}
});
I worked through many sorting algorithms, and I don't see how sending new data into a sorting algorithm with a partially sorted array differs much in computation time from sorting the new items separately and performing a merge.
I have a function that covers 1400+ crypto pairs, and I have to send an API request for each pair and store the trades. Each pair takes 3-4 seconds, so the whole function takes a long time. I am getting the pairs from my DB and I am storing the trade data in my DB as well. I need to process the pairs in parallel so that trades for the pairs at the beginning of the list aren't missed while the function is still running.
This is my current function:
const getTrades = async () => {
let page = 1;
const results = await db.query("SELECT * FROM pairs;");
const pairs = results.rows;
const latest = await db.query("SELECT MAX(trade_time) FROM trades");
const latestTrade = latest.rows[0].max;
const coinResult = await db.query("SELECT * FROM coins");
let coinsInfo = coinResult.rows;
coinsInfo = coinsInfo.flat();
for (const pair of pairs) {
let biggestTrade = [];
const response = await axios.get(
`https://api.binance.com/api/v3/trades?symbol=${pair.pair}`
);
let filtered = response.data;
filtered = filtered.filter((trade) => trade.time > latestTrade);
let sells = filtered.filter((trade) => trade.isBuyerMaker === true);
let buys = filtered.filter((trade) => trade.isBuyerMaker === false);
if (sells.length > 0) {
biggestTrade.push(
sells.reduce(function (prev, current) {
return prev.quoteQty > current.quoteQty ? prev : current;
})
);
}
if (buys.length > 0) {
biggestTrade.push(
buys.reduce(function (prev, current) {
return prev.quoteQty > current.quoteQty ? prev : current;
})
);
}
biggestTrade = biggestTrade.flat();
for (const trade of filtered) {
let priceUSD = 0;
let baseAssetIcon = "null";
for (const coin of coinsInfo) {
if (coin.symbol.toUpperCase() === pair.quote_asset) {
priceUSD = coin.current_price;
}
if (coin.symbol.toUpperCase() === pair.base_asset) {
baseAssetIcon = coin.image_url;
}
if (priceUSD > 0 && baseAssetIcon != "null") {
break;
}
}
if (trade.quoteQty * priceUSD > 50000) {
const results = db.query(
"INSERT INTO trades (exchange_name, exchange_icon_url, trade_time, price_in_quote_asset,price_in_usd, trade_value, base_asset_icon, qty, quoteQty, is_buyer_maker, pair, base_asset_trade, quote_asset_trade) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12, $13)",
[
"Binance",
"https://assets.coingecko.com/markets/images/52/small/binance.jpg?1519353250",
trade.time,
trade.price,
priceUSD,
trade.quoteQty * priceUSD,
baseAssetIcon,
trade.qty,
trade.quoteQty,
trade.isBuyerMaker,
pair.pair,
pair.base_asset,
pair.quote_asset,
]
);
console.log("TRADE ADDED");
}
}
}
console.log("PAIRS ARE OVER");
};
pairs has over 1400 entries, and that is what we are looping over.
It depends on how many servers you are running this function on.
If it's one single machine, use worker_threads: run the same function in separate threads to achieve parallelism. But honestly, 1400 pairs at 3-4 seconds each is around 1-2 hours per run in serial. If you have 8 cores, threading might cut the time roughly eightfold, which still leaves you around 10 minutes, and cloud services usually charge a lot more for instances with more CPU cores.
If it's multiple machines, use a master and a queue to push pairs to each worker machine; on each worker machine you can also spawn multiple threads. That way you can scale horizontally, and it's possible to finish the run in seconds. In this situation each machine can be a cheap one from a cloud provider.
So it depends on your requirements: if you want it super fast, you have to add more machines.
I want to compare two large files(5GB+) and find if they are the same or not. One solution I considered is hashing both with crypto and then comparing the hashes. But this would take a lot of time since I will have to go through the entire files instead of stopping when a difference is found.
Another solution I thought was to compare the file as they are being streamed with fs.createReadStream() and break when a difference is found.
stream.on('data', (data) => {
//compare the data from this stream with the other stream
})
But I am not quite sure how I can have two streams that are synchronized.
As requested in your comments, if you want to see how an implementation can be written to do this, here's one. Here's how it works:
Open each of the two files
Compare the two file sizes. If not the same, resolve false.
Allocate two 8k buffers (you can choose the size of buffer to use)
Read 8k of each file (or less if not 8k left in the file) into your buffers
Compare those two buffers. If not identical, resolve false.
When you finish comparing all the bytes, resolve true
Here's the code:
const fs = require('fs');
const fsp = fs.promises;
// resolves to true or false
async function compareFiles(fname1, fname2) {
const kReadSize = 1024 * 8;
let h1, h2;
try {
h1 = await fsp.open(fname1);
h2 = await fsp.open(fname2);
const [stat1, stat2] = await Promise.all([h1.stat(), h2.stat()]);
if (stat1.size !== stat2.size) {
return false;
}
const buf1 = Buffer.alloc(kReadSize);
const buf2 = Buffer.alloc(kReadSize);
let pos = 0;
let remainingSize = stat1.size;
while (remainingSize > 0) {
let readSize = Math.min(kReadSize, remainingSize);
let [r1, r2] = await Promise.all([h1.read(buf1, 0, readSize, pos), h2.read(buf2, 0, readSize, pos)]);
if (r1.bytesRead !== readSize || r2.bytesRead !== readSize) {
throw new Error("Failed to read desired number of bytes");
}
if (buf1.compare(buf2, 0, readSize, 0, readSize) !== 0) {
return false;
}
remainingSize -= readSize;
pos += readSize;
}
return true;
} finally {
if (h1) {
await h1.close();
}
if (h2) {
await h2.close();
}
}
}
// sample usage
compareFiles("temp.bin", "temp2.bin").then(result => {
console.log(result);
}).catch(err => {
console.log(err);
});
This could be sped up a bit by opening and closing the files in parallel, using Promise.allSettled() to track when they are both open and then both closed. But because of the complications when one file opens successfully and the other doesn't (you don't want to leak the one opened file handle), it takes a bit more code to do that perfectly, so I kept it simpler here.
And, if you really wanted to optimize for performance, it would be worth testing larger buffers to see if it makes things faster or not.
It's also possible that buf1.equals(buf2) might be faster than buf1.compare(buf2), but you have to make sure that a partial buffer read at the end of the file still works properly when using that since .equals() always compares the entire buffer. You could build two versions and compare their performance.
Here's a more complicated version that opens and closes the files in parallel and might be slightly faster:
const fs = require('fs');
const fsp = fs.promises;
async function compareFiles(fname1, fname2) {
const kReadSize = 1024 * 8;
let h1, h2;
try {
let openResults = await Promise.allSettled([fsp.open(fname1), fsp.open(fname2)]);
let err;
if (openResults[0].status === "fulfilled") {
h1 = openResults[0].value;
} else {
err = openResults[0].reason;
}
if (openResults[1].status === "fulfilled") {
h2 = openResults[1].value;
} else {
err = openResults[1].reason;
}
// after h1 and h2 are set (so they can be properly closed)
// throw any error we got
if (err) {
throw err;
}
const [stat1, stat2] = await Promise.all([h1.stat(), h2.stat()]);
if (stat1.size !== stat2.size) {
return false;
}
const buf1 = Buffer.alloc(kReadSize);
const buf2 = Buffer.alloc(kReadSize);
let pos = 0;
let remainingSize = stat1.size;
while (remainingSize > 0) {
let readSize = Math.min(kReadSize, remainingSize);
let [r1, r2] = await Promise.all([h1.read(buf1, 0, readSize, pos), h2.read(buf2, 0, readSize, pos)]);
if (r1.bytesRead !== readSize || r2.bytesRead !== readSize) {
throw new Error("Failed to read desired number of bytes");
}
if (buf1.compare(buf2, 0, readSize, 0, readSize) !== 0) {
return false;
}
remainingSize -= readSize;
pos += readSize;
}
return true;
} finally {
// does not return file close errors
// but does hold resolving the promise until the files are closed
// or had an error trying to close them
// Since we didn't write to the files, a close error would be fairly
// unprecedented unless the disk went down
const closePromises = [];
if (h1) {
closePromises.push(h1.close());
}
if (h2) {
closePromises.push(h2.close());
}
await Promise.allSettled(closePromises);
}
}
compareFiles("temp.bin", "temp2.bin").then(result => {
console.log(result);
}).catch(err => {
console.log(err);
});
There are certainly libraries that do this, and file-sync-cmp is very popular (270k weekly downloads). It does the comparison in the simplest way, by reading the same number of bytes from the two files in different buffers, and then comparing the buffers byte by byte.
There's also a more modern library, filecompare, "using native Promises and native BufferTools (alloc and Buffer comparisons)".
Whenever practical, don't reinvent the wheel :)
Since the difference might be at the very end of the files, I guess calculating a hash of each file is the most straightforward (yet costly) and secure approach.
Did you try the md5-file npm package and gather some performance indicators?
So I'm trying to connect to an external service called Pexels to get some photos. I'm doing that from Node.js, but it is really just a JavaScript issue. Pexels unfortunately only lets a user download an object with 40 pictures per page.
https://api.pexels.com/v1/curated?per_page=40&page=1 // 40 is maximum
But actually I need more than that. I'd like to get 160 results, i.e. to combine the first four pages. In order to do that I tried looping the request:
let pexelsData = [];
for(let i = 1; i < 5; i++) {
const randomPage = getRandomFromRange(1, 100); //pages should be randomized
const moreData = await axios.get(`https://api.pexels.com/v1/curated?per_page=40&page=${randomPage}`,
createHeaders('bearer ', keys.pexelsKey));
pexelsData = [ ...moreData.data.photos, ...pexelsData ];
}
Now I can use pexelsData, but it works very unreliably: sometimes it is able to get all the combined data, sometimes it crashes. Is there a correct and stable way of looping requests?
You are working with a 3rd-party API which has rate limits, so you should add rate limiting to your code. The simplest solution is to use p-limit or a similar approach from promise-fun.
It will look like this:
const pLimit = require('p-limit');
const limit = pLimit(1);
const input = [
limit(() => fetchSomething('foo')),
limit(() => fetchSomething('bar')),
limit(() => doSomething())
];
(async () => {
// Only one promise is run at once
const result = await Promise.all(input);
console.log(result);
})();
You can break it into functions like this:
let images = [];

const getResponse = async (i) => {
  if (i < 5)
    return await axios.get(`https://api.pexels.com/v1/curated?per_page=40&page=${i}`);
};

const getImage = async (i) => {
  if (i < 5) {
    try {
      const response = await getResponse(i); // wait for the request before using it
      images = [...images, ...response.data.photos];
      // here you will get all the images in an array
      console.log(images);
      getImage(++i);
    } catch (error) {
      console.log("catch error", error);
      // getImage(i)
    }
  }
};

getImage(1); // initial call (pages start at 1)
I want to get the total amount of data downloaded across all resources (including video/media), similar to what is returned by Chrome DevTools' Network tab.
There doesn't seem to be any way to do this as of January 2018 that works with all resource types (listening for the response event fails for videos), and that correctly counts compressed resources.
The best workaround seems to be to listen for the Network.dataReceived event, and process the event manually:
const resources = {};
page._client.on('Network.dataReceived', (event) => {
const request = page._networkManager._requestIdToRequest.get(
event.requestId
);
if (request && request.url().startsWith('data:')) {
return;
}
const url = request.url();
// encodedDataLength is supposed to be the amount of data received
// over the wire, but it's often 0, so just use dataLength for consistency.
// https://chromedevtools.github.io/devtools-protocol/tot/Network/#event-dataReceived
// const length = event.encodedDataLength > 0 ?
// event.encodedDataLength : event.dataLength;
const length = event.dataLength;
if (url in resources) {
resources[url] += length;
} else {
resources[url] = length;
}
});
// page.goto(...), etc.
// totalCompressedBytes is unavailable; see comment above
const totalUncompressedBytes = Object.values(resources).reduce((a, n) => a + n, 0);
The solution of @mjs works perfectly even in 2021. You just need to replace:
page._networkManager -> page._frameManager._networkManager
Full example that works for me:
const resources = {};
page._client.on('Network.dataReceived', (event) => {
const request = page._frameManager._networkManager._requestIdToRequest.get(
event.requestId
);
if (request && request.url().startsWith('data:')) {
return;
}
const url = request.url();
const length = event.dataLength;
if (url in resources) {
resources[url] += length;
} else {
resources[url] = length;
}
});
await page.goto('https://stackoverflow.com/questions/48263345/how-can-i-get-the-raw-download-size-of-a-request-using-puppeteer');
const totalUncompressedBytes = Object.values(resources).reduce((a, n) => a + n, 0);
console.log(totalUncompressedBytes);
If you are using puppeteer, you have server-side Node... Why not pipe the requests through a stream, or streams, and then calculate the content size?
Also there is https://github.com/watson/request-stats
Also, you may want to call page.waitForNavigation, as you may be wrestling with async timing issues.
const images_width = await page.$$eval('img', anchors => [].map.call(anchors, img => img.width));
const images_height = await page.$$eval('img', anchors => [].map.call(anchors, img => img.height));