I'm trying to speed up matrix multiplication in pure JavaScript. Multiplication becomes very slow above a few hundred rows, taking over a minute for a thousand rows: you'll see the execution times below.
How would you solve this? We are working on a split + parallelization solution in Node.js, so I'm looking for the best options to optimize it in pure JavaScript. My solution has to adapt the parallelized flows to the number of CPU threads available (which is unknown at design time).
Some data:
const math = require("mathjs");

// a1 is a 1000x1000 float matrix
// b1 is a 1000x400 float matrix
math.multiply(a1, b1);
// runs in 19.6 seconds on a 4.2 GHz CPU

// a2 is 1600x1200
// b2 is 1200x800
math.multiply(a2, b2);
// runs in 78 seconds
Array lookup optimization
Arrays are associative lookup tables in JavaScript, which makes them inefficient by nature. One optimization for this kind of array access
var array = some_2D_Array;
var nRows = array.length;
var nCols = array[0].length;

for (var r = 0; r < nRows; ++r) {
    for (var c = 0; c < nCols; ++c) {
        // do something with array[r][c];
    }
}
is to replace it with
var array = some_2D_Array;
var nRows = array.length;
var nCols = array[0].length;

for (var r = 0; r < nRows; ++r) {
    var aRow = array[r]; // look up the row once
    for (var c = 0; c < nCols; ++c) {
        // do something with aRow[c];
    }
}
which avoids searching the array object for the row array on every iteration of the inner loop. The performance gain will depend on the JS engine and on the number of inner iterations.
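If you want to measure the effect on your own engine, a minimal timing harness along these lines works in Node or the browser console (a sketch; absolute numbers will vary):

// Sketch: compare per-element 2D lookup vs. a hoisted row reference.
var array = [];
for (var r = 0; r < 1000; r++) array.push(new Array(1000).fill(1));

console.time("2D lookup");
var sum1 = 0;
for (var r = 0; r < 1000; r++)
    for (var c = 0; c < 1000; c++) sum1 += array[r][c];
console.timeEnd("2D lookup");

console.time("hoisted row");
var sum2 = 0;
for (var r = 0; r < 1000; r++) {
    var aRow = array[r];
    for (var c = 0; c < 1000; c++) sum2 += aRow[c];
}
console.timeEnd("hoisted row");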
Typed Array Usage
Another alternative is to use a one-dimensional typed array and compute the index yourself, avoiding the associative index lookup entirely. Here's some test code I ran in Node to see what difference it might make:
function Mat(rows, cols) {
    var length = rows * cols,
        buffer = new Float64Array(length);

    function getRow(r) {
        var start = r * cols,
            inc = 1;
        return { length: cols, start, inc, buffer };
    }

    function getCol(c) {
        var start = c,
            inc = cols;
        return { length: rows, start, inc, buffer };
    }

    function setRC(r, c, to) {
        buffer[r * cols + c] = to;
    }

    this.rows = rows;
    this.cols = cols;
    this.buffer = buffer;
    this.getRow = getRow;
    this.getCol = getCol;
    this.setRC = setRC;
}
Mat.dotProduct = function (vecA, vecB) {
    var acc = 0,
        length = vecA.length,
        a = vecA.start, aInc = vecA.inc, aBuf = vecA.buffer,
        b = vecB.start, bInc = vecB.inc, bBuf = vecB.buffer;

    if (length != vecB.length) {
        throw "dot product vectors of different length";
    }
    while (length--) {
        acc += aBuf[a] * bBuf[b];
        a += aInc;
        b += bInc;
    }
    return acc;
};
Mat.mul = function (A, B, C) {
    if (A.cols != B.rows) {
        throw "A.cols != B.rows";
    }
    if (!C) {
        C = new Mat(A.rows, B.cols);
    }
    for (var r = 0; r < C.rows; ++r) {
        var Arow = A.getRow(r);
        for (var c = 0; c < C.cols; ++c) {
            C.setRC(r, c, this.dotProduct(Arow, B.getCol(c)));
        }
    }
    return C;
};
function test() {
    // A.cols == B.rows
    let side = 128;
    let A = new Mat(side, side);
    let B = new Mat(side, side);
    A.buffer.fill(1);
    B.buffer.fill(1);
    console.log("starting test");
    let t0 = Date.now();
    Mat.mul(A, B);
    let t1 = Date.now();
    console.log("time: " + ((t1 - t0) / 1000).toFixed(2) + " seconds");
}
test();
Results for multiplying two square matrices (1.1 GHz Celeron):
// 128 x 128 = 0.05 seconds
// 256 x 256 = 0.14 seconds
// 512 x 512 = 7 seconds
// 768 x 768 = 25 seconds
// 1024 x 1024 = 58 seconds
The difference in CPU speed suggests this approach could be significantly faster, but... the code is experimental, the system had no other load, and the timings cover the matrix multiplication alone: they exclude the time taken to decode and populate the arrays with data. Any serious gain would need to be proven in practice.
I eventually reasoned that when multiplying two square matrices, doubling the side length should make the operation take 8 times as long: there are four times as many result elements to calculate and twice as many elements in each of the vectors used for the dot products. The comparative times for the 512 x 512 and 1024 x 1024 multiplications fit this expectation.
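As for adapting the split to the number of CPU threads: here's a minimal sketch (my own naming, not a library API) of how the result's row bands could be spread across Node's worker_threads, sized by os.cpus().length at run time. It assumes Node 12+ and row-major Float64Array matrices backed by SharedArrayBuffers so the workers can write the result in place:

// Sketch only: one band of C's rows per worker, worker count read at run time.
const { Worker, isMainThread, workerData } = require("worker_threads");
const os = require("os");

function shared(len) {
    return new Float64Array(new SharedArrayBuffer(len * 8));
}

if (isMainThread) {
    const m = 1000, n = 1000, p = 400;
    const A = shared(m * n).fill(1);   // row-major m x n
    const B = shared(n * p).fill(1);   // row-major n x p
    const C = shared(m * p);           // result, m x p
    const threads = os.cpus().length;  // unknown at design time, read here
    const band = Math.ceil(m / threads);
    let pending = 0;
    console.time("parallel multiply");
    for (let t = 0; t < threads; t++) {
        const r0 = t * band, r1 = Math.min(m, r0 + band);
        if (r0 >= r1) break;           // more threads than rows
        pending++;
        const w = new Worker(__filename, { workerData: { A, B, C, n, p, r0, r1 } });
        w.on("exit", () => { if (--pending === 0) console.timeEnd("parallel multiply"); });
    }
} else {
    const { A, B, C, n, p, r0, r1 } = workerData;
    for (let r = r0; r < r1; r++)      // naive triple loop over this band
        for (let c = 0; c < p; c++) {
            let acc = 0;
            for (let k = 0; k < n; k++) acc += A[r * n + k] * B[k * p + c];
            C[r * p + c] = acc;
        }
}

Each worker runs the same naive triple loop over its own band, so any single-thread optimization (such as the typed-array layout above) compounds with the parallel split.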
Related
[just joined. first post \o/]
I'm working on a 'battleblocks' project of mine to help learn JS, where I have a 10x10 CSS grid of dynamically created divs. They are identifiable by the numbers 1 to 100, reading left to right (row 1 has 1,2,3..10, row 2 has 11,12..20 etc). I need a nested array of columns that houses 10x arrays (columnArray[0] contains 1,11,21..91, columnArray[1] contains 2,12,22..92 etc). And the same for rows: a row array that has 10x row arrays (rowArray[0] contains 1,2,3..10, rowArray[1] contains 11,12,13..20 etc).
I've declared the column array globally, but as it stands, whatever I've done so far causes an 'Aw, Snap! Something went wrong while displaying this webpage.' error.
loadColsArray();

// load column arrays
function loadColsArray() {
    let rowsAr = [];
    let count = 0;
    for (let c = 1; c <= 10; c++) {
        for (let r = 0; r <= 100; r + 10) {
            rowsAr[count] = c + r;
            count++;
        }
        columnArray[c - 1] = rowsAr;
        count = 0;
        rowsAr = [];
    }
    console.log(columnArray);
}
Any help appreciated.
ps: added code as a snippet, because 'code sample' option broke up my pasted code.
There are a few problems in your code:
The "Aw Snap" is caused by an infinite loop in your code which occurs because you never increment r. You must use r += 10 to increment it by 10.
Since you initialise r to 0, your exit condition must be r < 100, otherwise 11 iterations will occur.
You also need to define columnArray before you use it (it's not defined in the snippet).
Try this:
let columnArray = []; // ←

loadColsArray();

// load column arrays
function loadColsArray() {
    let rowsAr = [];
    let count = 0;
    for (let c = 1; c <= 10; c++) {
        for (let r = 0; r < 100; r += 10) { // ←
            rowsAr[count] = c + r;
            count++;
        }
        columnArray[c - 1] = rowsAr;
        count = 0;
        rowsAr = [];
    }
    console.log(columnArray);
}
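For comparison, the same grid can be built in a single expression; a sketch that produces the same output as the loop above:

let columnArray = Array.from({ length: 10 }, (_, c) =>
    Array.from({ length: 10 }, (_, r) => (c + 1) + r * 10)
);
console.log(columnArray); // [[1,11,...,91], [2,12,...,92], ...]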
Maybe I have a misunderstanding of merge sort. Can someone explain: if I break the toBeSorted list into an array of sub-arrays like this:
const toBeSorted = [2,4,5,1,6,8]
const brokenDown = [[2],[4],[5],[1],[6],[8]]
then I do the usual sort-and-merge thing with the subarrays inside brokenDown. What are the differences or drawbacks compared to the classic original solution?
I understand merge sort as halving the original list until each sub-array contains only one item, then sorting and merging them back together. So instead of halving, I just iterate through the original array and turn it into an array of subarrays.
I tried both solutions: the classic one took around 3000 ms to sort 200,000 numbers, while mine took 5000 ms to sort the same amount of data.
So I think I am lacking some understanding of merge sort.
My solution, full code:
(() => {
    let i = 0
    const data = []
    const size = 200000
    while (i < size) {
        data.push(Math.round(Math.random() * 1000))
        i++
    }

    function mergeSort(arr) {
        if (arr.length < 2) return arr[0]
        const output = []
        for (let i = 0; i < arr.length; i += 2) {
            output.push(sortAndStitch(arr[i], arr[i + 1]))
        }
        return mergeSort(output)
    }

    function breakDown(arr) {
        const output = []
        for (const item of arr) {   // was `for (item of data)`: leaked a global and ignored the parameter
            output.push([item])
        }
        return output
    }

    function sortAndStitch(sub1, sub2) {
        const arr1 = sub1 || [], arr2 = sub2 || [], output = []
        while (arr1.length && arr2.length) {
            if (arr1[0] > arr2[0]) {
                output.push(arr2.shift())
            } else {
                output.push(arr1.shift())
            }
        }
        return output.concat(...arr1, ...arr2)
    }

    const start = new Date().getTime()
    mergeSort(breakDown(data))
    const interval = new Date().getTime() - start + 'ms'
    console.log({ size, interval })
})()
The classic solution that I am comparing against:
(() => {
    let i = 0
    const data = []
    const size = 200000
    while (i < size) {
        data.push(Math.round(Math.random() * 1000))
        i++
    }

    const mergeSort = nums => {
        if (nums.length < 2) {
            return nums;
        }
        const length = nums.length;
        const middle = Math.floor(length / 2);
        const left = nums.slice(0, middle);
        const right = nums.slice(middle);
        return merge(mergeSort(left), mergeSort(right));
    };

    const merge = (left, right) => {
        const results = [];
        while (left.length && right.length) {
            if (left[0] <= right[0]) {
                results.push(left.shift());
            } else {
                results.push(right.shift());
            }
        }
        return results.concat(left, right);
    };

    const start = new Date().getTime()
    mergeSort(data)
    const interval = new Date().getTime() - start + 'ms'
    console.log({ size, interval })
})()
The first example pushes n = 200,000 sub-arrays of size 1 to create an array of 200,000 sub-arrays. Then, on each pass, it indexes two sub-arrays from the array of sub-arrays, merges them into a single sub-array, and pushes that onto yet another array of sub-arrays. The code ends up creating 19 arrays of sub-arrays (including the original), which amounts to 400,000+ pushes of sub-arrays onto arrays of sub-arrays. Those intermediate arrays also consume a lot of space.
The second example splits the array into two halves and recurses into the left halves first, until a sub-array of size 1 is reached, then a right sub-array of size 1, and only then does merging begin, following the call chain up and down: depth first, left first. Only 18 levels of recursion occur, and the shifts and pushes are performed only on arrays of numbers, while the first example also performs those 400,000+ pushes of sub-arrays onto arrays of sub-arrays.
An index-based merge sort (top down or bottom up) would be much faster for either example, taking about 50 ms to sort 200,000 numbers. As commented above, Wikipedia has pseudocode examples:
https://en.wikipedia.org/wiki/Merge_sort
classic original solution
The "classic original solution" is bottom up merge sort. The 1945 EDVAC that Von Newmann described a merge sort for didn't have a stack. Prior to that, Hollerith card sorters, dating back to 1887, used radix sort. The IBM type 77 collators (1937) could merge two decks of sorted cards into a single merged deck, which is what probably gave Von Newmann the idea of merge sort in 1945, followed up in 1948 with a description and analysis of bottom up merge sort.
The first practical non-punched card based sorts were tape sorts (variations of bottom up merge sort), used on the Univac 1 (early 1950's) which could have up to 10 Uniservo tape drives.
Most libraries use some variation of hybrid insertion (on small sub-arrays) + bottom up merge sort for stable sort. Run time for insertion + top down merge sort is about the same, but recursion could throw a stack overflow exception mid-sort. Example hybrid insertion + bottom up merge sort (on my system (Intel 3770K 3.5 ghz, Windows 7 Pro 64 bit, to sort 200,000 numbers, Chrome takes ~40 ms, IE 11 takes ~35 ms.
function merge(a, b, bgn, mid, end) {
    var i = bgn                         // left:  a[bgn,mid)
    var j = mid                         // right: a[mid,end)
    var k = bgn                         // index for b[]
    while (true) {
        if (a[i] <= a[j]) {             // if left <= right
            b[k++] = a[i++]             //   copy left
            if (i < mid)                //   if not end of left
                continue                //     continue back to while
            do                          //   else copy rest of right
                b[k++] = a[j++]
            while (j < end)
            break                       //   and break
        } else {                        // else left > right
            b[k++] = a[j++]             //   copy right
            if (j < end)                //   if not end of right
                continue                //     continue back to while
            do                          //   else copy rest of left
                b[k++] = a[i++]
            while (i < mid)
            break                       //   and break
        }
    }
}

function insertionsort(a, ll, rr) {
    var i = ll + 1
    while (i < rr) {
        var t = a[i]
        var j = i
        while ((j > ll) && a[j - 1] > t) {
            a[j] = a[j - 1]
            j -= 1
        }
        a[j] = t
        i += 1
    }
}

function getpasscount(n) {              // return # of passes
    var i = 0
    for (var s = 1; s < n; s <<= 1)
        i += 1
    return i
}

function mergesort(a) {
    var n = a.length
    if (n < 2)                          // if size < 2, return
        return
    var b = new Array(n)                // allocate temp array
    var s = 64                          // set run size
    if (0 != (1 & getpasscount(n)))     // for even # of passes
        s = 32
    for (var rr = 0; rr < n; ) {        // do insertion sorts
        var ll = rr;
        rr += s;
        if (rr > n)
            rr = n;
        insertionsort(a, ll, rr);
    }
    while (s < n) {                     // while not done
        var ee = 0                      // reset end index
        while (ee < n) {                // merge pairs of runs
            var ll = ee                 // ll = start of left run
            var rr = ll + s             // rr = start of right run
            if (rr >= n) {              // if only a left run
                do                      //   copy it
                    b[ll] = a[ll]
                while (++ll < n)
                break                   //   end of pass
            }
            ee = rr + s                 // ee = end of right run
            if (ee > n)
                ee = n
            merge(a, b, ll, rr, ee)     // merge a[left],a[right] to b[]
        }
        var t = a                       // swap array references
        a = b
        b = t
        s <<= 1                         // double the run size
    }
}

var a = new Array(200000)
for (var i = 0; i < a.length; i++) {
    a[i] = Math.round(Math.random() * 1000000000)
}
console.time('measure')
mergesort(a)
console.timeEnd('measure')
for (var i = 1; i < a.length; i++) {
    if (a[i - 1] > a[i]) {
        console.log('error')
        break
    }
}
This might be a duplicate, though I didn't find any questions specific to my problem here.
Say I have an array like this
var hundred = [1,2,3,4,5...100]
This array has 100 elements. From 1 to 100.
Based on an integer, how can I split this array into another array with the same number of elements, except the values have been evenly distributed like this?
var integer = 2;
var hundred = [50,50,50,50,50,50...100,100,100,100,100,100]
In this example, the array has 50 elements with the value 50, and 50 elements with the value 100, because the integer was 2.
I'm bad at math, so this might be incorrect, but I hope you understand what I mean. The array must have the same amount of indexes after the calculation.
Edit (Due to me being very bad at formulating questions, I'm going to use the code I need this for here):
So I have a frequencybin array (from the AudioContext analyser):
var fbc_array = new Uint8Array(analyser.frequencyBinCount);
analyser.getByteFrequencyData(fbc_array);
This array has a set number of elements (they are the frequencies of the audio being played).
Now I have a spectrum analyser, which has a set number of "bars", so if I have only 3 bars, how can I split the fbc_array so that each bar gets an even share of the frequencies? For example, with 3 bars, bar one would have the bass, bar two the mids, and bar three the treble.
I'm using a for loop to iterate over each bar:
for (i = 0; i < bars; i++) {
    bar_x = i * canspace;
    bar_width = 2;
    bar_height = -3 - (fbc_array[i] / 2);
    ctx.fillRect(bar_x, canvas.height, bar_width, bar_height);
}
Here's what I gathered from this craziness! Sorry you're having trouble conveying your problem. That's always a headache! Good luck.
// set integer to whatever you want
var integer = 3;
var randomNumber;
var output = new Array();

function getRandomIntInclusive(min, max) {
    randomNumber = Math.floor(Math.random() * (max - min + 1)) + min;
}

for (var i = 0; i < integer; i++) {
    getRandomIntInclusive(1, 100);
    for (var j = 1; j < (100 / integer); j++) {
        output.push(randomNumber);
    }
}

// note, you will not always get 100 array items
// you can check with console.log(output.length);
console.log(output);
(Written before your update, so guessing here).
You're looking for a way to approximate a graph so that it's divided into bands and each point within a band is replaced with that band's maximum:
Number.prototype.times = function (fn) {
    var a = [];
    for (var i = 0; i < this; i++)
        a.push(fn(i));
    return a;
}

function approximate(src, n) {
    var res = [],
        size = Math.ceil(src.length / n),
        i = 0;
    while (i < src.length) {
        var chunk = src.slice(i, i += size)
        var p = Math.max.apply(null, chunk);
        // this gives you an average instead of the maximum
        // p = chunk.reduce((x, y) => x + y) / chunk.length;
        res = res.concat(size.times(i => p));
    }
    return res;
}

src = (20).times(i => 10 + Math.floor(Math.random() * 80));
res = approximate(src, 4);

document.write('<pre>' + JSON.stringify(src));
document.write('<pre>' + JSON.stringify(res));
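Tying that back to your analyser loop, a sketch (assuming the analyser, bars, canspace, ctx and canvas variables from your question):

// Band the frequency data, then draw one bar per band's peak value.
var fbc_array = new Uint8Array(analyser.frequencyBinCount);
analyser.getByteFrequencyData(fbc_array);
var banded = approximate(Array.from(fbc_array), bars);
var step = Math.ceil(fbc_array.length / bars);
for (var i = 0; i < bars; i++) {
    var level = banded[Math.min(i * step, banded.length - 1)];
    ctx.fillRect(i * canspace, canvas.height, 2, -3 - (level / 2));
}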
What I'm basically trying to do is map an array of data points into a WebGL vertex buffer (Float32Array) in real time (I'm working on animated parametric surfaces). I've assumed that representing data points with Float32Arrays (either one Float32Array per component, [xx...x, yy...y], or interleaved, xyxy...xy) should be faster than storing them in an array of points, [[x, y], [x, y], ... [x, y]], since the latter is effectively a nested hash lookup. However, to my surprise, the typed version is about 15% slower in all the major browsers (not counting array creation time). Here's a little test I've set up:
var points = 250000, iters = 100;

function map_2a(x, y) { return Math.sin(x) + y; }

var output = new Float32Array(3 * points);

// generate data
var data = [];
for (var i = 0; i < points; i++)
    data[i] = [Math.random(), Math.random()];

// run
console.time('native');
(function () {
    for (var iter = 0; iter < iters; iter++)
        for (var i = 0, to = 0; i < points; i++, to += 3) {
            output[to] = data[i][0];
            output[to + 1] = data[i][1];
            output[to + 2] = map_2a(data[i][0], data[i][1]);
        }
}());
console.timeEnd('native');

// generate data
var data = [new Float32Array(points), new Float32Array(points)];
for (var i = 0; i < points; i++) {
    data[0][i] = Math.random();
    data[1][i] = Math.random();
}

// run
console.time('typed');
(function () {
    for (var iter = 0; iter < iters; iter++)
        for (var i = 0, to = 0; i < points; i++, to += 3) {
            output[to] = data[0][i];
            output[to + 1] = data[1][i];
            output[to + 2] = map_2a(data[0][i], data[1][i]);
        }
}());
console.timeEnd('typed');
Is there anything I'm doing wrong?
I think your problem is that you are not comparing the same code. In the first example, you have one large array filled with very small arrays. In the second example, you have two very large arrays, and both of them need to be indexed. The profile is different.
If I structure the first example to be more like the second (two large generic arrays), then the Float32Array implementation far outperforms the generic array implementation.
Here is a jsPerf profile to show it.
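For reference, the restructuring described above amounts to something like this (a sketch; the inner loop stays exactly as in your 'typed' test):

// Two large generic arrays, laid out like the typed version,
// so both tests index data[component][i] the same way.
var data = [new Array(points), new Array(points)];
for (var i = 0; i < points; i++) {
    data[0][i] = Math.random();
    data[1][i] = Math.random();
}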
In V8, a value can be a SMI (a small 31/32-bit integer), a double, or a pointer. So I guess that when you read from a Float32Array, each float has to be converted to a double, whereas with regular arrays the values are already stored as doubles.
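A small illustration of the width difference, observable from JS itself:

// 0.1 cannot round-trip through a 32-bit float, so reading it back
// from a Float32Array yields a different double than the original.
var plain = [0.1];                   // stored as a double internally
var typed = new Float32Array(plain); // stored as a 32-bit float
console.log(plain[0] === typed[0]);  // false
console.log(typed[0]);               // 0.10000000149011612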
I desperately need a JavaScript equivalent of the polyval and polyfit functions that exist in MATLAB. Essentially, those functions do a curve fit based on two equally sized arrays and a specified polynomial degree. I need to do some calculations that involve curve fitting in JavaScript and can't for the life of me find an equivalent function.
This is MATLAB's explanation of the function polyfit:
"P = POLYFIT(X,Y,N) finds the coefficients of a polynomial P(X) of degree N that fits the data Y best in a least-squares sense. P is a row vector of length N+1 containing the polynomial coefficients in descending powers, P(1)*X^N + P(2)*X^(N-1) + ... + P(N)*X + P(N+1)."
This is MATLAB's explanation of polyval:
"POLYVAL Evaluate polynomial. Y = POLYVAL(P,X) returns the value of a polynomial P evaluated at X. P is a vector of length N+1 whose elements are the coefficients of the polynomial in descending powers: Y = P(1)*X^N + P(2)*X^(N-1) + ... + P(N)*X + P(N+1)"
Any help would be super.
Regards,
numeric.js may help you get started.
POLYFIT performs a least-squares polynomial fit, which comes down to solving a system of linear equations. I did a quick search, but I couldn't find a basic linear algebra JavaScript library that solves such systems... The easiest approach would be to implement the Gaussian elimination algorithm yourself.
POLYVAL simply evaluates the polynomial at the points X by substituting the coefficients into the equation.
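The POLYVAL half is only a few lines in JavaScript; a minimal sketch using Horner's rule, with coefficients in descending powers to match MATLAB's convention quoted above:

function polyval(p, x) {
    var y = 0;
    for (var i = 0; i < p.length; i++)
        y = y * x + p[i];   // Horner's rule: ((p[0]*x + p[1])*x + ...)
    return y;
}
console.log(polyval([1, 0, -2], 3)); // 1*3^2 + 0*3 - 2 = 7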
Perhaps this code might help someone:
function _prepare(_mat) {
    _mat = [[]].concat(_mat)
    for (var i = 0; i < _mat.length; ++i)
        _mat[i] = [0].concat(_mat[i])
    return _mat
}

function linear(_mat) {
    _mat = _prepare(_mat)
    return _solve(_mat)
}

function _solve(_mat) {
    var c = new Array(), d = new Array()
    var n = _mat.length - 1
    for (var i = 0; i <= n + 1; i++) {
        d[i] = new Array();
        c[i] = 0
        for (var j = 0; j <= n + 1; ++j)
            d[i][j] = 0
    }
    // mission impossible:
    // calculate all the determinants of the system
    for (var m = 2; m <= n; ++m) {
        for (i = m; i <= n; ++i)
            for (j = m - 1; j <= n + 1; ++j)
                d[i][j] = [_mat[i][j] * _mat[m - 1][m - 1], _mat[i][m - 1]]
        for (i = m; i <= n; ++i)
            for (j = m - 1; j <= n + 1; ++j) {
                _mat[i][j] = d[i][j][0] - d[i][j][1] * _mat[m - 1][j]
                if (Math.abs(_mat[i][j]) < 1e-25) _mat[i][j] = 0 // I had to add this line to absorb rounding noise
            }
    }
    // now back-substitute for the coefficients of the equation (not exactly)
    for (i = n; i >= 1; --i) {
        c[i - 1] = _mat[i][n + 1]
        if (i != n)
            for (j = n; j >= i + 1; --j)
                c[i - 1] = c[i - 1] - _mat[i][j] * c[j - 1]
        if (_mat[i][i] != 0)
            c[i - 1] = c[i - 1] / _mat[i][i]
        else
            c[i - 1] = 0
        if (Math.abs(c[i - 1]) < 1e-25)
            c[i - 1] = 0
    }
    c.length = n
    return c
}
function fitpoly(e, b) {
    var a = new Array()
    var n = 1 + b, e = [[0, 0]].concat(e), ns = e.length - 1
    for (var i = 0; i <= n + 1; i++) {
        a[i] = new Array();
        for (var j = 0; j <= n + 1; ++j)
            a[i][j] = 0
    }
    for (var m = 1; m <= n; m++)
        for (i = 1; i <= m; i++) {
            j = m - i + 1;
            for (var ii = 1; ii <= ns; ii++)
                a[i][j] = a[i][j] + Math.pow(e[ii][0], m - 1)
        }
    for (i = 1; i <= n; ++i)
        for (ii = 1; ii <= ns; ++ii)
            a[i][n + 1] = a[i][n + 1] + e[ii][1] * Math.pow(e[ii][0], i - 1)
    for (m = n + 2; m <= 2 * n; ++m)
        for (i = m - n; i <= n; ++i) {
            j = m - i
            for (ii = 1; ii <= ns; ++ii)
                a[i][j] = a[i][j] + Math.pow(e[ii][0], m - 2) // coefficients of the system
        }
    a.length = a.length - 1
    return _solve(a)
}
// and then
var poly_degree = 6
var points = [[2, 2], [2, 4], [4, 6], [6, 4], [8, 2]]

// coefficients of the polynomial
console.log(fitpoly(points, poly_degree))

// or solve a linear system; here with six variables
var solution = linear([[1, 2, 3, -2, -3, -26, 52], [3, 2, 5, -2, 4, 30, -60], [6, 1, -4, -1, 5, 94, -188], [-1, 2, 4, 3, 4, 30, -60], [-1, 4, 2, -1, 2, 26, -52], [3, -3, 11, -7, -2, -1, -95]])
console.log(solution)
Give this gist a try, it uses numeric.js:
function polyfit(xArray, yArray, order) {
    if (xArray.length <= order) console.warn("Warning: Polyfit may be poorly conditioned.")
    let xMatrix = []
    let yMatrix = numeric.transpose([yArray])
    for (let i = 0; i < xArray.length; i++) {
        let temp = []
        for (let j = 0; j <= order; j++) {
            temp.push(Math.pow(xArray[i], j))
        }
        xMatrix.push(temp)
    }
    let xMatrixT = numeric.transpose(xMatrix)
    let dot1 = numeric.dot(xMatrixT, xMatrix)
    let dot2 = numeric.dot(xMatrixT, yMatrix)
    let dotInv = numeric.inv(dot1)
    let coefficients = numeric.dot(dotInv, dot2)
    return coefficients
}
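A usage sketch (assuming numeric.js is loaded; fitting the line y = 2x + 1 exactly):

let coeffs = polyfit([0, 1, 2, 3], [1, 3, 5, 7], 1);
console.log(coeffs); // ~[[1], [2]]: ascending powers (constant term first),
                     // i.e. the reverse of MATLAB's descending convention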