Slow shuffle for large arrays - javascript

I'm implementing the Fisher-Yates shuffle in a Photoshop script. I want to create an array of n unique random elements drawn from a maximum of about 99999, where n is usually small but could be as large as the maximum.
With a maximum of under a thousand this is fine (runs in milliseconds), but it is considerably slower for 10,000 (about 20 seconds).
Is there a better/faster way to do this? Bear in mind that it'll need to be in ECMAScript.
var maxNumber = 99;
var numToGenerate = 5;
var bigShuffle = shuffle(maxNumber);
var randomNumbers = bigShuffle.slice(0, numToGenerate);
alert(randomNumbers);

function shuffle(m)
{
    var temp;
    var rnd;
    // create an empty array holding 0 .. m-1
    var arr = new Array();
    for (var i = 0; i < m; i++)
    {
        arr.push(i);
    }
    // Fisher-Yates: walk backwards, swapping each slot with a random earlier one
    while (m)
    {
        rnd = Math.floor(Math.random() * m); // 0 <= rnd < m
        m -= 1;
        // And swap it
        temp = arr[m];
        arr[m] = arr[rnd];
        arr[rnd] = temp;
    }
    return arr;
}
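Since only the first numToGenerate elements of the shuffled array are used, a common speed-up is to stop the shuffle after n swaps: the first n slots are then already a uniform random selection, and the shuffle work drops from m swaps to n. A minimal sketch of that idea (a forward-walking partial Fisher-Yates; sample is a name made up here, not code from the question):
function sample(m, n) {
    // build [0, 1, ..., m-1] as before (still O(m), but cheap)
    var arr = [];
    for (var i = 0; i < m; i++) {
        arr.push(i);
    }
    // shuffle only the first n positions
    for (var i = 0; i < n; i++) {
        // pick a random index in [i, m) and swap it into slot i
        var rnd = i + Math.floor(Math.random() * (m - i));
        var temp = arr[i];
        arr[i] = arr[rnd];
        arr[rnd] = temp;
    }
    return arr.slice(0, n);
}
var randomNumbers = sample(99999, 5);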

Related

how to get multiple random elements from an array list?

The title basically describes my problem... I want to get 3 elements from an array list without getting a duplicate. The other solutions I have found use Math.floor(Math.random() * list.length), which I think is limited to one output at a time.
n unique elements from a set are a combination.
Without getting into too much detail,
variations = combinations * permutations (e.g. V(4,3) = C(4,3) * 3! = 4 * 6 = 24), which means we can just generate a uniformly random variation (of the same length) and ignore the order.
The Fisher-Yates shuffle can do this for example:
function shuffled(elements){
    // Return shuffled elements such that each variation has the same probability
    const copy = [...elements];
    for(let i = copy.length - 1; i >= 0; i--){
        let j = Math.floor(Math.random() * (i + 1)); // 0 <= j <= i
        let tmp = copy[i];
        copy[i] = copy[j];
        copy[j] = tmp;
    }
    return copy;
}
function choose(elements, n){
    // Return a combination of n elements
    return shuffled(elements).slice(0, n);
}
var elements = ['a', 'b', 'c', 'd'];
var N = 1000;
var results = {}; // how many times each element was chosen
for(let i = 0; i < N; i++){
    for(let x of choose(elements, 3)){
        results[x] = (results[x] || 0) + 1;
    }
}
console.log(results); // each element expected about 3/4 * N = 750 times

Maximizing efficiency of this while loop

I have a while loop that is supposed to randomly plant numberOfTruths true values in a 2-dimensional array. I am only to change cells if they are not true already, and only do so numberOfTruths times:
function truthfulArray(maxY, maxX, numberOfTruths, array) {
    var x;
    var y;
    var counter = 0;
    while (counter < numberOfTruths) {
        x = generateRandomNumber(maxX);
        y = generateRandomNumber(maxY);
        if (!array[x][y]) {
            array[x][y] = true;
            counter++;
        }
    }
    return array;
}
I can see the issue with its optimization: as numberOfTruths approaches the product of maxY and maxX, the loop wastes a lot of resources hitting cells that are already true. I was wondering what tweaks I can make to the function to make it more efficient. Thanks in advance!
*** generateRandomNumber(max) is a simple function that returns a random number from 0 to the max value entered.
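(For reference, a plausible implementation of that helper, assuming it returns an integer index in [0, max), which is what the array indexing above relies on:)
// assumed helper, not shown in the question: integer in [0, max)
function generateRandomNumber(max) {
    return Math.floor(Math.random() * max);
}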
Based on @Bergi's comment, here's one optimization. It assumes the array is empty to begin with (or rather, it doesn't care and just overwrites things).
It's slower when the fill factor (N / (X * Y)) is low (testing with X = Y = 500), but seems to win out over the original implementation at about 40%, and is decisively faster (like 3x) at 80%. (You might want to use that as a heuristic.)
The general idea is that we first fill random rows from the rows' start, then spread each row out using a Fisher-Yates shuffle. I've transposed the y and x compared to the original, because that's just how I'm used to dealing with 2D arrays. :D
function shuffle(array) { // h/t https://bost.ocks.org/mike/shuffle/
    var m = array.length, t, i;
    while (m) {
        i = Math.floor(Math.random() * m--);
        t = array[m];
        array[m] = array[i];
        array[i] = t;
    }
    return array;
}
function truthfulArrayFillRows(maxY, maxX, numberOfTruths, array) {
    var nLeft = numberOfTruths;
    var nSeededPerRow = new Array(maxY).fill(0);
    // Seed rows randomly with trues starting from the left
    while (nLeft > 0) {
        var y = generateRandomNumber(maxY);
        var x = nSeededPerRow[y];
        if (x < maxX) {
            array[y][x] = true;
            nLeft--;
            nSeededPerRow[y]++;
        }
    }
    // Shuffle the rows we seeded
    for (var y = 0; y < maxY; y++) {
        if (nSeededPerRow[y] > 0) {
            shuffle(array[y]);
        }
    }
    return array;
}
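A hypothetical usage sketch (the grid size and fill count here are illustrative, chosen to match the 40% fill factor mentioned above):
// build an empty 500 x 500 grid, each row its own array
var grid = [];
for (var y = 0; y < 500; y++) {
    grid.push(new Array(500).fill(false));
}
// 100000 / (500 * 500) = 40% fill factor, roughly where this
// approach starts to beat the original rejection sampling
truthfulArrayFillRows(500, 500, 100000, grid);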
Instead of randomly selecting numberOfTruths positions out of all possible positions, you could randomly select from only the valid (still false) positions. To do that, you have to find those positions first.
function shuffle(a) {
    var j, x, i;
    for (i = a.length - 1; i > 0; i--) {
        j = Math.floor(Math.random() * (i + 1));
        x = a[i];
        a[i] = a[j];
        a[j] = x;
    }
    return a;
}
function chunkArray(myArray, chunk_size){
    var results = [];
    while (myArray.length) {
        results.push(myArray.splice(0, chunk_size));
    }
    return results;
}
function flatten(array) {
    var flattened = [];
    for (var i = 0; i < array.length; ++i) {
        var current = array[i];
        for (var j = 0; j < current.length; ++j)
            flattened.push(current[j]);
    }
    return flattened;
}
function truthfulArray(maxY, maxX, numberOfTruths, array){
    var flatArray = flatten(array);
    // collect the indices of all still-false cells (a plain map() would
    // leave undefined holes for the true ones)
    var validPositions = [];
    flatArray.forEach(function(e, i){ if(!e) validPositions.push(i); });
    var randomPositions = shuffle(validPositions).slice(0, numberOfTruths);
    randomPositions.forEach(function(i){ flatArray[i] = true; });
    return chunkArray(flatArray, maxY);
}
var maxX = 20, maxY = 30;
// each row needs to be its own array; fill(Array(...)) would share one row
var array = [];
for (var i = 0; i < maxX; i++) array.push(new Array(maxY).fill(false));
truthfulArray(maxY, maxX, 40, array);
Whether this approach is more efficient depends on your data. If there are many valid positions to choose from, your code should be totally fine and efficient. But the more trues there are in the array, the less likely it is that your code (randomly) hits the remaining false cells; in that case the approach described here will be more efficient.
I hope you find that helpful.

Strange performance quirk in javascript

I am experiencing a strange difference in the performance of a simple task depending on the array I am working with. The task is to calculate the sum of those elements in an array which are greater than 5. The task is performed on arrays of equal length.
I try the very same approach on three different array objects:
1) var hugeArray1 - array with all elements randomly picked from the range 0:10
2) var hugeArray2 - copy of hugeArray1 sorted with Array.prototype.sort()
3) var hugeArray3 - handcrafted but sorted array with values from the range 0:10, spread to cover this interval equally.
I calculate the sum of elements greater than 5 many times for each array and then average the times. What is strange is that the time needed varies a lot between the three arrays:
1) hugeArray1: 5.805ms
2) hugeArray2: 15.738ms
3) hugeArray3: 3.753ms
The result for the array sorted with sort() is extremely poor. Why is that? It looks like sort() returns some kind of wrapper/proxy instead of a 'native' Array, which affects performance. I tried it on 2 computers, and I also tried changing the order of the tests.
I include the code below; please tell me what is happening here.
// random array with elements 0-10 of size n
function randomArray(n) {
    var arr = [];
    for (var i = 0; i < n; ++i) {
        arr.push(Math.random() * 10);
    }
    return arr;
}
// measures time of execution
function measureTime(f) {
    var start = new Date().getTime();
    f();
    var stop = new Date().getTime();
    return stop - start;
}
// enumerate over array and calculate sum of elements greater than 5
function sumGreaterThan5(arr) {
    var sum = 0;
    for (var i = 0; i < arr.length; ++i) {
        if (arr[i] > 5.0)
            sum += arr[i];
    }
    return sum;
}
// generate array of size 'size' with elements with constant step to fill interval 0:10
function generateSortedArr(size) {
    var arr = [];
    for (var i = 0; i < size; ++i) {
        arr.push(i * 10 / size);
    }
    return arr;
}
var huge = 1000000;
var hugeArray1 = randomArray(huge);
var hugeArray2 = hugeArray1.slice(0).sort();
var hugeArray3 = generateSortedArr(huge);
var hugeArrays = [hugeArray1, hugeArray2, hugeArray3];
hugeArrays.forEach(x => {
    var res = [];
    for (var i = 0; i < 1000; ++i) {
        res.push(measureTime(function () {
            sumGreaterThan5(x);
        }));
    }
    console.log(res.reduce((prev, curr) => prev + curr) / res.length);
});
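As an aside (not part of the original code): Date.getTime() only has millisecond resolution, so for short runs a sub-millisecond timer such as performance.now() usually gives steadier numbers. A minimal sketch of the same harness idea:
// performance.now() is available in browsers and recent Node
function measureTimeHiRes(f) {
    var start = performance.now();
    f();
    return performance.now() - start;
}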

Fastest way to loop through this array in Javascript on Chrome 36

I have a very big array which looks similar to this:
var counts = ["gfdg 34243","jhfj 543554",....] // 55268 elements long
This is my current loop:
var replace = "";
var scored = 0;
var qgram = "";
var score1 = 0;
var len = counts.length;
function score(pplaintext1) {
qgram = pplaintext1;
for (var x = 0; x < qgram.length; x++) {
for (var a = 0, len = counts.length; a < len; a++) {
if (qgram.substring(x, x + 4) === counts[a].substring(0, 4)) {
replace = parseInt(counts[a].replace(/[^1-9]/g, ""));
scored += Math.log(replace / len) * Math.LOG10E;
} else {
scored += Math.log(1 / len) * Math.LOG10E;
}
}
}
score1 = scored;
scored = 0;
} //need to call the function 1000 times roughly
I have to loop through this array several times and my code is running slowly. My question is: what is the fastest way to loop through this array, so I can save as much time as possible?
Your counts array appears to be a list of unique strings and values associated with them. Use an object instead, keyed on the unique strings, e.g.:
var counts = { gfdg: 34243, jhfj: 543554, ... };
This will massively improve the performance by removing the need for the O(n) inner loop by replacing it with an O(1) object key lookup.
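A sketch of that conversion, assuming every entry really is a key followed by a space and a count (countsMap is an illustrative name):
// build the object map once, up front, from the "key count" strings
var countsMap = {};
for (var i = 0; i < counts.length; i++) {
    var parts = counts[i].split(" ");
    countsMap[parts[0]] = parseInt(parts[1], 10);
}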
Also, avoid divisions - log(1 / n) = -log(n) - and move loop invariants outside the loops. Your log(1 / len) * Math.LOG10E is actually a constant added in every pass; in the if branch you additionally need to factor in replace, which in log math means adding Math.log(replace).
P.S. Avoid outer-scope state variables for the score, too! I think the code below replicates your scoring algorithm correctly:
var len = Object.keys(counts).length;
function score(text) {
    var result = 0;
    var factor = -Math.log(len) * Math.LOG10E; // log10(1 / len), hoisted out of the loop
    for (var x = 0, n = text.length - 3; x < n; ++x) { // every full 4-gram
        var qgram = text.substring(x, x + 4);
        var replace = counts[qgram];
        if (replace) {
            // the matched entry contributes log10(replace / len); the
            // other (len - 1) entries each contribute log10(1 / len)
            result += Math.log(replace) * Math.LOG10E + len * factor;
        } else {
            result += len * factor; // log10(1 / len) once for each entry
        }
    }
    return result;
}
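Hypothetical usage, with counts rebuilt as the two-entry example object from above:
var counts = { gfdg: 34243, jhfj: 543554 };
var len = Object.keys(counts).length;
console.log(score("gfdgjhfj")); // scores the five 4-grams of the text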

pushing arrays in multidimensional array

I want to push arrays containing random numbers (0 to 10) into a bigger array once the total of its contents is about to exceed 30. But the output is messed up.
var bigarray = new Array();
var smallarray = new Array();
var randNum = 0;
var total = 0;
for (var i = 0; i < 10; i++){
    randNum = (10 * Math.random()).toFixed(0);
    total = total + randNum;
    if (total > 30) {
        bigarray.push(smallarray)
        smallarray.length = 0;
        smallarray.push(randNum);
        total = randNum;
    } else {
        smallarray.push(randNum);
    }
}
alert(" BIG ARRAY IS " + bigarray);
Two wrong things are visible at first sight in the code:
(1) Instead of
randNum = (10*Math.random()).toFixed(0);
you probably want
randNum = Math.floor(11*Math.random());
Math.floor instead of toFixed() - see @kennebec's comment (toFixed returns a string, so total ends up concatenated rather than added).
11 instead of 10 to return numbers 0 to 10, as 0 <= Math.random() < 1.
(2) The following line pushes (many times) a reference to the same smallarray object:
bigarray.push(smallarray);
In the next step you clear the array with smallarray.length = 0. Because the array is not copied into bigarray, but only referenced, the generated items are lost.
EDIT: I read your question wrong - the rest of the answer is fixed
You probably want to push the duplicate of the smallarray into bigarray, so replace the line above with the following:
bigarray.push(smallarray.slice(0));
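A tiny sketch (not from the answer) of why the copy matters:
var small = [1, 2, 3];
var big = [];
big.push(small);          // pushes a reference, not a copy
small.length = 0;         // ...so this also empties big[0]
console.log(big);         // [ [] ]
small.push(4);
big.push(small.slice(0)); // slice(0) makes an independent shallow copy
small.length = 0;         // big[1] keeps its contents this time
console.log(big);         // [ [], [ 4 ] ]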
You need another loop inside the main one to populate the smallarray, something like:
var bigarray = new Array();
for (var i = 0; i < 10; i++){
    // moving the variable declarations inside this loop means they are re-set for each small array
    var smallarray = new Array();
    // create the first entry for the small array
    var randNum = Math.floor(11*Math.random());
    var total = randNum;
    // loop to populate the small array
    while(total <= 30){
        smallarray.push(randNum);
        randNum = Math.floor(11*Math.random());
        total += randNum;
    }
    bigarray.push(smallarray)
}
I made changes to your code and came up with this:
var bigarray = [];
var smallarray = [];
var randNum = 0;
var total = 0;
for (var i = 0; i < 10; i += 1) {
    randNum = Math.floor(10 * Math.random()); // you will never have a value of 10?
    total = total + randNum;
    if (total > 30) {
        bigarray.push(smallarray.slice())
        smallarray.length = 0;
        smallarray.push(randNum);
        total = randNum;
    } else {
        smallarray.push(randNum);
    }
}
alert(" BIG ARRAY IS " + bigarray);
On jsfiddle
Things I changed were:
Ran the code through a beautifier.
Changed your use of new Array to []: use {} instead of new Object() and [] instead of new Array(), because Object and Array can be overwritten by the user.
Changed ++ to += 1, as the ++ pattern can be confusing; check out Code Conventions for the JavaScript Programming Language and jslint.
Added array.slice when you push smallarray to bigarray, which makes a copy in this case. It is important to understand how JavaScript works here; read Is JavaScript a pass-by-reference or pass-by-value language? Without slice (which makes a copy, as the array only contains primitives), setting the length of the array to 0 lost the data.
Changed your use of number.toFixed to Math.floor so that randNum remains a number.
Note: Math.random returns a floating-point, pseudo-random number in the range [0, 1), that is, from 0 (inclusive) up to but not including 1 (exclusive).
Whether your code now produces your expected output I cannot be sure from your description, but this should be a good starting point.
var bigarray = new Array();
var smallarray = new Array();
var randNum = 0;
var total = 0;
for (var i = 0; i < 10; i++) {
    for (var j = 0; j < smallarray.length; j++) {
        total = total + smallarray[j];
    }
    if (total <= 30)
    {
        randNum = Math.floor((Math.random() * 10) + 1);
        smallarray.push(randNum);
    }
    else {
        bigarray.push(smallarray.slice(0));
        smallarray.length = 0;
    }
    total = 0;
}
alert(" BIG ARRAY IS " + bigarray);
