I have a huge array of length over 200,000. I need to get top 10 values. I know sort is not the best solution. I have tried the following solution:
const sortBy = 'key';
const results = [];
const m = 10;
const N = array.length;
// Repeated selection: every pass scans what is left of `array`, moves the
// record with the largest `sortBy` value into `results`, and stops once m
// records were collected or `array` is empty. Cost is O(m * N) and `array`
// is mutated (the selected records are spliced out of it).
while (array.length && results.length < m) {
  // Start from -Infinity rather than 0 so that records whose key is zero or
  // negative can still be recognised as the current maximum.
  let currentMax = -Infinity;
  let currentMaxIndex = 0;
  array.forEach((record, i) => {
    // `>=` means that among equal keys the last one scanned is taken first.
    if (record[sortBy] >= currentMax) {
      currentMax = record[sortBy];
      currentMaxIndex = i;
    }
  });
  results.push(...array.splice(currentMaxIndex, 1));
}
Here array is an Array of length 200,000.
Problem is, I think if m equals N, then this
is going to take more time than sort itself. I want to know if there
is a better solution, that can handle both the cases.
Thank you for the help, but the actual question is m ∈ (0, N). m can take any value up to N. So, at which point would it be advisable to switch to in-built sort?
As per my understanding as m reaches N the complexity increases and sort is the best solution when m === N
I have tested with the example provided by #t-j-crowder here. A test to get top 10 from 100 entries.
When testing with different values in m, the faster algorithm is changing to sort at m === 85. So, I want to find out if there is any way to determine when we should switch back to sort, to have optimal performance in all cases.
You don't need to sort the whole array, you just need to insert into your top 10 array in numeric order and drop any additional entries:
// Demo data: 100 random integers in [0, 1000000).
var a = Array.from({length: 100}, () => Math.floor(Math.random() * 1000000));
// Reference answer computed the slow way (full descending sort of a copy);
// it is only used to verify the result of the single-pass approach below.
var check = a.slice().sort((left, right) => right - left).slice(0, 10);
console.log("check", check);

/**
 * Compares the first 10 entries of `top10` with the reference list `check`.
 * Logs the first mismatch it finds.
 *
 * @param {number[]} top10 candidate list, expected in descending order
 * @returns {boolean} true when all 10 positions match, false otherwise
 */
function checkResult(top10) {
  var n = 0;
  while (n < 10) {
    if (top10[n] !== check[n]) {
      console.log("Error in test at #" + n + ", expected " + check[n] + ", got " + top10[n]);
      return false;
    }
    ++n;
  }
  return true;
}

// Single pass over `a`, keeping a descending list of the best 10 seen so far.
var top10 = [];
var cutoff = -Infinity;
var full = false;
var n, len, value;
for (n = 0, len = a.length; n < len; ++n) {
  value = a[n];
  // Once the list has been trimmed at least once, anything below the
  // current 10th-best value cannot make it into the list.
  if (full && !(value >= cutoff)) {
    continue;
  }
  // Insert in front of the first kept value that is not larger; append when
  // every kept value is larger.
  const position = top10.findIndex((kept) => kept <= value);
  if (position === -1) {
    top10.push(value);
  } else {
    top10.splice(position, 0, value);
  }
  // Drop whatever fell off the end of the top 10.
  if (top10.length > 10) {
    full = true;
    top10.length = 10;
  }
  // The smallest kept candidate is the bar later values have to reach.
  cutoff = top10[top10.length - 1];
}
console.log("top10", top10);
console.log("Good? " + checkResult(top10));
/* Let elements with the as-console-wrapper class grow to 100% height;
   !important overrides any competing max-height rule.
   (Presumably this targets the snippet console panel - confirm.) */
.as-console-wrapper {
max-height: 100% !important;
}
You can probably tidy that up a bit, optimize it further, but you get the idea. Maintaining a list of just the top 10 highest you've seen, dropping ones off the bottom if others join it.
Benchmark here, easily beats sorting and then grabbing the top 10 on Chrome and Firefox; the converse is true on Edge.
/**
 * Returns the `limit` elements of `array` that have the largest `sortBy`
 * values, using one pass over `array` and a small sorted buffer.
 *
 * @param {Iterable<Object>} array  records to pick from (not modified)
 * @param {string} sortBy           name of the property to compare
 * @param {number} limit            maximum number of records to return
 * @returns {Object[]} at most `limit` records in ascending `sortBy` order
 *                     (smallest kept record first)
 */
function limitSort(array, sortBy, limit) {
  const result = [];
  // Nothing can be kept; this also guarantees result[0] exists whenever the
  // buffer is compared against below.
  if (!(limit > 0)) {
    return result;
  }
  // Iterate over the array *once*
  for (const el of array) {
    // Exclude all elements that are definitely not in the resulting array.
    // result[0] is the smallest kept record, so compare against its key
    // (comparing against the record object itself is always false).
    if (result.length < limit || el[sortBy] > result[0][sortBy]) {
      // Determine the position where the element needs to be inserted (insertion sort)
      let insertAt = 0;
      while (insertAt < result.length && result[insertAt][sortBy] < el[sortBy]) {
        insertAt++;
      }
      // Finally insert it
      result.splice(insertAt, 0, el);
      // If the result is getting too long, remove the lowest element
      if (result.length > limit) {
        result.shift();
      }
    }
  }
  return result;
}
This implements the algorithm Niet the Dark Absol proposed above. Try it!
I've been trying to find a O(n) solution to the following problem: Find the number of anagrams (permutations) of string s in string b, where s.length will always be smaller than b.length
I read that the optimal solution involves keeping track of the frequencies of the characters in the smaller string and doing the same for the sliding window as it moves across the larger string, but I'm not sure how that implementation actually works. Right now my solution doesn't work (see comments) but even if it did, it would take O(s + sn) time.
EDIT: Sample input: ('aba', 'abaab'). Output: 3, because 'aba' exists in b starting at index 0, and 'baa' at 1, and 'aab' at 2.
/**
 * Counts the windows of `b` that are anagrams (permutations) of `s`.
 *
 * A window of s.length characters slides over `b`. `need` holds, per
 * character, how many occurrences the window still lacks (positive) or has
 * in surplus (negative) compared with `s`; `unbalanced` is the number of
 * characters whose entry is non-zero. The window is an anagram exactly when
 * `unbalanced` is 0, so each step costs O(1) and the whole scan is
 * O(s.length + b.length).
 *
 * @param {string} s pattern whose permutations are searched for
 * @param {string} b text to search in
 * @returns {number} number of start positions in `b` where an anagram of `s`
 *                   begins; 0 when `s` is empty or longer than `b`
 */
function anagramsInStr(s, b) {
  const size = s.length;
  if (size === 0 || size > b.length) {
    return 0;
  }

  // O(s): every character of s is initially "missing" from the (empty) window.
  const need = new Map();
  for (let i = 0; i < size; i++) {
    need.set(s[i], (need.get(s[i]) || 0) + 1);
  }
  let unbalanced = need.size;

  // Applies a +1/-1 change to one character and keeps `unbalanced` in sync.
  const adjust = (ch, delta) => {
    const before = need.get(ch) || 0;
    const after = before + delta;
    if (before === 0) {
      unbalanced++;
    } else if (after === 0) {
      unbalanced--;
    }
    need.set(ch, after);
  };

  // O(n): one character enters the window and, once it is full, one leaves.
  let count = 0;
  for (let end = 0; end < b.length; end++) {
    adjust(b[end], -1);
    if (end >= size) {
      adjust(b[end - size], 1);
    }
    if (end >= size - 1 && unbalanced === 0) {
      count++;
    }
  }
  return count;
}
Read through the comments and let me know if you have any questions:
/**
 * Counts how many windows of `b` contain exactly the letters of `s`.
 *
 * @param {string} s pattern whose permutations are counted
 * @param {string} b text that is scanned with a window of s.length letters
 * @returns {number} number of positions at which the window matches `s`
 */
function countAnagramOccurrences(s, b) {
  var width = s.length;

  // Letter frequencies of the pattern.
  var wanted = {};
  for (var p = 0; p < width; p++) {
    var patternLetter = s[p];
    wanted[patternLetter] = (wanted[patternLetter] || 0) + 1;
  }
  // Only letters that occur in the pattern have to be compared.
  var alphabet = Object.keys(wanted);

  // Letter frequencies of the current window over b.
  var windowCounts = {};
  var total = 0;
  for (var right = 0; right < b.length; right++) {
    // Once the window already holds `width` letters, the oldest one leaves.
    if (right >= width) {
      windowCounts[b[right - width]] -= 1;
    }
    // The letter at `right` enters the window.
    var incoming = b[right];
    windowCounts[incoming] = (windowCounts[incoming] || 0) + 1;

    // The window matches when every pattern letter has the same count.
    var same = alphabet.every(function (letter) {
      return wanted[letter] === windowCounts[letter];
    });
    if (same) {
      total += 1;
    }
  }
  return total;
}
console.log(countAnagramOccurrences('aba', 'abaab')); // 3
EDIT
A note about the runtime: this is O(m + nk), where m is the length of s, n is the length of b, and k is the number of unique characters in s. Since m is always less than n, we can reduce to O(nk), and since k is bounded by a fixed constant (the size of the alphabet), we can further reduce to O(n).
I have a function that takes an array and a number. It scans the array for the two numbers that appear earliest in the array that add up to the number. I would like to know, performance-wise, what could help this function run faster. It has to process a list of like 10,000,000 items in under 6 seconds. I have refactored it a few times now, but still not getting there.
What is the best array iteration method for speed? I assumed for loops would be the slowest so I chose map. Is there a faster way? every()?
NOTE: the provided array could have duplicate, positive, or negative numbers (let's say up to 1000000...for now).
/**
 * Finds the pair of numbers adding up to `s` whose second element appears
 * earliest in `ints`.
 *
 * The list is walked once while remembering every value seen so far; the
 * first element whose complement (s - element) was already seen closes the
 * earliest pair. This replaces the nested scan over all index combinations
 * (O(n^2)) with a single O(n) pass.
 *
 * NOTE: the complement is computed as `s - value`, so this is intended for
 * integer input; with fractional values rounding may differ from testing
 * `a + b == s` directly.
 *
 * @param {number[]} ints numbers to search (may contain duplicates/negatives)
 * @param {number} s      target sum
 * @returns {number[]|undefined} [earlier value, later value], or undefined
 *                               when no two distinct positions add up to `s`
 */
var low_pair = function (ints, s) {
  var seen = new Set();
  for (var i = 0; i < ints.length; i++) {
    var current = ints[i];
    var partner = s - current;
    // Only earlier positions are in `seen`, so an element never pairs with itself.
    if (seen.has(partner)) {
      return [partner, current];
    }
    seen.add(current);
  }
  return undefined;
};
We are going to create a function that returns the earliest pair of elements that add up to the needed sum:
/**
 * Returns the first pair of values in `l` that add up to `s`, i.e. the pair
 * that is completed by the earliest element of the list.
 *
 * @param {number[]} l list of numbers
 * @param {number} s   wanted sum
 * @returns {number[]|undefined} [earlier value, current value], or undefined
 *                               when the loop ends without finding a pair
 */
function find_pair(l, s) {
  // Maps each value to the index at which it was first encountered.
  var firstSeenAt = {};
  var pos = 0;
  while (pos < l.length) {
    var current = l[pos];
    var partner = s - current;
    // The complement showed up earlier: the pair is complete.
    if (partner in firstSeenAt) {
      return [partner, current];
    }
    // Keep only the smallest index per value.
    if (!(current in firstSeenAt)) {
      firstSeenAt[current] = pos;
    }
    pos += 1;
  }
}
For this purpose, for every number we process, we store its minimum index. If we currently process number a, we check if s - a has its minimum index set. If yes, this means we found our wanted sum and we return both elements.
For example:
> var l = [2, 3, 4, 5, 5, 7, 8]
> find_pair(l, 10)
[5, 5]
> find_pair(l, 6)
[2, 4]
> find_pair(l, 5)
[2, 3]
> find_pair(l, 15)
[7, 8]
> find_pair([5, 9, 13, -3], 10)
[13, -3]
What is the best array iteration method for speed?
See What's the fastest way to loop through an array in JavaScript? for that. But notice the answers there might be deprecated, and current engines are better at optimising different things. You should always benchmark yourself, in your own target environment.
However, instead of looking for raw speed and microoptimisations, you should try to improve your algorithm. In your case, you can double the speed of your function by simply starting the inner loop at i so you don't visit all combinations twice. Also, by returning early from the function you can speed up the average case (depending on your data). To find the "earliest pair" you don't have to loop through the entire array and calculate a minimum, you just have to iterate the pairs in the chosen order. If the data is ordered (or at least skewed to some distribution) you could take advantage of that as well.
I'd use
/**
 * Searches `ints` for two different positions whose values add up to `s`,
 * visiting index pairs diagonal by diagonal instead of row by row.
 *
 * @param {number[]} ints values to search
 * @param {number} s      wanted sum
 * @returns {number[]|null} the two indexes [i, j] (not the values) of the
 *                          first matching pair in visiting order, or null
 */
function firstPair(ints, s) {
  var len = ints.length;
  var i, j;

  // Phase 1: for each `end`, i climbs from 0 while j descends from end - 1.
  for (var end = 0; end < len; end++) {
    for (i = 0; i < end; i++) {
      j = end - 1 - i;
      // Loose equality on the sum is kept on purpose (same as before).
      if (i !== j && ints[i] + ints[j] == s) {
        return [i, j];
      }
    }
  }

  // Phase 2: for each `start`, i climbs from start while j descends from len - 1.
  for (var start = 0; start < len; start++) {
    for (i = start; i < len; i++) {
      j = len - 1 - (i - start);
      if (i !== j && ints[i] + ints[j] == s) {
        return [i, j];
      }
    }
  }

  return null;
}
As suggested by the other answers, if the range of the values in your array is limited, you could drastically reduce the complexity of your algorithm by using a lookup table - trading memory for performance. Using a bitmap for already-occurred integers, it could look like this:
/**
 * Finds the first pair of values adding up to `s` using a bitmap of the
 * integers seen so far: value x is recorded as bit (x & 0xF) of the 16-bit
 * word stored at index (x >> 4).
 *
 * @param {number[]} ints integers to search
 * @param {number} s      wanted sum
 * @returns {number[]|null} [earlier value, current value], or null when no
 *                          pair is found
 */
function firstPair(ints, s) {
  var seenBits = []; // or, if domain is known and typed arrays are supported, use
  // var seenBits = new Uint16Array(Math.ceil(domainSize / 16));
  for (var pos = 0; pos < ints.length; pos++) {
    var value = ints[pos];
    var wanted = s - value;

    // get: has the complement been recorded already?
    var wantedWord = seenBits[wanted >> 4];
    var wantedMask = 1 << (wanted & 0xF);
    if (wantedWord & wantedMask) {
      return [wanted, value];
    }

    // set: record the current value
    seenBits[value >> 4] |= 1 << (value & 0xF);
  }
  return null;
}
The main problem is that your current function complexity is O(n^2) which is way too high for a 10000000 element array. The map function iterates through the entire array. So you make 10000000 * 10000000 = 100 trillion "operations". The complexity needs to be decreased. My best guess -> use a hash table within a linear loop. Below is my example code with a worst case test of 10 million elements that runs in around 8 seconds on my old machine. It makes only 10 million runs instead of 100 trillion.
<!DOCTYPE html>
<html>
<head>
<script type="text/javascript">
/**
 * Finds the first pair of numbers in `ints` that add up to `s` with a single
 * pass and a lookup table of the values seen so far.
 *
 * @param {number[]} ints numbers to search
 * @param {number} s      wanted sum
 * @returns {number[]|undefined} the pair ordered as [smaller, larger], or
 *                               undefined when no pair exists
 */
var low_pair = function (ints, s) {
  // seen[value] === true once `value` has been visited.
  var seen = {};
  for (var idx = 0; idx < ints.length; idx += 1) {
    var current = ints[idx];
    var complement = s - current;
    if (seen[complement] === true) {
      // The complement appeared earlier: report the pair, smaller value first.
      return complement > current ? [current, complement] : [complement, current];
    }
    seen[current] = true;
  }
  return undefined;
};
// Benchmark on a long input: the array counts down from test_array_size to 1,
// so a pair adding up to 12 is only completed within the last few elements.
var test_array_size = 10000000;
var test_array = new Array(test_array_size);
var i = 0;
while (i < test_array_size) {
  test_array[i] = test_array_size - i;
  i += 1;
}
console.log("Array initialized");
// Time only the search itself, not the array construction.
var start = Date.now();
console.log(low_pair(test_array, 12));
var end = Date.now();
console.log("Running time: "+(end-start)+" ms");
</script>
</head>
<body>
</body>
</html>
This function consistently runs a 50,000,000 items array in 0ms:
/**
 * Demo/benchmark: fills an array with random digits 0..8, then searches it
 * for the first pair adding up to `s`, timing the search under the console
 * timer label 'pair'.
 *
 * Pairs are visited by ascending position of their second element
 * (`counter`), and for each of those by ascending first position (`i`).
 *
 * @param {number} s               wanted sum
 * @param {number} [size=50000000] number of random elements to generate
 * @returns {undefined} nothing; the pair (or `undefined` when there is none)
 *                      is written with console.log
 */
var low_pair = function (s, size = 50000000) {
  var ints = [];
  for (var n = 0; n < size; n++) {
    ints.push(Math.floor(Math.random() * 9));
  }
  console.time('pair');
  for (var counter = 1; counter < ints.length; counter++) {
    for (var i = 0; i < counter; i++) {
      if (ints[i] + ints[counter] === s) {
        console.timeEnd('pair');
        return console.log([ints[i], ints[counter]]);
      }
    }
  }
  // Stop the timer on the "not found" path as well (this used to start the
  // same label a second time instead of ending it).
  console.timeEnd('pair');
  console.log( undefined);
};
I need to go through an array from the middle outwards.
var array = [a,b,c,d,e];
I would need to print in this order:
c,d,b,e,a
I have already split the array in half, first going forward and then going backwards which is already an improvement, but I really need to go one on each side till the end of the array on each side.
Say I want to start in the middle. I have the following before the loop statement, condition, and I can't seem to figure out the third part to switch one on each side incrementally.
for (var i = Math.floor(array.length/2); i >= 0 || i < array.length; i?){
//Do Something here.
}
Does anyone know how to do this?
Obviously I can't seem to test this in this condition.
Thanks
I modified the answer below (Thanks so much) to come up with this function. It allows to start from anywhere in the array and choose the direction to go. I am sure it could be written more elegantly. There is also a safety for wrong index numbers.
var array = ["a", "b", "c", "d", "e"];
/**
 * Returns the elements of `array` reordered so that they start at
 * `startIndex` and then alternate between the two sides, moving outwards.
 *
 * @param {Array} array        source array (not modified)
 * @param {number} startIndex  index to start from; values below 0 are treated
 *                             as 0 and values past the end as the last index
 * @param {string} direction   'right' to visit the right-hand neighbour
 *                             first, 'left' to visit the left-hand one first
 * @returns {Array} new array in visiting order; empty for an empty input or
 *                  an unknown direction
 */
function processArrayMiddleOut(array, startIndex, direction){
  var newArray = [];
  // An empty array has no valid start index at all.
  if (array.length === 0) {
    return newArray;
  }

  // Clamp the start index into [0, array.length - 1].
  var start = startIndex;
  if (start < 0) {
    start = 0;
  } else if (start >= array.length) {
    start = array.length - 1;
  }

  var i = start;
  var j;
  if (direction === 'right') {
    // i walks to the left from the start, j walks to the right of it.
    j = i + 1;
    while (j < array.length || i >= 0) {
      if (i >= 0) newArray.push(array[i]);
      if (j < array.length) newArray.push(array[j]);
      i--;
      j++;
    }
  } else if (direction === 'left') {
    // i walks to the right from the start, j walks to the left of it.
    j = i - 1;
    while (j >= 0 || i < array.length) {
      if (i < array.length) newArray.push(array[i]);
      if (j >= 0) newArray.push(array[j]);
      i++;
      j--;
    }
  }
  return newArray;
}
// Example: start at index 2 ("c") and alternate outwards, taking the
// left-hand neighbour first; for the array above this shows "c,b,d,a,e".
// NOTE(review): alert() is a browser API - this will not run as-is elsewhere.
var result = processArrayMiddleOut(array, 2, 'left');
alert(result.toString());
http://jsfiddle.net/amigoni/cqCuZ/
Two counters, one going up, other going down:
var array = ["a", "b", "c", "d", "e"];
var newArray = [];
// i starts just right of the middle and climbs; j starts at the middle
// (left-middle for an even length) and descends. Elements are taken
// alternately from j and i until j has run off the front of the array.
var i = Math.ceil(array.length / 2);
var j = i - 1;
for (; j >= 0; j -= 1) {
  newArray.push(array[j]);
  if (i < array.length) {
    newArray.push(array[i]);
    i += 1;
  }
}
http://jsfiddle.net/X9cQL/
So I decided to revisit this, not feeling very satisfied with the first answer I gave. I was positive there would be some relationship between the index numbers when the data is successfully reordered; I found that pattern in the addition of the iteration number to the last item position.
For our initial array, we'll use the following: ['a', 'b', 'c', 'd', 'e'].
Our starting point is Math.floor( arr.length / 2 ), which gives us 2, corresponding to c in the array values. This is on iteration 0. The following instructions detail how we walk through an array with an odd number of values:
Position | Direction | Iteration | New Position | Value at Position
----------+-----------+-----------+--------------+-------------------
2 | - | 0 | 2 | c
2 | + | 1 | 3 | d
3 | - | 2 | 1 | b
1 | + | 3 | 4 | e
4 | - | 4 | 0 | a
You'll see a pattern developing: when our iteration is odd, we add it to our location to find our new position. When the iteration is even, we subtract it from our position to find the new location.
When dealing with an array that has an even number of values, the rules are flipped. When you have an even number of values, we subtract odd iterations from location to get the new position, and add even iterations to our location to find the next value.
To demonstrate how little code is needed to perform this sorting logic, below is a minified version of the above logic (the aforementioned link is far more readable):
// DON'T USE THIS IN PRODUCTION, OR YOUR TEAM MAY KILL YOU
// gut(a): returns a new array `o` holding the elements of `a` reordered from
// the middle outwards. Deliberately minified.
//   s = number of elements, c = iteration counter,
//   l = current position, starting at floor(s / 2).
// On every iteration l is moved by +c or -c and a[l] is pushed:
//   odd s  -> odd c adds, even c subtracts
//   even s -> odd c subtracts, even c adds
function gut(a){
var o=[],s=a.length,l=Math.floor(s/2),c;
for(c=0;c<s;c++)o.push(a[l+=(s%2?c%2?+c:-c:c%2?-c:+c)]);
return o
}
Implementing the above logic in a more readable manner:
// Reorder an array from the inside out: [a,b,c,d,e] -> [c,d,b,e,a]
/**
 * Walks `arr` from its middle outwards and returns the visited values.
 *
 * The position starts at floor(length / 2). On iteration `cnt` it jumps by
 * `cnt` places, forwards or backwards: for an odd-length array odd
 * iterations jump forwards and even ones backwards; for an even-length array
 * it is the other way round.
 *
 * @param {Array} arr source array (not modified)
 * @returns {Array} new array with the same values in middle-out order
 */
function gut( arr ) {
  var num = arr.length;
  var oddLength = num % 2 === 1;
  var loc = Math.floor( num / 2 );
  var out = [];
  var cnt = 0;

  while ( cnt < num ) {
    var oddStep = cnt % 2 === 1;
    // Jump forwards when the parity of the step matches the parity of the length.
    var forwards = oddLength ? oddStep : !oddStep;
    loc += forwards ? cnt : -cnt;
    out.push( arr[ loc ] );
    cnt++;
  }

  return out;
}
Okay, let's solve this problem step by step:
An array may either have an odd or an even number of elements:
If the array has an odd number of elements:
The middle element is at index (array.length - 1) / 2. Let this index be called mid.
There are mid number of elements to the left of the middle element. Obviously.
There are mid number of elements to the right of the middle element.
If the array has an even number of elements:
The middle element is at index array.length / 2. Let this index be called mid.
There are mid number of elements to the left of the middle element. Obviously.
There are mid - 1 number of elements to the right of the middle element.
Now let's create a function to tackle this problem using the above known data:
/**
 * Calls `callback` once for every element of `array`, starting with the
 * middle element and then alternating right, left, right, left ... outwards.
 *
 * For an even length the middle is the element at index length / 2, which
 * has one element fewer on its right than on its left.
 *
 * @param {Array} array       elements to visit (not modified)
 * @param {function(*): void} callback invoked with each element in turn
 * @returns {undefined}
 */
function processMidOut(array, callback) {
  var length = array.length;
  // Nothing to visit: do not hand `undefined` to the callback.
  if (length === 0) {
    return;
  }
  var odd = length % 2;         // odd is 0 for an even number, 1 for odd
  var mid = (length - odd) / 2; // index of the middle element

  callback(array[mid]);         // process the middle element first

  for (var i = 1; i <= mid; i++) {  // process mid number of elements
    if (odd || i < mid) {           // process one less element if even
      callback(array[mid + i]);     // process the right side element first
    }
    callback(array[mid - i]);       // process the left side element next
  }
}
That's all that there is to it. Now let's create some arrays and process them mid out:
// One odd-length and one even-length sample array.
var odd = ["a", "b", "c", "d", "e"];
var even = ["a", "b", "c", "d", "e", "f"];

// The visiting order is recorded by appending each element to a string.
var oddOrder = "";
var evenOrder = "";

processMidOut(odd, (element) => {
  oddOrder = oddOrder + element;
});
processMidOut(even, (element) => {
  evenOrder = evenOrder + element;
});

// Show both visiting orders.
alert(oddOrder);
alert(evenOrder);
You can find a working demo here: http://jsfiddle.net/xy267/1/
Very interesting algorithm. Here is what I came up with:
/**
 * Visits every element of `arr` starting at the middle and alternating
 * right, left, right, left ... outwards.
 *
 * Declared as a function instead of assigning to an undeclared identifier,
 * which created an implicit global and throws in strict mode / ES modules.
 *
 * @param {Array} arr  elements to visit (not modified)
 * @param {function(*, number): void} callback called as
 *        callback.call(arr, element, index) for each visited element
 * @returns {undefined}
 */
function walkMidleOut(arr, callback) {
  var mid = (arr.length - arr.length % 2) / 2;
  for (var i = 0; i < arr.length; i++) {
    // Steps 0,1,2,3,4,... map to offsets 0,+1,-1,+2,-2,...
    var distance = (i % 2 ? i + 1 : i) / 2;
    var index = i % 2 ? mid + distance : mid - distance;
    // For an even length the last step to the right lands one past the end;
    // the element still missing at that point is the first one.
    if (index === arr.length) {
      index = 0;
    }
    callback.call(arr, arr[index], index);
  }
}
Usage:
// Print every element together with its original index, in visiting order.
walkMidleOut([1, 2, 3, 4, 5], (el, index) => {
  console.log(el, index);
});
Will give you:
3 2
4 3
2 1
5 4
1 0
Function can be used with any number of elements, odd or even.
How about using concat() and slice()? You can just pass this the index of the middle element.
/**
 * Array.prototype.eachFrom(index): returns a rotated copy of the array that
 * starts at `index` and wraps around to the elements before it.
 *
 * Installed with Object.defineProperty so that the method is not enumerable
 * and therefore does not show up in `for...in` loops over arrays.
 *
 * @param {number} [index=0] position the copy should start at; a missing
 *        index or one greater than the array length is treated as 0
 * @returns {Array} new array: elements from `index` to the end, followed by
 *                  the elements before `index`
 */
Object.defineProperty(Array.prototype, 'eachFrom', {
  value: function eachFrom(index) {
    // A missing index would make both slices return the whole array.
    var start = (index === undefined || index > this.length) ? 0 : index;
    return [].concat(this.slice(start), this.slice(0, start));
  },
  writable: true,
  configurable: true,
  enumerable: false
});
so for example:
// Rotate the array so that it starts at index 2, then loop over the rotated copy.
// NOTE(review): doFunThings is not defined in this snippet - presumably a
// placeholder for whatever work should be done per element.
var arr = ['a', 'b', 'c', 'd', 'e'], arr = arr.eachFrom(2);
for( var i = 0; i < arr.length; i++ ) { doFunThings(); }
Using underscore and _( Object ).Sort_Inside_Out():
_.mixin( {
  /**
   * Reorders a collection by handing sortBy an alternating key per element:
   * the keys produced, in input order, are 1, -1, 2, -2, 3, ...
   * NOTE(review): the resulting order depends on how sortBy orders these
   * keys (presumably ascending) - confirm against the Underscore docs.
   *
   * @param {Array|Object} Object collection to reorder
   * @returns {*} whatever _( Object ).sortBy( ... ) returns
   */
  Sort_Inside_Out: function ( Object ) {
    // Declared locally: assigning to an undeclared `Counter` created an
    // implicit global (and throws a ReferenceError in strict mode).
    var Counter = 0;
    return _( Object ).sortBy( function ( Element ) {
      // Flip the sign; when the previous key was not positive, also move one
      // step further away from zero.
      Counter = -Counter + ( Math.sign( Counter ) === 1 ? 0 : 1 );
      return Counter;
    } );
  },
} );
Here is a simple way to start at any index in an array and loop both forward and backward at the same time (i.e., iterate through all the items starting with items closest to the index and moving farther away).
let passing = 0;
/**
 * Returns the elements of `arr` ordered by closeness to `idx`: the element
 * at `idx`, then its left neighbour, then the next one to the right, and so
 * on; once one side runs out, the rest of the other side follows.
 *
 * @param {Array} arr  source array (not modified)
 * @param {number} idx index to start from, expected within 0..arr.length
 * @returns {Array} new array in visiting order
 */
function bothSides(arr, idx) {
  // Local result list (previously an undeclared, implicitly global variable).
  const newArr = [];
  // Number of steps for which both a right-hand and a left-hand element exist.
  const shortLen = Math.min(idx, arr.length - idx);
  for (let i = 0; i < shortLen; i++) {
    newArr.push(arr[idx + i]); // add next
    newArr.push(arr[idx - i - 1]); // add previous
  }
  for (let i = idx + shortLen; i < arr.length; i++) {
    newArr.push(arr[i]); // add any remaining on right
  }
  for (let i = idx - shortLen - 1; i > -1; i--) {
    newArr.push(arr[i]); // add any remaining on left
  }
  return newArr;
}
var arr = Array.from({ length: 10 }, (_, position) => position); // 0,1,2,3,4,5,6,7,8,9
// Each entry: [start index, expected visiting order as a comma-joined string].
for (const [startIdx, expected] of [
  [0, '0,1,2,3,4,5,6,7,8,9'],
  [2, '2,1,3,0,4,5,6,7,8,9'],
  [4, '4,3,5,2,6,1,7,0,8,9'],
  [5, '5,4,6,3,7,2,8,1,9,0'],
  [7, '7,6,8,5,9,4,3,2,1,0'],
  [9, '9,8,7,6,5,4,3,2,1,0'],
]) {
  // String(array) joins the elements with commas, like the loose comparison did.
  if (String(bothSides(arr, startIdx)) === expected) {
    passing += 1;
  }
}
// Same algorithm as bothSides, but as a generator that yields lazily.
/**
 * Yields the elements of `arr` ordered by closeness to `idx`: the element at
 * `idx`, then its left neighbour, then the next one to the right, and so on;
 * once one side runs out, the rest of the other side follows.
 *
 * @param {Array} arr  source array (not modified)
 * @param {number} idx index to start from
 * @yields {*} the elements of `arr` in visiting order
 */
function* bothSidesG(arr, idx) {
  // Number of steps for which both sides still have an element.
  const paired = Math.min(idx, arr.length - idx);

  let step = 0;
  while (step < paired) {
    yield arr[idx + step]; // next on the right
    yield arr[idx - step - 1]; // next on the left
    step += 1;
  }

  // Whatever is left over on the right ...
  let right = idx + paired;
  while (right < arr.length) {
    yield arr[right];
    right += 1;
  }

  // ... and whatever is left over on the left.
  let left = idx - paired - 1;
  while (left > -1) {
    yield arr[left];
    left -= 1;
  }
}
var arr2 = Array.from({ length: 7 }, (_, position) => position); // 0,1,2,3,4,5,6
// Each entry: [start index, expected visiting order as a comma-joined string].
for (const [startIdx, expected] of [
  [0, '0,1,2,3,4,5,6'],
  [1, '1,0,2,3,4,5,6'],
  [3, '3,2,4,1,5,0,6'],
  [5, '5,4,6,3,2,1,0'],
  [6, '6,5,4,3,2,1,0'],
]) {
  // Drain the generator into an array and compare its comma-joined form.
  if (String([...bothSidesG(arr2, startIdx)]) === expected) {
    passing += 1;
  }
}
console.log(`Passing ${passing} of 11 tests`);