I have two arrays: data and usedIndexes. First, I need to find all dividable (to another number in the array) numbers and store them with their indexes (let's say in myMap) and then somehow go through the second array "usedIndexes" and find dividable pairs that one of their index number is in usedIndexes array.
For example, given arrays:
data: [8, 3, 5, 2, 7, 9, 50]
usedIndexes = [0, 1, 6]
const myMap = {}
for (let i = 0; i < arr.length; i++) {
for(let y = i + 1; y < arr.length; y++) {
if(arr[i] % arr[y] === 0 || arr[y] % arr[i] === 0 ) {
if(!myMap[i]) {
myMap[i] = arr[i]
}
if(!myMap[y]) {
myMap[y] = arr[y];
}
}
}
}
So this will give me something like (index - value pair), myMap:
{
0: 8,
3: 2,
1: 3,
5: 9,
2: 5,
6: 50
3: 2,
6: 50
}
So actual dividable pairs like:
8 - 2
3 - 9
5 - 50
2 - 50
Quite frankly, I am not sure maybe map/object is not the best data structure to hold these values but I couldn't think something else.
Now, how can I loop through the usedIndexes array and find key-value pairs which at least one of the keys has in usedIndexes. Please read the comment in code sample below.
for (let i = 0; i < usedIndexes.length; i++){
// e.g. usedIndexes first element is 0 and myMap has 0:8
// but what I need to return 0:8 AND 3:2 because dividable pair is 8 - 2.
}
Don't store the divisor value in the map, store its index:
const myMap = {};
for (let i = 0; i < arr.length; i++) {
for (let j = 0; j < arr.length; j++) {
if (i != j && arr[i] % arr[j] === 0) {
myMap[i] = j;
myMap[j] = i;
}
}
}
Then it's only a matter of going through usedIndexes to find those that appear as part of a pair in the map:
const result = [];
for (const index of usedIndexes) {
if (index in myMap) {
const other = myMap[index];
result.push([{index, value: arr[index]}, {index: other, value: arr[other]}]);
}
}
console.log(result);
However, this will not find all possible pair where one value divides the other and one value is referenced by a usedIndex. This is due to myMap storing only a single pair for each index, even when an index would be included in multiple pairs. (For simplicity I didn't bother to check whether the assignment would override an other value in the first snippet, it only changes whether the first or last combination would be found, it doesn't solve the problem).
To alleviate this problem, we should just get rid of the myMap and fill the result right inside the nested loop:
const result = [];
for (let i = 0; i < arr.length; i++) {
for (let j = 0; j < arr.length; j++) {
if (i != j && arr[i] % arr[j] === 0) {
if (usedIndexes.includes(i) || usedIndexes.includes(j)) {
result.push([{index: i, value: arr[i]}, {index: j, value: arr[j]}]);
}
}
}
}
console.log(result);
The only problem with this solution is its inefficiency - usedIndexes.includes does iterate the usedIndexes array every time. You could alleviate that by creating a Set of indices then use .has() inside the loop, but we can actually do better: just don't iterate those indices in the first place!
const result = [];
for (const i of usedIndexes) {
for (let j = 0; j < arr.length; j++) {
if (i != j) {
const [a, b] = arr[i] > arr[j] ? [i, j] : [j, i];
if (arr[a] % arr[b] === 0) {
result.push([{index: a, value: arr[a]}, {index: b, value: arr[b]}]);
}
}
}
}
console.log(result);
Is there any built-in JavaScript function to do a partial sort? If not, what is a good way to implement it?
Given an unsorted array of N elements, I would like to find K elements that are minimal with respect to some weighting function. K is much smaller than N, so it would be inefficient to sort the whole array and take the first K elements.
I would be happy even if there was something non-standard, browser-dependent. I could still fallback to the custom JavaScript implementation.
PS: This is my current custom implementation (without taking a weighting function into account, just sorting the elements as they are for simplicity):
function bisect(items, x, lo, hi) {
var mid;
if (typeof(lo) == 'undefined') lo = 0;
if (typeof(hi) == 'undefined') hi = items.length;
while (lo < hi) {
mid = Math.floor((lo + hi) / 2);
if (x < items[mid]) hi = mid;
else lo = mid + 1;
}
return lo;
}
function insort(items, x) {
items.splice(bisect(items, x), 0, x);
}
function partialSort(items, k) {
var smallest = [];
for (var i = 0, len = items.length; i < len; ++i) {
var item = items[i];
if (smallest.length < k || item < smallest[smallest.length - 1]) {
insort(smallest, item);
if (smallest.length > k)
smallest.splice(k, 1);
}
}
return smallest;
}
console.log(partialSort([5, 4, 3, 2, 1, 6, 7, 8, 1, 9], 3));
The algorithm walks through the given array one single time, keeping track of a sorted list of the k smallest items so far, using binary search to insert new elements.
Please post alternative solutions if you think they might be faster or more elegant. Timings are very welcome.
No. There's only the full array sort, so you will need to use your own implementation.
Little improvement on your code (I had thought of exactly the same algorithm :-)):
function partialSort(items, k) {
var smallest = items.slice(0, k).sort(),
max = smallest[k-1];
for (var i = k, len = items.length; i < len; ++i) {
var item = items[i];
if (item < max) {
insort(smallest, item);
smallest.length = k;
max = smallest[k-1];
}
}
return smallest;
}
(Even seems to be a little faster, I guess due to caching the max variable)
For relatively small k it can be worth it to implement a Max Heap (by lack of a native one in JavaScript):
Create a Max Heap of the first k values
For each remaining value:
If it is less than the root of the heap, replace the root with this value. Otherwise ignore the value. Note that the heap never changes in size.
Finally sort the heap and return it.
This is in fact an improvement on another idea using a Min Heap, but that one needs to heapify the whole array, and so will not run as fast. After heapifying the whole array, you just extract k times a value from that heap, and return those values.
I have added both solutions to Bergi's jsperf.com performance tests (copied to jsbench.me). For that particular test (5000 array values, k = 10), the Max Heap solution is faster. But this advantage will shrink as k is increased.
Here is the code for the Max Heap solution:
// A few Heap-functions that operate on an array
function maxSiftDown(arr, i=0, value=arr[i]) {
if (i >= arr.length) return;
while (true) {
var j = i*2+1;
if (j+1 < arr.length && arr[j] < arr[j+1]) j++;
if (j >= arr.length || value >= arr[j]) break;
arr[i] = arr[j];
i = j;
}
arr[i] = value;
}
function maxHeapify(arr) {
for (var i = arr.length>>1; i--; ) maxSiftDown(arr, i);
return arr;
}
// The main algorithm
function partialSortWithMaxHeap(items, k) {
var heap = maxHeapify(items.slice(0, k));
for (var i = k, len = items.length; i < len; ++i) {
var item = items[i];
if (item < heap[0]) maxSiftDown(heap, 0, item);
}
return heap.sort((a,b) => a-b);
}
// Sample data & call
var arr = Array.from({length:5000}, () => Math.floor(Math.random() * 1e5));
console.log(partialSortWithMaxHeap(arr, 10));
There's no native partial sort function. The closest thing to what you want is Array.filter.
function isSmallEnough(element, index, array) {
return (element <= 10);
}
var filtered = [12, 5, 8, 130, 44].filter(isSmallEnough);
// filtered is [5, 8]
The example was borrowed (and slightly modified) from the above link.
I made a version than works with objects, like Array.sort(f):
function partialSort(items, k,f) {
function bisect(items, x, lo, hi) {
var mid;
if (typeof(lo) == 'undefined') lo = 0;
if (typeof(hi) == 'undefined') hi = items.length;
while (lo < hi) {
mid = Math.floor((lo + hi) / 2);
if (0>f(x,items[mid])) hi = mid;
else lo = mid + 1;
}
return lo;
}
function insort(items, x) {
items.splice(bisect(items, x), 0, x);
}
var smallest = items.slice(0, k).sort(f),
max = smallest[k-1];
for (var i = k, len = items.length; i < len; ++i) {
var item = items[i];
if (0>f(item,max)) {
insort(smallest, item);
smallest.length = k;
max = smallest[k-1];
}
}
return smallest;
}
// [ { e: 1 }, { e: 1 }, { e: 2 } ]
console.log(partialSort([{e:4},{e:6},{e:1},{e:8},{e:3},{e:1},{e:6},{e:2}],3,(a,b)=>a.e-b.e))
console.log()
In the code block below, the nisetamafibo function keeps an array of the smallest items found so far. The array is sorted and truncated to length K after a certain number of new items have been added to the array, where the number is taken from the Fibonacci sequence so that it is first 1, next 1, then 2, then 3, then 5, and so on. The nisetamadouble method doubles the interval after which the array is sorted instead, so that it is first 1, then 2, then 4, and so on. (I also tried the approach that I sorted the array each time after a fixed number of new items like 10 had been added, but it was slower. And I also tried to initialize the array at the start of the function so that I took in a fixed number of the first items of the input and sorted them, but I found that initializing the array with 1 or 0 items was the fastest, so I removed the initialization step.)
The nisetamainsertion function uses insertion sort to sort the items. It's very slow at high K-values because insertion sort has quadratic time complexity, but it's fast at K-values of around 10 to 100 or lower, because insertion sort is fast for short arrays. The nisetamachoose method chooses nisetamainsertion for K-values of 100 or less but nisetamafibo otherwise. (In the Java JDK, the file DualPivotQuicksort.java uses insertion sort instead of quicksort for arrays with less than 47 items. A presentation about sorting algorithms in R said that "fastest for < 30 items is insert sort".)
I also tried to implement the quickselect algorithm with and without recursion. The version that didn't use recursion was a bit faster, but both versions were still slow compared to other methods especially in cases where N was high and K was low.
On another Stack Exchange site, someone came up with new variants of the Floyd-Rivest algorithm which were faster than the regular Floyd-Rivest algorithm in C: https://softwareengineering.stackexchange.com/questions/284767/kth-selection-routine-floyd-algorithm-489. I tried to implement the variant called select7MO3 in JavaScript, but it ended up being one of the slowest options in my benchmark.
function nisetamafibo(a,k=1){
let found=[],len=a.length,unsorted=0,biggestfound=Infinity,nextsort=1,prevsort=1,oldsort
for(let i=0;i<len;i++){
if(a[i]<biggestfound||i<k){
found.push(a[i])
if(++unsorted==nextsort){
found.sort((l,r)=>l<r?-1:l>r?1:0)
found=found.slice(0,k)
biggestfound=found[found.length-1]
oldsort=nextsort;nextsort+=prevsort;prevsort=oldsort
unsorted=0
}
}
}
found.sort((l,r)=>l<r?-1:l>r?1:0)
return found.slice(0,k)
}
function nisetamadouble(a,k=1){
let found=[],len=a.length,unsorted=0,biggestfound=Infinity,nextsort=1
for(let i=0;i<len;i++){
if(a[i]<biggestfound||i<k){
found.push(a[i])
if(++unsorted==nextsort){
found.sort((l,r)=>l<r?-1:l>r?1:0)
found=found.slice(0,k)
biggestfound=found[found.length-1]
nextsort*=2
unsorted=0
}
}
}
found.sort((l,r)=>l<r?-1:l>r?1:0)
return found.slice(0,k)
}
function nisetamainsertion(a,k=1){
let found=a.slice(0,k),l=a.length
found.sort((l,r)=>l<r?-1:l>r?1:0)
let biggestfound=found[k-1]
for(let i=0;i<l;i++){
let v=a[i]
if(v<biggestfound){
let insertat=k-1
for(let j=0;j<k-1;j++)if(v<found[j]||j==i){insertat=j;break}
for(let j=k-1;j>insertat;j--)found[j]=found[j-1]
found[insertat]=v
biggestfound=found[k-1]
}
}
return found
}
function nisetamachoose(a,k=1){
return k<=100?nisetamainsertion(a,k):nisetamafibo(a,k)
}
function quickselect(a,k,l,r){
l=l||0
r=r||a.length-1
while(true){
let pivot=a[r],pos=l
for(let i=l;i<=r;i++)if(a[i]<pivot){let temp=a[i];a[i]=a[pos];a[pos++]=temp}
let temp=a[r];a[r]=a[pos];a[pos]=temp
if(pos==k)break
pos<k?l=pos+1:r=pos-1
}
}
function quickselectrecursive(a,k,l,r){
l=l||0
r=r||a.length-1
let pivot=a[r],pos=l
for(let i=l;i<=r;i++)if(a[i]<pivot){let temp=a[i];a[i]=a[pos];a[pos++]=temp}
let temp=a[r];a[r]=a[pos];a[pos]=temp
if(pos<k)quickselectrecursive(a,pos+1,r,k)
if(pos>k)quickselectrecursive(a,l,pos-1,k)
}
function sortslice(a,k){
a.sort((l,r)=>l<r?-1:l>r?1:0)
return a.slice(0,k)
}
// https://softwareengineering.stackexchange.com/questions/284767/kth-selection-routine-floyd-algorithm-489
function select7MO3(a,k){
let l=0,i,r=a.length-1,rr=r,ll=l
while(r>l){
if(a[k]<a[l]){let t=a[l];a[l]=a[k];a[k]=t}
if(a[r]<a[l]){let t=a[l];a[l]=a[r];a[r]=t}
if(a[r]<a[k]){let t=a[k];a[k]=a[r];a[r]=t}
if((r-l)>k){
let n=r-l+1
i=k-l+1
let s=(2*n/3)
let div=i-n
let sd=(n*s*(n-s)/n)*(div<0?-1:div>0?1:0)
ll=Math.max(l,k-i*s/n+sd)
rr=Math.min(r,k+(n-i)*s/n+sd)
}
let pivot=a[k]
i=l
let j=r
let t=a[l];a[l]=a[k];a[k]=t
if(a[r]>pivot){t=a[r];a[r]=a[l];a[l]=t}
while(i<j){
let t=a[i];a[i]=a[j];a[j]=t
i++
j--
while(a[i]<pivot)i++
while(a[j]>pivot)j--
}
if(a[l]==pivot){i--;let t=a[l];a[l]=a[j];a[j]=t}
else{j++;let t=a[j];a[j]=a[r];a[r]=t}
if(j<=k)l=j+1
else if(k<=j)r=j-1
}
let out=a.slice(0,k)
out.sort((l,r)=>l<r?-1:l>r?1:0)
return out
}
// OP and Bergi
function bisect(items, x, lo, hi) {
var mid;
if (typeof(lo) == 'undefined') lo = 0;
if (typeof(hi) == 'undefined') hi = items.length;
while (lo < hi) {
mid = Math.floor((lo + hi) / 2);
if (x < items[mid]) hi = mid;
else lo = mid + 1;
}
return lo;
}
function insort(items, x) {
items.splice(bisect(items, x), 0, x);
}
function OP(items, k) {
var smallest = [];
for (var i = 0, len = items.length; i < len; ++i) {
var item = items[i];
if (smallest.length < k || item < smallest[smallest.length - 1]) {
insort(smallest, item);
if (smallest.length > k)
smallest.splice(k, 1);
}
}
return smallest;
}
function OP_Bergi(items, k) {
var smallest = items.slice(0, k).sort(),
max = smallest[k-1];
for (var i = k, len = items.length; i < len; ++i) {
var item = items[i];
if (item < max) {
insort(smallest, item);
smallest.length = k;
max = smallest[k-1];
}
}
return smallest;
}
// trincot
function maxSiftDown(arr, i=0, value=arr[i]) {
if (i >= arr.length) return;
while (true) {
var j = i*2+1;
if (j+1 < arr.length && arr[j] < arr[j+1]) j++;
if (j >= arr.length || value >= arr[j]) break;
arr[i] = arr[j];
i = j;
}
arr[i] = value;
}
function maxHeapify(arr) {
for (var i = arr.length>>1; i--; ) maxSiftDown(arr, i);
return arr;
}
function trincot_MaxHeap(items, k) {
var heap = maxHeapify(items.slice(0, k));
for (var i = k, len = items.length; i < len; ++i) {
var item = items[i];
if (item < heap[0]) maxSiftDown(heap, 0, item);
}
return heap.sort((a,b) => a-b);
}
// DiazJara
function DiazJara(items, k,f) {
function bisect(items, x, lo, hi) {
var mid;
if (typeof(lo) == 'undefined') lo = 0;
if (typeof(hi) == 'undefined') hi = items.length;
while (lo < hi) {
mid = Math.floor((lo + hi) / 2);
if (0>f(x,items[mid])) hi = mid;
else lo = mid + 1;
}
return lo;
}
function insort(items, x) {
items.splice(bisect(items, x), 0, x);
}
var smallest = items.slice(0, k).sort(f),
max = smallest[k-1];
for (var i = k, len = items.length; i < len; ++i) {
var item = items[i];
if (0>f(item,max)) {
insort(smallest, item);
smallest.length = k;
max = smallest[k-1];
}
}
return smallest;
}
// benchmark
for(let nk of'31 33 40 42 44 51 53 55 60 62 64 66 71 73 75'.split(' ')){
let n=parseInt(nk[0]),k0=parseInt(nk[1]),k=10**k0
let opt=[
'OP(a,k)',
'OP_Bergi(a,k)',
'trincot_MaxHeap(a,k)',
'DiazJara(a,k,(l,r)=>l-r)',
'DiazJara(a,k,(l,r)=>l<r?-1:l>r?1:0)',
'nisetamafibo(a,k)',
'nisetamadouble(a,k)',
// 'nisetamainsertion(a,k)', // this would've taken too long to run at K=1e6
'nisetamachoose(a,k)',
'quickselect(a,k);a=a.slice(0,k);a.sort((l,r)=>l<r?-1:l>r?1:0)',
'quickselectrecursive(a,k);a=a.slice(0,k);a.sort((l,r)=>l<r?-1:l>r?1:0)',
'select7MO3(a,k);a=a.slice(0,k);a.sort((l,r)=>l<r?-1:l>r?1:0)',
'sortslice(a,k)'
]
let ord=Array.from({length:100},()=>Array(opt.length).fill().map((_,i)=>i)).flat()
ord.sort(()=>Math.random()-.5)
for(let x of ord){
let o=opt[x]
let a=Array.from({length:10**n},()=>Math.random())
let t1=process.hrtime.bigint()
eval(o)
let t2=process.hrtime.bigint()-t1
console.log(n+' '+k0+' '+o+' '+t2)
}
}
This shows the median time of a hundred runs in ms and the average rank of each method (where for example 7/4 means that N was 1e7 and K was 1e4):
For most combinations of N and K, Bergi's modified version of the OP's code was actually slower than the OP's code, even though the OP's code was extremely slow in the case where N and K were both 1e6.
(l,r)=>l<r?-1:l>r?1:0 is faster than (l,r)=>l-r as you can see by comparing the two versions of Díaz-Jara's method above.
Here's also versions of my nisetamadouble and nisetamainsertion methods which return the indexes of the smallest items in addition to the values:
let a=Array.from({length:1e5},()=>Math.random())
let k=10
let l=a.length
let biggestfound=Infinity,foundind=[],foundval=[]
for(let i=0;i<l;i++){
let v=a[i]
if(i<k||v<biggestfound){
let insertat=k-1
for(let j=0;j<k-1;j++)if(v<foundval[j]||j==i){insertat=j;break}
for(let j=k-1;j>insertat;j--){foundind[j]=foundind[j-1];foundval[j]=foundval[j-1]}
foundind[insertat]=i
foundval[insertat]=v
biggestfound=foundval[k-1]
}
}
console.log(foundind)
console.log(foundval)
function nisetama(a,k=1){
let found=[],len=a.length,unsorted=0,biggestfound=Infinity,nextsort=1
for(let i=0;i<len;i++){
if(a[i]<biggestfound||i<k){
found.push(a[i])
if(++unsorted==nextsort){
found.sort((l,r)=>l<r?-1:l>r?1:0)
found=found.slice(0,k)
biggestfound=found[found.length-1]
nextsort*=2
unsorted=0
}
}
}
found.sort((l,r)=>l<r?-1:l>r?1:0)
return found.slice(0,k)
}
let a2=a
nisetama(a2,k)
biggestfound=a2[k-1],foundind=[]
for(let i=0;i<l;i++)if(a[i]<=biggestfound)foundind.push(i)
foundind.sort((l,r)=>a[l]<a[r]?-1:a[l]>a[r]?1:0)
foundind=foundind.slice(0,k)
console.log(foundind)
console.log(foundval)