Performance optimizations for add, removeFirst, removeFirstN array operations - javascript

For my use case I've found that the shift/splice methods are stressing my CPU way too much as the array grows in size. In theory the array could be as big as 86400 items, although usually it would be much lower - around 10000 array elements.
I've tried to illustrate it with a simple example. Imagine this at a very large scale. It'll run decently up until a point, but generally it seems highly ineffective to remove the first (or first n) item(s) like this.
Hopefully somebody with more knowledge in "why that is", can fill out one or more of the 3 functions in the snippet below:
add()
removeFirst()
removeFirstN(n)
Immutability won't work here - or rather, since we're after the optimal performance, copying a growing and quite large datastructure (array in this case) definitely won't work.
Any good suggestions? :-)
// Shared state for the demo below:
// objArray - the array that is used like a queue (appended at the end, trimmed at the front)
// maxCount - the size at which the timer callback starts removing items from the front
// i        - tick counter; the timer callback resets it to 0 every 15 ticks
let objArray = []
let maxCount = 10;
let i = 0;
/**
 * Appends one sample to the shared `objArray`.
 * `x` is the current time as a millisecond timestamp and `y` is a random
 * integer between 1 and 10000 (inclusive).
 */
function add() {
  const timestamp = Number(new Date());
  const reading = Math.floor(Math.random() * 10000) + 1;
  objArray.push({ x: timestamp, y: reading });
  console.log("add");
}
/**
 * Drops the oldest (index 0) entry from the shared `objArray`, if any,
 * and logs the operation.
 */
function removeFirst() {
  objArray.splice(0, 1);
  console.log("removeFirst");
}
/**
 * Drops the `n` oldest entries from the front of the shared `objArray`
 * in a single splice and logs the operation.
 *
 * @param {number} n - how many leading entries to remove
 */
function removeFirstN(n) {
  const from = 0;
  objArray.splice(from, n);
  const label = `removeFirstN(${n})`;
  console.log(label);
}
// Every second an object is added to the array. Before the add, enough of the
// oldest entries are removed so that the array holds at most `maxCount` items
// once the new one has been pushed.
setInterval(function () {
  if (objArray.length === maxCount) {
    removeFirst();
  } else if (objArray.length > maxCount) {
    // Possible because maxCount is allowed to change (it shrinks below).
    // The +1 makes room for the item that is about to be added.
    const diff = objArray.length + 1 - maxCount;
    removeFirstN(diff);
  }
  // Always add
  add();
  i++;
  // Every 15 ticks, lower the cap by one and restart the tick counter.
  if (i === 15) {
    maxCount--;
    i = 0;
  }
  // Read the length directly; copying the whole array just to measure it is wasted work.
  console.log(`length: ${objArray.length}`);
  // The copy here is kept so the console shows a snapshot rather than the live array.
  console.log([...objArray]);
}, 1000);

Judging by the listed operations, you’re looking for a queue with constant-time enqueue and dequeue. When you use an array as a queue by moving all the elements for operations on one side, that operation instead takes time proportional to the number of elements in the array. An implementation based on a circular buffer or linked list (both satisfy the constant-time requirement) will be faster as the number of elements becomes larger.
Linked lists are simple enough to demonstrate in a post:
/**
 * FIFO queue backed by a singly linked list.
 * Both `enqueue` and `dequeue` only touch the head/tail pointers, so neither
 * has to move the other elements the way `Array.prototype.shift` does.
 */
class LinkedQueue {
  constructor() {
    // First node (next to be dequeued) and last node (most recently enqueued);
    // both are null while the queue is empty.
    this.head = null;
    this.tail = null;
  }

  /**
   * Adds `value` at the back of the queue.
   * @param {*} value
   */
  enqueue(value) {
    const node = { value, next: null };
    if (this.tail === null) {
      // Empty queue; make this the only node
      this.head = node;
    } else {
      // Make this the successor of the current last node
      this.tail.next = node;
    }
    // Either way it is now the last node
    this.tail = node;
  }

  /**
   * Removes and returns the value at the front of the queue.
   * @returns {*} the oldest value, or `undefined` when the queue is empty
   *   (mirroring `Array.prototype.shift`).
   */
  dequeue() {
    if (this.head === null) {
      // Nothing to remove; previously this dereferenced null and threw.
      return undefined;
    }
    const { value, next } = this.head;
    this.head = next;
    if (next === null) {
      // That was the last element remaining
      this.tail = null;
    }
    return value;
  }
}
but for the best performance in practice, you’ll want to use a queue based on a circular buffer. double-ended-queue is one such npm package.

Related

Sorting a link list

I am trying to solve the LeetCode problem 148. Sort List
Given the head of a linked list, return the list after sorting it in ascending order.
I am trying to do it in a recursive way before trying something smarter, as I am learning to handle data structures.
This is my code:
/**
* Definition for singly-linked list.
* function ListNode(val, next) {
* this.val = (val===undefined ? 0 : val)
* this.next = (next===undefined ? null : next)
* }
*/
/**
 * Attempt at sorting a singly linked list in place by re-inserting nodes one at a time.
 * NOTE: this is the non-working version under discussion; the NOTE comments below
 * mark the spots where the logic goes wrong.
 *
 * @param {ListNode} head
 * @return {ListNode}
 */
var sortList = function(head) {
let previousNode = head
// Lists with zero or one node are returned unchanged.
if(!head){
return head
}
let node = head.next
if(!node){
return head
}
// `start` is the current first node of the list; it is replaced when a node is moved to the front.
let start = head
// `previousNode1` is meant to be the node that precedes the node currently being processed.
let previousNode1 = head
// Recursive driver: processes `node`, then recurses on the node that followed it.
// The `previousNode` parameter is accepted but never read inside this function.
function sortList1(node, previousNode){
// NOTE: stops as soon as `node` is the last node, so the last node is never passed to traverseFromHead.
if(node.next == null){
return start
}
// Remember the successor before `node` is possibly relinked.
let temp = node.next;
// A truthy result means `node` was placed in front of `start`.
if(traverseFromHead(node)){
start = node
}
// NOTE: assigned unconditionally, even when `node` was just moved elsewhere in the list.
previousNode1 = node
return sortList1(temp, node)
}
return sortList1(node, previousNode)
// Function declarations are hoisted, so this is callable from sortList1 even though it follows the return.
// Returns `node` when it was moved in front of `start`, null after an insertion inside the loop, false otherwise.
function traverseFromHead(node){
let myPosition = start
// `inserted` is never read.
let inserted = false
// Case 1: `node` is smaller than the current first node -> unlink it and put it in front.
if(start.val > node.val){
previousNode1.next = node.next
node.next = start
console.log("found in head excahange", node)
return node;
}
let myprevious2 = start
// Case 2: walk from `start` looking for the first node whose value is >= node.val.
// NOTE: the loop condition tests `myPosition.next`, so the last node is never compared.
while(myPosition.next != null){
if(myPosition.val>=node.val){
console.log("before check start was", start, "with position at", myPosition.val, "for point", node.val, "my previous is", myprevious2.val)
let temp = node.next
// NOTE: `node` is linked in here without being unlinked from its old predecessor (see the disabled line below).
myprevious2.next = node
node.next = myPosition
// previousNode1.next = temp
console.log("after update start is", start, "with position at", myPosition.val, "for point", node.val)
return null
}
myprevious2 = myPosition;
myPosition = myPosition.next
}
return false
}
};
I am not able to get it working correctly; it must be I am doing something wrong by logic or by concept
For instance for the linked list 4→2→3→0 the expected output would be 0→2→3→4, but my code produces 2→0.
Where is the problem in my code?
You have tried to implement insertion sort.
There are these issues that prevent it from working correctly:
The base case of the recursive function is not correct. With if(node.next == null) you are stopping too early. It could well be that this tail node should be moved elsewhere in the list, yet this node's value is not compared with anything. The stop condition really should be node == null.
previousNode1 = node is not always correctly identifying the previous node. If the call to traverseFromHead moved node to elsewhere in the list, then previousNode1 should not change, because what used to be the node before node, will now have become the node before the next node you want to process. For the same reason the second argument you pass in the recursive call is most often wrong: sortList1(temp, node).
It is a bit overwhelming to have that many variants of previousNodeXX variables. I would suggest to at least eliminate this previousNode1 and continue to work with previousNode, passing it also as argument to traverseFromHead. So call it as traverseFromHead(node, previousNode) and make sure you pass the correct second argument to sortList1. There are two cases to distinguish:
When node wasn't moved, then sortList1(temp, node) is correct, but when node was moved, it should be sortList1(temp, previousNode). You can make the distinction with a conditional operator:
sortList1(temp, previousNode.next != node ? previousNode : node)
traverseFromHead only removes the node from its current position in the if case, but forgets to do the same in the more general case. In the general case, the node is inserted, but previousNode.next is not adapted, meaning you now have two nodes whose next property points to node. There are several ways to do it right. I would suggest to perform the node-removal action in all cases before doing anything else. You could place the code for node extraction before the if statement so that it always happens:
previousNode.next = node.next // <-- should always happen
if(start.val > node.val){
//...
I can understand why you put previousNode1.next = temp in comments inside the loop. Most often this needs to happen, but not when node didn't move! To solve this dilemma, perform a quick exit when node is already at its correct position (in comparison with previousNode). So at the top of the function do:
if (node.val >= previousNode.val) return null;
Now you can be sure that node will move.
traverseFromHead has a strange while condition. With the above corrections in place, this while condition can just be the opposite of the if condition, so that you can deal with the insertion after the loop:
while (myPosition.val < node.val) {
myprevious2 = myPosition;
myPosition = myPosition.next
}
Here is your code with those corrections:
/**
 * Sorts a singly linked list in ascending order of `val` using insertion sort,
 * relinking the existing nodes in place.
 *
 * The list is walked with a loop rather than one recursive call per node, so
 * the call stack stays flat no matter how long the list is.
 *
 * @param {ListNode} head - first node of the list, or null for an empty list
 * @return {ListNode} first node of the sorted list
 */
var sortList = function(head) {
  // Zero or one node: nothing to sort.
  if (!head || !head.next) {
    return head;
  }

  let start = head;       // first node of the sorted prefix
  let lastSorted = head;  // last node of the sorted prefix (the node preceding `node`)
  let node = head.next;   // node currently being placed

  while (node != null) {
    // Remember the successor now; `node.next` is overwritten if the node moves.
    const upcoming = node.next;

    if (node.val >= lastSorted.val) {
      // Quick exit for the trivial case: already in position, the prefix just grows.
      lastSorted = node;
    } else {
      // Always first extract the node from its current position.
      lastSorted.next = upcoming;

      if (start.val >= node.val) {
        // Belongs at the very front.
        node.next = start;
        start = node;
      } else {
        // Look for the insertion spot. `lastSorted` is in the prefix and has a
        // larger value than `node`, so this scan stops before running off the list.
        let before = start;
        while (before.next.val < node.val) {
          before = before.next;
        }
        // Now perform the re-insertion.
        node.next = before.next;
        before.next = node;
      }
    }

    node = upcoming;
  }

  return start;
};
Other remarks
Insertion sort is not the most efficient among sorting algorithms, and for linked lists it is quite easy to implement better performing sorting algorithms.
See for instance Merge sort on linked list
I have here adapted that solution for the LeetCode challenge (spoiler):
/**
 * Merge sort for a singly linked list: split the list in two halves, sort each
 * half recursively, then merge the two sorted halves by relinking nodes.
 *
 * @param {ListNode} head - first node of the list, or null/undefined
 * @return {ListNode} first node of the sorted list
 */
var sortList = function(head) {
if (!head || !head.next) return head; // Nothing to sort
// Find last node of first half
// (`fast` starts one node ahead and advances two nodes per step while `tail` advances one)
let tail = head;
for (let fast = tail.next; fast?.next; fast = fast.next.next) {
tail = tail.next;
}
// Split list into two halves
let head2 = tail.next;
tail.next = null;
// Recursively sort the two shorter lists
head = sortList(head);
head2 = sortList(head2);
// Merge the two sorted lists
// Make sure `head` is the list that starts with the smaller value; it becomes the result's head.
if (head.val > head2.val) [head2, head] = [head, head2];
// From here on `tail` is the last node already merged; `tail.next` and `head2`
// are the unmerged remainders of the two lists.
tail = head;
while (tail.next && head2) {
// Swap the remainders whenever the other one starts with the smaller value,
// so that `tail.next` is always the next node to keep.
if (tail.next.val > head2.val) [head2, tail.next] = [tail.next, head2];
tail = tail.next;
}
// If the remainder attached to `tail` ran out, append whatever is left in `head2`.
tail.next ??= head2;
return head;
};
MergeSort naturally fits for linked lists.
In merge sort you consider that merging 2 size one lists is trivial (just put the higher head value after the lower one).
Extending that idea, if you have two already sorted lists, then it's easy to merge them as well. Just create a new list to which you repeatedly move the lower of the two lists' head values until both lists are empty.
So you can do a merge sort by creating first 2 lists of size 1. (ie. the first 2 elements of your list) Then merging them.
Then create a second list of size 2 (by merging 2 of size 1).
And continue until you have merged the entire original list into a sorted list.
Recursion
To implement this recursively first write a merge function that given two sorted lists merges them by preserving the sort order.
Then do the following to implement sort:
If your list is empty, then return the list as your result
Now merge the first element with sort(rest of the list)

Python __repr__ method: writing a JS equivalent?

I am working through a short beginner's course on Algorithms and Data Structures. The instructor's language is Python; I am converting the code examples to JavaScript. So far, so good.
I am dealing with Linked Lists. The instructor tests the code using Python's __repr__() method. After days of trial and error, I have a working JS solution, but it is not exactly the same as the Python code. I would like to know if there is a better way of implementing the JS code, which I provide, along with the Python code.
Python
# class LinkedList and its methods are presumed to exist
def __repr__(self):
    """Return the list as text, e.g. ``[Head: 3]-> [2]-> [Tail: 1]``.

    The first node is labelled ``Head``, a later node without a successor is
    labelled ``Tail``, and every other node is shown as its bare data.
    An empty list yields an empty string.
    """
    nodes = []
    current = self.head
    while current:
        if current is self.head:
            nodes.append("[Head: %s]" % current.data)
        elif current.next_node is None:
            nodes.append("[Tail: %s]" % current.data)
        else:
            nodes.append("[%s]" % current.data)
        current = current.next_node
    return '-> '.join(nodes)
# running script
>>> l = LinkedList()
>>> l.add(1)
>>> l.add(2)
>>> l.add(3)
>>> l
[Head: 3]-> [2]-> [Tail: 1] # output
>>>
JS
// class LinkedList and its methods are presumed to exist
repr () {
let nodes = "";
let current = this.head;
while (current) {
if (current === this.head) {
nodes = `Head: ${current.data}-> `;
} else if (current.nextNode === null) {
nodes += `Tail: ${current.data}`;
} else {
nodes += `${current.data}-> `;
}
current = current.nextNode;
}
return nodes;
// running the script
// A class constructor has to be invoked with `new`; calling it as a plain function throws a TypeError.
let l = new LinkedList();
l.add(1);
l.add(2);
l.add(3);
let result = l.repr();
console.log(result); // Head: 3-> 2-> Tail: 1
Again, the two fragments will only run in a full implementation of the Linked List algorithm, but they do work.
Attempts I have made: I tried to use JS toString(), append() and appendChild(), but they were too difficult for me to understand how best to use them, particularly as the last two modify the DOM. I'm sure there is a better way of implementing a JS equivalent of the Python __repr__(); I would like to know how it might be done.
A closer implementation would use a toString method. This method is called implicitly when a conversion to string is needed. Python has actually two methods for this, which have a slightly different purpose: __repr__ and __str__. There is no such distinction in JavaScript.
Furthermore, we should realise that Python's interactive prompt implicitly calls __repr__ to display a value (and print falls back to __repr__ when no __str__ is defined), which is not how console.log works. So with console.log you'd have to enforce that conversion to string.
Here is how the given Python code would be translated most literally (I add the classes needed to run the script):
/**
 * One element of a singly linked list.
 * Property names follow the Python original (`data`, `next_node`).
 */
class Node {
  /**
   * @param {*} data - the value stored in this node
   * @param {?Node} [next=null] - the node that follows this one
   */
  constructor(data, next = null) {
    Object.assign(this, { data, next_node: next });
  }
}
/**
 * Singly linked list whose `add` prepends, with a `toString` that mirrors the
 * Python `__repr__` output format.
 */
class LinkedList {
  constructor() {
    // First node of the list; null while the list is empty.
    this.head = null;
  }

  /**
   * Prepends `data`: the new node becomes the head and points at the old head.
   * @param {*} data
   */
  add(data) {
    const previousHead = this.head;
    this.head = new Node(data, previousHead);
  }

  /**
   * @returns {string} e.g. "[Head: 3]-> [2]-> [Tail: 1]"; "" for an empty list
   */
  toString() {
    const labels = [];
    for (let cursor = this.head; cursor; cursor = cursor.next_node) {
      const value = cursor.data;
      let label = `[${value}]`;
      if (cursor === this.head) {
        label = `[Head: ${value}]`;
      } else if (cursor.next_node === null) {
        label = `[Tail: ${value}]`;
      }
      labels.push(label);
    }
    return labels.join('-> ');
  }
}
// running script
// add() prepends, so after these three calls the list reads 3, 2, 1 from the head.
let l = new LinkedList();
l.add(1);
l.add(2);
l.add(3);
// Force conversion to string
// (interpolating the list into a template literal invokes its toString method)
console.log(`${l}`); // [Head: 3]-> [2]-> [Tail: 1]
Personally, I would make the following changes (not reflected in the Python version):
Produce output without the words "Head" and "Tail" and other "decoration". This is too verbose to my liking. Just output the separated values.
Make list instances iterable, implementing the Symbol.iterator method (In Python: __iter__). Then use this for implementing the toString method.
Allow the list constructor to take any number of values with which the list should be populated.
This leads to the following version:
/** One element of a singly linked list. */
class Node {
  /**
   * @param {*} data - the value stored in this node
   * @param {?Node} [next=null] - the node that follows this one
   */
  constructor(data, next = null) {
    Object.assign(this, { data, next });
  }
}
/**
 * Iterable singly linked list. Values passed to the constructor end up in the
 * list in the order given (first argument at the head).
 */
class LinkedList {
  /**
   * @param {...*} values - initial contents, head first
   */
  constructor(...values) {
    this.head = null;
    // add() prepends, so feed the values last-to-first to keep their order.
    for (let k = values.length - 1; k >= 0; k -= 1) {
      this.add(values[k]);
    }
  }

  /**
   * Prepends `data` to the list.
   * @param {*} data
   */
  add(data) {
    const previousHead = this.head;
    this.head = new Node(data, previousHead);
  }

  /** Yields the stored values from head to tail, which makes lists iterable. */
  *[Symbol.iterator]() {
    for (let cursor = this.head; cursor; cursor = cursor.next) {
      yield cursor.data;
    }
  }

  /**
   * @returns {string} the values joined with "→"; "" for an empty list
   */
  toString() {
    // Spreading the list drives the iterator above.
    const items = [...this];
    return items.join("→");
  }
}
// Provide the desired values immediately:
// (the constructor adds them in reverse, so they are listed here in head-to-tail order)
let l = new LinkedList(3, 2, 1);
console.log(`${l}`); // 3→2→1

Javascript observer or proxy without all changes going through proxy

I'm writing a subclass of arrays in Javascript to have better support for matrix operations (I know others exist, this is partially for me to re-teach myself linear algebra), and what I want is to have some properties that are reset whenever any values in the matrix are adjusted. Some calculations like the determinant are computationally intensive, and I'd like to be able to store them to avoid re-calculation, but then they need to be reset to null whenever any matrix elements are changed.
Essentially, it seems like what i want is the deprecated Array.observe(). And the replacement, proxies, seem like a lot of overhead for this one thing. As alluded to in some of the comments on Detecting Changes in a Javascript Array using the proxy object that were not directly addressed, I don't want to have to access my matrices only ever through proxies. I use a lot of handy [i][j] indexing and [mat[i], mat[j]] = [mat[j], mat[i]] in the code I've written so far.
/**
 * Array subclass for matrices. Array arguments are converted into nested
 * Matrix instances, and the determinant is cached in `_determinant`.
 */
class Matrix extends Array {
  /**
   * @param {...*} entries - the elements; any element that is itself an array
   *   is wrapped in its own Matrix
   */
  constructor(...entries) {
    const converted = entries.map((entry) =>
      Array.isArray(entry) ? new Matrix(...entry) : entry
    );
    super(...converted);
    // null means "not calculated yet"
    this._determinant = null;
  }

  /**
   * Returns the cached determinant, running upperEchelon first when nothing
   * is cached or when a recalculation is forced.
   *
   * @param {boolean} [forceRecalculate=false]
   */
  determ(forceRecalculate = false) {
    const isMissing = this._determinant === null;
    if (isMissing || forceRecalculate) {
      this.upperEchelon();
    }
    return this._determinant;
  }

  /**
   * @param {boolean} [reduced=false]
   */
  upperEchelon(reduced = false) {
    //There's a lot of code here but in the process of doing this other thing
    //you get 99% of the way to calculating the determinant so it does this
    // (`factor` would come from that elided code; it is not defined in this excerpt)
    this._determinant = factor;
  }
}
Basically, I want anything like mat[0][0] = 10 or mat.push([2,4,5]) that updates the values in the matrix to set mat._determinant = null. Or any equivalent method of flagging that it needs to be re-calculated next time it's asked for. I'm not opposed to using proxies necessarily if someone can help me figure out the implementation, I would just rather have this set-to-null-on-update property be inherent to my class functionality.
What I really want is a way to overload base methods like [] a la C# so the functions that do the updating would trigger this without changing syntax, but I've resigned myself to not having that in JS.
While a Proxy would work, it would also be pretty slow. A different approach would be for every method that needs to use the value of _determinant go through a different function first to check to see if the _determinant needs to be updated (and if so, updates it). This way, the expensive recalculation is not done every time the array changes, but only just in time for the result to be used. For example:
/**
 * Array subclass for matrices that recalculates its determinant lazily: the
 * matrix contents are serialised on every getDeterm() call and the expensive
 * step only runs when that serialisation differs from the previous one.
 */
class Matrix extends Array {
  /**
   * @param {...*} entries - the elements; any element that is itself an array
   *   is wrapped in its own Matrix
   */
  constructor(...entries) {
    const converted = entries.map((entry) =>
      Array.isArray(entry) ? new Matrix(...entry) : entry
    );
    super(...converted);
    this._determinant = null;
  }

  /**
   * Recursive deep join of the contents, e.g. "[[2,3,4],5]".
   * (toString could be used instead if that does not interfere with anything else.)
   *
   * @returns {string}
   */
  getString() {
    const pieces = [];
    for (const item of this) {
      pieces.push(item instanceof Matrix ? item.getString() : item);
    }
    return `[${pieces.join(',')}]`;
  }

  /**
   * @returns {number} the determinant, recalculated only if the contents
   *   changed since the previous call
   */
  getDeterm() {
    const snapshot = this.getString();
    const hasChanged = snapshot !== this._lastString;
    if (hasChanged) {
      this._lastString = snapshot;
      this.upperEchelon();
    }
    return this._determinant;
  }

  /** Stand-in for the real calculation: logs and stores a random number. */
  upperEchelon() {
    console.log('running upperEchelon');
    this._determinant = Math.random();
  }
}
// Two entries: the nested row [2, 3, 4] (converted to a Matrix) and the number 5.
const m = new Matrix([2, 3, 4], 5);
// First call: no previous serialisation is stored yet, so upperEchelon runs.
console.log(m.getDeterm());
// Not calculated again:
console.log(m.getDeterm());
// Mutation, next call of getDeterm will run upperEchelon:
m[0][0] = 1;
console.log(m.getDeterm());

Breadth first search binary search tree javascript implementation

I have the following code that implements a BST tree in JavaScript.
/**
 * A tree node holding `value` and links to its two children.
 * @param {*} value
 */
function Node(value) {
  Object.assign(this, { left: null, right: null, value });
}

/** A binary search tree; `root` is null while the tree is empty. */
function BinarySearchTree() {
  this.root = null;
}

/**
 * Inserts `value` into the tree. Values smaller than a node go into its left
 * subtree; everything else (including equal values) goes to the right.
 * @param {*} value
 */
BinarySearchTree.prototype.push = function (value) {
  const fresh = new Node(value);
  if (!this.root) {
    this.root = fresh;
    return;
  }
  let cursor = this.root;
  for (;;) {
    const side = value < cursor.value ? 'left' : 'right';
    if (!cursor[side]) {
      // Free slot found: attach the new node and stop.
      cursor[side] = fresh;
      return;
    }
    cursor = cursor[side];
  }
};
// Sample tree: 27 becomes the root; each later value goes left of a node when it is
// smaller than that node's value and right otherwise.
var a = new BinarySearchTree();
a.push(27);
a.push(14);
a.push(35);
a.push(10);
a.push(19);
a.push(31);
a.push(42);
I am trying to implement a function which can do a breadth first traversal of the tree. This is what I have tried so far.
// The root's own value is printed here; traverse() is then given the root node.
console.log(a.root.value);
traverse(a.root);
//function to traverse
// Starting at `node`, repeatedly prints the children of the current node, steps to its
// left child, prints that child's children, and then prints the children of the right sibling.
// NOTE: the loop only ever descends through `.left` links.
// NOTE: `currentNode` and `parent` are assigned without a declaration, so they are not
// local variables of this function.
function traverse(node) {
currentNode = node;
while (currentNode.left) {
displayNodes(currentNode);
parent = currentNode;
currentNode = currentNode.left;
displayNodes(currentNode);
if(parent.right!=null){
displayNodes(parent.right);
}
}
}
/**
 * Logs the values of a node's children: the left child first, then the right
 * child. A child that is null or undefined is skipped.
 *
 * @param {{left: ?Object, right: ?Object}} node
 */
function displayNodes(node) {
  for (const side of ['left', 'right']) {
    const child = node[side];
    if (child != null) {
      console.log(child.value);
    }
  }
}
I am unable to implement a function that could scale with a large number of data. I am not sure if a recursive method to traverse would be better or using a while loop. How can I implement the function? I know that the function gives unexpected behavior? What correction should I make?
You currently traverse the path from the root node to the left-most leaf.
A simple non-recursive breadth-first traversal function invoking a callback on each traversed node could look as follows:
/**
 * Breadth-first traversal: invokes `cb` on every node of the tree, level by
 * level and left to right within a level.
 *
 * @param {?Object} node - root of the (sub)tree; null or undefined means an
 *   empty tree, in which case `cb` is never called
 * @param {function(Object): void} cb - called once per visited node
 */
function traverse(node, cb) {
  if (node == null) {
    // Empty tree: nothing to visit (avoids reading `.left` of null).
    return;
  }
  // All nodes of the level currently being visited.
  let level = [node];
  while (level.length > 0) {
    // Children of this level, collected in order, form the next level.
    const next = [];
    for (const current of level) {
      cb(current);
      if (current.left) next.push(current.left);
      if (current.right) next.push(current.right);
    }
    level = next;
  }
}
// Example:
// NOTE(review): `root` is not defined in this snippet; presumably it stands for the
// tree's root node (such as `a.root` from the question) - confirm before running.
traverse(root, function(node) {
console.log(node.value);
});
It works by keeping an array of already discovered or traversed nodes current which initially contains just your root node. Now, you iteratively replace each node in that list with its children. In above function, the children are stored in a next array. At the end of each iteration, all nodes of the current level in current are replaced with all their children of the next deeper level in next. See also the first suggestion given by @DavidKnipe's answer.
A non-recursive approach has the advantage of not being subject to the call stack size limit. This theoretically allows you to handle larger data structures when the call stack size is limited.
If you're looking for a way to BFS using O(1) memory, I don't think there's a nice way to do it. (DFS is another matter though. Are you sure it has to be BFS?)
There are two ways I can see to do this. You could start with the array [this.root], and write a function that iterates over an array of nodes and then returns an array of children of those nodes. Then call that function on the array of children, and keep going down the tree until you get an empty array.
If memory is an issue, there's another way to do it. Instead of remembering the array of nodes at a given level, you could just remember the depth, then redo the iteration each time. So you'd have a function which takes a natural number n and iterates over the tree, but without going deeper than n, and does whatever it is you're trying to do at the nth level only; then call this function for all values of n until there are no more nodes left.
That last one might sound very wasteful, but it might not be too bad if the last few levels of the tree contain most of the nodes. It depends on your dataset and computational capabilities.

When should I use a JavaScript array in lieu of a JavaScript object literal?

This question may seem trivial but it's something that's been bothering me for a while so I thought I'd ask. Apologies in advance if this question seems silly/naive to you.
So in JavaScript, object literals {} are hash tables under the hood. Meaning that they have near constant time lookup and insertion. So the question I keep wondering is (outside of pure convenience) why would I ever use a JavaScript array in lieu of an object literal?
Take this example of creating a Queue in JavaScript;
1) First, we'll use an Object literal as our storage component;
/**
 * FIFO queue that keeps an object alongside the ordered array so that
 * membership checks do not have to scan the array.
 *
 * `storage` maps each queued item (as a property key) to the number of copies
 * currently in the queue; `order` holds the items in arrival order.
 */
var Queue = function() {
  // No prototype, so inherited names such as "constructor" or "toString"
  // are not mistaken for queued items.
  this.storage = Object.create(null);
  this.order = [];
  this.length = 0;

  /** Appends `item` to the back of the queue. */
  this.add = function(item) {
    // Count copies instead of storing a flag, so duplicates are tracked correctly.
    this.storage[item] = (this.storage[item] ?? 0) + 1;
    this.order.push(item);
    this.length++;
  };

  /**
   * Removes and returns the item at the front of the queue.
   * Returns undefined, and leaves the queue untouched, when it is empty.
   */
  this.remove = function() {
    if (this.order.length === 0) {
      // Nothing to remove; do not let `length` go negative.
      return undefined;
    }
    var removed = this.order.shift();
    this.storage[removed] -= 1;
    if (this.storage[removed] === 0) {
      // Last copy gone: forget the key entirely.
      delete this.storage[removed];
    }
    this.length--;
    return removed;
  };

  /** Reports whether at least one copy of `value` is currently queued. */
  this.contains = function(value) {
    return this.storage[value] !== undefined;
  };

  /** Returns the number of items currently queued. */
  this.size = function() {
    return this.length;
  };
};
2) Second, using an array as our Queue's storage component
/**
 * FIFO queue that uses a plain array as its only storage.
 */
var Queue2 = function() {
  this.storage = [];

  /** Appends `item` to the back of the queue; returns the queue for chaining. */
  this.add = function(item) {
    this.storage.push(item);
    return this;
  };

  /** Removes and returns the item at the front (undefined when empty). */
  this.remove = function() {
    return this.storage.shift();
  };

  /**
   * Reports whether `target` is in the queue by binary-searching a sorted
   * copy, so the queue's own order is left untouched.
   *
   * Assumes the stored values are mutually comparable with `<` / `>`
   * (e.g. all numbers or all strings).
   */
  this.contains = function(target) {
    var clone = this.storage.slice(0);
    // Sort with the same ordering the search below relies on. The default
    // sort compares string representations, which puts numbers in an order
    // ([1, 10, 2]) that a `<`/`>` binary search cannot navigate.
    clone.sort(function(a, b) {
      if (a < b) return -1;
      if (a > b) return 1;
      return 0;
    });
    // Search the half-open range [low, high).
    var low = 0;
    var high = clone.length;
    while (low < high) {
      var mid = Math.floor((high - low) / 2) + low;
      if (clone[mid] === target) return true;
      if (clone[mid] > target) {
        high = mid;
      } else if (clone[mid] < target) {
        low = mid + 1;
      } else {
        // Neither smaller, larger nor identical: the values cannot be ordered
        // against each other, so report "not found" rather than undefined.
        return false;
      }
    }
    return false;
  };
};
lets create two separate Queues;
// One instance of each implementation: the object-backed Queue and the array-backed Queue2.
var objQueue = new Queue();
var arrayQueue = new Queue2();
Now lets add 1 million random passwords to both our Queues.
// Fill each queue with one million random strings.
// Math.random().toString(36) renders the number in base 36 and slice(2) drops the leading "0.".
// Each queue receives its own independently generated strings.
for (var i = 0; i < 1000000; i++) {
objQueue.add(Math.random().toString(36).slice(2));
}
for (var i = 0; i < 1000000; i++) {
arrayQueue.add(Math.random().toString(36).slice(2));
}
Normally we would never have to do this all at one time. But yes, it's going to take a little longer to fill up our object Queue. We've had to create a separate array in our Queue constructor to keep track of the order. Yes, this is very annoying. Unfortunately, when adding keys as numbers to a JavaScript object, JavaScript auto-sorts the numerical keys. So if you add 4, then 6, then 2, then 1. The Object will look like this:
{ '1': 1, '2':1, '4':1, '6':1 }
and for a Queue, we want it to look like this:
{ '4': 1, '6':1, '2':1, '1':1 }
NOTE: This is not the case when adding strings. With strings, the order of addition is preserved so creating a separate array to preserve order is unnecessary. I'm not sure if this is intentional or just a mistake? But in this particular case, since we're creating a Queue and order matters, it's annoying. Which could be one reason people might prefer to just use an array for storage in this situation.
Removing from the queue will have the same time complexity in both cases because we're always removing the first value. No big deal there.
But what happens if we want to search our Queue? This probably goes against the nature of what a Queue is supposed to be used for(does it??? I dunno, maybe our Queue is a priority queue, we're tired of waiting, and we want to search where our number is in the queue) but let's just say we want to search this giant Queue we've created.
Let's add a value to the end of both our Queues that is not random:
// Append the same known value to both queues so that the lookups below have something to find.
objQueue.add('bvflq9kk61xajor');
arrayQueue.add('bvflq9kk61xajor');
Now lets search for this value in our array Queue and measure the time:
// Time the lookup in the array-backed queue: elapsed wall-clock milliseconds around one contains() call.
var start1 = new Date().getTime();
var result1 = arrayQueue.contains('bvflq9kk61xajor');
var end1 = new Date().getTime();
var time1 = end1 - start1;
Now lets search for this value in our object Queue and measure the time:
// Time the lookup in the object-backed queue: elapsed wall-clock milliseconds around one contains() call.
var start2 = new Date().getTime();
var result2 = objQueue.contains('bvflq9kk61xajor');
var end2 = new Date().getTime();
var time2 = end2 - start2;
RESULTS
// Report the lookup time and result for the array-backed queue.
console.log('Execution time for Array Queue: ' + time1);
console.log('RESULT:', result1);
//Execution time for Array Queue: **3873**
//RESULT: true
// Report the lookup time and result for the object-backed queue.
console.log('Execution time for Object Queue: ' + time2);
console.log('RESULT', result2);
//Execution time for Object Queue: **0**
//RESULT: true
The main reason the array Queue takes so much longer to execute is because I'm having to sort it before performing a Binary Search on it. Binary search is fast but requires the array to be pre-sorted. I could pre-sort the array each time I add a new value to the array Queue but then this would defeat the purpose of what a Queue is: FIFO(first in first out) and we'd start running into the same order issue we have with objects.
So I guess the real question I'm wondering is why do we even bother with arrays when we can do most things we need to do with a JS Object? Especially when JS objects are hash tables and thus we get near constant time lookup with them.
I guess it wouldn't matter so much if you never planned on searching through your data but when is that ever the case?? When you fill up an object or an array with data, you're inevitably going to want to search your data? Yes? No? Am I totally wrong here? And when you go to search this data, you'll be stoked to have stored it in what's essentially a hash table. Right?
Am I totally wrong here? Missing something? Would love some feedback on when to use arrays and when to use object literals. I'm guessing one case would be when your data set is small.

Categories