The expected result of:
(1.175).toFixed(2) = 1.18 and
(5.175).toFixed(2) = 5.18
But in JS showing:
(1.175).toFixed(2) = 1.18 but
*(5.175).toFixed(2) = 5.17*
How to rectify the problem?
It's not a bug. It's related to the fact numbers aren't stored in decimal but in IEEE754 (so 5.175 isn't exactly stored).
If you want to round in a specific direction (up) and you consistently have numbers of this precision, you might use this trick :
(5.175 + 0.00001).toFixed(2)
You could always try using round, instead of toFixed.
Math.round(5.175*100)/100
You could even try putting it in some prototype method if you want.
Created a jsBin that implements a simple prototype on Number.
/**
 * Replaces the built-in Number.prototype.toFixed with a Math.round based
 * variant.
 *
 * NOTE: unlike the native method this returns a *number*, not a string, so
 * trailing zeros are not kept (for example `(7).toFixed(3)` yields `7`).
 *
 * @param {number} decimals - Number of decimal places to round to.
 * @returns {number} The receiver rounded to `decimals` places.
 */
Number.prototype.toFixed = function (decimals) {
  var scale = Math.pow(10, decimals);
  var shifted = this * scale;
  return Math.round(shifted) / scale;
};
It's because the numbers are stored as IEEE754.
I would recommend using the Math object (its round, floor or ceil methods, depending on your needs).
I've just created a class MathHelper that can easily solve your problem:
/**
 * Rounding helpers that work at a given number of decimal places.
 *
 * Each method scales the number by 10^numberOfDecimals, applies the matching
 * Math rounding function and scales the result back.
 *
 * The helpers are plain local functions: the previous version assigned them
 * to `this` inside the IIFE, which leaked `round`/`floor`/`ceil` as globals in
 * sloppy mode and threw a TypeError in strict mode / ES modules.
 */
var MathHelper = (function () {
  // Shared implementation: scale up, apply the rounding function, scale down.
  function applyAtPrecision(roundingFn, number, numberOfDecimals) {
    var factor = Math.pow(10, numberOfDecimals);
    return roundingFn(number * factor) / factor;
  }

  /** Rounds `number` to the nearest value with `numberOfDecimals` decimals. */
  function round(number, numberOfDecimals) {
    return applyAtPrecision(Math.round, number, numberOfDecimals);
  }

  /** Rounds `number` down to `numberOfDecimals` decimals. */
  function floor(number, numberOfDecimals) {
    return applyAtPrecision(Math.floor, number, numberOfDecimals);
  }

  /** Rounds `number` up to `numberOfDecimals` decimals. */
  function ceil(number, numberOfDecimals) {
    return applyAtPrecision(Math.ceil, number, numberOfDecimals);
  }

  return {
    round: round,
    floor: floor,
    ceil: ceil
  };
})();
Usage:
MathHelper.round(5.175, 2)
Demo: http://jsfiddle.net/v2Dj7/
Actually I think this is a bug in the implementation of Number.prototype.toFixed. The algorithm given in ECMA-262 20.1.3.3 10-a says to round up as a tie-breaker. As others have mentioned, there probably isn't a tie to break due to floating point imprecisions in the implementation. But that doesn't make it right :)
At least this behavior is consistent across FF, Chrome, Opera, Safari. (Haven't tried others.)
FWIW, you can actually implement your own version of toFixed per the spec in JS and that behaves as you would expect. See http://jsfiddle.net/joenudell/7qahrb6d/.
Kippie
your solution has problems
one of them
39133.005.toFixed(2) => 39133
// Calc bundles decimal-rounding helpers (float2Array, round2, toFixed10), an
// Array.prototype.map polyfill, and an init() that installs toFixed10, round2
// and isInt on Number.prototype. A single instance is created as the global
// `calc` on the last line and init() is called immediately.
var Calc = function () {
var self = this;
// Splits the decimal string form of `num` into its digits.
// Returns { exp, mant } where `mant` is the array of characters (with the "."
// removed) each passed through parseInt, and `exp` is the index of "." minus
// the index of the last character, i.e. minus the number of characters after
// the decimal point when a "." is present.
this.float2Array = function(num) {
var floatString = num.toString(),
exp = floatString.indexOf(".") - (floatString.length - 1),
mant = floatString.replace(".", "").split("").map(function (i) { return parseInt(i); });
return { exp: exp, mant: mant };
};
// Rounds `num` to `dec` decimals with Math.round and returns a string.
// `dec` defaults to 2 whenever it is falsy (so 0 also becomes 2); `sep`
// defaults to '' and is inserted by the regex between groups of three digits
// of the integer part.
this.round2 = function (num, dec, sep) {
var decimal = !!dec ? dec : 2,
separator = !!sep ? sep : '',
r = parseFloat(num),
exp10 = Math.pow(10, decimal);
r = Math.round(r * exp10) / exp10;
// Format with the native toFixed, then split integer and fractional parts.
var rr = Number(r).toFixed(decimal).toString().split('.');
var b = rr[0].replace(/(\d{1,3}(?=(\d{3})+(?:\.\d|\b)))/g, "\$1" + separator);
r = (rr[1] ? b + '.' + rr[1] : b);
return r;
};
// Digit-array based replacement for toFixed: `f` is the number, `num` the
// requested number of decimals. Relies on the isInt() and round2() methods
// that init() installs on Number.prototype.
this.toFixed10 = function (f, num) {
var prepareInt = self.float2Array(f),
naturalInt = prepareInt.mant,
places = Math.abs(prepareInt.exp),
result = prepareInt.mant.slice(),
resultFixedLenth;
// if number non fractional or has zero fractional part
if (f.isInt()) {
return f.toFixed(num);
}
// if the number of decimal places equals to required rounding
if (places === num) {
return Number(f).toString();
}
//if number has trailing zero (when casting to string type float 1.0050 => "1.005" => 005 <0050)
if (places < num) {
return Number(f).round2(num);
}
// NOTE(review): this loop appears to walk the digits from the right and
// propagate a round-half-up carry to the left (digits > 4 bump their left
// neighbour, 9s roll over to 0) — confirm against intended behaviour.
for (var e = naturalInt.length - (places > num ? (places - num) : 0), s = 0; e >= s; e--) {
if (naturalInt.hasOwnProperty(e)) {
if (naturalInt[e] > 4 && naturalInt[e - 1] < 9) {
result[e] = 0;
result[e - 1] = naturalInt[e - 1] + 1;
break;
} else if (naturalInt[e] > 4 && naturalInt[e - 1] === 9) {
result[e] = 0;
result[e - 1] = 0;
result[e - 2] = naturalInt[e - 2] < 9 ? naturalInt[e - 2] + 1 : 0;
} else if (e === 0 && naturalInt[e] === 9 && naturalInt[e + 1] === 9) {
result[e] = 0;
result.unshift(1);
} else {
break;
}
}
}
// Drop the surplus trailing digits, or pad with zeros when there are too few.
if (places - num > 0) {
resultFixedLenth = result.slice(0, -(places - num));
} else {
for (var i = 0, l = num - places; i < l; i++) {
result.push(0);
}
resultFixedLenth = result;
}
// Re-join the digits into an integer, shift the decimal point back by `num`
// places and format through the prototype round2().
return (parseInt(resultFixedLenth.join("")) / Math.pow(10, num)).round2(num);
};
// Installs Array.prototype.map when the environment does not provide it.
this.polyfill = function() {
if (!Array.prototype.map) {
Array.prototype.map = function (callback, thisArg) {
var T, A, k;
if (this == null) { throw new TypeError(' this is null or not defined'); }
var O = Object(this), len = O.length >>> 0;
if (typeof callback !== 'function') { throw new TypeError(callback + ' is not a function'); }
if (arguments.length > 1) { T = thisArg; }
A = new Array(len);
k = 0;
while (k < len) {
var kValue, mappedValue;
if (k in O) {
kValue = O[k];
mappedValue = callback.call(T, kValue, k, O);
A[k] = mappedValue;
}
k++;
}
return A;
};
}
};
// Runs the polyfill and adds toFixed10, round2 and isInt to Number.prototype.
// The installed methods delegate to the global `calc` instance, not to `self`.
this.init = function () {
self.polyfill();
Number.prototype.toFixed10 = function (decimal) {
return calc.toFixed10(this, decimal);
}
Number.prototype.round2 = function (decimal) {
return calc.round2(this, decimal);
}
// True when rounding the number leaves it unchanged (loose == comparison).
Number.prototype.isInt = function () {
return (Math.round(this) == this);
}
}
}, calc = new Calc(); calc.init();
this works good)
/**
 * Rounding helpers that nudge values upwards by a tiny delta so that decimal
 * ties such as 5.175 (stored slightly below the tie in binary) round up.
 *
 * Declared with `const`: the previous bare `obj = {…}` assignment created an
 * implicit global and throws a ReferenceError in strict mode / ES modules.
 */
const obj = {
  /**
   * Rounds to the nearest integer after adding a small delta.
   * @param {number} val - Value to round.
   * @returns {number} The rounded integer.
   */
  round(val) {
    const delta = 0.00001;
    // One nudge is enough: an exact .5 already rounds up with Math.round, so
    // the former extra nudge for that case never changed the result.
    return Math.round(val + delta);
  },

  /**
   * Formats `val` with exactly `count` decimals (for count > 0), padding with
   * zeros where needed.
   * @param {number} val - Value to format.
   * @param {number} [count=0] - Number of decimal places.
   * @returns {string} The formatted number.
   */
  fixed(val, count = 0) {
    const power = Math.pow(10, count);
    const res = this.round(val * power) / power;
    const text = `${res}`;
    if (count <= 0) {
      return text;
    }
    const [whole, fraction = ''] = text.split('.');
    // padEnd + slice replaces the deprecated String.prototype.substr.
    return `${whole}.${fraction.padEnd(count, '0').slice(0, count)}`;
  },
};
obj.fixed(5.175, 2)
// "5.18"
Related
I am trying to compute Fibonacci numbers using matrices. My target time complexity is O(log n) instead of O(n). The program should not return the full Fibonacci number but only its last six digits, which is why I return the remainder of the result divided by a million.
I have written the code and it runs well, but I noticed that for extremely large inputs I get NaN (not a number) instead of an output.
/**
 * Returns the n-th Fibonacci number modulo 1,000,000, computed by raising
 * [[1,1],[1,0]] to the (n - 1)-th power with raiseToPower.
 *
 * The modulo is applied only to the final matrix entry, so the intermediate
 * entries grow without bound (this is the version the question is about).
 *
 * @param {number} n - Index into the Fibonacci sequence.
 * @returns {number} floor(F(n) % 1000000).
 */
const fib = (n) => {
  if (n == 0) {
    return 0;
  }
  const fibMatrix = [
    [1, 1],
    [1, 0],
  ];
  raiseToPower(fibMatrix, n - 1);
  const topLeft = fibMatrix[0][0];
  return Math.floor(topLeft % 1000000);
};
/**
 * Raises `matrix` (expected to start as [[1,1],[1,0]]) to the n-th power in
 * place, by recursive squaring through multiplyMatrices.
 *
 * @param {number[][]} matrix - 2x2 matrix, overwritten with the result.
 * @param {number} n - Exponent; 0 and 1 leave the matrix untouched.
 */
const raiseToPower = (matrix, n) => {
  const isTrivial = n == 0 || n == 1;
  if (isTrivial) {
    return;
  }

  const half = Math.floor(n / 2);
  raiseToPower(matrix, half);

  // Square the half power, then multiply once more by the base for odd n.
  multiplyMatrices(matrix, matrix);
  const isOdd = n % 2 !== 0;
  if (isOdd) {
    const base = [
      [1, 1],
      [1, 0],
    ];
    multiplyMatrices(matrix, base);
  }
};
/**
 * Multiplies two 2x2 matrices and stores the product in `matrix`.
 *
 * All operands are read before anything is written, so passing the same
 * array for both arguments (squaring) is safe.
 *
 * @param {number[][]} matrix - Left operand; receives the product.
 * @param {number[][]} newMatrix - Right operand.
 */
const multiplyMatrices = (matrix, newMatrix) => {
  const [[a, b], [c, d]] = matrix;
  const [[e, f], [g, h]] = newMatrix;

  matrix[0][0] = a * e + b * g;
  matrix[0][1] = a * f + b * h;
  matrix[1][0] = c * e + d * g;
  matrix[1][1] = c * f + d * h;
};
console.log(fib(2000))
Thats my code above. Is there anything I could change to actually make this much more performant?
I actually found the error. My numbers were getting larger than the maximum value.
I fixed this by reducing every matrix entry modulo one million inside the multiplication step and returning the value straight from the matrix, instead of computing the full value first and only then taking the remainder. This approach is efficient and works for inputs of any size.
/**
 * Returns the last six digits of the n-th Fibonacci number.
 *
 * The matrix [[1,1],[1,0]] is raised to the (n - 1)-th power by raiseToPower;
 * the result is read from the top-left entry of the matrix.
 *
 * @param {number} n - Index into the Fibonacci sequence.
 * @returns {number} Top-left entry of the powered matrix (0 for n == 0).
 */
const fib = (n) => {
  if (n == 0) {
    return 0;
  }
  const fibMatrix = [
    [1, 1],
    [1, 0],
  ];
  raiseToPower(fibMatrix, n - 1);
  const [[topLeft]] = fibMatrix;
  return topLeft;
};
/**
 * Raises `matrix` (expected to start as [[1,1],[1,0]]) to the n-th power in
 * place, by recursive squaring through multiplyMatrices.
 *
 * @param {number[][]} matrix - 2x2 matrix, overwritten with the result.
 * @param {number} n - Exponent; 0 and 1 leave the matrix untouched.
 */
const raiseToPower = (matrix, n) => {
  const isTrivial = n == 0 || n == 1;
  if (isTrivial) {
    return;
  }

  const half = Math.floor(n / 2);
  raiseToPower(matrix, half);

  // Square the half power, then multiply once more by the base for odd n.
  multiplyMatrices(matrix, matrix);
  const isOdd = n % 2 !== 0;
  if (isOdd) {
    const base = [
      [1, 1],
      [1, 0],
    ];
    multiplyMatrices(matrix, base);
  }
};
/**
 * Multiplies two 2x2 matrices and stores the product, reduced modulo
 * 1,000,000, in `matrix`.
 *
 * All operands are read before anything is written, so passing the same
 * array for both arguments (squaring) is safe.
 *
 * @param {number[][]} matrix - Left operand; receives the reduced product.
 * @param {number[][]} newMatrix - Right operand.
 */
const multiplyMatrices = (matrix, newMatrix) => {
  const [[a, b], [c, d]] = matrix;
  const [[e, f], [g, h]] = newMatrix;

  const product = [
    [a * e + b * g, a * f + b * h],
    [c * e + d * g, c * f + d * h],
  ];

  matrix[0][0] = product[0][0] % 1000000;
  matrix[0][1] = product[0][1] % 1000000;
  matrix[1][0] = product[1][0] % 1000000;
  matrix[1][1] = product[1][1] % 1000000;
};
console.log(fib(10000))
Challenge: https://www.codewars.com/kata/57c7930dfa9fc5f0e30009eb/train/javascript
Hi I have been trying this problem for many hours but unfortunately my code is taking too long to pass:
// Brute-force search for the perfect power closest to `num` (the asker's
// original, slow attempt). Only bases 2..9 are tried, and the search walks
// outwards from num one integer at a time in both directions.
// Depends on getBaseLog(base, value), defined further down.
// Returns 4 for inputs below 4, `num` itself when it is recognised as a
// perfect power, otherwise whichever of the found upper/lower candidates is
// nearer (the lower one on a tie).
function closestPower(num) {
num = Math.floor(num);
if (num < 4) return 4;
// check if input is perfect power
let base = 2;
while (base < 10) {
// Truncated logarithm; relies on floating-point log being exact enough.
let exponent = Math.trunc(getBaseLog(base , num));
if ( Math.pow(base, exponent) === num ) {
return num;
}
base++;
}
// check for upper and lower
base = 2;
const verifyObj = {upper:null, lower:null}; // verify
let upperPower = num + 1;
let lowerPower = num - 1;
// Loop until both directions have either found a power or given up.
while (!verifyObj.upper || !verifyObj.lower)
{
// no perfect power
if (lowerPower <= 2 ) verifyObj.lower = "Not found";
if (upperPower === Infinity ) verifyObj.upper = "Not found";
// up til base 9
// Once every base has been tried for the current candidates, step the
// still-unresolved candidates one integer further away and restart at 2.
if (base === 10) {
if (!verifyObj.upper) upperPower++;
if (!verifyObj.lower) lowerPower--;
base = 2;
}
// upper
if (!verifyObj.upper) {
let exponent = Math.trunc(getBaseLog(base , upperPower));
if ( Math.pow(base, exponent) === upperPower ) {
verifyObj.upper = upperPower;
}
}
// lower
if (!verifyObj.lower) {
let exponent = Math.trunc(getBaseLog(base , lowerPower));
if ( Math.pow(base, exponent) === lowerPower ) {
verifyObj.lower = lowerPower;
}
}
base++;
}
console.log(verifyObj) // {upper:64, lower: 49}
// nearest power
// Strict "<" means the lower candidate wins when both are equally far.
if ((upperPower - num) < (num - lowerPower)) {
return upperPower;
}
else return lowerPower;
}
closestPower(56.5); // 49
/**
 * Logarithm of `y` in base `x`, computed as ln(y) / ln(x).
 *
 * @param {number} x - The base.
 * @param {number} y - The value whose logarithm is wanted.
 * @returns {number} log_x(y), subject to floating-point rounding.
 */
function getBaseLog(x, y) {
  const numerator = Math.log(y);
  const denominator = Math.log(x);
  return numerator / denominator;
}
I realized that my code is redundant as all i need to know if a “base” and “exponent” are more than 1 to determine a perfect power. Any formulas or ideas?
Some issues:
There is no reason why base should not be allowed to be 10 or more
Trying with upperPower at each increment is taking too many iterations. The distance to the next power might be rather big.
I would suggest the following algorithm:
Let the exponent to try with start at 2, and then increment by 1. Calculate which could be the corresponding base. The real base can be found by raising n to the inverse exponent (i.e. 1/exp). Then there are only 2 interesting integer bases to consider: by rounding downwards and upwards.
Here is an implementation:
/**
 * Finds the perfect power (base ** exponent, exponent >= 2) closest to `n`.
 *
 * For each exponent starting at 2, the real-valued root n ** (1 / exponent)
 * is computed and only the integer bases directly below and above it are
 * tried. The search stops once that root is no longer above 1.9.
 *
 * @param {number} n - Target value.
 * @returns {number} 4 when n <= 6, `n` when it is itself a perfect power,
 *   otherwise the nearest candidate (the smaller one on a tie).
 */
function closestPower(n) {
  if (n <= 6) {
    return 4;
  }

  let best = -1;
  let bestDistance = n;
  let exponent = 2;
  let root = n ** (1 / exponent);

  while (root > 1.9) {
    const lowBase = Math.floor(root);
    const highBase = Math.ceil(root);

    for (let base = lowBase; base <= highBase; base += 1) {
      const candidate = base ** exponent;
      const distance = Math.abs(candidate - n);
      if (distance == 0) {
        return n;
      }

      const isCloser = distance < bestDistance;
      const isTieButSmaller = distance == bestDistance && candidate < n;
      if (isCloser || isTieButSmaller) {
        bestDistance = distance;
        best = candidate;
      }
    }

    exponent += 1;
    root = n ** (1 / exponent);
  }

  return best;
}

// Some tests:
const tests = [
  [0, 4], [9, 9], [30, 32], [34, 32], [56.5, 49],
  [123321456654, 123321773584]
];
tests.forEach(([n, expected]) => {
  const result = closestPower(n);
  if (result !== expected) {
    console.log(`closestPower(${n}) returned ${result}, but expected ${expected}`);
  }
});
console.log("all tests done");
Here's my algorithm
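For each base, starting at 2, I take the logarithm of n + base in that base and round it down to get an exponent, then raise the base to that exponent to get a candidate power. The loop stops as soon as the exponent drops to 1, and the candidate closest to n is returned.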
/**
 * Heuristic search for the perfect power closest to `n`.
 *
 * For every base from 2 upwards, the exponent is floor(log_base(n + base))
 * and base ** exponent becomes a candidate. The loop ends when the exponent
 * reaches 1; the candidate nearest to n wins (the earliest one on a tie).
 *
 * @param {number} n - Target value.
 * @returns {number} 4 when n < 4, `n` when a candidate equals it, otherwise
 *   the nearest candidate.
 */
function closestPower(n) {
  if (n < 4) {
    return 4;
  }

  const candidates = [];
  for (let base = 2; base < n; base += 1) {
    const exponent = Math.floor(Math.log(n + base) / Math.log(base));
    const power = Math.pow(base, exponent);
    if (exponent === 1) {
      break;
    }
    if (power === n) {
      return n;
    }
    candidates.push(power);
  }

  const distanceTo = (value) => Math.abs(value - n);
  return candidates.reduce((nearest, candidate) => {
    if (distanceTo(candidate) < distanceTo(nearest)) {
      return candidate;
    }
    return nearest;
  });
}

[0, 9, 30, 34, 56.5, 123321456654].forEach((value) => {
  console.log(closestPower(value));
});
I'm generating a number based on a fixed character set.
/**
 * Builds a random four-character code: two letters drawn from "LEDGJR"
 * followed by two digits, each picked independently with Math.random().
 *
 * @returns {string} A code such as "RE39".
 */
function generator() {
  var char_list = "LEDGJR";
  var number_list = "0123456789";

  // Picks one random character from the given pool.
  function pick(pool) {
    return pool.charAt(Math.floor(Math.random() * pool.length));
  }

  var pieces = [
    pick(char_list),
    pick(char_list),
    pick(number_list),
    pick(number_list)
  ];
  return pieces.join("");
}
Result :
RE39, JE12 etc...
Once all the permutation related to the above sequence is done, then the generator should generate string as RE391, JE125 means adding one more number to the complete number.
How can I get the permutation count of sequence?
For simplicity consider the case where:
chars = "AB"
nums = "123";
and we want to generate a 4-digit sequence of two chars and two numbers.
We define these variables
rows = [chars, chars, nums, nums]
rowSizes = rows.map(row => row.length) // [2, 2, 3, 3]
It’s easy to see that the size of the set of all possible permutations equals:
spaceSize = rowSizes.reduce((m, n) => m * n, 1) // 2*2*3*3 = 36
And we define two set of utility functions, usage of which I'll explain in detail later.
decodeIndex() which gives us uniqueness
/**
 * Divides `a` by `b` and returns both quotient and remainder.
 *
 * The remainder comes from the `%` operator, and the quotient is derived
 * from it as (a - remainder) / b.
 *
 * @param {number} a - Dividend.
 * @param {number} b - Divisor.
 * @returns {number[]} The pair [quotient, remainder].
 */
function euclideanDivision(a, b) {
  const rest = a % b;
  return [(a - rest) / b, rest];
}
/**
 * Decodes a flat index into one position per row (mixed-radix digits, with
 * the first row as the least significant digit).
 *
 * @param {number} index - Flat index to decode.
 * @param {number[]} rowSizes - Size of each row, i.e. the radix per digit.
 * @returns {number[]} Position within each row, in row order.
 */
function decodeIndex(index, rowSizes) {
  const positions = [];
  let remaining = index;
  for (const size of rowSizes) {
    const [quotient, remainder] = euclideanDivision(remaining, size);
    positions.push(remainder);
    remaining = quotient;
  }
  return positions;
}
getNextIndex() which gives us pseudo-randomness
/**
 * Trial-division primality test.
 *
 * After ruling out multiples of 2 and 3, only divisors of the form 6k - 1 and
 * 6k + 1 (5, 7, 11, 13, …) are tried, up to the square root of n.
 *
 * @param {number} n - Number to test.
 * @returns {boolean} True when n is prime.
 */
function isPrime(n) {
  if (n <= 1) {
    return false;
  }
  if (n <= 3) {
    return true;
  }

  const divides = (divisor) => n % divisor === 0;
  if (divides(2) || divides(3)) {
    return false;
  }

  let candidate = 5;
  while (candidate * candidate <= n) {
    if (divides(candidate) || divides(candidate + 2)) {
      return false;
    }
    candidate += 6;
  }
  return true;
}
/**
 * Returns the smallest prime strictly greater than `n` (2 when n <= 1).
 *
 * @param {number} n - Lower bound (exclusive).
 * @returns {number} The next prime after n.
 */
function findNextPrime(n) {
  if (n <= 1) {
    return 2;
  }
  let candidate = n;
  do {
    candidate++;
  } while (!isPrime(candidate));
  return candidate;
}
/**
 * Picks the parameters of the full-cycle index iterator for a space of
 * `spaceSize` indexes.
 *
 * The step Q is the first prime after N divided by the golden ratio, which
 * spreads successive indexes around the circle. The cycle only visits every
 * index when Q is coprime to N; because Q is prime this fails exactly when Q
 * divides N (which happened for N = 2 and N = 5, where Q came out equal to
 * N), so in that case the next prime is taken instead.
 *
 * @param {number} spaceSize - Number of distinct indexes (N).
 * @returns {number[]} The triple [firstIndex, N, Q], where firstIndex is a
 *   random starting index in [0, N).
 */
function getIndexGeneratorParams(spaceSize) {
  const N = spaceSize;
  let Q = findNextPrime(Math.floor((2 * N) / (1 + Math.sqrt(5))));
  // N > 1 guard: every step works for N = 1, and N = 0 would loop forever.
  while (N > 1 && N % Q === 0) {
    Q = findNextPrime(Q);
  }
  const firstIndex = Math.floor(Math.random() * spaceSize);
  return [firstIndex, N, Q];
}
/**
 * Advances the full-cycle iterator by one step.
 *
 * @param {number} prevIndex - Current index.
 * @param {number} N - Size of the index space.
 * @param {number} Q - Step size.
 * @returns {number} (prevIndex + Q) modulo N.
 */
function getNextIndex(prevIndex, N, Q) {
  const advanced = prevIndex + Q;
  return advanced % N;
}
Uniqueness
Like mentioned above, spaceSize is the number of all possible permutations, thus each index in range(0, spaceSize) uniquely maps to one permutation. decodeIndex helps with this mapping, you can get the corresponding permutation to an index by:
/**
 * Returns the sequence that corresponds to a flat index.
 *
 * Uses the surrounding `rows` and `rowSizes` variables: the index is decoded
 * into one position per row and the characters at those positions are joined.
 *
 * @param {number} index - Flat index into the permutation space.
 * @returns {string} The sequence for that index.
 */
function getSequenceAtIndex(index) {
  const positions = decodeIndex(index, rowSizes);
  const symbols = rows.map((row, rowNumber) => {
    const position = positions[rowNumber];
    return row[position];
  });
  return symbols.join('');
}
Pseudo-Randomness
(Credit to this question. I just port that code into JS.)
We get pseudo-randomness by polling a "full cycle iterator"†. The idea is simple:
have the indexes 0..35 layout in a circle, denote upperbound as N=36
decide a step size, denoted as Q (Q=23 in this case) given by this formula‡
Q = findNextPrime(Math.floor(2 * N / (1 + Math.sqrt(5))))
randomly decide a starting point, e.g. number 5
start generating seemingly random nextIndex from prevIndex, by
nextIndex = (prevIndex + Q) % N
So if we put 5 in we get (5 + 23) % 36 == 28. Put 28 in we get (28 + 23) % 36 == 15.
This process will go through every number in circle (jump back and forth among points on the circle), it will pick each number only once, without repeating. When we get back to our starting point 5, we know we've reach the end.
†: I'm not sure about this term, just quoting from this answer
‡: This formula only gives a nice step size that will make things look more "random", the only requirement for Q is it must be coprime to N
Full Solution
Now let's put all the pieces together. Run the snippet to see result.
I've also included a counter before each log. For your case with char_list="LEDGJR", number_list="0123456789", the spaceSize for a 4-digit sequence should be 6*6*10*10 = 3600
You'll observe the log bump to 5-digit sequence at 3601 😉
/**
 * Divides `a` by `b` and returns both quotient and remainder.
 *
 * The remainder comes from the `%` operator, and the quotient is derived
 * from it as (a - remainder) / b.
 *
 * @param {number} a - Dividend.
 * @param {number} b - Divisor.
 * @returns {number[]} The pair [quotient, remainder].
 */
function euclideanDivision(a, b) {
  const rest = a % b;
  return [(a - rest) / b, rest];
}
/**
 * Decodes a flat index into one position per row (mixed-radix digits, with
 * the first row as the least significant digit).
 *
 * @param {number} index - Flat index to decode.
 * @param {number[]} rowSizes - Size of each row, i.e. the radix per digit.
 * @returns {number[]} Position within each row, in row order.
 */
function decodeIndex(index, rowSizes) {
  const positions = [];
  let remaining = index;
  for (const size of rowSizes) {
    const [quotient, remainder] = euclideanDivision(remaining, size);
    positions.push(remainder);
    remaining = quotient;
  }
  return positions;
}
/**
 * Trial-division primality test.
 *
 * After ruling out multiples of 2 and 3, only divisors of the form 6k - 1 and
 * 6k + 1 (5, 7, 11, 13, …) are tried, up to the square root of n.
 *
 * @param {number} n - Number to test.
 * @returns {boolean} True when n is prime.
 */
function isPrime(n) {
  if (n <= 1) {
    return false;
  }
  if (n <= 3) {
    return true;
  }

  const divides = (divisor) => n % divisor === 0;
  if (divides(2) || divides(3)) {
    return false;
  }

  let candidate = 5;
  while (candidate * candidate <= n) {
    if (divides(candidate) || divides(candidate + 2)) {
      return false;
    }
    candidate += 6;
  }
  return true;
}
/**
 * Returns the smallest prime strictly greater than `n` (2 when n <= 1).
 *
 * @param {number} n - Lower bound (exclusive).
 * @returns {number} The next prime after n.
 */
function findNextPrime(n) {
  if (n <= 1) {
    return 2;
  }
  let candidate = n;
  do {
    candidate++;
  } while (!isPrime(candidate));
  return candidate;
}
/**
 * Picks the parameters of the full-cycle index iterator for a space of
 * `spaceSize` indexes.
 *
 * The step Q is the first prime after N divided by the golden ratio, which
 * spreads successive indexes around the circle. The cycle only visits every
 * index when Q is coprime to N; because Q is prime this fails exactly when Q
 * divides N (which happened for N = 2 and N = 5, where Q came out equal to
 * N), so in that case the next prime is taken instead.
 *
 * @param {number} spaceSize - Number of distinct indexes (N).
 * @returns {number[]} The triple [firstIndex, N, Q], where firstIndex is a
 *   random starting index in [0, N).
 */
function getIndexGeneratorParams(spaceSize) {
  const N = spaceSize;
  let Q = findNextPrime(Math.floor((2 * N) / (1 + Math.sqrt(5))));
  // N > 1 guard: every step works for N = 1, and N = 0 would loop forever.
  while (N > 1 && N % Q === 0) {
    Q = findNextPrime(Q);
  }
  const firstIndex = Math.floor(Math.random() * spaceSize);
  return [firstIndex, N, Q];
}
/**
 * Advances the full-cycle iterator by one step.
 *
 * @param {number} prevIndex - Current index.
 * @param {number} N - Size of the index space.
 * @param {number} Q - Step size.
 * @returns {number} (prevIndex + Q) modulo N.
 */
function getNextIndex(prevIndex, N, Q) {
  const advanced = prevIndex + Q;
  return advanced % N;
}
/**
 * Creates a generator that yields every sequence of the given rows exactly
 * once, in the pseudo-random order produced by the full-cycle iterator.
 *
 * @param {string[]} rows - One pool of characters per output position.
 * @returns {function(): (string|null)} Function returning the next sequence,
 *   or null once the cycle has come back to its starting index.
 */
function generatorFactory(rows) {
  const rowSizes = rows.map((row) => row.length);
  const spaceSize = rowSizes.reduce((m, n) => m * n, 1);
  const [firstIndex, N, Q] = getIndexGeneratorParams(spaceSize);

  // Maps a flat index to the characters it selects in each row.
  const sequenceAt = (index) => {
    const positions = decodeIndex(index, rowSizes);
    const symbols = rows.map((row, rowNumber) => row[positions[rowNumber]]);
    return symbols.join("");
  };

  let cursor = firstIndex;
  let isDone = false;

  return function generator() {
    if (isDone) {
      return null;
    }
    const sequence = sequenceAt(cursor);
    cursor = getNextIndex(cursor, N, Q);
    // Back at the start: everything has been produced.
    isDone = cursor === firstIndex;
    return sequence;
  };
}
/**
 * Builds the list of rows for a sequence: `rowsOfChars` copies of `chars`
 * followed by `rowsOfNums` copies of `nums`.
 *
 * Counts are floored and clamped at zero. The previous `while (count--)`
 * loops never terminated for negative or fractional counts.
 *
 * @param {string} chars - Pool used for the character positions.
 * @param {string} nums - Pool used for the numeric positions.
 * @param {number} rowsOfChars - How many character rows to emit.
 * @param {number} rowsOfNums - How many numeric rows to emit.
 * @returns {string[]} The rows, characters first.
 */
function getRows(chars, nums, rowsOfChars, rowsOfNums) {
  // Normalise a count to a non-negative integer (NaN/undefined become 0).
  const toCount = (value) => Math.max(0, Math.floor(Number(value)) || 0);

  return [
    ...Array(toCount(rowsOfChars)).fill(chars),
    ...Array(toCount(rowsOfNums)).fill(nums),
  ];
}
/**
 * Creates a generator that never runs dry: whenever the current sequence
 * space is exhausted, a new one with one additional numeric row is started.
 *
 * @param {string} chars - Pool used for the character positions.
 * @param {string} nums - Pool used for the numeric positions.
 * @param {number} initRowsOfChars - Number of character positions.
 * @param {number} initRowsOfNums - Initial number of numeric positions.
 * @returns {function(): (string|null)} Function returning the next sequence.
 */
function autoRenewGeneratorFactory(chars, nums, initRowsOfChars, initRowsOfNums) {
  let numericRows = initRowsOfNums;

  // Builds a generator for the current width, then widens the next one.
  const nextGenerator = () => {
    const rows = getRows(chars, nums, initRowsOfChars, numericRows);
    const created = generatorFactory(rows);
    numericRows += 1;
    return created;
  };

  let active = nextGenerator();

  return function proxyGenerator() {
    const sequence = active();
    if (sequence !== null) {
      return sequence;
    }
    // Current space exhausted: switch to the wider one and draw from it.
    active = nextGenerator();
    return active();
  };
}
/**
 * Demo entry point: logs a running counter and a freshly generated sequence
 * every 10 ms, starting with two letters and two digits.
 */
function main() {
  const char_list = "LEDGJR";
  const number_list = "0123456789";
  const generator = autoRenewGeneratorFactory(char_list, number_list, 2, 2);

  let counter = 0;
  const logNext = () => {
    counter += 1;
    console.log(counter, generator());
  };
  setInterval(logNext, 10);
}
main();
I'm trying to transform an array of numbers such that each number has only one nonzero digit.
so basically
"7970521.5544"
will give me
["7000000", "900000", "70000", "500", "20", "1", ".5", ".05", ".004", ".0004"]
I tried:
// Asker's attempt: strips the decimal point, then logs every digit scaled by
// 1000 (rounded at 10000, divided by 10) and formatted with ten decimals.
var j = "7970521.5544";
var k = j.replace('.', '');
var result = k.split('');
var i = 0;
while (i < result.length) {
  console.log(parseFloat(Math.round(result[i] * 10000) / 10).toFixed(10));
  i += 1;
}
Any ideas, I'm not sure where to go from here?
Algorithm:
Split the number in two parts using the decimal notation.
Run a for loop to multiply each digit with the corresponding power of 10, like:
value = value * Math.pow(10, index); // for digits before decimal
value = value * Math.pow(10, -1 * index); // for digits after decimal
Then, filter the non-zero elements and concatenate both the arrays. (remember to re-reverse the left-side array)
// Expands a decimal string into one term per non-zero digit.
//
// The terms are built as strings by zero padding rather than by multiplying
// with powers of ten: the arithmetic version produced ".0.5", ".0.04", … for
// the fractional digits ('.' + 0.5) and depended on floating-point products.
var n = "7970521.5544";
var arr = n.split('.'); // '7970521' and '5544'
var left = arr[0].split('');
// An input without a decimal point simply has no fractional digits.
var right = (arr[1] || '').split('');

// Integer digits: the digit followed by one zero per remaining position.
left = left.map((digit, i) =>
  digit === '0' ? '' : digit + '0'.repeat(left.length - 1 - i));

// Fractional digits: '.', one zero per preceding position, then the digit.
right = right.map((digit, i) =>
  digit === '0' ? '' : '.' + '0'.repeat(i) + digit);

// Drop the empty placeholders left behind by zero digits.
let res = left.concat(right).filter(term => term !== '');

console.log(res);
You can use padStart and padEnd combined with reduce() to build the array. The amount you want to pad will be the index of the decimal minus the index in the loop for items left of the decimal and the opposite on the right.
Using reduce() you can make a new array with the padded strings taking care to avoid the zeroes and the decimal itself.
// Expands a decimal string into one term per non-zero digit, padding each
// digit with zeros according to its distance from the decimal point.
let s = "7970521.5544"
let arr = s.split('')
let d_index = s.indexOf('.')
if (d_index == -1) d_index = s.length // edge case for nums with no decimal

let nums = []
arr.forEach((digit, position) => {
  // Skip zero digits and the decimal point itself.
  if (digit == 0 || position == d_index) return
  if (position < d_index) {
    // Integer part: pad on the right up to the decimal point.
    nums.push(digit.padEnd(d_index - position, '0'))
  } else {
    // Fractional part: pad on the left back to the decimal point.
    nums.push('.' + digit.padStart(position - d_index, '0'))
  }
})
console.log(nums)
You could split your string and then utilize Array.prototype.reduce method. Take note of the decimal position and then just pad your value with "0" accordingly. Something like below:
// Expands a decimal string into one place-value term per non-zero digit.
//
// Fixes over the previous version: zero digits are skipped (they used to be
// emitted as "0000"), and an input without a decimal point is treated as an
// integer instead of being handled entirely as fractional digits.
var s = "7970521.5544";
var original = s.split('');
var decimalPosition = original.indexOf('.');
if (decimalPosition === -1) {
  decimalPosition = original.length;
}
var placeValues = original.reduce((accum, el, idx) => {
  if (el === '0' || idx === decimalPosition) {
    return accum;
  }
  if (idx < decimalPosition) {
    // Integer digit: one trailing zero per position left before the point.
    accum.push(el + '0'.repeat(decimalPosition - idx - 1));
  } else {
    // Fractional digit: one leading zero per position after the point.
    accum.push('.' + '0'.repeat(idx - decimalPosition - 1) + el);
  }
  return accum;
}, []);
console.log(placeValues);
Shorter alternative (doesn't work in IE) :
// `i` counts down from the number of integer digits: positive while in the
// integer part, 0 on the decimal point, negative in the fractional part.
var s = "7970521.5544";
var i = s.split('.')[0].length;
var a = [];
for (const c of s) {
  // Skip the decimal point (i is 0, +c is NaN) and zero digits (+c is 0).
  if (i && +c) {
    const term = i > 0 ? c.padEnd(i, 0) : '.'.padEnd(-i, 0) + c;
    a.push(term);
  }
  --i;
}
console.log( a );
IE version :
// ES5-only variant. `i` counts down from the number of integer digits:
// positive in the integer part, 0 on the decimal point, negative after it.
// Array(k).join(0) yields k - 1 zeros.
var s = "7970521.5544";
var i = s.split('.')[0].length;
var a = [];
[].forEach.call(s, function (c) {
  // Skip the decimal point (i is 0, +c is NaN) and zero digits (+c is 0).
  if (i && +c) {
    if (i > 0) {
      a.push(c + Array(i).join(0));
    } else {
      a.push('.' + Array(-i).join(0) + c);
    }
  }
  --i;
});
console.log( a );
/**
 * Expands a number into its place-value terms, one string per non-zero
 * digit, e.g. "7970521.5544" -> ["7000000", "900000", …, ".004", ".0004"].
 *
 * The input is normalised through Number() first, so leading and trailing
 * zeros are dropped. Only values whose string form is plain decimal notation
 * are meaningful; very large or very small magnitudes that stringify in
 * exponent notation are not supported.
 *
 * Returns an empty array when there are no non-zero digits (the previous
 * version returned [""] for inputs such as "0").
 *
 * @param {string|number} n - Value to expand.
 * @returns {string[]} Terms from the most to the least significant digit.
 */
function standardToExpanded(n) {
  var parts = String(Number(n)).split(".");
  var whole = parts[0];
  var fraction = parts.length > 1 ? parts[1] : "";
  var terms = [];
  var j;
  var digit;

  // `count` zeros as a string.
  function zeros(count) {
    return new Array(count + 1).join("0");
  }

  function isNonZeroDigit(ch) {
    return ch >= "1" && ch <= "9";
  }

  // Integer digits: the digit followed by a zero per remaining position.
  for (j = 0; j < whole.length; j++) {
    digit = whole.charAt(j);
    if (isNonZeroDigit(digit)) {
      terms.push(digit + zeros(whole.length - j - 1));
    }
  }

  // Fractional digits: ".", a zero per preceding position, then the digit.
  for (j = 0; j < fraction.length; j++) {
    digit = fraction.charAt(j);
    if (isNonZeroDigit(digit)) {
      terms.push("." + zeros(j) + digit);
    }
  }

  return terms;
}
console.log(standardToExpanded("7970521.5544"));
// -> ["7000000", "900000", "70000", "500", "20", "1", ".5", ".05", ".004", ".0004"]
This looks like something out of my son's old 3rd Grade (core curriculum) Math book!
I'm trying to create a damerau-levenshtein distance function in JS.
I've found a description of the algorithm on Wikipedia, but there is no implementation of it. It says:
To devise a proper algorithm to calculate unrestricted
Damerau–Levenshtein distance note that there always exists an optimal
sequence of edit operations, where once-transposed letters are never
modified afterwards. Thus, we need to consider only two symmetric ways
of modifying a substring more than once: (1) transpose letters and
insert an arbitrary number of characters between them, or (2) delete a
sequence of characters and transpose letters that become adjacent
after deletion. The straightforward implementation of this idea gives
an algorithm of cubic complexity: $O(M \cdot N \cdot \max(M, N))$,
where M and N are string lengths. Using the ideas of
Lowrance and Wagner,[7] this naive algorithm can be improved to be
$O(M \cdot N)$ in the worst case. It is interesting that
the bitap algorithm can be modified to process transposition. See the
information retrieval section of[1] for an example of such an
adaptation.
https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
The section [1] points to http://acl.ldc.upenn.edu/P/P00/P00-1037.pdf which is even more complicated to me.
If I understood this correctly, it's not that easy to create an implementation of it.
Here's the levenshtein implementation I currently use :
/**
 * Levenshtein edit distance between two strings (unit cost for insertion,
 * deletion and substitution), computed with two rolling rows.
 *
 * Declared with `var`: the previous bare `levenshtein=function…` assignment
 * created an implicit global and throws a ReferenceError in strict mode.
 *
 * @param {string} s1 - First string.
 * @param {string} s2 - Second string.
 * @returns {number} Minimum number of single-character edits.
 */
var levenshtein = function (s1, s2) {
  // discuss at: http://phpjs.org/functions/levenshtein/
  // original by: Carlos R. L. Rodrigues (http://www.jsfromhell.com)
  // bugfixed by: Onno Marsman
  // revised by: Andrea Giammarchi (http://webreflection.blogspot.com)
  // reimplemented by: Brett Zamir (http://brett-zamir.me)
  // reimplemented by: Alexander M Beedie
  // example 1: levenshtein('Kevin van Zonneveld', 'Kevin van Sommeveld');
  // returns 1: 3
  if (s1 == s2) {
    return 0;
  }
  var s1_len = s1.length;
  var s2_len = s2.length;
  // Distance to or from the empty string is the other string's length.
  if (s1_len === 0) {
    return s2_len;
  }
  if (s2_len === 0) {
    return s1_len;
  }
  // BEGIN STATIC
  var split = false;
  try {
    split = !('0')[0];
  } catch (e) {
    // Earlier IE may not support access by string index
    split = true;
  }
  // END STATIC
  if (split) {
    s1 = s1.split('');
    s2 = s2.split('');
  }
  // v0 is the previous row of the distance matrix, v1 the row being filled.
  var v0 = new Array(s1_len + 1);
  var v1 = new Array(s1_len + 1);
  var s1_idx = 0,
    s2_idx = 0,
    cost = 0;
  // First row: turning a prefix of s1 into the empty string.
  for (s1_idx = 0; s1_idx < s1_len + 1; s1_idx++) {
    v0[s1_idx] = s1_idx;
  }
  var char_s1 = '',
    char_s2 = '';
  for (s2_idx = 1; s2_idx <= s2_len; s2_idx++) {
    v1[0] = s2_idx;
    char_s2 = s2[s2_idx - 1];
    for (s1_idx = 0; s1_idx < s1_len; s1_idx++) {
      char_s1 = s1[s1_idx];
      cost = (char_s1 == char_s2) ? 0 : 1;
      // Cheapest of the three neighbouring cells plus the step cost.
      v1[s1_idx + 1] = Math.min(
        v0[s1_idx + 1] + 1,
        v1[s1_idx] + 1,
        v0[s1_idx] + cost
      );
    }
    // Swap rows so the freshly filled row becomes the previous one.
    var v_tmp = v0;
    v0 = v1;
    v1 = v_tmp;
  }
  return v0[s1_len];
};
What are your ideas for building such an algorithm and, if you think it would be too complicated, what could I do to make no difference between 'l' (L lowercase) and 'I' (i uppercase) for example.
The gist #doukremt gave: https://gist.github.com/doukremt/9473228
gives the following in Javascript.
You can change the weights of operations in the weighter object.
/**
 * Weighted edit distance between two sequences, computed with a single
 * rolling column. The operation costs come from the `weighter` object below
 * (insert 1, delete 0.5, replace 0.3).
 *
 * Fix: the cell value is now the true minimum of the insert, delete and
 * replace costs. The previous `ic < dc ? ic : (dc < rc ? dc : rc)` returned
 * the insert cost whenever it beat the delete cost, even when replacing was
 * cheaper still.
 *
 * @param {string|Array} seq1 - Source sequence.
 * @param {string|Array} seq2 - Target sequence.
 * @returns {number} Total cost of the cheapest edit script found.
 */
var levenshteinWeighted = function (seq1, seq2) {
  var len1 = seq1.length;
  var len2 = seq2.length;
  var i, j;
  var dist;
  var ic, dc, rc;
  var last, old, column;

  var weighter = {
    insert: function (c) { return 1.; },
    delete: function (c) { return 0.5; },
    replace: function (c, d) { return 0.3; }
  };

  /* don't swap the sequences, or this is gonna be painful */
  // One side empty: the distance is deleting all of seq1 or inserting all
  // of seq2.
  if (len1 == 0 || len2 == 0) {
    dist = 0;
    while (len1)
      dist += weighter.delete(seq1[--len1]);
    while (len2)
      dist += weighter.insert(seq2[--len2]);
    return dist;
  }

  // column[j] holds m[i][j] for the row currently being processed.
  column = [];
  column[0] = 0;
  for (j = 1; j <= len2; ++j)
    column[j] = column[j - 1] + weighter.insert(seq2[j - 1]);

  for (i = 1; i <= len1; ++i) {
    last = column[0]; /* m[i-1][0] */
    column[0] += weighter.delete(seq1[i - 1]); /* m[i][0] */
    for (j = 1; j <= len2; ++j) {
      old = column[j];
      if (seq1[i - 1] == seq2[j - 1]) {
        column[j] = last; /* m[i-1][j-1] */
      } else {
        ic = column[j - 1] + weighter.insert(seq2[j - 1]); /* m[i][j-1] */
        dc = column[j] + weighter.delete(seq1[i - 1]); /* m[i-1][j] */
        rc = last + weighter.replace(seq1[i - 1], seq2[j - 1]); /* m[i-1][j-1] */
        column[j] = Math.min(ic, dc, rc);
      }
      last = old;
    }
  }

  dist = column[len2];
  return dist;
};
Stolen from here, with formatting and some examples on how to use it:
/**
 * Builds a Damerau–Levenshtein distance function with configurable costs.
 *
 * `prices` may carry optional `insert`, `remove`, `substitute` and
 * `transpose` keys. Each is either a constant number or a function returning
 * the cost; every operation defaults to a cost of 1. The price functions
 * receive the relevant character(s) and should return numbers:
 *
 *   insert:     function (inserted) { return NUMBER; }
 *   remove:     function (removed) { return NUMBER; }
 *   substitute: function (from, to) { return NUMBER; }
 *   transpose:  function (backward, forward) { return NUMBER; }
 *
 * Passing `damerau === false` turns transposition off, which leaves plain
 * Levenshtein distance.
 *
 * @param {Object} [prices] - Optional cost overrides.
 * @param {boolean} [damerau=true] - Whether transpositions are considered.
 * @returns {function(string, string): number} distance(down, across).
 */
function DamerauLevenshtein(prices, damerau) {
  if (damerau !== false) {
    damerau = true;
  }
  if (!prices) {
    prices = {};
  }

  // Turns the configured price of one operation into a cost function.
  // Numeric prices are read from `prices` at call time, as before.
  const priceFor = (operation) => {
    switch (typeof prices[operation]) {
      case 'function':
        return prices[operation];
      case 'number':
        return () => prices[operation];
      default:
        return () => 1;
    }
  };

  const insert = priceFor('insert');
  const remove = priceFor('remove');
  const substitute = priceFor('substitute');
  const transpose = priceFor('transpose');

  function distance(down, across) {
    //http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
    if (down === across) {
      return 0;
    }

    // A leading null makes index 0 stand for the empty prefix.
    down = down.split('');
    down.unshift(null);
    across = across.split('');
    across.unshift(null);

    const ds = [];
    for (let i = 0; i < down.length; i += 1) {
      const d = down[i];
      ds[i] = [];

      for (let j = 0; j < across.length; j += 1) {
        const a = across[j];

        if (i === 0 && j === 0) {
          ds[i][j] = 0;
          continue;
        }
        if (i === 0) {
          //Empty down (i == 0) -> across[1..j] by inserting
          ds[i][j] = ds[i][j - 1] + insert(a);
          continue;
        }
        if (j === 0) {
          //Down -> empty across (j == 0) by deleting
          ds[i][j] = ds[i - 1][j] + remove(d);
          continue;
        }

        //Least costly way to turn the prefix down[1..i] into across[1..j],
        //using the already calculated costs of the shorter prefixes.
        //Delete down[i] after editing down[1..i-1] into across[1..j].
        const viaRemove = ds[i - 1][j] + remove(d);
        //Insert across[j] after editing down[1..i] into across[1..j-1].
        const viaInsert = ds[i][j - 1] + insert(a);
        //Substitute down[i] (d) with across[j] (a), free when they match.
        const viaSubstitute = ds[i - 1][j - 1] + (d === a ? 0 : substitute(d, a));
        ds[i][j] = Math.min(viaRemove, viaInsert, viaSubstitute);

        //Transposition: the last two letters of down match the last two of
        //across when swapped, so build on the cost two steps back.
        const canTranspose =
          damerau && i > 1 && j > 1 && down[i - 1] === a && d === across[j - 1];
        if (canTranspose) {
          const viaTranspose =
            ds[i - 2][j - 2] + (d === a ? 0 : transpose(d, down[i - 1]));
          ds[i][j] = Math.min(ds[i][j], viaTranspose);
        }
      }
    }

    return ds[down.length - 1][across.length - 1];
  }

  return distance;
}
//Returns a distance function to call.
let dl = DamerauLevenshtein();
console.log(dl('12345', '23451'));
console.log(dl('this is a test', 'this is not a test'));
console.log(dl('testing testing 123', 'test'));