Convert LaTeX to dynamic Javascript function - javascript

I have a user input for an equation - this input generates LaTeX code using a separate API which I did not code (namely, Mathquill, not that it matters).
My problem is best illustrated by an example: suppose the LaTeX code generated from the user input was this:
x^2+3x-10sin\left(2x\right)
How would I convert this (on the fly of course) into a JavaScript function which, hard-coded, would look like this:
function(x) {
return Math.pow(x, 2) + 3 * x - 10 * Math.sin(2 * x);
}
Are there any APIs or am I looking at writing something which will interpret the LaTeX symbols and make a function, somehow? Or what?

I have written a (by no means general purpose) solution, heavily based on George's code.
Here it is:
/**
 * Named constants that may appear in an expression. A token equal to one of
 * these keys is replaced by the numeric value while parsing.
 */
var CALC_CONST = {
  // define your constants
  e: Math.E,
  pi: Math.PI
};

/**
 * Arity table: each entry pairs a pattern matching operator/function tokens
 * with the number of operands that token consumes.
 */
var CALC_NUMARGS = [
  [/^(\^|\*|\/|\+|\-)$/, 2],
  [/^(floor|ceil|(sin|cos|tan|sec|csc|cot)h?)$/, 1]
];

/** Implementation of every operator and function token Calc can evaluate. */
var CALC_OPERATIONS = {
  /* BASIC ARITHMETIC OPERATORS */
  "*": (a, b) => a * b,
  "/": (a, b) => a / b,
  "+": (a, b) => a + b,
  "-": (a, b) => a - b,
  // exponents
  "^": (a, b) => Math.pow(a, b),
  /* TRIG FUNCTIONS */
  sin: (a) => Math.sin(a),
  cos: (a) => Math.cos(a),
  tan: (a) => Math.tan(a),
  sec: (a) => 1 / Math.cos(a),
  csc: (a) => 1 / Math.sin(a),
  cot: (a) => 1 / Math.tan(a),
  // The Math.* hyperbolic functions stay finite for large arguments, where
  // hand-written e^(2x) formulas overflow to Infinity / Infinity = NaN.
  sinh: (a) => Math.sinh(a),
  cosh: (a) => Math.cosh(a),
  tanh: (a) => Math.tanh(a),
  sech: (a) => 1 / Math.cosh(a),
  csch: (a) => 1 / Math.sinh(a),
  coth: (a) => 1 / Math.tanh(a),
  floor: (a) => Math.floor(a),
  ceil: (a) => Math.ceil(a)
};

/**
 * Parses an expression into reverse Polish notation using the shunting-yard
 * algorithm. The result is stored in `this.rpn_expr` as [kind, value] pairs,
 * where kind is "num", "var", or the operand count of an operator.
 *
 * `this.valid` is set to false when parentheses are unbalanced or a token is
 * not a number, constant, single-letter variable, operator or function.
 * Unary minus is not handled.
 *
 * @param {string} expr - LaTeX source, or infix source when `infix` is truthy.
 * @param {boolean} [infix] - pass true to skip the LaTeX-to-infix conversion.
 */
var Calc = function(expr, infix) {
  this.valid = true;
  this.expr = expr;
  if (!infix) {
    // by default treat expr as raw latex
    this.expr = this.latexToInfix(expr);
  }

  const FUNCTION_NAME = /^(floor|ceil|(sin|cos|tan|sec|csc|cot)h?)$/;
  const precedenceOf = (op) => {
    if (typeof op === "undefined") return 0;
    if (FUNCTION_NAME.test(op)) return 10;
    if (op === "^") return 9;
    if (op === "*" || op === "/") return 8;
    if (op === "+" || op === "-") return 7;
    return 0; // "(" and anything unknown
  };
  // Functions and exponentiation group right-to-left: 2^3^2 is 2^(3^2).
  const isRightAssociative = (op) => op === "^" || FUNCTION_NAME.test(op);
  const numArgs = (op) => {
    for (const [pattern, arity] of CALC_NUMARGS) {
      if (pattern.test(op)) return arity;
    }
    return false;
  };

  this.rpn_expr = [];
  const rpn = this.rpn_expr;
  this.expr = this.expr.replace(/\s+/g, "");
  // Matches any token of a user supplied expression (an operator, a
  // parenthesis, a number, a constant or a variable). `match` yields null
  // when nothing matches, hence the fallback to an empty list.
  const tokens = this.expr.match(/(\^|\*|\/|\+|\-|\(|\)|[a-zA-Z0-9\.]+)/gi) || [];
  const opStack = [];
  const top = () => opStack[opStack.length - 1];

  for (const token of tokens) {
    if (numArgs(token) !== false) {
      // Operator or function: first flush everything that binds tighter.
      const tokPrec = precedenceOf(token);
      while (opStack.length > 0) {
        const headPrec = precedenceOf(top());
        const flush = isRightAssociative(token) ? tokPrec < headPrec : tokPrec <= headPrec;
        if (!flush) break;
        rpn.push([numArgs(top()), opStack.pop()]);
      }
      opStack.push(token);
    } else if (token === "(") {
      opStack.push(token);
    } else if (token === ")") {
      // Pop operators onto the output until we reach the matching (
      while (opStack.length > 0 && top() !== "(") {
        rpn.push([numArgs(top()), opStack.pop()]);
      }
      if (opStack.length === 0) {
        // ")" without a matching "("
        this.valid = false;
        break;
      }
      opStack.pop(); // remove the (
    } else if (Object.prototype.hasOwnProperty.call(CALC_CONST, token)) {
      // Constant (may be longer than one letter, e.g. "pi")
      rpn.push(["num", CALC_CONST[token]]);
    } else if (/^[a-zA-Z]$/.test(token)) {
      // Variable (i.e. x as in f(x))
      rpn.push(["var", token]);
    } else {
      const numVal = parseFloat(token);
      if (isNaN(numVal)) {
        // Unknown identifier such as "xy" or "sqrt"
        this.valid = false;
        break;
      }
      rpn.push(["num", numVal]);
    }
  }

  // Push all remaining operators onto the final expression
  while (opStack.length > 0) {
    const popped = opStack.pop();
    if (popped === "(") {
      // "(" that was never closed
      this.valid = false;
      break;
    }
    rpn.push([numArgs(popped), popped]);
  }
};

/**
 * Evaluates the parsed expression.
 *
 * @param {number} x - value substituted for every variable token.
 * @returns {number|boolean|undefined} the result; false when the RPN list
 *   contains an operator that is not in CALC_OPERATIONS; undefined when the
 *   expression is empty.
 */
Calc.prototype.eval = function(x) {
  const stack = [];
  for (const [kind, value] of this.rpn_expr) {
    if (kind === "var") {
      // Variable, i.e. x as in f(x); push value onto stack
      stack.push(x);
    } else if (kind === "num") {
      // Number; push value onto stack
      stack.push(value);
    } else if (typeof kind !== "string") {
      // Operator: `kind` is the number of operands to take off the stack
      if (!Object.prototype.hasOwnProperty.call(CALC_OPERATIONS, value)) {
        // unknown operator; error out
        return false;
      }
      const args = [];
      do {
        args.unshift(stack.pop());
      } while (args.length < kind);
      stack.push(CALC_OPERATIONS[value].apply(null, args));
    }
  }
  return stack.pop();
};

/**
 * Converts LaTeX notation to infix notation (human-readable, to be converted
 * again to RPN by the constructor in order to be processed).
 *
 * Supported functions / operators / notation:
 * parentheses, braces used for grouping, exponents, adding, subtracting,
 * multiplying (implicit or \cdot), dividing, fractions (without nested
 * braces), \pi, trigonometric (including hyperbolic) functions, floor, ceil.
 *
 * @param {string} latex - LaTeX source.
 * @returns {string} the equivalent infix expression.
 */
Calc.prototype.latexToInfix = function(latex) {
  const ENDS_WITH_FUNCTION = /(?:floor|ceil|(?:sin|cos|tan|sec|csc|cot)h?)$/;
  return latex
    // Whitespace first, so "2 x" and "\frac{1}{2} x" still multiply implicitly.
    .replace(/\s+/g, "")
    .replace(/\\frac\{([^}]+)\}\{([^}]+)\}/g, "($1)/($2)") // fractions
    .replace(/\{/g, "(") // remaining braces only group, e.g. e^{x+1}
    .replace(/\}/g, ")")
    .replace(/\\left\(/g, "(") // open parenthesis
    .replace(/\\right\)/g, ")") // close parenthesis
    .replace(/\\cdot/g, "*")
    .replace(/\\pi/g, "(pi)")
    .replace(/\\(floor|ceil|(?:sin|cos|tan|sec|csc|cot)h?)/g, "$1") // \sin -> sin
    // Wrap simple function calls in parentheses. The neighbouring characters
    // are re-emitted (left) or only looked at (right), never swallowed.
    .replace(/([^\(])((?:floor|ceil|(?:sin|cos|tan|sec|csc|cot)h?)\([^\(\)]+\))(?=[^\)])/g, "$1($2)")
    // Implicit multiplication before "(": after a digit, ")" or a variable,
    // but not after a function name and not after an operator such as "^".
    .replace(/([0-9.)]|[a-zA-Z]+)\(/g, (match, before) =>
      ENDS_WITH_FUNCTION.test(before) ? match : before + "*(")
    .replace(/\)([\w])/g, ")*$1")
    .replace(/([0-9])([A-Za-z])/g, "$1*$2");
};
Example of usage:
// Build a Calc from raw LaTeX (the default input mode) and evaluate it at x = 3.5.
// Backslashes are doubled because this is a JavaScript string literal.
var latex = "e^x+\\frac{2}{3}x-4sin\\left(x\\right)";
var calc = new Calc(latex);
var test = calc.eval(3.5); // 36.85191820278412

Well, you're going to have to decide on exactly which operations you support at some point. After that it shouldn't be hard to implement an evaluator using a parser like the Shunting-yard algorithm to yield a representation of the equation that is more easy to evaluate (that is, an abstract syntax tree).
I have a simple example of this kind of evaluator written in JavaScript at: http://gjp.cc/projects/logic_tables.html It takes logical expressions like !(p ^^ q) & ~(p || q) instead of LaTeX, but it might still be a useful example for you.
The JavaScript (http://gpittarelli.com/projects/logic_tables.js):
/** Identifiers that are read as boolean literals instead of variables. */
var CALCULATOR_CONSTANTS = {
  /* True values. */
  't': true,
  'true': true,
  /* False values. */
  'c': false,
  'false': false
};

/**
 * The Calculator constructor takes a logical expression and parses it into
 * reverse Polish notation (stored as `rpn_expr`) with the shunting-yard
 * algorithm, then runs `optimize()` on the result.
 *
 * `this.valid` becomes false when parentheses are unbalanced or an operator
 * token is not recognised (e.g. "|||"). `this.variables` lists every
 * variable token in the order it was read.
 *
 * @param {string} expr - expression such as "!(p ^^ q) & ~(p || q)".
 */
var Calculator = function(expr) {
  this.valid = true;

  const precedenceOf = (op) => {
    switch (op) {
      case "!": case "~":
        return 9;
      case "&": case "&&":
      case "|": case "||":
      case "^": case "^^":
        return 7;
      case "->": case "<-":
        return 5;
      default:
        return 0; // "(" , undefined, or an unknown operator
    }
  };
  const isRightAssociative = (op) => op === "!" || op === "~";

  this.rpn_expr = [];
  this.variables = [];
  const rpn = this.rpn_expr;
  const variables = this.variables;

  // Matches any valid token in a user supplied expression (an operator, a
  // constant or a variable). `match` yields null for an empty expression.
  const tokens = expr
    .replace(/\s+/g, "")
    .match(/(\!|\~|\|+|&+|\(|\)|\^+|(->)|(<-)|[a-zA-Z0-9]+)/gi) || [];
  const opStack = [];
  const top = () => opStack[opStack.length - 1];

  for (const token of tokens) {
    if (/[a-zA-Z0-9]+/.test(token)) {
      if (Object.prototype.hasOwnProperty.call(CALCULATOR_CONSTANTS, token)) {
        // Constant. Pushes a boolean value onto the output.
        rpn.push(CALCULATOR_CONSTANTS[token]);
      } else {
        // Variable
        rpn.push(token);
        variables.push(token);
      }
    } else if (token === "(") {
      opStack.push(token);
    } else if (token === ")") {
      // Pop operators onto the output until we reach the matching (
      while (opStack.length > 0 && top() !== "(") {
        rpn.push(opStack.pop());
      }
      if (opStack.length === 0) {
        // ")" without a matching "("
        this.valid = false;
        break;
      }
      opStack.pop(); // Remove the (
    } else {
      // Operator
      const tokPrec = precedenceOf(token);
      if (tokPrec === 0) {
        // The tokenizer accepts runs like "|||" that no operator matches.
        this.valid = false;
        break;
      }
      while (opStack.length > 0) {
        const headPrec = precedenceOf(top());
        const flush = isRightAssociative(token) ? tokPrec < headPrec : tokPrec <= headPrec;
        if (!flush) break;
        rpn.push(opStack.pop());
      }
      opStack.push(token);
    }
  }

  // Push all remaining operators onto the final expression
  while (opStack.length > 0) {
    const popped = opStack.pop();
    if (popped === "(") {
      // "(" that was never closed
      this.valid = false;
      break;
    }
    rpn.push(popped);
  }
  this.optimize();
};

/** Returns the variables used in the currently loaded expression. */
Calculator.prototype.getVariables = function() { return this.variables; };

/**
 * Single-pass optimization, mainly just to show the concept. Looks for
 * statements that can be pre computed, eg:
 *   p | true
 *   q & false
 *   r ^ r
 * The RPN expression is read as if it were being evaluated, but the stack
 * holds whole sub-expressions (arrays of RPN tokens) rather than single
 * tokens, so a compound operand is always kept or replaced as one unit.
 */
Calculator.prototype.optimize = function() {
  const stack = [];
  const isConstant = (e, value) => e.length === 1 && e[0] === value;
  const sameExpr = (a, b) =>
    a.length === b.length && a.every((tok, i) => tok === b[i]);
  // A malformed expression can run out of operands; treat those as empty.
  const popOperand = () => stack.pop() || [];

  this.rpn_expr.forEach((token) => {
    if (typeof token === "boolean" || /[a-zA-Z0-9]+/.test(token)) {
      // Constant or identifier
      stack.push([token]);
      return;
    }
    if (token === "!" || token === "~") {
      const p = popOperand();
      if (p.length === 1 && typeof p[0] === "boolean") {
        // NOT of a constant value can always be precalculated.
        stack.push([!p[0]]);
      } else {
        stack.push(p.concat([token]));
      }
      return;
    }
    const right = popOperand();
    const left = popOperand();
    if (token === "^" || token === "^^") {
      if (left.length > 0 && sameExpr(left, right)) { // p ^ p == false
        stack.push([false]);
        return;
      }
    } else if (token === "|" || token === "||") {
      if (isConstant(left, true) || isConstant(right, true)) {
        // If either operand is a tautology, OR is also a tautology.
        stack.push([true]);
        return;
      }
      if (isConstant(left, false)) { stack.push(right); return; } // false | p == p
      if (isConstant(right, false) || sameExpr(left, right)) {     // p | false, p | p
        stack.push(left);
        return;
      }
    } else if (token === "&" || token === "&&") {
      if (isConstant(left, false) || isConstant(right, false)) {
        stack.push([false]);
        return;
      }
      if (isConstant(left, true)) { stack.push(right); return; }   // true & p == p
      if (isConstant(right, true) || sameExpr(left, right)) {      // p & true, p & p
        stack.push(left);
        return;
      }
    }
    // Nothing to precompute: keep the operator with both operands.
    stack.push(left.concat(right, [token]));
  });

  this.rpn_expr = stack.reduce((flat, e) => flat.concat(e), []);
};

/**
 * Returns the result of evaluating the current expression with the passed in
 * `variables` object, whose properties map variable name => value. Values are
 * coerced to booleans; names missing from the object count as false.
 *
 * @param {Object} variables - variable assignments.
 * @returns {number} 1 when the expression is true, otherwise 0.
 */
Calculator.prototype.eval = function(variables) {
  const stack = [];
  this.rpn_expr.forEach((token) => {
    if (typeof token === "boolean") {
      // Constant.
      stack.push(token);
    } else if (/[a-zA-Z0-9]+/.test(token)) {
      // Identifier - push its boolean value onto the stack
      stack.push(!!variables[token]);
    } else if (token === "!" || token === "~") {
      // Unary operator: consumes exactly one operand.
      stack.push(!stack.pop());
    } else {
      // Binary operator
      const q = Boolean(stack.pop());
      const p = Boolean(stack.pop());
      if (token === "^" || token === "^^") {
        stack.push(p !== q);
      } else if (token === "|" || token === "||") {
        stack.push(p || q);
      } else if (token === "&" || token === "&&") {
        stack.push(p && q);
      } else if (token === "->") {
        stack.push(!p || q);
      } else if (token === "<-") {
        stack.push(!q || p);
      }
    }
  });
  return stack.pop() ? 1 : 0;
};

Maybe you could try LatexJS. LatexJS is an API service that I put together in order to convert latex math notation into Javascript functions. So you would input latex expressions and get back Javascript functions dynamically. For example:
Input
x^2+3x-10sin\left(2x\right)
Output
{
"func": "(x)=>{return Math.pow(x,2)+3*x-10*Math.sin(2*x)};",
"params": ["x"]
}
Evaluation
> func = (x)=>{return Math.pow(x,2)+3*x-10*Math.sin(2*x)};
> func(2)
< 17.56802495307928

Related

Why is my code returning the else statement and undefined and not the totalBasketballScore?

I really thought I had this code correct. I am trying to calculate a basketball score from free throws, 2-pointers and 3-pointers. When I console.log the result of totalBasketballScore, the output is 'All Entries Must Be a Number' followed by undefined. What do I need to change so that I get the score when I pass the 3 values as arguments?
/**
 * Totals a basketball score: 1 point per free throw, 2 per mid-range shot and
 * 3 per three-pointer.
 *
 * Only the first argument is checked with `typeof`; the second and third are
 * compared directly against the string 'number'. When the check fails, a
 * message is logged and undefined is returned.
 */
function totalBasketballScore(numberFreeThrows, numberMidRange, numberThreePointers) {
  const inputsAccepted =
    typeof numberFreeThrows === 'number' &&
    numberMidRange === 'number' &&
    numberThreePointers === 'number';

  if (!inputsAccepted) {
    console.log('All Entries Must Be a Number');
    return undefined;
  }

  const fromFreeThrows = 1 * numberFreeThrows;
  const fromMidRange = 2 * numberMidRange;
  const fromThreePointers = 3 * numberThreePointers;
  return fromFreeThrows + fromMidRange + fromThreePointers;
}
console.log(totalBasketballScore(1, 2, 4));
Another approach that can be used is isNaN(). isNaN can be used to check if the value is Not a Number. If isNaN() returns false, the value is a number.
/**
 * Totals a basketball score: 1 point per free throw, 2 per mid-range shot and
 * 3 per three-pointer. Every argument must pass the global isNaN() check
 * (which coerces its argument to a number first); otherwise a message is
 * logged and undefined is returned.
 */
function totalBasketballScore(numberFreeThrows, numberMidRange, numberThreePointers) {
  const entries = [numberFreeThrows, numberMidRange, numberThreePointers];
  const hasInvalidEntry = entries.some((entry) => isNaN(entry));

  if (hasInvalidEntry) {
    console.log('All Entries Must Be a Number');
    return undefined;
  }

  const fromFreeThrows = 1 * numberFreeThrows;
  const fromMidRange = 2 * numberMidRange;
  const fromThreePointers = 3 * numberThreePointers;
  return fromFreeThrows + fromMidRange + fromThreePointers;
}
console.log(totalBasketballScore(1, 2, 4));
You should use typeof per each parameter, like this:
if(typeof numberFreeThrows === 'number' && typeof numberMidRange === 'number' && typeof numberThreePointers === 'number'){
} else {
}

How does the modulus operator handle strings in Javascript

I know how modulus works in general, but it is not clear to me how the operator handles strings.
Recently, I had to write a script which checks if a name (string) contains an even number of letters. This actually worked, using modulus 2 and checking if result was 1 or 0:
/**
 * Returns true when `firstName % 2` is exactly 0. The operand is the value
 * itself, not its length, so the string is coerced to a number first.
 */
function isNameEven(firstName) {
  const remainder = firstName % 2;
  return remainder === 0;
}
So I'm assuming the letters in the string were counted?
The result is always NaN
// Logs the remainder of each sample string divided by 2.
const sampleNames = ["a", "ab", "abc"];
sampleNames.forEach((sampleName) => {
  console.log(sampleName % 2);
});
Your function only works when the string can be implicitly converted to a number. For any other string, firstName % 2 evaluates to NaN, so the comparison with 0 is always false.
/**
 * Returns true when `firstName % 2` is exactly 0. The string itself (not its
 * length) is the operand, so it is coerced to a number first.
 */
function isNameEven(firstName) {
  return firstName % 2 === 0;
}

// Log the result for names of one, two and three letters.
["a", "ab", "abc"].forEach((sampleName) => {
  console.log(isNameEven(sampleName));
});
You could check the length property of the string though.
/** Returns true when `firstName` has an even number of characters. */
function isNameEven(firstName) {
  const letterCount = firstName.length;
  return letterCount % 2 === 0;
}

// Log the result for names of one, two and three letters.
["a", "ab", "abc"].forEach((sampleName) => {
  console.log(isNameEven(sampleName));
});

in Java script Given two strings, find if they are one edit away from each other

Can you help me write a JavaScript function that, given two strings, determines whether they are one edit (an insertion, a deletion or a replacement of a single character) away from each other? For example:
(pale, ple ) true
(pales, pale ) true
(pale, bale ) true
(pale, bake) false
(face, facts ) false
Can you try this function to check that string only differs by one edit.
/**
 * Tells whether two strings are at most one edit apart, where an edit is the
 * insertion, deletion or replacement of a single character.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {boolean} true for identical strings and for strings exactly one
 *   edit apart; false otherwise.
 */
function checkDifferntString(str1, str2) {
  if (str1 === str2) return true; // equal return true

  const [shorter, longer] = str1.length <= str2.length ? [str1, str2] : [str2, str1];
  // A length difference of 2 or more needs at least two insertions.
  if (longer.length - shorter.length > 1) return false;

  const sameLength = shorter.length === longer.length;
  let edited = false;
  let i = 0; // position in shorter
  let j = 0; // position in longer
  while (i < shorter.length && j < longer.length) {
    if (shorter.charAt(i) === longer.charAt(j)) {
      i++;
      j++;
      continue;
    }
    if (edited) return false; // second difference
    edited = true;
    // Replacement: step over the character in both strings.
    // Insertion/deletion: step over the extra character in the longer one only.
    if (sameLength) i++;
    j++;
  }
  // A leftover trailing character in `longer` is the single allowed edit.
  return true;
}
console.log(checkDifferntString("pale", "pale")) // true
console.log(checkDifferntString("pale", "pales")) // true
console.log(checkDifferntString("pales", "pale")) // true
console.log(checkDifferntString("pales", "bale")) // false
I hope it helps. Thanks!
Check this out.
I made a simple function that iterates through the given two strings and check if there's more than 1 difference (in terms of characters) between these strings, an optional argument cs to allow case sensitivity, by default it equals to false, so 'a' and 'A' are the same.
/**
 * Tells whether two non-empty strings are at most one edit apart, where an
 * edit is the insertion, deletion or replacement of a single character.
 *
 * @param {string} str1
 * @param {string} str2
 * @param {boolean} [cs] - truthy to compare case-sensitively; by default the
 *   comparison ignores case, so 'a' and 'A' are the same.
 * @returns {boolean} true when the strings are identical or one edit apart;
 *   false otherwise, and always false when either string is empty.
 */
function isEditFrom(str1, str2, cs) {
  // Empty input is rejected rather than treated as an insertion.
  if (str1.length === 0 || str2.length === 0) {
    return false;
  }

  const first = cs ? str1 : str1.toLowerCase();
  const second = cs ? str2 : str2.toLowerCase();
  const [shorter, longer] = first.length <= second.length ? [first, second] : [second, first];
  if (longer.length - shorter.length > 1) {
    return false;
  }

  const sameLength = shorter.length === longer.length;
  let edited = false;
  let i = 0; // position in shorter
  let j = 0; // position in longer
  while (i < shorter.length && j < longer.length) {
    if (shorter[i] === longer[j]) {
      i++;
      j++;
      continue;
    }
    if (edited) {
      return false; // second difference
    }
    edited = true;
    // Replacement advances both strings; insertion/deletion only the longer.
    if (sameLength) {
      i++;
    }
    j++;
  }
  return true;
}
and now we call that function:
isEditFrom('Pale', 'bAle'); // returns true: case is ignored by default, leaving one replacement ('p' -> 'b')
isEditFrom('Pale', 'bAle', true); // returns false: the third argument enables case sensitivity, so 'a' != 'A' is a second difference
isEditFrom('face', 'facts'); // returns false

Damerau-Levenshtein distance Implementation

I'm trying to create a damerau-levenshtein distance function in JS.
I've found a description of the algorithm on Wikipedia, but there is no implementation of it. It says:
To devise a proper algorithm to calculate unrestricted
Damerau–Levenshtein distance note that there always exists an optimal
sequence of edit operations, where once-transposed letters are never
modified afterwards. Thus, we need to consider only two symmetric ways
of modifying a substring more than once: (1) transpose letters and
insert an arbitrary number of characters between them, or (2) delete a
sequence of characters and transpose letters that become adjacent
after deletion. The straightforward implementation of this idea gives
an algorithm of cubic complexity: O\left (M \cdot N \cdot \max(M, N)
\right ), where M and N are string lengths. Using the ideas of
Lowrance and Wagner,[7] this naive algorithm can be improved to be
O\left (M \cdot N \right) in the worst case. It is interesting that
the bitap algorithm can be modified to process transposition. See the
information retrieval section of[1] for an example of such an
adaptation.
https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
The section [1] points to http://acl.ldc.upenn.edu/P/P00/P00-1037.pdf which is even more complicated to me.
If I understood this correctly, it's not that easy to create an implementation of it.
Here's the levenshtein implementation I currently use :
/**
 * Computes the Levenshtein distance between two strings: the minimum number
 * of single-character insertions, deletions and substitutions needed to turn
 * one into the other. Characters are compared as UTF-16 code units.
 *
 * Only two rows of the distance matrix are kept at any time.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} the edit distance.
 */
// Declared explicitly: assigning to an undeclared name throws a
// ReferenceError in strict mode and in ES modules.
var levenshtein = function (s1, s2) {
  // discuss at: http://phpjs.org/functions/levenshtein/
  // original by: Carlos R. L. Rodrigues (http://www.jsfromhell.com)
  // bugfixed by: Onno Marsman
  // revised by: Andrea Giammarchi (http://webreflection.blogspot.com)
  // reimplemented by: Brett Zamir (http://brett-zamir.me)
  // reimplemented by: Alexander M Beedie
  // example 1: levenshtein('Kevin van Zonneveld', 'Kevin van Sommeveld');
  // returns 1: 3
  if (s1 === s2) {
    return 0;
  }
  var s1_len = s1.length;
  var s2_len = s2.length;
  if (s1_len === 0) {
    return s2_len;
  }
  if (s2_len === 0) {
    return s1_len;
  }

  // v0 is the previous matrix row, v1 the row being filled in.
  var v0 = new Array(s1_len + 1);
  var v1 = new Array(s1_len + 1);
  var s1_idx, s2_idx;
  for (s1_idx = 0; s1_idx <= s1_len; s1_idx++) {
    v0[s1_idx] = s1_idx; // distance from the empty prefix of s2
  }

  for (s2_idx = 1; s2_idx <= s2_len; s2_idx++) {
    v1[0] = s2_idx;
    // charAt works on every engine, so no split-into-array fallback is needed.
    var char_s2 = s2.charAt(s2_idx - 1);
    for (s1_idx = 0; s1_idx < s1_len; s1_idx++) {
      var cost = s1.charAt(s1_idx) === char_s2 ? 0 : 1;
      v1[s1_idx + 1] = Math.min(
        v0[s1_idx + 1] + 1, // skip a character of s2
        v1[s1_idx] + 1,     // skip a character of s1
        v0[s1_idx] + cost   // match or substitute
      );
    }
    // Swap rows: the row just computed becomes the previous row.
    var v_tmp = v0;
    v0 = v1;
    v1 = v_tmp;
  }
  return v0[s1_len];
};
What are your ideas for building such an algorithm and, if you think it would be too complicated, what could I do to make no difference between 'l' (L lowercase) and 'I' (i uppercase) for example.
The gist #doukremt gave: https://gist.github.com/doukremt/9473228
gives the following in Javascript.
You can change the weights of operations in the weighter object.
/**
 * Computes a weighted Levenshtein distance from seq1 to seq2. Each edit
 * operation is priced by the `weighter` object below; change its functions to
 * change the weights. A single column of the distance matrix is reused.
 *
 * @param {string} seq1 - source sequence.
 * @param {string} seq2 - target sequence.
 * @returns {number} the cheapest total cost of turning seq1 into seq2.
 */
var levenshteinWeighted = function(seq1, seq2)
{
  var len1 = seq1.length;
  var len2 = seq2.length;
  var i, j;
  var dist;
  var ic, dc, rc;
  var last, old, column;
  var weighter = {
    insert: function(c) { return 1.; },
    delete: function(c) { return 0.5; },
    replace: function(c, d) { return 0.3; }
  };

  /* don't swap the sequences, or this is gonna be painful */
  if (len1 === 0 || len2 === 0) {
    // One side is empty: delete all of seq1 or insert all of seq2.
    dist = 0;
    while (len1)
      dist += weighter.delete(seq1[--len1]);
    while (len2)
      dist += weighter.insert(seq2[--len2]);
    return dist;
  }

  column = [];
  column[0] = 0;
  for (j = 1; j <= len2; ++j)
    column[j] = column[j - 1] + weighter.insert(seq2[j - 1]);

  for (i = 1; i <= len1; ++i) {
    last = column[0]; /* m[i-1][0] */
    column[0] += weighter.delete(seq1[i - 1]); /* m[i][0] */
    for (j = 1; j <= len2; ++j) {
      old = column[j];
      if (seq1[i - 1] === seq2[j - 1]) {
        column[j] = last; /* m[i-1][j-1] */
      } else {
        ic = column[j - 1] + weighter.insert(seq2[j - 1]); /* m[i][j-1] */
        dc = column[j] + weighter.delete(seq1[i - 1]); /* m[i-1][j] */
        rc = last + weighter.replace(seq1[i - 1], seq2[j - 1]); /* m[i-1][j-1] */
        // True three-way minimum: a nested ternary that tests ic < dc first
        // never considers rc when that test succeeds.
        column[j] = Math.min(ic, dc, rc);
      }
      last = old;
    }
  }

  dist = column[len2];
  return dist;
};
Stolen from here, with formatting and some examples on how to use it:
/**
 * Builds an edit-distance function.
 *
 * `prices` customises the edit costs through optional 'insert', 'remove',
 * 'substitute' and 'transpose' keys. Each key holds either a constant number
 * or a function returning the cost; any other value means a cost of 1. The
 * price functions receive the relevant character(s) and should return
 * numbers:
 *
 *   insert:     function (inserted) { return NUMBER; }
 *   remove:     function (removed) { return NUMBER; }
 *   substitute: function (from, to) { return NUMBER; }
 *   transpose:  function (backward, forward) { return NUMBER; }
 *
 * Passing `damerau` as exactly false turns transposition off, which leaves a
 * plain Levenshtein distance.
 *
 * Returns distance(down, across), the cost of editing `down` into `across`.
 */
function DamerauLevenshtein(prices, damerau) {
  const allowTranspose = damerau !== false;
  const costs = prices ? prices : {};

  // Turns one entry of `costs` into a cost function. A numeric entry is read
  // again on every call rather than captured once.
  const priceFunctionFor = (key) => {
    const kind = typeof costs[key];
    if (kind === 'function') {
      return costs[key];
    }
    if (kind === 'number') {
      return () => costs[key];
    }
    return () => 1;
  };

  const insert = priceFunctionFor('insert');
  const remove = priceFunctionFor('remove');
  const substitute = priceFunctionFor('substitute');
  const transpose = priceFunctionFor('transpose');

  function distance(down, across) {
    //http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
    if (down === across) {
      return 0;
    }

    // A leading null makes the character arrays 1-based, so row/column 0 of
    // the table stands for the empty prefix.
    const source = [null].concat(down.split(''));
    const target = [null].concat(across.split(''));
    const table = [];

    for (let i = 0; i < source.length; i++) {
      const d = source[i];
      table[i] = [];
      for (let j = 0; j < target.length; j++) {
        const a = target[j];
        let cell;
        if (i === 0 && j === 0) {
          cell = 0;
        } else if (i === 0) {
          //Empty source -> target[1..j] by inserting
          cell = table[i][j - 1] + insert(a);
        } else if (j === 0) {
          //source[1..i] -> empty target by deleting
          cell = table[i - 1][j] + remove(d);
        } else {
          //Cheapest way to turn source[1..i] into target[1..j], built from
          //the costs already known for shorter prefixes.
          const viaRemove = table[i - 1][j] + remove(d);
          const viaInsert = table[i][j - 1] + insert(a);
          const viaSubstitute = table[i - 1][j - 1] + (d === a ? 0 : substitute(d, a));
          cell = Math.min(viaRemove, viaInsert, viaSubstitute);

          //If the last two characters of both prefixes are each other's
          //mirror image, also consider swapping them on top of the cost for
          //the prefixes that are two characters shorter.
          const swapped = i > 1 && j > 1 && source[i - 1] === a && d === target[j - 1];
          if (allowTranspose && swapped) {
            const viaTranspose = table[i - 2][j - 2] + (d === a ? 0 : transpose(d, source[i - 1]));
            cell = Math.min(cell, viaTranspose);
          }
        }
        table[i][j] = cell;
      }
    }
    return table[source.length - 1][target.length - 1];
  }

  return distance;
}
//Returns a distance function to call.
let dl = DamerauLevenshtein();
console.log(dl('12345', '23451'));
console.log(dl('this is a test', 'this is not a test'));
console.log(dl('testing testing 123', 'test'));

Constructing Regular Expressions to match numeric ranges

I'm looking for a way to construct regular expressions to match numeric inputs specified by a given integer range, ie. if I pass in a range of 1,3-4 then a regex would be returned matching just 1, 3 and 4.
I wrote the following method to try and do this:
/**
 * Builds a RegExp from a range specification. "*" yields a pattern for one or
 * more digits; any other value is placed verbatim inside a character class,
 * i.e. it is interpreted character by character, not as numbers.
 */
function generateRegex(values) {
  const pattern = values == "*" ? "^[0-9]+$" : "^[" + values + "]+$";
  return new RegExp(pattern);
}
I'm having issues however as sometimes I need to match double digits, such as "8-16", and I also need to ensure that if I am passed a single digit value, such as "1", that the generated regex matches only 1, and not say 11.
I really would like this to remain a pretty small snippet of code, but am not sure enough about regexs to know how to do this. Would be massively grateful for any help!
EDIT: I realise I wasn't clear, with my original paragraph, so have edited it. I realise the regex's that I originally generated do not work at all
Regexes don't know anything about numbers, only digits. So [8-16] is invalid because you say match between 8 and 1 (instead of 1 and 8 e.g.) plus the digit 6.
If you want to match numbers, you have to consider them lexically. For example, to match numbers between 1 and 30, you have to write something like (other regexes exist):
/^(30|[1-2]\d|[1-9])$/
I was sure it was 4-8 hours :-) In the end (and in its uselessness) it was a good exercise in composing Regexes. You are free to try it. If we exclude one use of continue and the use of the Array constructor, it's fully jsLint ok.
/**
 * Builds an anchored RegExp that matches exactly the non-negative integers
 * described by `matches`.
 *
 * `matches` is a comma separated list whose items are a single number ("7"),
 * an inclusive range ("11-88") or "*" for any number. Whitespace around items
 * and leading zeros are ignored.
 *
 * @param {string} matches - e.g. "1,11-88,90-101".
 * @returns {RegExp|null} the pattern, or null when an item is not made of
 *   digits, has more than one "-", or is a range whose start exceeds its end.
 */
var BuildRegex = function(matches) {
  "use strict";
  var splits = matches.split(','),
    res = '^(',
    i, subSplit, min, max, temp;

  for (i = 0; i < splits.length; i += 1) {
    if (splits[i].trim() === '*') {
      return new RegExp('^([0-9]+)$');
    }
    subSplit = splits[i].split('-');
    if (subSplit.length > 2) {
      return null;
    }
    min = BuildRegex.NormalizeNumber(subSplit[0]);
    if (min === null) {
      return null;
    }
    if (subSplit.length === 1) {
      res += min + '|';
      continue;
    }
    max = BuildRegex.NormalizeNumber(subSplit[1]);
    if (max === null) {
      return null;
    }
    // Both are digit strings without leading zeros, so comparing the lengths
    // and then the text orders them numerically. A reversed range would
    // otherwise produce an invalid character class such as [5-3].
    if (min.length > max.length || (min.length === max.length && min > max)) {
      return null;
    }
    // For 2-998 we first produce 2-9, then 10-99
    temp = BuildRegex.DifferentLength(res, min, max);
    // Then here 100-998
    res = BuildRegex.SameLength(temp.res, temp.min, max);
  }
  res = res.slice(0, -1); // drop the trailing "|"
  res += ')$';
  return new RegExp(res);
};

/**
 * Validates one number of the specification and strips its leading zeros,
 * keeping a single "0" for zero itself. Returns null for anything that is not
 * a run of digits.
 */
BuildRegex.NormalizeNumber = function(str) {
  "use strict";
  var digits = str.trim();
  if (!/^[0-9]+$/.test(digits)) {
    return null;
  }
  digits = BuildRegex.Trim(digits, '0');
  return digits === '' ? '0' : digits;
};

/** Returns `ch` repeated `n` times. */
BuildRegex.Repeat = function(ch, n) {
  "use strict";
  return new Array(n + 1).join(ch);
};

/** Removes every leading occurrence of the character `ch` from `str`. */
BuildRegex.Trim = function(str, ch) {
  "use strict";
  var i = 0;
  while (i < str.length && str[i] === ch) {
    i += 1;
  }
  return str.substr(i);
};

/** Tells whether every character of `str` from index `start` on is `digit`. */
BuildRegex.IsOnlyDigit = function(str, start, digit) {
  "use strict";
  var i;
  for (i = start; i < str.length; i += 1) {
    if (str[i] !== digit) {
      return false;
    }
  }
  return true;
};

/** Pattern for one digit between `min` and `max`: "5" or "[3-7]". */
BuildRegex.RangeDigit = function(min, max) {
  "use strict";
  if (min === max) {
    return min;
  }
  return '[' + min + '-' + max + ']';
};

/**
 * Appends to `res` the alternatives covering every number from `min` up to
 * the largest number that has fewer digits than `max`.
 *
 * @returns {{min: string, res: string}} the extended pattern and the first
 *   number still to be covered, which has as many digits as `max`.
 */
BuildRegex.DifferentLength = function(res, min, max) {
  "use strict";
  var tempMin = min,
    i;
  for (i = min.length; i < max.length; i += 1) {
    // Cover tempMin .. 9…9 (i digits), then continue from 10…0 (i + 1 digits).
    res = BuildRegex.SameLength(res, tempMin, BuildRegex.Repeat('9', i));
    tempMin = '1' + BuildRegex.Repeat('0', i);
  }
  return {
    min: tempMin,
    res: res
  };
};

/**
 * Appends to `res` the alternatives covering `min`..`max`, two numbers with
 * the same number of digits.
 */
BuildRegex.SameLength = function(res, min, max) {
  "use strict";
  var commonPart;
  // 100-100
  if (min === max) {
    return res + min + '|';
  }
  // Digits shared by both bounds become a literal prefix.
  for (commonPart = 0; commonPart < min.length; commonPart += 1) {
    if (min[commonPart] !== max[commonPart]) {
      break;
    }
  }
  return BuildRegex.RecursivelyAddRange(res, min.substr(0, commonPart), min.substr(commonPart), max.substr(commonPart));
};

/**
 * Appends to `res` the alternatives matching `prefix` followed by any digit
 * string between `min` and `max` (same length). The range is split into up to
 * three parts: the numbers sharing the first digit of `min`, a middle block
 * of complete leading digits, and the numbers sharing the first digit of
 * `max`.
 */
BuildRegex.RecursivelyAddRange = function(res, prefix, min, max) {
  "use strict";
  var only0Min, only9Max, i, middleMin, middleMax;
  if (min.length === 1) {
    return res + prefix + BuildRegex.RangeDigit(min[0], max[0]) + '|';
  }
  // min ends in zeros and max ends in nines: only the first digit is
  // constrained, every other position accepts any digit.
  only0Min = BuildRegex.IsOnlyDigit(min, 1, '0');
  only9Max = BuildRegex.IsOnlyDigit(max, 1, '9');
  if (only0Min && only9Max) {
    res += prefix;
    res += BuildRegex.RangeDigit(min[0], max[0]);
    for (i = 1; i < min.length; i += 1) {
      res += '[0-9]';
    }
    res += '|';
    return res;
  }
  middleMin = min;
  if (!only0Min) {
    // Lower part: from min up to <first digit of min>9…9.
    res = BuildRegex.RecursivelyAddRange(res, prefix + min[0], min.substr(1), BuildRegex.Repeat('9', min.length - 1));
    if (min[0] !== '9') {
      middleMin = String.fromCharCode(min.charCodeAt(0) + 1) + BuildRegex.Repeat('0', min.length - 1);
    } else {
      middleMin = null;
    }
  }
  middleMax = max;
  if (!only9Max) {
    if (max[0] !== '0') {
      middleMax = String.fromCharCode(max.charCodeAt(0) - 1) + BuildRegex.Repeat('9', max.length - 1);
    } else {
      middleMax = null;
    }
  }
  if (middleMin !== null && middleMax !== null && middleMin[0] <= middleMax[0]) {
    // Middle part: complete blocks of leading digits.
    res = BuildRegex.RecursivelyAddRange(res, prefix + BuildRegex.RangeDigit(middleMin[0], middleMax[0]), middleMin.substr(1), middleMax.substr(1));
  }
  if (!only9Max) {
    // Upper part: from <first digit of max>0…0 up to max.
    res = BuildRegex.RecursivelyAddRange(res, prefix + max[0], BuildRegex.Repeat('0', max.length - 1), max.substr(1));
  }
  return res;
};
// ----------------------------------------------------------
/** Writes the specification `p` and the pattern BuildRegex derives from it into the document. */
var printRegex = function(p) {
  "use strict";
  var line = `${p}: ${BuildRegex(p)}<br>`;
  document.write(line);
};

// Sample specifications, printed in this order.
[
  '*',
  '1',
  '1,*',
  '1,2,3,4',
  '1,11-88',
  '1,11-88,90-101',
  '1-11111',
  '75-11119'
].forEach(function(spec) {
  printRegex(spec);
});
Test here http://jsfiddle.net/dnqYV/
The C# version is here http://ideone.com/3aEt3E
I'm not sure there is a (sane) way to test integer ranges with RegExp. I believe you're fixated on RegExp, where there are much simpler (more flexible) approaches. Take a look at IntRangeTest().
// NOTE(review): IntRangeTest is not defined in this excerpt; presumably
// test(str) returns a boolean telling whether str is an integer inside the
// configured ranges — confirm against its definition.
var range = new IntRangeTest('0,10-20');
// Logs the specification followed by one comparison per sample input; each
// comparison is true when test() returned the value written on its right.
console.log(
"0,10-20",
range.test("") == false,
range.test("-5") == false,
range.test("0") == true,
range.test("5") == false,
range.test("11") == true,
range.test("123.23") == false
);
If you feel like it, you can easily add this to Number.prototype. You could also quite easily make this an extension to RegExp, if that's what you're worried about.
Ok so it seems that there are 4 main cases that I need to address:
Single digits, ie 1, would simply generate the regex /^1$/
Multiple digits, ie 12, would require the regex /^12$/
Single digit ranges, ie 3-6, would generate the regex /^[3-6]$/
And finally, multiple digit ranges work in a similar way to multiple digits but with a range, ie 11-14 would become /^1[1-4]$/. These would need to be split into multiple alternatives if they span multiple start digits, ie 23-31 would become /^(2[3-9]|3[0-1])$/ (the group is needed so that the anchors apply to both alternatives)
Therefore, all I need to do is identify each of these cases and create a compound regex using | like xanatos suggested. Ie, to match all of the above criteria would generate a regex like:
/^( 1 | 12 | [3-6] | 1[1-4] | 2[3-9]|3[0-1] )$/
Do others agree this seems like a decent way to progress?

Categories