Programming language interpreter in a non-english language; - javascript

I was trying to implement this using a different language. but it doesn't seem to handle the character that has been inserted, This actually works with english I tweaked it in amharic, and I was thinking to add more things if the handling works like creating an editor or something. If anyone is willing to help, I would appreciate it. I know that I should normalize the unicode characters, but I don't know efficient way I could do that.
function InputStream(input) {
var pos = 0, line = 1, col = 0;
return {
next : next,
peek : peek,
eof : eof,
croak : croak,
};
function next() {
var ch = input.charAt(pos++);
if (ch == "\n") line++, col = 0; else col++;
return ch;
}
function peek() {
return input.charAt(pos);
}
function eof() {
return peek() == "";
}
function croak(msg) {
throw new Error(msg + " (" + line + ":" + col + ")");
}
}
function TokenStream(input) {
var current = null;
var keywords = " ከ ወደ ሌላ ስራ ስራ አውነት ሀሰት ";
return {
next : next,
peek : peek,
eof : eof,
croak : input.croak
};
function is_keyword(x) {
return keywords.indexOf(" " + x + " ") >= 0;
}
function is_digit(ch) {
return /[0-9]/i.test(ch);
}
function is_id_start(ch) {
return /[^\u1380-\u1380f]+[a-z_]/i.test(ch);
}
function is_id(ch) {
return is_id_start(ch) || "?!-<>=0123456789".indexOf(ch) >= 0;
}
function is_op_char(ch) {
return "+-*/%=&|<>!".indexOf(ch) >= 0;
}
function is_punc(ch) {
return "፣፤(){}[]".indexOf(ch) >= 0;
}
function is_whitespace(ch) {
return " \t\n".indexOf(ch) >= 0;
}
function read_while(predicate) {
var str = "";
while (!input.eof() && predicate(input.peek()))
str += input.next();
return str;
}
function read_number() {
var has_dot = false;
var number = read_while(function(ch){
if (ch == ".") {
if (has_dot) return false;
has_dot = true;
return true;
}
return is_digit(ch);
});
return { type: "num", value: parseFloat(number) };
}
function read_ident() {
var id = read_while(is_id);
return {
type : is_keyword(id) ? "kw" : "var",
value : id
};
}
function read_escaped(end) {
var escaped = false, str = "";
input.next();
while (!input.eof()) {
var ch = input.next();
if (escaped) {
str += ch;
escaped = false;
} else if (ch == "\\") {
escaped = true;
} else if (ch == end) {
break;
} else {
str += ch;
}
}
return str;
}
function read_string() {
return { type: "str", value: read_escaped('"') };
}
function skip_comment() {
read_while(function(ch){ return ch != "\n" });
input.next();
}
function read_next() {
read_while(is_whitespace);
if (input.eof()) return null;
var ch = input.peek();
if (ch == "#") {
skip_comment();
return read_next();
}
if (ch == '"') return read_string();
if (is_digit(ch)) return read_number();
if (is_id_start(ch)) return read_ident();
if (is_punc(ch)) return {
type : "punc",
value : input.next()
};
if (is_op_char(ch)) return {
type : "op",
value : read_while(is_op_char)
};
input.croak("Can't handle character: " + ch);
}
function peek() {
return current || (current = read_next());
}
function next() {
var tok = current;
current = null;
return tok || read_next();
}
function eof() {
return peek() == null;
}
}
function parse(input) {
var PRECEDENCE = {
"=": 1,
"||": 2,
"&&": 3,
"<": 7, ">": 7, "<=": 7, ">=": 7, "==": 7, "!=": 7,
"+": 10, "-": 10,
"*": 20, "/": 20, "%": 20,
};
var FALSE = { type: "bool", value: false };
return parse_toplevel();
function is_punc(ch) {
var tok = input.peek();
return tok && tok.type == "punc" && (!ch || tok.value == ch) && tok;
}
function is_kw(kw) {
var tok = input.peek();
return tok && tok.type == "kw" && (!kw || tok.value == kw) && tok;
}
function is_op(op) {
var tok = input.peek();
return tok && tok.type == "op" && (!op || tok.value == op) && tok;
}
function skip_punc(ch) {
if (is_punc(ch)) input.next();
else input.croak("Expecting punctuation: \"" + ch + "\"");
}
function skip_kw(kw) {
if (is_kw(kw)) input.next();
else input.croak("Expecting keyword: \"" + kw + "\"");
}
function skip_op(op) {
if (is_op(op)) input.next();
else input.croak("Expecting operator: \"" + op + "\"");
}
function unexpected() {
input.croak("Unexpected token: " + JSON.stringify(input.peek()));
}
function maybe_binary(left, my_prec) {
var tok = is_op();
if (tok) {
var his_prec = PRECEDENCE[tok.value];
if (his_prec > my_prec) {
input.next();
return maybe_binary({
type : tok.value == "=" ? "assign" : "binary",
operator : tok.value,
left : left,
right : maybe_binary(parse_atom(), his_prec)
}, my_prec);
}
}
return left;
}
function delimited(start, stop, separator, parser) {
var a = [], first = true;
skip_punc(start);
while (!input.eof()) {
if (is_punc(stop)) break;
if (first) first = false; else skip_punc(separator);
if (is_punc(stop)) break;
a.push(parser());
}
skip_punc(stop);
return a;
}
function parse_call(func) {
return {
type: "call",
func: func,
args: delimited("(", ")", "፣", parse_expression),
};
}
function parse_varname() {
var name = input.next();
if (name.type != "var") input.croak("Expecting variable name");
return name.value;
}
function parse_if() {
skip_kw("ከ");
var cond = parse_expression();
if (!is_punc("{")) skip_kw("ወደ");
var then = parse_expression();
var ret = {
type: "if",
cond: cond,
then: then,
};
if (is_kw("ሌላ")) {
input.next();
ret.else = parse_expression();
}
return ret;
}
function parse_lambda() {
return {
type: "lambda",
vars: delimited("(", ")", "፣", parse_varname),
body: parse_expression()
};
}
function parse_bool() {
return {
type : "bool",
value : input.next().value == "አውነት"
};
}
function maybe_call(expr) {
expr = expr();
return is_punc("(") ? parse_call(expr) : expr;
}
function parse_atom() {
return maybe_call(function(){
if (is_punc("(")) {
input.next();
var exp = parse_expression();
skip_punc(")");
return exp;
}
if (is_punc("{")) return parse_prog();
if (is_kw("if")) return parse_if();
if (is_kw("አውነት") || is_kw("ሀሰት")) return parse_bool();
if (is_kw("ስራ") || is_kw("ሥራ")) {
input.next();
return parse_lambda();
}
var tok = input.next();
if (tok.type == "var" || tok.type == "num" || tok.type == "str")
return tok;
unexpected();
});
}
function parse_toplevel() {
var prog = [];
while (!input.eof()) {
prog.push(parse_expression());
if (!input.eof()) skip_punc("፤");
}
return { type: "prog", prog: prog };
}
function parse_prog() {
var prog = delimited("{", "}", "፤", parse_expression);
if (prog.length == 0) return FALSE;
if (prog.length == 1) return prog[0];
return { type: "prog", prog: prog };
}
function parse_expression() {
return maybe_call(function(){
return maybe_binary(parse_atom(), 0);
});
}
}
function Environment(parent) {
this.vars = Object.create(parent ? parent.vars : null);
this.parent = parent;
}
Environment.prototype = {
extend: function() {
return new Environment(this);
},
lookup: function(name) {
var scope = this;
while (scope) {
if (Object.prototype.hasOwnProperty.call(scope.vars, name))
return scope;
scope = scope.parent;
}
},
get: function(name) {
if (name in this.vars)
return this.vars[name];
throw new Error("Undefined variable " + name);
},
set: function(name, value) {
var scope = this.lookup(name);
if (!scope && this.parent)
throw new Error("Undefined variable " + name);
return (scope || this).vars[name] = value;
},
def: function(name, value) {
return this.vars[name] = value;
}
};
function evaluate(exp, env) {
switch (exp.type) {
case "num":
case "str":
case "bool":
return exp.value;
case "var":
return env.get(exp.value);
case "assign":
if (exp.left.type != "ተጠቀመም")
throw new Error("Cannot assign to " + JSON.stringify(exp.left));
return env.set(exp.left.value, evaluate(exp.right, env));
case "binary":
return apply_op(exp.operator,
evaluate(exp.left, env),
evaluate(exp.right, env));
case "lambda":
return make_lambda(env, exp);
case "if":
var cond = evaluate(exp.cond, env);
if (cond !== false) return evaluate(exp.then, env);
return exp.else ? evaluate(exp.else, env) : false;
case "prog":
var val = false;
exp.prog.forEach(function(exp){ val = evaluate(exp, env) });
return val;
case "call":
var func = evaluate(exp.func, env);
return func.apply(null, exp.args.map(function(arg){
return evaluate(arg, env);
}));
default:
throw new Error("I don't know how to evaluate " + exp.type);
}
}
function apply_op(op, a, b) {
function num(x) {
if (typeof x != "number")
throw new Error("Expected number but got " + x);
return x;
}
function div(x) {
if (num(x) == 0)
throw new Error("Divide by zero");
return x;
}
switch (op) {
case "+": return num(a) + num(b);
case "-": return num(a) - num(b);
case "*": return num(a) * num(b);
case "/": return num(a) / div(b);
case "%": return num(a) % div(b);
case "&&": return a !== false && b;
case "||": return a !== false ? a : b;
case "<": return num(a) < num(b);
case ">": return num(a) > num(b);
case "<=": return num(a) <= num(b);
case ">=": return num(a) >= num(b);
case "==": return a === b;
case "!=": return a !== b;
}
throw new Error("Can't apply operator " + op);
}
function make_lambda(env, exp) {
function lambda() {
var names = exp.vars;
var scope = env.extend();
for (var i = 0; i < names.length; ++i)
scope.def(names[i], i < arguments.length ? arguments[i] : false);
return evaluate(exp.body, scope);
}
return lambda;
}
/* -----[ entry point for NodeJS ]----- */
var globalEnv = new Environment();
globalEnv.def("time", function(func){
try {
console.time("time");
return func();
} finally {
console.timeEnd("time");
}
});
if (typeof process != "undefined") (function(){
var util = require("util");
globalEnv.def("ፃፍ", function(val){
console.log(val);
});
globalEnv.def("print", function(val){
util.print(val);
});
var code = "";
process.stdin.setEncoding("utf8");
process.stdin.on("readable", function(){
var chunk = process.stdin.read();
if (chunk) code += chunk;
});
process.stdin.on("end", function(){
var ast = parse(TokenStream(InputStream(code)));
evaluate(ast, globalEnv);
});
})();
Here's the english version this should be how print works "println("hello world!");" and i would like to use "ፃፍ("ሰላም ሰላም")፤" where 'print' and ';' are converted to 'ፃፍ' and "፤

That's a lot of code, and no explanation of what it fails to do, other than "it doesn't seem to handle the character that has been inserted" which could mean just about anything. If I had an Amharic keyboard layout and knew how to type with it, I might give the code a try to see what's wrong, but I don't have one and I wouldn't know how to use it, so I didn't try.
However, I think your is_id and is_id_start function is not likely to work. You have:
function is_id_start(ch) {
return /[^\u1380-\u1380f]+[a-z_]/i.test(ch);
}
function is_id(ch) {
return is_id_start(ch) || "?!-<>=0123456789".indexOf(ch) >= 0;
}
I was a little puzzled to see !-<>= as possible identifier characters, since they are also operator characters. Presumably, the intention is that if an identifier is followed by one of those operators, there must be whitespace in between. But I don't think that's the problem you are experiencing with Amharic. That seems more likely to have to do with this slightly odd regular expression: /[^\u1380-\u1380f]+[a-z_]/i.
To start with, \u1380f is not a single unicode escape character. \u must be followed by exactly four hex digits, so the f is not part of the escape. It's just an ordinary f. That makes the range \u1380-\u1380, which consists of the single character ᎀ.
It's possible that you meant \u1380-\u138f, which would be 16 of the 453 Ethiopic letters. My knowledge of Amharic is pretty limited, and certainly not adequate to understand what differentiates those particular characters, so I can't even begin to guess whether that's reasonable.
However, you use that range in an inverted regular expression; even making that correction, what [^\u1380-\u138f] matches is any character other than a character in that range. That would include characters from all over the Unicode galaxy, including lots of other scripts, so I'm pretty certain that it is not what you intended.
Furthermore, your ID start pattern is actually /[^\u1380-\u1380f]+[a-z_]/i, which means "one or more [...] followed by a Latin alphabetic character or underscore". In other words, the pattern requires at least two characters to match, first an Amharic character (or, as written, anything other than one of those Amharic characters) and then a Latin alphabetic character.
That's clearly wrong, because you are matching that pattern against the result of input.peek(), which can only be a single character. So is_id is guaranteed to return false, which is probably related to your problem.
Personally, I think you'd be a lot better off using Unicode Property classes than trying to write down a list of valid identifier characters. (Why restrict identifiers to just English and Amharic, for example?) Ecmascript conveniently comes with the standard character sets recommended by the Unicode technical experts to be used for identifiers. You could just use them:
function is_id_start(ch) {
return /\p{XID_Start}/u.test(ch);
}
function is_id(ch) {
return /[\p{XID_Continue}?!-<>=]/u.test(ch);
}
Note that you must use the u flag to enable the use of Unicode properties.
If you really wanted to restrict that to Amharic and Latin, you could do so by also requiring that the characters also match [\p{Script=Ethiopic}\p{Script=Latin}]. (You can use a lookahead assertion to do that.)

Related

How to make a fully functional brainf*ck interpreter?

I have tried to implement a BF interpreter in Javascript. It works for many programs like printing Hello world, looping, etc.
Here is link to a sample interpreter that I use for comparing outputs: https://sange.fi/esoteric/brainfuck/impl/interp/i.html
But when I try to run a BF to C program, it gets stuck like it is in an infinite loop. It however does work in the sample interpreter above. What am I doing wrong?
Here is a BF code that converts an input BF code to C.
+++[>+++++<-]>>+<[>>++++>++>+++++>+++++>+>>+<++[++<]>---]
>++++.>>>.+++++.>------.<--.+++++++++.>+.+.<<<<---.[>]<<.<<<.-------.>++++.
<+++++.+.>-----.>+.<++++.>>++.>-----.
<<<-----.+++++.-------.<--.<<<.>>>.<<+.>------.-..--.+++.-----<++.<--[>+<-]
>>>>>--.--.<++++.>>-.<<<.>>>--.>.
<<<<-----.>----.++++++++.----<+.+++++++++>>--.+.++<<<<.[>]<.>>
,[>>+++[<+++++++>-]<[<[-[-<]]>>[>]<-]<[<+++++>-[<+++>-[<-->-[<+++>-
[<++++[>[->>]<[>>]<<-]>[<+++>-[<--->-[<++++>-[<+++[>[-[-[-[->>]]]]<[>>]<<-]
>[<+>-[<->-[<++>-[<[-]>-]]]]]]]]]]]]]
<[
-[-[>+<-]>]
<[<<<<.>+++.+.+++.-------.>---.++.<.>-.++<<<<.[>]>>>>>>>>>]
<[[<]>++.--[>]>>>>>>>>]
<[<<++..-->>>>>>]
<[<<..>>>>>]
<[<<..-.+>>>>]
<[<<++..---.+>>>]
<[<<<.>>.>>>>>]
<[<<<<-----.+++++>.----.+++.+>---.<<<-.[>]>]
<[<<<<.-----.>++++.<++.+++>----.>---.<<<.-[>]]
<[<<<<<----.>>.<<.+++++.>>>+.++>.>>]
<.>
]>
,]
<<<<<.<+.>++++.<----.>>---.<<<-.>>>+.>.>.[<]>++.[>]<.
Here is my implementation:
class Node {
constructor() {
this.value = 0;
this.next = null;
this.prev = null;
}
increment() {
this.value++;
}
decrement() {
this.value--;
}
}
class Memory {
constructor() {
this.current = new Node();
this.outputBuffer = [];
}
moveRight() {
if (this.current.next === null) {
const rightNode = new Node();
rightNode.prev = this.current
this.current.next = rightNode;
}
this.current = this.current.next;
}
moveLeft() {
if (this.current.prev === null) {
const leftNode = new Node()
leftNode.next = this.current;
this.current.prev = leftNode;
}
this.current = this.current.prev;
}
increment() {
this.current.increment();
}
decrement() {
this.current.decrement();
}
print() {
this.outputBuffer.push(String.fromCharCode(this.current.value));
}
input(ch) {
this.current.value = ch.charCodeAt(0);
}
}
class Interpreter {
reset() {
this.memory = new Memory();
this.instructionPointer = 0;
this.inputPointer = 0;
this.openingToClosingBrackets = new Map();
this.closingToOpeningBrackets = new Map();
}
interpret(code, input = "") {
this.reset();
this.code = code;
this.matchSquareBrackets();
this.input = input;
while (!this.reachedEOF()) {
const instruction = this.code[this.instructionPointer];
switch (instruction) {
case "+": this.memory.increment(); break;
case "-": this.memory.decrement(); break;
case ">": this.memory.moveRight(); break;
case "<": this.memory.moveLeft(); break;
case ".": this.memory.print(); break;
case ",": this.memory.input(this.getNextCharacter()); break;
case "[": this.loopStart(); break;
case "]": this.loopEnd(); break;
}
this.instructionPointer++;
}
return this.memory.outputBuffer.join("");
}
reachedEOF() {
return this.instructionPointer >= this.code.length;
}
getNextCharacter() {
if (this.inputPointer >= this.input.length) {
throw new Error("EOF. Expected more input characters.");
}
return this.input[this.inputPointer];
}
loopStart() {
if (this.memory.current.value !== 0) {
return;
}
this.instructionPointer = this.openingToClosingBrackets.get(
this.instructionPointer
);
}
loopEnd() {
if (this.memory.current.value === 0) {
return;
}
this.instructionPointer = this.closingToOpeningBrackets.get(
this.instructionPointer
);
}
matchSquareBrackets() {
const openingStack = [];
for (let i = 0; i < this.code.length; i++) {
const ch = this.code[i];
if (ch === "[") {
openingStack.push(i);
}
if (ch === "]") {
if (openingStack.length === 0) {
throw new Error("No matching '[' for ']' at index: " + i);
}
const openingMatch = openingStack.pop();
this.openingToClosingBrackets.set(openingMatch, i);
this.closingToOpeningBrackets.set(i, openingMatch);
}
}
if (openingStack.length > 0) {
throw new Error(
"No matching ']' for '[' at indices: " + openingStack.join(", ")
);
}
}
}
Your getNextCharacter doesn't work correctly: if there's at least one character of input, it will return that character each time it's called - it never increments the input index. Since the bf2c program keeps reading input until there is no more input, this causes your infinite loop.
Another problem with your code is that you throw an exception when , is used and there is no more input, causing the bf2c to abort with an exception when it reaches the end of the input. So you'll either need to explicitly terminate the input with a \0, so that the bf2c program knows when to stop reading or change getNextCharacter to return '\0' at the end of input instead of throwing an exception.

Fastest and robust way to replace various occurencies of a string in dom objects with JavaScript/jQuery

I have written a function which replace all occurence of "#TransResource....." e.g. "#TransResource.Contact.Send" with the replacement from a json array.
example for occurence could be:
<button class="btn btn-primary btn-block js-send">#TransResource.Contact.Send</button>
<input type="text" class="form-control" id="LastName" name="LastName" placeholder="#TransResource.Contact.LastName">
#TransResource.Contact.LastName"
All went fine, except IE/edge lost some translations and I am unable to figure out why.
Can one solve this and/or has a better or robust approach?
You can see, the fiddle works perfect in Chrome, but the button text translation is missing in edge.
here is a fiddle
my JavaScript code.
var
getLocalResource = function (key) {
try {
var retVal = key;
retVal = StringResource[retVal] === null || StringResource[retVal] === undefined ? retVal : StringResource[retVal];
return retVal;
}
catch (err) {
console.log(arguments.callee.name + ": " + err);
}
},
translate = function (node) {
try {
var pattern = /#TransResource.[a-zA-Z0-9.]+/g;
if (!node) node = $("body *");
node.contents().each(function () {
if (this.nodeType === 3) {
this.nodeValue = this.nodeValue.replace(pattern, function (match, entity) {
return getLocalResource(match.slice(15));
});
}
if (this.attributes) {
for (var i = 0, atts = this.attributes, n = atts.length, arr = []; i < n; i++) {
if (atts[i].nodeValue !== "") { // Ignore this node it is an empty text node
atts[i].nodeValue = atts[i].nodeValue.trim().replace(pattern, function (match, entity) {
return getLocalResource(match.slice(15));
});
}
}
}
});
}
catch (err) {
console.log(arguments.callee.name + ": " + err);
}
};
and the json:
var StringResource = {
"Contact.EmailAddress": "Email address",
"Contact.Headline": "Contact",
"Contact.Description": "please leaf us a message...?",
"Contact.Teasertext": "Please leaf us a message <b>bold text</b>",
"Contact.Telephone": "Telephone",
"Contact.Send": "Send",
"Page.Contact": "Contact"
};
edit (this is now my solution):
thanks to #Chris-G, his comment removes the IE issue and also thanks to #trincot for the perfomance update. All together is now this script:
var
getLocalResource = function (key) {
try {
var retVal = key;
retVal = StringResource[retVal] === null || StringResource[retVal] === undefined ? retVal : StringResource[retVal];
return retVal;
}
catch (err) {
console.log(arguments.callee.name + ": " + err);
}
},
translate = function (node) {
try {
var pattern = /#TransResource.[a-zA-Z0-9.]+/g;
if (!node) node = $("body *");
node.contents().each(function () {
if (this.nodeType === 3 && this.nodeValue.trim().length) {
var s = this.nodeValue.replace(pattern, function (match, entity) {
return getLocalResource(match.slice(15));
});
if (this.nodeValue !== s) this.nodeValue = s;
}
if (this.attributes) {
for (var i = 0, atts = this.attributes, n = atts.length, arr = []; i < n; i++) {
if (atts[i].nodeValue !== "") { // Ignore this node it is an empty text node
atts[i].nodeValue = atts[i].nodeValue.trim().replace(pattern, function (match, entity) {
return getLocalResource(match.slice(15));
});
}
}
}
});
}
catch (err) {
console.log(arguments.callee.name + ": " + err);
}
};
The problem in IE is that for the textarea node, when it has no content, the assignment to node.nodeValue will trigger an exception "Invalid argument". Don't ask me why.
If you even add as little as a space in the HTML between the opening and closing textarea tag, the error disappears, but if you do that the placeholder attribute becomes useless.
But you could also work around the problem in the code, by only assigning to node.nodeValue when the assigned value is different from its current value:
if (this.nodeType === 3) {
var s = this.nodeValue.replace(pattern, function (match, entity) {
return getLocalResource(match.slice(15));
});
if (this.nodeValue !== s) this.nodeValue = s;
}

Correcting incorrectly encoded string (ASCII characters back to UTF-8)

Here is a sample WiFi ssid I have extracted from an Android "wifi config file" (wpa_supplicant.conf).
I'm trying to display all the ssid's in the file, most are okay as they are normal strings wrapped in quotes, for example,
network={
ssid="Linksys"
...
}
However, some entries just wanted to be different and special, for example,
network={
ssid=e299aa20e6b7a1e5ae9ae69c89e98ca2e589a920e299ab
...
}
Now, the question is, how do I convert it back to a readable string (preferably in JS)? I suspect the encoding was wrong (it displays correctly on a native device though.)
Apparently the string is in hex unencoded. By turning it back to binary following by some string manipulation, I am able to encode it back to the readable form.
function HextoUTF8(txt) {
function HexStringToBytes(str) {
if (str.length % 2) throw TypeError("Not a valid length");
return [].map.call(str, function(e) {
return ("000" + parseInt(e, 16).toString(2)).slice(-4);
}).join("").match(/.{8}/g);
}
function BytesToUTF8(bytes) {
var inExpectationMode = false,
itr = new Iterator(bytes),
byte,
availableBitsTable = {
"1": -7,
"2": -5,
"3": -4,
"4": -3
},
expectingBitsLeft = 0,
currectCharacter = "",
result = "";
while (byte = itr.next(), !byte.ended) {
byte = byte.value;
if (inExpectationMode) {
currectCharacter += byte.slice(-6);
} else {
//First in sequence
expectingBitsLeft = determineSequenceLength(byte);
currectCharacter += byte.slice(availableBitsTable[expectingBitsLeft]);
}
inExpectationMode = true;
expectingBitsLeft--;
if (!expectingBitsLeft) {
inExpectationMode = false;
result += String.fromCharCode(parseInt(currectCharacter, 2));
currectCharacter = "";
}
}
return result;
}
function determineSequenceLength(byte) {
if (byte[0] === "0") return 1;
else if (byte.slice(0, 3) === "110") return 2;
else if (byte.slice(0, 4) === "1110") return 3;
else if (byte.slice(0, 5) === "11110") return 4;
}
function Iterator(array) {
if (this === window) throw TypeError("This is a class");
if (!Array.isArray(array)) throw TypeError("An array is required");
this.i = -1;
this.ended = !array.length;
this.array = function() {
return array;
};
}
Iterator.prototype.next = function() {
if (this.ended || ++this.i == this.array().length) {
this.ended = true;
return {
ended: true
};
} else {
return {
ended: this.ended,
value: this.array()[this.i]
};
}
}
return BytesToUTF8(HexStringToBytes(txt));
}
Optimally I should be doing bit manipulation instead, but whatever, it works,
> HextoUTF8("e299aa20e6b7a1e5ae9ae69c89e98ca2e589a920e299ab");
> "♪ 淡定有錢剩 ♫"

Evaluate string expression using Javascript or jQuery

I have to evaluate expressions like below, which are available in string variable, without using eval or external libraries or third party packages:
"abs(add(multiply(-1,multiply(-1,subtract(89,19880))),subtract(add(12,add(247,45986)),98)))"
Can anyone suggest how it can be done using Javascript or jQuery?
add(expr1,expr2) - takes two operands and returns their sum.
subtract(expr1,expr2) - takes two operands and returns their difference (expr1 - expr2).
multiply(expr1,expr2) - takes two operands and returns their product.
abs(expr1) - takes one operand and returns its absolute value.
Implement the traditional way of expression evaluation using stacks - operand stack, operator stack. Push-pop from the stacks. That's the best way I can think of.
I did not really bother making the code efficient or bullet proof (there are probably some bugs in here), but here's an example on how you could do it. You could convert your infix expression to a postfix representation as a tokens array and then perform the evaluation of that postfix representation.
NOTE: I did not allow for expressions with spaces to be correctly tokenized, but you could change this if that's not what you need.
var logEl = document.getElementById('log'),
expInput = document.querySelector('input');
document.querySelector('button').addEventListener('click', function () {
updateEvaluationResult(expInput.value);
});
updateEvaluationResult(expInput.value);
function updateEvaluationResult(exp) {
var logMsg;
try { logMsg = evaluateExpression(exp); }
catch (e) {
logMsg = e.message + ('charIndex' in e? ' (char index ' + e.charIndex + ')' : '')
}
logEl.textContent = logMsg;
}
function evaluateExpression(exp) {
var functions = {
abs: Math.abs,
multiply: function(num1, num2) {
return num1 * num2;
},
substract: function(num1, num2) {
return num1 - num2;
},
add: function(num1, num2) {
return num1 + num2;
}
};
return (evaluateExpression = function(exp) {
var fn;
return postfixTokenizationOf(exp).reduce(function(resultStack, token) {
if (typeof token == 'number') resultStack.push(token);
else {
fn = functions[token];
if (!fn) throw new Error("'" + token + "' is an invalid function");
resultStack.push(functions[token].apply(
null,
resultStack.splice(resultStack.length - fn.length)
));
}
return resultStack;
}, []).pop();
})(exp);
}
function postfixTokenizationOf(exp) {
if (!exp) return [];
var stack = [],
output = [],
indexOf = [].indexOf,
tokenRx = /[^-(),\d]+|[(),]|[-\d]+/g,
functionTokenRx = /[^-(),\d]+/,
charTokenHandlers = {
'(': stack.push.bind(stack, '('),
')': handleRightParenthesis,
',': handleArgSeperator
},
charTokenHandler, token, match;
while (match = tokenRx.exec(exp)) {
token = match[0];
if ((charTokenHandler = charTokenHandlers[token])) {
charTokenHandler();
continue;
}
if (isNumeric(token)) {
output.push(+token);
continue;
}
handleFunction();
}
if (popStackUntilOneOfTokenFound('()')) throwMismatchedParenthesisError();
return output;
function handleFunction() {
var nextCharIndex = tokenRx.lastIndex;
if (exp[nextCharIndex] != '(') throwError('expected a function call');
else stack.push(token);
}
function handleRightParenthesis() {
if (!popStackUntilOneOfTokenFound('(')) throwMismatchedParenthesisError();
stack.pop();
if (isFunction(topOfStack())) output.push(stack.pop());
}
function handleArgSeperator() {
if (!popStackUntilOneOfTokenFound('(')) throwError(
'mismatched parenthesis or misplaced argument seperator'
);
}
function popStackUntilOneOfTokenFound(tokenChars) {
var t;
while (indexOf.call(tokenChars, t = topOfStack()) == -1 && t) output.push(stack.pop());
return !!stack.length;
}
function throwError(msg) {
var err = new Error(msg);
err.charIndex = tokenRx.lastIndex;
throw err;
}
function throwMismatchedParenthesisError() {
throwError('mismatched parenthesis');
}
function topOfStack() {
return stack[stack.length - 1];
}
function isFunction(token) {
return functionTokenRx.test(token);
}
function isNumeric(token) {
return parseFloat(token) == token;
}
}
input {
width: 500px;
}
<label>Exp:
<input type="text" value="abs(add(multiply(-1,multiply(-1,substract(89,19880))),substract(add(12,add(247,45986)),98)))">
</label>
<div id="log"></div>
<button>Evaluate</button>
The easiest way to do this is to create a bunch of functions that do what you want, then eval() the code.
function calc(str){
function subtract(a, b){
return a-b;
}
function add(a, b){
return a+b;
}
function mulitiply(a, b){
return a*b;
}
function divide(a, b){
return a/b;
}
function abs(a){
return Math.abs(a);
}
return eval('('+str+')');
}
console.log(calc('abs(add(multiply(-1,multiply(-1,subtract(89,19880))),subtract(add(12,add(247,45986)),98)))'));
How about to write your custom functions?
function add(ad1, ad2) {
var sum = parseFloat(ad1) + parseFloat(ad2);
return sum;
}
function subtract(sub1, sub2) {
var sub = parseFloat(sub1) - parseFloat(sub2);
return sub;
}
function multiply(mlt1, mlt2) {
var mlt = parseFloat(mlt1) * parseFloat(mlt2);
return mlt;
}
function abs(ab1) {
var abs = Math.abs(parseFloat(ab1));
return abs;
}
function evaluatethis(exp) {
document.write('<script>alert(' + exp + ');</' + 'script>');
}
evaluatethis("abs(add(multiply(-1,multiply(-1,subtract(89,19880))),subtract(add(12,add(247,45986)),98)))");

In-browser JavaScript code editor with syntax highlighting support for Smarty template tags?

I have searched high and low on the Interwebs, and found some really awesome JS code editors with syntax highlighting and indentation and more... but none seem to have support for Smarty template tags yet.
A new Smarty mode for CodeMirror would be the best, but I'll use a different editor if I need to.
I did find this blog post... but it is VERY simple, and I would like to still support mixed HTML/CSS/JS highlighting, like the PHP mode for CodeMirror.
I just thought I would check with the SO hive mind before embarking on rolling my own CodeMirror mode. If I do make a new mode (and get anywhere with it) I'll post it here.
Thanks!
I made some tries to get a mixed mode with smarty and although my work is not perfect, so far it works well enough for me. I started from de htmlmixedmode to add a smarty mode :
CodeMirror.defineMode("smartymixed", function(config, parserConfig) {
var htmlMode = CodeMirror.getMode(config, {name: "xml", htmlMode: true});
var smartyMode = CodeMirror.getMode(config, "smarty");
var jsMode = CodeMirror.getMode(config, "javascript");
var cssMode = CodeMirror.getMode(config, "css");
function html(stream, state) {
var style = htmlMode.token(stream, state.htmlState);
if (style == "tag" && stream.current() == ">" && state.htmlState.context) {
if (/^script$/i.test(state.htmlState.context.tagName)) {
state.token = javascript;
state.localState = jsMode.startState(htmlMode.indent(state.htmlState, ""));
state.mode = "javascript";
}
else if (/^style$/i.test(state.htmlState.context.tagName)) {
state.token = css;
state.localState = cssMode.startState(htmlMode.indent(state.htmlState, ""));
state.mode = "css";
}
}
return style;
}
function maybeBackup(stream, pat, style) {
var cur = stream.current();
var close = cur.search(pat);
if (close > -1) stream.backUp(cur.length - close);
return style;
}
function javascript(stream, state) {
if (stream.match(/^<\/\s*script\s*>/i, false)) {
state.token = html;
state.localState = null;
state.mode = "html";
return html(stream, state);
}
return maybeBackup(stream, /<\/\s*script\s*>/,
jsMode.token(stream, state.localState));
}
function css(stream, state) {
if (stream.match(/^<\/\s*style\s*>/i, false)) {
state.token = html;
state.localState = null;
state.mode = "html";
return html(stream, state);
}
return maybeBackup(stream, /<\/\s*style\s*>/,
cssMode.token(stream, state.localState));
}
function smarty(stream, state) {
style = smartyMode.token(stream, state.localState);
if ( state.localState.tokenize == null )
{ // back to anything from smarty
state.token = state.htmlState.tokens.pop();
state.mode = state.htmlState.modes.pop();
state.localState = state.htmlState.states.pop(); // state.htmlState;
}
return(style);
}
return {
startState: function() {
var state = htmlMode.startState();
state.modes = [];
state.tokens = [];
state.states = [];
return {token: html, localState: null, mode: "html", htmlState: state};
},
copyState: function(state) {
if (state.localState)
var local = CodeMirror.copyState(
( state.token == css ) ? cssMode : (( state.token == javascript ) ? jsMode : smartyMode ),
state.localState);
return {token: state.token, localState: local, mode: state.mode,
htmlState: CodeMirror.copyState(htmlMode, state.htmlState)};
},
token: function(stream, state) {
if ( stream.match(/^{[^ ]{1}/,false) )
{ // leaving anything to smarty
state.htmlState.states.push(state.localState);
state.htmlState.tokens.push(state.token);
state.htmlState.modes.push(state.mode);
state.token = smarty;
state.localState = smartyMode.startState();
state.mode = "smarty";
}
return state.token(stream, state);
},
compareStates: function(a, b) {
if (a.mode != b.mode) return false;
if (a.localState) return CodeMirror.Pass;
return htmlMode.compareStates(a.htmlState, b.htmlState);
},
electricChars: "/{}:"
}
}, "xml", "javascript", "css", "smarty");
CodeMirror.defineMIME("text/html", "smartymixed");
The switch to smarty mode is made in token function only but ...
You also have to patch the other basic modes ( css , javascript & xml ) to stop them on the { character so you can fall back in the token function to test it against a regexp ( { followed by a non blank character ).
This may help. I wrote a Smarty mode for CodeMirror2 this weekend. See:
http://www.benjaminkeen.com/misc/CodeMirror2/mode/smarty/
I've also forked the CodeMirror project with my change here:
https://github.com/benkeen/CodeMirror2
All the best -
Ben
[EDIT: this is now part of the main script. I'll be shortly adding a Smarty/HTML/CSS/JS mode].
The second part of the answer : a patch in benjamin smarty file to be able to leave it and fall back in smartymixedmode. So here is the patched verson of mode/smarty/smarty.js
CodeMirror.defineMode("smarty", function(config, parserConfig) {
var breakOnSmarty = ( config.mode == "smartymixed" ) ? true : false; // we are called in a "smartymixed" context
var keyFuncs = ["debug", "extends", "function", "include", "literal"];
var last;
var regs = {
operatorChars: /[+\-*&%=<>!?]/,
validIdentifier: /[a-zA-Z0-9\_]/,
stringChar: /[\'\"]/
}
var leftDelim = (typeof config.mode.leftDelimiter != 'undefined') ? config.mode.leftDelimiter : "{";
var rightDelim = (typeof config.mode.rightDelimiter != 'undefined') ? config.mode.rightDelimiter : "}";
function ret(style, lst) { last = lst; return style; }
function tokenizer(stream, state) {
function chain(parser) {
state.tokenize = parser;
return parser(stream, state);
}
if (stream.match(leftDelim, true)) {
if (stream.eat("*")) {
return chain(inBlock("comment", "*" + rightDelim));
}
else {
state.tokenize = inSmarty;
return ( breakOnSmarty == true ) ? "bracket" : "tag";
}
}
else {
// I'd like to do an eatWhile() here, but I can't get it to eat only up to the rightDelim string/char
stream.next();
return null;
}
}
function inSmarty(stream, state) {
if (stream.match(rightDelim, true)) {
state.tokenize = ( breakOnSmarty ) ? null : tokenizer;
return ( breakOnSmarty == true ) ? ret("bracket", null) : ret("tag", null);
}
var ch = stream.next();
if (ch == "$") {
stream.eatWhile(regs.validIdentifier);
return ret("variable-2", "variable");
}
else if (ch == ".") {
return ret("operator", "property");
}
else if (regs.stringChar.test(ch)) {
state.tokenize = inAttribute(ch);
return ret("string", "string");
}
else if (regs.operatorChars.test(ch)) {
stream.eatWhile(regs.operatorChars);
return ret("operator", "operator");
}
else if (ch == "[" || ch == "]") {
return ret("bracket", "bracket");
}
else if (/\d/.test(ch)) {
stream.eatWhile(/\d/);
return ret("number", "number");
}
else {
if (state.last == "variable") {
if (ch == "#") {
stream.eatWhile(regs.validIdentifier);
return ret("property", "property");
}
else if (ch == "|") {
stream.eatWhile(regs.validIdentifier);
return ret("qualifier", "modifier");
}
}
else if (state.last == "whitespace") {
stream.eatWhile(regs.validIdentifier);
return ret("attribute", "modifier");
}
else if (state.last == "property") {
stream.eatWhile(regs.validIdentifier);
return ret("property", null);
}
else if (/\s/.test(ch)) {
last = "whitespace";
return null;
}
var str = "";
if (ch != "/") {
str += ch;
}
var c = "";
while ((c = stream.eat(regs.validIdentifier))) {
str += c;
}
var i, j;
for (i=0, j=keyFuncs.length; i<j; i++) {
if (keyFuncs[i] == str) {
return ret("keyword", "keyword");
}
}
if (/\s/.test(ch)) {
return null;
}
return ret("tag", "tag");
}
}
function inAttribute(quote) {
return function(stream, state) {
while (!stream.eol()) {
if (stream.next() == quote) {
state.tokenize = inSmarty;
break;
}
}
return "string";
};
}
function inBlock(style, terminator) {
return function(stream, state) {
while (!stream.eol()) {
if (stream.match(terminator)) {
state.tokenize = ( breakOnSmarty == true ) ? null : tokenizer;
break;
}
stream.next();
}
return style;
};
}
return {
startState: function() {
return { tokenize: tokenizer, mode: "smarty", last: null };
},
token: function(stream, state) {
var style = state.tokenize(stream, state);
state.last = last;
return style;
},
electricChars: ""
}
});
CodeMirror.defineMIME("text/x-smarty", "smarty");
The 1st line check if we are called by the smartymixed mode and tests are made on this contition, allowing smarty mode to run as before.

Categories