How to parse cursor ANSI escape codes? - javascript

I'm writing code for processing ANSI escape codes for cursor for jQuery Terminal. but have problems, not sure how it should work, I've got weird results.
I'm testing with ervy library.
and using this code:
function scatter_plot() {
const scatterData = [];
for (let i = 1; i < 17; i++) {
i < 6 ? scatterData.push({ key: 'A', value: [i, i], style: ervy.fg('red', '*') })
: scatterData.push({ key: 'A', value: [i, 6], style: ervy.fg('red', '*') });
}
scatterData.push({ key: 'B', value: [2, 6], style: ervy.fg('blue', '# '), side: 2 });
scatterData.push({ key: 'C', value: [0, 0], style: ervy.bg('cyan', 2) });
var plot = ervy.scatter(scatterData, { legendGap: 18, width: 15 });
// same as Linux XTERM where 0 code is interpreted as 1.
var formatting = $.terminal.from_ansi(plot.replace(/\x1b\[0([A-D])/g, '\x1b[1$1'));
return formatting;
}
$.terminal.defaults.formatters = [];
var term = $('body').terminal();
term.echo(scatter_plot());
it should look like in Linux Xterm:
But it looks like this, see codepen demo
While I was writing the question changing few +1 and -1 (see processing A-F ANSI escapes in the code) when moving cursor give this result (code snippet have latest code).
First line got overwritten by spaces and whole plot is one to top and one to right (except 0,0 cyan dot that should be below " |" and 2 characters wide, so you should see right half of it, this one is correct but the rest is not)
this is my new code for processing cursor, I'm doing this just before processing colors, so the code is not that complex.
// -------------------------------------------------------------------------------
var ansi_re = /(\x1B\[[0-9;]*[A-Za-z])/g;
var cursor_re = /(.*)\r?\n\x1b\[1A\x1b\[([0-9]+)C/;
var move_cursor_split = /(\x1b\[[0-9]+[A-G])/g;
var move_cursor_match = /^\x1b\[([0-9]+)([A-G])/;
// -------------------------------------------------------------------------------
function parse_ansi_cursor(input) {
/*
(function(log) {
console.log = function(...args) {
if (true || cursor.y === 11) {
return log.apply(console, args);
}
};
})(console.log);
*/
function length(text) {
return text.replace(ansi_re, '').length;
}
function get_index(text, x) {
var splitted = text.split(ansi_re);
var format = 0;
var count = 0;
var prev_count = 0;
for (var i = 0; i < splitted.length; i++) {
var string = splitted[i];
if (string) {
if (string.match(ansi_re)) {
format += string.length;
} else {
count += string.length;
if (count >= x) {
var rest = x - prev_count;
return format + rest;
}
prev_count = count;
}
}
}
return i;
}
// ansi aware substring, it just and add removed ansi escapes
// at the beginning we don't care if the were disabled with 0m
function substring(text, start, end) {
var result = text.substring(start, end);
if (start === 0 || !text.match(ansi_re)) {
return result;
}
var before = text.substring(0, start);
var match = before.match(ansi_re);
if (match) {
return before.match(ansi_re).join('') + result;
}
return result;
}
// insert text at cursor position
// result is array of splitted arrays that form single line
function insert(text) {
if (!text) {
return;
}
if (!result[cursor.y]) {
result[cursor.y] = [];
}
var index = 0;
var sum = 0;
var len, after;
function inject() {
index++;
if (result[cursor.y][index]) {
result[cursor.y].splice(index, 0, null);
}
}
if (cursor.y === 11) {
//debugger;
}
if (text == "[46m [0m") {
//debugger;
}
console.log({...cursor, text});
if (cursor.x === 0 && result[cursor.y][index]) {
source = result[cursor.y][0];
len = length(text);
var i = get_index(source, len);
if (length(source) < len) {
after = result[cursor.y][index + 1];
if (after) {
i = get_index(after, len - length(source));
after = substring(after, i);
result[cursor.y].splice(index, 2, null, after);
} else {
result[cursor.y].splice(index, 1, null);
}
} else {
after = substring(source, i);
result[cursor.y].splice(index, 1, null, after);
}
} else {
var limit = 100000; // infite loop guard
var prev_sum = 0;
// find in which substring to insert the text
while (index < cursor.x) {
if (!limit--) {
warn('[WARN] To many loops');
break;
}
var source = result[cursor.y][index];
if (!source) {
result[cursor.y].push(new Array(cursor.x - prev_sum).join(' '));
index++;
break;
}
if (sum === cursor.x) {
inject();
break;
}
len = length(source);
prev_sum = sum;
sum += len;
if (sum === cursor.x) {
inject();
break;
}
if (sum > cursor.x) {
var pivot = get_index(source, cursor.x - prev_sum);
var before = substring(source, 0, pivot);
var end = get_index(source, length(text));
after = substring(source, pivot + end);
if (!after.length) {
result[cursor.y].splice(index, 1, before);
} else {
result[cursor.y].splice(index, 1, before, null, after);
}
index++;
break;
} else {
index++;
}
}
}
cursor.x += length(text);
result[cursor.y][index] = text;
}
if (input.match(move_cursor_split)) {
var lines = input.split('\n').filter(Boolean);
var cursor = {x: 0, y: -1};
var result = [];
for (var i = 0; i < lines.length; ++i) {
console.log('-------------------------------------------------');
var string = lines[i];
cursor.x = 0;
cursor.y++;
var splitted = string.split(move_cursor_split).filter(Boolean);
for (var j = 0; j < splitted.length; ++j) {
var part = splitted[j];
console.log(part);
var match = part.match(move_cursor_match);
if (match) {
var ansi_code = match[2];
var value = +match[1];
console.log({code: ansi_code, value, ...cursor});
if (value === 0) {
continue;
}
switch (ansi_code) {
case 'A': // UP
cursor.y -= value;
break;
case 'B': // Down
cursor.y += value - 1;
break;
case 'C': // forward
cursor.x += value + 1;
break;
case 'D': // Back
cursor.x -= value + 1;
break;
case 'E': // Cursor Next Line
cursor.x = 0;
cursor.y += value - 1;
break;
case 'F': // Cursor Previous Line
cursor.x = 0;
cursor.y -= value + 1;
break;
}
if (cursor.x < 0) {
cursor.x = 0;
}
if (cursor.y < 0) {
cursor.y = 0;
}
} else {
insert(part);
}
}
}
return result.map(function(line) {
return line.join('');
}).join('\n');
}
return input;
}
The result = []; in code is array of lines where single line may be split into multiple sub strings when inserting the text at cursor, maybe the code would be simpler if they would be array of strings. Right now I want only fix the cursor position.
Here is the codepen demo with from_ansi function embeded (inside there is parse_ansi_cursor that is problematic). Sorry there is lot of code, but parsing ANSI escape codes is not simple.
What I'm not sure how should work is moving the cursor (right now it have + 1 or - 1, I'm not sure about this) I'm also not sure if I should increase cursor.y before each line. I'm not 100% sure how this should work. I've looked into Linux Xterm code but didn't found a clues. Looked at Xterm.js but the ervy plot is completely broken for those scatter plot.
my from_ansi function had original code that was processing some ANSI cursor codes like this one:
input = input.replace(/\x1b\[([0-9]+)C/g, function(_, num) {
return new Array(+num + 1).join(' ');
});
only C, forward just add blanks, it was working for ANSI art but not work with ervy scatter plot.
I think it's not too broad, it's just question about moving cursor and processing newlines using ANSI escape codes. Also it's suppose to be simple case, cursor should move only inside single string not outside like in real terminal (ervy plot output ANSI escape codes like that).
I'm fine with answers that explain how to process the string and how to move the cursor that will work, but if you can provide fixes to the code I would be great. I prefer fixes to my code now whole new implementation unless is much simpler and it's a function parse_ansi_cursor(input) and work the same with rest of the code but with fixed cursor movement.
EDIT:
I've found that my input.split('\n').filter(Boolean) was wrong it should be:
var lines = input.split('\n');
if (input.match(/^\n/)) {
lines.shift();
}
if (input.match(/\n$/)) {
lines.pop();
}
and it seems that some old spec for ANSI escapes say that 0 is not zero but placeholder for default which is 1. That was removed from spec but Xterm is still using this. So I've added this line for parsing code, if there is 0A or A got value 1.
var value = match[1].match(/^0?$/) ? 1 : +match[1];
the plot looks better, but there are still issues with the cursor. (I think it's cursor - I'm not 100% sure).
I've changed the +1/-1 again now it's closer (Almost the same as in XTerm). Buss still there's need to be bug in my code.
EDIT:
afer answer by #jerch I've tried to use node ansi parser, have the same issue don't know how to process the cursor:
var cursor = {x:0,y:0};
result = [];
var terminal = {
inst_p: function(s) {
var line = result[cursor.y];
if (!line) {
result[cursor.y] = s;
} else if (cursor.x === 0) {
result[cursor.y] = s + line.substring(s.length);
} else if (line.length < cursor.x) {
var len = cursor.x - (line.length - 1);
result[cursor.y] += new Array(len).join(' ') + s;
} else if (line.length === cursor.x) {
result[cursor.y] += s;
} else {
var before = line.substring(0, cursor.x);
var after = line.substring(cursor.x + s.length);
result[cursor.y] = before + s + after;
}
cursor.x += s.length;
console.log({s, ...cursor, line: result[cursor.y]});
},
inst_o: function(s) {console.log('osc', s);},
inst_x: function(flag) {
var code = flag.charCodeAt(0);
if (code === 10) {
cursor.y++;
cursor.x = 0;
}
},
inst_c: function(collected, params, flag) {
console.log({collected, params, flag});
var value = params[0] === 0 ? 1 : params[0];
switch(flag) {
case 'A': // UP
cursor.y -= value;
break;
case 'B': // Down
cursor.y += value - 1;
break;
case 'C': // forward
cursor.x += value;
break;
case 'D': // Back
cursor.x -= value;
break;
case 'E': // Cursor Next Line
cursor.x = 0;
cursor.y += value;
break;
case 'F': // Cursor Previous Line
cursor.x = 0;
cursor.y -= value;
break;
}
},
inst_e: function(collected, flag) {console.log('esc', collected, flag);},
inst_H: function(collected, params, flag) {console.log('dcs-Hook', collected, params, flag);},
inst_P: function(dcs) {console.log('dcs-Put', dcs);},
inst_U: function() {console.log('dcs-Unhook');}
};
var parser = new AnsiParser(terminal);
parser.parse(input);
return result.join('\n');
This is just simple example that ignore everything except newline and cursor movement.
Here is the output:
UPDATE:
It seems that every cursor movement should be just += value or -= value and my value - 1; was just correcting to bug in ervy library that was not working on clear terminal.

To begin with - a Regexp based approach is not ideal to handle escape sequences. The reason for this are complicated interactions between various terminal sequences, as some break a former not yet closed one while others keep working in the middle of another (like some control codes) and the "outer" sequence would still finish correctly. You would have to pull in all these edge cases into every single regexp (see https://github.com/xtermjs/xterm.js/issues/2607#issuecomment-562648768 for an illustration).
In general parsing escape sequences is quite tricky, we even have an issue regarding that in terminal-wg. Hopefully we manage to get some minimal parsing requirements from this in the future. Most certainly it will not be regexp-based ;)
All that said, its much easier to go with a real parser, that deals with all the edge cases. A good starting point for a DEC compatible parser is https://vt100.net/emu/dec_ansi_parser. For cursor handling you have to handle at least these states with all actions:
ground
escape
csi_entry
csi_ignore
csi_param
csi_intermediate
plus all other states as dummy entries. Also control codes need special care (action execute), as they might interfer anytime with any other sequence with different results.
To make things even worse, the official ECMA-48 specifiction slightly differs for certain aspects from the DEC parser. Still most emulators used these days try to aim for DEC VT100+ compatibility.
If you dont want to write the parser yourself, you can either use/modify my old parser or the one we have in xterm.js (the latter might be harder to integrate as it operates on UTF32 codepoints).

Related

Minimax algorithm doesnt go for mate in chess

My goal is to code a somewhat OK chess engine, in the following position it's a mate in 2 which the engine should easily find with its depth of 4-5.
The first move the AI makes is Ra2 to trap the white king, the white king goes to f1 and instead of mating the AI moves the Rook to c2.
var initial_depth = depth;
var bestMove = null;
var nodes = 0;
var ret = await minimax(position, depth, alpha, beta, maximizingPlayer);
console.log("nodes visited: " + nodes);
return ret;
async function minimax(position, depth, alpha, beta, maximizingPlayer) {
nodes++;
if (maximizingPlayer) {
var validMoves = await getValidMoves(position, ArrtoFEN(position) + " w");
} else {
var validMoves = await getValidMoves(position, ArrtoFEN(position) + " b");
}
if (validMoves.length < 1 || depth == 0) {
var eval = await getEval(position);
return [eval, null];
}
if (maximizingPlayer) {
var maxEval = Number.NEGATIVE_INFINITY;
for (var i = 0; i < validMoves.length; i++) {
var move = validMoves[i];
var testbrd = makeMove(move, position) //not the actual code. shortend for Readability
var eval = await minimax(testbrd, depth - 1, alpha, beta, false);
if (eval[0] > maxEval) {
maxEval = eval[0];
if (initial_depth == depth) {
bestMove = move;
console.log("current bestmove: " + bestMove);
}
}
alpha = Math.max(alpha, eval[0]);
if (beta <= alpha) {
break;
}
}
return [maxEval, bestMove];
} else {
var minEval = Number.POSITIVE_INFINITY;
for (var i = 0; i < validMoves.length; i++) {
var move = validMoves[i];
var testbrd = makeMove(move, position)//not the actual code. shortend for Readability
var eval = await minimax(testbrd, depth - 1, alpha, beta, true);
if (eval[0] < minEval) {
minEval = eval[0];
if (initial_depth == depth) {
bestMove = move;
console.log("current bestmove: " + bestMove);
}
}
beta = Math.min(beta, eval[0]);
if (beta <= alpha) {
break;
}
}
return [minEval, bestMove];
}
}
}
This is because it sees that any move will win, and you don't have a condition that tells the engine that it is better to do mate in 1 move than in 5 moves. If at the end of a search find that you have 0 legal moves and you are in check, then you are checkmated. In this case you want to send back a checkmate score (large negative value) and add the ply from this. This way you will make it better to mate in fewer moves than in a larger amount of moves.
I suggest you go for Negamax alogirthm in stead of minimax. It will mean much less code and much easier to debug.

NodeJS require with asynch functions when synch is wanted

I have the following code
var utils = require(`${__dirname}/../../utils/utils.js`);
...
let object = utils.parse(input);
if (object === undefined){
let helper = utils.recognize(input);
msg.channel.sendMessage("\"" + input + "\" not recognized. Did you mean \"" + helper[0] + "\"?");
object = utils.parse(helper[0]);
}
//code related to object
console.log(object.strLength);
where "parse" tries to match the input to an object in a database, and "recognize" tries to find the best match if the input is spelled incorrectly (Levenshtein) (along with additional info such as how close the match was).
Currently the issue is that the code is ran asynchronously; "object.strLength" returns an undefined before utils.recognize() returns a value. If I copy/paste the recognize() and parse() functions into the file, then the code is run synchronously and I do not run into any issues. However I would rather keep those functions in a separate file as I reuse them in other files.
Is there a way to specify that the functions in utils must be synch? I know that there are libraries that convert asynch into synch but I prefer to use as few libraries as I can help it. I tried to have the recognize functions return a Promise but it ended up as a jumbled mess
edit: here's parse. I did not think it was necessary to answer this question so I did not include it initially:
var db = require(`${__dirname}/../data/database.js`);
...
var parse = (input) => {
let output = db[output];
if (output === null) {
Object.keys(db).forEach((item) => {
if (db[item].num === parseInt(input) || (db[item].color + db[item].type === input)){
output = db[item];
return false;
}
});
}
return output;
}
I solved the issue, thanks everyone. Here's what was wrong, it was with recognize(). It was my mistake to not show the code for it initially.
Original recognize:
var recognize = (item) => {
//iterate through our databases and get a best fit
let bestItem = null;
let bestScore = 99999; //arbitrary large number
//let bestType = null;
//found algorithm online by milot-mirdita
var levenshtein = function(a, b) {
if (a.length == 0) { return b.length; }
if (b.length == 0) { return a.length; }
// swap to save some memory O(min(a,b)) instead of O(a)
if(a.length > b.length) {
let tmp = a;
a = b;
b = tmp;
}
let row = [];
for(let i = 0; i <= a.length; i++) {
row[i] = i;
}
for (let i = 1; i <= b.length; i++) {
let prev = i;
for (let j = 1; j <= a.length; j++) {
let val;
if (b.charAt(i-1) == a.charAt(j-1)) {
val = row[j-1]; // match
} else {
val = Math.min(row[j-1] + 1, // substitution
prev + 1, // insertion
row[j] + 1); // deletion
}
row[j - 1] = prev;
prev = val;
}
row[a.length] = prev;
}
return row[a.length];
}
//putting this here would make the code work
//console.log("hi");
Object.keys(db).forEach((key) => {
if (levenshtein(item, key) < bestScore) {
bestItem = key;
bestScore = levenshtein(item, key);
}
});
return [bestItem, bestScore];
}
My solution was to move the levenshtein function outside of the recognize function, so if I wanted to I can call levenshtein from another function
#user949300 and #Robert Moskal, I changed the forEach loop into a let...in loop. There is no functional difference (as far as I can tell) but the code does look cleaner.
#Thomas, I fixed the let output = db[output]; issue, oops.
Again, thanks for all of your help, I appreciate it. And happy New Year too

Porting breadth-first search to Javascript

I created AI for snake in Python. The game works nicely. I started learning Javascript recently and I tried writting the Javascript equivalent of the game.
The game is played on x*y grid (eg. 30*20). In Python I used (x, y) tuples for game position. In JS I use integers which I map ussing:
function map(x, y) {
return x + y * size.width;
}
function unmap(pos) {
return {x: pos % size.width, y: Math.floor(pos/size.width)};
}
My problem is that the search doesn't work. When I try to create path from it enters infinite loop. The search function is:
function search(start, goal) {
var frontier = new PriorityQueue({
comparator: function(a, b) {
return a.score - b.score;
}
});
frontier.queue({value: start, score: 0});
var cameFrom = {};
cameFrom[start] = null;
while (frontier.length !== 0) {
var current = frontier.dequeue().value;
if (current === goal) {
break;
}
var nbs = neighbors(current);
for(var i = 0; i < nbs.length; i++) {
var next = nbs[i];
if (Object.keys(cameFrom).indexOf(next) === -1) {
var priority = heuristic(goal, next);
frontier.queue({value: next, score: priority});
cameFrom[next] = current;
}
}
}
return cameFrom;
}
I use this priority queue.
The search in Python is more OOP but I don't want to include more code - the question is already long. But I'll include the search:
def search(self, grid, start, goal):
frontier = PriorityQueue()
frontier.put(start, 0)
came_from = {}
came_from[start] = None
while not frontier.empty():
current = frontier.get()
if current == goal:
break
for next in grid.neighbors(current):
if next not in came_from:
priority = self.heuristic(goal, next)
frontier.put(next, priority)
came_from[next] = current
return came_from
If anything more is needed please ask. I'm bad at JS.
Problem was with this line:
if (Object.keys(cameFrom).indexOf(next) === -1) {
I was searching for integer but keys are always strings. You can clearly the behavior in this example:
var foo = {0: "fooBar", 1: "bar"};
console.log("keys", Object.keys(foo));
console.log("int", Object.keys(foo).indexOf(0)); // -1
console.log("string", Object.keys(foo).indexOf("0")); // 0
This is more readable, shorter and works:
if (cameFrom[next] === undefined) {
Thanks to IVlad who pointed out the bad line.

Check for continuous order in array in javascript

var userInput = prompt('enter number here');
var number = new Array(userInput.toString().split(''));
if (number ????){ //checks if the number is in a continuous stream
alert(correct);
}
else{
alert(invalid);
}
In Javascript, what can I do at "????" to check if it is in a continuous order/stream? Also how can I do this so that it only checks for this order/stream after a specific index in the array? Meaning the user enters say "12345678901234" which would pop up correct, but "12347678901234" would pop up invalid?(note there are two 7's) For the second part "3312345678901234" would pop up correct, how can this be implemented?
You can make a function that checks any string for a stream of continuous/increasing alpha-numeric characters starting at a given index like this:
function checkContinuous(str, startIndex) {
startindex = startIndex || 0;
if (str.length <= startIndex) {
return false;
}
var last = str.charCodeAt(startIndex);
for (var i = startIndex + 1; i < str.length; i++) {
++last;
if (str.charCodeAt(i) !== last) {
return false;
}
}
return true;
}
If it's numbers only and wrapping from 9 back to 0 is considered continuous, then it's a little more complicated like this:
function checkContinuous(str, startIndex) {
// make sure startIndex is set to zero if not passed in
startIndex = startIndex || 0;
// skip chars before startIndex
str = str.substr(startIndex);
// string must be at least 2 chars long and must be all numbers
if (str.length < 2 || !/^\d+$/.test(str)) {
return false;
}
// get first char code in string
var last = str.charCodeAt(0);
// for the rest of the string, compare to last code
for (var i = 1; i < str.length; i++) {
// increment last charCode so we can compare to sequence
if (last === 57) {
// if 9, wrap back to 0
last = 48;
} else {
// else just increment
++last;
}
// if we find one char out of sequence, then it's not continuous so return false
if (str.charCodeAt(i) !== last) {
return false;
}
}
// everything was continuous
return true;
}
Working demo: http://jsfiddle.net/jfriend00/rHH4B/
No need for arrays, just back though the string one character at a time.
When you hit a 0, substitute 10, and continue until the number
is not one more than the previous one.
function continuousFromChar(str, start){
start= start || 0;
var i= 0, L= str.length, prev;
while(L){
c= +(str.charAt(-- L)) || 10; // use 10 for 0
prev=+(str.charAt(L- 1));
if(c-prev !== 1) break;
}
return start>=L;
}
var s= "3312345678901234";
continuousFromChar(s,2)
/* returned value: (Boolean)
true
*/
This will do the checking in real-time entry, but a similar principle could be used to check an entry on a button submit or similar. I was not 100% sure as to which way you wanted it, so I went for the live method.
HTML
<input id="stream" type="text" />
Javascript
window.addEventListener("load", function () {
document.getElementById("stream").addEventListener("keyup", function (evt) {
var target = evt.target;
var value = target.value;
var prev;
var last;
var expect;
target.value = value.replace(/[^\d]/, "");
if (value.length > 1) {
prev = parseInt(value.slice(-2, -1), 10);
last = parseInt(value.slice(-1), 10);
expect = prev + 1;
if (expect > 9) {
expect = 0;
}
if (last !== expect) {
target.value = value.slice(0, value.length - 1);
}
}
}, false);
});
On jsfiddle
By changing the value here
if (value.length > 1) {
You can change where the checking starts.
Update: Ok, so it is function that you want, and you insist that it splits the string into an array. Then using the above as a reference, you could convert it to something like this.
Javascript
window.addEventListener("load", function () {
var testStrings = [
"0123456789012",
"0123456789",
"0123455555",
"555012345678901234",
"0123455555"];
function test(string, offset) {
if (typeof string !== "string" || /[^\d]/.test(string)) {
return false;
}
var array = string.split("");
var prev;
var last;
var expect;
return !array.some(function (digit, index) {
if (index >= offset) {
prev = parseInt(array[index - 1], 10);
last = parseInt(digit, 10);
expect = prev + 1;
if (expect > 9) {
expect = 0;
}
if (last !== expect) {
return true;
}
}
return false;
});
}
testStrings.forEach(function (string) {
console.log(string, test(string, 1));
});
});
On jsfiddle
As your question does not fully specify all possibilities, the above will return true for an empty string (""), of course you can simply add a check at the very beginning for that.
I also do not perform any checking for a valid number for your offset, but again this is something simple that you can add.
Of course these are just one (two) of many possible solutions, but hopefully it will set your mind in the right direction of thought.
There are some good answers here, but I would like to show a slight variation. I think it is important to showcase some different aspects of JavaScript and separating interests in code.
Functions as first class objects are cool - the exact rules for "continuous" can be changed with only changing the predicate function. Perhaps we should allow skipping numbers? No problem. Perhaps we allow hex digits? No problem. Just change the appropriate follows function for the specific rules.
This can be implemented generically because strings support indexing. This will work just as well over other array-like objects with an appropriate follows function. Note that there are no string-specific functions used in the continuous function.
Code also on jsfiddle:
// returns true only iff b "follows" a; this can be changed
function follows_1Through9WithWrappingTo0(b,a) {
if (b === "1" && a === undefined) {
// start of sequence
return true;
} else if (b === "0" && a === "9") {
// wrap
return true;
} else {
// or whatever
return (+b) === (+a) + 1;
}
}
function continuous(seq, accordingTo, from) {
// strings can be treated like arrays; this code really doesn't care
// and could work with arbitrary array-like objects
var i = from || 0;
if ((seq.length - i) < 1) {
return true;
}
var a = undefined;
var b = undefined;
for (; i < seq.length; i++) {
b = seq[i];
if (!accordingTo(b, a)) {
return false; // not continuous
}
a = b;
}
return true;
}
function assert(label, expr, value) {
if (!(expr === value)) {
alert("FAILED: " + label);
}
}
var follows = follows_1Through9WithWrappingTo0;
assert("empty1", continuous("", follows), true);
assert("empty2", continuous("foobar", follows, 6), true);
assert("skip", continuous("331234", follows, 2), true);
assert("good 1", continuous("123456789", follows), true);
assert("good 2", continuous("12345678901234", follows), true);
assert("bad seq 1", continuous("12347678901234", follows), false);
assert("bad seq 2", continuous("10", follows), false);
// here a different predicate ensures all the elements are the same
var areAllSame = function (b, a) {
return a === undefined || a === b;
};
assert("same", continuous("aaaaa", areAllSame), true);
Note that the skipping could also be extracted out of the continuous function: in a language with better "functional" collection support, such as C#, this is exactly what I'd do first.

How to parse pure functions

Let's say you have the following function
var action = (function () {
var a = 42;
var b = 2;
function action(c) {
return a + 4 * b + c;
}
return action;
}());
// how would you parse action into it's serialized LISP / AST format?
var parsed = parse(action);
Is it possible to have a function that takes a reference to the function action and outputs say the LISP format (lambda (c) (plus (plus 42 (multiply 4 2)) c))
We're allowed to put some restrictions on what action can be.
the body should only be a single expression
it should be a pure function
any free variables are constants
The main question is given a function you can invoke with a range of inputs and it's source code can you discover the correct value to substitute the free variables with?
For the above example you know that a and b are constant and you could intellectually plot the output for a few values and see the pattern and then just know what the constants are.
Question:
How would you write a function that takes a function reference and it's source code and produces some form of AST for the function with any free variables substituted for their run-time values.
An example of an AST format would be the LISP equivalent of the code.
I basically want to serialize and deserialize the function and have it behave the same
It should be noted that the problem becomes trivial if you pass { a: a, b: b } to the analysis function. That would be cheating.
Use-case:
I want to generate a language agnostic form of a pure JavaScript function so I can effectively pass it to C++ without requiring the user of my library to use a DSL to create this function
Let's imagine you had a database driver
var cursor = db.table("my-table").map(function (row) {
return ["foo", row.foo]
})
You want to determine at run-time what the function is and convert it into an AST format so that you can use your efficient query builder to convert it into SQL or whatever query engine your database has.
This means you don't have to write:
var cursor = db.table("my-table").map(function (rowQueryObject) {
return db.createArray(db.StringConstant("foo"), rowQueryObject.getProperty("foo"))
})
Which is a function the DB library can execute with a query object and have you build the query object transformation without verbose methods.
Here is a full solution (using catalog of variables which is accessible by the parse function):
var CONSTANTS = {
a: 42,
b: 2,
c: 4
};
function test() {
return a + 4 * b + c;
}
function getReturnStatement(func) {
var funcStr = func.toString();
return (/return\s+(.*?);/g).exec(funcStr)[1];
}
function replaceVariables(expr) {
var current = '';
for (var i = 0; i < expr.length; i += 1) {
while (/[a-zA-Z_$]/.test(expr[i]) && i < expr.length) {
current += expr[i];
i += 1;
}
if (isNumber(CONSTANTS[current])) {
expr = expr.replace(current, CONSTANTS[current]);
}
current = '';
}
return expr;
}
function isNumber(arg) {
return !isNaN(parseInt(arg, 10));
}
function tokenize(expr) {
var tokens = [];
for (var i = 0; i < expr.length; i += 1) {
if (isWhitespace(expr[i])) {
continue;
} else if (isOperator(expr[i])) {
tokens.push({
type: 'operator',
value: expr[i]
});
} else if (isParentheses(expr[i])) {
tokens.push({
type: 'parant',
value: expr[i]
});
} else {
var num = '';
while (isNumber(expr[i]) && i < expr.length) {
num += expr[i];
i += 1;
}
i -= 1;
tokens.push({
type: 'number',
value: parseInt(num, 10)
});
}
}
return tokens;
}
function toPrefix(tokens) {
var operandStack = [],
operatorStack = [],
current,
top = function (stack) {
if (stack) {
return stack[stack.length - 1];
}
return undefined;
};
while (tokens.length) {
current = tokens.pop();
if (current.type === 'number') {
operandStack.push(current);
} else if (current.value === '(' ||
!operatorStack.length ||
(getPrecendence(current.value) >
getPrecendence(top(operatorStack).value))) {
operatorStack.push(current);
} else if (current.value === ')') {
while (top(operatorStack).value !== '(') {
var tempOperator = operatorStack.pop(),
right = operandStack.pop(),
left = operandStack.pop();
operandStack.push(tempOperator, left, right);
}
operatorStack.pop();
} else if (getPrecendence(current.value) <=
getPrecendence(top(operatorStack).value)) {
while (operatorStack.length &&
getPrecendence(current.value) <=
getPrecendence(top(operatorStack).value)) {
tempOperator = operatorStack.pop();
right = operandStack.pop();
left = operandStack.pop();
operandStack.push(tempOperator, left, right);
}
}
}
while (operatorStack.length) {
tempOperator = operatorStack.pop();
right = operandStack.pop();
left = operandStack.pop();
operandStack.push(tempOperator, left, right);
}
return operandStack;
}
function isWhitespace(arg) {
return (/^\s$/).test(arg);
}
function isOperator(arg) {
return (/^[*+\/-]$/).test(arg);
}
function isParentheses(arg) {
return (/^[)(]$/).test(arg);
}
function getPrecendence(operator) {
console.log(operator);
switch (operator) {
case '*':
return 4;
case '/':
return 4;
case '+':
return 2;
case '-':
return 2;
default:
return undefined;
}
}
function getLispString(tokens) {
var result = '';
tokens.forEach(function (e) {
if (e)
switch (e.type) {
case 'number':
result += e.value;
break;
case 'parant':
result += e.value;
break;
case 'operator':
result += getOperator(e.value);
break;
default:
break;
}
result += ' ';
});
return result;
}
function getOperator(operator) {
switch (operator) {
case '+':
return 'plus';
case '*':
return 'multiplicate';
case '-':
return 'minus';
case '\\':
return 'divide';
default:
return undefined;
}
}
var res = getReturnStatement(test);
console.log(res);
res = replaceVariables(res);
console.log(res);
var tokens = tokenize(res);
console.log(tokens);
var prefix = toPrefix(tokens);
console.log(prefix);
console.log(getLispString(prefix));
I just wrote it so there might be some problems in the style but I think that the idea is clear.
You can get the function body by using the .toString method. After that you can use regular expression to match the return statement
(/return\s+(.*?);/g).exec(funcStr)[1];
Note that here you must use semicolons for successful match! In the next step all variables are transformed to number values using the CONSTANTS object (I see that you have some parameters left so you may need little modifications here). After that the string is being tokenized, for easier parsing. In next step the infix expression is transformed into a prefix one. At the last step I build a string which will make the output looks like what you need (+ - plus, - - minus and so on).
Since I'm not sure you're able to get the method's body after having invoked it, here is an alternative solution:
var a = 42;
var b = 2;
function action(c) {
return a + 4 * b + c;
}
/**
* get the given func body
* after having replaced any available var from the given scope
* by its *real* value
*/
function getFunctionBody(func, scope) {
// get the method body
var body = func.toString().replace(/^.*?{\s*((.|[\r\n])*?)\s*}.*?$/igm, "$1");
var matches = body.match(/[a-z][a-z0-9]*/igm);
// for each potential var
for(var i=0; i<matches.length; i++) {
var potentialVar = matches[i];
var scopedValue = scope[potentialVar];
// if the given scope has the var defined
if(typeof scopedValue !== "undefined") {
// add "..." for strings
if(typeof scopedValue === "string") {
scopedValue = '"' + scopedValue + '"';
}
// replace the var by its scoped value
var regex = new RegExp("([^a-z0-9]+|^)" + potentialVar + "([^a-z0-9]+|$)", "igm");
var replacement = "$1" + scopedValue + "$2";
body = body.replace(regex, replacement);
}
}
return body;
}
// calling
var actionBody = getFunctionBody(action, this);
// log
alert(actionBody);
Prints:
return 42 + 4 * 2 + c;
DEMO
You would then have to implement your own function toLISP(body) or any function else you may need.
Note that it won't work for complex scoped variables such as var a = {foo: "bar"}.

Categories