Mapping transpiled code back to the original markup script

Mapping transpiled code back to the original markup script - javascript

Someone recently asked if there was a simple way to transform custom markup as follows, including nested markings. Examples included...
for \k[hello] the output will be <b>hello</b>
for \i[world], the output will be <em>world</em>
for hello \k[dear \i[world]], the output will be hello <b>dear <em>world</em></b>
for \b[some text](url), the output will be <a href="url">some text</a>
for \r[some text](url), the output will be <img alt="some text" src="url" />
Interestingly enough, transpiling the above to javascript, including consideration for nesting, is remarkably straightforward, particularly if the markup grammar is consistent.
//
// Define the syntax and translation to javascript.
//
// Each `syntax` entry maps a markup token (`markUp`) to the JavaScript source
// fragment (`javascript`) that replaces it. One-argument commands carry the
// `pre`/`post` text wrapped around their argument; two-argument commands carry
// a `pattern` in which `$1` and `$2` stand for the two arguments.
//
// The command tokens are written with String.raw so that they contain a real
// backslash. In an ordinary template literal `\k[` evaluates to `k[`, `\b[`
// to backspace + `[` and `\r[` to carriage return + `[`, which made any plain
// "k[" in the text look like a command and made genuine `\k[` markup fail.
//
const grammar = {
  syntax: {
    k: { markUp: String.raw`\k[`, javascript: `"+grammar.oneArg("k","`, pre: `<b>`, post: `</b>` },
    i: { markUp: String.raw`\i[`, javascript: `"+grammar.oneArg("i","`, pre: `<em>`, post: `</em>` },
    b: { markUp: String.raw`\b[`, javascript: `"+grammar.twoArgs("b","`, pattern: `<a href="$2">$1</a>` },
    r: { markUp: String.raw`\r[`, javascript: `"+grammar.twoArgs("r","`, pattern: `<img alt="$1" src="$2"/>` },
    close0: { markUp: `](`, javascript: `","` },
    close1: { markUp: `)`, javascript: `")+"` },
    close2: { markUp: `]`, javascript: `")+"` },
  },

  /**
   * Render a one-argument command.
   * @param {string} command - key of the command in `grammar.syntax`.
   * @param {string} arg1 - already rendered content of the command.
   * @returns {string} `pre + arg1 + post` of that command.
   */
  oneArg(command, arg1) {
    const { pre, post } = grammar.syntax[command];
    return pre + arg1 + post;
  },

  /**
   * Render a two-argument command by filling in its pattern.
   * @param {string} command - key of the command in `grammar.syntax`.
   * @param {string} arg1 - value substituted for every `$1`.
   * @param {string} arg2 - value substituted for every `$2`.
   * @returns {string} the pattern with both placeholders replaced.
   */
  twoArgs(command, arg1, arg2) {
    // Single pass with a replacer function: a `$2` that happens to occur
    // inside arg1 is left alone, and `$` sequences in the arguments are not
    // interpreted as replacement patterns.
    return grammar.syntax[command].pattern.replace(
      /\$([12])/g,
      (placeholder, slot) => (slot === '1' ? arg1 : arg2),
    );
  },
};

/**
 * Transpile a markup string to a JavaScript string expression and evaluate it.
 *
 * Every token of `grammar.syntax` is replaced, in declaration order, by its
 * JavaScript fragment; the result is wrapped in double quotes and executed.
 *
 * NOTE(security): the text between tokens ends up verbatim inside JavaScript
 * string literals that are compiled with `new Function`. A double quote,
 * backslash or line break in the markup therefore breaks (or injects into)
 * the generated code. Do not feed untrusted markup to this function without
 * escaping it first.
 *
 * @param {string} markUpString - markup such as `\k[bold \i[nested]]`.
 * @returns {string} the rendered output.
 * @throws {SyntaxError} when the markup is unbalanced and the generated
 *   JavaScript does not compile.
 */
function transpileAndExecute(markUpString) {
  // Convert the markUp to javascript.
  let transpiled = markUpString;
  for (const command of Object.keys(grammar.syntax)) {
    const { markUp, javascript } = grammar.syntax[command];
    transpiled = transpiled.split(markUp).join(javascript);
  }
  // With the markUp now converted to javascript, let's execute it!
  // `grammar` is passed in explicitly: code created by `new Function` only
  // sees the global scope, so it cannot rely on a module- or block-scoped
  // `grammar` binding being visible.
  return new Function('grammar', `return "${transpiled}"`)(grammar);
}

const markUpTest = String.raw`Hello \k[dear \i[world!]] \b[\i[Search:] \k[Engine 1]](http://www.google.com) \r[\i[Search:] \k[Engine 2]](http://www.yahoo.com)`;
console.log(transpileAndExecute(markUpTest));
Note that there are obviously pre-processing issues that must also be addressed, such as how to handle the inclusion of tokens in normal text. Eg, including a ']' as part of a text string will throw the transpiler a curve ball, so enforcing a rule such as using '\]' to represent a ']', and then replacing all such occurrences of '\]' with innocuous text before transpiling and then re-replacing afterwards solves this problem simply...
In terms of transpiling, using the grammar defined above, the following markup...
Hello \k[dear \i[world!]] \b[\i[Search:] \k[Engine 1]](http://www.google.com) \r[\i[Search:] \k[Engine 2]](http://www.yahoo.com)
...is transpiled to...
"Hello "+grammar.oneArg("k","dear "+grammar.oneArg("i","world!")+"")+" "+grammar.twoArgs("b",""+grammar.oneArg("i","Search:")+" "+grammar.oneArg("k","Engine 1")+"","http://www.google.com")+" "+grammar.twoArgs("r",""+grammar.oneArg("i","Search:")+" "+grammar.oneArg("k","Engine 2")+"","http://www.yahoo.com")+""
...and once executed as a javascript Function, results in...
Hello <b>dear <em>world!</em></b> <em>Search:</em> <b>Engine 1</b> <img alt="<em>Search:</em> <b>Engine 2</b>" src="http://www.yahoo.com"/>
The real challenge though is the handling of syntax errors, particularly if one has large amounts of markup to transpile. The crystal clear answer by CertainPerformance (see Find details of SyntaxError thrown by javascript new Function() constructor ) provides a means of capturing the line number and character number of a syntax error from a dynamically compiled javascript function, but I am not quite sure of the best means of mapping a syntax error of the transpiled code back to the original markup.
Eg, if an extra ']' is out of place (after "Goodbye")...
Hello World! \b[\i[Goodbye]]] \k[World!]]
...this transpiles to...
"Hello World! "+grammar.twoArgs("b",""+grammar.oneArg("i","Goodbye")+"")+"")+" "+grammar.oneArg("k","World!")+"")+""
^
...and CertainPerformance's checkSyntax function returns "Error thrown at: 1:76", as expected, marked above with the "^".
The question is, how to map this back to the original markup to aid in narrowing down the error in the markup? (Obviously in this case, it's simple to see the error in the markup, but if one has pages of markup being transpiled, then assistance in narrowing down the syntax error is a must.) Maintaining a map between the markup and the transpiled code seems tricky, as the transpiler is mutating the markup to javascript code step-by-step as it walks the grammar transformation matrix. My gut tells me there's a simpler way... Thanks for looking.

I would suggest you write a syntax checker, kind of like jsonlint or jslint etc., that checks whether everything is opened and closed properly before actually compiling the text to human-readable text.
This allows for debugging, prevents malformed code from running haywire, and allows you to provide an error-highlighting document editor for users who are editing the text.
Below a proof of concept which just checks if brackets are closed properly.
/**
 * Proof-of-concept linter: checks that command brackets in the markup are
 * balanced.
 *
 * A command opening is a backslash, one command character, then '[' (for
 * example `\k[`). Every ']' closes the most recently opened command. Line and
 * character positions in the error messages are 1-based.
 *
 * @param {string} text - markup to check; may span several lines.
 * @returns {void}
 * @throws {Error} when a ']' has no matching command opening, or when the
 *   text ends while commands are still open.
 */
function grammarLint(text) {
  // Positions of the commands that are currently open, innermost last.
  const openCommands = [];
  const lines = text.split('\n');

  for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) {
    const line = lines[lineIndex];
    for (let col = 0; col < line.length; col++) {
      const char = line.charAt(col);

      if (char === '\\' && line.charAt(col + 2) === '[') {
        openCommands.push({ line: lineIndex + 1, char: col + 1 });
        // Skip the command character and the '['; the loop increment then
        // moves past the whole three-character token.
        col += 2;
        continue;
      }

      if (char === ']') {
        if (openCommands.length === 0) {
          throw new Error(`Command closed but not opened on line ${lineIndex + 1} char ${col + 1}`);
        }
        openCommands.pop();
      }
    }
  }

  if (openCommands.length > 0) {
    // Whatever is left on the stack was never closed; point at the most
    // recently opened one so the author has a place to start looking.
    const last = openCommands[openCommands.length - 1];
    throw new Error(
      `${openCommands.length} Unclosed command brackets found; ` +
        `most recent unclosed command opened on line ${last.line} char ${last.char}`,
    );
  }
}
// Run the linter over two deliberately broken samples and print the error
// each one produces: the first has a surplus ']', the second never closes
// its outermost command.
const lintSamples = [
  'Hello World! \\b[\\i[Goodbye]]] \\k[World!]]',
  'Hello World! \\b[\\i[Goodbye \\k[World!]]',
];
for (const sample of lintSamples) {
  try {
    grammarLint(sample);
  } catch (e) {
    console.error(e.message);
  }
}

Chased down the ability to leverage the javascript compiler to capture syntax errors in the transpiled code, and reference this back to the original markup. In short, this involves a scheme of incorporating comments in the transpiled code to permit references back to the markup, providing the means of narrowing down the markup error. (There is a bit of shortcoming in that the error message is really a transpiler syntax error, and doesn't necessarily correspond exactly to the markup error, but gives one a fighting chance to figure out where the markup issue lies.)
This algorithm also leverages the concepts of CertainPerformance's technique ( Find details of SyntaxError thrown by javascript new Function() constructor ) of using setTimeout to capture the syntax errors of the transpiled code. I have interspersed a javascript Promise to smooth the flow.
"use strict";
//
// Define the syntax and translation to javascript.
//
/**
 * Transpiles the custom markup to a JavaScript string expression, lets the
 * JavaScript engine syntax-check it, and maps a syntax error in the
 * transpiled code back to a character range of the original markup.
 *
 * The mapping works by embedding comments of the form `/*●N*​/` in the
 * transpiled code, where N is a character offset into the markup. The class
 * uses `window` error events, so it only runs in a browser.
 */
class Transpiler {
// Counter used to hand out a fresh id for every syntax check.
static _syntaxCheckCounter = 0;
// Pending checks by id; each entry holds `markUp`, `transpiledFunction` and `rejectFunction`.
static _syntaxCheck = {};
// Id of the check that is being compiled right now; read by transpilerSyntaxChecker.
static _currentSyntaxCheck = null;
constructor() {
this.grammar = {
// Token table. `markUp` is the token searched for, `javascript` the fragment that
// replaces it. The ► and ◄ characters inside `javascript` are placeholders that
// transpileAndExecute turns into position comments.
// NOTE(review): the tokens are plain template literals, so `\k[` evaluates to `k[`,
// `\i[` to `i[`, `\b[` to backspace + `[` and `\r[` to carriage return + `[`.
syntax: {
k: {markUp: `\k[`, javascript: `"►+grammar.oneArg("k",◄"`, pre: `<b>`, post: `</b>`},
i: {markUp: `\i[`, javascript: `"►+grammar.oneArg("i",◄"`, pre: `<em>`, post: `</em>`},
b: {markUp: `\b[`, javascript: `"►+grammar.twoArgs("b",◄"`, pattern: `$1`},
r: {markUp: `\r[`, javascript: `"►+grammar.twoArgs("r",◄"`, pattern: `<img alt="$1" src="$2"/>`},
close0: {markUp: `](`, javascript: `"►,◄"`},
close1: {markUp: `)`, javascript: `"►)+◄"`},
close2: {markUp: `]`, javascript: `"►)+◄"`}
},
// Special characters used while building the position comments.
marker: { // https://www.w3schools.com/charsets/ref_utf_geometric.asp
begMarker: `►`, // 25ba
endMarker: `◄`, // 25c4
begComment: `◆`, // 25c6
endComment: `◇`, // 25c7
fillerChar: `●` // 25cf
},
// Renders a one-argument command: pre + arg1 + post of the given syntax entry.
oneArg: function( command, arg1 ) {
return this.syntax[ command ].pre + arg1 + this.syntax[ command ].post;
},
// Renders a two-argument command: replaces every `$1` in the pattern with arg1,
// then every `$2` in the result with arg2.
twoArgs: function( command, arg1, arg2 ) {
return this.syntax[ command ].pattern.split( `$1` ).join( arg1 ).split( `$2` ).join( arg2 );
}
};
};
/**
 * Listener for the window 'error' event while a transpiled function is being
 * compiled. Uses `err.colno` to find the position comments on either side of
 * the error column, builds an excerpt of the markup around that range, and
 * rejects the pending promise of the current check with an Error.
 * @param err - the error event; `colno` and `message` are read from it.
 */
static transpilerSyntaxChecker(err) {
// Uncomment the following line to disable default console error message.
//err.preventDefault();
// The transpiled expression sits on the second line of the stored function source.
let transpiledLine = Transpiler._syntaxCheck[ Transpiler._currentSyntaxCheck ].transpiledFunction.split(`\n`)[1];
// lo: the number following the last ● found within the first `colno` characters.
let lo = parseInt( transpiledLine.substr( transpiledLine.substr( 0, err.colno ).lastIndexOf( `●` ) + 1 ) );
// hi: the number following the first ● found from index `colno` onwards.
let hi = parseInt( transpiledLine.substr( transpiledLine.substr( err.colno ).indexOf( `●` ) + err.colno + 1 ) );
let markUpLine = Transpiler._syntaxCheck[ Transpiler._currentSyntaxCheck ].markUp;
// Excerpt of the markup from 40 characters before lo to 40 after hi, with line breaks shown as ↵.
let errString = markUpLine.substring( lo - 40, hi + 40 ).split(`\n`).join(`↵`) + `\n`;
// Second line: dots up to lo, carets from lo to hi, cut to the same window as the excerpt.
errString += ( `.`.repeat( lo ) + `^`.repeat( hi - lo ) ).substring( lo - 40, hi + 40 );
Transpiler._syntaxCheck[Transpiler._currentSyntaxCheck].rejectFunction( new Error(`'${ err.message }' in transpiled code, corresponding to character range ${ lo }:${ hi } in the markup.\n${ errString }`) );
window.removeEventListener('error', Transpiler.transpilerSyntaxChecker);
delete Transpiler._syntaxCheck[Transpiler._currentSyntaxCheck];
};
/**
 * Transpiles `markUpString`, syntax-checks the generated JavaScript and, if the
 * check passes, executes it and returns the result.
 * @param markUpString - the markup text to transpile.
 * @returns a promise for the value of the transpiled expression. It rejects with
 *   the Error built by transpilerSyntaxChecker when the generated code does not
 *   compile, and with a string when the "Ambiguous grammar" check below fails.
 */
async transpileAndExecute( markUpString ) {
// Convert the markUp to javascript.
console.log( markUpString );
let gm = this.grammar.marker;
// markUpIndex: copy of the markup in which every consumed token is replaced by filler
// characters of the same length, so its length is always an offset into the markup.
let markUpIndex = markUpString;
// transpiled: the JavaScript being built, with tokens replaced by their fragments.
let transpiled = markUpString;
for ( let n in this.grammar.syntax ) {
let command = this.grammar.syntax[ n ];
let markUpIndexSplit = markUpIndex.split( command.markUp );
let transpiledSplit = transpiled.split( command.markUp );
// Both strings must contain the token the same number of times; presumably a
// mismatch means the token also occurs in JavaScript inserted by an earlier entry.
if ( markUpIndexSplit.length !== transpiledSplit.length ) {
throw `Ambiguous grammar when searching for "${ command.markUp }" to replace with "${ command.javascript }".`;
}
// Re-join the pieces, inserting the JavaScript fragment (and filler) at every split point.
for ( let i = 0; i < markUpIndexSplit.length; i++ ) {
if ( i === 0 ) {
markUpIndex = markUpIndexSplit[ 0 ];
transpiled = transpiledSplit[ 0 ];
} else {
// ► becomes a comment placeholder holding the offset where the token starts ...
let js = command.javascript.replace( gm.begMarker, gm.begComment + gm.fillerChar + markUpIndex.length + gm.endComment );
markUpIndex += gm.fillerChar.repeat( command.markUp.length );
// ... and ◄ one holding the offset just past the token.
js = js.replace( gm.endMarker, gm.begComment + gm.fillerChar + markUpIndex.length + gm.endComment );
markUpIndex += markUpIndexSplit[ i ];
transpiled += js + transpiledSplit[ i ];
}
}
};
// Turn the comment placeholders into real JavaScript comments.
transpiled = transpiled.split( gm.begComment ).join( `/*` );
transpiled = transpiled.split( gm.endComment ).join( `*/` );
// Wrap the whole expression in quotes, bracketed by position comments for start and end.
transpiled = `/*${ gm.fillerChar }0*/"${ transpiled }"/*${ gm.fillerChar }${ markUpIndex.length + 1 }*/`;
console.log( markUpIndex );
console.log( transpiled );
let self = this;
var id = ++Transpiler._syntaxCheckCounter;
Transpiler._syntaxCheck[id] = {};
// Line breaks in the transpiled code are replaced by spaces and the code is put on a
// line of its own after the header; transpilerSyntaxChecker reads that second line.
// Because of the line break after `return`, the statement ends there (automatic
// semicolon insertion), so the header never returns the expression's value.
let transpiledFunction = `"use strict"; if ( run ) return\n${ transpiled.split(`\n`).join(` `) }`;
Transpiler._syntaxCheck[id].markUp = markUpString;
Transpiler._syntaxCheck[id].transpiledFunction = transpiledFunction;
//
// Here's where it gets tricky. (See "CertainPerformance's" post at
// https://stackoverflow.com/questions/35252731
// for details behind the concept.) In this implementation a Promise
// is created, which on success of the JS compiler syntax check, is resolved
// immediately. Otherwise, if there is a syntax error, the transpilerSyntaxChecker
// routine, which has access to a reference to the Promise reject function,
// calls the reject function to resolve the promise, returning the error back
// to the calling process.
//
let checkSyntaxPromise = new Promise((resolve, reject) => {
setTimeout( () => {
Transpiler._currentSyntaxCheck = id;
window.addEventListener('error', Transpiler.transpilerSyntaxChecker);
// Perform the syntax check by attempting to compile the transpiled function.
// It is called with `run` undefined, so the early return is skipped and the
// expression on the second line is evaluated once with its result discarded.
new Function( `grammar`, `run`, transpiledFunction )( self.grammar );
resolve( null );
window.removeEventListener('error', Transpiler.transpilerSyntaxChecker);
delete Transpiler._syntaxCheck[id];
});
Transpiler._syntaxCheck[id].rejectFunction = reject;
});
// NOTE(review): `result` is never read; the await only waits for the check to settle.
let result = await checkSyntaxPromise;
// With the markUp now converted to javascript and syntax checked, let's execute it!
// Joining `return` and the expression onto one line makes the function return the value.
return ( new Function( `grammar`, `run`, transpiledFunction.replace(`return\n`,`return `) )( this.grammar, true ) );
};
}
Here are some sample runs with botched markup, and the corresponding console output. The following markup has an extra ]...
// Sample run: the markup carries one ']' too many after "Goodbye".
// Whatever the transpiler produces, the result or the error, is logged.
let markUp = `Hello World \k[Goodbye]] World`;
new Transpiler()
  .transpileAndExecute( markUp )
  .then( ( output ) => console.log( output ) )
  .catch( ( failure ) => console.log( failure ) );
...resulting in transpiled code of...
/*●0*/"Hello World "/*●12*/+grammar.oneArg("k",/*●14*/"Goodbye"/*●21*/)+/*●22*/""/*●22*/)+/*●23*/" World"/*●30*/
Note the interspersed comments, which point back to the character position in the original markup. Then, when the javascript compiler throws an error, it is trapped by transpilerSyntaxChecker which uses the embedded comments to identify the location in the markup, dumping the following results to the console...
Uncaught SyntaxError: Unexpected token )
at new Function (<anonymous>)
at markUp.html:127
Error: 'Uncaught SyntaxError: Unexpected token )' in transpiled code, corresponding to character range 22:23 in the markup.
Hello World k[Goodbye]] World
......................^
at transpilerSyntaxChecker (markUp.html:59)
Note that the Unexpected token ) message refers to the transpiled code, not the markup script, but the output points to the offending ].
Here's another sample run, in this case missing a close ]...
// Sample run: the `\k[` command is never closed.
// Whatever the transpiler produces, the result or the error, is logged.
let markUp = `\i[Hello World] \k[\i[Goodbye] World`;
new Transpiler()
  .transpileAndExecute( markUp )
  .then( ( output ) => console.log( output ) )
  .catch( ( failure ) => console.log( failure ) );
...which produces the following transpiled code...
/*●0*/""/*●0*/+grammar.oneArg("i",/*●2*/"Hello World"/*●13*/)+/*●14*/" "/*●15*/+grammar.oneArg("k",/*●17*/""/*●17*/+grammar.oneArg("i",/*●19*/"Goodbye"/*●26*/)+/*●27*/" World"/*●34*/
...throwing the following error...
Uncaught SyntaxError: missing ) after argument list
at new Function (<anonymous>)
at markUp.html:127
Error: 'Uncaught SyntaxError: missing ) after argument list' in transpiled code, corresponding to character range 27:34 in the markup.
i[Hello World] k[i[Goodbye] World
...........................^^^^^^^
at transpilerSyntaxChecker (markUp.html:59)
Maybe not the best solution, but a lazy man's solution. Tschallacka's response has merit (ie, a custom syntax checker or using something like Jison) in performing a true syntax check against the markup, without the setTimeout / Promise complexities nor the somewhat imprecise method of using the transpiler error messages to refer to the original markup...

Related

JS: getting error line number inside `new Function(...)`

In my application I parse some user input and then run it as JavaScript code using (new Function(...))(). If the input is incorrect, this throws an exception. What I need is a way to get the line number where the exception happened in the parsed string that had been provided to new Function(). Is it possible?

For this we need to write a logic to capture the stacktrace from the error object and find out where exactly the anonymous function has indicated the error has been thrown.
The line number where the error is thrown in Chrome is indicated as <anonymous>:5:17, whereas in Firefox it is Function:5:17
/**
 * Find the line, relative to the body passed to `new Function(...)`, at which
 * a runtime error was thrown.
 *
 * The stack trace marks frames of dynamically created functions as
 * `<anonymous>:LINE:COL` (Chrome) or `Function:LINE:COL` (Firefox). The whole
 * line number is parsed, not just its first digit, so lines 10 and above are
 * reported correctly.
 *
 * @param {{stack?: string}} err - the caught error.
 * @param {number} [headerLines=2] - lines the engine places before the body
 *   in the generated function source; subtracted from the reported line.
 * @returns {number|null} 1-based line within the function body, or null when
 *   the stack contains no such frame.
 */
function getFunctionErrorLine(err, headerLines = 2) {
  const frame = /(?:<anonymous>|Function):(\d+):\d+/.exec(String(err?.stack ?? ''));
  if (frame === null) {
    return null;
  }
  return Number.parseInt(frame[1], 10) - headerLines;
}

try {
  (new Function(`var hello = 10;
const world = 20;
let foo = 'bar';
xyz; //simulating error here
`))();
} catch (err) {
  console.log(getFunctionErrorLine(err));
}

How can I scrape values from embedded Javascript in HTML?

I need to parse some values out of embedded Javascript in a webpage.
I tried to tokenize the HTML with something like this but it doesn't tokenize the Javascript part.
// CheckSitegroup tokenizes the HTML read from httpBody and returns the values
// of all attributes found on <script> start tags. Every token type and token
// is printed along the way, as is each attribute key that gets collected.
// Tokenizing stops at the first html.ErrorToken.
func CheckSitegroup(httpBody io.Reader) []string {
	tokenizer := html.NewTokenizer(httpBody)
	collected := []string{}
	for {
		kind := tokenizer.Next()
		fmt.Println("TokenType:", kind)
		// check if HTML file has ended
		if kind == html.ErrorToken {
			break
		}
		current := tokenizer.Token()
		fmt.Println("Token:", current)
		// Only start tags named "script" are of interest.
		if kind != html.StartTagToken || current.DataAtom.String() != "script" {
			continue
		}
		for _, attribute := range current.Attr {
			fmt.Println("ATTR.KEY:", attribute.Key)
			collected = append(collected, attribute.Val)
		}
	}
	return collected
}
The Script in the HTML-body looks like this and I need the campaign number (nil / "" if there is no number or if there is no test.campaign = at all - same goes for the sitegroup).
Is there an easy way to get the information? I thought about regular expressions but maybe there is something else? Never worked with regex.
<script type="text/javascript" >
var test = {};
test.campaign = "8d26113ba";
test.isTest = "false";
test.sitegroup = "Homepage";
</script>

first you need to get the JS code safely. The easiest way would be with the goquery lib: https://github.com/PuerkitoBio/goquery
after that you need to get the variables safely. Depending on how complicated it gets you could either parse the real JS Abstract syntax tree and look for the right variables for example with the excellent JS interpreter in GO: http://godoc.org/github.com/robertkrimen/otto/parser
or as you mentioned in the case mentioned above regex would be really easy. There is a really nice tutorial on regexes in go : https://github.com/StefanSchroeder/Golang-Regex-Tutorial

The Go standard strings library comes with a lot of useful functions which you can use to parse the JavaScript code to get campaign number you need.
The following code can get the campaign number from the js code provided in your question (Run code on Go Playground):
package main
import (
"bufio"
"fmt"
"os"
"strings"
)
const js = `
<script type="text/javascript" >
var test = {};
test.campaign = "8d26113ba";
test.isTest = "false";
test.sitegroup = "Homepage";
</script>
`
// StringToLines splits s into its lines using a bufio.Scanner and returns
// them without their line terminators. A scanner error is reported on
// standard error; the lines read up to that point are still returned.
func StringToLines(s string) []string {
	var collected []string
	sc := bufio.NewScanner(strings.NewReader(s))
	for {
		if !sc.Scan() {
			break
		}
		collected = append(collected, sc.Text())
	}
	if scanErr := sc.Err(); scanErr != nil {
		fmt.Fprintln(os.Stderr, "reading standard input:", scanErr)
	}
	return collected
}
// getCampaignNumber extracts the value assigned on a JavaScript line such as
//
//	test.campaign = "8d26113ba";
//
// and returns it without surrounding whitespace, the trailing semicolon and
// the enclosing quotes. It returns "" when the line contains no '=' or the
// assigned value is empty, instead of panicking on an out-of-range index.
func getCampaignNumber(line string) string {
	eq := strings.Index(line, "=")
	if eq < 0 {
		return ""
	}
	value := strings.TrimSpace(line[eq+1:])
	// The statement terminator is optional in JavaScript, so do not assume it.
	value = strings.TrimSpace(strings.TrimSuffix(value, ";"))
	// Accept both quoting styles.
	return strings.Trim(value, `"'`)
}
// main prints the campaign number of every line of the embedded js snippet
// that mentions "campaign".
func main() {
	for _, candidate := range StringToLines(js) {
		if !strings.Contains(candidate, "campaign") {
			continue
		}
		number := getCampaignNumber(candidate)
		println(number)
	}
}

Number formatting in template strings (Javascript - ES6)

I was wondering if it is possible to format numbers in Javascript template strings, for example something like:
var n = 5.1234;
console.log(`This is a number: $.2d{n}`);
// -> 5.12
Or possibly
var n = 5.1234;
console.log(`This is a number: ${n.toString('.2d')}`);
// -> 5.12
That syntax obviously doesn't work, it is just an illustration of the type of thing I'm looking for.
I am aware of tools like sprintf from underscore.string, but this seems like something that JS should be able to do out the box, especially given the power of template strings.
EDIT
As stated above, I am already aware of 3rd party tools (e.g. sprintf) and customised functions to do this. Similar questions (e.g. JavaScript equivalent to printf/String.Format) don't mention template strings at all, probably because they were asked before the ES6 template strings were around. My question is specific to ES6, and is independent of implementation. I am quite happy to accept an answer of "No, this is not possible" if that is case, but what would be great is either info about a new ES6 feature that provides this, or some insight into whether such a feature is on its way.

No, ES6 does not introduce any new number formatting functions, you will have to live with the existing .toExponential(fractionDigits), .toFixed(fractionDigits), .toPrecision(precision), .toString([radix]) and toLocaleString(…) (which has been updated to optionally support the ECMA-402 Standard, though).
Template strings have nothing to do with number formatting, they just desugar to a function call (if tagged) or string concatenation (default).
If those Number methods are not sufficient for you, you will have to roll your own. You can of course write your formatting function as a template string tag if you wish to do so.

You should be able to use the toFixed() method of a number:
var num = 5.1234;
var n = num.toFixed(2);

If you want to use ES6 tag functions here's how such a tag function would look,
/**
 * Template tag that renders every interpolated number with two decimals and
 * leaves every other value as it is.
 *
 * Only values of type `number` are formatted. The previous loose check
 * `Number(n) == n` also matched '', true and one-element arrays, turning an
 * empty string into "0.00".
 *
 * @param {string[]} pieces - literal parts of the template.
 * @param {...*} substitutions - interpolated values.
 * @returns {string} the assembled string.
 */
function d2(pieces, ...substitutions) {
  return substitutions.reduce((result, value, index) => {
    const rendered = typeof value === 'number' ? value.toFixed(2) : value;
    return result + rendered + pieces[index + 1];
  }, pieces[0]);
}
which can then be applied to a template string thusly,
d2`${some_float} (you can interpolate as many floats as you want) of ${some_string}`;
that will format the float and leave the string alone.

Here's a fully ES6 version of Filip Allberg's solution above, using ES6 "rest" params. The only thing missing is being able to vary the precision; that could be done by making a factory function. Left as an exercise for the reader.
/**
 * Template tag (ES6 rest-parameter version) that renders every interpolated
 * number with two decimals and leaves every other value as it is.
 *
 * Only values of type `number` are formatted. The loose check
 * `Number(n) == n` also matched '', true and one-element arrays, so an empty
 * string came out as "0.00".
 *
 * @param {string[]} strs - literal parts of the template.
 * @param {...*} args - interpolated values.
 * @returns {string} the assembled string.
 */
function d2(strs, ...args) {
  let result = strs[0];
  for (const [index, value] of args.entries()) {
    result += typeof value === 'number' ? value.toFixed(2) : value;
    result += strs[index + 1];
  }
  return result;
}
// Demo: numbers are shown with two decimals, the string is passed through.
// The values are declared explicitly; assigning to undeclared names creates
// implicit globals and is an error in strict mode and in ES modules.
const f = 1.2345678;
const s = "a string";
console.log(d2`template: ${f} ${f*100} and ${s} (literal:${9.0001})`);

While template-string interpolation formatting is not available as a built-in, you can get equivalent behavior with Intl.NumberFormat:
/**
 * Format a number with a fixed count of fraction digits using
 * Intl.NumberFormat and the runtime's default locale.
 * @param {number} num - value to format.
 * @param {number} [fraction=2] - used as both the minimum and the maximum
 *   number of fraction digits.
 * @returns {string} the formatted number.
 */
const format = (num, fraction = 2) => {
  const digitOptions = {
    minimumFractionDigits: fraction,
    maximumFractionDigits: fraction,
  };
  const formatter = new Intl.NumberFormat([], digitOptions);
  return formatter.format(num);
};
format(5.1234); // -> '5.12'
Note that regardless of your implementation of choice, you might get bitten by rounding errors:
(9.999).toFixed(2) // -> '10.00'
new Intl.NumberFormat([], {
minimumFractionDigits: 2,
maximumFractionDigits: 2, // <- implicit rounding!
}).format(9.999) // -> '10.00'

based on ES6 Tagged Templates (credit to https://stackoverflow.com/a/51680250/711085), this will emulate typical template string syntax in other languages (this is loosely based on python f-strings; I avoid calling it f in case of name overlaps):
Demo:
> F`${(Math.sqrt(2))**2}{.0f}` // normally 2.0000000000000004
"2"
> F`${1/3}{%} ~ ${1/3}{.2%} ~ ${1/3}{d} ~ ${1/3}{.2f} ~ ${1/3}`
"33% ~ 33.33% ~ 0 ~ 0.33 ~ 0.3333333333333333"
> F`${[1/3,1/3]}{.2f} ~ ${{a:1/3, b:1/3}}{.2f} ~ ${"someStr"}`
"[0.33,0.33] ~ {\"a\":\"0.33\",\"b\":\"0.33\"} ~ someStr"
Fairly simple code using :
/**
 * Format `obj` according to the format specification `fmt`.
 *
 * Implemented with (Number).toFixed:
 *   ${1/3}{.2f}            -> 0.33
 *   ${1/3}{.0f}            -> 0
 *   ${1/3}{%}              -> 33%
 *   ${1/3}{.3%}            -> 33.333%
 *   ${1/3}{d}              -> 0
 *   ${{a:1/3,b:1/3}}{.2f}  -> {"a":"0.33","b":"0.33"}  (values are formatted strings)
 *   ${[1/3,1/3]}{.2f}      -> [0.33,0.33]
 *   ${{a:1/3,b:1/3}}{*:'.2f',b:'%'} -> per-key formats              //TODO not implemented
 *   ${someObj} -> formatting hooks such as [Symbol.FTemplate](){...} //TODO not implemented
 *
 * Without a format, strings are returned unchanged and everything else is
 * passed through JSON.stringify. Arrays and plain objects are formatted
 * element by element with the same format.
 *
 * @param {*} obj - value to format.
 * @param {string} [fmt] - one of `.Nf`, `%`, `.N%` or `d`.
 * @returns {string} the formatted value.
 * @throws {Error} when the format is not recognised or cannot be applied to
 *   the value; the underlying error is available as `cause`.
 */
function FORMATTER(obj, fmt) {
  try {
    if (fmt === undefined) {
      return typeof obj === 'string' ? obj : JSON.stringify(obj);
    }
    if (Array.isArray(obj)) {
      return '[' + obj.map((item) => FORMATTER(item, fmt)).join(',') + ']';
    }
    if (typeof obj === 'object' && obj !== null) {
      const formattedEntries = Object.entries(obj).map(([key, value]) => [key, FORMATTER(value, fmt)]);
      return JSON.stringify(Object.fromEntries(formattedEntries));
    }

    // Parse the format before looking at the value, so that an unknown format
    // is reported even for null and undefined.
    let fracDigits = 0;
    let percent = false;
    const fixed = fmt.match(/^\.(\d+)f$/);
    const percentage = fmt.match(/^(?:\.(\d+))?%$/);
    if (fixed) {
      fracDigits = Number.parseInt(fixed[1], 10);
    } else if (percentage) {
      // The digit group is optional: a bare "%" means no fraction digits.
      fracDigits = percentage[1] === undefined ? 0 : Number.parseInt(percentage[1], 10);
      percent = true;
    } else if (fmt !== 'd') {
      throw new Error('format not recognized');
    }

    if (obj === null) {
      return 'null';
    }
    if (obj === undefined) {
      // one might extend the above syntax to
      // allow for example for .3f? -> "undefined"|"0.123"
      return 'undefined';
    }
    const value = percent ? obj * 100 : obj;
    return value.toFixed(fracDigits) + (percent ? '%' : '');
  } catch (err) {
    throw new Error(
      `error executing F\`$\{${String(obj)}\}{${fmt}}\` specification: ${err}`,
      { cause: err },
    );
  }
}
/**
 * Template tag that lets a substitution be followed by a format in braces.
 *
 * usage: F`Demo: 1+1.5 = ${1+1.5}{.2f}`
 *   --> "Demo: 1+1.5 = 2.50"
 *
 * For every substitution, a `{...}` group at the very start of the literal
 * text that follows it is taken as the format and handed to FORMATTER together
 * with the value; the remaining text is appended unchanged.
 *
 * @param {string[]} strs - literal parts of the template.
 * @param {...*} args - interpolated values.
 * @returns {string} the assembled string.
 */
function F(strs, ...args) {
  // ^ ... $ with the `s` flag: the pattern has to cover the whole piece, line
  // breaks included; without it everything after the first line break was
  // dropped. The format group is lazy so that it ends at the first unescaped
  // '}' instead of swallowing later braces in the text.
  const formatThenText = /^(?:\{(.*?)(?<!\\)\})?(.*)$/s;
  let result = strs[0];
  args.forEach((arg, index) => {
    const [, fmt, text] = strs[index + 1].match(formatThenText);
    result += FORMATTER(arg, fmt) + text;
  });
  return result;
}
sidenote: The core of the code is as follows. The heavy lifting is done by the formatter. The negative lookbehind is somewhat optional, and to let one escape actual curly braces.
let R = strs[0];
args.forEach((arg,i)=> {
let [_,fmt,str] = strs[i+1].match(/(?:\{(.*)(?<!\\)\})?(.*)/);
R += FORMATTER(arg,fmt) + str;
});

You can use ES6 tag functions. I don't know of a ready-made one for this.
It might look like this:
num`This is a number: $.2d{n}`
Learn more:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals
https://developers.google.com/web/updates/2015/01/ES6-Template-Strings

Pre-processing Emulator for JavaScript ( Timing / Debug Example )

In C++ you can omit compiling debug code by using pre-processing directives in order to keep your compiled code fast and not hindered by debug code not needed in production.
Is there a correlative way to do this in JavaScript? What I have been doing in the past is commenting out debug code, but I want a cleaner way to do it.
An example below shows 4 if statements that activate if debug is set to true. However in production I don't want this checked 4 times when I know it will be set to false. As I mentioned I could cram it into one line and comment it out...but I want a clean way to do it?
/**
 ** cType
 **
 ** Runs the model's `pre` step on the pipe object and, unless that step
 ** reports the result as 'complete', sends the pipe to the server through
 ** cMachine and hands the parsed reply to the model's `post` step. When
 ** `debug` is set, both payloads and the time spent in the pre, transit and
 ** post phases are logged.
 */
function cType( o_p ) {
    let t1, t2, t3, t4, i1, i2, i3;
    if ( debug ) {
        t1 = new Date().getTime();
    }
    o_p = MType[ o_p.model ].pre( o_p );
    // Nothing to send when the pre step already completed the request.
    if ( o_p.result === 'complete' ) {
        return;
    }
    if ( debug ) {
        t2 = new Date().getTime();
        console.log( '---------------Send to Server - object_pipe: \n ' + o_p.toSource() );
    }
    const outgoing = JSON.stringify( o_p );
    cMachine( 'pipe=' + outgoing, function( reply ) {
        if ( debug ) {
            console.log( '---------------Receive from Server - object_pipe: \n ' + reply );
            t3 = new Date().getTime();
        }
        MType[ o_p.model ].post( JSON.parse( reply ) );
        if ( debug ) {
            t4 = new Date().getTime();
            i1 = t2 - t1;
            i2 = t3 - t2;
            i3 = t4 - t3;
            console.log( '---------------Pre, Transit, Post = ', i1, i2, i3 );
        }
    } );
}

You can always pass it through c preprocessor like:
# -x c : treat input.js as C source; gcc does not know the .js extension and
#        would otherwise leave the file alone as a linker input.
# -P   : do not emit "# <line> <file>" linemarkers, which are not valid JavaScript.
gcc -E -P -x c input.js -o output.js
This will allow you to use #if and even include and macros.

If you use RequireJS, you can use build pragmas and even the has.js integration to disable/enable code fragments at optimization (minification) time.

No, Javascript is not compiled, it's interpreted. Therefore it's impossible to have preprocessing directives unless you pass non-standard Javascript - it probably won't be Javascript anymore - code through another utility.

How to get Abstract Syntax Tree (AST) out of JISON parser?

So I have generated a parser via JISON:
// mygenerator.js
// Builds a Jison parser for whitespace-separated hexadecimal strings from a
// grammar given as a JSON object, then runs it on two sample inputs.
var Parser = require("jison").Parser;
// a grammar in JSON
var grammar = {
// Lexer: each rule is a [pattern, action] pair.
"lex": {
"rules": [
["\\s+", "/* skip whitespace */"],
["[a-f0-9]+", "return 'HEX';"]
]
},
// Parser: hex_strings is one HEX token or a hex_strings followed by a HEX token.
// The productions carry no actions, so no value is built while parsing.
"bnf": {
"hex_strings" :[ "hex_strings HEX",
"HEX" ]
}
};
// `grammar` can also be a string that uses jison's grammar format
var parser = new Parser(grammar);
// generate source, ready to be written to disk
var parserSource = parser.generate();
// you can also use the parser directly from memory
// returns true
parser.parse("adfe34bc e82a");
// throws lexical error
parser.parse("adfe34bc zxg");
My question is, how do I retrieve the AST now? I can see that I can run the parser against input, but it just returns true if it works or fails if not.
For the record, I am using JISON: http://zaach.github.com/jison/docs/

I discovered an easier and cleaner way than the one in the other answer.
This post is divided into 2 parts:
General way: Read how to implement my way.
Actual answer: An implementation of the previously described way specific to OP's request.
General way
Add a return statement to your start rule.
Example:
start
: xyz EOF
{return $1;}
;
xyz is another production rule. $1 accesses the value of the first symbol (either terminal or non-terminal) of the associated production rule. In the above code $1 contains the result from xyz.
Add $$ = ... statements to all other rules.
Warning: Use $$ = ..., don't return! return will immediately abort further execution by returning the specified value, as the name indicates.
Example:
multiplication
: variable '*' variable
{$$ = {
type: 'multiplication',
arguments: [
$1,
$3
]
};
}
;
The above production rule will pass the object $$ to the higher level (i.e. the production rule which used this rule).
Let's complement the multiplication rule in order to achieve a runnable example:
/* Runnable example: parses "<variable> * <variable>" and returns an AST object. */
/* lexical grammar */
%lex
%%
\s+ /* skip whitespace */
[0-9]+("."[0-9]+)?\b return 'NUMBER'
[a-zA-Z]+ return 'CHARACTER'
"*" return '*'
<<EOF>> return 'EOF'
. return 'INVALID'
/lex
%start start
%% /* language grammar */
/* Start rule: the only rule that uses `return`; it hands the finished tree to the caller of parse(). */
start
: multiplication EOF
{return $1;}
;
/* Builds a 'multiplication' node from the two operands ($1 and $3; $2 is the '*' token). */
multiplication
: variable '*' variable
{$$ = {
type: 'multiplication',
arguments: [
$1,
$3
]
};
}
;
/* Wraps a NUMBER or CHARACTER token in a node of the matching type. */
variable
: 'NUMBER'
{$$ = {
type: 'number',
arguments: [$1]
};
}
| 'CHARACTER'
{$$ = {
type: 'character',
arguments: [$1]
};
}
;
You can try it online: http://zaach.github.io/jison/try/. At the time of this edit (12.02.2017), the online generator sadly throws an error - independently of the Jison file you feed in. See the addendum after step 3 for hints on how to generate the parser on your local machine.
If you input for example a*3, you get the object structure below:
{
"type": "multiplication",
"arguments": [
{
"type": "character",
"arguments": ["a"]
},
{
"type": "number",
"arguments": ["3"]
}
]
}
Clean the code and generated AST by injecting custom objects
When using the Jison-generated parser, you can inject arbitrary objects into the scope of the 'code blocks' in the syntax file:
const MyParser = require('./my-parser.js');
MyParser.parser.yy = {
MultiplicationTerm
/*, AdditionTerm, NegationTerm etc. */
};
let calculation = MyParser.parse("3*4");
// Using the modification below, calculation will now be an object of type MultiplicationTerm
If MultiplicationTerm had a constructor accepting both factors, the new part for multiplication would look like this:
multiplication
: variable '*' variable
{$$ = new yy.MultiplicationTerm($1, $3);}
;
Addendum on how to create the Jison parser:
Download the Jison NPM module. Then you can create the Jison-parser either by using Jison's command-line or running new jison.Generator(fileContents).generate() in your build file and write the returned string to your preferred file, e.g. my-parser.js.
Actual answer
Applying the rules above leads to the Jison file below.
The Jison file format and the JavaScript API (as stated in the question) are interchangeable as far as I know.
Also note that this Jison file only produces a flat tree (i.e. a list) since the input format is only a list as well (or how would you nest concatenated hex strings in a logical way?).
/* Parses whitespace-separated hex strings and returns them as a flat array. */
/* lexical grammar */
%lex
%%
\s+ /* skip whitespace */
[a-f0-9]+ return 'HEX'
<<EOF>> return 'EOF'
. return 'INVALID'
/lex
%start start
%% /* language grammar */
/* Start rule: returns the collected array to the caller of parse(). */
start
: hex_strings EOF
{return $1;}
;
/* Left-recursive list: a single HEX starts a new array, every further HEX is appended to it. */
hex_strings
: hex_strings HEX
{$$ = $1.concat([$2]);}
| HEX
{$$ = [$1];}
;

I'm not too familiar with Jison's inner workings, so I don't know any method that would do it.
But in case you're interested in a little bruteforce to solve this problem, try this:
First, create an object to hold the AST
/**
 * Node of the abstract syntax tree built by the Jison actions.
 * @param {string} name - the production the node was created for.
 * @param {Array} x - the node's children: jisonAST nodes and/or token values.
 */
function jisonAST(name, x) {
  this.name = name;
  this.x = x;
}

/**
 * Render the node and its children as indented text.
 *
 * A node whose only child is a token is written on one line; otherwise every
 * child goes on a line of its own, one level deeper.
 *
 * @param {number} indent - indentation level of this node.
 * @returns {string} the textual tree.
 */
jisonAST.prototype.get = function (indent) {
  // create an indentation for level l
  const indentString = (l) => ' '.repeat(Math.max(0, l) || 0);

  let text = indentString(indent) + '[' + this.name + ': ';
  const children = this.x;
  if (children.length === 1 && !(children[0] instanceof jisonAST)) {
    text += "'" + children[0] + "'";
  } else {
    // for...of with a block-scoped variable: the previous `for (i in rem)`
    // wrote to an undeclared global that every recursive call shared.
    for (const child of children) {
      if (child instanceof jisonAST) {
        text += '\n' + child.get(indent + 1);
      } else {
        text += '\n' + indentString(indent + 1) + "'" + child + "'";
      }
    }
  }
  return text + ']';
};
Add a little helper function for Jison's BNF
/**
 * Helper for Jison's BNF: turn a production into a [production, action] pair
 * whose action builds a jisonAST node from all symbols of the production.
 *
 * @param {string} s - the production, symbols separated by single spaces,
 *   e.g. "hex_strings HEX".
 * @returns {[string, string]} the production and the generated action code,
 *   e.g. "$$ = new yy.jisonAST('hex_strings HEX',[$1,$2]);".
 */
function o(s) {
  const symbolCount = s.split(' ').length;
  // One positional reference ($1, $2, ...) per symbol of the production.
  const references = Array.from({ length: symbolCount }, (_, index) => '$' + (index + 1));
  const action = "$$ = new yy.jisonAST('" + s + "',[" + references.join(',') + ']);';
  return [s, action];
}
With this, continue to the example code (slight modification):
// Same hex-string grammar as in the question, with actions that build a
// jisonAST tree; relies on the jisonAST constructor and the o() helper.
var Parser = require("jison").Parser;
// a grammar in JSON
var grammar = {
// Lexer: each rule is a [pattern, action] pair.
"lex": {
"rules": [
["\\s+", "/* skip whitespace */"],
["[a-f0-9]+", "return 'HEX';"]
]
},
"bnf": {
// had to add a start/end, see below
// The start rule is the single place that returns the finished tree.
"start" : [ [ "hex_strings", "return $1" ] ],
// o() supplies, for each production, the action that creates its jisonAST node.
"hex_strings" :[
o("hex_strings HEX"),
o("HEX")
]
}
};
var parser = new Parser(grammar);
// expose the AST object to Jison
// (the generated actions refer to it as yy.jisonAST)
parser.yy.jisonAST = jisonAST
Now you can try parsing:
console.log( parser.parse("adfe34bc e82a 43af").get(0) );
This will give you:
[hex_strings HEX:
[hex_strings HEX:
[HEX: 'adfe34bc']
'e82a']
'43af']
Small note: I had to add a "start" rule, in order to only have one statement that returns the result. It is not clean (since the BNF works fine without it). Set it as an entry point to be sure...

We Keep Coding

JavaScript is the programming language of the Web.

Mapping transpiled code back to the original markup script - javascript

Related

JS: getting error line number inside `new Function(...)`

How can I scrape values from embedded Javascript in HTML?

Number formatting in template strings (Javascript - ES6)

Pre-processing Emulator for JavaScript ( Timing / Debug Example )

How to get Abstract Syntax Tree (AST) out of JISON parser?

Categories

Resources