RegExp doesn't work fine - javascript

I'm working on a template engine, I try to catch all strings inside <% %>, but when I work it on the <%object.property%> pattern, everything fails.
My code:
var render = function(input, data){
var re = /<%([^%>]+)?%>/g;
var templateVarArray;
// var step = "";
while((templateVarArray = re.exec(input))!=null){
var strArray = templateVarArray[1].split(".");
// step+= templateVarArray[1]+" ";
if(strArray.length==1)
input = input.replace(templateVarArray[0], data[templateVarArray[1]]);
if(strArray.length==2){
input = input.replace(templateVarArray[0], data[strArray[0]][strArray[1]]);
}
}
// return step;
return input;
}
var input = "<%test.child%><%more%><%name%><%age%>";
document.write(render(input,{
test: { child: "abc"},
more: "MORE",
name:"ivan",
age: 22
}));
My result:
abc<%more%><%name%>22
what I want is: abc MORE ivan 22
Also, the RegExp /<%([^%>]+)?%>/g is referenced online, I did search its meaning, but still quite not sure the meaning. Especially why does it need "+" and "?", thanks a lot!

If you add a console.log() statement it will show where the next search is going to take place:
while((templateVarArray = re.exec(input))!=null){
console.log(re.lastIndex); // <-- insert this
var strArray = templateVarArray[1].split(".");
// step+= templateVarArray[1]+" ";
if(strArray.length==1)
input = input.replace(templateVarArray[0], data[templateVarArray[1]]);
if(strArray.length==2){
input = input.replace(templateVarArray[0], data[strArray[0]][strArray[1]]);
}
}
You will see something like:
14
26
This means that the next time you run re.exec(...) it will start at index 14 and 26 respectively. Consequently, you miss some of the matches after you substitute data in.
As #Alexander points out take the 'g' off the end of the regex. Now you will see something like this:
0
0
This means the search will start each time from the beginning of the string, and you should now get what you were looking for:
abcMOREivan22
Regarding your questions on the RegEx and what it is doing, let's break the pieces apart:
<% - this matches the literal '<' followed immediately by '%'
([^%>]+) - the brackets (...) indicate we want to capture the portion of the string that matches the expression within the brackets
[^...] - indicates to match anything except what follows the '^'; without the '^' would match whatever pattern is within the []
[^%>] - indicates to match and exclude a single character - either a '%' or '>'
[^%>]+ - '+' indicates to match one or more; in other words match one or more series of characters that is not a '%' and not a '>'
? - this indicates we want to do reluctant matching (without it we do what is called 'greedy' matching)
%> - this matches the literal '%' followed immediately by '>'
The trickiest part to understand is the '?'. Used in this context it means that we stop matching with the shortest pattern that will still match the overall regex. In this case, it doesn't make any difference whether you include it though there are times where it will matter depending on the matching patterns.
Suggested Improvement
The current logic is limited to data that nests two levels deep. To make it so it can handle an arbitrary nesting you could do this:
First, add a small function to do the substitution:
var substitute = function (str, data) {
return str.split('.').reduce(function (res, item) {
return res[item];
}, data);
};
Then, change your while loop to look like this:
while ((templateVarArray = re.exec(input)) != null) {
input = input.replace(templateVarArray[0], substitute(templateVarArray[1], data));
}
Not only does it handle any number of levels, you might find other uses for the 'substitute()' function.

The RegExp.prototype.exec() documentation says:
If your regular expression uses the "g" flag, you can use the exec() method multiple times to find successive matches in the same string. When you do so, the search starts at the substring of str specified by the regular expression's lastIndex property (test() will also advance the lastIndex property).
But you are replacing each match in the original string so next re.exec with a lastIndex already set not to zero will continue to search not from beginning and will omit something.
So if you want to search and substitute found results in original string - just omit \g global key:
var render = function(input, data) {
var re = /<%([^%>]+)?%>/;
var templateVarArray;
// var step = "";
while (!!(templateVarArray = re.exec(input))) {
var strArray = templateVarArray[1].split(".");
if (strArray.length == 1)
input = input.replace(templateVarArray[0], data[templateVarArray[1]]);
if (strArray.length == 2) {
input = input.replace(templateVarArray[0], data[strArray[0]][strArray[1]]);
}
}
// return step;
return input;
}
var input = "<%test.child%><%more%><%name%><%age%>";
document.write(render(input, {
test: {
child: "abc"
},
more: "MORE",
name: "ivan",
age: 22
}));

Related

Grab strings before and after inner string in regex Javascript

I have a string like this:
20 EQUALS 'Value goes here'
I want to split it up into 3 separate strings:
conditionField = 20
conditionOperation = 'EQUALS'
conditionValue = 'Value goes here'
I tried this to get the Condition Field:
var conditionField = condition.replace(/(.*)(.*EQUALS)/, '$1');
But it get's the beginning and the end.
I'm having trouble splitting it up and dealing with the white space and spaces in the value.
Your question would actually be a bit of challenge if you wanted to arbitrarily extract quoted terms along with individual words. But since you appear to have a rather fixed structure, starting with a single number, then a single word command, followed by a third term, we can use the following regex pattern here:
([^\\s]*)\\s+([^\\s]*)\\s+(.*)
Each term in parentheses above will be made available as a capture group after the match has been run. In this case, I just blanket everything after the first two terms together.
var string = "20 EQUALS 'Value goes here'";
var re = new RegExp("([^\\s]*)\\s+([^\\s]*)\\s+(.*)");
match = re.exec(string);
if (match != null) {
console.log(match[1])
console.log(match[2])
console.log(match[3])
}
Try this :
var data = '20 EQUALS 30';
var a = data.split(/(\d*) ([a-zA-Z]*) (\d*)/g);
conditionField = a[1];
conditionOperation = a[2];
conditionValue = a[3];

JavaScript regex \G + offset equivalent

Is there an equivalent of \G in JavaScript regular expressions? I need to match a pattern at an exact offset. Setting the g flag and .lastIndex searches forward from the given index, but won't match at the offset exactly.
XRegExp has this y modifier which might be what I'm looking for, but it doesn't appear to work in Node/V8.
I think the only thing you can do is set .lastIndex to your starting point, try a match with .exec(), and then if you get a match check the updated value of .lastIndex. If it's equal to your starting position plus the length of the match result, then the match began exactly where you wanted.
So:
var re = /banana/g;
re.lastIndex = 6;
var mr = re.exec("hello banana");
if (mr[0].length + 6 === re.lastIndex)
alert("good banana");
I would never claim that this is a "good" way of doing things, it's just the only possibility I know of.
Did some hackery:
var XRegExp = require('xregexp').XRegExp;
function makeOffsetPatt(patt, flags) {
if(_.isUndefined(flags)) flags = 'gnsm';
return XRegExp('(?:'+patt+')|(?<__NO_MATCH>)', flags);
}
XRegExp.prototype.match = function(str, offset) {
var result = XRegExp.exec(str, this, offset);
if(result === null || !_.isUndefined(result.__NO_MATCH)) return null;
delete result.__NO_MATCH;
return result;
};
var word = makeOffsetPatt('\\w+');
console.log(word.match('html { head {'));
console.log(word.match('html { head {',4));
console.log(word.match('html { head {',7));
Outputs:
[ 'html', index: 0, input: 'html { head {' ]
null
[ 'head', index: 7, input: 'html { head {' ]
If your pattern isn't found, it will try to match an empty string instead, which should always succeed. I then check for the existence of this empty match to determine if your pattern failed, clean up the match object and return it.

How can I improve the performance of my JavaScript text formatter?

I am allowing my users to wrap words with "*", "/", "_", and "-" as a shorthand way to indicate they'd like to bold, italicize, underline, or strikethrough their text. Unfortunately, when the page is filled with text using this markup, I'm seeing a noticeable (borderline acceptable) slow down.
Here's the JavaScript I wrote to handle this task. Can you please provide feedback on how I could speed things up?
function handleContentFormatting(content) {
content = handleLineBreaks(content);
var bold_object = {'regex': /\*(.|\n)+?\*/i, 'open': '<b>', 'close': '</b>'};
var italic_object = {'regex': /\/(?!\D>|>)(.|\n)+?\//i, 'open': '<i>', 'close': '</i>'};
var underline_object = {'regex': /\_(.|\n)+?\_/i, 'open': '<u>', 'close': '</u>'};
var strikethrough_object = {'regex': /\-(.|\n)+?\-/i, 'open': '<del>', 'close': '</del>'};
var format_objects = [bold_object, italic_object, underline_object, strikethrough_object];
for( obj in format_objects ) {
content = handleTextFormatIndicators(content, format_objects[obj]);
}
return content;
}
//#param obj --- an object with 3 properties:
// 1.) the regex to search with
// 2.) the opening HTML tag that will replace the opening format indicator
// 3.) the closing HTML tag that will replace the closing format indicator
function handleTextFormatIndicators(content, obj) {
while(content.search(obj.regex) > -1) {
var matches = content.match(obj.regex);
if( matches && matches.length > 0) {
var new_segment = obj.open + matches[0].slice(1,matches[0].length-1) + obj.close;
content = content.replace(matches[0],new_segment);
}
}
return content;
}
Change your regex with the flags /ig and remove the while loop.
Change your for(obj in format_objects) loop with a normal for loop, because format_objects is an array.
Update
Okay, I took the time to write an even faster and simplified solution, based on your code:
function handleContentFormatting(content) {
content = handleLineBreaks(content);
var bold_object = {'regex': /\*([^*]+)\*/ig, 'replace': '<b>$1</b>'},
italic_object = {'regex': /\/(?!\D>|>)([^\/]+)\//ig, 'replace': '<i>$1</i>'},
underline_object = {'regex': /\_([^_]+)\_/ig, 'replace': '<u>$1</u>'},
strikethrough_object = {'regex': /\-([^-]+)\-/ig, 'replace': '<del>$1</del>'};
var format_objects = [bold_object, italic_object, underline_object, strikethrough_object],
i = 0, foObjSize = format_objects.length;
for( i; i < foObjSize; i++ ) {
content = handleTextFormatIndicators(content, format_objects[i]);
}
return content;
}
//#param obj --- an object with 2 properties:
// 1.) the regex to search with
// 2.) the replace string
function handleTextFormatIndicators(content, obj) {
return content.replace(obj.regex, obj.replace);
}
Here is a demo.
This will work with nested and/or not nested formatting boundaries. You can omit the function handleTextFormatIndicators altogether if you want to, and do the replacements inline inside handleContentFormatting.
Your code is forcing the browser to do a whole lot of repeated, wasted work. The approach you should be taking is this:
Concoct a regex that combines all of your "target" regexes with another that matches a leading string of characters that are not your special meta-characters.
Change the loop so that it does the following:
Grab the next match from the source string. That match, due to the way you changed your regex, will be a string of non-meta characters followed by your matched portion.
Append the non-meta characters and the replacement for the target portion onto a separate array of strings.
At the end of that process, the separate accumulator array can be joined and used to replace the content.
As to how to combine the regular expressions, well, it's not very pretty in JavaScript but it looks like this. First, you need a regex for a string of zero or more "uninteresting" characters. That should be the first capturing group in the regex. Next should be the alternates for the target strings you're looking for. Thus the general form is:
var tokenizer = /(uninteresting pattern)?(?:(target 1)|(target 2)|(target 3)| ... )?/;
When you match that against the source string, you'll get back a result array that will contain the following:
result[0] - entire chunk of string (not used)
result[1] - run of uninteresting characters
result[2] - either an instance of target type 1, or null
result[3] - either an instance of target type 2, or null
...
Thus you'll know which kind of replacement target you saw by checking which of the target regexes are non empty. (Note that in your case the targets can conceivably overlap; if you intend for that to work, then you'll have to approach this as a full-blown parsing problem I suspect.)
You can do things like:
function formatText(text){
return text.replace(
/\*([^*]*)\*|\/([^\/]*)\/|_([^_]*)_|-([^-]*)-/gi,
function(m, tb, ti, tu, ts){
if(typeof(tb) != 'undefined')
return '<b>' + formatText(tb) + '</b>';
if(typeof(ti) != 'undefined')
return '<i>' + formatText(ti) + '</i>';
if(typeof(tu) != 'undefined')
return '<u>' + formatText(tu) + '</u>';
if(typeof(ts) != 'undefined')
return '<del>' + formatText(ts) + '</del>';
return 'ERR('+m+')';
}
);
}
This will work fine on nested tags, but will not with overlapping tags, which are invalid anyway.
Example at http://jsfiddle.net/m5Rju/

Why does this jQuery code not work?

Why doesn't the following jQuery code work?
$(function() {
var regex = /\?fb=[0-9]+/g;
var input = window.location.href;
var scrape = input.match(regex); // returns ?fb=4
var numeral = /\?fb=/g;
scrape.replace(numeral,'');
alert(scrape); // Should alert the number?
});
Basically I have a link like this:
http://foo.com/?fb=4
How do I first locate the ?fb=4 and then retrieve the number only?
Consider using the following code instead:
$(function() {
var matches = window.location.href.match(/\?fb=([0-9]+)/i);
if (matches) {
var number = matches[1];
alert(number); // will alert 4!
}
});
Test an example of it here: http://jsfiddle.net/GLAXS/
The regular expression is only slightly modified from what you provided. The global flag was removed, as you're not going to have multiple fb='s to match (otherwise your URL will be invalid!). The case insensitive flag flag was added to match FB= as well as fb=.
The number is wrapped in curly brackets to denote a capturing group which is the magic which allows us to use match.
If match matches the regular expression we specify, it'll return the matched string in the first array element. The remaining elements contain the value of each capturing group we define.
In our running example, the string "?fb=4" is matched and so is the first value of the returned array. The only capturing group we have defined is the number matcher; which is why 4 is contained in the second element.
If you all you need is to grab the value of fb, just use capturing parenthesis:
var regex = /\?fb=([0-9]+)/g;
var input = window.location.href;
var tokens = regex.exec(input);
if (tokens) { // there's a match
alert(tokens[1]); // grab first captured token
}
So, you want to feed a querystring and then get its value based on parameters?
I had had half a mind to offer Get query string values in JavaScript.
But then I saw a small kid abusing a much respectful Stack Overflow answer.
// Revised, cooler.
function getParameterByName(name) {
var match = RegExp('[?&]' + name + '=([^&]*)')
.exec(window.location.search);
return match ?
decodeURIComponent(match[1].replace(/\+/g, ' '))
: null;
}
And while you are at it, just call the function like this.
getParameterByName("fb")
How about using the following function to read the query string parameter in JavaScript:
function getQuerystring(key, default_) {
if (default_==null)
default_="";
key = key.replace(/[\[]/,"\\\[").replace(/[\]]/,"\\\]");
var regex = new RegExp("[\\?&]"+key+"=([^&#]*)");
var qs = regex.exec(window.location.href);
if(qs == null)
return default_;
else
return qs[1];
}
and then:
alert(getQuerystring('fb'));
If you are new to Regex, why not try Program that illustrates the ins and outs of Regular Expressions

How to get portion of the attribute value using jquery

I have attribute value as:
<div id = "2fComponents-2fPromotion-2f" class = "promotion">
Now I want to get only portion of it, say Promotion and its value 2f, how can I get this using jquery ? Do we have built in function for it ?
You can use a regular expression here:
var attId = $(".promotion").attr("id");
// Perform a match on "Promotion-" followed by 2 characters in the range [0-9a-f]
var match = attId.match(/Promotion-([0-9a-f]{2})/);
alert(match[1]); // match[0] contains "Promotion-2f", match[1] contains "2f"
This assumes that the "value" of Promotion is a hexadecimal value and the characters [a-f] will always be lower case. It's also easily adjusted to match other values, for instance, if I change the regex to /component-([0-9a-f]{2})/, the match array would be ["component-3a", "3a"].
The match method takes a regular expression as its input and searches the string for the results. The result is returned as an array of matches, with the first index being the complete match (equivalent regex for this only would be /Promotion-[0-9a-f]{2}/). Any sub-expression (expressions enclosed in parenthesis) matches are added to the array in the order they appear in the expression, so the (Promotion) part of the expression is added to the array at index 1 and ([0-9a-f]{2}) is added at index 2.
match method on MSDN
var id = $("div.promotion").attr("id");
var index = id.indexOf("Promotion");
var promotion = '';
// if the word 'Promotion' is present
if(index !== -1) {
// extract it up to the end of the string
promotion = id.substring(index);
// split it at the hyphen '-', the second offset is the promotion code
alert(promotion.split('-')[1]);
} else {
alert("promotion code not found");
}
you can get the id attribute like this:
var id= $('div.promotion').attr('id');
But then I think you would have to use regular expressions to parse data from the string, the format doesn't appear to be straight forward.
If you are storing lots of info in the id could you consider using multiple attributes like:
<div class="promotion" zone="3a-2f-2f" home="2f"></div>
Then you could get the data like this:
var zone= $('div.promotion').attr('zone');
var home= $('div.promotion').attr('home');
Or you could use jQuery.data()
HTH
$(function () {
var promotion = $('.promotion').attr('id').match(/Promotion-([0-9a-f]{2})/);
if (promotion.length > 0) {
alert(promotion[1]);
}
else {
return false;
}
});

Categories