Regex poor performance when nothing matches - javascript

I have a problem with a slow regex, but only when the pattern doesn't match. In all other cases the performance is acceptable, even if the pattern matches at the end of the text. I am testing performance on a 100KB text input.
What I am trying to do is to convert input in HTML-like syntax which is using [] instead of <> brackets and translate it to valid XML.
Sample input:
...some content[vc_row param="test1"][vc_column]text [brackets in text] content[/vc_column][/vc_row][vc_row param="xxx"]text content[/vc_row]...some more content
Sample output:
...some content<div class="vc_row" param="test1"><div class="vc_column" >text [brackets in text] content</div></div><div class="vc_row" param="xxx">text content</div>...some more content
To do this I am using regex:
/(.*)(\[\/?vc_column|\[\/?vc_row)( ?)(.*?)(\])(.*)/
And I do this in a while loop for as long as the pattern matches.
As I mentioned before, this works, but the last iteration is extremely slow (or the first one, if nothing matches). Here is the complete JavaScript I am using:
// Sample input in the bracket-based markup that should become HTML.
var str = '...some content[vc_row param="test1"][vc_column]text content[/vc_column][/vc_row][vc_row param="xxx"]text content[/vc_row]...some more content';
// The greedy leading (.*) makes every match land on the LAST remaining tag:
// group 1 = text before the tag, group 2 = "[" + optional "/" + tag name,
// group 4 = attribute text, group 6 = text after the closing "]".
var regex = /(.*)(\[\/?vc_column|\[\/?vc_row)( ?)(.*?)(\])(.*)/;
// Declared explicitly: assigning to an undeclared name throws in strict mode / ES modules.
var matches;
// Rewrite one tag per iteration; the regex is run only once per pass.
while ((matches = str.match(regex)) !== null) {
  if (matches[2].slice(1, 2) !== '/') {
    // Opening tag: the tag name becomes the class, attributes are copied over.
    str = matches[1] + "<div class=\"" + matches[2].slice(1) + "\"" + " " + matches[4] + ">" + matches[6];
  } else {
    // Closing tag.
    str = matches[1] + "</div>" + matches[6];
  }
}
How could I improve my regex's "no match" performance?

You can split it up in 2 regex.
One for the start tags, one for the closing tags.
And then chain 2 global g replaces.
// Sample markup that uses [] instead of <> for its tags.
var str = '...some content[vc_row param="test1"][vc_column]text with [brackets in text] content[/vc_column][/vc_row][vc_row param="xxx"]text content[/vc_row]...some more content';
// Opening tags: group 1 is the tag name, optional group 2 is the attribute
// text including its leading whitespace.
const reg1 = /\[(vc_(?:column|row))(\s+[^\]]+)?\s*\]/g;
// Closing tags.
const reg2 = /\[\/(vc_(?:column|row))\s*\]/g;
// Two global passes: opening tags first, then closing tags.
const withOpeningDivs = str.replace(reg1, '<div class="$1"$2>');
var result = withOpeningDivs.replace(reg2, '</div>');
console.log(result);
Note that those (.*) in the original regex aren't needed this way.
Using a nameless function, then it could be done via 1 regex replace.
// Sample markup that uses [] instead of <> for its tags.
var str = '...some content[vc_row param="test1"][vc_column]text with [brackets in text] content[/vc_column][/vc_row][vc_row param="xxx"]text content[/vc_row]...some more content';
// Group 1: "/" when the tag is a closing tag, group 2: tag name,
// group 3: optional attribute text including its leading whitespace.
const reg = /\[(\/)?(vc_(?:column|row))(\s+[^\]]+)?\s*\]/g;
// One global pass; the callback picks the output per matched tag.
var result = str.replace(reg, (tag, closingSlash, tagName, attributes) =>
  closingSlash ? '</div>' : `<div class="${tagName}"${attributes || ''}>`
);
console.log(result);

How about a replace... Like
// The lookahead (?=[\s\]]) requires whitespace or the closing "]" right after
// the tag name, so longer names such as [vc_row_inner] are left untouched
// instead of being cut into a broken <div class="vc_row"_inner>.
str.replace(/\[(\/?)(vc_column|vc_row)(?=[\s\]])([^\]]*)\]/g, function(match, slash, tagName, attributes) {
  // slash is "/" for closing tags; only opening tags receive a class.
  return '<' + slash + 'div' + (slash === '/' ? '' : ' class="' + tagName + '"') + attributes + '>';
});
This matches a tag (start or end) and all attributes, including brackets, capturing everything except the brackets. Then puts it back together in the correct format (divs with classes).
And the global flag (/../g) removes the need for any loops.
var sInput = '...some content[vc_row param="test1"][vc_column]text [brackets in text] content[/vc_column][/vc_row][vc_row param="xxx"]text content[/vc_row]...some more content';

/**
 * Rewrites [vc_row]/[vc_column] bracket tags (opening and closing) as <div> tags.
 *
 * @param {string} input - Text containing the bracket-based markup.
 * @returns {string} The text with the recognised tags converted; any other
 *   bracketed text is returned unchanged.
 */
function convertTags(input) {
  // The lookahead (?=[\s\]]) requires whitespace or the closing "]" right
  // after the tag name, so longer names such as [vc_row_inner] are not
  // mistaken for [vc_row].
  return input.replace(/\[(\/?)(vc_column|vc_row)(?=[\s\]])([^\]]*)\]/g, function(match, slash, tagName, attributes) {
    // slash is "/" for closing tags; only opening tags receive a class.
    return '<' + slash + 'div' + (slash === '/' ? '' : ' class="' + tagName + '"') + attributes + '>';
  });
}

console.log(convertTags(sInput));

Related

Javascript string remove line if not includes

I am trying to get a code for a webpage using javascript. But I only want to return lines that include the word "array"
How can I remove all lines that do not include "array?"
I can't find anything online and my skills are very basic.
"I type this sentence because it says the post is mostly a code, and it still says the same so I'm just extending this."
/**
 * Serializes the direct child nodes of `document_root` into one string and
 * returns a slice of that string chosen by the current page URL.
 *
 * @param {Node} document_root - Node whose children are serialized.
 * @returns {string} On URLs containing "www.liveworksheets.com/workbooks":
 *   the text between the last "var arraypaginascopia;" and the last
 *   "var rellenado;". Otherwise: the text between the last "'[[" and the
 *   last "]]';".
 */
function DOMtoString(document_root) {
  let sourcecode = '';
  for (let node = document_root.firstChild; node; node = node.nextSibling) {
    switch (node.nodeType) {
      case Node.ELEMENT_NODE:
        sourcecode += node.outerHTML;
        break;
      case Node.TEXT_NODE:
        sourcecode += node.nodeValue;
        break;
      case Node.CDATA_SECTION_NODE:
        sourcecode += '<![CDATA[' + node.nodeValue + ']]>';
        break;
      case Node.COMMENT_NODE:
        sourcecode += '<!--' + node.nodeValue + '-->';
        break;
      case Node.DOCUMENT_TYPE_NODE:
        // (X)HTML documents are identified by public identifiers
        sourcecode += "<!DOCTYPE " + node.name + (node.publicId ? ' PUBLIC "' + node.publicId + '"' : '') + (!node.publicId && node.systemId ? ' SYSTEM' : '') + (node.systemId ? ' "' + node.systemId + '"' : '') + '>\n';
        break;
    }
  }

  // The URL is read from the global document, not from document_root.
  const isWorkbookPage = document.URL.includes("www.liveworksheets.com/workbooks");
  if (isWorkbookPage) {
    // NOTE(review): the + 1 drops the first character of the start marker
    // ("v"); kept as in the original - confirm this is intended.
    return sourcecode.substring(
      sourcecode.lastIndexOf("var arraypaginascopia;") + 1,
      sourcecode.lastIndexOf("var rellenado;")
    );
  }

  // + 1 skips the opening quote so the slice starts at "[[".
  return sourcecode.substring(
    sourcecode.lastIndexOf("'[[") + 1,
    sourcecode.lastIndexOf("]]';")
  );
}
For example:
This is an example
This is an example
This is an example
This is a banana
This is an example
This is an example
This is a banana
The result should be:
This is a banana
This is a banana
In your question, you asked for the word array, but I'll use your example banana for the code.
// The `.*` on either side widens each match to the whole line that contains
// 'banana' (`.` does not cross line breaks).
// The g flag makes match() return every matching line instead of only the first.
// The m flag only changes how ^ and $ behave, which this pattern does not use.
const regex = /.*banana.*/gm;
const str = [
  'This is an example',
  'This is an example',
  'This is an example',
  'This is a banana',
  'This is an example',
  'This is an example',
  'This is a banana',
].join('\n');
const bananaLines = str.match(regex);
console.log(bananaLines);
In the end, I managed to do this.
const regex = /.*array.*/gm;
// (.*) matches any character before and after the search string 'banana';
// The g flag is to match all occurrences and;
// The m flag is for a multi-line string
const matematika2 = matematika;
// console.log(matematika2.match(regex));
return matematika2.match(regex);

javascript regex to capture word in an html text

I'm explaining my issue.
I'm trying to do a javascript function to highlight words (change their color) in an html text. I have another function to un highlight them.
I have a list of keywords that i have to highlight.
Here is the code I've written so far:
/**
 * Wraps each keyword found in the HTML of every `.rubricContent` element in a
 * red <span>, after first calling unHighlight_words with the same keywords.
 *
 * @param {string[]} keywords - Terms to highlight; nothing is rewritten when falsy.
 */
function highlight_words(keywords) {
  unHighlight_words(keywords);
  $('.rubricContent').each(function(index, element) {
    //get elements for each rubrics
    var content = $(element).html();
    if (keywords) {
      $(keywords).each(function(i, e) {
        var term = e;
        // NOTE: both non-capturing groups carry a {0} quantifier, so they never
        // take part in the match; the effective pattern is just the term followed
        // by one space. The single-backslash \w inside this string literal is
        // also read as a plain "w".
        var re = new RegExp('(?:[^.;\w]|^|^\\W+){0}('+ term + ' )(?:[^.\w]|\\W(?=\\W+|$)|$){0}', "gmi");
        // The trailing space restores the one consumed by the match.
        var subst = '<span style="color:red">' + term + '</span> ';
        content = content.replace(re, subst);
      });
      $(element).html(content);
    }
  });
}
The result is not that bad: my words are colored red, but not when they are followed by a "." or a ",".
Anyone have the solution for me ?
Thanks !!
You can use \b word boundary as follows.
const term = 'Test';
let content = 'TestTest. Test Test: Test. Test, TESTtest';
// \b on both sides restricts matches to the term as a whole word, so it is
// still found when followed by punctuation but not inside a longer word.
const re = new RegExp('\\b' + term + '\\b', 'gi');
// $& re-inserts the text that was actually matched, which keeps its original
// letter case and adds no extra whitespace around it.
const subst = '<span style="color:red">$&</span>';
content = content.replace(re, subst);
alert(content);
https://jsfiddle.net/3royvd66/1/

How to reference found matches?

I'm having a small problem with a regexp pattern. I don't have regexp knowledge, so I couldn't solve it.
I have this text:
var text = "this (is) some (ran)dom text";
and I want to capture anything between (). So after following this tutorial I came up with this pattern:
var re = /(\(\w*\))/g;
which works fine. But what I want to do now is replace the found matches, or rather modify. I want to wrap the found matches with a span tag. So I used this code:
var spanOpen = '<span style="color: silver;">';
var spanClose = '</span>';
text.replace(re, spanOpen + text.match(re) + spanClose);
even though the code works, I don't get the result I want. It outputs:
as HTML
this <span style="color: silver;">(is),(ran)</span> some <span style="color: silver;">(is),(ran)</span>dom text
as text
this (is),(ran) some (is),(ran)dom text
You can check the example in fiddle. How can I fix this?
The code in fiddle:
var text = "this (is) some (ran)dom text";
// Captures a parenthesised run of word characters, parentheses included.
var re = /(\(\w*\))/g;
var spanOpen = '<span style="color: silver;">';
var spanClose = '</span>';
var original = `original: ${text}<br>`;
var desired = `desired: this ${spanOpen}(is)${spanClose} some ${spanOpen}(ran)${spanClose}dom text<br>`;
// text.match(re) is evaluated once, before replace() runs; the resulting array
// is converted to a comma-joined string and used for every replacement.
var replacement = spanOpen + text.match(re) + spanClose;
var output = `output: ${text.replace(re, replacement)}`;
var result = [original, desired, output].join('');
document.body.innerHTML = result;
If the title is wrong or misleading, I'll change it.
The .replace() method can take a function as the 2nd parameter. That will come in handy here.
// A function given as the replacement is invoked for each match and receives
// the matched text as its first argument.
var wrapMatch = (match) => spanOpen + match + spanClose;
var output = "output: " + text.replace(re, wrapMatch);
The function will be called for each individual match.
You can also use '$&' in your replace string to reference each match
// '$&' inside a replacement string stands for the text of the current match.
var wrappedText = text.replace(re, spanOpen + '$&' + spanClose);
var output = "output: " + wrappedText;
See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace
text.match(re) is returning an array of the result, so what you can do is loop this array and replace your string with each items, like this:
// match() returns null rather than an empty array when nothing matches, so
// fall back to [] to keep the loop below from throwing on `matches.length`.
var matches = text.match(re) || [];
var output = "output: " + text;
for (var i = 0; i < matches.length; i++)
{
  // NOTE: replace() with a string pattern only touches the first occurrence,
  // so this assumes every matched substring appears once in the text.
  output = output.replace(matches[i], spanOpen + matches[i] + spanClose);
}
See this FIDDLE

Why won't replace work with my regex object when test does?

I feel silly asking this because I'm betting the answer is staring right at me but here goes.
I'm taking a string from the CSS style textDecoration and trying to remove the underline portion of the string (and any whitespace around it). It returns true when I run test() but when I do the replace method the string is unaltered. Help?
My code:
/**
 * Builds a regex from `str`, logs whether it is found in the global `txt`,
 * and logs `txt` after calling replace() with that regex.
 *
 * @param {string} str - Text-decoration keyword to look for.
 */
// Declared with var: assigning to an undeclared name throws in strict mode / ES modules.
var textDecoration = function(str) {
  // NOTE: in a plain string literal "\s" is just "s", so these alternatives
  // contain the literal text "/s" rather than a whitespace class.
  var n_str = str + '|/\s' + str + '|/\s' + str + '/\s|' + str + '/\s';
  var nre = new RegExp(n_str, "g");
  debug_log('Found or not: ' + nre.test(txt));
  // NOTE: replace() returns a new string; the return value is not used here,
  // so `txt` is logged unchanged below.
  txt.replace(nre, '');
  debug_log('Result: ' + txt);
  debug_log('-----------------------');
};
var txt = "underline";
debug_log('-----------------------');
debug_log('Starting String: ' + txt);
textDecoration("underline");
txt = "underline overline line-through";
debug_log('-----------------------');
debug_log('Starting String: ' + txt);
textDecoration("underline");
txt = "overline underline line-through";
debug_log('-----------------------');
debug_log('Starting String: ' + txt);
textDecoration("underline");
txt = "overline line-through underline";
debug_log('-----------------------');
debug_log('Starting String: ' + txt);
textDecoration("underline");
Output:
replace() returns a new string with the replacements applied and doesn't change the original string. You should do something like:
// Keep the string returned by replace(); `txt` itself is not modified.
var newString = txt.replace(nre, '');
debug_log(`Result: ${newString}`);
test returns a boolean. replace returns a new string. It does not alter the string.
Also, your regular expression is quite odd. Applying str = "underline", you will get:
/underline|\/sunderline|\/sunderline\/s|underline\/s/
which does not match whitespaces, but "/s".

javascript regex for escaping comma and double quote

I have to escape two special characters " and , in the string with the following rules.
Example:-
Mercu"ry should be converted into "Mercu""ry"
Mercu,ry should be converted into "Mercu,ry"
Mer"cu,ry should be converted into "Mer""cu,ry"
Rules:-
Meaning comma or double quote should be escaped with double quote.
Comma will escaped by wrapping the whole word in double quotes.
If Double quote is found, then it double quote should be added at its
position. Also the whole word should be wrapped inside the double
quotes.
Please suggest the regex pattern in javascript.
var test = [
  'Mercu"ry', 'Mercu,ry', 'Mer"cu,ry', 'Mercury'
];
// for...of walks the array values directly and gives the loop its own declared
// variable; the previous for...in relied on an undeclared index variable.
for (const word of test) {
  // Every double quote is escaped by doubling it.
  let s = word.replace(/"/g, '""');
  // A word containing a double quote or a comma is wrapped in double quotes.
  if (/[",]/.test(s)) {
    s = '"' + s + '"';
  }
  alert(s);
}
Test: http://jsfiddle.net/ZGFV5/
Try to run the code with Mer""cury :)
Just always wrap the word in double quotes, and replace all double quotes with two:
/**
 * Escapes a word by doubling each double quote in it and wrapping the result
 * in double quotes.
 *
 * @param {string} word - The raw word.
 * @returns {string} The quoted word.
 */
function escapeWord(word) {
  const doubledQuotes = word.replace(/"/g, '""');
  return `"${doubledQuotes}"`;
}
The regular expression to achieve this is /"/g, so the following will work for your examples:
var test1 = 'Mercu"ry';
var test2 = 'Mercu,ry';
var test3 = 'Mer"cu,ry';
// Matches every double quote in a word.
var regex = /"/g;
// Doubles each double quote, then wraps the whole word in double quotes.
var wrapInQuotes = function(word) {
  return `"${word.replace(regex, '""')}"`;
};
var example1 = wrapInQuotes(test1);
var example2 = wrapInQuotes(test2);
var example3 = wrapInQuotes(test3);
alert([example1, example2, example3].join(" : "));
Example fiddle

Categories