Regex for escaped characters highlighting - javascript

I am building an autocomplete in JavaScript that needs to highlight words when doing a search:
That works fine, but there's an issue with escaped characters.
When trying to highlight a text with escaped characters (for example regex &>< example), the following is happening:
That's happening because I am doing the following:
element.innerHTML.replace(/a/g, highlight)
// Wraps the given text in a span carrying the "foo" class.
// Passed as the replacer callback to String.prototype.replace.
function highlight(str) {
  var opening = '<span class="foo">';
  var closing = '</span>';
  return opening + str + closing;
}
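and innerHTML contains the escaped entity text (for example &amp;amp; in place of &amp;), so the letters inside the entity are matched too, which explains the result.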
In conclusion, I need a way to solve that so I would like a function that:
receives a and regex <br> example and returns regex <br> ex<span class="foo">a</span>mple
receives r and regex <br> example and returns <span class="foo">r</span>egex <b<span class="foo">r</span>> example
receives < and regex <br> example and returns regex <span class="foo"><</span>br> example
The entries may or may not contain html blocks, see the issue here (search for <br> or &)

str.replace only returns a new string with the intended replacements. The original string is unchanged.
// String.prototype.replace returns a new string; `str` keeps its original value.
var str = 'replace me';
var str2 = str.replace(/e/g, 'E');
// For display only: print both values side by side as pretty-printed JSON.
document.write(
  ['<pre>', JSON.stringify({ str: str, str2: str2 }, null, 2), '</pre>'].join('')
);
Therefore the code needs to set the returned value from the replace back to the desired element.
Also, innerHTML will return the escaped text rather than the unescaped text. This could be unescaped itself within the function but why bother if you can use textContent. However by using innerHTML when it's time to set the highlighted text to the element it will auto-escape the text for us. :)
UPDATE: the values are passed to the function and then set to the element:
NOTES:
The regexp could probably be made a bit more robust to avoid having to handle the special case using lastIndex
There needs to be some protection on the input as someone could provide a nasty regexp pattern. There is a minimal protection check in this example.
// Demo calls: each entry is [element id, text to render, term to highlight].
[
  ['a', 'regex &>< example', 'a'],
  ['b', 'regex &>< example', '&'],
  ['c', 'regex <br> example', '<'],
  ['d', 'regex <br> example', 'e'],
  ['e', 'regex <br> example', '[aex]']
].forEach(function (demo) {
  higlightElemById(demo[0], demo[1], demo[2]);
});
/**
 * Renders `str` into the element with the given id, wrapping every literal
 * occurrence of `match` in a highlight span. All text, including the
 * highlighted part, is HTML-escaped before it is assigned to innerHTML.
 *
 * @param {string} id - id of the target element.
 * @param {string} str - plain (unescaped) text to display.
 * @param {string} match - literal text to highlight; not treated as a regex.
 */
function higlightElemById(id, str, match) {
  const itemElem = document.getElementById(id);

  // An empty term would produce zero-length matches and never advance the
  // regex, so render the text without any highlighting instead.
  if (match === '') {
    itemElem.innerHTML = escapeHTML(str);
    return;
  }

  // minimal regexp escape to prevent shenanigans
  const safeMatch = match.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regexp = new RegExp(safeMatch, 'g');

  let text = '';
  let lastIndex = 0;
  let matches;
  while ((matches = regexp.exec(str)) !== null) {
    // Slice the prefix by index rather than capturing it with `.*?`, which
    // cannot cross line breaks and would silently drop earlier lines.
    text += escapeHTML(str.slice(lastIndex, matches.index));
    // Escape the match as well: it is user-visible text, not markup.
    text += highlight(escapeHTML(matches[0]));
    lastIndex = regexp.lastIndex;
  }
  // Whatever follows the last match (or the whole string if nothing matched).
  text += escapeHTML(str.slice(lastIndex));

  itemElem.innerHTML = text;
}

/**
 * Wraps already-escaped HTML in the highlight span.
 *
 * @param {string} str - HTML-safe text.
 * @returns {string} span markup containing `str`.
 */
function highlight(str) {
  return '<span class="myHighlightClass">' + str + '</span>';
}

/**
 * Escapes `&`, `<` and `>` so the text is safe to use as element content.
 * Pure string implementation: it needs no DOM and does not depend on `this`,
 * so it also works in strict mode and ES modules. Quotes are not escaped, so
 * the result is meant for text content, not attribute values.
 *
 * @param {string} html - raw text; null/undefined are treated as ''.
 * @returns {string} escaped text.
 */
function escapeHTML(html) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
  const raw = html == null ? '' : String(html);
  return raw.replace(/[&<>]/g, (ch) => entities[ch]);
}
/* Visual style for each highlighted match: red, underlined text. */
.myHighlightClass {
text-decoration: underline;
color: red;
}
<div id="a"></div>
<div id="b"></div>
<div id="c"></div>
<div id="d"></div>
<div id="e"></div>

Related

Highlighting lines that contain a phrase using regex

I'm highlighting lines that contain a certain phrase using regex.
My current highlight function will read the whole text and place every instance of the phrase within a highlight span.
// Markup placed before and after every highlighted phrase.
const START = "<span name='highlight' style='background-color: yellow;'>";
const END = "</span>";

/**
 * Replaces every case-insensitive occurrence of `toReplace` in `text` with
 * `toReplace` itself wrapped in the highlight span.
 *
 * @param {string} text - text to search.
 * @param {string} toReplace - phrase to look for; used as the regex source as-is.
 * @returns {string} text with each occurrence wrapped.
 */
function highlight(text, toReplace) {
  const pattern = new RegExp(toReplace, 'ig');
  const wrapped = START + toReplace + END;
  return text.replace(pattern, wrapped);
}
I want to expand my regex so that, for each phrase, it highlights from the preceding <br> to the following <br>.
highlight("This<br>is some text to<br>highlight.", "text");
Current output:
This<br>is some<span name="highlight" style="background-color:yellow;">text</span> to<br>highlight."
Wanted output:
This<br><span name="highlight" style="background-color:yellow;">is some text to</span><br>highlight.
You may want to match all chars other than < and > before and after the text and it is advisable to escape the literal text you pass to the RegExp constructor. Also, to replace with the whole match, just use $& placeholder:
// Markup placed before and after every highlighted line segment.
const START = "<span name='highlight' style='background-color: yellow;'>";
const END = "</span>";

/**
 * Highlights each tag-free run of text that contains `toReplace`
 * (case-insensitive). `<br>` / `<br/>` tags are matched first and returned
 * unchanged so that letters inside the tag are never treated as a hit.
 *
 * @param {string} text - markup to search.
 * @param {string} toReplace - literal phrase; regex metacharacters are escaped.
 * @returns {string} markup with the matching runs wrapped.
 */
function highlight(text, toReplace) {
  const escapedPhrase = toReplace.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
  const source = "(<br/?>)|[^<>]*" + escapedPhrase + "[^<>]*";
  const reg = new RegExp(source, 'ig');
  return text.replace(reg, (whole, lineBreak) => {
    if (lineBreak) {
      return lineBreak;
    }
    return START + whole + END;
  });
}

console.log(highlight("This<br>is some text to<br>highlight.", "text"));
console.log(highlight("This<br>is a bunch of<br>text", "b"));
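With the search term text, the regex will look like /(<br\/?>)|[^<>]*text[^<>]*/gi. The first alternative captures a <br> or <br/> tag so that it can be put back unchanged; the second matches 0 or more chars other than < and >, then text in a case-insensitive way, and then again 0 or more chars other than < and >. The replacement callback returns the captured tag as-is, or otherwise wraps the whole match ($0) in the highlighting tags.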
My guess is that this simple expression,
(<br>)(.*?)(\1)
might work here.
// Matches the text between one <br> and the next one; the `s` flag lets `.`
// run across line breaks.
const regex = /(<br>)(.*?)(\1)/gs;
const str = [
  "This<br>is some text to<br>highlight. This<br>is some text to<br>highlight. This<br>is some text to<br>highlight.",
  "This<br>is some",
  "text to<br>highlight. This<br>is some text to<br>highlight. This<br>is some text to<br>highlight.",
].join("\n");
const subst = "$1<span name='highlight' style='background-color: yellow;'>$2</span>$3";

// The substituted value will be contained in the result variable
const result = str.replace(regex, subst);

console.log(result);
In this demo, the expression is explained, if you might be interested.

How to turn a String with square brackets and forward slashes + variables into a regex?

I've tried to get my head around regex, but I still can't get it.
I want to turn the following String + some variables into a regex:
"[url href=" + objectId + "]" + objectId2 + "[/url]"
I tried the following, since I read somewhere that brackets and slashes need to be escaped:
/\[url href=/ + objectId + /\]/ + objectId2 + /\[\/\url\]/g
But that isn't working.
I want to use it to replace the whole expression into HTML wherever it matches in a String.
You are correct that brackets and backslashes need to be escaped in a regular expression, but you can't create a regex by adding together regex literals like your /\[url href=/ + objectId + /\]/ attempt. To build a regex dynamically like that you have to use string concatenation and pass the result to new RegExp(). So as a starting point for your text you'd need this:
new RegExp("\\[url href=" + objectId + "\\]" + objectId2 + "\\[/url\\]")
Note all of the double-backslashes - that's because backslashes need to be escaped in string literals, so "\\[" creates a string containing a single backslash and then a bracket, which is what you want in your regex.
But if you want to extract the matched href and content for use in creating an anchor then you need capturing parentheses:
new RegExp("\\[url href=(" + objectId + ")\\](" + objectId2 + ")\\[/url\\]")
But that's still not enough for your purposes because objectId and objectId2 could (or will, given the first is a url) contain other characters that need to be escaped in a regex too, e.g., .+?(), etc. So here's a function that can escape all of the necessary characters:
/**
 * Backslash-escapes every regex metacharacter in `s` so the result can be
 * embedded in a RegExp source and matched literally.
 *
 * @param {string} s - raw text.
 * @returns {string} text with metacharacters escaped.
 */
function escapeStringForRegex(s) {
  const metacharacters = /[-\/\\^$*+?.()|[\]{}]/g;
  const escaped = s.replace(metacharacters, '\\$&');
  return escaped;
}
We can't just call that function on the whole thing, because you need unescaped parentheses for your capturing sub matches, so just call it on the two variables:
// Capture group 1 is the literal href, capture group 2 the literal link text.
var urlRegex = new RegExp(
  [
    "\\[url href=(",
    escapeStringForRegex(objectId),
    ")\\](",
    escapeStringForRegex(objectId2),
    ")\\[/url\\]"
  ].join("")
);
Kind of messy, but seems to do the job as you can see here:
/**
 * Backslash-escapes regex metacharacters so `s` matches literally inside a RegExp.
 *
 * @param {string} s - raw text.
 * @returns {string} regex-safe text.
 */
function escapeStringForRegex(s) {
  return s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
}

/**
 * Replaces every `[url href=<objectId>]<objectId2>[/url]` in `str` with an
 * HTML anchor. The href and the link text are HTML-escaped before they are
 * placed in the markup, so quotes or angle brackets in the inputs cannot
 * break out of the attribute or inject tags. The URL scheme is not validated.
 *
 * @param {string} str - text containing the bracket markup.
 * @param {string} objectId - exact href to look for.
 * @param {string} objectId2 - exact link text to look for.
 * @returns {string} text with matching markup turned into anchors.
 */
function createAnchors(str, objectId, objectId2) {
  const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => value.replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

  const urlRegex = new RegExp(
    "\\[url href=(" + escapeStringForRegex(objectId) + ")\\](" + escapeStringForRegex(objectId2) + ")\\[/url\\]",
    "g"
  );
  // A replacer function (not a "$1" template) is needed to escape the captures.
  return str.replace(urlRegex, (match, href, label) =>
    "<a href='" + escapeHtml(href) + "'>" + escapeHtml(label) + "</a>"
  );
}
// On button click: read the three input fields, convert the markup and show
// the result in the output textarea.
document.querySelector("button").addEventListener("click", function() {
  var readField = function(id) {
    return document.getElementById(id).value;
  };
  var str = readField("input");
  var objectId = readField("objectId");
  var objectId2 = readField("objectId2");
  var output = document.getElementById("output");
  output.value = createAnchors(str, objectId, objectId2);
});
textarea { width : 100%; height: 80px; }
Input:<br><textarea id="input">This is just some text that you can edit to try things out. [url href=http://test.com.au?param=1]Test URL[/url]. Thanks.</textarea>
ObjectId:<input id="objectId" value="http://test.com.au?param=1"><br>
ObjectId2:<input id="objectId2" value="Test URL"><br>
<button>Test</button>
<textarea id="output"></textarea>
Note that the above searches only for [url]s in your string that have the particular href and content specified in the objectId and objectId2 variables. If you just want to change all [url]s into anchors regardless of what href and text they contain then use this:
.replace(/\[url href=([^\]]+)\]([^\]]+)\[\/url\]/g, "<a href='$1'>$2</a>")
Demo:
/**
 * Returns `s` with every regex metacharacter prefixed by a backslash, so the
 * text can be used literally inside a RegExp source.
 *
 * @param {string} s - raw text.
 * @returns {string} regex-safe text.
 */
function escapeStringForRegex(s) {
  const specialChars = /[-\/\\^$*+?.()|[\]{}]/g;
  return s.replace(specialChars, '\\$&');
}
/**
 * Turns every `[url href=...]...[/url]` in `str` into an HTML anchor,
 * whatever href and link text it contains. Both captured values are
 * HTML-escaped before they are placed in the markup, so quotes or angle
 * brackets cannot break out of the attribute or inject tags. The URL scheme
 * is not validated.
 *
 * @param {string} str - text containing the bracket markup.
 * @returns {string} text with the markup converted to anchors.
 */
function createAnchors(str) {
  const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => value.replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

  return str.replace(/\[url href=([^\]]+)\]([^\]]+)\[\/url\]/g, (match, href, label) =>
    "<a href='" + escapeHtml(href) + "'>" + escapeHtml(label) + "</a>"
  );
}
// On button click: convert the input textarea's markup and show the result
// in the output textarea.
document.querySelector("button").addEventListener("click", function() {
  var source = document.getElementById("input").value;
  var output = document.getElementById("output");
  output.value = createAnchors(source);
});
textarea { width : 100%; height: 80px; }
Input:<br><textarea id="input">Testing. [url href=http://test.com.au?param=1]Test URL[/url]. Thanks. Another URL: [url href=https://something.com/test?param=1&param2=123]Test URL 2[/url]</textarea>
<button>Test</button>
<textarea id="output"></textarea>
It's like:
// Regex metacharacters in the interpolated values (".", "?", "(" ...) are
// escaped so that they are matched literally.
var rx = new RegExp(
  '\\[url\\shref=' +
  objectId.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&') +
  '\\]' +
  objectId2.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&') +
  '\\[\\/url\\]'
);
// The square brackets must be escaped (otherwise they form character classes),
// and "[\url]" is an invalid \u escape in a string literal; the closing tag is "[/url]".
new RegExp(
  "\\[url href=" +
  objectId.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&') +
  "\\]" +
  objectId2.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&') +
  "\\[/url\\]"
)
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp

Highlight my "#users' name" in a string in Javascript

I'm trying to highlight the username of a user when it's detected in a string like #login. Here's the code which doesn't seem to work:
// Wraps the first "#<username>" found by the pattern below in a label span.
// NOTE(review): inside a plain string literal "\W" loses its backslash, so the
// pattern actually built here is "(W|^)#(...)(W|$)".
function parse(string) {
  var source = '(\W|^)#(' + username + ')(\W|$)';
  var matcher = new RegExp(source, 'i');
  var input = String(string);
  return input.replace(matcher, '<span class="label radius">#$2</span>');
}
And here's a jsfiddle to test it: http://jsfiddle.net/2pmLT/3/
When I type my #login, it highlight it but only if it's the only text in the string: "#login haha" won't work.
You need to escape your backslashes. You should also add $1 and $3 to the replacement, otherwise you drop the surrounding spaces. Use the g flag to replace multiple occurrences.
http://jsfiddle.net/7h8Aq/
/**
 * Wraps every "#<username>" mention in `string` in a label span. A mention
 * must be preceded by a non-word character (or the start of the string) and
 * followed by a non-word character (or the end of the string).
 *
 * Reads the `username` variable from the enclosing scope.
 *
 * @param {string} string - text to scan.
 * @returns {string} text with each mention wrapped.
 */
function parse(string) {
  // Match the user name literally, even if it contains ".", "+", etc.
  const escapedName = String(username).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // The trailing boundary is a lookahead so it is not consumed: otherwise the
  // separator after one mention could not also serve as the leading separator
  // of the next one, and "#a #a" would only highlight the first.
  const regex = new RegExp('(\\W|^)#(' + escapedName + ')(?=\\W|$)', 'ig');
  return String(string).replace(regex, '$1<span class="label radius">#$2</span>');
}
Here you go: you only need to replace certain instances, so there's no need to match the whole string. You also need to specify the global g flag for the regex:
/**
 * Wraps every occurrence of "#<username>" in `text` in a label span.
 * Reads the `username` variable from the enclosing scope.
 *
 * @param {string} text - text to scan.
 * @returns {string} text with each occurrence wrapped.
 */
function parse(text) {
  // Match the user name literally, even if it contains ".", "+", etc.
  const escapedName = String(username).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return text.replace(
    new RegExp('(#' + escapedName + ')', 'g'),
    '<span class="label radius">$1</span>'
  );
}
Consider this example instance.
HTML
<div id="test-block">
<div class='label'></div>
</div>
JQuery
// On every keyup in #test, show the parsed field value as the text of .label.
$('#test').on('keyup', function() {
  var label = $('.label');
  label.text(parse($(this).val()));
});
// Replaces the first match of the pattern below with "#" plus the matched name.
// NOTE(review): inside a plain string literal "\W" loses its backslash, so the
// pattern actually built here is "(W|^)#(...)(W|$)" — confirm whether "\\W"
// was intended.
function parse(myString) {
  var regex = new RegExp("(\W|^)#(" + username + ")(\W|$)", 'i');
  return myString.replace(regex, '#$2');
}

JS - Get original value of string replace using regex

We have a string:
var dynamicString = "This isn't so dynamic, but it will be in real life.";
User types in some input:
var userInput = "REAL";
I want to match on this input, and wrap it with a span to highlight it:
var result = " ... but it will be in <span class='highlight'>real</span> life.";
So I use some RegExp magic to do that:
// Escapes user input so it is matched literally,
var searchString = userInput.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
// Now we make a regex that matches *all* instances
// and (most important point) is case-insensitive.
var searchRegex = new RegExp(searchString , 'ig');
// Now we highlight the matches on the dynamic string.
// (The regex variable is `searchRegex`; `reg` was never defined.)
// Every match is replaced with the user's own spelling of the term.
dynamicString = dynamicString.replace(searchRegex, '<span class="highlight">' + userInput + '</span>');
This is all great, except here is the result:
console.log(dynamicString);
// -> " ... but it will be in <span class='highlight'>REAL</span> life.";
I replaced the content with the user's input, which means the text now gets the user's dirty case-insensitivity.
How do I wrap all matches with the span shown above, while maintaining the original value of the matches?
Figured out, the ideal result would be:
// user inputs 'REAL',
// We get:
console.log(dynamicString);
// -> " ... but it will be in <span class='highlight'>real</span> life.";
You'd use regex capturing groups and backreferences to capture the match and insert it in the string
// Escape regex metacharacters so the user's text is matched literally.
var searchString = userInput.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
// The capture group holds the text exactly as it appears in the source string,
// so "$1" re-inserts the original casing rather than the user's.
var searchRegex = new RegExp('(' + searchString + ')' , 'ig');
dynamicString = dynamicString.replace(searchRegex, '<span class="highlight">$1</span>');
FIDDLE
You can use it without capturing groups too.
// "$&" inserts the whole match as it appears in the source string, so no
// capture group is needed. The user input is escaped so it matches literally,
// and the replacement reads from dynamicString (no `text` variable exists here).
dynamicString = dynamicString.replace(
  new RegExp(userInput.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&"), 'ig'),
  '<span class="highlight">$&</span>'
);

Using js regex to replace simple markup styles like **bold** to <b>bold</b>

I'm trying to take a chunk of plain text and convert parts of it into html tags. I don't need a full rich editor, just these few tags:
**bold**
__underline__
~~italics~~
--strike--
<<http://www.link.com>>
This is the method I have attempted to write but my lack of regex/js seems to be holding it back:
// Reads the element's text, applies five replacements in order and writes the
// result back as HTML.
// NOTE(review): every pattern is a plain string, so replace() searches for that
// exact text once and does not interpret it as a regular expression.
function toMarkup($this) {
  var rules = [
    ["\*\*(.*)\*\*", "<b>$1</b>"],
    ["__(.*)__", "<u>$1</u>"],
    ["~~(.*)~~", "<i>$1</i>"],
    ["--(.*)--", "<del>$1</del>"],
    ["<<(.*)>>", "<a href='$1'>Link</a>"]
  ];
  var text = $this.text();
  rules.forEach(function (rule) {
    text = text.replace(rule[0], rule[1]);
  });
  $this.html(text);
}
Any glaring errors as to why these replaces are not working? Another issue I'm just now realizing is by converting this text to html I am unescaping any other potential tags that may be malicious. A bonus would be any advice on how to only escape these elements and nothing else.
First of all, those are just strings, not regexes. Secondly, you should use the non-greedy .*? instead of .*.
Also, you may want to use the g modifier to match every occurrence in the text.
/**
 * Converts the lightweight markup in the element's text (**bold**,
 * __underline__, ~~italics~~, --strike--, <<url>>) into HTML and writes it
 * back with .html().
 *
 * The raw text is HTML-escaped first, so any tags the user typed are shown as
 * text instead of being interpreted; only the tags generated here become
 * markup. The URL scheme of links is not validated.
 *
 * @param {object} $this - jQuery-like wrapper exposing text() and html(value).
 */
function toMarkup($this) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  let text = $this.text().replace(/[&<>"']/g, (ch) => entities[ch]);
  text = text.replace(/\*\*(.*?)\*\*/g, "<b>$1</b>");
  text = text.replace(/__(.*?)__/g, "<u>$1</u>");
  text = text.replace(/~~(.*?)~~/g, "<i>$1</i>");
  text = text.replace(/--(.*?)--/g, "<del>$1</del>");
  // The angle brackets were escaped above, so match their entity form.
  text = text.replace(/&lt;&lt;(.*?)&gt;&gt;/g, "<a href='$1'>Link</a>");
  $this.html(text);
}
Use a Regexp object as the first argument to text.replace() instead of a string:
/**
 * Converts the lightweight markup in the element's text (**bold**,
 * __underline__, ~~italics~~, --strike--, <<url>>) into HTML and writes it
 * back with .html().
 *
 * The raw text is HTML-escaped before the rules run, so tags typed by the
 * user are displayed as text; only the tags produced by the rules become
 * markup. The URL scheme of links is not validated.
 *
 * @param {object} $this - jQuery-like wrapper exposing text() and html(value).
 */
function toMarkup($this) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  // Applied in order; non-greedy groups stop at the nearest closing marker.
  const rules = [
    [/\*\*(.*?)\*\*/g, "<b>$1</b>"],
    [/__(.*?)__/g, "<u>$1</u>"],
    [/~~(.*?)~~/g, "<i>$1</i>"],
    [/--(.*?)--/g, "<del>$1</del>"],
    // The angle brackets are already escaped at this point.
    [/&lt;&lt;(.*?)&gt;&gt;/g, "<a href='$1'>Link</a>"],
  ];
  const escaped = $this.text().replace(/[&<>"']/g, (ch) => entities[ch]);
  const markup = rules.reduce(
    (acc, [pattern, replacement]) => acc.replace(pattern, replacement),
    escaped
  );
  $this.html(markup);
}
Note that I also replaced all of the .* with .*? which will match as few characters as possible, otherwise your matches may be too long. For example you would match from the first ** to the very last ** instead of stopping at the next one. The regex also needs the g flag so that all matches will be replaced (thanks Aaron).
/**
 * Single-pass variant: converts **bold**, __underline__, ~~italics~~,
 * --strike-- and <<url>> in the element's text into HTML and writes it back
 * with .html(). Because everything is handled in one pass, markers nested
 * inside another pair are left as they are.
 *
 * The raw text is HTML-escaped first, so tags typed by the user are displayed
 * as text. The URL scheme of links is not validated.
 *
 * @param {object} $this - jQuery-like wrapper exposing text() and html(value).
 */
function toMarkup($this) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const tags = { '**': 'b', '__': 'u', '--': 'del', '~~': 'i' };
  const escaped = $this.text().replace(/[&<>"']/g, (ch) => entities[ch]);

  // Group 1 is the marker and group 2 the wrapped text; group 3 is the URL of
  // a link, whose angle brackets are already in entity form.
  $this.html(escaped.replace(/(__|~~|--|\*\*)(.*?)\1|&lt;&lt;(.*?)&gt;&gt;/g,
    function (m, m1, m2, m3) {
      if (m1 === undefined) {
        return "<a href='" + m3 + "'>Link</a>";
      }
      return '<' + tags[m1] + '>' + m2 + '</' + tags[m1] + '>';
    }));
}
Note that you cannot nest these; for example, __--abc--__ will be converted to <u>--abc--</u>.

Categories