trim whitespace and &nbsp; from a javascript string - javascript

I am trying to trim all whitespace from a string, including &nbsp; entities.
I can't seem to achieve this.
The example I have tried was:
var txt = ' hallo<span> again</span> &nbsp ';
txt = txt.replace(/(?:^(?: )+)|(?:(?: )+$)/ig,'');
txt = txt.trim();
console.log(txt);//should only display 'hallo<span> again</span>'
Basically I want the output only to be hallo<span> again</span> from the string.

You can create an element, say a textarea, and assign the string to it so that the special characters are decoded, then use trim() on its value:
// Decode the character references by handing the string to a detached
// <textarea>, then trim the decoded text.
var txt = ' hallo<span> again</span> &nbsp ';
var elem = document.createElement("textarea");
// The string goes in through innerHTML and the text is read back through .value.
elem.innerHTML = txt;
var txtValue = elem.value;
// NOTE(review): presumably the decoded &nbsp arrives as U+00A0, which trim() strips — confirm in the target browser.
txt = txtValue.trim();
console.log(txt);

// Strip leading and trailing runs of whitespace and "&nbsp;" entities.
// The entity has to be spelled out in the pattern: a literal space next to \s
// is redundant and never matches the text "&nbsp;".
var txt = ' hallo &nbsp ';
txt = txt.replace(/^(?:&nbsp;|\s)+|(?:&nbsp;|\s)+$/ig,'');
// The "&nbsp" in this input has no semicolon, so it is not matched and stays.
console.log(txt);//displays 'hallo &nbsp' for this input
This'll do the trick, but since the input text contains a &nbsp without a semicolon behind it, the output is "hallo &nbsp"
For input ' hallo &nbsp; ' the output will be hallo

You can convert the &nbsp; entities into hard spaces (U+00A0), then trim:
// Turn every "&nbsp" entity (semicolon optional, any letter case) into a real
// no-break space so that trim() removes it along with ordinary whitespace.
var txt = ' hallo<span> again</span> &nbsp ';
txt = txt
  .replace(/&nbsp(?:;?)/ig, '\u00A0')
  .trim();
// Wrapped in double quotes so that leftover edge whitespace would be visible.
console.log(`"${txt}"`); // displays "hallo<span> again</span>"
Note that since you had &nbsp (without ;) in there, I made the ; optional in the regular expression.

Knowing I'm late to the party, here's my contribution.
Used a Non Capturing Group "?:" and replaced all of them with an empty string "".
How it works:
(?:&nbsp|;|\s+)
?: marks the group () as non-capturing: it groups the alternatives without storing the matched text
&nbsp|;|\s+, &nbsp or ; or
\s+ matches unlimited amount of whitespaces.
// Trim only the leading and trailing runs of whitespace and "&nbsp" entities
// (the semicolon after the entity is optional).
// An unanchored /(?:&nbsp|;|\s+)/gm would delete every semicolon and every
// whitespace run in the string, including the space inside the <span>.
let regex = /^(?:&nbsp;?|\s)+|(?:&nbsp;?|\s)+$/g;
//let str = ` hallo &nbsp `;
let str = ` hallo<span> again</span> &nbsp `;
console.log(str.replace(regex, ""));

// 1st method: replace every match of the pattern, then trim the ends.
var txt = ' hallo ';
var reg = new RegExp(" ", "g"); // Build the pattern with the RegExp constructor; the "g" (global) flag applies the replacement to every match.
// NOTE(review): as written, a single space is replaced by a single space, so only
// the trim() below changes the string — presumably the pattern was meant to be "&nbsp;"; confirm.
var result = txt.replace(reg, " ");
console.log(result.trim());
// 2nd method: delete every character that belongs to the class.
var txt = ' hallo ';
// Inside [] the "|" is a literal character, so this class matches a space, "|" or "'".
var reg = new RegExp("[ |' ']", "g");
var result = txt.replace(reg, "");
console.log(result);

Related

Regex poor performance when nothing matches

I have a problem with a slow regex, but only when the pattern doesn't match. In all other cases performance is acceptable, even if the pattern matches at the end of the text. I'm testing performance on a 100KB text input.
What I am trying to do is to convert input in HTML-like syntax which is using [] instead of <> brackets and translate it to valid XML.
Sample input:
...some content[vc_row param="test1"][vc_column]text [brackets in text] content[/vc_column][/vc_row][vc_row param="xxx"]text content[/vc_row]...some more content
Sample output:
...some content<div class="vc_row" param="test1"><div class="vc_column" >text [brackets in text] content</div></div><div class="vc_row" param="xxx">text content</div>...some more content
To do this I am using regex:
/(.*)(\[\/?vc_column|\[\/?vc_row)( ?)(.*?)(\])(.*)/
And I do this in a while loop for as long as the pattern matches.
As I mentioned before, this works, but the last iteration is extremely slow (or the first, if nothing matches). Here is the complete JavaScript I am using:
var str = '...some content[vc_row param="test1"][vc_column]text content[/vc_column][/vc_row][vc_row param="xxx"]text content[/vc_row]...some more content';
var regex = /(.*)(\[\/?vc_column|\[\/?vc_row)( ?)(.*?)(\])(.*)/;
while (matches = str.match(regex)) {
matches = str.match(regex);
if (matches[2].slice(1, 2) !== '/')
str = matches[1] + "<div class=\"" + matches[2].slice(1) + "\"" + " " + matches[4] + ">" + matches[6];
else
str = matches[1] + "</div>" + matches[6];
}
How could i improve my regex "not match" performance?
You can split it up in 2 regex.
One for the start tags, one for the closing tags.
And then chain 2 global g replaces.
// Convert the bracket tags in two global passes: opening tags, then closing tags.
var str = '...some content[vc_row param="test1"][vc_column]text with [brackets in text] content[/vc_column][/vc_row][vc_row param="xxx"]text content[/vc_row]...some more content';
const reg1 = /\[(vc_(?:column|row))(\s+[^\]]+)?\s*\]/g;
const reg2 = /\[\/(vc_(?:column|row))\s*\]/g;
// $1 is the tag name; $2 is the optional attribute text and expands to nothing when absent.
const withOpeningDivs = str.replace(reg1, '<div class="$1"$2>');
var result = withOpeningDivs.replace(reg2, '</div>');
console.log(result);
Note that those (.*) in the original regex aren't needed this way.
Using a nameless function, then it could be done via 1 regex replace.
// One global pass: the callback decides per match whether it is a closing tag.
var str = '...some content[vc_row param="test1"][vc_column]text with [brackets in text] content[/vc_column][/vc_row][vc_row param="xxx"]text content[/vc_row]...some more content';
const reg = /\[(\/)?(vc_(?:column|row))(\s+[^\]]+)?\s*\]/g;
var result = str.replace(reg, (_whole, slash, tagName, attrs) => {
  // A captured "/" marks a closing tag.
  if (slash) {
    return "</div>";
  }
  // The attribute group is undefined when the tag has no attributes.
  const attributeText = attrs ? attrs : "";
  return `<div class="${tagName}"${attributeText}>`;
});
console.log(result);
How about a replace... Like
str.replace(/\[(\/?)(vc_column|vc_row)([^\]]*?)\]/g, function(a,b,c,d) {
return '<' + b + 'div' + (b==='/' ? '' : ' class="' + c + '"') + d + '>';
});
This matches a tag (start or end) and all attributes, including brackets, capturing everything except the brackets. Then puts it back together in the correct format (divs with classes).
And the global flag (/../g) removes the need for any loops.
var sInput = '...some content[vc_row param="test1"][vc_column]text [brackets in text] content[/vc_column][/vc_row][vc_row param="xxx"]text content[/vc_row]...some more content';
console.log(sInput.replace(/\[(\/?)(vc_column|vc_row)([^\]]*?)\]/g, function(a,b,c,d) {
return '<' + b + 'div' + (b==='/' ? '' : ' class="' + c + '"') + d + '>';
})
);

javascript regex to capture word in an html text

I'm explaining my issue.
I'm trying to do a javascript function to highlight words (change their color) in an html text. I have another function to un highlight them.
I have a list of keywords that i have to highlight.
Here is the code I've written so far:
function highlight_words(keywords) {
unHighlight_words(keywords);
$('.rubricContent').each(function(index, element) {
//get elements for each rubrics
var content = $(element).html();
if (keywords) {
$(keywords).each(function(i, e) {
var term = e
var re = new RegExp('(?:[^.;\w]|^|^\\W+){0}('+ term + ' )(?:[^.\w]|\\W(?=\\W+|$)|$){0}', "gmi");
var subst = '<span style="color:red">' + term + '</span> ';
content = content.replace(re, subst);
});
$(element).html(content);
}
});
The result is not that bad: my words are colored red, but not when they are followed by a "." or a ",".
Anyone have the solution for me ?
Thanks !!
You can use \b word boundary as follows.
term='Test';
content='TestTest. Test Test: Test. Test, TESTtest'
var re = new RegExp('(\\b'+term+'\\b)', "gmi");
var subst = '<span style="color:red">' + term + '</span> ';
content = content.replace(re, subst);
alert(content)
https://jsfiddle.net/3royvd66/1/

How to reference found matches?

I'm having a small problem with a regexp pattern. I don't have regexp knowledge, so I couldn't solve it.
I have this text:
var text = "this (is) some (ran)dom text";
and I want to capture anything between (). So after following this tutorial I came up with this pattern:
var re = /(\(\w*\))/g;
which works fine. But what I want to do now is replace the found matches, or rather modify. I want to wrap the found matches with a span tag. So I used this code:
var spanOpen = '<span style="color: silver;">';
var spanClose = '</span>';
text.replace(re, spanOpen + text.match(re) + spanClose);
even though the code works, I don't get the result I want. It outputs:
as HTML
this <span style="color: silver;">(is),(ran)</span> some <span style="color: silver;">(is),(ran)</span>dom text
as text
this (is),(ran) some (is),(ran)dom text
You can check the example in fiddle. How can I fix this?
The code in fiddle:
var text = "this (is) some (ran)dom text";
var re = /(\(\w*\))/g;
var spanOpen = '<span style="color: silver;">';
var spanClose = '</span>';
var original = "original: " + text + "<br>";
var desired = "desired: this " +spanOpen+"(is)"+spanClose+ " some " +spanOpen+"(ran)"+spanClose+ "dom text<br>";
var output = "output: " + text.replace(re, spanOpen + text.match(re) + spanClose);
var result = original + desired + output;
document.body.innerHTML = result;
If the title is wrong or misleading, I'll change it.
The .replace() method can take a function as the 2nd parameter. That will come in handy here.
var output = "output: " + text.replace(re, function(match){
return spanOpen + match + spanClose
});
The function will be called for each individual match.
You can also use '$&' in your replace string to reference each match
var output = "output: " + text.replace(re, spanOpen + '$&' + spanClose);
See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace
text.match(re) is returning an array of the result, so what you can do is loop this array and replace your string with each items, like this:
// Wrap each match found by the global regex, one replace() call per match.
// match() returns null when nothing matches, so fall back to an empty array
// and let the loop be skipped instead of throwing on matches.length.
var matches = text.match(re) || [];
var output = "output: " + text;
for (var i = 0; i < matches.length; i++)
{
// A string pattern replaces only its first occurrence in output.
output = output.replace(matches[i], spanOpen + matches[i] + spanClose);
}
See this FIDDLE

How can I remove extra white space in a string in JavaScript?

How can I remove extra white space (i.e. more than one white space character in a row) from text in JavaScript?
E.g
match the start using.
How can I remove all but one of the spaces between "match" and "the"?
Use regex. Example code below:
var string = 'match the start using. Remove the extra space between match and the';
string = string.replace(/\s{2,}/g, ' ');
For better performance, use below regex:
// Two or more spaces: a lone space is never matched, so it is never rewritten.
string = string.replace(/  +/g, ' ');
Profiling with firebug resulted in following:
str.replace(/ +/g, ' ') -> 790ms
str.replace(/  +/g, ' ') -> 380ms
str.replace(/ {2,}/g, ' ') -> 470ms
str.replace(/\s\s+/g, ' ') -> 390ms
str.replace(/ +(?= )/g, ' ') -> 3250ms
See string.replace on MDN
You can do something like this:
// Collapse every run of whitespace into a single space.
var string = "Multiple spaces between words";
// The global flag belongs on the regex literal. A third argument to replace()
// is not part of the standard, and a bare g is an undeclared identifier.
string = string.replace(/\s+/g, ' ');
Just do,
var str = "match the start using. Remove the extra space between match and the";
str = str.replace( /\s\s+/g, ' ' );
/**
 * Collapses each run of whitespace in a string into one space character.
 * @param {string} value - Text to normalize.
 * @returns {string} The text with every whitespace run replaced by ' '.
 */
function RemoveExtraSpace(value) {
  const whitespaceRun = /\s+/g;
  const collapsed = value.replace(whitespaceRun, ' ');
  return collapsed;
}
// C#, not JavaScript: the verbatim string prefix is @, which keeps \s as a regex escape.
myString = Regex.Replace(myString, @"\s+", " ");
or even:
// C#, not JavaScript: replace runs of two or more spaces with a single space.
RegexOptions options = RegexOptions.None;
Regex regex = new Regex(@"[ ]{2,}", options);
tempo = regex.Replace(tempo, @" ");
Using regular expression.
var string = "match the start using. Remove the extra space between match and the";
string = string.replace(/\s+/g, " ");
Here is jsfiddle for this
Sure, using a regex:
var str = "match the start using. Remove the extra space between match and the";
// \s alone swaps each whitespace character one-for-one and never shortens a run;
// \s+ matches the whole run so it collapses into one space.
str = str.replace(/\s+/g, ' ');
This can be done also with javascript logic. here is a reusable function I wrote for that task.
LIVE DEMO
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<div>result:
<span id="spn">
</span>
</div>
<input type="button" value="click me" onClick="ClearWhiteSpace('match the start using. JAVASCRIPT CAN BE VERY FUN')"/>
<script>
/**
 * Cleans up the spaces in a text: the second and later spaces of a run are
 * dropped, spaces that directly follow a newline are dropped, and a lone
 * space directly before a newline is dropped.
 * The result is shown in an alert, written into the #spn element and returned.
 * @param {string} text - Text to clean up.
 * @returns {string} The cleaned text.
 */
function ClearWhiteSpace(text) {
  const SPACE = " ";
  const NEWLINE = "\n";
  let cleaned = "";
  let atLineStart = false;

  for (let pos = 0; pos < text.length; pos += 1) {
    const ch = text[pos];

    if (ch === NEWLINE) {
      // Keep the line break and start skipping the spaces that follow it.
      cleaned += ch;
      atLineStart = true;
      continue;
    }

    if (ch === SPACE) {
      // Spaces right after a newline are skipped.
      if (atLineStart) {
        continue;
      }
      // Only the first space of a run is kept.
      if (text[pos - 1] === SPACE) {
        continue;
      }
    }

    atLineStart = false;
    // A space that is immediately followed by a newline is skipped.
    if (ch === SPACE && text[pos + 1] === NEWLINE) {
      continue;
    }
    cleaned += ch;
  }

  alert(cleaned);
  document.getElementById("spn").innerHTML = cleaned;
  return cleaned;
}
</script>
</body>
</html>
Try this regex
var st = "hello world".replace(/\s/g,'');
or as a function
/**
 * Removes every whitespace character from a string.
 * @param {string} str - Text to strip.
 * @returns {string} The text without any whitespace.
 */
function removeSpace(str) {
  const anyWhitespace = /\s/g;
  const stripped = str.replace(anyWhitespace, '');
  return stripped;
}
Here is a working demo

Why won't replace work with my regex object when test does?

I feel silly asking this because I'm betting the answer is staring right at me but here goes.
I'm taking a string from the CSS style textDecoration and trying to remove the underline portion of the string (and any whitespace around it). It returns true when I run test() but when I do the replace method the string is unaltered. Help?
My code:
textDecoration = function(str) {
var n_str = str + '|/\s' + str + '|/\s' + str + '/\s|' + str + '/\s';
var nre = new RegExp(n_str, "g");
debug_log('Found or not: ' + nre.test(txt));
txt.replace(nre, '');
debug_log('Result: ' + txt);
debug_log('-----------------------');
}
var txt = "underline";
debug_log('-----------------------');
debug_log('Starting String: ' + txt);
textDecoration("underline");
txt = "underline overline line-through";
debug_log('-----------------------');
debug_log('Starting String: ' + txt);
textDecoration("underline");
txt = "overline underline line-through";
debug_log('-----------------------');
debug_log('Starting String: ' + txt);
textDecoration("underline");
txt = "overline line-through underline";
debug_log('-----------------------');
debug_log('Starting String: ' + txt);
textDecoration("underline");
Output:
replace() returns a new string with the replacements applied and doesn't change the original string. You should do something like:
var newString = txt.replace(nre, '');
debug_log('Result: ' + newString);
test returns a boolean. replace returns a new string. It does not alter the string.
Also, your regular expression is quite odd. Applying str = "underline", you will get:
/underline|\/sunderline|\/sunderline\/s|underline\/s/
which does not match whitespaces, but "/s".

Categories