Escape less than and greater than in HTML - javascript

content = '5<x<div></div>'
Basically I am looking for a regular expression that will make the string like above into 5<x<div></div>
5x<div></div> will still be 5x<div></div>. I am just trying to escape unclosed html tags
If there is such a library then I will be very happy to use it as long as it meets my main goal of trying to escape unclosed html tags

Rewrite each open tag character "<" with the symbol + unique value ... in this case ",,#*&,,"
Split the string at the unique value
The "replaceString ()" function checks if the passed value is really a tag ... whether both "<" and ">" characters are present in the string. If not present, rewrite the character with "& lt;".
The whole process is repeated for the symbol ">"
This is not the most beautiful solution to this task but it works.
var str = '5<x<div>s>7</div>';
for (var i = 0; i < 2; i++) {
if (i === 0) {
var str2 = str.replace(/</gi, ",,#*&,,<");
var spl = str2.split(",,#*&,,");
} else {
var str2 = str.replace(/>/gi, ">,,#*&,,");
var spl = str2.split(",,#*&,,");
}
replaceString(spl);
}
function replaceString(spl) {
for (let i = 0; i < spl.length; i++) {
if (spl[i].indexOf('<') > -1 && spl[i].indexOf('>') > -1) {
//.......
} else {
if (spl[i].indexOf('<') > -1) {
spl[i] = spl[i].replace(/</gi, "<");
}
else if (spl[i].indexOf('>') > -1) {
spl[i] = spl[i].replace(/>/gi, ">");
}
}
}
str = spl.join('');
}
console.log(str);

Related

How to alternate the case of a string

I'm working on alternating the case of a string (for example asdfghjkl to AsDfGhJkL).
I tried to do this. I found some code that is supposed to do it, but it doesn't seem to be working.
var str="";
var txt=document.getElementById('input').value;
for (var i=0; i<txt.length; i+2){
str = str.concat(String.fromCharCode(txt.charCodeAt(i).toUpperCase()));
}
Here's a quick function to do it. It makes the entire string lowercase and then iterates through the string with a step of 2 to make every other character uppercase.
var alternateCase = function (s) {
var chars = s.toLowerCase().split("");
for (var i = 0; i < chars.length; i += 2) {
chars[i] = chars[i].toUpperCase();
}
return chars.join("");
};
var txt = "hello world";
console.log(alternateCase(txt));
HeLlO WoRlD
The reason it converts the string to an array is to make the individual characters easier to manipulate (i.e. no need for String.prototype.concat()).
Here an ES6 approach:
function swapCase(text) {
return text.split('').map((c,i) =>
i % 2 == 0 ? c.toLowerCase() : c.toUpperCase()
).join('');
}
console.log(swapCase("test"))
You should iterate the string and alternate between upper-casing the character and lower-casing it:
for (var i=0; i<txt.length; i++) {
var ch = String.fromCharCode(txt.charCodeAt(i);
if (i % 2 == 1) {
ch = ch.toUpperCase();
} else {
ch = ch.toLowerCase();
}
str = str.concat(ch);
}

XRegExp to JavaScript RegExp

I want to convert a XRegExp function to pure JavaScript RegExp. Basically all non-alphanumeric character will be replaced with "_" including spaces.
The text
This is a sample text *\&^%$##!~'
will be like
This_is_a_sample_text____________
I have the following code.
var text = "This is a sample text *\&^%$##!~'";
var matchAlphaNumeric = new XRegExp('[\\p{L}\\p{N}]');
var result = substituteNotAcceptedCharactersforTag(text, matchAlphaNumeric);
function substituteNotAcceptedCharactersforTag(text, regex) {
var tagWithAlphaAndNumeric = '';
for (var i = 0; i < text.length; i++) {
var characterBeingTested = text.charAt(i);
if (XRegExp.test(characterBeingTested, regex) === true) {
tagWithAlphaAndNumeric += characterBeingTested.toLowerCase();
} else {
tagWithAlphaAndNumeric += '_';
}
}
return tagWithAlphaAndNumeric;
}
Replace all non-alphanumeric characters with _:
s = s.replace(/[^0-9A-Za-z]/g, '_');

Getting out an expression

Let say I have these two examples
(A = 1) and ( B = 2)
(A = 1)(B = 2 ()).
I need a way to get the following array:
[(],[A][=][1],[)],[and],[(],[B],[=],[2],[)]
[(],[A][=][1],[)],[(],[B],[=],[2],[(],,[)][)]
What I tried to do is the following
Find the delimiters using the following function (in this case the delimiters are the space "" and any brackets ( or ) )
function findExpressionDelimeter (textAreaValue){
var delimiterPositions = [];
var bracesDepth = 0;
var squareBracketsDepth = 0;
var bracketsDepth = 0;
for (var i = 0; i < textAreaValue.length; i++) {
switch (textAreaValue[i]) {
case '(':
bracketsDepth++;
delimiterPositions.push(i);
break;
case ')':
bracketsDepth--;
delimiterPositions.push(i);
break;
case '[':
squareBracketsDepth++;
break;
case ']':
squareBracketsDepth--;
break;
default:
if (squareBracketsDepth == 0 && textAreaValue[i] == ' ') {
delimiterPositions.push(i);
}
}
}
return delimiterPositions;
}
Then I tried to loop trough the values returned and extract the values using substring. The issue is that when I have a ( or ) I need to get the next substring as well as the bracket. This is where I am stuck.
function getTextByDelimeter(delimiterPositions, value) {
var output = [];
var index = 0;
var length = 0;
var string = "";
for (var j = 0; j < delimiterPositions.length; j++) {
if (j == 0) {
index = 0;
} else {
index = delimiterPositions[j - 1] + 1;
}
length = delimiterPositions[j];
string = value.substring(index, length);
output.push(string);
}
string = value.substring(length, value.length);
output.push(string);
return output;
}
Any help would be appreciated.
You could just match the tokens you are interested in:
var str = "(A = 1) and ( B = 2)";
var arr = str.match(/[()]|[^()\s]+/g);
Result:
["(", "A", "=", "1", ")", "and", "(", "B", "=", "2", ")"]
The regex with some comments:
[()] # match a single character token
| # or
[^()\s]+ # match everything else except spaces
If you would like to add more single character tokens, like for example a =, just add it to both character classes. Ie: [()=]|[^()=\s]+
What you want to do is a lexical analyser.
Regular expressions won't allow you to parse a language (a mathematical expression is one). The tree decomposition of the formula cannot be done with it.
However, regex can allow you to discriminate tokens. This is usually done by reading the stream of character. Once you've detect a lexeme, you generate the token.
If you want to check the validity of the formula, or compute the value: you need a parser (semantic analyser). This can't be done using regex.
The similar question with the answer is here.
You can split your string(string.split('')) And then delete whitespaces from array or just check if array[i] != ' ' before your switch block.

Remove all the dots of the string outside the brackets

I have string
var string = .row-4 .col-2.grid-unit+.grid-unit+.grid-unit,.row-4 .col-3 .grid-unit .row-4 .grid-unit:nth-of-type(2n+3) .show-grid+.show-grid-reportdiv
and i need to remove all plus sign leaving the plus sign inside the brackets from the string using javascript
I'd go with something along those lines:
var i, splits, string = ".row-4 .col-2.grid-unit+.grid-unit+.grid-unit,.row-4 .col-3 .grid-unit .row-4 .grid-unit:nth-of-type(2n+3) .show-grid+.show-grid-reportdiv";
splits = string.split(/(\([^)]+\))/);
for (i = 0; i< splits.length; i++) {
if (splits[i].charAt(0) !== "(") {
splits[i] = splits[i].replace("+"," ");
}
}
string = splits.join();
Another way around (dunno if it's better performance wise) would be to use the following:
var string = ".row-4 .col-2.grid-unit+.grid-unit+.grid-unit,.row-4 .col-3 .grid-unit .row-4 .grid-unit:nth-of-type(2n+3) .show-grid+.show-grid-reportdiv";
function replacer (match, offset, string) {
var posOpen = string.indexOf("(",offset);
var posClose = string.indexOf(")",offset);
// we replace it if there are no more closing parenthesis or if there is one that is located after an opening one.
if (posClose === -1 || (posClose > posOpen && posOpen !== -1)) {
return " ";
} else {
return "+";
}
};
string.replace(/\+/g, replacer);
EDIT: added bergi suggestion for a quicker check inside the loop.
EDIT2: Second solution
Use the following code, and let me know if it works :)
var myString = ".row-4 .col-2.grid-unit+.grid-unit+.grid-unit,.row-4:nth-of-type(2n+3) .col-3 .grid-unit .row-4 .grid-unit:nth-of-type(2n+3) .show-grid+.show-grid-reportdiv";
var myArray = myString.split(/\(.[\(\)A-Za-z0-9-.+]*\)/);
for(var i = 0; i < myArray.length; i++) {
myString = myString.replace(myArray[i], myArray[i].replace(/[+]/g,' '));
}

Javascript word-count for any given DOM element

I'm wondering if there's a way to count the words inside a div for example. Say we have a div like so:
<div id="content">
hello how are you?
</div>
Then have the JS function return an integer of 4.
Is this possible? I have done this with form elements but can't seem to do it for non-form ones.
Any ideas?
g
If you know that the DIV is only going to have text in it, you can KISS:
var count = document.getElementById('content').innerHTML.split(' ').length;
If the div can have HTML tags in it, you're going to have to traverse its children looking for text nodes:
function get_text(el) {
ret = "";
var length = el.childNodes.length;
for(var i = 0; i < length; i++) {
var node = el.childNodes[i];
if(node.nodeType != 8) {
ret += node.nodeType != 1 ? node.nodeValue : get_text(node);
}
}
return ret;
}
var words = get_text(document.getElementById('content'));
var count = words.split(' ').length;
This is the same logic that the jQuery library uses to achieve the effect of its text() function. jQuery is a pretty awesome library that in this case is not necessary. However, if you find yourself doing a lot of DOM manipulation or AJAX then you might want to check it out.
EDIT:
As noted by Gumbo in the comments, the way we are splitting the strings above would count two consecutive spaces as a word. If you expect that sort of thing (and even if you don't) it's probably best to avoid it by splitting on a regular expression instead of on a simple space character. Keeping that in mind, instead of doing the above split, you should do something like this:
var count = words.split(/\s+/).length;
The only difference being on what we're passing to the split function.
Paolo Bergantino's second solution is incorrect for empty strings or strings that begin or end with whitespaces. Here's the fix:
var count = !s ? 0 : (s.split(/^\s+$/).length === 2 ? 0 : 2 +
s.split(/\s+/).length - s.split(/^\s+/).length - s.split(/\s+$/).length);
Explanation: If the string is empty, there are zero words; If the string has only whitespaces, there are zero words; Else, count the number of whitespace groups without the ones from the beginning and the end of the string.
string_var.match(/[^\s]+/g).length
seems like it's a better method than
string_var.split(/\s+/).length
At least it won't count "word " as 2 words -- ['word'] rather than ['word', '']. And it doesn't really require any funny add-on logic.
Or just use Countable.js to do the hard job ;)
document.deepText= function(hoo){
var A= [];
if(hoo){
hoo= hoo.firstChild;
while(hoo!= null){
if(hoo.nodeType== 3){
A[A.length]= hoo.data;
}
else A= A.concat(arguments.callee(hoo));
hoo= hoo.nextSibling;
}
}
return A;
}
I'd be fairly strict about what a word is-
function countwords(hoo){
var text= document.deepText(hoo).join(' ');
return text.match(/[A-Za-z\'\-]+/g).length;
}
alert(countwords(document.body))
Or you can do this:
function CountWords (this_field, show_word_count, show_char_count) {
if (show_word_count == null) {
show_word_count = true;
}
if (show_char_count == null) {
show_char_count = false;
}
var char_count = this_field.value.length;
var fullStr = this_field.value + " ";
var initial_whitespace_rExp = /^[^A-Za-z0-9]+/gi;
var left_trimmedStr = fullStr.replace(initial_whitespace_rExp, "");
var non_alphanumerics_rExp = rExp = /[^A-Za-z0-9]+/gi;
var cleanedStr = left_trimmedStr.replace(non_alphanumerics_rExp, " ");
var splitString = cleanedStr.split(" ");
var word_count = splitString.length -1;
if (fullStr.length <2) {
word_count = 0;
}
if (word_count == 1) {
wordOrWords = " word";
} else {
wordOrWords = " words";
}
if (char_count == 1) {
charOrChars = " character";
} else {
charOrChars = " characters";
}
if (show_word_count & show_char_count) {
alert ("Word Count:\n" + " " + word_count + wordOrWords + "\n" + " " + char_count + charOrChars);
} else {
if (show_word_count) {
alert ("Word Count: " + word_count + wordOrWords);
} else {
if (show_char_count) {
alert ("Character Count: " + char_count + charOrChars);
}
}
}
return word_count;
}
The get_text function in Paolo Bergantino's answer didn't work properly for me when two child nodes have no space between them. eg <h1>heading</h1><p>paragraph</p> would be returned as headingparagraph (notice lack of space between the words). So prepending a space to the nodeValue fixes this. But it introduces a space at the front of the text but I found a word count function that trims it off (plus it uses several regexps to ensure it counts words only). Word count and edited get_text functions below:
function get_text(el) {
ret = "";
var length = el.childNodes.length;
for(var i = 0; i < length; i++) {
var node = el.childNodes[i];
if(node.nodeType != 8) {
ret += node.nodeType != 1 ? ' '+node.nodeValue : get_text(node);
}
}
return ret;
}
function wordCount(fullStr) {
if (fullStr.length == 0) {
return 0;
} else {
fullStr = fullStr.replace(/\r+/g, " ");
fullStr = fullStr.replace(/\n+/g, " ");
fullStr = fullStr.replace(/[^A-Za-z0-9 ]+/gi, "");
fullStr = fullStr.replace(/^\s+/, "");
fullStr = fullStr.replace(/\s+$/, "");
fullStr = fullStr.replace(/\s+/gi, " ");
var splitString = fullStr.split(" ");
return splitString.length;
}
}
EDIT
kennebec's word counter is really good. But the one I've found includes a number as a word which is what I needed. Still, that's easy to add to kennebec's. But kennebec's text retrieval function will have the same problem.
This should account for preceding & trailing whitespaces
const wordCount = document.querySelector('#content').innerText.trim().split(/\s+/).length;
string_var.match(/[^\s]+/g).length - 1;

Categories