Replace with RegEx and JavaScript - javascript

I am attempting to do a quick replace of the 'innerHTML' of the 'code' element. I thought this may work:
function codeDisplay ( ) {
var code = document.getElementsByTagName('code').innerHTML;
var codeExam1 = new RegExp('<', 'gm');
var codeExam2 = new RegExp('>', 'gm');
code.replace(codeExam1, '<');
code.replace(codeExam2, '>');
}
Do I need to perform any additional steps to push the information back to the browser or conversion of data types maybe? Or am I completely wrong in how 'RegEx' and 'innerHTML' work? I appreciate the feedback in advance.

So, first of all:
var code = document.getElementsByTagName('code').innerHTML;
document.getElementsByTagName returns a list of elements not just one. So, if your purpose is escaping all the code tags you have in the page, you need to iterate them.
Second, I believe you can avoid regexp just using textContent (where supported) or innerText.
// Show the markup of every <code> element as literal text by writing its
// innerHTML back through a plain-text property.
var codes = document.getElementsByTagName("code");
var i = 0;
var code = codes[i++];
while (code) {
  // Prefer textContent; fall back to innerText where only that exists.
  var textProperty = "textContent" in code
    ? "textContent"
    : ("innerText" in code ? "innerText" : null);
  if (textProperty !== null) {
    code[textProperty] = code.innerHTML;
  }
  code = codes[i++];
}
or create a new text node:
// Replace the children of every <code> element with a single text node that
// holds the element's former markup, so the markup is displayed literally.
var codes = document.getElementsByTagName("code");
var i = 0;
var code = codes[i++];
var html;
while (code) {
  html = code.innerHTML;
  code.innerHTML = "";
  var literal = document.createTextNode(html);
  code.appendChild(literal);
  code = codes[i++];
}
That should escape all HTML entities. If you still want to use a regexp, maybe as a fallback, you can use this kind of function:
// escapeEntities(value): returns `value` with <, > and & replaced by the
// named entities &lt;, &gt; and &amp;.
var escapeEntities = (function () {
  // Character -> entity name. The keys also drive the character class below,
  // so a new entry only has to be added here.
  var nameByChar = {"<" : "lt", ">" : "gt", "&" : "amp" };
  var pattern = new RegExp("[" + Object.keys(nameByChar).join("") + "]", "g");

  return function (value) {
    return value.replace(pattern, function (ch) {
      if (ch in nameByChar) {
        return "&" + nameByChar[ch] + ";";
      }
      return ch;
    });
  };
})();
And then in your code:
code.innerHTML = escapeEntities(code.innerHTML);
Note that if Object.keys is not supported you can easily use a shim (as indicated in the link), or simply write out manually the list of entities you support:
var entities = {"<" : "lt", ">" : "gt", "&" : "amp" };
var re = /[<>&]/g;
In that case you need to remember to update both the entities and the re variables whenever you add a new entity in the future; Object.keys just helps with maintenance.

Use assignment:
code = code.replace(codeExam1, '<');
code = code.replace(codeExam2, '>');
And modify your code like this:
/**
 * Escapes the angle brackets inside every <code> element so that its markup
 * is displayed as text instead of being rendered.
 */
function codeDisplay ( ) {
  var codeArray = document.getElementsByTagName('code');
  var codeExam1 = new RegExp('<', 'g');
  var codeExam2 = new RegExp('>', 'g');
  for ( var i = 0 ; i < codeArray.length ; ++i ){
    var code = codeArray[i].innerHTML;
    // replace() returns a new string; the result must be assigned back.
    code = code.replace(codeExam1, '&lt;');
    code = code.replace(codeExam2, '&gt;');
    codeArray[i].innerHTML = code;
  }
}

Replace returns a new string containing the result. See MDN for example.
To actually replace the contents of code, your code has to look like this:
/**
 * Escapes the angle brackets inside every <code> element so that its markup
 * is displayed as text instead of being rendered.
 */
function codeDisplay ( ) {
  // getElementsByTagName returns a collection, not a single element, so each
  // element has to be processed individually.
  var elements = document.getElementsByTagName('code');
  var codeExam1 = new RegExp('<', 'g');
  var codeExam2 = new RegExp('>', 'g');
  for (var i = 0; i < elements.length; i++) {
    var code = elements[i].innerHTML;
    code = code.replace(codeExam1, '&lt;');
    code = code.replace(codeExam2, '&gt;');
    elements[i].innerHTML = code;
  }
}
Or a shorter version (could be even shorter, but in my opinion just at the cost of readability):
/**
 * Escapes the angle brackets inside every <code> element (chained version).
 */
function codeDisplay ( ) {
  // getElementsByTagName returns a collection, so iterate over its elements.
  var elements = document.getElementsByTagName('code');
  for (var i = 0; i < elements.length; i++) {
    elements[i].innerHTML = elements[i].innerHTML
      .replace( /</g , '&lt;' )
      .replace( />/g, '&gt;' );
  }
}

Try this:
/**
 * Escapes the angle brackets inside every <code> element.
 */
function codeDisplay ( ) {
  // The tag lookup yields a collection; handle each element separately.
  var elements = document.getElementsByTagName('code');
  for (var i = 0; i < elements.length; i++) {
    var s = elements[i].innerHTML;
    s = s.replace(/\</g,'&lt;');
    s = s.replace(/\>/g,'&gt;');
    elements[i].innerHTML = s;
  }
}

Related

Regex, jQuery, Replace, Function, String Manipulation, Templating

I have the following function:
function parseEntry(query, html, url) {
// logic draft :(
var re = new RegExp('{{{(.*)}}}');
regex = query.replace(re, "$1");
var newre = new RegExp(regex);
regged = html.replace(newre, "$1");
ret = query.replace(regex, regged);
// parse selectors
var re = new RegExp('{{(.*)}}');
newtext = html.replace(re, "$1");
ret = ret.replace(newtext, $(newtext).clone().html());
// parse %url%
ret = ret.replace("%url%", url);
// ret remaining
return ret;
}
// Called this way:
let output = parseEntry('static value %url% {{.thisclass}} {{{(\d+)}}}', '<h1 class="thisclass">Test</h1><h2 class="thisclass">Test2</h2> 1234 12', "http://perdu.com");
console.log(output)
/**
should return:
static value http://perdu.com TestTest2 123412
{{{triple brackets = regex}}}
{{double brackets = jquery}}
**/
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
Can you help refactoring parseEntry() function to return expected output?
All help appreciated!
I'm not sure if I understand, but this is an attempt using different approaches that I think are useful in this kind of situation. There are examples of split(), replace() and the createElement hack to parse HTML.
var query = 'static value %url% {{.thisclass}} {{{(\d+)}}}';
var html = '<h1 class="thisclass">Test</h1><h2 class="thisclass">Test2</h2> 1234 12';
var url = "http://perdu.com";
query = query.split(" ").map(o=>{
return o.replace(/\{\{\{(.*)\}\}\}/g, "$1");
}).map(o=>{
return o.replace(/\{\{(.*)\}\}/g, "$1");
});
var el = document.createElement( 'div' );
el.innerHTML = "<div class='outer'>"+html+"</div>";
var t1 = $("h1").text();
var t2 = $("h2").text();
var out = $(".outer").text();
var newArr = [];
newArr.push(query[0]+" "+query[1]+" "+url+" "+t1+t2+out);
newArr.push("{{{triple brackets = "+query[4]+"}}}");
newArr.push("{{double brackets = "+query[3]+"}}");
console.log(newArr);
newArr.map(o=>{
$("#res").append(o+"<br>");
});
Full example here: http://jsfiddle.net/k8em5twd/6/
So if this question is as simple as "why didn't the backslash show up in my output", then the answer is also very simple. Try escaping the backslash in your input string like so:
let output = parseEntry('static value %url% {{.thisclass}} {{{(\\d+)}}}', '<h1 class="thisclass">Test</h1><h2 class="thisclass">Test2</h2> 1234 12', "http://perdu.com");
The key is that {{{(\d+)}}} becomes {{{(\\d+)}}}. This way the slash is recognized as a character. Otherwise, \d is treated as an escape sequence. Output below.
function parseEntry(query, html, url) {
// logic draft :(
var re = new RegExp('{{{(.*)}}}');
regex = query.replace(re, "$1");
var newre = new RegExp(regex);
regged = html.replace(newre, "$1");
ret = query.replace(regex, regged);
// parse selectors
var re = new RegExp('{{(.*)}}');
newtext = html.replace(re, "$1");
ret = ret.replace(newtext, $(newtext).clone().html());
// parse %url%
ret = ret.replace("%url%", url);
// ret remaining
return ret;
}
// Called this way:
// THIS LINE IS CHANGED:
let output = parseEntry('static value %url% {{.thisclass}} {{{(\\d+)}}}', '<h1 class="thisclass">Test</h1><h2 class="thisclass">Test2</h2> 1234 12', "http://perdu.com");
console.log(output)
/**
should return:
static value http://perdu.com TestTest2 123412
{{{triple brackets = regex}}}
{{double brackets = jquery}}
**/
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
Ended up doing it myself, for the curious:
/**
 * Expands the placeholders of a template string.
 *
 * - `{{{regex}}}` is replaced by all matches of `regex` in `ht`, concatenated
 *   (an empty string when nothing matches).
 * - `{{selector}}` is replaced by the concatenated inner HTML of the elements
 *   of `ht` selected by the jQuery selector.
 * - The first `%url%` is replaced by `url`.
 *
 * @param {string} query Template containing the placeholders.
 * @param {string} url   Value substituted for `%url%`.
 * @param {string} ht    HTML source the regexes and selectors are applied to.
 * @returns {string} The expanded template.
 */
function parseEntry(query, url, ht) {
  var result = query;

  // parse regex expressions (triple brackets)
  // match() yields null when there is no placeholder, hence the `|| []`.
  var regexPlaceholders = result.match(new RegExp('{{{(.*)}}}', 'g')) || [];
  regexPlaceholders.forEach(function (placeholder) {
    var pattern = placeholder.replace("{{{", '').replace("}}}", '');
    var found = (ht.match(new RegExp(pattern, 'g')) || []).join("");
    // A replacer function inserts the text verbatim, so "$&" or "$1" inside
    // the matched text is not treated as a replacement pattern.
    result = result.replace(placeholder, function () { return found; });
  });

  // parse jquery expressions (double brackets)
  var selectorPlaceholders = result.match(new RegExp('{{(.*)}}', 'g')) || [];
  selectorPlaceholders.forEach(function (placeholder) {
    var selector = placeholder.replace("{{", '').replace("}}", '');
    var appendHTML = '';
    $('<div>' + ht + '</div>').find(selector).each(function () {
      appendHTML += $(this).html();
    });
    result = result.replace(placeholder, function () { return appendHTML; });
  });

  // parse %url%
  return result.replace("%url%", function () { return url; });
}
let output = parseEntry('static value %url% {{.thisclass}} {{{(\\d+)}}}', "http://perdu.com", '<h1 class="thisclass">Test</h1><h2 class="thisclass">Test2</h2> 1234 12');
console.log(output);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

Want to put all words at the site with the color blue and uppercase

I'm building a site with Joomla, and on this site I want every occurrence of the word "Inovflow" to be displayed in blue and in uppercase, like this: "INOVFLOW".
I put this code on the js folder of the site:
jQuery(document).fn.findText = function(params){
var phrases = params.query,
ignorance = params.ignorecase;
wrapper = $(this);
var source = wrapper.html();
selection_class_name = params.style;
source = source.replace(/[\n|\t]+/gi, '');
source = source.replace(/\s+/gi, ' ');
source = source.replace(/> /gi, '>');
source = source.replace(/(\w)</gi, function(m, w){return(w + " <");});
phrases.forEach(function(str){
var regexp = makeRegexp(str);
source = source.replace(regexp, function (m){
return (emulateSelection(m));
});
});
wrapper.html(source);
var res_array = wrapper.find("[search=xxxxx]")
return(res_array);
};
/**
 * Builds a global, case-insensitive regular expression that finds the phrase
 * `s` even when an optional space and/or <span ...> / </span> tag sits around
 * it or between its words.
 *
 * @param {string} s Phrase to search for; it is matched literally.
 * @returns {RegExp} Pattern with the "gi" flags.
 */
function makeRegexp(s){
  var space = '( )?(<span[^>]*>)?(</span[^>]*>)?( )?';
  // Split on every single whitespace character (as the per-character replace
  // did before) and escape regex metacharacters in each word, so a phrase
  // such as "a.b" does not match "axb".
  var words = s.split(/\s/).map(function (word) {
    return word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  });
  return new RegExp(space + words.join(space) + space, "gi");
}
/**
 * Passes the text runs of an HTML fragment (sequences without "<" or ">")
 * through wrapWords(), applying four patterns one after another.
 *
 * @param {string} htmlPiece HTML fragment.
 * @returns {string} The fragment after the four replacements.
 */
function emulateSelection (htmlPiece){
  var textRunPatterns = [
    // Every run that is directly followed by "<".
    // NOTE(review): `(?!=>)` is a negative lookahead for the literal text
    // "=>"; a lookbehind for ">" may have been intended - confirm.
    /(?!=>)[^><]+(?=<)/g,
    // A run at the very start of the fragment.
    /^[^><]+/,
    // A run at the very end of the fragment.
    /[^><]+$/,
    // A fragment that contains no angle brackets at all.
    /^[^><]+$/
  ];
  return textRunPatterns.reduce(function (markup, pattern) {
    return markup.replace(pattern, function (textRun) {
      return wrapWords(textRun);
    });
  }, htmlPiece);
}
/**
 * Logs the given text and returns it wrapped in a marker span
 * (<span search="xxxxx">...</span>).
 *
 * @param {string} plainPiece Text to wrap.
 * @returns {string} The wrapped text.
 */
function wrapWords(plainPiece){
  console.log("plain: " + plainPiece);
  var wrapped = '<span search="xxxxx">' + plainPiece;
  wrapped += '</span>';
  return wrapped;
}
jQuery(document).each($('.container').findText({query: ['INOVFLOW']}), function (){
$(this).addClass("changeColorInovflow");
});
After this, the page seems to get into an infinite loop and doesn't load.
If instead of jQuery(document) I use $, the JS returns an error and doesn't run.
Am I doing something wrong?
If findText is intended to be a jQuery plugin, you'll need to update the way the function is declared.
/**
 * jQuery plugin: normalizes the whitespace of the set's HTML and wraps every
 * occurrence of the given phrases via emulateSelection().
 *
 * @param {{query: string[]}} params `query` lists the phrases to look for.
 * @returns {jQuery} The set the plugin was called on, for chaining. The
 *   wrapped matches can be retrieved with `.find("[search=xxxxx]")`.
 */
$.fn.findText = function(params) {
  var phrases = params.query;
  var wrapper = this; // this is already a jQuery object
  var source = wrapper.html();
  // Inside a character class "|" is a literal pipe, so the former [\n|\t]
  // also deleted pipe characters; only newlines and tabs are removed now.
  source = source.replace(/[\n\t]+/g, '');
  source = source.replace(/\s+/g, ' ');
  source = source.replace(/> /g, '>');
  source = source.replace(/(\w)</g, function(m, w){return(w + " <");});
  phrases.forEach(function(str){
    var regexp = makeRegexp(str);
    source = source.replace(regexp, function (m){
      return (emulateSelection(m));
    });
  });
  wrapper.html(source);
  // The former implicit global `selection_class_name` and the unused
  // `res_array` lookup were dropped: nothing visible here reads them, and an
  // undeclared assignment throws in strict mode.
  return this; // return 'this' to make it chainable
};
Here are the relevant docs:
https://learn.jquery.com/plugins/basic-plugin-creation/
Then, when you call findText, you can use a much simpler selector:
$('.container').each(function() {
$(this).findText({query: ['INOVFLOW']}).addClass("changeColorInovflow");
});
The original code wouldn't work because each() takes either a function or an array with a callback as parameters, not a selector.
.each(): http://api.jquery.com/each/
jQuery.each(): http://api.jquery.com/jquery.each/

Node.js: Replace a second part of a pattern in a text file line-by-line

I've a less file with content like this:
#background: #345602;
#imgBack: #000;
I can read the whole text file into a variable, and latter save the content of the variable modifying the file:
var lessStr = grunt.file.read('./myLessFile.less');
Now I want to change the variable, say, #imgBack to #ff0000. So that, the modified file would look like:
#background: #345602;
#imgBack: #ff0000;
Is there any way to do this by regular expression match and replace? Please help.
EDIT
I've code like:
var str = '#black: #000;\n#grayDarker: #222;\n#grayDark: #333;\n#gray: #555;\n#grayLight: #999;';
var varName = '#black';
var replace = '#ab4564';
var regex = '(' + varName + ':\\s*)(?:#[a-z0-9]+)(.*)$';
var re = new RegExp(regex, 'm');
var replaceStr = '$1' + replace;
str.replace(re, replaceStr);
console.log(str);
But it's not working. Have I made a mistake somewhere?
I think you mean this,
> var str = "#background: #345602;\n#imgBack: #000;";
> str.replace(/^(#imgBack:\s*#).*$/gm, "$1ff0000;");
'#background: #345602;\n#imgBack: #ff0000;'
Update:
> var str = '#black: #000;\n#grayDarker: #222;\n#grayDark: #333;\n#gray: #555;\n#grayLight: #999;';
undefined
> var varName = '#black';
> var regex = '(' + varName + ':\\s*)(?:#[a-z0-9]+)(.*)$';
undefined
> var re = new RegExp(regex, 'm');
> var replace = '#ab4564';
undefined
> var replaceStr = "$1" + replace;
undefined
> str.replace(re, replaceStr);
'#black: #ab4564\n#grayDarker: #222;\n#grayDark: #333;\n#gray: #555;\n#grayLight: #999;'
(#imgBack:\s*#)(?:\d+)(.*)$
You can use this. The replacement string will be:
$1#ff0000$2.
See demo.
http://regex101.com/r/iO1uK1/6

how do you do html encode using javascript? [duplicate]

I’m using JavaScript to pull a value out from a hidden field and display it in a textbox. The value in the hidden field is encoded.
For example,
<input id='hiddenId' type='hidden' value='chalk &amp; cheese' />
gets pulled into
<input type='text' value='chalk &amp; cheese' />
via some jQuery to get the value from the hidden field (it’s at this point that I lose the encoding):
$('#hiddenId').attr('value')
The problem is that when I read chalk &amp; cheese from the hidden field, JavaScript seems to lose the encoding. I do not want the value to be chalk & cheese. I want the literal amp; to be retained.
Is there a JavaScript library or a jQuery method that will HTML-encode a string?
EDIT: This answer was posted a long time ago, and the htmlDecode function introduced an XSS vulnerability. It has been modified, changing the temporary element from a div to a textarea to reduce the XSS risk. But nowadays I would encourage you to use the DOMParser API, as suggested in another answer.
I use these functions:
function htmlEncode(value){
  // A <textarea> is built in memory only and never attached to the document.
  // Its text is set (which encodes it) and the encoded markup is read back.
  var scratch = $('<textarea/>');
  var filled = scratch.text(value);
  return filled.html();
}
function htmlDecode(value){
  // Inverse of the encoding trick: the value is assigned as markup to an
  // in-memory <textarea> and then read back as plain text.
  var scratch = $('<textarea/>');
  var filled = scratch.html(value);
  return filled.text();
}
Basically a textarea element is created in memory, but it is never appended to the document.
On the htmlEncode function I set the innerText of the element, and retrieve the encoded innerHTML; on the htmlDecode function I set the innerHTML value of the element and the innerText is retrieved.
Check a running example here.
The jQuery trick doesn't encode quote marks and in IE it will strip your whitespace.
Based on the escape templatetag in Django, which I guess is heavily used/tested already, I made this function which does what's needed.
It's arguably simpler (and possibly faster) than any of the workarounds for the whitespace-stripping issue - and it encodes quote marks, which is essential if you're going to use the result inside an attribute value for example.
/**
 * Escapes the five HTML-significant characters (& " ' < >) so the result can
 * be used in element content and in quoted attribute values.
 *
 * @param {string} str Raw text.
 * @returns {string} HTML-escaped text.
 */
function htmlEscape(str) {
  return str
    // "&" must be handled first, otherwise the "&" of the entities produced
    // by the following steps would be escaped again.
    .replace(/&/g, '&amp;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
// I needed the opposite function today, so adding here too:
/**
 * Reverses htmlEscape: turns &quot; &#39; &lt; &gt; &amp; back into the
 * characters they stand for.
 *
 * @param {string} str HTML-escaped text.
 * @returns {string} Raw text.
 */
function htmlUnescape(str){
  return str
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    // "&amp;" goes last so that "&amp;lt;" becomes "&lt;" and not "<".
    .replace(/&amp;/g, '&');
}
Update 2013-06-17:
In the search for the fastest escaping I have found this implementation of a replaceAll method:
http://dumpsite.com/forum/index.php?topic=4.msg29#msg29
(also referenced here: Fastest method to replace all instances of a character in a string)
Some performance results here:
http://jsperf.com/htmlencoderegex/25
It gives identical result string to the builtin replace chains above. I'd be very happy if someone could explain why it's faster!?
Update 2015-03-04:
I just noticed that AngularJS are using exactly the method above:
https://github.com/angular/angular.js/blob/v1.3.14/src/ngSanitize/sanitize.js#L435
They add a couple of refinements - they appear to be handling an obscure Unicode issue as well as converting all non-alphanumeric characters to entities. I was under the impression the latter was not necessary as long as you have an UTF8 charset specified for your document.
I will note that (4 years later) Django still does not do either of these things, so I'm not sure how important they are:
https://github.com/django/django/blob/1.8b1/django/utils/html.py#L44
Update 2016-04-06:
You may also wish to escape forward-slash /. This is not required for correct HTML encoding, however it is recommended by OWASP as an anti-XSS safety measure. (thanks to #JNF for suggesting this in comments)
.replace(/\//g, '&#x2F;');
Here's a non-jQuery version that is considerably faster than both the jQuery .html() version and the .replace() version. This preserves all whitespace, but like the jQuery version, doesn't handle quotes.
function htmlEncode( html ) {
  // Put the text into a detached <a> as a text node and read the element's
  // serialized markup back.
  var holder = document.createElement( 'a' );
  var textNode = holder.appendChild( document.createTextNode( html ) );
  return textNode.parentNode.innerHTML;
};
Speed: http://jsperf.com/htmlencoderegex/17
Demo:
Output:
Script:
function htmlEncode( html ) {
  // The text is appended to a detached <a> as a text node; the element's
  // innerHTML is the return value.
  var anchor = document.createElement( 'a' );
  var appended = anchor.appendChild( document.createTextNode( html ) );
  var owner = appended.parentNode;
  return owner.innerHTML;
};
/**
 * Decodes HTML character references in `html` and returns the plain text.
 *
 * A <textarea> is used as the scratch element instead of an <a>: its content
 * is parsed as text, so tags in the input are not turned into elements
 * (no image loads or event-handler attributes from untrusted input).
 */
function htmlDecode( html ) {
  var scratch = document.createElement( 'textarea' );
  scratch.innerHTML = html;
  return scratch.textContent;
};
document.getElementById( 'text' ).value = htmlEncode( document.getElementById( 'hidden' ).value );
//sanity check
var html = '<div> & hello</div>';
document.getElementById( 'same' ).textContent =
'html === htmlDecode( htmlEncode( html ) ): '
+ ( html === htmlDecode( htmlEncode( html ) ) );
HTML:
<input id="hidden" type="hidden" value="chalk &amp; cheese" />
<input id="text" value="" />
<div id="same"></div>
I know this is an old one, but I wanted to post a variation of the accepted answer that will work in IE without removing lines:
/**
 * HTML-encodes a multi-line string one line at a time and joins the encoded
 * lines with "\r\n", whatever line-break style the input used.
 */
function multiLineHtmlEncode(value) {
  return value
    .split(/\r\n|\r|\n/)
    .map(function (line) { return htmlEncode(line); })
    .join('\r\n');
}
function htmlEncode(value) {
  // Set the value as the text of an in-memory <div>, then read the encoded
  // markup back.
  var scratch = $('<div/>');
  var filled = scratch.text(value);
  return filled.html();
}
Underscore provides _.escape() and _.unescape() methods that do this.
> _.unescape( "chalk &amp; cheese" );
"chalk & cheese"
> _.escape( "chalk & cheese" );
"chalk &amp; cheese"
Good answer. Note that if the value to encode is undefined or null with jQuery 1.4.2 you might get errors such as:
jQuery("<div/>").text(value).html is not a function
OR
Uncaught TypeError: Object has no method 'html'
The solution is to modify the function to check for an actual value:
/**
 * HTML-encodes a value with the jQuery text()/html() trick.
 *
 * @param {*} value Value to encode.
 * @returns {string} Encoded text; '' for null, undefined and the empty string.
 */
function htmlEncode(value){
  // Only missing values short-circuit. A plain truthiness test would also
  // turn 0 and false into '' instead of encoding them.
  if (value === null || value === undefined || value === '') {
    return '';
  }
  return jQuery('<div/>').text(value).html();
}
For those who prefer plain javascript, here is the method I have used successfully:
// Escapes `str` for HTML by wrapping it in a text node inside a detached
// <div> and returning the div's innerHTML.
function escapeHTML (str)
{
  var container = document.createElement('div');
  container.appendChild(document.createTextNode(str));
  return container.innerHTML;
}
FWIW, the encoding is not being lost. The encoding is used by the markup parser (browser) during the page load. Once the source is read and parsed and the browser has the DOM loaded into memory, the encoding has been parsed into what it represents. So by the time your JS is execute to read anything in memory, the char it gets is what the encoding represented.
I may be operating strictly on semantics here, but I wanted you to understand the purpose of encoding. The word "lost" makes it sound like something isn't working like it should.
Faster without Jquery. You can encode every character in your string:
/**
 * Encodes every character of `e` as a decimal numeric character reference.
 *
 * @param {string} e Text to encode.
 * @returns {string} Sequence of "&#NNN;" references.
 */
function encode(e) {
  // The `u` flag makes the pattern match whole code points, so a character
  // outside the BMP yields one valid reference instead of two references to
  // its surrogate halves.
  return e.replace(/[\s\S]/gu, function (ch) {
    return "&#" + ch.codePointAt(0) + ";";
  });
}
Or just target the main characters to worry about (&, linebreaks, <, >, " and ') like:
// Replaces &, newline, <, >, ' and " with decimal numeric character
// references; every other character is left untouched.
function encode(r){
  const toNumericReference = (ch) => `&#${ch.charCodeAt(0)};`;
  return r.replace(/[\x26\x0A\<>'"]/g, toNumericReference);
}
test.value=encode('Encode HTML entities!\n\n"Safe" escape <script id=\'\'> & useful in <pre> tags!');
testing.innerHTML=test.value;
/*************
* \x26 is &ampersand (it has to be first),
* \x0A is newline,
*************/
<textarea id=test rows="9" cols="55"></textarea>
<div id="testing">www.WHAK.com</div>
Prototype has it built-in the String class. So if you are using/plan to use Prototype, it does something like:
'<div class="article">This is an article</div>'.escapeHTML();
// -> "&lt;div class="article"&gt;This is an article&lt;/div&gt;"
Here is a simple javascript solution. It extends String object with a method "HTMLEncode" which can be used on an object without parameter, or with a parameter.
/**
 * Replaces every non-ASCII character with a decimal numeric character
 * reference.
 *
 * @param {string} [str] Text to encode; when omitted, the string the method
 *   is called on is encoded.
 * @returns {string} Text in which every code point above 127 is "&#NNN;".
 */
String.prototype.HTMLEncode = function(str) {
  var source = String(arguments.length === 1 ? str : this);
  var result = "";
  // for...of iterates by code point, so an astral character produces one
  // valid reference rather than two references to its surrogate halves.
  for (var ch of source) {
    var code = ch.codePointAt(0);
    // ASCII ends at 127; the former "> 128" test let U+0080 through.
    result += (code > 127) ? "&#" + code + ";" : ch;
  }
  return result;
};
// TEST
console.log("stetaewteaw æø".HTMLEncode());
console.log("stetaewteaw æø".HTMLEncode("æåøåæå"))
I have made a gist "HTMLEncode method for javascript".
Based on angular's sanitize... (es6 module syntax)
// ref: https://github.com/angular/angular.js/blob/v1.3.14/src/ngSanitize/sanitize.js
const SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
const NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
const decodeElem = document.createElement('pre');
/**
 * Decodes html encoded text, so that the actual string may
 * be used.
 * @param value
 * @returns {string} decoded text
 */
export function decode(value) {
  if (!value) return '';
  // "<" is escaped before the assignment so the input cannot open a tag when
  // it is assigned to innerHTML.
  decodeElem.innerHTML = value.replace(/</g, '&lt;');
  return decodeElem.textContent;
}
/**
 * Encodes all potentially dangerous characters, so that the
 * resulting string can be safely inserted into attribute or
 * element text.
 * @param value
 * @returns {string} encoded text
 */
export function encode(value) {
  if (value === null || value === undefined) return '';
  return String(value).
    // "&" first, so the references produced below are not escaped again.
    replace(/&/g, '&amp;').
    // A surrogate pair becomes one reference to the code point it encodes.
    replace(SURROGATE_PAIR_REGEXP, pair => {
      const hi = pair.charCodeAt(0);
      const low = pair.charCodeAt(1);
      return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';';
    }).
    replace(NON_ALPHANUMERIC_REGEXP, ch => {
      return '&#' + ch.charCodeAt(0) + ';';
    }).
    replace(/</g, '&lt;').
    replace(/>/g, '&gt;');
}
export default {encode,decode};
My pure-JS function:
/**
 * HTML entities encode.
 *
 * Wraps the text in a text node inside a detached <div> and returns the
 * div's innerHTML.
 *
 * @param {string} str Input text
 * @return {string} Filtered text
 */
function htmlencode (str){
  var container = document.createElement('div');
  var textNode = document.createTextNode(str);
  container.appendChild(textNode);
  return container.innerHTML;
}
JavaScript HTML Entities Encode & Decode
As far as I know there isn't any straight forward HTML Encode/Decode method in javascript.
However, what you can do, is to use JS to create an arbitrary element, set its inner text, then read it using innerHTML.
Let's say, with jQuery, this should work:
var helper = $('chalk & cheese').hide().appendTo('body');
var htmled = helper.html();
helper.remove();
Or something along these lines.
You shouldn't have to escape/encode values in order to shuttle them from one input field to another.
<form>
<input id="button" type="button" value="Click me">
<input type="hidden" id="hiddenId" name="hiddenId" value="I like cheese">
<input type="text" id="output" name="output">
</form>
<script>
$(document).ready(function(e) {
$('#button').click(function(e) {
$('#output').val($('#hiddenId').val());
});
});
</script>
JS doesn't go inserting raw HTML or anything; it just tells the DOM to set the value property (or attribute; not sure). Either way, the DOM handles any encoding issues for you. Unless you're doing something odd like using document.write or eval, HTML-encoding will be effectively transparent.
If you're talking about generating a new textbox to hold the result...it's still as easy. Just pass the static part of the HTML to jQuery, and then set the rest of the properties/attributes on the object it returns to you.
$box = $('<input type="text" name="whatever">').val($('#hiddenId').val());
I had a similar problem and solve it using the function encodeURIComponent from JavaScript (documentation)
For example, in your case if you use:
<input id='hiddenId' type='hidden' value='chalk & cheese' />
and
encodeURIComponent($('#hiddenId').attr('value'))
you will get chalk%20%26%20cheese. Even spaces are kept.
In my case, I had to encode one forward slash, and this code works perfectly:
encodeURIComponent('name/surname')
and I got name%2Fsurname
Here's a little bit that emulates the Server.HTMLEncode function from Microsoft's ASP, written in pure JavaScript:
/**
 * Emulates ASP's Server.HTMLEncode: & < > " become named entities and every
 * character outside the range 0x20-0x7e becomes a decimal numeric reference.
 * Apostrophes are left as they are.
 *
 * @param {string} s Text to encode.
 * @returns {string} Encoded text.
 */
function htmlEncode(s) {
  var ntable = {
    "&": "amp",
    "<": "lt",
    ">": "gt",
    "\"": "quot"
  };
  var named = s.replace(/[&<>"]/g, function(ch) {
    return "&" + ntable[ch] + ";";
  });
  // The `u` flag matches whole code points, so a character outside the BMP
  // gives one valid reference instead of two surrogate references.
  return named.replace(/[^ -\x7e]/gu, function(ch) {
    return "&#" + ch.codePointAt(0).toString() + ";";
  });
}
The result does not encode apostrophes, but encodes the other HTML specials and any character outside the 0x20-0x7e range.
If you want to use jQuery. I found this:
http://www.jquerysdk.com/api/jQuery.htmlspecialchars
(part of jquery.string plugin offered by jQuery SDK)
The problem with Prototype I believe is that it extends base objects in JavaScript and will be incompatible with any jQuery you may have used. Of course, if you are already using Prototype and not jQuery, it won't be a problem.
EDIT: Also there is this, which is a port of Prototype's string utilities for jQuery:
http://stilldesigning.com/dotstring/
/**
 * HTML encode/decode helper built around one entity <-> character table.
 *
 * htmlEnDeCode.htmlEncode(value) replaces & > < " ' with entities.
 * htmlEnDeCode.htmlDecode(value) replaces those entities, and decimal
 * references of one to five digits, with the characters they stand for.
 * Falsy values are returned unchanged by both functions.
 */
var htmlEnDeCode = (function() {
    var charToEntityRegex,
        entityToCharRegex,
        charToEntity,
        entityToChar;

    // Clears both lookup tables and registers the default entity set.
    function resetCharacterEntities() {
        charToEntity = {};
        entityToChar = {};
        // add the default set
        addCharacterEntities({
            '&amp;'  : '&',
            '&gt;'   : '>',
            '&lt;'   : '<',
            '&quot;' : '"',
            '&#39;'  : "'"
        });
    }

    // Registers entity -> character pairs and rebuilds both regular
    // expressions from the pairs passed in.
    function addCharacterEntities(newEntities) {
        var charKeys = [],
            entityKeys = [],
            key, echar;
        for (key in newEntities) {
            echar = newEntities[key];
            entityToChar[key] = echar;
            charToEntity[echar] = key;
            charKeys.push(echar);
            entityKeys.push(key);
        }
        charToEntityRegex = new RegExp('(' + charKeys.join('|') + ')', 'g');
        entityToCharRegex = new RegExp('(' + entityKeys.join('|') + '|&#[0-9]{1,5};' + ')', 'g');
    }

    function htmlEncode(value){
        var htmlEncodeReplaceFn = function(match, capture) {
            return charToEntity[capture];
        };
        return (!value) ? value : String(value).replace(charToEntityRegex, htmlEncodeReplaceFn);
    }

    function htmlDecode(value) {
        var htmlDecodeReplaceFn = function(match, capture) {
            // Numeric references can exceed 65535 with five digits;
            // fromCodePoint keeps them intact where fromCharCode would wrap.
            return (capture in entityToChar) ? entityToChar[capture] : String.fromCodePoint(parseInt(capture.slice(2), 10));
        };
        return (!value) ? value : String(value).replace(entityToCharRegex, htmlDecodeReplaceFn);
    }

    resetCharacterEntities();

    return {
        htmlEncode: htmlEncode,
        htmlDecode: htmlDecode
    };
})();
This is from ExtJS source code.
<script>
/**
 * Returns the string with & " ' < > replaced by HTML entities.
 */
String.prototype.htmlEncode = function () {
    return String(this)
        // "&" first, so the entities added below are not escaped again.
        .replace(/&/g, '&amp;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
};
var aString = '<script>alert("I hack your site")</script>';
console.log(aString.htmlEncode());
</script>
Will output: &lt;script&gt;alert(&quot;I hack your site&quot;)&lt;/script&gt;
.htmlEncode() will be accessible on all strings once defined.
HtmlEncodes the given value
var htmlEncodeContainer = $('<div />');
/**
 * HTML-encodes a value through the shared in-memory container element.
 *
 * @param {*} value Value to encode.
 * @returns {string} Encoded text; '' for null, undefined and the empty string.
 */
function htmlEncode(value) {
  // Only missing values short-circuit. A plain truthiness test would also
  // turn 0 and false into '' instead of encoding them.
  if (value === null || value === undefined || value === '') {
    return '';
  }
  return htmlEncodeContainer.text(value).html();
}
I ran into some issues with backslash in my Domain\User string.
I added this to the other escapes from Anentropic's answer
.replace(/\\/g, '&#92;')
Which I found here:
How to escape backslash in JavaScript?
Picking what escapeHTML() is doing in the prototype.js
Adding this script helps you escapeHTML:
/**
 * Returns the string with &, < and > replaced by &amp;, &lt; and &gt;.
 * Quotes are not escaped.
 */
String.prototype.escapeHTML = function() {
  // "&" is replaced first so the entities added afterwards stay intact.
  return this.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
};
now you can call escapeHTML method on strings in your script, like:
var escapedString = "<h1>this is HTML</h1>".escapeHTML();
// gives: "&lt;h1&gt;this is HTML&lt;/h1&gt;"
Hope it helps anyone looking for a simple solution without having to include the entire prototype.js
Using some of the other answers here I made a version that replaces all the pertinent characters in one pass irrespective of the number of distinct encoded characters (only one call to replace()) so will be faster for larger strings.
It doesn't rely on the DOM API to exist or on other libraries.
/**
 * window.encodeHTML(text): escapes & " ' < > and / in a single replace()
 * pass, without relying on the DOM or on other libraries.
 */
window.encodeHTML = (function() {
    // Escapes regex metacharacters so the keys can be placed in a pattern.
    function escapeRegex(s) {
        return s.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
    }
    var encodings = {
        '&'  : '&amp;',
        '"'  : '&quot;',
        '\'' : '&#39;',
        '<'  : '&lt;',
        '>'  : '&gt;',
        // The key used to be a backslash, which rewrote "\" as the entity
        // for "/" and changed the text. The forward slash is the character
        // this entity stands for.
        '/'  : '&#x2F;'
    };
    function encode(what) { return encodings[what]; }
    // One character class built from the table keys, so new entries only
    // have to be added to `encodings`.
    var specialChars = new RegExp('[' +
        escapeRegex(Object.keys(encodings).join('')) +
        ']', 'g');

    return function(text) { return text.replace(specialChars, encode); };
})();
Having ran that once, you can now call
encodeHTML('<>&"\'')
To get &lt;&gt;&amp;&quot;&#39;
function encodeHTML(str) {
  // The string goes into a detached <a> as a text node; the element's
  // innerHTML is returned.
  var anchor = document.createElement("a");
  var textNode = anchor.appendChild(document.createTextNode(str));
  return textNode.parentNode.innerHTML;
};
/**
 * Decodes HTML character references in `str` and returns the plain text.
 *
 * A <textarea> is used as the scratch element instead of an <a>: its content
 * is parsed as text, so tags in the input are not turned into elements.
 */
function decodeHTML(str) {
  var element = document.createElement("textarea");
  element.innerHTML = str;
  return element.textContent;
};
var str = "<"
var enc = encodeHTML(str);
var dec = decodeHTML(enc);
console.log("str: " + str, "\nenc: " + enc, "\ndec: " + dec);
Necromancing.
There's certainly no jQuery required for that !
Here a JavaScript port from System.Web.HttpUtility (C# - disclaimer: not very tested):
"use strict";
/**
 * Decodes HTML character references in a string.
 *
 * Handles the named entities listed in initEntities() (e.g. `&lt;`,
 * `&nbsp;`, `&euro;`), decimal references (`&#65;`) and hexadecimal
 * references (`&#x41;`). Anything that is not a complete, recognised
 * reference - unknown names, references without a closing `;`, the value 0,
 * values above 65535 - is copied to the output exactly as it was written.
 *
 * @param {?string} s - text to decode
 * @returns {?string} the decoded text; null when `s` is null or undefined
 */
function htmlDecode(s) {
    if (s == null)
        return null;
    if (s.length === 0)
        return "";
    // Fast path: without an ampersand there is nothing to decode.
    if (s.indexOf('&') === -1)
        return s;

    // True when `ch` is a single decimal digit.
    function isDigit(ch) {
        return /^[0-9]$/.test(ch);
    }

    // True when `ch` is a single hexadecimal digit. It is called with one
    // character at a time, so the pattern must match exactly one character.
    function isHexDigit(ch) {
        return /^[0-9A-Fa-f]$/.test(ch);
    }

    // Builds the entity-name -> character lookup table.
    function initEntities() {
        return {
            // Latin-1 symbols and letters (U+00A0 - U+00FF)
            "nbsp": '\u00A0', "iexcl": '\u00A1', "cent": '\u00A2', "pound": '\u00A3',
            "curren": '\u00A4', "yen": '\u00A5', "brvbar": '\u00A6', "sect": '\u00A7',
            "uml": '\u00A8', "copy": '\u00A9', "ordf": '\u00AA', "laquo": '\u00AB',
            "not": '\u00AC', "shy": '\u00AD', "reg": '\u00AE', "macr": '\u00AF',
            "deg": '\u00B0', "plusmn": '\u00B1', "sup2": '\u00B2', "sup3": '\u00B3',
            "acute": '\u00B4', "micro": '\u00B5', "para": '\u00B6', "middot": '\u00B7',
            "cedil": '\u00B8', "sup1": '\u00B9', "ordm": '\u00BA', "raquo": '\u00BB',
            "frac14": '\u00BC', "frac12": '\u00BD', "frac34": '\u00BE', "iquest": '\u00BF',
            "Agrave": '\u00C0', "Aacute": '\u00C1', "Acirc": '\u00C2', "Atilde": '\u00C3',
            "Auml": '\u00C4', "Aring": '\u00C5', "AElig": '\u00C6', "Ccedil": '\u00C7',
            "Egrave": '\u00C8', "Eacute": '\u00C9', "Ecirc": '\u00CA', "Euml": '\u00CB',
            "Igrave": '\u00CC', "Iacute": '\u00CD', "Icirc": '\u00CE', "Iuml": '\u00CF',
            "ETH": '\u00D0', "Ntilde": '\u00D1', "Ograve": '\u00D2', "Oacute": '\u00D3',
            "Ocirc": '\u00D4', "Otilde": '\u00D5', "Ouml": '\u00D6', "times": '\u00D7',
            "Oslash": '\u00D8', "Ugrave": '\u00D9', "Uacute": '\u00DA', "Ucirc": '\u00DB',
            "Uuml": '\u00DC', "Yacute": '\u00DD', "THORN": '\u00DE', "szlig": '\u00DF',
            "agrave": '\u00E0', "aacute": '\u00E1', "acirc": '\u00E2', "atilde": '\u00E3',
            "auml": '\u00E4', "aring": '\u00E5', "aelig": '\u00E6', "ccedil": '\u00E7',
            "egrave": '\u00E8', "eacute": '\u00E9', "ecirc": '\u00EA', "euml": '\u00EB',
            "igrave": '\u00EC', "iacute": '\u00ED', "icirc": '\u00EE', "iuml": '\u00EF',
            "eth": '\u00F0', "ntilde": '\u00F1', "ograve": '\u00F2', "oacute": '\u00F3',
            "ocirc": '\u00F4', "otilde": '\u00F5', "ouml": '\u00F6', "divide": '\u00F7',
            "oslash": '\u00F8', "ugrave": '\u00F9', "uacute": '\u00FA', "ucirc": '\u00FB',
            "uuml": '\u00FC', "yacute": '\u00FD', "thorn": '\u00FE', "yuml": '\u00FF',
            "fnof": '\u0192',
            // Greek capital letters
            "Alpha": '\u0391', "Beta": '\u0392', "Gamma": '\u0393', "Delta": '\u0394',
            "Epsilon": '\u0395', "Zeta": '\u0396', "Eta": '\u0397', "Theta": '\u0398',
            "Iota": '\u0399', "Kappa": '\u039A', "Lambda": '\u039B', "Mu": '\u039C',
            "Nu": '\u039D', "Xi": '\u039E', "Omicron": '\u039F', "Pi": '\u03A0',
            "Rho": '\u03A1', "Sigma": '\u03A3', "Tau": '\u03A4', "Upsilon": '\u03A5',
            "Phi": '\u03A6', "Chi": '\u03A7', "Psi": '\u03A8', "Omega": '\u03A9',
            // Greek small letters
            "alpha": '\u03B1', "beta": '\u03B2', "gamma": '\u03B3', "delta": '\u03B4',
            "epsilon": '\u03B5', "zeta": '\u03B6', "eta": '\u03B7', "theta": '\u03B8',
            "iota": '\u03B9', "kappa": '\u03BA', "lambda": '\u03BB', "mu": '\u03BC',
            "nu": '\u03BD', "xi": '\u03BE', "omicron": '\u03BF', "pi": '\u03C0',
            "rho": '\u03C1', "sigmaf": '\u03C2', "sigma": '\u03C3', "tau": '\u03C4',
            "upsilon": '\u03C5', "phi": '\u03C6', "chi": '\u03C7', "psi": '\u03C8',
            "omega": '\u03C9', "thetasym": '\u03D1', "upsih": '\u03D2', "piv": '\u03D6',
            // General punctuation and letter-like symbols
            "bull": '\u2022', "hellip": '\u2026', "prime": '\u2032', "Prime": '\u2033',
            "oline": '\u203E', "frasl": '\u2044', "weierp": '\u2118', "image": '\u2111',
            "real": '\u211C', "trade": '\u2122', "alefsym": '\u2135',
            // Arrows
            "larr": '\u2190', "uarr": '\u2191', "rarr": '\u2192', "darr": '\u2193',
            "harr": '\u2194', "crarr": '\u21B5', "lArr": '\u21D0', "uArr": '\u21D1',
            "rArr": '\u21D2', "dArr": '\u21D3', "hArr": '\u21D4',
            // Mathematical operators
            "forall": '\u2200', "part": '\u2202', "exist": '\u2203', "empty": '\u2205',
            "nabla": '\u2207', "isin": '\u2208', "notin": '\u2209', "ni": '\u220B',
            "prod": '\u220F', "sum": '\u2211', "minus": '\u2212', "lowast": '\u2217',
            "radic": '\u221A', "prop": '\u221D', "infin": '\u221E', "ang": '\u2220',
            "and": '\u2227', "or": '\u2228', "cap": '\u2229', "cup": '\u222A',
            "int": '\u222B', "there4": '\u2234', "sim": '\u223C', "cong": '\u2245',
            "asymp": '\u2248', "ne": '\u2260', "equiv": '\u2261', "le": '\u2264',
            "ge": '\u2265', "sub": '\u2282', "sup": '\u2283', "nsub": '\u2284',
            "sube": '\u2286', "supe": '\u2287', "oplus": '\u2295', "otimes": '\u2297',
            "perp": '\u22A5', "sdot": '\u22C5',
            // Miscellaneous technical and geometric symbols
            "lceil": '\u2308', "rceil": '\u2309', "lfloor": '\u230A', "rfloor": '\u230B',
            "lang": '\u2329', "rang": '\u232A', "loz": '\u25CA', "spades": '\u2660',
            "clubs": '\u2663', "hearts": '\u2665', "diams": '\u2666',
            // Markup-significant characters
            "quot": '\u0022', "amp": '\u0026', "lt": '\u003C', "gt": '\u003E',
            // Latin Extended and spacing modifiers
            "OElig": '\u0152', "oelig": '\u0153', "Scaron": '\u0160', "scaron": '\u0161',
            "Yuml": '\u0178', "circ": '\u02C6', "tilde": '\u02DC',
            // Spaces, joiners, dashes and quotation marks
            "ensp": '\u2002', "emsp": '\u2003', "thinsp": '\u2009', "zwnj": '\u200C',
            "zwj": '\u200D', "lrm": '\u200E', "rlm": '\u200F', "ndash": '\u2013',
            "mdash": '\u2014', "lsquo": '\u2018', "rsquo": '\u2019', "sbquo": '\u201A',
            "ldquo": '\u201C', "rdquo": '\u201D', "bdquo": '\u201E', "dagger": '\u2020',
            "Dagger": '\u2021', "permil": '\u2030', "lsaquo": '\u2039', "rsaquo": '\u203A',
            "euro": '\u20AC'
        };
    }

    var entities = initEntities();
    var output = [];
    // Exact source text of the reference currently being scanned, starting
    // with its '&'. It is emitted unchanged whenever the reference turns out
    // not to be decodable, so no input characters are lost or rewritten.
    var pending = [];
    // Scanner state:
    //   0 - outside any reference
    //   1 - just after '&'
    //   2 - inside a named reference (between '&' and ';', no leading '#')
    //   3 - inside a numeric reference (after '&#')
    var state = 0;
    var number = 0;          // value accumulated for a numeric reference
    var isHexValue = false;  // true after "&#x" / "&#X"

    for (var i = 0; i < s.length; i++) {
        var c = s[i];

        if (state === 0) {
            if (c === '&') {
                pending.push(c);
                state = 1;
            }
            else {
                output.push(c);
            }
            continue;
        }

        if (c === '&') {
            // A new reference starts before the previous one was closed:
            // flush the unfinished text verbatim and start over.
            output.push(pending.join(""));
            pending = ['&'];
            state = 1;
            continue;
        }

        if (state === 1) {
            if (c === ';') {
                // "&;" is not a reference; keep it as written.
                output.push(pending.join("") + c);
                pending = [];
                state = 0;
            }
            else {
                number = 0;
                isHexValue = false;
                state = (c === '#') ? 3 : 2;
                pending.push(c);
            }
        }
        else if (state === 2) {
            pending.push(c);
            if (c === ';') {
                var text = pending.join("");
                var name = text.slice(1, -1); // strip the leading '&' and trailing ';'
                output.push(entities.hasOwnProperty(name) ? entities[name] : text);
                pending = [];
                state = 0;
            }
        }
        else { // state === 3
            if (c === ';') {
                // 0 and values that do not fit in one UTF-16 code unit are
                // left undecoded.
                if (number === 0 || number > 65535)
                    output.push(pending.join("") + c);
                else
                    output.push(String.fromCharCode(number));
                pending = [];
                state = 0;
            }
            else if (isHexValue && isHexDigit(c)) {
                number = number * 16 + parseInt(c, 16);
                pending.push(c);
            }
            else if (isDigit(c)) {
                number = number * 10 + (c.charCodeAt(0) - '0'.charCodeAt(0));
                pending.push(c);
            }
            else if (number === 0 && (c === 'x' || c === 'X')) {
                isHexValue = true;
                pending.push(c);
            }
            else {
                // Not a numeric reference after all; keep scanning up to the
                // next ';' as if it were a (necessarily unknown) name.
                state = 2;
                pending.push(c);
            }
        }
    }

    // Input ended in the middle of a reference: emit it as written.
    if (pending.length > 0) {
        output.push(pending.join(""));
    }
    return output.join("");
}
/**
 * Encodes a string for safe inclusion in HTML.
 *
 * `&`, `<`, `>`, `"` and `'` become character entities, the fullwidth
 * angle brackets U+FF1C / U+FF1E and the characters U+00A0 - U+00FF become
 * decimal numeric references, and every other character is copied
 * unchanged.
 *
 * @param {?string} s - text to encode
 * @returns {?string} the encoded text; null when `s` is null or undefined
 */
function htmlEncode(s) {
    if (s == null)
        return null;
    if (s.length === 0)
        return s;

    // Fast path: return the input itself when no character can need
    // encoding (markup characters, or any code unit above 159).
    if (!/[&"<>'\u00A0-\uFFFF]/.test(s))
        return s;

    var output = [];
    for (var i = 0; i < s.length; i++) {
        var ch = s[i];
        switch (ch) {
            case '&':
                output.push("&amp;");
                break;
            case '>':
                output.push("&gt;");
                break;
            case '<':
                output.push("&lt;");
                break;
            case '"':
                output.push("&quot;");
                break;
            case '\'':
                output.push("&#39;");
                break;
            case '\uff1c': // fullwidth less-than sign
                output.push("&#65308;");
                break;
            case '\uff1e': // fullwidth greater-than sign
                output.push("&#65310;");
                break;
            default:
                var code = ch.charCodeAt(0);
                if (code > 159 && code < 256) {
                    output.push("&#" + code.toString() + ";");
                }
                else {
                    output.push(ch);
                }
                break;
        }
    }
    return output.join("");
}

How to parse XML string with Prototype?

I have a string <ul><li e="100" n="50">Foo</li><li e="200" n="150">Bar</li></ul> and on client side I have to convert it to JSON. Something like {data:['Foo','Bar'],params:['100;50','200;150']}
I found a pretty good way to achieve it here, so my code should look something like this:
var $input = $(input);
var data = "data:[";
var params = "params:[";
var first = true;
$input.find("li").each(function() {
if (!first) {
data += ",";
params += ",";
} else {
first = false;
}
data += "'" + $(this).text() + "'";
var e = $(this).attr("e");
var n = $(this).attr("n");
params += "'" + e + ';' + n + "'";
});
return "{data + "]," + params + "]}";
But the problem is that I can't use jquery. How can I do the same thing with prototype?
You want to use a DOM parser:
https://developer.mozilla.org/en/DOMParser
Something like this...
var xmlStr = '<ul><li e="100" n="50">Foo</li><li e="200" n="150">Bar</li></ul>';
var parser = new DOMParser();
var doc = parser.parseFromString(xmlStr, "application/xml");
var rootElement = doc.documentElement;
var children = rootElement.childNodes;
var jsonObj = {
data: [],
params: []
};
for (var i = 0; i < children.length; i++) {
// I realize this is not how your implementation is, but this should give
// you an idea of how to work on the DOM element
jsonObj.data.push( children[i].getAttribute('e') );
jsonObj.params.push( children[i].getAttribute('n') );
}
return jsonObj.toJSON();
Also, don't manually build your JSON string. Populate an object, then JSON-encode it.
Edit: Note that you need to test for DOMParser before you can use it. Check here for how you can do that. Sorry for the W3Schools link.
Why are you building the arrays by concatenating strings? Why not use real arrays:
var data = new Array();
var params = new Array();
$$("li").each(function() {
data.push ($(this).text());
params.psuh($(this).attr("e") + ";" + $(this).attr("n"));
});
return {data:data.toString(), params:params.toString()};
or
return {data:data, params:params};

Categories