Trouble using string.replace() Javascript with Unicode Symbols - javascript

I'm writing a javascript function to move a pointing arrow from one html element to another. "&#9664" displays an arrow in HTML.
The trouble is that while I can add an arrow to innerHTML I can't seem to remove the arrow from the current selection.
Here is the relevant portion of my code:
// Element that currently carries the arrow marker; stays undefined until the first call.
var current;

// Moves the " &#9664" arrow marker from the previously selected element to `line`.
// NOTE(review): the replace below searches for the literal entity text; a browser's
// innerHTML is expected to contain the rendered character instead, in which case
// nothing is removed — confirm against the target browsers.
function changeArrowFunction(line) {
    var hasPrevious = typeof current !== 'undefined';
    if (hasPrevious) {
        var withoutArrow = current.innerHTML.replace(" &#9664", "");
        current.innerHTML = withoutArrow;
    }
    line.innerHTML += " &#9664";
    current = line;
}
I tried changing around the typeof condition or removing it completely with no sign of improvement, so it seems the problem is with replace().

The problem is that innerHTML does not preserve HTML character references: the browser stores the decoded character, not the code you wrote.
If you log/alert the value of innerHTML you could see that the character ◀ is visible there not the string &#9664, so the replace function won't be able to find the character sequence to replace it.
// Element that currently shows the arrow; undefined until the first selection.
var current;

/**
 * Moves the arrow marker (a space followed by U+25C0, the character that
 * "&#9664" renders as) from the previously selected element to `line`.
 *
 * @param {Element|null} line - element to mark; a null/undefined value is ignored
 *     so a failed lookup does not throw and the current marker stays in place.
 */
function changeArrayFunction(line) {
    // innerHTML returns the decoded character, so compare against the character
    // itself rather than the "&#9664" entity text.
    var arrowSuffix = " \u25C0";
    if (!line) {
        return;
    }
    if (typeof current != 'undefined') {
        var html = current.innerHTML;
        // Remove the LAST occurrence: that is the one appended below, even when
        // the element's own content already contains the same character.
        var at = html.lastIndexOf(arrowSuffix);
        if (at !== -1) {
            current.innerHTML = html.slice(0, at) + html.slice(at + arrowSuffix.length);
        }
    }
    line.innerHTML = line.innerHTML + arrowSuffix;
    current = line;
}
// Index of the most recently selected demo element (d-1, d-2, ...).
var c = 0;
// Demo click handler: selects the next "d-N" element. Once the ids run out the
// lookup returns null, which changeArrayFunction ignores.
function test() {
    changeArrayFunction(document.getElementById('d-' + (++c)));
}
<div id="d-1">adf</div>
<div id="d-2">adf</div>
<div id="d-3">adf</div>
<div id="d-4">adf</div>
<button onclick="test();">Test</button>

Related

Unable to Get Output From While Loop in Javascript

I'm working on my final project of the Winter 2017 quarter to demonstrate how to use Regular Expressions in both C# and JavaScript code behind pages. I've got the C# version of my demonstration program done, but the JavaScript version is making me pull what little hair I have left on my head out (no small achievement since I got a fresh buzz cut this morning!). The problem involves not getting any output after applying a Regular Expression in a While loop to get each instance of the expression and printing it out.
On my HTML page I have an input textarea, seven radio buttons, an output textarea, and two buttons underneath (one button is to move the output text to the input area to perform multiple iterations of applying expressions, and the other button to clear all textareas for starting from scratch). Each radio button links to a function that applies a regular expression to the text in the input area. Five of my seven functions work; the sixth is the one I can't figure out, and the seventh is essentially the same but with a slightly different RegEx pattern, so if I fix the sixth function, the seventh function will be a snap.
(I tried to insert/upload a JPG of the front end, but the photo upload doesn't seem to be working. Hopefully you get the drift of what I've set up.)
Here are my problem children from my JS code behind:
// RegEx_Demo_JS.js - code behind for RegEx_Demo_JS
var inputString; // Global variable for the input from the input text box.
var pattern; // Global variable for the regular expression.
var result; // Global variable for the result of applying the regular expression to the user input.
// StringBuilder: collects text fragments and joins them on demand.
// append(value)     - adds the value's text; empty/null/undefined values are ignored.
// appendLine(value) - like append, but once the builder holds content each call
//                     is prefixed with "\r\n" (an empty value adds just "\r\n").
// clear(), isEmpty(), toString() - reset, test for content, and join the fragments.
function StringBuilder()
{
    var parts = [];

    // True for anything other than null or undefined.
    function isPresent(value)
    {
        return !(value == null || typeof(value) == "undefined");
    }

    // Name parsed from the value's constructor source text, or "undefined"
    // when no "function Name" can be found in it.
    function typeNameOf(instance)
    {
        if (!isPresent(instance.constructor)) throw Error("Unexpected object type");
        var found = String(instance.constructor).match(/function\s+(\w+)/);
        return isPresent(found) ? found[1] : "undefined";
    }

    // Normalizes any input to text: "" for null/undefined, the value itself
    // when it is already a string, String(value) otherwise.
    function toText(value)
    {
        if (!isPresent(value)) return "";
        return typeNameOf(value) == typeNameOf(new String()) ? value : String(value);
    }

    this.append = function (string)
    {
        var text = toText(string);
        if (text.length > 0) parts.push(text);
    };

    this.appendLine = function (string)
    {
        var text = toText(string);
        if (!this.isEmpty())
        {
            parts.push(text.length > 0 ? "\r\n" + text : "\r\n");
            return;
        }
        // Nothing stored yet: the first line gets no leading line break.
        if (text.length > 0) parts.push(text);
    };

    this.clear = function () { parts = []; };
    this.isEmpty = function () { return parts.length == 0; };
    this.toString = function () { return parts.join(""); };
}
Within the code of the second radio button (which will be the seventh and last function to complete), I tested the StringBuilder with data in a local variable, and it ran successfully and produced output in the output textarea. But I get no output from this next function, which uses a while loop:
// Finds every whitespace-delimited occurrence of "the" (any letter case) in the
// text of #txtUserInput and writes one "Match: <text>" line per hit to
// #txtRegExOutput. Reads and updates the file-level globals inputString and pattern.
function RegEx_Match_TheOnly_AllInstances()
{
    inputString = document.getElementById("txtUserInput").value;
    // /i ignores case; /g makes each exec() call resume after the previous match,
    // which is what lets the loop below walk through every occurrence.
    // Requiring whitespace on both sides means "the" inside a word is not matched.
    pattern = /(\s+the\s+)/ig;
    var arrResult; // Result array of the current match; null once there are no more.
    // NOTE(review): getStringBuilder() is not defined in the code shown here —
    // confirm it exists and returns a StringBuilder instance.
    var sb = getStringBuilder();
    while ((arrResult = pattern.exec(inputString)) !== null)
    {
        // appendLine is a method and must be CALLED. Assigning a string to it
        // replaces the method and stores nothing, leaving the output empty.
        sb.appendLine("Match: " + arrResult[0]);
    }
    document.getElementById("txtRegExOutput").value = sb.toString();
    /* Original code from C# version:
    // string pattern = @"\s+(?i)the\s+"; // Same as above, but using Option construct for case insensitive search.
    string pattern = @"(^|\s+)(?i)the(\W|\s+)";
    MatchCollection matches = Regex.Matches(userTextInput, pattern);
    StringBuilder outputString = new StringBuilder();
    foreach (Match match in matches)
    {
    string outputRegExs = "Match: " + "\"" + match.Value + "\"" + " at index [" + match.Index + ","
    + (match.Index + match.Length) + "]" + "\n";
    outputString.Append(outputRegExs);
    }
    txtRegExOutput.Text = outputString.ToString();
    */
} // End RegEx_Match_The_AllInstances
I left the commented code in to show what I had used in the C# code behind version to illustrate what I'm trying to accomplish.
The test input/string I used for this function is:
Don’t go there. If you want to be the Man, you have to beat The Man.
That should return two hits. Ideally, I want it to show the word that it found and the index where it found the word, but at this point I'd be happy to just get some output showing every instance it found, and then build on that with the index and possibly the lastIndex.
So, is my problem in my While loop, the way I'm applying the StringBuilder, or a combination of the two? I know the StringBuilder code works, at least when not being used in a loop and using some test data from the site I found that code. And the code for simply finding the first instance of "the" as a standalone or inside another word does work and returns output, but that doesn't use a loop.
I've looked through Stack Overflow and several other JavaScript websites for inspiration, but nothing I've tried so far has worked. I appreciate any help anyone can provide! (If you need me to post any other code, please advise and I'll be happy to oblige.)

Is it possible to highlight all words on a web page without destroying the layout?

I've written an extension for firefox which highlights all words on a web page (excluding some words in a given list).
What i've noticed is that (besides that my extension is terribly slow) some web pages get "destroyed", more specifically the layout gets destroyed (particularly websites with overlay advertising or fancy drop-down menus).
My code wraps <span> tags around every "word", or to be precise around every token, because I'm splitting the text nodes with whitespace as the separator.
So is it possible anyway to realize this task without destroying the page's layout?
I'm iterating over all text nodes, split them, and iterate over every token.
When the token is in my list, i don't highlight it, else i wrap the <span> tag around it.
So any suggestions how this could be done faster would be helpful, too.
Here are some screenshots for a correctly highlighted and a not correctly highlighted web page:
right:
en.wikipedia.org before highlighting,
en.wikipedia.org after highlighting.
wrong:
developer.mozilla.org before highlighting,
developer.mozilla.org after highlighting.
OK. Study this code. It searches for all instances of "is" and highlights each one that is not surrounded by word characters. Paste it into your Scratchpad while this tab is focused. You will see that words like "List" and other words containing "is" are not highlighted, but every standalone "is" is.
I basically made an addon here for you. You can now release this as an addon called RegEx FindBar and take all the credit....
// Highlights every case-insensitive occurrence of "is" in the current tab's
// document that is not directly preceded or followed by a word character.
// Matches are located with nsIFind and added to the find selection, so the
// page's DOM is not modified.
var doc = gBrowser.contentDocument;
var ctrler = _getSelectionController(doc.defaultView);
var searchRange = doc.createRange();
searchRange.selectNodeContents(doc.documentElement);
// Collapsed copies of the search range: where to start and where to stop.
let startPt = searchRange.cloneRange();
startPt.collapse(true);
let endPt = searchRange.cloneRange();
endPt.collapse(false);
let retRange = null; // was misspelled "retRane", which left retRange an implicit global
// XPCOM contract IDs start with "@".
let finder = Cc["@mozilla.org/embedcomp/rangefind;1"].createInstance().QueryInterface(Ci.nsIFind);
finder.caseSensitive = false;
var i = 0;
while ((retRange = finder.Find('is', searchRange, startPt, endPt))) {
    i++;
    var stCont = retRange.startContainer;
    var endCont = retRange.endContainer;
    console.log('retRange(' + i + ') = ', retRange);
    console.log('var txt = retRange.commonAncestorContainer.data',retRange.commonAncestorContainer.data);
    // Character immediately before the match inside the same text node;
    // '' when the match starts at offset 0 (nothing precedes it in this node).
    var oneCharBeforeStChar = retRange.startOffset == 0 ? '' : stCont.data.substr(retRange.startOffset - 1, 1);
    var isOneCharBeforeStCharWordChar = /\w/.test(oneCharBeforeStChar);
    console.log('oneCharBeforeStChar',oneCharBeforeStChar);
    // endOffset already points at the first character AFTER the match, and
    // substr yields '' when that is past the end of the node. The previous
    // "endOffset == length - 1" test skipped the check exactly when one
    // trailing character existed.
    var oneCharAferEndChar = endCont.data.substr(retRange.endOffset, 1);
    var isOneCharAfterEndCharWordChar = /\w/.test(oneCharAferEndChar);
    console.log('oneCharAferEndChar',oneCharAferEndChar);
    if (!isOneCharBeforeStCharWordChar && !isOneCharAfterEndCharWordChar) {
        // highlight it as surrounding characters are not word characters
        _highlightRange(retRange, ctrler);
        console.log('highlighted it as it was not surrounded by word characters');
    } else {
        console.log('NOT highlighted as it was adjacent to a word character');
    }
    // Continue searching right after this match.
    startPt = retRange.cloneRange();
    startPt.collapse(false);
}
/*********************/
// Walks from aNode up through its ancestors and stops at the first node that is
// an nsIDOMNSEditableElement. Returns that node when it has an editor, null when
// it has none, and null when no such ancestor exists.
function _getEditableNode(aNode) {
    for (let candidate = aNode; candidate; candidate = candidate.parentNode) {
        if (candidate instanceof Ci.nsIDOMNSEditableElement) {
            return candidate.editor ? candidate : null;
        }
    }
    return null;
}
// Adds aRange to the SELECTION_FIND selection. When the range starts inside an
// editable element, that element's editor supplies the selection controller
// instead of aController, and the editor is registered once in this._editors
// with a matching entry in this._stateListeners.
// Relies on `this` providing _getEditableNode and _createStateListener.
function _highlightRange(aRange, aController) {
    const editableNode = this._getEditableNode(aRange.startContainer);
    const controller = editableNode ? editableNode.editor.selectionController : aController;
    controller.getSelection(Ci.nsISelectionController.SELECTION_FIND).addRange(aRange);
    if (!editableNode) {
        return;
    }
    // Highlighting was added inside an editor: remember the editor and attach
    // listeners so later edits within the highlighting can be handled.
    if (!this._editors) {
        this._editors = [];
        this._stateListeners = [];
    }
    const editor = editableNode.editor;
    if (this._editors.indexOf(editor) != -1) {
        return; // already tracked
    }
    const slot = this._editors.length;
    this._editors[slot] = editor;
    const stateListener = this._createStateListener();
    this._stateListeners[slot] = stateListener;
    editor.addEditActionListener(this);
    editor.addDocumentStateListener(stateListener);
}
// Returns the nsISelectionController for aWindow, reached through the window's
// docshell, or null when the window has no width or height.
function _getSelectionController(aWindow) {
    // display: none iframes don't have a selection controller, see bug 493658
    const hasSize = aWindow.innerWidth && aWindow.innerHeight;
    if (!hasSize) {
        return null;
    }
    // Yuck. See bug 138068.
    const windowRequestor = aWindow.QueryInterface(Ci.nsIInterfaceRequestor);
    const webNavigation = windowRequestor.getInterface(Ci.nsIWebNavigation);
    const docShell = webNavigation.QueryInterface(Ci.nsIDocShell);
    const shellRequestor = docShell.QueryInterface(Ci.nsIInterfaceRequestor);
    const selectionDisplay = shellRequestor.getInterface(Ci.nsISelectionDisplay);
    return selectionDisplay.QueryInterface(Ci.nsISelectionController);
}
Oh edit my solution out, will update with proper solution, I see you want to highlight all words
This is the code how firefox highlights stuff without changing document: Finder.jsm - _highlight function. You will have to copy this and use it for the whole document, if you need help let me know and I'll do it.
Here was my solution to highlight all matches of single word: https://stackoverflow.com/a/22206366/1828637
Here man this is how you are going to highlight the whole document, I didn't finish the snippet but this is the start of it: Gist - HighlightTextInDocument
Here's the copy paste answer to highlight everything in the document. As you learn more about it share with us, like how you can highlight with a different color, right now its all pink O_O
// Walks from aNode up through its ancestors and stops at the first node that is
// an nsIDOMNSEditableElement. Returns that node when it has an editor, null when
// it has none, and null when no such ancestor exists.
function _getEditableNode(aNode) {
    for (let candidate = aNode; candidate; candidate = candidate.parentNode) {
        if (candidate instanceof Ci.nsIDOMNSEditableElement) {
            return candidate.editor ? candidate : null;
        }
    }
    return null;
}
// Adds aRange to the SELECTION_FIND selection. When the range starts inside an
// editable element, that element's editor supplies the selection controller
// instead of aController, and the editor is registered once in this._editors
// with a matching entry in this._stateListeners.
// Relies on `this` providing _getEditableNode and _createStateListener.
function _highlightRange(aRange, aController) {
    const editableNode = this._getEditableNode(aRange.startContainer);
    const controller = editableNode ? editableNode.editor.selectionController : aController;
    controller.getSelection(Ci.nsISelectionController.SELECTION_FIND).addRange(aRange);
    if (!editableNode) {
        return;
    }
    // Highlighting was added inside an editor: remember the editor and attach
    // listeners so later edits within the highlighting can be handled.
    if (!this._editors) {
        this._editors = [];
        this._stateListeners = [];
    }
    const editor = editableNode.editor;
    if (this._editors.indexOf(editor) != -1) {
        return; // already tracked
    }
    const slot = this._editors.length;
    this._editors[slot] = editor;
    const stateListener = this._createStateListener();
    this._stateListeners[slot] = stateListener;
    editor.addEditActionListener(this);
    editor.addDocumentStateListener(stateListener);
}
// Returns the nsISelectionController for aWindow, reached through the window's
// docshell, or null when the window has no width or height.
function _getSelectionController(aWindow) {
    // display: none iframes don't have a selection controller, see bug 493658
    const hasSize = aWindow.innerWidth && aWindow.innerHeight;
    if (!hasSize) {
        return null;
    }
    // Yuck. See bug 138068.
    const windowRequestor = aWindow.QueryInterface(Ci.nsIInterfaceRequestor);
    const webNavigation = windowRequestor.getInterface(Ci.nsIWebNavigation);
    const docShell = webNavigation.QueryInterface(Ci.nsIDocShell);
    const shellRequestor = docShell.QueryInterface(Ci.nsIInterfaceRequestor);
    const selectionDisplay = shellRequestor.getInterface(Ci.nsISelectionDisplay);
    return selectionDisplay.QueryInterface(Ci.nsISelectionController);
}
// Highlight the whole page: build one range spanning the document element's
// contents and add it to the find selection of the current tab's window.
var doc = gBrowser.contentDocument;
var searchRange = doc.createRange();
searchRange.selectNodeContents(doc.documentElement);
// NOTE(review): _highlightRange reads this._getEditableNode, so this plain call
// presumably relies on `this` being the global object (non-strict script) — confirm.
_highlightRange(searchRange,_getSelectionController(gBrowser.contentWindow))
@jervis, I can't comment on your comment under @Noitidart's code because I don't have 50 rep yet, so I have to post here.
Re:
I did it with 'gFindBar._highlightDoc(true, word)' now. I'm using firefox 17, so i dont know if gFindBar is state of the art. – jervis 40 mins ago
But I tested his code and and it works.
Don't use gFindBar.
Copy it and then paste it into your Scratchpad.
Why are you using gFindBar._highlightDoc(true, word)? I thought you wanted to highlight everything in the document. Where did you get _highlightDoc from? I don't see that anywhere in @Noitidart's code.
Regarding your comment on iterating over all words and using gFindBar._highlightDoc:
I did it with 'gFindBar._highlightDoc(true, word)' now. I'm using firefox 17, so i dont know if gFindBar is state of the art. – jervis 39 mins ago
Dude, why do that? I saw @Noitidart posted a per-word solution on the linked topic: gBrowser.tabContainer.childNodes[0].linkedBrowser.finder.highlight(true, 'YOUR_WORD_HERE'); That is extremely easy: one line, and no need to create text nodes, spans or anything. You have to run this code on each tab you want to highlight in.

Searching for most performant way for string replacing with javascript

I'm programming my own autocomplete textbox control using C# and javascript on clientside. On client side i want to replace the characters in string which matching the characters the user was searching for to highlight it. For example if the user was searching for the characters 'bue' i want to replace this letters in the word 'marbuel' like so:
mar<span style="color:#81BEF7;font-weight:bold">bue</span>l
in order to give the matching part another color. This works fine if I have 100-200 items in my autocomplete, but with 500 or more it takes too much time.
The following code shows my method which does the logic for this:
HighlightTextPart: function (text, part) {
var currentPartIndex = 0;
var partLength = part.length;
var finalString = '';
var highlightPart = '';
var bFoundPart = false;
var bFoundPartHandled = false;
var charToAdd;
for (var i = 0; i < text.length; i++) {
var myChar = text[i];
charToAdd = null;
if (!bFoundPart) {
var myCharLower = myChar.toLowerCase();
var charToCompare = part[currentPartIndex].toLowerCase();
if (charToCompare == myCharLower) {
highlightPart += myChar;
if (currentPartIndex == partLength - 1)
bFoundPart = true;
currentPartIndex++;
}
else {
currentPartIndex = 0;
highlightPart = '';
charToAdd = myChar;
}
}
else
charToAdd = myChar;
if (bFoundPart && !bFoundPartHandled) {
finalString += '<span style="color:#81BEF7;font-weight:bold">' + highlightPart + '</span>';
bFoundPartHandled = true;
}
if (charToAdd != null)
finalString += charToAdd;
}
return finalString;
},
This method only highlight the first occurence of the matching part.
I use it as follows. Once the request is coming back from server i build an html UL list with the matching items by looping over each item and in each loop i call this method in order to highlight the matching part.
As I said, for up to 100 items it works nicely, but it is too much for 500 or more.
Is there any way to make it faster? Maybe by using regex or some other technique?
I also thought about using "setTimeOut" to do it in a extra function or maybe do it only for the items, which currently are visible, because only a couple of items are visible while for the others you have to scroll.
Try limiting visible list size, so you are only showing 100 items at maximum for example. From a usability standpoint, perhaps even go down to only 20 items, so it would be even faster than that. Also consider using classes - see if it improves performance. So instead of
mar<span style="color:#81BEF7;font-weight:bold">bue</span>l
You will have this:
mar<span class="highlight">bue</span>l
String replacement in JavaScript is pretty easy with String.replace():
// Wraps the first occurrence of `part` in `s` with a highlighting <span>.
// `part` is handed to String.prototype.replace unchanged, so a string is matched
// literally and case-sensitively; the matched text is HTML-escaped before it is
// inserted into the markup.
function linkify(s, part)
{
    return s.replace(part, function(m) {
        return '<span style="color:#81BEF7;font-weight:bold">' + htmlspecialchars(m) + '</span>';
    });
}
// Escapes the characters that are significant in HTML text and double-quoted
// attribute values.
function htmlspecialchars(txt)
{
    // "&" must be escaped first, otherwise the "&" of the entities produced
    // below would be escaped a second time. The /g flag is required because
    // replace() with a string pattern only touches the first occurrence.
    return txt.replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');
}
console.log(linkify('marbuel', 'bue'));
I fixed this problem by using regex instead of my method posted previous. I replace the string now with the following code:
return text.replace(new RegExp('(' + part + ')', 'gi'), "<span>$1</span>");
This is pretty fast, much faster than the code above; 500 items in the autocomplete no longer seem to be a problem. But can anybody explain why this is so much faster than my method, or than doing it with string.replace without a regex? I have no idea.
Thx!

Replace only once body text, with some html, by an Iframe

What I need to do
I display an iframe with javascript in the body of an HTML page.
With something like this: document.write('<iframe ...></iframe>');
Inside this iframe is my JavaScript function, which searches for a keyword in the body of the parent document and replaces it with an HTML link in the parent document.
What I've tried
Javascript Bookmarklet to replace text with a link : complex script, but I need the skipTags
and Javascript .replace command replace page text? : very short and nice script, but there is not the skipTags function...
Those worked like a charm when the script is in the document but not in an iframe to work with the parent document.
My problems/questions
The problem is that the 'keyword' is replaced with a
'non-interpreted' html as text. (Browser displays keyword).
My second question is how to do the replace just once, and not for
all the matching expressions ?
Usualy I use some jQuery but in this project I need to use only some javascript without any library.
Any idea to help me ? (I don't want anyone to "write my code", I just want some advices to make it by myself)
P.S. 1 : I use Chrome, but I would like to make it work in every browser.
P.S. 2 : English is not my first language, so if you don't understand something, don't hesitate to ask it to me, I'll try to explain it better.
Edit 2
First script now works for the HTML, so question 1 is solved, but how to do the replace only once, even if the keyword is repeated several times ? (question 2)
With the help of xiaoyi, I've found some solutions :
Stop the loop and replace only the first match
Globalize the functions to search/replace multiple keywords
I think that it could be optimized, but for me it works like a charm, and I share it with you, if it can help anyone (don't forget to change the target of the document, here "parent") :
// Runs inside an iframe and edits the PARENT document: for each configured
// keyword, the first whole-word occurrence found in the parent's body text is
// replaced by a link. Text inside the tags listed in skipTags is left alone.
(function(){
    // don't replace text within these tags
    var skipTags = { 'a': 1, 'style': 1, 'script': 1, 'iframe': 1, 'meta':1, 'title':1, 'img':1, 'h':1 };

    // Depth-first search below `el` for the first text node containing `term`
    // and replace that occurrence. Returns true once a replacement was made,
    // so every enclosing loop stops as well. The result of the recursive call
    // was previously discarded, which allowed one replacement per subtree
    // instead of one per document.
    function findKW( el, term, replFn )
    {
        var child, tag, found = false;
        for (var i = 0; i < el.childNodes.length && !found; i++)
        {
            child = el.childNodes[i];
            if (child.nodeType == 1)
            { // ELEMENT_NODE
                tag = child.nodeName.toLowerCase();
                if (!(tag in skipTags))
                {
                    found = findKW(child, term, replFn);
                }
            }
            else if (child.nodeType == 3)
            { // TEXT_NODE
                found = replaceKW(child, term, replFn);
            }
        }
        return found;
    }

    // Replace the first match of `term` in the text node `text` with the node
    // returned by replFn(matchedText). Returns true when a match was replaced.
    function replaceKW( text, term, replFn )
    {
        // `term` carries the g flag, so exec() resumes at lastIndex; reset it
        // before and after so every text node is searched from its start.
        term.lastIndex = 0;
        var match = term.exec(text.data);
        term.lastIndex = 0;
        if (!match)
        {
            return false;
        }
        // cut out the text node to replace
        text.splitText(match.index);
        text.nextSibling.splitText(match[1].length);
        text.parentNode.replaceChild(replFn(match[1]), text.nextSibling);
        return true;
    }

    // Link the first whole-word occurrence of replTerm in the parent document.
    function linkFirst( replTerm )
    {
        findKW(
            parent.document.body,
            new RegExp('\\b(' + replTerm + ')\\b', 'gi'),
            function (match)
            {
                var link = parent.document.createElement('a');
                link.href = 'http://www.okisurf.com/#q=' + replTerm;
                link.target = '_blank';
                // The match is plain text taken from a text node, so insert it
                // as text rather than parsing it as HTML.
                link.appendChild(parent.document.createTextNode(match));
                return link;
            }
        );
    }

    // First search/replace
    linkFirst('keyword');
    // A second search/replace
    linkFirst('word');
    // Other search/replace
    // ...
}());
I've also discovered that the second solution doesn't work in Internet Explorer, which doesn't support the createTreeWalker() DOM function.

Javascript Regular Expression [Remove Events]

does anyone know of a good regular expression to remove events from html.
For example the string:
<h1 onmouseover="top.location='http://www.google.com'">Large Text</h1>
Becomes
"<h1>Large Text</h1>
So HTML tags are preserved but events like onmouseover, onmouseout, onclick, etc. are removed.
Thanks in Advance!
How about:
// Removes every ` onXxx="..."` (double-quoted) attribute found anywhere in `data`.
// replace() returns a new string, so the result has to be used or assigned.
data.replace(/ on\w+="[^"]*"/g, '');
Edit from the comments:
This is intended to be run on your markup as a one time thing. If you're trying to remove events dynamically during the execution of the page, that's a slightly different story. A javascript library like jQuery makes it extremely easy, though:
// Calls jQuery's unbind() with no arguments on every element in the page.
// NOTE(review): presumably this only detaches handlers that were attached through
// jQuery, not inline on* attributes — confirm against the jQuery version in use.
$('*').unbind();
Edit:
Restricting this to only within tags is a lot harder. I'm not confident it can be done with a single regex expression. However, this should get you by if no one can come up with one:
// Repeatedly strips ` onXxx="..."` attributes that appear inside a tag. Each pass
// removes the attribute run the regex matched last within a tag, so the loop
// keeps going until a full pass makes no replacement.
var matched = true;
while (matched)
{
    matched = false;
    data = data.replace(/(<[^>]+)( on\w+="[^"]*")+/g, function(match, goodPart)
    {
        // Keep only the part of the tag that precedes the handler attribute(s).
        matched = true;
        return goodPart;
    });
}
Edit:
I surrender at writing a single regex for this. There must be some way to check the context of a match without actually capturing the beginning of the tag in your match, but my RegEx-fu is not strong enough. This is the most elegant solution I'm going to come up with:
// For every tag (text from "<" up to the next ">"), remove all on* attributes.
// Handles double-quoted, single-quoted and unquoted values, any whitespace
// before the attribute or around "=", and upper-case attribute names.
// Limitations: a ">" inside an attribute value ends the tag match early, and
// text that looks like ` onfoo=...` inside another attribute's value is removed
// too. This is a markup clean-up helper, not a security sanitizer.
data = data.replace(/<[^>]+/g, function(match)
{
    return match.replace(/\s+on\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');
});
Here's a pure JS way to do it:
/**
 * Removes on* attributes (onclick, onmouseover, ...) from every tag in `html`
 * with a single character scan. Quoted attribute values are skipped, so an
 * " on" inside a value does not count as an attribute.
 *
 * An attribute is recognized as whitespace followed by "on" in any letter case;
 * previously only a literal space and lower-case "on" were recognized, so
 * `ONCLICK=...` and handlers separated by a tab or newline were kept.
 *
 * @param {string} html - markup to clean
 * @returns {string} the markup without on* attributes
 */
function clean(html) {
    // Cuts html[strip..j) out of the string and rewinds j to the cut position.
    function stripHTML(){
        html = html.slice(0, strip) + html.slice(j);
        j = strip;
        strip = false;
    }
    function isValidTagChar(str) {
        return str.match(/[a-z?\\\/!]/i);
    }
    // True for the whitespace characters that can separate attributes.
    function isSpace(ch) {
        return ch === " " || ch === "\t" || ch === "\n" || ch === "\r" || ch === "\f";
    }
    var strip = false; //index to strip from, or false when nothing is pending
    var lastQuote = false; //the open quote character while inside a quoted value, else false
    for(var i=0; i<html.length; i++){
        if(html[i] === "<" && html[i+1] && isValidTagChar(html[i+1])) {
            i++;
            //Enter element
            for(var j=i; j<html.length; j++){
                if(!lastQuote && html[j] === ">"){
                    //End of tag: drop a pending handler that runs up to here
                    if(strip !== false) {
                        stripHTML();
                    }
                    i = j;
                    break;
                }
                if(lastQuote === html[j]){
                    lastQuote = false;
                    continue;
                }
                if(!lastQuote && html[j-1] === "=" && (html[j] === "'" || html[j] === '"')){
                    lastQuote = html[j];
                }
                //Find on statements: whitespace, then "o", then "n" (any case).
                //j-2 is never 0 here because the tag's "<" sits before it.
                if(!lastQuote && isSpace(html[j-2]) && (html[j-1] === "o" || html[j-1] === "O") && (html[j] === "n" || html[j] === "N")){
                    strip = j-2;
                }
                //The handler ends at the next whitespace outside quotes
                if(strip !== false && isSpace(html[j]) && !lastQuote){
                    stripHTML();
                }
            }
        }
    }
    return html;
}

Categories