Find and replace certain text in a document with their HTML counterpart - javascript

I'm trying to scan a document using JavaScript and replace all occurrences of [x] with its HTML counterpart, such as <b>[x] </b>, where x is a number. This is what I have so far, which does add the tag around the target text, but as HTML entities, not actual HTML:
/**
 * Recursively visits `node` and its descendants and rewrites every "[n]"
 * (n = one or more digits) found in a text node to the string "<b>[n]</b>".
 *
 * The result is assigned to the text node's `data`, so the tags end up as
 * literal characters in the text rather than as parsed HTML elements.
 * Subtrees rooted at a SCRIPT element are not visited.
 *
 * @param {Node} node - Node to start from.
 */
function walkText(node) {
  if (node.nodeType == 3) {
    node.data = node.data.replace(/\[(\d+)\]/g, '<b>[$1]</b>');
    return;
  }
  if (node.nodeType != 1 || node.nodeName == "SCRIPT") {
    return;
  }
  let position = 0;
  while (position < node.childNodes.length) {
    walkText(node.childNodes[position]);
    position += 1;
  }
}
walkText(document.body);
The end result is just the text <b> wrapped around the text, and not actually bolding them. Does anyone know an efficient way to actually search for certain text in a long document and actually replace the target text with HTML?

TextNode objects contain only text content without any HTML or XML markup.
So replacing them with markup is not as easy as it first appears.
/**
 * Recursively walks `node` and wraps every run of digits found in a text node
 * in a <b> element.
 *
 * A text node that contains digits mixed with other text is replaced by a
 * <span> whose children are plain text nodes and <b> elements. The pieces are
 * created through the DOM API (never through innerHTML), so characters such
 * as "<" or "&" in the original text stay plain text.
 *
 * A text node that has no digits, or that consists of digits only (for
 * example the text inside a <b> created by an earlier call), splits into a
 * single part and is left untouched.
 *
 * SCRIPT and STYLE subtrees are not visited.
 *
 * @param {Node} node - Node to start from.
 */
function walkText(node) {
  // nodeType 3 === Node.TEXT_NODE; unlike `instanceof Text` this also works
  // for nodes that belong to another frame's document.
  if (node.nodeType === 3) {
    // The capturing group makes split() keep the digit runs in the result.
    const parts = node.data.split(/(\d+)/).filter(Boolean);
    if (parts.length < 2) {
      return;
    }
    const wrapper = document.createElement('span');
    for (const part of parts) {
      if (/\d/.test(part)) {
        const bold = document.createElement('b');
        bold.textContent = part;
        wrapper.appendChild(bold);
      } else {
        wrapper.appendChild(document.createTextNode(part));
      }
    }
    node.parentNode.replaceChild(wrapper, node);
    return;
  }
  if (node.nodeType === 1 && node.nodeName !== 'SCRIPT' && node.nodeName !== 'STYLE') {
    // The wrapper takes the index of the node it replaced, so the loop moves
    // on to the next sibling and never descends into freshly created markup.
    for (let i = 0; i < node.childNodes.length; i++) {
      walkText(node.childNodes[i]);
    }
  }
}
walkText(document.body);
<div>
hello 113 world at 3
</div>

If you just want to make the element bold, replace
node.data = node.data.replace(/\[(\d+)\]/g, '<b>[$1]</b>');
with
node.style.fontWeight='bold';
If you want to replace the contents of the element with some other html, do this:
node.innerHTML = "<mytag>" + node.innerHTML + "</mytag>";

Related

How to skip a specific Div-Area while replacing text in DOM-Nodes

With this edit I try to be more specific with my question and add my last try to solve the problem:
A browser add-on replaces some words on a website. This part works! Now there is some text which should not be replaced. This text is within the class "msg_content", e.g.
<span class="msg_content">don't replace my words</span>.
This part does the replacement-job and should skip elements with class "msg_content":
/**
 * Recursively replaces words in the text nodes below `node`.
 *
 * For a text node, every [word, emoji] pair of `langMap` is applied using the
 * regular expression stored for that word in `regexs`, and the result is
 * written back to the node. Any other node is walked child by child.
 *
 * The "skip" check compares the text node itself with the first element that
 * matches `.msg_content`. A text node is never that element, so the check
 * never triggers and text inside `.msg_content` is still replaced.
 *
 * @param {Node} node - Node to start from.
 */
function replaceText(node) {
  if (node.nodeType !== Node.TEXT_NODE) {
    let position = 0;
    while (position < node.childNodes.length) {
      replaceText(node.childNodes[position]);
      position += 1;
    }
    return;
  }

  // skip msg_content
  try {
    const chatArea = document.querySelector('.msg_content');
    if (node == chatArea) {
      console.log("skip chat-area");
      return;
    }
  } catch (e) {
    console.log(e);
  }

  let updated = node.textContent;
  for (const [word, emoji] of langMap) {
    updated = updated.replace(regexs.get(word), emoji);
  }
  node.textContent = updated;
}
replaceText(document.body);
The replacement works fine, but it doesn't skip the msg_content.
If you want to check whether a string is empty, you can use string.length. I added string.trim() in case the div contains only whitespace, which is common in HTML:
/**
 * Recursively replaces words in the text nodes below `node`, leaving alone
 * everything inside elements that match `skipSelector`.
 *
 * For each non-blank text node, every [word, emoji] pair of `langMap` is
 * applied using the regular expression stored for that word in `regexs`.
 * The node is only written to when the text actually changed.
 *
 * @param {Node} node - Node to start from.
 * @param {string} [skipSelector='.msg_content'] - CSS selector of the areas
 *   that must not be modified; pass an empty string to skip nothing.
 */
function replaceText(node, skipSelector = '.msg_content') {
  if (node.nodeType === Node.TEXT_NODE) {
    // Covers direct calls on a text node that lives inside a skipped area.
    if (skipSelector && node.parentElement && node.parentElement.closest(skipSelector)) {
      return;
    }
    const original = node.textContent;
    if (!original.trim().length) {
      return; // whitespace-only nodes have nothing to replace
    }
    let content = original;
    for (const [word, emoji] of langMap) {
      content = content.replace(regexs.get(word), emoji);
    }
    if (content !== original) {
      node.textContent = content;
    }
    return;
  }

  // Prune the whole subtree as soon as a protected element is reached.
  if (node.nodeType === Node.ELEMENT_NODE && skipSelector && node.matches(skipSelector)) {
    return;
  }

  for (const child of Array.from(node.childNodes)) {
    replaceText(child, skipSelector);
  }
}
replaceText(document.body);

Javascript search text highlight with Swift

I'm making a text-searching mechanism (like ⌘ + F) for an iOS app and It's working but I have two issues.
Whenever someone searches something in Arabic, the word becomes disconnected.
Users can't search if there are diacritics in the text but their search does not (so basically I'm trying to make it diacritic-insensitive)
Here's the code for my highlighting (which I found from this):
// Running total of highlighted matches in the document.
var uiWebview_SearchResultCount = 0;

/**
 * Helper: recursively searches `element` and its child nodes and wraps every
 * occurrence of `keyword` in a highlighted <span class="uiWebviewHighlight">.
 *
 * Matching lower-cases the node text before comparing, so `keyword` is
 * expected to be lower-case already.
 *
 * Each span gets the id "SEARCH WORD<n>". Child nodes are visited from last
 * to first, and within one text node the ids count down from the running
 * total, so the last match in the document ends up with id "SEARCH WORD0".
 *
 * Elements whose inline style has display "none" and <select> elements are
 * not searched.
 *
 * @param {Node} element - HTML element or text node to search.
 * @param {string} keyword - String to search for.
 */
function uiWebview_HighlightAllOccurencesOfStringForElement(element, keyword) {
  // An empty keyword is found at index 0 on every pass, so the loops below
  // would never terminate.
  if (!element || !keyword) {
    return;
  }

  if (element.nodeType == 3) { // Text node
    // Count the (non-overlapping) matches first: ids are handed out counting
    // down from the new running total.
    var haystack = element.nodeValue.toLowerCase();
    var count = 0;
    var from = haystack.indexOf(keyword);
    while (from >= 0) {
      count++;
      from = haystack.indexOf(keyword, from + keyword.length);
    }
    uiWebview_SearchResultCount += count;
    var index = uiWebview_SearchResultCount;

    while (true) {
      var value = element.nodeValue; // Search for keyword in text node
      var idx = value.toLowerCase().indexOf(keyword);
      if (idx < 0) break; // not found, abort

      // We create a SPAN element for every matched keyword.
      var span = document.createElement("span");
      var text = document.createTextNode(value.slice(idx, idx + keyword.length));
      var spacetxt = document.createTextNode("\u200D"); // zero-width joiner after the match
      span.appendChild(text);
      span.appendChild(spacetxt);
      span.setAttribute("class", "uiWebviewHighlight");
      span.style.backgroundColor = "#007DC8a3";
      span.style.borderRadius = "3px";
      index--;
      span.setAttribute("id", "SEARCH WORD" + (index));

      // Split the node: keep the text before the match in `element`, then
      // insert the span and a new text node holding the remainder after it.
      text = document.createTextNode(value.slice(idx + keyword.length));
      element.deleteData(idx, value.length - idx);
      var next = element.nextSibling;
      element.parentNode.insertBefore(span, next);
      element.parentNode.insertBefore(text, next);
      element = text; // continue searching in the remainder
    }
  } else if (element.nodeType == 1) { // Element node
    if (element.style.display != "none" && element.nodeName.toLowerCase() != 'select') {
      for (var i = element.childNodes.length - 1; i >= 0; i--) {
        uiWebview_HighlightAllOccurencesOfStringForElement(element.childNodes[i], keyword);
      }
    }
  }
}
/**
 * Main entry point of the search: clears the highlights of any previous
 * search, then highlights `keyword` (lower-cased) throughout document.body.
 *
 * @param {string} keyword - String to search for.
 */
function uiWebview_HighlightAllOccurencesOfString(keyword) {
  uiWebview_RemoveAllHighlights();
  var root = document.body;
  var needle = keyword.toLowerCase();
  uiWebview_HighlightAllOccurencesOfStringForElement(root, needle);
}
/**
 * Helper: recursively removes the highlight spans found in `element` and its
 * children.
 *
 * An element whose class attribute is exactly "uiWebviewHighlight" is
 * replaced by its first child. Any other element is searched from its last
 * child to its first and normalized if one of its direct children was a
 * highlight.
 *
 * @param {Node} element - Node to clean up.
 * @returns {boolean} true only when `element` itself was a highlight span
 *   that has just been removed.
 */
function uiWebview_RemoveAllHighlightsForElement(element) {
  if (!element || element.nodeType != 1) {
    return false;
  }

  if (element.getAttribute("class") != "uiWebviewHighlight") {
    var removedChild = false;
    var position = element.childNodes.length;
    while (position-- > 0) {
      if (uiWebview_RemoveAllHighlightsForElement(element.childNodes[position])) {
        removedChild = true;
      }
    }
    if (removedChild) {
      // Merge the text nodes that the removed spans had split apart.
      element.normalize();
    }
    return false;
  }

  // Put the span's first child back in the span's place.
  var label = element.removeChild(element.firstChild);
  var parent = element.parentNode;
  parent.insertBefore(label, element);
  parent.removeChild(element);
  return true;
}
/**
 * Main entry point to remove the highlights: resets the match counter to 0
 * and strips every highlight span below document.body.
 */
function uiWebview_RemoveAllHighlights() {
  var root = document.body;
  uiWebview_SearchResultCount = 0;
  uiWebview_RemoveAllHighlightsForElement(root);
}
/**
 * Scrolls the highlight with id "SEARCH WORD<total - idx>" into view, where
 * total is the current value of uiWebview_SearchResultCount. Does nothing
 * when no element has that id.
 *
 * @param {number} idx - Position of the wanted match.
 */
function uiWebview_ScrollTo(idx) {
  var target = document.getElementById("SEARCH WORD" + (uiWebview_SearchResultCount - idx));
  if (!target) {
    return;
  }
  target.scrollIntoView();
}
and I also found this that actually does exactly what I want (does not disconnect words and is diacritic-insensitive) but it's in JQuery and I couldn't figure out how to implement it in my code.
Instead of using indexOf, you can convert the string to an NSString and then use range(of:options:):
var range = value.range(of: keyword, options: [.caseInsensitive, .diacriticInsensitive])

javascript doing multiple createrange() while encountering unexpected anchorOffset

My goal:
Let users highlight different substring in a single long string.
However, once I've highlighted one substring with range.surroundContents(newNode) (newNode is a span with yellow background), the innerHTML of the whole long string changed-- it started to contain the span element; consequently, if the user wants to highlight a substring after the previous highlighted substring in the same long string, the anchorOffset will return the index starting after the previous span.
For example, in this long string:
"Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much."
This long sentence is wrapped in a p element whose class name is noting. If range.surroundContents() is applied to the substring "Privet Drive", then when I read window.getSelection().anchorOffset for the substring "thank", the answer is wrongly 53, while the correct answer should be 102.
How should I do? Thank you!!
P.S. I don't want to use substring method to find the position, thank you!
// On mouse-up inside a ".noting" element: clear the note field, store the
// sentence number and language name taken from the element id (for example
// "p-2-French"), then store the current selection's anchor offset and length.
$(".noting").mouseup(function (e) {
  $("#noteContent").val(""); /* flushing */

  var clicked = $(this);
  curSentNum = clicked.attr("id").split("-")[1];
  $('#curSentNum').val(curSentNum);
  highlightLangName = clicked.attr("id").split("-")[2];
  $('#highlightLangName').val(highlightLangName);

  if (!window.getSelection) {
    // Fallback for browsers that only expose document.selection.
    if (document.selection && document.selection.type != "Control") {
      highlightedText = document.selection.createRange().text;
    }
    return;
  }

  var selection = window.getSelection();
  highlightedText = selection.toString();
  curAnchorOffset = selection.anchorOffset;
  $('#anchorAt').val(curAnchorOffset);
  $('#highlightLen').val(highlightedText.length);
});
And then I'll save the anchorAt information to db; after the db operation, I'll immediately call this function using the previous variables remained:
/**
 * Wraps the stored selection of the current paragraph in a highlighted span.
 *
 * The start offset and length are read from the #anchorAt and #highlightLen
 * fields. The paragraph is looked up by the id
 * "p-<curSentNum>-<value of #highlightLangName>" (for example "p-2-French").
 * Both range boundaries are set on the paragraph's first child node.
 */
function highlightNoteJustSaved() {
  var start = parseInt($("#anchorAt").val());
  var length = parseInt($("#highlightLen").val());

  /* p to find, for example: p-2-French */
  var paragraphId = 'p-' + curSentNum.toString() + "-" + $("#highlightLangName").val();
  root_node = document.getElementById(paragraphId);

  var range = document.createRange();
  range.setStart(root_node.childNodes[0], start);
  range.setEnd(root_node.childNodes[0], start + length);

  var wrapper = document.createElement("span");
  wrapper.style.cssText = "background-color:#ceff99";
  wrapper.className = alreadyNoteStr;
  wrapper.setAttribute('id', 'already-note-' + noteCounter.toString());
  range.surroundContents(wrapper);
}
for HTML tree node structure, please take a look at the comment below( I didn't figure out how to copy-paste the code at this asking area).
I replaced your method to highlight text with 2 methods. highlightTextNodes finds the word in the content of the node. Searching each child. Also I implemented a highlight remover to show how it works. I replaced the span with a mark tag.
let alreadyNoteStr = 'already';
let noteCounter = 0;
let elementId;
// State shared between the handler and highlightNoteJustSaved(). Declared
// explicitly so the handler does not create implicit globals, which is an
// error in strict mode.
let curSentNum;
let highlightLangName;
let highlightedText;
let curAnchorOffset;

// On mouse-up inside a paragraph with class "noting": remember which
// paragraph was used, copy the sentence number and language name from its id
// (for example "p-2-French") into the form fields, store the selection's
// text, anchor offset and length, and highlight the selected text.
$('p.noting').mouseup(function () {
  elementId = $(this).attr('id');
  $('#noteContent').val(''); /* flushing */

  const idParts = elementId.split('-');
  curSentNum = idParts[1];
  $('#curSentNum').val(curSentNum);
  highlightLangName = idParts[2];
  $('#highlightLangName').val(highlightLangName);

  if (window.getSelection) {
    const selection = window.getSelection();
    highlightedText = selection.toString();
    curAnchorOffset = selection.anchorOffset;
    $('#noteContent').val(highlightedText);
    $('#anchorAt').val(curAnchorOffset);
    $('#highlightLen').val(highlightedText.length);
    // A plain click produces an empty selection. Searching for an empty
    // string matches at every position, so there is nothing to highlight.
    if (highlightedText) {
      highlight(elementId, highlightedText);
    }
  } else if (document.selection && document.selection.type != "Control") {
    // Fallback for browsers that only expose document.selection.
    highlightedText = document.selection.createRange().text;
  }
});
/**
 * Wraps the stored selection of the current paragraph in a highlighted span.
 *
 * The start offset and length are read from the #anchorAt and #highlightLen
 * fields. The paragraph is looked up by the id
 * "p-<curSentNum>-<value of #highlightLangName>" (for example "p-2-French").
 * Both range boundaries are set on the paragraph's first child node, so the
 * stored offset is interpreted relative to that node.
 *
 * Does nothing when the paragraph or its first child is missing, or when the
 * stored offset or length is not a number.
 */
function highlightNoteJustSaved() {
  // Explicit radix: the fields hold decimal numbers.
  const anchorAt = Number.parseInt($('#anchorAt').val(), 10);
  const highlightLen = Number.parseInt($('#highlightLen').val(), 10);

  /* p to find, for example: p-2-French */
  const paragraph = document.getElementById(`p-${curSentNum}-${$('#highlightLangName').val()}`);
  const firstChild = paragraph && paragraph.childNodes[0];
  if (!firstChild || Number.isNaN(anchorAt) || Number.isNaN(highlightLen)) {
    return;
  }

  const range = document.createRange();
  range.setStart(firstChild, anchorAt);
  range.setEnd(firstChild, anchorAt + highlightLen);

  const newNode = document.createElement('span');
  newNode.style.cssText = 'background-color:#ceff99';
  newNode.className = alreadyNoteStr;
  newNode.setAttribute('id', `already-note-${noteCounter}`);
  range.surroundContents(newNode);
}
/*
 * Takes an array of consecutive text nodes and replaces them in the document
 * with nodes in which every case-insensitive occurrence of `word` is wrapped
 * in <mark class="found">.
 *
 * The nodes' text is concatenated before searching, so a match may span
 * several of the given nodes. Nothing is changed when `nodes` is empty,
 * `word` is empty, or the text does not contain `word`.
 */
function highlightTextNodes(nodes, word) {
  // An empty word is "found" at index 0 on every pass, so the search loop
  // below would never terminate.
  if (!nodes.length || !word) {
    return;
  }

  // Concatenate the consecutive nodes to get the actual text
  const text = nodes.map((node) => node.textContent).join('');
  // Tweak these two lines if you want to change the matching behavior
  const haystack = text.toLowerCase();
  const needle = word.toLowerCase();

  let index = haystack.indexOf(needle);
  if (index === -1) {
    return; // no match: leave the existing nodes alone
  }

  const fragment = document.createDocumentFragment();
  let cursor = 0; // start of the text that has not been emitted yet
  while (index !== -1) {
    if (index > cursor) {
      fragment.appendChild(document.createTextNode(text.slice(cursor, index)));
    }
    // Create the <mark>
    const mark = document.createElement('mark');
    mark.className = 'found';
    mark.appendChild(document.createTextNode(text.slice(index, index + word.length)));
    fragment.appendChild(mark);

    cursor = index + word.length;
    index = haystack.indexOf(needle, cursor);
  }
  // If we have leftover text, just append it to the end
  if (cursor < text.length) {
    fragment.appendChild(document.createTextNode(text.slice(cursor)));
  }

  // Replace the nodes with the fragment
  nodes[0].parentNode.insertBefore(fragment, nodes[0]);
  for (const node of nodes) {
    node.parentNode.removeChild(node);
  }
}
/*
 * Highlights all instances of `word` in a node and its children.
 *
 * `id` is either the id of the element to search or the element itself; the
 * function passes the element when it recurses into child elements.
 * Does nothing when the element cannot be found or `word` is empty.
 */
function highlight(id, word) {
  const node = typeof id === 'string' ? document.getElementById(id) : id;
  if (!node || !word) {
    return;
  }

  let currentRun = [];
  // Iterate over a snapshot: highlightTextNodes() replaces children, which
  // would shift the indexes of the live childNodes list while we walk it.
  for (const child of Array.from(node.childNodes)) {
    if (child.nodeType === Node.TEXT_NODE) {
      // Keep track of consecutive text nodes
      currentRun.push(child);
      continue;
    }
    // If we hit a regular element, highlight what we have and start over
    highlightTextNodes(currentRun, word);
    currentRun = [];
    // Ignore text inside of our <mark>s
    if (child.nodeType === Node.ELEMENT_NODE && child.className !== 'found') {
      highlight(child, word);
    }
  }
  // Text nodes at the end of the child list (or only text nodes as children)
  if (currentRun.length) {
    highlightTextNodes(currentRun, word);
  }
}
/*
 * Removes all highlighted <mark class="found"> elements from the element with
 * the given id, replacing each one with a text node of its text, and then
 * merges the resulting adjacent text nodes.
 *
 * Does nothing when no element has that id (for example when the button is
 * pressed before anything was highlighted and the id is still undefined).
 */
function unhighlight(id) {
  const node = document.getElementById(id);
  if (!node) {
    return;
  }
  for (const mark of Array.from(node.querySelectorAll('mark.found'))) {
    // textContent also works for a <mark> that has no child node
    mark.parentNode.replaceChild(document.createTextNode(mark.textContent), mark);
  }
  // Join the text fragments back together so the element ends up with
  // contiguous text nodes again instead of one per former highlight.
  node.normalize();
}
label {
display: block;
position: relative;
padding-left: 100px;
}
button {
margin-top: 20px;
margin-bottom: 20px;
padding: 10px;
}
label>span {
position: absolute;
left: 0;
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<button type="button" onclick="unhighlight(elementId);">Unhighlight</button>
<div id="div-0" class="only-left-border">
<p class="lan-English noting" id="p-1-English">Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much.</p>
</div>
<label><span>Content:</span><input type="text" id="noteContent"></input></label>
<label><span>Numer:</span><input type="text" id="curSentNum"></input></label>
<label><span>Language:</span><input type="text" id="highlightLangName"></input></label>
<label><span>Anchor:</span><input type="text" id="anchorAt"></input></label>
<label><span>Length:</span><input type="text" id="highlightLen"></input></label>

Convert html element to string Javascript (obtained using search, not inner/outerHTML)

I have a JS function which searches for a string in the HTML source, and outputs the parent node:
/**
 * Searches document.body for text nodes whose text matches `searchTerm` and
 * logs the parent element of each matching text node to the console.
 *
 * Subtrees whose combined text does not match are skipped entirely.
 *
 * @param {string|RegExp} searchTerm - Passed to String.prototype.match, so a
 *   string is interpreted as a regular expression pattern.
 * @returns {Element[]} The logged parent elements, one entry per matching
 *   text node.
 */
function searchHTML(searchTerm) {
  // Declared locally: assigning to undeclared names leaks globals and throws
  // a ReferenceError in strict mode.
  var queue = [document.body];
  var found = [];
  var curr;

  while ((curr = queue.pop())) {
    if (!curr.textContent.match(searchTerm)) continue;
    for (var i = 0; i < curr.childNodes.length; ++i) {
      var child = curr.childNodes[i];
      switch (child.nodeType) {
        case Node.TEXT_NODE: // 3
          if (child.textContent.match(searchTerm)) {
            console.log(curr);
            found.push(curr);
          }
          break;
        case Node.ELEMENT_NODE: // 1
          queue.push(child);
          break;
      }
    }
  }
  return found;
}
Currently, its output (in Javascript console) is not a string.
I need to perform regex on the output (curr), so I need it to be a string.
What I have tried:
curr = curr.toString()
curr = curr.replace(/[0-9]/g, "")
You can use .text() jQuery function to get the string from an HTML.
Here is an example of how you get string :
text= curr.text();
curr = text.replace(/[0-9]/g, "");
It seems to me you need to find the commonAncestorContainer for the term searched. That means if the term starts in a node and ends in another, you don't really have a clear definition of the common parent, until you get the a range.
I put together the function below; you can call search('My Term') and it should return a commonAncestorContainer. Some tweaking is still needed to find the same term more than once, and to make sure that a word ending inside an element still treats that element as the parent instead of the next one after the parent.
/**
 * Finds the first case-insensitive occurrence of `searchTerm` in the text of
 * document.body and returns the node that encloses the whole match.
 *
 * The text nodes below document.body are collected in document order, so a
 * match may start in one text node and end in another. A range is placed
 * around the match and its common ancestor is returned; when the match lies
 * inside a single text node, that text node's parent is returned instead.
 *
 * Offsets are computed on the lower-cased text, which assumes that
 * lower-casing does not change the length of the text.
 *
 * @param {string} searchTerm - Text to look for.
 * @returns {Node|null} The enclosing node, or null when `searchTerm` is empty,
 *   not a string, or not found.
 */
var search = function (searchTerm) {
  // Stop if there is nothing to look for
  if (!searchTerm || typeof searchTerm !== 'string')
    return null;

  var needle = searchTerm.toLowerCase();

  // Collect every text node below <body> in document order.
  var textNodes = [];
  (function collect(node) {
    if (node.nodeType === 3) {
      textNodes.push(node);
    } else if (node.nodeType === 1) {
      for (var i = 0, iLen = node.childNodes.length; i < iLen; i++)
        collect(node.childNodes[i]);
    }
  })(document.body);

  var haystack = '';
  for (var t = 0; t < textNodes.length; t++)
    haystack += textNodes[t].nodeValue;
  haystack = haystack.toLowerCase();

  var startOffset = haystack.indexOf(needle);
  if (startOffset === -1)
    return null;
  var endOffset = startOffset + needle.length;

  // Map the two global offsets back to (node, offset-in-node) pairs.
  var startNode = null,
    endNode = null,
    startInNodeOffset = -1,
    endInNodeOffset = -1,
    consumed = 0; // characters contained in the nodes already passed
  for (var k = 0; k < textNodes.length && endNode === null; k++) {
    var nodeEnd = consumed + textNodes[k].nodeValue.length;
    // The match starts in the node that holds its first character ...
    if (startNode === null && startOffset < nodeEnd) {
      startNode = textNodes[k];
      startInNodeOffset = startOffset - consumed;
    }
    // ... and ends in the node that holds its last character, which may be
    // the very same node.
    if (startNode !== null && endOffset <= nodeEnd) {
      endNode = textNodes[k];
      endInNodeOffset = endOffset - consumed;
    }
    consumed = nodeEnd;
  }
  if (startNode === null || endNode === null)
    return null;

  var range = document.createRange();
  range.setStart(startNode, startInNodeOffset);
  range.setEnd(endNode, endInNodeOffset);

  var container = range.commonAncestorContainer;
  return container.nodeType === 3 ? container.parentNode : container;
};
If you are using jQuery, you can try outerHTML to get the string from the commonAncestorContainer.
// search() yields a node, or null when there is no result.
var parentElement = search('Whatever');
var result = '';
if (parentElement !== null) {
  // jQuery objects have no .outerHTML() method; the markup is available as
  // the DOM property "outerHTML", read here through .prop().
  result = $(parentElement).prop('outerHTML');
}
You can create a temporary DOM node, and then append curr to it. Then get the innerHTML and the result will be a string:
var tempNode = document.createElement("div");
// Append a deep copy: appending curr itself would move it out of its current
// place in the document.
tempNode.appendChild(curr.cloneNode(true));
// innerHTML of the wrapper is the markup of curr as a string.
console.log(tempNode.innerHTML);
Working example here:
http://codepen.io/anon/pen/QypxwM

Javascript textcontain get all html tag text as well as script tag text

My goal is to count all the words in an HTML page, as well as the occurrences of a fixed keyword. The problem is that with the function below the text inside script tags is counted too, so how do I exclude script tags when counting keywords?
In this code, MSO_ContentTable is the id of a div tag. A jQuery-based solution would also be welcome, if there is one.
/**
 * Counts the words in the element with id "MSO_ContentTable", ignoring the
 * content of SCRIPT elements.
 *
 * Text is split on whitespace. A word is counted as a keyword hit when it
 * contains `keyword` (case-insensitive, literal comparison). Every keyword
 * hit, and every other word that starts with a letter A-Z or a digit, adds to
 * the total word count.
 *
 * @param {string} keyword - Word to look for.
 * @returns {{count: number, wordCount: number}} `count` is the number of
 *   words containing the keyword, `wordCount` the total number of words.
 *   Both are 0 when the container element does not exist.
 */
function CountWord(keyword) {
  var word = keyword.toUpperCase();
  var root = document.getElementById('MSO_ContentTable');
  var queue = root ? [root] : [];
  var count = 0; // words containing the keyword
  var wc = 0; // all words
  var curr;

  while ((curr = queue.pop())) {
    for (var i = 0; i < curr.childNodes.length; ++i) {
      var child = curr.childNodes[i];
      if (child.nodeName == "SCRIPT") {
        continue; // script source is not page text
      }
      switch (child.nodeType) {
        case 3: // text node
          var words = child.textContent.split(/\s+/);
          for (var k = 0; k < words.length; k++) {
            var upper = words[k].toUpperCase();
            if (!upper) {
              continue; // leading/trailing whitespace yields empty strings
            }
            if (upper.indexOf(word) !== -1) {
              count++;
              wc++;
            } else if (/^[A-Z0-9]/.test(upper)) {
              wc++;
            }
          }
          break; // text nodes have no children to queue
        case 1: // element node
          queue.push(child);
          break;
      }
    }
  }
  return { count: count, wordCount: wc };
}
thx
My other problem: how do I exclude the tags whose display property is none?
In your code:
> queue = [document.getElementById('MSO_ContentTable')],
> curr, count = 0;
>
> while (curr = queue.pop()) {
getElementById will only ever return a single node, so no need to put it in an array and no need to pop it later:
curr = document.getElementById('MSO_ContentTable');
if (curr) {
// do stuff
.
> var check = curr.textContent;
The DOM 3 Core textContent property is not supported by all browsers, you need to offer an alternative such as innerText, e.g.:
// Get the text within an element.
// Doesn't do any normalising, returns a string of text as found.
// The content of script elements is excluded.
//
// element: node whose child nodes are read
// returns: the concatenated data of all descendant text nodes
function getTextRecursive(element) {
  var text = [];
  var el, els = element.childNodes;

  for (var i = 0, iLen = els.length; i < iLen; i++) {
    el = els[i];

    // May need to add other node types here

    // Exclude script element content
    if (el.nodeType == 1 && el.tagName && el.tagName.toLowerCase() != 'script') {
      // Recurse by name: arguments.callee is not available in strict mode.
      text.push(getTextRecursive(el));

    // If working with XML, add nodeType 4 to get text from CDATA nodes
    } else if (el.nodeType == 3) {

      // Deal with extra whitespace and returns in text here.
      text.push(el.data);
    }
  }
  return text.join('');
}
.
> if (check != undefined) {
Given that check will always be a string (even if textContent or innerText are used instead of the above function), testing against undefined doesn't seem appropriate. Also, I don't understand why this test is done before looping over the child nodes.
Anyhow, the getTextRecursive function above will return the text content without script elements, so you can just use that to get the text and then play with it as you want. You may need to normalise whitespace, as different browsers will return different amounts.
PS. I should note that arguments.callee is restricted in ES5 strict mode, so if you plan on using strict mode, replace that expression with an explicit call to the function.
Edit
To exclude not visible elements, you need to test each one to see if it's visible. Only test elements, don't test text nodes as if their parent element is not visible, the text won't be.
Note that the following is not widely tested yet, but works in IE 6 and recent Firefox, Opera and Chrome at least. Please test thoroughly before using more widely.
// The following is mostly from "myLibrary"
// <http://www.cinsoft.net/mylib.html>
//
// Returns the document that `el` belongs to, or null when it cannot be found.
// Uses el.ownerDocument when available; otherwise climbs the parentNode chain
// to the topmost node and inspects that. A document node passed in directly
// is returned as is.
function getElementDocument(el) {
  if (el.ownerDocument) {
    return el.ownerDocument;
  }

  var root = el;
  while (root.parentNode) {
    root = root.parentNode;
  }
  var climbed = root !== el;

  // The topmost node is the document itself. The looser test (no nodeType
  // and no tagName) is only trusted for a node reached by climbing.
  if (root.nodeType == 9 || (climbed && !root.nodeType && !root.tagName)) {
    return root;
  }
  // The topmost node is an element that exposes its document directly.
  if (climbed && root.document && typeof root.tagName == 'string') {
    return root.document;
  }
  return null;
}
// Return true if element is visible, otherwise false
//
// Parts borrowed from "myLibrary"
// <http://www.cinsoft.net/mylib.html>
//
// el: an element, or the id of an element.
// Only the element's own visibility and display values are inspected.
// Returns false when no element with the given id exists, and true when no
// style information can be obtained for the element.
function isVisible(el) {
  var target = (typeof el == 'string') ? document.getElementById(el) : el;
  if (!target) {
    return false; // nothing to show
  }

  var doc = getElementDocument(target);
  var reVis = /\bhidden\b|\bnone\b/;
  var styleObj;

  // DOM compatible
  if (doc && doc.defaultView && doc.defaultView.getComputedStyle) {
    styleObj = doc.defaultView.getComputedStyle(target, null);

  // MS compatible
  } else if (target.currentStyle) {
    styleObj = target.currentStyle;
  }

  // Without style information there is no evidence that the element is hidden.
  if (!styleObj) {
    return true;
  }

  // If either visibility == hidden || display == none
  // then element is not visible
  return !reVis.test(styleObj.visibility + ' ' + styleObj.display);
}

Categories