I have a JS function which searches for a string in the HTML source, and outputs the parent node:
/**
 * Walks the DOM below document.body and logs every element that directly
 * contains a text node matching searchTerm.
 *
 * @param {string|RegExp} searchTerm Pattern handed to String.prototype.match,
 *     so a plain string is interpreted as a regular expression.
 * @returns {undefined} Matches are only reported through console.log.
 */
function searchHTML(searchTerm) {
    // Both names must be declared locally: left undeclared, the bare `curr`
    // reference throws a ReferenceError and `queue` leaks as a global.
    var queue = [document.body],
        curr;

    while (curr = queue.pop()) {
        // Skip whole subtrees whose combined text cannot contain the term.
        if (!curr.textContent.match(searchTerm)) continue;
        for (var i = 0; i < curr.childNodes.length; ++i) {
            var child = curr.childNodes[i];
            switch (child.nodeType) {
                case Node.TEXT_NODE : // 3
                    if (child.textContent.match(searchTerm)) {
                        console.log(curr);
                        // End of search
                    }
                    break;
                case Node.ELEMENT_NODE : // 1
                    queue.push(child);
                    break;
            }
        }
    }
}
Currently, its output (in Javascript console) is not a string.
I need to perform regex on the output (curr), so I need it to be a string.
What I have tried:
// NOTE(review): if curr is still a DOM element at this point, toString()
// presumably yields a generic "[object ...]" tag rather than the element's
// markup, which would leave the regex below with nothing useful to work on.
curr = curr.toString()
curr = curr.replace(/[0-9]/g, "")
You can use jQuery's .text() function to get the text content of an HTML element as a string.
Here is an example of how you get string :
// .text() is a jQuery method, so the DOM node has to be wrapped with $() first.
var text = $(curr).text();
// Strip the digits from the extracted text.
curr = text.replace(/[0-9]/g, "");
It seems to me you need to find the commonAncestorContainer for the term searched. That means if the term starts in one node and ends in another, you don't really have a clear definition of the common parent until you get a range.
I put together the function below, where you can call search('My Term') and it should get a commonAncestorContainer. Some tweaking is still needed to search for the same term more than once, and to make sure that words ending inside an element still treat that element as the parent instead of the next one after the parent.
/**
 * Finds the first case-insensitive occurrence of searchTerm in the text of
 * document.body and returns the element that contains the whole match.
 *
 * The match is located by character offset in document.body.textContent and
 * then mapped back onto text nodes, so a term that starts in one node and ends
 * in another resolves to the closest ancestor the two nodes share.
 *
 * @param {string} searchTerm Text to look for.
 * @returns {Node|null} The containing node (never a text node), or null when
 *     searchTerm is empty, not a string, or not present in the page.
 */
var search = function (searchTerm) {
    // Stop if there is nothing to look for
    if (!searchTerm || typeof searchTerm !== 'string')
        return null;

    var needle = searchTerm.toLowerCase(),
        startOffset = document.body.textContent.toLowerCase().indexOf(needle);

    // Term is not on the page: no need to walk the tree at all.
    if (startOffset === -1)
        return null;

    var endOffset = startOffset + needle.length,
        position = {
            index: 0, // characters consumed by the text nodes visited so far
            startNode: null,
            startInNodeOffset: -1,
            endNode: null,
            endInNodeOffset: -1
        };

    // Visits text nodes in document order and returns true as soon as both
    // ends of the match are located, which stops the whole traversal.
    function locate(node) {
        if (node.nodeType === 1) {
            var childNodes = node.childNodes;
            for (var i = 0, iLen = childNodes.length; i < iLen; i++) {
                if (locate(childNodes[i]))
                    return true;
            }
            return false;
        }
        if (node.nodeType !== 3)
            return false;

        // NOTE: offsets come from the lower-cased body text; for the rare
        // characters whose lower-case form has a different length they can
        // drift from the per-node lengths used here.
        var nodeEnd = position.index + (node.nodeValue || '').length;

        // The match starts in the node that holds its first character.
        if (position.startNode === null && startOffset < nodeEnd) {
            position.startNode = node;
            position.startInNodeOffset = startOffset - position.index;
        }
        // The end is checked against the same node too, so a term that lies
        // entirely inside one text node is resolved correctly.
        if (position.startNode !== null && endOffset <= nodeEnd) {
            position.endNode = node;
            position.endInNodeOffset = endOffset - position.index;
            return true;
        }
        position.index = nodeEnd;
        return false;
    }

    if (!locate(document.body))
        return null;

    var range = document.createRange();
    range.setStart(position.startNode, position.startInNodeOffset);
    range.setEnd(position.endNode, position.endInNodeOffset);

    // A range inside a single text node reports that text node as its common
    // ancestor; callers want the surrounding element.
    var container = range.commonAncestorContainer;
    return container.nodeType === 3 ? container.parentNode : container;
};
If you are using jQuery, you can try outerHTML to get the string from the commonAncestorContainer.
// jQuery has no .outerHTML() method; read the DOM property through .prop().
var parentElement = search('Whatever'),
    result = '';
if (parentElement !== null)
    result = $(parentElement).prop('outerHTML');
You can create a temporary DOM node, and then append curr to it. Then get the innerHTML and the result will be a string:
// Serialize curr by placing a deep copy inside a detached container.
// Appending curr itself would move it out of the page.
var tempNode = document.createElement("div");
tempNode.appendChild(curr.cloneNode(true));
// The container is named tempNode; `temp` was never defined.
console.log(tempNode.innerHTML);
Working example here:
http://codepen.io/anon/pen/QypxwM
Related
I'm making a text-searching mechanism (like ⌘ + F) for an iOS app and It's working but I have two issues.
Whenever someone searches something in Arabic, the word becomes disconnected.
Users can't find text that contains diacritics when their search term does not include them (so basically I'm trying to make the search diacritic-insensitive).
Here's the code for my highlighting (which I found from this):
// Running total of highlighted matches; the span ids are derived from it.
var uiWebview_SearchResultCount = 0;
/*!
#method uiWebview_HighlightAllOccurencesOfStringForElement
#abstract // helper function, recursively searches in elements and their child nodes
#discussion // wraps every occurrence of keyword found in text nodes below element
               in a <span class="uiWebviewHighlight"> and adds the number of
               matches to uiWebview_SearchResultCount
element - HTML element or text node to search
keyword - string to search; expected to be lower-cased already, because it is
          compared against the lower-cased node text
*/
function uiWebview_HighlightAllOccurencesOfStringForElement(element,keyword) {
    // An empty keyword is "found" at index 0 of every string, which would
    // make the loops below spin forever; treat it as nothing to highlight.
    if (!element || !keyword) {
        return;
    }
    if (element.nodeType == 3) { // Text node
        // First pass: count the non-overlapping matches, so that ids can be
        // handed out in descending order below.
        var count = 0;
        var remaining = element.nodeValue;
        while (true) {
            var found = remaining.toLowerCase().indexOf(keyword);
            if (found < 0) break;
            count++;
            remaining = remaining.slice(found + keyword.length);
        }
        uiWebview_SearchResultCount += count;
        var index = uiWebview_SearchResultCount;
        // Second pass: split the text node around each match.
        while (true) {
            var value = element.nodeValue; // Search for keyword in text node
            var idx = value.toLowerCase().indexOf(keyword);
            if (idx < 0) break; // not found, abort
            // we create a SPAN element for every part of matched keywords
            var span = document.createElement("span");
            var text = document.createTextNode(value.slice(idx, idx + keyword.length));
            var spacetxt = document.createTextNode("\u200D"); // zero-width joiner after the match
            span.appendChild(text);
            span.appendChild(spacetxt);
            span.setAttribute("class","uiWebviewHighlight");
            span.style.backgroundColor="#007DC8a3";
            span.style.borderRadius="3px";
            index--;
            span.setAttribute("id", "SEARCH WORD"+(index));
            // Everything after the match moves to a new text node, which is
            // then searched in the next iteration.
            text = document.createTextNode(value.slice(idx + keyword.length));
            element.deleteData(idx, value.length - idx);
            var next = element.nextSibling;
            element.parentNode.insertBefore(span, next);
            element.parentNode.insertBefore(text, next);
            element = text;
        }
    } else if (element.nodeType == 1) { // Element node
        if (element.style.display != "none" && element.nodeName.toLowerCase() != 'select') {
            // Children are visited last to first.
            for (var i=element.childNodes.length-1; i>=0; i--) {
                uiWebview_HighlightAllOccurencesOfStringForElement(element.childNodes[i],keyword);
            }
        }
    }
}
// Main entry point for a search: clears the highlights of any previous
// search, then highlights every occurrence of keyword below document.body.
// The keyword is lower-cased here because the helper compares it against
// lower-cased node text.
function uiWebview_HighlightAllOccurencesOfString(keyword) {
    uiWebview_RemoveAllHighlights();
    var root = document.body;
    var loweredKeyword = keyword.toLowerCase();
    uiWebview_HighlightAllOccurencesOfStringForElement(root, loweredKeyword);
}
// Helper: walks the subtree below `element` and unwraps every highlight span.
// Returns true only when `element` itself was a highlight span (it has then
// been replaced by its text); returns false in every other case.
function uiWebview_RemoveAllHighlightsForElement(element) {
    if (!element || element.nodeType != 1) {
        return false;
    }

    if (element.getAttribute("class") == "uiWebviewHighlight") {
        // Only the first child (the matched text) is put back; anything else
        // inside the span is discarded together with the span.
        var matchedText = element.removeChild(element.firstChild);
        element.parentNode.insertBefore(matchedText, element);
        element.parentNode.removeChild(element);
        return true;
    }

    // Children are visited last to first.
    var unwrappedAny = false;
    var position = element.childNodes.length;
    while (position-- > 0) {
        if (uiWebview_RemoveAllHighlightsForElement(element.childNodes[position])) {
            unwrappedAny = true;
        }
    }
    // Merge the text fragments left behind by unwrapped spans.
    if (unwrappedAny) {
        element.normalize();
    }
    return false;
}
// Main entry point for clearing a search: resets the match counter first,
// then unwraps every highlight span below document.body.
function uiWebview_RemoveAllHighlights() {
    uiWebview_SearchResultCount = 0;
    var root = document.body;
    uiWebview_RemoveAllHighlightsForElement(root);
}
// Scrolls one highlight span into view. The span id is built from the
// current match counter minus idx; nothing happens when no element has
// that id.
function uiWebview_ScrollTo(idx) {
    var spanId = "SEARCH WORD" + (uiWebview_SearchResultCount - idx);
    var target = document.getElementById(spanId);
    if (!target) {
        return;
    }
    target.scrollIntoView();
}
and I also found this that actually does exactly what I want (does not disconnect words and is diacritic-insensitive) but it's in JQuery and I couldn't figure out how to implement it in my code.
Instead of using indexOf, you can convert the string to an NSString and then use range(of:options:):
// Swift (native side), not JavaScript: a case- and diacritic-insensitive lookup of keyword in value.
var range = value.range(of: keyword, options: [.caseInsensitive, .diacriticInsensitive])
I'm trying to scan a document using JavaScript and replace all occurrences of [x] with its HTML counterpart, such as <b>[x] </b>, where x is a number. This is what I have so far, which does add the tag around the target text, but as HTML entities, not actual HTML:
// Recursively visits `node` and its descendants. In every text node, each
// "[digits]" token is rewritten to "<b>[digits]</b>". The result is assigned
// to the text node's data, so the tags end up as literal text, not markup.
// Children of SCRIPT elements are not visited.
function walkText(node) {
    var kind = node.nodeType;

    if (kind == 3) {
        node.data = node.data.replace(/\[(\d+)\]/g, '<b>[$1]</b>');
        return;
    }
    if (kind != 1 || node.nodeName == "SCRIPT") {
        return;
    }

    var index = 0;
    while (index < node.childNodes.length) {
        walkText(node.childNodes[index]);
        index++;
    }
}
walkText(document.body);
The end result is just the text <b> wrapped around the text, and not actually bolding them. Does anyone know an efficient way to actually search for certain text in a long document and actually replace the target text with HTML?
TextNode objects contain only text content without any HTML or XML markup.
So replacing them with markup is not as easy as it first appears.
/**
 * Recursively visits `node` and its descendants and wraps every run of digits
 * found in a text node in a <b> element. A text node that changes is replaced
 * by a <span> holding the plain and bold pieces.
 *
 * The pieces are built with createElement/createTextNode rather than
 * innerHTML, so characters such as "<" or "&" in the page text stay text and
 * are never re-parsed as markup.
 *
 * @param {Node} node Root of the subtree to process. Children of SCRIPT
 *     elements are not visited.
 */
function walkText(node) {
    if (node.nodeType === 3) {
        if (!node.data.trim().length) {
            return;
        }
        // The capturing group keeps the digit runs in the split result.
        const parts = node.data.split(/(\d+)/).filter(Boolean);
        // Fewer than two parts means there are no digits, or the node is
        // nothing but one number. Skipping the latter also keeps text that
        // already sits inside a generated <b> from being wrapped again when
        // the function is run a second time.
        if (parts.length < 2) {
            return;
        }
        const span = document.createElement('span');
        parts.forEach(part => {
            const piece = document.createTextNode(part);
            if (/\d/.test(part)) {
                const bold = document.createElement('b');
                bold.appendChild(piece);
                span.appendChild(bold);
            } else {
                span.appendChild(piece);
            }
        });
        node.parentNode.replaceChild(span, node);
        return;
    }
    if (node.nodeType === 1 && node.nodeName !== 'SCRIPT') {
        // The span replaces the text node at the same index, so the loop
        // simply moves on to the next sibling.
        for (let i = 0; i < node.childNodes.length; i++) {
            walkText(node.childNodes[i]);
        }
    }
}
walkText(document.body);
<div>
hello 113 world at 3
</div>
If you just want to make the element bold, replace
node.data = node.data.replace(/\[(\d+)\]/g, '<b>[$1]</b>');
with
node.style.fontWeight='bold';
If you want to replace the contents of the element with some other html, do this:
node.innerHTML = "<mytag>" + node.innerHTML + "</mytag>";
I am doing some cleaning up of an html page by removing anchor and just leaving the text node, wrapping all the text nodes (no elements surrounding it) with the tag <asdf>, remove all empty elements like <div></div> or <span> </span>.
When I try it on different websites, it seems to have different levels of success when I copy paste the entire script. However, when I run it chunk by chunk, it works as expected and no error is thrown.
// Step 1: replace every anchor with its text (markup dropped, text kept).
$('a').replaceWith(function() {
    return $.text([this]);
});

// Step 2: wrap loose text nodes in <asdf> elements.
// The collection is walked from its last entry to its first.
var items = window.document.getElementsByTagName("*");
for (var i = items.length; i--;) {
    wrap(items[i]);
}

// Replaces each text-node child of el with an <asdf> element holding the same
// text. A text node that is the only child of el is left alone.
function wrap(el) {
    var children = el.childNodes;
    for (var index = 0; index < children.length; index++) {
        var child = children[index];
        if (child.nodeName !== "#text" || children.length === 1) {
            continue;
        }
        var wrapper = document.createElement("asdf");
        wrapper.textContent = child.nodeValue;
        child.parentNode.replaceChild(wrapper, child);
    }
}

// Step 3: hide elements without visible text, then remove the ones that also
// have no child elements.
$("*").filter(function () {
    var trimmedText = $.trim($(this).text());
    return !trimmedText.length;
}).hide();
$('*').filter(function() {
    if ($.trim($(this).text()) !== '') {
        return false;
    }
    return $(this).children().length == 0;
}).remove();
It throws an error like
NotFoundError: An attempt was made to reference a Node in a context where it does not exist.
this is caused by:
$('a').replaceWith(function() {
return $.text([this]);
});
so maybe if I fix that, it will work.
Did you test the script by having it written all in one line:
// Same steps as the script in the question, kept on one line; here the replaceWith callback returns a text node (document.createTextNode) instead of a bare string.
$('a').replaceWith(function() { return document.createTextNode($.text([this]));}); var items = window.document.getElementsByTagName("*"); for (var i = items.length; i--;) { wrap(items[i]) }; function wrap(el){ var oDiv = el; for (var i = 0; i < oDiv.childNodes.length; i++) { var curNode = oDiv.childNodes[i]; if (curNode.nodeName === "#text" && oDiv.childNodes.length !== 1) { var firstText = curNode; var newNode = document.createElement("asdf"); newNode.textContent = firstText.nodeValue; firstText.parentNode.replaceChild(newNode, firstText); } } };$("*").filter(function () { return !($.trim($(this).text()).length);}).hide();$('*').filter(function() { return $.trim($(this).text()) === '' && $(this).children().length == 0;}).remove();
On Chrome it worked everywhere I tested and jQuery was present.
I am just wondering if there is a simple solution already to the problem of turning selected content in tinymce to upperCase letters.
Anyone got a solution?
PS: The upperCase-function is known, but won't solve the tinymce setting of selected content alone.
This is what i came up with after some fiddling
// check if a node intersects the given range
rangeIntersectsNode: function (range, node) {
var nodeRange;
if (range.intersectsNode) {
return range.intersectsNode(node);
}
else {
nodeRange = node.ownerDocument.createRange();
try {
nodeRange.selectNode(node);
} catch (e) {
nodeRange.selectNodeContents(node);
}
return range.compareBoundaryPoints(Range.END_TO_START, nodeRange) == -1 &&
range.compareBoundaryPoints(Range.START_TO_END, nodeRange) == 1;
}
},
// Tinymce-Shortcut: (cmd/ctrl + shift +a)
if ( ( (mac && evt.metaKey)|| (!mac && evt.ctrlKey)) && evt.shiftKey && evt.keyCode == 65 ){
if (!ed.selection.isCollapsed()) {
var selection = ed.getWin().getSelection(); // user selection
var range = selection.getRangeAt(0); // erste range
var start = range.startContainer;
var start_offset = range.startOffset;
var end = range.endContainer;
var end_offset = range.endOffset;
// Get all textnodes of the common ancestor
// Check foreach of those textnodes if they are inside the selection
// StartContainer and EndContainer may be partially inside the selection (if textnodes)
// concatenate those textnode parts and make toUppercase the selected part only
// all textnodes inbetween need to become upperCased (the nodeContents)
// Selection needs to be reset afterwards.
var textnodes = t.getTextNodes(range.commonAncestorContainer);
for (var i=0; i<textnodes.length; i++) {
if (t.rangeIntersectsNode(range, textnodes[i])){
if (textnodes[i] == start && textnodes[i] == end) {
var text_content = start.textContent;
text_content = start.textContent.substring(0, start_offset) + text_content.substring(start_offset, end_offset).toUpperCase() + end.textContent.substring(end_offset);
textnodes[i].nodeValue = text_content;
}
else if (textnodes[i] == start){
var text_content = start.textContent.substring(0, start_offset) + start.textContent.substring(start_offset).toUpperCase();
textnodes[i].nodeValue = text_content;
}
else if (textnodes[i] == end){
var text_content = end.textContent.substring(0, end_offset).toUpperCase() + end.textContent.substring(end_offset);
textnodes[i].nodeValue = text_content;
}
else {
// Textnodes between Start- and Endcontainer
textnodes[i].nodeValue = textnodes[i].nodeValue.toUpperCase();
}
}
}
// reset selection
var r = ed.selection.dom.createRng();
r.setStart(start, start_offset);
r.setEnd(end, end_offset);
ed.selection.setRng(r);
evt.preventDefault();
return false;
}
}
My goal is to count all the words in an HTML page, and also to count the occurrences of one given word. The problem is that the function below also counts the text inside script tags, so how do I exclude script tag content from the count?
In this code, MSO_ContentTable is the id of a div tag. A jQuery-based solution would also be welcome.
/**
 * Counts the words, and the words containing `keyword`, in the text below the
 * element with id "MSO_ContentTable". Text inside <script> elements is ignored.
 *
 * A token counts as a word when it contains the keyword (case-insensitive,
 * literal comparison) or when it starts with an ASCII letter or digit.
 *
 * @param {string} keyword Word to look for; an empty keyword matches nothing.
 * @returns {{count: number, wc: number}} count = tokens containing the
 *     keyword, wc = total number of words. Both are 0 when the container
 *     element does not exist.
 */
function CountWord(keyword) {
    var word = keyword.toUpperCase(),
        queue = [document.getElementById('MSO_ContentTable')],
        count = 0,
        wc = 0, // was never declared, so the first wc++ threw a ReferenceError
        curr;

    while (curr = queue.pop()) {
        for (var i = 0; i < curr.childNodes.length; ++i) {
            var child = curr.childNodes[i];

            if (child.nodeType === 1) {
                // Only elements are queued, and script elements are skipped.
                if (child.nodeName.toUpperCase() !== 'SCRIPT') {
                    queue.push(child);
                }
                continue;
            }
            if (child.nodeType !== 3) {
                continue;
            }

            // Split on any whitespace so that line breaks and tabs separate
            // words as well as spaces do.
            var tokens = child.nodeValue.split(/\s+/);
            for (var k = 0; k < tokens.length; k++) {
                var upper = tokens[k].toUpperCase();
                if (!upper) {
                    continue;
                }
                // indexOf compares literally; String.prototype.match would
                // treat the keyword as a regular expression.
                if (word && upper.indexOf(word) !== -1) {
                    count++;
                    wc++;
                } else if (/^[A-Z0-9]/.test(upper)) {
                    wc++;
                }
            }
        }
    }
    return { count: count, wc: wc };
}
thx
Another problem: how do I exclude the elements whose display property is none?
In your code:
> queue = [document.getElementById('MSO_ContentTable')],
> curr, count = 0;
>
> while (curr = queue.pop()) {
getElementById will only ever return a single node, so no need to put it in an array and no need to pop it later:
curr = document.getElementById('MSO_ContentTable');
if (curr) {
// do stuff
.
> var check = curr.textContent;
The DOM 3 Core textContent property is not supported by all browsers, you need to offer an alternative such as innerText, e.g.:
// Get the text within an element
// Doesn't do any normalising, returns a string
// of text as found. Content of script elements is left out.
//
// element - node whose descendants are read
// returns - concatenated data of all text nodes found, in document order
function getTextRecursive(element) {
  var text = [];
  var el, els = element.childNodes;

  for (var i=0, iLen=els.length; i<iLen; i++) {
    el = els[i];

    // May need to add other node types here
    // Exclude script element content
    if (el.nodeType == 1 && el.tagName && el.tagName.toLowerCase() != 'script') {
      // Recurse by name: arguments.callee throws a TypeError in strict mode.
      text.push(getTextRecursive(el));

    // If working with XML, add nodeType 4 to get text from CDATA nodes
    } else if (el.nodeType == 3) {
      // Deal with extra whitespace and returns in text here.
      text.push(el.data);
    }
  }
  return text.join('');
}
.
> if (check != undefined) {
Given that check will always be a string (even if textContent or innerText are used instead of the above function), testing against undefined doesn't seem appropriate. Also, I don't understand why this test is done before looping over the child nodes.
Anyhow, the getTextRecursive function above will return the text content without script elements, so you can just use that to get the text and then work with it as you want. You may need to normalise whitespace, as different browsers will return different amounts.
PS. I should note that arguments.callee is restricted in ES5 strict mode, so if you plan on using strict mode, replace that expression with an explicit call to the function.
Edit
To exclude not visible elements, you need to test each one to see if it's visible. Only test elements, don't test text nodes as if their parent element is not visible, the text won't be.
Note that the following is not widely tested yet, but works in IE 6 and recent Firefox, Opera and Chrome at least. Please test thoroughly before using more widely.
// The following is mostly from "myLibrary"
// <http://www.cinsoft.net/mylib.html>
//
// Finds the document an element belongs to.
// Returns el.ownerDocument when it is set. Otherwise climbs the parentNode
// chain to the topmost node and returns that node when it looks like a
// document, or its `document` property when the node has a string tagName;
// returns null when neither applies. An element with neither ownerDocument
// nor parentNode yields undefined.
function getElementDocument(el) {
  var owner = el.ownerDocument;
  if (owner) {
    return owner;
  }
  if (!el.parentNode) {
    return;
  }

  // Climb to the topmost ancestor.
  var top = el;
  do {
    top = top.parentNode;
  } while (top.parentNode);

  var looksLikeDocument = top.nodeType == 9 || (!top.nodeType && !top.tagName);
  if (looksLikeDocument) {
    return top;
  }
  return (top.document && typeof top.tagName == 'string') ? top.document : null;
}
// Return true if element is visible, otherwise false
//
// Parts borrowed from "myLibrary"
// <http://www.cinsoft.net/mylib.html>
//
// el - an element, or the id of one
// Only the element's own visibility and display values are inspected.
// Returns false when el (or the element for the given id) does not exist,
// and true when no style information can be obtained for it.
function isVisible(el) {

  if (typeof el == 'string') el = document.getElementById(el);

  // Unknown id or missing element: there is nothing that could be visible.
  if (!el) return false;

  var doc = getElementDocument(el);
  var reVis = /\bhidden\b|\bnone\b/;
  var styleObj;

  // DOM compatible
  if (doc && doc.defaultView && doc.defaultView.getComputedStyle) {
    styleObj = doc.defaultView.getComputedStyle(el, null);

  // MS compatible
  } else if (el.currentStyle) {
    styleObj = el.currentStyle;
  }

  // Neither API produced a style object: it cannot be shown to be hidden,
  // so report it as visible instead of throwing on the property access.
  if (!styleObj) return true;

  // If either visibility == hidden || display == none
  // then element is not visible
  return !reVis.test(styleObj.visibility + ' ' + styleObj.display);
}